diff options
Diffstat (limited to 'conf.d')
24 files changed, 1223 insertions, 619 deletions
diff --git a/conf.d/Makefile.am b/conf.d/Makefile.am index b725e249..efe1f2a6 100644 --- a/conf.d/Makefile.am +++ b/conf.d/Makefile.am @@ -11,6 +11,7 @@ dist_config_DATA = \ python.d.conf \ health_alarm_notify.conf \ health_email_recipients.conf \ + stream.conf \ $(NULL) nodeconfigdir=$(configdir)/node.d @@ -33,16 +34,16 @@ dist_pythonconfig_DATA = \ python.d/exim.conf \ python.d/fail2ban.conf \ python.d/freeradius.conf \ - python.d/gunicorn_log.conf \ python.d/haproxy.conf \ python.d/hddtemp.conf \ python.d/ipfs.conf \ python.d/isc_dhcpd.conf \ python.d/mdstat.conf \ python.d/memcached.conf \ + python.d/mongodb.conf \ python.d/mysql.conf \ python.d/nginx.conf \ - python.d/nginx_log.conf \ + python.d/nsd.conf \ python.d/ovpn_status_log.conf \ python.d/phpfpm.conf \ python.d/postfix.conf \ @@ -51,43 +52,54 @@ dist_pythonconfig_DATA = \ python.d/retroshare.conf \ python.d/sensors.conf \ python.d/squid.conf \ + python.d/smartd_log.conf \ python.d/tomcat.conf \ python.d/varnish.conf \ + python.d/web_log.conf \ $(NULL) healthconfigdir=$(configdir)/health.d + dist_healthconfig_DATA = \ health.d/apache.conf \ health.d/backend.conf \ health.d/bind_rndc.conf \ - health.d/cpu.conf \ - health.d/disks.conf \ health.d/elasticsearch.conf \ - health.d/entropy.conf \ + health.d/fping.conf \ health.d/haproxy.conf \ - health.d/ipc.conf \ health.d/ipfs.conf \ + health.d/ipmi.conf \ health.d/isc_dhcpd.conf \ health.d/mdstat.conf \ health.d/memcached.conf \ - health.d/memory.conf \ health.d/mysql.conf \ health.d/named.conf \ - health.d/net.conf \ - health.d/netfilter.conf \ health.d/nginx.conf \ health.d/postgres.conf \ - health.d/qos.conf \ - health.d/ram.conf \ health.d/redis.conf \ health.d/retroshare.conf \ - health.d/softnet.conf \ health.d/squid.conf \ + health.d/varnish.conf \ + health.d/web_log.conf \ + $(NULL) + +if LINUX +dist_healthconfig_DATA += \ + health.d/cpu.conf \ + health.d/disks.conf \ + health.d/entropy.conf \ + health.d/ipc.conf \ + health.d/memory.conf \ + 
health.d/net.conf \ + health.d/netfilter.conf \ + health.d/qos.conf \ + health.d/ram.conf \ + health.d/softnet.conf \ health.d/swap.conf \ health.d/tcp_resets.conf \ health.d/udp_errors.conf \ - health.d/varnish.conf \ $(NULL) +endif LINUX chartsconfigdir=$(configdir)/charts.d dist_chartsconfig_DATA = \ diff --git a/conf.d/Makefile.in b/conf.d/Makefile.in index 344f1c41..fb05396f 100644 --- a/conf.d/Makefile.in +++ b/conf.d/Makefile.in @@ -1,8 +1,9 @@ -# Makefile.in generated by automake 1.15 from Makefile.am. +# Makefile.in generated by automake 1.11.3 from Makefile.am. # @configure_input@ -# Copyright (C) 1994-2014 Free Software Foundation, Inc. - +# Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, +# 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software +# Foundation, Inc. # This Makefile.in is free software; the Free Software Foundation # gives unlimited permission to copy and/or distribute it, # with or without modifications, as long as this notice is preserved. @@ -15,61 +16,6 @@ @SET_MAKE@ VPATH = @srcdir@ -am__is_gnu_make = { \ - if test -z '$(MAKELEVEL)'; then \ - false; \ - elif test -n '$(MAKE_HOST)'; then \ - true; \ - elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ - true; \ - else \ - false; \ - fi; \ -} -am__make_running_with_option = \ - case $${target_option-} in \ - ?) 
;; \ - *) echo "am__make_running_with_option: internal error: invalid" \ - "target option '$${target_option-}' specified" >&2; \ - exit 1;; \ - esac; \ - has_opt=no; \ - sane_makeflags=$$MAKEFLAGS; \ - if $(am__is_gnu_make); then \ - sane_makeflags=$$MFLAGS; \ - else \ - case $$MAKEFLAGS in \ - *\\[\ \ ]*) \ - bs=\\; \ - sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ - | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ - esac; \ - fi; \ - skip_next=no; \ - strip_trailopt () \ - { \ - flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ - }; \ - for flg in $$sane_makeflags; do \ - test $$skip_next = yes && { skip_next=no; continue; }; \ - case $$flg in \ - *=*|--*) continue;; \ - -*I) strip_trailopt 'I'; skip_next=yes;; \ - -*I?*) strip_trailopt 'I';; \ - -*O) strip_trailopt 'O'; skip_next=yes;; \ - -*O?*) strip_trailopt 'O';; \ - -*l) strip_trailopt 'l'; skip_next=yes;; \ - -*l?*) strip_trailopt 'l';; \ - -[dEDm]) skip_next=yes;; \ - -[JT]) skip_next=yes;; \ - esac; \ - case $$flg in \ - *$$target_option*) has_opt=yes; break;; \ - esac; \ - done; \ - test $$has_opt = yes -am__make_dryrun = (target_option=n; $(am__make_running_with_option)) -am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) pkgdatadir = $(datadir)/@PACKAGE@ pkgincludedir = $(includedir)/@PACKAGE@ pkglibdir = $(libdir)/@PACKAGE@ @@ -88,10 +34,30 @@ PRE_UNINSTALL = : POST_UNINSTALL = : build_triplet = @build@ host_triplet = @host@ +@LINUX_TRUE@am__append_1 = \ +@LINUX_TRUE@ health.d/cpu.conf \ +@LINUX_TRUE@ health.d/disks.conf \ +@LINUX_TRUE@ health.d/entropy.conf \ +@LINUX_TRUE@ health.d/ipc.conf \ +@LINUX_TRUE@ health.d/memory.conf \ +@LINUX_TRUE@ health.d/net.conf \ +@LINUX_TRUE@ health.d/netfilter.conf \ +@LINUX_TRUE@ health.d/qos.conf \ +@LINUX_TRUE@ health.d/ram.conf \ +@LINUX_TRUE@ health.d/softnet.conf \ +@LINUX_TRUE@ health.d/swap.conf \ +@LINUX_TRUE@ health.d/tcp_resets.conf \ +@LINUX_TRUE@ health.d/udp_errors.conf \ +@LINUX_TRUE@ $(NULL) + subdir = conf.d +DIST_COMMON = 
$(am__dist_healthconfig_DATA_DIST) \ + $(dist_chartsconfig_DATA) $(dist_config_DATA) \ + $(dist_nodeconfig_DATA) $(dist_pythonconfig_DATA) \ + $(srcdir)/Makefile.am $(srcdir)/Makefile.in ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/m4/ax_c___atomic.m4 \ - $(top_srcdir)/m4/ax_c__generic.m4 \ + $(top_srcdir)/m4/ax_c__generic.m4 $(top_srcdir)/m4/ax_c_lto.m4 \ $(top_srcdir)/m4/ax_c_mallinfo.m4 \ $(top_srcdir)/m4/ax_c_mallopt.m4 \ $(top_srcdir)/m4/ax_check_compile_flag.m4 \ @@ -100,33 +66,12 @@ am__aclocal_m4_deps = $(top_srcdir)/m4/ax_c___atomic.m4 \ $(top_srcdir)/m4/tcmalloc.m4 $(top_srcdir)/configure.ac am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ $(ACLOCAL_M4) -DIST_COMMON = $(srcdir)/Makefile.am $(dist_chartsconfig_DATA) \ - $(dist_config_DATA) $(dist_healthconfig_DATA) \ - $(dist_nodeconfig_DATA) $(dist_pythonconfig_DATA) \ - $(am__DIST_COMMON) mkinstalldirs = $(install_sh) -d CONFIG_HEADER = $(top_builddir)/config.h CONFIG_CLEAN_FILES = CONFIG_CLEAN_VPATH_FILES = -AM_V_P = $(am__v_P_@AM_V@) -am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) -am__v_P_0 = false -am__v_P_1 = : -AM_V_GEN = $(am__v_GEN_@AM_V@) -am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) -am__v_GEN_0 = @echo " GEN " $@; -am__v_GEN_1 = -AM_V_at = $(am__v_at_@AM_V@) -am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) -am__v_at_0 = @ -am__v_at_1 = SOURCES = DIST_SOURCES = -am__can_run_installinfo = \ - case $$AM_UPDATE_INFO_DIR in \ - n|no|NO) false;; \ - *) (install-info --version) >/dev/null 2>&1;; \ - esac am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ @@ -157,15 +102,26 @@ am__uninstall_files_from_dir = { \ am__installdirs = "$(DESTDIR)$(chartsconfigdir)" \ "$(DESTDIR)$(configdir)" "$(DESTDIR)$(healthconfigdir)" \ "$(DESTDIR)$(nodeconfigdir)" "$(DESTDIR)$(pythonconfigdir)" +am__dist_healthconfig_DATA_DIST = health.d/apache.conf \ + health.d/backend.conf 
health.d/bind_rndc.conf \ + health.d/elasticsearch.conf health.d/fping.conf \ + health.d/haproxy.conf health.d/ipfs.conf health.d/ipmi.conf \ + health.d/isc_dhcpd.conf health.d/mdstat.conf \ + health.d/memcached.conf health.d/mysql.conf \ + health.d/named.conf health.d/nginx.conf health.d/postgres.conf \ + health.d/redis.conf health.d/retroshare.conf \ + health.d/squid.conf health.d/varnish.conf \ + health.d/web_log.conf health.d/cpu.conf health.d/disks.conf \ + health.d/entropy.conf health.d/ipc.conf health.d/memory.conf \ + health.d/net.conf health.d/netfilter.conf health.d/qos.conf \ + health.d/ram.conf health.d/softnet.conf health.d/swap.conf \ + health.d/tcp_resets.conf health.d/udp_errors.conf DATA = $(dist_chartsconfig_DATA) $(dist_config_DATA) \ $(dist_healthconfig_DATA) $(dist_nodeconfig_DATA) \ $(dist_pythonconfig_DATA) -am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) -am__DIST_COMMON = $(srcdir)/Makefile.in DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) ACLOCAL = @ACLOCAL@ AMTAR = @AMTAR@ -AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ AUTOCONF = @AUTOCONF@ AUTOHEADER = @AUTOHEADER@ AUTOMAKE = @AUTOMAKE@ @@ -189,7 +145,11 @@ INSTALL_DATA = @INSTALL_DATA@ INSTALL_PROGRAM = @INSTALL_PROGRAM@ INSTALL_SCRIPT = @INSTALL_SCRIPT@ INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +IPMIMONITORING_CFLAGS = @IPMIMONITORING_CFLAGS@ +IPMIMONITORING_LIBS = @IPMIMONITORING_LIBS@ LDFLAGS = @LDFLAGS@ +LIBCAP_CFLAGS = @LIBCAP_CFLAGS@ +LIBCAP_LIBS = @LIBCAP_LIBS@ LIBMNL_CFLAGS = @LIBMNL_CFLAGS@ LIBMNL_LIBS = @LIBMNL_LIBS@ LIBOBJS = @LIBOBJS@ @@ -203,6 +163,10 @@ MKDIR_P = @MKDIR_P@ NFACCT_CFLAGS = @NFACCT_CFLAGS@ NFACCT_LIBS = @NFACCT_LIBS@ OBJEXT = @OBJEXT@ +OPTIONAL_IPMIMONITORING_CFLAGS = @OPTIONAL_IPMIMONITORING_CFLAGS@ +OPTIONAL_IPMIMONITORING_LIBS = @OPTIONAL_IPMIMONITORING_LIBS@ +OPTIONAL_LIBCAP_CFLAGS = @OPTIONAL_LIBCAP_CFLAGS@ +OPTIONAL_LIBCAP_LIBS = @OPTIONAL_LIBCAP_LIBS@ OPTIONAL_MATH_CLFAGS = @OPTIONAL_MATH_CLFAGS@ 
OPTIONAL_MATH_LIBS = @OPTIONAL_MATH_LIBS@ OPTIONAL_NFACCT_CLFAGS = @OPTIONAL_NFACCT_CLFAGS@ @@ -312,6 +276,7 @@ dist_config_DATA = \ python.d.conf \ health_alarm_notify.conf \ health_email_recipients.conf \ + stream.conf \ $(NULL) nodeconfigdir = $(configdir)/node.d @@ -334,16 +299,16 @@ dist_pythonconfig_DATA = \ python.d/exim.conf \ python.d/fail2ban.conf \ python.d/freeradius.conf \ - python.d/gunicorn_log.conf \ python.d/haproxy.conf \ python.d/hddtemp.conf \ python.d/ipfs.conf \ python.d/isc_dhcpd.conf \ python.d/mdstat.conf \ python.d/memcached.conf \ + python.d/mongodb.conf \ python.d/mysql.conf \ python.d/nginx.conf \ - python.d/nginx_log.conf \ + python.d/nsd.conf \ python.d/ovpn_status_log.conf \ python.d/phpfpm.conf \ python.d/postfix.conf \ @@ -352,44 +317,23 @@ dist_pythonconfig_DATA = \ python.d/retroshare.conf \ python.d/sensors.conf \ python.d/squid.conf \ + python.d/smartd_log.conf \ python.d/tomcat.conf \ python.d/varnish.conf \ + python.d/web_log.conf \ $(NULL) healthconfigdir = $(configdir)/health.d -dist_healthconfig_DATA = \ - health.d/apache.conf \ - health.d/backend.conf \ - health.d/bind_rndc.conf \ - health.d/cpu.conf \ - health.d/disks.conf \ - health.d/elasticsearch.conf \ - health.d/entropy.conf \ - health.d/haproxy.conf \ - health.d/ipc.conf \ - health.d/ipfs.conf \ - health.d/isc_dhcpd.conf \ - health.d/mdstat.conf \ - health.d/memcached.conf \ - health.d/memory.conf \ - health.d/mysql.conf \ - health.d/named.conf \ - health.d/net.conf \ - health.d/netfilter.conf \ - health.d/nginx.conf \ - health.d/postgres.conf \ - health.d/qos.conf \ - health.d/ram.conf \ - health.d/redis.conf \ - health.d/retroshare.conf \ - health.d/softnet.conf \ - health.d/squid.conf \ - health.d/swap.conf \ - health.d/tcp_resets.conf \ - health.d/udp_errors.conf \ - health.d/varnish.conf \ - $(NULL) - +dist_healthconfig_DATA = health.d/apache.conf health.d/backend.conf \ + health.d/bind_rndc.conf health.d/elasticsearch.conf \ + health.d/fping.conf 
health.d/haproxy.conf health.d/ipfs.conf \ + health.d/ipmi.conf health.d/isc_dhcpd.conf \ + health.d/mdstat.conf health.d/memcached.conf \ + health.d/mysql.conf health.d/named.conf health.d/nginx.conf \ + health.d/postgres.conf health.d/redis.conf \ + health.d/retroshare.conf health.d/squid.conf \ + health.d/varnish.conf health.d/web_log.conf $(NULL) \ + $(am__append_1) chartsconfigdir = $(configdir)/charts.d dist_chartsconfig_DATA = \ charts.d/apache.conf \ @@ -428,6 +372,7 @@ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__confi echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu conf.d/Makefile'; \ $(am__cd) $(top_srcdir) && \ $(AUTOMAKE) --gnu conf.d/Makefile +.PRECIOUS: Makefile Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status @case '$?' in \ *config.status*) \ @@ -447,11 +392,8 @@ $(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) $(am__aclocal_m4_deps): install-dist_chartsconfigDATA: $(dist_chartsconfig_DATA) @$(NORMAL_INSTALL) + test -z "$(chartsconfigdir)" || $(MKDIR_P) "$(DESTDIR)$(chartsconfigdir)" @list='$(dist_chartsconfig_DATA)'; test -n "$(chartsconfigdir)" || list=; \ - if test -n "$$list"; then \ - echo " $(MKDIR_P) '$(DESTDIR)$(chartsconfigdir)'"; \ - $(MKDIR_P) "$(DESTDIR)$(chartsconfigdir)" || exit 1; \ - fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ @@ -468,11 +410,8 @@ uninstall-dist_chartsconfigDATA: dir='$(DESTDIR)$(chartsconfigdir)'; $(am__uninstall_files_from_dir) install-dist_configDATA: $(dist_config_DATA) @$(NORMAL_INSTALL) + test -z "$(configdir)" || $(MKDIR_P) "$(DESTDIR)$(configdir)" @list='$(dist_config_DATA)'; test -n "$(configdir)" || list=; \ - if test -n "$$list"; then \ - echo " $(MKDIR_P) '$(DESTDIR)$(configdir)'"; \ - $(MKDIR_P) "$(DESTDIR)$(configdir)" || exit 1; \ - fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ @@ -489,11 +428,8 @@ uninstall-dist_configDATA: 
dir='$(DESTDIR)$(configdir)'; $(am__uninstall_files_from_dir) install-dist_healthconfigDATA: $(dist_healthconfig_DATA) @$(NORMAL_INSTALL) + test -z "$(healthconfigdir)" || $(MKDIR_P) "$(DESTDIR)$(healthconfigdir)" @list='$(dist_healthconfig_DATA)'; test -n "$(healthconfigdir)" || list=; \ - if test -n "$$list"; then \ - echo " $(MKDIR_P) '$(DESTDIR)$(healthconfigdir)'"; \ - $(MKDIR_P) "$(DESTDIR)$(healthconfigdir)" || exit 1; \ - fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ @@ -510,11 +446,8 @@ uninstall-dist_healthconfigDATA: dir='$(DESTDIR)$(healthconfigdir)'; $(am__uninstall_files_from_dir) install-dist_nodeconfigDATA: $(dist_nodeconfig_DATA) @$(NORMAL_INSTALL) + test -z "$(nodeconfigdir)" || $(MKDIR_P) "$(DESTDIR)$(nodeconfigdir)" @list='$(dist_nodeconfig_DATA)'; test -n "$(nodeconfigdir)" || list=; \ - if test -n "$$list"; then \ - echo " $(MKDIR_P) '$(DESTDIR)$(nodeconfigdir)'"; \ - $(MKDIR_P) "$(DESTDIR)$(nodeconfigdir)" || exit 1; \ - fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ @@ -531,11 +464,8 @@ uninstall-dist_nodeconfigDATA: dir='$(DESTDIR)$(nodeconfigdir)'; $(am__uninstall_files_from_dir) install-dist_pythonconfigDATA: $(dist_pythonconfig_DATA) @$(NORMAL_INSTALL) + test -z "$(pythonconfigdir)" || $(MKDIR_P) "$(DESTDIR)$(pythonconfigdir)" @list='$(dist_pythonconfig_DATA)'; test -n "$(pythonconfigdir)" || list=; \ - if test -n "$$list"; then \ - echo " $(MKDIR_P) '$(DESTDIR)$(pythonconfigdir)'"; \ - $(MKDIR_P) "$(DESTDIR)$(pythonconfigdir)" || exit 1; \ - fi; \ for p in $$list; do \ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ echo "$$d$$p"; \ @@ -550,11 +480,11 @@ uninstall-dist_pythonconfigDATA: @list='$(dist_pythonconfig_DATA)'; test -n "$(pythonconfigdir)" || list=; \ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ dir='$(DESTDIR)$(pythonconfigdir)'; $(am__uninstall_files_from_dir) -tags TAGS: +tags: TAGS +TAGS: 
-ctags CTAGS: - -cscope cscopelist: +ctags: CTAGS +CTAGS: distdir: $(DISTFILES) @@ -697,23 +627,21 @@ uninstall-am: uninstall-dist_chartsconfigDATA \ .MAKE: install-am install-strip -.PHONY: all all-am check check-am clean clean-generic cscopelist-am \ - ctags-am distclean distclean-generic distdir dvi dvi-am html \ - html-am info info-am install install-am install-data \ - install-data-am install-dist_chartsconfigDATA \ - install-dist_configDATA install-dist_healthconfigDATA \ - install-dist_nodeconfigDATA install-dist_pythonconfigDATA \ - install-dvi install-dvi-am install-exec install-exec-am \ - install-html install-html-am install-info install-info-am \ - install-man install-pdf install-pdf-am install-ps \ - install-ps-am install-strip installcheck installcheck-am \ - installdirs maintainer-clean maintainer-clean-generic \ - mostlyclean mostlyclean-generic pdf pdf-am ps ps-am tags-am \ - uninstall uninstall-am uninstall-dist_chartsconfigDATA \ - uninstall-dist_configDATA uninstall-dist_healthconfigDATA \ - uninstall-dist_nodeconfigDATA uninstall-dist_pythonconfigDATA - -.PRECIOUS: Makefile +.PHONY: all all-am check check-am clean clean-generic distclean \ + distclean-generic distdir dvi dvi-am html html-am info info-am \ + install install-am install-data install-data-am \ + install-dist_chartsconfigDATA install-dist_configDATA \ + install-dist_healthconfigDATA install-dist_nodeconfigDATA \ + install-dist_pythonconfigDATA install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-generic pdf \ + pdf-am ps ps-am uninstall uninstall-am \ + uninstall-dist_chartsconfigDATA uninstall-dist_configDATA \ + uninstall-dist_healthconfigDATA uninstall-dist_nodeconfigDATA \ + uninstall-dist_pythonconfigDATA # Tell 
versions [3.59,3.63) of GNU make to not export all variables. diff --git a/conf.d/apps_groups.conf b/conf.d/apps_groups.conf index e2836877..4c5171b3 100644 --- a/conf.d/apps_groups.conf +++ b/conf.d/apps_groups.conf @@ -73,10 +73,12 @@ netdata: netdata # netdata known plugins # plugins not defined here will be accumulated in netdata, above apps.plugin: apps.plugin +freeipmi.plugin: freeipmi.plugin charts.d.plugin: *charts.d.plugin* node.d.plugin: *node.d.plugin* python.d.plugin: *python.d.plugin* tc-qos-helper: *tc-qos-helper.sh* +fping: fping # ----------------------------------------------------------------------------- # authentication/authorization related servers @@ -106,11 +108,12 @@ nosql: mongod redis* memcached email: dovecot imapd pop3d amavis* master zmstat* zmmailboxdmgr qmgr oqmgr # ----------------------------------------------------------------------------- -# networking and VPN servers +# network, routing, VPN ppp: ppp* vpn: openvpn pptp* cjdroute wifi: hostapd wpa_supplicant +routing: ospfd* ospf6d* bgpd isisd ripd ripngd pimd ldpd zebra vtysh bird* # ----------------------------------------------------------------------------- # high availability and balancers diff --git a/conf.d/charts.d/nut.conf b/conf.d/charts.d/nut.conf index 2844849d..a836692d 100644 --- a/conf.d/charts.d/nut.conf +++ b/conf.d/charts.d/nut.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! 
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2016-2017 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # a space separated list of UPS names @@ -12,6 +12,10 @@ # how much time in seconds, to wait for nut to respond #nut_timeout=2 +# set this to 1, to enable another chart showing the number +# of UPS clients connected to upsd +#nut_clients_chart=1 + # the data collection frequency # if unset, will inherit the netdata update frequency #nut_update_every=2 diff --git a/conf.d/health.d/cpu.conf b/conf.d/health.d/cpu.conf index 60f494d7..30a71409 100644 --- a/conf.d/health.d/cpu.conf +++ b/conf.d/health.d/cpu.conf @@ -1,13 +1,13 @@ template: 10min_cpu_usage on: system.cpu - lookup: average -10m unaligned of user,system,nice,softirq,irq,guest,guest_nice + lookup: average -10m unaligned of user,system,softirq,irq,guest units: % every: 1m warn: $this > (($status >= $WARNING) ? (75) : (85)) crit: $this > (($status == $CRITICAL) ? (85) : (95)) delay: down 15m multiplier 1.5 max 1h - info: average cpu utilization for the last 10 minutes + info: average cpu utilization for the last 10 minutes (excluding iowait, nice and steal) to: sysadmin template: 10min_cpu_iowait diff --git a/conf.d/health.d/disks.conf b/conf.d/health.d/disks.conf index 0549bac2..9548f9ee 100644 --- a/conf.d/health.d/disks.conf +++ b/conf.d/health.d/disks.conf @@ -1,34 +1,4 @@ # ----------------------------------------------------------------------------- -# make sure we collect values for each disk - -# for mount points -template: disk_space_last_collected_secs - on: disk.space -families: * - calc: $now - $last_collected_t - units: seconds ago - every: 10s - warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) - crit: $this > (($status == $CRITICAL) ? 
($update_every) : (60 * $update_every)) - delay: down 5m multiplier 1.5 max 1h - info: number of seconds since the last successful data collection of the mount point - to: sysadmin - -# for block devices -template: disk_last_collected_secs - on: disk.io -families: * - calc: $now - $last_collected_t - units: seconds ago - every: 10s - warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) - crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) - delay: down 5m multiplier 1.5 max 1h - info: number of seconds since the last successful data collection of the block device - to: sysadmin - - -# ----------------------------------------------------------------------------- # low disk space # checking the latest collected values @@ -88,7 +58,7 @@ families: * template: out_of_disk_space_time on: disk.space families: * - calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (0) + calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf) units: hours every: 10s warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8)) diff --git a/conf.d/health.d/fping.conf b/conf.d/health.d/fping.conf new file mode 100644 index 00000000..69251b18 --- /dev/null +++ b/conf.d/health.d/fping.conf @@ -0,0 +1,53 @@ + +template: fping_last_collected_secs +families: * + on: fping.latency + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? 
($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: sysadmin + +template: host_reachable +families: * + on: fping.latency + calc: $average != nan + units: up/down + every: 10s + crit: $this == 0 + info: states if the remote host is reachable + delay: down 30m multiplier 1.5 max 2h + to: sysadmin + +template: host_latency +families: * + on: fping.latency + lookup: average -10s unaligned of average + units: ms + every: 10s + green: 300 + red: 1000 + warn: $this > $green OR $max > $red + crit: $this > $red + info: average round trip delay during the last 10 seconds + delay: down 30m multiplier 1.5 max 2h + to: sysadmin + +template: packet_loss +families: * + on: fping.quality + lookup: average -10m unaligned of returned + calc: 100 - $this + green: 1 + red: 10 + units: % + every: 10s + warn: $this > $green + crit: $this > $red + info: packet loss percentage + delay: down 30m multiplier 1.5 max 2h + to: sysadmin + diff --git a/conf.d/health.d/ipmi.conf b/conf.d/health.d/ipmi.conf new file mode 100644 index 00000000..c2558196 --- /dev/null +++ b/conf.d/health.d/ipmi.conf @@ -0,0 +1,20 @@ + alarm: ipmi_sensors_states + on: ipmi.sensors_states + calc: $warning + $critical + units: sensors + every: 10s + warn: $this > 0 + crit: $critical > 0 + delay: up 5m down 15m multiplier 1.5 max 1h + info: the number of IPMI sensors in non-nominal state + to: sysadmin + + alarm: ipmi_events + on: ipmi.events + calc: $events + units: events + every: 10s + warn: $this > 0 + delay: up 5m down 15m multiplier 1.5 max 1h + info: the number of events in the IPMI System Event Log (SEL) + to: sysadmin diff --git a/conf.d/health.d/memcached.conf b/conf.d/health.d/memcached.conf index 7917e36a..d248ef57 100644 --- a/conf.d/health.d/memcached.conf +++ b/conf.d/health.d/memcached.conf @@ -42,7 +42,7 @@ template: cache_fill_rate template: out_of_cache_space_time on: memcached.cache - calc: 
($cache_fill_rate > 0) ? ($available / $cache_fill_rate) : (0) + calc: ($cache_fill_rate > 0) ? ($available / $cache_fill_rate) : (inf) units: hours every: 10s warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8)) diff --git a/conf.d/health.d/mysql.conf b/conf.d/health.d/mysql.conf index 78773e5b..1eeb993f 100644 --- a/conf.d/health.d/mysql.conf +++ b/conf.d/health.d/mysql.conf @@ -49,7 +49,7 @@ template: mysql_10s_table_locks_waited template: mysql_10s_waited_locks_ratio on: mysql.table_locks - calc: ($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate) + calc: ( ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate) > 0 ) ? (($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)) : 0 units: % every: 10s warn: $this > (($status >= $WARNING) ? (10) : (25)) @@ -65,7 +65,7 @@ template: mysql_10s_waited_locks_ratio template: mysql_replication on: mysql.slave_status calc: ($sql_running == -1 OR $io_running == -1)?0:1 - units: status + units: ok/failed every: 10s crit: $this == 0 delay: down 5m multiplier 1.5 max 1h diff --git a/conf.d/health.d/net.conf b/conf.d/health.d/net.conf index 924acccc..0232395a 100644 --- a/conf.d/health.d/net.conf +++ b/conf.d/health.d/net.conf @@ -1,18 +1,3 @@ -# ----------------------------------------------------------------------------- -# make sure we collect values for each interface - -template: interface_last_collected_secs - on: net.net -families: * - calc: $now - $last_collected_t - units: seconds ago - every: 10s - warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) - crit: $this > (($status == $CRITICAL) ? 
($update_every) : (60 * $update_every)) - delay: down 5m multiplier 1.5 max 1h - info: number of seconds since the last successful data collection - to: sysadmin - # ----------------------------------------------------------------------------- # dropped packets @@ -116,6 +101,7 @@ families: * units: % warn: $this > (($status >= $WARNING)?(200):(1000)) crit: $this > (($status >= $WARNING)?(1000):(2000)) +options: no-clear-notification info: the % of the rate of received packets in the last 10 seconds, compared to the rate of the last minute - to: silent + to: sysadmin diff --git a/conf.d/health.d/tcp_resets.conf b/conf.d/health.d/tcp_resets.conf index daf24a1c..49fb1b92 100644 --- a/conf.d/health.d/tcp_resets.conf +++ b/conf.d/health.d/tcp_resets.conf @@ -28,8 +28,9 @@ every: 10s warn: $this > ((($1m_ipv4_tcp_resets_sent < 5)?(5):($1m_ipv4_tcp_resets_sent)) * (($status >= $WARNING) ? (1) : (4))) delay: up 0 down 60m multiplier 1.2 max 2h +options: no-clear-notification info: average TCP RESETS this host is sending, over the last 10 seconds (this can be an indication that a port scan is made, or that a service running on this host has crashed) - to: silent + to: sysadmin # ----------------------------------------------------------------------------- # tcp resets this host receives @@ -48,5 +49,6 @@ every: 10s warn: $this > ((($1m_ipv4_tcp_resets_received < 5)?(5):($1m_ipv4_tcp_resets_received)) * (($status >= $WARNING) ? 
(1) : (4))) delay: up 0 down 60m multiplier 1.2 max 2h +options: no-clear-notification info: average TCP RESETS this host is receiving, over the last 10 seconds (this can be an indication that a service this host needs, has crashed) - to: silent + to: sysadmin diff --git a/conf.d/health.d/web_log.conf b/conf.d/health.d/web_log.conf new file mode 100644 index 00000000..c668959f --- /dev/null +++ b/conf.d/health.d/web_log.conf @@ -0,0 +1,161 @@ + +# make sure we can collect web log data + +template: last_collected_secs + on: web_log.response_codes +families: * + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: webmaster + + +# ----------------------------------------------------------------------------- +# high level response code alarms + +# the following alarms trigger only when there are enough data. +# we assume there are enough data when: +# +# $1m_requests > 120 +# +# i.e. when there are at least 120 requests during the last minute + +template: 1m_requests + on: web_log.response_statuses +families: * + lookup: sum -1m unaligned + calc: ($this == 0)?(1):($this) + units: requests + every: 10s + info: the sum of all HTTP requests over the last minute + +template: 1m_successful + on: web_log.response_statuses +families: * + lookup: sum -1m unaligned of successful_requests + calc: $this * 100 / $1m_requests + units: % + every: 10s + warn: ($1m_requests > 120) ? ($this < (($status >= $WARNING ) ? ( 95 ) : ( 85 )) ) : ( 0 ) + crit: ($1m_requests > 120) ? ($this < (($status == $CRITICAL) ? 
( 85 ) : ( 75 )) ) : ( 0 ) + delay: up 2m down 15m multiplier 1.5 max 1h + info: the ratio of successful HTTP responses (1xx, 2xx, 304) over the last minute + to: webmaster + +template: 1m_redirects + on: web_log.response_statuses +families: * + lookup: sum -1m unaligned of redirects + calc: $this * 100 / $1m_requests + units: % + every: 10s + warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING ) ? ( 1 ) : ( 20 )) ) : ( 0 ) + crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 20 ) : ( 30 )) ) : ( 0 ) + delay: up 2m down 15m multiplier 1.5 max 1h + info: the ratio of HTTP redirects (3xx except 304) over the last minute + to: webmaster + +template: 1m_bad_requests + on: web_log.response_statuses +families: * + lookup: sum -1m unaligned of bad_requests + calc: $this * 100 / $1m_requests + units: % + every: 10s + warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING) ? ( 10 ) : ( 30 )) ) : ( 0 ) + crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 30 ) : ( 50 )) ) : ( 0 ) + delay: up 2m down 15m multiplier 1.5 max 1h + info: the ratio of HTTP bad requests (4xx) over the last minute + to: webmaster + +template: 1m_internal_errors + on: web_log.response_statuses +families: * + lookup: sum -1m unaligned of server_errors + calc: $this * 100 / $1m_requests + units: % + every: 10s + warn: ($1m_requests > 120) ? ($this > (($status >= $WARNING) ? ( 1 ) : ( 2 )) ) : ( 0 ) + crit: ($1m_requests > 120) ? ($this > (($status == $CRITICAL) ? ( 2 ) : ( 5 )) ) : ( 0 ) + delay: up 2m down 15m multiplier 1.5 max 1h + info: the ratio of HTTP internal server errors (5xx), over the last minute + to: webmaster + + +# ----------------------------------------------------------------------------- +# web slow + +# the following alarms trigger only when there are enough data. +# we assume there are enough data when: +# +# $1m_requests > 120 +# +# i.e. 
when there are at least 120 requests during the last minute + +template: 10m_response_time + on: web_log.response_time +families: * + lookup: average -10m unaligned of avg + units: ms + every: 30s + info: the average time to respond to HTTP requests, over the last 10 minutes + +template: web_slow + on: web_log.response_time +families: * + lookup: average -1m unaligned of avg + units: ms + every: 10s + green: 500 + red: 1000 + warn: ($1m_requests > 120) ? ($this > $green && $this > ($10m_response_time * 2) ) : ( 0 ) + crit: ($1m_requests > 120) ? ($this > $red && $this > ($10m_response_time * 4) ) : ( 0 ) + delay: down 15m multiplier 1.5 max 1h + info: the average time to respond to HTTP requests, over the last 1 minute + to: webmaster + +# ----------------------------------------------------------------------------- +# web too many or too few requests + +# the following alarms trigger only when there are enough data. +# we assume there are enough data when: +# +# $5m_successful_old > 120 +# +# i.e. when there were at least 120 requests during the 5 minutes starting +# at -10m and ending at -5m + +template: 5m_successful_old + on: web_log.response_statuses +families: * + lookup: average -5m at -5m unaligned of successful_requests + units: requests/s + every: 30s + info: average rate of successful HTTP requests during the 5 minutes starting 10 minutes ago + +template: 5m_successful + on: web_log.response_statuses +families: * + lookup: average -5m unaligned of successful_requests + units: requests/s + every: 30s + info: average successful HTTP requests over the last 5 minutes + +template: 5m_requests_ratio + on: web_log.response_codes +families: * + calc: ($5m_successful_old > 0)?($5m_successful * 100 / $5m_successful_old):(100) + units: % + every: 30s + warn: ($5m_successful_old > 120) ? ($this > 200 OR $this < 50) : (0) + crit: ($5m_successful_old > 120) ? 
($this > 400 OR $this < 25) : (0) + delay: down 15m multiplier 1.5 max 1h +options: no-clear-notification + info: the percentage of successful web requests over the last 5 minutes, \ + compared with the previous 5 minutes + to: webmaster + diff --git a/conf.d/health_alarm_notify.conf b/conf.d/health_alarm_notify.conf index b838e802..23776b96 100644 --- a/conf.d/health_alarm_notify.conf +++ b/conf.d/health_alarm_notify.conf @@ -7,6 +7,7 @@ # - e-mails (using the sendmail command), # - push notifications to your mobile phone (pushover.net), # - messages to your slack team (slack.com), +# - messages to your discord guild (discordapp.com), # - messages to your telegram chat / group chat (telegram.org) # - sms messages to your cell phone or any sms enabled device (twilio.com) # - sms messages to your cell phone or any sms enabled device (messagebird.com) @@ -22,7 +23,7 @@ # proxy configuration # # If you need to send curl based notifications (pushover, pushbullet, slack, -# telegram) via a proxy, set these to your proxy address: +# discord, telegram) via a proxy, set these to your proxy address: #export http_proxy="http://10.0.0.1:3128/" #export https_proxy="http://10.0.0.1:3128/" @@ -62,6 +63,7 @@ curl="" # - pushover user tokens # - telegram chat ids # - slack channels +# - discord channels # - hipchat rooms # - sms phone numbers # - pagerduty.com (pd) services @@ -75,6 +77,7 @@ curl="" # pushover : "2987343...9437837 8756278...2362736|critical" # telegram : "111827421 112746832|critical" # slack : "alarms disasters|critical" +# discord : "alarms disasters|critical" # twilio : "+15555555555 +17777777777|critical" # messagebird: "+15555555555 +17777777777|critical" # pd : "<pd_service_key_1> <pd_service_key_2>|critical" @@ -228,6 +231,25 @@ DEFAULT_RECIPIENT_SLACK="" #------------------------------------------------------------------------------ +# discord (discordapp.com) global notification options + +# multiple recipients can be given like this: +# "CHANNEL1 
CHANNEL2 ..." + +# enable/disable sending discord notifications +SEND_DISCORD="YES" + +# Create a webhook by following the official documentation - +# https://support.discordapp.com/hc/en-us/articles/228383668-Intro-to-Webhooks +DISCORD_WEBHOOK_URL="" + +# if a role's recipients are not configured, a notification will be sent to +# this discord channel (empty = do not send a notification for unconfigured +# roles): +DEFAULT_RECIPIENT_DISCORD="" + + +#------------------------------------------------------------------------------ # hipchat global notification options # multiple recipients can be given like this: @@ -236,6 +258,9 @@ DEFAULT_RECIPIENT_SLACK="" # enable/disable sending hipchat notifications SEND_HIPCHAT="YES" +# define hipchat server +HIPCHAT_SERVER="api.hipchat.com" + # api.hipchat.com authorization token # Without this, netdata cannot send hipchat notifications. HIPCHAT_AUTH_TOKEN="" @@ -295,6 +320,8 @@ role_recipients_telegram[sysadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" role_recipients_slack[sysadmin]="${DEFAULT_RECIPIENT_SLACK}" +role_recipients_discord[sysadmin]="${DEFAULT_RECIPIENT_DISCORD}" + role_recipients_hipchat[sysadmin]="${DEFAULT_RECIPIENT_HIPCHAT}" role_recipients_twilio[sysadmin]="${DEFAULT_RECIPIENT_TWILIO}" @@ -316,6 +343,8 @@ role_recipients_telegram[domainadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" role_recipients_slack[domainadmin]="${DEFAULT_RECIPIENT_SLACK}" +role_recipients_discord[domainadmin]="${DEFAULT_RECIPIENT_DISCORD}" + role_recipients_hipchat[domainadmin]="${DEFAULT_RECIPIENT_HIPCHAT}" role_recipients_twilio[domainadmin]="${DEFAULT_RECIPIENT_TWILIO}" @@ -338,6 +367,8 @@ role_recipients_telegram[dba]="${DEFAULT_RECIPIENT_TELEGRAM}" role_recipients_slack[dba]="${DEFAULT_RECIPIENT_SLACK}" +role_recipients_discord[dba]="${DEFAULT_RECIPIENT_DISCORD}" + role_recipients_hipchat[dba]="${DEFAULT_RECIPIENT_HIPCHAT}" role_recipients_twilio[dba]="${DEFAULT_RECIPIENT_TWILIO}" @@ -360,6 +391,8 @@
role_recipients_telegram[webmaster]="${DEFAULT_RECIPIENT_TELEGRAM}" role_recipients_slack[webmaster]="${DEFAULT_RECIPIENT_SLACK}" +role_recipients_discord[webmaster]="${DEFAULT_RECIPIENT_DISCORD}" + role_recipients_hipchat[webmaster]="${DEFAULT_RECIPIENT_HIPCHAT}" role_recipients_twilio[webmaster]="${DEFAULT_RECIPIENT_TWILIO}" @@ -382,6 +415,8 @@ role_recipients_telegram[proxyadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" role_recipients_slack[proxyadmin]="${DEFAULT_RECIPIENT_SLACK}" +role_recipients_discord[proxyadmin]="${DEFAULT_RECIPIENT_DISCORD}" + role_recipients_hipchat[proxyadmin]="${DEFAULT_RECIPIENT_HIPCHAT}" role_recipients_twilio[proxyadmin]="${DEFAULT_RECIPIENT_TWILIO}" diff --git a/conf.d/node.d/snmp.conf.md b/conf.d/node.d/snmp.conf.md index bae5bf20..6b496f7a 100644 --- a/conf.d/node.d/snmp.conf.md +++ b/conf.d/node.d/snmp.conf.md @@ -1,341 +1,359 @@ -# SNMP Data Collector
-
-Using this collector, netdata can collect data from any SNMP device.
-
-This collector supports:
-
-- any number of SNMP devices
-- each SNMP device can be used to collect data for any number of charts
-- each chart may have any number of dimensions
-- each SNMP device may have a different update frequency
-- each SNMP device will accept one or more batches to report values (you can set `max_request_size` per SNMP server, to control the size of batches).
-
-The source code of the plugin is [here](https://github.com/firehol/netdata/blob/master/node.d/snmp.node.js).
-
-## Configuration
-
-You will need to create the file `/etc/netdata/node.d/snmp.conf` with data like the following.
-
-In this example:
-
- - the SNMP device is `10.11.12.8`.
- - the SNMP community is `public`.
- - we will update the values every 10 seconds (`update_every: 10` under the server `10.11.12.8`).
- - we define 2 charts `snmp_switch.bandwidth_port1` and `snmp_switch.bandwidth_port2`, each having 2 dimensions: `in` and `out`.
-
-```js
-{
- "enable_autodetect": false,
- "update_every": 5,
- "max_request_size": 100,
- "servers": [
- {
- "hostname": "10.11.12.8",
- "community": "public",
- "update_every": 10,
- "max_request_size": 50,
- "options": { "timeout": 10000 },
- "charts": {
- "snmp_switch.bandwidth_port1": {
- "title": "Switch Bandwidth for port 1",
- "units": "kilobits/s",
- "type": "area",
- "priority": 1,
- "family": "ports",
- "dimensions": {
- "in": {
- "oid": "1.3.6.1.2.1.2.2.1.10.1",
- "algorithm": "incremental",
- "multiplier": 8,
- "divisor": 1024
- },
- "out": {
- "oid": "1.3.6.1.2.1.2.2.1.16.1",
- "algorithm": "incremental",
- "multiplier": -8,
- "divisor": 1024
- }
- }
- },
- "snmp_switch.bandwidth_port2": {
- "title": "Switch Bandwidth for port 2",
- "units": "kilobits/s",
- "type": "area",
- "priority": 1,
- "family": "ports",
- "dimensions": {
- "in": {
- "oid": "1.3.6.1.2.1.2.2.1.10.2",
- "algorithm": "incremental",
- "multiplier": 8,
- "divisor": 1024
- },
- "out": {
- "oid": "1.3.6.1.2.1.2.2.1.16.2",
- "algorithm": "incremental",
- "multiplier": -8,
- "divisor": 1024
- }
- }
- }
- }
- }
- ]
-}
-```
-
-`update_every` is the update frequency for each server, in seconds.
-
-`max_request_size` limits the maximum number of OIDs that will be requested in a single call. The default is 50. Lower this number of you get `TooBig` errors in netdata error.log.
-
-`family` sets the name of the submenu of the dashboard each chart will appear under.
-
-If you need to define many charts using incremental OIDs, you can use something like this:
-
-This is like the previous, but the option `multiply_range` given, will multiply the current chart from `1` to `24` inclusive, producing 24 charts in total for the 24 ports of the switch `10.11.12.8`.
-
-Each of the 24 new charts will have its id (1-24) appended at:
-
-1. its chart unique id, i.e. `snmp_switch.bandwidth_port1` to `snmp_switch.bandwidth_port24`
-2. its `title`, i.e. `Switch Bandwidth for port 1` to `Switch Bandwidth for port 24`
-3. its `oid` (for all dimensions), i.e. dimension `in` will be `1.3.6.1.2.1.2.2.1.10.1` to `1.3.6.1.2.1.2.2.1.10.24`
-3. its priority (which will be incremented for each chart so that the charts will appear on the dashboard in this order)
-
-```js
-{
- "enable_autodetect": false,
- "update_every": 10,
- "servers": [
- {
- "hostname": "10.11.12.8",
- "community": "public",
- "update_every": 10,
- "options": { "timeout": 20000 },
- "charts": {
- "snmp_switch.bandwidth_port": {
- "title": "Switch Bandwidth for port ",
- "units": "kilobits/s",
- "type": "area",
- "priority": 1,
- "family": "ports",
- "multiply_range": [ 1, 24 ],
- "dimensions": {
- "in": {
- "oid": "1.3.6.1.2.1.2.2.1.10.",
- "algorithm": "incremental",
- "multiplier": 8,
- "divisor": 1024
- },
- "out": {
- "oid": "1.3.6.1.2.1.2.2.1.16.",
- "algorithm": "incremental",
- "multiplier": -8,
- "divisor": 1024
- }
- }
- }
- }
- }
- ]
-}
-```
-
-The `options` given for each server, are:
-
- - `timeout`, the time to wait for the SNMP device to respond. The default is 5000 ms.
- - `version`, the SNMP version to use. `0` is Version 1, `1` is Version 2c. The default is Version 1 (`0`).
- - `transport`, the default is `udp4`.
- - `port`, the port of the SNMP device to connect to. The default is `161`.
- - `retries`, the number of attempts to make to fetch the data. The default is `1`.
-
-## Retreiving names from snmp
-
-You can append a value retrieved from SNMP to the title, by adding `titleoid` to the chart.
-
-You can set a dimension name to a value retrieved from SNMP, by adding `oidname` to the dimension.
-
-Both of the above will participate in `multiply_range`.
-
-
-## Testing the configuration
-
-To test it, you can run:
-
-```sh
-/usr/libexec/netdata/plugins.d/node.d.plugin 1 snmp
-```
-
-The above will run it on your console and you will be able to see what netdata sees, but also errors. You can get a very detailed output by appending `debug` to the command line.
-
-If it works, restart netdata to activate the snmp collector and refresh the dashboard (if your SNMP device responds with a delay, you may need to refresh the dashboard in a few seconds).
-
-## Data collection speed
-
-Keep in mind that many SNMP switches are routers are very slow. They may not be able to report values per second. If you run `node.d.plugin` in `debug` mode, it will report the time it took for the SNMP device to respond. My switch, for example, needs 7-8 seconds to respond for the traffic on 24 ports (48 OIDs, in/out).
-
-Also, if you use many SNMP clients on the same SNMP device at the same time, values may be skipped. This is a problem of the SNMP device, not this collector.
-
-## Finding OIDs
-
-Use `snmpwalk`, like this:
-
-```sh
-snmpwalk -t 20 -v 1 -O fn -c public 10.11.12.8
-```
-
-- `-t 20` is the timeout in seconds
-- `-v 1` is the SNMP version
-- `-O fn` will display full OIDs in numeric format (you may want to run it also without this option to see human readable output of OIDs)
-- `-c public` is the SNMP community
-- `10.11.12.8` is the SNMP device
-
-Keep in mind that `snmpwalk` outputs the OIDs with a dot in front them. You should remove this dot when adding OIDs to the configuration file of this collector.
-
-## Example: Linksys SRW2024P
-
-This is what I use for my Linksys SRW2024P. It creates:
-
-1. A chart for power consumption (it is a PoE switch)
-2. Two charts for packets received (total packets received and packets received with errors)
-3. One chart for packets output
-4. 24 charts, one for each port of the switch. It also appends the port names, as defined at the switch, to the chart titles.
-
-This switch also reports various other metrics, like snmp, packets per port, etc. Unfortunately it does not report CPU utilization or backplane utilization.
-
-This switch has a very slow SNMP processors. To respond, it needs about 8 seconds, so I have set the refresh frequency (`update_every`) to 15 seconds.
-
-```js
-{
- "enable_autodetect": false,
- "update_every": 5,
- "servers": [
- {
- "hostname": "10.11.12.8",
- "community": "public",
- "update_every": 15,
- "options": { "timeout": 20000, "version": 1 },
- "charts": {
- "snmp_switch.power": {
- "title": "Switch Power Supply",
- "units": "watts",
- "type": "line",
- "priority": 10,
- "family": "power",
- "dimensions": {
- "supply": {
- "oid": ".1.3.6.1.2.1.105.1.3.1.1.2.1",
- "algorithm": "absolute",
- "multiplier": 1,
- "divisor": 1
- },
- "used": {
- "oid": ".1.3.6.1.2.1.105.1.3.1.1.4.1",
- "algorithm": "absolute",
- "multiplier": 1,
- "divisor": 1
- }
- }
- }
- , "snmp_switch.input": {
- "title": "Switch Packets Input",
- "units": "packets/s",
- "type": "area",
- "priority": 20,
- "family": "IP",
- "dimensions": {
- "receives": {
- "oid": ".1.3.6.1.2.1.4.3.0",
- "algorithm": "incremental",
- "multiplier": 1,
- "divisor": 1
- }
- , "discards": {
- "oid": ".1.3.6.1.2.1.4.8.0",
- "algorithm": "incremental",
- "multiplier": 1,
- "divisor": 1
- }
- }
- }
- , "snmp_switch.input_errors": {
- "title": "Switch Received Packets with Errors",
- "units": "packets/s",
- "type": "line",
- "priority": 30,
- "family": "IP",
- "dimensions": {
- "bad_header": {
- "oid": ".1.3.6.1.2.1.4.4.0",
- "algorithm": "incremental",
- "multiplier": 1,
- "divisor": 1
- }
- , "bad_address": {
- "oid": ".1.3.6.1.2.1.4.5.0",
- "algorithm": "incremental",
- "multiplier": 1,
- "divisor": 1
- }
- , "unknown_protocol": {
- "oid": ".1.3.6.1.2.1.4.7.0",
- "algorithm": "incremental",
- "multiplier": 1,
- "divisor": 1
- }
- }
- }
- , "snmp_switch.output": {
- "title": "Switch Output Packets",
- "units": "packets/s",
- "type": "line",
- "priority": 40,
- "family": "IP",
- "dimensions": {
- "requests": {
- "oid": ".1.3.6.1.2.1.4.10.0",
- "algorithm": "incremental",
- "multiplier": 1,
- "divisor": 1
- }
- , "discards": {
- "oid": ".1.3.6.1.2.1.4.11.0",
- "algorithm": "incremental",
- "multiplier": -1,
- "divisor": 1
- }
- , "no_route": {
- "oid": ".1.3.6.1.2.1.4.12.0",
- "algorithm": "incremental",
- "multiplier": -1,
- "divisor": 1
- }
- }
- }
- , "snmp_switch.bandwidth_port": {
- "title": "Switch Bandwidth for port ",
- "titleoid": ".1.3.6.1.2.1.31.1.1.1.18.",
- "units": "kilobits/s",
- "type": "area",
- "priority": 100,
- "family": "ports",
- "multiply_range": [ 1, 24 ],
- "dimensions": {
- "in": {
- "oid": ".1.3.6.1.2.1.2.2.1.10.",
- "algorithm": "incremental",
- "multiplier": 8,
- "divisor": 1024
- }
- , "out": {
- "oid": ".1.3.6.1.2.1.2.2.1.16.",
- "algorithm": "incremental",
- "multiplier": -8,
- "divisor": 1024
- }
- }
- }
- }
- }
- ]
-}
-```
+# SNMP Data Collector + +Using this collector, netdata can collect data from any SNMP device. + +This collector supports: + +- any number of SNMP devices +- each SNMP device can be used to collect data for any number of charts +- each chart may have any number of dimensions +- each SNMP device may have a different update frequency +- each SNMP device will accept one or more batches to report values (you can set `max_request_size` per SNMP server, to control the size of batches). + +The source code of the plugin is [here](https://github.com/firehol/netdata/blob/master/node.d/snmp.node.js). + +## Configuration + +You will need to create the file `/etc/netdata/node.d/snmp.conf` with data like the following. + +In this example: + + - the SNMP device is `10.11.12.8`. + - the SNMP community is `public`. + - we will update the values every 10 seconds (`update_every: 10` under the server `10.11.12.8`). + - we define 2 charts `snmp_switch.bandwidth_port1` and `snmp_switch.bandwidth_port2`, each having 2 dimensions: `in` and `out`. 
+ +```js +{ + "enable_autodetect": false, + "update_every": 5, + "max_request_size": 100, + "servers": [ + { + "hostname": "10.11.12.8", + "community": "public", + "update_every": 10, + "max_request_size": 50, + "options": { "timeout": 10000 }, + "charts": { + "snmp_switch.bandwidth_port1": { + "title": "Switch Bandwidth for port 1", + "units": "kilobits/s", + "type": "area", + "priority": 1, + "family": "ports", + "dimensions": { + "in": { + "oid": "1.3.6.1.2.1.2.2.1.10.1", + "algorithm": "incremental", + "multiplier": 8, + "divisor": 1024, + "offset": 0 + }, + "out": { + "oid": "1.3.6.1.2.1.2.2.1.16.1", + "algorithm": "incremental", + "multiplier": -8, + "divisor": 1024, + "offset": 0 + } + } + }, + "snmp_switch.bandwidth_port2": { + "title": "Switch Bandwidth for port 2", + "units": "kilobits/s", + "type": "area", + "priority": 1, + "family": "ports", + "dimensions": { + "in": { + "oid": "1.3.6.1.2.1.2.2.1.10.2", + "algorithm": "incremental", + "multiplier": 8, + "divisor": 1024, + "offset": 0 + }, + "out": { + "oid": "1.3.6.1.2.1.2.2.1.16.2", + "algorithm": "incremental", + "multiplier": -8, + "divisor": 1024, + "offset": 0 + } + } + } + } + } + ] +} +``` + +`update_every` is the update frequency for each server, in seconds. + +`max_request_size` limits the maximum number of OIDs that will be requested in a single call. The default is 50. Lower this number if you get `TooBig` errors in netdata error.log. + +`family` sets the name of the submenu of the dashboard each chart will appear under. + +If you need to define many charts using incremental OIDs, you can use something like this: + +This is like the previous, but the option `multiply_range` given, will multiply the current chart from `1` to `24` inclusive, producing 24 charts in total for the 24 ports of the switch `10.11.12.8`. + +Each of the 24 new charts will have its id (1-24) appended at: + +1. its chart unique id, i.e. `snmp_switch.bandwidth_port1` to `snmp_switch.bandwidth_port24` +2.
its `title`, i.e. `Switch Bandwidth for port 1` to `Switch Bandwidth for port 24` +3. its `oid` (for all dimensions), i.e. dimension `in` will be `1.3.6.1.2.1.2.2.1.10.1` to `1.3.6.1.2.1.2.2.1.10.24` +4. its priority (which will be incremented for each chart so that the charts will appear on the dashboard in this order) + +```js +{ + "enable_autodetect": false, + "update_every": 10, + "servers": [ + { + "hostname": "10.11.12.8", + "community": "public", + "update_every": 10, + "options": { "timeout": 20000 }, + "charts": { + "snmp_switch.bandwidth_port": { + "title": "Switch Bandwidth for port ", + "units": "kilobits/s", + "type": "area", + "priority": 1, + "family": "ports", + "multiply_range": [ 1, 24 ], + "dimensions": { + "in": { + "oid": "1.3.6.1.2.1.2.2.1.10.", + "algorithm": "incremental", + "multiplier": 8, + "divisor": 1024, + "offset": 0 + }, + "out": { + "oid": "1.3.6.1.2.1.2.2.1.16.", + "algorithm": "incremental", + "multiplier": -8, + "divisor": 1024, + "offset": 0 + } + } + } + } + } + ] +} +``` + +The `options` given for each server, are: + + - `timeout`, the time to wait for the SNMP device to respond. The default is 5000 ms. + - `version`, the SNMP version to use. `0` is Version 1, `1` is Version 2c. The default is Version 1 (`0`). + - `transport`, the default is `udp4`. + - `port`, the port of the SNMP device to connect to. The default is `161`. + - `retries`, the number of attempts to make to fetch the data. The default is `1`. + +## Retrieving names from snmp + +You can append a value retrieved from SNMP to the title, by adding `titleoid` to the chart. + +You can set a dimension name to a value retrieved from SNMP, by adding `oidname` to the dimension. + +Both of the above will participate in `multiply_range`. + + +## Testing the configuration + +To test it, you can run: + +```sh +/usr/libexec/netdata/plugins.d/node.d.plugin 1 snmp +``` + +The above will run it on your console and you will be able to see what netdata sees, but also errors.
You can get a very detailed output by appending `debug` to the command line. + +If it works, restart netdata to activate the snmp collector and refresh the dashboard (if your SNMP device responds with a delay, you may need to refresh the dashboard in a few seconds). + +## Data collection speed + +Keep in mind that many SNMP switches and routers are very slow. They may not be able to report values per second. If you run `node.d.plugin` in `debug` mode, it will report the time it took for the SNMP device to respond. My switch, for example, needs 7-8 seconds to respond for the traffic on 24 ports (48 OIDs, in/out). + +Also, if you use many SNMP clients on the same SNMP device at the same time, values may be skipped. This is a problem of the SNMP device, not this collector. + +## Finding OIDs + +Use `snmpwalk`, like this: + +```sh +snmpwalk -t 20 -v 1 -O fn -c public 10.11.12.8 +``` + +- `-t 20` is the timeout in seconds +- `-v 1` is the SNMP version +- `-O fn` will display full OIDs in numeric format (you may want to run it also without this option to see human readable output of OIDs) +- `-c public` is the SNMP community +- `10.11.12.8` is the SNMP device + +Keep in mind that `snmpwalk` outputs the OIDs with a dot in front of them. You should remove this dot when adding OIDs to the configuration file of this collector. + +## Example: Linksys SRW2024P + +This is what I use for my Linksys SRW2024P. It creates: + +1. A chart for power consumption (it is a PoE switch) +2. Two charts for packets received (total packets received and packets received with errors) +3. One chart for packets output +4. 24 charts, one for each port of the switch. It also appends the port names, as defined at the switch, to the chart titles. + +This switch also reports various other metrics, like snmp, packets per port, etc. Unfortunately it does not report CPU utilization or backplane utilization. + +This switch has a very slow SNMP processor.
To respond, it needs about 8 seconds, so I have set the refresh frequency (`update_every`) to 15 seconds. + +```js +{ + "enable_autodetect": false, + "update_every": 5, + "servers": [ + { + "hostname": "10.11.12.8", + "community": "public", + "update_every": 15, + "options": { "timeout": 20000, "version": 1 }, + "charts": { + "snmp_switch.power": { + "title": "Switch Power Supply", + "units": "watts", + "type": "line", + "priority": 10, + "family": "power", + "dimensions": { + "supply": { + "oid": ".1.3.6.1.2.1.105.1.3.1.1.2.1", + "algorithm": "absolute", + "multiplier": 1, + "divisor": 1, + "offset": 0 + }, + "used": { + "oid": ".1.3.6.1.2.1.105.1.3.1.1.4.1", + "algorithm": "absolute", + "multiplier": 1, + "divisor": 1, + "offset": 0 + } + } + } + , "snmp_switch.input": { + "title": "Switch Packets Input", + "units": "packets/s", + "type": "area", + "priority": 20, + "family": "IP", + "dimensions": { + "receives": { + "oid": ".1.3.6.1.2.1.4.3.0", + "algorithm": "incremental", + "multiplier": 1, + "divisor": 1, + "offset": 0 + } + , "discards": { + "oid": ".1.3.6.1.2.1.4.8.0", + "algorithm": "incremental", + "multiplier": 1, + "divisor": 1, + "offset": 0 + } + } + } + , "snmp_switch.input_errors": { + "title": "Switch Received Packets with Errors", + "units": "packets/s", + "type": "line", + "priority": 30, + "family": "IP", + "dimensions": { + "bad_header": { + "oid": ".1.3.6.1.2.1.4.4.0", + "algorithm": "incremental", + "multiplier": 1, + "divisor": 1, + "offset": 0 + } + , "bad_address": { + "oid": ".1.3.6.1.2.1.4.5.0", + "algorithm": "incremental", + "multiplier": 1, + "divisor": 1, + "offset": 0 + } + , "unknown_protocol": { + "oid": ".1.3.6.1.2.1.4.7.0", + "algorithm": "incremental", + "multiplier": 1, + "divisor": 1, + "offset": 0 + } + } + } + , "snmp_switch.output": { + "title": "Switch Output Packets", + "units": "packets/s", + "type": "line", + "priority": 40, + "family": "IP", + "dimensions": { + "requests": { + "oid": ".1.3.6.1.2.1.4.10.0", + 
"algorithm": "incremental", + "multiplier": 1, + "divisor": 1, + "offset": 0 + } + , "discards": { + "oid": ".1.3.6.1.2.1.4.11.0", + "algorithm": "incremental", + "multiplier": -1, + "divisor": 1, + "offset": 0 + } + , "no_route": { + "oid": ".1.3.6.1.2.1.4.12.0", + "algorithm": "incremental", + "multiplier": -1, + "divisor": 1, + "offset": 0 + } + } + } + , "snmp_switch.bandwidth_port": { + "title": "Switch Bandwidth for port ", + "titleoid": ".1.3.6.1.2.1.31.1.1.1.18.", + "units": "kilobits/s", + "type": "area", + "priority": 100, + "family": "ports", + "multiply_range": [ 1, 24 ], + "dimensions": { + "in": { + "oid": ".1.3.6.1.2.1.2.2.1.10.", + "algorithm": "incremental", + "multiplier": 8, + "divisor": 1024, + "offset": 0 + } + , "out": { + "oid": ".1.3.6.1.2.1.2.2.1.16.", + "algorithm": "incremental", + "multiplier": -8, + "divisor": 1024, + "offset": 0 + } + } + } + } + } + ] +} +``` diff --git a/conf.d/python.d.conf b/conf.d/python.d.conf index 7e4fa801..9ed346cd 100644 --- a/conf.d/python.d.conf +++ b/conf.d/python.d.conf @@ -18,28 +18,53 @@ log_interval: 3600 # ---------------------------------------------------------------------- # Enable / Disable python.d.plugin modules +#default_run: yes # -# The default for all modules is enabled (yes). -# Setting any of these to no will disable it. +# If "default_run" = "yes" the default for all modules is enabled (yes). +# Setting any of these to "no" will disable it. +# +# If "default_run" = "no" the default for all modules is disabled (no). +# Setting any of these to "yes" will enable it. 
-# apache: yes # apache_cache: yes +# apache: yes +# bind_rndc: yes # cpufreq: yes +# cpuidle: yes # dovecot: yes +# elasticsearch: yes + +# this is just an example example: no + # exim: yes +# fail2ban: yes +# freeradius: yes + +# gunicorn_log has been replaced by web_log +gunicorn_log: no + +# haproxy: yes # hddtemp: yes # ipfs: yes # isc_dhcpd: yes +# mdstat: yes # memcached: yes # mysql: yes # nginx: yes -# nginx_log: yes + +# nginx_log has been replaced by web_log +nginx_log: no + +# ovpn_status_log: yes # phpfpm: yes # postfix: yes +# postgres: yes # redis: yes +# retroshare: yes # sensors: yes +# smartd_log: yes # squid: yes # tomcat: yes -# freeradius: yes -# ovpn_status_log: yes +# varnish: yes +# web_log: yes diff --git a/conf.d/python.d/elasticsearch.conf b/conf.d/python.d/elasticsearch.conf index 1faee858..f98aaece 100644 --- a/conf.d/python.d/elasticsearch.conf +++ b/conf.d/python.d/elasticsearch.conf @@ -62,6 +62,13 @@ # cluster_stats: False/True # Calls to cluster stats elasticsearch API. Enabled by default. # # ---------------------------------------------------------------------- +# IMPORTANT Information +# +# Module uses python `requests` package +# +# You need to install it manually. (python-requests or python3-requests depending on the version of python). 
+# +# # AUTO-DETECTION JOBS # only one of them will run (they have the same name) # diff --git a/conf.d/python.d/fail2ban.conf b/conf.d/python.d/fail2ban.conf index cd805be8..d9664e35 100644 --- a/conf.d/python.d/fail2ban.conf +++ b/conf.d/python.d/fail2ban.conf @@ -56,22 +56,17 @@ # # Additionally to the above, fail2ban also supports the following: # -# log_path: 'path to fail2ban.log' # Default: '/var/log/fail2ban.log' +# log_path: 'path to fail2ban.log' # Default: '/var/log/fail2ban.log' # conf_path: 'path to jail.local/jail.conf' # Default: '/etc/fail2ban/jail.local' -# exclude: 'jails you want to exclude from autodetection' # Default: '[]' empty list +# conf_dir: 'path to jail.d/' # Default: '' empty +# exclude: 'jails you want to exclude from autodetection' # Default: '[]' empty list #------------------------------------------------------------------------------------------------------------------ -# IMPORTANT Information -# -# fail2ban.log file MUST BE readable by netdata. -# A good idea is to do this by adding the -# # create 0640 root netdata -# to fail2ban conf at logrotate.d -# # ------------------------------------------------------------------------------------------------------------------ # AUTO-DETECTION JOBS # only one of them will run (they have the same name) -#local: -# log_path: '/var/log/fail2ban.log' -# conf_path: '/etc/fail2ban/jail.local' +local: + log_path: '/var/log/fail2ban.log' + conf_path: '/etc/fail2ban/jail.local' +# conf_dir: '/etc/fail2ban/jail.d/' # exclude: 'dropbear apache' diff --git a/conf.d/python.d/gunicorn_log.conf b/conf.d/python.d/mongodb.conf index 8fea483f..a19b6570 100644 --- a/conf.d/python.d/gunicorn_log.conf +++ b/conf.d/python.d/mongodb.conf @@ -1,4 +1,4 @@ -# netdata python.d.plugin configuration for nginx gunicorn log +# netdata python.d.plugin configuration for mongodb # # This file is in YaML format. 
Generally the format is: # @@ -54,20 +54,24 @@ # priority: 60000 # the JOB's order on the dashboard # retries: 5 # the JOB's number of restoration attempts # -# Additionally to the above, gunicorn_log also supports the following: +# Additionally to the above, mongodb also supports the following: # -# path: 'PATH' # the path to gunicorn's access.log +# host: 'IP or HOSTNAME' # type <str> the host to connect to +# port: PORT # type <int> the port to connect to +# +# in all cases, the following can also be set: +# +# user: 'username' # the mongodb username to use +# pass: 'password' # the mongodb password to use # # ---------------------------------------------------------------------- +# to connect to the mongodb on localhost, without a password: +# ---------------------------------------------------------------------- # AUTO-DETECTION JOBS # only one of them will run (they have the same name) -gunicorn_log: - name: 'local' - path: '/var/log/gunicorn/access.log' - -gunicorn_log2: - name: 'local' - path: '/var/log/gunicorn/gunicorn-access.log' - +local: + name : 'local' + host : '127.0.0.1' + port : 27017 diff --git a/conf.d/python.d/nginx_log.conf b/conf.d/python.d/nsd.conf index 6a53c520..7566fe85 100644 --- a/conf.d/python.d/nginx_log.conf +++ b/conf.d/python.d/nsd.conf @@ -1,4 +1,4 @@ -# netdata python.d.plugin configuration for nginx log +# netdata python.d.plugin configuration for nsd # # This file is in YaML format. Generally the format is: # @@ -20,7 +20,8 @@ # update_every sets the default data collection frequency. # If unset, the python.d.plugin default is used. -# update_every: 1 +# nsd-control is slow, so once every 30 seconds +# update_every: 30 # priority controls the order of charts at the netdata dashboard. # Lower numbers move the charts towards the top of the page. 
@@ -54,19 +55,32 @@ # priority: 60000 # the JOB's order on the dashboard # retries: 5 # the JOB's number of restoration attempts # -# Additionally to the above, nginx_log also supports the following: +# Additionally to the above, nsd also supports the following: # -# path: 'PATH' # the path to nginx's access.log +# command: 'nsd-control stats_noreset' # the command to run # # ---------------------------------------------------------------------- -# AUTO-DETECTION JOBS -# only one of them will run (they have the same name) +# IMPORTANT Information +# +# Netdata must have permissions to run `nsd-control stats_noreset` command +# +# - Example-1 (use "sudo") +# 1. sudoers (e.g. visudo -f /etc/sudoers.d/netdata) +# Defaults:netdata !requiretty +# netdata ALL=(ALL) NOPASSWD: /usr/sbin/nsd-control stats_noreset +# 2. etc/netdata/python.d/nsd.conf +# local: +# update_every: 30 +# command: 'sudo /usr/sbin/nsd-control stats_noreset' +# +# - Example-2 (add "netdata" user to "nsd" group) +# usermod -aG nsd netdata +# -nginx_log: - name: 'local' - path: '/var/log/nginx/access.log' +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS -nginx_log2: - name: 'local' - path: '/var/log/nginx/nginx-access.log' +local: + update_every: 30 + command: 'nsd-control stats_noreset' diff --git a/conf.d/python.d/smartd_log.conf b/conf.d/python.d/smartd_log.conf new file mode 100644 index 00000000..e16454df --- /dev/null +++ b/conf.d/python.d/smartd_log.conf @@ -0,0 +1,85 @@ +# netdata python.d.plugin configuration for smartd log +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). 
+ +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# retries sets the number of retries to be made in case of failures. +# If unset, the default for python.d.plugin is used. +# Attempts to restore the service are made once every update_every +# and only if the module has collected values in the past. +# retries: 5 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# retries: 5 # the JOB's number of restoration attempts +# +# Additionally to the above, smartd_log also supports the following: +# +# log_path: '/path/to/smartdlogs' # path to smartd log files. Default is /var/log/smartd +# raw_values: no # raw or normalized values on charts. Default is normalized. +# smart_attributes: '1 2 3 4 44' # add additional smart attributes charts. 
Default are ['1', '4', '5', '7', '9', '12', '193', '194', '197', '198', '200']. +# +# ---------------------------------------------------------------------- +# Additional information +# Plugin reads smartd log files (-A option). +# You need to add (man smartd) to /etc/default/smartmontools '-i 600 -A /var/log/smartd/' to pass additional options to smartd on startup +# Then restart smartd service and check /path/log/smartdlogs +# ls /var/log/smartd/ +# CDC_WD10EZEX_00BN5A0-WD_WCC3F7FLVZS9.ata.csv WDC_WD10EZEX_00BN5A0-WD_WCC3F7FLVZS9.ata.csv ZDC_WD10EZEX_00BN5A0-WD_WCC3F7FLVZS9.ata.csv +# +# Smartd APPENDS to its logs at every run. It is NOT RECOMMENDED to set '-i' option below 60 sec. +# STRONGLY RECOMMENDED to create smartd conf file for logrotate +# +# RAW vs NORMALIZED values +# "Normalized value", commonly referred to as just "value". This is the most universal measurement, on the scale from 0 (bad) to some maximum (good) value. +# Maximum values are typically 100, 200 or 253. Rule of thumb is: high values are good, low values are bad. +# +# "Raw value" - the value of the attribute as it is tracked by the device, before any normalization takes place. +# Some raw numbers provide valuable insight when properly interpreted. These cases will be discussed later on. +# Raw values are typically listed in hexadecimal numbers. The raw value has different structure for different vendors and is often not meaningful as a decimal number.
+# +# +# JOB configuration +# +log_path: '/var/log/smartd' diff --git a/conf.d/python.d/varnish.conf b/conf.d/python.d/varnish.conf index 56dc6334..c25f3010 100644 --- a/conf.d/python.d/varnish.conf +++ b/conf.d/python.d/varnish.conf @@ -55,11 +55,3 @@ # retries: 5 # the JOB's number of restoration attempts # # -# -# The only you need is to add netdata to 'varnish' group -# -# Check it from cmd -# id netdata -# -# uid=999(netdata) gid=999(netdata) группы=999(netdata),118(varnish) -# diff --git a/conf.d/python.d/web_log.conf b/conf.d/python.d/web_log.conf new file mode 100644 index 00000000..06656285 --- /dev/null +++ b/conf.d/python.d/web_log.conf @@ -0,0 +1,147 @@ +# netdata python.d.plugin configuration for web log +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# retries sets the number of retries to be made in case of failures. +# If unset, the default for python.d.plugin is used. +# Attempts to restore the service are made once every update_every +# and only if the module has collected values in the past. 
+# retries: 5 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. + +# ---------------------------------------------------------------------- +# PLUGIN CONFIGURATION +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# retries: 5 # the JOB's number of restoration attempts +# +# Additionally to the above, web_log also supports the following: +# +# path: 'PATH' # the path to web server log file +# detailed_response_codes: yes/no # Default: yes. Additional chart where response codes are not grouped +# detailed_response_aggregate: yes/no # Default: yes. Not aggregated detailed response codes charts +# all_time : yes/no # Default: yes. 
All time unique client IPs chart (50000 addresses ~ 400KB) +# categories: # requests per url chart configuration +# cacti: 'cacti.*' # name(dimension): REGEX to match +# observium: 'observium.*' # name(dimension): REGEX to match +# stub_status: 'stub_status' # name(dimension): REGEX to match +# custom_log_format: # define a custom log format +# pattern: '(?P<address>[\da-f.:]+) -.*?"(?P<method>[A-Z]+) (?P<url>.*?)" (?P<code>[1-9]\d{2}) (?P<bytes_sent>\d+) (?P<resp_length>\d+) (?P<resp_time>\d\.\d+) ' +# time_multiplier: 1000000 # type <int> - convert time to microseconds + +# ---------------------------------------------------------------------- +# WEB SERVER CONFIGURATION +# +# Make sure the web server log directory and the web server log files +# can be read by user 'netdata'. +# +# To enable the timings chart and the requests size dimension, the +# web server needs to log them. This is how to add them: +# +# nginx: +# log_format netdata '$remote_addr - $remote_user [$time_local] ' +# '"$request" $status $body_bytes_sent ' +# '$request_length $request_time ' +# '"$http_referer" "$http_user_agent"'; +# access_log /var/log/nginx/access.log netdata; +# +# apache (you need mod_logio enabled): +# LogFormat "%h %l %u %t \"%r\" %>s %O %I %D \"%{Referer}i\" \"%{User-Agent}i\"" vhost_netdata +# LogFormat "%h %l %u %t \"%r\" %>s %O %I %D \"%{Referer}i\" \"%{User-Agent}i\"" netdata +# CustomLog "/var/log/apache2/access.log" netdata + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them per web server will run (when they have the same name) + + +# ------------------------------------------- +# nginx log on various distros + +# debian, arch +nginx_log: + name: 'nginx' + path: '/var/log/nginx/access.log' + +# gentoo +nginx_log2: + name: 'nginx' + path: '/var/log/nginx/localhost.access_log' + + +# ------------------------------------------- +# apache log on various distros + +# debian +apache_log: + name: 'apache' 
+ path: '/var/log/apache2/access.log' + +# gentoo +apache_log2: + name: 'apache' + path: '/var/log/apache2/access_log' + +# arch +apache_log3: + name: 'apache' + path: '/var/log/httpd/access_log' + +# debian +apache_vhosts_log: + name: 'apache_vhosts' + path: '/var/log/apache2/other_vhosts_access.log' + + +# ------------------------------------------- +# gunicorn log on various distros + +gunicorn_log: + name: 'gunicorn' + path: '/var/log/gunicorn/access.log' + +gunicorn_log2: + name: 'gunicorn' + path: '/var/log/gunicorn/gunicorn-access.log' diff --git a/conf.d/stream.conf b/conf.d/stream.conf new file mode 100644 index 00000000..0ebdccb8 --- /dev/null +++ b/conf.d/stream.conf @@ -0,0 +1,143 @@ +# netdata configuration for aggregating data from remote hosts +# +# API keys authorize a pair of sending-receiving netdata servers. +# Once their communication is authorized, they can exchange metrics for any +# number of hosts. +# +# You can generate API keys, with the linux command: uuidgen +# +# ----------------------------------------------------------------------------- +# 1. ON SLAVE NETDATA - THE ONE THAT WILL BE SENDING METRICS + +[stream] + # Enable this on slaves, to have them send metrics. + enabled = no + + # The destination to send metrics to. + # A space separated list of: + # [PROTOCOL:]HOST[%INTERFACE][:PORT] + # The first available will get the metrics. + # PROTOCOL = tcp or udp (only tcp is supported by masters) + # HOST = an IPv4, IPv6 IP, or a hostname. + # IPv6 IPs should be given with brackets [ip:address] + # INTERFACE = the network interface to use + # PORT = the port number or service name (/etc/services) + # This communication is not HTTP (cannot be proxied by web proxies). + destination = + + # The API_KEY to use (as the sender) + api key = + + # The timeout to connect and send metrics + timeout seconds = 60 + + # If the destination line above does not specify a port, use this + default port = 19999 + + # The buffer to use for sending metrics.
+ # 1MB by default is good for 2-3 seconds of data, so increase this + # if you expect latencies. + buffer size bytes = 1048576 + + # If the connection fails, or it disconnects, + # retry after that many seconds. + reconnect delay seconds = 5 + + # Attempt to sync the clock of the master with the clock of the + # slave for that many iterations, when starting. + initial clock resync iterations = 60 + + +# ----------------------------------------------------------------------------- +# 2. ON MASTER NETDATA - THE ONE THAT WILL BE RECEIVING METRICS +# +# You can have one API key per slave, or the same API key for all slaves. +# +# All options below are used in this order: +# +# a) MACHINE_GUID (settings for each machine) +# b) API_KEY (settings for the API key) +# c) this netdata defaults (as in netdata.conf) +# +# You can combine the above (the more specific setting will be used). + +# API key authentication +# If the key is not listed here, it will not be able to connect. + +[API_KEY] + # Default settings for the API key + + # You can disable the API key, by setting this to: no + # The default (for unknown API keys) is also: no + enabled = no + + # The default history in entries, for all hosts using this API key. + # You can also set it per host below. + # If you don't set it here, the history size of the central netdata + # will be used + default history = 3600 + + # The default memory mode to be used for all hosts using this API key. + # You can also set it per host below. + # If you don't set it here, the memory mode of netdata.conf will be used. + # Valid modes: + # save save on exit, load on start + # map like swap (continuously syncing to disks) + # ram keep it in RAM, don't touch the disk + # none no database (passing through this netdata) + default memory mode = ram + + # Shall we enable health monitoring for the hosts using this API key?
+ # 3 values: + # yes enable alarms + # no do not enable alarms + # auto enable alarms, only when the sending netdata is connected + # You can also set it per host, below. + # The default is the same as in netdata.conf + health enabled by default = auto + + # postpone alarms for a short period after the sender is connected + default postpone alarms on connect seconds = 60 + + # need to route metrics differently? set these. + # the defaults are the ones at the [stream] section + #default proxy enabled = yes | no + #default proxy destination = IP:PORT IP:PORT ... + #default proxy api key = API_KEY + + +# ----------------------------------------------------------------------------- +# 3. ON MASTER NETDATA - THE ONE THAT WILL BE RECEIVING METRICS +# +# THIS IS OPTIONAL - YOU DON'T NEED IT BY DEFAULT +# It only exists to give you finer control of the master settings for each +# slave host, when the same API key is used by many netdata slaves / proxies. +# +# Each netdata has a unique GUID - generated the first time netdata starts. +# You can find it at /var/lib/netdata/registry/netdata.public.unique.id +# The host sending data will have one. If the host is not ephemeral, +# you can give settings for each specific host here. + +[MACHINE_GUID] + # enable this host: yes | no + # When disabled, the master will not receive metrics for this host. + # THIS IS NOT A SECURITY MECHANISM - AN ATTACKER CAN SET ANY OTHER GUID. + # Use only the API key for security. + enabled = no + + # The number of entries in the database + history = 3600 + + # The memory mode of the database: save | map | ram | none + memory mode = save + + # Health / alarms control: yes | no | auto + health enabled = yes + + # postpone alarms when the sender connects + postpone alarms on connect seconds = 60 + + # need to route metrics differently? + #proxy enabled = yes | no + #proxy destination = IP:PORT IP:PORT ... + #proxy api key = API_KEY |