summary refs log tree commit diff stats
path: root/conf.d
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--conf.d/Makefile.am147
-rw-r--r--conf.d/Makefile.in176
-rw-r--r--conf.d/apps_groups.conf88
-rw-r--r--conf.d/charts.d/ap.conf19
-rw-r--r--conf.d/charts.d/apache.conf26
-rw-r--r--conf.d/charts.d/apcupsd.conf19
-rw-r--r--conf.d/charts.d/cpu_apps.conf15
-rw-r--r--conf.d/charts.d/cpufreq.conf20
-rw-r--r--conf.d/charts.d/example.conf17
-rw-r--r--conf.d/charts.d/exim.conf20
-rw-r--r--conf.d/charts.d/hddtemp.conf20
-rw-r--r--conf.d/charts.d/load_average.conf18
-rw-r--r--conf.d/charts.d/mem_apps.conf15
-rw-r--r--conf.d/charts.d/mysql.conf19
-rw-r--r--conf.d/charts.d/nginx.conf19
-rw-r--r--conf.d/charts.d/nut.conf20
-rw-r--r--conf.d/charts.d/opensips.conf17
-rw-r--r--conf.d/charts.d/phpfpm.conf22
-rw-r--r--conf.d/charts.d/postfix.conf20
-rw-r--r--conf.d/charts.d/sensors.conf27
-rw-r--r--conf.d/charts.d/squid.conf21
-rw-r--r--conf.d/charts.d/tomcat.conf34
-rw-r--r--conf.d/fping.conf44
-rw-r--r--conf.d/health.d/apache.conf4
-rw-r--r--conf.d/health.d/backend.conf45
-rw-r--r--conf.d/health.d/bind_rndc.conf9
-rw-r--r--conf.d/health.d/cpu.conf10
-rw-r--r--conf.d/health.d/disks.conf26
-rw-r--r--conf.d/health.d/elasticsearch.conf9
-rw-r--r--conf.d/health.d/entropy.conf8
-rw-r--r--conf.d/health.d/haproxy.conf27
-rw-r--r--conf.d/health.d/ipc.conf22
-rw-r--r--conf.d/health.d/ipfs.conf11
-rw-r--r--conf.d/health.d/isc_dhcpd.conf10
-rw-r--r--conf.d/health.d/mdstat.conf18
-rw-r--r--conf.d/health.d/memcached.conf6
-rw-r--r--conf.d/health.d/memory.conf30
-rw-r--r--conf.d/health.d/mysql.conf76
-rw-r--r--conf.d/health.d/named.conf4
-rw-r--r--conf.d/health.d/net.conf97
-rw-r--r--conf.d/health.d/netfilter.conf23
-rw-r--r--conf.d/health.d/nginx.conf4
-rw-r--r--conf.d/health.d/postgres.conf13
-rw-r--r--conf.d/health.d/ram.conf4
-rw-r--r--conf.d/health.d/redis.conf4
-rw-r--r--conf.d/health.d/retroshare.conf4
-rw-r--r--conf.d/health.d/softnet.conf18
-rw-r--r--conf.d/health.d/squid.conf4
-rw-r--r--conf.d/health.d/swap.conf17
-rw-r--r--conf.d/health.d/tcp_resets.conf32
-rw-r--r--conf.d/health.d/udp_errors.conf40
-rw-r--r--conf.d/health.d/varnish.conf9
-rw-r--r--conf.d/health_alarm_notify.conf211
-rw-r--r--conf.d/node.d.conf39
-rw-r--r--conf.d/node.d/README.md7
-rw-r--r--conf.d/node.d/named.conf.md344
-rw-r--r--conf.d/node.d/sma_webbox.conf.md25
-rw-r--r--conf.d/node.d/snmp.conf.md341
-rw-r--r--conf.d/python.d.conf3
-rw-r--r--conf.d/python.d/bind_rndc.conf109
-rw-r--r--conf.d/python.d/elasticsearch.conf72
-rw-r--r--conf.d/python.d/exim.conf4
-rw-r--r--conf.d/python.d/fail2ban.conf77
-rw-r--r--conf.d/python.d/freeradius.conf86
-rw-r--r--conf.d/python.d/gunicorn_log.conf73
-rw-r--r--conf.d/python.d/haproxy.conf78
-rw-r--r--conf.d/python.d/hddtemp.conf3
-rw-r--r--conf.d/python.d/isc_dhcpd.conf78
-rw-r--r--conf.d/python.d/mdstat.conf26
-rw-r--r--conf.d/python.d/mysql.conf97
-rw-r--r--conf.d/python.d/ovpn_status_log.conf86
-rw-r--r--conf.d/python.d/postgres.conf104
-rw-r--r--conf.d/python.d/redis.conf10
-rw-r--r--conf.d/python.d/sensors.conf4
-rw-r--r--conf.d/python.d/varnish.conf65
75 files changed, 3138 insertions, 231 deletions
diff --git a/conf.d/Makefile.am b/conf.d/Makefile.am
index 066744ca..b725e249 100644
--- a/conf.d/Makefile.am
+++ b/conf.d/Makefile.am
@@ -4,61 +4,110 @@
MAINTAINERCLEANFILES= $(srcdir)/Makefile.in
dist_config_DATA = \
- apps_groups.conf \
- charts.d.conf \
- python.d.conf \
- health_alarm_notify.conf \
- health_email_recipients.conf \
- $(NULL)
-
-chartsconfigdir=$(configdir)/charts.d
-dist_chartsconfig_DATA = \
- $(NULL)
+ apps_groups.conf \
+ charts.d.conf \
+ fping.conf \
+ node.d.conf \
+ python.d.conf \
+ health_alarm_notify.conf \
+ health_email_recipients.conf \
+ $(NULL)
nodeconfigdir=$(configdir)/node.d
dist_nodeconfig_DATA = \
- $(NULL)
+ node.d/README.md \
+ node.d/named.conf.md \
+ node.d/sma_webbox.conf.md \
+ node.d/snmp.conf.md \
+ $(NULL)
pythonconfigdir=$(configdir)/python.d
dist_pythonconfig_DATA = \
- python.d/apache.conf \
- python.d/apache_cache.conf \
- python.d/cpufreq.conf \
- python.d/dovecot.conf \
- python.d/example.conf \
- python.d/exim.conf \
- python.d/hddtemp.conf \
- python.d/ipfs.conf \
- python.d/memcached.conf \
- python.d/mysql.conf \
- python.d/nginx.conf \
- python.d/nginx_log.conf \
- python.d/phpfpm.conf \
- python.d/postfix.conf \
- python.d/redis.conf \
- python.d/retroshare.conf \
- python.d/sensors.conf \
- python.d/squid.conf \
- python.d/tomcat.conf \
- $(NULL)
+ python.d/apache.conf \
+ python.d/apache_cache.conf \
+ python.d/bind_rndc.conf \
+ python.d/cpufreq.conf \
+ python.d/dovecot.conf \
+ python.d/elasticsearch.conf \
+ python.d/example.conf \
+ python.d/exim.conf \
+ python.d/fail2ban.conf \
+ python.d/freeradius.conf \
+ python.d/gunicorn_log.conf \
+ python.d/haproxy.conf \
+ python.d/hddtemp.conf \
+ python.d/ipfs.conf \
+ python.d/isc_dhcpd.conf \
+ python.d/mdstat.conf \
+ python.d/memcached.conf \
+ python.d/mysql.conf \
+ python.d/nginx.conf \
+ python.d/nginx_log.conf \
+ python.d/ovpn_status_log.conf \
+ python.d/phpfpm.conf \
+ python.d/postfix.conf \
+ python.d/postgres.conf \
+ python.d/redis.conf \
+ python.d/retroshare.conf \
+ python.d/sensors.conf \
+ python.d/squid.conf \
+ python.d/tomcat.conf \
+ python.d/varnish.conf \
+ $(NULL)
healthconfigdir=$(configdir)/health.d
dist_healthconfig_DATA = \
- health.d/apache.conf \
- health.d/cpu.conf \
- health.d/disks.conf \
- health.d/entropy.conf \
- health.d/tcp_resets.conf \
- health.d/memcached.conf \
- health.d/mysql.conf \
- health.d/named.conf \
- health.d/net.conf \
- health.d/nginx.conf \
- health.d/qos.conf \
- health.d/ram.conf \
- health.d/redis.conf \
- health.d/retroshare.conf \
- health.d/softnet.conf \
- health.d/swap.conf \
- health.d/squid.conf \
- $(NULL)
+ health.d/apache.conf \
+ health.d/backend.conf \
+ health.d/bind_rndc.conf \
+ health.d/cpu.conf \
+ health.d/disks.conf \
+ health.d/elasticsearch.conf \
+ health.d/entropy.conf \
+ health.d/haproxy.conf \
+ health.d/ipc.conf \
+ health.d/ipfs.conf \
+ health.d/isc_dhcpd.conf \
+ health.d/mdstat.conf \
+ health.d/memcached.conf \
+ health.d/memory.conf \
+ health.d/mysql.conf \
+ health.d/named.conf \
+ health.d/net.conf \
+ health.d/netfilter.conf \
+ health.d/nginx.conf \
+ health.d/postgres.conf \
+ health.d/qos.conf \
+ health.d/ram.conf \
+ health.d/redis.conf \
+ health.d/retroshare.conf \
+ health.d/softnet.conf \
+ health.d/squid.conf \
+ health.d/swap.conf \
+ health.d/tcp_resets.conf \
+ health.d/udp_errors.conf \
+ health.d/varnish.conf \
+ $(NULL)
+
+chartsconfigdir=$(configdir)/charts.d
+dist_chartsconfig_DATA = \
+ charts.d/apache.conf \
+ charts.d/apcupsd.conf \
+ charts.d/cpufreq.conf \
+ charts.d/exim.conf \
+ charts.d/load_average.conf \
+ charts.d/mysql.conf \
+ charts.d/nut.conf \
+ charts.d/phpfpm.conf \
+ charts.d/sensors.conf \
+ charts.d/tomcat.conf \
+ charts.d/ap.conf \
+ charts.d/cpu_apps.conf \
+ charts.d/example.conf \
+ charts.d/hddtemp.conf \
+ charts.d/mem_apps.conf \
+ charts.d/nginx.conf \
+ charts.d/opensips.conf \
+ charts.d/postfix.conf \
+ charts.d/squid.conf \
+ $(NULL)
diff --git a/conf.d/Makefile.in b/conf.d/Makefile.in
index 823713bf..344f1c41 100644
--- a/conf.d/Makefile.in
+++ b/conf.d/Makefile.in
@@ -1,7 +1,7 @@
-# Makefile.in generated by automake 1.14.1 from Makefile.am.
+# Makefile.in generated by automake 1.15 from Makefile.am.
# @configure_input@
-# Copyright (C) 1994-2013 Free Software Foundation, Inc.
+# Copyright (C) 1994-2014 Free Software Foundation, Inc.
# This Makefile.in is free software; the Free Software Foundation
# gives unlimited permission to copy and/or distribute it,
@@ -15,7 +15,17 @@
@SET_MAKE@
VPATH = @srcdir@
-am__is_gnu_make = test -n '$(MAKEFILE_LIST)' && test -n '$(MAKELEVEL)'
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
am__make_running_with_option = \
case $${target_option-} in \
?) ;; \
@@ -79,20 +89,21 @@ POST_UNINSTALL = :
build_triplet = @build@
host_triplet = @host@
subdir = conf.d
-DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/Makefile.am \
- $(dist_chartsconfig_DATA) $(dist_config_DATA) \
- $(dist_healthconfig_DATA) $(dist_nodeconfig_DATA) \
- $(dist_pythonconfig_DATA)
ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
am__aclocal_m4_deps = $(top_srcdir)/m4/ax_c___atomic.m4 \
$(top_srcdir)/m4/ax_c__generic.m4 \
$(top_srcdir)/m4/ax_c_mallinfo.m4 \
$(top_srcdir)/m4/ax_c_mallopt.m4 \
$(top_srcdir)/m4/ax_check_compile_flag.m4 \
+ $(top_srcdir)/m4/ax_gcc_func_attribute.m4 \
$(top_srcdir)/m4/ax_pthread.m4 $(top_srcdir)/m4/jemalloc.m4 \
$(top_srcdir)/m4/tcmalloc.m4 $(top_srcdir)/configure.ac
am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
$(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(dist_chartsconfig_DATA) \
+ $(dist_config_DATA) $(dist_healthconfig_DATA) \
+ $(dist_nodeconfig_DATA) $(dist_pythonconfig_DATA) \
+ $(am__DIST_COMMON)
mkinstalldirs = $(install_sh) -d
CONFIG_HEADER = $(top_builddir)/config.h
CONFIG_CLEAN_FILES =
@@ -150,6 +161,7 @@ DATA = $(dist_chartsconfig_DATA) $(dist_config_DATA) \
$(dist_healthconfig_DATA) $(dist_nodeconfig_DATA) \
$(dist_pythonconfig_DATA)
am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+am__DIST_COMMON = $(srcdir)/Makefile.in
DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
ACLOCAL = @ACLOCAL@
AMTAR = @AMTAR@
@@ -293,64 +305,113 @@ webdir = @webdir@
#
MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
dist_config_DATA = \
- apps_groups.conf \
- charts.d.conf \
- python.d.conf \
- health_alarm_notify.conf \
- health_email_recipients.conf \
- $(NULL)
-
-chartsconfigdir = $(configdir)/charts.d
-dist_chartsconfig_DATA = \
- $(NULL)
+ apps_groups.conf \
+ charts.d.conf \
+ fping.conf \
+ node.d.conf \
+ python.d.conf \
+ health_alarm_notify.conf \
+ health_email_recipients.conf \
+ $(NULL)
nodeconfigdir = $(configdir)/node.d
dist_nodeconfig_DATA = \
- $(NULL)
+ node.d/README.md \
+ node.d/named.conf.md \
+ node.d/sma_webbox.conf.md \
+ node.d/snmp.conf.md \
+ $(NULL)
pythonconfigdir = $(configdir)/python.d
dist_pythonconfig_DATA = \
- python.d/apache.conf \
- python.d/apache_cache.conf \
- python.d/cpufreq.conf \
- python.d/dovecot.conf \
- python.d/example.conf \
- python.d/exim.conf \
- python.d/hddtemp.conf \
- python.d/ipfs.conf \
- python.d/memcached.conf \
- python.d/mysql.conf \
- python.d/nginx.conf \
- python.d/nginx_log.conf \
- python.d/phpfpm.conf \
- python.d/postfix.conf \
- python.d/redis.conf \
- python.d/retroshare.conf \
- python.d/sensors.conf \
- python.d/squid.conf \
- python.d/tomcat.conf \
- $(NULL)
+ python.d/apache.conf \
+ python.d/apache_cache.conf \
+ python.d/bind_rndc.conf \
+ python.d/cpufreq.conf \
+ python.d/dovecot.conf \
+ python.d/elasticsearch.conf \
+ python.d/example.conf \
+ python.d/exim.conf \
+ python.d/fail2ban.conf \
+ python.d/freeradius.conf \
+ python.d/gunicorn_log.conf \
+ python.d/haproxy.conf \
+ python.d/hddtemp.conf \
+ python.d/ipfs.conf \
+ python.d/isc_dhcpd.conf \
+ python.d/mdstat.conf \
+ python.d/memcached.conf \
+ python.d/mysql.conf \
+ python.d/nginx.conf \
+ python.d/nginx_log.conf \
+ python.d/ovpn_status_log.conf \
+ python.d/phpfpm.conf \
+ python.d/postfix.conf \
+ python.d/postgres.conf \
+ python.d/redis.conf \
+ python.d/retroshare.conf \
+ python.d/sensors.conf \
+ python.d/squid.conf \
+ python.d/tomcat.conf \
+ python.d/varnish.conf \
+ $(NULL)
healthconfigdir = $(configdir)/health.d
dist_healthconfig_DATA = \
- health.d/apache.conf \
- health.d/cpu.conf \
- health.d/disks.conf \
- health.d/entropy.conf \
- health.d/tcp_resets.conf \
- health.d/memcached.conf \
- health.d/mysql.conf \
- health.d/named.conf \
- health.d/net.conf \
- health.d/nginx.conf \
- health.d/qos.conf \
- health.d/ram.conf \
- health.d/redis.conf \
- health.d/retroshare.conf \
- health.d/softnet.conf \
- health.d/swap.conf \
- health.d/squid.conf \
- $(NULL)
+ health.d/apache.conf \
+ health.d/backend.conf \
+ health.d/bind_rndc.conf \
+ health.d/cpu.conf \
+ health.d/disks.conf \
+ health.d/elasticsearch.conf \
+ health.d/entropy.conf \
+ health.d/haproxy.conf \
+ health.d/ipc.conf \
+ health.d/ipfs.conf \
+ health.d/isc_dhcpd.conf \
+ health.d/mdstat.conf \
+ health.d/memcached.conf \
+ health.d/memory.conf \
+ health.d/mysql.conf \
+ health.d/named.conf \
+ health.d/net.conf \
+ health.d/netfilter.conf \
+ health.d/nginx.conf \
+ health.d/postgres.conf \
+ health.d/qos.conf \
+ health.d/ram.conf \
+ health.d/redis.conf \
+ health.d/retroshare.conf \
+ health.d/softnet.conf \
+ health.d/squid.conf \
+ health.d/swap.conf \
+ health.d/tcp_resets.conf \
+ health.d/udp_errors.conf \
+ health.d/varnish.conf \
+ $(NULL)
+
+chartsconfigdir = $(configdir)/charts.d
+dist_chartsconfig_DATA = \
+ charts.d/apache.conf \
+ charts.d/apcupsd.conf \
+ charts.d/cpufreq.conf \
+ charts.d/exim.conf \
+ charts.d/load_average.conf \
+ charts.d/mysql.conf \
+ charts.d/nut.conf \
+ charts.d/phpfpm.conf \
+ charts.d/sensors.conf \
+ charts.d/tomcat.conf \
+ charts.d/ap.conf \
+ charts.d/cpu_apps.conf \
+ charts.d/example.conf \
+ charts.d/hddtemp.conf \
+ charts.d/mem_apps.conf \
+ charts.d/nginx.conf \
+ charts.d/opensips.conf \
+ charts.d/postfix.conf \
+ charts.d/squid.conf \
+ $(NULL)
all: all-am
@@ -367,7 +428,6 @@ $(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__confi
echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu conf.d/Makefile'; \
$(am__cd) $(top_srcdir) && \
$(AUTOMAKE) --gnu conf.d/Makefile
-.PRECIOUS: Makefile
Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
@case '$?' in \
*config.status*) \
@@ -653,6 +713,8 @@ uninstall-am: uninstall-dist_chartsconfigDATA \
uninstall-dist_configDATA uninstall-dist_healthconfigDATA \
uninstall-dist_nodeconfigDATA uninstall-dist_pythonconfigDATA
+.PRECIOUS: Makefile
+
# Tell versions [3.59,3.63) of GNU make to not export all variables.
# Otherwise a system limit (for SysV at least) may be exceeded.
diff --git a/conf.d/apps_groups.conf b/conf.d/apps_groups.conf
index 57357a87..e2836877 100644
--- a/conf.d/apps_groups.conf
+++ b/conf.d/apps_groups.conf
@@ -3,7 +3,7 @@
#
# The apps.plugin displays charts with information about the processes running.
# This config allows grouping processes together, so that several processes
-# will be reported together.
+# will be reported as one.
#
# Only groups in this file are reported. All other processes will be reported
# as 'other'.
@@ -13,32 +13,43 @@
#
# The format is:
#
-# group_name: process1 process2 process3 ...
+# group: process1 process2 process3 ...
#
-# The process names are the same to the ones returned by: ps -e
-# or /proc/PID/stat
+# Each group can be given multiple times, to add more processes to it.
+#
+# The process names are the ones returned by:
+#
+# - ps -e or /proc/PID/stat
+# - in case of substring mode (see below): /proc/PID/cmdline
#
# To add process names with spaces, enclose them in quotes (single or double)
-# example: 'Plex Media Serv' "my other process"
+# example: 'Plex Media Serv' "my other process".
#
# Wildcard support:
-# You can add an asterisk (*) at the beginning and/or the end of a process name:
-# *name suffix mode: will search for processes ending with 'name' (/proc/PID/stat)
-# name* prefix mode: will search for processes beginning with 'name' (/proc/PID/stat)
-# *name* substring mode: will search for 'name' in the whole command line (/proc/PID/cmdline)
+# You can add an asterisk (*) at the beginning and/or the end of a process:
+#
+# *name suffix mode: will search for processes ending with 'name'
+# (/proc/PID/stat)
+#
+# name* prefix mode: will search for processes beginning with 'name'
+# (/proc/PID/stat)
+#
+# *name* substring mode: will search for 'name' in the whole command line
+# (/proc/PID/cmdline)
#
# If you enter even just one *name* (substring), apps.plugin will process
# /proc/PID/cmdline for all processes, just once (when they are first seen).
#
-# To add process names with single quotes, enclose them in double quotes
+# To add processes with single quotes, enclose them in double quotes
# example: "process with this ' single quote"
#
-# To add process names with double quotes, enclose them in single quotes:
+# To add processes with double quotes, enclose them in single quotes:
# example: 'process with this " double quote'
#
-# If a group name starts with a -, the dimension will be hidden (cpu chart only)
+# If a group or process name starts with a -, the dimension will be hidden
+# (cpu chart only).
#
-# If any process name starts with a +, debugging will be enabled for it
+# If a process starts with a +, debugging will be enabled for it
# (debugging produces a lot of output - do not enable it in production systems)
#
# You can add any number of groups you like. Only the ones found running will
@@ -86,8 +97,8 @@ unicorn: *unicorn*
# -----------------------------------------------------------------------------
# database servers
-sql: mysqld* mariad* postgres*
-nosql: mongod redis*
+sql: mysqld* mariad* postgres* oracle_* ora_*
+nosql: mongod redis* memcached
# -----------------------------------------------------------------------------
# email servers
@@ -106,22 +117,26 @@ wifi: hostapd wpa_supplicant
camo: *camo*
balancer: ipvs_* haproxy
-ha: corosync hs_logd ha_logd stonithd
+ha: corosync hs_logd ha_logd stonithd pacemakerd lrmd crmd
# -----------------------------------------------------------------------------
# telephony
pbx: asterisk safe_asterisk *vicidial*
sip: opensips* stund
-murmur: murmurd
-xmpp: *vines* *prosody*
+
+# -----------------------------------------------------------------------------
+# chat
+
+chat: irssi *vines* *prosody* murmurd
# -----------------------------------------------------------------------------
# monitoring
-logs: ulogd* syslog* rsyslog* logrotate
+logs: ulogd* syslog* rsyslog* logrotate systemd-journald
nms: snmpd vnstatd smokeping zabbix* monit munin* mon openhpid watchdog tailon nrpe
splunk: splunkd
+azure: mdsd *waagent* *omiserver* *omiagent* hv_kvp_daemon hv_vss_daemon
# -----------------------------------------------------------------------------
# file systems and file servers
@@ -130,6 +145,7 @@ samba: smbd nmbd winbindd
nfs: rpcbind rpc.* nfs*
zfs: spl_* z_* txg_* zil_* arc_* l2arc*
btrfs: btrfs*
+iscsi: iscsid iscsi_eh
# -----------------------------------------------------------------------------
# containers & virtual machines
@@ -150,7 +166,7 @@ print: cups* lpd lpq
# -----------------------------------------------------------------------------
# time servers and clients
-time: ntp*
+time: ntp* systemd-timesyncd
# -----------------------------------------------------------------------------
# dhcp servers and clients
@@ -165,7 +181,8 @@ named: named rncd dig
# -----------------------------------------------------------------------------
# installation / compilation / debugging
-build: cc1 cc1plus as gcc* ld make automake autoconf autoreconf git valgrind*
+build: cc1 cc1plus as gcc* cppcheck ld make cmake automake autoconf autoreconf
+build: git gdb valgrind*
# -----------------------------------------------------------------------------
# antivirus
@@ -175,7 +192,7 @@ antivirus: clam* *clam
# -----------------------------------------------------------------------------
# torrent clients
-torrents: *deluge* transmission* *SickBeard*
+torrents: *deluge* transmission* *SickBeard* *CouchPotato* *rtorrent*
# -----------------------------------------------------------------------------
# backup servers and clients
@@ -185,7 +202,7 @@ backup: rsync bacula*
# -----------------------------------------------------------------------------
# cron
-cron: cron atd anacron
+cron: cron* atd anacron systemd-cron*
# -----------------------------------------------------------------------------
# UPS
@@ -193,21 +210,29 @@ cron: cron atd anacron
ups: upsmon upsd */nut/*
# -----------------------------------------------------------------------------
-# Kernel / System
+# media players, servers, clients
-system: systemd* udisks* udevd* *udevd connmand ipv6_addrconf dbus-* inetd xinetd mdadm
-kernel: kthreadd kauditd lockd khelper kdevtmpfs khungtaskd rpciod fsnotify_mark kthrotld iscsi_eh deferwq
-ksmd: ksmd
+media: mplayer vlc xine mediatomb omxplayer* kodi* xbmc* mediacenter eventlircd
+media: mpd minidlnad mt-daapd avahi* Plex*
# -----------------------------------------------------------------------------
-# media players, servers, clients
+# X
-media: mplayer vlc xine mediatomb omxplayer* kodi* xbmc* mediacenter eventlircd mpd minidlnad mt-daapd avahi*
+X: X Xorg xinit lightdm xdm pulseaudio gkrellm xfwm4 xfdesktop xfce* Thunar
+X: xfsettingsd xfconfd gnome-* gdm gconf* dconf* xfconf* *gvfs gvfs* kdm slim
+X: evolution-* firefox chromium opera epiphany WebKit*
# -----------------------------------------------------------------------------
-# X
+# Kernel / System
+
+ksmd: ksmd
+
+system: systemd* udisks* udevd* *udevd connmand ipv6_addrconf dbus-* rtkit*
+system: inetd xinetd mdadm polkitd acpid uuidd packagekitd upowerd colord
+system: accounts-daemon rngd haveged
-X: X lightdm xdm pulseaudio gkrellm xfwm4 xfdesktop xfce* Thunar xfsettingsd xfconfd gnome-* gdm gconfd-2 *gvfsd gvfsd* kdm slim
+kernel: kthreadd kauditd lockd khelper kdevtmpfs khungtaskd rpciod
+kernel: fsnotify_mark kthrotld deferwq scsi_*
# -----------------------------------------------------------------------------
# other application servers
@@ -215,5 +240,4 @@ X: X lightdm xdm pulseaudio gkrellm xfwm4 xfdesktop xfce* Thunar xfsettingsd xfc
crsproxy: crsproxy
sidekiq: *sidekiq*
java: java
-chat: irssi
ipfs: ipfs
diff --git a/conf.d/charts.d/ap.conf b/conf.d/charts.d/ap.conf
new file mode 100644
index 00000000..88a447eb
--- /dev/null
+++ b/conf.d/charts.d/ap.conf
@@ -0,0 +1,19 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# nothing fancy to configure.
+# this module will run
+# iw dev - to find wireless devices in AP mode
+# iw ${dev} station dump - to get connected clients
+# based on the above, it generates several charts
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#ap_update_every=
+
+# the charts priority on the dashboard
+#ap_priority=6900
diff --git a/conf.d/charts.d/apache.conf b/conf.d/charts.d/apache.conf
new file mode 100644
index 00000000..b82c2a7f
--- /dev/null
+++ b/conf.d/charts.d/apache.conf
@@ -0,0 +1,26 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# USE THE PYTHON.D ONE
+
+# the URL to download apache status info
+#apache_url="http://127.0.0.1:80/server-status?auto"
+#apache_curl_opts=
+
+# convert apache floating point values
+# to integer using this multiplier
+# this only affects precision - the values
+# will be in the proper units
+#apache_decimal_detail=1000000
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#apache_update_every=
+
+# the charts priority on the dashboard
+#apache_priority=60000
diff --git a/conf.d/charts.d/apcupsd.conf b/conf.d/charts.d/apcupsd.conf
new file mode 100644
index 00000000..f8bf7ed6
--- /dev/null
+++ b/conf.d/charts.d/apcupsd.conf
@@ -0,0 +1,19 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+#apcupsd_ip=127.0.0.1
+#apcupsd_port=3551
+
+# how long to wait for apcupsd to respond
+#apcupsd_timeout=3
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#apcupsd_update_every=10
+
+# the charts priority on the dashboard
+#apcupsd_priority=90000
diff --git a/conf.d/charts.d/cpu_apps.conf b/conf.d/charts.d/cpu_apps.conf
new file mode 100644
index 00000000..46d70362
--- /dev/null
+++ b/conf.d/charts.d/cpu_apps.conf
@@ -0,0 +1,15 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# apps.plugin can do better
+
+#cpu_apps_apps=
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#cpu_apps_update_every=2
diff --git a/conf.d/charts.d/cpufreq.conf b/conf.d/charts.d/cpufreq.conf
new file mode 100644
index 00000000..4f26562e
--- /dev/null
+++ b/conf.d/charts.d/cpufreq.conf
@@ -0,0 +1,20 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# USE THE PYTHON.D ONE
+
+#cpufreq_sys_dir="/sys/devices"
+#cpufreq_sys_depth=10
+#cpufreq_source_update=1
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#cpufreq_update_every=
+
+# the charts priority on the dashboard
+#cpufreq_priority=10000
diff --git a/conf.d/charts.d/example.conf b/conf.d/charts.d/example.conf
new file mode 100644
index 00000000..dc4b6900
--- /dev/null
+++ b/conf.d/charts.d/example.conf
@@ -0,0 +1,17 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# to enable this chart, you have to set this to 12345
+# (just a demonstration for something that needs to be checked)
+#example_magic_number=12345
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#example_update_every=
+
+# the charts priority on the dashboard
+#example_priority=150000
diff --git a/conf.d/charts.d/exim.conf b/conf.d/charts.d/exim.conf
new file mode 100644
index 00000000..4a1464bb
--- /dev/null
+++ b/conf.d/charts.d/exim.conf
@@ -0,0 +1,20 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# USE THE PYTHON.D ONE
+
+# the exim command to run
+# if empty, it will use the one found in the system path
+#exim_command=
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#exim_update_every=5
+
+# the charts priority on the dashboard
+#exim_priority=60000
diff --git a/conf.d/charts.d/hddtemp.conf b/conf.d/charts.d/hddtemp.conf
new file mode 100644
index 00000000..535cb017
--- /dev/null
+++ b/conf.d/charts.d/hddtemp.conf
@@ -0,0 +1,20 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# USE THE PYTHON.D ONE
+
+#hddtemp_host="localhost"
+#hddtemp_port="7634"
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#hddtemp_update_every=3
+
+# the charts priority on the dashboard
+#hddtemp_priority=90000
+
diff --git a/conf.d/charts.d/load_average.conf b/conf.d/charts.d/load_average.conf
new file mode 100644
index 00000000..abbe80ca
--- /dev/null
+++ b/conf.d/charts.d/load_average.conf
@@ -0,0 +1,18 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# netdata can collect this metric already
+
+#load_average_enabled=0
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#load_average_update_every=5
+
+# the charts priority on the dashboard
+#load_priority=100
diff --git a/conf.d/charts.d/mem_apps.conf b/conf.d/charts.d/mem_apps.conf
new file mode 100644
index 00000000..aa4ac680
--- /dev/null
+++ b/conf.d/charts.d/mem_apps.conf
@@ -0,0 +1,15 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# apps.plugin can do better
+
+#mem_apps_apps=
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#mem_apps_update_every=2
diff --git a/conf.d/charts.d/mysql.conf b/conf.d/charts.d/mysql.conf
new file mode 100644
index 00000000..6a0b55a4
--- /dev/null
+++ b/conf.d/charts.d/mysql.conf
@@ -0,0 +1,19 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# USE THE PYTHON.D ONE
+
+#mysql_cmds[name]=""
+#mysql_opts[name]=""
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#mysql_update_every=2
+
+# the charts priority on the dashboard
+#mysql_priority=60000
diff --git a/conf.d/charts.d/nginx.conf b/conf.d/charts.d/nginx.conf
new file mode 100644
index 00000000..8b88b0e3
--- /dev/null
+++ b/conf.d/charts.d/nginx.conf
@@ -0,0 +1,19 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# USE THE PYTHON.D ONE
+
+#nginx_url="http://127.0.0.1:80/stub_status"
+#nginx_curl_opts=""
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#nginx_update_every=
+
+# the charts priority on the dashboard
+#nginx_priority=60000
diff --git a/conf.d/charts.d/nut.conf b/conf.d/charts.d/nut.conf
new file mode 100644
index 00000000..2844849d
--- /dev/null
+++ b/conf.d/charts.d/nut.conf
@@ -0,0 +1,20 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# a space separated list of UPS names
+# if empty, the list returned by 'upsc -l' will be used
+#nut_ups=
+
+# how long, in seconds, to wait for nut to respond
+#nut_timeout=2
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#nut_update_every=2
+
+# the charts priority on the dashboard
+#nut_priority=90000
diff --git a/conf.d/charts.d/opensips.conf b/conf.d/charts.d/opensips.conf
new file mode 100644
index 00000000..abc4c70e
--- /dev/null
+++ b/conf.d/charts.d/opensips.conf
@@ -0,0 +1,17 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+#opensips_opts="fifo get_statistics all"
+#opensips_cmd=
+#opensips_timeout=2
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#opensips_update_every=5
+
+# the charts priority on the dashboard
+#opensips_priority=80000
diff --git a/conf.d/charts.d/phpfpm.conf b/conf.d/charts.d/phpfpm.conf
new file mode 100644
index 00000000..1e857638
--- /dev/null
+++ b/conf.d/charts.d/phpfpm.conf
@@ -0,0 +1,22 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# USE THE PYTHON.D ONE
+
+# first, you need to enable the php-fpm status page in php-fpm.conf
+# second, you need to add a status location in nginx.conf
+# for details, see https://easyengine.io/tutorials/php/fpm-status-page/
+#phpfpm_urls[name]=""
+#phpfpm_curl_opts[name]=""
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#phpfpm_update_every=
+
+# the charts priority on the dashboard
+#phpfpm_priority=60000
diff --git a/conf.d/charts.d/postfix.conf b/conf.d/charts.d/postfix.conf
new file mode 100644
index 00000000..7d33d266
--- /dev/null
+++ b/conf.d/charts.d/postfix.conf
@@ -0,0 +1,20 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# USE THE PYTHON.D ONE
+
+# the postqueue command
+# if empty, it will use the one found in the system path
+#postfix_postqueue=
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#postfix_update_every=15
+
+# the charts priority on the dashboard
+#postfix_priority=60000
diff --git a/conf.d/charts.d/sensors.conf b/conf.d/charts.d/sensors.conf
new file mode 100644
index 00000000..d42d17d2
--- /dev/null
+++ b/conf.d/charts.d/sensors.conf
@@ -0,0 +1,27 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# USE THE PYTHON.D ONE
+
+# the directory the kernel keeps sensor data
+#sensors_sys_dir="/sys/devices"
+
+# how deep in the tree to check for sensor data
+#sensors_sys_depth=10
+
+# if set to 1, the script will overwrite internal
+# script functions with code generated ones
+# leave it set to 1 - it is faster
+#sensors_source_update=1
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#sensors_update_every=
+
+# the charts priority on the dashboard
+#sensors_priority=90000
diff --git a/conf.d/charts.d/squid.conf b/conf.d/charts.d/squid.conf
new file mode 100644
index 00000000..cf92c124
--- /dev/null
+++ b/conf.d/charts.d/squid.conf
@@ -0,0 +1,21 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# USE THE PYTHON.D ONE
+
+#squid_host=
+#squid_port=
+#squid_url=
+#squid_timeout=2
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#squid_update_every=2
+
+# the charts priority on the dashboard
+#squid_priority=60000
diff --git a/conf.d/charts.d/tomcat.conf b/conf.d/charts.d/tomcat.conf
new file mode 100644
index 00000000..71066942
--- /dev/null
+++ b/conf.d/charts.d/tomcat.conf
@@ -0,0 +1,34 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+
+# THIS PLUGIN IS DEPRECATED
+# USE THE PYTHON.D ONE
+
+# the URL to download tomcat status info
+# usually http://localhost:8080/manager/status?XML=true
+#tomcat_url=""
+#tomcat_curl_opts=""
+
+# set tomcat username/password here
+#tomcat_user=""
+#tomcat_password=""
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#tomcat_update_every=1
+
+# the charts priority on the dashboard
+#tomcat_priority=60000
+
+# convert tomcat floating point values
+# to integer using this multiplier
+# this only affects precision - the values
+# will be in the proper units
+#tomcat_decimal_detail=1000000
+
+# used by volume chart to convert bytes to KB
+#tomcat_decimal_KB_detail=1000
diff --git a/conf.d/fping.conf b/conf.d/fping.conf
new file mode 100644
index 00000000..82ee2332
--- /dev/null
+++ b/conf.d/fping.conf
@@ -0,0 +1,44 @@
+# no need for shebang - this file is sourced from fping.plugin
+
+# fping.plugin requires a recent version of fping.
+#
+# You can get it on your system, by running:
+#
+# /usr/libexec/netdata/plugins.d/fping.plugin install
+
+# -----------------------------------------------------------------------------
+# configuration options
+
+# The fping binary to use. We need one that can output netdata friendly info
+# (supporting: -N). If you have multiple versions, put here the full filename
+# of the right one
+
+#fping="/usr/local/bin/fping"
+
+
+# a space separated list of hosts to fping
+# we suggest putting names here and the IPs of these names in /etc/hosts
+
+hosts=""
+
+
+# The update frequency of the chart - the default is inherited from netdata
+
+#update_every=2
+
+
+# The time in milliseconds (1 sec = 1000 ms) to ping the hosts
+# by default 5 pings per host per iteration
+# fping will not allow this to be below 20ms
+
+#ping_every="200"
+
+
+# other fping options - defaults:
+# -R = send packets with random data
+# -b 56 = the number of bytes per packet
+# -i 1 = 1 ms when sending packets to other hosts (switching hosts)
+# -r 0 = never retry packets
+# -t 5000 = per packet timeout at 5000 ms
+
+#fping_opts="-R -b 56 -i 1 -r 0 -t 5000"
diff --git a/conf.d/health.d/apache.conf b/conf.d/health.d/apache.conf
index 0aaf0e00..0c98b877 100644
--- a/conf.d/health.d/apache.conf
+++ b/conf.d/health.d/apache.conf
@@ -6,8 +6,8 @@ template: apache_last_collected_secs
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: webmaster
diff --git a/conf.d/health.d/backend.conf b/conf.d/health.d/backend.conf
new file mode 100644
index 00000000..9c193e7b
--- /dev/null
+++ b/conf.d/health.d/backend.conf
@@ -0,0 +1,45 @@
+
+# make sure we are sending data to backend
+
+ alarm: backend_last_buffering
+ on: netdata.backend_metrics
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful buffering of backend data
+ to: dba
+
+ alarm: backend_metrics_sent
+ on: netdata.backend_metrics
+ units: %
+ calc: abs($sent) * 100 / abs($buffered)
+ every: 10s
+ warn: $this != 100
+ delay: down 5m multiplier 1.5 max 1h
+ info: percentage of metrics sent to the backend server
+ to: dba
+
+ alarm: backend_metrics_lost
+ on: netdata.backend_metrics
+ units: metrics
+ calc: abs($lost)
+ every: 10s
+ crit: $this != 0
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of metrics lost due to repeating failures to contact the backend server
+ to: dba
+
+# this chart has been removed from netdata
+# alarm: backend_slow
+# on: netdata.backend_latency
+# units: %
+# calc: $latency * 100 / ($update_every * 1000)
+# every: 10s
+# warn: $this > 50
+# crit: $this > 100
+# delay: down 5m multiplier 1.5 max 1h
+# info: the percentage of time between iterations needed by the backend time to process the data sent by netdata
+# to: dba
diff --git a/conf.d/health.d/bind_rndc.conf b/conf.d/health.d/bind_rndc.conf
new file mode 100644
index 00000000..028bc9d0
--- /dev/null
+++ b/conf.d/health.d/bind_rndc.conf
@@ -0,0 +1,9 @@
+ alarm: bind_rndc_stats_file_size
+ on: bind_rndc.stats_size
+ units: megabytes
+ every: 60
+ calc: $stats_size
+ warn: $this > 512
+ crit: $this > 1024
+ info: Bind stats file is very large! Consider creating a logrotate conf file for it!
+ to: sysadmin
diff --git a/conf.d/health.d/cpu.conf b/conf.d/health.d/cpu.conf
index 4d79fc79..60f494d7 100644
--- a/conf.d/health.d/cpu.conf
+++ b/conf.d/health.d/cpu.conf
@@ -4,8 +4,8 @@ template: 10min_cpu_usage
lookup: average -10m unaligned of user,system,nice,softirq,irq,guest,guest_nice
units: %
every: 1m
- warn: $this > (($status >= $WARNING) ? (70) : (80))
- crit: $this > (($status == $CRITICAL) ? (80) : (90))
+ warn: $this > (($status >= $WARNING) ? (75) : (85))
+ crit: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 15m multiplier 1.5 max 1h
info: average cpu utilization for the last 10 minutes
to: sysadmin
@@ -15,8 +15,8 @@ template: 10min_cpu_iowait
lookup: average -10m unaligned of iowait
units: %
every: 1m
- warn: $this > (($status >= $WARNING) ? (5) : (10))
- crit: $this > (($status == $CRITICAL) ? (20) : (30))
+ warn: $this > (($status >= $WARNING) ? (20) : (40))
+ crit: $this > (($status == $CRITICAL) ? (40) : (50))
delay: down 15m multiplier 1.5 max 1h
info: average CPU wait I/O for the last 10 minutes
to: sysadmin
@@ -28,6 +28,6 @@ template: 20min_steal_cpu
every: 5m
warn: $this > (($status >= $WARNING) ? (5) : (10))
crit: $this > (($status == $CRITICAL) ? (20) : (30))
- delay: down 15m multiplier 1.5 max 1h
+ delay: down 1h multiplier 1.5 max 2h
info: average CPU steal time for the last 20 minutes
to: sysadmin
diff --git a/conf.d/health.d/disks.conf b/conf.d/health.d/disks.conf
index cc7a4766..0549bac2 100644
--- a/conf.d/health.d/disks.conf
+++ b/conf.d/health.d/disks.conf
@@ -4,11 +4,12 @@
# for mount points
template: disk_space_last_collected_secs
on: disk.space
+families: *
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection of the mount point
to: sysadmin
@@ -16,11 +17,12 @@ template: disk_space_last_collected_secs
# for block devices
template: disk_last_collected_secs
on: disk.io
+families: *
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection of the block device
to: sysadmin
@@ -35,22 +37,24 @@ template: disk_last_collected_secs
template: disk_space_usage
on: disk.space
+families: *
calc: $used * 100 / ($avail + $used)
units: %
every: 1m
- warn: $this > (($status >= $WARNING ) ? (70) : (80))
- crit: $this > (($status == $CRITICAL) ? (85) : (95))
+ warn: $this > (($status >= $WARNING ) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: up 1m down 15m multiplier 1.5 max 1h
info: current disk space usage
to: sysadmin
template: disk_inode_usage
on: disk.inodes
+families: *
calc: $used * 100 / ($avail + $used)
units: %
every: 1m
- warn: $this > (($status >= $WARNING) ? (75) : (80))
- crit: $this > (($status == $CRITICAL) ? (90) : (95))
+ warn: $this > (($status >= $WARNING) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: up 1m down 15m multiplier 1.5 max 1h
info: current disk inode usage
to: sysadmin
@@ -69,6 +73,7 @@ template: disk_inode_usage
template: disk_fill_rate
on: disk.space
+families: *
lookup: min -10m at -50m unaligned of avail
calc: ($this - $avail) / (($now - $after) / 3600)
every: 1m
@@ -82,7 +87,8 @@ template: disk_fill_rate
template: out_of_disk_space_time
on: disk.space
- calc: $avail / $disk_fill_rate
+families: *
+ calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (0)
units: hours
every: 10s
warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
@@ -101,6 +107,7 @@ template: out_of_disk_space_time
template: 10min_disk_utilization
on: disk.util
+families: *
lookup: average -10m unaligned
units: %
every: 1m
@@ -120,6 +127,7 @@ template: 10min_disk_utilization
template: 10min_disk_backlog
on: disk.backlog
+families: *
lookup: average -10m unaligned
units: ms
every: 1m
diff --git a/conf.d/health.d/elasticsearch.conf b/conf.d/health.d/elasticsearch.conf
new file mode 100644
index 00000000..dffd4096
--- /dev/null
+++ b/conf.d/health.d/elasticsearch.conf
@@ -0,0 +1,9 @@
+ alarm: elasticsearch_last_collected
+ on: elasticsearch_local.cluster_health_status
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ info: number of seconds since the last successful data collection
+ to: sysadmin
diff --git a/conf.d/health.d/entropy.conf b/conf.d/health.d/entropy.conf
index d0eca8a6..5dd8af50 100644
--- a/conf.d/health.d/entropy.conf
+++ b/conf.d/health.d/entropy.conf
@@ -3,12 +3,12 @@
# the alarm is checked every 1 minute
# and examines the last hour of data
- alarm: 1hour_lowest_entropy
+ alarm: lowest_entropy
on: system.entropy
- lookup: min -1h unaligned
+ lookup: min -10m unaligned
units: entries
every: 5m
warn: $this < (($status >= $WARNING) ? (200) : (100))
- delay: down 1h multiplier 1.5 max 1h
- info: minimum entries in the random numbers pool in the last 30 minutes
+ delay: down 1h multiplier 1.5 max 2h
+ info: minimum entries in the random numbers pool in the last 10 minutes
to: silent
diff --git a/conf.d/health.d/haproxy.conf b/conf.d/health.d/haproxy.conf
new file mode 100644
index 00000000..e49c70d4
--- /dev/null
+++ b/conf.d/health.d/haproxy.conf
@@ -0,0 +1,27 @@
+template: haproxy_backend_server_status
+ on: haproxy_hs.down
+ units: failed servers
+ every: 10s
+ lookup: average -10s
+ crit: $this > 0
+ info: number of failed haproxy backend servers
+ to: sysadmin
+
+template: haproxy_backend_status
+ on: haproxy_hb.down
+ units: failed backend
+ every: 10s
+ lookup: average -10s
+ crit: $this > 0
+ info: number of failed haproxy backends
+ to: sysadmin
+
+template: haproxy_last_collected
+ on: haproxy_hb.down
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ info: number of seconds since the last successful data collection
+ to: sysadmin
diff --git a/conf.d/health.d/ipc.conf b/conf.d/health.d/ipc.conf
new file mode 100644
index 00000000..ee7c4bad
--- /dev/null
+++ b/conf.d/health.d/ipc.conf
@@ -0,0 +1,22 @@
+
+ alarm: semaphores_used
+ on: system.ipc_semaphores
+ calc: $semaphores * 100 / $ipc.semaphores.max
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (70) : (80))
+ crit: $this > (($status == $CRITICAL) ? (70) : (90))
+ delay: down 5m multiplier 1.5 max 1h
+ info: the percentage of IPC semaphores used
+ to: sysadmin
+
+ alarm: semaphore_arrays_used
+ on: system.ipc_semaphore_arrays
+ calc: $arrays * 100 / $ipc.semaphores.arrays.max
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (70) : (80))
+ crit: $this > (($status == $CRITICAL) ? (70) : (90))
+ delay: down 5m multiplier 1.5 max 1h
+ info: the percentage of IPC semaphore arrays used
+ to: sysadmin
diff --git a/conf.d/health.d/ipfs.conf b/conf.d/health.d/ipfs.conf
new file mode 100644
index 00000000..3f77572d
--- /dev/null
+++ b/conf.d/health.d/ipfs.conf
@@ -0,0 +1,11 @@
+
+template: ipfs_datastore_usage
+ on: ipfs.repo_size
+ calc: $size * 100 / $avail
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
+ delay: down 15m multiplier 1.5 max 1h
+ info: ipfs Datastore close to running out of space
+ to: sysadmin
diff --git a/conf.d/health.d/isc_dhcpd.conf b/conf.d/health.d/isc_dhcpd.conf
new file mode 100644
index 00000000..4345619a
--- /dev/null
+++ b/conf.d/health.d/isc_dhcpd.conf
@@ -0,0 +1,10 @@
+ alarm: isc_dhcpd_parse_time
+ on: isc_dhcpd.parse_time
+ units: ms
+ every: 60
+ calc: $ptime
+ warn: $this > 100
+ crit: $this > 250
+ delay: up 2m down 5m
+ info: Parsing too slow! It can slow down your server. Check dhcpd.leases file size.
+ to: sysadmin
diff --git a/conf.d/health.d/mdstat.conf b/conf.d/health.d/mdstat.conf
new file mode 100644
index 00000000..c9e7d20d
--- /dev/null
+++ b/conf.d/health.d/mdstat.conf
@@ -0,0 +1,18 @@
+template: mdstat_disks
+ on: md.disks
+ units: failed devices
+ every: 10s
+ calc: $total - $inuse
+ crit: $this > 0
+ info: Array is degraded!
+ to: sysadmin
+
+template: mdstat_last_collected
+ on: md.disks
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ info: number of seconds since the last successful data collection
+ to: sysadmin
diff --git a/conf.d/health.d/memcached.conf b/conf.d/health.d/memcached.conf
index 46a8ca0e..7917e36a 100644
--- a/conf.d/health.d/memcached.conf
+++ b/conf.d/health.d/memcached.conf
@@ -6,8 +6,8 @@ template: memcached_last_collected_secs
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: dba
@@ -42,7 +42,7 @@ template: cache_fill_rate
template: out_of_cache_space_time
on: memcached.cache
- calc: $available / $cache_fill_rate
+ calc: ($cache_fill_rate > 0) ? ($available / $cache_fill_rate) : (0)
units: hours
every: 10s
warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
diff --git a/conf.d/health.d/memory.conf b/conf.d/health.d/memory.conf
new file mode 100644
index 00000000..3c904f6b
--- /dev/null
+++ b/conf.d/health.d/memory.conf
@@ -0,0 +1,30 @@
+
+ alarm: 1hour_ecc_memory_correctable
+ on: mem.ecc_ce
+ lookup: sum -10m unaligned
+ units: errors
+ every: 1m
+ warn: $this > 0
+ delay: down 1h multiplier 1.5 max 1h
+ info: number of ECC correctable errors during the last hour
+ to: sysadmin
+
+ alarm: 1hour_ecc_memory_uncorrectable
+ on: mem.ecc_ue
+ lookup: sum -10m unaligned
+ units: errors
+ every: 1m
+ crit: $this > 0
+ delay: down 1h multiplier 1.5 max 1h
+ info: number of ECC uncorrectable errors during the last hour
+ to: sysadmin
+
+ alarm: 1hour_memory_hw_corrupted
+ on: mem.hwcorrupt
+ calc: $HardwareCorrupted
+ units: MB
+ every: 10s
+ warn: $this > 0
+ delay: down 1h multiplier 1.5 max 1h
+ info: amount of memory corrupted due to a hardware failure
+ to: sysadmin
diff --git a/conf.d/health.d/mysql.conf b/conf.d/health.d/mysql.conf
index a2cfa3ec..78773e5b 100644
--- a/conf.d/health.d/mysql.conf
+++ b/conf.d/health.d/mysql.conf
@@ -6,8 +6,80 @@ template: mysql_last_collected_secs
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: dba
+
+
+# -----------------------------------------------------------------------------
+# slow queries
+
+template: mysql_10s_slow_queries
+ on: mysql.queries
+ lookup: sum -10s of slow_queries
+ units: slow queries
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (5) : (10))
+ crit: $this > (($status == $CRITICAL) ? (10) : (20))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of mysql slow queries over the last 10 seconds
+ to: dba
+
+
+# -----------------------------------------------------------------------------
+# lock waits
+
+template: mysql_10s_table_locks_immediate
+ on: mysql.table_locks
+ lookup: sum -10s absolute of immediate
+ units: immediate locks
+ every: 10s
+ info: number of table immediate locks over the last 10 seconds
+ to: dba
+
+template: mysql_10s_table_locks_waited
+ on: mysql.table_locks
+ lookup: sum -10s absolute of waited
+ units: waited locks
+ every: 10s
+ info: number of table waited locks over the last 10 seconds
+ to: dba
+
+template: mysql_10s_waited_locks_ratio
+ on: mysql.table_locks
+ calc: ($mysql_10s_table_locks_waited * 100) / ($mysql_10s_table_locks_waited + $mysql_10s_table_locks_immediate)
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (10) : (25))
+ crit: $this > (($status == $CRITICAL) ? (25) : (50))
+ delay: down 30m multiplier 1.5 max 1h
+ info: the ratio of mysql waited table locks, for the last 10 seconds
+ to: dba
+
+
+# -----------------------------------------------------------------------------
+# replication
+
+template: mysql_replication
+ on: mysql.slave_status
+ calc: ($sql_running == -1 OR $io_running == -1)?0:1
+ units: status
+ every: 10s
+ crit: $this == 0
+ delay: down 5m multiplier 1.5 max 1h
+ info: checks if mysql replication has stopped
+ to: dba
+
+template: mysql_replication_lag
+ on: mysql.slave_behind
+ calc: $seconds
+ units: seconds
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (5) : (10))
+ crit: $this > (($status == $CRITICAL) ? (10) : (30))
+ delay: down 15m multiplier 1.5 max 1h
+ info: the number of seconds mysql replication is behind this master
+ to: dba
+
diff --git a/conf.d/health.d/named.conf b/conf.d/health.d/named.conf
index f2eaa83c..4fc65c8e 100644
--- a/conf.d/health.d/named.conf
+++ b/conf.d/health.d/named.conf
@@ -6,8 +6,8 @@ template: named_last_collected_secs
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: domainadmin
diff --git a/conf.d/health.d/net.conf b/conf.d/health.d/net.conf
index 7753aa18..924acccc 100644
--- a/conf.d/health.d/net.conf
+++ b/conf.d/health.d/net.conf
@@ -3,46 +3,119 @@
template: interface_last_collected_secs
on: net.net
+families: *
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: sysadmin
# -----------------------------------------------------------------------------
+# dropped packets
# check if an interface is dropping packets
# the alarm is checked every 1 minute
-# and examines the last hour of data
+# and examines the last 10 minutes of data
-template: 1hour_packet_drops
+template: inbound_packets_dropped
on: net.drops
- lookup: sum -1h unaligned absolute
+families: *
+ lookup: sum -10m unaligned absolute of inbound
units: packets
every: 1m
warn: $this > 0
- delay: down 30m multiplier 1.5 max 1h
- info: interface dropped packets in the last hour
+ delay: down 1h multiplier 1.5 max 2h
+ info: interface inbound dropped packets in the last 10 minutes
+ to: sysadmin
+
+template: outbound_packets_dropped
+ on: net.drops
+families: *
+ lookup: sum -10m unaligned absolute of outbound
+ units: packets
+ every: 1m
+ warn: $this > 0
+ delay: down 1h multiplier 1.5 max 2h
+ info: interface outbound dropped packets in the last 10 minutes
+ to: sysadmin
+
+template: inbound_packets_dropped_ratio
+ on: net.packets
+families: *
+ lookup: sum -10m unaligned absolute of received
+ calc: (($inbound_packets_dropped != nan AND $this > 0) ? ($inbound_packets_dropped * 100 / $this) : (0))
+ units: %
+ every: 1m
+ warn: $this > 0.5
+ crit: $this > 3
+ delay: down 1h multiplier 1.5 max 2h
+ info: the ratio of inbound dropped packets vs the total number of received packets of the network interface, during the last 10 minutes
+ to: sysadmin
+
+template: outbound_packets_dropped_ratio
+ on: net.packets
+families: *
+ lookup: sum -10m unaligned absolute of sent
+ calc: (($outbound_packets_dropped != nan AND $this > 0) ? ($outbound_packets_dropped * 100 / $this) : (0))
+ units: %
+ every: 1m
+ warn: $this > 0.5
+ crit: $this > 3
+ delay: down 1h multiplier 1.5 max 2h
+ info: the ratio of outbound dropped packets vs the total number of sent packets of the network interface, during the last 10 minutes
to: sysadmin
# -----------------------------------------------------------------------------
+# FIFO errors
# check if an interface is having FIFO
# buffer errors
# the alarm is checked every 1 minute
-# and examines the last hour of data
+# and examines the last 10 minutes of data
-template: 1hour_fifo_errors
+template: 10min_fifo_errors
on: net.fifo
- lookup: sum -1h unaligned absolute
+families: *
+ lookup: sum -10m unaligned absolute
units: errors
every: 1m
warn: $this > 0
- delay: down 30m multiplier 1.5 max 1h
- info: interface fifo errors in the last hour
+ delay: down 1h multiplier 1.5 max 2h
+ info: interface fifo errors in the last 10 minutes
to: sysadmin
+
+
+# -----------------------------------------------------------------------------
+# check for packet storms
+
+# 1. calculate the rate packets are received in 1m: 1m_received_packets_rate
+# 2. do the same for the last 10s
+# 3. raise an alarm if the latter is 10x or 20x the first
+# we assume the minimum packet storm should at least have
+# 10000 packets/s, average of the last 10 seconds
+
+template: 1m_received_packets_rate
+ on: net.packets
+families: *
+ lookup: average -1m of received
+ units: packets
+ every: 10s
+ info: the average number of packets received during the last minute
+
+template: 10s_received_packets_storm
+ on: net.packets
+families: *
+ lookup: average -10s of received
+ calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate))
+ every: 10s
+ units: %
+ warn: $this > (($status >= $WARNING)?(200):(1000))
+ crit: $this > (($status >= $WARNING)?(1000):(2000))
+ info: the % of the rate of received packets in the last 10 seconds, compared to the rate of the last minute
+ to: silent
+
diff --git a/conf.d/health.d/netfilter.conf b/conf.d/health.d/netfilter.conf
new file mode 100644
index 00000000..3dd6a67b
--- /dev/null
+++ b/conf.d/health.d/netfilter.conf
@@ -0,0 +1,23 @@
+
+ alarm: netfilter_last_collected_secs
+ on: netfilter.conntrack_sockets
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: sysadmin
+
+ alarm: netfilter_conntrack_full
+ on: netfilter.conntrack_sockets
+ lookup: max -10s unaligned of connections
+ calc: $this * 100 / $netfilter.conntrack.max
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (70) : (80))
+ crit: $this > (($status == $CRITICAL) ? (80) : (90))
+ delay: down 5m multiplier 1.5 max 1h
+ info: the number of connections tracked by the netfilter connection tracker, as a percentage of the connection tracker table size
+ to: sysadmin
diff --git a/conf.d/health.d/nginx.conf b/conf.d/health.d/nginx.conf
index d70d6a59..a686c3d9 100644
--- a/conf.d/health.d/nginx.conf
+++ b/conf.d/health.d/nginx.conf
@@ -6,8 +6,8 @@ template: nginx_last_collected_secs
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: webmaster
diff --git a/conf.d/health.d/postgres.conf b/conf.d/health.d/postgres.conf
new file mode 100644
index 00000000..4e0583b8
--- /dev/null
+++ b/conf.d/health.d/postgres.conf
@@ -0,0 +1,13 @@
+
+# make sure postgres is running
+
+template: postgres_last_collected_secs
+ on: postgres.db_stat_transactions
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: dba
diff --git a/conf.d/health.d/ram.conf b/conf.d/health.d/ram.conf
index 216b82fe..d60df75b 100644
--- a/conf.d/health.d/ram.conf
+++ b/conf.d/health.d/ram.conf
@@ -4,8 +4,8 @@
calc: $used * 100 / ($used + $cached + $free)
units: %
every: 10s
- warn: $this > (($status >= $WARNING) ? (70) : (80))
- crit: $this > (($status == $CRITICAL) ? (80) : (90))
+ warn: $this > (($status >= $WARNING) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: down 15m multiplier 1.5 max 1h
info: system RAM usage
to: sysadmin
diff --git a/conf.d/health.d/redis.conf b/conf.d/health.d/redis.conf
index 3e648d85..5f6d397e 100644
--- a/conf.d/health.d/redis.conf
+++ b/conf.d/health.d/redis.conf
@@ -6,8 +6,8 @@ template: redis_last_collected_secs
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: dba
diff --git a/conf.d/health.d/retroshare.conf b/conf.d/health.d/retroshare.conf
index 1af7b468..2344b60e 100644
--- a/conf.d/health.d/retroshare.conf
+++ b/conf.d/health.d/retroshare.conf
@@ -5,8 +5,8 @@ template: retroshare_last_collected_secs
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: sysadmin
diff --git a/conf.d/health.d/softnet.conf b/conf.d/health.d/softnet.conf
index 0c3709f4..5faf9a9e 100644
--- a/conf.d/health.d/softnet.conf
+++ b/conf.d/health.d/softnet.conf
@@ -1,21 +1,21 @@
# check for common /proc/net/softnet_stat errors
- alarm: 1hour_netdev_backlog_exceeded
+ alarm: 10min_netdev_backlog_exceeded
on: system.softnet_stat
- lookup: sum -1h unaligned absolute of dropped
+ lookup: sum -10m unaligned absolute of dropped
units: packets
every: 1m
warn: $this > 0
- delay: down 30m multiplier 1.5 max 1h
- info: number of packets dropped because sysctl net.core.netdev_max_backlog was exceeded (this can be a cause for dropped packets)
+ delay: down 1h multiplier 1.5 max 2h
+ info: number of packets dropped in the last 10min, because sysctl net.core.netdev_max_backlog was exceeded (this can be a cause for dropped packets)
to: sysadmin
- alarm: 1hour_netdev_budget_ran_outs
+ alarm: 10min_netdev_budget_ran_outs
on: system.softnet_stat
- lookup: sum -1h unaligned absolute of squeezed
+ lookup: sum -10m unaligned absolute of squeezed
units: events
every: 1m
- warn: $this > 0
- delay: down 30m multiplier 1.5 max 1h
- info: number of times ksoftirq ran out of sysctl net.core.netdev_budget or time slice, with work remaining (this can be a cause for dropped packets)
+ warn: $this > (($status >= $WARNING) ? (0) : (10))
+ delay: down 1h multiplier 1.5 max 2h
+ info: number of times, during the last 10min, ksoftirq ran out of sysctl net.core.netdev_budget or time slice, with work remaining (this can be a cause for dropped packets)
to: silent
diff --git a/conf.d/health.d/squid.conf b/conf.d/health.d/squid.conf
index 76143c5d..06cc9678 100644
--- a/conf.d/health.d/squid.conf
+++ b/conf.d/health.d/squid.conf
@@ -6,8 +6,8 @@ template: squid_last_collected_secs
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: proxyadmin
diff --git a/conf.d/health.d/swap.conf b/conf.d/health.d/swap.conf
index 0cfa888c..7f57560e 100644
--- a/conf.d/health.d/swap.conf
+++ b/conf.d/health.d/swap.conf
@@ -6,13 +6,13 @@
calc: $this / 1024 * 100 / ( $system.ram.used + $system.ram.cached + $system.ram.free )
units: % of RAM
every: 1m
- warn: $this > (($status >= $WARNING) ? (5) : (10))
- crit: $this > (($status == $CRITICAL) ? (15) : (20))
+ warn: $this > (($status >= $WARNING) ? (10) : (20))
+ crit: $this > (($status == $CRITICAL) ? (20) : (30))
delay: up 0 down 15m multiplier 1.5 max 1h
info: the amount of memory swapped in the last 30 minutes, as a percentage of the system RAM
to: sysadmin
- alarm: used_swap_space
+ alarm: ram_in_swap
on: system.swap
calc: $used * 100 / ( $system.ram.used + $system.ram.cached + $system.ram.free )
units: % of RAM
@@ -22,3 +22,14 @@
delay: up 0 down 15m multiplier 1.5 max 1h
info: the swap memory used, as a percentage of the system RAM
to: sysadmin
+
+ alarm: used_swap
+ on: system.swap
+ calc: $used * 100 / ( $used + $free )
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
+ delay: up 0 down 15m multiplier 1.5 max 1h
+ info: the percentage of swap memory used
+ to: sysadmin
diff --git a/conf.d/health.d/tcp_resets.conf b/conf.d/health.d/tcp_resets.conf
index 8e93c479..daf24a1c 100644
--- a/conf.d/health.d/tcp_resets.conf
+++ b/conf.d/health.d/tcp_resets.conf
@@ -5,28 +5,48 @@
calc: $now - $last_collected_t
units: seconds ago
every: 10s
- warn: $this > (($status >= $WARNING) ? (0) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? (0) : (60 * $update_every))
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
delay: up 0 down 5m multiplier 1.5 max 1h
info: number of seconds since the last successful data collection
to: sysadmin
# -----------------------------------------------------------------------------
+# tcp resets this host sends
- alarm: 1m_ipv4_tcp_resets
+ alarm: 1m_ipv4_tcp_resets_sent
on: ipv4.tcphandshake
lookup: average -1m at -10s unaligned absolute of OutRsts
units: tcp resets/s
every: 10s
info: average TCP RESETS this host is sending, over the last minute
- alarm: 10s_ipv4_tcp_resets
+ alarm: 10s_ipv4_tcp_resets_sent
on: ipv4.tcphandshake
lookup: average -10s unaligned absolute of OutRsts
units: tcp resets/s
every: 10s
- warn: $this > ((($1m_ipv4_tcp_resets < 5)?(5):($1m_ipv4_tcp_resets)) * (($status >= $WARNING) ? (1) : (4)))
+ warn: $this > ((($1m_ipv4_tcp_resets_sent < 5)?(5):($1m_ipv4_tcp_resets_sent)) * (($status >= $WARNING) ? (1) : (4)))
delay: up 0 down 60m multiplier 1.2 max 2h
info: average TCP RESETS this host is sending, over the last 10 seconds (this can be an indication that a port scan is made, or that a service running on this host has crashed)
- to: sysadmin
+ to: silent
+
+# -----------------------------------------------------------------------------
+# tcp resets this host receives
+
+ alarm: 1m_ipv4_tcp_resets_received
+ on: ipv4.tcphandshake
+ lookup: average -1m at -10s unaligned absolute of AttemptFails
+ units: tcp resets/s
+ every: 10s
+ info: average TCP RESETS this host is receiving, over the last minute
+ alarm: 10s_ipv4_tcp_resets_received
+ on: ipv4.tcphandshake
+ lookup: average -10s unaligned absolute of AttemptFails
+ units: tcp resets/s
+ every: 10s
+ warn: $this > ((($1m_ipv4_tcp_resets_received < 5)?(5):($1m_ipv4_tcp_resets_received)) * (($status >= $WARNING) ? (1) : (4)))
+ delay: up 0 down 60m multiplier 1.2 max 2h
+ info: average TCP RESETS this host is receiving, over the last 10 seconds (this can be an indication that a service this host needs, has crashed)
+ to: silent
diff --git a/conf.d/health.d/udp_errors.conf b/conf.d/health.d/udp_errors.conf
new file mode 100644
index 00000000..98e955c0
--- /dev/null
+++ b/conf.d/health.d/udp_errors.conf
@@ -0,0 +1,40 @@
+# -----------------------------------------------------------------------------
+
+ alarm: ipv4_udperrors_last_collected_secs
+ on: ipv4.udperrors
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: up 0 down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: sysadmin
+
+# -----------------------------------------------------------------------------
+# UDP receive buffer errors
+
+ alarm: 1m_ipv4_udp_receive_buffer_errors
+ on: ipv4.udperrors
+ lookup: sum -1m unaligned absolute of RcvbufErrors
+ units: errors
+ every: 10s
+ warn: $this > 0
+ crit: $this > 100
+ info: number of UDP receive buffer errors during the last minute
+ delay: up 0 down 60m multiplier 1.2 max 2h
+ to: sysadmin
+
+# -----------------------------------------------------------------------------
+# UDP send buffer errors
+
+ alarm: 1m_ipv4_udp_send_buffer_errors
+ on: ipv4.udperrors
+ lookup: sum -1m unaligned absolute of SndbufErrors
+ units: errors
+ every: 10s
+ warn: $this > 0
+ crit: $this > 100
+ info: number of UDP send buffer errors during the last minute
+ delay: up 0 down 60m multiplier 1.2 max 2h
+ to: sysadmin
diff --git a/conf.d/health.d/varnish.conf b/conf.d/health.d/varnish.conf
new file mode 100644
index 00000000..cca7446b
--- /dev/null
+++ b/conf.d/health.d/varnish.conf
@@ -0,0 +1,9 @@
+ alarm: varnish_last_collected
+ on: varnish.uptime
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ info: number of seconds since the last successful data collection
+ to: sysadmin
diff --git a/conf.d/health_alarm_notify.conf b/conf.d/health_alarm_notify.conf
index d451cafe..b838e802 100644
--- a/conf.d/health_alarm_notify.conf
+++ b/conf.d/health_alarm_notify.conf
@@ -8,6 +8,9 @@
# - push notifications to your mobile phone (pushover.net),
# - messages to your slack team (slack.com),
# - messages to your telegram chat / group chat (telegram.org)
+# - sms messages to your cell phone or any sms enabled device (twilio.com)
+# - sms messages to your cell phone or any sms enabled device (messagebird.com)
+# - notifications to users on pagerduty.com
#
# The 'to' line given at netdata alarms defines a *role*, so that many
# people can be notified for each role.
@@ -15,16 +18,16 @@
# This file is a BASH script itself.
#
#
-###############################################################################
+#------------------------------------------------------------------------------
# proxy configuration
#
-# If you need to send curl based notifications (pushover, slack, telegram)
-# via a proxy, set these to your proxy address:
+# If you need to send curl based notifications (pushover, pushbullet, slack,
+# telegram) via a proxy, set these to your proxy address:
#export http_proxy="http://10.0.0.1:3128/"
#export https_proxy="http://10.0.0.1:3128/"
-###############################################################################
+#------------------------------------------------------------------------------
# notifications images
#
# Images in notifications need to be downloaded from an Internet facing site.
@@ -36,7 +39,7 @@
#images_base_url="http://my.public.netdata.server:19999"
-###############################################################################
+#------------------------------------------------------------------------------
# external commands
# The full path to the sendmail command.
@@ -46,12 +49,11 @@ sendmail=""
# The full path of the curl command.
# If empty, the system $PATH will be searched for it.
-# If not found, pushover, telegram and slack notifications will be
-# silently disabled.
+# If not found, most notifications will be silently disabled.
curl=""
-###############################################################################
+#------------------------------------------------------------------------------
# NOTE ABOUT RECIPIENTS
#
# When you define recipients (all types):
@@ -60,24 +62,30 @@ curl=""
# - pushover user tokens
# - telegram chat ids
# - slack channels
+# - hipchat rooms
+# - sms phone numbers
+# - pagerduty.com (pd) services
#
# You can append |critical to limit the notifications to be sent.
#
# In these examples, the first recipient receives all the alarms
# while the second one receives only the critical ones:
#
-# email : "user1@example.com user2@example.com|critical"
-# pushover: "2987343...9437837 8756278...2362736|critical"
-# telegram: "111827421 112746832|critical"
-# slack : "alarms disasters|critical"
+# email : "user1@example.com user2@example.com|critical"
+# pushover : "2987343...9437837 8756278...2362736|critical"
+# telegram : "111827421 112746832|critical"
+# slack : "alarms disasters|critical"
+# twilio : "+15555555555 +17777777777|critical"
+# messagebird: "+15555555555 +17777777777|critical"
+# pd : "<pd_service_key_1> <pd_service_key_2>|critical"
#
# If a recipient is set to empty string, the default recipient of the given
-# notification method (email, pushover, telegram, slack) will be used.
+# notification method (email, pushover, telegram, slack, etc) will be used.
# To disable a notification, use the recipient called: disabled
# This works for all notification methods (including the default recipients).
-###############################################################################
+#------------------------------------------------------------------------------
# email global notification options
# multiple recipients can be given like this:
@@ -91,7 +99,7 @@ DEFAULT_RECIPIENT_EMAIL="root"
# to receive only critical alarms, set it to "root|critical"
-###############################################################################
+#------------------------------------------------------------------------------
# pushover (pushover.net) global notification options
# multiple recipients can be given like this:
@@ -112,14 +120,79 @@ PUSHOVER_APP_TOKEN=""
DEFAULT_RECIPIENT_PUSHOVER=""
-###############################################################################
+#------------------------------------------------------------------------------
+# pushbullet (pushbullet.com) push notification options
+
+# multiple recipients can be given like this:
+# "user1@email.com user2@mail.com"
+
+# enable/disable sending pushbullet notifications
+SEND_PUSHBULLET="YES"
+
+# Signup and Login to pushbullet.com
+# To get your Access Token, go to https://www.pushbullet.com/#settings/account
+# Create a new access token and paste it below.
+# Then just set the recipients' emails.
+# Please note that if the email in the DEFAULT_RECIPIENT_PUSHBULLET does
+# not have a pushbullet account, the pushbullet service will send an email
+# to that address instead.
+
+# Without an access token, netdata cannot send pushbullet notifications.
+PUSHBULLET_ACCESS_TOKEN=""
+DEFAULT_RECIPIENT_PUSHBULLET=""
+
+
+#------------------------------------------------------------------------------
+# Twilio (twilio.com) SMS options
+
+# multiple recipients can be given like this:
+# "+15555555555 +17777777777"
+
+# enable/disable sending twilio SMS
+SEND_TWILIO="YES"
+
+# Signup for free trial and select a SMS capable Twilio Number
+# To get your Account SID and Token, go to https://www.twilio.com/console
+# Place your sid, token and number below.
+# Then just set the recipients' phone numbers.
+# The trial account is only allowed to use the number specified when set up.
+
+# Without an account sid and token, netdata cannot send Twilio text messages.
+TWILIO_ACCOUNT_SID=""
+TWILIO_ACCOUNT_TOKEN=""
+TWILIO_NUMBER=""
+DEFAULT_RECIPIENT_TWILIO=""
+
+
+#------------------------------------------------------------------------------
+# Messagebird (messagebird.com) SMS options
+
+# multiple recipients can be given like this:
+# "+15555555555 +17777777777"
+
+# enable/disable sending messagebird SMS
+SEND_MESSAGEBIRD="YES"
+
+# to get an access key, create a free account at https://www.messagebird.com
+# verify and activate the account (no CC info needed)
+# login to your account and enter your phone number to get some free credits
+# to get the API key, click on 'API' in the sidebar, then 'API Access (REST)'
+# click 'Add access key' and fill in data (you want a live key to send SMS)
+
+# Without an access key, netdata cannot send Messagebird text messages.
+MESSAGEBIRD_ACCESS_KEY=""
+MESSAGEBIRD_NUMBER=""
+DEFAULT_RECIPIENT_MESSAGEBIRD=""
+
+
+#------------------------------------------------------------------------------
# telegram (telegram.org) global notification options
# To get your chat ID send the command /my_id to telegram bot @get_id.
# Users also need to open a query with the bot (see below).
# note: multiple recipients can be given like this:
-# "CHAT_ID_1 CHAT_ID_1 ..."
+# "CHAT_ID_1 CHAT_ID_2 ..."
# enable/disable sending telegram messages
SEND_TELEGRAM="YES"
@@ -133,7 +206,7 @@ TELEGRAM_BOT_TOKEN=""
DEFAULT_RECIPIENT_TELEGRAM=""
-###############################################################################
+#------------------------------------------------------------------------------
# slack (slack.com) global notification options
# multiple recipients can be given like this:
@@ -154,6 +227,57 @@ SLACK_WEBHOOK_URL=""
DEFAULT_RECIPIENT_SLACK=""
+#------------------------------------------------------------------------------
+# hipchat global notification options
+
+# multiple recipients can be given like this:
+# "ROOM1 ROOM2 ..."
+
+# enable/disable sending hipchat notifications
+SEND_HIPCHAT="YES"
+
+# api.hipchat.com authorization token
+# Without this, netdata cannot send hipchat notifications.
+HIPCHAT_AUTH_TOKEN=""
+
+# if a role's recipients are not configured, a notification will be sent to
+# this hipchat room (empty = do not send a notification for unconfigured
+# roles):
+DEFAULT_RECIPIENT_HIPCHAT=""
+
+
+#------------------------------------------------------------------------------
+# kafka notification options
+
+# enable/disable sending kafka notifications
+SEND_KAFKA="YES"
+
+# The URL to POST kafka alarm data to. It should be the full URL.
+KAFKA_URL=""
+
+# The IP to be used in the kafka message as the sender.
+KAFKA_SENDER_IP=""
+
+
+#------------------------------------------------------------------------------
+# pagerduty.com notification options
+#
+# pagerduty.com notifications require the pagerduty agent to be installed and
+# a "Generic API" pagerduty service.
+# https://www.pagerduty.com/docs/guides/agent-install-guide/
+
+# multiple recipients can be given like this:
+# "<pd_service_key_1> <pd_service_key_2> ..."
+
+# enable/disable sending pagerduty notifications
+SEND_PD="YES"
+
+# if a role's recipients are not configured, a notification will be sent to
+# the "Generic API" pagerduty.com service that uses this service key.
+# (empty = do not send a notification for unconfigured roles):
+DEFAULT_RECIPIENT_PD=""
+
+
###############################################################################
# RECIPIENTS PER ROLE
@@ -165,10 +289,19 @@ role_recipients_email[sysadmin]="${DEFAULT_RECIPIENT_EMAIL}"
role_recipients_pushover[sysadmin]="${DEFAULT_RECIPIENT_PUSHOVER}"
+role_recipients_pushbullet[sysadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}"
+
role_recipients_telegram[sysadmin]="${DEFAULT_RECIPIENT_TELEGRAM}"
role_recipients_slack[sysadmin]="${DEFAULT_RECIPIENT_SLACK}"
+role_recipients_hipchat[sysadmin]="${DEFAULT_RECIPIENT_HIPCHAT}"
+
+role_recipients_twilio[sysadmin]="${DEFAULT_RECIPIENT_TWILIO}"
+
+role_recipients_messagebird[sysadmin]="${DEFAULT_RECIPIENT_MESSAGEBIRD}"
+
+role_recipients_pd[sysadmin]="${DEFAULT_RECIPIENT_PD}"
# -----------------------------------------------------------------------------
# DNS related alarms
@@ -177,46 +310,82 @@ role_recipients_email[domainadmin]="${DEFAULT_RECIPIENT_EMAIL}"
role_recipients_pushover[domainadmin]="${DEFAULT_RECIPIENT_PUSHOVER}"
+role_recipients_pushbullet[domainadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}"
+
role_recipients_telegram[domainadmin]="${DEFAULT_RECIPIENT_TELEGRAM}"
role_recipients_slack[domainadmin]="${DEFAULT_RECIPIENT_SLACK}"
+role_recipients_hipchat[domainadmin]="${DEFAULT_RECIPIENT_HIPCHAT}"
+
+role_recipients_twilio[domainadmin]="${DEFAULT_RECIPIENT_TWILIO}"
+
+role_recipients_messagebird[domainadmin]="${DEFAULT_RECIPIENT_MESSAGEBIRD}"
+
+role_recipients_pd[domainadmin]="${DEFAULT_RECIPIENT_PD}"
# -----------------------------------------------------------------------------
# database servers alarms
-# mysql, redis, memcached, etc
+# mysql, redis, memcached, postgres, etc
role_recipients_email[dba]="${DEFAULT_RECIPIENT_EMAIL}"
role_recipients_pushover[dba]="${DEFAULT_RECIPIENT_PUSHOVER}"
+role_recipients_pushbullet[dba]="${DEFAULT_RECIPIENT_PUSHBULLET}"
+
role_recipients_telegram[dba]="${DEFAULT_RECIPIENT_TELEGRAM}"
role_recipients_slack[dba]="${DEFAULT_RECIPIENT_SLACK}"
+role_recipients_hipchat[dba]="${DEFAULT_RECIPIENT_HIPCHAT}"
+
+role_recipients_twilio[dba]="${DEFAULT_RECIPIENT_TWILIO}"
+
+role_recipients_messagebird[dba]="${DEFAULT_RECIPIENT_MESSAGEBIRD}"
+
+role_recipients_pd[dba]="${DEFAULT_RECIPIENT_PD}"
# -----------------------------------------------------------------------------
# web servers alarms
-# apache, nginx, etc
+# apache, nginx, lighttpd, etc
role_recipients_email[webmaster]="${DEFAULT_RECIPIENT_EMAIL}"
role_recipients_pushover[webmaster]="${DEFAULT_RECIPIENT_PUSHOVER}"
+role_recipients_pushbullet[webmaster]="${DEFAULT_RECIPIENT_PUSHBULLET}"
+
role_recipients_telegram[webmaster]="${DEFAULT_RECIPIENT_TELEGRAM}"
role_recipients_slack[webmaster]="${DEFAULT_RECIPIENT_SLACK}"
+role_recipients_hipchat[webmaster]="${DEFAULT_RECIPIENT_HIPCHAT}"
+
+role_recipients_twilio[webmaster]="${DEFAULT_RECIPIENT_TWILIO}"
+
+role_recipients_messagebird[webmaster]="${DEFAULT_RECIPIENT_MESSAGEBIRD}"
+
+role_recipients_pd[webmaster]="${DEFAULT_RECIPIENT_PD}"
# -----------------------------------------------------------------------------
# proxy servers alarms
-# apache, nginx, etc
+# squid, etc
role_recipients_email[proxyadmin]="${DEFAULT_RECIPIENT_EMAIL}"
role_recipients_pushover[proxyadmin]="${DEFAULT_RECIPIENT_PUSHOVER}"
+role_recipients_pushbullet[proxyadmin]="${DEFAULT_RECIPIENT_PUSHBULLET}"
+
role_recipients_telegram[proxyadmin]="${DEFAULT_RECIPIENT_TELEGRAM}"
role_recipients_slack[proxyadmin]="${DEFAULT_RECIPIENT_SLACK}"
+role_recipients_hipchat[proxyadmin]="${DEFAULT_RECIPIENT_HIPCHAT}"
+
+role_recipients_twilio[proxyadmin]="${DEFAULT_RECIPIENT_TWILIO}"
+
+role_recipients_messagebird[proxyadmin]="${DEFAULT_RECIPIENT_MESSAGEBIRD}"
+
+role_recipients_pd[proxyadmin]="${DEFAULT_RECIPIENT_PD}"
diff --git a/conf.d/node.d.conf b/conf.d/node.d.conf
new file mode 100644
index 00000000..95aec99c
--- /dev/null
+++ b/conf.d/node.d.conf
@@ -0,0 +1,39 @@
+{
+ "___help_1": "Default options for node.d.plugin - this is a JSON file.",
+ "___help_2": "Use http://jsonlint.com/ to verify it is valid JSON.",
+ "___help_3": "------------------------------------------------------------",
+
+ "___help_update_every": "Minimum data collection frequency for all node.d/*.node.js modules. Set it to 0 to inherit it from netdata.",
+ "update_every": 0,
+
+ "___help_modules_enable_autodetect": "Enable/disable auto-detection for node.d/*.node.js modules that support it.",
+ "modules_enable_autodetect": true,
+
+ "___help_modules_enable_all": "Enable all node.d/*.node.js modules by default.",
+ "modules_enable_all": true,
+
+ "___help_modules": "Enable/disable the following modules. Give only XXX for node.d/XXX.node.js",
+ "modules": {
+ "named": {
+ "enabled": true
+ },
+ "sma_webbox": {
+ "enabled": true
+ },
+ "snmp": {
+ "enabled": true
+ }
+ },
+
+ "___help_paths": "Paths that control the operation of node.d.plugin",
+ "paths": {
+ "___help_plugins": "The full path to the modules javascript node.d/ directory",
+ "plugins": null,
+
+ "___help_config": "The full path to the modules configs node.d/ directory",
+ "config": null,
+
+ "___help_modules": "Array of paths to add to node.js when searching for node_modules",
+ "modules": []
+ }
+}
diff --git a/conf.d/node.d/README.md b/conf.d/node.d/README.md
new file mode 100644
index 00000000..45e3d02a
--- /dev/null
+++ b/conf.d/node.d/README.md
@@ -0,0 +1,7 @@
+`node.d.plugin` modules accept configuration in JSON format.
+
+Unfortunately, JSON files do not accept comments. So, the best way to describe them is to have markdown text files with instructions.
+
+JSON has very strict formatting. If you get errors from netdata at `/var/log/netdata/error.log` that a certain configuration file cannot be loaded, we suggest verifying it at [http://jsonlint.com/](http://jsonlint.com/).
+
+The files in this directory provide usable examples for configuring each `node.d.plugin` module.
diff --git a/conf.d/node.d/named.conf.md b/conf.d/node.d/named.conf.md
new file mode 100644
index 00000000..fa843dd5
--- /dev/null
+++ b/conf.d/node.d/named.conf.md
@@ -0,0 +1,344 @@
+# ISC Bind Statistics
+
+Using this netdata collector, you can monitor one or more ISC Bind servers.
+
+The source code for this plugin is [here](https://github.com/firehol/netdata/blob/master/node.d/named.node.js).
+
+## Example netdata charts
+
+Depending on the number of views your bind has, you may get a large number of charts.
+Here is how it looks with just one view:
+
+![image](https://cloud.githubusercontent.com/assets/2662304/12765473/879b8e04-ca07-11e5-817d-b0651996c42b.png)
+![image](https://cloud.githubusercontent.com/assets/2662304/12766538/12b272fa-ca0d-11e5-81e1-6a9f8ff488ff.png)
+
+## How it works
+
+The plugin will execute (from within node.js) the equivalent of:
+
+```sh
+curl "http://localhost:8888/json/v1/server"
+```
+
+Here is a sample of the output this command produces.
+
+```js
+{
+ "json-stats-version":"1.0",
+ "boot-time":"2016-01-31T08:20:48Z",
+ "config-time":"2016-01-31T09:28:03Z",
+ "current-time":"2016-02-02T22:22:20Z",
+ "opcodes":{
+ "QUERY":247816,
+ "IQUERY":0,
+ "STATUS":0,
+ "RESERVED3":0,
+ "NOTIFY":0,
+ "UPDATE":3813,
+ "RESERVED6":0,
+ "RESERVED7":0,
+ "RESERVED8":0,
+ "RESERVED9":0,
+ "RESERVED10":0,
+ "RESERVED11":0,
+ "RESERVED12":0,
+ "RESERVED13":0,
+ "RESERVED14":0,
+ "RESERVED15":0
+ },
+ "qtypes":{
+ "A":89519,
+ "NS":863,
+ "CNAME":1,
+ "SOA":1,
+ "PTR":116779,
+ "MX":276,
+ "TXT":198,
+ "AAAA":39324,
+ "SRV":850,
+ "ANY":5
+ },
+ "nsstats":{
+ "Requestv4":251630,
+ "ReqEdns0":1255,
+ "ReqTSIG":3813,
+ "ReqTCP":57,
+ "AuthQryRej":1455,
+ "RecQryRej":122,
+ "Response":245918,
+ "TruncatedResp":44,
+ "RespEDNS0":1255,
+ "RespTSIG":3813,
+ "QrySuccess":205159,
+ "QryAuthAns":119495,
+ "QryNoauthAns":120770,
+ "QryNxrrset":32711,
+ "QrySERVFAIL":262,
+ "QryNXDOMAIN":2395,
+ "QryRecursion":40885,
+ "QryDuplicate":5712,
+ "QryFailure":1577,
+ "UpdateDone":2514,
+ "UpdateFail":1299,
+ "UpdateBadPrereq":1276,
+ "QryUDP":246194,
+ "QryTCP":45,
+ "OtherOpt":101
+ },
+ "views":{
+ "local":{
+ "resolver":{
+ "stats":{
+ "Queryv4":74577,
+ "Responsev4":67032,
+ "NXDOMAIN":601,
+ "SERVFAIL":5,
+ "FORMERR":7,
+ "EDNS0Fail":7,
+ "Truncated":3071,
+ "Lame":4,
+ "Retry":11826,
+ "QueryTimeout":1838,
+ "GlueFetchv4":6864,
+ "GlueFetchv4Fail":30,
+ "QryRTT10":112,
+ "QryRTT100":42900,
+ "QryRTT500":23275,
+ "QryRTT800":534,
+ "QryRTT1600":97,
+ "QryRTT1600+":20,
+ "BucketSize":31,
+ "REFUSED":13
+ },
+ "qtypes":{
+ "A":64931,
+ "NS":870,
+ "CNAME":185,
+ "PTR":5,
+ "MX":49,
+ "TXT":149,
+ "AAAA":7972,
+ "SRV":416
+ },
+ "cache":{
+ "A":40356,
+ "NS":8032,
+ "CNAME":14477,
+ "PTR":2,
+ "MX":21,
+ "TXT":32,
+ "AAAA":3301,
+ "SRV":94,
+ "DS":237,
+ "RRSIG":2301,
+ "NSEC":126,
+ "!A":52,
+ "!NS":4,
+ "!TXT":1,
+ "!AAAA":3797,
+ "!SRV":9,
+ "NXDOMAIN":590
+ },
+ "cachestats":{
+ "CacheHits":1085188,
+ "CacheMisses":109,
+ "QueryHits":464755,
+ "QueryMisses":55624,
+ "DeleteLRU":0,
+ "DeleteTTL":42615,
+ "CacheNodes":5188,
+ "CacheBuckets":2079,
+ "TreeMemTotal":2326026,
+ "TreeMemInUse":1508075,
+ "HeapMemMax":132096,
+ "HeapMemTotal":393216,
+ "HeapMemInUse":132096
+ },
+ "adb":{
+ "nentries":1021,
+ "entriescnt":3157,
+ "nnames":1021,
+ "namescnt":3022
+ }
+ }
+ },
+ "public":{
+ "resolver":{
+ "stats":{
+ "BucketSize":31
+ },
+ "qtypes":{
+ },
+ "cache":{
+ },
+ "cachestats":{
+ "CacheHits":0,
+ "CacheMisses":0,
+ "QueryHits":0,
+ "QueryMisses":0,
+ "DeleteLRU":0,
+ "DeleteTTL":0,
+ "CacheNodes":0,
+ "CacheBuckets":64,
+ "TreeMemTotal":287392,
+ "TreeMemInUse":29608,
+ "HeapMemMax":1024,
+ "HeapMemTotal":262144,
+ "HeapMemInUse":1024
+ },
+ "adb":{
+ "nentries":1021,
+ "nnames":1021
+ }
+ }
+ },
+ "_bind":{
+ "resolver":{
+ "stats":{
+ "BucketSize":31
+ },
+ "qtypes":{
+ },
+ "cache":{
+ },
+ "cachestats":{
+ "CacheHits":0,
+ "CacheMisses":0,
+ "QueryHits":0,
+ "QueryMisses":0,
+ "DeleteLRU":0,
+ "DeleteTTL":0,
+ "CacheNodes":0,
+ "CacheBuckets":64,
+ "TreeMemTotal":287392,
+ "TreeMemInUse":29608,
+ "HeapMemMax":1024,
+ "HeapMemTotal":262144,
+ "HeapMemInUse":1024
+ },
+ "adb":{
+ "nentries":1021,
+ "nnames":1021
+ }
+ }
+ }
+ }
+}
+```
+
+
+From this output it collects:
+
+- Global Received Requests by IP version (IPv4, IPv6)
+- Global Successful Queries
+- Current Recursive Clients
+- Global Queries by IP Protocol (TCP, UDP)
+- Global Queries Analysis
+- Global Received Updates
+- Global Query Failures
+- Global Query Failures Analysis
+- Other Global Server Statistics
+- Global Incoming Requests by OpCode
+- Global Incoming Requests by Query Type
+- Global Socket Statistics (will only work if the url is `http://127.0.0.1:8888/json/v1`, i.e. without `/server`, but keep in mind this produces a very long output and probably will account for 0.5% CPU overhead alone, per bind server added)
+- Per View Statistics (the following set will be added for each bind view):
+ - View, Resolver Active Queries
+ - View, Resolver Statistics
+ - View, Resolver Round Trip Timings
+ - View, Requests by Query Type
+
+## Configuration
+
+The collector (optionally) reads a configuration file named `/etc/netdata/node.d/named.conf`, with the following contents:
+
+```js
+{
+ "enable_autodetect": true,
+ "update_every": 5,
+ "servers": [
+ {
+ "name": "bind1",
+ "url": "http://127.0.0.1:8888/json/v1/server",
+ "update_every": 1
+ },
+ {
+ "name": "bind2",
+ "url": "http://10.1.2.3:8888/json/v1/server",
+ "update_every": 2
+ }
+ ]
+}
+```
+
+You can add any number of bind servers.
+
+If the configuration file is missing, or the key `enable_autodetect` is `true`, the collector will also attempt to fetch `http://localhost:8888/json/v1/server` which, if successful will be added too.
+
+### XML instead of JSON, from bind
+
+The collector can also accept bind URLs that return XML output. This might be required if you cannot have bind 9.10+ with JSON but you have a version of bind that supports XML statistics v3. Check [this](https://www.isc.org/blogs/bind-9-10-statistics-troubleshooting-and-zone-configuration/) for versions supported.
+
+In such cases, use a URL like this:
+
+```sh
+curl "http://localhost:8888/xml/v3/server"
+```
+
+Only `xml` and `v3` have been tested.
+
+Keep in mind though, that XML parsing is done using javascript code, which requires a triple conversion:
+
+1. from XML to JSON using a javascript XML parser (**CPU intensive**),
+2. which is then transformed to emulate the JSON output of bind (**CPU intensive** - and yes the converted JSON from XML is different to the native JSON - even bind produces different names for various attributes),
+3. which is then processed to generate the data for the charts (this will happen even if bind is producing JSON).
+
+In general, expect XML parsing to be 2 to 3 times more CPU intensive than JSON.
+
+**So, if you can use the JSON output of bind, prefer it over XML**. Keep also in mind that even bind will use more CPU when generating XML instead of JSON.
+
+The XML interface of bind is not autodetected.
+You will have to provide the config file `/etc/netdata/node.d/named.conf`, like this:
+
+```js
+{
+ "enable_autodetect": false,
+ "update_every": 1,
+ "servers": [
+ {
+ "name": "local",
+ "url": "http://localhost:8888/xml/v3/server",
+ "update_every": 1
+ }
+ ]
+}
+```
+
+Of course, you can monitor more than one bind server. Each one can be configured with either JSON or XML output.
+
+## Auto-detection
+
+Auto-detection is controlled by `enable_autodetect` in the config file. The default is enabled, so that if the collector can connect to `http://localhost:8888/json/v1/server` to receive bind statistics, it will automatically enable it.
+
+## Bind (named) configuration
+
+To use this plugin, you have to have bind v9.10+ properly compiled to provide statistics in `JSON` format.
+
+For more information on how to get your bind installation ready, please refer to the [bind statistics channel developer comments](http://jpmens.net/2013/03/18/json-in-bind-9-s-statistics-server/) and to [bind documentation](https://ftp.isc.org/isc/bind/9.10.3/doc/arm/Bv9ARM.ch06.html#statistics) or [bind Knowledge Base article AA-01123](https://kb.isc.org/article/AA-01123/0).
+
+Normally, you will need something like this in your `named.conf`:
+
+```
+statistics-channels {
+ inet 127.0.0.1 port 8888 allow { 127.0.0.1; };
+ inet ::1 port 8888 allow { ::1; };
+};
+```
+
+(use the IPv4 or IPv6 line depending on what you are using, you can also use both)
+
+Verify it works by running the following command (the collector is written in node.js and will query your bind server directly, but if this command works, the collector should be able to work too):
+
+```sh
+curl "http://localhost:8888/json/v1/server"
+```
+
diff --git a/conf.d/node.d/sma_webbox.conf.md b/conf.d/node.d/sma_webbox.conf.md
new file mode 100644
index 00000000..19fdc9dd
--- /dev/null
+++ b/conf.d/node.d/sma_webbox.conf.md
@@ -0,0 +1,25 @@
+
+[SMA Sunny Webbox](http://www.solar-is-future.com/sma-technology-for-our-future/products/sunny-webbox/index.html)
+
+Example netdata configuration for node.d/sma_webbox.conf
+
+The module supports any number of SMA Sunny Webbox devices, like this:
+
+```json
+{
+ "enable_autodetect": false,
+ "update_every": 5,
+ "servers": [
+ {
+ "name": "plant1",
+ "hostname": "10.0.1.1",
+ "update_every": 10
+ },
+ {
+ "name": "plant2",
+ "hostname": "10.0.2.1",
+ "update_every": 15
+ }
+ ]
+}
+```
diff --git a/conf.d/node.d/snmp.conf.md b/conf.d/node.d/snmp.conf.md
new file mode 100644
index 00000000..bae5bf20
--- /dev/null
+++ b/conf.d/node.d/snmp.conf.md
@@ -0,0 +1,341 @@
+# SNMP Data Collector
+
+Using this collector, netdata can collect data from any SNMP device.
+
+This collector supports:
+
+- any number of SNMP devices
+- each SNMP device can be used to collect data for any number of charts
+- each chart may have any number of dimensions
+- each SNMP device may have a different update frequency
+- each SNMP device will accept one or more batches to report values (you can set `max_request_size` per SNMP server, to control the size of batches).
+
+The source code of the plugin is [here](https://github.com/firehol/netdata/blob/master/node.d/snmp.node.js).
+
+## Configuration
+
+You will need to create the file `/etc/netdata/node.d/snmp.conf` with data like the following.
+
+In this example:
+
+ - the SNMP device is `10.11.12.8`.
+ - the SNMP community is `public`.
+ - we will update the values every 10 seconds (`update_every: 10` under the server `10.11.12.8`).
+ - we define 2 charts `snmp_switch.bandwidth_port1` and `snmp_switch.bandwidth_port2`, each having 2 dimensions: `in` and `out`.
+
+```js
+{
+ "enable_autodetect": false,
+ "update_every": 5,
+ "max_request_size": 100,
+ "servers": [
+ {
+ "hostname": "10.11.12.8",
+ "community": "public",
+ "update_every": 10,
+ "max_request_size": 50,
+ "options": { "timeout": 10000 },
+ "charts": {
+ "snmp_switch.bandwidth_port1": {
+ "title": "Switch Bandwidth for port 1",
+ "units": "kilobits/s",
+ "type": "area",
+ "priority": 1,
+ "family": "ports",
+ "dimensions": {
+ "in": {
+ "oid": "1.3.6.1.2.1.2.2.1.10.1",
+ "algorithm": "incremental",
+ "multiplier": 8,
+ "divisor": 1024
+ },
+ "out": {
+ "oid": "1.3.6.1.2.1.2.2.1.16.1",
+ "algorithm": "incremental",
+ "multiplier": -8,
+ "divisor": 1024
+ }
+ }
+ },
+ "snmp_switch.bandwidth_port2": {
+ "title": "Switch Bandwidth for port 2",
+ "units": "kilobits/s",
+ "type": "area",
+ "priority": 1,
+ "family": "ports",
+ "dimensions": {
+ "in": {
+ "oid": "1.3.6.1.2.1.2.2.1.10.2",
+ "algorithm": "incremental",
+ "multiplier": 8,
+ "divisor": 1024
+ },
+ "out": {
+ "oid": "1.3.6.1.2.1.2.2.1.16.2",
+ "algorithm": "incremental",
+ "multiplier": -8,
+ "divisor": 1024
+ }
+ }
+ }
+ }
+ }
+ ]
+}
+```
+
+`update_every` is the update frequency for each server, in seconds.
+
+`max_request_size` limits the maximum number of OIDs that will be requested in a single call. The default is 50. Lower this number if you get `TooBig` errors in netdata error.log.
+
+`family` sets the name of the submenu of the dashboard each chart will appear under.
+
+If you need to define many charts using incremental OIDs, you can use something like this:
+
+This is like the previous, but the option `multiply_range` given, will multiply the current chart from `1` to `24` inclusive, producing 24 charts in total for the 24 ports of the switch `10.11.12.8`.
+
+Each of the 24 new charts will have its id (1-24) appended at:
+
+1. its chart unique id, i.e. `snmp_switch.bandwidth_port1` to `snmp_switch.bandwidth_port24`
+2. its `title`, i.e. `Switch Bandwidth for port 1` to `Switch Bandwidth for port 24`
+3. its `oid` (for all dimensions), i.e. dimension `in` will be `1.3.6.1.2.1.2.2.1.10.1` to `1.3.6.1.2.1.2.2.1.10.24`
+4. its priority (which will be incremented for each chart so that the charts will appear on the dashboard in this order)
+
+```js
+{
+ "enable_autodetect": false,
+ "update_every": 10,
+ "servers": [
+ {
+ "hostname": "10.11.12.8",
+ "community": "public",
+ "update_every": 10,
+ "options": { "timeout": 20000 },
+ "charts": {
+ "snmp_switch.bandwidth_port": {
+ "title": "Switch Bandwidth for port ",
+ "units": "kilobits/s",
+ "type": "area",
+ "priority": 1,
+ "family": "ports",
+ "multiply_range": [ 1, 24 ],
+ "dimensions": {
+ "in": {
+ "oid": "1.3.6.1.2.1.2.2.1.10.",
+ "algorithm": "incremental",
+ "multiplier": 8,
+ "divisor": 1024
+ },
+ "out": {
+ "oid": "1.3.6.1.2.1.2.2.1.16.",
+ "algorithm": "incremental",
+ "multiplier": -8,
+ "divisor": 1024
+ }
+ }
+ }
+ }
+ }
+ ]
+}
+```
+
+The `options` given for each server, are:
+
+ - `timeout`, the time to wait for the SNMP device to respond. The default is 5000 ms.
+ - `version`, the SNMP version to use. `0` is Version 1, `1` is Version 2c. The default is Version 1 (`0`).
+ - `transport`, the default is `udp4`.
+ - `port`, the port of the SNMP device to connect to. The default is `161`.
+ - `retries`, the number of attempts to make to fetch the data. The default is `1`.
+
+## Retrieving names from SNMP
+
+You can append a value retrieved from SNMP to the title, by adding `titleoid` to the chart.
+
+You can set a dimension name to a value retrieved from SNMP, by adding `oidname` to the dimension.
+
+Both of the above will participate in `multiply_range`.
+
+
+## Testing the configuration
+
+To test it, you can run:
+
+```sh
+/usr/libexec/netdata/plugins.d/node.d.plugin 1 snmp
+```
+
+The above will run it on your console and you will be able to see what netdata sees, but also errors. You can get a very detailed output by appending `debug` to the command line.
+
+If it works, restart netdata to activate the snmp collector and refresh the dashboard (if your SNMP device responds with a delay, you may need to refresh the dashboard in a few seconds).
+
+## Data collection speed
+
+Keep in mind that many SNMP switches and routers are very slow. They may not be able to report values per second. If you run `node.d.plugin` in `debug` mode, it will report the time it took for the SNMP device to respond. My switch, for example, needs 7-8 seconds to respond for the traffic on 24 ports (48 OIDs, in/out).
+
+Also, if you use many SNMP clients on the same SNMP device at the same time, values may be skipped. This is a problem of the SNMP device, not this collector.
+
+## Finding OIDs
+
+Use `snmpwalk`, like this:
+
+```sh
+snmpwalk -t 20 -v 1 -O fn -c public 10.11.12.8
+```
+
+- `-t 20` is the timeout in seconds
+- `-v 1` is the SNMP version
+- `-O fn` will display full OIDs in numeric format (you may want to run it also without this option to see human readable output of OIDs)
+- `-c public` is the SNMP community
+- `10.11.12.8` is the SNMP device
+
+Keep in mind that `snmpwalk` outputs the OIDs with a dot in front of them. You should remove this dot when adding OIDs to the configuration file of this collector.
+
+## Example: Linksys SRW2024P
+
+This is what I use for my Linksys SRW2024P. It creates:
+
+1. A chart for power consumption (it is a PoE switch)
+2. Two charts for packets received (total packets received and packets received with errors)
+3. One chart for packets output
+4. 24 charts, one for each port of the switch. It also appends the port names, as defined at the switch, to the chart titles.
+
+This switch also reports various other metrics, like snmp, packets per port, etc. Unfortunately it does not report CPU utilization or backplane utilization.
+
+This switch has a very slow SNMP processor. To respond, it needs about 8 seconds, so I have set the refresh frequency (`update_every`) to 15 seconds.
+
+```js
+{
+ "enable_autodetect": false,
+ "update_every": 5,
+ "servers": [
+ {
+ "hostname": "10.11.12.8",
+ "community": "public",
+ "update_every": 15,
+ "options": { "timeout": 20000, "version": 1 },
+ "charts": {
+ "snmp_switch.power": {
+ "title": "Switch Power Supply",
+ "units": "watts",
+ "type": "line",
+ "priority": 10,
+ "family": "power",
+ "dimensions": {
+ "supply": {
+ "oid": ".1.3.6.1.2.1.105.1.3.1.1.2.1",
+ "algorithm": "absolute",
+ "multiplier": 1,
+ "divisor": 1
+ },
+ "used": {
+ "oid": ".1.3.6.1.2.1.105.1.3.1.1.4.1",
+ "algorithm": "absolute",
+ "multiplier": 1,
+ "divisor": 1
+ }
+ }
+ }
+ , "snmp_switch.input": {
+ "title": "Switch Packets Input",
+ "units": "packets/s",
+ "type": "area",
+ "priority": 20,
+ "family": "IP",
+ "dimensions": {
+ "receives": {
+ "oid": ".1.3.6.1.2.1.4.3.0",
+ "algorithm": "incremental",
+ "multiplier": 1,
+ "divisor": 1
+ }
+ , "discards": {
+ "oid": ".1.3.6.1.2.1.4.8.0",
+ "algorithm": "incremental",
+ "multiplier": 1,
+ "divisor": 1
+ }
+ }
+ }
+ , "snmp_switch.input_errors": {
+ "title": "Switch Received Packets with Errors",
+ "units": "packets/s",
+ "type": "line",
+ "priority": 30,
+ "family": "IP",
+ "dimensions": {
+ "bad_header": {
+ "oid": ".1.3.6.1.2.1.4.4.0",
+ "algorithm": "incremental",
+ "multiplier": 1,
+ "divisor": 1
+ }
+ , "bad_address": {
+ "oid": ".1.3.6.1.2.1.4.5.0",
+ "algorithm": "incremental",
+ "multiplier": 1,
+ "divisor": 1
+ }
+ , "unknown_protocol": {
+ "oid": ".1.3.6.1.2.1.4.7.0",
+ "algorithm": "incremental",
+ "multiplier": 1,
+ "divisor": 1
+ }
+ }
+ }
+ , "snmp_switch.output": {
+ "title": "Switch Output Packets",
+ "units": "packets/s",
+ "type": "line",
+ "priority": 40,
+ "family": "IP",
+ "dimensions": {
+ "requests": {
+ "oid": ".1.3.6.1.2.1.4.10.0",
+ "algorithm": "incremental",
+ "multiplier": 1,
+ "divisor": 1
+ }
+ , "discards": {
+ "oid": ".1.3.6.1.2.1.4.11.0",
+ "algorithm": "incremental",
+ "multiplier": -1,
+ "divisor": 1
+ }
+ , "no_route": {
+ "oid": ".1.3.6.1.2.1.4.12.0",
+ "algorithm": "incremental",
+ "multiplier": -1,
+ "divisor": 1
+ }
+ }
+ }
+ , "snmp_switch.bandwidth_port": {
+ "title": "Switch Bandwidth for port ",
+ "titleoid": ".1.3.6.1.2.1.31.1.1.1.18.",
+ "units": "kilobits/s",
+ "type": "area",
+ "priority": 100,
+ "family": "ports",
+ "multiply_range": [ 1, 24 ],
+ "dimensions": {
+ "in": {
+ "oid": ".1.3.6.1.2.1.2.2.1.10.",
+ "algorithm": "incremental",
+ "multiplier": 8,
+ "divisor": 1024
+ }
+ , "out": {
+ "oid": ".1.3.6.1.2.1.2.2.1.16.",
+ "algorithm": "incremental",
+ "multiplier": -8,
+ "divisor": 1024
+ }
+ }
+ }
+ }
+ }
+ ]
+}
+```
diff --git a/conf.d/python.d.conf b/conf.d/python.d.conf
index 940bd918..7e4fa801 100644
--- a/conf.d/python.d.conf
+++ b/conf.d/python.d.conf
@@ -30,6 +30,7 @@ example: no
# exim: yes
# hddtemp: yes
# ipfs: yes
+# isc_dhcpd: yes
# memcached: yes
# mysql: yes
# nginx: yes
@@ -40,3 +41,5 @@ example: no
# sensors: yes
# squid: yes
# tomcat: yes
+# freeradius: yes
+# ovpn_status_log: yes
diff --git a/conf.d/python.d/bind_rndc.conf b/conf.d/python.d/bind_rndc.conf
new file mode 100644
index 00000000..e4f7ac82
--- /dev/null
+++ b/conf.d/python.d/bind_rndc.conf
@@ -0,0 +1,109 @@
+# netdata python.d.plugin configuration for bind_rndc
+#
+# This file is in YaML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 5 # the JOB's number of restoration attempts
+#
+# Additionally to the above, bind_rndc also supports the following:
+#
+# named_stats_path: 'path to named.stats' # Default: '/var/log/bind/named.stats'
+#------------------------------------------------------------------------------------------------------------------
+# IMPORTANT Information
+#
+# BIND APPENDS logs at EVERY RUN. It's NOT RECOMMENDED to set update_every below 30 sec.
+# STRONGLY RECOMMENDED to create a bind-rndc conf file for logrotate
+#
+# To set up your BIND to dump stats do the following:
+#
+# 1. add to 'named.conf.options' options {}:
+# statistics-file "/var/log/bind/named.stats";
+#
+# 2. Create bind/ directory in /var/log
+# cd /var/log/ && mkdir bind
+#
+# 3. Change owner of directory to 'bind' user
+# chown bind bind/
+#
+# 4. RELOAD (NOT restart) BIND
+# systemctl reload bind9.service
+#
+# 5. Run 'rndc stats' as root to dump the statistics (BIND will create named.stats in the new directory)
+#
+#
+# To ALLOW NETDATA TO RUN 'rndc stats' change '/etc/bind/rndc.key' group to netdata
+# chown :netdata rndc.key
+#
+# Last BUT NOT least, create bind-rndc.conf in logrotate.d/
+# The working one
+# /var/log/bind/named.stats {
+#
+# daily
+# rotate 4
+# compress
+# delaycompress
+# create 0644 bind bind
+# missingok
+# postrotate
+# rndc reload > /dev/null
+# endscript
+# }
+#
+# To test your logrotate conf file run as root:
+#
+# logrotate /etc/logrotate.d/bind-rndc -d (debug dry-run mode)
+# ------------------------------------------------------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+#
+#local:
+# named_stats_path: '/var/log/bind/named.stats'
diff --git a/conf.d/python.d/elasticsearch.conf b/conf.d/python.d/elasticsearch.conf
new file mode 100644
index 00000000..1faee858
--- /dev/null
+++ b/conf.d/python.d/elasticsearch.conf
@@ -0,0 +1,72 @@
+# netdata python.d.plugin configuration for elasticsearch stats
+#
+# This file is in YaML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 5 # the JOB's number of restoration attempts
+#
+# Additionally to the above, elasticsearch plugin also supports the following:
+#
+# host: 'ipaddress' # Server ip address or hostname.
+# port: 'port' # Port on which elasticsearch listens.
+# cluster_health: False/True # Calls to cluster health elasticsearch API. Enabled by default.
+# cluster_stats: False/True # Calls to cluster stats elasticsearch API. Enabled by default.
+#
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+#
+#local:
+# host: '127.0.0.1'
+# port: '9200'
+# cluster_health: True
+# cluster_stats: True
diff --git a/conf.d/python.d/exim.conf b/conf.d/python.d/exim.conf
index 6aca13c3..07d72c5a 100644
--- a/conf.d/python.d/exim.conf
+++ b/conf.d/python.d/exim.conf
@@ -1,4 +1,4 @@
-# netdata python.d.plugin configuration for postfix
+# netdata python.d.plugin configuration for exim
#
# This file is in YaML format. Generally the format is:
#
@@ -55,7 +55,7 @@ update_every: 10
# priority: 60000 # the JOB's order on the dashboard
# retries: 5 # the JOB's number of restoration attempts
#
-# Additionally to the above, postfix also supports the following:
+# Additionally to the above, exim also supports the following:
#
# command: 'exim -bpc' # the command to run
#
diff --git a/conf.d/python.d/fail2ban.conf b/conf.d/python.d/fail2ban.conf
new file mode 100644
index 00000000..cd805be8
--- /dev/null
+++ b/conf.d/python.d/fail2ban.conf
@@ -0,0 +1,77 @@
+# netdata python.d.plugin configuration for fail2ban
+#
+# This file is in YaML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 5 # the JOB's number of restoration attempts
+#
+# Additionally to the above, fail2ban also supports the following:
+#
+# log_path: 'path to fail2ban.log' # Default: '/var/log/fail2ban.log'
+# conf_path: 'path to jail.local/jail.conf' # Default: '/etc/fail2ban/jail.local'
+# exclude: 'jails you want to exclude from autodetection' # Default: '[]' empty list
+#------------------------------------------------------------------------------------------------------------------
+# IMPORTANT Information
+#
+# fail2ban.log file MUST BE readable by netdata.
+# A good idea is to do this by adding the
+# # create 0640 root netdata
+# to fail2ban conf at logrotate.d
+#
+# ------------------------------------------------------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+
+#local:
+# log_path: '/var/log/fail2ban.log'
+# conf_path: '/etc/fail2ban/jail.local'
+# exclude: 'dropbear apache'
diff --git a/conf.d/python.d/freeradius.conf b/conf.d/python.d/freeradius.conf
new file mode 100644
index 00000000..b2c8abf6
--- /dev/null
+++ b/conf.d/python.d/freeradius.conf
@@ -0,0 +1,86 @@
+# netdata python.d.plugin configuration for freeradius
+#
+# This file is in YaML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 5 # the JOB's number of restoration attempts
+#
+# Additionally to the above, freeradius also supports the following:
+#
+# host: 'host' # Default: 'localhost'. Server ip address or hostname.
+# port: 'port' # Default: '18121'. Port on which freeradius server listens (type = status).
+# secret: 'secret' # Default: 'adminsecret'.
+# acct: True/False # Default: False. Freeradius accounting statistics.
+# proxy_auth: True/False # Default: False. Freeradius proxy authentication statistics.
+# proxy_acct: True/False # Default: False. Freeradius proxy accounting statistics.
+#
+# ------------------------------------------------------------------------------------------------------------------
+# Freeradius server configuration:
+# The configuration for the status server is automatically created in the sites-available directory.
+# By default, server is enabled and can be queried from every client.
+# FreeRADIUS will only respond to status-server messages, if the status-server virtual server has been enabled.
+# To do this, create a link from the sites-enabled directory to the status file in the sites-available directory:
+# cd sites-enabled
+# ln -s ../sites-available/status status
+# and restart/reload your FREERADIUS server.
+# ------------------------------------------------------------------------------------------------------------------
+#
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+
+local:
+ host: 'localhost'
+ port: '18121'
+ secret: 'adminsecret'
+#acct: False
+#proxy_auth: False
+#proxy_acct: False
diff --git a/conf.d/python.d/gunicorn_log.conf b/conf.d/python.d/gunicorn_log.conf
new file mode 100644
index 00000000..8fea483f
--- /dev/null
+++ b/conf.d/python.d/gunicorn_log.conf
@@ -0,0 +1,73 @@
+# netdata python.d.plugin configuration for nginx gunicorn log
+#
+# This file is in YaML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 5 # the JOB's number of restoration attempts
+#
+# Additionally to the above, gunicorn_log also supports the following:
+#
+# path: 'PATH' # the path to gunicorn's access.log
+#
+
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+
+gunicorn_log:
+ name: 'local'
+ path: '/var/log/gunicorn/access.log'
+
+gunicorn_log2:
+ name: 'local'
+ path: '/var/log/gunicorn/gunicorn-access.log'
+
diff --git a/conf.d/python.d/haproxy.conf b/conf.d/python.d/haproxy.conf
new file mode 100644
index 00000000..a9e04879
--- /dev/null
+++ b/conf.d/python.d/haproxy.conf
@@ -0,0 +1,78 @@
+# netdata python.d.plugin configuration for haproxy
+#
+# This file is in YaML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 5 # the JOB's number of restoration attempts
+#
+# Additionally to the above, haproxy also supports the following:
+#
+# IMPORTANT: socket MUST BE readable AND writable by netdata user
+#
+# socket: 'path/to/haproxy/sock'
+#
+# OR
+# url: 'http://<ip.address>:<port>/<url>;csv;norefresh'
+# [user: USERNAME] only if stats auth is used
+# [pass: PASSWORD] only if stats auth is used
+
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+
+#via_url:
+# user : 'admin'
+# pass : 'password'
+# url : 'http://127.0.0.1:7000/haproxy_stats;csv;norefresh'
+
+#via_socket:
+# socket: '/var/run/haproxy/admin.sock'
diff --git a/conf.d/python.d/hddtemp.conf b/conf.d/python.d/hddtemp.conf
index 0c78449b..f74a0980 100644
--- a/conf.d/python.d/hddtemp.conf
+++ b/conf.d/python.d/hddtemp.conf
@@ -58,6 +58,7 @@
#
# host: 'IP or HOSTNAME' # the host to connect to
# port: PORT # the port to connect to
+# fahrenheit: True/False # fahrenheit instead of celsius. Default is False
#
# By default this module will try to autodetect disks
@@ -77,11 +78,13 @@
localhost:
name: 'local'
host: 'localhost'
+ fahrenheit: False
port: 7634
localipv4:
name: 'local'
host: '127.0.0.1'
+ fahrenheit: False
port: 7634
localipv6:
diff --git a/conf.d/python.d/isc_dhcpd.conf b/conf.d/python.d/isc_dhcpd.conf
new file mode 100644
index 00000000..7c8fe3ce
--- /dev/null
+++ b/conf.d/python.d/isc_dhcpd.conf
@@ -0,0 +1,78 @@
+# netdata python.d.plugin configuration for isc dhcpd leases
+#
+# This file is in YaML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 5 # the JOB's number of restoration attempts
+#
+# Additionally to the above, isc_dhcpd supports the following:
+#
+# leases_path: 'PATH' # the path to dhcpd.leases file
+# pools: 'dhcpd pools list' # Pools in CIDR format
+#
+#-----------------------------------------------------------------------
+# IMPORTANT notes
+#
+# 1. Make sure leases file is readable by netdata.
+# 2. Current implementation works only with 'default' db-time-format
+# (weekday year/month/day hour:minute:second).
+# This is the default, so it will work in most cases.
+# 3. Pools MUST BE in CIDR format.
+#
+#-----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# This is disabled by default.
+# To enable it, uncomment the following.
+#
+#leases:
+# leases_path : '/var/lib/dhcp/dhcpd.leases'
+# pools : '192.168.3.0/24 192.168.4.0/24 192.168.5.0/24'
diff --git a/conf.d/python.d/mdstat.conf b/conf.d/python.d/mdstat.conf
new file mode 100644
index 00000000..c89d463b
--- /dev/null
+++ b/conf.d/python.d/mdstat.conf
@@ -0,0 +1,26 @@
+# netdata python.d.plugin configuration for mdstat
+#
+# This file is in YaML format. Generally the format is:
+#
+# name: value
+#
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
diff --git a/conf.d/python.d/mysql.conf b/conf.d/python.d/mysql.conf
index 8fbbe651..63d63517 100644
--- a/conf.d/python.d/mysql.conf
+++ b/conf.d/python.d/mysql.conf
@@ -69,6 +69,23 @@
#
# ----------------------------------------------------------------------
+# mySQL CONFIGURATION
+#
+# netdata does not need any privilege - only the ability to connect
+# to the mysql server (netdata will not be able to see any data).
+#
+# Execute these commands to give the local user 'netdata' the ability
+# to connect to the mysql server on localhost, without a password:
+#
+# > create user 'netdata'@'localhost';
+# > grant usage on *.* to 'netdata'@'localhost' with grant option;
+# > flush privileges;
+#
+# with the above statements, netdata will be able to gather mysql
+# statistics, without the ability to see or alter any data or affect
+# mysql operation in any way. No change is required below.
+
+# ----------------------------------------------------------------------
# AUTO-DETECTION JOBS
# only one of them will run (they have the same name)
@@ -80,6 +97,10 @@ mycnf2:
name : 'local'
'my.cnf' : '/etc/mysql/my.cnf'
+debiancnf:
+ name : 'local'
+ 'my.cnf' : '/etc/mysql/debian.cnf'
+
socket1:
name : 'local'
# user : ''
@@ -90,12 +111,18 @@ socket2:
name : 'local'
# user : ''
# pass : ''
- socket : '/var/lib/mysql/mysql.sock'
+ socket : '/var/run/mysqld/mysql.sock'
socket3:
name : 'local'
# user : ''
# pass : ''
+ socket : '/var/lib/mysql/mysql.sock'
+
+socket4:
+ name : 'local'
+ # user : ''
+ # pass : ''
socket : '/tmp/mysql.sock'
tcp:
@@ -146,12 +173,18 @@ socket2_root:
name : 'local'
user : 'root'
# pass : ''
- socket : '/var/lib/mysql/mysql.sock'
+ socket : '/var/run/mysqld/mysql.sock'
socket3_root:
name : 'local'
user : 'root'
# pass : ''
+ socket : '/var/lib/mysql/mysql.sock'
+
+socket4_root:
+ name : 'local'
+ user : 'root'
+ # pass : ''
socket : '/tmp/mysql.sock'
tcp_root:
@@ -177,3 +210,63 @@ tcpipv6_root:
host : '::1'
port : '3306'
+
+# Now we try the same as above with user: netdata
+
+mycnf1_netdata:
+ name : 'local'
+ user : 'netdata'
+ 'my.cnf' : '/etc/my.cnf'
+
+mycnf2_netdata:
+ name : 'local'
+ user : 'netdata'
+ 'my.cnf' : '/etc/mysql/my.cnf'
+
+socket1_netdata:
+ name : 'local'
+ user : 'netdata'
+ # pass : ''
+ socket : '/var/run/mysqld/mysqld.sock'
+
+socket2_netdata:
+ name : 'local'
+ user : 'netdata'
+ # pass : ''
+ socket : '/var/run/mysqld/mysql.sock'
+
+socket3_netdata:
+ name : 'local'
+ user : 'netdata'
+ # pass : ''
+ socket : '/var/lib/mysql/mysql.sock'
+
+socket4_netdata:
+ name : 'local'
+ user : 'netdata'
+ # pass : ''
+ socket : '/tmp/mysql.sock'
+
+tcp_netdata:
+ name : 'local'
+ user : 'netdata'
+ # pass : ''
+ host : 'localhost'
+ port : '3306'
+ # keep in mind port might be ignored by mysql, if host = 'localhost'
+ # http://serverfault.com/questions/337818/how-to-force-mysql-to-connect-by-tcp-instead-of-a-unix-socket/337844#337844
+
+tcpipv4_netdata:
+ name : 'local'
+ user : 'netdata'
+ # pass : ''
+ host : '127.0.0.1'
+ port : '3306'
+
+tcpipv6_netdata:
+ name : 'local'
+ user : 'netdata'
+ # pass : ''
+ host : '::1'
+ port : '3306'
+
diff --git a/conf.d/python.d/ovpn_status_log.conf b/conf.d/python.d/ovpn_status_log.conf
new file mode 100644
index 00000000..39bc8e9d
--- /dev/null
+++ b/conf.d/python.d/ovpn_status_log.conf
@@ -0,0 +1,86 @@
+# netdata python.d.plugin configuration for openvpn status log
+#
+# This file is in YaML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 5 # the JOB's number of restoration attempts
+#
+# Additionally to the above, openvpn status log also supports the following:
+#
+# log_path: 'PATH' # the path to openvpn status log file
+#
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+#
+# IMPORTANT information
+#
+# 1. If you are running multiple OpenVPN instances out of the same directory, MAKE SURE TO EDIT DIRECTIVES which create output files
+# so that multiple instances do not overwrite each other's output files.
+# 2. Make sure NETDATA USER CAN READ openvpn-status.log
+#
+# * cd into directory with openvpn-status.log and run the following commands as root
+# * #chown :netdata openvpn-status.log && chmod 640 openvpn-status.log
+# * To check permission and group membership run
+# * #ls -l openvpn-status.log
+# -rw-r----- 1 root netdata 359 dec 21 21:22 openvpn-status.log
+#
+# 3. Update_every interval MUST MATCH interval on which OpenVPN writes operational status to log file.
+# If they do not match, the traffic chart WILL DISPLAY WRONG values
+#
+# Default OpenVPN update interval is 10 seconds on Debian 8
+# # ps -C openvpn -o command=
+# /usr/sbin/openvpn --daemon ovpn-server --status /run/openvpn/server.status 10 --cd /etc/openvpn --config /etc/openvpn/server.conf
+#
+#
+#default:
+# log_path: '/var/log/openvpn-status.log'
diff --git a/conf.d/python.d/postgres.conf b/conf.d/python.d/postgres.conf
new file mode 100644
index 00000000..d4d2bafc
--- /dev/null
+++ b/conf.d/python.d/postgres.conf
@@ -0,0 +1,104 @@
+# netdata python.d.plugin configuration for postgresql
+#
+# This file is in YaML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 5 # the JOB's number of restoration attempts
+#
+# A single connection is required in order to pull statistics.
+#
+# Connections can be configured with the following options:
+#
+# database : 'example_db_name'
+# user : 'example_user'
+# password : 'example_pass'
+# host : 'localhost'
+# port : 5432
+#
+# Additionally, the following options allow selective disabling of charts
+#
+# table_stats : false
+# index_stats : false
+#
+# PostgreSQL permissions are configured in its pg_hba.conf file. You can
+# "trust" local clients to allow netdata to connect, or you can create
+# a postgres user for netdata and add its password below to allow
+# netdata to connect.
+#
+# ----------------------------------------------------------------------
+
+socket:
+ name : 'local'
+ user : 'postgres'
+ database : 'postgres'
+
+tcp:
+ name : 'local'
+ database : 'postgres'
+ user : 'postgres'
+ host : 'localhost'
+ port : 5432
+
+tcpipv4:
+ name : 'local'
+ database : 'postgres'
+ user : 'postgres'
+ host : '127.0.0.1'
+ port : 5432
+
+tcpipv6:
+ name : 'local'
+ database : 'postgres'
+ user : 'postgres'
+ host : '::1'
+ port : 5432
+
diff --git a/conf.d/python.d/redis.conf b/conf.d/python.d/redis.conf
index 9935bff7..983fbfbd 100644
--- a/conf.d/python.d/redis.conf
+++ b/conf.d/python.d/redis.conf
@@ -56,12 +56,14 @@
#
# Additionally to the above, redis also supports the following:
#
-# socket: 'path/to/mysql.sock'
+# socket: 'path/to/redis.sock'
#
# or
# host: 'IP or HOSTNAME' # the host to connect to
# port: PORT # the port to connect to
#
+# and
+# pass: 'password' # the redis password to use for AUTH command
#
# ----------------------------------------------------------------------
@@ -71,27 +73,33 @@
socket1:
name : 'local'
socket : '/tmp/redis.sock'
+ # pass : ''
socket2:
name : 'local'
socket : '/var/run/redis/redis.sock'
+ # pass : ''
socket3:
name : 'local'
socket : '/var/lib/redis/redis.sock'
+ # pass : ''
localhost:
name : 'local'
host : 'localhost'
port : 6379
+ # pass : ''
localipv4:
name : 'local'
host : '127.0.0.1'
port : 6379
+ # pass : ''
localipv6:
name : 'local'
host : '::1'
port : 6379
+ # pass : ''
diff --git a/conf.d/python.d/sensors.conf b/conf.d/python.d/sensors.conf
index 7d895c34..2e9a4133 100644
--- a/conf.d/python.d/sensors.conf
+++ b/conf.d/python.d/sensors.conf
@@ -52,3 +52,7 @@ types:
#
# chip names can be found using the sensors shell command
# the prefix is matched (anything that starts like that)
+#
+#----------------------------------------------------------------------
+# To change celsius to fahrenheit uncomment line below
+#fahrenheit: True
diff --git a/conf.d/python.d/varnish.conf b/conf.d/python.d/varnish.conf
new file mode 100644
index 00000000..56dc6334
--- /dev/null
+++ b/conf.d/python.d/varnish.conf
@@ -0,0 +1,65 @@
+# netdata python.d.plugin configuration for varnish
+#
+# This file is in YaML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 5
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 5 # the JOB's number of restoration attempts
+#
+#
+#
+# The only thing you need to do is add netdata to the 'varnish' group
+#
+# Check it from cmd
+# id netdata
+#
+# uid=999(netdata) gid=999(netdata) groups=999(netdata),118(varnish)
+#