summary refs log tree commit diff stats
path: root/conf.d
diff options
context:
space:
mode:
Diffstat (limited to 'conf.d')
-rw-r--r-- conf.d/Makefile.am 16
-rw-r--r-- conf.d/Makefile.in 16
-rw-r--r-- conf.d/charts.d/ap.conf 6
-rw-r--r-- conf.d/charts.d/apache.conf 6
-rw-r--r-- conf.d/charts.d/apcupsd.conf 12
-rw-r--r-- conf.d/charts.d/cpu_apps.conf 6
-rw-r--r-- conf.d/charts.d/cpufreq.conf 6
-rw-r--r-- conf.d/charts.d/example.conf 6
-rw-r--r-- conf.d/charts.d/exim.conf 6
-rw-r--r-- conf.d/charts.d/hddtemp.conf 5
-rw-r--r-- conf.d/charts.d/libreswan.conf 29
-rw-r--r-- conf.d/charts.d/load_average.conf 8
-rw-r--r-- conf.d/charts.d/mem_apps.conf 6
-rw-r--r-- conf.d/charts.d/mysql.conf 6
-rw-r--r-- conf.d/charts.d/nginx.conf 6
-rw-r--r-- conf.d/charts.d/nut.conf 6
-rw-r--r-- conf.d/charts.d/opensips.conf 6
-rw-r--r-- conf.d/charts.d/phpfpm.conf 7
-rw-r--r-- conf.d/charts.d/postfix.conf 7
-rw-r--r-- conf.d/charts.d/sensors.conf 7
-rw-r--r-- conf.d/charts.d/squid.conf 7
-rw-r--r-- conf.d/charts.d/tomcat.conf 6
-rw-r--r-- conf.d/health.d/backend.conf 2
-rw-r--r-- conf.d/health.d/btrfs.conf 57
-rw-r--r-- conf.d/health.d/ceph.conf 13
-rw-r--r-- conf.d/health.d/cpu.conf 14
-rw-r--r-- conf.d/health.d/disks.conf 51
-rw-r--r-- conf.d/health.d/fronius.conf 11
-rw-r--r-- conf.d/health.d/httpcheck.conf 99
-rw-r--r-- conf.d/health.d/isc_dhcpd.conf 14
-rw-r--r-- conf.d/health.d/net.conf 5
-rw-r--r-- conf.d/health.d/nginx_plus.conf 14
-rw-r--r-- conf.d/health.d/portcheck.conf 48
-rw-r--r-- conf.d/health.d/ram.conf 42
-rw-r--r-- conf.d/health.d/softnet.conf 14
-rw-r--r-- conf.d/health.d/stiebeleltron.conf 11
-rw-r--r-- conf.d/health.d/swap.conf 8
-rw-r--r-- conf.d/health.d/tcp_resets.conf 6
-rw-r--r-- conf.d/health.d/udp_errors.conf 4
-rw-r--r-- conf.d/health.d/web_log.conf 1
-rwxr-xr-x[-rw-r--r--] conf.d/health_alarm_notify.conf 134
-rw-r--r-- conf.d/python.d.conf 9
-rw-r--r-- conf.d/python.d/ceph.conf 75
-rw-r--r-- conf.d/python.d/httpcheck.conf 99
-rw-r--r-- conf.d/python.d/icecast.conf 83
-rw-r--r-- conf.d/python.d/mysql.conf 9
-rw-r--r-- conf.d/python.d/nginx_plus.conf 87
-rw-r--r-- conf.d/python.d/ntpd.conf 91
-rw-r--r-- conf.d/python.d/portcheck.conf 70
-rw-r--r-- conf.d/python.d/postgres.conf 15
-rw-r--r-- conf.d/python.d/springboot.conf 120
-rw-r--r-- conf.d/python.d/traefik.conf 79
-rw-r--r-- conf.d/python.d/web_log.conf 1
-rw-r--r-- conf.d/stream.conf 14
54 files changed, 1429 insertions, 57 deletions
diff --git a/conf.d/Makefile.am b/conf.d/Makefile.am
index 095e891af..d79bb5ab8 100644
--- a/conf.d/Makefile.am
+++ b/conf.d/Makefile.am
@@ -29,6 +29,7 @@ dist_pythonconfig_DATA = \
python.d/apache.conf \
python.d/beanstalk.conf \
python.d/bind_rndc.conf \
+ python.d/ceph.conf \
python.d/chrony.conf \
python.d/couchdb.conf \
python.d/cpufreq.conf \
@@ -43,6 +44,8 @@ dist_pythonconfig_DATA = \
python.d/go_expvar.conf \
python.d/haproxy.conf \
python.d/hddtemp.conf \
+ python.d/httpcheck.conf \
+ python.d/icecast.conf \
python.d/ipfs.conf \
python.d/isc_dhcpd.conf \
python.d/mdstat.conf \
@@ -50,9 +53,12 @@ dist_pythonconfig_DATA = \
python.d/mongodb.conf \
python.d/mysql.conf \
python.d/nginx.conf \
+ python.d/nginx_plus.conf \
python.d/nsd.conf \
+ python.d/ntpd.conf \
python.d/ovpn_status_log.conf \
python.d/phpfpm.conf \
+ python.d/portcheck.conf \
python.d/postfix.conf \
python.d/postgres.conf \
python.d/powerdns.conf \
@@ -61,9 +67,11 @@ dist_pythonconfig_DATA = \
python.d/retroshare.conf \
python.d/samba.conf \
python.d/sensors.conf \
+ python.d/springboot.conf \
python.d/squid.conf \
python.d/smartd_log.conf \
python.d/tomcat.conf \
+ python.d/traefik.conf \
python.d/varnish.conf \
python.d/web_log.conf \
$(NULL)
@@ -75,13 +83,17 @@ dist_healthconfig_DATA = \
health.d/backend.conf \
health.d/beanstalkd.conf \
health.d/bind_rndc.conf \
+ health.d/btrfs.conf \
+ health.d/ceph.conf \
health.d/cpu.conf \
health.d/couchdb.conf \
health.d/disks.conf \
health.d/elasticsearch.conf \
health.d/entropy.conf \
health.d/fping.conf \
+ health.d/fronius.conf \
health.d/haproxy.conf \
+ health.d/httpcheck.conf \
health.d/ipc.conf \
health.d/ipfs.conf \
health.d/ipmi.conf \
@@ -96,6 +108,8 @@ dist_healthconfig_DATA = \
health.d/net.conf \
health.d/netfilter.conf \
health.d/nginx.conf \
+ health.d/nginx_plus.conf \
+ health.d/portcheck.conf \
health.d/postgres.conf \
health.d/qos.conf \
health.d/ram.conf \
@@ -103,6 +117,7 @@ dist_healthconfig_DATA = \
health.d/retroshare.conf \
health.d/softnet.conf \
health.d/squid.conf \
+ health.d/stiebeleltron.conf \
health.d/swap.conf \
health.d/tcp_conn.conf \
health.d/tcp_listen.conf \
@@ -121,6 +136,7 @@ dist_chartsconfig_DATA = \
charts.d/apcupsd.conf \
charts.d/cpufreq.conf \
charts.d/exim.conf \
+ charts.d/libreswan.conf \
charts.d/load_average.conf \
charts.d/mysql.conf \
charts.d/nut.conf \
diff --git a/conf.d/Makefile.in b/conf.d/Makefile.in
index c1c291bcc..48ce51191 100644
--- a/conf.d/Makefile.in
+++ b/conf.d/Makefile.in
@@ -328,6 +328,7 @@ dist_pythonconfig_DATA = \
python.d/apache.conf \
python.d/beanstalk.conf \
python.d/bind_rndc.conf \
+ python.d/ceph.conf \
python.d/chrony.conf \
python.d/couchdb.conf \
python.d/cpufreq.conf \
@@ -342,6 +343,8 @@ dist_pythonconfig_DATA = \
python.d/go_expvar.conf \
python.d/haproxy.conf \
python.d/hddtemp.conf \
+ python.d/httpcheck.conf \
+ python.d/icecast.conf \
python.d/ipfs.conf \
python.d/isc_dhcpd.conf \
python.d/mdstat.conf \
@@ -349,9 +352,12 @@ dist_pythonconfig_DATA = \
python.d/mongodb.conf \
python.d/mysql.conf \
python.d/nginx.conf \
+ python.d/nginx_plus.conf \
python.d/nsd.conf \
+ python.d/ntpd.conf \
python.d/ovpn_status_log.conf \
python.d/phpfpm.conf \
+ python.d/portcheck.conf \
python.d/postfix.conf \
python.d/postgres.conf \
python.d/powerdns.conf \
@@ -360,9 +366,11 @@ dist_pythonconfig_DATA = \
python.d/retroshare.conf \
python.d/samba.conf \
python.d/sensors.conf \
+ python.d/springboot.conf \
python.d/squid.conf \
python.d/smartd_log.conf \
python.d/tomcat.conf \
+ python.d/traefik.conf \
python.d/varnish.conf \
python.d/web_log.conf \
$(NULL)
@@ -373,13 +381,17 @@ dist_healthconfig_DATA = \
health.d/backend.conf \
health.d/beanstalkd.conf \
health.d/bind_rndc.conf \
+ health.d/btrfs.conf \
+ health.d/ceph.conf \
health.d/cpu.conf \
health.d/couchdb.conf \
health.d/disks.conf \
health.d/elasticsearch.conf \
health.d/entropy.conf \
health.d/fping.conf \
+ health.d/fronius.conf \
health.d/haproxy.conf \
+ health.d/httpcheck.conf \
health.d/ipc.conf \
health.d/ipfs.conf \
health.d/ipmi.conf \
@@ -394,6 +406,8 @@ dist_healthconfig_DATA = \
health.d/net.conf \
health.d/netfilter.conf \
health.d/nginx.conf \
+ health.d/nginx_plus.conf \
+ health.d/portcheck.conf \
health.d/postgres.conf \
health.d/qos.conf \
health.d/ram.conf \
@@ -401,6 +415,7 @@ dist_healthconfig_DATA = \
health.d/retroshare.conf \
health.d/softnet.conf \
health.d/squid.conf \
+ health.d/stiebeleltron.conf \
health.d/swap.conf \
health.d/tcp_conn.conf \
health.d/tcp_listen.conf \
@@ -419,6 +434,7 @@ dist_chartsconfig_DATA = \
charts.d/apcupsd.conf \
charts.d/cpufreq.conf \
charts.d/exim.conf \
+ charts.d/libreswan.conf \
charts.d/load_average.conf \
charts.d/mysql.conf \
charts.d/nut.conf \
diff --git a/conf.d/charts.d/ap.conf b/conf.d/charts.d/ap.conf
index 88a447eb9..38fc157ce 100644
--- a/conf.d/charts.d/ap.conf
+++ b/conf.d/charts.d/ap.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# nothing fancy to configure.
@@ -17,3 +17,7 @@
# the charts priority on the dashboard
#ap_priority=6900
+
+# the number of retries to do in case of failure
+# before disabling the module
+#ap_retries=10
diff --git a/conf.d/charts.d/apache.conf b/conf.d/charts.d/apache.conf
index b82c2a7fb..50914cf32 100644
--- a/conf.d/charts.d/apache.conf
+++ b/conf.d/charts.d/apache.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -24,3 +24,7 @@
# the charts priority on the dashboard
#apache_priority=60000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#apache_retries=10
diff --git a/conf.d/charts.d/apcupsd.conf b/conf.d/charts.d/apcupsd.conf
index f8bf7ed60..679c0d61b 100644
--- a/conf.d/charts.d/apcupsd.conf
+++ b/conf.d/charts.d/apcupsd.conf
@@ -2,11 +2,13 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
-#apcupsd_ip=127.0.0.1
-#apcupsd_port=3551
+# add all your APC UPSes in this array - uncomment it too
+#declare -A apcupsd_sources=(
+# ["local"]="127.0.0.1:3551"
+#)
# how long to wait for apcupsd to respond
#apcupsd_timeout=3
@@ -17,3 +19,7 @@
# the charts priority on the dashboard
#apcupsd_priority=90000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#apcupsd_retries=10
diff --git a/conf.d/charts.d/cpu_apps.conf b/conf.d/charts.d/cpu_apps.conf
index 46d70362e..850cd0c6f 100644
--- a/conf.d/charts.d/cpu_apps.conf
+++ b/conf.d/charts.d/cpu_apps.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -13,3 +13,7 @@
# the data collection frequency
# if unset, will inherit the netdata update frequency
#cpu_apps_update_every=2
+
+# the number of retries to do in case of failure
+# before disabling the module
+#cpu_apps_retries=10
diff --git a/conf.d/charts.d/cpufreq.conf b/conf.d/charts.d/cpufreq.conf
index 4f26562ec..7130555af 100644
--- a/conf.d/charts.d/cpufreq.conf
+++ b/conf.d/charts.d/cpufreq.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -18,3 +18,7 @@
# the charts priority on the dashboard
#cpufreq_priority=10000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#cpufreq_retries=10
diff --git a/conf.d/charts.d/example.conf b/conf.d/charts.d/example.conf
index dc4b6900e..6232ca584 100644
--- a/conf.d/charts.d/example.conf
+++ b/conf.d/charts.d/example.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# to enable this chart, you have to set this to 12345
@@ -15,3 +15,7 @@
# the charts priority on the dashboard
#example_priority=150000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#example_retries=10
diff --git a/conf.d/charts.d/exim.conf b/conf.d/charts.d/exim.conf
index 4a1464bbd..f96ac4dbb 100644
--- a/conf.d/charts.d/exim.conf
+++ b/conf.d/charts.d/exim.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -18,3 +18,7 @@
# the charts priority on the dashboard
#exim_priority=60000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#exim_retries=10
diff --git a/conf.d/charts.d/hddtemp.conf b/conf.d/charts.d/hddtemp.conf
index 535cb0173..b6037b40e 100644
--- a/conf.d/charts.d/hddtemp.conf
+++ b/conf.d/charts.d/hddtemp.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -18,3 +18,6 @@
# the charts priority on the dashboard
#hddtemp_priority=90000
+# the number of retries to do in case of failure
+# before disabling the module
+#hddtemp_retries=10
diff --git a/conf.d/charts.d/libreswan.conf b/conf.d/charts.d/libreswan.conf
new file mode 100644
index 000000000..9b3ee77b7
--- /dev/null
+++ b/conf.d/charts.d/libreswan.conf
@@ -0,0 +1,29 @@
+# no need for shebang - this file is loaded from charts.d.plugin
+
+# netdata
+# real-time performance and health monitoring, done right!
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
+# GPL v3+
+#
+
+# the data collection frequency
+# if unset, will inherit the netdata update frequency
+#libreswan_update_every=1
+
+# the charts priority on the dashboard
+#libreswan_priority=90000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#libreswan_retries=10
+
+# set to 1, to run ipsec with sudo (the default)
+# set to 0, to run ipsec without sudo
+#libreswan_sudo=1
+
+# TO ALLOW NETDATA TO RUN ipsec AS ROOT
+# CREATE THE FILE: /etc/sudoers.d/netdata
+# WITH THESE 2 LINES (uncommented of course):
+#
+# netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --status
+# netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --trafficstatus
diff --git a/conf.d/charts.d/load_average.conf b/conf.d/charts.d/load_average.conf
index abbe80cad..68979275f 100644
--- a/conf.d/charts.d/load_average.conf
+++ b/conf.d/charts.d/load_average.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -15,4 +15,8 @@
#load_average_update_every=5
# the charts priority on the dashboard
-#load_priority=100
+#load_average_priority=100
+
+# the number of retries to do in case of failure
+# before disabling the module
+#load_average_retries=10
diff --git a/conf.d/charts.d/mem_apps.conf b/conf.d/charts.d/mem_apps.conf
index aa4ac680b..75d24dc3e 100644
--- a/conf.d/charts.d/mem_apps.conf
+++ b/conf.d/charts.d/mem_apps.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -13,3 +13,7 @@
# the data collection frequency
# if unset, will inherit the netdata update frequency
#mem_apps_update_every=2
+
+# the number of retries to do in case of failure
+# before disabling the module
+#mem_apps_retries=10
diff --git a/conf.d/charts.d/mysql.conf b/conf.d/charts.d/mysql.conf
index 6a0b55a4b..683e4af35 100644
--- a/conf.d/charts.d/mysql.conf
+++ b/conf.d/charts.d/mysql.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -17,3 +17,7 @@
# the charts priority on the dashboard
#mysql_priority=60000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#mysql_retries=10
diff --git a/conf.d/charts.d/nginx.conf b/conf.d/charts.d/nginx.conf
index 8b88b0e30..c46100a58 100644
--- a/conf.d/charts.d/nginx.conf
+++ b/conf.d/charts.d/nginx.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -17,3 +17,7 @@
# the charts priority on the dashboard
#nginx_priority=60000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#nginx_retries=10
diff --git a/conf.d/charts.d/nut.conf b/conf.d/charts.d/nut.conf
index a836692d8..d477ddd34 100644
--- a/conf.d/charts.d/nut.conf
+++ b/conf.d/charts.d/nut.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016-2017 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# a space separated list of UPS names
@@ -22,3 +22,7 @@
# the charts priority on the dashboard
#nut_priority=90000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#nut_retries=10
diff --git a/conf.d/charts.d/opensips.conf b/conf.d/charts.d/opensips.conf
index abc4c70e0..e25111dce 100644
--- a/conf.d/charts.d/opensips.conf
+++ b/conf.d/charts.d/opensips.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
#opensips_opts="fifo get_statistics all"
@@ -15,3 +15,7 @@
# the charts priority on the dashboard
#opensips_priority=80000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#opensips_retries=10
diff --git a/conf.d/charts.d/phpfpm.conf b/conf.d/charts.d/phpfpm.conf
index 1e8576384..e4dd0231b 100644
--- a/conf.d/charts.d/phpfpm.conf
+++ b/conf.d/charts.d/phpfpm.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -20,3 +20,8 @@
# the charts priority on the dashboard
#phpfpm_priority=60000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#phpfpm_retries=10
+
diff --git a/conf.d/charts.d/postfix.conf b/conf.d/charts.d/postfix.conf
index 7d33d2660..b77817bd6 100644
--- a/conf.d/charts.d/postfix.conf
+++ b/conf.d/charts.d/postfix.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -18,3 +18,8 @@
# the charts priority on the dashboard
#postfix_priority=60000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#postfix_retries=10
+
diff --git a/conf.d/charts.d/sensors.conf b/conf.d/charts.d/sensors.conf
index d42d17d27..bcb28807d 100644
--- a/conf.d/charts.d/sensors.conf
+++ b/conf.d/charts.d/sensors.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -25,3 +25,8 @@
# the charts priority on the dashboard
#sensors_priority=90000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#sensors_retries=10
+
diff --git a/conf.d/charts.d/squid.conf b/conf.d/charts.d/squid.conf
index cf92c1245..19e928f25 100644
--- a/conf.d/charts.d/squid.conf
+++ b/conf.d/charts.d/squid.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -19,3 +19,8 @@
# the charts priority on the dashboard
#squid_priority=60000
+
+# the number of retries to do in case of failure
+# before disabling the module
+#squid_retries=10
+
diff --git a/conf.d/charts.d/tomcat.conf b/conf.d/charts.d/tomcat.conf
index 710669423..e9f3eefa9 100644
--- a/conf.d/charts.d/tomcat.conf
+++ b/conf.d/charts.d/tomcat.conf
@@ -2,7 +2,7 @@
# netdata
# real-time performance and health monitoring, done right!
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr>
+# (C) 2018 Costa Tsaousis <costa@tsaousis.gr>
# GPL v3+
# THIS PLUGIN IS DEPRECATED
@@ -24,6 +24,10 @@
# the charts priority on the dashboard
#tomcat_priority=60000
+# the number of retries to do in case of failure
+# before disabling the module
+#tomcat_retries=10
+
# convert tomcat floating point values
# to integer using this multiplier
# this only affects precision - the values
diff --git a/conf.d/health.d/backend.conf b/conf.d/health.d/backend.conf
index 9c193e7b9..7af100d8f 100644
--- a/conf.d/health.d/backend.conf
+++ b/conf.d/health.d/backend.conf
@@ -27,7 +27,7 @@
units: metrics
calc: abs($lost)
every: 10s
- crit: $this != 0
+ crit: ($this != 0) || ($status == $CRITICAL && abs($sent) == 0)
delay: down 5m multiplier 1.5 max 1h
info: number of metrics lost due to repeating failures to contact the backend server
to: dba
diff --git a/conf.d/health.d/btrfs.conf b/conf.d/health.d/btrfs.conf
new file mode 100644
index 000000000..b27aa544f
--- /dev/null
+++ b/conf.d/health.d/btrfs.conf
@@ -0,0 +1,57 @@
+
+template: btrfs_allocated
+ on: btrfs.disk
+ os: *
+ hosts: *
+families: *
+ calc: 100 - ($unallocated * 100 / ($unallocated + $data_used + $data_free + $meta_used + $meta_free + $sys_used + $sys_free))
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (90) : (95))
+ crit: $this > (($status == $CRITICAL) ? (95) : (98))
+ delay: up 1m down 15m multiplier 1.5 max 1h
+ info: the percentage of allocated BTRFS physical disk space
+ to: sysadmin
+
+template: btrfs_data
+ on: btrfs.data
+ os: *
+ hosts: *
+families: *
+ calc: $used * 100 / ($used + $free)
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (90) : (95)) && $btrfs_allocated > 98
+ crit: $this > (($status == $CRITICAL) ? (95) : (98)) && $btrfs_allocated > 98
+ delay: up 1m down 15m multiplier 1.5 max 1h
+ info: the percentage of used BTRFS data space
+ to: sysadmin
+
+template: btrfs_metadata
+ on: btrfs.metadata
+ os: *
+ hosts: *
+families: *
+ calc: ($used + $reserved) * 100 / ($used + $free + $reserved)
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (90) : (95)) && $btrfs_allocated > 98
+ crit: $this > (($status == $CRITICAL) ? (95) : (98)) && $btrfs_allocated > 98
+ delay: up 1m down 15m multiplier 1.5 max 1h
+ info: the percentage of used BTRFS metadata space
+ to: sysadmin
+
+template: btrfs_system
+ on: btrfs.system
+ os: *
+ hosts: *
+families: *
+ calc: $used * 100 / ($used + $free)
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (90) : (95)) && $btrfs_allocated > 98
+ crit: $this > (($status == $CRITICAL) ? (95) : (98)) && $btrfs_allocated > 98
+ delay: up 1m down 15m multiplier 1.5 max 1h
+ info: the percentage of used BTRFS system space
+ to: sysadmin
+
diff --git a/conf.d/health.d/ceph.conf b/conf.d/health.d/ceph.conf
new file mode 100644
index 000000000..de16f7b6f
--- /dev/null
+++ b/conf.d/health.d/ceph.conf
@@ -0,0 +1,13 @@
+# low ceph disk available
+
+template: cluster_space_usage
+ on: ceph.general_usage
+ calc: $avail * 100 / ($avail + $used)
+ units: %
+ every: 10s
+ warn: $this < 10
+ crit: $this < 1
+ delay: down 5m multiplier 1.2 max 1h
+ info: ceph disk usage is almost full
+ to: sysadmin
+
diff --git a/conf.d/health.d/cpu.conf b/conf.d/health.d/cpu.conf
index db6285561..fa8189856 100644
--- a/conf.d/health.d/cpu.conf
+++ b/conf.d/health.d/cpu.conf
@@ -39,3 +39,17 @@ template: 20min_steal_cpu
delay: down 1h multiplier 1.5 max 2h
info: average CPU steal time for the last 20 minutes
to: sysadmin
+
+## FreeBSD
+template: 10min_cpu_usage
+ on: system.cpu
+ os: freebsd
+ hosts: *
+ lookup: average -10m unaligned of user,system,interrupt
+ units: %
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (75) : (85))
+ crit: $this > (($status == $CRITICAL) ? (85) : (95))
+ delay: down 15m multiplier 1.5 max 1h
+ info: average cpu utilization for the last 10 minutes (excluding nice)
+ to: sysadmin
diff --git a/conf.d/health.d/disks.conf b/conf.d/health.d/disks.conf
index 63053491e..26f85848a 100644
--- a/conf.d/health.d/disks.conf
+++ b/conf.d/health.d/disks.conf
@@ -11,7 +11,7 @@
template: disk_space_usage
on: disk.space
- os: linux
+ os: linux freebsd
hosts: *
families: *
calc: $used * 100 / ($avail + $used)
@@ -25,7 +25,7 @@ families: *
template: disk_inode_usage
on: disk.inodes
- os: linux
+ os: linux freebsd
hosts: *
families: *
calc: $used * 100 / ($avail + $used)
@@ -51,7 +51,7 @@ families: *
template: disk_fill_rate
on: disk.space
- os: linux
+ os: linux freebsd
hosts: *
families: *
lookup: min -10m at -50m unaligned of avail
@@ -67,7 +67,7 @@ families: *
template: out_of_disk_space_time
on: disk.space
- os: linux
+ os: linux freebsd
hosts: *
families: *
calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf)
@@ -81,6 +81,47 @@ families: *
# -----------------------------------------------------------------------------
+# disk inode fill rate
+
+# calculate the rate at which disk inodes are allocated,
+# based on the change in available inodes
+# during the last hour
+
+# this is just a calculation - it has no alarm
+# we will use it in the next template to find
+# the hours remaining
+
+template: disk_inode_rate
+ on: disk.inodes
+ os: linux freebsd
+ hosts: *
+families: *
+ lookup: min -10m at -50m unaligned of avail
+ calc: ($this - $avail) / (($now - $after) / 3600)
+ every: 1m
+ units: inodes/hour
+ info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour
+
+# calculate the hours remaining
+# if the disk inodes are allocated
+# at this rate
+
+template: out_of_disk_inodes_time
+ on: disk.inodes
+ os: linux freebsd
+ hosts: *
+families: *
+ calc: ($disk_inode_rate > 0) ? ($avail / $disk_inode_rate) : (inf)
+ units: hours
+ every: 10s
+ warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8))
+ crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2))
+ delay: down 15m multiplier 1.2 max 1h
+ info: estimated time the disk will run out of inodes, if the system continues to allocate inodes with the rate of the last hour
+ to: sysadmin
+
+
+# -----------------------------------------------------------------------------
# disk congestion
# raise an alarm if the disk is congested
@@ -89,7 +130,7 @@ families: *
template: 10min_disk_utilization
on: disk.util
- os: linux
+ os: linux freebsd
hosts: *
families: *
lookup: average -10m unaligned
diff --git a/conf.d/health.d/fronius.conf b/conf.d/health.d/fronius.conf
new file mode 100644
index 000000000..cdf6c8fcb
--- /dev/null
+++ b/conf.d/health.d/fronius.conf
@@ -0,0 +1,11 @@
+template: fronius_last_collected_secs
+families: *
+ on: fronius.power
+ calc: $now - $last_collected_t
+ every: 10s
+ units: seconds ago
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: sitemgr
diff --git a/conf.d/health.d/httpcheck.conf b/conf.d/health.d/httpcheck.conf
new file mode 100644
index 000000000..0ddf35eab
--- /dev/null
+++ b/conf.d/health.d/httpcheck.conf
@@ -0,0 +1,99 @@
+template: httpcheck_last_collected_secs
+families: *
+ on: httpcheck.status
+ calc: $now - $last_collected_t
+ every: 10s
+ units: seconds ago
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: sysadmin
+
+# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
+template: web_service_up
+families: *
+ on: httpcheck.status
+ lookup: average -1m unaligned percentage of success
+ calc: ($this < 75) ? (0) : ($this)
+ every: 5s
+ units: up/down
+ info: at least 75% verified responses during last 60 seconds, ideal for badges
+ to: silent
+
+template: web_service_bad_content
+families: *
+ on: httpcheck.status
+ lookup: average -5m unaligned percentage of bad_content
+ every: 10s
+ units: %
+ warn: $this >= 10 AND $this < 40
+ crit: $this >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average of unexpected http response content during the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
+
+template: web_service_bad_status
+families: *
+ on: httpcheck.status
+ lookup: average -5m unaligned percentage of bad_status
+ every: 10s
+ units: %
+ warn: $this >= 10 AND $this < 40
+ crit: $this >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average of unexpected http status during the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
+
+template: web_service_timeouts
+families: *
+ on: httpcheck.status
+ lookup: average -5m unaligned percentage of timeout
+ every: 10s
+ units: %
+ info: average of timeouts during the last 5 minutes
+
+template: no_web_service_connections
+families: *
+ on: httpcheck.status
+ lookup: average -5m unaligned percentage of no_connection
+ every: 10s
+ units: %
+ info: average of failed requests during the last 5 minutes
+
+# combined timeout & no connection alarm
+template: web_service_unreachable
+families: *
+ on: httpcheck.status
+ calc: ($no_web_service_connections >= $web_service_timeouts) ? ($no_web_service_connections) : ($web_service_timeouts)
+ units: %
+ every: 10s
+ warn: ($no_web_service_connections >= 10 OR $web_service_timeouts >= 10) AND ($no_web_service_connections < 40 OR $web_service_timeouts < 40)
+ crit: $no_web_service_connections >= 40 OR $web_service_timeouts >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average of failed requests either due to timeouts or no connection during the last 5 minutes
+ options: no-clear-notification
+ to: webmaster
+
+template: 1h_web_service_response_time
+families: *
+ on: httpcheck.responsetime
+ lookup: average -1h unaligned of time
+ every: 30s
+ units: ms
+ info: average response time over the last hour
+
+template: web_service_slow
+families: *
+ on: httpcheck.responsetime
+ lookup: average -3m unaligned of time
+ units: ms
+ every: 10s
+ warn: ($this > ($1h_web_service_response_time * 2) )
+ crit: ($this > ($1h_web_service_response_time * 3) )
+ info: average response time over the last 3 minutes, compared to the average over the last hour
+ delay: down 5m multiplier 1.5 max 1h
+ options: no-clear-notification
+ to: webmaster
diff --git a/conf.d/health.d/isc_dhcpd.conf b/conf.d/health.d/isc_dhcpd.conf
index 4345619aa..8054656ff 100644
--- a/conf.d/health.d/isc_dhcpd.conf
+++ b/conf.d/health.d/isc_dhcpd.conf
@@ -1,10 +1,10 @@
- alarm: isc_dhcpd_parse_time
- on: isc_dhcpd.parse_time
- units: ms
+ template: isc_dhcpd_leases_size
+ on: isc_dhcpd.leases_total
+ units: KB
every: 60
- calc: $ptime
- warn: $this > 100
- crit: $this > 250
+ calc: $leases_size
+ warn: $this > 3072
+ crit: $this > 6144
delay: up 2m down 5m
- info: Parsing too slow! It can slow down your server. Check dhcpd.leases file size.
+ info: dhcpd.leases file too big! Module can slow down your server.
to: sysadmin
diff --git a/conf.d/health.d/net.conf b/conf.d/health.d/net.conf
index 00a198612..22a88927d 100644
--- a/conf.d/health.d/net.conf
+++ b/conf.d/health.d/net.conf
@@ -98,7 +98,7 @@ families: *
template: 1m_received_packets_rate
on: net.packets
- os: linux
+ os: linux freebsd
hosts: *
families: *
lookup: average -1m of received
@@ -108,7 +108,7 @@ families: *
template: 10s_received_packets_storm
on: net.packets
- os: linux
+ os: linux freebsd
hosts: *
families: *
lookup: average -10s of received
@@ -120,4 +120,3 @@ families: *
options: no-clear-notification
info: the % of the rate of received packets in the last 10 seconds, compared to the rate of the last minute (clear notification for this alarm will not be sent)
to: sysadmin
-
diff --git a/conf.d/health.d/nginx_plus.conf b/conf.d/health.d/nginx_plus.conf
new file mode 100644
index 000000000..5a171a76d
--- /dev/null
+++ b/conf.d/health.d/nginx_plus.conf
@@ -0,0 +1,14 @@
+
+# make sure nginx_plus is running
+
+template: nginx_plus_last_collected_secs
+ on: nginx_plus.requests_total
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: webmaster
+
diff --git a/conf.d/health.d/portcheck.conf b/conf.d/health.d/portcheck.conf
new file mode 100644
index 000000000..f42b63d30
--- /dev/null
+++ b/conf.d/health.d/portcheck.conf
@@ -0,0 +1,48 @@
+template: portcheck_last_collected_secs
+families: *
+ on: portcheck.status
+ calc: $now - $last_collected_t
+ every: 10s
+ units: seconds ago
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: sysadmin
+
+# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges
+template: service_reachable
+families: *
+ on: portcheck.status
+ lookup: average -1m unaligned percentage of success
+ calc: ($this < 75) ? (0) : ($this)
+ every: 5s
+ units: up/down
+ info: at least 75% successful connections during last 60 seconds, ideal for badges
+ to: silent
+
+template: connection_timeouts
+families: *
+ on: portcheck.status
+ lookup: average -5m unaligned percentage of timeout
+ every: 10s
+ units: %
+ warn: $this >= 10 AND $this < 40
+ crit: $this >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average of timeouts during the last 5 minutes
+ options: no-clear-notification
+ to: sysadmin
+
+template: connection_fails
+families: *
+ on: portcheck.status
+ lookup: average -5m unaligned percentage of no_connection
+ every: 10s
+ units: %
+ warn: $this >= 10 AND $this < 40
+ crit: $this >= 40
+ delay: down 5m multiplier 1.5 max 1h
+ info: average of failed connections during the last 5 minutes
+ options: no-clear-notification
+ to: sysadmin
diff --git a/conf.d/health.d/ram.conf b/conf.d/health.d/ram.conf
index 8d0e8838d..b6dc5f945 100644
--- a/conf.d/health.d/ram.conf
+++ b/conf.d/health.d/ram.conf
@@ -20,5 +20,45 @@
warn: $this > (($status >= $WARNING) ? (80) : (90))
crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: down 15m multiplier 1.5 max 1h
- info: system RAM usage
+ info: system RAM used
to: sysadmin
+
+ alarm: ram_available
+ on: mem.available
+ os: linux
+ hosts: *
+ calc: ($avail + $used_ram_to_ignore) * 100 / ($system.ram.used + $system.ram.cached + $system.ram.free + $system.ram.buffers)
+ units: %
+ every: 10s
+ warn: $this < (($status >= $WARNING) ? ( 5) : (10))
+ crit: $this < (($status == $CRITICAL) ? (10) : ( 5))
+ delay: down 15m multiplier 1.5 max 1h
+ info: estimated amount of RAM available for userspace processes, without causing swapping
+ to: sysadmin
+
+## FreeBSD
+alarm: ram_in_use
+ on: system.ram
+ os: freebsd
+hosts: *
+ calc: (($active + $wired) - $used_ram_to_ignore) * 100 / (($active + $wired) - $used_ram_to_ignore + $cached + $free)
+units: %
+every: 10s
+ warn: $this > (($status >= $WARNING) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
+delay: down 15m multiplier 1.5 max 1h
+ info: system RAM usage
+ to: sysadmin
+
+ alarm: ram_available
+ on: system.ram
+ os: freebsd
+ hosts: *
+ calc: ($free + $inactive + $used_ram_to_ignore) * 100 / ($free + $active + $inactive + $wired + $cache + $buffers)
+ units: %
+ every: 10s
+ warn: $this < (($status >= $WARNING) ? ( 5) : (10))
+ crit: $this < (($status == $CRITICAL) ? (10) : ( 5))
+ delay: down 15m multiplier 1.5 max 1h
+ info: estimated amount of RAM available for userspace processes, without causing swapping
+ to: sysadmin
diff --git a/conf.d/health.d/softnet.conf b/conf.d/health.d/softnet.conf
index 64e1c6784..77c804bfd 100644
--- a/conf.d/health.d/softnet.conf
+++ b/conf.d/health.d/softnet.conf
@@ -24,5 +24,17 @@
every: 1m
warn: $this > (($status >= $WARNING) ? (0) : (10))
delay: down 1h multiplier 1.5 max 2h
- info: number of times, during the last 10min, ksoftirq ran out of sysctl net.core.netdev_budget or time slice, with work remaining (this can be a cause for dropped packets)
+ info: number of times, during the last 10min, ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs, with work remaining (this can be a cause for dropped packets)
to: silent
+
+ alarm: 10min_netisr_backlog_exceeded
+ on: system.softnet_stat
+ os: freebsd
+ hosts: *
+ lookup: sum -10m unaligned absolute of qdrops
+ units: packets
+ every: 1m
+ warn: $this > 0
+ delay: down 1h multiplier 1.5 max 2h
+ info: number of drops in the last 10min, because sysctl net.route.netisr_maxqlen was exceeded (this can be a cause for dropped packets)
+ to: sysadmin
diff --git a/conf.d/health.d/stiebeleltron.conf b/conf.d/health.d/stiebeleltron.conf
new file mode 100644
index 000000000..e0361eb20
--- /dev/null
+++ b/conf.d/health.d/stiebeleltron.conf
@@ -0,0 +1,11 @@
+template: stiebeleltron_last_collected_secs
+families: *
+ on: stiebeleltron.heating.hc1
+ calc: $now - $last_collected_t
+ every: 10s
+ units: seconds ago
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful data collection
+ to: sitemgr
diff --git a/conf.d/health.d/swap.conf b/conf.d/health.d/swap.conf
index 830a9af95..f920b0807 100644
--- a/conf.d/health.d/swap.conf
+++ b/conf.d/health.d/swap.conf
@@ -3,7 +3,7 @@
alarm: 30min_ram_swapped_out
on: system.swapio
- os: linux
+ os: linux freebsd
hosts: *
lookup: sum -30m unaligned absolute of out
# we have to convert KB to MB by dividing $this (i.e. the result of the lookup) with 1024
@@ -25,19 +25,19 @@
every: 10s
warn: $this > (($status >= $WARNING) ? (15) : (20))
crit: $this > (($status == $CRITICAL) ? (40) : (50))
- delay: up 0 down 15m multiplier 1.5 max 1h
+ delay: up 30s down 15m multiplier 1.5 max 1h
info: the swap memory used, as a percentage of the system RAM
to: sysadmin
alarm: used_swap
on: system.swap
- os: linux
+ os: linux freebsd
hosts: *
calc: $used * 100 / ( $used + $free )
units: %
every: 10s
warn: $this > (($status >= $WARNING) ? (80) : (90))
crit: $this > (($status == $CRITICAL) ? (90) : (98))
- delay: up 0 down 15m multiplier 1.5 max 1h
+ delay: up 30s down 15m multiplier 1.5 max 1h
info: the percentage of swap memory used
to: sysadmin
diff --git a/conf.d/health.d/tcp_resets.conf b/conf.d/health.d/tcp_resets.conf
index e6cfd39ab..91dad3c6a 100644
--- a/conf.d/health.d/tcp_resets.conf
+++ b/conf.d/health.d/tcp_resets.conf
@@ -5,7 +5,7 @@
alarm: ipv4_tcphandshake_last_collected_secs
on: ipv4.tcphandshake
- os: linux
+ os: linux freebsd
hosts: *
calc: $now - $last_collected_t
units: seconds ago
@@ -46,7 +46,7 @@
alarm: 1m_ipv4_tcp_resets_received
on: ipv4.tcphandshake
- os: linux
+ os: linux freebsd
hosts: *
lookup: average -1m at -10s unaligned absolute of AttemptFails
units: tcp resets/s
@@ -55,7 +55,7 @@
alarm: 10s_ipv4_tcp_resets_received
on: ipv4.tcphandshake
- os: linux
+ os: linux freebsd
hosts: *
lookup: average -10s unaligned absolute of AttemptFails
units: tcp resets/s
diff --git a/conf.d/health.d/udp_errors.conf b/conf.d/health.d/udp_errors.conf
index 33338b83e..382b39658 100644
--- a/conf.d/health.d/udp_errors.conf
+++ b/conf.d/health.d/udp_errors.conf
@@ -5,7 +5,7 @@
alarm: ipv4_udperrors_last_collected_secs
on: ipv4.udperrors
- os: linux
+ os: linux freebsd
hosts: *
calc: $now - $last_collected_t
units: seconds ago
@@ -21,7 +21,7 @@
alarm: 1m_ipv4_udp_receive_buffer_errors
on: ipv4.udperrors
- os: linux
+ os: linux freebsd
hosts: *
lookup: sum -1m unaligned absolute of RcvbufErrors
units: errors
diff --git a/conf.d/health.d/web_log.conf b/conf.d/health.d/web_log.conf
index d18088172..d8be88b47 100644
--- a/conf.d/health.d/web_log.conf
+++ b/conf.d/health.d/web_log.conf
@@ -116,6 +116,7 @@ families: *
crit: ($1m_requests > 120) ? ($this > $red && $this > ($10m_response_time * 4) ) : ( 0 )
delay: down 15m multiplier 1.5 max 1h
info: the average time to respond to HTTP requests, over the last 1 minute
+ options: no-clear-notification
to: webmaster
# -----------------------------------------------------------------------------
diff --git a/conf.d/health_alarm_notify.conf b/conf.d/health_alarm_notify.conf
index eb01e2bb9..0a95931ec 100644..100755
--- a/conf.d/health_alarm_notify.conf
+++ b/conf.d/health_alarm_notify.conf
@@ -7,12 +7,14 @@
# - e-mails (using the sendmail command),
# - push notifications to your mobile phone (pushover.net),
# - messages to your slack team (slack.com),
+# - messages to your alerta server (alerta.io),
# - messages to your flock team (flock.com),
# - messages to your discord guild (discordapp.com),
# - messages to your telegram chat / group chat (telegram.org)
# - sms messages to your cell phone or any sms enabled device (twilio.com)
# - sms messages to your cell phone or any sms enabled device (messagebird.com)
# - notifications to users on pagerduty.com
+# - messages to your irc channel on your selected network
#
# The 'to' line given at netdata alarms defines a *role*, so that many
# people can be notified for each role.
@@ -23,7 +25,7 @@
#------------------------------------------------------------------------------
# proxy configuration
#
-# If you need to send curl based notifications (pushover, pushbullet, slack,
+# If you need to send curl based notifications (pushover, pushbullet, slack, alerta,
# flock, discord, telegram) via a proxy, set these to your proxy address:
#export http_proxy="http://10.0.0.1:3128/"
#export https_proxy="http://10.0.0.1:3128/"
@@ -54,6 +56,23 @@ sendmail=""
# If not found, most notifications will be silently disabled.
curl=""
+# The full path of the nc command.
+# If empty, the system $PATH will be searched for it.
+# If not found, irc notifications will be silently disabled.
+nc=""
+
+#------------------------------------------------------------------------------
+# extra options for external commands
+#
+# In some cases, you may need to change what options get passed to an
+# external command. Such cases are covered here.
+
+# Extra options to pass to curl. In most cases, you shouldn't need to add anything
+# to this. If you're having issues with HTTPS connections, you might try adding
+# '--insecure' here, but be warned that it will make it much easier for
+# third-parties to block notification delivery, and may allow disclosure
+# of potentially sensitive information.
+#curl_options="--insecure"
#------------------------------------------------------------------------------
# NOTE ABOUT RECIPIENTS
@@ -64,11 +83,13 @@ curl=""
# - pushover user tokens
# - telegram chat ids
# - slack channels
+# - alerta environment
# - flock rooms
# - discord channels
# - hipchat rooms
# - sms phone numbers
# - pagerduty.com (pd) services
+# - irc channels
#
# You can append |critical to limit the notifications to be sent.
#
@@ -79,15 +100,17 @@ curl=""
# pushover : "2987343...9437837 8756278...2362736|critical"
# telegram : "111827421 112746832|critical"
# slack : "alarms disasters|critical"
+# alerta : "alarms disasters|critical"
# flock : "alarms disasters|critical"
# discord : "alarms disasters|critical"
# twilio : "+15555555555 +17777777777|critical"
# messagebird: "+15555555555 +17777777777|critical"
# kavenegar : "09155555555 09177777777|critical"
# pd : "<pd_service_key_1> <pd_service_key_2>|critical"
+# irc : "<irc_channel_1> <irc_channel_2>|critical"
#
# If a recipient is set to empty string, the default recipient of the given
-# notification method (email, pushover, telegram, slack, etc) will be used.
+# notification method (email, pushover, telegram, slack, alerta, etc) will be used.
# To disable a notification, use the recipient called: disabled
# This works for all notification methods (including the default recipients).
@@ -276,6 +299,32 @@ DEFAULT_RECIPIENT_SLACK=""
#------------------------------------------------------------------------------
+# alerta (alerta.io) global notification options
+
+# multiple recipients (Environments) can be given like this:
+# "Production Development ..."
+
+# enable/disable sending alerta notifications
+SEND_ALERTA="YES"
+
+# here set your alerta server API url
+# this is the API url you defined when you installed the Alerta server;
+# it is the same for all users. Do not include the trailing slash.
+# ALERTA_WEBHOOK_URL="https://<server>/alerta/api"
+ALERTA_WEBHOOK_URL=""
+
+# Login with an administrative user to your Alerta server and create an API KEY
+# with write permissions.
+ALERTA_API_KEY=""
+
+# you can define environments in /etc/alertad.conf option ALLOWED_ENVIRONMENTS
+# standard environments are Production and Development
+# if a role's recipients are not configured, a notification will be sent to
+# this Environment (empty = do not send a notification for unconfigured roles):
+DEFAULT_RECIPIENT_ALERTA=""
+
+
+#------------------------------------------------------------------------------
# flock (flock.com) global notification options
# enable/disable sending flock notifications
@@ -364,6 +413,34 @@ DEFAULT_RECIPIENT_PD=""
#------------------------------------------------------------------------------
+# irc notification options
+#
+# irc notifications require only the nc utility to be installed.
+
+# multiple recipients can be given like this:
+# "<irc_channel_1> <irc_channel_2> ..."
+
+# enable/disable sending irc notifications
+SEND_IRC="YES"
+
+# if a role's recipients are not configured, a notification will be sent to
+# this irc channel (empty = do not send a notification for unconfigured roles):
+DEFAULT_RECIPIENT_IRC=""
+
+# The irc network to which the recipients belong. It must be the full network.
+# e.g. "irc.freenode.net"
+IRC_NETWORK=""
+
+# The irc nickname which is required to send the notification. It must not be
+# an already registered name as the connection's MODE is defined as a 'guest'.
+IRC_NICKNAME=""
+
+# The irc realname which is required in order to make the connection and is an
+# extra identifier.
+IRC_REALNAME=""
+
+
+#------------------------------------------------------------------------------
# custom notifications
#
@@ -442,6 +519,8 @@ role_recipients_telegram[sysadmin]="${DEFAULT_RECIPIENT_TELEGRAM}"
role_recipients_slack[sysadmin]="${DEFAULT_RECIPIENT_SLACK}"
+role_recipients_alerta[sysadmin]="${DEFAULT_RECIPIENT_ALERTA}"
+
role_recipients_flock[sysadmin]="${DEFAULT_RECIPIENT_FLOCK}"
role_recipients_discord[sysadmin]="${DEFAULT_RECIPIENT_DISCORD}"
@@ -456,6 +535,8 @@ role_recipients_kavenegar[sysadmin]="${DEFAULT_RECIPIENT_KAVENEGAR}"
role_recipients_pd[sysadmin]="${DEFAULT_RECIPIENT_PD}"
+role_recipients_irc[sysadmin]="${DEFAULT_RECIPIENT_IRC}"
+
role_recipients_custom[sysadmin]="${DEFAULT_RECIPIENT_CUSTOM}"
# -----------------------------------------------------------------------------
@@ -471,6 +552,8 @@ role_recipients_telegram[domainadmin]="${DEFAULT_RECIPIENT_TELEGRAM}"
role_recipients_slack[domainadmin]="${DEFAULT_RECIPIENT_SLACK}"
+role_recipients_alerta[domainadmin]="${DEFAULT_RECIPIENT_ALERTA}"
+
role_recipients_flock[domainadmin]="${DEFAULT_RECIPIENT_FLOCK}"
role_recipients_discord[domainadmin]="${DEFAULT_RECIPIENT_DISCORD}"
@@ -485,6 +568,8 @@ role_recipients_kavenegar[domainadmin]="${DEFAULT_RECIPIENT_KAVENEGAR}"
role_recipients_pd[domainadmin]="${DEFAULT_RECIPIENT_PD}"
+role_recipients_irc[domainadmin]="${DEFAULT_RECIPIENT_IRC}"
+
role_recipients_custom[domainadmin]="${DEFAULT_RECIPIENT_CUSTOM}"
# -----------------------------------------------------------------------------
@@ -501,6 +586,8 @@ role_recipients_telegram[dba]="${DEFAULT_RECIPIENT_TELEGRAM}"
role_recipients_slack[dba]="${DEFAULT_RECIPIENT_SLACK}"
+role_recipients_alerta[dba]="${DEFAULT_RECIPIENT_ALERTA}"
+
role_recipients_flock[dba]="${DEFAULT_RECIPIENT_FLOCK}"
role_recipients_discord[dba]="${DEFAULT_RECIPIENT_DISCORD}"
@@ -515,6 +602,8 @@ role_recipients_kavenegar[dba]="${DEFAULT_RECIPIENT_KAVENEGAR}"
role_recipients_pd[dba]="${DEFAULT_RECIPIENT_PD}"
+role_recipients_irc[dba]="${DEFAULT_RECIPIENT_IRC}"
+
role_recipients_custom[dba]="${DEFAULT_RECIPIENT_CUSTOM}"
# -----------------------------------------------------------------------------
@@ -531,6 +620,8 @@ role_recipients_telegram[webmaster]="${DEFAULT_RECIPIENT_TELEGRAM}"
role_recipients_slack[webmaster]="${DEFAULT_RECIPIENT_SLACK}"
+role_recipients_alerta[webmaster]="${DEFAULT_RECIPIENT_ALERTA}"
+
role_recipients_flock[webmaster]="${DEFAULT_RECIPIENT_FLOCK}"
role_recipients_discord[webmaster]="${DEFAULT_RECIPIENT_DISCORD}"
@@ -545,6 +636,8 @@ role_recipients_kavenegar[webmaster]="${DEFAULT_RECIPIENT_KAVENEGAR}"
role_recipients_pd[webmaster]="${DEFAULT_RECIPIENT_PD}"
+role_recipients_irc[webmaster]="${DEFAULT_RECIPIENT_IRC}"
+
role_recipients_custom[webmaster]="${DEFAULT_RECIPIENT_CUSTOM}"
# -----------------------------------------------------------------------------
@@ -561,6 +654,8 @@ role_recipients_telegram[proxyadmin]="${DEFAULT_RECIPIENT_TELEGRAM}"
role_recipients_slack[proxyadmin]="${DEFAULT_RECIPIENT_SLACK}"
+role_recipients_alerta[proxyadmin]="${DEFAULT_RECIPIENT_ALERTA}"
+
role_recipients_flock[proxyadmin]="${DEFAULT_RECIPIENT_FLOCK}"
role_recipients_discord[proxyadmin]="${DEFAULT_RECIPIENT_DISCORD}"
@@ -575,4 +670,39 @@ role_recipients_kavenegar[proxyadmin]="${DEFAULT_RECIPIENT_KAVENEGAR}"
role_recipients_pd[proxyadmin]="${DEFAULT_RECIPIENT_PD}"
+role_recipients_irc[proxyadmin]="${DEFAULT_RECIPIENT_IRC}"
+
role_recipients_custom[proxyadmin]="${DEFAULT_RECIPIENT_CUSTOM}"
+
+# -----------------------------------------------------------------------------
+# peripheral devices
+# UPS, photovoltaics, etc
+
+role_recipients_email[sitemgr]="${DEFAULT_RECIPIENT_EMAIL}"
+
+role_recipients_pushover[sitemgr]="${DEFAULT_RECIPIENT_PUSHOVER}"
+
+role_recipients_pushbullet[sitemgr]="${DEFAULT_RECIPIENT_PUSHBULLET}"
+
+role_recipients_telegram[sitemgr]="${DEFAULT_RECIPIENT_TELEGRAM}"
+
+role_recipients_slack[sitemgr]="${DEFAULT_RECIPIENT_SLACK}"
+
+role_recipients_alerta[sitemgr]="${DEFAULT_RECIPIENT_ALERTA}"
+
+role_recipients_flock[sitemgr]="${DEFAULT_RECIPIENT_FLOCK}"
+
+role_recipients_discord[sitemgr]="${DEFAULT_RECIPIENT_DISCORD}"
+
+role_recipients_hipchat[sitemgr]="${DEFAULT_RECIPIENT_HIPCHAT}"
+
+role_recipients_twilio[sitemgr]="${DEFAULT_RECIPIENT_TWILIO}"
+
+role_recipients_messagebird[sitemgr]="${DEFAULT_RECIPIENT_MESSAGEBIRD}"
+
+role_recipients_kavenegar[sitemgr]="${DEFAULT_RECIPIENT_KAVENEGAR}"
+
+role_recipients_pd[sitemgr]="${DEFAULT_RECIPIENT_PD}"
+
+role_recipients_custom[sitemgr]="${DEFAULT_RECIPIENT_CUSTOM}"
+
diff --git a/conf.d/python.d.conf b/conf.d/python.d.conf
index 2c3d400ca..bb57738bb 100644
--- a/conf.d/python.d.conf
+++ b/conf.d/python.d.conf
@@ -15,7 +15,7 @@ enabled: yes
#
# If "default_run" = "yes" the default for all modules is enabled (yes).
# Setting any of these to "no" will disable it.
-#
+#
# If "default_run" = "no" the default for all modules is disabled (no).
# Setting any of these to "yes" will enable it.
@@ -24,6 +24,7 @@ apache_cache: no
# apache: yes
# beanstalk: yes
# bind_rndc: yes
+# ceph: yes
chrony: no
# couchdb: yes
# cpufreq: yes
@@ -45,6 +46,7 @@ gunicorn_log: no
go_expvar: no
# haproxy: yes
# hddtemp: yes
+# icecast: yes
# ipfs: yes
# isc_dhcpd: yes
# mdstat: yes
@@ -52,11 +54,13 @@ go_expvar: no
# mongodb: yes
# mysql: yes
# nginx: yes
+# nginx_plus: yes
# nsd: yes
+# ntpd: yes
# nginx_log has been replaced by web_log
nginx_log: no
-
+# ntpd: yes
# ovpn_status_log: yes
# phpfpm: yes
# postfix: yes
@@ -69,6 +73,7 @@ nginx_log: no
# samba: yes
# smartd_log: yes
# squid: yes
+# springboot: yes
# tomcat: yes
# varnish: yes
# web_log: yes
diff --git a/conf.d/python.d/ceph.conf b/conf.d/python.d/ceph.conf
new file mode 100644
index 000000000..78ac1e251
--- /dev/null
+++ b/conf.d/python.d/ceph.conf
@@ -0,0 +1,75 @@
+# netdata python.d.plugin configuration for ceph stats
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 10
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 60
+
+# autodetection_retry sets the job re-check interval in seconds.
+# The job is not deleted if check fails.
+# Attempts to start the job are made once every autodetection_retry.
+# This feature is disabled by default.
+# autodetection_retry: 0
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 10 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 60 # the JOB's number of restoration attempts
+# autodetection_retry: 0 # the JOB's re-check interval in seconds
+#
+# In addition to the above, the ceph plugin also supports the following:
+#
+# config_file: 'config_file' # Ceph config file.
+# keyring_file: 'keyring_file' # Ceph keyring file. netdata user must be added to the ceph group
+# # and the keyring file must have group read permission.
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+#
+config_file: '/etc/ceph/ceph.conf'
+keyring_file: '/etc/ceph/ceph.client.admin.keyring'
+
diff --git a/conf.d/python.d/httpcheck.conf b/conf.d/python.d/httpcheck.conf
new file mode 100644
index 000000000..058e057a6
--- /dev/null
+++ b/conf.d/python.d/httpcheck.conf
@@ -0,0 +1,99 @@
+# netdata python.d.plugin configuration for httpcheck
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the httpcheck default is used, which is at 3 seconds.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# chart_cleanup sets the default chart cleanup interval in iterations.
+# A chart is marked as obsolete if it has not been updated
+# 'chart_cleanup' iterations in a row.
+# They will be hidden immediately (not offered to dashboard viewer,
+# streamed upstream and archived to backends) and deleted one hour
+# later (configurable from netdata.conf).
+# -- For this plugin, cleanup MUST be disabled, otherwise we lose response
+# time charts
+chart_cleanup: 0
+
+# Autodetection and retries do not work for this plugin
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# -------------------------------
+# ATTENTION: Any valid configuration will be accepted, even if initial connection fails!
+# -------------------------------
+#
+# There is intentionally no default config, e.g. for 'localhost'
+
+# job_name:
+# name: myname # [optional] the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 3 # [optional] the JOB's data collection frequency
+# priority: 60000 # [optional] the JOB's order on the dashboard
+# retries: 60 # [optional] the JOB's number of restoration attempts
+# timeout: 1 # [optional] the timeout when connecting, supports decimals (e.g. 0.5s)
+# url: 'http[s]://host-ip-or-dns[:port][path]'
+# # [required] the remote host url to connect to. If [:port] is missing, it defaults to 80
+# # for HTTP and 443 for HTTPS. [path] is optional too, defaults to /
+# redirect: yes # [optional] If the remote host returns 3xx status codes, the redirection url will be
+# # followed (default).
+# status_accepted: # [optional] By default, 200 is accepted. Anything else will result in 'bad status' in the
+# # status chart, however: The response time will still be > 0, since the
+# # host responded with something.
+# # If redirect is enabled, the accepted status will be checked against the redirected page.
+# - 200 # Multiple status codes are possible. If you specify 'status_accepted', you would still
+# # need to add '200'. E.g. 'status_accepted: [301]' will trigger an error in 'bad status'
+# # if code is 200. Do specify numerical entries such as 200, not 'OK'.
+# regex: None # [optional] If the status code is accepted, the content of the response will be searched for this
+# # regex (if defined). Be aware that you may need to escape the regex string. If redirect is enabled,
+# # the regex will be matched to the redirected page, not the initial 3xx response.
+
+# Simple example:
+#
+# jira:
+# url: 'https://jira.localdomain/'
+
+
+# Complex example:
+#
+# cool_website:
+# url: 'http://cool.website:8080/home'
+# status_accepted:
+# - 200
+# - 204
+# regex: <title>My cool website!<\/title>
+# timeout: 2
+
+# This plugin is intended for simple cases. Currently, the accuracy of the response time is low, so it should be used as a reference only.
+
diff --git a/conf.d/python.d/icecast.conf b/conf.d/python.d/icecast.conf
new file mode 100644
index 000000000..a900d06d3
--- /dev/null
+++ b/conf.d/python.d/icecast.conf
@@ -0,0 +1,83 @@
+# netdata python.d.plugin configuration for icecast
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 60
+
+# autodetection_retry sets the job re-check interval in seconds.
+# The job is not deleted if check fails.
+# Attempts to start the job are made once every autodetection_retry.
+# This feature is disabled by default.
+# autodetection_retry: 0
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 60 # the JOB's number of restoration attempts
+# autodetection_retry: 0 # the JOB's re-check interval in seconds
+#
+# In addition to the above, icecast also supports the following:
+#
+# url: 'URL' # the URL to fetch icecast's stats
+#
+# if the URL is password protected, the following are supported:
+#
+# user: 'username'
+# pass: 'password'
+
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+
+localhost:
+ name : 'local'
+ url : 'http://localhost:8443/status-json.xsl'
+
+localipv4:
+ name : 'local'
+ url : 'http://127.0.0.1:8443/status-json.xsl' \ No newline at end of file
diff --git a/conf.d/python.d/mysql.conf b/conf.d/python.d/mysql.conf
index def9f7e96..b5956a2c6 100644
--- a/conf.d/python.d/mysql.conf
+++ b/conf.d/python.d/mysql.conf
@@ -85,12 +85,19 @@
# to connect to the mysql server on localhost, without a password:
#
# > create user 'netdata'@'localhost';
-# > grant usage on *.* to 'netdata'@'localhost' with grant option;
+# > grant usage on *.* to 'netdata'@'localhost';
# > flush privileges;
#
# with the above statements, netdata will be able to gather mysql
# statistics, without the ability to see or alter any data or affect
# mysql operation in any way. No change is required below.
+#
+# If you need to monitor mysql replication too, use this instead:
+#
+# > create user 'netdata'@'localhost';
+# > grant replication client on *.* to 'netdata'@'localhost';
+# > flush privileges;
+#
# ----------------------------------------------------------------------
# AUTO-DETECTION JOBS
diff --git a/conf.d/python.d/nginx_plus.conf b/conf.d/python.d/nginx_plus.conf
new file mode 100644
index 000000000..7b5c8f43f
--- /dev/null
+++ b/conf.d/python.d/nginx_plus.conf
@@ -0,0 +1,87 @@
+# netdata python.d.plugin configuration for nginx_plus
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 60
+
+# autodetection_retry sets the job re-check interval in seconds.
+# The job is not deleted if check fails.
+# Attempts to start the job are made once every autodetection_retry.
+# This feature is disabled by default.
+# autodetection_retry: 0
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 60 # the JOB's number of restoration attempts
+# autodetection_retry: 0 # the JOB's re-check interval in seconds
+#
+# Additionally to the above, nginx_plus also supports the following:
+#
+# url: 'URL' # the URL to fetch nginx_plus's stats
+#
+# if the URL is password protected, the following are supported:
+#
+# user: 'username'
+# pass: 'password'
+
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+
+localhost:
+ name : 'local'
+ url : 'http://localhost/status'
+
+localipv4:
+ name : 'local'
+ url : 'http://127.0.0.1/status'
+
+localipv6:
+ name : 'local'
+ url : 'http://[::1]/status'
diff --git a/conf.d/python.d/ntpd.conf b/conf.d/python.d/ntpd.conf
new file mode 100644
index 000000000..7adc4074b
--- /dev/null
+++ b/conf.d/python.d/ntpd.conf
@@ -0,0 +1,91 @@
+# netdata python.d.plugin configuration for ntpd
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 60
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 60 # the JOB's number of restoration attempts
+#
+# Additionally to the above, ntpd also supports the following:
+#
+# host: 'localhost' # the host to query
+# port: '123' # the UDP port where `ntpd` listens
+# show_peers: no # use `yes` to show peer charts. enabling this
+# # option is recommended only for debugging, as
+# # it could possibly imply memory leaks if the
+# # peers change frequently.
+# peer_filter: '127\..*' # regex to exclude peers
+# # by default local peers are hidden
+# # use `''` to show all peers.
+# peer_rescan: 60 # interval (>0) to check for new/changed peers
+# # use `1` to check on every update
+#
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+
+localhost:
+ name: 'local'
+ host: 'localhost'
+ port: '123'
+ show_peers: no
+
+localhost_ipv4:
+ name: 'local'
+ host: '127.0.0.1'
+ port: '123'
+ show_peers: no
+
+localhost_ipv6:
+ name: 'local'
+ host: '::1'
+ port: '123'
+ show_peers: no
diff --git a/conf.d/python.d/portcheck.conf b/conf.d/python.d/portcheck.conf
new file mode 100644
index 000000000..b3dd8bd3f
--- /dev/null
+++ b/conf.d/python.d/portcheck.conf
@@ -0,0 +1,70 @@
+# netdata python.d.plugin configuration for portcheck
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# chart_cleanup sets the default chart cleanup interval in iterations.
+# A chart is marked as obsolete if it has not been updated
+# 'chart_cleanup' iterations in a row.
+# They will be hidden immediately (not offered to dashboard viewer,
+# streamed upstream and archived to backends) and deleted one hour
+# later (configurable from netdata.conf).
+# -- For this plugin, cleanup MUST be disabled, otherwise we lose latency chart
+chart_cleanup: 0
+
+# Autodetection and retries do not work for this plugin
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# -------------------------------
+# ATTENTION: Any valid configuration will be accepted, even if initial connection fails!
+# -------------------------------
+#
+# There is intentionally no default config for 'localhost'
+
+# job_name:
+# name: myname # [optional] the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # [optional] the JOB's data collection frequency
+# priority: 60000 # [optional] the JOB's order on the dashboard
+# retries: 60 # [optional] the JOB's number of restoration attempts
+# timeout: 1 # [optional] the socket timeout when connecting
+# host: 'dns or ip' # [required] the remote host address in either IPv4, IPv6 or as DNS name.
+# port: 22 # [required] the port number to check. Specify an integer, not service name.
+
+# Note: frequent connection attempts may be mistaken for a port scan and blocked by the remote host. The portcheck plugin is meant for simple use cases.
+# Currently, the accuracy of the latency is low and should be used as reference only.
+
diff --git a/conf.d/python.d/postgres.conf b/conf.d/python.d/postgres.conf
index 3a70a7184..b69ca3717 100644
--- a/conf.d/python.d/postgres.conf
+++ b/conf.d/python.d/postgres.conf
@@ -82,9 +82,18 @@
# a postgres user for netdata and add its password below to allow
# netdata connect.
#
-# Without superuser access, netdata won't be able to generate the write
-# ahead log and the background writer charts.
-#
+# Supported Postgres versions are:
+# - 9.3 (without autovacuum)
+# - 9.4
+# - 9.5
+# - 9.6
+# - 10
+#
+# Superuser access is needed for these charts:
+# Write-Ahead Logs
+# Archive Write-Ahead Logs
+#
+# Autovacuum charts are available since Postgres 9.4
# ----------------------------------------------------------------------
socket:
diff --git a/conf.d/python.d/springboot.conf b/conf.d/python.d/springboot.conf
new file mode 100644
index 000000000..40b5fb437
--- /dev/null
+++ b/conf.d/python.d/springboot.conf
@@ -0,0 +1,120 @@
+# netdata python.d.plugin configuration for springboot
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 60
+
+# autodetection_retry sets the job re-check interval in seconds.
+# The job is not deleted if check fails.
+# Attempts to start the job are made once every autodetection_retry.
+# This feature is disabled by default.
+# autodetection_retry: 0
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 60 # the JOB's number of restoration attempts
+# autodetection_retry: 0 # the JOB's re-check interval in seconds
+#
+# Additionally to the above, this plugin also supports the following:
+#
+# url: 'http://127.0.0.1/metrics' # the URL of the spring boot actuator metrics
+#
+# if the URL is password protected, the following are supported:
+#
+# user: 'username'
+# pass: 'password'
+#
+# defaults:
+# [chart_id]: true | false # enables/disables default charts, defaults true.
+# extras: {} # defines extra charts to monitor, please see the example below
+# - id: [chart_id]
+# options: {}
+# lines: []
+#
+# If all defaults are disabled and no extra charts are defined, this module will disable itself, as it has no data to
+# collect.
+#
+# Configuration example
+# ---------------------
+# example:
+# name: 'example'
+# url: 'http://localhost:8080/metrics'
+# defaults:
+# response_code: true
+# threads: true
+# gc_time: true
+# gc_ope: true
+# heap: false
+# extras:
+# - id: 'heap'
+# options: { title: 'Heap Memory Usage', units: 'KB', family: 'heap memory', context: 'springboot.heap', charttype: 'stacked' }
+# lines:
+# - { dimension: 'mem_free', name: 'free'}
+# - { dimension: 'mempool_eden_used', name: 'eden', algorithm: 'absolute', multiplier: 1, divisor: 1}
+# - { dimension: 'mempool_survivor_used', name: 'survivor', algorithm: 'absolute', multiplier: 1, divisor: 1}
+# - { dimension: 'mempool_tenured_used', name: 'tenured', algorithm: 'absolute', multiplier: 1, divisor: 1}
+# - id: 'heap_eden'
+# options: { title: 'Eden Memory Usage', units: 'KB', family: 'heap memory', context: 'springboot.heap_eden', charttype: 'area' }
+# lines:
+# - { dimension: 'mempool_eden_used', name: 'used'}
+# - { dimension: 'mempool_eden_committed', name: 'committed'}
+# - id: 'heap_survivor'
+# options: { title: 'Survivor Memory Usage', units: 'KB', family: 'heap memory', context: 'springboot.heap_survivor', charttype: 'area' }
+# lines:
+# - { dimension: 'mempool_survivor_used', name: 'used'}
+# - { dimension: 'mempool_survivor_committed', name: 'committed'}
+# - id: 'heap_tenured'
+# options: { title: 'Tenured Memory Usage', units: 'KB', family: 'heap memory', context: 'springboot.heap_tenured', charttype: 'area' }
+# lines:
+# - { dimension: 'mempool_tenured_used', name: 'used'}
+# - { dimension: 'mempool_tenured_committed', name: 'committed'}
+
+
+local:
+ name: 'local'
+ url: 'http://localhost:8080/metrics'
+
+local_ip:
+ name: 'local'
+ url: 'http://127.0.0.1:8080/metrics'
diff --git a/conf.d/python.d/traefik.conf b/conf.d/python.d/traefik.conf
new file mode 100644
index 000000000..909b9e549
--- /dev/null
+++ b/conf.d/python.d/traefik.conf
@@ -0,0 +1,79 @@
+# netdata python.d.plugin configuration for traefik health data API
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 1
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# retries sets the number of retries to be made in case of failures.
+# If unset, the default for python.d.plugin is used.
+# Attempts to restore the service are made once every update_every
+# and only if the module has collected values in the past.
+# retries: 60
+
+# autodetection_retry sets the job re-check interval in seconds.
+# The job is not deleted if check fails.
+# Attempts to start the job are made once every autodetection_retry.
+# This feature is disabled by default.
+# autodetection_retry: 0
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+#
+# The default JOBS share the same *name*. JOBS with the same name
+# are mutually exclusive. Only one of them will be allowed running at
+# any time. This allows autodetection to try several alternatives and
+# pick the one that works.
+#
+# Any number of jobs is supported.
+#
+# All python.d.plugin JOBS (for all its modules) support a set of
+# predefined parameters. These are:
+#
+# job_name:
+# name: myname # the JOB's name as it will appear at the
+# # dashboard (by default is the job_name)
+# # JOBs sharing a name are mutually exclusive
+# update_every: 1 # the JOB's data collection frequency
+# priority: 60000 # the JOB's order on the dashboard
+# retries: 10 # the JOB's number of restoration attempts
+# autodetection_retry: 0 # the JOB's re-check interval in seconds
+#
+# Additionally to the above, traefik plugin also supports the following:
+#
+# url: '<scheme>://<host>:<port>/<health_page_api>'
+# # http://localhost:8080/health
+#
+# if the URL is password protected, the following are supported:
+#
+# user: 'username'
+# pass: 'password'
+#
+# ----------------------------------------------------------------------
+# AUTO-DETECTION JOBS
+# only one of them will run (they have the same name)
+#
+local:
+ url: 'http://localhost:8080/health'
diff --git a/conf.d/python.d/web_log.conf b/conf.d/python.d/web_log.conf
index dd1fff075..c185f8d85 100644
--- a/conf.d/python.d/web_log.conf
+++ b/conf.d/python.d/web_log.conf
@@ -85,6 +85,7 @@
# custom_log_format: # define a custom log format
# pattern: '(?P<address>[\da-f.:]+) -.*?"(?P<method>[A-Z]+) (?P<url>.*?)" (?P<code>[1-9]\d{2}) (?P<bytes_sent>\d+) (?P<resp_length>\d+) (?P<resp_time>\d+\.\d+) '
# time_multiplier: 1000000 # type <int> - convert time to microseconds
+# histogram: [1,3,10,30,100, ...] # type list of int - Cumulative histogram of response time in milliseconds
# ----------------------------------------------------------------------
# WEB SERVER CONFIGURATION
diff --git a/conf.d/stream.conf b/conf.d/stream.conf
index 8945529ee..d0c9a8b18 100644
--- a/conf.d/stream.conf
+++ b/conf.d/stream.conf
@@ -112,6 +112,13 @@
# postpone alarms for a short period after the sender is connected
default postpone alarms on connect seconds = 60
+ # allow or deny multiple connections for the same host?
+ # If you are sure all your netdata have their own machine GUID,
+ # set this to 'allow', since it allows faster reconnects.
+ # When set to 'deny', new connections for a host will not be
+ # accepted until an existing connection is cleared.
+ multiple connections = allow
+
# need to route metrics differently? set these.
# the defaults are the ones at the [stream] section
#default proxy enabled = yes | no
@@ -159,6 +166,13 @@
# postpone alarms when the sender connects
postpone alarms on connect seconds = 60
+ # allow or deny multiple connections for the same host?
+ # If you are sure all your netdata have their own machine GUID,
+ # set this to 'allow', since it allows faster reconnects.
+ # When set to 'deny', new connections for a host will not be
+ # accepted until an existing connection is cleared.
+ multiple connections = allow
+
# need to route metrics differently?
#proxy enabled = yes | no
#proxy destination = IP:PORT IP:PORT ...