diff options
Diffstat (limited to 'conf.d')
54 files changed, 1429 insertions, 57 deletions
diff --git a/conf.d/Makefile.am b/conf.d/Makefile.am index 095e891a..d79bb5ab 100644 --- a/conf.d/Makefile.am +++ b/conf.d/Makefile.am @@ -29,6 +29,7 @@ dist_pythonconfig_DATA = \ python.d/apache.conf \ python.d/beanstalk.conf \ python.d/bind_rndc.conf \ + python.d/ceph.conf \ python.d/chrony.conf \ python.d/couchdb.conf \ python.d/cpufreq.conf \ @@ -43,6 +44,8 @@ dist_pythonconfig_DATA = \ python.d/go_expvar.conf \ python.d/haproxy.conf \ python.d/hddtemp.conf \ + python.d/httpcheck.conf \ + python.d/icecast.conf \ python.d/ipfs.conf \ python.d/isc_dhcpd.conf \ python.d/mdstat.conf \ @@ -50,9 +53,12 @@ dist_pythonconfig_DATA = \ python.d/mongodb.conf \ python.d/mysql.conf \ python.d/nginx.conf \ + python.d/nginx_plus.conf \ python.d/nsd.conf \ + python.d/ntpd.conf \ python.d/ovpn_status_log.conf \ python.d/phpfpm.conf \ + python.d/portcheck.conf \ python.d/postfix.conf \ python.d/postgres.conf \ python.d/powerdns.conf \ @@ -61,9 +67,11 @@ dist_pythonconfig_DATA = \ python.d/retroshare.conf \ python.d/samba.conf \ python.d/sensors.conf \ + python.d/springboot.conf \ python.d/squid.conf \ python.d/smartd_log.conf \ python.d/tomcat.conf \ + python.d/traefik.conf \ python.d/varnish.conf \ python.d/web_log.conf \ $(NULL) @@ -75,13 +83,17 @@ dist_healthconfig_DATA = \ health.d/backend.conf \ health.d/beanstalkd.conf \ health.d/bind_rndc.conf \ + health.d/btrfs.conf \ + health.d/ceph.conf \ health.d/cpu.conf \ health.d/couchdb.conf \ health.d/disks.conf \ health.d/elasticsearch.conf \ health.d/entropy.conf \ health.d/fping.conf \ + health.d/fronius.conf \ health.d/haproxy.conf \ + health.d/httpcheck.conf \ health.d/ipc.conf \ health.d/ipfs.conf \ health.d/ipmi.conf \ @@ -96,6 +108,8 @@ dist_healthconfig_DATA = \ health.d/net.conf \ health.d/netfilter.conf \ health.d/nginx.conf \ + health.d/nginx_plus.conf \ + health.d/portcheck.conf \ health.d/postgres.conf \ health.d/qos.conf \ health.d/ram.conf \ @@ -103,6 +117,7 @@ dist_healthconfig_DATA = \ health.d/retroshare.conf \ 
health.d/softnet.conf \ health.d/squid.conf \ + health.d/stiebeleltron.conf \ health.d/swap.conf \ health.d/tcp_conn.conf \ health.d/tcp_listen.conf \ @@ -121,6 +136,7 @@ dist_chartsconfig_DATA = \ charts.d/apcupsd.conf \ charts.d/cpufreq.conf \ charts.d/exim.conf \ + charts.d/libreswan.conf \ charts.d/load_average.conf \ charts.d/mysql.conf \ charts.d/nut.conf \ diff --git a/conf.d/Makefile.in b/conf.d/Makefile.in index c1c291bc..48ce5119 100644 --- a/conf.d/Makefile.in +++ b/conf.d/Makefile.in @@ -328,6 +328,7 @@ dist_pythonconfig_DATA = \ python.d/apache.conf \ python.d/beanstalk.conf \ python.d/bind_rndc.conf \ + python.d/ceph.conf \ python.d/chrony.conf \ python.d/couchdb.conf \ python.d/cpufreq.conf \ @@ -342,6 +343,8 @@ dist_pythonconfig_DATA = \ python.d/go_expvar.conf \ python.d/haproxy.conf \ python.d/hddtemp.conf \ + python.d/httpcheck.conf \ + python.d/icecast.conf \ python.d/ipfs.conf \ python.d/isc_dhcpd.conf \ python.d/mdstat.conf \ @@ -349,9 +352,12 @@ dist_pythonconfig_DATA = \ python.d/mongodb.conf \ python.d/mysql.conf \ python.d/nginx.conf \ + python.d/nginx_plus.conf \ python.d/nsd.conf \ + python.d/ntpd.conf \ python.d/ovpn_status_log.conf \ python.d/phpfpm.conf \ + python.d/portcheck.conf \ python.d/postfix.conf \ python.d/postgres.conf \ python.d/powerdns.conf \ @@ -360,9 +366,11 @@ dist_pythonconfig_DATA = \ python.d/retroshare.conf \ python.d/samba.conf \ python.d/sensors.conf \ + python.d/springboot.conf \ python.d/squid.conf \ python.d/smartd_log.conf \ python.d/tomcat.conf \ + python.d/traefik.conf \ python.d/varnish.conf \ python.d/web_log.conf \ $(NULL) @@ -373,13 +381,17 @@ dist_healthconfig_DATA = \ health.d/backend.conf \ health.d/beanstalkd.conf \ health.d/bind_rndc.conf \ + health.d/btrfs.conf \ + health.d/ceph.conf \ health.d/cpu.conf \ health.d/couchdb.conf \ health.d/disks.conf \ health.d/elasticsearch.conf \ health.d/entropy.conf \ health.d/fping.conf \ + health.d/fronius.conf \ health.d/haproxy.conf \ + 
health.d/httpcheck.conf \ health.d/ipc.conf \ health.d/ipfs.conf \ health.d/ipmi.conf \ @@ -394,6 +406,8 @@ dist_healthconfig_DATA = \ health.d/net.conf \ health.d/netfilter.conf \ health.d/nginx.conf \ + health.d/nginx_plus.conf \ + health.d/portcheck.conf \ health.d/postgres.conf \ health.d/qos.conf \ health.d/ram.conf \ @@ -401,6 +415,7 @@ dist_healthconfig_DATA = \ health.d/retroshare.conf \ health.d/softnet.conf \ health.d/squid.conf \ + health.d/stiebeleltron.conf \ health.d/swap.conf \ health.d/tcp_conn.conf \ health.d/tcp_listen.conf \ @@ -419,6 +434,7 @@ dist_chartsconfig_DATA = \ charts.d/apcupsd.conf \ charts.d/cpufreq.conf \ charts.d/exim.conf \ + charts.d/libreswan.conf \ charts.d/load_average.conf \ charts.d/mysql.conf \ charts.d/nut.conf \ diff --git a/conf.d/charts.d/ap.conf b/conf.d/charts.d/ap.conf index 88a447eb..38fc157c 100644 --- a/conf.d/charts.d/ap.conf +++ b/conf.d/charts.d/ap.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # nothing fancy to configure. @@ -17,3 +17,7 @@ # the charts priority on the dashboard #ap_priority=6900 + +# the number of retries to do in case of failure +# before disabling the module +#ap_retries=10 diff --git a/conf.d/charts.d/apache.conf b/conf.d/charts.d/apache.conf index b82c2a7f..50914cf3 100644 --- a/conf.d/charts.d/apache.conf +++ b/conf.d/charts.d/apache.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! 
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -24,3 +24,7 @@ # the charts priority on the dashboard #apache_priority=60000 + +# the number of retries to do in case of failure +# before disabling the module +#apache_retries=10 diff --git a/conf.d/charts.d/apcupsd.conf b/conf.d/charts.d/apcupsd.conf index f8bf7ed6..679c0d61 100644 --- a/conf.d/charts.d/apcupsd.conf +++ b/conf.d/charts.d/apcupsd.conf @@ -2,11 +2,13 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ -#apcupsd_ip=127.0.0.1 -#apcupsd_port=3551 +# add all your APC UPSes in this array - uncomment it too +#declare -A apcupsd_sources=( +# ["local"]="127.0.0.1:3551" +#) # how long to wait for apcupsd to respond #apcupsd_timeout=3 @@ -17,3 +19,7 @@ # the charts priority on the dashboard #apcupsd_priority=90000 + +# the number of retries to do in case of failure +# before disabling the module +#apcupsd_retries=10 diff --git a/conf.d/charts.d/cpu_apps.conf b/conf.d/charts.d/cpu_apps.conf index 46d70362..850cd0c6 100644 --- a/conf.d/charts.d/cpu_apps.conf +++ b/conf.d/charts.d/cpu_apps.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -13,3 +13,7 @@ # the data collection frequency # if unset, will inherit the netdata update frequency #cpu_apps_update_every=2 + +# the number of retries to do in case of failure +# before disabling the module +#cpu_apps_retries=10 diff --git a/conf.d/charts.d/cpufreq.conf b/conf.d/charts.d/cpufreq.conf index 4f26562e..7130555a 100644 --- a/conf.d/charts.d/cpufreq.conf +++ b/conf.d/charts.d/cpufreq.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! 
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -18,3 +18,7 @@ # the charts priority on the dashboard #cpufreq_priority=10000 + +# the number of retries to do in case of failure +# before disabling the module +#cpufreq_retries=10 diff --git a/conf.d/charts.d/example.conf b/conf.d/charts.d/example.conf index dc4b6900..6232ca58 100644 --- a/conf.d/charts.d/example.conf +++ b/conf.d/charts.d/example.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # to enable this chart, you have to set this to 12345 @@ -15,3 +15,7 @@ # the charts priority on the dashboard #example_priority=150000 + +# the number of retries to do in case of failure +# before disabling the module +#example_retries=10 diff --git a/conf.d/charts.d/exim.conf b/conf.d/charts.d/exim.conf index 4a1464bb..f96ac4db 100644 --- a/conf.d/charts.d/exim.conf +++ b/conf.d/charts.d/exim.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -18,3 +18,7 @@ # the charts priority on the dashboard #exim_priority=60000 + +# the number of retries to do in case of failure +# before disabling the module +#exim_retries=10 diff --git a/conf.d/charts.d/hddtemp.conf b/conf.d/charts.d/hddtemp.conf index 535cb017..b6037b40 100644 --- a/conf.d/charts.d/hddtemp.conf +++ b/conf.d/charts.d/hddtemp.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! 
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -18,3 +18,6 @@ # the charts priority on the dashboard #hddtemp_priority=90000 +# the number of retries to do in case of failure +# before disabling the module +#hddtemp_retries=10 diff --git a/conf.d/charts.d/libreswan.conf b/conf.d/charts.d/libreswan.conf new file mode 100644 index 00000000..9b3ee77b --- /dev/null +++ b/conf.d/charts.d/libreswan.conf @@ -0,0 +1,29 @@ +# no need for shebang - this file is loaded from charts.d.plugin + +# netdata +# real-time performance and health monitoring, done right! +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> +# GPL v3+ +# + +# the data collection frequency +# if unset, will inherit the netdata update frequency +#libreswan_update_every=1 + +# the charts priority on the dashboard +#libreswan_priority=90000 + +# the number of retries to do in case of failure +# before disabling the module +#libreswan_retries=10 + +# set to 1, to run ipsec with sudo (the default) +# set to 0, to run ipsec without sudo +#libreswan_sudo=1 + +# TO ALLOW NETDATA RUN ipsec AS ROOT +# CREATE THE FILE: /etc/sudoers.d/netdata +# WITH THESE 2 LINES (uncommented of course): +# +# netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --status +# netdata ALL = (root) NOPASSWD: /sbin/ipsec whack --trafficstatus diff --git a/conf.d/charts.d/load_average.conf b/conf.d/charts.d/load_average.conf index abbe80ca..68979275 100644 --- a/conf.d/charts.d/load_average.conf +++ b/conf.d/charts.d/load_average.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! 
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -15,4 +15,8 @@ #load_average_update_every=5 # the charts priority on the dashboard -#load_priority=100 +#load_average_priority=100 + +# the number of retries to do in case of failure +# before disabling the module +#load_average_retries=10 diff --git a/conf.d/charts.d/mem_apps.conf b/conf.d/charts.d/mem_apps.conf index aa4ac680..75d24dc3 100644 --- a/conf.d/charts.d/mem_apps.conf +++ b/conf.d/charts.d/mem_apps.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -13,3 +13,7 @@ # the data collection frequency # if unset, will inherit the netdata update frequency #mem_apps_update_every=2 + +# the number of retries to do in case of failure +# before disabling the module +#mem_apps_retries=10 diff --git a/conf.d/charts.d/mysql.conf b/conf.d/charts.d/mysql.conf index 6a0b55a4..683e4af3 100644 --- a/conf.d/charts.d/mysql.conf +++ b/conf.d/charts.d/mysql.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -17,3 +17,7 @@ # the charts priority on the dashboard #mysql_priority=60000 + +# the number of retries to do in case of failure +# before disabling the module +#mysql_retries=10 diff --git a/conf.d/charts.d/nginx.conf b/conf.d/charts.d/nginx.conf index 8b88b0e3..c46100a5 100644 --- a/conf.d/charts.d/nginx.conf +++ b/conf.d/charts.d/nginx.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! 
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -17,3 +17,7 @@ # the charts priority on the dashboard #nginx_priority=60000 + +# the number of retries to do in case of failure +# before disabling the module +#nginx_retries=10 diff --git a/conf.d/charts.d/nut.conf b/conf.d/charts.d/nut.conf index a836692d..d477ddd3 100644 --- a/conf.d/charts.d/nut.conf +++ b/conf.d/charts.d/nut.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016-2017 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # a space separated list of UPS names @@ -22,3 +22,7 @@ # the charts priority on the dashboard #nut_priority=90000 + +# the number of retries to do in case of failure +# before disabling the module +#nut_retries=10 diff --git a/conf.d/charts.d/opensips.conf b/conf.d/charts.d/opensips.conf index abc4c70e..e25111dc 100644 --- a/conf.d/charts.d/opensips.conf +++ b/conf.d/charts.d/opensips.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ #opensips_opts="fifo get_statistics all" @@ -15,3 +15,7 @@ # the charts priority on the dashboard #opensips_priority=80000 + +# the number of retries to do in case of failure +# before disabling the module +#opensips_retries=10 diff --git a/conf.d/charts.d/phpfpm.conf b/conf.d/charts.d/phpfpm.conf index 1e857638..e4dd0231 100644 --- a/conf.d/charts.d/phpfpm.conf +++ b/conf.d/charts.d/phpfpm.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! 
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -20,3 +20,8 @@ # the charts priority on the dashboard #phpfpm_priority=60000 + +# the number of retries to do in case of failure +# before disabling the module +#phpfpm_retries=10 + diff --git a/conf.d/charts.d/postfix.conf b/conf.d/charts.d/postfix.conf index 7d33d266..b77817bd 100644 --- a/conf.d/charts.d/postfix.conf +++ b/conf.d/charts.d/postfix.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -18,3 +18,8 @@ # the charts priority on the dashboard #postfix_priority=60000 + +# the number of retries to do in case of failure +# before disabling the module +#postfix_retries=10 + diff --git a/conf.d/charts.d/sensors.conf b/conf.d/charts.d/sensors.conf index d42d17d2..bcb28807 100644 --- a/conf.d/charts.d/sensors.conf +++ b/conf.d/charts.d/sensors.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -25,3 +25,8 @@ # the charts priority on the dashboard #sensors_priority=90000 + +# the number of retries to do in case of failure +# before disabling the module +#sensors_retries=10 + diff --git a/conf.d/charts.d/squid.conf b/conf.d/charts.d/squid.conf index cf92c124..19e928f2 100644 --- a/conf.d/charts.d/squid.conf +++ b/conf.d/charts.d/squid.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! 
-# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -19,3 +19,8 @@ # the charts priority on the dashboard #squid_priority=60000 + +# the number of retries to do in case of failure +# before disabling the module +#squid_retries=10 + diff --git a/conf.d/charts.d/tomcat.conf b/conf.d/charts.d/tomcat.conf index 71066942..e9f3eefa 100644 --- a/conf.d/charts.d/tomcat.conf +++ b/conf.d/charts.d/tomcat.conf @@ -2,7 +2,7 @@ # netdata # real-time performance and health monitoring, done right! -# (C) 2016 Costa Tsaousis <costa@tsaousis.gr> +# (C) 2018 Costa Tsaousis <costa@tsaousis.gr> # GPL v3+ # THIS PLUGIN IS DEPRECATED @@ -24,6 +24,10 @@ # the charts priority on the dashboard #tomcat_priority=60000 +# the number of retries to do in case of failure +# before disabling the module +#tomcat_retries=10 + # convert tomcat floating point values # to integer using this multiplier # this only affects precision - the values diff --git a/conf.d/health.d/backend.conf b/conf.d/health.d/backend.conf index 9c193e7b..7af100d8 100644 --- a/conf.d/health.d/backend.conf +++ b/conf.d/health.d/backend.conf @@ -27,7 +27,7 @@ units: metrics calc: abs($lost) every: 10s - crit: $this != 0 + crit: ($this != 0) || ($status == $CRITICAL && abs($sent) == 0) delay: down 5m multiplier 1.5 max 1h info: number of metrics lost due to repeating failures to contact the backend server to: dba diff --git a/conf.d/health.d/btrfs.conf b/conf.d/health.d/btrfs.conf new file mode 100644 index 00000000..b27aa544 --- /dev/null +++ b/conf.d/health.d/btrfs.conf @@ -0,0 +1,57 @@ + +template: btrfs_allocated + on: btrfs.disk + os: * + hosts: * +families: * + calc: 100 - ($unallocated * 100 / ($unallocated + $data_used + $data_free + $meta_used + $meta_free + $sys_used + $sys_free)) + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (90) : (95)) + crit: $this > (($status == $CRITICAL) ? 
(95) : (98)) + delay: up 1m down 15m multiplier 1.5 max 1h + info: the percentage of allocated BTRFS physical disk space + to: sysadmin + +template: btrfs_data + on: btrfs.data + os: * + hosts: * +families: * + calc: $used * 100 / ($used + $free) + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (90) : (95)) && $btrfs_allocated > 98 + crit: $this > (($status == $CRITICAL) ? (95) : (98)) && $btrfs_allocated > 98 + delay: up 1m down 15m multiplier 1.5 max 1h + info: the percentage of used BTRFS data space + to: sysadmin + +template: btrfs_metadata + on: btrfs.metadata + os: * + hosts: * +families: * + calc: ($used + $reserved) * 100 / ($used + $free + $reserved) + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (90) : (95)) && $btrfs_allocated > 98 + crit: $this > (($status == $CRITICAL) ? (95) : (98)) && $btrfs_allocated > 98 + delay: up 1m down 15m multiplier 1.5 max 1h + info: the percentage of used BTRFS metadata space + to: sysadmin + +template: btrfs_system + on: btrfs.system + os: * + hosts: * +families: * + calc: $used * 100 / ($used + $free) + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (90) : (95)) && $btrfs_allocated > 98 + crit: $this > (($status == $CRITICAL) ? 
(95) : (98)) && $btrfs_allocated > 98 + delay: up 1m down 15m multiplier 1.5 max 1h + info: the percentage of used BTRFS system space + to: sysadmin + diff --git a/conf.d/health.d/ceph.conf b/conf.d/health.d/ceph.conf new file mode 100644 index 00000000..de16f7b6 --- /dev/null +++ b/conf.d/health.d/ceph.conf @@ -0,0 +1,13 @@ +# low ceph disk available + +template: cluster_space_usage + on: ceph.general_usage + calc: $avail * 100 / ($avail + $used) + units: % + every: 10s + warn: $this < 10 + crit: $this < 1 + delay: down 5m multiplier 1.2 max 1h + info: ceph disk usage is almost full + to: sysadmin + diff --git a/conf.d/health.d/cpu.conf b/conf.d/health.d/cpu.conf index db628556..fa818985 100644 --- a/conf.d/health.d/cpu.conf +++ b/conf.d/health.d/cpu.conf @@ -39,3 +39,17 @@ template: 20min_steal_cpu delay: down 1h multiplier 1.5 max 2h info: average CPU steal time for the last 20 minutes to: sysadmin + +## FreeBSD +template: 10min_cpu_usage + on: system.cpu + os: freebsd + hosts: * + lookup: average -10m unaligned of user,system,interrupt + units: % + every: 1m + warn: $this > (($status >= $WARNING) ? (75) : (85)) + crit: $this > (($status == $CRITICAL) ? 
(85) : (95)) + delay: down 15m multiplier 1.5 max 1h + info: average cpu utilization for the last 10 minutes (excluding nice) + to: sysadmin diff --git a/conf.d/health.d/disks.conf b/conf.d/health.d/disks.conf index 63053491..26f85848 100644 --- a/conf.d/health.d/disks.conf +++ b/conf.d/health.d/disks.conf @@ -11,7 +11,7 @@ template: disk_space_usage on: disk.space - os: linux + os: linux freebsd hosts: * families: * calc: $used * 100 / ($avail + $used) @@ -25,7 +25,7 @@ families: * template: disk_inode_usage on: disk.inodes - os: linux + os: linux freebsd hosts: * families: * calc: $used * 100 / ($avail + $used) @@ -51,7 +51,7 @@ families: * template: disk_fill_rate on: disk.space - os: linux + os: linux freebsd hosts: * families: * lookup: min -10m at -50m unaligned of avail @@ -67,7 +67,7 @@ families: * template: out_of_disk_space_time on: disk.space - os: linux + os: linux freebsd hosts: * families: * calc: ($disk_fill_rate > 0) ? ($avail / $disk_fill_rate) : (inf) @@ -81,6 +81,47 @@ families: * # ----------------------------------------------------------------------------- +# disk inode fill rate + +# calculate the rate the disk inodes are allocated +# use as base, the available inodes change +# during the last hour + +# this is just a calculation - it has no alarm +# we will use it in the next template to find +# the hours remaining + +template: disk_inode_rate + on: disk.inodes + os: linux freebsd + hosts: * +families: * + lookup: min -10m at -50m unaligned of avail + calc: ($this - $avail) / (($now - $after) / 3600) + every: 1m + units: inodes/hour + info: average rate at which disk inodes are allocated (positive), or freed (negative), for the last hour + +# calculate the hours remaining +# if the disk inodes are allocated +# in this rate + +template: out_of_disk_inodes_time + on: disk.inodes + os: linux freebsd + hosts: * +families: * + calc: ($disk_inode_rate > 0) ? 
($avail / $disk_inode_rate) : (inf) + units: hours + every: 10s + warn: $this > 0 and $this < (($status >= $WARNING) ? (48) : (8)) + crit: $this > 0 and $this < (($status == $CRITICAL) ? (24) : (2)) + delay: down 15m multiplier 1.2 max 1h + info: estimated time the disk will run out of inodes, if the system continues to allocate inodes with the rate of the last hour + to: sysadmin + + +# ----------------------------------------------------------------------------- # disk congestion # raise an alarm if the disk is congested @@ -89,7 +130,7 @@ families: * template: 10min_disk_utilization on: disk.util - os: linux + os: linux freebsd hosts: * families: * lookup: average -10m unaligned diff --git a/conf.d/health.d/fronius.conf b/conf.d/health.d/fronius.conf new file mode 100644 index 00000000..cdf6c8fc --- /dev/null +++ b/conf.d/health.d/fronius.conf @@ -0,0 +1,11 @@ +template: fronius_last_collected_secs +families: * + on: fronius.power + calc: $now - $last_collected_t + every: 10s + units: seconds ago + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: sitemgr diff --git a/conf.d/health.d/httpcheck.conf b/conf.d/health.d/httpcheck.conf new file mode 100644 index 00000000..0ddf35ea --- /dev/null +++ b/conf.d/health.d/httpcheck.conf @@ -0,0 +1,99 @@ +template: httpcheck_last_collected_secs +families: * + on: httpcheck.status + calc: $now - $last_collected_t + every: 10s + units: seconds ago + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? 
($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: sysadmin + +# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges +template: web_service_up +families: * + on: httpcheck.status + lookup: average -1m unaligned percentage of success + calc: ($this < 75) ? (0) : ($this) + every: 5s + units: up/down + info: at least 75% verified responses during last 60 seconds, ideal for badges + to: silent + +template: web_service_bad_content +families: * + on: httpcheck.status + lookup: average -5m unaligned percentage of bad_content + every: 10s + units: % + warn: $this >= 10 AND $this < 40 + crit: $this >= 40 + delay: down 5m multiplier 1.5 max 1h + info: average of unexpected http response content during the last 5 minutes + options: no-clear-notification + to: webmaster + +template: web_service_bad_status +families: * + on: httpcheck.status + lookup: average -5m unaligned percentage of bad_status + every: 10s + units: % + warn: $this >= 10 AND $this < 40 + crit: $this >= 40 + delay: down 5m multiplier 1.5 max 1h + info: average of unexpected http status during the last 5 minutes + options: no-clear-notification + to: webmaster + +template: web_service_timeouts +families: * + on: httpcheck.status + lookup: average -5m unaligned percentage of timeout + every: 10s + units: % + info: average of timeouts during the last 5 minutes + +template: no_web_service_connections +families: * + on: httpcheck.status + lookup: average -5m unaligned percentage of no_connection + every: 10s + units: % + info: average of failed requests during the last 5 minutes + +# combined timeout & no connection alarm +template: web_service_unreachable +families: * + on: httpcheck.status + calc: ($no_web_service_connections >= $web_service_timeouts) ? 
($no_web_service_connections) : ($web_service_timeouts) + units: % + every: 10s + warn: ($no_web_service_connections >= 10 OR $web_service_timeouts >= 10) AND ($no_web_service_connections < 40 OR $web_service_timeouts < 40) + crit: $no_web_service_connections >= 40 OR $web_service_timeouts >= 40 + delay: down 5m multiplier 1.5 max 1h + info: average of failed requests either due to timeouts or no connection during the last 5 minutes + options: no-clear-notification + to: webmaster + +template: 1h_web_service_response_time +families: * + on: httpcheck.responsetime + lookup: average -1h unaligned of time + every: 30s + units: ms + info: average response time over the last hour + +template: web_service_slow +families: * + on: httpcheck.responsetime + lookup: average -3m unaligned of time + units: ms + every: 10s + warn: ($this > ($1h_web_service_response_time * 2) ) + crit: ($this > ($1h_web_service_response_time * 3) ) + info: average response time over the last 3 minutes, compared to the average over the last hour + delay: down 5m multiplier 1.5 max 1h + options: no-clear-notification + to: webmaster diff --git a/conf.d/health.d/isc_dhcpd.conf b/conf.d/health.d/isc_dhcpd.conf index 4345619a..8054656f 100644 --- a/conf.d/health.d/isc_dhcpd.conf +++ b/conf.d/health.d/isc_dhcpd.conf @@ -1,10 +1,10 @@ - alarm: isc_dhcpd_parse_time - on: isc_dhcpd.parse_time - units: ms + template: isc_dhcpd_leases_size + on: isc_dhcpd.leases_total + units: KB every: 60 - calc: $ptime - warn: $this > 100 - crit: $this > 250 + calc: $leases_size + warn: $this > 3072 + crit: $this > 6144 delay: up 2m down 5m - info: Parsing too slow! It can slow down your server. Check dhcpd.leases file size. + info: dhcpd.leases file too big! Module can slow down your server. 
to: sysadmin diff --git a/conf.d/health.d/net.conf b/conf.d/health.d/net.conf index 00a19861..22a88927 100644 --- a/conf.d/health.d/net.conf +++ b/conf.d/health.d/net.conf @@ -98,7 +98,7 @@ families: * template: 1m_received_packets_rate on: net.packets - os: linux + os: linux freebsd hosts: * families: * lookup: average -1m of received @@ -108,7 +108,7 @@ families: * template: 10s_received_packets_storm on: net.packets - os: linux + os: linux freebsd hosts: * families: * lookup: average -10s of received @@ -120,4 +120,3 @@ families: * options: no-clear-notification info: the % of the rate of received packets in the last 10 seconds, compared to the rate of the last minute (clear notification for this alarm will not be sent) to: sysadmin - diff --git a/conf.d/health.d/nginx_plus.conf b/conf.d/health.d/nginx_plus.conf new file mode 100644 index 00000000..5a171a76 --- /dev/null +++ b/conf.d/health.d/nginx_plus.conf @@ -0,0 +1,14 @@ + +# make sure nginx_plus is running + +template: nginx_plus_last_collected_secs + on: nginx_plus.requests_total + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: webmaster + diff --git a/conf.d/health.d/portcheck.conf b/conf.d/health.d/portcheck.conf new file mode 100644 index 00000000..f42b63d3 --- /dev/null +++ b/conf.d/health.d/portcheck.conf @@ -0,0 +1,48 @@ +template: portcheck_last_collected_secs +families: * + on: portcheck.status + calc: $now - $last_collected_t + every: 10s + units: seconds ago + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? 
($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: sysadmin + +# This is a fast-reacting no-notification alarm ideal for custom dashboards or badges +template: service_reachable +families: * + on: portcheck.status + lookup: average -1m unaligned percentage of success + calc: ($this < 75) ? (0) : ($this) + every: 5s + units: up/down + info: at least 75% successful connections during last 60 seconds, ideal for badges + to: silent + +template: connection_timeouts +families: * + on: portcheck.status + lookup: average -5m unaligned percentage of timeout + every: 10s + units: % + warn: $this >= 10 AND $this < 40 + crit: $this >= 40 + delay: down 5m multiplier 1.5 max 1h + info: average of timeouts during the last 5 minutes + options: no-clear-notification + to: sysadmin + +template: connection_fails +families: * + on: portcheck.status + lookup: average -5m unaligned percentage of no_connection + every: 10s + units: % + warn: $this >= 10 AND $this < 40 + crit: $this >= 40 + delay: down 5m multiplier 1.5 max 1h + info: average of failed connections during the last 5 minutes + options: no-clear-notification + to: sysadmin diff --git a/conf.d/health.d/ram.conf b/conf.d/health.d/ram.conf index 8d0e8838..b6dc5f94 100644 --- a/conf.d/health.d/ram.conf +++ b/conf.d/health.d/ram.conf @@ -20,5 +20,45 @@ warn: $this > (($status >= $WARNING) ? (80) : (90)) crit: $this > (($status == $CRITICAL) ? (90) : (98)) delay: down 15m multiplier 1.5 max 1h - info: system RAM usage + info: system RAM used to: sysadmin + + alarm: ram_available + on: mem.available + os: linux + hosts: * + calc: ($avail + $used_ram_to_ignore) * 100 / ($system.ram.used + $system.ram.cached + $system.ram.free + $system.ram.buffers) + units: % + every: 10s + warn: $this < (($status >= $WARNING) ? ( 5) : (10)) + crit: $this < (($status == $CRITICAL) ? 
(10) : ( 5)) + delay: down 15m multiplier 1.5 max 1h + info: estimated amount of RAM available for userspace processes, without causing swapping + to: sysadmin + +## FreeBSD +alarm: ram_in_use + on: system.ram + os: freebsd +hosts: * + calc: (($active + $wired) - $used_ram_to_ignore) * 100 / (($active + $wired) - $used_ram_to_ignore + $cached + $free) +units: % +every: 10s + warn: $this > (($status >= $WARNING) ? (80) : (90)) + crit: $this > (($status == $CRITICAL) ? (90) : (98)) +delay: down 15m multiplier 1.5 max 1h + info: system RAM usage + to: sysadmin + + alarm: ram_available + on: system.ram + os: freebsd + hosts: * + calc: ($free + $inactive + $used_ram_to_ignore) * 100 / ($free + $active + $inactive + $wired + $cache + $buffers) + units: % + every: 10s + warn: $this < (($status >= $WARNING) ? ( 5) : (10)) + crit: $this < (($status == $CRITICAL) ? (10) : ( 5)) + delay: down 15m multiplier 1.5 max 1h + info: estimated amount of RAM available for userspace processes, without causing swapping + to: sysadmin diff --git a/conf.d/health.d/softnet.conf b/conf.d/health.d/softnet.conf index 64e1c678..77c804bf 100644 --- a/conf.d/health.d/softnet.conf +++ b/conf.d/health.d/softnet.conf @@ -24,5 +24,17 @@ every: 1m warn: $this > (($status >= $WARNING) ? 
(0) : (10)) delay: down 1h multiplier 1.5 max 2h - info: number of times, during the last 10min, ksoftirq ran out of sysctl net.core.netdev_budget or time slice, with work remaining (this can be a cause for dropped packets) + info: number of times, during the last 10min, ksoftirq ran out of sysctl net.core.netdev_budget or net.core.netdev_budget_usecs, with work remaining (this can be a cause for dropped packets) to: silent + + alarm: 10min_netisr_backlog_exceeded + on: system.softnet_stat + os: freebsd + hosts: * + lookup: sum -10m unaligned absolute of qdrops + units: packets + every: 1m + warn: $this > 0 + delay: down 1h multiplier 1.5 max 2h + info: number of drops in the last 10min, because sysctl net.route.netisr_maxqlen was exceeded (this can be a cause for dropped packets) + to: sysadmin diff --git a/conf.d/health.d/stiebeleltron.conf b/conf.d/health.d/stiebeleltron.conf new file mode 100644 index 00000000..e0361eb2 --- /dev/null +++ b/conf.d/health.d/stiebeleltron.conf @@ -0,0 +1,11 @@ +template: stiebeleltron_last_collected_secs +families: * + on: stiebeleltron.heating.hc1 + calc: $now - $last_collected_t + every: 10s + units: seconds ago + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful data collection + to: sitemgr diff --git a/conf.d/health.d/swap.conf b/conf.d/health.d/swap.conf index 830a9af9..f920b080 100644 --- a/conf.d/health.d/swap.conf +++ b/conf.d/health.d/swap.conf @@ -3,7 +3,7 @@ alarm: 30min_ram_swapped_out on: system.swapio - os: linux + os: linux freebsd hosts: * lookup: sum -30m unaligned absolute of out # we have to convert KB to MB by dividing $this (i.e. the result of the lookup) with 1024 @@ -25,19 +25,19 @@ every: 10s warn: $this > (($status >= $WARNING) ? (15) : (20)) crit: $this > (($status == $CRITICAL) ? 
(40) : (50)) - delay: up 0 down 15m multiplier 1.5 max 1h + delay: up 30s down 15m multiplier 1.5 max 1h info: the swap memory used, as a percentage of the system RAM to: sysadmin alarm: used_swap on: system.swap - os: linux + os: linux freebsd hosts: * calc: $used * 100 / ( $used + $free ) units: % every: 10s warn: $this > (($status >= $WARNING) ? (80) : (90)) crit: $this > (($status == $CRITICAL) ? (90) : (98)) - delay: up 0 down 15m multiplier 1.5 max 1h + delay: up 30s down 15m multiplier 1.5 max 1h info: the percentage of swap memory used to: sysadmin diff --git a/conf.d/health.d/tcp_resets.conf b/conf.d/health.d/tcp_resets.conf index e6cfd39a..91dad3c6 100644 --- a/conf.d/health.d/tcp_resets.conf +++ b/conf.d/health.d/tcp_resets.conf @@ -5,7 +5,7 @@ alarm: ipv4_tcphandshake_last_collected_secs on: ipv4.tcphandshake - os: linux + os: linux freebsd hosts: * calc: $now - $last_collected_t units: seconds ago @@ -46,7 +46,7 @@ alarm: 1m_ipv4_tcp_resets_received on: ipv4.tcphandshake - os: linux + os: linux freebsd hosts: * lookup: average -1m at -10s unaligned absolute of AttemptFails units: tcp resets/s @@ -55,7 +55,7 @@ alarm: 10s_ipv4_tcp_resets_received on: ipv4.tcphandshake - os: linux + os: linux freebsd hosts: * lookup: average -10s unaligned absolute of AttemptFails units: tcp resets/s diff --git a/conf.d/health.d/udp_errors.conf b/conf.d/health.d/udp_errors.conf index 33338b83..382b3965 100644 --- a/conf.d/health.d/udp_errors.conf +++ b/conf.d/health.d/udp_errors.conf @@ -5,7 +5,7 @@ alarm: ipv4_udperrors_last_collected_secs on: ipv4.udperrors - os: linux + os: linux freebsd hosts: * calc: $now - $last_collected_t units: seconds ago @@ -21,7 +21,7 @@ alarm: 1m_ipv4_udp_receive_buffer_errors on: ipv4.udperrors - os: linux + os: linux freebsd hosts: * lookup: sum -1m unaligned absolute of RcvbufErrors units: errors diff --git a/conf.d/health.d/web_log.conf b/conf.d/health.d/web_log.conf index d1808817..d8be88b4 100644 --- a/conf.d/health.d/web_log.conf +++ 
b/conf.d/health.d/web_log.conf @@ -116,6 +116,7 @@ families: * crit: ($1m_requests > 120) ? ($this > $red && $this > ($10m_response_time * 4) ) : ( 0 ) delay: down 15m multiplier 1.5 max 1h info: the average time to respond to HTTP requests, over the last 1 minute + options: no-clear-notification to: webmaster # ----------------------------------------------------------------------------- diff --git a/conf.d/health_alarm_notify.conf b/conf.d/health_alarm_notify.conf index eb01e2bb..0a95931e 100644..100755 --- a/conf.d/health_alarm_notify.conf +++ b/conf.d/health_alarm_notify.conf @@ -7,12 +7,14 @@ # - e-mails (using the sendmail command), # - push notifications to your mobile phone (pushover.net), # - messages to your slack team (slack.com), +# - messages to your alerta server (alerta.io), # - messages to your flock team (flock.com), # - messages to your discord guild (discordapp.com), # - messages to your telegram chat / group chat (telegram.org) # - sms messages to your cell phone or any sms enabled device (twilio.com) # - sms messages to your cell phone or any sms enabled device (messagebird.com) # - notifications to users on pagerduty.com +# - messages to your irc channel on your selected network # # The 'to' line given at netdata alarms defines a *role*, so that many # people can be notified for each role. @@ -23,7 +25,7 @@ #------------------------------------------------------------------------------ # proxy configuration # -# If you need to send curl based notifications (pushover, pushbullet, slack, +# If you need to send curl based notifications (pushover, pushbullet, slack, alerta, # flock, discord, telegram) via a proxy, set these to your proxy address: #export http_proxy="http://10.0.0.1:3128/" #export https_proxy="http://10.0.0.1:3128/" @@ -54,6 +56,23 @@ sendmail="" # If not found, most notifications will be silently disabled. curl="" +# The full path of the nc command. +# If empty, the system $PATH will be searched for it. 
+# If not found, irc notifications will be silently disabled. +nc="" + +#------------------------------------------------------------------------------ +# extra options for external commands +# +# In some cases, you may need to change what options get passed to an +# external command. Such cases are covered here. + +# Extra options to pass to curl. In most cases, you shouldn't need to add anything +# to this. If you're having issues with HTTPS connections, you might try adding +# '--insecure' here, but be warned that it will make it much easier for +# third-parties to block notification delivery, and may allow disclosure +# of potentially sensitive information. +#curl_options="--insecure" #------------------------------------------------------------------------------ # NOTE ABOUT RECIPIENTS @@ -64,11 +83,13 @@ curl="" # - pushover user tokens # - telegram chat ids # - slack channels +# - alerta environment # - flock rooms # - discord channels # - hipchat rooms # - sms phone numbers # - pagerduty.com (pd) services +# - irc channels # # You can append |critical to limit the notifications to be sent. # @@ -79,15 +100,17 @@ curl="" # pushover : "2987343...9437837 8756278...2362736|critical" # telegram : "111827421 112746832|critical" # slack : "alarms disasters|critical" +# alerta : "alarms disasters|critical" # flock : "alarms disasters|critical" # discord : "alarms disasters|critical" # twilio : "+15555555555 +17777777777|critical" # messagebird: "+15555555555 +17777777777|critical" # kavenegar : "09155555555 09177777777|critical" # pd : "<pd_service_key_1> <pd_service_key_2>|critical" +# irc : "<irc_channel_1> <irc_channel_2>|critical" # # If a recipient is set to empty string, the default recipient of the given -# notification method (email, pushover, telegram, slack, etc) will be used. +# notification method (email, pushover, telegram, slack, alerta, etc) will be used. 
# To disable a notification, use the recipient called: disabled # This works for all notification methods (including the default recipients). @@ -276,6 +299,32 @@ DEFAULT_RECIPIENT_SLACK="" #------------------------------------------------------------------------------ +# alerta (alerta.io) global notification options + +# multiple recipients (Environments) can be given like this: +# "Production Development ..." + +# enable/disable sending alerta notifications +SEND_ALERTA="YES" + +# here set your alerta server API url +# this is the API url you defined when you installed the Alerta server, +# it is the same for all users. Do not include a trailing slash. +# ALERTA_WEBHOOK_URL="https://<server>/alerta/api" +ALERTA_WEBHOOK_URL="" + +# Log in with an administrative user to your Alerta server and create an API KEY +# with write permissions. +ALERTA_API_KEY="" + +# you can define environments in /etc/alertad.conf option ALLOWED_ENVIRONMENTS +# standard environments are Production and Development +# if a role's recipients are not configured, a notification will be sent to +# this Environment (empty = do not send a notification for unconfigured roles): +DEFAULT_RECIPIENT_ALERTA="" + + +#------------------------------------------------------------------------------ # flock (flock.com) global notification options # enable/disable sending flock notifications @@ -364,6 +413,34 @@ DEFAULT_RECIPIENT_PD="" #------------------------------------------------------------------------------ +# irc notification options +# +# irc notifications require only the nc utility to be installed. + +# multiple recipients can be given like this: +# "<irc_channel_1> <irc_channel_2> ..." + +# enable/disable sending irc notifications +SEND_IRC="YES" + +# if a role's recipients are not configured, a notification will not be sent. +# (empty = do not send a notification for unconfigured roles): +DEFAULT_RECIPIENT_IRC="" + +# The irc network to which the recipients belong. It must be the full network. +# e.g.
"irc.freenode.net" +IRC_NETWORK="" + +# The irc nickname which is required to send the notification. It must not be +# an already registered name as the connection's MODE is defined as a 'guest'. +IRC_NICKNAME="" + +# The irc realname which is required in order to make the connection and is an +# extra identifier. +IRC_REALNAME="" + + +#------------------------------------------------------------------------------ # custom notifications # @@ -442,6 +519,8 @@ role_recipients_telegram[sysadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" role_recipients_slack[sysadmin]="${DEFAULT_RECIPIENT_SLACK}" +role_recipients_alerta[sysadmin]="${DEFAULT_RECIPIENT_ALERTA}" + role_recipients_flock[sysadmin]="${DEFAULT_RECIPIENT_FLOCK}" role_recipients_discord[sysadmin]="${DEFAULT_RECIPIENT_DISCORD}" @@ -456,6 +535,8 @@ role_recipients_kavenegar[sysadmin]="${DEFAULT_RECIPIENT_KAVENEGAR}" role_recipients_pd[sysadmin]="${DEFAULT_RECIPIENT_PD}" +role_recipients_irc[sysadmin]="${DEFAULT_RECIPIENT_IRC}" + role_recipients_custom[sysadmin]="${DEFAULT_RECIPIENT_CUSTOM}" # ----------------------------------------------------------------------------- @@ -471,6 +552,8 @@ role_recipients_telegram[domainadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" role_recipients_slack[domainadmin]="${DEFAULT_RECIPIENT_SLACK}" +role_recipients_alerta[domainadmin]="${DEFAULT_RECIPIENT_ALERTA}" + role_recipients_flock[domainadmin]="${DEFAULT_RECIPIENT_FLOCK}" role_recipients_discord[domainadmin]="${DEFAULT_RECIPIENT_DISCORD}" @@ -485,6 +568,8 @@ role_recipients_kavenegar[domainadmin]="${DEFAULT_RECIPIENT_KAVENEGAR}" role_recipients_pd[domainadmin]="${DEFAULT_RECIPIENT_PD}" +role_recipients_irc[domainadmin]="${DEFAULT_RECIPIENT_IRC}" + role_recipients_custom[domainadmin]="${DEFAULT_RECIPIENT_CUSTOM}" # ----------------------------------------------------------------------------- @@ -501,6 +586,8 @@ role_recipients_telegram[dba]="${DEFAULT_RECIPIENT_TELEGRAM}" role_recipients_slack[dba]="${DEFAULT_RECIPIENT_SLACK}" 
+role_recipients_alerta[dba]="${DEFAULT_RECIPIENT_ALERTA}" + role_recipients_flock[dba]="${DEFAULT_RECIPIENT_FLOCK}" role_recipients_discord[dba]="${DEFAULT_RECIPIENT_DISCORD}" @@ -515,6 +602,8 @@ role_recipients_kavenegar[dba]="${DEFAULT_RECIPIENT_KAVENEGAR}" role_recipients_pd[dba]="${DEFAULT_RECIPIENT_PD}" +role_recipients_irc[dba]="${DEFAULT_RECIPIENT_IRC}" + role_recipients_custom[dba]="${DEFAULT_RECIPIENT_CUSTOM}" # ----------------------------------------------------------------------------- @@ -531,6 +620,8 @@ role_recipients_telegram[webmaster]="${DEFAULT_RECIPIENT_TELEGRAM}" role_recipients_slack[webmaster]="${DEFAULT_RECIPIENT_SLACK}" +role_recipients_alerta[webmaster]="${DEFAULT_RECIPIENT_ALERTA}" + role_recipients_flock[webmaster]="${DEFAULT_RECIPIENT_FLOCK}" role_recipients_discord[webmaster]="${DEFAULT_RECIPIENT_DISCORD}" @@ -545,6 +636,8 @@ role_recipients_kavenegar[webmaster]="${DEFAULT_RECIPIENT_KAVENEGAR}" role_recipients_pd[webmaster]="${DEFAULT_RECIPIENT_PD}" +role_recipients_irc[webmaster]="${DEFAULT_RECIPIENT_IRC}" + role_recipients_custom[webmaster]="${DEFAULT_RECIPIENT_CUSTOM}" # ----------------------------------------------------------------------------- @@ -561,6 +654,8 @@ role_recipients_telegram[proxyadmin]="${DEFAULT_RECIPIENT_TELEGRAM}" role_recipients_slack[proxyadmin]="${DEFAULT_RECIPIENT_SLACK}" +role_recipients_alerta[proxyadmin]="${DEFAULT_RECIPIENT_ALERTA}" + role_recipients_flock[proxyadmin]="${DEFAULT_RECIPIENT_FLOCK}" role_recipients_discord[proxyadmin]="${DEFAULT_RECIPIENT_DISCORD}" @@ -575,4 +670,39 @@ role_recipients_kavenegar[proxyadmin]="${DEFAULT_RECIPIENT_KAVENEGAR}" role_recipients_pd[proxyadmin]="${DEFAULT_RECIPIENT_PD}" +role_recipients_irc[proxyadmin]="${DEFAULT_RECIPIENT_IRC}" + role_recipients_custom[proxyadmin]="${DEFAULT_RECIPIENT_CUSTOM}" + +# ----------------------------------------------------------------------------- +# peripheral devices +# UPS, photovoltaics, etc + 
+role_recipients_email[sitemgr]="${DEFAULT_RECIPIENT_EMAIL}" + +role_recipients_pushover[sitemgr]="${DEFAULT_RECIPIENT_PUSHOVER}" + +role_recipients_pushbullet[sitemgr]="${DEFAULT_RECIPIENT_PUSHBULLET}" + +role_recipients_telegram[sitemgr]="${DEFAULT_RECIPIENT_TELEGRAM}" + +role_recipients_slack[sitemgr]="${DEFAULT_RECIPIENT_SLACK}" + +role_recipients_alerta[sitemgr]="${DEFAULT_RECIPIENT_ALERTA}" + +role_recipients_flock[sitemgr]="${DEFAULT_RECIPIENT_FLOCK}" + +role_recipients_discord[sitemgr]="${DEFAULT_RECIPIENT_DISCORD}" + +role_recipients_hipchat[sitemgr]="${DEFAULT_RECIPIENT_HIPCHAT}" + +role_recipients_twilio[sitemgr]="${DEFAULT_RECIPIENT_TWILIO}" + +role_recipients_messagebird[sitemgr]="${DEFAULT_RECIPIENT_MESSAGEBIRD}" + +role_recipients_kavenegar[sitemgr]="${DEFAULT_RECIPIENT_KAVENEGAR}" + +role_recipients_pd[sitemgr]="${DEFAULT_RECIPIENT_PD}" + +role_recipients_custom[sitemgr]="${DEFAULT_RECIPIENT_CUSTOM}" + diff --git a/conf.d/python.d.conf b/conf.d/python.d.conf index 2c3d400c..bb57738b 100644 --- a/conf.d/python.d.conf +++ b/conf.d/python.d.conf @@ -15,7 +15,7 @@ enabled: yes # # If "default_run" = "yes" the default for all modules is enabled (yes). # Setting any of these to "no" will disable it. -# +# # If "default_run" = "no" the default for all modules is disabled (no). # Setting any of these to "yes" will enable it. 
@@ -24,6 +24,7 @@ apache_cache: no # apache: yes # beanstalk: yes # bind_rndc: yes +# ceph: yes chrony: no # couchdb: yes # cpufreq: yes @@ -45,6 +46,7 @@ gunicorn_log: no go_expvar: no # haproxy: yes # hddtemp: yes +# icecast: yes # ipfs: yes # isc_dhcpd: yes # mdstat: yes @@ -52,11 +54,13 @@ go_expvar: no # mongodb: yes # mysql: yes # nginx: yes +# nginx_plus: yes # nsd: yes +# ntpd: yes # nginx_log has been replaced by web_log nginx_log: no - +# ntpd: yes # ovpn_status_log: yes # phpfpm: yes # postfix: yes @@ -69,6 +73,7 @@ nginx_log: no # samba: yes # smartd_log: yes # squid: yes +# springboot: yes # tomcat: yes # varnish: yes # web_log: yes diff --git a/conf.d/python.d/ceph.conf b/conf.d/python.d/ceph.conf new file mode 100644 index 00000000..78ac1e25 --- /dev/null +++ b/conf.d/python.d/ceph.conf @@ -0,0 +1,75 @@ +# netdata python.d.plugin configuration for ceph stats +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 10 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# retries sets the number of retries to be made in case of failures. +# If unset, the default for python.d.plugin is used. 
+# Attempts to restore the service are made once every update_every +# and only if the module has collected values in the past. +# retries: 60 + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 10 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# retries: 60 # the JOB's number of restoration attempts +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, ceph plugin also supports the following: +# +# config_file: 'config_file' # Ceph config file. +# keyring_file: 'keyring_file' # Ceph keyring file. The netdata user must be added to the ceph group +# # and the keyring file must be readable by that group.
+# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) +# +config_file: '/etc/ceph/ceph.conf' +keyring_file: '/etc/ceph/ceph.client.admin.keyring' + diff --git a/conf.d/python.d/httpcheck.conf b/conf.d/python.d/httpcheck.conf new file mode 100644 index 00000000..058e057a --- /dev/null +++ b/conf.d/python.d/httpcheck.conf @@ -0,0 +1,99 @@ +# netdata python.d.plugin configuration for httpcheck +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the httpcheck default is used, which is at 3 seconds. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# chart_cleanup sets the default chart cleanup interval in iterations. +# A chart is marked as obsolete if it has not been updated +# 'chart_cleanup' iterations in a row. +# They will be hidden immediately (not offered to dashboard viewer, +# streamed upstream and archived to backends) and deleted one hour +# later (configurable from netdata.conf). 
+# -- For this plugin, cleanup MUST be disabled, otherwise we lose response +# time charts +chart_cleanup: 0 + +# Autodetection and retries do not work for this plugin + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# ------------------------------- +# ATTENTION: Any valid configuration will be accepted, even if initial connection fails! +# ------------------------------- +# +# There is intentionally no default config, e.g. for 'localhost' + +# job_name: +# name: myname # [optional] the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 3 # [optional] the JOB's data collection frequency +# priority: 60000 # [optional] the JOB's order on the dashboard +# retries: 60 # [optional] the JOB's number of restoration attempts +# timeout: 1 # [optional] the timeout when connecting, supports decimals (e.g. 0.5s) +# url: 'http[s]://host-ip-or-dns[:port][path]' +# # [required] the remote host url to connect to. If [:port] is missing, it defaults to 80 +# # for HTTP and 443 for HTTPS. [path] is optional too, defaults to / +# redirect: yes # [optional] If the remote host returns 3xx status codes, the redirection url will be +# # followed (default). +# status_accepted: # [optional] By default, 200 is accepted. Anything else will result in 'bad status' in the +# # status chart, however: The response time will still be > 0, since the +# # host responded with something. +# # If redirect is enabled, the accepted status will be checked against the redirected page. +# - 200 # Multiple status codes are possible. 
If you specify 'status_accepted', you would still +# # need to add '200'. E.g. 'status_accepted: [301]' will trigger an error in 'bad status' +# # if code is 200. Do specify numerical entries such as 200, not 'OK'. +# regex: None # [optional] If the status code is accepted, the content of the response will be searched for this +# # regex (if defined). Be aware that you may need to escape the regex string. If redirect is enabled, +# # the regex will be matched to the redirected page, not the initial 3xx response. + +# Simple example: +# +# jira: +# url: 'https://jira.localdomain/' + + +# Complex example: +# +# cool_website: +# url: 'http://cool.website:8080/home' +# status_accepted: +# - 200 +# - 204 +# regex: <title>My cool website!<\/title> +# timeout: 2 + +# This plugin is intended for simple cases. Currently, the accuracy of the response time is low and should be used as reference only. + diff --git a/conf.d/python.d/icecast.conf b/conf.d/python.d/icecast.conf new file mode 100644 index 00000000..a900d06d --- /dev/null +++ b/conf.d/python.d/icecast.conf @@ -0,0 +1,83 @@ +# netdata python.d.plugin configuration for icecast +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. 
+# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# retries sets the number of retries to be made in case of failures. +# If unset, the default for python.d.plugin is used. +# Attempts to restore the service are made once every update_every +# and only if the module has collected values in the past. +# retries: 60 + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. 
These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# retries: 60 # the JOB's number of restoration attempts +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, icecast also supports the following: +# +# url: 'URL' # the URL to fetch icecast's stats +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name : 'local' + url : 'http://localhost:8443/status-json.xsl' + +localipv4: + name : 'local' + url : 'http://127.0.0.1:8443/status-json.xsl'
\ No newline at end of file diff --git a/conf.d/python.d/mysql.conf b/conf.d/python.d/mysql.conf index def9f7e9..b5956a2c 100644 --- a/conf.d/python.d/mysql.conf +++ b/conf.d/python.d/mysql.conf @@ -85,12 +85,19 @@ # to connect to the mysql server on localhost, without a password: # # > create user 'netdata'@'localhost'; -# > grant usage on *.* to 'netdata'@'localhost' with grant option; +# > grant usage on *.* to 'netdata'@'localhost'; # > flush privileges; # # with the above statements, netdata will be able to gather mysql # statistics, without the ability to see or alter any data or affect # mysql operation in any way. No change is required below. +# +# If you need to monitor mysql replication too, use this instead: +# +# > create user 'netdata'@'localhost'; +# > grant replication client on *.* to 'netdata'@'localhost'; +# > flush privileges; +# # ---------------------------------------------------------------------- # AUTO-DETECTION JOBS diff --git a/conf.d/python.d/nginx_plus.conf b/conf.d/python.d/nginx_plus.conf new file mode 100644 index 00000000..7b5c8f43 --- /dev/null +++ b/conf.d/python.d/nginx_plus.conf @@ -0,0 +1,87 @@ +# netdata python.d.plugin configuration for nginx_plus +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. 
+# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# retries sets the number of retries to be made in case of failures. +# If unset, the default for python.d.plugin is used. +# Attempts to restore the service are made once every update_every +# and only if the module has collected values in the past. +# retries: 60 + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. 
These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# retries: 60 # the JOB's number of restoration attempts +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, nginx_plus also supports the following: +# +# url: 'URL' # the URL to fetch nginx_plus's stats +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' + +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name : 'local' + url : 'http://localhost/status' + +localipv4: + name : 'local' + url : 'http://127.0.0.1/status' + +localipv6: + name : 'local' + url : 'http://[::1]/status' diff --git a/conf.d/python.d/ntpd.conf b/conf.d/python.d/ntpd.conf new file mode 100644 index 00000000..7adc4074 --- /dev/null +++ b/conf.d/python.d/ntpd.conf @@ -0,0 +1,91 @@ +# netdata python.d.plugin configuration for ntpd +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. 
+# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# retries sets the number of retries to be made in case of failures. +# If unset, the default for python.d.plugin is used. +# Attempts to restore the service are made once every update_every +# and only if the module has collected values in the past. +# retries: 60 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# retries: 60 # the JOB's number of restoration attempts +# +# Additionally to the above, ntp also supports the following: +# +# host: 'localhost' # the host to query +# port: '123' # the UDP port where `ntpd` listens +# show_peers: no # use `yes` to show peer charts. enabling this +# # option is recommended only for debugging, as +# # it could possibly imply memory leaks if the +# # peers change frequently. +# peer_filter: '127\..*' # regex to exclude peers +# # by default local peers are hidden +# # use `''` to show all peers. 
+# peer_rescan: 60 # interval (>0) to check for new/changed peers +# # use `1` to check on every update +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) + +localhost: + name: 'local' + host: 'localhost' + port: '123' + show_peers: no + +localhost_ipv4: + name: 'local' + host: '127.0.0.1' + port: '123' + show_peers: no + +localhost_ipv6: + name: 'local' + host: '::1' + port: '123' + show_peers: no diff --git a/conf.d/python.d/portcheck.conf b/conf.d/python.d/portcheck.conf new file mode 100644 index 00000000..b3dd8bd3 --- /dev/null +++ b/conf.d/python.d/portcheck.conf @@ -0,0 +1,70 @@ +# netdata python.d.plugin configuration for portcheck +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# chart_cleanup sets the default chart cleanup interval in iterations. +# A chart is marked as obsolete if it has not been updated +# 'chart_cleanup' iterations in a row. +# They will be hidden immediately (not offered to dashboard viewer, +# streamed upstream and archived to backends) and deleted one hour +# later (configurable from netdata.conf). 
+# -- For this plugin, cleanup MUST be disabled, otherwise we lose latency chart +chart_cleanup: 0 + +# Autodetection and retries do not work for this plugin + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# ------------------------------- +# ATTENTION: Any valid configuration will be accepted, even if initial connection fails! +# ------------------------------- +# +# There is intentionally no default config for 'localhost' + +# job_name: +# name: myname # [optional] the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # [optional] the JOB's data collection frequency +# priority: 60000 # [optional] the JOB's order on the dashboard +# retries: 60 # [optional] the JOB's number of restoration attempts +# timeout: 1 # [optional] the socket timeout when connecting +# host: 'dns or ip' # [required] the remote host address in either IPv4, IPv6 or as DNS name. +# port: 22 # [required] the port number to check. Specify an integer, not service name. + +# You just have been warned about possible portscan blocking. The portcheck plugin is meant for simple use cases. +# Currently, the accuracy of the latency is low and should be used as reference only. + diff --git a/conf.d/python.d/postgres.conf b/conf.d/python.d/postgres.conf index 3a70a718..b69ca371 100644 --- a/conf.d/python.d/postgres.conf +++ b/conf.d/python.d/postgres.conf @@ -82,9 +82,18 @@ # a postgres user for netdata and add its password below to allow # netdata connect. 
# -# Without superuser access, netdata won't be able to generate the write -# ahead log and the background writer charts. -# +# Postgres supported versions are : +# - 9.3 (without autovacuum) +# - 9.4 +# - 9.5 +# - 9.6 +# - 10 +# +# Superuser access is needed for these charts: +# Write-Ahead Logs +# Archive Write-Ahead Logs +# +# Autovacuum charts are available since Postgres 9.4 # ---------------------------------------------------------------------- socket: diff --git a/conf.d/python.d/springboot.conf b/conf.d/python.d/springboot.conf new file mode 100644 index 00000000..40b5fb43 --- /dev/null +++ b/conf.d/python.d/springboot.conf @@ -0,0 +1,120 @@ +# netdata python.d.plugin configuration for springboot +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# retries sets the number of retries to be made in case of failures. +# If unset, the default for python.d.plugin is used. +# Attempts to restore the service are made once every update_every +# and only if the module has collected values in the past. +# retries: 60 + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. 
+# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# retries: 60 # the JOB's number of restoration attempts +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, this plugin also supports the following: +# +# url: 'http://127.0.0.1/metrics' # the URL of the spring boot actuator metrics +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' +# +# defaults: +# [chart_id]: true | false # enables/disables default charts, defaults true. +# extras: {} # defines extra charts to monitor, please see the example below +# - id: [chart_id] +# options: {} +# lines: [] +# +# If all defaults are disabled and no extra charts are defined, this module will disable itself, as it has no data to +# collect. 
+# +# Configuration example +# --------------------- +# example: +# name: 'example' +# url: 'http://localhost:8080/metrics' +# defaults: +# response_code: true +# threads: true +# gc_time: true +# gc_ope: true +# heap: false +# extras: +# - id: 'heap' +# options: { title: 'Heap Memory Usage', units: 'KB', family: 'heap memory', context: 'springboot.heap', charttype: 'stacked' } +# lines: +# - { dimension: 'mem_free', name: 'free'} +# - { dimension: 'mempool_eden_used', name: 'eden', algorithm: 'absolute', multiplier: 1, divisor: 1} +# - { dimension: 'mempool_survivor_used', name: 'survivor', algorithm: 'absolute', multiplier: 1, divisor: 1} +# - { dimension: 'mempool_tenured_used', name: 'tenured', algorithm: 'absolute', multiplier: 1, divisor: 1} +# - id: 'heap_eden' +# options: { title: 'Eden Memory Usage', units: 'KB', family: 'heap memory', context: 'springboot.heap_eden', charttype: 'area' } +# lines: +# - { dimension: 'mempool_eden_used', name: 'used'} +# - { dimension: 'mempool_eden_committed', name: 'committed'} +# - id: 'heap_survivor' +# options: { title: 'Survivor Memory Usage', units: 'KB', family: 'heap memory', context: 'springboot.heap_survivor', charttype: 'area' } +# lines: +# - { dimension: 'mempool_survivor_used', name: 'used'} +# - { dimension: 'mempool_survivor_committed', name: 'committed'} +# - id: 'heap_tenured' +# options: { title: 'Tenured Memory Usage', units: 'KB', family: 'heap memory', context: 'springboot.heap_tenured', charttype: 'area' } +# lines: +# - { dimension: 'mempool_tenured_used', name: 'used'} +# - { dimension: 'mempool_tenured_committed', name: 'committed'} + + +local: + name: 'local' + url: 'http://localhost:8080/metrics' + +local_ip: + name: 'local' + url: 'http://127.0.0.1:8080/metrics' diff --git a/conf.d/python.d/traefik.conf b/conf.d/python.d/traefik.conf new file mode 100644 index 00000000..909b9e54 --- /dev/null +++ b/conf.d/python.d/traefik.conf @@ -0,0 +1,79 @@ +# netdata python.d.plugin configuration for traefik 
health data API +# +# This file is in YaML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 1 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# retries sets the number of retries to be made in case of failures. +# If unset, the default for python.d.plugin is used. +# Attempts to restore the service are made once every update_every +# and only if the module has collected values in the past. +# retries: 60 + +# autodetection_retry sets the job re-check interval in seconds. +# The job is not deleted if check fails. +# Attempts to start the job are made once every autodetection_retry. +# This feature is disabled by default. +# autodetection_retry: 0 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) +# +# The default JOBS share the same *name*. JOBS with the same name +# are mutually exclusive. Only one of them will be allowed running at +# any time. This allows autodetection to try several alternatives and +# pick the one that works. +# +# Any number of jobs is supported. +# +# All python.d.plugin JOBS (for all its modules) support a set of +# predefined parameters. 
These are: +# +# job_name: +# name: myname # the JOB's name as it will appear at the +# # dashboard (by default is the job_name) +# # JOBs sharing a name are mutually exclusive +# update_every: 1 # the JOB's data collection frequency +# priority: 60000 # the JOB's order on the dashboard +# retries: 10 # the JOB's number of restoration attempts +# autodetection_retry: 0 # the JOB's re-check interval in seconds +# +# Additionally to the above, traefik plugin also supports the following: +# +# url: '<scheme>://<host>:<port>/<health_page_api>' +# # http://localhost:8080/health +# +# if the URL is password protected, the following are supported: +# +# user: 'username' +# pass: 'password' +# +# ---------------------------------------------------------------------- +# AUTO-DETECTION JOBS +# only one of them will run (they have the same name) +# +local: + url: 'http://localhost:8080/health' diff --git a/conf.d/python.d/web_log.conf b/conf.d/python.d/web_log.conf index dd1fff07..c185f8d8 100644 --- a/conf.d/python.d/web_log.conf +++ b/conf.d/python.d/web_log.conf @@ -85,6 +85,7 @@ # custom_log_format: # define a custom log format # pattern: '(?P<address>[\da-f.:]+) -.*?"(?P<method>[A-Z]+) (?P<url>.*?)" (?P<code>[1-9]\d{2}) (?P<bytes_sent>\d+) (?P<resp_length>\d+) (?P<resp_time>\d+\.\d+) ' # time_multiplier: 1000000 # type <int> - convert time to microseconds +# histogram: [1,3,10,30,100, ...] # type list of int - Cumulative histogram of response time in milliseconds # ---------------------------------------------------------------------- # WEB SERVER CONFIGURATION diff --git a/conf.d/stream.conf b/conf.d/stream.conf index 8945529e..d0c9a8b1 100644 --- a/conf.d/stream.conf +++ b/conf.d/stream.conf @@ -112,6 +112,13 @@ # postpone alarms for a short period after the sender is connected default postpone alarms on connect seconds = 60 + # allow or deny multiple connections for the same host? 
+ # If you are sure all your netdata have their own machine GUID, + # set this to 'allow', since it allows faster reconnects. + # When set to 'deny', new connections for a host will not be + # accepted until an existing connection is cleared. + multiple connections = allow + # need to route metrics differently? set these. # the defaults are the ones at the [stream] section #default proxy enabled = yes | no @@ -159,6 +166,13 @@ # postpone alarms when the sender connects postpone alarms on connect seconds = 60 + # allow or deny multiple connections for the same host? + # If you are sure all your netdata have their own machine GUID, + # set this to 'allow', since it allows faster reconnects. + # When set to 'deny', new connections for a host will not be + # accepted until an existing connection is cleared. + multiple connections = allow + # need to route metrics differently? #proxy enabled = yes | no #proxy destination = IP:PORT IP:PORT ... |