diff options
Diffstat (limited to '')
-rw-r--r-- | conf.d/health.d/tcp_listen.conf | 27 | ||||
-rw-r--r-- | health/health.d/apache.conf (renamed from conf.d/health.d/apache.conf) | 0 | ||||
-rw-r--r-- | health/health.d/backend.conf (renamed from conf.d/health.d/backend.conf) | 0 | ||||
-rw-r--r-- | health/health.d/beanstalkd.conf (renamed from conf.d/health.d/beanstalkd.conf) | 0 | ||||
-rw-r--r-- | health/health.d/bind_rndc.conf (renamed from conf.d/health.d/bind_rndc.conf) | 0 | ||||
-rw-r--r-- | health/health.d/btrfs.conf (renamed from conf.d/health.d/btrfs.conf) | 0 | ||||
-rw-r--r-- | health/health.d/ceph.conf (renamed from conf.d/health.d/ceph.conf) | 0 | ||||
-rw-r--r-- | health/health.d/couchdb.conf (renamed from conf.d/health.d/couchdb.conf) | 0 | ||||
-rw-r--r-- | health/health.d/cpu.conf (renamed from conf.d/health.d/cpu.conf) | 0 | ||||
-rw-r--r-- | health/health.d/disks.conf (renamed from conf.d/health.d/disks.conf) | 0 | ||||
-rw-r--r-- | health/health.d/elasticsearch.conf (renamed from conf.d/health.d/elasticsearch.conf) | 0 | ||||
-rw-r--r-- | health/health.d/entropy.conf (renamed from conf.d/health.d/entropy.conf) | 0 | ||||
-rw-r--r-- | health/health.d/fping.conf (renamed from conf.d/health.d/fping.conf) | 0 | ||||
-rw-r--r-- | health/health.d/fronius.conf (renamed from conf.d/health.d/fronius.conf) | 0 | ||||
-rw-r--r-- | health/health.d/haproxy.conf (renamed from conf.d/health.d/haproxy.conf) | 0 | ||||
-rw-r--r-- | health/health.d/httpcheck.conf (renamed from conf.d/health.d/httpcheck.conf) | 0 | ||||
-rw-r--r-- | health/health.d/ipc.conf (renamed from conf.d/health.d/ipc.conf) | 4 | ||||
-rw-r--r-- | health/health.d/ipfs.conf (renamed from conf.d/health.d/ipfs.conf) | 0 | ||||
-rw-r--r-- | health/health.d/ipmi.conf (renamed from conf.d/health.d/ipmi.conf) | 0 | ||||
-rw-r--r-- | health/health.d/isc_dhcpd.conf (renamed from conf.d/health.d/isc_dhcpd.conf) | 0 | ||||
-rw-r--r-- | health/health.d/lighttpd.conf (renamed from conf.d/health.d/lighttpd.conf) | 0 | ||||
-rw-r--r-- | health/health.d/mdstat.conf (renamed from conf.d/health.d/mdstat.conf) | 23 | ||||
-rw-r--r-- | health/health.d/memcached.conf (renamed from conf.d/health.d/memcached.conf) | 0 | ||||
-rw-r--r-- | health/health.d/memory.conf (renamed from conf.d/health.d/memory.conf) | 0 | ||||
-rw-r--r-- | health/health.d/mongodb.conf (renamed from conf.d/health.d/mongodb.conf) | 0 | ||||
-rw-r--r-- | health/health.d/mysql.conf (renamed from conf.d/health.d/mysql.conf) | 15 | ||||
-rw-r--r-- | health/health.d/named.conf (renamed from conf.d/health.d/named.conf) | 0 | ||||
-rw-r--r-- | health/health.d/net.conf (renamed from conf.d/health.d/net.conf) | 37 | ||||
-rw-r--r-- | health/health.d/netfilter.conf (renamed from conf.d/health.d/netfilter.conf) | 2 | ||||
-rw-r--r-- | health/health.d/nginx.conf (renamed from conf.d/health.d/nginx.conf) | 0 | ||||
-rw-r--r-- | health/health.d/nginx_plus.conf (renamed from conf.d/health.d/nginx_plus.conf) | 0 | ||||
-rw-r--r-- | health/health.d/portcheck.conf (renamed from conf.d/health.d/portcheck.conf) | 0 | ||||
-rw-r--r-- | health/health.d/postgres.conf (renamed from conf.d/health.d/postgres.conf) | 0 | ||||
-rw-r--r-- | health/health.d/qos.conf (renamed from conf.d/health.d/qos.conf) | 0 | ||||
-rw-r--r-- | health/health.d/ram.conf (renamed from conf.d/health.d/ram.conf) | 6 | ||||
-rw-r--r-- | health/health.d/redis.conf (renamed from conf.d/health.d/redis.conf) | 0 | ||||
-rw-r--r-- | health/health.d/retroshare.conf (renamed from conf.d/health.d/retroshare.conf) | 0 | ||||
-rw-r--r-- | health/health.d/softnet.conf (renamed from conf.d/health.d/softnet.conf) | 0 | ||||
-rw-r--r-- | health/health.d/squid.conf (renamed from conf.d/health.d/squid.conf) | 0 | ||||
-rw-r--r-- | health/health.d/stiebeleltron.conf (renamed from conf.d/health.d/stiebeleltron.conf) | 0 | ||||
-rw-r--r-- | health/health.d/swap.conf (renamed from conf.d/health.d/swap.conf) | 0 | ||||
-rw-r--r-- | health/health.d/tcp_conn.conf (renamed from conf.d/health.d/tcp_conn.conf) | 0 | ||||
-rw-r--r-- | health/health.d/tcp_mem.conf (renamed from conf.d/health.d/tcp_mem.conf) | 0 | ||||
-rw-r--r-- | health/health.d/tcp_orphans.conf (renamed from conf.d/health.d/tcp_orphans.conf) | 0 | ||||
-rw-r--r-- | health/health.d/tcp_resets.conf (renamed from conf.d/health.d/tcp_resets.conf) | 0 | ||||
-rw-r--r-- | health/health.d/udp_errors.conf (renamed from conf.d/health.d/udp_errors.conf) | 4 | ||||
-rw-r--r-- | health/health.d/varnish.conf (renamed from conf.d/health.d/varnish.conf) | 0 | ||||
-rw-r--r-- | health/health.d/web_log.conf (renamed from conf.d/health.d/web_log.conf) | 0 | ||||
-rw-r--r-- | health/health.d/zfs.conf (renamed from conf.d/health.d/zfs.conf) | 0 |
49 files changed, 74 insertions, 44 deletions
diff --git a/conf.d/health.d/tcp_listen.conf b/conf.d/health.d/tcp_listen.conf deleted file mode 100644 index 957964ae4..000000000 --- a/conf.d/health.d/tcp_listen.conf +++ /dev/null @@ -1,27 +0,0 @@ -# ----------------------------------------------------------------------------- -# tcp listen sockets issues - - alarm: 1m_ipv4_tcp_listen_overflows - on: ipv4.tcplistenissues - os: linux freebsd - hosts: * - lookup: sum -60s unaligned absolute of ListenOverflows - units: overflows - every: 10s - crit: $this > 0 - delay: up 0 down 5m multiplier 1.5 max 1h - info: the number of TCP listen socket overflows during the last minute - to: sysadmin - - alarm: 1m_ipv4_tcp_listen_drops - on: ipv4.tcplistenissues - os: linux - hosts: * - lookup: sum -60s unaligned absolute of ListenDrops - units: drops - every: 10s - crit: $this > 0 - delay: up 0 down 5m multiplier 1.5 max 1h - info: the number of TCP listen socket drops during the last minute - to: sysadmin - diff --git a/conf.d/health.d/apache.conf b/health/health.d/apache.conf index 0c98b8778..0c98b8778 100644 --- a/conf.d/health.d/apache.conf +++ b/health/health.d/apache.conf diff --git a/conf.d/health.d/backend.conf b/health/health.d/backend.conf index 7af100d8f..7af100d8f 100644 --- a/conf.d/health.d/backend.conf +++ b/health/health.d/backend.conf diff --git a/conf.d/health.d/beanstalkd.conf b/health/health.d/beanstalkd.conf index 30dc27328..30dc27328 100644 --- a/conf.d/health.d/beanstalkd.conf +++ b/health/health.d/beanstalkd.conf diff --git a/conf.d/health.d/bind_rndc.conf b/health/health.d/bind_rndc.conf index 4145e77cd..4145e77cd 100644 --- a/conf.d/health.d/bind_rndc.conf +++ b/health/health.d/bind_rndc.conf diff --git a/conf.d/health.d/btrfs.conf b/health/health.d/btrfs.conf index b27aa544f..b27aa544f 100644 --- a/conf.d/health.d/btrfs.conf +++ b/health/health.d/btrfs.conf diff --git a/conf.d/health.d/ceph.conf b/health/health.d/ceph.conf index de16f7b6f..de16f7b6f 100644 --- a/conf.d/health.d/ceph.conf +++ b/health/health.d/ceph.conf diff --git a/conf.d/health.d/couchdb.conf b/health/health.d/couchdb.conf index 4a2895280..4a2895280 100644 --- a/conf.d/health.d/couchdb.conf +++ b/health/health.d/couchdb.conf diff --git a/conf.d/health.d/cpu.conf b/health/health.d/cpu.conf index fa8189856..fa8189856 100644 --- a/conf.d/health.d/cpu.conf +++ b/health/health.d/cpu.conf diff --git a/conf.d/health.d/disks.conf b/health/health.d/disks.conf index 26f85848a..26f85848a 100644 --- a/conf.d/health.d/disks.conf +++ b/health/health.d/disks.conf diff --git a/conf.d/health.d/elasticsearch.conf b/health/health.d/elasticsearch.conf index dffd40965..dffd40965 100644 --- a/conf.d/health.d/elasticsearch.conf +++ b/health/health.d/elasticsearch.conf diff --git a/conf.d/health.d/entropy.conf b/health/health.d/entropy.conf index 66d44ec13..66d44ec13 100644 --- a/conf.d/health.d/entropy.conf +++ b/health/health.d/entropy.conf diff --git a/conf.d/health.d/fping.conf b/health/health.d/fping.conf index 43658fef6..43658fef6 100644 --- a/conf.d/health.d/fping.conf +++ b/health/health.d/fping.conf diff --git a/conf.d/health.d/fronius.conf b/health/health.d/fronius.conf index cdf6c8fcb..cdf6c8fcb 100644 --- a/conf.d/health.d/fronius.conf +++ b/health/health.d/fronius.conf diff --git a/conf.d/health.d/haproxy.conf b/health/health.d/haproxy.conf index e49c70d48..e49c70d48 100644 --- a/conf.d/health.d/haproxy.conf +++ b/health/health.d/haproxy.conf diff --git a/conf.d/health.d/httpcheck.conf b/health/health.d/httpcheck.conf index 0ddf35eab..0ddf35eab 100644 --- a/conf.d/health.d/httpcheck.conf +++ b/health/health.d/httpcheck.conf diff --git a/conf.d/health.d/ipc.conf b/health/health.d/ipc.conf index 03cf264d8..989d6e912 100644 --- a/conf.d/health.d/ipc.conf +++ b/health/health.d/ipc.conf @@ -5,7 +5,7 @@ on: system.ipc_semaphores os: linux hosts: * - calc: $semaphores * 100 / $ipc.semaphores.max + calc: $semaphores * 100 / $ipc_semaphores_max units: % every: 10s warn: $this > (($status >= $WARNING) ? (70) : (80)) @@ -18,7 +18,7 @@ on: system.ipc_semaphore_arrays os: linux hosts: * - calc: $arrays * 100 / $ipc.semaphores.arrays.max + calc: $arrays * 100 / $ipc_semaphores_arrays_max units: % every: 10s warn: $this > (($status >= $WARNING) ? (70) : (80)) diff --git a/conf.d/health.d/ipfs.conf b/health/health.d/ipfs.conf index 3f77572d6..3f77572d6 100644 --- a/conf.d/health.d/ipfs.conf +++ b/health/health.d/ipfs.conf diff --git a/conf.d/health.d/ipmi.conf b/health/health.d/ipmi.conf index c25581964..c25581964 100644 --- a/conf.d/health.d/ipmi.conf +++ b/health/health.d/ipmi.conf diff --git a/conf.d/health.d/isc_dhcpd.conf b/health/health.d/isc_dhcpd.conf index 8054656ff..8054656ff 100644 --- a/conf.d/health.d/isc_dhcpd.conf +++ b/health/health.d/isc_dhcpd.conf diff --git a/conf.d/health.d/lighttpd.conf b/health/health.d/lighttpd.conf index 915907a4a..915907a4a 100644 --- a/conf.d/health.d/lighttpd.conf +++ b/health/health.d/lighttpd.conf diff --git a/conf.d/health.d/mdstat.conf b/health/health.d/mdstat.conf index c9e7d20db..0f5f2837e 100644 --- a/conf.d/health.d/mdstat.conf +++ b/health/health.d/mdstat.conf @@ -1,3 +1,13 @@ +template: mdstat_last_collected + on: md.disks + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + info: number of seconds since the last successful data collection + to: sysadmin + template: mdstat_disks on: md.disks units: failed devices @@ -7,12 +17,11 @@ template: mdstat_disks info: Array is degraded! to: sysadmin -template: mdstat_last_collected - on: md.disks - calc: $now - $last_collected_t - units: seconds ago +template: mdstat_mismatch_cnt + on: md.mismatch_cnt + units: unsynchronized blocks + calc: $count every: 10s - warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) - crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) - info: number of seconds since the last successful data collection + crit: $this > 0 + info: Mismatch count! to: sysadmin diff --git a/conf.d/health.d/memcached.conf b/health/health.d/memcached.conf index d248ef57a..d248ef57a 100644 --- a/conf.d/health.d/memcached.conf +++ b/health/health.d/memcached.conf diff --git a/conf.d/health.d/memory.conf b/health/health.d/memory.conf index 4a0e6e522..4a0e6e522 100644 --- a/conf.d/health.d/memory.conf +++ b/health/health.d/memory.conf diff --git a/conf.d/health.d/mongodb.conf b/health/health.d/mongodb.conf index a80cb3112..a80cb3112 100644 --- a/conf.d/health.d/mongodb.conf +++ b/health/health.d/mongodb.conf diff --git a/conf.d/health.d/mysql.conf b/health/health.d/mysql.conf index 1eeb993f0..39c401915 100644 --- a/conf.d/health.d/mysql.conf +++ b/health/health.d/mysql.conf @@ -60,6 +60,21 @@ template: mysql_10s_waited_locks_ratio # ----------------------------------------------------------------------------- +# connections + +template: mysql_connections + on: mysql.connections_active + calc: $active * 100 / $limit + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (60) : (70)) + crit: $this > (($status == $CRITICAL) ? (80) : (90)) + delay: down 15m multiplier 1.5 max 1h + info: the ratio of current active connections vs the maximum possible number of connections + to: dba + + +# ----------------------------------------------------------------------------- # replication template: mysql_replication diff --git a/conf.d/health.d/named.conf b/health/health.d/named.conf index 4fc65c8ee..4fc65c8ee 100644 --- a/conf.d/health.d/named.conf +++ b/health/health.d/named.conf diff --git a/conf.d/health.d/net.conf b/health/health.d/net.conf index 22a88927d..489016dd5 100644 --- a/conf.d/health.d/net.conf +++ b/health/health.d/net.conf @@ -2,6 +2,39 @@ # you can disable an alarm notification by setting the 'to' line to: silent # ----------------------------------------------------------------------------- +# net traffic overflow + + template: 1m_received_traffic_overflow + on: net.net + os: linux + hosts: * + families: * + lookup: average -1m unaligned absolute of received + calc: ($nic_speed_max > 0) ? ($this * 100 / ($nic_speed_max * 1000)) : ( nan ) + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (80) : (85)) + crit: $this > (($status == $CRITICAL) ? (85) : (90)) + delay: down 1m multiplier 1.5 max 1h + info: interface received bandwidth usage over net device speed max + to: sysadmin + + template: 1m_sent_traffic_overflow + on: net.net + os: linux + hosts: * + families: * + lookup: average -1m unaligned absolute of sent + calc: ($nic_speed_max > 0) ? ($this * 100 / ($nic_speed_max * 1000)) : ( nan ) + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (80) : (85)) + crit: $this > (($status == $CRITICAL) ? (85) : (90)) + delay: down 1m multiplier 1.5 max 1h + info: interface sent bandwidth usage over net device speed max + to: sysadmin + +# ----------------------------------------------------------------------------- # dropped packets # check if an interface is dropping packets @@ -101,7 +134,7 @@ template: 1m_received_packets_rate os: linux freebsd hosts: * families: * - lookup: average -1m of received + lookup: average -1m unaligned of received units: packets every: 10s info: the average number of packets received during the last minute @@ -111,7 +144,7 @@ template: 10s_received_packets_storm os: linux freebsd hosts: * families: * - lookup: average -10s of received + lookup: average -10s unaligned of received calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate)) every: 10s units: % diff --git a/conf.d/health.d/netfilter.conf b/health/health.d/netfilter.conf index fa1732b33..1d07752cc 100644 --- a/conf.d/health.d/netfilter.conf +++ b/health/health.d/netfilter.conf @@ -19,7 +19,7 @@ os: linux hosts: * lookup: max -10s unaligned of connections - calc: $this * 100 / $netfilter.conntrack.max + calc: $this * 100 / $netfilter_conntrack_max units: % every: 10s warn: $this > (($status >= $WARNING) ? (70) : (80)) diff --git a/conf.d/health.d/nginx.conf b/health/health.d/nginx.conf index a686c3d99..a686c3d99 100644 --- a/conf.d/health.d/nginx.conf +++ b/health/health.d/nginx.conf diff --git a/conf.d/health.d/nginx_plus.conf b/health/health.d/nginx_plus.conf index 5a171a76d..5a171a76d 100644 --- a/conf.d/health.d/nginx_plus.conf +++ b/health/health.d/nginx_plus.conf diff --git a/conf.d/health.d/portcheck.conf b/health/health.d/portcheck.conf index f42b63d30..f42b63d30 100644 --- a/conf.d/health.d/portcheck.conf +++ b/health/health.d/portcheck.conf diff --git a/conf.d/health.d/postgres.conf b/health/health.d/postgres.conf index 4e0583b85..4e0583b85 100644 --- a/conf.d/health.d/postgres.conf +++ b/health/health.d/postgres.conf diff --git a/conf.d/health.d/qos.conf b/health/health.d/qos.conf index 7290d15ff..7290d15ff 100644 --- a/conf.d/health.d/qos.conf +++ b/health/health.d/qos.conf diff --git a/conf.d/health.d/ram.conf b/health/health.d/ram.conf index b6dc5f945..4e437322c 100644 --- a/conf.d/health.d/ram.conf +++ b/health/health.d/ram.conf @@ -3,7 +3,7 @@ alarm: used_ram_to_ignore on: system.ram - os: linux + os: linux freebsd hosts: * calc: ($zfs.arc_size.arcsz = nan)?(0):($zfs.arc_size.arcsz) every: 10s @@ -41,7 +41,7 @@ alarm: ram_in_use on: system.ram os: freebsd hosts: * - calc: (($active + $wired) - $used_ram_to_ignore) * 100 / (($active + $wired) - $used_ram_to_ignore + $cached + $free) + calc: ($active + $wired + $laundry + $buffers - $used_ram_to_ignore) * 100 / ($active + $wired + $laundry + $buffers - $used_ram_to_ignore + $cache + $free + $inactive) units: % every: 10s warn: $this > (($status >= $WARNING) ? (80) : (90)) @@ -54,7 +54,7 @@ delay: down 15m multiplier 1.5 max 1h on: system.ram os: freebsd hosts: * - calc: ($free + $inactive + $used_ram_to_ignore) * 100 / ($free + $active + $inactive + $wired + $cache + $buffers) + calc: ($free + $inactive + $used_ram_to_ignore) * 100 / ($free + $active + $inactive + $wired + $cache + $laundry + $buffers) units: % every: 10s warn: $this < (($status >= $WARNING) ? ( 5) : (10)) diff --git a/conf.d/health.d/redis.conf b/health/health.d/redis.conf index c08a884a6..c08a884a6 100644 --- a/conf.d/health.d/redis.conf +++ b/health/health.d/redis.conf diff --git a/conf.d/health.d/retroshare.conf b/health/health.d/retroshare.conf index 2344b60ec..2344b60ec 100644 --- a/conf.d/health.d/retroshare.conf +++ b/health/health.d/retroshare.conf diff --git a/conf.d/health.d/softnet.conf b/health/health.d/softnet.conf index 77c804bfd..77c804bfd 100644 --- a/conf.d/health.d/softnet.conf +++ b/health/health.d/softnet.conf diff --git a/conf.d/health.d/squid.conf b/health/health.d/squid.conf index 06cc9678f..06cc9678f 100644 --- a/conf.d/health.d/squid.conf +++ b/health/health.d/squid.conf diff --git a/conf.d/health.d/stiebeleltron.conf b/health/health.d/stiebeleltron.conf index e0361eb20..e0361eb20 100644 --- a/conf.d/health.d/stiebeleltron.conf +++ b/health/health.d/stiebeleltron.conf diff --git a/conf.d/health.d/swap.conf b/health/health.d/swap.conf index f920b0807..f920b0807 100644 --- a/conf.d/health.d/swap.conf +++ b/health/health.d/swap.conf diff --git a/conf.d/health.d/tcp_conn.conf b/health/health.d/tcp_conn.conf index 7aa9a9800..7aa9a9800 100644 --- a/conf.d/health.d/tcp_conn.conf +++ b/health/health.d/tcp_conn.conf diff --git a/conf.d/health.d/tcp_mem.conf b/health/health.d/tcp_mem.conf index 6927d5765..6927d5765 100644 --- a/conf.d/health.d/tcp_mem.conf +++ b/health/health.d/tcp_mem.conf diff --git a/conf.d/health.d/tcp_orphans.conf b/health/health.d/tcp_orphans.conf index 280d6590f..280d6590f 100644 --- a/conf.d/health.d/tcp_orphans.conf +++ b/health/health.d/tcp_orphans.conf diff --git a/conf.d/health.d/tcp_resets.conf b/health/health.d/tcp_resets.conf index 91dad3c6a..91dad3c6a 100644 --- a/conf.d/health.d/tcp_resets.conf +++ b/health/health.d/tcp_resets.conf diff --git a/conf.d/health.d/udp_errors.conf b/health/health.d/udp_errors.conf index 382b39658..5140228f5 100644 --- a/conf.d/health.d/udp_errors.conf +++ b/health/health.d/udp_errors.conf @@ -27,7 +27,7 @@ units: errors every: 10s warn: $this > 0 - crit: $this > 100 + crit: $this > (($status == $CRITICAL) ? (0) : (100)) info: number of UDP receive buffer errors during the last minute delay: up 0 down 60m multiplier 1.2 max 2h to: sysadmin @@ -43,7 +43,7 @@ units: errors every: 10s warn: $this > 0 - crit: $this > 100 + crit: $this > (($status == $CRITICAL) ? (0) : (100)) info: number of UDP send buffer errors during the last minute delay: up 0 down 60m multiplier 1.2 max 2h to: sysadmin diff --git a/conf.d/health.d/varnish.conf b/health/health.d/varnish.conf index cca7446b4..cca7446b4 100644 --- a/conf.d/health.d/varnish.conf +++ b/health/health.d/varnish.conf diff --git a/conf.d/health.d/web_log.conf b/health/health.d/web_log.conf index d8be88b47..d8be88b47 100644 --- a/conf.d/health.d/web_log.conf +++ b/health/health.d/web_log.conf diff --git a/conf.d/health.d/zfs.conf b/health/health.d/zfs.conf index af73824e6..af73824e6 100644 --- a/conf.d/health.d/zfs.conf +++ b/health/health.d/zfs.conf |