summary refs log tree commit diff stats
path: root/health/health.d
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d')
-rw-r--r-- health/health.d/linux_power_supply.conf 2
-rw-r--r-- health/health.d/mdstat.conf 10
-rw-r--r-- health/health.d/web_log.conf 30
3 files changed, 41 insertions, 1 deletions
diff --git a/health/health.d/linux_power_supply.conf b/health/health.d/linux_power_supply.conf
index 27a172a1..745d2c3d 100644
--- a/health/health.d/linux_power_supply.conf
+++ b/health/health.d/linux_power_supply.conf
@@ -1,7 +1,7 @@
# Alert on low battery capacity.
template: linux_power_supply_capacity
- on: power_supply.capacity
+ on: powersupply.capacity
calc: $capacity
units: %
every: 10s
diff --git a/health/health.d/mdstat.conf b/health/health.d/mdstat.conf
index 0f5f2837..a53ec7a5 100644
--- a/health/health.d/mdstat.conf
+++ b/health/health.d/mdstat.conf
@@ -25,3 +25,13 @@ template: mdstat_mismatch_cnt
crit: $this > 0
info: Mismatch count!
to: sysadmin
+
+template: mdstat_nonredundant_last_collected
+ on: md.nonredundant
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ info: number of seconds since the last successful data collection
+ to: sysadmin
\ No newline at end of file
diff --git a/health/health.d/web_log.conf b/health/health.d/web_log.conf
index d8be88b4..031adc2e 100644
--- a/health/health.d/web_log.conf
+++ b/health/health.d/web_log.conf
@@ -85,6 +85,36 @@ families: *
info: the ratio of HTTP internal server errors (5xx), over the last minute
to: webmaster
+# unmatched lines
+
+# the following alarms trigger only when there are enough data.
+# we assume there are enough data when:
+#
+# $1m_total_requests > 120
+#
+# i.e. when there are at least 120 requests during the last minute
+
+template: 1m_total_requests
+ on: web_log.response_codes
+families: *
+ lookup: sum -1m unaligned
+ calc: ($this == 0)?(1):($this)
+ units: requests
+ every: 10s
+ info: the sum of all HTTP requests over the last minute
+
+template: 1m_unmatched
+on: web_log.response_codes
+families: *
+ lookup: sum -1m unaligned of unmatched
+ calc: $this * 100 / $1m_total_requests
+ units: %
+ every: 10s
+ warn: ($1m_total_requests > 120) ? ($this > 1) : ( 0 )
+ crit: ($1m_total_requests > 120) ? ($this > 5) : ( 0 )
+ delay: up 1m down 5m multiplier 1.5 max 1h
+ info: the ratio of unmatched lines, over the last minute
+ to: webmaster
# -----------------------------------------------------------------------------
# web slow