summary | refs | log | tree | commit | diff | stats
path: root/health/health.d/mdstat.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/mdstat.conf')
-rw-r--r-- health/health.d/mdstat.conf 85
1 files changed, 49 insertions, 36 deletions
diff --git a/health/health.d/mdstat.conf b/health/health.d/mdstat.conf
index ca2d0d9fb..67483b201 100644
--- a/health/health.d/mdstat.conf
+++ b/health/health.d/mdstat.conf
@@ -1,39 +1,52 @@
-template: mdstat_last_collected
- on: md.disks
- calc: $now - $last_collected_t
- units: seconds ago
- every: 10s
- warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
- info: number of seconds since the last successful data collection
- to: sysadmin
+ template: mdstat_last_collected
+ on: md.disks
+ class: System
+component: RAID
+ type: Latency
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ info: number of seconds since the last successful data collection
+ to: sysadmin
-template: mdstat_disks
- on: md.disks
- units: failed devices
- every: 10s
- calc: $down
- crit: $this > 0
- info: number of devices in the down state. \
- Any number > 0 indicates that the array is degraded.
- to: sysadmin
+ template: mdstat_disks
+ on: md.disks
+ class: System
+component: RAID
+ type: Errors
+ units: failed devices
+ every: 10s
+ calc: $down
+ crit: $this > 0
+ info: number of devices in the down state for the $family array. \
+ Any number > 0 indicates that the array is degraded.
+ to: sysadmin
-template: mdstat_mismatch_cnt
- on: md.mismatch_cnt
- units: unsynchronized blocks
- calc: $count
- every: 60s
- warn: $this > 1024
- delay: up 30m
- info: number of unsynchronized blocks
- to: sysadmin
+ template: mdstat_mismatch_cnt
+ on: md.mismatch_cnt
+ class: System
+component: RAID
+ type: Errors
+ families: !*(raid1) !*(raid10) *
+ units: unsynchronized blocks
+ calc: $count
+ every: 60s
+ warn: $this > 1024
+ delay: up 30m
+ info: number of unsynchronized blocks for the $family array
+ to: sysadmin
-template: mdstat_nonredundant_last_collected
- on: md.nonredundant
- calc: $now - $last_collected_t
- units: seconds ago
- every: 10s
- warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
- crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
- info: number of seconds since the last successful data collection
- to: sysadmin
+ template: mdstat_nonredundant_last_collected
+ on: md.nonredundant
+ class: System
+component: RAID
+ type: Latency
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ info: number of seconds since the last successful data collection
+ to: sysadmin