summary | refs | log | tree | commit | diff | stats
path: root/health/health.d/mdstat.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/mdstat.conf')
-rw-r--r-- health/health.d/mdstat.conf 43
1 files changed, 43 insertions, 0 deletions
diff --git a/health/health.d/mdstat.conf b/health/health.d/mdstat.conf
new file mode 100644
index 00000000..90f97d85
--- /dev/null
+++ b/health/health.d/mdstat.conf
@@ -0,0 +1,43 @@
+
+ template: mdstat_disks
+ on: md.disks
+ class: Errors
+ type: System
+component: RAID
+ units: failed devices
+ every: 10s
+ calc: $down
+ warn: $this > 0
+ summary: MD array device ${label:device} down
+ info: Number of devices in the down state for the ${label:device} ${label:raid_level} array. \
+ Any number > 0 indicates that the array is degraded.
+ to: sysadmin
+
+ template: mdstat_mismatch_cnt
+ on: md.mismatch_cnt
+ class: Errors
+ type: System
+component: RAID
+chart labels: raid_level=!raid1 !raid10 *
+ units: unsynchronized blocks
+ calc: $count
+ every: 60s
+ warn: $this > 1024
+ delay: up 30m
+ summary: MD array device ${label:device} unsynchronized blocks
+ info: Number of unsynchronized blocks for the ${label:device} ${label:raid_level} array
+ to: silent
+
+ template: mdstat_nonredundant_last_collected
+ on: md.nonredundant
+ class: Latency
+ type: System
+component: RAID
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ summary: MD array last collected
+ info: Number of seconds since the last successful data collection
+ to: sysadmin