summary | refs | log | tree | commit | diff | stats
path: root/src/health/health.d/storcli.conf
diff options
context:
space:
mode:
Diffstat (limited to 'src/health/health.d/storcli.conf')
-rw-r--r-- src/health/health.d/storcli.conf 61
1 files changed, 61 insertions, 0 deletions
diff --git a/src/health/health.d/storcli.conf b/src/health/health.d/storcli.conf
new file mode 100644
index 000000000..be71b517e
--- /dev/null
+++ b/src/health/health.d/storcli.conf
@@ -0,0 +1,61 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+# Controllers
+
+ template: storcli_controller_health_status
+ on: storcli.controller_health_status
+ class: Errors
+ type: System
+component: RAID
+ lookup: average -1m unaligned percentage of healthy
+ units: %
+ every: 10s
+ crit: $this < 100
+ delay: down 5m multiplier 2 max 10m
+ summary: RAID controller ${label:controller_number} health
+ info: RAID controller ${label:controller_number} is unhealthy
+ to: sysadmin
+
+ template: storcli_controller_bbu_status
+ on: storcli.controller_bbu_status
+ class: Errors
+ type: System
+component: RAID
+ lookup: average -1m unaligned percentage of healthy,na
+ units: %
+ every: 10s
+ crit: $this < 100
+ delay: down 5m multiplier 2 max 10m
+ summary: RAID controller ${label:controller_number} BBU health
+ info: RAID controller ${label:controller_number} BBU is unhealthy
+ to: sysadmin
+
+# Physical Drives
+
+ template: storcli_phys_drive_errors
+ on: storcli.phys_drive_errors
+ class: Errors
+ type: System
+component: RAID
+ lookup: sum -10s
+ units: errors
+ every: 10s
+ warn: $this > 0
+ delay: up 1m down 5m multiplier 2 max 10m
+ summary: RAID PD c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors
+ info: RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors
+ to: sysadmin
+
+ template: storcli_phys_drive_predictive_failures
+ on: storcli.phys_drive_predictive_failures
+ class: Errors
+ type: System
+component: RAID
+ lookup: sum -10s
+ units: failures
+ every: 10s
+ warn: $this > 0
+ delay: up 1m down 5m multiplier 2 max 10m
+ summary: RAID PD c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures
+ info: RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures
+ to: sysadmin