summaryrefslogtreecommitdiffstats
path: root/health/health.d/megacli.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/megacli.conf')
-rw-r--r--health/health.d/megacli.conf71
1 files changed, 71 insertions, 0 deletions
diff --git a/health/health.d/megacli.conf b/health/health.d/megacli.conf
new file mode 100644
index 0000000..9fbcfdb
--- /dev/null
+++ b/health/health.d/megacli.conf
@@ -0,0 +1,71 @@
+
+## Adapters (controllers)
+
+ template: megacli_adapter_state
+ on: megacli.adapter_degraded
+ class: Errors
+ type: System
+component: RAID
+ lookup: max -10s foreach *
+ units: boolean
+ every: 10s
+ crit: $this > 0
+ delay: down 5m multiplier 2 max 10m
+ info: adapter is in the degraded state (0: false, 1: true)
+ to: sysadmin
+
+## Physical Disks
+
+ template: megacli_pd_predictive_failures
+ on: megacli.pd_predictive_failure
+ class: Errors
+ type: System
+component: RAID
+ lookup: sum -10s foreach *
+ units: predictive failures
+ every: 10s
+ warn: $this > 0
+ delay: up 1m down 5m multiplier 2 max 10m
+ info: number of physical drive predictive failures
+ to: sysadmin
+
+ template: megacli_pd_media_errors
+ on: megacli.pd_media_error
+ class: Errors
+ type: System
+component: RAID
+ lookup: sum -10s foreach *
+ units: media errors
+ every: 10s
+ warn: $this > 0
+ delay: up 1m down 5m multiplier 2 max 10m
+ info: number of physical drive media errors
+ to: sysadmin
+
+## Battery Backup Units (BBU)
+
+ template: megacli_bbu_relative_charge
+ on: megacli.bbu_relative_charge
+ class: Workload
+ type: System
+component: RAID
+ lookup: average -10s
+ units: percent
+ every: 10s
+ warn: $this <= (($status >= $WARNING) ? (85) : (80))
+ crit: $this <= (($status == $CRITICAL) ? (50) : (40))
+ info: average battery backup unit (BBU) relative state of charge over the last 10 seconds
+ to: sysadmin
+
+ template: megacli_bbu_cycle_count
+ on: megacli.bbu_cycle_count
+ class: Workload
+ type: System
+component: RAID
+ lookup: average -10s
+ units: cycles
+ every: 10s
+ warn: $this >= 100
+ crit: $this >= 500
+ info: average battery backup unit (BBU) charge cycles count over the last 10 seconds
+ to: sysadmin