diff options
Diffstat (limited to 'health/health.d/megacli.conf')
-rw-r--r-- | health/health.d/megacli.conf | 71 |
1 files changed, 71 insertions, 0 deletions
diff --git a/health/health.d/megacli.conf b/health/health.d/megacli.conf new file mode 100644 index 0000000..9fbcfdb --- /dev/null +++ b/health/health.d/megacli.conf @@ -0,0 +1,71 @@ + +## Adapters (controllers) + + template: megacli_adapter_state + on: megacli.adapter_degraded + class: Errors + type: System +component: RAID + lookup: max -10s foreach * + units: boolean + every: 10s + crit: $this > 0 + delay: down 5m multiplier 2 max 10m + info: adapter is in the degraded state (0: false, 1: true) + to: sysadmin + +## Physical Disks + + template: megacli_pd_predictive_failures + on: megacli.pd_predictive_failure + class: Errors + type: System +component: RAID + lookup: sum -10s foreach * + units: predictive failures + every: 10s + warn: $this > 0 + delay: up 1m down 5m multiplier 2 max 10m + info: number of physical drive predictive failures + to: sysadmin + + template: megacli_pd_media_errors + on: megacli.pd_media_error + class: Errors + type: System +component: RAID + lookup: sum -10s foreach * + units: media errors + every: 10s + warn: $this > 0 + delay: up 1m down 5m multiplier 2 max 10m + info: number of physical drive media errors + to: sysadmin + +## Battery Backup Units (BBU) + + template: megacli_bbu_relative_charge + on: megacli.bbu_relative_charge + class: Workload + type: System +component: RAID + lookup: average -10s + units: percent + every: 10s + warn: $this <= (($status >= $WARNING) ? (85) : (80)) + crit: $this <= (($status == $CRITICAL) ? (50) : (40)) + info: average battery backup unit (BBU) relative state of charge over the last 10 seconds + to: sysadmin + + template: megacli_bbu_cycle_count + on: megacli.bbu_cycle_count + class: Workload + type: System +component: RAID + lookup: average -10s + units: cycles + every: 10s + warn: $this >= 100 + crit: $this >= 500 + info: average battery backup unit (BBU) charge cycles count over the last 10 seconds + to: sysadmin |