diff options
Diffstat (limited to 'health/health.d/megacli.conf')
-rw-r--r-- | health/health.d/megacli.conf | 68 |
1 files changed, 38 insertions, 30 deletions
diff --git a/health/health.d/megacli.conf b/health/health.d/megacli.conf
index 6e81a2a0..f861765d 100644
--- a/health/health.d/megacli.conf
+++ b/health/health.d/megacli.conf
@@ -1,48 +1,56 @@
-template: adapter_state
+
+## Adapters (controllers)
+
+template: megacli_adapter_state
  on: megacli.adapter_degraded
- units: is degraded
- lookup: sum -10s
+ lookup: max -10s foreach *
+ units: boolean
  every: 10s
  crit: $this > 0
- info: adapter state
+ delay: down 5m multiplier 2 max 10m
+ info: adapter is in the degraded state (0: false, 1: true)
+ to: sysadmin
+
+## Physical Disks
+
+template: megacli_pd_predictive_failures
+ on: megacli.pd_predictive_failure
+ lookup: sum -10s foreach *
+ units: predictive failures
+ every: 10s
+ warn: $this > 0
+ delay: up 1m down 5m multiplier 2 max 10m
+ info: number of physical drive predictive failures
+ to: sysadmin
+
+template: megacli_pd_media_errors
+ on: megacli.pd_media_error
+ lookup: sum -10s foreach *
+ units: media errors
+ every: 10s
+ warn: $this > 0
+ delay: up 1m down 5m multiplier 2 max 10m
+ info: number of physical drive media errors
  to: sysadmin
 
-template: bbu_relative_charge
+## Battery Backup Units (BBU)
+
+template: megacli_bbu_relative_charge
  on: megacli.bbu_relative_charge
- units: percent
  lookup: average -10s
+ units: percent
  every: 10s
  warn: $this <= (($status >= $WARNING) ? (85) : (80))
  crit: $this <= (($status == $CRITICAL) ? (50) : (40))
- info: BBU relative state of charge
+ info: average battery backup unit (BBU) relative state of charge over the last 10 seconds
  to: sysadmin
 
-template: bbu_cycle_count
+template: megacli_bbu_cycle_count
  on: megacli.bbu_cycle_count
- units: cycle count
  lookup: average -10s
+ units: cycles
  every: 10s
  warn: $this >= 100
  crit: $this >= 500
- info: BBU cycle count
- to: sysadmin
-
-template: pd_media_errors
- on: megacli.pd_media_error
- units: media errors
- lookup: sum -10s
- every: 10s
- warn: $this > 0
- delay: down 1m multiplier 2 max 10m
- info: physical drive media errors
- to: sysadmin
-
-template: pd_predictive_failures
- on: megacli.pd_predictive_failure
- units: predictive failures
- lookup: sum -10s
- every: 10s
- warn: $this > 0
- delay: down 1m multiplier 2 max 10m
- info: physical drive predictive failures
+ info: average battery backup unit (BBU) charge cycles count over the last 10 seconds
  to: sysadmin