summary refs log tree commit diff stats
path: root/src/health/health.d/megacli.conf
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- src/health/health.d/megacli.conf 59
1 files changed, 30 insertions, 29 deletions
diff --git a/src/health/health.d/megacli.conf b/src/health/health.d/megacli.conf
index d1e2e7acf..27721fa9a 100644
--- a/src/health/health.d/megacli.conf
+++ b/src/health/health.d/megacli.conf
@@ -1,54 +1,55 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
-## Adapters (controllers)
+# Adapters (controllers)
- template: megacli_adapter_state
- on: megacli.adapter_degraded
+ template: megacli_adapter_health_state
+ on: megacli.adapter_health_state
class: Errors
type: System
component: RAID
- lookup: max -10s
- units: boolean
+ lookup: average -1m unaligned percentage of optimal
+ units: %
every: 10s
- crit: $this > 0
+ crit: $this < 100
delay: down 5m multiplier 2 max 10m
- summary: MegaCLI adapter state
- info: Adapter is in the degraded state (0: false, 1: true)
+ summary: MegaCLI adapter ${label:adapter_number} health
+ info: MegaCLI adapter ${label:adapter_number} is in the degraded state
to: sysadmin
-## Physical Disks
-
- template: megacli_pd_predictive_failures
- on: megacli.pd_predictive_failure
+ template: megacli_phys_drive_media_errors
+ on: megacli.phys_drive_media_errors
class: Errors
type: System
component: RAID
lookup: sum -10s
- units: predictive failures
+ units: media errors
every: 10s
warn: $this > 0
delay: up 1m down 5m multiplier 2 max 10m
- summary: MegaCLI physical drive predictive failures
- info: Number of physical drive predictive failures
+ summary: MegaCLI PD adapter ${label:adapter_number} slot ${label:slot_number} media errors
+ info: MegaCLI physical drive adapter ${label:adapter_number} slot ${label:slot_number} media errors
to: sysadmin
- template: megacli_pd_media_errors
- on: megacli.pd_media_error
+# Physical Drives
+
+ template: megacli_phys_drive_predictive_failures
+ on: megacli.phys_drive_predictive_failures
class: Errors
type: System
component: RAID
lookup: sum -10s
- units: media errors
+ units: failures
every: 10s
warn: $this > 0
delay: up 1m down 5m multiplier 2 max 10m
- summary: MegaCLI physical drive errors
- info: Number of physical drive media errors
+ summary: MegaCLI PD adapter ${label:adapter_number} slot ${label:slot_number} predictive failures
+ info: MegaCLI physical drive (adapter ${label:adapter_number} slot ${label:slot_number}) predictive failures
to: sysadmin
-## Battery Backup Units (BBU)
+# Backup Battery Unit
- template: megacli_bbu_relative_charge
- on: megacli.bbu_relative_charge
+ template: megacli_bbu_charge
+ on: megacli.bbu_charge
class: Workload
type: System
component: RAID
@@ -57,12 +58,12 @@ component: RAID
every: 10s
warn: $this <= (($status >= $WARNING) ? (85) : (80))
crit: $this <= (($status == $CRITICAL) ? (50) : (40))
- summary: MegaCLI BBU charge state
- info: Average battery backup unit (BBU) relative state of charge over the last 10 seconds
+ summary: MegaCLI BBU charge
+ info: MegaCLI Backup Battery Unit (adapter ${label:adapter_number}) average charge over the last minute
to: sysadmin
- template: megacli_bbu_cycle_count
- on: megacli.bbu_cycle_count
+ template: megacli_bbu_recharge_cycles
+ on: megacli.bbu_recharge_cycles
class: Workload
type: System
component: RAID
@@ -71,6 +72,6 @@ component: RAID
every: 10s
warn: $this >= 100
crit: $this >= 500
- summary: MegaCLI BBU cycles count
- info: Average battery backup unit (BBU) charge cycles count over the last 10 seconds
+ summary: MegaCLI BBU recharge cycles
+ info: MegaCLI Backup Battery Unit (adapter ${label:adapter_number}) recharge cycles
to: sysadmin