diff options
Diffstat (limited to 'src/health/health.d/storcli.conf')
-rw-r--r-- | src/health/health.d/storcli.conf | 61 |
1 files changed, 61 insertions, 0 deletions
diff --git a/src/health/health.d/storcli.conf b/src/health/health.d/storcli.conf new file mode 100644 index 000000000..be71b517e --- /dev/null +++ b/src/health/health.d/storcli.conf @@ -0,0 +1,61 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + +# Controllers + + template: storcli_controller_health_status + on: storcli.controller_health_status + class: Errors + type: System +component: RAID + lookup: average -1m unaligned percentage of healthy + units: % + every: 10s + crit: $this < 100 + delay: down 5m multiplier 2 max 10m + summary: RAID controller ${label:controller_number} health + info: RAID controller ${label:controller_number} is unhealthy + to: sysadmin + + template: storcli_controller_bbu_status + on: storcli.controller_bbu_status + class: Errors + type: System +component: RAID + lookup: average -1m unaligned percentage of healthy,na + units: % + every: 10s + crit: $this < 100 + delay: down 5m multiplier 2 max 10m + summary: RAID controller ${label:controller_number} BBU health + info: RAID controller ${label:controller_number} BBU is unhealthy + to: sysadmin + +# Physical Drives + + template: storcli_phys_drive_errors + on: storcli.phys_drive_errors + class: Errors + type: System +component: RAID + lookup: sum -10s + units: errors + every: 10s + warn: $this > 0 + delay: up 1m down 5m multiplier 2 max 10m + summary: RAID PD c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors + info: RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} errors + to: sysadmin + + template: storcli_phys_drive_predictive_failures + on: storcli.phys_drive_predictive_failures + class: Errors + type: System +component: RAID + lookup: sum -10s + units: failures + every: 10s + warn: $this > 0 + delay: up 1m down 5m multiplier 2 max 10m + summary: RAID PD c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures + info: RAID physical drive c${label:controller_number}/e${label:enclosure_number}/s${label:slot_number} predictive failures + to: sysadmin |