diff options
Diffstat (limited to 'src/health/health.d/apcupsd.conf')
-rw-r--r-- | src/health/health.d/apcupsd.conf | 121 |
1 files changed, 121 insertions, 0 deletions
diff --git a/src/health/health.d/apcupsd.conf b/src/health/health.d/apcupsd.conf new file mode 100644 index 000000000..5fd7aa112 --- /dev/null +++ b/src/health/health.d/apcupsd.conf @@ -0,0 +1,121 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + + template: apcupsd_10min_ups_load + on: apcupsd.load + class: Utilization + type: Power Supply +component: UPS + lookup: average -10m unaligned of percentage + units: % + every: 1m + warn: $this > (($status >= $WARNING) ? (70) : (80)) + delay: down 10m multiplier 1.5 max 1h + summary: APC UPS load + info: APC UPS average load over the last 10 minutes + to: sitemgr + +# Discussion in https://github.com/netdata/netdata/pull/3928: +# Fire the alarm as soon as it's going on battery (99% charge) and clear only when full. + template: apcupsd_ups_charge + on: apcupsd.charge + class: Errors + type: Power Supply +component: UPS + lookup: average -60s unaligned of charge + units: % + every: 60s + warn: $this < 100 + crit: $this < 40 + delay: down 10m multiplier 1.5 max 1h + summary: APC UPS battery charge + info: APC UPS average battery charge over the last minute + to: sitemgr + + template: apcupsd_last_collected_secs + on: apcupsd.load + class: Latency + type: Power Supply +component: UPS device + calc: $now - $last_collected_t + every: 10s + units: seconds ago + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + summary: APC UPS last collection + info: APC UPS number of seconds since the last successful data collection + to: sitemgr + +#Send out a warning when SELFTEST code is BT or NG. Code descriptions can be found at: +#http://www.apcupsd.org/manual/#:~:text=or%20N/A.-,SELFTEST,-The%20results%20of + template: apcupsd_selftest_warning + on: apcupsd.selftest + lookup: max -1s unaligned match-names of BT,NG + units: status + every: 10s + warn: $this == 1 + delay: up 0 down 15m multiplier 1.5 max 1h + info: APC UPS self-test failed due to insufficient battery capacity or due to overload. + to: sitemgr + +#Send out a warning when STATUS code is ONBATT,OVERLOAD,LOWBATT,REPLACEBATT,NOBATT,COMMLOST +#https://man.archlinux.org/man/apcaccess.8.en#:~:text=apcupsd%20was%20started-,STATUS,-%3A%20UPS%20status.%20One + + template: apcupsd_status_onbatt + on: apcupsd.status + lookup: max -1s unaligned match-names of ONBATT + units: status + every: 10s + warn: $this == 1 + delay: up 1m down 15m multiplier 1.5 max 1h + info: APC UPS has switched to battery power because the input power has failed + to: sitemgr + + template: apcupsd_status_overload + on: apcupsd.status + lookup: max -1s unaligned match-names of OVERLOAD + units: status + every: 10s + warn: $this == 1 + delay: up 0 down 15m multiplier 1.5 max 1h + info: APC UPS is overloaded and cannot supply enough power to the load + to: sitemgr + + template: apcupsd_status_lowbatt + on: apcupsd.status + lookup: max -1s unaligned match-names of LOWBATT + units: status + every: 10s + warn: $this == 1 + delay: up 0 down 15m multiplier 1.5 max 1h + info: APC UPS battery is low and needs to be recharged + to: sitemgr + + template: apcupsd_status_replacebatt + on: apcupsd.status + lookup: max -1s unaligned match-names of REPLACEBATT + units: status + every: 10s + warn: $this == 1 + delay: up 0 down 15m multiplier 1.5 max 1h + info: APC UPS battery has reached the end of its lifespan and needs to be replaced + to: sitemgr + + template: apcupsd_status_nobatt + on: apcupsd.status + lookup: max -1s unaligned match-names of NOBATT + units: status + every: 10s + warn: $this == 1 + delay: up 0 down 15m multiplier 1.5 max 1h + info: APC UPS has no battery + to: sitemgr + + template: apcupsd_status_commlost + on: apcupsd.status + lookup: max -1s unaligned match-names of COMMLOST + units: status + every: 10s + warn: $this == 1 + delay: up 0 down 15m multiplier 1.5 max 1h + info: APC UPS communication link is lost + to: sitemgr |