summaryrefslogtreecommitdiffstats
path: root/health/health.d/apcupsd.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/apcupsd.conf')
-rw-r--r--health/health.d/apcupsd.conf125
1 files changed, 125 insertions, 0 deletions
diff --git a/health/health.d/apcupsd.conf b/health/health.d/apcupsd.conf
new file mode 100644
index 00000000..90a72af1
--- /dev/null
+++ b/health/health.d/apcupsd.conf
@@ -0,0 +1,125 @@
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+ template: apcupsd_10min_ups_load
+ on: apcupsd.load
+ class: Utilization
+ type: Power Supply
+component: UPS
+ os: *
+ hosts: *
+ lookup: average -10m unaligned of percentage
+ units: %
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (70) : (80))
+ delay: down 10m multiplier 1.5 max 1h
+ summary: APC UPS load
+ info: APC UPS average load over the last 10 minutes
+ to: sitemgr
+
+# Discussion in https://github.com/netdata/netdata/pull/3928:
+# Fire the alarm as soon as it's going on battery (99% charge) and clear only when full.
+ template: apcupsd_ups_charge
+ on: apcupsd.charge
+ class: Errors
+ type: Power Supply
+component: UPS
+ os: *
+ hosts: *
+ lookup: average -60s unaligned of charge
+ units: %
+ every: 60s
+ warn: $this < 100
+ crit: $this < 40
+ delay: down 10m multiplier 1.5 max 1h
+ summary: APC UPS battery charge
+ info: APC UPS average battery charge over the last minute
+ to: sitemgr
+
+ template: apcupsd_last_collected_secs
+ on: apcupsd.load
+ class: Latency
+ type: Power Supply
+component: UPS device
+ calc: $now - $last_collected_t
+ every: 10s
+ units: seconds ago
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ summary: APC UPS last collection
+ info: APC UPS number of seconds since the last successful data collection
+ to: sitemgr
+
+#Send out a warning when SELFTEST code is BT or NG. Code descriptions can be found at:
+#http://www.apcupsd.org/manual/#:~:text=or%20N/A.-,SELFTEST,-The%20results%20of
+ template: apcupsd_selftest_warning
+ on: apcupsd.selftest
+ lookup: max -1s unaligned match-names of BT,NG
+ units: status
+ every: 10s
+ warn: $this == 1
+ delay: up 0 down 15m multiplier 1.5 max 1h
+ info: APC UPS self-test failed due to insufficient battery capacity or due to overload.
+ to: sitemgr
+
+#Send out a warning when STATUS code is ONBATT,OVERLOAD,LOWBATT,REPLACEBATT,NOBATT,COMMLOST
+#https://man.archlinux.org/man/apcaccess.8.en#:~:text=apcupsd%20was%20started-,STATUS,-%3A%20UPS%20status.%20One
+
+ template: apcupsd_status_onbatt
+ on: apcupsd.status
+ lookup: max -1s unaligned match-names of ONBATT
+ units: status
+ every: 10s
+ warn: $this == 1
+ delay: up 1m down 15m multiplier 1.5 max 1h
+ info: APC UPS has switched to battery power because the input power has failed
+ to: sitemgr
+
+ template: apcupsd_status_overload
+ on: apcupsd.status
+ lookup: max -1s unaligned match-names of OVERLOAD
+ units: status
+ every: 10s
+ warn: $this == 1
+ delay: up 0 down 15m multiplier 1.5 max 1h
+ info: APC UPS is overloaded and cannot supply enough power to the load
+ to: sitemgr
+
+ template: apcupsd_status_lowbatt
+ on: apcupsd.status
+ lookup: max -1s unaligned match-names of LOWBATT
+ units: status
+ every: 10s
+ warn: $this == 1
+ delay: up 0 down 15m multiplier 1.5 max 1h
+ info: APC UPS battery is low and needs to be recharged
+ to: sitemgr
+
+ template: apcupsd_status_replacebatt
+ on: apcupsd.status
+ lookup: max -1s unaligned match-names of REPLACEBATT
+ units: status
+ every: 10s
+ warn: $this == 1
+ delay: up 0 down 15m multiplier 1.5 max 1h
+ info: APC UPS battery has reached the end of its lifespan and needs to be replaced
+ to: sitemgr
+
+ template: apcupsd_status_nobatt
+ on: apcupsd.status
+ lookup: max -1s unaligned match-names of NOBATT
+ units: status
+ every: 10s
+ warn: $this == 1
+ delay: up 0 down 15m multiplier 1.5 max 1h
+ info: APC UPS has no battery
+ to: sitemgr
+
+ template: apcupsd_status_commlost
+ on: apcupsd.status
+ lookup: max -1s unaligned match-names of COMMLOST
+ units: status
+ every: 10s
+ warn: $this == 1
+ delay: up 0 down 15m multiplier 1.5 max 1h
+ info: APC UPS communication link is lost
+ to: sitemgr