diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:22 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-03-09 13:19:22 +0000 |
commit | c21c3b0befeb46a51b6bf3758ffa30813bea0ff0 (patch) | |
tree | 9754ff1ca740f6346cf8483ec915d4054bc5da2d /health/health.d | |
parent | Adding upstream version 1.43.2. (diff) | |
download | netdata-0d980fd06561f4670f5d8170c5aedd74023e3702.tar.xz netdata-0d980fd06561f4670f5d8170c5aedd74023e3702.zip |
Adding upstream version 1.44.3. (tag: upstream/1.44.3)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | health/health.d/apcupsd.conf | 75 | ||||
-rw-r--r-- | health/health.d/exporting.conf | 4 | ||||
-rw-r--r-- | health/health.d/ipmi.conf | 2 | ||||
-rw-r--r-- | health/health.d/net.conf | 6 | ||||
-rw-r--r-- | health/health.d/nut.conf | 50 |
5 files changed, 81 insertions, 56 deletions
diff --git a/health/health.d/apcupsd.conf b/health/health.d/apcupsd.conf index fc8f2cd0f..90a72af19 100644 --- a/health/health.d/apcupsd.conf +++ b/health/health.d/apcupsd.conf @@ -48,3 +48,78 @@ component: UPS device summary: APC UPS last collection info: APC UPS number of seconds since the last successful data collection to: sitemgr + +#Send out a warning when SELFTEST code is BT or NG. Code descriptions can be found at: +#http://www.apcupsd.org/manual/#:~:text=or%20N/A.-,SELFTEST,-The%20results%20of + template: apcupsd_selftest_warning + on: apcupsd.selftest + lookup: max -1s unaligned match-names of BT,NG + units: status + every: 10s + warn: $this == 1 + delay: up 0 down 15m multiplier 1.5 max 1h + info: APC UPS self-test failed due to insufficient battery capacity or due to overload. + to: sitemgr + +#Send out a warning when STATUS code is ONBATT,OVERLOAD,LOWBATT,REPLACEBATT,NOBATT,COMMLOST +#https://man.archlinux.org/man/apcaccess.8.en#:~:text=apcupsd%20was%20started-,STATUS,-%3A%20UPS%20status.%20One + + template: apcupsd_status_onbatt + on: apcupsd.status + lookup: max -1s unaligned match-names of ONBATT + units: status + every: 10s + warn: $this == 1 + delay: up 1m down 15m multiplier 1.5 max 1h + info: APC UPS has switched to battery power because the input power has failed + to: sitemgr + + template: apcupsd_status_overload + on: apcupsd.status + lookup: max -1s unaligned match-names of OVERLOAD + units: status + every: 10s + warn: $this == 1 + delay: up 0 down 15m multiplier 1.5 max 1h + info: APC UPS is overloaded and cannot supply enough power to the load + to: sitemgr + + template: apcupsd_status_lowbatt + on: apcupsd.status + lookup: max -1s unaligned match-names of LOWBATT + units: status + every: 10s + warn: $this == 1 + delay: up 0 down 15m multiplier 1.5 max 1h + info: APC UPS battery is low and needs to be recharged + to: sitemgr + + template: apcupsd_status_replacebatt + on: apcupsd.status + lookup: max -1s unaligned match-names of REPLACEBATT 
+ units: status + every: 10s + warn: $this == 1 + delay: up 0 down 15m multiplier 1.5 max 1h + info: APC UPS battery has reached the end of its lifespan and needs to be replaced + to: sitemgr + + template: apcupsd_status_nobatt + on: apcupsd.status + lookup: max -1s unaligned match-names of NOBATT + units: status + every: 10s + warn: $this == 1 + delay: up 0 down 15m multiplier 1.5 max 1h + info: APC UPS has no battery + to: sitemgr + + template: apcupsd_status_commlost + on: apcupsd.status + lookup: max -1s unaligned match-names of COMMLOST + units: status + every: 10s + warn: $this == 1 + delay: up 0 down 15m multiplier 1.5 max 1h + info: APC UPS communication link is lost + to: sitemgr diff --git a/health/health.d/exporting.conf b/health/health.d/exporting.conf index 37d4fd648..c0320193c 100644 --- a/health/health.d/exporting.conf +++ b/health/health.d/exporting.conf @@ -1,6 +1,6 @@ template: exporting_last_buffering - on: exporting_data_size + on: netdata.exporting_data_size class: Latency type: Netdata component: Exporting engine @@ -15,7 +15,7 @@ component: Exporting engine to: dba template: exporting_metrics_sent - on: exporting_data_size + on: netdata.exporting_data_size class: Workload type: Netdata component: Exporting engine diff --git a/health/health.d/ipmi.conf b/health/health.d/ipmi.conf index 942dc070b..cec2320a9 100644 --- a/health/health.d/ipmi.conf +++ b/health/health.d/ipmi.conf @@ -20,7 +20,7 @@ component: IPMI component: IPMI calc: $events units: events - every: 10s + every: 30s warn: $this > 0 delay: up 5m down 15m multiplier 1.5 max 1h summary: IPMI entries in System Event Log diff --git a/health/health.d/net.conf b/health/health.d/net.conf index ea4954187..2dfe6bbaf 100644 --- a/health/health.d/net.conf +++ b/health/health.d/net.conf @@ -11,7 +11,7 @@ component: Network os: * hosts: * - calc: ( $nic_speed_max > 0 ) ? ( $nic_speed_max) : ( nan ) + calc: ( $nic_speed_max > 0 ) ? 
( $nic_speed_max / 1000) : ( nan ) units: Mbit every: 10s info: Network interface ${label:device} current speed @@ -24,7 +24,7 @@ component: Network os: linux hosts: * lookup: average -1m unaligned absolute of received - calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed)) : ( nan ) + calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed * 1000)) : ( nan ) units: % every: 10s warn: $this > (($status >= $WARNING) ? (85) : (90)) @@ -41,7 +41,7 @@ component: Network os: linux hosts: * lookup: average -1m unaligned absolute of sent - calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed)) : ( nan ) + calc: ($interface_speed > 0) ? ($this * 100 / ($interface_speed * 1000)) : ( nan ) units: % every: 10s warn: $this > (($status >= $WARNING) ? (85) : (90)) diff --git a/health/health.d/nut.conf b/health/health.d/nut.conf deleted file mode 100644 index 7a74653e9..000000000 --- a/health/health.d/nut.conf +++ /dev/null @@ -1,50 +0,0 @@ -# you can disable an alarm notification by setting the 'to' line to: silent - - template: nut_10min_ups_load - on: nut.load - class: Utilization - type: Power Supply -component: UPS - os: * - hosts: * - lookup: average -10m unaligned of load - units: % - every: 1m - warn: $this > (($status >= $WARNING) ? (70) : (80)) - crit: $this > (($status == $CRITICAL) ? 
(85) : (95)) - delay: down 10m multiplier 1.5 max 1h - summary: UPS load - info: UPS average load over the last 10 minutes - to: sitemgr - - template: nut_ups_charge - on: nut.charge - class: Errors - type: Power Supply -component: UPS - os: * - hosts: * - lookup: average -60s unaligned of battery_charge - units: % - every: 60s - warn: $this < 75 - crit: $this < 40 - delay: down 10m multiplier 1.5 max 1h - summary: UPS battery charge - info: UPS average battery charge over the last minute - to: sitemgr - - template: nut_last_collected_secs - on: nut.load - class: Latency - type: Power Supply -component: UPS device - calc: $now - $last_collected_t - every: 10s - units: seconds ago - warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) - crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) - delay: down 5m multiplier 1.5 max 1h - summary: NUT last collected - info: Number of seconds since the last successful data collection - to: sitemgr |