From 841395dd16f470e3c051a0a4fff5b91efc983c30 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 19 May 2021 14:33:27 +0200 Subject: Adding upstream version 1.31.0. Signed-off-by: Daniel Baumann --- health/health.d/load.conf | 94 ++++++++++++++++++++++++++--------------------- 1 file changed, 53 insertions(+), 41 deletions(-) (limited to 'health/health.d/load.conf') diff --git a/health/health.d/load.conf b/health/health.d/load.conf index ffaea1723..e811f6ee2 100644 --- a/health/health.d/load.conf +++ b/health/health.d/load.conf @@ -4,51 +4,63 @@ # Calculate the base trigger point for the load average alarms. # This is the maximum number of CPU's in the system over the past 1 # minute, with a special case for a single CPU of setting the trigger at 2. - alarm: load_cpu_number - on: system.load - os: linux - hosts: * - calc: ($active_processors == nan or $active_processors == inf or $active_processors < 2) ? ( 2 ) : ( $active_processors ) - units: cpus - every: 1m - info: number of active CPU cores in the system + alarm: load_cpu_number + on: system.load + class: System +component: Load + type: Utilization + os: linux + hosts: * + calc: ($active_processors == nan or $active_processors == inf or $active_processors < 2) ? ( 2 ) : ( $active_processors ) + units: cpus + every: 1m + info: number of active CPU cores in the system # Send alarms if the load average is unusually high. # These intentionally _do not_ calculate the average over the sampled # time period because the values being checked already are averages. - alarm: load_average_15 - on: system.load - os: linux - hosts: * - lookup: max -1m unaligned of load15 - units: load - every: 1m - warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 175 : 200) - delay: down 15m multiplier 1.5 max 1h - info: system fifteen-minute load average - to: sysadmin + alarm: load_average_15 + on: system.load + class: System +component: Load + type: Utilization + os: linux + hosts: * + lookup: max -1m unaligned of load15 + units: load + every: 1m + warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 175 : 200) + delay: down 15m multiplier 1.5 max 1h + info: system fifteen-minute load average + to: sysadmin - alarm: load_average_5 - on: system.load - os: linux - hosts: * - lookup: max -1m unaligned of load5 - units: load - every: 1m - warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 350 : 400) - delay: down 15m multiplier 1.5 max 1h - info: system five-minute load average - to: sysadmin + alarm: load_average_5 + on: system.load + class: System +component: Load + type: Utilization + os: linux + hosts: * + lookup: max -1m unaligned of load5 + units: load + every: 1m + warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 350 : 400) + delay: down 15m multiplier 1.5 max 1h + info: system five-minute load average + to: sysadmin - alarm: load_average_1 - on: system.load - os: linux - hosts: * - lookup: max -1m unaligned of load1 - units: load - every: 1m - warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 700 : 800) - delay: down 15m multiplier 1.5 max 1h - info: system one-minute load average - to: sysadmin + alarm: load_average_1 + on: system.load + class: System +component: Load + type: Utilization + os: linux + hosts: * + lookup: max -1m unaligned of load1 + units: load + every: 1m + warn: ($this * 100 / $load_cpu_number) > (($status >= $WARNING) ? 700 : 800) + delay: down 15m multiplier 1.5 max 1h + info: system one-minute load average + to: sysadmin -- cgit v1.2.3