diff options
Diffstat (limited to 'health/health.d')
-rw-r--r-- | health/health.d/cgroups.conf | 41 | ||||
-rw-r--r-- | health/health.d/linux_power_supply.conf | 2 | ||||
-rw-r--r-- | health/health.d/net.conf | 3 | ||||
-rw-r--r-- | health/health.d/ram.conf | 4 |
4 files changed, 47 insertions, 3 deletions
diff --git a/health/health.d/cgroups.conf b/health/health.d/cgroups.conf
new file mode 100644
index 00000000..79ece53f
--- /dev/null
+++ b/health/health.d/cgroups.conf
@@ -0,0 +1,41 @@
+
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+template: cgroup_10min_cpu_usage
+ on: cgroup.cpu_limit
+ os: linux
+ hosts: *
+ lookup: average -10m unaligned
+ units: %
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (75) : (85))
+ crit: $this > (($status == $CRITICAL) ? (85) : (95))
+ delay: down 15m multiplier 1.5 max 1h
+ info: cpu utilization for the last 10 minutes
+ to: sysadmin
+
+template: cgroup_ram_in_use
+ on: cgroup.mem_usage
+ os: linux
+ hosts: *
+ calc: ($ram) * 100 / $memory_limit
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
+ delay: down 15m multiplier 1.5 max 1h
+ info: RAM used by cgroup
+ to: sysadmin
+
+template: cgroup_ram_and_swap_in_use
+ on: cgroup.mem_usage
+ os: linux
+ hosts: *
+ calc: ($ram + $swap) * 100 / $memory_and_swap_limit
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
+ delay: down 15m multiplier 1.5 max 1h
+ info: RAM and Swap used by cgroup
+ to: sysadmin
diff --git a/health/health.d/linux_power_supply.conf b/health/health.d/linux_power_supply.conf
index 745d2c3d..38727be2 100644
--- a/health/health.d/linux_power_supply.conf
+++ b/health/health.d/linux_power_supply.conf
@@ -7,6 +7,6 @@ template: linux_power_supply_capacity
  every: 10s
  warn: $this < 10
  crit: $this < 5
- delay: up 0 down 5m multiplier 1.2 max 1h
+ delay: up 30s down 5m multiplier 1.2 max 1h
  info: the percentage remaining capacity of the power supply
  to: sysadmin
diff --git a/health/health.d/net.conf b/health/health.d/net.conf
index ae3c26ec..255ab998 100644
--- a/health/health.d/net.conf
+++ b/health/health.d/net.conf
@@ -50,6 +50,9 @@
 # check if an interface is dropping packets
 # the alarm is checked every 1 minute
 # and examines the last 10 minutes of data
+#
+# it is possible to have expected packet drops on an interface for some network configurations
+# look at the Monitoring Network Interfaces section in the proc.plugin documentation for more information
 
 template: inbound_packets_dropped
  on: net.drops
diff --git a/health/health.d/ram.conf b/health/health.d/ram.conf
index 4e437322..93883f73 100644
--- a/health/health.d/ram.conf
+++ b/health/health.d/ram.conf
@@ -30,7 +30,7 @@
  calc: ($avail + $used_ram_to_ignore) * 100 / ($system.ram.used + $system.ram.cached + $system.ram.free + $system.ram.buffers)
  units: %
  every: 10s
- warn: $this < (($status >= $WARNING) ? ( 5) : (10))
+ warn: $this < (($status >= $WARNING) ? (15) : (10))
  crit: $this < (($status == $CRITICAL) ? (10) : ( 5))
  delay: down 15m multiplier 1.5 max 1h
  info: estimated amount of RAM available for userspace processes, without causing swapping
@@ -57,7 +57,7 @@ delay: down 15m multiplier 1.5 max 1h
  calc: ($free + $inactive + $used_ram_to_ignore) * 100 / ($free + $active + $inactive + $wired + $cache + $laundry + $buffers)
  units: %
  every: 10s
- warn: $this < (($status >= $WARNING) ? ( 5) : (10))
+ warn: $this < (($status >= $WARNING) ? (15) : (10))
  crit: $this < (($status == $CRITICAL) ? (10) : ( 5))
  delay: down 15m multiplier 1.5 max 1h
  info: estimated amount of RAM available for userspace processes, without causing swapping