summary | refs | log | tree | commit | diff | stats
path: root/health/health.d
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d')
-rw-r--r-- health/health.d/cgroups.conf 41
-rw-r--r-- health/health.d/linux_power_supply.conf 2
-rw-r--r-- health/health.d/net.conf 3
-rw-r--r-- health/health.d/ram.conf 4
4 files changed, 47 insertions, 3 deletions
diff --git a/health/health.d/cgroups.conf b/health/health.d/cgroups.conf
new file mode 100644
index 000000000..79ece53f9
--- /dev/null
+++ b/health/health.d/cgroups.conf
@@ -0,0 +1,41 @@
+
+# you can disable an alarm notification by setting the 'to' line to: silent
+
+template: cgroup_10min_cpu_usage
+ on: cgroup.cpu_limit
+ os: linux
+ hosts: *
+ lookup: average -10m unaligned
+ units: %
+ every: 1m
+ warn: $this > (($status >= $WARNING) ? (75) : (85))
+ crit: $this > (($status == $CRITICAL) ? (85) : (95))
+ delay: down 15m multiplier 1.5 max 1h
+ info: cpu utilization for the last 10 minutes
+ to: sysadmin
+
+template: cgroup_ram_in_use
+ on: cgroup.mem_usage
+ os: linux
+ hosts: *
+ calc: ($ram) * 100 / $memory_limit
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
+ delay: down 15m multiplier 1.5 max 1h
+ info: RAM used by cgroup
+ to: sysadmin
+
+template: cgroup_ram_and_swap_in_use
+ on: cgroup.mem_usage
+ os: linux
+ hosts: *
+ calc: ($ram + $swap) * 100 / $memory_and_swap_limit
+ units: %
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? (80) : (90))
+ crit: $this > (($status == $CRITICAL) ? (90) : (98))
+ delay: down 15m multiplier 1.5 max 1h
+ info: RAM and Swap used by cgroup
+ to: sysadmin
diff --git a/health/health.d/linux_power_supply.conf b/health/health.d/linux_power_supply.conf
index 745d2c3dd..38727be2f 100644
--- a/health/health.d/linux_power_supply.conf
+++ b/health/health.d/linux_power_supply.conf
@@ -7,6 +7,6 @@ template: linux_power_supply_capacity
every: 10s
warn: $this < 10
crit: $this < 5
- delay: up 0 down 5m multiplier 1.2 max 1h
+ delay: up 30s down 5m multiplier 1.2 max 1h
info: the percentage remaining capacity of the power supply
to: sysadmin
diff --git a/health/health.d/net.conf b/health/health.d/net.conf
index ae3c26ec6..255ab9982 100644
--- a/health/health.d/net.conf
+++ b/health/health.d/net.conf
@@ -50,6 +50,9 @@
# check if an interface is dropping packets
# the alarm is checked every 1 minute
# and examines the last 10 minutes of data
+#
+# it is possible to have expected packet drops on an interface for some network configurations
+# look at the Monitoring Network Interfaces section in the proc.plugin documentation for more information
template: inbound_packets_dropped
on: net.drops
diff --git a/health/health.d/ram.conf b/health/health.d/ram.conf
index 4e437322c..93883f73b 100644
--- a/health/health.d/ram.conf
+++ b/health/health.d/ram.conf
@@ -30,7 +30,7 @@
calc: ($avail + $used_ram_to_ignore) * 100 / ($system.ram.used + $system.ram.cached + $system.ram.free + $system.ram.buffers)
units: %
every: 10s
- warn: $this < (($status >= $WARNING) ? ( 5) : (10))
+ warn: $this < (($status >= $WARNING) ? (15) : (10))
crit: $this < (($status == $CRITICAL) ? (10) : ( 5))
delay: down 15m multiplier 1.5 max 1h
info: estimated amount of RAM available for userspace processes, without causing swapping
@@ -57,7 +57,7 @@ delay: down 15m multiplier 1.5 max 1h
calc: ($free + $inactive + $used_ram_to_ignore) * 100 / ($free + $active + $inactive + $wired + $cache + $laundry + $buffers)
units: %
every: 10s
- warn: $this < (($status >= $WARNING) ? ( 5) : (10))
+ warn: $this < (($status >= $WARNING) ? (15) : (10))
crit: $this < (($status == $CRITICAL) ? (10) : ( 5))
delay: down 15m multiplier 1.5 max 1h
info: estimated amount of RAM available for userspace processes, without causing swapping