diff options
Diffstat (limited to '')
-rw-r--r-- | health/health.d/cgroups.conf | 72 |
1 files changed, 72 insertions, 0 deletions
diff --git a/health/health.d/cgroups.conf b/health/health.d/cgroups.conf new file mode 100644 index 00000000..9c55633e --- /dev/null +++ b/health/health.d/cgroups.conf @@ -0,0 +1,72 @@ + +# you can disable an alarm notification by setting the 'to' line to: silent + + template: cgroup_10min_cpu_usage + on: cgroup.cpu_limit + class: Utilization + type: Cgroups +component: CPU + os: linux + hosts: * + lookup: average -10m unaligned + units: % + every: 1m + warn: $this > (($status == $CRITICAL) ? (85) : (95)) + delay: down 15m multiplier 1.5 max 1h + summary: Cgroup ${label:cgroup_name} CPU utilization + info: Cgroup ${label:cgroup_name} average CPU utilization over the last 10 minutes + to: silent + + template: cgroup_ram_in_use + on: cgroup.mem_usage + class: Utilization + type: Cgroups +component: Memory + os: linux + hosts: * + calc: ($ram) * 100 / $memory_limit + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (80) : (90)) + crit: $this > (($status == $CRITICAL) ? (90) : (98)) + delay: down 15m multiplier 1.5 max 1h + summary: Cgroup ${label:cgroup_name} memory utilization + info: Cgroup ${label:cgroup_name} memory utilization + to: silent + +# ---------------------------------K8s containers-------------------------------------------- + + template: k8s_cgroup_10min_cpu_usage + on: k8s.cgroup.cpu_limit + class: Utilization + type: Cgroups +component: CPU + os: linux + hosts: * + lookup: average -10m unaligned + units: % + every: 1m + warn: $this > (($status >= $WARNING) ? (75) : (85)) + delay: down 15m multiplier 1.5 max 1h + summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} CPU utilization + info: Container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \ + average CPU utilization over the last 10 minutes + to: silent + + template: k8s_cgroup_ram_in_use + on: k8s.cgroup.mem_usage + class: Utilization + type: Cgroups +component: Memory + os: linux + hosts: * + calc: ($ram) * 100 / $memory_limit + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (80) : (90)) + crit: $this > (($status == $CRITICAL) ? (90) : (98)) + delay: down 15m multiplier 1.5 max 1h + summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} memory utilization + info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \ + memory utilization + to: silent |