diff options
Diffstat (limited to 'src/health/health.d/cgroups.conf')
-rw-r--r-- | src/health/health.d/cgroups.conf | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/src/health/health.d/cgroups.conf b/src/health/health.d/cgroups.conf new file mode 100644 index 000000000..52ca02624 --- /dev/null +++ b/src/health/health.d/cgroups.conf @@ -0,0 +1,67 @@ +# you can disable an alarm notification by setting the 'to' line to: silent + + template: cgroup_10min_cpu_usage + on: cgroup.cpu_limit + class: Utilization + type: Cgroups + component: CPU +host labels: _os=linux + lookup: average -10m unaligned + units: % + every: 1m + warn: $this > (($status == $CRITICAL) ? (85) : (95)) + delay: down 15m multiplier 1.5 max 1h + summary: Cgroup ${label:cgroup_name} CPU utilization + info: Cgroup ${label:cgroup_name} average CPU utilization over the last 10 minutes + to: silent + + template: cgroup_ram_in_use + on: cgroup.mem_usage + class: Utilization + type: Cgroups + component: Memory +host labels: _os=linux + calc: ($ram) * 100 / $memory_limit + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (80) : (90)) + crit: $this > (($status == $CRITICAL) ? (90) : (98)) + delay: down 15m multiplier 1.5 max 1h + summary: Cgroup ${label:cgroup_name} memory utilization + info: Cgroup ${label:cgroup_name} memory utilization + to: silent + +# ---------------------------------K8s containers-------------------------------------------- + + template: k8s_cgroup_10min_cpu_usage + on: k8s.cgroup.cpu_limit + class: Utilization + type: Cgroups + component: CPU +host labels: _os=linux + lookup: average -10m unaligned + units: % + every: 1m + warn: $this > (($status >= $WARNING) ? (75) : (85)) + delay: down 15m multiplier 1.5 max 1h + summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} CPU utilization + info: Container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \ + average CPU utilization over the last 10 minutes + to: silent + + template: k8s_cgroup_ram_in_use + on: k8s.cgroup.mem_usage + class: Utilization + type: Cgroups + component: Memory +host labels: _os=linux + calc: ($ram) * 100 / $memory_limit + units: % + every: 10s + warn: $this > (($status >= $WARNING) ? (80) : (90)) + crit: $this > (($status == $CRITICAL) ? (90) : (98)) + delay: down 15m multiplier 1.5 max 1h + summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} memory utilization + info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \ + memory utilization + to: silent |