diff options
Diffstat (limited to 'health/health.d/cgroups.conf')
-rw-r--r-- | health/health.d/cgroups.conf | 83 |
1 files changed, 7 insertions, 76 deletions
diff --git a/health/health.d/cgroups.conf b/health/health.d/cgroups.conf index 53a6ea00f..9c55633ef 100644 --- a/health/health.d/cgroups.conf +++ b/health/health.d/cgroups.conf @@ -13,7 +13,8 @@ component: CPU every: 1m warn: $this > (($status == $CRITICAL) ? (85) : (95)) delay: down 15m multiplier 1.5 max 1h - info: average cgroup CPU utilization over the last 10 minutes + summary: Cgroup ${label:cgroup_name} CPU utilization + info: Cgroup ${label:cgroup_name} average CPU utilization over the last 10 minutes to: silent template: cgroup_ram_in_use @@ -29,46 +30,10 @@ component: Memory warn: $this > (($status >= $WARNING) ? (80) : (90)) crit: $this > (($status == $CRITICAL) ? (90) : (98)) delay: down 15m multiplier 1.5 max 1h - info: cgroup memory utilization + summary: Cgroup ${label:cgroup_name} memory utilization + info: Cgroup ${label:cgroup_name} memory utilization to: silent -# FIXME COMMENTED DUE TO A BUG IN NETDATA -## ----------------------------------------------------------------------------- -## check for packet storms -# -## 1. calculate the rate packets are received in 1m: 1m_received_packets_rate -## 2. do the same for the last 10s -## 3. raise an alarm if the later is 10x or 20x the first -## we assume the minimum packet storm should at least have -## 10000 packets/s, average of the last 10 seconds -# -# template: cgroup_1m_received_packets_rate -# on: cgroup.net_packets -# class: Workload -# type: Cgroups -#component: Network -# hosts: * -# lookup: average -1m unaligned of received -# units: packets -# every: 10s -# info: average number of packets received by the network interface ${label:device} over the last minute -# -# template: cgroup_10s_received_packets_storm -# on: cgroup.net_packets -# class: Workload -# type: Cgroups -#component: Network -# hosts: * -# lookup: average -10s unaligned of received -# calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate)) -# every: 10s -# units: % -# warn: $this > (($status >= $WARNING)?(200):(5000)) -# options: no-clear-notification -# info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \ -# compared to the rate over the last minute -# to: sysadmin -# # ---------------------------------K8s containers-------------------------------------------- template: k8s_cgroup_10min_cpu_usage @@ -83,7 +48,8 @@ component: CPU every: 1m warn: $this > (($status >= $WARNING) ? (75) : (85)) delay: down 15m multiplier 1.5 max 1h - info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \ + summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} CPU utilization + info: Container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \ average CPU utilization over the last 10 minutes to: silent @@ -100,42 +66,7 @@ component: Memory warn: $this > (($status >= $WARNING) ? (80) : (90)) crit: $this > (($status == $CRITICAL) ? (90) : (98)) delay: down 15m multiplier 1.5 max 1h + summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} memory utilization info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \ memory utilization to: silent - -# check for packet storms - -# FIXME COMMENTED DUE TO A BUG IN NETDATA -## 1. calculate the rate packets are received in 1m: 1m_received_packets_rate -## 2. do the same for the last 10s -## 3. raise an alarm if the later is 10x or 20x the first -## we assume the minimum packet storm should at least have -## 10000 packets/s, average of the last 10 seconds -# -# template: k8s_cgroup_1m_received_packets_rate -# on: k8s.cgroup.net_packets -# class: Workload -# type: Cgroups -#component: Network -# hosts: * -# lookup: average -1m unaligned of received -# units: packets -# every: 10s -# info: average number of packets received by the network interface ${label:device} over the last minute -# -# template: k8s_cgroup_10s_received_packets_storm -# on: k8s.cgroup.net_packets -# class: Workload -# type: Cgroups -#component: Network -# hosts: * -# lookup: average -10s unaligned of received -# calc: $this * 100 / (($k8s_cgroup_10s_received_packets_storm < 1000)?(1000):($k8s_cgroup_10s_received_packets_storm)) -# every: 10s -# units: % -# warn: $this > (($status >= $WARNING)?(200):(5000)) -# options: no-clear-notification -# info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \ -# compared to the rate over the last minute -# to: sysadmin |