summary refs log tree commit diff stats
path: root/health/health.d/cgroups.conf
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.d/cgroups.conf')
-rw-r--r-- health/health.d/cgroups.conf 83
1 files changed, 7 insertions, 76 deletions
diff --git a/health/health.d/cgroups.conf b/health/health.d/cgroups.conf
index 53a6ea00f..9c55633ef 100644
--- a/health/health.d/cgroups.conf
+++ b/health/health.d/cgroups.conf
@@ -13,7 +13,8 @@ component: CPU
every: 1m
warn: $this > (($status == $CRITICAL) ? (85) : (95))
delay: down 15m multiplier 1.5 max 1h
- info: average cgroup CPU utilization over the last 10 minutes
+ summary: Cgroup ${label:cgroup_name} CPU utilization
+ info: Cgroup ${label:cgroup_name} average CPU utilization over the last 10 minutes
to: silent
template: cgroup_ram_in_use
@@ -29,46 +30,10 @@ component: Memory
warn: $this > (($status >= $WARNING) ? (80) : (90))
crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: down 15m multiplier 1.5 max 1h
- info: cgroup memory utilization
+ summary: Cgroup ${label:cgroup_name} memory utilization
+ info: Cgroup ${label:cgroup_name} memory utilization
to: silent
-# FIXME COMMENTED DUE TO A BUG IN NETDATA
-## -----------------------------------------------------------------------------
-## check for packet storms
-#
-## 1. calculate the rate packets are received in 1m: 1m_received_packets_rate
-## 2. do the same for the last 10s
-## 3. raise an alarm if the later is 10x or 20x the first
-## we assume the minimum packet storm should at least have
-## 10000 packets/s, average of the last 10 seconds
-#
-# template: cgroup_1m_received_packets_rate
-# on: cgroup.net_packets
-# class: Workload
-# type: Cgroups
-#component: Network
-# hosts: *
-# lookup: average -1m unaligned of received
-# units: packets
-# every: 10s
-# info: average number of packets received by the network interface ${label:device} over the last minute
-#
-# template: cgroup_10s_received_packets_storm
-# on: cgroup.net_packets
-# class: Workload
-# type: Cgroups
-#component: Network
-# hosts: *
-# lookup: average -10s unaligned of received
-# calc: $this * 100 / (($1m_received_packets_rate < 1000)?(1000):($1m_received_packets_rate))
-# every: 10s
-# units: %
-# warn: $this > (($status >= $WARNING)?(200):(5000))
-# options: no-clear-notification
-# info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \
-# compared to the rate over the last minute
-# to: sysadmin
-#
# ---------------------------------K8s containers--------------------------------------------
template: k8s_cgroup_10min_cpu_usage
@@ -83,7 +48,8 @@ component: CPU
every: 1m
warn: $this > (($status >= $WARNING) ? (75) : (85))
delay: down 15m multiplier 1.5 max 1h
- info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
+ summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} CPU utilization
+ info: Container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
average CPU utilization over the last 10 minutes
to: silent
@@ -100,42 +66,7 @@ component: Memory
warn: $this > (($status >= $WARNING) ? (80) : (90))
crit: $this > (($status == $CRITICAL) ? (90) : (98))
delay: down 15m multiplier 1.5 max 1h
+ summary: Container ${label:k8s_container_name} pod ${label:k8s_pod_name} memory utilization
info: container ${label:k8s_container_name} of pod ${label:k8s_pod_name} of namespace ${label:k8s_namespace}, \
memory utilization
to: silent
-
-# check for packet storms
-
-# FIXME COMMENTED DUE TO A BUG IN NETDATA
-## 1. calculate the rate packets are received in 1m: 1m_received_packets_rate
-## 2. do the same for the last 10s
-## 3. raise an alarm if the later is 10x or 20x the first
-## we assume the minimum packet storm should at least have
-## 10000 packets/s, average of the last 10 seconds
-#
-# template: k8s_cgroup_1m_received_packets_rate
-# on: k8s.cgroup.net_packets
-# class: Workload
-# type: Cgroups
-#component: Network
-# hosts: *
-# lookup: average -1m unaligned of received
-# units: packets
-# every: 10s
-# info: average number of packets received by the network interface ${label:device} over the last minute
-#
-# template: k8s_cgroup_10s_received_packets_storm
-# on: k8s.cgroup.net_packets
-# class: Workload
-# type: Cgroups
-#component: Network
-# hosts: *
-# lookup: average -10s unaligned of received
-# calc: $this * 100 / (($k8s_cgroup_10s_received_packets_storm < 1000)?(1000):($k8s_cgroup_10s_received_packets_storm))
-# every: 10s
-# units: %
-# warn: $this > (($status >= $WARNING)?(200):(5000))
-# options: no-clear-notification
-# info: ratio of average number of received packets for the network interface ${label:device} over the last 10 seconds, \
-# compared to the rate over the last minute
-# to: sysadmin