summaryrefslogtreecommitdiffstats
path: root/conf.d/health.d/backend.conf
diff options
context:
space:
mode:
Diffstat (limited to 'conf.d/health.d/backend.conf')
-rw-r--r--conf.d/health.d/backend.conf45
1 files changed, 45 insertions, 0 deletions
diff --git a/conf.d/health.d/backend.conf b/conf.d/health.d/backend.conf
new file mode 100644
index 000000000..9c193e7b9
--- /dev/null
+++ b/conf.d/health.d/backend.conf
@@ -0,0 +1,45 @@
+
+# make sure we are sending data to backend
+
+ alarm: backend_last_buffering
+ on: netdata.backend_metrics
+ calc: $now - $last_collected_t
+ units: seconds ago
+ every: 10s
+ warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every))
+ crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every))
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of seconds since the last successful buffering of backend data
+ to: dba
+
+ alarm: backend_metrics_sent
+ on: netdata.backend_metrics
+ units: %
+ calc: abs($sent) * 100 / abs($buffered)
+ every: 10s
+ warn: $this != 100
+ delay: down 5m multiplier 1.5 max 1h
+ info: percentage of metrics sent to the backend server
+ to: dba
+
+ alarm: backend_metrics_lost
+ on: netdata.backend_metrics
+ units: metrics
+ calc: abs($lost)
+ every: 10s
+ crit: $this != 0
+ delay: down 5m multiplier 1.5 max 1h
+ info: number of metrics lost due to repeating failures to contact the backend server
+ to: dba
+
+# this chart has been removed from netdata
+# alarm: backend_slow
+# on: netdata.backend_latency
+# units: %
+# calc: $latency * 100 / ($update_every * 1000)
+# every: 10s
+# warn: $this > 50
+# crit: $this > 100
+# delay: down 5m multiplier 1.5 max 1h
+# info: the percentage of time between iterations needed by the backend time to process the data sent by netdata
+# to: dba