From 1e6c93250172946eeb38e94a92a1fd12c9d3011e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 7 Nov 2018 13:22:44 +0100 Subject: Merging upstream version 1.11.0+dfsg. Signed-off-by: Daniel Baumann --- health/health.d/backend.conf | 45 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 health/health.d/backend.conf (limited to 'health/health.d/backend.conf') diff --git a/health/health.d/backend.conf b/health/health.d/backend.conf new file mode 100644 index 000000000..7af100d8f --- /dev/null +++ b/health/health.d/backend.conf @@ -0,0 +1,45 @@ + +# make sure we are sending data to backend + + alarm: backend_last_buffering + on: netdata.backend_metrics + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful buffering of backend data + to: dba + + alarm: backend_metrics_sent + on: netdata.backend_metrics + units: % + calc: abs($sent) * 100 / abs($buffered) + every: 10s + warn: $this != 100 + delay: down 5m multiplier 1.5 max 1h + info: percentage of metrics sent to the backend server + to: dba + + alarm: backend_metrics_lost + on: netdata.backend_metrics + units: metrics + calc: abs($lost) + every: 10s + crit: ($this != 0) || ($status == $CRITICAL && abs($sent) == 0) + delay: down 5m multiplier 1.5 max 1h + info: number of metrics lost due to repeating failures to contact the backend server + to: dba + +# this chart has been removed from netdata +# alarm: backend_slow +# on: netdata.backend_latency +# units: % +# calc: $latency * 100 / ($update_every * 1000) +# every: 10s +# warn: $this > 50 +# crit: $this > 100 +# delay: down 5m multiplier 1.5 max 1h +# info: the percentage of time between iterations needed by the backend time to process the data sent by netdata +# to: dba -- cgit v1.2.3