diff options
Diffstat (limited to 'health/health.d/backend.conf')
-rw-r--r-- | health/health.d/backend.conf | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/health/health.d/backend.conf b/health/health.d/backend.conf new file mode 100644 index 0000000..7af100d --- /dev/null +++ b/health/health.d/backend.conf @@ -0,0 +1,45 @@ + +# make sure we are sending data to backend + + alarm: backend_last_buffering + on: netdata.backend_metrics + calc: $now - $last_collected_t + units: seconds ago + every: 10s + warn: $this > (($status >= $WARNING) ? ($update_every) : ( 5 * $update_every)) + crit: $this > (($status == $CRITICAL) ? ($update_every) : (60 * $update_every)) + delay: down 5m multiplier 1.5 max 1h + info: number of seconds since the last successful buffering of backend data + to: dba + + alarm: backend_metrics_sent + on: netdata.backend_metrics + units: % + calc: abs($sent) * 100 / abs($buffered) + every: 10s + warn: $this != 100 + delay: down 5m multiplier 1.5 max 1h + info: percentage of metrics sent to the backend server + to: dba + + alarm: backend_metrics_lost + on: netdata.backend_metrics + units: metrics + calc: abs($lost) + every: 10s + crit: ($this != 0) || ($status == $CRITICAL && abs($sent) == 0) + delay: down 5m multiplier 1.5 max 1h + info: number of metrics lost due to repeating failures to contact the backend server + to: dba + +# this chart has been removed from netdata +# alarm: backend_slow +# on: netdata.backend_latency +# units: % +# calc: $latency * 100 / ($update_every * 1000) +# every: 10s +# warn: $this > 50 +# crit: $this > 100 +# delay: down 5m multiplier 1.5 max 1h +# info: the percentage of time between iterations needed by the backend time to process the data sent by netdata +# to: dba |