diff options
Diffstat (limited to 'health/health.d/ml.conf')
-rw-r--r-- | health/health.d/ml.conf | 56 |
1 files changed, 0 insertions, 56 deletions
diff --git a/health/health.d/ml.conf b/health/health.d/ml.conf deleted file mode 100644 index aef9b0368..000000000 --- a/health/health.d/ml.conf +++ /dev/null @@ -1,56 +0,0 @@ -# below are some examples of using the `anomaly-bit` option to define alerts based on anomaly -# rates as opposed to raw metric values. You can read more about the anomaly-bit and Netdata's -# native anomaly detection here: -# https://learn.netdata.cloud/docs/agent/ml#anomaly-bit---100--anomalous-0--normal - -# some examples below are commented, you would need to uncomment and adjust as desired to enable them. - -# node level anomaly rate -# https://learn.netdata.cloud/docs/agent/ml#node-anomaly-rate -# if node level anomaly rate is above 1% then warning (pick your own threshold that works best via trial and error). - template: ml_1min_node_ar - on: anomaly_detection.anomaly_rate - class: Workload - type: System -component: ML - os: * - hosts: * - lookup: average -1m of anomaly_rate - calc: $this - units: % - every: 30s - warn: $this > 1 - summary: ML node anomaly rate - info: Rolling 1min node level anomaly rate - to: silent - -# alert per dimension example -# if anomaly rate is between 5-20% then warning (pick your own threshold that works best via tial and error). -# if anomaly rate is above 20% then critical (pick your own threshold that works best via tial and error). -# template: ml_5min_cpu_dims -# on: system.cpu -# os: linux -# hosts: * -# lookup: average -5m anomaly-bit foreach * -# calc: $this -# units: % -# every: 30s -# warn: $this > (($status >= $WARNING) ? (5) : (20)) -# crit: $this > (($status == $CRITICAL) ? (20) : (100)) -# info: rolling 5min anomaly rate for each system.cpu dimension - -# alert per chart example -# if anomaly rate is between 5-20% then warning (pick your own threshold that works best via tial and error). -# if anomaly rate is above 20% then critical (pick your own threshold that works best via tial and error). -# template: ml_5min_cpu_chart -# on: system.cpu -# os: linux -# hosts: * -# lookup: average -5m anomaly-bit of * -# calc: $this -# units: % -# every: 30s -# warn: $this > (($status >= $WARNING) ? (5) : (20)) -# crit: $this > (($status == $CRITICAL) ? (20) : (100)) -# info: rolling 5min anomaly rate for system.cpu chart - |