summaryrefslogtreecommitdiffstats
path: root/health/health.d/ml.conf
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2023-10-17 09:30:20 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2023-10-17 09:30:20 +0000
commit: 386ccdd61e8256c8b21ee27ee2fc12438fc5ca98 (patch)
tree: c9fbcacdb01f029f46133a5ba7ecd610c2bcb041 /health/health.d/ml.conf
parent: Adding upstream version 1.42.4. (diff)
download: netdata-386ccdd61e8256c8b21ee27ee2fc12438fc5ca98.tar.xz
netdata-386ccdd61e8256c8b21ee27ee2fc12438fc5ca98.zip
Adding upstream version 1.43.0. (upstream/1.43.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'health/health.d/ml.conf')
-rw-r--r-- health/health.d/ml.conf 33
1 files changed, 18 insertions, 15 deletions
diff --git a/health/health.d/ml.conf b/health/health.d/ml.conf
index 6836ce7b1..aef9b0368 100644
--- a/health/health.d/ml.conf
+++ b/health/health.d/ml.conf
@@ -3,23 +3,26 @@
# native anomaly detection here:
# https://learn.netdata.cloud/docs/agent/ml#anomaly-bit---100--anomalous-0--normal
-# examples below are commented, you would need to uncomment and adjust as desired to enable them.
+# some examples below are commented, you would need to uncomment and adjust as desired to enable them.
-# node level anomaly rate example
+# node level anomaly rate
# https://learn.netdata.cloud/docs/agent/ml#node-anomaly-rate
-# if node level anomaly rate is between 1-5% then warning (pick your own threshold that works best via tial and error).
-# if node level anomaly rate is above 5% then critical (pick your own threshold that works best via tial and error).
-# template: ml_1min_node_ar
-# on: anomaly_detection.anomaly_rate
-# os: linux
-# hosts: *
-# lookup: average -1m foreach anomaly_rate
-# calc: $this
-# units: %
-# every: 30s
-# warn: $this > (($status >= $WARNING) ? (1) : (5))
-# crit: $this > (($status == $CRITICAL) ? (5) : (100))
-# info: rolling 1min node level anomaly rate
+# if node level anomaly rate is above 1% then warning (pick your own threshold that works best via trial and error).
+ template: ml_1min_node_ar
+ on: anomaly_detection.anomaly_rate
+ class: Workload
+ type: System
+component: ML
+ os: *
+ hosts: *
+ lookup: average -1m of anomaly_rate
+ calc: $this
+ units: %
+ every: 30s
+ warn: $this > 1
+ summary: ML node anomaly rate
+ info: Rolling 1min node level anomaly rate
+ to: silent
# alert per dimension example
# if anomaly rate is between 5-20% then warning (pick your own threshold that works best via trial and error).