Adding upstream version 1.46.3.

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-05-05 11:19:16 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-07-24 09:53:24 +0000
commit: b5f8ee61a7f7e9bd291dd26b0585d03eb686c941 (patch)
tree: d4d31289c39fc00da064a825df13a0b98ce95b10 /src/health/health.d/ml.conf
parent: Adding upstream version 1.44.3. (diff)
download: netdata-b5f8ee61a7f7e9bd291dd26b0585d03eb686c941.tar.xz
netdata-b5f8ee61a7f7e9bd291dd26b0585d03eb686c941.zip
1 files changed, 49 insertions, 0 deletions
diff --git a/src/health/health.d/ml.conf b/src/health/health.d/ml.conf
new file mode 100644
index 000000000..b6a5df6dd
--- /dev/null
+++ b/src/health/health.d/ml.conf
@@ -0,0 +1,49 @@
+# below are some examples of using the `anomaly-bit` option to define alerts based on anomaly 
+# rates as opposed to raw metric values. You can read more about the anomaly-bit and Netdata's 
+# native anomaly detection here: 
+# https://learn.netdata.cloud/docs/agent/ml#anomaly-bit---100--anomalous-0--normal
+
+# some examples below are commented, you would need to uncomment and adjust as desired to enable them.
+
+# node level anomaly rate
+# https://learn.netdata.cloud/docs/agent/ml#node-anomaly-rate
+# if node level anomaly rate is above 1% then warning (pick your own threshold that works best via trial and error).
+ template: ml_1min_node_ar
+       on: anomaly_detection.anomaly_rate
+    class: Workload
+     type: System
+component: ML
+   lookup: average -1m of anomaly_rate
+     calc: $this
+    units: %
+    every: 30s
+     warn: $this > 1
+  summary: ML node anomaly rate
+     info: Rolling 1min node level anomaly rate
+       to: silent
+
+# alert per dimension example
+# if anomaly rate is between 5-20% then warning (pick your own threshold that works best via trial and error).
+# if anomaly rate is above 20% then critical (pick your own threshold that works best via trial and error).
+# template: ml_5min_cpu_dims
+#       on: system.cpu
+#   lookup: average -5m anomaly-bit foreach *
+#     calc: $this
+#    units: %
+#    every: 30s
+#     warn: $this > (($status >= $WARNING)  ? (5) : (20))
+#     crit: $this > (($status == $CRITICAL) ? (20) : (100))
+#     info: rolling 5min anomaly rate for each system.cpu dimension
+
+# alert per chart example
+# if anomaly rate is between 5-20% then warning (pick your own threshold that works best via trial and error).
+# if anomaly rate is above 20% then critical (pick your own threshold that works best via trial and error).
+# template: ml_5min_cpu_chart
+#       on: system.cpu
+#   lookup: average -5m anomaly-bit of *
+#     calc: $this
+#    units: %
+#    every: 30s
+#     warn: $this > (($status >= $WARNING)  ? (5) : (20))
+#     crit: $this > (($status == $CRITICAL) ? (20) : (100))
+#     info: rolling 5min anomaly rate for system.cpu chart
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-05 11:19:16 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-07-24 09:53:24 +0000
commit	b5f8ee61a7f7e9bd291dd26b0585d03eb686c941 (patch)
tree	d4d31289c39fc00da064a825df13a0b98ce95b10 /src/health/health.d/ml.conf
parent	Adding upstream version 1.44.3. (diff)
download	netdata-b5f8ee61a7f7e9bd291dd26b0585d03eb686c941.tar.xz netdata-b5f8ee61a7f7e9bd291dd26b0585d03eb686c941.zip