summaryrefslogtreecommitdiffstats
path: root/collectors/python.d.plugin/anomalies/anomalies.conf
diff options
context:
space:
mode:
Diffstat (limited to 'collectors/python.d.plugin/anomalies/anomalies.conf')
-rw-r--r--collectors/python.d.plugin/anomalies/anomalies.conf181
1 files changed, 181 insertions, 0 deletions
diff --git a/collectors/python.d.plugin/anomalies/anomalies.conf b/collectors/python.d.plugin/anomalies/anomalies.conf
new file mode 100644
index 000000000..9950534aa
--- /dev/null
+++ b/collectors/python.d.plugin/anomalies/anomalies.conf
@@ -0,0 +1,181 @@
+# netdata python.d.plugin configuration for anomalies
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+# - global variables
+# - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 2
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+
+# Pull data from local Netdata node.
+local:
+ name: 'local'
+
+ # Host to pull data from.
+ host: '127.0.0.1:19999'
+
+ # Username and Password for Netdata if using basic auth.
+ # username: '???'
+ # password: '???'
+
+ # Use http or https to pull data
+ protocol: 'http'
+
+ # What charts to pull data for - a regex like 'system\..*' or 'system\..*|apps.cpu|apps.mem' etc.
+ charts_regex: 'system\..*'
+
+ # Charts to exclude, useful if you would like to exclude some specific charts.
+ # Note: should be a ',' separated string like 'chart.name,chart.name'.
+ charts_to_exclude: 'system.uptime,system.entropy'
+
+ # What model to use - can be one of 'pca', 'hbos', 'iforest', 'cblof', 'loda', 'copod' or 'feature_bagging'.
+ # More details here: https://pyod.readthedocs.io/en/latest/pyod.models.html.
+ model: 'pca'
+
+ # Max number of observations to train on, to help cap compute cost of training model if you set a very large train_n_secs.
+ train_max_n: 100000
+
+ # How often to re-train the model (assuming update_every=1 then train_every_n=1800 represents (re)training every 30 minutes).
+ # Note: If you want to turn off re-training set train_every_n=0 and after initial training the models will not be retrained.
+ train_every_n: 1800
+
+ # The length of the window of data to train on (14400 = last 4 hours).
+ train_n_secs: 14400
+
+ # How many prediction steps after a train event to just use previous prediction value for.
+ # Used to reduce possibility of the training step itself appearing as an anomaly on the charts.
+ train_no_prediction_n: 10
+
+ # If you would like to train the model for the first time on a specific window then you can define it using the below two variables.
+ # Start of training data for initial model.
+ # initial_train_data_after: 1604578857
+
+ # End of training data for initial model.
+ # initial_train_data_before: 1604593257
+
+ # If you would like to ignore recent data in training then you can offset it by offset_n_secs.
+ offset_n_secs: 0
+
+ # How many lagged values of each dimension to include in the 'feature vector' each model is trained on.
+ lags_n: 5
+
+ # How much smoothing to apply to each dimension in the 'feature vector' each model is trained on.
+ smooth_n: 3
+
+ # How many differences to take in preprocessing your data.
+ # More info on differencing here: https://en.wikipedia.org/wiki/Autoregressive_integrated_moving_average#Differencing
+ # diffs_n=0 would mean training models on the raw values of each dimension.
+ # diffs_n=1 means everything is done in terms of differences.
+ diffs_n: 1
+
+ # What is the typical proportion of anomalies in your data on average?
+ # This parameter can control the sensitivity of your models to anomalies.
+ # Some discussion here: https://github.com/yzhao062/pyod/issues/144
+ contamination: 0.001
+
+ # Set to true to include an "average_prob" dimension on the anomalies probability chart, which is
+ # just the average of all anomaly probabilities at each time step.
+ include_average_prob: true
+
+ # Define any custom models you would like to create anomaly probabilities for, some examples below to show how.
+ # For example, the example below creates two custom models: one to run anomaly detection on user and system cpu for our demo servers
+ # and one on the cpu and mem apps metrics for the python.d.plugin.
+ # custom_models:
+ # - name: 'demos_cpu'
+ # dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system'
+ # - name: 'apps_python_d_plugin'
+ # dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin'
+
+ # Set to true to normalize, using min-max standardization, features used for the custom models.
+ # Useful if your custom models contain dimensions on very different scales and the model you use does
+ # not internally do its own normalization. Usually best to leave as false.
+ # custom_models_normalize: false
+
+# Standalone Custom models example as an additional collector job.
+# custom:
+# name: 'custom'
+# host: '127.0.0.1:19999'
+# protocol: 'http'
+# charts_regex: 'None'
+# charts_to_exclude: 'None'
+# model: 'pca'
+# train_max_n: 100000
+# train_every_n: 1800
+# train_n_secs: 14400
+# offset_n_secs: 0
+# lags_n: 5
+# smooth_n: 3
+# diffs_n: 1
+# contamination: 0.001
+# custom_models:
+# - name: 'user_netdata'
+# dimensions: 'users.cpu|netdata,users.mem|netdata,users.threads|netdata,users.processes|netdata,users.sockets|netdata'
+# - name: 'apps_python_d_plugin'
+# dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin,apps.threads|python.d.plugin,apps.processes|python.d.plugin,apps.sockets|python.d.plugin'
+
+# Pull data from some demo nodes for cross node custom models.
+# demos:
+# name: 'demos'
+# host: '127.0.0.1:19999'
+# protocol: 'http'
+# charts_regex: 'None'
+# charts_to_exclude: 'None'
+# model: 'pca'
+# train_max_n: 100000
+# train_every_n: 1800
+# train_n_secs: 14400
+# offset_n_secs: 0
+# lags_n: 5
+# smooth_n: 3
+# diffs_n: 1
+# contamination: 0.001
+# custom_models:
+# - name: 'system.cpu'
+# dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system'
+# - name: 'system.ip'
+# dimensions: 'london.my-netdata.io::system.ip|received,london.my-netdata.io::system.ip|sent,newyork.my-netdata.io::system.ip|received,newyork.my-netdata.io::system.ip|sent'
+# - name: 'system.net'
+# dimensions: 'london.my-netdata.io::system.net|received,london.my-netdata.io::system.net|sent,newyork.my-netdata.io::system.net|received,newyork.my-netdata.io::system.net|sent'
+# - name: 'system.io'
+# dimensions: 'london.my-netdata.io::system.io|in,london.my-netdata.io::system.io|out,newyork.my-netdata.io::system.io|in,newyork.my-netdata.io::system.io|out'
+
+# Example additional job if you want to also pull data from a child streaming to your
+# local parent or even a remote node so long as the Netdata REST API is accessible.
+# mychildnode1:
+# name: 'mychildnode1'
+# host: '127.0.0.1:19999/host/mychildnode1'
+# protocol: 'http'
+# charts_regex: 'system\..*'
+# charts_to_exclude: 'None'
+# model: 'pca'
+# train_max_n: 100000
+# train_every_n: 1800
+# train_n_secs: 14400
+# offset_n_secs: 0
+# lags_n: 5
+# smooth_n: 3
+# diffs_n: 1
+# contamination: 0.001