1 files changed, 181 insertions, 0 deletions
diff --git a/collectors/python.d.plugin/anomalies/anomalies.conf b/collectors/python.d.plugin/anomalies/anomalies.conf
new file mode 100644
index 000000000..9950534aa
--- /dev/null
+++ b/collectors/python.d.plugin/anomalies/anomalies.conf
@@ -0,0 +1,181 @@
+# netdata python.d.plugin configuration for anomalies
+#
+# This file is in YAML format. Generally the format is:
+#
+# name: value
+#
+# There are 2 sections:
+#  - global variables
+#  - one or more JOBS
+#
+# JOBS allow you to collect values from multiple sources.
+# Each source will have its own set of charts.
+#
+# JOB parameters have to be indented (using spaces only, example below).
+
+# ----------------------------------------------------------------------
+# Global Variables
+# These variables set the defaults for all JOBs, however each JOB
+# may define its own, overriding the defaults.
+
+# update_every sets the default data collection frequency.
+# If unset, the python.d.plugin default is used.
+# update_every: 2
+
+# priority controls the order of charts at the netdata dashboard.
+# Lower numbers move the charts towards the top of the page.
+# If unset, the default for python.d.plugin is used.
+# priority: 60000
+
+# ----------------------------------------------------------------------
+# JOBS (data collection sources)
+
+# Pull data from local Netdata node.
+local:
+    name: 'local'
+
+    # Host to pull data from.
+    host: '127.0.0.1:19999'
+
+    # Username and password for Netdata if using basic auth.
+    # username: '???'
+    # password: '???'
+
+    # Use http or https to pull data.
+    protocol: 'http'
+
+    # What charts to pull data for - a regex like 'system\..*' or 'system\..*|apps.cpu|apps.mem' etc.
+    charts_regex: 'system\..*'
+
+    # Charts to exclude, useful if you would like to exclude some specific charts.
+    # Note: should be a ',' separated string like 'chart.name,chart.name'.
+    charts_to_exclude: 'system.uptime,system.entropy'
+
+    # What model to use - can be one of 'pca', 'hbos', 'iforest', 'cblof', 'loda', 'copod' or 'feature_bagging'.
+    # More details here: https://pyod.readthedocs.io/en/latest/pyod.models.html.
+    model: 'pca'
+
+    # Max number of observations to train on, to help cap the compute cost of training the model if you set a very large train_n_secs.
+    train_max_n: 100000
+
+    # How often to re-train the model (assuming update_every=1 then train_every_n=1800 represents (re)training every 30 minutes).
+    # Note: To turn off re-training, set train_every_n=0; after initial training the models will not be retrained.
+    train_every_n: 1800
+
+    # The length of the window of data to train on (14400 = last 4 hours).
+    train_n_secs: 14400
+
+    # How many prediction steps after a train event to just reuse the previous prediction value for.
+    # Used to reduce the possibility of the training step itself appearing as an anomaly on the charts.
+    train_no_prediction_n: 10
+
+    # If you would like to train the model for the first time on a specific window then you can define it using the below two variables.
+    # Start of training data for initial model.
+    # initial_train_data_after: 1604578857
+
+    # End of training data for initial model.
+    # initial_train_data_before: 1604593257
+
+    # If you would like to ignore recent data in training then you can offset it by offset_n_secs.
+    offset_n_secs: 0
+
+    # How many lagged values of each dimension to include in the 'feature vector' each model is trained on.
+    lags_n: 5
+
+    # How much smoothing to apply to each dimension in the 'feature vector' each model is trained on.
+    smooth_n: 3
+
+    # How many differences to take in preprocessing your data.
+    # More info on differencing here: https://en.wikipedia.org/wiki/Autoregressive_integrated_moving_average#Differencing
+    # diffs_n=0 would mean training models on the raw values of each dimension.
+    # diffs_n=1 means everything is done in terms of differences.
+    diffs_n: 1
+
+    # What is the typical proportion of anomalies in your data on average?
+    # This parameter can control the sensitivity of your models to anomalies.
+    # Some discussion here: https://github.com/yzhao062/pyod/issues/144
+    contamination: 0.001
+
+    # Set to true to include an "average_prob" dimension on the anomalies probability chart, which is
+    # just the average of all anomaly probabilities at each time step.
+    include_average_prob: true
+
+    # Define any custom models you would like to create anomaly probabilities for, some examples below to show how.
+    # For example, the below creates two custom models: one to run anomaly detection on user and system cpu for our demo servers
+    # and one on the cpu and mem apps metrics for the python.d.plugin.
+    # custom_models:
+    #   - name: 'demos_cpu'
+    #     dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system'
+    #   - name: 'apps_python_d_plugin'
+    #     dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin'
+
+    # Set to true to normalize, using min-max standardization, features used for the custom models.
+    # Useful if your custom models contain dimensions on very different scales and the model you use does
+    # not internally do its own normalization. Usually best to leave as false.
+    # custom_models_normalize: false
+
+# Standalone Custom models example as an additional collector job.
+# custom:
+#     name: 'custom'
+#     host: '127.0.0.1:19999'
+#     protocol: 'http'
+#     charts_regex: 'None'
+#     charts_to_exclude: 'None'
+#     model: 'pca'
+#     train_max_n: 100000
+#     train_every_n: 1800
+#     train_n_secs: 14400
+#     offset_n_secs: 0
+#     lags_n: 5
+#     smooth_n: 3
+#     diffs_n: 1
+#     contamination: 0.001
+#     custom_models:
+#       - name: 'user_netdata'
+#         dimensions: 'users.cpu|netdata,users.mem|netdata,users.threads|netdata,users.processes|netdata,users.sockets|netdata'
+#       - name: 'apps_python_d_plugin'
+#         dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin,apps.threads|python.d.plugin,apps.processes|python.d.plugin,apps.sockets|python.d.plugin'
+
+# Pull data from some demo nodes for cross node custom models.
+# demos:
+#     name: 'demos'
+#     host: '127.0.0.1:19999'
+#     protocol: 'http'
+#     charts_regex: 'None'
+#     charts_to_exclude: 'None'
+#     model: 'pca'
+#     train_max_n: 100000
+#     train_every_n: 1800
+#     train_n_secs: 14400
+#     offset_n_secs: 0
+#     lags_n: 5
+#     smooth_n: 3
+#     diffs_n: 1
+#     contamination: 0.001
+#     custom_models:
+#       - name: 'system.cpu'
+#         dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system'
+#       - name: 'system.ip'
+#         dimensions: 'london.my-netdata.io::system.ip|received,london.my-netdata.io::system.ip|sent,newyork.my-netdata.io::system.ip|received,newyork.my-netdata.io::system.ip|sent'
+#       - name: 'system.net'
+#         dimensions: 'london.my-netdata.io::system.net|received,london.my-netdata.io::system.net|sent,newyork.my-netdata.io::system.net|received,newyork.my-netdata.io::system.net|sent'
+#       - name: 'system.io'
+#         dimensions: 'london.my-netdata.io::system.io|in,london.my-netdata.io::system.io|out,newyork.my-netdata.io::system.io|in,newyork.my-netdata.io::system.io|out'
+
+# Example additional job if you also want to pull data from a child streaming to your
+# local parent, or even from a remote node, so long as the Netdata REST API is accessible.
+# mychildnode1:
+#     name: 'mychildnode1'
+#     host: '127.0.0.1:19999/host/mychildnode1'
+#     protocol: 'http'
+#     charts_regex: 'system\..*'
+#     charts_to_exclude: 'None'
+#     model: 'pca'
+#     train_max_n: 100000
+#     train_every_n: 1800
+#     train_n_secs: 14400
+#     offset_n_secs: 0
+#     lags_n: 5
+#     smooth_n: 3
+#     diffs_n: 1
+#     contamination: 0.001