From 2e85f9325a797977eea9dfea0a925775ddd211d9 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sun, 7 Feb 2021 12:49:00 +0100 Subject: Merging upstream version 1.29.0. Signed-off-by: Daniel Baumann --- .../python.d.plugin/anomalies/anomalies.conf | 181 +++++++++++++++++++++ 1 file changed, 181 insertions(+) create mode 100644 collectors/python.d.plugin/anomalies/anomalies.conf (limited to 'collectors/python.d.plugin/anomalies/anomalies.conf') diff --git a/collectors/python.d.plugin/anomalies/anomalies.conf b/collectors/python.d.plugin/anomalies/anomalies.conf new file mode 100644 index 000000000..9950534aa --- /dev/null +++ b/collectors/python.d.plugin/anomalies/anomalies.conf @@ -0,0 +1,181 @@ +# netdata python.d.plugin configuration for anomalies +# +# This file is in YAML format. Generally the format is: +# +# name: value +# +# There are 2 sections: +# - global variables +# - one or more JOBS +# +# JOBS allow you to collect values from multiple sources. +# Each source will have its own set of charts. +# +# JOB parameters have to be indented (using spaces only, example below). + +# ---------------------------------------------------------------------- +# Global Variables +# These variables set the defaults for all JOBs, however each JOB +# may define its own, overriding the defaults. + +# update_every sets the default data collection frequency. +# If unset, the python.d.plugin default is used. +# update_every: 2 + +# priority controls the order of charts at the netdata dashboard. +# Lower numbers move the charts towards the top of the page. +# If unset, the default for python.d.plugin is used. +# priority: 60000 + +# ---------------------------------------------------------------------- +# JOBS (data collection sources) + +# Pull data from local Netdata node. +local: + name: 'local' + + # Host to pull data from. + host: '127.0.0.1:19999' + + # Username and Password for Netdata if using basic auth. + # username: '???' + # password: '???' 
+ + # Use http or https to pull data + protocol: 'http' + + # What charts to pull data for - A regex like 'system\..*' or 'system\..*|apps.cpu|apps.mem' etc. + charts_regex: 'system\..*' + + # Charts to exclude, useful if you would like to exclude some specific charts. + # Note: should be a ',' separated string like 'chart.name,chart.name'. + charts_to_exclude: 'system.uptime,system.entropy' + + # What model to use - can be one of 'pca', 'hbos', 'iforest', 'cblof', 'loda', 'copod' or 'feature_bagging'. + # More details here: https://pyod.readthedocs.io/en/latest/pyod.models.html. + model: 'pca' + + # Max number of observations to train on, to help cap compute cost of training model if you set a very large train_n_secs. + train_max_n: 100000 + + # How often to re-train the model (assuming update_every=1 then train_every_n=1800 represents (re)training every 30 minutes). + # Note: If you want to turn off re-training set train_every_n=0 and after initial training the models will not be retrained. + train_every_n: 1800 + + # The length of the window of data to train on (14400 = last 4 hours). + train_n_secs: 14400 + + # How many prediction steps after a train event to just use previous prediction value for. + # Used to reduce possibility of the training step itself appearing as an anomaly on the charts. + train_no_prediction_n: 10 + + # If you would like to train the model for the first time on a specific window then you can define it using the below two variables. + # Start of training data for initial model. + # initial_train_data_after: 1604578857 + + # End of training data for initial model. + # initial_train_data_before: 1604593257 + + # If you would like to ignore recent data in training then you can offset it by offset_n_secs. + offset_n_secs: 0 + + # How many lagged values of each dimension to include in the 'feature vector' each model is trained on. 
+ lags_n: 5 + + # How much smoothing to apply to each dimension in the 'feature vector' each model is trained on. + smooth_n: 3 + + # How many differences to take in preprocessing your data. + # More info on differencing here: https://en.wikipedia.org/wiki/Autoregressive_integrated_moving_average#Differencing + # diffs_n=0 would mean training models on the raw values of each dimension. + # diffs_n=1 means everything is done in terms of differences. + diffs_n: 1 + + # What is the typical proportion of anomalies in your data on average? + # This parameter can control the sensitivity of your models to anomalies. + # Some discussion here: https://github.com/yzhao062/pyod/issues/144 + contamination: 0.001 + + # Set to true to include an "average_prob" dimension on anomalies probability chart which is + # just the average of all anomaly probabilities at each time step + include_average_prob: true + + # Define any custom models you would like to create anomaly probabilities for, some examples below to show how. + # For example, the example below creates two custom models: one to run anomaly detection on user and system cpu for our demo servers + # and one on the cpu and mem apps metrics for the python.d.plugin. + # custom_models: + # - name: 'demos_cpu' + # dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system' + # - name: 'apps_python_d_plugin' + # dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin' + + # Set to true to normalize, using min-max standardization, features used for the custom models. + # Useful if your custom models contain dimensions on very different scales and the model you use does + # not internally do its own normalization. Usually best to leave as false. + # custom_models_normalize: false + +# Standalone Custom models example as an additional collector job. 
+# custom: +# name: 'custom' +# host: '127.0.0.1:19999' +# protocol: 'http' +# charts_regex: 'None' +# charts_to_exclude: 'None' +# model: 'pca' +# train_max_n: 100000 +# train_every_n: 1800 +# train_n_secs: 14400 +# offset_n_secs: 0 +# lags_n: 5 +# smooth_n: 3 +# diffs_n: 1 +# contamination: 0.001 +# custom_models: +# - name: 'user_netdata' +# dimensions: 'users.cpu|netdata,users.mem|netdata,users.threads|netdata,users.processes|netdata,users.sockets|netdata' +# - name: 'apps_python_d_plugin' +# dimensions: 'apps.cpu|python.d.plugin,apps.mem|python.d.plugin,apps.threads|python.d.plugin,apps.processes|python.d.plugin,apps.sockets|python.d.plugin' + +# Pull data from some demo nodes for cross node custom models. +# demos: +# name: 'demos' +# host: '127.0.0.1:19999' +# protocol: 'http' +# charts_regex: 'None' +# charts_to_exclude: 'None' +# model: 'pca' +# train_max_n: 100000 +# train_every_n: 1800 +# train_n_secs: 14400 +# offset_n_secs: 0 +# lags_n: 5 +# smooth_n: 3 +# diffs_n: 1 +# contamination: 0.001 +# custom_models: +# - name: 'system.cpu' +# dimensions: 'london.my-netdata.io::system.cpu|user,london.my-netdata.io::system.cpu|system,newyork.my-netdata.io::system.cpu|user,newyork.my-netdata.io::system.cpu|system' +# - name: 'system.ip' +# dimensions: 'london.my-netdata.io::system.ip|received,london.my-netdata.io::system.ip|sent,newyork.my-netdata.io::system.ip|received,newyork.my-netdata.io::system.ip|sent' +# - name: 'system.net' +# dimensions: 'london.my-netdata.io::system.net|received,london.my-netdata.io::system.net|sent,newyork.my-netdata.io::system.net|received,newyork.my-netdata.io::system.net|sent' +# - name: 'system.io' +# dimensions: 'london.my-netdata.io::system.io|in,london.my-netdata.io::system.io|out,newyork.my-netdata.io::system.io|in,newyork.my-netdata.io::system.io|out' + +# Example additional job if you want to also pull data from a child streaming to your +# local parent or even a remote node so long as the Netdata REST API is accessible. 
+# mychildnode1: +# name: 'mychildnode1' +# host: '127.0.0.1:19999/host/mychildnode1' +# protocol: 'http' +# charts_regex: 'system\..*' +# charts_to_exclude: 'None' +# model: 'pca' +# train_max_n: 100000 +# train_every_n: 1800 +# train_n_secs: 14400 +# offset_n_secs: 0 +# lags_n: 5 +# smooth_n: 3 +# diffs_n: 1 +# contamination: 0.001 -- cgit v1.2.3