summary refs log tree commit diff stats
path: root/ml/Config.cc
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- ml/Config.cc 114
1 files changed, 114 insertions, 0 deletions
diff --git a/ml/Config.cc b/ml/Config.cc
new file mode 100644
index 0000000..eedd8c2
--- /dev/null
+++ b/ml/Config.cc
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "Config.h"
+#include "ml-private.h"
+
+using namespace ml;
+
+/*
+ * Global configuration instance to be shared between training and
+ * prediction threads. Its fields are filled in by Config::readMLConfig().
+ */
+Config ml::Cfg;
+
+/*
+ * Restrict Value to the range [Min, Max] and return the result by value.
+ *
+ * The upper bound is applied first and the lower bound second, so when
+ * Min is greater than Max the result is Min.
+ */
+template <typename T>
+static T clamp(const T& Value, const T& Min, const T& Max) {
+    // Cap at the upper bound: pick Max only when it is strictly smaller.
+    const T& Capped = (Max < Value) ? Max : Value;
+
+    // Raise to the lower bound: pick Capped only when it is strictly larger.
+    return (Min < Capped) ? Capped : Min;
+}
+
+/*
+ * Initialize the global configuration variable (ml::Cfg).
+ *
+ * Each option is read from the CONFIG_SECTION_ML section with a built-in
+ * default. Numeric options are then clamped to their supported range, the
+ * minimum/maximum number of training samples are checked against each
+ * other, and the resulting values are stored in Cfg. The host and chart
+ * skip lists are additionally passed to simple_pattern_create().
+ */
+void Config::readMLConfig(void) {
+    const char *ConfigSectionML = CONFIG_SECTION_ML;
+
+    bool EnableAnomalyDetection = config_get_boolean(ConfigSectionML, "enabled", true);
+
+    /*
+     * Read values
+     */
+
+    unsigned MaxTrainSamples = config_get_number(ConfigSectionML, "maximum num samples to train", 4 * 3600);
+    unsigned MinTrainSamples = config_get_number(ConfigSectionML, "minimum num samples to train", 1 * 900);
+    unsigned TrainEvery = config_get_number(ConfigSectionML, "train every", 1 * 3600);
+    unsigned NumModelsToUse = config_get_number(ConfigSectionML, "number of models per dimension", 1 * 24);
+
+    unsigned DiffN = config_get_number(ConfigSectionML, "num samples to diff", 1);
+    unsigned SmoothN = config_get_number(ConfigSectionML, "num samples to smooth", 3);
+    unsigned LagN = config_get_number(ConfigSectionML, "num samples to lag", 5);
+
+    // The default sampling ratio is derived from the lag. LagN has not been
+    // clamped yet at this point, so divide by its clamped value to avoid a
+    // division by zero when "num samples to lag" is configured as 0.
+    double RandomSamplingRatio = config_get_float(ConfigSectionML, "random sampling ratio", 1.0 / clamp(LagN, 1u, 5u));
+    unsigned MaxKMeansIters = config_get_number(ConfigSectionML, "maximum number of k-means iterations", 1000);
+
+    double DimensionAnomalyScoreThreshold = config_get_float(ConfigSectionML, "dimension anomaly score threshold", 0.99);
+
+    double HostAnomalyRateThreshold = config_get_float(ConfigSectionML, "host anomaly rate threshold", 1.0);
+    std::string AnomalyDetectionGroupingMethod = config_get(ConfigSectionML, "anomaly detection grouping method", "average");
+    time_t AnomalyDetectionQueryDuration = config_get_number(ConfigSectionML, "anomaly detection grouping duration", 5 * 60);
+
+    /*
+     * Clamp
+     */
+
+    MaxTrainSamples = clamp<unsigned>(MaxTrainSamples, 1 * 3600, 24 * 3600);
+    MinTrainSamples = clamp<unsigned>(MinTrainSamples, 1 * 900, 6 * 3600);
+    TrainEvery = clamp<unsigned>(TrainEvery, 1 * 3600, 6 * 3600);
+    // Clamp the configured number of models itself; clamping TrainEvery here
+    // would always yield the upper bound, because TrainEvery is >= 3600.
+    NumModelsToUse = clamp<unsigned>(NumModelsToUse, 1, 7 * 24);
+
+    DiffN = clamp(DiffN, 0u, 1u);
+    SmoothN = clamp(SmoothN, 0u, 5u);
+    LagN = clamp(LagN, 1u, 5u);
+
+    RandomSamplingRatio = clamp(RandomSamplingRatio, 0.2, 1.0);
+    MaxKMeansIters = clamp(MaxKMeansIters, 500u, 1000u);
+
+    DimensionAnomalyScoreThreshold = clamp(DimensionAnomalyScoreThreshold, 0.01, 5.00);
+
+    HostAnomalyRateThreshold = clamp(HostAnomalyRateThreshold, 0.1, 10.0);
+    AnomalyDetectionQueryDuration = clamp<time_t>(AnomalyDetectionQueryDuration, 60, 15 * 60);
+
+    /*
+     * Validate
+     */
+
+    // The two clamped ranges overlap, so the minimum can still end up at or
+    // above the maximum; in that case report it and use a fixed valid pair.
+    if (MinTrainSamples >= MaxTrainSamples) {
+        error("invalid min/max train samples found (%u >= %u)", MinTrainSamples, MaxTrainSamples);
+
+        MinTrainSamples = 1 * 3600;
+        MaxTrainSamples = 4 * 3600;
+    }
+
+    /*
+     * Assign to config instance
+     */
+
+    Cfg.EnableAnomalyDetection = EnableAnomalyDetection;
+
+    Cfg.MaxTrainSamples = MaxTrainSamples;
+    Cfg.MinTrainSamples = MinTrainSamples;
+    Cfg.TrainEvery = TrainEvery;
+    Cfg.NumModelsToUse = NumModelsToUse;
+
+    Cfg.DiffN = DiffN;
+    Cfg.SmoothN = SmoothN;
+    Cfg.LagN = LagN;
+
+    Cfg.RandomSamplingRatio = RandomSamplingRatio;
+    Cfg.MaxKMeansIters = MaxKMeansIters;
+
+    Cfg.DimensionAnomalyScoreThreshold = DimensionAnomalyScoreThreshold;
+
+    Cfg.HostAnomalyRateThreshold = HostAnomalyRateThreshold;
+    Cfg.AnomalyDetectionGroupingMethod = web_client_api_request_v1_data_group(AnomalyDetectionGroupingMethod.c_str(), RRDR_GROUPING_AVERAGE);
+    Cfg.AnomalyDetectionQueryDuration = AnomalyDetectionQueryDuration;
+
+    Cfg.HostsToSkip = config_get(ConfigSectionML, "hosts to skip from training", "!*");
+    Cfg.SP_HostsToSkip = simple_pattern_create(Cfg.HostsToSkip.c_str(), NULL, SIMPLE_PATTERN_EXACT);
+
+    // Always exclude anomaly_detection charts from training.
+    Cfg.ChartsToSkip = "anomaly_detection.* ";
+    Cfg.ChartsToSkip += config_get(ConfigSectionML, "charts to skip from training", "netdata.*");
+    // Build the pattern from the string just stored in Cfg, like the hosts
+    // pattern above, rather than from whichever instance this was called on.
+    Cfg.SP_ChartsToSkip = simple_pattern_create(Cfg.ChartsToSkip.c_str(), NULL, SIMPLE_PATTERN_EXACT);
+
+    Cfg.StreamADCharts = config_get_boolean(ConfigSectionML, "stream anomaly detection charts", true);
+}