diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2021-12-01 06:15:04 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2021-12-01 06:15:04 +0000 |
commit | e970e0b37b8bd7f246feb3f70c4136418225e434 (patch) | |
tree | 0b67c0ca45f56f2f9d9c5c2e725279ecdf52d2eb /ml/kmeans/KMeans.cc | |
parent | Adding upstream version 1.31.0. (diff) | |
download | netdata-e970e0b37b8bd7f246feb3f70c4136418225e434.tar.xz netdata-e970e0b37b8bd7f246feb3f70c4136418225e434.zip |
Adding upstream version 1.32.0. (upstream/1.32.0)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/kmeans/KMeans.cc')
-rw-r--r-- | ml/kmeans/KMeans.cc | 55 |
1 files changed, 55 insertions, 0 deletions
diff --git a/ml/kmeans/KMeans.cc b/ml/kmeans/KMeans.cc new file mode 100644 index 000000000..e66c66c16 --- /dev/null +++ b/ml/kmeans/KMeans.cc @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "KMeans.h" +#include <dlib/clustering.h> + +void KMeans::train(SamplesBuffer &SB, size_t MaxIterations) { + std::vector<DSample> Samples = SB.preprocess(); + + MinDist = std::numeric_limits<CalculatedNumber>::max(); + MaxDist = std::numeric_limits<CalculatedNumber>::min(); + + { + std::lock_guard<std::mutex> Lock(Mutex); + + ClusterCenters.clear(); + + dlib::pick_initial_centers(NumClusters, ClusterCenters, Samples); + dlib::find_clusters_using_kmeans(Samples, ClusterCenters, MaxIterations); + + for (const auto &S : Samples) { + CalculatedNumber MeanDist = 0.0; + + for (const auto &KMCenter : ClusterCenters) + MeanDist += dlib::length(KMCenter - S); + + MeanDist /= NumClusters; + + if (MeanDist < MinDist) + MinDist = MeanDist; + + if (MeanDist > MaxDist) + MaxDist = MeanDist; + } + } +} + +CalculatedNumber KMeans::anomalyScore(SamplesBuffer &SB) { + std::vector<DSample> DSamples = SB.preprocess(); + + std::unique_lock<std::mutex> Lock(Mutex, std::defer_lock); + if (!Lock.try_lock()) + return std::numeric_limits<CalculatedNumber>::quiet_NaN(); + + CalculatedNumber MeanDist = 0.0; + for (const auto &CC: ClusterCenters) + MeanDist += dlib::length(CC - DSamples.back()); + + MeanDist /= NumClusters; + + if (MaxDist == MinDist) + return 0.0; + + CalculatedNumber AnomalyScore = 100.0 * std::abs((MeanDist - MinDist) / (MaxDist - MinDist)); + return (AnomalyScore > 100.0) ? 100.0 : AnomalyScore; +} |