diff options
Diffstat (limited to '')
-rw-r--r-- | ml/KMeans.h (renamed from ml/kmeans/KMeans.h) | 15 | ||||
-rw-r--r-- | ml/SamplesBuffer.cc (renamed from ml/kmeans/SamplesBuffer.cc) | 0 | ||||
-rw-r--r-- | ml/SamplesBuffer.h (renamed from ml/kmeans/SamplesBuffer.h) | 0 | ||||
-rw-r--r-- | ml/SamplesBufferTests.cc (renamed from ml/kmeans/Tests.cc) | 9 | ||||
-rw-r--r-- | ml/kmeans/KMeans.cc | 55 | ||||
-rw-r--r-- | ml/kmeans/Makefile.am | 4 |
6 files changed, 17 insertions, 66 deletions
diff --git a/ml/kmeans/KMeans.h b/ml/KMeans.h index 4ea3b6a89..0398eeb86 100644 --- a/ml/kmeans/KMeans.h +++ b/ml/KMeans.h @@ -9,6 +9,7 @@ #include <mutex> #include "SamplesBuffer.h" +#include "json/single_include/nlohmann/json.hpp" class KMeans { public: @@ -17,8 +18,16 @@ public: MaxDist = std::numeric_limits<CalculatedNumber>::min(); }; - void train(SamplesBuffer &SB, size_t MaxIterations); - CalculatedNumber anomalyScore(SamplesBuffer &SB); + void train(const std::vector<DSample> &Samples, size_t MaxIterations); + CalculatedNumber anomalyScore(const DSample &Sample) const; + + void toJson(nlohmann::json &J) const { + J = nlohmann::json{ + {"CCs", ClusterCenters}, + {"MinDist", MinDist}, + {"MaxDist", MaxDist} + }; + } private: size_t NumClusters; @@ -27,8 +36,6 @@ private: CalculatedNumber MinDist; CalculatedNumber MaxDist; - - std::mutex Mutex; }; #endif /* KMEANS_H */ diff --git a/ml/kmeans/SamplesBuffer.cc b/ml/SamplesBuffer.cc index d276c6e09..d276c6e09 100644 --- a/ml/kmeans/SamplesBuffer.cc +++ b/ml/SamplesBuffer.cc diff --git a/ml/kmeans/SamplesBuffer.h b/ml/SamplesBuffer.h index 1c7215cca..1c7215cca 100644 --- a/ml/kmeans/SamplesBuffer.h +++ b/ml/SamplesBuffer.h diff --git a/ml/kmeans/Tests.cc b/ml/SamplesBufferTests.cc index 0cb595945..5997a2a15 100644 --- a/ml/kmeans/Tests.cc +++ b/ml/SamplesBufferTests.cc @@ -36,7 +36,8 @@ TEST(SamplesBufferTest, NS_8_NDPS_1_DN_1_SN_3_LN_1) { CNs[6] = 0.2684839023122384; CNs[7] = 0.851332948637479; - SamplesBuffer SB(CNs, NumSamples, NumDimsPerSample, DiffN, SmoothN, LagN); + std::vector<uint32_t> RandNums(NumSamples, std::numeric_limits<uint32_t>::max()); + SamplesBuffer SB(CNs, NumSamples, NumDimsPerSample, DiffN, SmoothN, LagN, 1.0, RandNums); SB.preprocess(); std::vector<Sample> Samples = SB.getPreprocessedSamples(); @@ -76,7 +77,8 @@ TEST(SamplesBufferTest, NS_8_NDPS_1_DN_2_SN_3_LN_2) { CNs[6] = 0.15552559051428083; CNs[7] = 0.6309750314597955; - SamplesBuffer SB(CNs, NumSamples, NumDimsPerSample, DiffN, SmoothN, LagN); + std::vector<uint32_t> RandNums(NumSamples, std::numeric_limits<uint32_t>::max()); + SamplesBuffer SB(CNs, NumSamples, NumDimsPerSample, DiffN, SmoothN, LagN, 1.0, RandNums); SB.preprocess(); std::vector<Sample> Samples = SB.getPreprocessedSamples(); @@ -114,7 +116,8 @@ TEST(SamplesBufferTest, NS_8_NDPS_3_DN_2_SN_4_LN_1) { CNs[18] = 0.9394494507088997; CNs[19] =0.17567223681734334; CNs[20] = 0.42732886195446984; CNs[21] = 0.9460522396152958; CNs[22] =0.23462747016780894; CNs[23] = 0.35983249900892145; - SamplesBuffer SB(CNs, NumSamples, NumDimsPerSample, DiffN, SmoothN, LagN); + std::vector<uint32_t> RandNums(NumSamples, std::numeric_limits<uint32_t>::max()); + SamplesBuffer SB(CNs, NumSamples, NumDimsPerSample, DiffN, SmoothN, LagN, 1.0, RandNums); SB.preprocess(); std::vector<Sample> Samples = SB.getPreprocessedSamples(); diff --git a/ml/kmeans/KMeans.cc b/ml/kmeans/KMeans.cc deleted file mode 100644 index e66c66c16..000000000 --- a/ml/kmeans/KMeans.cc +++ /dev/null @@ -1,55 +0,0 @@ -// SPDX-License-Identifier: GPL-3.0-or-later - -#include "KMeans.h" -#include <dlib/clustering.h> - -void KMeans::train(SamplesBuffer &SB, size_t MaxIterations) { - std::vector<DSample> Samples = SB.preprocess(); - - MinDist = std::numeric_limits<CalculatedNumber>::max(); - MaxDist = std::numeric_limits<CalculatedNumber>::min(); - - { - std::lock_guard<std::mutex> Lock(Mutex); - - ClusterCenters.clear(); - - dlib::pick_initial_centers(NumClusters, ClusterCenters, Samples); - dlib::find_clusters_using_kmeans(Samples, ClusterCenters, MaxIterations); - - for (const auto &S : Samples) { - CalculatedNumber MeanDist = 0.0; - - for (const auto &KMCenter : ClusterCenters) - MeanDist += dlib::length(KMCenter - S); - - MeanDist /= NumClusters; - - if (MeanDist < MinDist) - MinDist = MeanDist; - - if (MeanDist > MaxDist) - MaxDist = MeanDist; - } - } -} - -CalculatedNumber KMeans::anomalyScore(SamplesBuffer &SB) { - std::vector<DSample> DSamples = SB.preprocess(); - - std::unique_lock<std::mutex> Lock(Mutex, std::defer_lock); - if (!Lock.try_lock()) - return std::numeric_limits<CalculatedNumber>::quiet_NaN(); - - CalculatedNumber MeanDist = 0.0; - for (const auto &CC: ClusterCenters) - MeanDist += dlib::length(CC - DSamples.back()); - - MeanDist /= NumClusters; - - if (MaxDist == MinDist) - return 0.0; - - CalculatedNumber AnomalyScore = 100.0 * std::abs((MeanDist - MinDist) / (MaxDist - MinDist)); - return (AnomalyScore > 100.0) ? 100.0 : AnomalyScore; -} diff --git a/ml/kmeans/Makefile.am b/ml/kmeans/Makefile.am deleted file mode 100644 index babdcf0df..000000000 --- a/ml/kmeans/Makefile.am +++ /dev/null @@ -1,4 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -AUTOMAKE_OPTIONS = subdir-objects -MAINTAINERCLEANFILES = $(srcdir)/Makefile.in |