summary | refs | log | tree | commit | diff | stats
path: root/ml/kmeans
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- ml/KMeans.h (renamed from ml/kmeans/KMeans.h) 15
-rw-r--r-- ml/SamplesBuffer.cc (renamed from ml/kmeans/SamplesBuffer.cc) 0
-rw-r--r-- ml/SamplesBuffer.h (renamed from ml/kmeans/SamplesBuffer.h) 0
-rw-r--r-- ml/SamplesBufferTests.cc (renamed from ml/kmeans/Tests.cc) 9
-rw-r--r-- ml/kmeans/KMeans.cc 55
-rw-r--r-- ml/kmeans/Makefile.am 4
6 files changed, 17 insertions, 66 deletions
diff --git a/ml/kmeans/KMeans.h b/ml/KMeans.h
index 4ea3b6a89..0398eeb86 100644
--- a/ml/kmeans/KMeans.h
+++ b/ml/KMeans.h
@@ -9,6 +9,7 @@
#include <mutex>
#include "SamplesBuffer.h"
+#include "json/single_include/nlohmann/json.hpp"
class KMeans {
public:
@@ -17,8 +18,16 @@ public:
MaxDist = std::numeric_limits<CalculatedNumber>::min();
};
- void train(SamplesBuffer &SB, size_t MaxIterations);
- CalculatedNumber anomalyScore(SamplesBuffer &SB);
+ void train(const std::vector<DSample> &Samples, size_t MaxIterations);
+ CalculatedNumber anomalyScore(const DSample &Sample) const;
+
+ void toJson(nlohmann::json &J) const {
+ J = nlohmann::json{
+ {"CCs", ClusterCenters},
+ {"MinDist", MinDist},
+ {"MaxDist", MaxDist}
+ };
+ }
private:
size_t NumClusters;
@@ -27,8 +36,6 @@ private:
CalculatedNumber MinDist;
CalculatedNumber MaxDist;
-
- std::mutex Mutex;
};
#endif /* KMEANS_H */
diff --git a/ml/kmeans/SamplesBuffer.cc b/ml/SamplesBuffer.cc
index d276c6e09..d276c6e09 100644
--- a/ml/kmeans/SamplesBuffer.cc
+++ b/ml/SamplesBuffer.cc
diff --git a/ml/kmeans/SamplesBuffer.h b/ml/SamplesBuffer.h
index 1c7215cca..1c7215cca 100644
--- a/ml/kmeans/SamplesBuffer.h
+++ b/ml/SamplesBuffer.h
diff --git a/ml/kmeans/Tests.cc b/ml/SamplesBufferTests.cc
index 0cb595945..5997a2a15 100644
--- a/ml/kmeans/Tests.cc
+++ b/ml/SamplesBufferTests.cc
@@ -36,7 +36,8 @@ TEST(SamplesBufferTest, NS_8_NDPS_1_DN_1_SN_3_LN_1) {
CNs[6] = 0.2684839023122384;
CNs[7] = 0.851332948637479;
- SamplesBuffer SB(CNs, NumSamples, NumDimsPerSample, DiffN, SmoothN, LagN);
+ std::vector<uint32_t> RandNums(NumSamples, std::numeric_limits<uint32_t>::max());
+ SamplesBuffer SB(CNs, NumSamples, NumDimsPerSample, DiffN, SmoothN, LagN, 1.0, RandNums);
SB.preprocess();
std::vector<Sample> Samples = SB.getPreprocessedSamples();
@@ -76,7 +77,8 @@ TEST(SamplesBufferTest, NS_8_NDPS_1_DN_2_SN_3_LN_2) {
CNs[6] = 0.15552559051428083;
CNs[7] = 0.6309750314597955;
- SamplesBuffer SB(CNs, NumSamples, NumDimsPerSample, DiffN, SmoothN, LagN);
+ std::vector<uint32_t> RandNums(NumSamples, std::numeric_limits<uint32_t>::max());
+ SamplesBuffer SB(CNs, NumSamples, NumDimsPerSample, DiffN, SmoothN, LagN, 1.0, RandNums);
SB.preprocess();
std::vector<Sample> Samples = SB.getPreprocessedSamples();
@@ -114,7 +116,8 @@ TEST(SamplesBufferTest, NS_8_NDPS_3_DN_2_SN_4_LN_1) {
CNs[18] = 0.9394494507088997; CNs[19] =0.17567223681734334; CNs[20] = 0.42732886195446984;
CNs[21] = 0.9460522396152958; CNs[22] =0.23462747016780894; CNs[23] = 0.35983249900892145;
- SamplesBuffer SB(CNs, NumSamples, NumDimsPerSample, DiffN, SmoothN, LagN);
+ std::vector<uint32_t> RandNums(NumSamples, std::numeric_limits<uint32_t>::max());
+ SamplesBuffer SB(CNs, NumSamples, NumDimsPerSample, DiffN, SmoothN, LagN, 1.0, RandNums);
SB.preprocess();
std::vector<Sample> Samples = SB.getPreprocessedSamples();
diff --git a/ml/kmeans/KMeans.cc b/ml/kmeans/KMeans.cc
deleted file mode 100644
index e66c66c16..000000000
--- a/ml/kmeans/KMeans.cc
+++ /dev/null
@@ -1,55 +0,0 @@
-// SPDX-License-Identifier: GPL-3.0-or-later
-
-#include "KMeans.h"
-#include <dlib/clustering.h>
-
-void KMeans::train(SamplesBuffer &SB, size_t MaxIterations) {
- std::vector<DSample> Samples = SB.preprocess();
-
- MinDist = std::numeric_limits<CalculatedNumber>::max();
- MaxDist = std::numeric_limits<CalculatedNumber>::min();
-
- {
- std::lock_guard<std::mutex> Lock(Mutex);
-
- ClusterCenters.clear();
-
- dlib::pick_initial_centers(NumClusters, ClusterCenters, Samples);
- dlib::find_clusters_using_kmeans(Samples, ClusterCenters, MaxIterations);
-
- for (const auto &S : Samples) {
- CalculatedNumber MeanDist = 0.0;
-
- for (const auto &KMCenter : ClusterCenters)
- MeanDist += dlib::length(KMCenter - S);
-
- MeanDist /= NumClusters;
-
- if (MeanDist < MinDist)
- MinDist = MeanDist;
-
- if (MeanDist > MaxDist)
- MaxDist = MeanDist;
- }
- }
-}
-
-CalculatedNumber KMeans::anomalyScore(SamplesBuffer &SB) {
- std::vector<DSample> DSamples = SB.preprocess();
-
- std::unique_lock<std::mutex> Lock(Mutex, std::defer_lock);
- if (!Lock.try_lock())
- return std::numeric_limits<CalculatedNumber>::quiet_NaN();
-
- CalculatedNumber MeanDist = 0.0;
- for (const auto &CC: ClusterCenters)
- MeanDist += dlib::length(CC - DSamples.back());
-
- MeanDist /= NumClusters;
-
- if (MaxDist == MinDist)
- return 0.0;
-
- CalculatedNumber AnomalyScore = 100.0 * std::abs((MeanDist - MinDist) / (MaxDist - MinDist));
- return (AnomalyScore > 100.0) ? 100.0 : AnomalyScore;
-}
diff --git a/ml/kmeans/Makefile.am b/ml/kmeans/Makefile.am
deleted file mode 100644
index babdcf0df..000000000
--- a/ml/kmeans/Makefile.am
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-3.0-or-later
-
-AUTOMAKE_OPTIONS = subdir-objects
-MAINTAINERCLEANFILES = $(srcdir)/Makefile.in