From 97e01009d69b8fbebfebf68f51e3d126d0ed43fc Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 30 Nov 2022 19:47:05 +0100 Subject: Merging upstream version 1.37.0. Signed-off-by: Daniel Baumann --- ml/KMeans.cc | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 ml/KMeans.cc (limited to 'ml/KMeans.cc') diff --git a/ml/KMeans.cc b/ml/KMeans.cc new file mode 100644 index 000000000..edc2ef49e --- /dev/null +++ b/ml/KMeans.cc @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "KMeans.h" +#include + +void KMeans::train(const std::vector &Samples, size_t MaxIterations) { + MinDist = std::numeric_limits::max(); + MaxDist = std::numeric_limits::min(); + + ClusterCenters.clear(); + + dlib::pick_initial_centers(NumClusters, ClusterCenters, Samples); + dlib::find_clusters_using_kmeans(Samples, ClusterCenters, MaxIterations); + + for (const auto &S : Samples) { + CalculatedNumber MeanDist = 0.0; + + for (const auto &KMCenter : ClusterCenters) + MeanDist += dlib::length(KMCenter - S); + + MeanDist /= NumClusters; + + if (MeanDist < MinDist) + MinDist = MeanDist; + + if (MeanDist > MaxDist) + MaxDist = MeanDist; + } +} + +CalculatedNumber KMeans::anomalyScore(const DSample &Sample) const { + CalculatedNumber MeanDist = 0.0; + for (const auto &CC: ClusterCenters) + MeanDist += dlib::length(CC - Sample); + + MeanDist /= NumClusters; + + if (MaxDist == MinDist) + return 0.0; + + CalculatedNumber AnomalyScore = 100.0 * std::abs((MeanDist - MinDist) / (MaxDist - MinDist)); + return (AnomalyScore > 100.0) ? 100.0 : AnomalyScore; +} -- cgit v1.2.3