summaryrefslogtreecommitdiffstats
path: root/ml/kmeans/KMeans.cc
blob: e66c66c164acc80708c1b5fd6a707e7207648c03 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
// SPDX-License-Identifier: GPL-3.0-or-later

#include "KMeans.h"
#include <dlib/clustering.h>

// Fit the k-means model on the samples produced by SB.preprocess() and
// record the range [MinDist, MaxDist] of the mean sample-to-centers distance
// observed over the training set. anomalyScore() later normalizes against
// this range.
//
// SB             buffer whose preprocess() yields the training samples.
// MaxIterations  iteration cap forwarded to dlib::find_clusters_using_kmeans.
//
// All model state (ClusterCenters, MinDist, MaxDist) is updated while holding
// Mutex, so a concurrent anomalyScore() never sees a half-updated model.
void KMeans::train(SamplesBuffer &SB, size_t MaxIterations) {
    std::vector<DSample> Samples = SB.preprocess();

    // Nothing to fit: keep the current model untouched rather than handing
    // an empty sample set to dlib and leaving the distance range at its
    // sentinel values.
    if (Samples.empty())
        return;

    std::lock_guard<std::mutex> Lock(Mutex);

    // Seed the running min/max. For floating-point types
    // numeric_limits::min() is the smallest *positive* value, so lowest()
    // is the correct starting point for a running maximum.
    MinDist = std::numeric_limits<CalculatedNumber>::max();
    MaxDist = std::numeric_limits<CalculatedNumber>::lowest();

    ClusterCenters.clear();

    dlib::pick_initial_centers(NumClusters, ClusterCenters, Samples);
    dlib::find_clusters_using_kmeans(Samples, ClusterCenters, MaxIterations);

    for (const auto &S : Samples) {
        // Mean Euclidean distance from this sample to every cluster center.
        CalculatedNumber MeanDist = 0.0;

        for (const auto &KMCenter : ClusterCenters)
            MeanDist += dlib::length(KMCenter - S);

        MeanDist /= NumClusters;

        if (MeanDist < MinDist)
            MinDist = MeanDist;

        if (MeanDist > MaxDist)
            MaxDist = MeanDist;
    }
}

// Score the most recent sample of SB against the trained model.
//
// The score is the mean distance from the last preprocessed sample to all
// cluster centers, rescaled by the [MinDist, MaxDist] range recorded during
// training and expressed as a percentage capped at 100.
//
// Returns:
//   - quiet NaN when no score can be produced: preprocessing yielded no
//     samples, or Mutex is currently held (e.g. train() is running) and the
//     non-blocking try_lock() fails;
//   - 0.0 when MaxDist == MinDist (degenerate range, avoids dividing by zero);
//   - otherwise a value in [0, 100].
CalculatedNumber KMeans::anomalyScore(SamplesBuffer &SB) {
    std::vector<DSample> DSamples = SB.preprocess();

    // back() on an empty vector is undefined behaviour; report "no score".
    if (DSamples.empty())
        return std::numeric_limits<CalculatedNumber>::quiet_NaN();

    // Never block the caller: if the model is locked, skip this round.
    std::unique_lock<std::mutex> Lock(Mutex, std::defer_lock);
    if (!Lock.try_lock())
        return std::numeric_limits<CalculatedNumber>::quiet_NaN();

    const DSample &Latest = DSamples.back();

    CalculatedNumber MeanDist = 0.0;
    for (const auto &CC: ClusterCenters)
        MeanDist += dlib::length(CC - Latest);

    MeanDist /= NumClusters;

    if (MaxDist == MinDist)
        return 0.0;

    CalculatedNumber AnomalyScore = 100.0 * std::abs((MeanDist - MinDist) / (MaxDist - MinDist));
    return (AnomalyScore > 100.0) ? 100.0 : AnomalyScore;
}