summaryrefslogtreecommitdiffstats
path: root/ml/Dimension.cc
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2021-12-01 06:15:04 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2021-12-01 06:15:04 +0000
commite970e0b37b8bd7f246feb3f70c4136418225e434 (patch)
tree0b67c0ca45f56f2f9d9c5c2e725279ecdf52d2eb /ml/Dimension.cc
parentAdding upstream version 1.31.0. (diff)
downloadnetdata-e970e0b37b8bd7f246feb3f70c4136418225e434.tar.xz
netdata-e970e0b37b8bd7f246feb3f70c4136418225e434.zip
Adding upstream version 1.32.0.upstream/1.32.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/Dimension.cc')
-rw-r--r--ml/Dimension.cc169
1 files changed, 169 insertions, 0 deletions
diff --git a/ml/Dimension.cc b/ml/Dimension.cc
new file mode 100644
index 000000000..c27f30bb4
--- /dev/null
+++ b/ml/Dimension.cc
@@ -0,0 +1,169 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "Config.h"
+#include "Dimension.h"
+#include "Query.h"
+
+using namespace ml;
+
+/*
+ * Decode a packed storage_number into a CalculatedNumber.
+ *
+ * This is a copy of unpack_storage_number that yields a floating point
+ * CalculatedNumber instead. Layout of the packed value (bit 1 is the
+ * least significant bit):
+ *
+ *   bit 32        sign, 0:positive, 1:negative
+ *   bit 31        0:divide, 1:multiply
+ *   bits 30-28    how many times to multiply/divide (0-7)
+ *   bit 27        SN_EXISTS_100 (multiply by 100 instead of 10)
+ *   bit 26        SN_EXISTS_RESET (ignored here)
+ *   bit 25        SN_ANOMALY_BIT (ignored here)
+ *   bits 24-1     the value itself
+ *
+ * A packed value of zero decodes to zero.
+ */
+static CalculatedNumber unpack_storage_number_dbl(storage_number value) {
+    if(!value)
+        return 0;
+
+    // Pull the control bits out before the payload is masked.
+    const bool Negative = (value & (1u << 31)) != 0;
+    const bool Multiply = (value & (1u << 30)) != 0;
+    const int MulFactor = (value & (1u << 26)) ? 100 : 10;
+
+    // Three-bit repeat count stored in bits 30, 29, 28.
+    const int Steps = (value >> 27) & 0x7;
+
+    // Keep only the low 24 bits, which hold the value.
+    value &= 0x00FFFFFFu;
+
+    CalculatedNumber Result = value;
+
+    if(Multiply) {
+        for(int I = 0; I < Steps; I++)
+            Result *= MulFactor;
+    }
+    else {
+        // Division always uses 10; the 100 factor only applies to multiplication.
+        for(int I = 0; I < Steps; I++)
+            Result /= 10;
+    }
+
+    if(Negative)
+        Result = -Result;
+
+    return Result;
+}
+
+/*
+ * Collect the most recent samples of this dimension for training.
+ *
+ * Queries at most Cfg.MaxTrainSamples values from the time window ending
+ * one second ago, aligned to updateEvery() and clamped to
+ * [oldestTime(), latestTime()]. Slots whose storage number does not exist
+ * are filled with the last existing value; leading slots that have no
+ * previous value are dropped from the result.
+ *
+ * Returns { buffer, count }. The buffer is allocated with new[] and must be
+ * released by the caller with delete[]. Returns { nullptr, 0 } when the time
+ * window is empty, when fewer than Cfg.MinTrainSamples existing values were
+ * collected, or when no usable value was found at all.
+ */
+std::pair<CalculatedNumber *, size_t>
+TrainableDimension::getCalculatedNumbers() {
+    size_t MinN = Cfg.MinTrainSamples;
+    size_t MaxN = Cfg.MaxTrainSamples;
+
+    // Figure out what our time window should be.
+    time_t BeforeT = now_realtime_sec() - 1;
+    time_t AfterT = BeforeT - (MaxN * updateEvery());
+
+    BeforeT -= (BeforeT % updateEvery());
+    AfterT -= (AfterT % updateEvery());
+
+    BeforeT = std::min(BeforeT, latestTime());
+    AfterT = std::max(AfterT, oldestTime());
+
+    if (AfterT >= BeforeT)
+        return { nullptr, 0 };
+
+    // Zero-initialized; sized MaxN * (LagN + 1) rather than MaxN.
+    // NOTE(review): the extra room is presumably needed by SamplesBuffer - confirm.
+    CalculatedNumber *CNs = new CalculatedNumber[MaxN * (Cfg.LagN + 1)]();
+
+    // Start the query.
+    size_t TotalValues = 0;
+    size_t CollectedValues = 0;
+
+    CalculatedNumber LastValue = std::numeric_limits<CalculatedNumber>::quiet_NaN();
+    Query Q = Query(getRD());
+
+    Q.init(AfterT, BeforeT);
+    while (!Q.isFinished()) {
+        if (TotalValues == MaxN)
+            break;
+
+        auto P = Q.nextMetric();
+        storage_number SN = P.second;
+
+        if (does_storage_number_exist(SN)) {
+            CNs[TotalValues] = unpack_storage_number_dbl(SN);
+            LastValue = CNs[TotalValues];
+            CollectedValues++;
+        } else {
+            // Gap: repeat the previous value (NaN until a first value is seen).
+            CNs[TotalValues] = LastValue;
+        }
+
+        TotalValues++;
+    }
+
+    if (CollectedValues < MinN) {
+        delete[] CNs;
+        return { nullptr, 0 };
+    }
+
+    // Find the first non-NaN value, never scanning past what was filled in.
+    size_t FirstValid = 0;
+    while (FirstValid < TotalValues && std::isnan(CNs[FirstValid]))
+        FirstValid++;
+
+    // Nothing usable (reachable when MinN is 0): report missing data rather
+    // than handing back an empty buffer.
+    if (FirstValid == TotalValues) {
+        delete[] CNs;
+        return { nullptr, 0 };
+    }
+
+    // Overwrite the leading NaN values by shifting the valid tail to the front.
+    TotalValues -= FirstValid;
+    if (FirstValid != 0)
+        memmove(CNs, &CNs[FirstValid], sizeof(CalculatedNumber) * TotalValues);
+
+    return { CNs, TotalValues };
+}
+
+/*
+ * Train the k-means model of this dimension on its most recent samples.
+ *
+ * Returns MLResult::MissingData when getCalculatedNumbers() yields no
+ * buffer; otherwise trains KM, marks the dimension as Trained and returns
+ * MLResult::Success.
+ */
+MLResult TrainableDimension::trainModel() {
+    auto Samples = getCalculatedNumbers();
+
+    if (!Samples.first)
+        return MLResult::MissingData;
+
+    // This function owns the buffer from here on and frees it below.
+    CalculatedNumber *Values = Samples.first;
+    unsigned NumValues = Samples.second;
+
+    SamplesBuffer SB = SamplesBuffer(Values, NumValues, 1,
+                                     Cfg.DiffN, Cfg.SmoothN, Cfg.LagN);
+    KM.train(SB, Cfg.MaxKMeansIters);
+    Trained = true;
+
+    delete[] Values;
+    return MLResult::Success;
+}
+
+/*
+ * Feed one collected value into the sliding window used for prediction.
+ *
+ * The window (CNs) holds at most DiffN + SmoothN + LagN values. A value
+ * that does not exist resets the window. While the window is filling up,
+ * values are appended; once it is full, the oldest value is dropped and
+ * the new one becomes the last element.
+ */
+void PredictableDimension::addValue(CalculatedNumber Value, bool Exists) {
+    if (!Exists) {
+        CNs.clear();
+        return;
+    }
+
+    unsigned N = Cfg.DiffN + Cfg.SmoothN + Cfg.LagN;
+
+    // A zero-length window cannot hold anything. Without this guard the
+    // rotate below would advance past the end of an empty container and
+    // CNs[N - 1] would index with a wrapped-around unsigned value.
+    if (N == 0)
+        return;
+
+    if (CNs.size() < N) {
+        CNs.push_back(Value);
+        return;
+    }
+
+    // Window is full: shift everything left by one and store the new value last.
+    std::rotate(std::begin(CNs), std::begin(CNs) + 1, std::end(CNs));
+    CNs[N - 1] = Value;
+}
+
+/*
+ * Compute the anomaly score of the current window and update AnomalyBit.
+ *
+ * Returns:
+ *   { MLResult::MissingData, AnomalyBit } when the window does not yet hold
+ *       exactly DiffN + SmoothN + LagN values (AnomalyBit is left unchanged);
+ *   { MLResult::NaN, AnomalyBit } when the computed score is NaN
+ *       (AnomalyBit is left unchanged);
+ *   { MLResult::Success, AnomalyBit } otherwise, with AnomalyBit set when
+ *       the score is >= 100 * Cfg.DimensionAnomalyScoreThreshold.
+ */
+std::pair<MLResult, bool> PredictableDimension::predict() {
+    unsigned N = Cfg.DiffN + Cfg.SmoothN + Cfg.LagN;
+    if (CNs.size() != N)
+        return { MLResult::MissingData, AnomalyBit };
+
+    // Work on a zero-initialized scratch copy, sized N * (LagN + 1), so the
+    // window itself (CNs) is not handed to SamplesBuffer.
+    CalculatedNumber *TmpCNs = new CalculatedNumber[N * (Cfg.LagN + 1)]();
+    std::memcpy(TmpCNs, CNs.data(), N * sizeof(CalculatedNumber));
+
+    SamplesBuffer SB = SamplesBuffer(TmpCNs, N, 1, Cfg.DiffN, Cfg.SmoothN, Cfg.LagN);
+    AnomalyScore = computeAnomalyScore(SB);
+    delete[] TmpCNs;
+
+    // NaN never compares equal to anything, including itself, so an
+    // equality test against quiet_NaN() can never detect it.
+    if (std::isnan(AnomalyScore))
+        return { MLResult::NaN, AnomalyBit };
+
+    AnomalyBit = AnomalyScore >= (100 * Cfg.DimensionAnomalyScoreThreshold);
+    return { MLResult::Success, AnomalyBit };
+}