summaryrefslogtreecommitdiffstats
path: root/ml/SamplesBuffer.cc
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2023-05-08 16:27:04 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2023-05-08 16:27:04 +0000
commita836a244a3d2bdd4da1ee2641e3e957850668cea (patch)
treecb87c75b3677fab7144f868435243f864048a1e6 /ml/SamplesBuffer.cc
parentAdding upstream version 1.38.1. (diff)
downloadnetdata-a836a244a3d2bdd4da1ee2641e3e957850668cea.tar.xz
netdata-a836a244a3d2bdd4da1ee2641e3e957850668cea.zip
Adding upstream version 1.39.0.upstream/1.39.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/SamplesBuffer.cc')
-rw-r--r--ml/SamplesBuffer.cc183
1 files changed, 0 insertions, 183 deletions
diff --git a/ml/SamplesBuffer.cc b/ml/SamplesBuffer.cc
deleted file mode 100644
index 359b60c2..00000000
--- a/ml/SamplesBuffer.cc
+++ /dev/null
@@ -1,183 +0,0 @@
-// SPDX-License-Identifier: GPL-3.0-or-later
-//
-#include "SamplesBuffer.h"
-
-#include <fstream>
-#include <sstream>
-#include <string>
-
-void Sample::print(std::ostream &OS) const {
- for (size_t Idx = 0; Idx != NumDims - 1; Idx++)
- OS << CNs[Idx] << ", ";
-
- OS << CNs[NumDims - 1];
-}
-
-void SamplesBuffer::print(std::ostream &OS) const {
- for (size_t Idx = Preprocessed ? (DiffN + (SmoothN - 1) + (LagN)) : 0;
- Idx != NumSamples; Idx++) {
- Sample S = Preprocessed ? getPreprocessedSample(Idx) : getSample(Idx);
- OS << S << std::endl;
- }
-}
-
-std::vector<Sample> SamplesBuffer::getPreprocessedSamples() const {
- std::vector<Sample> V;
-
- for (size_t Idx = Preprocessed ? (DiffN + (SmoothN - 1) + (LagN)) : 0;
- Idx != NumSamples; Idx++) {
- Sample S = Preprocessed ? getPreprocessedSample(Idx) : getSample(Idx);
- V.push_back(S);
- }
-
- return V;
-}
-
-void SamplesBuffer::diffSamples() {
- // Panda's DataFrame default behaviour is to subtract each element from
- // itself. For us `DiffN = 0` means "disable diff-ing" when preprocessing
- // the samples buffer. This deviation will make it easier for us to test
- // the KMeans implementation.
- if (DiffN == 0)
- return;
-
- for (size_t Idx = 0; Idx != (NumSamples - DiffN); Idx++) {
- size_t High = (NumSamples - 1) - Idx;
- size_t Low = High - DiffN;
-
- Sample LHS = getSample(High);
- Sample RHS = getSample(Low);
-
- LHS.diff(RHS);
- }
-}
-
-void SamplesBuffer::smoothSamples() {
- // Holds the mean value of each window
- CalculatedNumber AccCNs[1] = { 0 };
- Sample Acc(AccCNs, 1);
-
- // Used to avoid clobbering the accumulator when moving the window
- CalculatedNumber TmpCNs[1] = { 0 };
- Sample Tmp(TmpCNs, 1);
-
- CalculatedNumber Factor = (CalculatedNumber) 1 / SmoothN;
-
- // Calculate the value of the 1st window
- for (size_t Idx = 0; Idx != std::min(SmoothN, NumSamples); Idx++) {
- Tmp.add(getSample(NumSamples - (Idx + 1)));
- }
-
- Acc.add(Tmp);
- Acc.scale(Factor);
-
- // Move the window and update the samples
- for (size_t Idx = NumSamples; Idx != (DiffN + SmoothN - 1); Idx--) {
- Sample S = getSample(Idx - 1);
-
- // Tmp <- Next window (if any)
- if (Idx >= (SmoothN + 1)) {
- Tmp.diff(S);
- Tmp.add(getSample(Idx - (SmoothN + 1)));
- }
-
- // S <- Acc
- S.copy(Acc);
-
- // Acc <- Tmp
- Acc.copy(Tmp);
- Acc.scale(Factor);
- }
-}
-
-void SamplesBuffer::lagSamples() {
- if (LagN == 0)
- return;
-
- for (size_t Idx = NumSamples; Idx != LagN; Idx--) {
- Sample PS = getPreprocessedSample(Idx - 1);
- PS.lag(getSample(Idx - 1), LagN);
- }
-}
-
-void SamplesBuffer::preprocess(std::vector<DSample> &Samples) {
- assert(Preprocessed == false);
-
- size_t OutN = NumSamples;
-
- // Diff
- if (DiffN >= OutN)
- return;
- OutN -= DiffN;
- diffSamples();
-
- // Smooth
- if (SmoothN == 0 || SmoothN > OutN)
- return;
- OutN -= (SmoothN - 1);
- smoothSamples();
-
- // Lag
- if (LagN >= OutN)
- return;
- OutN -= LagN;
- lagSamples();
-
- Samples.reserve(OutN);
- Preprocessed = true;
-
- uint32_t MaxMT = std::numeric_limits<uint32_t>::max();
- uint32_t CutOff = static_cast<double>(MaxMT) * SamplingRatio;
-
- for (size_t Idx = NumSamples - OutN; Idx != NumSamples; Idx++) {
- if (RandNums[Idx] > CutOff)
- continue;
-
- DSample DS;
- DS.set_size(NumDimsPerSample * (LagN + 1));
-
- const Sample PS = getPreprocessedSample(Idx);
- PS.initDSample(DS);
-
- Samples.push_back(std::move(DS));
- }
-}
-
-void SamplesBuffer::preprocess(DSample &Feature) {
- assert(Preprocessed == false);
-
- size_t OutN = NumSamples;
-
- // Diff
- if (DiffN >= OutN)
- return;
- OutN -= DiffN;
- diffSamples();
-
- // Smooth
- if (SmoothN == 0 || SmoothN > OutN)
- return;
- OutN -= (SmoothN - 1);
- smoothSamples();
-
- // Lag
- if (LagN >= OutN)
- return;
- OutN -= LagN;
- lagSamples();
-
- Preprocessed = true;
-
- uint32_t MaxMT = std::numeric_limits<uint32_t>::max();
- uint32_t CutOff = static_cast<double>(MaxMT) * SamplingRatio;
-
- for (size_t Idx = NumSamples - OutN; Idx != NumSamples; Idx++) {
- if (RandNums[Idx] > CutOff)
- continue;
-
- Feature.set_size(NumDimsPerSample * (LagN + 1));
-
- const Sample PS = getPreprocessedSample(Idx);
- PS.initDSample(Feature);
- }
-}