summary refs log tree commit diff stats
path: root/ml/kmeans
diff options
context:
space:
mode:
Diffstat (limited to 'ml/kmeans')
-rw-r--r-- ml/kmeans/SamplesBuffer.cc 6
-rw-r--r-- ml/kmeans/SamplesBuffer.h 7
2 files changed, 12 insertions, 1 deletion
diff --git a/ml/kmeans/SamplesBuffer.cc b/ml/kmeans/SamplesBuffer.cc
index f8211fb54..d276c6e09 100644
--- a/ml/kmeans/SamplesBuffer.cc
+++ b/ml/kmeans/SamplesBuffer.cc
@@ -130,7 +130,13 @@ std::vector<DSample> SamplesBuffer::preprocess() {
DSamples.reserve(OutN);
Preprocessed = true;
+ uint32_t MaxMT = std::numeric_limits<uint32_t>::max();
+ uint32_t CutOff = static_cast<double>(MaxMT) * SamplingRatio;
+
for (size_t Idx = NumSamples - OutN; Idx != NumSamples; Idx++) {
+ if (RandNums[Idx] > CutOff)
+ continue;
+
DSample DS;
DS.set_size(NumDimsPerSample * (LagN + 1));
diff --git a/ml/kmeans/SamplesBuffer.h b/ml/kmeans/SamplesBuffer.h
index 5c3cb1a97..1c7215cca 100644
--- a/ml/kmeans/SamplesBuffer.h
+++ b/ml/kmeans/SamplesBuffer.h
@@ -80,9 +80,11 @@ class SamplesBuffer {
public:
SamplesBuffer(CalculatedNumber *CNs,
size_t NumSamples, size_t NumDimsPerSample,
- size_t DiffN = 1, size_t SmoothN = 3, size_t LagN = 3) :
+ size_t DiffN, size_t SmoothN, size_t LagN,
+ double SamplingRatio, std::vector<uint32_t> &RandNums) :
CNs(CNs), NumSamples(NumSamples), NumDimsPerSample(NumDimsPerSample),
DiffN(DiffN), SmoothN(SmoothN), LagN(LagN),
+ SamplingRatio(SamplingRatio), RandNums(RandNums),
BytesPerSample(NumDimsPerSample * sizeof(CalculatedNumber)),
Preprocessed(false) {};
@@ -129,6 +131,9 @@ private:
size_t DiffN;
size_t SmoothN;
size_t LagN;
+ double SamplingRatio;
+ std::vector<uint32_t> &RandNums;
+
size_t BytesPerSample;
bool Preprocessed;
};