summary refs log tree commit diff stats
path: root/ml/Dimension.h
diff options
context:
space:
mode:
author Daniel Baumann <daniel.baumann@progress-linux.org> 2023-02-06 16:11:34 +0000
committer Daniel Baumann <daniel.baumann@progress-linux.org> 2023-02-06 16:11:34 +0000
commit d079b656b4719739b2247dcd9d46e9bec793095a (patch)
tree d2c950c70a776bcf697c963151c5bd959f8a9f03 /ml/Dimension.h
parent Releasing debian version 1.37.1-2. (diff)
download netdata-d079b656b4719739b2247dcd9d46e9bec793095a.tar.xz
netdata-d079b656b4719739b2247dcd9d46e9bec793095a.zip
Merging upstream version 1.38.0.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'ml/Dimension.h')
-rw-r--r-- ml/Dimension.h 178
1 files changed, 141 insertions, 37 deletions
diff --git a/ml/Dimension.h b/ml/Dimension.h
index 3ec56e098..2b1adfff9 100644
--- a/ml/Dimension.h
+++ b/ml/Dimension.h
@@ -3,6 +3,8 @@
#ifndef ML_DIMENSION_H
#define ML_DIMENSION_H
+#include "Mutex.h"
+#include "Stats.h"
#include "Query.h"
#include "Config.h"
@@ -10,12 +12,6 @@
namespace ml {
-enum class MLResult {
- Success = 0,
- MissingData,
- NaN,
-};
-
static inline std::string getMLDimensionID(RRDDIM *RD) {
RRDSET *RS = RD->rrdset;
@@ -24,16 +20,118 @@ static inline std::string getMLDimensionID(RRDDIM *RD) {
return SS.str();
}
+enum class MachineLearningStatus { // Whether training/prediction is enabled for a dimension and, if not, the reason
+ // Training and prediction are enabled
+ Enabled,
+
+ // Disabled because the dimension's update_every differs from the host's
+ DisabledDueToUniqueUpdateEvery,
+
+ // Disabled because the configured skip pattern matches the chart (NOTE(review): Dimension's constructor matches against rrdset_name(), not the id - confirm wording)
+ DisabledDueToExcludedChart,
+};
+
+enum class TrainingStatus { // Where a dimension stands with respect to having a trained model
+ // No model exists for this dimension
+ Untrained,
+
+ // A training request has been sent, but no model exists yet
+ PendingWithoutModel,
+
+ // A request to update the already existing models has been sent
+ PendingWithModel,
+
+ // A valid, up-to-date model is available
+ Trained,
+};
+
+enum class MetricType { // Classifies a dimension by whether its values change over time
+ // The dimension's values are constant, so no training is needed
+ Constant,
+
+ // The dimension's values fluctuate, so a model has to be generated
+ Variable,
+};
+
+struct TrainingRequest { // Identifies a dimension to train, plus the DB entry range observed when the request was created
+ // Chart and dimension that should be trained
+ STRING *ChartId;
+ STRING *DimensionId;
+
+ // Time at which the request was created
+ time_t RequestTime;
+
+ // First/last entry of this dimension in the DB
+ // at the moment the request was made
+ time_t FirstEntryOnRequest;
+ time_t LastEntryOnRequest;
+};
+
+void dumpTrainingRequest(const TrainingRequest &TrainingReq, const char *Prefix);
+
+enum TrainingResult { // Outcome of a training attempt; unscoped enum, so the enumerators are visible directly in the enclosing namespace
+ // A KMeans model was created successfully
+ Ok,
+ // The DB could not be queried with a correct time range
+ InvalidQueryTimeRange,
+ // Too few values were gathered from the DB to run KMeans
+ NotEnoughCollectedValues,
+ // The acquired dimension was null
+ NullAcquiredDimension,
+ // The chart is under replication
+ ChartUnderReplication,
+};
+
+struct TrainingResponse { // Bookkeeping for one training attempt: request/response timestamps, query window, value counts and result
+ // Time at which the request that led to this response was made
+ time_t RequestTime;
+
+ // First/last entry of the dimension in the DB when the request was generated
+ time_t FirstEntryOnRequest;
+ time_t LastEntryOnRequest;
+
+ // First/last entry of the dimension in the DB when the response was generated
+ time_t FirstEntryOnResponse;
+ time_t LastEntryOnResponse;
+
+ // After/before timestamps used for our DB query
+ time_t QueryAfterT;
+ time_t QueryBeforeT;
+
+ // Actual after/before timestamps returned by the DB query ops
+ time_t DbAfterT;
+ time_t DbBeforeT;
+
+ // Number of doubles the DB query returned
+ size_t CollectedValues;
+
+ // Number of values handed back to the caller
+ size_t TotalValues;
+
+ // Outcome of the training attempt this response describes
+ TrainingResult Result;
+};
+
+void dumpTrainingResponse(const TrainingResponse &TrainingResp, const char *Prefix);
+
class Dimension {
public:
Dimension(RRDDIM *RD) :
RD(RD),
- LastTrainedAt(Seconds(0)),
- Trained(false),
- ConstantModel(false),
- AnomalyScore(0.0),
- AnomalyBit(0)
- { }
+ MT(MetricType::Constant),
+ TS(TrainingStatus::Untrained),
+ TR(),
+ LastTrainingTime(0)
+ {
+ if (simple_pattern_matches(Cfg.SP_ChartsToSkip, rrdset_name(RD->rrdset)))
+ MLS = MachineLearningStatus::DisabledDueToExcludedChart;
+ else if (RD->update_every != RD->rrdset->rrdhost->rrd_update_every)
+ MLS = MachineLearningStatus::DisabledDueToUniqueUpdateEvery;
+ else
+ MLS = MachineLearningStatus::Enabled;
+
+ Models.reserve(Cfg.NumModelsToUse);
+ }
RRDDIM *getRD() const {
return RD;
@@ -43,50 +141,56 @@ public:
return RD->update_every;
}
- time_t latestTime() const {
- return Query(RD).latestTime();
- }
-
- time_t oldestTime() const {
- return Query(RD).oldestTime();
+ MetricType getMT() const {
+ return MT;
}
- bool isTrained() const {
- return Trained;
+ TrainingStatus getTS() const {
+ return TS;
}
- bool isAnomalous() const {
- return AnomalyBit;
+ MachineLearningStatus getMLS() const {
+ return MLS;
}
- bool shouldTrain(const TimePoint &TP) const;
+ TrainingResult trainModel(const TrainingRequest &TR);
- bool isActive() const;
+ void scheduleForTraining(time_t CurrT);
- MLResult trainModel();
+ bool predict(time_t CurrT, CalculatedNumber Value, bool Exists);
- bool predict(CalculatedNumber Value, bool Exists);
+ std::vector<KMeans> getModels();
+
+ void dump() const;
- std::pair<bool, double> detect(size_t WindowLength, bool Reset);
-
- std::array<KMeans, 1> getModels();
+private:
+ TrainingRequest getTrainingRequest(time_t CurrT) const { // Builds a TrainingRequest for this dimension, stamped with CurrT
+ return TrainingRequest { // aggregate-initialized in TrainingRequest's member order
+ string_dup(RD->rrdset->id), // ChartId (NOTE(review): presumably the holder of the request must release this STRING - confirm)
+ string_dup(RD->id), // DimensionId (same ownership question as ChartId)
+ CurrT, // RequestTime
+ rrddim_first_entry_s(RD), // FirstEntryOnRequest
+ rrddim_last_entry_s(RD) // LastEntryOnRequest
+ };
+ }
private:
- std::pair<CalculatedNumber *, size_t> getCalculatedNumbers();
+ std::pair<CalculatedNumber *, TrainingResponse> getCalculatedNumbers(const TrainingRequest &TrainingReq);
public:
RRDDIM *RD;
+ MetricType MT;
+ TrainingStatus TS;
+ TrainingResponse TR;
- TimePoint LastTrainedAt;
- std::atomic<bool> Trained;
- std::atomic<bool> ConstantModel;
+ time_t LastTrainingTime;
- CalculatedNumber AnomalyScore;
- std::atomic<bool> AnomalyBit;
+ MachineLearningStatus MLS;
std::vector<CalculatedNumber> CNs;
- std::array<KMeans, 1> Models;
- std::mutex Mutex;
+ DSample Feature;
+ std::vector<KMeans> Models;
+ Mutex M;
};
} // namespace ml