diff options
Diffstat (limited to '')
-rw-r--r-- | ml/Dimension.h | 165 |
1 files changed, 50 insertions, 115 deletions
diff --git a/ml/Dimension.h b/ml/Dimension.h index 4fbc09b98..3ec56e098 100644 --- a/ml/Dimension.h +++ b/ml/Dimension.h @@ -3,157 +3,92 @@ #ifndef ML_DIMENSION_H #define ML_DIMENSION_H -#include "BitBufferCounter.h" +#include "Query.h" #include "Config.h" #include "ml-private.h" namespace ml { -class RrdDimension { -public: - RrdDimension(RRDDIM *RD) : RD(RD), Ops(&RD->tiers[0]->query_ops) { } - - RRDDIM *getRD() const { return RD; } - - time_t latestTime() { return Ops->latest_time(RD->tiers[0]->db_metric_handle); } - - time_t oldestTime() { return Ops->oldest_time(RD->tiers[0]->db_metric_handle); } - - unsigned updateEvery() const { return RD->update_every; } - - const std::string getID() const { - RRDSET *RS = RD->rrdset; - - std::stringstream SS; - SS << RS->context << "|" << RS->id << "|" << RD->name; - return SS.str(); - } - - bool isActive() const { - if (rrdset_flag_check(RD->rrdset, RRDSET_FLAG_OBSOLETE)) - return false; - - if (rrddim_flag_check(RD, RRDDIM_FLAG_OBSOLETE)) - return false; - - return true; - } - - void setAnomalyRateRD(RRDDIM *ARRD) { AnomalyRateRD = ARRD; } - RRDDIM *getAnomalyRateRD() const { return AnomalyRateRD; } - - void setAnomalyRateRDName(const char *Name) const { - rrddim_set_name(AnomalyRateRD->rrdset, AnomalyRateRD, Name); - } - - virtual ~RrdDimension() { - rrddim_free(AnomalyRateRD->rrdset, AnomalyRateRD); - } - -private: - RRDDIM *RD; - RRDDIM *AnomalyRateRD; - - struct rrddim_query_ops *Ops; - - std::string ID; -}; - enum class MLResult { Success = 0, MissingData, NaN, }; -class TrainableDimension : public RrdDimension { -public: - TrainableDimension(RRDDIM *RD) : - RrdDimension(RD), TrainEvery(Cfg.TrainEvery * updateEvery()) {} +static inline std::string getMLDimensionID(RRDDIM *RD) { + RRDSET *RS = RD->rrdset; - MLResult trainModel(); + std::stringstream SS; + SS << rrdset_context(RS) << "|" << rrdset_id(RS) << "|" << rrddim_name(RD); + return SS.str(); +} - CalculatedNumber computeAnomalyScore(SamplesBuffer &SB) { - return Trained ? KM.anomalyScore(SB) : 0.0; +class Dimension { +public: + Dimension(RRDDIM *RD) : + RD(RD), + LastTrainedAt(Seconds(0)), + Trained(false), + ConstantModel(false), + AnomalyScore(0.0), + AnomalyBit(0) + { } + + RRDDIM *getRD() const { + return RD; } - bool shouldTrain(const TimePoint &TP) const { - if (ConstantModel) - return false; - - return (LastTrainedAt + TrainEvery) < TP; + unsigned updateEvery() const { + return RD->update_every; } - bool isTrained() const { return Trained; } - -private: - std::pair<CalculatedNumber *, size_t> getCalculatedNumbers(); - -public: - TimePoint LastTrainedAt{Seconds{0}}; + time_t latestTime() const { + return Query(RD).latestTime(); + } -protected: - std::atomic<bool> ConstantModel{false}; + time_t oldestTime() const { + return Query(RD).oldestTime(); + } -private: - Seconds TrainEvery; - KMeans KM; + bool isTrained() const { + return Trained; + } - std::atomic<bool> Trained{false}; -}; + bool isAnomalous() const { + return AnomalyBit; + } -class PredictableDimension : public TrainableDimension { -public: - PredictableDimension(RRDDIM *RD) : TrainableDimension(RD) {} + bool shouldTrain(const TimePoint &TP) const; - std::pair<MLResult, bool> predict(); + bool isActive() const; - void addValue(CalculatedNumber Value, bool Exists); + MLResult trainModel(); - bool isAnomalous() { return AnomalyBit; } + bool predict(CalculatedNumber Value, bool Exists); - void updateAnomalyBitCounter(RRDSET *RS, unsigned Elapsed, bool IsAnomalous) { - AnomalyBitCounter += IsAnomalous; + std::pair<bool, double> detect(size_t WindowLength, bool Reset); - if (Elapsed == Cfg.DBEngineAnomalyRateEvery) { - double AR = static_cast<double>(AnomalyBitCounter) / Cfg.DBEngineAnomalyRateEvery; - rrddim_set_by_pointer(RS, getAnomalyRateRD(), AR * 1000); - AnomalyBitCounter = 0; - } - } + std::array<KMeans, 1> getModels(); private: - CalculatedNumber AnomalyScore{0.0}; - std::atomic<bool> AnomalyBit{false}; - unsigned AnomalyBitCounter{0}; - - std::vector<CalculatedNumber> CNs; -}; + std::pair<CalculatedNumber *, size_t> getCalculatedNumbers(); -class DetectableDimension : public PredictableDimension { public: - DetectableDimension(RRDDIM *RD) : PredictableDimension(RD) {} - - std::pair<bool, double> detect(size_t WindowLength, bool Reset) { - bool AnomalyBit = isAnomalous(); - - if (Reset) - NumSetBits = BBC.numSetBits(); + RRDDIM *RD; - NumSetBits += AnomalyBit; - BBC.insert(AnomalyBit); + TimePoint LastTrainedAt; + std::atomic<bool> Trained; + std::atomic<bool> ConstantModel; - double AnomalyRate = static_cast<double>(NumSetBits) / WindowLength; - return { AnomalyBit, AnomalyRate }; - } + CalculatedNumber AnomalyScore; + std::atomic<bool> AnomalyBit; -private: - BitBufferCounter BBC{static_cast<size_t>(Cfg.ADMinWindowSize)}; - size_t NumSetBits{0}; + std::vector<CalculatedNumber> CNs; + std::array<KMeans, 1> Models; + std::mutex Mutex; }; -using Dimension = DetectableDimension; - } // namespace ml #endif /* ML_DIMENSION_H */ |