1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
|
// SPDX-License-Identifier: GPL-3.0-or-later
#ifndef ML_DIMENSION_H
#define ML_DIMENSION_H
#include "Mutex.h"
#include "Stats.h"
#include "Query.h"
#include "Config.h"
#include "ml-private.h"
namespace ml {
// Builds the ML identifier string of a dimension. The result is the
// concatenation of rrdset_context() and rrdset_id() of the dimension's chart
// followed by rrddim_name() of the dimension itself, separated by '|'.
static inline std::string getMLDimensionID(RRDDIM *RD) {
    RRDSET *Chart = RD->rrdset;

    std::stringstream Buf;
    Buf << rrdset_context(Chart);
    Buf << "|";
    Buf << rrdset_id(Chart);
    Buf << "|";
    Buf << rrddim_name(RD);

    return Buf.str();
}
// Whether machine learning (training and prediction) runs for a dimension.
enum class MachineLearningStatus {
    // Training and prediction are allowed.
    Enabled,

    // Turned off: the dimension's update_every is not the same as the
    // update_every of its host.
    DisabledDueToUniqueUpdateEvery,

    // Turned off: the dimension's chart matched the configured
    // "charts to skip" pattern.
    DisabledDueToExcludedChart,
};
// Where a dimension currently stands with respect to model training.
enum class TrainingStatus {
    // No model exists for the dimension.
    Untrained,

    // A training request has been sent and no model is available yet.
    PendingWithoutModel,

    // A request to refresh the already existing models has been sent.
    PendingWithModel,

    // A valid, up-to-date model is available.
    Trained,
};
// Classifies how the values of a dimension behave.
enum class MetricType {
    // Values never change, so training is unnecessary.
    Constant,

    // Values fluctuate, so a model has to be generated.
    Variable,
};
// A request to train one dimension. Field order matters: the struct is
// filled with positional aggregate initialization.
struct TrainingRequest {
    // Identify the chart and the dimension that should be trained.
    STRING *ChartId;
    STRING *DimensionId;

    // When the request was created.
    time_t RequestTime;

    // First and last entry of the dimension in the DB, as observed at the
    // moment the request was created.
    time_t FirstEntryOnRequest;
    time_t LastEntryOnRequest;
};
// Dumps the given training request. Only declared here; the definition
// lives outside this header.
// NOTE(review): Prefix is presumably prepended to the emitted output -
// confirm against the definition.
void dumpTrainingRequest(const TrainingRequest &TrainingReq, const char *Prefix);
// Outcome of a training attempt. Deliberately left as an unscoped enum so
// that existing uses of the enumerators keep compiling.
enum TrainingResult {
    // A KMeans model was created successfully.
    Ok,

    // The DB could not be queried with a correct time range.
    InvalidQueryTimeRange,

    // The DB did not provide enough data to run KMeans.
    NotEnoughCollectedValues,

    // The acquired dimension was null.
    NullAcquiredDimension,

    // The chart is under replication.
    ChartUnderReplication,
};
// Bookkeeping produced while serving a training request: the time ranges
// involved, how many values were gathered, and the final result.
struct TrainingResponse {
    // Creation time of the request this response belongs to.
    time_t RequestTime;

    // First and last DB entry of the dimension when the request was generated.
    time_t FirstEntryOnRequest;
    time_t LastEntryOnRequest;

    // First and last DB entry of the dimension when the response was generated.
    time_t FirstEntryOnResponse;
    time_t LastEntryOnResponse;

    // After/before timestamps used for the DB query.
    time_t QueryAfterT;
    time_t QueryBeforeT;

    // After/before timestamps actually reported back by the DB query ops.
    time_t DbAfterT;
    time_t DbBeforeT;

    // How many doubles the DB query returned.
    size_t CollectedValues;

    // How many values are handed back to the caller.
    size_t TotalValues;

    // Outcome of the training attempt.
    TrainingResult Result;
};
// Dumps the given training response. Only declared here; the definition
// lives outside this header.
// NOTE(review): Prefix is presumably prepended to the emitted output -
// confirm against the definition.
void dumpTrainingResponse(const TrainingResponse &TrainingResp, const char *Prefix);
// Machine-learning state attached to a single RRDDIM: its metric type,
// training status, last training response and the KMeans models.
class Dimension {
public:
    // Starts the dimension as an untrained, constant metric and decides
    // whether ML is enabled for it:
    //   - disabled when the chart's name matches Cfg.SP_ChartsToSkip;
    //   - otherwise disabled when the dimension's update_every differs from
    //     the host's rrd_update_every;
    //   - enabled in every other case.
    // Finally reserves room for Cfg.NumModelsToUse models.
    Dimension(RRDDIM *RD) :
        RD(RD),
        MT(MetricType::Constant),
        TS(TrainingStatus::Untrained),
        TR(),
        LastTrainingTime(0)
    {
        // The checks are evaluated lazily and in this exact order.
        MachineLearningStatus Status = MachineLearningStatus::Enabled;
        if (simple_pattern_matches(Cfg.SP_ChartsToSkip, rrdset_name(RD->rrdset)))
            Status = MachineLearningStatus::DisabledDueToExcludedChart;
        else if (RD->update_every != RD->rrdset->rrdhost->rrd_update_every)
            Status = MachineLearningStatus::DisabledDueToUniqueUpdateEvery;
        MLS = Status;

        Models.reserve(Cfg.NumModelsToUse);
    }

    // Underlying RRD dimension.
    RRDDIM *getRD() const { return RD; }

    // update_every of the underlying RRD dimension.
    unsigned updateEvery() const { return RD->update_every; }

    // Current metric type.
    MetricType getMT() const { return MT; }

    // Current training status.
    TrainingStatus getTS() const { return TS; }

    // Whether ML is enabled for this dimension, or why it is disabled.
    MachineLearningStatus getMLS() const { return MLS; }

    // The members below are only declared here; they are defined out of line.
    // Note that the parameter of trainModel() shares its name with the
    // TR data member.
    TrainingResult trainModel(const TrainingRequest &TR);

    void scheduleForTraining(time_t CurrT);

    bool predict(time_t CurrT, CalculatedNumber Value, bool Exists);

    std::vector<KMeans> getModels();

    void dump() const;

private:
    // Creates a training request for this dimension stamped with CurrT.
    // The chart and dimension ids are obtained through string_dup(), and the
    // first/last DB entry times are sampled at the time of the call.
    TrainingRequest getTrainingRequest(time_t CurrT) const {
        TrainingRequest Req;

        Req.ChartId = string_dup(RD->rrdset->id);
        Req.DimensionId = string_dup(RD->id);
        Req.RequestTime = CurrT;
        Req.FirstEntryOnRequest = rrddim_first_entry_s(RD);
        Req.LastEntryOnRequest = rrddim_last_entry_s(RD);

        return Req;
    }

    // Declared only; defined out of line. Returns a pointer to calculated
    // numbers paired with the training response for the given request.
    // NOTE(review): ownership of the returned pointer is not visible from
    // this header - confirm against the definition.
    std::pair<CalculatedNumber *, TrainingResponse> getCalculatedNumbers(const TrainingRequest &TrainingReq);

public:
    // Data members. The declaration order is kept as is, because it fixes
    // both the initialization order and the object layout.
    RRDDIM *RD;
    MetricType MT;
    TrainingStatus TS;
    TrainingResponse TR;

    time_t LastTrainingTime;

    MachineLearningStatus MLS;

    std::vector<CalculatedNumber> CNs;
    DSample Feature;
    std::vector<KMeans> Models;

    // NOTE(review): which members this mutex guards is determined by the
    // out-of-line methods, which are not visible in this header.
    Mutex M;
};
} // namespace ml
#endif /* ML_DIMENSION_H */
|