1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
|
# -*- coding: utf-8 -*-
# Description: zscores netdata python.d module
# Author: andrewm4894
# SPDX-License-Identifier: GPL-3.0-or-later
from datetime import datetime
import re
import requests
import numpy as np
import pandas as pd
from bases.FrameworkServices.SimpleService import SimpleService
from netdata_pandas.data import get_data, get_allmetrics
# Module-level settings (presumably read by the python.d framework when it
# loads this collector -- confirm against the framework documentation).
priority = 60000
update_every = 5
disabled_by_default = True

# Chart ids, in the order they are listed for this module.
ORDER = ['z', '3stddev']

# Chart definitions keyed by chart id. Every chart starts with no 'lines'
# because Service.validate_charts() adds and removes dimensions at runtime.
# NOTE(review): the 'options' list looks like the usual python.d layout
# (name, title, units, family, context, chart type) -- confirm.
CHARTS = {
    'z': {
        'options': ['z', 'Z Score', 'z', 'Z Score', 'zscores.z', 'line'],
        'lines': [],
    },
    '3stddev': {
        'options': ['3stddev', 'Z Score >3', 'count', '3 Stddev', 'zscores.3stddev', 'stacked'],
        'lines': [],
    },
}
class Service(SimpleService):
    """Collector that charts smoothed z-scores for a configurable set of charts.

    On a schedule the service fetches a mean and a stddev for every metric of
    the charts in scope (see train_model), then on every run turns the latest
    values into clipped z-scores, smooths them over the last `z_smooth_n` runs
    and publishes two charts: the z-scores themselves ('z') and a 0/1 flag for
    scores whose absolute value exceeds 3 ('3stddev').

    Z-scores are handled internally multiplied by 100; the 'z' chart dimensions
    are registered with divisor=100 to undo that scaling.
    """

    def __init__(self, configuration=None, name=None):
        """Read the job configuration and resolve which charts are in scope.

        :param configuration <dict>: job configuration; every key read below has a default.
        :param name <str>: job name, passed through to SimpleService.
        """
        SimpleService.__init__(self, configuration=configuration, name=name)
        self.host = self.configuration.get('host', '127.0.0.1:19999')
        self.charts_regex = re.compile(self.configuration.get('charts_regex', 'system.*'))
        self.charts_to_exclude = self.configuration.get('charts_to_exclude', '').split(',')
        # NOTE(review): this request runs in the constructor and has no timeout,
        # so an unreachable host blocks or raises here -- confirm that is intended.
        available_charts = requests.get(f'http://{self.host}/api/v1/charts').json()['charts']
        # keep the charts whose name matches the regex and that are not explicitly excluded
        self.charts_in_scope = [
            c for c in available_charts
            if self.charts_regex.match(c) and c not in self.charts_to_exclude
        ]
        self.train_secs = self.configuration.get('train_secs', 14400)
        self.offset_secs = self.configuration.get('offset_secs', 300)
        self.train_every_n = self.configuration.get('train_every_n', 900)
        self.z_smooth_n = self.configuration.get('z_smooth_n', 15)
        self.z_clip = self.configuration.get('z_clip', 10)
        self.z_abs = bool(self.configuration.get('z_abs', True))
        self.burn_in = self.configuration.get('burn_in', 2)
        self.mode = self.configuration.get('mode', 'per_chart')
        self.per_chart_agg = self.configuration.get('per_chart_agg', 'mean')
        self.order = ORDER
        self.definitions = CHARTS
        # dimensions currently registered on each chart, maintained by validate_charts()
        self.collected_dims = {'z': set(), '3stddev': set()}
        self.df_mean = pd.DataFrame()
        self.df_std = pd.DataFrame()
        # rolling table of the last z_smooth_n z-score rows, one column per metric
        self.df_z_history = pd.DataFrame()

    def check(self):
        """Probe the allmetrics endpoint once for the charts in scope.

        :return: <bool> True whenever the probe call returns; its result is discarded
            and any exception it raises propagates to the caller.
        """
        _ = get_allmetrics(self.host, self.charts_in_scope, wide=True, col_sep='.')
        return True

    def validate_charts(self, chart, data, algorithm='absolute', multiplier=1, divisor=1):
        """If dimension not in chart then add it, and drop dimensions that are no longer in data.

        :param chart <str>: chart id, a key of self.collected_dims / self.charts.
        :param data <dict>: latest data for the chart, keyed by dimension id.
        :param algorithm <str>: algorithm used for newly added dimensions.
        :param multiplier <int>: multiplier used for newly added dimensions.
        :param divisor <int>: divisor used for newly added dimensions.
        """
        for dim in data:
            if dim not in self.collected_dims[chart]:
                self.collected_dims[chart].add(dim)
                self.charts[chart].add_dimension([dim, dim, algorithm, multiplier, divisor])

        # iterate over a copy because the set is modified inside the loop
        for dim in list(self.collected_dims[chart]):
            if dim not in data:
                self.collected_dims[chart].remove(dim)
                self.charts[chart].del_dimension(dim, hide=False)

    def train_model(self):
        """Calculate the mean and stddev for all relevant metrics and store them for use in calculating zscore at
        each timestep.

        The training window is `train_secs` long and ends `offset_secs` before now.
        """
        before = int(datetime.now().timestamp()) - self.offset_secs
        after = before - self.train_secs

        # the window is requested as 10 grouped points; the points are then averaged per metric
        self.df_mean = get_data(
            self.host, self.charts_in_scope, after, before, points=10, group='average', col_sep='.'
        ).mean().to_frame().rename(columns={0: "mean"})

        self.df_std = get_data(
            self.host, self.charts_in_scope, after, before, points=10, group='stddev', col_sep='.'
        ).mean().to_frame().rename(columns={0: "std"})

    def create_data(self, df_allmetrics):
        """Use x, mean, stddev to generate z scores and 3stddev flags via some pandas manipulation.
        Returning two dictionaries of dimensions and measures, one for each chart.

        :param df_allmetrics <pd.DataFrame>: pandas dataframe with latest data from api/v1/allmetrics,
            indexed by metric name and providing a `value` column.
        :return: (<dict>,<dict>) tuple of dictionaries, one for zscores (scaled by 100, keys suffixed with '_z')
            and the other for a flag if abs(z)>3 (same keys without the suffix).
        """
        # calculate clipped z score for each available metric; only metrics present in all three frames survive
        df_z = pd.concat([self.df_mean, self.df_std, df_allmetrics], axis=1, join='inner')
        df_z['z'] = ((df_z['value'] - df_z['mean']) / df_z['std']).clip(-self.z_clip, self.z_clip).fillna(0) * 100
        if self.z_abs:
            df_z['z'] = df_z['z'].abs()

        # append latest zscores to history table in wide format and keep the last z_smooth_n rows.
        # DataFrame.append was removed in pandas 2.0, so pd.concat is used; the empty initial
        # history is handled explicitly rather than concatenating an empty frame.
        df_z_latest = df_z[['z']].reset_index().pivot_table(values='z', columns='index')
        if self.df_z_history.empty:
            df_z_history = df_z_latest
        else:
            df_z_history = pd.concat([self.df_z_history, df_z_latest], sort=True)
        self.df_z_history = df_z_history.tail(self.z_smooth_n)

        # get average zscore for last z_smooth_n for each metric
        df_z_smooth = (
            self.df_z_history.melt(var_name='index', value_name='z').groupby('index')['z'].mean().to_frame()
        )
        data_z = df_z_smooth['z'].add_suffix('_z').to_dict()

        # aggregate to chart level if specified
        if self.mode == 'per_chart':
            # metric names look like '<chart part>.<chart part>.<dimension>'; the first two parts form the chart
            df_z_smooth['chart'] = ['.'.join(x[0:2]) + '_z' for x in df_z_smooth.index.str.split('.').to_list()]
            z_by_chart = df_z_smooth.groupby('chart')['z']
            if self.per_chart_agg == 'absmax':
                # signed value with the largest magnitude within each chart
                data_z = z_by_chart.agg(lambda x: max(x, key=abs)).to_dict()
            else:
                data_z = z_by_chart.agg(self.per_chart_agg).to_dict()

        # z is scaled by 100, so 300 corresponds to 3 standard deviations
        data_3stddev = {}
        for key, value in data_z.items():
            # strip only the trailing '_z' so ids containing '_z' elsewhere are left intact
            dim = key[:-len('_z')] if key.endswith('_z') else key
            data_3stddev[dim] = 1 if abs(value) > 300 else 0

        return data_z, data_3stddev

    def get_data(self):
        """Retrain when due, then compute the data for both charts.

        Training happens on every run up to and including `burn_in`, and afterwards on every
        `train_every_n`-th run.

        :return: <dict> the zscore and 3stddev values merged into a single dictionary.
        """
        if self.runs_counter <= self.burn_in or self.runs_counter % self.train_every_n == 0:
            self.train_model()

        data_z, data_3stddev = self.create_data(
            get_allmetrics(self.host, self.charts_in_scope, wide=True, col_sep='.').transpose())
        data = {**data_z, **data_3stddev}

        # zscores are sent multiplied by 100, hence the divisor on the 'z' chart
        self.validate_charts('z', data_z, divisor=100)
        self.validate_charts('3stddev', data_3stddev)

        return data
|