summaryrefslogtreecommitdiffstats
path: root/src/rrdcalc.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/rrdcalc.c')
-rw-r--r--src/rrdcalc.c415
1 files changed, 415 insertions, 0 deletions
diff --git a/src/rrdcalc.c b/src/rrdcalc.c
new file mode 100644
index 00000000..1f184540
--- /dev/null
+++ b/src/rrdcalc.c
@@ -0,0 +1,415 @@
+#define NETDATA_HEALTH_INTERNALS
+#include "common.h"
+
+// ----------------------------------------------------------------------------
+// RRDCALC management
+
+inline const char *rrdcalc_status2string(int status) {
+ switch(status) {
+ case RRDCALC_STATUS_REMOVED:
+ return "REMOVED";
+
+ case RRDCALC_STATUS_UNDEFINED:
+ return "UNDEFINED";
+
+ case RRDCALC_STATUS_UNINITIALIZED:
+ return "UNINITIALIZED";
+
+ case RRDCALC_STATUS_CLEAR:
+ return "CLEAR";
+
+ case RRDCALC_STATUS_RAISED:
+ return "RAISED";
+
+ case RRDCALC_STATUS_WARNING:
+ return "WARNING";
+
+ case RRDCALC_STATUS_CRITICAL:
+ return "CRITICAL";
+
+ default:
+ error("Unknown alarm status %d", status);
+ return "UNKNOWN";
+ }
+}
+
+static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) {
+ debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, st->rrdhost->hostname);
+
+ rc->last_status_change = now_realtime_sec();
+ rc->rrdset = st;
+
+ rc->rrdset_next = st->alarms;
+ rc->rrdset_prev = NULL;
+
+ if(rc->rrdset_next)
+ rc->rrdset_next->rrdset_prev = rc;
+
+ st->alarms = rc;
+
+ if(rc->update_every < rc->rrdset->update_every) {
+ error("Health alarm '%s.%s' has update every %d, less than chart update every %d. Setting alarm update frequency to %d.", rc->rrdset->id, rc->name, rc->update_every, rc->rrdset->update_every, rc->rrdset->update_every);
+ rc->update_every = rc->rrdset->update_every;
+ }
+
+ if(!isnan(rc->green) && isnan(st->green)) {
+ debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from %Lf to %Lf.", rc->rrdset->id, rc->name, rc->rrdset->green, rc->green);
+ st->green = rc->green;
+ }
+
+ if(!isnan(rc->red) && isnan(st->red)) {
+ debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from %Lf to %Lf.", rc->rrdset->id, rc->name, rc->rrdset->red, rc->red);
+ st->red = rc->red;
+ }
+
+ rc->local = rrdvar_create_and_index("local", &st->variables_root_index, rc->name, RRDVAR_TYPE_CALCULATED, &rc->value);
+ rc->family = rrdvar_create_and_index("family", &st->rrdfamily->variables_root_index, rc->name, RRDVAR_TYPE_CALCULATED, &rc->value);
+
+ char fullname[RRDVAR_MAX_LENGTH + 1];
+ snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->id, rc->name);
+ rc->hostid = rrdvar_create_and_index("host", &st->rrdhost->variables_root_index, fullname, RRDVAR_TYPE_CALCULATED, &rc->value);
+
+ snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->name, rc->name);
+ rc->hostname = rrdvar_create_and_index("host", &st->rrdhost->variables_root_index, fullname, RRDVAR_TYPE_CALCULATED, &rc->value);
+
+ if(!rc->units) rc->units = strdupz(st->units);
+
+ {
+ time_t now = now_realtime_sec();
+ health_alarm_log(
+ st->rrdhost,
+ rc->id,
+ rc->next_event_id++,
+ now,
+ rc->name,
+ rc->rrdset->id,
+ rc->rrdset->family,
+ rc->exec,
+ rc->recipient,
+ now - rc->last_status_change,
+ rc->old_value,
+ rc->value,
+ rc->status,
+ RRDCALC_STATUS_UNINITIALIZED,
+ rc->source,
+ rc->units,
+ rc->info,
+ 0,
+ 0
+ );
+ }
+}
+
+static inline int rrdcalc_is_matching_this_rrdset(RRDCALC *rc, RRDSET *st) {
+ if( (rc->hash_chart == st->hash && !strcmp(rc->chart, st->id)) ||
+ (rc->hash_chart == st->hash_name && !strcmp(rc->chart, st->name)))
+ return 1;
+
+ return 0;
+}
+
+// this has to be called while the RRDHOST is locked
+inline void rrdsetcalc_link_matching(RRDSET *st) {
+ // debug(D_HEALTH, "find matching alarms for chart '%s'", st->id);
+
+ RRDCALC *rc;
+ for(rc = st->rrdhost->alarms; rc ; rc = rc->next) {
+ if(unlikely(rc->rrdset))
+ continue;
+
+ if(unlikely(rrdcalc_is_matching_this_rrdset(rc, st)))
+ rrdsetcalc_link(st, rc);
+ }
+}
+
+// this has to be called while the RRDHOST is locked
+inline void rrdsetcalc_unlink(RRDCALC *rc) {
+ RRDSET *st = rc->rrdset;
+
+ if(!st) {
+ debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name);
+ error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name);
+ return;
+ }
+
+ {
+ time_t now = now_realtime_sec();
+ health_alarm_log(
+ st->rrdhost,
+ rc->id,
+ rc->next_event_id++,
+ now,
+ rc->name,
+ rc->rrdset->id,
+ rc->rrdset->family,
+ rc->exec,
+ rc->recipient,
+ now - rc->last_status_change,
+ rc->old_value,
+ rc->value,
+ rc->status,
+ RRDCALC_STATUS_REMOVED,
+ rc->source,
+ rc->units,
+ rc->info,
+ 0,
+ 0
+ );
+ }
+
+ RRDHOST *host = st->rrdhost;
+
+ debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname);
+
+ // unlink it
+ if(rc->rrdset_prev)
+ rc->rrdset_prev->rrdset_next = rc->rrdset_next;
+
+ if(rc->rrdset_next)
+ rc->rrdset_next->rrdset_prev = rc->rrdset_prev;
+
+ if(st->alarms == rc)
+ st->alarms = rc->rrdset_next;
+
+ rc->rrdset_prev = rc->rrdset_next = NULL;
+
+ rrdvar_free(st->rrdhost, &st->variables_root_index, rc->local);
+ rc->local = NULL;
+
+ rrdvar_free(st->rrdhost, &st->rrdfamily->variables_root_index, rc->family);
+ rc->family = NULL;
+
+ rrdvar_free(st->rrdhost, &st->rrdhost->variables_root_index, rc->hostid);
+ rc->hostid = NULL;
+
+ rrdvar_free(st->rrdhost, &st->rrdhost->variables_root_index, rc->hostname);
+ rc->hostname = NULL;
+
+ rc->rrdset = NULL;
+
+ // RRDCALC will remain in RRDHOST
+ // so that if the matching chart is found in the future
+ // it will be applied automatically
+}
+
+RRDCALC *rrdcalc_find(RRDSET *st, const char *name) {
+ RRDCALC *rc;
+ uint32_t hash = simple_hash(name);
+
+ for( rc = st->alarms; rc ; rc = rc->rrdset_next ) {
+ if(unlikely(rc->hash == hash && !strcmp(rc->name, name)))
+ return rc;
+ }
+
+ return NULL;
+}
+
+inline int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name) {
+ RRDCALC *rc;
+
+ if(unlikely(!chart)) {
+ error("attempt to find RRDCALC '%s' without giving a chart name", name);
+ return 1;
+ }
+
+ if(unlikely(!hash_chart)) hash_chart = simple_hash(chart);
+ if(unlikely(!hash_name)) hash_name = simple_hash(name);
+
+ // make sure it does not already exist
+ for(rc = host->alarms; rc ; rc = rc->next) {
+ if (unlikely(rc->chart && rc->hash == hash_name && rc->hash_chart == hash_chart && !strcmp(name, rc->name) && !strcmp(chart, rc->chart))) {
+ debug(D_HEALTH, "Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname);
+ error("Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname);
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id) {
+ if(chart && name) {
+ uint32_t hash_chart = simple_hash(chart);
+ uint32_t hash_name = simple_hash(name);
+
+ // re-use old IDs, by looking them up in the alarm log
+ ALARM_ENTRY *ae;
+ for(ae = host->health_log.alarms; ae ;ae = ae->next) {
+ if(unlikely(ae->hash_name == hash_name && ae->hash_chart == hash_chart && !strcmp(name, ae->name) && !strcmp(chart, ae->chart))) {
+ if(next_event_id) *next_event_id = ae->alarm_event_id + 1;
+ return ae->alarm_id;
+ }
+ }
+ }
+
+ return host->health_log.next_alarm_id++;
+}
+
+inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) {
+ rrdhost_check_rdlock(host);
+
+ if(rc->calculation) {
+ rc->calculation->status = &rc->status;
+ rc->calculation->this = &rc->value;
+ rc->calculation->after = &rc->db_after;
+ rc->calculation->before = &rc->db_before;
+ rc->calculation->rrdcalc = rc;
+ }
+
+ if(rc->warning) {
+ rc->warning->status = &rc->status;
+ rc->warning->this = &rc->value;
+ rc->warning->after = &rc->db_after;
+ rc->warning->before = &rc->db_before;
+ rc->warning->rrdcalc = rc;
+ }
+
+ if(rc->critical) {
+ rc->critical->status = &rc->status;
+ rc->critical->this = &rc->value;
+ rc->critical->after = &rc->db_after;
+ rc->critical->before = &rc->db_before;
+ rc->critical->rrdcalc = rc;
+ }
+
+ // link it to the host
+ if(likely(host->alarms)) {
+ // append it
+ RRDCALC *t;
+ for(t = host->alarms; t && t->next ; t = t->next) ;
+ t->next = rc;
+ }
+ else {
+ host->alarms = rc;
+ }
+
+ // link it to its chart
+ RRDSET *st;
+ rrdset_foreach_read(st, host) {
+ if(rrdcalc_is_matching_this_rrdset(rc, st)) {
+ rrdsetcalc_link(st, rc);
+ break;
+ }
+ }
+}
+
+inline RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart) {
+
+ debug(D_HEALTH, "Health creating dynamic alarm (from template) '%s.%s'", chart, rt->name);
+
+ if(rrdcalc_exists(host, chart, rt->name, 0, 0))
+ return NULL;
+
+ RRDCALC *rc = callocz(1, sizeof(RRDCALC));
+ rc->next_event_id = 1;
+ rc->id = rrdcalc_get_unique_id(host, chart, rt->name, &rc->next_event_id);
+ rc->name = strdupz(rt->name);
+ rc->hash = simple_hash(rc->name);
+ rc->chart = strdupz(chart);
+ rc->hash_chart = simple_hash(rc->chart);
+
+ if(rt->dimensions) rc->dimensions = strdupz(rt->dimensions);
+
+ rc->green = rt->green;
+ rc->red = rt->red;
+ rc->value = NAN;
+ rc->old_value = NAN;
+
+ rc->delay_up_duration = rt->delay_up_duration;
+ rc->delay_down_duration = rt->delay_down_duration;
+ rc->delay_max_duration = rt->delay_max_duration;
+ rc->delay_multiplier = rt->delay_multiplier;
+
+ rc->group = rt->group;
+ rc->after = rt->after;
+ rc->before = rt->before;
+ rc->update_every = rt->update_every;
+ rc->options = rt->options;
+
+ if(rt->exec) rc->exec = strdupz(rt->exec);
+ if(rt->recipient) rc->recipient = strdupz(rt->recipient);
+ if(rt->source) rc->source = strdupz(rt->source);
+ if(rt->units) rc->units = strdupz(rt->units);
+ if(rt->info) rc->info = strdupz(rt->info);
+
+ if(rt->calculation) {
+ rc->calculation = expression_parse(rt->calculation->source, NULL, NULL);
+ if(!rc->calculation)
+ error("Health alarm '%s.%s': failed to parse calculation expression '%s'", chart, rt->name, rt->calculation->source);
+ }
+ if(rt->warning) {
+ rc->warning = expression_parse(rt->warning->source, NULL, NULL);
+ if(!rc->warning)
+ error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", chart, rt->name, rt->warning->source);
+ }
+ if(rt->critical) {
+ rc->critical = expression_parse(rt->critical->source, NULL, NULL);
+ if(!rc->critical)
+ error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, rt->critical->source);
+ }
+
+ debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green %Lf, red %Lf, lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f",
+ (rc->chart)?rc->chart:"NOCHART",
+ rc->name,
+ (rc->exec)?rc->exec:"DEFAULT",
+ (rc->recipient)?rc->recipient:"DEFAULT",
+ rc->green,
+ rc->red,
+ rc->group,
+ rc->after,
+ rc->before,
+ rc->options,
+ (rc->dimensions)?rc->dimensions:"NONE",
+ rc->update_every,
+ (rc->calculation)?rc->calculation->parsed_as:"NONE",
+ (rc->warning)?rc->warning->parsed_as:"NONE",
+ (rc->critical)?rc->critical->parsed_as:"NONE",
+ rc->source,
+ rc->delay_up_duration,
+ rc->delay_down_duration,
+ rc->delay_max_duration,
+ rc->delay_multiplier
+ );
+
+ rrdcalc_create_part2(host, rc);
+ return rc;
+}
+
+void rrdcalc_free(RRDHOST *host, RRDCALC *rc) {
+ if(!rc) return;
+
+ debug(D_HEALTH, "Health removing alarm '%s.%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname);
+
+ // unlink it from RRDSET
+ if(rc->rrdset) rrdsetcalc_unlink(rc);
+
+ // unlink it from RRDHOST
+ if(unlikely(rc == host->alarms))
+ host->alarms = rc->next;
+
+ else {
+ RRDCALC *t;
+ for(t = host->alarms; t && t->next != rc; t = t->next) ;
+ if(t) {
+ t->next = rc->next;
+ rc->next = NULL;
+ }
+ else
+ error("Cannot unlink alarm '%s.%s' from host '%s': not found", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname);
+ }
+
+ expression_free(rc->calculation);
+ expression_free(rc->warning);
+ expression_free(rc->critical);
+
+ freez(rc->name);
+ freez(rc->chart);
+ freez(rc->family);
+ freez(rc->dimensions);
+ freez(rc->exec);
+ freez(rc->recipient);
+ freez(rc->source);
+ freez(rc->units);
+ freez(rc->info);
+ freez(rc);
+}