diff options
Diffstat (limited to 'database/rrdcalc.c')
-rw-r--r-- | database/rrdcalc.c | 429 |
1 files changed, 429 insertions, 0 deletions
diff --git a/database/rrdcalc.c b/database/rrdcalc.c new file mode 100644 index 000000000..7f6a896b6 --- /dev/null +++ b/database/rrdcalc.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_HEALTH_INTERNALS +#include "rrd.h" + +// ---------------------------------------------------------------------------- +// RRDCALC management + +inline const char *rrdcalc_status2string(RRDCALC_STATUS status) { + switch(status) { + case RRDCALC_STATUS_REMOVED: + return "REMOVED"; + + case RRDCALC_STATUS_UNDEFINED: + return "UNDEFINED"; + + case RRDCALC_STATUS_UNINITIALIZED: + return "UNINITIALIZED"; + + case RRDCALC_STATUS_CLEAR: + return "CLEAR"; + + case RRDCALC_STATUS_RAISED: + return "RAISED"; + + case RRDCALC_STATUS_WARNING: + return "WARNING"; + + case RRDCALC_STATUS_CRITICAL: + return "CRITICAL"; + + default: + error("Unknown alarm status %d", status); + return "UNKNOWN"; + } +} + +static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { + RRDHOST *host = st->rrdhost; + + debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname); + + rc->last_status_change = now_realtime_sec(); + rc->rrdset = st; + + rc->rrdset_next = st->alarms; + rc->rrdset_prev = NULL; + + if(rc->rrdset_next) + rc->rrdset_next->rrdset_prev = rc; + + st->alarms = rc; + + if(rc->update_every < rc->rrdset->update_every) { + error("Health alarm '%s.%s' has update every %d, less than chart update every %d. Setting alarm update frequency to %d.", rc->rrdset->id, rc->name, rc->update_every, rc->rrdset->update_every, rc->rrdset->update_every); + rc->update_every = rc->rrdset->update_every; + } + + if(!isnan(rc->green) && isnan(st->green)) { + debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from " CALCULATED_NUMBER_FORMAT_AUTO " to " CALCULATED_NUMBER_FORMAT_AUTO ".", rc->rrdset->id, rc->name, rc->rrdset->green, rc->green); + st->green = rc->green; + } + + if(!isnan(rc->red) && isnan(st->red)) { + debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from " CALCULATED_NUMBER_FORMAT_AUTO " to " CALCULATED_NUMBER_FORMAT_AUTO ".", rc->rrdset->id, rc->name, rc->rrdset->red, rc->red); + st->red = rc->red; + } + + rc->local = rrdvar_create_and_index("local", &st->rrdvar_root_index, rc->name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_LOCAL_VAR, &rc->value); + rc->family = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rc->name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_FAMILY_VAR, &rc->value); + + char fullname[RRDVAR_MAX_LENGTH + 1]; + snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->id, rc->name); + rc->hostid = rrdvar_create_and_index("host", &host->rrdvar_root_index, fullname, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_HOST_CHARTID_VAR, &rc->value); + + snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->name, rc->name); + rc->hostname = rrdvar_create_and_index("host", &host->rrdvar_root_index, fullname, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_HOST_CHARTNAME_VAR, &rc->value); + + if(rc->hostid && !rc->hostname) + rc->hostid->options |= RRDVAR_OPTION_RRDCALC_HOST_CHARTNAME_VAR; + + if(!rc->units) rc->units = strdupz(st->units); + + { + time_t now = now_realtime_sec(); + health_alarm_log( + host, + rc->id, + rc->next_event_id++, + now, + rc->name, + rc->rrdset->id, + rc->rrdset->family, + rc->exec, + rc->recipient, + now - rc->last_status_change, + rc->old_value, + rc->value, + rc->status, + RRDCALC_STATUS_UNINITIALIZED, + rc->source, + rc->units, + rc->info, + 0, + 0 + ); + } +} + +static inline int rrdcalc_is_matching_this_rrdset(RRDCALC *rc, RRDSET *st) { + if( (rc->hash_chart == st->hash && !strcmp(rc->chart, st->id)) || + (rc->hash_chart == st->hash_name && !strcmp(rc->chart, st->name))) + return 1; + + return 0; +} + +// this has to be called while the RRDHOST is locked +inline void rrdsetcalc_link_matching(RRDSET *st) { + RRDHOST *host = st->rrdhost; + // debug(D_HEALTH, "find matching alarms for chart '%s'", st->id); + + RRDCALC *rc; + for(rc = host->alarms; rc ; rc = rc->next) { + if(unlikely(rc->rrdset)) + continue; + + if(unlikely(rrdcalc_is_matching_this_rrdset(rc, st))) + rrdsetcalc_link(st, rc); + } +} + +// this has to be called while the RRDHOST is locked +inline void rrdsetcalc_unlink(RRDCALC *rc) { + RRDSET *st = rc->rrdset; + + if(!st) { + debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name); + error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name); + return; + } + + RRDHOST *host = st->rrdhost; + + { + time_t now = now_realtime_sec(); + health_alarm_log( + host, + rc->id, + rc->next_event_id++, + now, + rc->name, + rc->rrdset->id, + rc->rrdset->family, + rc->exec, + rc->recipient, + now - rc->last_status_change, + rc->old_value, + rc->value, + rc->status, + RRDCALC_STATUS_REMOVED, + rc->source, + rc->units, + rc->info, + 0, + 0 + ); + } + + debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname); + + // unlink it + if(rc->rrdset_prev) + rc->rrdset_prev->rrdset_next = rc->rrdset_next; + + if(rc->rrdset_next) + rc->rrdset_next->rrdset_prev = rc->rrdset_prev; + + if(st->alarms == rc) + st->alarms = rc->rrdset_next; + + rc->rrdset_prev = rc->rrdset_next = NULL; + + rrdvar_free(host, &st->rrdvar_root_index, rc->local); + rc->local = NULL; + + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rc->family); + rc->family = NULL; + + rrdvar_free(host, &host->rrdvar_root_index, rc->hostid); + rc->hostid = NULL; + + rrdvar_free(host, &host->rrdvar_root_index, rc->hostname); + rc->hostname = NULL; + + rc->rrdset = NULL; + + // RRDCALC will remain in RRDHOST + // so that if the matching chart is found in the future + // it will be applied automatically +} + +RRDCALC *rrdcalc_find(RRDSET *st, const char *name) { + RRDCALC *rc; + uint32_t hash = simple_hash(name); + + for( rc = st->alarms; rc ; rc = rc->rrdset_next ) { + if(unlikely(rc->hash == hash && !strcmp(rc->name, name))) + return rc; + } + + return NULL; +} + +inline int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name) { + RRDCALC *rc; + + if(unlikely(!chart)) { + error("attempt to find RRDCALC '%s' without giving a chart name", name); + return 1; + } + + if(unlikely(!hash_chart)) hash_chart = simple_hash(chart); + if(unlikely(!hash_name)) hash_name = simple_hash(name); + + // make sure it does not already exist + for(rc = host->alarms; rc ; rc = rc->next) { + if (unlikely(rc->chart && rc->hash == hash_name && rc->hash_chart == hash_chart && !strcmp(name, rc->name) && !strcmp(chart, rc->chart))) { + debug(D_HEALTH, "Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname); + info("Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname); + return 1; + } + } + + return 0; +} + +inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id) { + if(chart && name) { + uint32_t hash_chart = simple_hash(chart); + uint32_t hash_name = simple_hash(name); + + // re-use old IDs, by looking them up in the alarm log + ALARM_ENTRY *ae; + for(ae = host->health_log.alarms; ae ;ae = ae->next) { + if(unlikely(ae->hash_name == hash_name && ae->hash_chart == hash_chart && !strcmp(name, ae->name) && !strcmp(chart, ae->chart))) { + if(next_event_id) *next_event_id = ae->alarm_event_id + 1; + return ae->alarm_id; + } + } + } + + return host->health_log.next_alarm_id++; +} + +inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) { + rrdhost_check_rdlock(host); + + if(rc->calculation) { + rc->calculation->status = &rc->status; + rc->calculation->this = &rc->value; + rc->calculation->after = &rc->db_after; + rc->calculation->before = &rc->db_before; + rc->calculation->rrdcalc = rc; + } + + if(rc->warning) { + rc->warning->status = &rc->status; + rc->warning->this = &rc->value; + rc->warning->after = &rc->db_after; + rc->warning->before = &rc->db_before; + rc->warning->rrdcalc = rc; + } + + if(rc->critical) { + rc->critical->status = &rc->status; + rc->critical->this = &rc->value; + rc->critical->after = &rc->db_after; + rc->critical->before = &rc->db_before; + rc->critical->rrdcalc = rc; + } + + // link it to the host + if(likely(host->alarms)) { + // append it + RRDCALC *t; + for(t = host->alarms; t && t->next ; t = t->next) ; + t->next = rc; + } + else { + host->alarms = rc; + } + + // link it to its chart + RRDSET *st; + rrdset_foreach_read(st, host) { + if(rrdcalc_is_matching_this_rrdset(rc, st)) { + rrdsetcalc_link(st, rc); + break; + } + } +} + +inline RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart) { + + debug(D_HEALTH, "Health creating dynamic alarm (from template) '%s.%s'", chart, rt->name); + + if(rrdcalc_exists(host, chart, rt->name, 0, 0)) + return NULL; + + RRDCALC *rc = callocz(1, sizeof(RRDCALC)); + rc->next_event_id = 1; + rc->id = rrdcalc_get_unique_id(host, chart, rt->name, &rc->next_event_id); + rc->name = strdupz(rt->name); + rc->hash = simple_hash(rc->name); + rc->chart = strdupz(chart); + rc->hash_chart = simple_hash(rc->chart); + + if(rt->dimensions) rc->dimensions = strdupz(rt->dimensions); + + rc->green = rt->green; + rc->red = rt->red; + rc->value = NAN; + rc->old_value = NAN; + + rc->delay_up_duration = rt->delay_up_duration; + rc->delay_down_duration = rt->delay_down_duration; + rc->delay_max_duration = rt->delay_max_duration; + rc->delay_multiplier = rt->delay_multiplier; + + rc->group = rt->group; + rc->after = rt->after; + rc->before = rt->before; + rc->update_every = rt->update_every; + rc->options = rt->options; + + if(rt->exec) rc->exec = strdupz(rt->exec); + if(rt->recipient) rc->recipient = strdupz(rt->recipient); + if(rt->source) rc->source = strdupz(rt->source); + if(rt->units) rc->units = strdupz(rt->units); + if(rt->info) rc->info = strdupz(rt->info); + + if(rt->calculation) { + rc->calculation = expression_parse(rt->calculation->source, NULL, NULL); + if(!rc->calculation) + error("Health alarm '%s.%s': failed to parse calculation expression '%s'", chart, rt->name, rt->calculation->source); + } + if(rt->warning) { + rc->warning = expression_parse(rt->warning->source, NULL, NULL); + if(!rc->warning) + error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", chart, rt->name, rt->warning->source); + } + if(rt->critical) { + rc->critical = expression_parse(rt->critical->source, NULL, NULL); + if(!rc->critical) + error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, rt->critical->source); + } + + debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", + (rc->chart)?rc->chart:"NOCHART", + rc->name, + (rc->exec)?rc->exec:"DEFAULT", + (rc->recipient)?rc->recipient:"DEFAULT", + rc->green, + rc->red, + (int)rc->group, + rc->after, + rc->before, + rc->options, + (rc->dimensions)?rc->dimensions:"NONE", + rc->update_every, + (rc->calculation)?rc->calculation->parsed_as:"NONE", + (rc->warning)?rc->warning->parsed_as:"NONE", + (rc->critical)?rc->critical->parsed_as:"NONE", + rc->source, + rc->delay_up_duration, + rc->delay_down_duration, + rc->delay_max_duration, + rc->delay_multiplier + ); + + rrdcalc_create_part2(host, rc); + return rc; +} + +void rrdcalc_free(RRDCALC *rc) { + if(unlikely(!rc)) return; + + expression_free(rc->calculation); + expression_free(rc->warning); + expression_free(rc->critical); + + freez(rc->name); + freez(rc->chart); + freez(rc->family); + freez(rc->dimensions); + freez(rc->exec); + freez(rc->recipient); + freez(rc->source); + freez(rc->units); + freez(rc->info); + freez(rc); +} + +void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc) { + if(unlikely(!rc)) return; + + debug(D_HEALTH, "Health removing alarm '%s.%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname); + + // unlink it from RRDSET + if(rc->rrdset) rrdsetcalc_unlink(rc); + + // unlink it from RRDHOST + if(unlikely(rc == host->alarms)) + host->alarms = rc->next; + + else { + RRDCALC *t; + for(t = host->alarms; t && t->next != rc; t = t->next) ; + if(t) { + t->next = rc->next; + rc->next = NULL; + } + else + error("Cannot unlink alarm '%s.%s' from host '%s': not found", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname); + } + + rrdcalc_free(rc); +} |