// SPDX-License-Identifier: GPL-3.0-or-later #define NETDATA_HEALTH_INTERNALS #include "rrd.h" // ---------------------------------------------------------------------------- // RRDCALC management inline const char *rrdcalc_status2string(RRDCALC_STATUS status) { switch(status) { case RRDCALC_STATUS_REMOVED: return "REMOVED"; case RRDCALC_STATUS_UNDEFINED: return "UNDEFINED"; case RRDCALC_STATUS_UNINITIALIZED: return "UNINITIALIZED"; case RRDCALC_STATUS_CLEAR: return "CLEAR"; case RRDCALC_STATUS_RAISED: return "RAISED"; case RRDCALC_STATUS_WARNING: return "WARNING"; case RRDCALC_STATUS_CRITICAL: return "CRITICAL"; default: error("Unknown alarm status %d", status); return "UNKNOWN"; } } static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { RRDHOST *host = st->rrdhost; debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname); rc->last_status_change = now_realtime_sec(); rc->rrdset = st; rc->rrdset_next = st->alarms; rc->rrdset_prev = NULL; if(rc->rrdset_next) rc->rrdset_next->rrdset_prev = rc; st->alarms = rc; if(rc->update_every < rc->rrdset->update_every) { error("Health alarm '%s.%s' has update every %d, less than chart update every %d. Setting alarm update frequency to %d.", rc->rrdset->id, rc->name, rc->update_every, rc->rrdset->update_every, rc->rrdset->update_every); rc->update_every = rc->rrdset->update_every; } if(!isnan(rc->green) && isnan(st->green)) { debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from " CALCULATED_NUMBER_FORMAT_AUTO " to " CALCULATED_NUMBER_FORMAT_AUTO ".", rc->rrdset->id, rc->name, rc->rrdset->green, rc->green); st->green = rc->green; } if(!isnan(rc->red) && isnan(st->red)) { debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from " CALCULATED_NUMBER_FORMAT_AUTO " to " CALCULATED_NUMBER_FORMAT_AUTO ".", rc->rrdset->id, rc->name, rc->rrdset->red, rc->red); st->red = rc->red; } rc->local = rrdvar_create_and_index("local", &st->rrdvar_root_index, rc->name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_LOCAL_VAR, &rc->value); rc->family = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rc->name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_FAMILY_VAR, &rc->value); char fullname[RRDVAR_MAX_LENGTH + 1]; snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->id, rc->name); rc->hostid = rrdvar_create_and_index("host", &host->rrdvar_root_index, fullname, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_HOST_CHARTID_VAR, &rc->value); snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->name, rc->name); rc->hostname = rrdvar_create_and_index("host", &host->rrdvar_root_index, fullname, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_HOST_CHARTNAME_VAR, &rc->value); if(rc->hostid && !rc->hostname) rc->hostid->options |= RRDVAR_OPTION_RRDCALC_HOST_CHARTNAME_VAR; if(!rc->units) rc->units = strdupz(st->units); { time_t now = now_realtime_sec(); health_alarm_log( host, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, RRDCALC_STATUS_UNINITIALIZED, rc->source, rc->units, rc->info, 0, 0 ); } } static inline int rrdcalc_is_matching_this_rrdset(RRDCALC *rc, RRDSET *st) { if( (rc->hash_chart == st->hash && !strcmp(rc->chart, st->id)) || (rc->hash_chart == st->hash_name && !strcmp(rc->chart, st->name))) return 1; return 0; } // this has to be called while the RRDHOST is locked inline void rrdsetcalc_link_matching(RRDSET *st) { RRDHOST *host = st->rrdhost; // debug(D_HEALTH, "find matching alarms for chart '%s'", st->id); RRDCALC *rc; for(rc = host->alarms; rc ; rc = rc->next) { if(unlikely(rc->rrdset)) continue; if(unlikely(rrdcalc_is_matching_this_rrdset(rc, st))) rrdsetcalc_link(st, rc); } } // this has to be called while the RRDHOST is locked inline void rrdsetcalc_unlink(RRDCALC *rc) { RRDSET *st = rc->rrdset; if(!st) { debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name); error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name); return; } RRDHOST *host = st->rrdhost; { time_t now = now_realtime_sec(); health_alarm_log( host, rc->id, rc->next_event_id++, now, rc->name, rc->rrdset->id, rc->rrdset->family, rc->exec, rc->recipient, now - rc->last_status_change, rc->old_value, rc->value, rc->status, RRDCALC_STATUS_REMOVED, rc->source, rc->units, rc->info, 0, 0 ); } debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname); // unlink it if(rc->rrdset_prev) rc->rrdset_prev->rrdset_next = rc->rrdset_next; if(rc->rrdset_next) rc->rrdset_next->rrdset_prev = rc->rrdset_prev; if(st->alarms == rc) st->alarms = rc->rrdset_next; rc->rrdset_prev = rc->rrdset_next = NULL; rrdvar_free(host, &st->rrdvar_root_index, rc->local); rc->local = NULL; rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rc->family); rc->family = NULL; rrdvar_free(host, &host->rrdvar_root_index, rc->hostid); rc->hostid = NULL; rrdvar_free(host, &host->rrdvar_root_index, rc->hostname); rc->hostname = NULL; rc->rrdset = NULL; // RRDCALC will remain in RRDHOST // so that if the matching chart is found in the future // it will be applied automatically } RRDCALC *rrdcalc_find(RRDSET *st, const char *name) { RRDCALC *rc; uint32_t hash = simple_hash(name); for( rc = st->alarms; rc ; rc = rc->rrdset_next ) { if(unlikely(rc->hash == hash && !strcmp(rc->name, name))) return rc; } return NULL; } inline int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name) { RRDCALC *rc; if(unlikely(!chart)) { error("attempt to find RRDCALC '%s' without giving a chart name", name); return 1; } if(unlikely(!hash_chart)) hash_chart = simple_hash(chart); if(unlikely(!hash_name)) hash_name = simple_hash(name); // make sure it does not already exist for(rc = host->alarms; rc ; rc = rc->next) { if (unlikely(rc->chart && rc->hash == hash_name && rc->hash_chart == hash_chart && !strcmp(name, rc->name) && !strcmp(chart, rc->chart))) { debug(D_HEALTH, "Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname); info("Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname); return 1; } } return 0; } inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id) { if(chart && name) { uint32_t hash_chart = simple_hash(chart); uint32_t hash_name = simple_hash(name); // re-use old IDs, by looking them up in the alarm log ALARM_ENTRY *ae; for(ae = host->health_log.alarms; ae ;ae = ae->next) { if(unlikely(ae->hash_name == hash_name && ae->hash_chart == hash_chart && !strcmp(name, ae->name) && !strcmp(chart, ae->chart))) { if(next_event_id) *next_event_id = ae->alarm_event_id + 1; return ae->alarm_id; } } } return host->health_log.next_alarm_id++; } inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) { rrdhost_check_rdlock(host); if(rc->calculation) { rc->calculation->status = &rc->status; rc->calculation->this = &rc->value; rc->calculation->after = &rc->db_after; rc->calculation->before = &rc->db_before; rc->calculation->rrdcalc = rc; } if(rc->warning) { rc->warning->status = &rc->status; rc->warning->this = &rc->value; rc->warning->after = &rc->db_after; rc->warning->before = &rc->db_before; rc->warning->rrdcalc = rc; } if(rc->critical) { rc->critical->status = &rc->status; rc->critical->this = &rc->value; rc->critical->after = &rc->db_after; rc->critical->before = &rc->db_before; rc->critical->rrdcalc = rc; } // link it to the host if(likely(host->alarms)) { // append it RRDCALC *t; for(t = host->alarms; t && t->next ; t = t->next) ; t->next = rc; } else { host->alarms = rc; } // link it to its chart RRDSET *st; rrdset_foreach_read(st, host) { if(rrdcalc_is_matching_this_rrdset(rc, st)) { rrdsetcalc_link(st, rc); break; } } } inline RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart) { debug(D_HEALTH, "Health creating dynamic alarm (from template) '%s.%s'", chart, rt->name); if(rrdcalc_exists(host, chart, rt->name, 0, 0)) return NULL; RRDCALC *rc = callocz(1, sizeof(RRDCALC)); rc->next_event_id = 1; rc->id = rrdcalc_get_unique_id(host, chart, rt->name, &rc->next_event_id); rc->name = strdupz(rt->name); rc->hash = simple_hash(rc->name); rc->chart = strdupz(chart); rc->hash_chart = simple_hash(rc->chart); if(rt->dimensions) rc->dimensions = strdupz(rt->dimensions); rc->green = rt->green; rc->red = rt->red; rc->value = NAN; rc->old_value = NAN; rc->delay_up_duration = rt->delay_up_duration; rc->delay_down_duration = rt->delay_down_duration; rc->delay_max_duration = rt->delay_max_duration; rc->delay_multiplier = rt->delay_multiplier; rc->group = rt->group; rc->after = rt->after; rc->before = rt->before; rc->update_every = rt->update_every; rc->options = rt->options; if(rt->exec) rc->exec = strdupz(rt->exec); if(rt->recipient) rc->recipient = strdupz(rt->recipient); if(rt->source) rc->source = strdupz(rt->source); if(rt->units) rc->units = strdupz(rt->units); if(rt->info) rc->info = strdupz(rt->info); if(rt->calculation) { rc->calculation = expression_parse(rt->calculation->source, NULL, NULL); if(!rc->calculation) error("Health alarm '%s.%s': failed to parse calculation expression '%s'", chart, rt->name, rt->calculation->source); } if(rt->warning) { rc->warning = expression_parse(rt->warning->source, NULL, NULL); if(!rc->warning) error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", chart, rt->name, rt->warning->source); } if(rt->critical) { rc->critical = expression_parse(rt->critical->source, NULL, NULL); if(!rc->critical) error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, rt->critical->source); } debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", (rc->chart)?rc->chart:"NOCHART", rc->name, (rc->exec)?rc->exec:"DEFAULT", (rc->recipient)?rc->recipient:"DEFAULT", rc->green, rc->red, (int)rc->group, rc->after, rc->before, rc->options, (rc->dimensions)?rc->dimensions:"NONE", rc->update_every, (rc->calculation)?rc->calculation->parsed_as:"NONE", (rc->warning)?rc->warning->parsed_as:"NONE", (rc->critical)?rc->critical->parsed_as:"NONE", rc->source, rc->delay_up_duration, rc->delay_down_duration, rc->delay_max_duration, rc->delay_multiplier ); rrdcalc_create_part2(host, rc); return rc; } void rrdcalc_free(RRDCALC *rc) { if(unlikely(!rc)) return; expression_free(rc->calculation); expression_free(rc->warning); expression_free(rc->critical); freez(rc->name); freez(rc->chart); freez(rc->family); freez(rc->dimensions); freez(rc->exec); freez(rc->recipient); freez(rc->source); freez(rc->units); freez(rc->info); freez(rc); } void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc) { if(unlikely(!rc)) return; debug(D_HEALTH, "Health removing alarm '%s.%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname); // unlink it from RRDSET if(rc->rrdset) rrdsetcalc_unlink(rc); // unlink it from RRDHOST if(unlikely(rc == host->alarms)) host->alarms = rc->next; else { RRDCALC *t; for(t = host->alarms; t && t->next != rc; t = t->next) ; if(t) { t->next = rc->next; rc->next = NULL; } else error("Cannot unlink alarm '%s.%s' from host '%s': not found", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname); } rrdcalc_free(rc); }