summaryrefslogtreecommitdiffstats
path: root/health/health.c
diff options
context:
space:
mode:
Diffstat (limited to 'health/health.c')
-rw-r--r--health/health.c70
1 files changed, 68 insertions, 2 deletions
diff --git a/health/health.c b/health/health.c
index e94339fae..528238d74 100644
--- a/health/health.c
+++ b/health/health.c
@@ -223,6 +223,8 @@ void health_reload(void) {
if (netdata_cloud_setting)
aclk_single_update_disable();
#endif
+ sql_refresh_hashes();
+
rrd_rdlock();
RRDHOST *host;
@@ -653,6 +655,41 @@ static int update_disabled_silenced(RRDHOST *host, RRDCALC *rc) {
return 0;
}
+// Create alarms for dimensions that have been added to charts since the previous
+// iteration, walking the host -> chart -> dimension PENDING_FOREACH flags and clearing each once handled.
+static void init_pending_foreach_alarms(RRDHOST *host) {
+ rrdhost_wrlock(host); // host write lock is held until the unlock at the end of this function
+
+ if (host->alarms_with_foreach || host->alarms_template_with_foreach) { // skip hosts where both foreach lists are empty
+ if (rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_FOREACH_ALARMS)) { // host-level flag gates the whole chart scan
+ RRDSET *st;
+
+ rrdset_foreach_read(st, host) {
+ rrdset_wrlock(st); // chart is write-locked before its flag and dimensions are examined
+
+ if (rrdset_flag_check(st, RRDSET_FLAG_PENDING_FOREACH_ALARMS)) { // only charts marked as pending are scanned
+ RRDDIM *rd;
+
+ rrddim_foreach_write(rd, st) {
+ if (rrddim_flag_check(rd, RRDDIM_FLAG_PENDING_FOREACH_ALARM)) {
+ rrdcalc_link_to_rrddim(rd, st, host); // NOTE(review): presumably creates/links the foreach alarms for this dimension - confirm in rrdcalc
+ rrddim_flag_clear(rd, RRDDIM_FLAG_PENDING_FOREACH_ALARM); // dimension handled; do not process it again
+ }
+ }
+
+ rrdset_flag_clear(st, RRDSET_FLAG_PENDING_FOREACH_ALARMS); // all dimensions of this chart have been visited
+ }
+
+ rrdset_unlock(st);
+ }
+
+ rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_FOREACH_ALARMS); // all charts of this host have been visited
+ }
+ }
+
+ rrdhost_unlock(host);
+}
+
/**
* Health Main
*
@@ -737,6 +774,8 @@ void *health_main(void *ptr) {
if(likely(!host->health_log_fp) && (loop == 1 || loop % cleanup_sql_every_loop == 0))
sql_health_alarm_log_cleanup(host);
+ init_pending_foreach_alarms(host);
+
rrdhost_rdlock(host);
// the first loop is to lookup values from the db
@@ -745,6 +784,33 @@ void *health_main(void *ptr) {
if (update_disabled_silenced(host, rc))
continue;
+ // create an alert removed event if the chart is obsolete and
+ // has stopped being collected for 60 seconds
+ if (unlikely(rc->rrdset && rc->status != RRDCALC_STATUS_REMOVED &&
+ rrdset_flag_check(rc->rrdset, RRDSET_FLAG_OBSOLETE) &&
+ now > (rc->rrdset->last_collected_time.tv_sec + 60))) {
+ if (!rrdcalc_isrepeating(rc)) {
+ time_t now = now_realtime_sec();
+ ALARM_ENTRY *ae = health_create_alarm_entry(
+ host, rc->id, rc->next_event_id++, rc->config_hash_id, now, rc->name, rc->rrdset->id,
+ rc->rrdset->family, rc->classification, rc->component, rc->type, rc->exec, rc->recipient, now - rc->last_status_change,
+ rc->value, NAN, rc->status, RRDCALC_STATUS_REMOVED, rc->source, rc->units, rc->info, 0, 0);
+ if (ae) {
+ health_alarm_log(host, ae);
+ rc->old_status = rc->status;
+ rc->status = RRDCALC_STATUS_REMOVED;
+ rc->last_status_change = now;
+ rc->last_updated = now;
+ rc->value = NAN;
+#if defined(ENABLE_ACLK) && defined(ENABLE_NEW_CLOUD_PROTOCOL)
+ if (netdata_cloud_setting && likely(!aclk_alert_reloaded))
+ sql_queue_removed_alerts_to_aclk(host);
+#endif
+ }
+ }
+ continue;
+ }
+
if (unlikely(!rrdcalc_isrunnable(rc, now, &next_run))) {
if (unlikely(rc->rrdcalc_flags & RRDCALC_FLAG_RUNNABLE))
rc->rrdcalc_flags &= ~RRDCALC_FLAG_RUNNABLE;
@@ -764,7 +830,7 @@ void *health_main(void *ptr) {
int ret = rrdset2value_api_v1(rc->rrdset, NULL, &rc->value, rc->dimensions, 1, rc->after,
rc->before, rc->group, 0, rc->options, &rc->db_after,
- &rc->db_before, &value_is_null
+ &rc->db_before, &value_is_null, 0
);
if (unlikely(ret != 200)) {
@@ -1004,7 +1070,7 @@ void *health_main(void *ptr) {
RRDCALC *rc;
for(rc = host->alarms; rc ; rc = rc->next) {
int repeat_every = 0;
- if(unlikely(rrdcalc_isrepeating(rc))) {
+ if(unlikely(rrdcalc_isrepeating(rc) && rc->delay_up_to_timestamp <= now)) {
if(unlikely(rc->status == RRDCALC_STATUS_WARNING)) {
rc->rrdcalc_flags &= ~RRDCALC_FLAG_RUN_ONCE;
repeat_every = rc->warn_repeat_every;