summaryrefslogtreecommitdiffstats
path: root/src/health/health_event_loop.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/health/health_event_loop.c')
-rw-r--r--src/health/health_event_loop.c195
1 files changed, 64 insertions, 131 deletions
diff --git a/src/health/health_event_loop.c b/src/health/health_event_loop.c
index 756ffa16..b50812f2 100644
--- a/src/health/health_event_loop.c
+++ b/src/health/health_event_loop.c
@@ -101,26 +101,10 @@ static void health_sleep(time_t next_run, unsigned int loop __maybe_unused) {
}
}
-static void sql_health_postpone_queue_removed(RRDHOST *host __maybe_unused) {
-#ifdef ENABLE_ACLK
- if (netdata_cloud_enabled) {
- struct aclk_sync_cfg_t *wc = host->aclk_config;
- if (unlikely(!wc)) {
- return;
- }
-
- if (wc->alert_queue_removed >= 1) {
- wc->alert_queue_removed+=6;
- }
- }
-#endif
-}
-
static void health_execute_delayed_initializations(RRDHOST *host) {
health_plugin_init();
RRDSET *st;
- bool must_postpone = false;
if (!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION)) return;
rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION);
@@ -131,11 +115,8 @@ static void health_execute_delayed_initializations(RRDHOST *host) {
worker_is_busy(WORKER_HEALTH_JOB_DELAYED_INIT_RRDSET);
health_prototype_alerts_for_rrdset_incrementally(st);
- must_postpone = true;
}
rrdset_foreach_done(st);
- if (must_postpone)
- sql_health_postpone_queue_removed(host);
}
static void health_initialize_rrdhost(RRDHOST *host) {
@@ -179,6 +160,50 @@ static inline int check_if_resumed_from_suspension(void) {
return ret;
}
+static void do_eval_expression(
+ RRDCALC *rc,
+ EVAL_EXPRESSION *expression,
+ const char *expression_type __maybe_unused,
+ size_t job_type,
+ RRDCALC_FLAGS error_type,
+ RRDCALC_STATUS *calc_status,
+ NETDATA_DOUBLE *result)
+{
+ if (!expression || (!calc_status && !result))
+ return;
+
+ worker_is_busy(job_type);
+
+ if (unlikely(!expression_evaluate(expression))) {
+ // calculation failed
+ rc->run_flags |= error_type;
+ if (result)
+ *result = NAN;
+
+ netdata_log_debug(D_HEALTH,
+ "Health on host '%s', alarm '%s.%s': %s expression failed with error: %s",
+ rrdhost_hostname(rc->rrdset->rrdhost), rrdcalc_chart_name(rc), rrdcalc_name(rc), expression_type,
+ expression_error_msg(expression)
+ );
+ return;
+ }
+ rc->run_flags &= ~error_type;
+ netdata_log_debug(D_HEALTH,
+ "Health on host '%s', alarm '%s.%s': %s expression gave value "
+ NETDATA_DOUBLE_FORMAT ": %s (source: %s)",
+ rrdhost_hostname(rc->rrdset->rrdhost),
+ rrdcalc_chart_name(rc),
+ rrdcalc_name(rc),
+ expression_type,
+ expression_result(expression),
+ expression_error_msg(expression),
+ rrdcalc_source(rc));
+ if (calc_status)
+ *calc_status = rrdcalc_value2status(expression_result(expression));
+ else
+ *result = expression_result(expression);
+}
+
static void health_event_loop(void) {
bool health_running_logged = false;
@@ -270,6 +295,13 @@ static void health_event_loop(void) {
}
worker_is_busy(WORKER_HEALTH_JOB_HOST_LOCK);
+#ifdef ENABLE_ACLK
+ if (netdata_cloud_enabled) {
+ struct aclk_sync_cfg_t *wc = host->aclk_config;
+ if (wc && wc->send_snapshot == 2)
+ continue;
+ }
+#endif
// the first loop is to lookup values from the db
foreach_rrdcalc_in_rrdhost_read(host, rc) {
@@ -314,11 +346,6 @@ static void health_event_loop(void) {
rc->last_status_change_value = rc->value;
rc->last_updated = now_tmp;
rc->value = NAN;
-
-#ifdef ENABLE_ACLK
- if (netdata_cloud_enabled)
- sql_queue_alarm_to_aclk(host, ae, true);
-#endif
}
}
}
@@ -404,36 +431,7 @@ static void health_event_loop(void) {
// ------------------------------------------------------------
// if there is calculation expression, run it
- if (unlikely(rc->config.calculation)) {
- worker_is_busy(WORKER_HEALTH_JOB_CALC_EVAL);
-
- if (unlikely(!expression_evaluate(rc->config.calculation))) {
- // calculation failed
- rc->value = NAN;
- rc->run_flags |= RRDCALC_FLAG_CALC_ERROR;
-
- netdata_log_debug(
- D_HEALTH, "Health on host '%s', alarm '%s.%s': expression '%s' failed: %s",
- rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
- expression_parsed_as(rc->config.calculation), expression_error_msg(rc->config.calculation)
- );
- }
- else {
- rc->run_flags &= ~RRDCALC_FLAG_CALC_ERROR;
-
- netdata_log_debug(
- D_HEALTH, "Health on host '%s', alarm '%s.%s': expression '%s' gave value "
- NETDATA_DOUBLE_FORMAT": %s (source: %s)",
- rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
- expression_parsed_as(rc->config.calculation),
- expression_result(rc->config.calculation),
- expression_error_msg(rc->config.calculation),
- rrdcalc_source(rc)
- );
-
- rc->value = expression_result(rc->config.calculation);
- }
- }
+ do_eval_expression(rc, rc->config.calculation, "calculation", WORKER_HEALTH_JOB_CALC_EVAL, RRDCALC_FLAG_CALC_ERROR, NULL, &rc->value);
}
foreach_rrdcalc_in_rrdhost_done(rc);
@@ -453,65 +451,8 @@ static void health_event_loop(void) {
RRDCALC_STATUS warning_status = RRDCALC_STATUS_UNDEFINED;
RRDCALC_STATUS critical_status = RRDCALC_STATUS_UNDEFINED;
- // --------------------------------------------------------
- // check the warning expression
-
- if (likely(rc->config.warning)) {
- worker_is_busy(WORKER_HEALTH_JOB_WARNING_EVAL);
-
- if (unlikely(!expression_evaluate(rc->config.warning))) {
- // calculation failed
- rc->run_flags |= RRDCALC_FLAG_WARN_ERROR;
-
- netdata_log_debug(D_HEALTH,
- "Health on host '%s', alarm '%s.%s': warning expression failed with error: %s",
- rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
- expression_error_msg(rc->config.warning)
- );
- } else {
- rc->run_flags &= ~RRDCALC_FLAG_WARN_ERROR;
- netdata_log_debug(D_HEALTH,
- "Health on host '%s', alarm '%s.%s': warning expression gave value "
- NETDATA_DOUBLE_FORMAT ": %s (source: %s)",
- rrdhost_hostname(host),
- rrdcalc_chart_name(rc),
- rrdcalc_name(rc),
- expression_result(rc->config.warning),
- expression_error_msg(rc->config.warning),
- rrdcalc_source(rc)
- );
- warning_status = rrdcalc_value2status(expression_result(rc->config.warning));
- }
- }
-
- // --------------------------------------------------------
- // check the critical expression
-
- if (likely(rc->config.critical)) {
- worker_is_busy(WORKER_HEALTH_JOB_CRITICAL_EVAL);
-
- if (unlikely(!expression_evaluate(rc->config.critical))) {
- // calculation failed
- rc->run_flags |= RRDCALC_FLAG_CRIT_ERROR;
-
- netdata_log_debug(D_HEALTH,
- "Health on host '%s', alarm '%s.%s': critical expression failed with error: %s",
- rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
- expression_error_msg(rc->config.critical)
- );
- } else {
- rc->run_flags &= ~RRDCALC_FLAG_CRIT_ERROR;
- netdata_log_debug(D_HEALTH,
- "Health on host '%s', alarm '%s.%s': critical expression gave value "
- NETDATA_DOUBLE_FORMAT ": %s (source: %s)",
- rrdhost_hostname(host), rrdcalc_chart_name(rc), rrdcalc_name(rc),
- expression_result(rc->config.critical),
- expression_error_msg(rc->config.critical),
- rrdcalc_source(rc)
- );
- critical_status = rrdcalc_value2status(expression_result(rc->config.critical));
- }
- }
+ do_eval_expression(rc, rc->config.warning, "warning", WORKER_HEALTH_JOB_WARNING_EVAL, RRDCALC_FLAG_WARN_ERROR, &warning_status, NULL);
+ do_eval_expression(rc, rc->config.critical, "critical", WORKER_HEALTH_JOB_CRITICAL_EVAL, RRDCALC_FLAG_CRIT_ERROR, &critical_status, NULL);
// --------------------------------------------------------
// decide the final alarm status
@@ -706,26 +647,18 @@ static void health_event_loop(void) {
wait_for_all_notifications_to_finish_before_allowing_health_to_be_cleaned_up();
break;
}
+ }
#ifdef ENABLE_ACLK
- if (netdata_cloud_enabled) {
- struct aclk_sync_cfg_t *wc = host->aclk_config;
- if (unlikely(!wc))
- continue;
-
- if (wc->alert_queue_removed == 1) {
- sql_queue_removed_alerts_to_aclk(host);
- } else if (wc->alert_queue_removed > 1) {
- wc->alert_queue_removed--;
- }
-
- if (wc->alert_checkpoint_req == 1) {
- aclk_push_alarm_checkpoint(host);
- } else if (wc->alert_checkpoint_req > 1) {
- wc->alert_checkpoint_req--;
- }
- }
-#endif
+ struct aclk_sync_cfg_t *wc = host->aclk_config;
+ if (wc && wc->send_snapshot == 1) {
+ wc->send_snapshot = 2;
+ rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS);
}
+ else
+ if (process_alert_pending_queue(host))
+ rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS);
+#endif
+
dfe_done(host);
// wait for all notifications to finish before allowing health to be cleaned up