diff options
Diffstat (limited to '')
-rw-r--r-- | src/health.c | 53 |
1 files changed, 31 insertions, 22 deletions
diff --git a/src/health.c b/src/health.c index cc470f81f..136a1ecd7 100644 --- a/src/health.c +++ b/src/health.c @@ -204,14 +204,13 @@ static inline void health_process_notifications(RRDHOST *host, ALARM_ENTRY *ae) } static inline void health_alarm_log_process(RRDHOST *host) { - static uint32_t stop_at_id = 0; uint32_t first_waiting = (host->health_log.alarms)?host->health_log.alarms->unique_id:0; time_t now = now_realtime_sec(); netdata_rwlock_rdlock(&host->health_log.alarm_log_rwlock); ALARM_ENTRY *ae; - for(ae = host->health_log.alarms; ae && ae->unique_id >= stop_at_id ; ae = ae->next) { + for(ae = host->health_log.alarms; ae && ae->unique_id >= host->health_last_processed_id ; ae = ae->next) { if(unlikely( !(ae->flags & HEALTH_ENTRY_FLAG_PROCESSED) && !(ae->flags & HEALTH_ENTRY_FLAG_UPDATED) @@ -226,7 +225,7 @@ static inline void health_alarm_log_process(RRDHOST *host) { } // remember this for the next iteration - stop_at_id = first_waiting; + host->health_last_processed_id = first_waiting; netdata_rwlock_unlock(&host->health_log.alarm_log_rwlock); @@ -323,6 +322,23 @@ static inline int rrdcalc_isrunnable(RRDCALC *rc, time_t now, time_t *next_run) return 1; } +static inline int check_if_resumed_from_suspention(void) { + static usec_t last_realtime = 0, last_monotonic = 0; + usec_t realtime = now_realtime_usec(), monotonic = now_monotonic_usec(); + int ret = 0; + + // detect if monotonic and realtime have twice the difference + // in which case we assume the system was just waken from hibernation + + if(last_realtime && last_monotonic && realtime - last_realtime > 2 * (monotonic - last_monotonic)) + ret = 1; + + last_realtime = realtime; + last_monotonic = monotonic; + + return ret; +} + void *health_main(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; @@ -339,11 +355,8 @@ void *health_main(void *ptr) { BUFFER *wb = buffer_create(100); - time_t now = now_realtime_sec(); - time_t now_boottime = now_boottime_sec(); - time_t last_now = now; - time_t last_now_boottime = now_boottime; - time_t hibernation_delay = config_get_number(CONFIG_SECTION_HEALTH, "postpone alarms during hibernation for seconds", 60); + time_t now = now_realtime_sec(); + time_t hibernation_delay = config_get_number(CONFIG_SECTION_HEALTH, "postpone alarms during hibernation for seconds", 60); unsigned int loop = 0; while(!netdata_exit) { @@ -354,21 +367,14 @@ void *health_main(void *ptr) { time_t next_run = now + min_run_every; RRDCALC *rc; - // detect if boottime and realtime have twice the difference - // in which case we assume the system was just waken from hibernation - if(unlikely(now - last_now > 2 * (now_boottime - last_now_boottime))) { + if(unlikely(check_if_resumed_from_suspention())) { apply_hibernation_delay = 1; - info("Postponing alarm checks for %ld seconds, due to boottime discrepancy (realtime dt: %ld, boottime dt: %ld)." + info("Postponing alarm checks for %ld seconds, because it seems that the system was just resumed from suspension." , hibernation_delay - , (long)(now - last_now) - , (long)(now_boottime - last_now_boottime) ); } - last_now = now; - last_now_boottime = now_boottime; - if(unlikely(pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &oldstate) != 0)) error("Cannot set pthread cancel state to DISABLE."); @@ -381,7 +387,7 @@ void *health_main(void *ptr) { if(unlikely(apply_hibernation_delay)) { - info("Postponing alarm checks for %ld seconds, on host '%s'." + info("Postponing health checks for %ld seconds, on host '%s'." , hibernation_delay , host->hostname ); @@ -389,8 +395,13 @@ void *health_main(void *ptr) { host->health_delay_up_to = now + hibernation_delay; } - if(unlikely(!host->health_enabled || now < host->health_delay_up_to)) - continue; + if(unlikely(host->health_delay_up_to)) { + if(unlikely(now < host->health_delay_up_to)) + continue; + + info("Resuming health checks on host '%s'.", host->hostname); + host->health_delay_up_to = 0; + } rrdhost_rdlock(host); @@ -726,8 +737,6 @@ void *health_main(void *ptr) { else debug(D_HEALTH, "Health monitoring iteration no %u done. Next iteration now", loop); - now_boottime = now_boottime_sec(); - } // forever buffer_free(wb); |