diff options
Diffstat (limited to 'health/health_log.c')
-rw-r--r-- | health/health_log.c | 57 |
1 files changed, 53 insertions, 4 deletions
diff --git a/health/health_log.c b/health/health_log.c index 009e42673..c91cde6cb 100644 --- a/health/health_log.c +++ b/health/health_log.c @@ -79,6 +79,7 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { "\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s" "\t%d\t%d\t%d\t%d" "\t" CALCULATED_NUMBER_FORMAT_AUTO "\t" CALCULATED_NUMBER_FORMAT_AUTO + "\t%016lx" "\n" , (ae->flags & HEALTH_ENTRY_FLAG_SAVED)?'U':'A' , host->hostname @@ -112,6 +113,7 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { , ae->new_value , ae->old_value + , (uint64_t)ae->last_repeat ) < 0)) error("HEALTH [%s]: failed to save alarm log entry to '%s'. Health data may be lost in case of abnormal restart.", host->hostname, host->health_log_filename); else { @@ -174,10 +176,40 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena continue; } + // Check if we got last_repeat field + time_t last_repeat = 0; + if(entries > 27) { + char* alarm_name = pointers[13]; + last_repeat = (time_t)strtoul(pointers[27], NULL, 16); + + RRDCALC *rc = alarm_max_last_repeat(host, alarm_name,simple_hash(alarm_name)); + if (!rc) { + for(rc = host->alarms; rc ; rc = rc->next) { + RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_name, (avl *)rc); + if(rdcmp != rc) { + error("Cannot insert the alarm index ID using log %s", rc->name); + } + } + + rc = alarm_max_last_repeat(host, alarm_name,simple_hash(alarm_name)); + } + + if(unlikely(rc)) { + if (rrdcalc_isrepeating(rc)) { + rc->last_repeat = last_repeat; + // We iterate through repeating alarm entries only to + // find the latest last_repeat timestamp. Otherwise, + // there is no need to keep them in memory. + continue; + } + } + } + if(unlikely(*pointers[0] == 'A')) { // make sure it is properly numbered if(unlikely(host->health_log.alarms && unique_id < host->health_log.alarms->unique_id)) { - error("HEALTH [%s]: line %zu of file '%s' has alarm log entry %u in wrong order. Ignoring it.", host->hostname, line, filename, unique_id); + error( "HEALTH [%s]: line %zu of file '%s' has alarm log entry %u in wrong order. Ignoring it." + , host->hostname, line, filename, unique_id); errored++; continue; } @@ -186,11 +218,11 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena } else if(unlikely(*pointers[0] == 'U')) { // find the original - for(ae = host->health_log.alarms; ae; ae = ae->next) { + for(ae = host->health_log.alarms; ae ; ae = ae->next) { if(unlikely(unique_id == ae->unique_id)) { if(unlikely(*pointers[0] == 'A')) { error("HEALTH [%s]: line %zu of file '%s' adds duplicate alarm log entry %u. Using the later." - , host->hostname, line, filename, unique_id); + , host->hostname, line, filename, unique_id); *pointers[0] = 'U'; duplicate++; } @@ -270,6 +302,8 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena ae->new_value = str2l(pointers[25]); ae->old_value = str2l(pointers[26]); + ae->last_repeat = last_repeat; + char value_string[100 + 1]; freez(ae->old_value_string); freez(ae->new_value_string); @@ -339,7 +373,7 @@ inline void health_alarm_log_load(RRDHOST *host) { // ---------------------------------------------------------------------------- // health alarm log management -inline void health_alarm_log( +inline ALARM_ENTRY* health_create_alarm_entry( RRDHOST *host, uint32_t alarm_id, uint32_t alarm_event_id, @@ -398,9 +432,24 @@ inline void health_alarm_log( ae->delay_up_to_timestamp = when + delay; ae->flags |= flags; + ae->last_repeat = 0; + if(ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL) ae->non_clear_duration += ae->duration; + return ae; +} + +inline void health_alarm_log( + RRDHOST *host, + ALARM_ENTRY *ae +) { + debug(D_HEALTH, "Health adding alarm log entry with id: %u", ae->unique_id); + + if(unlikely(alarm_entry_isrepeating(host, ae))) { + error("Repeating alarms cannot be added to host's alarm log entries. It seems somewhere in the logic, API is being misused. Alarm id: %u", ae->alarm_id); + return; + } // link it netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock); ae->next = host->health_log.alarms; |