summary refs log tree commit diff stats
path: root/health/health_log.c
diff options
context:
space:
mode:
Diffstat (limited to 'health/health_log.c')
-rw-r--r-- health/health_log.c 57
1 files changed, 53 insertions, 4 deletions
diff --git a/health/health_log.c b/health/health_log.c
index 009e42673..c91cde6cb 100644
--- a/health/health_log.c
+++ b/health/health_log.c
@@ -79,6 +79,7 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
"\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s"
"\t%d\t%d\t%d\t%d"
"\t" CALCULATED_NUMBER_FORMAT_AUTO "\t" CALCULATED_NUMBER_FORMAT_AUTO
+ "\t%016lx"
"\n"
, (ae->flags & HEALTH_ENTRY_FLAG_SAVED)?'U':'A'
, host->hostname
@@ -112,6 +113,7 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
, ae->new_value
, ae->old_value
+ , (uint64_t)ae->last_repeat
) < 0))
error("HEALTH [%s]: failed to save alarm log entry to '%s'. Health data may be lost in case of abnormal restart.", host->hostname, host->health_log_filename);
else {
@@ -174,10 +176,40 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena
continue;
}
+ // Check if we got last_repeat field
+ time_t last_repeat = 0;
+ if(entries > 27) {
+ char* alarm_name = pointers[13];
+ last_repeat = (time_t)strtoul(pointers[27], NULL, 16);
+
+ RRDCALC *rc = alarm_max_last_repeat(host, alarm_name,simple_hash(alarm_name));
+ if (!rc) {
+ for(rc = host->alarms; rc ; rc = rc->next) {
+ RRDCALC *rdcmp = (RRDCALC *) avl_insert_lock(&(host)->alarms_idx_name, (avl *)rc);
+ if(rdcmp != rc) {
+ error("Cannot insert the alarm index ID using log %s", rc->name);
+ }
+ }
+
+ rc = alarm_max_last_repeat(host, alarm_name,simple_hash(alarm_name));
+ }
+
+ if(unlikely(rc)) {
+ if (rrdcalc_isrepeating(rc)) {
+ rc->last_repeat = last_repeat;
+ // We iterate through repeating alarm entries only to
+ // find the latest last_repeat timestamp. Otherwise,
+ // there is no need to keep them in memory.
+ continue;
+ }
+ }
+ }
+
if(unlikely(*pointers[0] == 'A')) {
// make sure it is properly numbered
if(unlikely(host->health_log.alarms && unique_id < host->health_log.alarms->unique_id)) {
- error("HEALTH [%s]: line %zu of file '%s' has alarm log entry %u in wrong order. Ignoring it.", host->hostname, line, filename, unique_id);
+ error( "HEALTH [%s]: line %zu of file '%s' has alarm log entry %u in wrong order. Ignoring it."
+ , host->hostname, line, filename, unique_id);
errored++;
continue;
}
@@ -186,11 +218,11 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena
}
else if(unlikely(*pointers[0] == 'U')) {
// find the original
- for(ae = host->health_log.alarms; ae; ae = ae->next) {
+ for(ae = host->health_log.alarms; ae ; ae = ae->next) {
if(unlikely(unique_id == ae->unique_id)) {
if(unlikely(*pointers[0] == 'A')) {
error("HEALTH [%s]: line %zu of file '%s' adds duplicate alarm log entry %u. Using the later."
- , host->hostname, line, filename, unique_id);
+ , host->hostname, line, filename, unique_id);
*pointers[0] = 'U';
duplicate++;
}
@@ -270,6 +302,8 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena
ae->new_value = str2l(pointers[25]);
ae->old_value = str2l(pointers[26]);
+ ae->last_repeat = last_repeat;
+
char value_string[100 + 1];
freez(ae->old_value_string);
freez(ae->new_value_string);
@@ -339,7 +373,7 @@ inline void health_alarm_log_load(RRDHOST *host) {
// ----------------------------------------------------------------------------
// health alarm log management
-inline void health_alarm_log(
+inline ALARM_ENTRY* health_create_alarm_entry(
RRDHOST *host,
uint32_t alarm_id,
uint32_t alarm_event_id,
@@ -398,9 +432,24 @@ inline void health_alarm_log(
ae->delay_up_to_timestamp = when + delay;
ae->flags |= flags;
+ ae->last_repeat = 0;
+
if(ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL)
ae->non_clear_duration += ae->duration;
+ return ae;
+}
+
+inline void health_alarm_log(
+ RRDHOST *host,
+ ALARM_ENTRY *ae
+) {
+ debug(D_HEALTH, "Health adding alarm log entry with id: %u", ae->unique_id);
+
+ if(unlikely(alarm_entry_isrepeating(host, ae))) {
+ error("Repeating alarms cannot be added to host's alarm log entries. It seems somewhere in the logic, API is being misused. Alarm id: %u", ae->alarm_id);
+ return;
+ }
// link it
netdata_rwlock_wrlock(&host->health_log.alarm_log_rwlock);
ae->next = host->health_log.alarms;