diff options
Diffstat (limited to 'health/health_log.c')
-rw-r--r-- | health/health_log.c | 71 |
1 files changed, 66 insertions, 5 deletions
diff --git a/health/health_log.c b/health/health_log.c index c91cde6c..8c0bc5c3 100644 --- a/health/health_log.c +++ b/health/health_log.c @@ -22,14 +22,14 @@ inline int health_alarm_log_open(RRDHOST *host) { return -1; } -inline void health_alarm_log_close(RRDHOST *host) { +static inline void health_alarm_log_close(RRDHOST *host) { if(host->health_log_fp) { fclose(host->health_log_fp); host->health_log_fp = NULL; } } -inline void health_log_rotate(RRDHOST *host) { +static inline void health_log_rotate(RRDHOST *host) { static size_t rotate_every = 0; if(unlikely(rotate_every == 0)) { @@ -67,10 +67,41 @@ inline void health_log_rotate(RRDHOST *host) { } } -inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { +inline void health_label_log_save(RRDHOST *host) { health_log_rotate(host); if(likely(host->health_log_fp)) { + BUFFER *wb = buffer_create(1024); + rrdhost_check_rdlock(host); + netdata_rwlock_rdlock(&host->labels.labels_rwlock); + struct label *l=localhost->labels.head; + while (l != NULL) { + buffer_sprintf(wb,"%s=%s\t ", l->key, l->value); + l = l->next; + } + netdata_rwlock_unlock(&host->labels.labels_rwlock); + + char *write = (char *) buffer_tostring(wb) ; + + write[wb->len-2] = '\n'; + write[wb->len-1] = '\0'; + + if (unlikely(fprintf(host->health_log_fp, "L\t%s" + , write + ) < 0)) + error("HEALTH [%s]: failed to save alarm log entry to '%s'. Health data may be lost in case of abnormal restart.", + host->hostname, host->health_log_filename); + else { + host->health_log_entries_written++; + } + + buffer_free(wb); + } +} + +inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { + health_log_rotate(host); + if(likely(host->health_log_fp)) { if(unlikely(fprintf(host->health_log_fp , "%c\t%s" "\t%08x\t%08x\t%08x\t%08x\t%08x" @@ -121,9 +152,33 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { host->health_log_entries_written++; } } +#ifdef ENABLE_ACLK + if (netdata_cloud_setting) { + if ((ae->new_status == RRDCALC_STATUS_WARNING || ae->new_status == RRDCALC_STATUS_CRITICAL) || + ((ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL))) { + aclk_update_alarm(host, ae); + } + } +#endif +} + +static uint32_t is_valid_alarm_id(RRDHOST *host, const char *chart, const char *name, uint32_t alarm_id) +{ + uint32_t hash_chart = simple_hash(chart); + uint32_t hash_name = simple_hash(name); + + ALARM_ENTRY *ae; + for(ae = host->health_log.alarms; ae ;ae = ae->next) { + if (unlikely( + ae->alarm_id == alarm_id && (!(ae->hash_name == hash_name && ae->hash_chart == hash_chart && + !strcmp(name, ae->name) && !strcmp(chart, ae->chart))))) { + return 0; + } + } + return 1; } -inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filename) { +static inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filename) { errno = 0; char *s, *buf = mallocz(65536 + 1); @@ -152,6 +207,9 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena else s++; } + if(likely(*pointers[0] == 'L')) + continue; + if(likely(*pointers[0] == 'U' || *pointers[0] == 'A')) { ALARM_ENTRY *ae = NULL; @@ -248,6 +306,8 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena // error("HEALTH [%s]: line %zu of file '%s' provides an alarm for host '%s' but this is named '%s'.", host->hostname, line, filename, pointers[1], host->hostname); ae->unique_id = unique_id; + if (!is_valid_alarm_id(host, pointers[14], pointers[13], alarm_id)) + alarm_id = rrdcalc_get_unique_id(host, pointers[14], pointers[13], NULL); ae->alarm_id = alarm_id; ae->alarm_event_id = (uint32_t)strtoul(pointers[4], NULL, 16); ae->updated_by_id = (uint32_t)strtoul(pointers[5], NULL, 16); @@ -338,7 +398,8 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena if(!host->health_max_alarm_id) host->health_max_alarm_id = (uint32_t)now_realtime_sec(); host->health_log.next_log_id = host->health_max_unique_id + 1; - host->health_log.next_alarm_id = host->health_max_alarm_id + 1; + if (unlikely(!host->health_log.next_alarm_id || host->health_log.next_alarm_id <= host->health_max_alarm_id)) + host->health_log.next_alarm_id = host->health_max_alarm_id + 1; debug(D_HEALTH, "HEALTH [%s]: loaded file '%s' with %zd new alarm entries, updated %zd alarms, errors %zd entries, duplicate %zd", host->hostname, filename, loaded, updated, errored, duplicate); return loaded; |