summaryrefslogtreecommitdiffstats
path: root/health/health_log.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r-- health/health_log.c 71
1 files changed, 66 insertions, 5 deletions
diff --git a/health/health_log.c b/health/health_log.c
index c91cde6cb..8c0bc5c34 100644
--- a/health/health_log.c
+++ b/health/health_log.c
@@ -22,14 +22,14 @@ inline int health_alarm_log_open(RRDHOST *host) {
return -1;
}
-inline void health_alarm_log_close(RRDHOST *host) {
+static inline void health_alarm_log_close(RRDHOST *host) {
if(host->health_log_fp) {
fclose(host->health_log_fp);
host->health_log_fp = NULL;
}
}
-inline void health_log_rotate(RRDHOST *host) {
+static inline void health_log_rotate(RRDHOST *host) {
static size_t rotate_every = 0;
if(unlikely(rotate_every == 0)) {
@@ -67,10 +67,41 @@ inline void health_log_rotate(RRDHOST *host) {
}
}
-inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
+// Append one 'L' record with the host's labels ("key=value" pairs separated by
+// "\t ") to the health log. Nothing is written when host->health_log_fp is NULL.
+inline void health_label_log_save(RRDHOST *host) {
health_log_rotate(host);
if(likely(host->health_log_fp)) {
+ BUFFER *wb = buffer_create(1024);
+ rrdhost_check_rdlock(host);
+ netdata_rwlock_rdlock(&host->labels.labels_rwlock);
+ // walk the list of the same host whose labels lock is held above
+ struct label *l = host->labels.head;
+ const char *sep = "";
+ while (l != NULL) {
+ // the separator goes between pairs only, so no trailing bytes need patching
+ buffer_sprintf(wb, "%s%s=%s", sep, l->key, l->value);
+ sep = "\t ";
+ l = l->next;
+ }
+ netdata_rwlock_unlock(&host->labels.labels_rwlock);
+
+ // an empty label list produces "L\t\n"; the buffer is never indexed backwards
+ if (unlikely(fprintf(host->health_log_fp, "L\t%s\n", buffer_tostring(wb)) < 0))
+ error("HEALTH [%s]: failed to save alarm log entry to '%s'. Health data may be lost in case of abnormal restart.",
+ host->hostname, host->health_log_filename);
+ else {
+ host->health_log_entries_written++;
+ }
+
+ buffer_free(wb);
+ }
+}
+
+inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
+ health_log_rotate(host);
+ if(likely(host->health_log_fp)) {
if(unlikely(fprintf(host->health_log_fp
, "%c\t%s"
"\t%08x\t%08x\t%08x\t%08x\t%08x"
@@ -121,9 +152,33 @@ inline void health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) {
host->health_log_entries_written++;
}
}
+#ifdef ENABLE_ACLK
+ if (netdata_cloud_setting) {
+ if ((ae->new_status == RRDCALC_STATUS_WARNING || ae->new_status == RRDCALC_STATUS_CRITICAL) ||
+ ((ae->old_status == RRDCALC_STATUS_WARNING || ae->old_status == RRDCALC_STATUS_CRITICAL))) {
+ aclk_update_alarm(host, ae);
+ }
+ }
+#endif
+}
+
+// Returns 0 if an entry in host->health_log.alarms has alarm_id with another chart/name, else 1.
+static uint32_t is_valid_alarm_id(RRDHOST *host, const char *chart, const char *name, uint32_t alarm_id)
+{
+ const uint32_t chart_hash = simple_hash(chart);
+ const uint32_t name_hash = simple_hash(name);
+ for (ALARM_ENTRY *entry = host->health_log.alarms; entry != NULL; entry = entry->next) {
+ if (entry->alarm_id != alarm_id)
+ continue;
+ // same id: it must belong to exactly this chart/name pair (hashes are compared first)
+ if (unlikely(entry->hash_name != name_hash || entry->hash_chart != chart_hash ||
+ strcmp(name, entry->name) != 0 || strcmp(chart, entry->chart) != 0))
+ return 0;
+ }
+ return 1;
}
-inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filename) {
+static inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filename) {
errno = 0;
char *s, *buf = mallocz(65536 + 1);
@@ -152,6 +207,9 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena
else s++;
}
+ if(likely(*pointers[0] == 'L'))
+ continue;
+
if(likely(*pointers[0] == 'U' || *pointers[0] == 'A')) {
ALARM_ENTRY *ae = NULL;
@@ -248,6 +306,8 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena
// error("HEALTH [%s]: line %zu of file '%s' provides an alarm for host '%s' but this is named '%s'.", host->hostname, line, filename, pointers[1], host->hostname);
ae->unique_id = unique_id;
+ if (!is_valid_alarm_id(host, pointers[14], pointers[13], alarm_id))
+ alarm_id = rrdcalc_get_unique_id(host, pointers[14], pointers[13], NULL);
ae->alarm_id = alarm_id;
ae->alarm_event_id = (uint32_t)strtoul(pointers[4], NULL, 16);
ae->updated_by_id = (uint32_t)strtoul(pointers[5], NULL, 16);
@@ -338,7 +398,8 @@ inline ssize_t health_alarm_log_read(RRDHOST *host, FILE *fp, const char *filena
if(!host->health_max_alarm_id) host->health_max_alarm_id = (uint32_t)now_realtime_sec();
host->health_log.next_log_id = host->health_max_unique_id + 1;
- host->health_log.next_alarm_id = host->health_max_alarm_id + 1;
+ if (unlikely(!host->health_log.next_alarm_id || host->health_log.next_alarm_id <= host->health_max_alarm_id))
+ host->health_log.next_alarm_id = host->health_max_alarm_id + 1;
debug(D_HEALTH, "HEALTH [%s]: loaded file '%s' with %zd new alarm entries, updated %zd alarms, errors %zd entries, duplicate %zd", host->hostname, filename, loaded, updated, errored, duplicate);
return loaded;