summary refs log tree commit diff stats
path: root/database/sqlite/sqlite_aclk_alert.c
diff options
context:
space:
mode:
Diffstat (limited to 'database/sqlite/sqlite_aclk_alert.c')
-rw-r--r-- database/sqlite/sqlite_aclk_alert.c 101
1 files changed, 54 insertions, 47 deletions
diff --git a/database/sqlite/sqlite_aclk_alert.c b/database/sqlite/sqlite_aclk_alert.c
index 1e5bd0b74..52d343acb 100644
--- a/database/sqlite/sqlite_aclk_alert.c
+++ b/database/sqlite/sqlite_aclk_alert.c
@@ -75,7 +75,7 @@ static inline bool is_event_from_alert_variable_config(uint32_t unique_id, char
return ret;
}
-#define MAX_REMOVED_PERIOD 86400
+#define MAX_REMOVED_PERIOD 604800 //a week
//decide if some events should be sent or not
#define SQL_SELECT_ALERT_BY_ID "SELECT hl.new_status, hl.config_hash_id, hl.unique_id FROM health_log_%s hl, aclk_alert_%s aa " \
@@ -255,6 +255,29 @@ int rrdcalc_status_to_proto_enum(RRDCALC_STATUS status)
#endif
}
+// Returns a strdupz() copy of the lowercase UUID text for column iCol, or "" when the column is not a uuid_t-sized blob.
+static inline char *sqlite3_uuid_unparse_strdupz(sqlite3_stmt *res, int iCol) {
+    char uuid_str[UUID_STR_LEN] = "";
+    // sqlite3_column_blob() yields NULL for SQL NULL and zero-length blobs; ask for the size only after fetching it
+    const void *blob = sqlite3_column_blob(res, iCol);
+
+    if(blob && sqlite3_column_bytes(res, iCol) == (int) sizeof(uuid_t))
+        uuid_unparse_lower(*((const uuid_t *) blob), uuid_str);
+    return strdupz(uuid_str);
+}
+
+// Returns a strdupz() copy of text column iCol; a NULL column (or a NULL text pointer) becomes "".
+static inline char *sqlite3_text_strdupz_empty(sqlite3_stmt *res, int iCol) {
+    const char *text = NULL;
+
+    if(sqlite3_column_type(res, iCol) != SQLITE_NULL)
+        text = (const char *) sqlite3_column_text(res, iCol);
+
+    // sqlite3_column_text() may still return NULL (e.g. out of memory); never hand NULL to strdupz()
+    return strdupz(text ? text : "");
+}
+
+
void aclk_push_alert_event(struct aclk_sync_host_config *wc)
{
#ifndef ENABLE_ACLK
@@ -285,7 +308,7 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
buffer_sprintf(sql, "select aa.sequence_id, hl.unique_id, hl.alarm_id, hl.config_hash_id, hl.updated_by_id, hl.when_key, " \
" hl.duration, hl.non_clear_duration, hl.flags, hl.exec_run_timestamp, hl.delay_up_to_timestamp, hl.name, " \
" hl.chart, hl.family, hl.exec, hl.recipient, hl.source, hl.units, hl.info, hl.exec_code, hl.new_status, " \
- " hl.old_status, hl.delay, hl.new_value, hl.old_value, hl.last_repeat, hl.chart_context " \
+ " hl.old_status, hl.delay, hl.new_value, hl.old_value, hl.last_repeat, hl.chart_context, hl.transition_id, hl.alarm_event_id " \
" from health_log_%s hl, aclk_alert_%s aa " \
" where hl.unique_id = aa.alert_unique_id and aa.date_submitted is null " \
" order by aa.sequence_id asc limit %d;", wc->uuid_str, wc->uuid_str, limit);
@@ -321,7 +344,6 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
}
}
- char uuid_str[GUID_LEN + 1];
uint64_t first_sequence_id = 0;
uint64_t last_sequence_id = 0;
static __thread uint64_t log_first_sequence_id = 0;
@@ -343,8 +365,7 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
//alarm_log.sequence_id = (uint64_t) sqlite3_column_int64(res, 0);
alarm_log.when = (time_t) sqlite3_column_int64(res, 5);
- uuid_unparse_lower(*((uuid_t *) sqlite3_column_blob(res, 3)), uuid_str);
- alarm_log.config_hash = strdupz((char *)uuid_str);
+ alarm_log.config_hash = sqlite3_uuid_unparse_strdupz(res, 3);
alarm_log.utc_offset = wc->host->utc_offset;
alarm_log.timezone = strdupz(rrdhost_abbrev_timezone(wc->host));
@@ -387,13 +408,12 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc)
alarm_log.old_value = (NETDATA_DOUBLE) sqlite3_column_double(res, 24);
alarm_log.updated = (sqlite3_column_int64(res, 8) & HEALTH_ENTRY_FLAG_UPDATED) ? 1 : 0;
- alarm_log.rendered_info = sqlite3_column_type(res, 18) == SQLITE_NULL ?
- strdupz((char *)"") :
- strdupz((char *)sqlite3_column_text(res, 18));
+ alarm_log.rendered_info = sqlite3_text_strdupz_empty(res, 18);
- alarm_log.chart_context = sqlite3_column_type(res, 26) == SQLITE_NULL ?
- strdupz((char *)"") :
- strdupz((char *)sqlite3_column_text(res, 26));
+ alarm_log.chart_context = sqlite3_text_strdupz_empty(res, 26);
+ alarm_log.transition_id = sqlite3_uuid_unparse_strdupz(res, 27);
+
+ alarm_log.event_id = (time_t) sqlite3_column_int64(res, 28);
aclk_send_alarm_log_entry(&alarm_log);
@@ -463,7 +483,7 @@ void aclk_push_alert_events_for_all_hosts(void)
void sql_queue_existing_alerts_to_aclk(RRDHOST *host)
{
- char uuid_str[GUID_LEN + 1];
+ char uuid_str[UUID_STR_LEN];
uuid_unparse_lower_fix(&host->host_uuid, uuid_str);
BUFFER *sql = buffer_create(1024, &netdata_buffers_statistics.buffers_sqlite);
@@ -747,8 +767,10 @@ void aclk_process_send_alarm_snapshot(char *node_id, char *claim_id __maybe_unus
void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_ENTRY *ae, RRDHOST *host)
{
char *edit_command = ae->source ? health_edit_command_from_source(ae_source(ae)) : strdupz("UNKNOWN=0=UNKNOWN");
- char config_hash_id[GUID_LEN + 1];
+ char config_hash_id[UUID_STR_LEN];
uuid_unparse_lower(ae->config_hash_id, config_hash_id);
+ char transition_id[UUID_STR_LEN];
+ uuid_unparse_lower(ae->transition_id, transition_id);
alarm_log->chart = strdupz(ae_chart_name(ae));
alarm_log->name = strdupz(ae_name(ae));
@@ -790,6 +812,9 @@ void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_EN
alarm_log->rendered_info = strdupz(ae_info(ae));
alarm_log->chart_context = strdupz(ae_chart_context(ae));
+ alarm_log->transition_id = strdupz((char *)transition_id);
+ alarm_log->event_id = (uint64_t) ae->alarm_event_id;
+
freez(edit_command);
}
#endif
@@ -939,18 +964,14 @@ void aclk_push_alert_snapshot_event(char *node_id __maybe_unused)
#endif
}
-#define SQL_DELETE_ALERT_ENTRIES "DELETE FROM aclk_alert_%s WHERE filtered_alert_unique_id NOT IN (SELECT unique_id FROM health_log_%s);"
-
+#define SQL_DELETE_ALERT_ENTRIES "DELETE FROM aclk_alert_%s WHERE filtered_alert_unique_id + %d < UNIXEPOCH();"
void sql_aclk_alert_clean_dead_entries(RRDHOST *host)
{
- if (!claimed())
- return;
-
char uuid_str[UUID_STR_LEN];
uuid_unparse_lower_fix(&host->host_uuid, uuid_str);
- char sql[512];
- snprintfz(sql,511,SQL_DELETE_ALERT_ENTRIES, uuid_str, uuid_str);
+ char sql[ACLK_SYNC_QUERY_SIZE];
+ snprintfz(sql, ACLK_SYNC_QUERY_SIZE - 1, SQL_DELETE_ALERT_ENTRIES, uuid_str, MAX_REMOVED_PERIOD);
char *err_msg = NULL;
int rc = sqlite3_exec_monitored(db_meta, sql, NULL, NULL, &err_msg);
@@ -1038,6 +1059,7 @@ static inline int compare_active_alerts(const void * a, const void * b) {
return strcmp(active_alerts_a->name, active_alerts_b->name);
}
+#define BATCH_ALLOCATED 10
void aclk_push_alarm_checkpoint(RRDHOST *host __maybe_unused)
{
#ifdef ENABLE_ACLK
@@ -1047,21 +1069,18 @@ void aclk_push_alarm_checkpoint(RRDHOST *host __maybe_unused)
return;
}
- //TODO: make sure all pending events are sent.
if (rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS)) {
//postpone checkpoint send
- wc->alert_checkpoint_req++;
+ wc->alert_checkpoint_req+=3;
log_access("ACLK REQ [%s (N/A)]: ALERTS CHECKPOINT POSTPONED", rrdhost_hostname(host));
return;
}
- //TODO: lock rc here, or make sure it's called when health decides
- //count them
RRDCALC *rc;
uint32_t cnt = 0;
size_t len = 0;
- active_alerts_t *active_alerts = NULL;
+ active_alerts_t *active_alerts = callocz(BATCH_ALLOCATED, sizeof(active_alerts_t));
foreach_rrdcalc_in_rrdhost_read(host, rc) {
if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
continue;
@@ -1069,33 +1088,21 @@ void aclk_push_alarm_checkpoint(RRDHOST *host __maybe_unused)
if (rc->status == RRDCALC_STATUS_WARNING ||
rc->status == RRDCALC_STATUS_CRITICAL) {
+ if (cnt && !(cnt % BATCH_ALLOCATED)) {
+ active_alerts = reallocz(active_alerts, (BATCH_ALLOCATED * ((cnt / BATCH_ALLOCATED) + 1)) * sizeof(active_alerts_t));
+ }
+
+ active_alerts[cnt].name = (char *)rrdcalc_name(rc);
+ len += string_strlen(rc->name);
+ active_alerts[cnt].chart = (char *)rrdcalc_chart_name(rc);
+ len += string_strlen(rc->chart);
+ active_alerts[cnt].status = rc->status;
+ len++;
cnt++;
}
}
foreach_rrdcalc_in_rrdhost_done(rc);
- if (cnt) {
- active_alerts = callocz(cnt, sizeof(active_alerts_t));
- cnt = 0;
- foreach_rrdcalc_in_rrdhost_read(host, rc) {
- if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
- continue;
-
- if (rc->status == RRDCALC_STATUS_WARNING ||
- rc->status == RRDCALC_STATUS_CRITICAL) {
-
- active_alerts[cnt].name = (char *)rrdcalc_name(rc);
- len += string_strlen(rc->name);
- active_alerts[cnt].chart = (char *)rrdcalc_chart_name(rc);
- len += string_strlen(rc->chart);
- active_alerts[cnt].status = rc->status;
- len++;
- cnt++;
- }
- }
- foreach_rrdcalc_in_rrdhost_done(rc);
- }
-
BUFFER *alarms_to_hash;
if (cnt) {
qsort (active_alerts, cnt, sizeof(active_alerts_t), compare_active_alerts);