From 6cf8f2d5174a53f582e61d715edbb88d6e3367cc Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 14 Jun 2023 21:20:33 +0200 Subject: Adding upstream version 1.40.0. Signed-off-by: Daniel Baumann --- database/sqlite/sqlite_aclk_alert.c | 101 +++++++++++++++++++----------------- 1 file changed, 54 insertions(+), 47 deletions(-) (limited to 'database/sqlite/sqlite_aclk_alert.c') diff --git a/database/sqlite/sqlite_aclk_alert.c b/database/sqlite/sqlite_aclk_alert.c index 1e5bd0b74..52d343acb 100644 --- a/database/sqlite/sqlite_aclk_alert.c +++ b/database/sqlite/sqlite_aclk_alert.c @@ -75,7 +75,7 @@ static inline bool is_event_from_alert_variable_config(uint32_t unique_id, char return ret; } -#define MAX_REMOVED_PERIOD 86400 +#define MAX_REMOVED_PERIOD 604800 //a week //decide if some events should be sent or not #define SQL_SELECT_ALERT_BY_ID "SELECT hl.new_status, hl.config_hash_id, hl.unique_id FROM health_log_%s hl, aclk_alert_%s aa " \ @@ -255,6 +255,29 @@ int rrdcalc_status_to_proto_enum(RRDCALC_STATUS status) #endif } +static inline char *sqlite3_uuid_unparse_strdupz(sqlite3_stmt *res, int iCol) { + char uuid_str[UUID_STR_LEN]; + + if(sqlite3_column_type(res, iCol) == SQLITE_NULL) + uuid_str[0] = '\0'; + else + uuid_unparse_lower(*((uuid_t *) sqlite3_column_blob(res, iCol)), uuid_str); + + return strdupz(uuid_str); +} + +static inline char *sqlite3_text_strdupz_empty(sqlite3_stmt *res, int iCol) { + char *ret; + + if(sqlite3_column_type(res, iCol) == SQLITE_NULL) + ret = ""; + else + ret = (char *)sqlite3_column_text(res, iCol); + + return strdupz(ret); +} + + void aclk_push_alert_event(struct aclk_sync_host_config *wc) { #ifndef ENABLE_ACLK @@ -285,7 +308,7 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc) buffer_sprintf(sql, "select aa.sequence_id, hl.unique_id, hl.alarm_id, hl.config_hash_id, hl.updated_by_id, hl.when_key, " \ " hl.duration, hl.non_clear_duration, hl.flags, hl.exec_run_timestamp, hl.delay_up_to_timestamp, hl.name, " \ " hl.chart, hl.family, hl.exec, hl.recipient, hl.source, hl.units, hl.info, hl.exec_code, hl.new_status, " \ - " hl.old_status, hl.delay, hl.new_value, hl.old_value, hl.last_repeat, hl.chart_context " \ + " hl.old_status, hl.delay, hl.new_value, hl.old_value, hl.last_repeat, hl.chart_context, hl.transition_id, hl.alarm_event_id " \ " from health_log_%s hl, aclk_alert_%s aa " \ " where hl.unique_id = aa.alert_unique_id and aa.date_submitted is null " \ " order by aa.sequence_id asc limit %d;", wc->uuid_str, wc->uuid_str, limit); @@ -321,7 +344,6 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc) } } - char uuid_str[GUID_LEN + 1]; uint64_t first_sequence_id = 0; uint64_t last_sequence_id = 0; static __thread uint64_t log_first_sequence_id = 0; @@ -343,8 +365,7 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc) //alarm_log.sequence_id = (uint64_t) sqlite3_column_int64(res, 0); alarm_log.when = (time_t) sqlite3_column_int64(res, 5); - uuid_unparse_lower(*((uuid_t *) sqlite3_column_blob(res, 3)), uuid_str); - alarm_log.config_hash = strdupz((char *)uuid_str); + alarm_log.config_hash = sqlite3_uuid_unparse_strdupz(res, 3); alarm_log.utc_offset = wc->host->utc_offset; alarm_log.timezone = strdupz(rrdhost_abbrev_timezone(wc->host)); @@ -387,13 +408,12 @@ void aclk_push_alert_event(struct aclk_sync_host_config *wc) alarm_log.old_value = (NETDATA_DOUBLE) sqlite3_column_double(res, 24); alarm_log.updated = (sqlite3_column_int64(res, 8) & HEALTH_ENTRY_FLAG_UPDATED) ? 1 : 0; - alarm_log.rendered_info = sqlite3_column_type(res, 18) == SQLITE_NULL ? - strdupz((char *)"") : - strdupz((char *)sqlite3_column_text(res, 18)); + alarm_log.rendered_info = sqlite3_text_strdupz_empty(res, 18); - alarm_log.chart_context = sqlite3_column_type(res, 26) == SQLITE_NULL ? - strdupz((char *)"") : - strdupz((char *)sqlite3_column_text(res, 26)); + alarm_log.chart_context = sqlite3_text_strdupz_empty(res, 26); + alarm_log.transition_id = sqlite3_uuid_unparse_strdupz(res, 27); + + alarm_log.event_id = (time_t) sqlite3_column_int64(res, 28); aclk_send_alarm_log_entry(&alarm_log); @@ -463,7 +483,7 @@ void aclk_push_alert_events_for_all_hosts(void) void sql_queue_existing_alerts_to_aclk(RRDHOST *host) { - char uuid_str[GUID_LEN + 1]; + char uuid_str[UUID_STR_LEN]; uuid_unparse_lower_fix(&host->host_uuid, uuid_str); BUFFER *sql = buffer_create(1024, &netdata_buffers_statistics.buffers_sqlite); @@ -747,8 +767,10 @@ void aclk_process_send_alarm_snapshot(char *node_id, char *claim_id __maybe_unus void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_ENTRY *ae, RRDHOST *host) { char *edit_command = ae->source ? health_edit_command_from_source(ae_source(ae)) : strdupz("UNKNOWN=0=UNKNOWN"); - char config_hash_id[GUID_LEN + 1]; + char config_hash_id[UUID_STR_LEN]; uuid_unparse_lower(ae->config_hash_id, config_hash_id); + char transition_id[UUID_STR_LEN]; + uuid_unparse_lower(ae->transition_id, transition_id); alarm_log->chart = strdupz(ae_chart_name(ae)); alarm_log->name = strdupz(ae_name(ae)); @@ -790,6 +812,9 @@ void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_EN alarm_log->rendered_info = strdupz(ae_info(ae)); alarm_log->chart_context = strdupz(ae_chart_context(ae)); + alarm_log->transition_id = strdupz((char *)transition_id); + alarm_log->event_id = (uint64_t) ae->alarm_event_id; + freez(edit_command); } #endif @@ -939,18 +964,14 @@ void aclk_push_alert_snapshot_event(char *node_id __maybe_unused) #endif } -#define SQL_DELETE_ALERT_ENTRIES "DELETE FROM aclk_alert_%s WHERE filtered_alert_unique_id NOT IN (SELECT unique_id FROM health_log_%s);" - +#define SQL_DELETE_ALERT_ENTRIES "DELETE FROM aclk_alert_%s WHERE filtered_alert_unique_id + %d < UNIXEPOCH();" void sql_aclk_alert_clean_dead_entries(RRDHOST *host) { - if (!claimed()) - return; - char uuid_str[UUID_STR_LEN]; uuid_unparse_lower_fix(&host->host_uuid, uuid_str); - char sql[512]; - snprintfz(sql,511,SQL_DELETE_ALERT_ENTRIES, uuid_str, uuid_str); + char sql[ACLK_SYNC_QUERY_SIZE]; + snprintfz(sql, ACLK_SYNC_QUERY_SIZE - 1, SQL_DELETE_ALERT_ENTRIES, uuid_str, MAX_REMOVED_PERIOD); char *err_msg = NULL; int rc = sqlite3_exec_monitored(db_meta, sql, NULL, NULL, &err_msg); @@ -1038,6 +1059,7 @@ static inline int compare_active_alerts(const void * a, const void * b) { return strcmp(active_alerts_a->name, active_alerts_b->name); } +#define BATCH_ALLOCATED 10 void aclk_push_alarm_checkpoint(RRDHOST *host __maybe_unused) { #ifdef ENABLE_ACLK @@ -1047,21 +1069,18 @@ void aclk_push_alarm_checkpoint(RRDHOST *host __maybe_unused) return; } - //TODO: make sure all pending events are sent. if (rrdhost_flag_check(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS)) { //postpone checkpoint send - wc->alert_checkpoint_req++; + wc->alert_checkpoint_req+=3; log_access("ACLK REQ [%s (N/A)]: ALERTS CHECKPOINT POSTPONED", rrdhost_hostname(host)); return; } - //TODO: lock rc here, or make sure it's called when health decides - //count them RRDCALC *rc; uint32_t cnt = 0; size_t len = 0; - active_alerts_t *active_alerts = NULL; + active_alerts_t *active_alerts = callocz(BATCH_ALLOCATED, sizeof(active_alerts_t)); foreach_rrdcalc_in_rrdhost_read(host, rc) { if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec)) continue; @@ -1069,33 +1088,21 @@ void aclk_push_alarm_checkpoint(RRDHOST *host __maybe_unused) if (rc->status == RRDCALC_STATUS_WARNING || rc->status == RRDCALC_STATUS_CRITICAL) { + if (cnt && !(cnt % BATCH_ALLOCATED)) { + active_alerts = reallocz(active_alerts, (BATCH_ALLOCATED * ((cnt / BATCH_ALLOCATED) + 1)) * sizeof(active_alerts_t)); + } + + active_alerts[cnt].name = (char *)rrdcalc_name(rc); + len += string_strlen(rc->name); + active_alerts[cnt].chart = (char *)rrdcalc_chart_name(rc); + len += string_strlen(rc->chart); + active_alerts[cnt].status = rc->status; + len++; cnt++; } } foreach_rrdcalc_in_rrdhost_done(rc); - if (cnt) { - active_alerts = callocz(cnt, sizeof(active_alerts_t)); - cnt = 0; - foreach_rrdcalc_in_rrdhost_read(host, rc) { - if(unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec)) - continue; - - if (rc->status == RRDCALC_STATUS_WARNING || - rc->status == RRDCALC_STATUS_CRITICAL) { - - active_alerts[cnt].name = (char *)rrdcalc_name(rc); - len += string_strlen(rc->name); - active_alerts[cnt].chart = (char *)rrdcalc_chart_name(rc); - len += string_strlen(rc->chart); - active_alerts[cnt].status = rc->status; - len++; - cnt++; - } - } - foreach_rrdcalc_in_rrdhost_done(rc); - } - BUFFER *alarms_to_hash; if (cnt) { qsort (active_alerts, cnt, sizeof(active_alerts_t), compare_active_alerts); -- cgit v1.2.3