diff options
Diffstat (limited to 'src/database/sqlite/sqlite_health.c')
-rw-r--r-- | src/database/sqlite/sqlite_health.c | 263 |
1 files changed, 181 insertions, 82 deletions
diff --git a/src/database/sqlite/sqlite_health.c b/src/database/sqlite/sqlite_health.c index 51e38d05a..a632fd494 100644 --- a/src/database/sqlite/sqlite_health.c +++ b/src/database/sqlite/sqlite_health.c @@ -55,15 +55,137 @@ done: } /* Health related SQL queries - Inserts an entry in the table + * + * Inserts an entry in the tables + * alert_queue + * health_log + * health_log_detail + * */ +int calculate_delay(RRDCALC_STATUS old_status, RRDCALC_STATUS new_status) +{ + int delay = ALERT_TRANSITION_DELAY_NONE; + switch(old_status) { + case RRDCALC_STATUS_REMOVED: + switch (new_status) { + case RRDCALC_STATUS_UNINITIALIZED: + delay = ALERT_TRANSITION_DELAY_LONG; + break; + case RRDCALC_STATUS_CLEAR: + delay = ALERT_TRANSITION_DELAY_SHORT; + break; + default: + delay = ALERT_TRANSITION_DELAY_NONE; + break; + } + break; + case RRDCALC_STATUS_UNDEFINED: + case RRDCALC_STATUS_UNINITIALIZED: + switch (new_status) { + case RRDCALC_STATUS_REMOVED: + case RRDCALC_STATUS_UNINITIALIZED: + case RRDCALC_STATUS_UNDEFINED: + delay = ALERT_TRANSITION_DELAY_LONG; + break; + case RRDCALC_STATUS_CLEAR: + delay = ALERT_TRANSITION_DELAY_SHORT; + break; + default: + delay = ALERT_TRANSITION_DELAY_NONE; + break; + } + break; + case RRDCALC_STATUS_CLEAR: + switch (new_status) { + case RRDCALC_STATUS_REMOVED: + case RRDCALC_STATUS_UNINITIALIZED: + case RRDCALC_STATUS_UNDEFINED: + delay = ALERT_TRANSITION_DELAY_LONG; + break; + case RRDCALC_STATUS_WARNING: + case RRDCALC_STATUS_CRITICAL: + default: + delay = ALERT_TRANSITION_DELAY_NONE; + break; + + } + break; + case RRDCALC_STATUS_WARNING: + case RRDCALC_STATUS_CRITICAL: + switch (new_status) { + case RRDCALC_STATUS_REMOVED: + case RRDCALC_STATUS_UNINITIALIZED: + case RRDCALC_STATUS_UNDEFINED: + delay = ALERT_TRANSITION_DELAY_LONG; + break; + case RRDCALC_STATUS_CLEAR: + delay = ALERT_TRANSITION_DELAY_SHORT; + break; + default: + delay = ALERT_TRANSITION_DELAY_NONE; + break; + } + break; + default: + delay = ALERT_TRANSITION_DELAY_NONE; + break; + } + return delay; +} + +#ifdef ENABLE_ACLK +#define SQL_INSERT_ALERT_PENDING_QUEUE \ + "INSERT INTO alert_queue (host_id, health_log_id, unique_id, alarm_id, status, date_scheduled)" \ + " VALUES (@host_id, @health_log_id, @unique_id, @alarm_id, @new_status, UNIXEPOCH() + @delay)" \ + " ON CONFLICT (host_id, health_log_id, alarm_id)" \ + " DO UPDATE SET status = excluded.status, unique_id = excluded.unique_id, " \ + " date_scheduled = MIN(date_scheduled, excluded.date_scheduled)" + +static void insert_alert_queue( + RRDHOST *host, + uint64_t health_log_id, + int64_t unique_id, + uint32_t alarm_id, + RRDCALC_STATUS old_status, + RRDCALC_STATUS new_status) +{ + static __thread sqlite3_stmt *res = NULL; + int rc; + + if (!host->aclk_config) + return; + + if (!PREPARE_COMPILED_STATEMENT(db_meta, SQL_INSERT_ALERT_PENDING_QUEUE, &res)) + return; + + int submit_delay = calculate_delay(old_status, new_status); + + int param = 0; + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); + SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, (sqlite3_int64)health_log_id)); + SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, unique_id)); + SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, alarm_id)); + SQLITE_BIND_FAIL(done, sqlite3_bind_int(res, ++param, new_status)); + SQLITE_BIND_FAIL(done, sqlite3_bind_int(res, ++param, submit_delay)); + + param = 0; + rc = execute_insert(res); + if (rc != SQLITE_DONE) + error_report( + "HEALTH [%s]: Failed to execute insert_alert_queue, rc = %d", rrdhost_hostname(host), rc); + +done: + REPORT_BIND_FAIL(res, param); + SQLITE_RESET(res); +} +#endif #define SQL_INSERT_HEALTH_LOG_DETAIL \ "INSERT INTO health_log_detail (health_log_id, unique_id, alarm_id, alarm_event_id, " \ "updated_by_id, updates_id, when_key, duration, non_clear_duration, flags, exec_run_timestamp, delay_up_to_timestamp, " \ "info, exec_code, new_status, old_status, delay, new_value, old_value, last_repeat, transition_id, global_id, summary) " \ - "VALUES (@health_log_id,@unique_id,@alarm_id,@alarm_event_id,@updated_by_id,@updates_id,@when_key,@duration," \ + " VALUES (@health_log_id,@unique_id,@alarm_id,@alarm_event_id,@updated_by_id,@updates_id,@when_key,@duration," \ "@non_clear_duration,@flags,@exec_run_timestamp,@delay_up_to_timestamp, @info,@exec_code,@new_status,@old_status," \ "@delay,@new_value,@old_value,@last_repeat,@transition_id,@global_id,@summary)" @@ -150,6 +272,11 @@ static void sql_health_alarm_log_insert(RRDHOST *host, ALARM_ENTRY *ae) if (rc == SQLITE_ROW) { health_log_id = (size_t)sqlite3_column_int64(res, 0); sql_health_alarm_log_insert_detail(host, health_log_id, ae); +#ifdef ENABLE_ACLK + if (netdata_cloud_enabled) + insert_alert_queue( + host, health_log_id, (int64_t)ae->unique_id, (int64_t)ae->alarm_id, ae->old_status, ae->new_status); +#endif } else error_report("HEALTH [%s]: Failed to execute SQL_INSERT_HEALTH_LOG, rc = %d", rrdhost_hostname(host), rc); @@ -162,14 +289,8 @@ void sql_health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) { if (ae->flags & HEALTH_ENTRY_FLAG_SAVED) sql_health_alarm_log_update(host, ae); - else { + else sql_health_alarm_log_insert(host, ae); -#ifdef ENABLE_ACLK - if (netdata_cloud_enabled) { - sql_queue_alarm_to_aclk(host, ae, false); - } -#endif - } } /* @@ -179,44 +300,18 @@ void sql_health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae) * */ -#define SQL_CLEANUP_HEALTH_LOG_DETAIL_NOT_CLAIMED \ +#define SQL_CLEANUP_HEALTH_LOG_DETAIL \ "DELETE FROM health_log_detail WHERE health_log_id IN " \ - "(SELECT health_log_id FROM health_log WHERE host_id = @host_id) AND when_key < UNIXEPOCH() - @history " \ - "AND updated_by_id <> 0 AND transition_id NOT IN " \ - "(SELECT last_transition_id FROM health_log hl WHERE hl.host_id = @host_id)" - -#define SQL_CLEANUP_HEALTH_LOG_DETAIL_CLAIMED(guid) \ - "DELETE from health_log_detail WHERE unique_id NOT IN " \ - "(SELECT filtered_alert_unique_id FROM aclk_alert_%s) " \ - "AND unique_id IN (SELECT hld.unique_id FROM health_log hl, health_log_detail hld WHERE " \ - "hl.host_id = @host_id AND hl.health_log_id = hld.health_log_id) " \ - "AND health_log_id IN (SELECT health_log_id FROM health_log WHERE host_id = @host_id) " \ - "AND when_key < unixepoch() - @history " \ - "AND updated_by_id <> 0 AND transition_id NOT IN " \ - "(SELECT last_transition_id FROM health_log hl WHERE hl.host_id = @host_id)", \ - guid - -void sql_health_alarm_log_cleanup(RRDHOST *host, bool claimed) { + " (SELECT health_log_id FROM health_log WHERE host_id = @host_id) AND when_key < UNIXEPOCH() - @history " \ + " AND updated_by_id <> 0 AND transition_id NOT IN " \ + " (SELECT last_transition_id FROM health_log hl WHERE hl.host_id = @host_id)" + +void sql_health_alarm_log_cleanup(RRDHOST *host) +{ sqlite3_stmt *res = NULL; int rc; - char command[MAX_HEALTH_SQL_SIZE + 1]; - - REQUIRE_DB(db_meta); - - char uuid_str[UUID_STR_LEN]; - uuid_unparse_lower_fix(&host->host_uuid, uuid_str); - snprintfz(command, sizeof(command) - 1, "aclk_alert_%s", uuid_str); - bool aclk_table_exists = table_exists_in_database(db_meta, command); - - char *sql = SQL_CLEANUP_HEALTH_LOG_DETAIL_NOT_CLAIMED; - - if (claimed && aclk_table_exists) { - snprintfz(command, sizeof(command) - 1, SQL_CLEANUP_HEALTH_LOG_DETAIL_CLAIMED(uuid_str)); - sql = command; - } - - if (!PREPARE_STATEMENT(db_meta, sql, &res)) + if (!PREPARE_STATEMENT(db_meta, SQL_CLEANUP_HEALTH_LOG_DETAIL, &res)) return; int param = 0; @@ -228,33 +323,21 @@ void sql_health_alarm_log_cleanup(RRDHOST *host, bool claimed) { if (unlikely(rc != SQLITE_DONE)) error_report("Failed to cleanup health log detail table, rc = %d", rc); - if (aclk_table_exists) - sql_aclk_alert_clean_dead_entries(host); - done: REPORT_BIND_FAIL(res, param); SQLITE_FINALIZE(res); } -#define SQL_INJECT_REMOVED \ - "INSERT INTO health_log_detail (health_log_id, unique_id, alarm_id, alarm_event_id, updated_by_id, updates_id, when_key, " \ - "duration, non_clear_duration, flags, exec_run_timestamp, delay_up_to_timestamp, info, exec_code, new_status, old_status, " \ - "delay, new_value, old_value, last_repeat, transition_id, global_id, summary) " \ - "SELECT health_log_id, ?1, ?2, ?3, 0, ?4, UNIXEPOCH(), 0, 0, flags, exec_run_timestamp, UNIXEPOCH(), info, exec_code, -2, " \ - "new_status, delay, NULL, new_value, 0, ?5, NOW_USEC(0), summary FROM health_log_detail WHERE unique_id = ?6 AND transition_id = ?7" - -#define SQL_INJECT_REMOVED_UPDATE_DETAIL \ - "UPDATE health_log_detail SET flags = flags | ?1, updated_by_id = ?2 WHERE unique_id = ?3 AND transition_id = ?4" - -#define SQL_INJECT_REMOVED_UPDATE_LOG \ - "UPDATE health_log SET last_transition_id = ?1 WHERE alarm_id = ?2 AND last_transition_id = ?3 AND host_id = ?4" +#define SQL_UPDATE_TRANSITION_IN_HEALTH_LOG \ + "UPDATE health_log SET last_transition_id = @transition WHERE alarm_id = @alarm_id AND " \ + " last_transition_id = @prev_trans AND host_id = @host_id" -bool sql_update_removed_in_health_log(RRDHOST *host, uint32_t alarm_id, nd_uuid_t *transition_id, nd_uuid_t *last_transition) +bool sql_update_transition_in_health_log(RRDHOST *host, uint32_t alarm_id, nd_uuid_t *transition_id, nd_uuid_t *last_transition) { int rc = 0; sqlite3_stmt *res; - if (!PREPARE_STATEMENT(db_meta, SQL_INJECT_REMOVED_UPDATE_LOG, &res)) + if (!PREPARE_STATEMENT(db_meta, SQL_UPDATE_TRANSITION_IN_HEALTH_LOG, &res)) return false; int param = 0; @@ -275,12 +358,16 @@ done: return (param == 0 && rc == SQLITE_DONE); } -bool sql_update_removed_in_health_log_detail(uint32_t unique_id, uint32_t max_unique_id, nd_uuid_t *prev_transition_id) +#define SQL_SET_UPDATED_BY_IN_HEALTH_LOG_DETAIL \ + "UPDATE health_log_detail SET flags = flags | @flag, updated_by_id = @updated_by WHERE" \ + " unique_id = @unique_id AND transition_id = @transition_id" + +bool sql_set_updated_by_in_health_log_detail(uint32_t unique_id, uint32_t max_unique_id, nd_uuid_t *prev_transition_id) { int rc = 0; sqlite3_stmt *res; - if (!PREPARE_STATEMENT(db_meta, SQL_INJECT_REMOVED_UPDATE_DETAIL, &res)) + if (!PREPARE_STATEMENT(db_meta, SQL_SET_UPDATED_BY_IN_HEALTH_LOG_DETAIL, &res)) return false; int param = 0; @@ -301,7 +388,16 @@ done: return (param == 0 && rc == SQLITE_DONE); } -void sql_inject_removed_status( +#define SQL_INJECT_REMOVED \ + "INSERT INTO health_log_detail (health_log_id, unique_id, alarm_id, alarm_event_id, updated_by_id, updates_id, when_key, " \ + "duration, non_clear_duration, flags, exec_run_timestamp, delay_up_to_timestamp, info, exec_code, new_status, old_status, " \ + "delay, new_value, old_value, last_repeat, transition_id, global_id, summary) " \ + "SELECT health_log_id, @max_unique_id, @alarm_id, @alarm_event_id, 0, @unique_id, UNIXEPOCH(), 0, 0, flags, " \ + " exec_run_timestamp, UNIXEPOCH(), info, exec_code, -2, " \ + " new_status, delay, NULL, new_value, 0, @transition_id, NOW_USEC(0), summary FROM health_log_detail " \ + " WHERE unique_id = @unique_id AND transition_id = @last_transition_id RETURNING health_log_id, old_status" + +static void sql_inject_removed_status( RRDHOST *host, uint32_t alarm_id, uint32_t alarm_event_id, @@ -326,19 +422,27 @@ void sql_inject_removed_status( SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, (sqlite3_int64) alarm_event_id + 1)); SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, (sqlite3_int64) unique_id)); SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &transition_id, sizeof(transition_id), SQLITE_STATIC)); - SQLITE_BIND_FAIL(done, sqlite3_bind_int64(res, ++param, (sqlite3_int64) unique_id)); SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, last_transition, sizeof(*last_transition), SQLITE_STATIC)); param = 0; - int rc = execute_insert(res); - if (rc == SQLITE_DONE) { + //int rc = execute_insert(res); + while (sqlite3_step_monitored(res) == SQLITE_ROW) { //update the old entry in health_log_detail - sql_update_removed_in_health_log_detail(unique_id, max_unique_id, last_transition); + sql_set_updated_by_in_health_log_detail(unique_id, max_unique_id, last_transition); //update the old entry in health_log - sql_update_removed_in_health_log(host, alarm_id, &transition_id, last_transition); + sql_update_transition_in_health_log(host, alarm_id, &transition_id, last_transition); + +#ifdef ENABLE_ACLK + if (netdata_cloud_enabled) { + int64_t health_log_id = sqlite3_column_int64(res, 0); + RRDCALC_STATUS old_status = (RRDCALC_STATUS)sqlite3_column_double(res, 1); + insert_alert_queue( + host, health_log_id, (int64_t)unique_id, (int64_t)alarm_id, old_status, RRDCALC_STATUS_REMOVED); + } +#endif } - else - error_report("HEALTH [N/A]: Failed to execute SQL_INJECT_REMOVED, rc = %d", rc); + //else + // error_report("HEALTH [N/A]: Failed to execute SQL_INJECT_REMOVED, rc = %d", rc); done: REPORT_BIND_FAIL(res, param); @@ -461,7 +565,7 @@ void sql_alert_cleanup(bool cli) { UNUSED(cli); - errno = 0; + errno_clear(); if (sql_init_meta_database(DB_CHECK_NONE, 0)) { netdata_log_error("Failed to open database"); return; @@ -489,7 +593,6 @@ void sql_alert_cleanup(bool cli) void sql_health_alarm_log_load(RRDHOST *host) { sqlite3_stmt *res = NULL; - int ret; ssize_t errored = 0, loaded = 0; if (!REQUIRE_DB(db_meta)) @@ -500,21 +603,19 @@ void sql_health_alarm_log_load(RRDHOST *host) if (!PREPARE_STATEMENT(db_meta, SQL_LOAD_HEALTH_LOG, &res)) return; - ret = sqlite3_bind_blob(res, 1, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC); - if (unlikely(ret != SQLITE_OK)) { - error_report("Failed to bind host_id parameter for SQL_LOAD_HEALTH_LOG."); - SQLITE_FINALIZE(res); - return; - } + int param = 0; + SQLITE_BIND_FAIL(done, sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC)); DICTIONARY *all_rrdcalcs = dictionary_create( DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); + RRDCALC *rc; foreach_rrdcalc_in_rrdhost_read(host, rc) { dictionary_set(all_rrdcalcs, rrdcalc_name(rc), rc, sizeof(*rc)); } foreach_rrdcalc_in_rrdhost_done(rc); + param = 0; rw_spinlock_read_lock(&host->health_log.spinlock); while (sqlite3_step_monitored(res) == SQLITE_ROW) { @@ -618,8 +719,6 @@ void sql_health_alarm_log_load(RRDHOST *host) ae->summary = SQLITE3_COLUMN_STRINGDUP_OR_NULL(res, 33); char value_string[100 + 1]; - string_freez(ae->old_value_string); - string_freez(ae->new_value_string); ae->old_value_string = string_strdupz(format_value_and_unit(value_string, 100, ae->old_value, ae_units(ae), -1)); ae->new_value_string = string_strdupz(format_value_and_unit(value_string, 100, ae->new_value, ae_units(ae), -1)); @@ -631,7 +730,6 @@ void sql_health_alarm_log_load(RRDHOST *host) if(unlikely(ae->alarm_id >= host->health_max_alarm_id)) host->health_max_alarm_id = ae->alarm_id; - loaded++; } @@ -650,7 +748,8 @@ void sql_health_alarm_log_load(RRDHOST *host) nd_log(NDLS_DAEMON, errored ? NDLP_WARNING : NDLP_DEBUG, "[%s]: Table health_log, loaded %zd alarm entries, errors in %zd entries.", rrdhost_hostname(host), loaded, errored); - +done: + REPORT_BIND_FAIL(res, param); SQLITE_FINALIZE(res); } |