diff options
Diffstat (limited to 'database/sqlite/sqlite_aclk.c')
-rw-r--r-- | database/sqlite/sqlite_aclk.c | 323 |
1 files changed, 35 insertions, 288 deletions
diff --git a/database/sqlite/sqlite_aclk.c b/database/sqlite/sqlite_aclk.c index 43b34109..7e3a9b2e 100644 --- a/database/sqlite/sqlite_aclk.c +++ b/database/sqlite/sqlite_aclk.c @@ -3,59 +3,20 @@ #include "sqlite_functions.h" #include "sqlite_aclk.h" -#include "sqlite_aclk_chart.h" #include "sqlite_aclk_node.h" -#ifdef ENABLE_ACLK -#include "../../aclk/aclk.h" -#endif - void sanity_check(void) { // make sure the compiler will stop on misconfigurations BUILD_BUG_ON(WORKER_UTILIZATION_MAX_JOB_TYPES < ACLK_MAX_ENUMERATIONS_DEFINED); } const char *aclk_sync_config[] = { - "CREATE TABLE IF NOT EXISTS dimension_delete (dimension_id blob, dimension_name text, chart_type_id text, " - "dim_id blob, chart_id blob, host_id blob, date_created);", - - "CREATE INDEX IF NOT EXISTS ind_h1 ON dimension_delete (host_id);", - - "CREATE TRIGGER IF NOT EXISTS tr_dim_del AFTER DELETE ON dimension BEGIN INSERT INTO dimension_delete " - "(dimension_id, dimension_name, chart_type_id, dim_id, chart_id, host_id, date_created)" - " select old.id, old.name, c.type||\".\"||c.id, old.dim_id, old.chart_id, c.host_id, unixepoch() FROM" - " chart c WHERE c.chart_id = old.chart_id; END;", - - "DELETE FROM dimension_delete WHERE host_id NOT IN" - " (SELECT host_id FROM host) OR unixepoch() - date_created > 604800;", NULL, }; uv_mutex_t aclk_async_lock; struct aclk_database_worker_config *aclk_thread_head = NULL; -int retention_running = 0; - -#ifdef ENABLE_ACLK -static void stop_retention_run() -{ - uv_mutex_lock(&aclk_async_lock); - retention_running = 0; - uv_mutex_unlock(&aclk_async_lock); -} - -static int request_retention_run() -{ - int rc = 0; - uv_mutex_lock(&aclk_async_lock); - if (unlikely(retention_running)) - rc = 1; - else - retention_running = 1; - uv_mutex_unlock(&aclk_async_lock); - return rc; -} -#endif int claimed() { @@ -197,25 +158,6 @@ struct aclk_database_cmd aclk_database_deq_cmd(struct aclk_database_worker_confi return ret; } -int aclk_worker_enq_cmd(char *node_id, struct aclk_database_cmd *cmd) -{ - if (unlikely(!node_id || !cmd)) - return 0; - - uv_mutex_lock(&aclk_async_lock); - struct aclk_database_worker_config *wc = aclk_thread_head; - - while (wc) { - if (!strcmp(wc->node_id, node_id)) - break; - wc = wc->next; - } - uv_mutex_unlock(&aclk_async_lock); - if (wc) - aclk_database_enq_cmd(wc, cmd); - return (wc == NULL); -} - struct aclk_database_worker_config *find_inactive_wc_by_node_id(char *node_id) { if (unlikely(!node_id)) @@ -237,15 +179,14 @@ struct aclk_database_worker_config *find_inactive_wc_by_node_id(char *node_id) void aclk_sync_exit_all() { rrd_rdlock(); - RRDHOST *host = localhost; - while(host) { + RRDHOST *host; + rrdhost_foreach_read(host) { struct aclk_database_worker_config *wc = host->dbsync_worker; if (wc) { wc->is_shutting_down = 1; (void) aclk_database_deq_cmd(wc); uv_cond_signal(&wc->cmd_cond); } - host = host->next; } rrd_unlock(); @@ -304,23 +245,26 @@ static int create_host_callback(void *data, int argc, char **argv, char **column , (const char *) (argv[IDX_PROGRAM_VERSION] ? argv[IDX_PROGRAM_VERSION] : "unknown") , argv[3] ? str2i(argv[IDX_UPDATE_EVERY]) : 1 , argv[13] ? str2i(argv[IDX_ENTRIES]) : 0 - , RRD_MEMORY_MODE_DBENGINE + , default_rrd_memory_mode , 0 // health , 0 // rrdpush enabled , NULL //destination , NULL // api key , NULL // send charts matching + , false // rrdpush_enable_replication + , 0 // rrdpush_seconds_to_replicate + , 0 // rrdpush_replication_step , system_info , 1 ); if (likely(host)) - host->host_labels = sql_load_host_labels((uuid_t *)argv[IDX_HOST_ID]); + host->rrdlabels = sql_load_host_labels((uuid_t *)argv[IDX_HOST_ID]); #ifdef NETDATA_INTERNAL_CHECKS char node_str[UUID_STR_LEN] = "<none>"; if (likely(host->node_id)) uuid_unparse_lower(*host->node_id, node_str); - internal_error(true, "Adding archived host \"%s\" with GUID \"%s\" node id = \"%s\"", host->hostname, host->machine_guid, node_str); + internal_error(true, "Adding archived host \"%s\" with GUID \"%s\" node id = \"%s\"", rrdhost_hostname(host), host->machine_guid, node_str); #endif return 0; } @@ -335,7 +279,7 @@ int aclk_start_sync_thread(void *data, int argc, char **argv, char **column) uuid_unparse_lower(*((uuid_t *) argv[0]), uuid_str); - RRDHOST *host = rrdhost_find_by_guid(uuid_str, 0); + RRDHOST *host = rrdhost_find_by_guid(uuid_str); if (host == localhost) return 0; @@ -361,7 +305,7 @@ void sql_aclk_sync_init(void) for (int i = 0; aclk_sync_config[i]; i++) { debug(D_ACLK_SYNC, "Executing %s", aclk_sync_config[i]); - rc = sqlite3_exec(db_meta, aclk_sync_config[i], 0, 0, &err_msg); + rc = sqlite3_exec_monitored(db_meta, aclk_sync_config[i], 0, 0, &err_msg); if (rc != SQLITE_OK) { error_report("SQLite error aclk sync initialization setup, rc = %d (%s)", rc, err_msg); error_report("SQLite failed statement %s", aclk_sync_config[i]); @@ -372,18 +316,16 @@ void sql_aclk_sync_init(void) info("SQLite aclk sync initialization completed"); fatal_assert(0 == uv_mutex_init(&aclk_async_lock)); - if (likely(rrdcontext_enabled == CONFIG_BOOLEAN_YES)) { - rc = sqlite3_exec(db_meta, "SELECT host_id, hostname, registry_hostname, update_every, os, " - "timezone, tags, hops, memory_mode, abbrev_timezone, utc_offset, program_name, " - "program_version, entries, health_enabled FROM host WHERE hops >0;", - create_host_callback, NULL, &err_msg); - if (rc != SQLITE_OK) { - error_report("SQLite error when loading archived hosts, rc = %d (%s)", rc, err_msg); - sqlite3_free(err_msg); - } + rc = sqlite3_exec_monitored(db_meta, "SELECT host_id, hostname, registry_hostname, update_every, os, " + "timezone, tags, hops, memory_mode, abbrev_timezone, utc_offset, program_name, " + "program_version, entries, health_enabled FROM host WHERE hops >0;", + create_host_callback, NULL, &err_msg); + if (rc != SQLITE_OK) { + error_report("SQLite error when loading archived hosts, rc = %d (%s)", rc, err_msg); + sqlite3_free(err_msg); } - rc = sqlite3_exec(db_meta, "SELECT ni.host_id, ni.node_id FROM host h, node_instance ni WHERE " + rc = sqlite3_exec_monitored(db_meta, "SELECT ni.host_id, ni.node_id FROM host h, node_instance ni WHERE " "h.host_id = ni.host_id AND ni.node_id IS NOT NULL;", aclk_start_sync_thread, NULL, &err_msg); if (rc != SQLITE_OK) { error_report("SQLite error when starting ACLK sync threads, rc = %d (%s)", rc, err_msg); @@ -423,30 +365,6 @@ static void timer_cb(uv_timer_t* handle) } if (aclk_connected) { - if (wc->rotation_after && wc->rotation_after < now) { - cmd.opcode = ACLK_DATABASE_UPD_RETENTION; - if (!aclk_database_enq_cmd_noblock(wc, &cmd)) - wc->rotation_after += ACLK_DATABASE_ROTATION_INTERVAL; - } - - if (wc->chart_updates && !wc->chart_pending && wc->chart_payload_count) { - cmd.opcode = ACLK_DATABASE_PUSH_CHART; - cmd.count = ACLK_MAX_CHART_BATCH; - cmd.param1 = ACLK_MAX_CHART_BATCH_COUNT; - if (!aclk_database_enq_cmd_noblock(wc, &cmd)) { - if (wc->retry_count) - info("Queued chart/dimension payload command %s, retry count = %u", wc->host_guid, wc->retry_count); - wc->chart_pending = 1; - wc->retry_count = 0; - } else { - wc->retry_count++; - if (wc->retry_count % 100 == 0) - error_report("Failed to queue chart/dimension payload command %s, retry count = %u", - wc->host_guid, - wc->retry_count); - } - } - if (wc->alert_updates && !wc->pause_alert_updates) { cmd.opcode = ACLK_DATABASE_PUSH_ALERT; cmd.count = ACLK_MAX_ALERT_UPDATES; @@ -456,52 +374,12 @@ static void timer_cb(uv_timer_t* handle) #endif } - -#ifdef ENABLE_ACLK -void after_send_retention(uv_work_t *req, int status) -{ - struct aclk_database_worker_config *wc = req->data; - (void)status; - stop_retention_run(); - wc->retention_running = 0; - - struct aclk_database_cmd cmd; - memset(&cmd, 0, sizeof(cmd)); - cmd.opcode = ACLK_DATABASE_DIM_DELETION; - if (aclk_database_enq_cmd_noblock(wc, &cmd)) - info("Failed to queue a dimension deletion message"); - - cmd.opcode = ACLK_DATABASE_NODE_INFO; - if (aclk_database_enq_cmd_noblock(wc, &cmd)) - info("Failed to queue a node update info message"); -} - - -static void send_retention(uv_work_t *req) -{ - struct aclk_database_worker_config *wc = req->data; - - if (unlikely(wc->is_shutting_down)) - return; - - aclk_update_retention(wc); -} -#endif - #define MAX_CMD_BATCH_SIZE (256) void aclk_database_worker(void *arg) { worker_register("ACLKSYNC"); worker_register_job_name(ACLK_DATABASE_NOOP, "noop"); - worker_register_job_name(ACLK_DATABASE_ADD_CHART, "chart add"); - worker_register_job_name(ACLK_DATABASE_ADD_DIMENSION, "dimension add"); - worker_register_job_name(ACLK_DATABASE_PUSH_CHART, "chart push"); - worker_register_job_name(ACLK_DATABASE_PUSH_CHART_CONFIG, "chart conf push"); - worker_register_job_name(ACLK_DATABASE_RESET_CHART, "chart reset"); - worker_register_job_name(ACLK_DATABASE_CHART_ACK, "chart ack"); - worker_register_job_name(ACLK_DATABASE_UPD_RETENTION, "retention check"); - worker_register_job_name(ACLK_DATABASE_DIM_DELETION, "dimension delete"); worker_register_job_name(ACLK_DATABASE_ORPHAN_HOST, "node orphan"); worker_register_job_name(ACLK_DATABASE_ALARM_HEALTH_LOG, "alert log"); worker_register_job_name(ACLK_DATABASE_CLEANUP, "cleanup"); @@ -526,7 +404,7 @@ void aclk_database_worker(void *arg) char threadname[NETDATA_THREAD_NAME_MAX+1]; if (wc->host) - snprintfz(threadname, NETDATA_THREAD_NAME_MAX, "AS_%s", wc->host->hostname); + snprintfz(threadname, NETDATA_THREAD_NAME_MAX, "AS_%s", rrdhost_hostname(wc->host)); else { snprintfz(threadname, NETDATA_THREAD_NAME_MAX, "AS_%s", wc->uuid_str); threadname[11] = '\0'; @@ -556,23 +434,13 @@ void aclk_database_worker(void *arg) timer_req.data = wc; fatal_assert(0 == uv_timer_start(&timer_req, timer_cb, TIMER_PERIOD_MS, TIMER_PERIOD_MS)); -// wc->retry_count = 0; wc->node_info_send = 1; -// aclk_add_worker_thread(wc); - info("Starting ACLK sync thread for host %s -- scratch area %lu bytes", wc->host_guid, sizeof(*wc)); + info("Starting ACLK sync thread for host %s -- scratch area %lu bytes", wc->host_guid, (unsigned long int) sizeof(*wc)); memset(&cmd, 0, sizeof(cmd)); -#ifdef ENABLE_ACLK - uv_work_t retention_work; - sql_get_last_chart_sequence(wc); - wc->chart_payload_count = sql_get_pending_count(wc); - if (!wc->chart_payload_count) - info("%s: No pending charts and dimensions detected during startup", wc->host_guid); -#endif wc->startup_time = now_realtime_sec(); wc->cleanup_after = wc->startup_time + ACLK_DATABASE_CLEANUP_FIRST; - wc->rotation_after = wc->startup_time + ACLK_DATABASE_ROTATION_DELAY; debug(D_ACLK_SYNC,"Node %s reports pending message count = %u", wc->node_id, wc->chart_payload_count); @@ -604,6 +472,13 @@ void aclk_database_worker(void *arg) // MAINTENANCE case ACLK_DATABASE_CLEANUP: debug(D_ACLK_SYNC, "Database cleanup for %s", wc->host_guid); + + if (wc->startup_time + ACLK_DATABASE_CLEANUP_FIRST + 2 < now_realtime_sec() && claimed() && aclk_connected) { + cmd.opcode = ACLK_DATABASE_NODE_INFO; + cmd.completion = NULL; + (void) aclk_database_enq_cmd_noblock(wc, &cmd); + } + sql_maint_aclk_sync_database(wc, cmd); if (wc->host == localhost) sql_check_aclk_table_list(wc); @@ -614,33 +489,6 @@ void aclk_database_worker(void *arg) sql_delete_aclk_table_list(wc, cmd); break; -// CHART / DIMENSION OPERATIONS -#ifdef ENABLE_ACLK - case ACLK_DATABASE_ADD_CHART: - debug(D_ACLK_SYNC, "Adding chart event for %s", wc->host_guid); - aclk_add_chart_event(wc, cmd); - break; - case ACLK_DATABASE_ADD_DIMENSION: - debug(D_ACLK_SYNC, "Adding dimension event for %s", wc->host_guid); - aclk_add_dimension_event(wc, cmd); - break; - case ACLK_DATABASE_PUSH_CHART: - debug(D_ACLK_SYNC, "Pushing chart info to the cloud for node %s", wc->host_guid); - aclk_send_chart_event(wc, cmd); - break; - case ACLK_DATABASE_PUSH_CHART_CONFIG: - debug(D_ACLK_SYNC, "Pushing chart config info to the cloud for node %s", wc->host_guid); - aclk_send_chart_config(wc, cmd); - break; - case ACLK_DATABASE_CHART_ACK: - debug(D_ACLK_SYNC, "ACK chart SEQ for %s to %"PRIu64, wc->uuid_str, (uint64_t) cmd.param1); - aclk_receive_chart_ack(wc, cmd); - break; - case ACLK_DATABASE_RESET_CHART: - debug(D_ACLK_SYNC, "RESET chart SEQ for %s to %"PRIu64, wc->uuid_str, (uint64_t) cmd.param1); - aclk_receive_chart_reset(wc, cmd); - break; -#endif // ALERTS case ACLK_DATABASE_PUSH_ALERT_CONFIG: debug(D_ACLK_SYNC,"Pushing chart config info to the cloud for %s", wc->host_guid); @@ -673,27 +521,6 @@ void aclk_database_worker(void *arg) sql_build_node_collectors(wc); break; #ifdef ENABLE_ACLK - case ACLK_DATABASE_DIM_DELETION: - debug(D_ACLK_SYNC,"Sending dimension deletion information %s", wc->uuid_str); - aclk_process_dimension_deletion(wc, cmd); - break; - case ACLK_DATABASE_UPD_RETENTION: - if (unlikely(wc->retention_running)) - break; - - if (unlikely(request_retention_run())) { - wc->rotation_after = now_realtime_sec() + ACLK_DATABASE_RETENTION_RETRY; - break; - } - - debug(D_ACLK_SYNC,"Sending retention info for %s", wc->uuid_str); - retention_work.data = wc; - wc->retention_running = 1; - if (unlikely(uv_queue_work(loop, &retention_work, send_retention, after_send_retention))) { - wc->retention_running = 0; - stop_retention_run(); - } - break; // NODE_INSTANCE DETECTION case ACLK_DATABASE_ORPHAN_HOST: @@ -705,14 +532,14 @@ void aclk_database_worker(void *arg) case ACLK_DATABASE_TIMER: if (unlikely(localhost && !wc->host && !wc->is_orphan)) { if (claimed()) { - wc->host = rrdhost_find_by_guid(wc->host_guid, 0); + wc->host = rrdhost_find_by_guid(wc->host_guid); if (wc->host) { - info("HOST %s (%s) detected as active", wc->host->hostname, wc->host_guid); - snprintfz(threadname, NETDATA_THREAD_NAME_MAX, "AS_%s", wc->host->hostname); + info("HOST %s (%s) detected as active", rrdhost_hostname(wc->host), wc->host_guid); + snprintfz(threadname, NETDATA_THREAD_NAME_MAX, "AS_%s", rrdhost_hostname(wc->host)); uv_thread_set_name_np(wc->thread, threadname); wc->host->dbsync_worker = wc; if (unlikely(!wc->hostname)) - wc->hostname = strdupz(wc->host->hostname); + wc->hostname = strdupz(rrdhost_hostname(wc->host)); aclk_del_worker_thread(wc); wc->node_info_send = 1; } @@ -803,30 +630,6 @@ void sql_create_aclk_table(RRDHOST *host, uuid_t *host_uuid, uuid_t *node_id) BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE); - buffer_sprintf(sql, TABLE_ACLK_CHART, uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql, TABLE_ACLK_CHART_PAYLOAD, uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql, TABLE_ACLK_CHART_LATEST, uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql, INDEX_ACLK_CHART, uuid_str, uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql, INDEX_ACLK_CHART_LATEST, uuid_str, uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql, TRIGGER_ACLK_CHART_PAYLOAD, uuid_str, uuid_str, uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str); db_execute(buffer_tostring(sql)); buffer_flush(sql); @@ -844,16 +647,14 @@ void sql_create_aclk_table(RRDHOST *host, uuid_t *host_uuid, uuid_t *node_id) uuid_unparse_lower(*node_id, wc->node_id); if (likely(host)) { host->dbsync_worker = (void *)wc; - wc->hostname = strdupz(host->hostname); + wc->hostname = strdupz(rrdhost_hostname(host)); } else wc->hostname = get_hostname_by_node_id(wc->node_id); wc->host = host; strcpy(wc->uuid_str, uuid_str); strcpy(wc->host_guid, host_guid); - wc->chart_updates = 0; wc->alert_updates = 0; - wc->retry_count = 0; aclk_database_init_cmd_queue(wc); aclk_add_worker_thread(wc); fatal_assert(0 == uv_thread_create(&(wc->thread), aclk_database_worker, wc)); @@ -873,31 +674,9 @@ void sql_maint_aclk_sync_database(struct aclk_database_worker_config *wc, struct BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE); - buffer_sprintf(sql,"DELETE FROM aclk_chart_%s WHERE date_submitted IS NOT NULL AND " - "CAST(date_updated AS INT) < unixepoch()-%d;", wc->uuid_str, ACLK_DELETE_ACK_INTERNAL); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql,"DELETE FROM aclk_chart_payload_%s WHERE unique_id NOT IN " - "(SELECT unique_id FROM aclk_chart_%s) AND unique_id NOT IN (SELECT unique_id FROM aclk_chart_latest_%s);", - wc->uuid_str, wc->uuid_str, wc->uuid_str); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - buffer_sprintf(sql,"DELETE FROM aclk_alert_%s WHERE date_submitted IS NOT NULL AND " "CAST(date_cloud_ack AS INT) < unixepoch()-%d;", wc->uuid_str, ACLK_DELETE_ACK_ALERTS_INTERNAL); db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql,"UPDATE aclk_chart_%s SET status = NULL, date_submitted=unixepoch() WHERE " - "date_submitted IS NULL AND CAST(date_created AS INT) < unixepoch()-%d;", wc->uuid_str, ACLK_AUTO_MARK_SUBMIT_INTERVAL); - db_execute(buffer_tostring(sql)); - buffer_flush(sql); - - buffer_sprintf(sql,"UPDATE aclk_chart_%s SET date_updated = unixepoch() WHERE date_updated IS NULL" - " AND date_submitted IS NOT NULL AND CAST(date_submitted AS INT) < unixepoch()-%d;", - wc->uuid_str, ACLK_AUTO_MARK_UPDATED_INTERVAL); - db_execute(buffer_tostring(sql)); buffer_free(sql); return; @@ -927,7 +706,7 @@ static int is_host_available(uuid_t *host_id) error_report("Failed to bind host_id parameter to select node instance information"); goto failed; } - rc = sqlite3_step(res); + rc = sqlite3_step_monitored(res); failed: if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) @@ -980,7 +759,7 @@ void sql_delete_aclk_table_list(struct aclk_database_worker_config *wc, struct a } buffer_flush(sql); - while (sqlite3_step(res) == SQLITE_ROW) + while (sqlite3_step_monitored(res) == SQLITE_ROW) buffer_strcat(sql, (char *) sqlite3_column_text(res, 0)); rc = sqlite3_finalize(res); @@ -1016,42 +795,10 @@ void sql_check_aclk_table_list(struct aclk_database_worker_config *wc) { char *err_msg = NULL; debug(D_ACLK_SYNC,"Cleaning tables for nodes that do not exist"); - int rc = sqlite3_exec(db_meta, SQL_SELECT_ACLK_ACTIVE_LIST, sql_check_aclk_table, (void *) wc, &err_msg); + int rc = sqlite3_exec_monitored(db_meta, SQL_SELECT_ACLK_ACTIVE_LIST, sql_check_aclk_table, (void *) wc, &err_msg); if (rc != SQLITE_OK) { error_report("Query failed when trying to check for obsolete ACLK sync tables, %s", err_msg); sqlite3_free(err_msg); } - db_execute("DELETE FROM dimension_delete WHERE host_id NOT IN (SELECT host_id FROM host) " - " OR unixepoch() - date_created > 604800;"); - return; -} - -void aclk_data_rotated(void) -{ -#ifdef ENABLE_ACLK - - if (!aclk_connected) - return; - - time_t next_rotation_time = now_realtime_sec()+ACLK_DATABASE_ROTATION_DELAY; - rrd_rdlock(); - RRDHOST *this_host = localhost; - while (this_host) { - struct aclk_database_worker_config *wc = this_host->dbsync_worker; - if (wc) - wc->rotation_after = next_rotation_time; - this_host = this_host->next; - } - rrd_unlock(); - - struct aclk_database_worker_config *tmp = aclk_thread_head; - - uv_mutex_lock(&aclk_async_lock); - while (tmp) { - tmp->rotation_after = next_rotation_time; - tmp = tmp->next; - } - uv_mutex_unlock(&aclk_async_lock); -#endif return; } |