From 2d852004321781e79bb7f59bf61603d66000daae Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 5 Dec 2022 17:29:34 +0100 Subject: Merging upstream version 1.37.1. Signed-off-by: Daniel Baumann --- daemon/global_statistics.c | 267 +++++++++++++++++++++++++++++++-------------- daemon/service.c | 6 +- 2 files changed, 187 insertions(+), 86 deletions(-) (limited to 'daemon') diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c index 53fd6c45a..a4e9d321f 100644 --- a/daemon/global_statistics.c +++ b/daemon/global_statistics.c @@ -12,9 +12,10 @@ #define WORKER_JOB_STRINGS 5 #define WORKER_JOB_DICTIONARIES 6 #define WORKER_JOB_MALLOC_TRACE 7 +#define WORKER_JOB_SQLITE3 8 -#if WORKER_UTILIZATION_MAX_JOB_TYPES < 8 -#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 8 +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 9 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 9 #endif bool global_statistics_enabled = true; @@ -60,21 +61,6 @@ static struct global_statistics { uint64_t db_points_stored_per_tier[RRD_STORAGE_TIERS]; - uint64_t sqlite3_queries_made; - uint64_t sqlite3_queries_ok; - uint64_t sqlite3_queries_failed; - uint64_t sqlite3_queries_failed_busy; - uint64_t sqlite3_queries_failed_locked; - uint64_t sqlite3_rows; - uint64_t sqlite3_metadata_cache_hit; - uint64_t sqlite3_context_cache_hit; - uint64_t sqlite3_metadata_cache_miss; - uint64_t sqlite3_context_cache_miss; - uint64_t sqlite3_metadata_cache_spill; - uint64_t sqlite3_context_cache_spill; - uint64_t sqlite3_metadata_cache_write; - uint64_t sqlite3_context_cache_write; - } global_statistics = { .connected_clients = 0, .web_requests = 0, @@ -112,27 +98,6 @@ void global_statistics_backfill_query_completed(size_t points_read) { __atomic_fetch_add(&global_statistics.backfill_db_points_read, points_read, __ATOMIC_RELAXED); } -void global_statistics_sqlite3_query_completed(bool success, bool busy, bool locked) { - __atomic_fetch_add(&global_statistics.sqlite3_queries_made, 1, __ATOMIC_RELAXED); - - if(success) { - __atomic_fetch_add(&global_statistics.sqlite3_queries_ok, 1, __ATOMIC_RELAXED); - } - else { - __atomic_fetch_add(&global_statistics.sqlite3_queries_failed, 1, __ATOMIC_RELAXED); - - if(busy) - __atomic_fetch_add(&global_statistics.sqlite3_queries_failed_busy, 1, __ATOMIC_RELAXED); - - if(locked) - __atomic_fetch_add(&global_statistics.sqlite3_queries_failed_locked, 1, __ATOMIC_RELAXED); - } -} - -void global_statistics_sqlite3_row_completed(void) { - __atomic_fetch_add(&global_statistics.sqlite3_rows, 1, __ATOMIC_RELAXED); -} - void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source) { switch(query_source) { case QUERY_SOURCE_API_DATA: @@ -241,25 +206,6 @@ static inline void global_statistics_copy(struct global_statistics *gs, uint8_t uint64_t n = 0; __atomic_compare_exchange(&global_statistics.web_usec_max, (uint64_t *) &gs->web_usec_max, &n, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED); } - - gs->sqlite3_queries_made = __atomic_load_n(&global_statistics.sqlite3_queries_made, __ATOMIC_RELAXED); - gs->sqlite3_queries_ok = __atomic_load_n(&global_statistics.sqlite3_queries_ok, __ATOMIC_RELAXED); - gs->sqlite3_queries_failed = __atomic_load_n(&global_statistics.sqlite3_queries_failed, __ATOMIC_RELAXED); - gs->sqlite3_queries_failed_busy = __atomic_load_n(&global_statistics.sqlite3_queries_failed_busy, __ATOMIC_RELAXED); - gs->sqlite3_queries_failed_locked = __atomic_load_n(&global_statistics.sqlite3_queries_failed_locked, __ATOMIC_RELAXED); - gs->sqlite3_rows = __atomic_load_n(&global_statistics.sqlite3_rows, __ATOMIC_RELAXED); - - gs->sqlite3_metadata_cache_hit = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_HIT); - gs->sqlite3_context_cache_hit = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_HIT); - - gs->sqlite3_metadata_cache_miss = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_MISS); - gs->sqlite3_context_cache_miss = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_MISS); - - gs->sqlite3_metadata_cache_spill = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL); - gs->sqlite3_context_cache_spill = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL); - - gs->sqlite3_metadata_cache_write = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE); - gs->sqlite3_context_cache_write = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE); } static void global_statistics_charts(void) { @@ -707,8 +653,129 @@ static void global_statistics_charts(void) { rrdset_done(st_points_stored); } +} - // ---------------------------------------------------------------- +// ---------------------------------------------------------------------------- +// sqlite3 statistics + +struct sqlite3_statistics { + uint64_t sqlite3_queries_made; + uint64_t sqlite3_queries_ok; + uint64_t sqlite3_queries_failed; + uint64_t sqlite3_queries_failed_busy; + uint64_t sqlite3_queries_failed_locked; + uint64_t sqlite3_rows; + uint64_t sqlite3_metadata_cache_hit; + uint64_t sqlite3_context_cache_hit; + uint64_t sqlite3_metadata_cache_miss; + uint64_t sqlite3_context_cache_miss; + uint64_t sqlite3_metadata_cache_spill; + uint64_t sqlite3_context_cache_spill; + uint64_t sqlite3_metadata_cache_write; + uint64_t sqlite3_context_cache_write; + +} sqlite3_statistics = { }; + +void global_statistics_sqlite3_query_completed(bool success, bool busy, bool locked) { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_made, 1, __ATOMIC_RELAXED); + + if(success) { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_ok, 1, __ATOMIC_RELAXED); + } + else { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed, 1, __ATOMIC_RELAXED); + + if(busy) + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed_busy, 1, __ATOMIC_RELAXED); + + if(locked) + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed_locked, 1, __ATOMIC_RELAXED); + } +} + +void global_statistics_sqlite3_row_completed(void) { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_rows, 1, __ATOMIC_RELAXED); +} + +static inline void sqlite3_statistics_copy(struct sqlite3_statistics *gs) { + static usec_t last_run = 0; + + gs->sqlite3_queries_made = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_made, __ATOMIC_RELAXED); + gs->sqlite3_queries_ok = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_ok, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed_busy = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed_busy, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed_locked = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed_locked, __ATOMIC_RELAXED); + gs->sqlite3_rows = __atomic_load_n(&sqlite3_statistics.sqlite3_rows, __ATOMIC_RELAXED); + + usec_t timeout = default_rrd_update_every * USEC_PER_SEC + default_rrd_update_every * USEC_PER_SEC / 3; + usec_t now = now_monotonic_usec(); + if(!last_run) + last_run = now; + usec_t delta = now - last_run; + bool query_sqlite3 = delta < timeout; + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_hit = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_HIT); + else { + gs->sqlite3_metadata_cache_hit = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_hit = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_HIT); + else { + gs->sqlite3_context_cache_hit = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_miss = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_MISS); + else { + gs->sqlite3_metadata_cache_miss = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_miss = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_MISS); + else { + gs->sqlite3_context_cache_miss = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_spill = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL); + else { + gs->sqlite3_metadata_cache_spill = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_spill = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL); + else { + gs->sqlite3_context_cache_spill = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_write = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE); + else { + gs->sqlite3_metadata_cache_write = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_write = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE); + else { + gs->sqlite3_context_cache_write = UINT64_MAX; + query_sqlite3 = false; + } + + last_run = now_monotonic_usec(); +} + +static void sqlite3_statistics_charts(void) { + struct sqlite3_statistics gs; + sqlite3_statistics_copy(&gs); if(gs.sqlite3_queries_made) { static RRDSET *st_sqlite3_queries = NULL; @@ -833,10 +900,17 @@ static void global_statistics_charts(void) { rd_cache_write = rrddim_add(st_sqlite3_cache, "cache_write", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } - rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_metadata_cache_hit); - rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_metadata_cache_miss); - rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_metadata_cache_spill); - rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_metadata_cache_write); + if(gs.sqlite3_metadata_cache_hit != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_metadata_cache_hit); + + if(gs.sqlite3_metadata_cache_miss != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_metadata_cache_miss); + + if(gs.sqlite3_metadata_cache_spill != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_metadata_cache_spill); + + if(gs.sqlite3_metadata_cache_write != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_metadata_cache_write); rrdset_done(st_sqlite3_cache); } @@ -870,10 +944,17 @@ static void global_statistics_charts(void) { rd_cache_write = rrddim_add(st_sqlite3_cache, "cache_write", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } - rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_context_cache_hit); - rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_context_cache_miss); - rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_context_cache_spill); - rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_context_cache_write); + if(gs.sqlite3_context_cache_hit != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_context_cache_hit); + + if(gs.sqlite3_context_cache_miss != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_context_cache_miss); + + if(gs.sqlite3_context_cache_spill != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_context_cache_spill); + + if(gs.sqlite3_context_cache_write != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_context_cache_write); rrdset_done(st_sqlite3_cache); } @@ -2322,7 +2403,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.value.%s", wu->name_lowercase, job_name_sanitized); char title[1000 + 1]; - snprintf(title, 1000, "Netdata Workers %s Value of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name)); + snprintf(title, 1000, "Netdata Workers %s value of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name)); wu->per_job_type[i].st = rrdset_create_localhost( "netdata" @@ -2334,7 +2415,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { , (wu->per_job_type[i].units)?string2str(wu->per_job_type[i].units):"value" , "netdata" , "stats" - , wu->priority + 5 + , wu->priority + 5 + i , localhost->rrd_update_every , RRDSET_TYPE_LINE ); @@ -2378,7 +2459,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.rate.%s", wu->name_lowercase, job_name_sanitized); char title[1000 + 1]; - snprintf(title, 1000, "Netdata Workers %s Rate of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name)); + snprintf(title, 1000, "Netdata Workers %s rate of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name)); wu->per_job_type[i].st = rrdset_create_localhost( "netdata" @@ -2390,7 +2471,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { , (wu->per_job_type[i].units)?string2str(wu->per_job_type[i].units):"rate" , "netdata" , "stats" - , wu->priority + 5 + , wu->priority + 5 + i , localhost->rrd_update_every , RRDSET_TYPE_LINE ); @@ -2447,21 +2528,37 @@ static void workers_utilization_reset_statistics(struct worker_utilization *wu) } } +#define TASK_STAT_PREFIX "/proc/self/task/" +#define TASK_STAT_SUFFIX "/stat" + static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_uint_t *utime __maybe_unused, kernel_uint_t *stime __maybe_unused) { #ifdef __linux__ - char filename[200 + 1]; - snprintfz(filename, 200, "/proc/self/task/%d/stat", pid); + static char filename[sizeof(TASK_STAT_PREFIX) + sizeof(TASK_STAT_SUFFIX) + 20] = TASK_STAT_PREFIX; + static size_t start_pos = sizeof(TASK_STAT_PREFIX) - 1; + static procfile *ff = NULL; - procfile *ff = procfile_open(filename, " ", PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); - if(!ff) return -1; + // construct the filename + size_t end_pos = snprintfz(&filename[start_pos], 20, "%d", pid); + strcpy(&filename[start_pos + end_pos], TASK_STAT_SUFFIX); + // (re)open the procfile to the new filename + bool set_quotes = (ff == NULL) ? true : false; + ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) return -1; + + if(set_quotes) + procfile_set_open_close(ff, "(", ")"); + + // read the entire file and split it to lines and words ff = procfile_readall(ff); - if(!ff) return -1; + if(unlikely(!ff)) return -1; + // parse the numbers we are interested *utime = str2kernel_uint_t(procfile_lineword(ff, 0, 13)); *stime = str2kernel_uint_t(procfile_lineword(ff, 0, 14)); - procfile_close(ff); + // leave the file open for the next iteration + return 0; #else // TODO: add here cpu time detection per thread, for FreeBSD and MacOS @@ -2474,8 +2571,6 @@ static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_ static Pvoid_t workers_by_pid_JudyL_array = NULL; static void workers_threads_cleanup(struct worker_utilization *wu) { - netdata_thread_disable_cancelability(); - struct worker_thread *t = wu->threads; while(t) { struct worker_thread *next = t->next; @@ -2487,8 +2582,6 @@ static void workers_threads_cleanup(struct worker_utilization *wu) { } t = next; } - - netdata_thread_enable_cancelability(); } static struct worker_thread *worker_thread_find(struct worker_utilization *wu __maybe_unused, pid_t pid) { @@ -2621,16 +2714,20 @@ static void worker_utilization_charts(void) { static size_t iterations = 0; iterations++; - int i; - for(i = 0; all_workers_utilization[i].name ;i++) { + for(int i = 0; all_workers_utilization[i].name ;i++) { workers_utilization_reset_statistics(&all_workers_utilization[i]); + + netdata_thread_disable_cancelability(); workers_foreach(all_workers_utilization[i].name, worker_utilization_charts_callback, &all_workers_utilization[i]); + netdata_thread_enable_cancelability(); // skip the first iteration, so that we don't accumulate startup utilization to our charts if(likely(iterations > 1)) workers_utilization_update_chart(&all_workers_utilization[i]); + netdata_thread_disable_cancelability(); workers_threads_cleanup(&all_workers_utilization[i]); + netdata_thread_enable_cancelability(); } workers_total_cpu_utilization_chart(); @@ -2672,11 +2769,12 @@ static void global_statistics_register_workers(void) { worker_register("STATS"); worker_register_job_name(WORKER_JOB_GLOBAL, "global"); worker_register_job_name(WORKER_JOB_REGISTRY, "registry"); - worker_register_job_name(WORKER_JOB_WORKERS, "workers"); worker_register_job_name(WORKER_JOB_DBENGINE, "dbengine"); worker_register_job_name(WORKER_JOB_STRINGS, "strings"); worker_register_job_name(WORKER_JOB_DICTIONARIES, "dictionaries"); worker_register_job_name(WORKER_JOB_MALLOC_TRACE, "malloc_trace"); + worker_register_job_name(WORKER_JOB_WORKERS, "workers"); + worker_register_job_name(WORKER_JOB_SQLITE3, "sqlite3"); } static void global_statistics_cleanup(void *ptr) @@ -2719,6 +2817,9 @@ void *global_statistics_main(void *ptr) worker_is_busy(WORKER_JOB_GLOBAL); global_statistics_charts(); + worker_is_busy(WORKER_JOB_SQLITE3); + sqlite3_statistics_charts(); + worker_is_busy(WORKER_JOB_REGISTRY); registry_statistics(); diff --git a/daemon/service.c b/daemon/service.c index a7db7ceb7..6db2ef69f 100644 --- a/daemon/service.c +++ b/daemon/service.c @@ -46,19 +46,19 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) { /* only a collector can mark a chart as obsolete, so we must remove the reference */ - size_t tiers_available = 0, tiers_said_yes = 0; + size_t tiers_available = 0, tiers_said_no_retention = 0; for(size_t tier = 0; tier < storage_tiers ;tier++) { if(rd->tiers[tier]) { tiers_available++; if(rd->tiers[tier]->collect_ops->finalize(rd->tiers[tier]->db_collection_handle)) - tiers_said_yes++; + tiers_said_no_retention++; rd->tiers[tier]->db_collection_handle = NULL; } } - if (tiers_available == tiers_said_yes && tiers_said_yes) { + if (tiers_available == tiers_said_no_retention && tiers_said_no_retention) { /* This metric has no data and no references */ metaqueue_delete_dimension_uuid(&rd->metric_uuid); } -- cgit v1.2.3