summaryrefslogtreecommitdiffstats
path: root/daemon
diff options
context:
space:
mode:
Diffstat (limited to 'daemon')
-rw-r--r--daemon/global_statistics.c267
-rw-r--r--daemon/service.c6
2 files changed, 187 insertions, 86 deletions
diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c
index 53fd6c45a..a4e9d321f 100644
--- a/daemon/global_statistics.c
+++ b/daemon/global_statistics.c
@@ -12,9 +12,10 @@
#define WORKER_JOB_STRINGS 5
#define WORKER_JOB_DICTIONARIES 6
#define WORKER_JOB_MALLOC_TRACE 7
+#define WORKER_JOB_SQLITE3 8
-#if WORKER_UTILIZATION_MAX_JOB_TYPES < 8
-#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 8
+#if WORKER_UTILIZATION_MAX_JOB_TYPES < 9
+#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 9
#endif
bool global_statistics_enabled = true;
@@ -60,21 +61,6 @@ static struct global_statistics {
uint64_t db_points_stored_per_tier[RRD_STORAGE_TIERS];
- uint64_t sqlite3_queries_made;
- uint64_t sqlite3_queries_ok;
- uint64_t sqlite3_queries_failed;
- uint64_t sqlite3_queries_failed_busy;
- uint64_t sqlite3_queries_failed_locked;
- uint64_t sqlite3_rows;
- uint64_t sqlite3_metadata_cache_hit;
- uint64_t sqlite3_context_cache_hit;
- uint64_t sqlite3_metadata_cache_miss;
- uint64_t sqlite3_context_cache_miss;
- uint64_t sqlite3_metadata_cache_spill;
- uint64_t sqlite3_context_cache_spill;
- uint64_t sqlite3_metadata_cache_write;
- uint64_t sqlite3_context_cache_write;
-
} global_statistics = {
.connected_clients = 0,
.web_requests = 0,
@@ -112,27 +98,6 @@ void global_statistics_backfill_query_completed(size_t points_read) {
__atomic_fetch_add(&global_statistics.backfill_db_points_read, points_read, __ATOMIC_RELAXED);
}
-void global_statistics_sqlite3_query_completed(bool success, bool busy, bool locked) {
- __atomic_fetch_add(&global_statistics.sqlite3_queries_made, 1, __ATOMIC_RELAXED);
-
- if(success) {
- __atomic_fetch_add(&global_statistics.sqlite3_queries_ok, 1, __ATOMIC_RELAXED);
- }
- else {
- __atomic_fetch_add(&global_statistics.sqlite3_queries_failed, 1, __ATOMIC_RELAXED);
-
- if(busy)
- __atomic_fetch_add(&global_statistics.sqlite3_queries_failed_busy, 1, __ATOMIC_RELAXED);
-
- if(locked)
- __atomic_fetch_add(&global_statistics.sqlite3_queries_failed_locked, 1, __ATOMIC_RELAXED);
- }
-}
-
-void global_statistics_sqlite3_row_completed(void) {
- __atomic_fetch_add(&global_statistics.sqlite3_rows, 1, __ATOMIC_RELAXED);
-}
-
void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source) {
switch(query_source) {
case QUERY_SOURCE_API_DATA:
@@ -241,25 +206,6 @@ static inline void global_statistics_copy(struct global_statistics *gs, uint8_t
uint64_t n = 0;
__atomic_compare_exchange(&global_statistics.web_usec_max, (uint64_t *) &gs->web_usec_max, &n, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED);
}
-
- gs->sqlite3_queries_made = __atomic_load_n(&global_statistics.sqlite3_queries_made, __ATOMIC_RELAXED);
- gs->sqlite3_queries_ok = __atomic_load_n(&global_statistics.sqlite3_queries_ok, __ATOMIC_RELAXED);
- gs->sqlite3_queries_failed = __atomic_load_n(&global_statistics.sqlite3_queries_failed, __ATOMIC_RELAXED);
- gs->sqlite3_queries_failed_busy = __atomic_load_n(&global_statistics.sqlite3_queries_failed_busy, __ATOMIC_RELAXED);
- gs->sqlite3_queries_failed_locked = __atomic_load_n(&global_statistics.sqlite3_queries_failed_locked, __ATOMIC_RELAXED);
- gs->sqlite3_rows = __atomic_load_n(&global_statistics.sqlite3_rows, __ATOMIC_RELAXED);
-
- gs->sqlite3_metadata_cache_hit = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_HIT);
- gs->sqlite3_context_cache_hit = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_HIT);
-
- gs->sqlite3_metadata_cache_miss = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_MISS);
- gs->sqlite3_context_cache_miss = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_MISS);
-
- gs->sqlite3_metadata_cache_spill = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL);
- gs->sqlite3_context_cache_spill = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL);
-
- gs->sqlite3_metadata_cache_write = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE);
- gs->sqlite3_context_cache_write = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE);
}
static void global_statistics_charts(void) {
@@ -707,8 +653,129 @@ static void global_statistics_charts(void) {
rrdset_done(st_points_stored);
}
+}
- // ----------------------------------------------------------------
+// ----------------------------------------------------------------------------
+// sqlite3 statistics
+
+struct sqlite3_statistics {
+ uint64_t sqlite3_queries_made;
+ uint64_t sqlite3_queries_ok;
+ uint64_t sqlite3_queries_failed;
+ uint64_t sqlite3_queries_failed_busy;
+ uint64_t sqlite3_queries_failed_locked;
+ uint64_t sqlite3_rows;
+ uint64_t sqlite3_metadata_cache_hit;
+ uint64_t sqlite3_context_cache_hit;
+ uint64_t sqlite3_metadata_cache_miss;
+ uint64_t sqlite3_context_cache_miss;
+ uint64_t sqlite3_metadata_cache_spill;
+ uint64_t sqlite3_context_cache_spill;
+ uint64_t sqlite3_metadata_cache_write;
+ uint64_t sqlite3_context_cache_write;
+
+} sqlite3_statistics = { };
+
+void global_statistics_sqlite3_query_completed(bool success, bool busy, bool locked) {
+ __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_made, 1, __ATOMIC_RELAXED);
+
+ if(success) {
+ __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_ok, 1, __ATOMIC_RELAXED);
+ }
+ else {
+ __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed, 1, __ATOMIC_RELAXED);
+
+ if(busy)
+ __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed_busy, 1, __ATOMIC_RELAXED);
+
+ if(locked)
+ __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed_locked, 1, __ATOMIC_RELAXED);
+ }
+}
+
+void global_statistics_sqlite3_row_completed(void) {
+ __atomic_fetch_add(&sqlite3_statistics.sqlite3_rows, 1, __ATOMIC_RELAXED);
+}
+
+static inline void sqlite3_statistics_copy(struct sqlite3_statistics *gs) {
+ static usec_t last_run = 0;
+
+ gs->sqlite3_queries_made = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_made, __ATOMIC_RELAXED);
+ gs->sqlite3_queries_ok = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_ok, __ATOMIC_RELAXED);
+ gs->sqlite3_queries_failed = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed, __ATOMIC_RELAXED);
+ gs->sqlite3_queries_failed_busy = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed_busy, __ATOMIC_RELAXED);
+ gs->sqlite3_queries_failed_locked = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed_locked, __ATOMIC_RELAXED);
+ gs->sqlite3_rows = __atomic_load_n(&sqlite3_statistics.sqlite3_rows, __ATOMIC_RELAXED);
+
+ usec_t timeout = default_rrd_update_every * USEC_PER_SEC + default_rrd_update_every * USEC_PER_SEC / 3;
+ usec_t now = now_monotonic_usec();
+ if(!last_run)
+ last_run = now;
+ usec_t delta = now - last_run;
+ bool query_sqlite3 = delta < timeout;
+
+ if(query_sqlite3 && now_monotonic_usec() - last_run < timeout)
+ gs->sqlite3_metadata_cache_hit = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_HIT);
+ else {
+ gs->sqlite3_metadata_cache_hit = UINT64_MAX;
+ query_sqlite3 = false;
+ }
+
+ if(query_sqlite3 && now_monotonic_usec() - last_run < timeout)
+ gs->sqlite3_context_cache_hit = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_HIT);
+ else {
+ gs->sqlite3_context_cache_hit = UINT64_MAX;
+ query_sqlite3 = false;
+ }
+
+ if(query_sqlite3 && now_monotonic_usec() - last_run < timeout)
+ gs->sqlite3_metadata_cache_miss = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_MISS);
+ else {
+ gs->sqlite3_metadata_cache_miss = UINT64_MAX;
+ query_sqlite3 = false;
+ }
+
+ if(query_sqlite3 && now_monotonic_usec() - last_run < timeout)
+ gs->sqlite3_context_cache_miss = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_MISS);
+ else {
+ gs->sqlite3_context_cache_miss = UINT64_MAX;
+ query_sqlite3 = false;
+ }
+
+ if(query_sqlite3 && now_monotonic_usec() - last_run < timeout)
+ gs->sqlite3_metadata_cache_spill = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL);
+ else {
+ gs->sqlite3_metadata_cache_spill = UINT64_MAX;
+ query_sqlite3 = false;
+ }
+
+ if(query_sqlite3 && now_monotonic_usec() - last_run < timeout)
+ gs->sqlite3_context_cache_spill = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL);
+ else {
+ gs->sqlite3_context_cache_spill = UINT64_MAX;
+ query_sqlite3 = false;
+ }
+
+ if(query_sqlite3 && now_monotonic_usec() - last_run < timeout)
+ gs->sqlite3_metadata_cache_write = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE);
+ else {
+ gs->sqlite3_metadata_cache_write = UINT64_MAX;
+ query_sqlite3 = false;
+ }
+
+ if(query_sqlite3 && now_monotonic_usec() - last_run < timeout)
+ gs->sqlite3_context_cache_write = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE);
+ else {
+ gs->sqlite3_context_cache_write = UINT64_MAX;
+ query_sqlite3 = false;
+ }
+
+ last_run = now_monotonic_usec();
+}
+
+static void sqlite3_statistics_charts(void) {
+ struct sqlite3_statistics gs;
+ sqlite3_statistics_copy(&gs);
if(gs.sqlite3_queries_made) {
static RRDSET *st_sqlite3_queries = NULL;
@@ -833,10 +900,17 @@ static void global_statistics_charts(void) {
rd_cache_write = rrddim_add(st_sqlite3_cache, "cache_write", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
- rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_metadata_cache_hit);
- rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_metadata_cache_miss);
- rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_metadata_cache_spill);
- rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_metadata_cache_write);
+ if(gs.sqlite3_metadata_cache_hit != UINT64_MAX)
+ rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_metadata_cache_hit);
+
+ if(gs.sqlite3_metadata_cache_miss != UINT64_MAX)
+ rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_metadata_cache_miss);
+
+ if(gs.sqlite3_metadata_cache_spill != UINT64_MAX)
+ rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_metadata_cache_spill);
+
+ if(gs.sqlite3_metadata_cache_write != UINT64_MAX)
+ rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_metadata_cache_write);
rrdset_done(st_sqlite3_cache);
}
@@ -870,10 +944,17 @@ static void global_statistics_charts(void) {
rd_cache_write = rrddim_add(st_sqlite3_cache, "cache_write", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
- rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_context_cache_hit);
- rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_context_cache_miss);
- rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_context_cache_spill);
- rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_context_cache_write);
+ if(gs.sqlite3_context_cache_hit != UINT64_MAX)
+ rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_context_cache_hit);
+
+ if(gs.sqlite3_context_cache_miss != UINT64_MAX)
+ rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_context_cache_miss);
+
+ if(gs.sqlite3_context_cache_spill != UINT64_MAX)
+ rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_context_cache_spill);
+
+ if(gs.sqlite3_context_cache_write != UINT64_MAX)
+ rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_context_cache_write);
rrdset_done(st_sqlite3_cache);
}
@@ -2322,7 +2403,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) {
snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.value.%s", wu->name_lowercase, job_name_sanitized);
char title[1000 + 1];
- snprintf(title, 1000, "Netdata Workers %s Value of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name));
+ snprintf(title, 1000, "Netdata Workers %s value of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name));
wu->per_job_type[i].st = rrdset_create_localhost(
"netdata"
@@ -2334,7 +2415,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) {
, (wu->per_job_type[i].units)?string2str(wu->per_job_type[i].units):"value"
, "netdata"
, "stats"
- , wu->priority + 5
+ , wu->priority + 5 + i
, localhost->rrd_update_every
, RRDSET_TYPE_LINE
);
@@ -2378,7 +2459,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) {
snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.rate.%s", wu->name_lowercase, job_name_sanitized);
char title[1000 + 1];
- snprintf(title, 1000, "Netdata Workers %s Rate of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name));
+ snprintf(title, 1000, "Netdata Workers %s rate of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name));
wu->per_job_type[i].st = rrdset_create_localhost(
"netdata"
@@ -2390,7 +2471,7 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) {
, (wu->per_job_type[i].units)?string2str(wu->per_job_type[i].units):"rate"
, "netdata"
, "stats"
- , wu->priority + 5
+ , wu->priority + 5 + i
, localhost->rrd_update_every
, RRDSET_TYPE_LINE
);
@@ -2447,21 +2528,37 @@ static void workers_utilization_reset_statistics(struct worker_utilization *wu)
}
}
+#define TASK_STAT_PREFIX "/proc/self/task/"
+#define TASK_STAT_SUFFIX "/stat"
+
static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_uint_t *utime __maybe_unused, kernel_uint_t *stime __maybe_unused) {
#ifdef __linux__
- char filename[200 + 1];
- snprintfz(filename, 200, "/proc/self/task/%d/stat", pid);
+ static char filename[sizeof(TASK_STAT_PREFIX) + sizeof(TASK_STAT_SUFFIX) + 20] = TASK_STAT_PREFIX;
+ static size_t start_pos = sizeof(TASK_STAT_PREFIX) - 1;
+ static procfile *ff = NULL;
- procfile *ff = procfile_open(filename, " ", PROCFILE_FLAG_NO_ERROR_ON_FILE_IO);
- if(!ff) return -1;
+ // construct the filename
+ size_t end_pos = snprintfz(&filename[start_pos], 20, "%d", pid);
+ strcpy(&filename[start_pos + end_pos], TASK_STAT_SUFFIX);
+ // (re)open the procfile to the new filename
+ bool set_quotes = (ff == NULL) ? true : false;
+ ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT);
+ if(unlikely(!ff)) return -1;
+
+ if(set_quotes)
+ procfile_set_open_close(ff, "(", ")");
+
+ // read the entire file and split it to lines and words
ff = procfile_readall(ff);
- if(!ff) return -1;
+ if(unlikely(!ff)) return -1;
+ // parse the numbers we are interested
*utime = str2kernel_uint_t(procfile_lineword(ff, 0, 13));
*stime = str2kernel_uint_t(procfile_lineword(ff, 0, 14));
- procfile_close(ff);
+ // leave the file open for the next iteration
+
return 0;
#else
// TODO: add here cpu time detection per thread, for FreeBSD and MacOS
@@ -2474,8 +2571,6 @@ static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_
static Pvoid_t workers_by_pid_JudyL_array = NULL;
static void workers_threads_cleanup(struct worker_utilization *wu) {
- netdata_thread_disable_cancelability();
-
struct worker_thread *t = wu->threads;
while(t) {
struct worker_thread *next = t->next;
@@ -2487,8 +2582,6 @@ static void workers_threads_cleanup(struct worker_utilization *wu) {
}
t = next;
}
-
- netdata_thread_enable_cancelability();
}
static struct worker_thread *worker_thread_find(struct worker_utilization *wu __maybe_unused, pid_t pid) {
@@ -2621,16 +2714,20 @@ static void worker_utilization_charts(void) {
static size_t iterations = 0;
iterations++;
- int i;
- for(i = 0; all_workers_utilization[i].name ;i++) {
+ for(int i = 0; all_workers_utilization[i].name ;i++) {
workers_utilization_reset_statistics(&all_workers_utilization[i]);
+
+ netdata_thread_disable_cancelability();
workers_foreach(all_workers_utilization[i].name, worker_utilization_charts_callback, &all_workers_utilization[i]);
+ netdata_thread_enable_cancelability();
// skip the first iteration, so that we don't accumulate startup utilization to our charts
if(likely(iterations > 1))
workers_utilization_update_chart(&all_workers_utilization[i]);
+ netdata_thread_disable_cancelability();
workers_threads_cleanup(&all_workers_utilization[i]);
+ netdata_thread_enable_cancelability();
}
workers_total_cpu_utilization_chart();
@@ -2672,11 +2769,12 @@ static void global_statistics_register_workers(void) {
worker_register("STATS");
worker_register_job_name(WORKER_JOB_GLOBAL, "global");
worker_register_job_name(WORKER_JOB_REGISTRY, "registry");
- worker_register_job_name(WORKER_JOB_WORKERS, "workers");
worker_register_job_name(WORKER_JOB_DBENGINE, "dbengine");
worker_register_job_name(WORKER_JOB_STRINGS, "strings");
worker_register_job_name(WORKER_JOB_DICTIONARIES, "dictionaries");
worker_register_job_name(WORKER_JOB_MALLOC_TRACE, "malloc_trace");
+ worker_register_job_name(WORKER_JOB_WORKERS, "workers");
+ worker_register_job_name(WORKER_JOB_SQLITE3, "sqlite3");
}
static void global_statistics_cleanup(void *ptr)
@@ -2719,6 +2817,9 @@ void *global_statistics_main(void *ptr)
worker_is_busy(WORKER_JOB_GLOBAL);
global_statistics_charts();
+ worker_is_busy(WORKER_JOB_SQLITE3);
+ sqlite3_statistics_charts();
+
worker_is_busy(WORKER_JOB_REGISTRY);
registry_statistics();
diff --git a/daemon/service.c b/daemon/service.c
index a7db7ceb7..6db2ef69f 100644
--- a/daemon/service.c
+++ b/daemon/service.c
@@ -46,19 +46,19 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) {
/* only a collector can mark a chart as obsolete, so we must remove the reference */
- size_t tiers_available = 0, tiers_said_yes = 0;
+ size_t tiers_available = 0, tiers_said_no_retention = 0;
for(size_t tier = 0; tier < storage_tiers ;tier++) {
if(rd->tiers[tier]) {
tiers_available++;
if(rd->tiers[tier]->collect_ops->finalize(rd->tiers[tier]->db_collection_handle))
- tiers_said_yes++;
+ tiers_said_no_retention++;
rd->tiers[tier]->db_collection_handle = NULL;
}
}
- if (tiers_available == tiers_said_yes && tiers_said_yes) {
+ if (tiers_available == tiers_said_no_retention && tiers_said_no_retention) {
/* This metric has no data and no references */
metaqueue_delete_dimension_uuid(&rd->metric_uuid);
}