diff options
Diffstat (limited to 'daemon')
-rw-r--r-- | daemon/README.md | 7 | ||||
-rw-r--r-- | daemon/analytics.c | 86 | ||||
-rw-r--r-- | daemon/analytics.h | 24 | ||||
-rw-r--r-- | daemon/buildinfo.c | 23 | ||||
-rw-r--r-- | daemon/buildinfo.h | 8 | ||||
-rw-r--r-- | daemon/commands.c | 4 | ||||
-rw-r--r-- | daemon/commands.h | 4 | ||||
-rw-r--r-- | daemon/common.h | 2 | ||||
-rw-r--r-- | daemon/daemon.h | 10 | ||||
-rw-r--r-- | daemon/global_statistics.c | 1639 | ||||
-rw-r--r-- | daemon/global_statistics.h | 26 | ||||
-rw-r--r-- | daemon/main.c | 91 | ||||
-rw-r--r-- | daemon/main.h | 8 | ||||
-rw-r--r-- | daemon/service.c | 266 | ||||
-rw-r--r-- | daemon/signals.c | 6 | ||||
-rw-r--r-- | daemon/signals.h | 12 | ||||
-rw-r--r-- | daemon/static_threads.c | 74 | ||||
-rw-r--r-- | daemon/static_threads.h | 6 | ||||
-rw-r--r-- | daemon/static_threads_linux.c | 36 | ||||
-rw-r--r-- | daemon/static_threads_macos.c | 10 | ||||
-rwxr-xr-x | daemon/system-info.sh | 3 | ||||
-rw-r--r-- | daemon/unit_test.c | 287 | ||||
-rw-r--r-- | daemon/unit_test.h | 26 |
23 files changed, 2166 insertions, 492 deletions
diff --git a/daemon/README.md b/daemon/README.md index 3ebb405b2..c5951c694 100644 --- a/daemon/README.md +++ b/daemon/README.md @@ -116,7 +116,7 @@ The command line options of the Netdata 1.10.0 version are the following: | '-' '-' '-' '-' real-time performance monitoring, done right! +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+---> - Copyright (C) 2016-2020, Netdata, Inc. <info@netdata.cloud> + Copyright (C) 2016-2022, Netdata, Inc. <info@netdata.cloud> Released under GNU General Public License v3 or later. All rights reserved. @@ -127,7 +127,8 @@ The command line options of the Netdata 1.10.0 version are the following: License : https://github.com/netdata/netdata/blob/master/LICENSE.md Twitter : https://twitter.com/linuxnetdata - Facebook : https://www.facebook.com/linuxnetdata/ + LinkedIn : https://linkedin.com/company/netdata-cloud/ + Facebook : https://facebook.com/linuxnetdata/ SYNOPSIS: netdata [options] @@ -191,7 +192,7 @@ The command line options of the Netdata 1.10.0 version are the following: -W simple-pattern pattern string Check if string matches pattern and exit. - -W "claim -token=TOKEN -rooms=ROOM1,ROOM2 url=https://app.netdata.cloud" + -W "claim -token=TOKEN -rooms=ROOM1,ROOM2 url=https://api.netdata.cloud" Connect the agent to the workspace rooms pointed to by TOKEN and ROOM*. 
Signals netdata handles: diff --git a/daemon/analytics.c b/daemon/analytics.c index 370818b8a..3d0e514d6 100644 --- a/daemon/analytics.c +++ b/daemon/analytics.c @@ -10,8 +10,8 @@ extern void analytics_build_info (BUFFER *b); extern int aclk_connected; struct collector { - char *plugin; - char *module; + const char *plugin; + const char *module; }; struct array_printer { @@ -249,8 +249,7 @@ void analytics_exporters(void) buffer_free(bi); } -int collector_counter_callb(const char *name, void *entry, void *data) { - (void)name; +int collector_counter_callb(const DICTIONARY_ITEM *item __maybe_unused, void *entry, void *data) { struct array_printer *ap = (struct array_printer *)data; struct collector *col = (struct collector *)entry; @@ -279,19 +278,22 @@ int collector_counter_callb(const char *name, void *entry, void *data) { void analytics_collectors(void) { RRDSET *st; - DICTIONARY *dict = dictionary_create(DICTIONARY_FLAG_SINGLE_THREADED); + DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); char name[500]; BUFFER *bt = buffer_create(1000); - rrdset_foreach_read(st, localhost) - { - if (rrdset_is_available_for_viewers(st)) { - struct collector col = { .plugin = st->plugin_name ? st->plugin_name : "", - .module = st->module_name ? 
st->module_name : "" }; - snprintfz(name, 499, "%s:%s", col.plugin, col.module); - dictionary_set(dict, name, &col, sizeof(struct collector)); - } + rrdset_foreach_read(st, localhost) { + if(!rrdset_is_available_for_viewers(st)) + continue; + + struct collector col = { + .plugin = rrdset_plugin_name(st), + .module = rrdset_module_name(st) + }; + snprintfz(name, 499, "%s:%s", col.plugin, col.module); + dictionary_set(dict, name, &col, sizeof(struct collector)); } + rrdset_foreach_done(st); struct array_printer ap; ap.c = 0; @@ -335,11 +337,12 @@ void analytics_alarms_notifications(void) BUFFER *b = buffer_create(1000); int cnt = 0; - FILE *fp = mypopen(script, &command_pid); - if (fp) { + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input); + if (fp_child_output) { char line[200 + 1]; - while (fgets(line, 200, fp) != NULL) { + while (fgets(line, 200, fp_child_output) != NULL) { char *end = line; while (*end && *end != '\n') end++; @@ -352,7 +355,7 @@ void analytics_alarms_notifications(void) cnt++; } - mypclose(fp, command_pid); + netdata_pclose(fp_child_input, fp_child_output, command_pid); } freez(script); @@ -382,8 +385,8 @@ void analytics_https(void) BUFFER *b = buffer_create(30); #ifdef ENABLE_HTTPS analytics_exporting_connectors_ssl(b); - buffer_strcat(b, netdata_client_ctx && localhost->ssl.flags == NETDATA_SSL_HANDSHAKE_COMPLETE && __atomic_load_n(&localhost->rrdpush_sender_connected, __ATOMIC_SEQ_CST) ? "streaming|" : "|"); - buffer_strcat(b, netdata_srv_ctx ? "web" : ""); + buffer_strcat(b, netdata_ssl_client_ctx && rrdhost_flag_check(localhost, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED) && localhost->sender->ssl.flags == NETDATA_SSL_HANDSHAKE_COMPLETE ? "streaming|" : "|"); + buffer_strcat(b, netdata_ssl_srv_ctx ? 
"web" : ""); #else buffer_strcat(b, "||"); #endif @@ -396,12 +399,11 @@ void analytics_charts(void) { RRDSET *st; int c = 0; + rrdset_foreach_read(st, localhost) - { - if (rrdset_is_available_for_viewers(st)) { - c++; - } - } + if(rrdset_is_available_for_viewers(st)) c++; + rrdset_foreach_done(st); + { char b[7]; snprintfz(b, 6, "%d", c); @@ -413,22 +415,19 @@ void analytics_metrics(void) { RRDSET *st; long int dimensions = 0; - RRDDIM *rd; - rrdset_foreach_read(st, localhost) - { - rrdset_rdlock(st); - + rrdset_foreach_read(st, localhost) { if (rrdset_is_available_for_viewers(st)) { - rrddim_foreach_read(rd, st) - { - if (rrddim_flag_check(rd, RRDDIM_FLAG_HIDDEN) || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + if (rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN) || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) continue; dimensions++; } + rrddim_foreach_done(rd); } - - rrdset_unlock(st); } + rrdset_foreach_done(st); + { char b[7]; snprintfz(b, 6, "%ld", dimensions); @@ -441,7 +440,7 @@ void analytics_alarms(void) int alarm_warn = 0, alarm_crit = 0, alarm_normal = 0; char b[10]; RRDCALC *rc; - for (rc = localhost->alarms; rc; rc = rc->next) { + foreach_rrdcalc_in_rrdhost_read(localhost, rc) { if (unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec)) continue; @@ -456,6 +455,7 @@ void analytics_alarms(void) alarm_normal++; } } + foreach_rrdcalc_in_rrdhost_done(rc); snprintfz(b, 9, "%d", alarm_normal); analytics_set_data(&analytics_data.netdata_alarms_normal, b); @@ -525,21 +525,16 @@ void analytics_gather_immutable_meta_data(void) */ void analytics_gather_mutable_meta_data(void) { - rrdhost_rdlock(localhost); - analytics_collectors(); analytics_alarms(); analytics_charts(); analytics_metrics(); analytics_aclk(); - - rrdhost_unlock(localhost); - analytics_mirrored_hosts(); analytics_alarms_notifications(); analytics_set_data( - &analytics_data.netdata_config_is_parent, (localhost->next || configured_as_parent()) 
? "true" : "false"); + &analytics_data.netdata_config_is_parent, (rrdhost_hosts_available() > 1 || configured_as_parent()) ? "true" : "false"); char *claim_id = get_agent_claimid(); analytics_set_data(&analytics_data.netdata_host_agent_claimed, claim_id ? "true" : "false"); @@ -1022,11 +1017,12 @@ void send_statistics(const char *action, const char *action_result, const char * info("%s '%s' '%s' '%s'", as_script, action, action_result, action_data); - FILE *fp = mypopen(command_to_run, &command_pid); - if (fp) { + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(command_to_run, &command_pid, &fp_child_input); + if (fp_child_output) { char buffer[4 + 1]; - char *s = fgets(buffer, 4, fp); - int exit_code = mypclose(fp, command_pid); + char *s = fgets(buffer, 4, fp_child_output); + int exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid); if (exit_code) error("Execution of anonymous statistics script returned %d.", exit_code); if (s && strncmp(buffer, "200", 3)) diff --git a/daemon/analytics.h b/daemon/analytics.h index 76c1cbb7e..d1ffcec18 100644 --- a/daemon/analytics.h +++ b/daemon/analytics.h @@ -69,18 +69,18 @@ struct analytics_data { uint8_t dashboard_hits; }; -extern void analytics_get_data(char *name, BUFFER *wb); -extern void set_late_global_environment(void); -extern void analytics_free_data(void); -extern void set_global_environment(void); -extern void send_statistics(const char *action, const char *action_result, const char *action_data); -extern void analytics_log_shell(void); -extern void analytics_log_json(void); -extern void analytics_log_prometheus(void); -extern void analytics_log_dashboard(void); -extern void analytics_gather_mutable_meta_data(void); -extern void analytics_report_oom_score(long long int score); -extern void get_system_timezone(void); +void analytics_get_data(char *name, BUFFER *wb); +void set_late_global_environment(void); +void analytics_free_data(void); +void set_global_environment(void); +void 
send_statistics(const char *action, const char *action_result, const char *action_data); +void analytics_log_shell(void); +void analytics_log_json(void); +void analytics_log_prometheus(void); +void analytics_log_dashboard(void); +void analytics_gather_mutable_meta_data(void); +void analytics_report_oom_score(long long int score); +void get_system_timezone(void); extern struct analytics_data analytics_data; diff --git a/daemon/buildinfo.c b/daemon/buildinfo.c index 0a64547af..ef813a961 100644 --- a/daemon/buildinfo.c +++ b/daemon/buildinfo.c @@ -197,6 +197,11 @@ #define FEAT_YES_NO(x) ((x) ? "YES" : "NO") +#ifdef NETDATA_TRACE_ALLOCATIONS +#define FEAT_TRACE_ALLOC 1 +#else +#define FEAT_TRACE_ALLOC 0 +#endif char *get_value_from_key(char *buffer, char *key) { char *s = NULL, *t = NULL; @@ -266,9 +271,7 @@ void print_build_info(void) { printf(" dbengine: %s\n", FEAT_YES_NO(FEAT_DBENGINE)); printf(" Native HTTPS: %s\n", FEAT_YES_NO(FEAT_NATIVE_HTTPS)); printf(" Netdata Cloud: %s %s\n", FEAT_YES_NO(FEAT_CLOUD), FEAT_CLOUD_MSG); - printf(" ACLK Next Generation: %s\n", FEAT_YES_NO(FEAT_CLOUD)); - printf(" ACLK-NG New Cloud Protocol: %s\n", FEAT_YES_NO(1)); - printf(" ACLK Legacy: %s\n", FEAT_YES_NO(0)); + printf(" ACLK: %s\n", FEAT_YES_NO(FEAT_CLOUD)); printf(" TLS Host Verification: %s\n", FEAT_YES_NO(FEAT_TLS_HOST_VERIFY)); printf(" Machine Learning: %s\n", FEAT_YES_NO(FEAT_ML)); printf(" Stream Compression: %s\n", FEAT_YES_NO(FEAT_STREAM_COMPRESSION)); @@ -300,6 +303,9 @@ void print_build_info(void) { printf(" GCP PubSub: %s\n", FEAT_YES_NO(FEAT_PUBSUB)); printf(" MongoDB: %s\n", FEAT_YES_NO(FEAT_MONGO)); printf(" Prometheus Remote Write: %s\n", FEAT_YES_NO(FEAT_REMOTE_WRITE)); + + printf("Debug/Developer Features:\n"); + printf(" Trace Allocations: %s\n", FEAT_YES_NO(FEAT_TRACE_ALLOC)); }; #define FEAT_JSON_BOOL(x) ((x) ? 
"true" : "false") @@ -318,9 +324,7 @@ void print_build_info_json(void) { #else printf(" \"cloud-disabled\": false,\n"); #endif - printf(" \"aclk-ng\": %s,\n", FEAT_JSON_BOOL(FEAT_CLOUD)); - printf(" \"aclk-ng-new-cloud-proto\": %s,\n", FEAT_JSON_BOOL(1)); - printf(" \"aclk-legacy\": %s,\n", FEAT_JSON_BOOL(0)); + printf(" \"aclk\": %s,\n", FEAT_JSON_BOOL(FEAT_CLOUD)); printf(" \"tls-host-verify\": %s,\n", FEAT_JSON_BOOL(FEAT_TLS_HOST_VERIFY)); printf(" \"machine-learning\": %s\n", FEAT_JSON_BOOL(FEAT_ML)); @@ -358,6 +362,8 @@ void print_build_info_json(void) { printf(" \"mongodb\": %s,\n", FEAT_JSON_BOOL(FEAT_MONGO)); printf(" \"prom-remote-write\": %s\n", FEAT_JSON_BOOL(FEAT_REMOTE_WRITE)); printf(" }\n"); + printf(" \"debug-n-devel\": {\n"); + printf(" \"trace-allocations\": %s\n }\n",FEAT_JSON_BOOL(FEAT_TRACE_ALLOC)); printf("}\n"); }; @@ -377,7 +383,7 @@ void analytics_build_info(BUFFER *b) { add_to_bi(b, "Native HTTPS"); #endif #ifdef ENABLE_ACLK - add_to_bi(b, "Netdata Cloud|ACLK Next Generation|New Cloud Protocol Support"); + add_to_bi(b, "Netdata Cloud"); #endif #if (FEAT_TLS_HOST_VERIFY!=0) add_to_bi(b, "TLS Host Verification"); @@ -458,4 +464,7 @@ void analytics_build_info(BUFFER *b) { #ifdef ENABLE_PROMETHEUS_REMOTE_WRITE add_to_bi(b, "Prometheus Remote Write"); #endif +#ifdef NETDATA_TRACE_ALLOCATIONS + add_to_bi(b, "DebugTraceAlloc"); +#endif } diff --git a/daemon/buildinfo.h b/daemon/buildinfo.h index 542a0e92e..d3b439fc3 100644 --- a/daemon/buildinfo.h +++ b/daemon/buildinfo.h @@ -3,12 +3,12 @@ #ifndef NETDATA_BUILDINFO_H #define NETDATA_BUILDINFO_H 1 -extern void print_build_info(void); +void print_build_info(void); -extern void print_build_info_json(void); +void print_build_info_json(void); -extern char *get_value_from_key(char *buffer, char *key); +char *get_value_from_key(char *buffer, char *key); -extern void get_install_type(char **install_type, char **prebuilt_arch, char **prebuilt_dist); +void get_install_type(char **install_type, char 
**prebuilt_arch, char **prebuilt_dist); #endif // NETDATA_BUILDINFO_H diff --git a/daemon/commands.c b/daemon/commands.c index 13d8dbd40..6288ee59b 100644 --- a/daemon/commands.c +++ b/daemon/commands.c @@ -217,7 +217,7 @@ static cmd_status_t cmd_reload_labels_execute(char *args, char **message) reload_host_labels(); BUFFER *wb = buffer_create(10); - rrdlabels_log_to_buffer(localhost->host_labels, wb); + rrdlabels_log_to_buffer(localhost->rrdlabels, wb); (*message)=strdupz(buffer_tostring(wb)); buffer_free(wb); @@ -515,7 +515,7 @@ static void pipe_read_cb(uv_stream_t *client, ssize_t nread, const uv_buf_t *buf } else if (nread) { size_t to_copy; - to_copy = MIN(nread, MAX_COMMAND_LENGTH - 1 - cmd_ctx->command_string_size); + to_copy = MIN((size_t) nread, MAX_COMMAND_LENGTH - 1 - cmd_ctx->command_string_size); memcpy(cmd_ctx->command_string + cmd_ctx->command_string_size, buf->base, to_copy); cmd_ctx->command_string_size += to_copy; cmd_ctx->command_string[cmd_ctx->command_string_size] = '\0'; diff --git a/daemon/commands.h b/daemon/commands.h index 1253e2dc1..f0e38ce93 100644 --- a/daemon/commands.h +++ b/daemon/commands.h @@ -76,7 +76,7 @@ typedef struct command_info { typedef void (command_lock_t) (unsigned index); cmd_status_t execute_command(cmd_t idx, char *args, char **message); -extern void commands_init(void); -extern void commands_exit(void); +void commands_init(void); +void commands_exit(void); #endif //NETDATA_COMMANDS_H diff --git a/daemon/common.h b/daemon/common.h index 2a45ffe70..f3d868661 100644 --- a/daemon/common.h +++ b/daemon/common.h @@ -69,7 +69,7 @@ #include "claim/claim.h" // netdata agent cloud link -#include "aclk/aclk_api.h" +#include "aclk/aclk.h" // global GUID map functions diff --git a/daemon/daemon.h b/daemon/daemon.h index bec3df9fc..2a8a58ec6 100644 --- a/daemon/daemon.h +++ b/daemon/daemon.h @@ -3,14 +3,14 @@ #ifndef NETDATA_DAEMON_H #define NETDATA_DAEMON_H 1 -extern int become_user(const char *username, int pid_fd); +int 
become_user(const char *username, int pid_fd); -extern int become_daemon(int dont_fork, const char *user); +int become_daemon(int dont_fork, const char *user); -extern void netdata_cleanup_and_exit(int i); -extern void send_statistics(const char *action, const char *action_result, const char *action_data); +void netdata_cleanup_and_exit(int i); +void send_statistics(const char *action, const char *action_result, const char *action_data); -extern void get_netdata_execution_path(void); +void get_netdata_execution_path(void); extern char pidfile[]; extern char exepath[]; diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c index 249369519..53fd6c45a 100644 --- a/daemon/global_statistics.c +++ b/daemon/global_statistics.c @@ -9,27 +9,72 @@ #define WORKER_JOB_WORKERS 2 #define WORKER_JOB_DBENGINE 3 #define WORKER_JOB_HEARTBEAT 4 +#define WORKER_JOB_STRINGS 5 +#define WORKER_JOB_DICTIONARIES 6 +#define WORKER_JOB_MALLOC_TRACE 7 -#if WORKER_UTILIZATION_MAX_JOB_TYPES < 5 -#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 5 +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 8 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 8 #endif -static struct global_statistics { - volatile uint16_t connected_clients; - - volatile uint64_t web_requests; - volatile uint64_t web_usec; - volatile uint64_t web_usec_max; - volatile uint64_t bytes_received; - volatile uint64_t bytes_sent; - volatile uint64_t content_size; - volatile uint64_t compressed_content_size; +bool global_statistics_enabled = true; - volatile uint64_t web_client_count; +static struct global_statistics { + uint16_t connected_clients; + + uint64_t web_requests; + uint64_t web_usec; + uint64_t web_usec_max; + uint64_t bytes_received; + uint64_t bytes_sent; + uint64_t content_size; + uint64_t compressed_content_size; + + uint64_t web_client_count; + + uint64_t api_data_queries_made; + uint64_t api_data_db_points_read; + uint64_t api_data_result_points_generated; + + uint64_t 
api_weights_queries_made; + uint64_t api_weights_db_points_read; + uint64_t api_weights_result_points_generated; + + uint64_t api_badges_queries_made; + uint64_t api_badges_db_points_read; + uint64_t api_badges_result_points_generated; + + uint64_t health_queries_made; + uint64_t health_db_points_read; + uint64_t health_result_points_generated; + + uint64_t ml_queries_made; + uint64_t ml_db_points_read; + uint64_t ml_result_points_generated; + + uint64_t exporters_queries_made; + uint64_t exporters_db_points_read; + + uint64_t backfill_queries_made; + uint64_t backfill_db_points_read; + + uint64_t db_points_stored_per_tier[RRD_STORAGE_TIERS]; + + uint64_t sqlite3_queries_made; + uint64_t sqlite3_queries_ok; + uint64_t sqlite3_queries_failed; + uint64_t sqlite3_queries_failed_busy; + uint64_t sqlite3_queries_failed_locked; + uint64_t sqlite3_rows; + uint64_t sqlite3_metadata_cache_hit; + uint64_t sqlite3_context_cache_hit; + uint64_t sqlite3_metadata_cache_miss; + uint64_t sqlite3_context_cache_miss; + uint64_t sqlite3_metadata_cache_spill; + uint64_t sqlite3_context_cache_spill; + uint64_t sqlite3_metadata_cache_write; + uint64_t sqlite3_context_cache_write; - volatile uint64_t rrdr_queries_made; - volatile uint64_t rrdr_db_points_read; - volatile uint64_t rrdr_result_points_generated; } global_statistics = { .connected_clients = 0, .web_requests = 0, @@ -40,22 +85,98 @@ static struct global_statistics { .compressed_content_size = 0, .web_client_count = 1, - .rrdr_queries_made = 0, - .rrdr_db_points_read = 0, - .rrdr_result_points_generated = 0, + .api_data_queries_made = 0, + .api_data_db_points_read = 0, + .api_data_result_points_generated = 0, }; -void rrdr_query_completed(uint64_t db_points_read, uint64_t result_points_generated) { - __atomic_fetch_add(&global_statistics.rrdr_queries_made, 1, __ATOMIC_RELAXED); - __atomic_fetch_add(&global_statistics.rrdr_db_points_read, db_points_read, __ATOMIC_RELAXED); - 
__atomic_fetch_add(&global_statistics.rrdr_result_points_generated, result_points_generated, __ATOMIC_RELAXED); +void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array) { + for(size_t tier = 0; tier < storage_tiers ;tier++) { + __atomic_fetch_add(&global_statistics.db_points_stored_per_tier[tier], points_read_per_tier_array[tier], __ATOMIC_RELAXED); + points_read_per_tier_array[tier] = 0; + } +} + +void global_statistics_ml_query_completed(size_t points_read) { + __atomic_fetch_add(&global_statistics.ml_queries_made, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.ml_db_points_read, points_read, __ATOMIC_RELAXED); +} + +void global_statistics_exporters_query_completed(size_t points_read) { + __atomic_fetch_add(&global_statistics.exporters_queries_made, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.exporters_db_points_read, points_read, __ATOMIC_RELAXED); +} + +void global_statistics_backfill_query_completed(size_t points_read) { + __atomic_fetch_add(&global_statistics.backfill_queries_made, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.backfill_db_points_read, points_read, __ATOMIC_RELAXED); +} + +void global_statistics_sqlite3_query_completed(bool success, bool busy, bool locked) { + __atomic_fetch_add(&global_statistics.sqlite3_queries_made, 1, __ATOMIC_RELAXED); + + if(success) { + __atomic_fetch_add(&global_statistics.sqlite3_queries_ok, 1, __ATOMIC_RELAXED); + } + else { + __atomic_fetch_add(&global_statistics.sqlite3_queries_failed, 1, __ATOMIC_RELAXED); + + if(busy) + __atomic_fetch_add(&global_statistics.sqlite3_queries_failed_busy, 1, __ATOMIC_RELAXED); + + if(locked) + __atomic_fetch_add(&global_statistics.sqlite3_queries_failed_locked, 1, __ATOMIC_RELAXED); + } } -void finished_web_request_statistics(uint64_t dt, - uint64_t bytes_received, - uint64_t bytes_sent, - uint64_t content_size, - uint64_t compressed_content_size) { +void 
global_statistics_sqlite3_row_completed(void) { + __atomic_fetch_add(&global_statistics.sqlite3_rows, 1, __ATOMIC_RELAXED); +} + +void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source) { + switch(query_source) { + case QUERY_SOURCE_API_DATA: + __atomic_fetch_add(&global_statistics.api_data_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_data_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_data_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_ML: + __atomic_fetch_add(&global_statistics.ml_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.ml_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.ml_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_API_WEIGHTS: + __atomic_fetch_add(&global_statistics.api_weights_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_weights_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_weights_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_API_BADGE: + __atomic_fetch_add(&global_statistics.api_badges_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_badges_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_badges_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_HEALTH: + __atomic_fetch_add(&global_statistics.health_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.health_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.health_result_points_generated, 
result_points_generated, __ATOMIC_RELAXED); + break; + + default: + case QUERY_SOURCE_UNITTEST: + case QUERY_SOURCE_UNKNOWN: + break; + } +} + +void global_statistics_web_request_completed(uint64_t dt, + uint64_t bytes_received, + uint64_t bytes_sent, + uint64_t content_size, + uint64_t compressed_content_size) { uint64_t old_web_usec_max = global_statistics.web_usec_max; while(dt > old_web_usec_max) __atomic_compare_exchange(&global_statistics.web_usec_max, &old_web_usec_max, &dt, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED); @@ -68,35 +189,77 @@ void finished_web_request_statistics(uint64_t dt, __atomic_fetch_add(&global_statistics.compressed_content_size, compressed_content_size, __ATOMIC_RELAXED); } -uint64_t web_client_connected(void) { +uint64_t global_statistics_web_client_connected(void) { __atomic_fetch_add(&global_statistics.connected_clients, 1, __ATOMIC_RELAXED); return __atomic_fetch_add(&global_statistics.web_client_count, 1, __ATOMIC_RELAXED); } -void web_client_disconnected(void) { +void global_statistics_web_client_disconnected(void) { __atomic_fetch_sub(&global_statistics.connected_clients, 1, __ATOMIC_RELAXED); } - static inline void global_statistics_copy(struct global_statistics *gs, uint8_t options) { - gs->connected_clients = __atomic_fetch_add(&global_statistics.connected_clients, 0, __ATOMIC_RELAXED); - gs->web_requests = __atomic_fetch_add(&global_statistics.web_requests, 0, __ATOMIC_RELAXED); - gs->web_usec = __atomic_fetch_add(&global_statistics.web_usec, 0, __ATOMIC_RELAXED); - gs->web_usec_max = __atomic_fetch_add(&global_statistics.web_usec_max, 0, __ATOMIC_RELAXED); - gs->bytes_received = __atomic_fetch_add(&global_statistics.bytes_received, 0, __ATOMIC_RELAXED); - gs->bytes_sent = __atomic_fetch_add(&global_statistics.bytes_sent, 0, __ATOMIC_RELAXED); - gs->content_size = __atomic_fetch_add(&global_statistics.content_size, 0, __ATOMIC_RELAXED); - gs->compressed_content_size = __atomic_fetch_add(&global_statistics.compressed_content_size, 
0, __ATOMIC_RELAXED); - gs->web_client_count = __atomic_fetch_add(&global_statistics.web_client_count, 0, __ATOMIC_RELAXED); - - gs->rrdr_queries_made = __atomic_fetch_add(&global_statistics.rrdr_queries_made, 0, __ATOMIC_RELAXED); - gs->rrdr_db_points_read = __atomic_fetch_add(&global_statistics.rrdr_db_points_read, 0, __ATOMIC_RELAXED); - gs->rrdr_result_points_generated = __atomic_fetch_add(&global_statistics.rrdr_result_points_generated, 0, __ATOMIC_RELAXED); + gs->connected_clients = __atomic_load_n(&global_statistics.connected_clients, __ATOMIC_RELAXED); + gs->web_requests = __atomic_load_n(&global_statistics.web_requests, __ATOMIC_RELAXED); + gs->web_usec = __atomic_load_n(&global_statistics.web_usec, __ATOMIC_RELAXED); + gs->web_usec_max = __atomic_load_n(&global_statistics.web_usec_max, __ATOMIC_RELAXED); + gs->bytes_received = __atomic_load_n(&global_statistics.bytes_received, __ATOMIC_RELAXED); + gs->bytes_sent = __atomic_load_n(&global_statistics.bytes_sent, __ATOMIC_RELAXED); + gs->content_size = __atomic_load_n(&global_statistics.content_size, __ATOMIC_RELAXED); + gs->compressed_content_size = __atomic_load_n(&global_statistics.compressed_content_size, __ATOMIC_RELAXED); + gs->web_client_count = __atomic_load_n(&global_statistics.web_client_count, __ATOMIC_RELAXED); + + gs->api_data_queries_made = __atomic_load_n(&global_statistics.api_data_queries_made, __ATOMIC_RELAXED); + gs->api_data_db_points_read = __atomic_load_n(&global_statistics.api_data_db_points_read, __ATOMIC_RELAXED); + gs->api_data_result_points_generated = __atomic_load_n(&global_statistics.api_data_result_points_generated, __ATOMIC_RELAXED); + + gs->api_weights_queries_made = __atomic_load_n(&global_statistics.api_weights_queries_made, __ATOMIC_RELAXED); + gs->api_weights_db_points_read = __atomic_load_n(&global_statistics.api_weights_db_points_read, __ATOMIC_RELAXED); + gs->api_weights_result_points_generated = __atomic_load_n(&global_statistics.api_weights_result_points_generated, 
__ATOMIC_RELAXED); + + gs->api_badges_queries_made = __atomic_load_n(&global_statistics.api_badges_queries_made, __ATOMIC_RELAXED); + gs->api_badges_db_points_read = __atomic_load_n(&global_statistics.api_badges_db_points_read, __ATOMIC_RELAXED); + gs->api_badges_result_points_generated = __atomic_load_n(&global_statistics.api_badges_result_points_generated, __ATOMIC_RELAXED); + + gs->health_queries_made = __atomic_load_n(&global_statistics.health_queries_made, __ATOMIC_RELAXED); + gs->health_db_points_read = __atomic_load_n(&global_statistics.health_db_points_read, __ATOMIC_RELAXED); + gs->health_result_points_generated = __atomic_load_n(&global_statistics.health_result_points_generated, __ATOMIC_RELAXED); + + gs->ml_queries_made = __atomic_load_n(&global_statistics.ml_queries_made, __ATOMIC_RELAXED); + gs->ml_db_points_read = __atomic_load_n(&global_statistics.ml_db_points_read, __ATOMIC_RELAXED); + gs->ml_result_points_generated = __atomic_load_n(&global_statistics.ml_result_points_generated, __ATOMIC_RELAXED); + + gs->exporters_queries_made = __atomic_load_n(&global_statistics.exporters_queries_made, __ATOMIC_RELAXED); + gs->exporters_db_points_read = __atomic_load_n(&global_statistics.exporters_db_points_read, __ATOMIC_RELAXED); + gs->backfill_queries_made = __atomic_load_n(&global_statistics.backfill_queries_made, __ATOMIC_RELAXED); + gs->backfill_db_points_read = __atomic_load_n(&global_statistics.backfill_db_points_read, __ATOMIC_RELAXED); + + for(size_t tier = 0; tier < storage_tiers ;tier++) + gs->db_points_stored_per_tier[tier] = __atomic_load_n(&global_statistics.db_points_stored_per_tier[tier], __ATOMIC_RELAXED); if(options & GLOBAL_STATS_RESET_WEB_USEC_MAX) { uint64_t n = 0; __atomic_compare_exchange(&global_statistics.web_usec_max, (uint64_t *) &gs->web_usec_max, &n, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED); } + + gs->sqlite3_queries_made = __atomic_load_n(&global_statistics.sqlite3_queries_made, __ATOMIC_RELAXED); + gs->sqlite3_queries_ok = 
__atomic_load_n(&global_statistics.sqlite3_queries_ok, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed = __atomic_load_n(&global_statistics.sqlite3_queries_failed, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed_busy = __atomic_load_n(&global_statistics.sqlite3_queries_failed_busy, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed_locked = __atomic_load_n(&global_statistics.sqlite3_queries_failed_locked, __ATOMIC_RELAXED); + gs->sqlite3_rows = __atomic_load_n(&global_statistics.sqlite3_rows, __ATOMIC_RELAXED); + + gs->sqlite3_metadata_cache_hit = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_HIT); + gs->sqlite3_context_cache_hit = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_HIT); + + gs->sqlite3_metadata_cache_miss = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_MISS); + gs->sqlite3_context_cache_miss = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_MISS); + + gs->sqlite3_metadata_cache_spill = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL); + gs->sqlite3_context_cache_spill = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL); + + gs->sqlite3_metadata_cache_write = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE); + gs->sqlite3_context_cache_write = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE); } static void global_statistics_charts(void) { @@ -116,6 +279,7 @@ static void global_statistics_charts(void) { struct global_statistics gs; struct rusage me; + struct replication_query_statistics replication = replication_get_query_statistics(); global_statistics_copy(&gs, GLOBAL_STATS_RESET_WEB_USEC_MAX); getrusage(RUSAGE_SELF, &me); @@ -145,8 +309,6 @@ static void global_statistics_charts(void) { rd_cpu_user = rrddim_add(st_cpu, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); rd_cpu_system = rrddim_add(st_cpu, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); } - else - rrdset_next(st_cpu); rrddim_set_by_pointer(st_cpu, rd_cpu_user, me.ru_utime.tv_sec * 
1000000ULL + me.ru_utime.tv_usec); rrddim_set_by_pointer(st_cpu, rd_cpu_system, me.ru_stime.tv_sec * 1000000ULL + me.ru_stime.tv_usec); @@ -175,8 +337,7 @@ static void global_statistics_charts(void) { RRDSET_TYPE_LINE); rd_uptime = rrddim_add(st_uptime, "uptime", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } else - rrdset_next(st_uptime); + } rrddim_set_by_pointer(st_uptime, rd_uptime, netdata_uptime); rrdset_done(st_uptime); @@ -206,8 +367,6 @@ static void global_statistics_charts(void) { rd_clients = rrddim_add(st_clients, "clients", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } - else - rrdset_next(st_clients); rrddim_set_by_pointer(st_clients, rd_clients, gs.connected_clients); rrdset_done(st_clients); @@ -237,8 +396,6 @@ static void global_statistics_charts(void) { rd_requests = rrddim_add(st_reqs, "requests", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } - else - rrdset_next(st_reqs); rrddim_set_by_pointer(st_reqs, rd_requests, (collected_number) gs.web_requests); rrdset_done(st_reqs); @@ -270,8 +427,6 @@ static void global_statistics_charts(void) { rd_in = rrddim_add(st_bytes, "in", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); rd_out = rrddim_add(st_bytes, "out", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); } - else - rrdset_next(st_bytes); rrddim_set_by_pointer(st_bytes, rd_in, (collected_number) gs.bytes_received); rrddim_set_by_pointer(st_bytes, rd_out, (collected_number) gs.bytes_sent); @@ -304,8 +459,6 @@ static void global_statistics_charts(void) { rd_average = rrddim_add(st_duration, "average", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); rd_max = rrddim_add(st_duration, "max", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); } - else - rrdset_next(st_duration); uint64_t gweb_usec = gs.web_usec; uint64_t gweb_requests = gs.web_requests; @@ -352,8 +505,6 @@ static void global_statistics_charts(void) { rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); } - else - rrdset_next(st_compression); // since we don't lock here 
to read the global statistics // read the smaller value first @@ -377,69 +528,354 @@ static void global_statistics_charts(void) { // ---------------------------------------------------------------- - if(gs.rrdr_queries_made) { - static RRDSET *st_rrdr_queries = NULL; - static RRDDIM *rd_queries = NULL; - - if (unlikely(!st_rrdr_queries)) { - st_rrdr_queries = rrdset_create_localhost( + { + static RRDSET *st_queries = NULL; + static RRDDIM *rd_api_data_queries = NULL; + static RRDDIM *rd_api_weights_queries = NULL; + static RRDDIM *rd_api_badges_queries = NULL; + static RRDDIM *rd_health_queries = NULL; + static RRDDIM *rd_ml_queries = NULL; + static RRDDIM *rd_exporters_queries = NULL; + static RRDDIM *rd_backfill_queries = NULL; + static RRDDIM *rd_replication_queries = NULL; + + if (unlikely(!st_queries)) { + st_queries = rrdset_create_localhost( "netdata" , "queries" , NULL , "queries" , NULL - , "Netdata API Queries" + , "Netdata DB Queries" , "queries/s" , "netdata" , "stats" , 131000 , localhost->rrd_update_every - , RRDSET_TYPE_LINE + , RRDSET_TYPE_STACKED ); - rd_queries = rrddim_add(st_rrdr_queries, "queries", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_data_queries = rrddim_add(st_queries, "/api/v1/data", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_weights_queries = rrddim_add(st_queries, "/api/v1/weights", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_badges_queries = rrddim_add(st_queries, "/api/v1/badge", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_health_queries = rrddim_add(st_queries, "health", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ml_queries = rrddim_add(st_queries, "ml", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_exporters_queries = rrddim_add(st_queries, "exporters", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_backfill_queries = rrddim_add(st_queries, "backfill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_replication_queries = rrddim_add(st_queries, "replication", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } - else - 
rrdset_next(st_rrdr_queries); - rrddim_set_by_pointer(st_rrdr_queries, rd_queries, (collected_number)gs.rrdr_queries_made); + rrddim_set_by_pointer(st_queries, rd_api_data_queries, (collected_number)gs.api_data_queries_made); + rrddim_set_by_pointer(st_queries, rd_api_weights_queries, (collected_number)gs.api_weights_queries_made); + rrddim_set_by_pointer(st_queries, rd_api_badges_queries, (collected_number)gs.api_badges_queries_made); + rrddim_set_by_pointer(st_queries, rd_health_queries, (collected_number)gs.health_queries_made); + rrddim_set_by_pointer(st_queries, rd_ml_queries, (collected_number)gs.ml_queries_made); + rrddim_set_by_pointer(st_queries, rd_exporters_queries, (collected_number)gs.exporters_queries_made); + rrddim_set_by_pointer(st_queries, rd_backfill_queries, (collected_number)gs.backfill_queries_made); + rrddim_set_by_pointer(st_queries, rd_replication_queries, (collected_number)replication.queries_finished); - rrdset_done(st_rrdr_queries); + rrdset_done(st_queries); } // ---------------------------------------------------------------- - if(gs.rrdr_db_points_read || gs.rrdr_result_points_generated) { - static RRDSET *st_rrdr_points = NULL; - static RRDDIM *rd_points_read = NULL; - static RRDDIM *rd_points_generated = NULL; - - if (unlikely(!st_rrdr_points)) { - st_rrdr_points = rrdset_create_localhost( + { + static RRDSET *st_points_read = NULL; + static RRDDIM *rd_api_data_points_read = NULL; + static RRDDIM *rd_api_weights_points_read = NULL; + static RRDDIM *rd_api_badges_points_read = NULL; + static RRDDIM *rd_health_points_read = NULL; + static RRDDIM *rd_ml_points_read = NULL; + static RRDDIM *rd_exporters_points_read = NULL; + static RRDDIM *rd_backfill_points_read = NULL; + static RRDDIM *rd_replication_points_read = NULL; + + if (unlikely(!st_points_read)) { + st_points_read = rrdset_create_localhost( "netdata" - , "db_points" + , "db_points_read" , NULL , "queries" , NULL - , "Netdata API Points" + , "Netdata DB Points Query Read" , 
"points/s" , "netdata" , "stats" , 131001 , localhost->rrd_update_every - , RRDSET_TYPE_AREA + , RRDSET_TYPE_STACKED ); - rd_points_read = rrddim_add(st_rrdr_points, "read", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_points_generated = rrddim_add(st_rrdr_points, "generated", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_data_points_read = rrddim_add(st_points_read, "/api/v1/data", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_weights_points_read = rrddim_add(st_points_read, "/api/v1/weights", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_badges_points_read = rrddim_add(st_points_read, "/api/v1/badge", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_health_points_read = rrddim_add(st_points_read, "health", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ml_points_read = rrddim_add(st_points_read, "ml", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_exporters_points_read = rrddim_add(st_points_read, "exporters", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_backfill_points_read = rrddim_add(st_points_read, "backfill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_replication_points_read = rrddim_add(st_points_read, "replication", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_points_read, rd_api_data_points_read, (collected_number)gs.api_data_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_api_weights_points_read, (collected_number)gs.api_weights_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_api_badges_points_read, (collected_number)gs.api_badges_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_health_points_read, (collected_number)gs.health_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_ml_points_read, (collected_number)gs.ml_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_exporters_points_read, (collected_number)gs.exporters_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_backfill_points_read, (collected_number)gs.backfill_db_points_read); + 
rrddim_set_by_pointer(st_points_read, rd_replication_points_read, (collected_number)replication.points_read); + + rrdset_done(st_points_read); + } + + // ---------------------------------------------------------------- + + if(gs.api_data_result_points_generated || replication.points_generated) { + static RRDSET *st_points_generated = NULL; + static RRDDIM *rd_api_data_points_generated = NULL; + static RRDDIM *rd_api_weights_points_generated = NULL; + static RRDDIM *rd_api_badges_points_generated = NULL; + static RRDDIM *rd_health_points_generated = NULL; + static RRDDIM *rd_ml_points_generated = NULL; + static RRDDIM *rd_replication_points_generated = NULL; + + if (unlikely(!st_points_generated)) { + st_points_generated = rrdset_create_localhost( + "netdata" + , "db_points_results" + , NULL + , "queries" + , NULL + , "Netdata Points in Query Results" + , "points/s" + , "netdata" + , "stats" + , 131002 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_api_data_points_generated = rrddim_add(st_points_generated, "/api/v1/data", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_weights_points_generated = rrddim_add(st_points_generated, "/api/v1/weights", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_badges_points_generated = rrddim_add(st_points_generated, "/api/v1/badge", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_health_points_generated = rrddim_add(st_points_generated, "health", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ml_points_generated = rrddim_add(st_points_generated, "ml", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_replication_points_generated = rrddim_add(st_points_generated, "replication", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_points_generated, rd_api_data_points_generated, (collected_number)gs.api_data_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_api_weights_points_generated, (collected_number)gs.api_weights_result_points_generated); + 
rrddim_set_by_pointer(st_points_generated, rd_api_badges_points_generated, (collected_number)gs.api_badges_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_health_points_generated, (collected_number)gs.health_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_ml_points_generated, (collected_number)gs.ml_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_replication_points_generated, (collected_number)replication.points_generated); + + rrdset_done(st_points_generated); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_points_stored = NULL; + static RRDDIM *rds[RRD_STORAGE_TIERS] = {}; + + if (unlikely(!st_points_stored)) { + st_points_stored = rrdset_create_localhost( + "netdata" + , "db_points_stored" + , NULL + , "queries" + , NULL + , "Netdata DB Points Stored" + , "points/s" + , "netdata" + , "stats" + , 131003 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + char buf[30 + 1]; + snprintfz(buf, 30, "tier%zu", tier); + rds[tier] = rrddim_add(st_points_stored, buf, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + } + + for(size_t tier = 0; tier < storage_tiers ;tier++) + rrddim_set_by_pointer(st_points_stored, rds[tier], (collected_number)gs.db_points_stored_per_tier[tier]); + + rrdset_done(st_points_stored); + } + + // ---------------------------------------------------------------- + + if(gs.sqlite3_queries_made) { + static RRDSET *st_sqlite3_queries = NULL; + static RRDDIM *rd_queries = NULL; + + if (unlikely(!st_sqlite3_queries)) { + st_sqlite3_queries = rrdset_create_localhost( + "netdata" + , "sqlite3_queries" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 Queries" + , "queries/s" + , "netdata" + , "stats" + , 131100 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_queries = rrddim_add(st_sqlite3_queries, "queries", NULL, 1, 1, 
RRD_ALGORITHM_INCREMENTAL);
+        }
+
+        rrddim_set_by_pointer(st_sqlite3_queries, rd_queries, (collected_number)gs.sqlite3_queries_made);
+
+        rrdset_done(st_sqlite3_queries);
+    }
+
+    // ----------------------------------------------------------------
+
+    // SQLite3 queries split by outcome (ok / failed / busy / locked).
+    // The chart is only updated while at least one ok or failed query has been counted.
+    if(gs.sqlite3_queries_ok || gs.sqlite3_queries_failed) {
+        static RRDSET *st_sqlite3_queries_by_status = NULL;
+        static RRDDIM *rd_ok = NULL, *rd_failed = NULL, *rd_busy = NULL, *rd_locked = NULL;
+
+        if (unlikely(!st_sqlite3_queries_by_status)) {
+            st_sqlite3_queries_by_status = rrdset_create_localhost(
+                    "netdata"
+                    , "sqlite3_queries_by_status"
+                    , NULL
+                    , "sqlite3"
+                    , NULL
+                    , "Netdata SQLite3 Queries by status"
+                    , "queries/s"
+                    , "netdata"
+                    , "stats"
+                    , 131101
+                    , localhost->rrd_update_every
+                    , RRDSET_TYPE_LINE
+            );
+
+            rd_ok = rrddim_add(st_sqlite3_queries_by_status, "ok", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+            rd_failed = rrddim_add(st_sqlite3_queries_by_status, "failed", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+            rd_busy = rrddim_add(st_sqlite3_queries_by_status, "busy", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+            rd_locked = rrddim_add(st_sqlite3_queries_by_status, "locked", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+        }
+
+        // "ok" is fed from sqlite3_queries_ok, the counter this section is gated on;
+        // sqlite3_queries_made (the total) is already plotted by the sqlite3_queries chart above.
+        rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_ok, (collected_number)gs.sqlite3_queries_ok);
+        rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_failed, (collected_number)gs.sqlite3_queries_failed);
+        rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_busy, (collected_number)gs.sqlite3_queries_failed_busy);
+        rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_locked, (collected_number)gs.sqlite3_queries_failed_locked);
+
+        rrdset_done(st_sqlite3_queries_by_status);
+    }
+
+    // ----------------------------------------------------------------
+
+    // Rows counter (gs.sqlite3_rows); skipped until the counter becomes non-zero.
+    if(gs.sqlite3_rows) {
+        static RRDSET *st_sqlite3_rows = NULL;
+        static RRDDIM *rd_rows = NULL;
+
+        if (unlikely(!st_sqlite3_rows)) {
+            st_sqlite3_rows = rrdset_create_localhost(
+                    "netdata"
+                    , "sqlite3_rows"
+                    , NULL
+                    , "sqlite3"
+                    , NULL
+                    , "Netdata SQLite3 Rows"
+                    , "rows/s"
+                    , "netdata"
+                    , "stats"
+                    , 131102
+                    , localhost->rrd_update_every
+                    , RRDSET_TYPE_LINE
+            );
+
+            rd_rows = rrddim_add(st_sqlite3_rows, "ok", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+        }
+
+        rrddim_set_by_pointer(st_sqlite3_rows, rd_rows, (collected_number)gs.sqlite3_rows);
+
+        rrdset_done(st_sqlite3_rows);
+    }
+
+    // Cache counters of the metadata database (the gs.sqlite3_metadata_cache_* values);
+    // skipped while the hit counter is zero.
+    if(gs.sqlite3_metadata_cache_hit) {
+        static RRDSET *st_sqlite3_cache = NULL;
+        static RRDDIM *rd_cache_hit = NULL;
+        static RRDDIM *rd_cache_miss= NULL;
+        static RRDDIM *rd_cache_spill= NULL;
+        static RRDDIM *rd_cache_write= NULL;
+
+        if (unlikely(!st_sqlite3_cache)) {
+            // chart id spelled "metadata", matching the chart title and the gs field names
+            st_sqlite3_cache = rrdset_create_localhost(
+                    "netdata"
+                    , "sqlite3_metadata_cache"
+                    , NULL
+                    , "sqlite3"
+                    , NULL
+                    , "Netdata SQLite3 metadata cache"
+                    , "ops/s"
+                    , "netdata"
+                    , "stats"
+                    , 131103
+                    , localhost->rrd_update_every
+                    , RRDSET_TYPE_LINE
+            );
+
+            rd_cache_hit = rrddim_add(st_sqlite3_cache, "cache_hit", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+            rd_cache_miss = rrddim_add(st_sqlite3_cache, "cache_miss", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+            rd_cache_spill = rrddim_add(st_sqlite3_cache, "cache_spill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+            rd_cache_write = rrddim_add(st_sqlite3_cache, "cache_write", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+        }
+
+        rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_metadata_cache_hit);
+        rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_metadata_cache_miss);
+        rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_metadata_cache_spill);
+        rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_metadata_cache_write);
+
+        rrdset_done(st_sqlite3_cache);
+    }
+
+    if(gs.sqlite3_context_cache_hit) {
+        static RRDSET *st_sqlite3_cache = NULL;
+        static RRDDIM *rd_cache_hit = NULL;
+        static RRDDIM *rd_cache_miss= NULL;
+        static RRDDIM *rd_cache_spill= NULL;
+
static RRDDIM *rd_cache_write= NULL; + + if (unlikely(!st_sqlite3_cache)) { + st_sqlite3_cache = rrdset_create_localhost( + "netdata" + , "sqlite3_context_cache" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 context cache" + , "ops/s" + , "netdata" + , "stats" + , 131104 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_cache_hit = rrddim_add(st_sqlite3_cache, "cache_hit", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_miss = rrddim_add(st_sqlite3_cache, "cache_miss", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_spill = rrddim_add(st_sqlite3_cache, "cache_spill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_write = rrddim_add(st_sqlite3_cache, "cache_write", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } - else - rrdset_next(st_rrdr_points); - rrddim_set_by_pointer(st_rrdr_points, rd_points_read, (collected_number)gs.rrdr_db_points_read); - rrddim_set_by_pointer(st_rrdr_points, rd_points_generated, (collected_number)gs.rrdr_result_points_generated); + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_context_cache_hit); + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_context_cache_miss); + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_context_cache_spill); + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_context_cache_write); - rrdset_done(st_rrdr_points); + rrdset_done(st_sqlite3_cache); } // ---------------------------------------------------------------- @@ -454,13 +890,14 @@ static void dbengine_statistics_charts(void) { unsigned dbengine_contexts = 0, counted_multihost_db[RRD_STORAGE_TIERS] = { 0 }, i; rrdhost_foreach_read(host) { - if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && !rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) { + if (!rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) { /* get localhost's DB engine's statistics for each tier */ - for(int tier = 0; tier < 
storage_tiers ;tier++) { - if(!host->storage_instance[tier]) continue; + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(host->db[tier].mode != RRD_MEMORY_MODE_DBENGINE) continue; + if(!host->db[tier].instance) continue; - if(is_storage_engine_shared(host->storage_instance[tier])) { + if(is_storage_engine_shared(host->db[tier].instance)) { if(counted_multihost_db[tier]) continue; else @@ -468,7 +905,7 @@ static void dbengine_statistics_charts(void) { } ++dbengine_contexts; - rrdeng_get_37_statistics((struct rrdengine_instance *)host->storage_instance[tier], local_stats_array); + rrdeng_get_37_statistics((struct rrdengine_instance *)host->db[tier].instance, local_stats_array); for (i = 0; i < RRDENG_NR_STATS; ++i) { /* aggregate statistics across hosts */ stats_array[i] += local_stats_array[i]; @@ -508,8 +945,7 @@ static void dbengine_statistics_charts(void) { RRDSET_TYPE_LINE); rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - } else - rrdset_next(st_compression); + } unsigned long long ratio; unsigned long long compressed_content_size = stats_array[12]; @@ -548,8 +984,7 @@ static void dbengine_statistics_charts(void) { RRDSET_TYPE_LINE); rd_hit_ratio = rrddim_add(st_pg_cache_hit_ratio, "ratio", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - } else - rrdset_next(st_pg_cache_hit_ratio); + } static unsigned long long old_hits = 0; static unsigned long long old_misses = 0; @@ -607,8 +1042,7 @@ static void dbengine_statistics_charts(void) { rd_evictions = rrddim_add(st_pg_cache_pages, "evictions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); rd_used_by_collectors = rrddim_add(st_pg_cache_pages, "used_by_collectors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } else - rrdset_next(st_pg_cache_pages); + } rrddim_set_by_pointer(st_pg_cache_pages, rd_descriptors, (collected_number)stats_array[27]); rrddim_set_by_pointer(st_pg_cache_pages, rd_populated, (collected_number)stats_array[3]); @@ -648,8 +1082,7 @@ static void 
dbengine_statistics_charts(void) { rd_deletions = rrddim_add(st_long_term_pages, "deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); rd_flushing_pressure_deletions = rrddim_add( st_long_term_pages, "flushing_pressure_deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } else - rrdset_next(st_long_term_pages); + } rrddim_set_by_pointer(st_long_term_pages, rd_total, (collected_number)stats_array[2]); rrddim_set_by_pointer(st_long_term_pages, rd_insertions, (collected_number)stats_array[5]); @@ -683,8 +1116,7 @@ static void dbengine_statistics_charts(void) { rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - } else - rrdset_next(st_io_stats); + } rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[17]); rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[15]); @@ -715,8 +1147,7 @@ static void dbengine_statistics_charts(void) { rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } else - rrdset_next(st_io_stats); + } rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[18]); rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[16]); @@ -750,8 +1181,7 @@ static void dbengine_statistics_charts(void) { rd_fs_errors = rrddim_add(st_errors, "fs_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); pg_cache_over_half_dirty_events = rrddim_add(st_errors, "pg_cache_over_half_dirty_events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - } else - rrdset_next(st_errors); + } rrddim_set_by_pointer(st_errors, rd_io_errors, (collected_number)stats_array[30]); rrddim_set_by_pointer(st_errors, rd_fs_errors, (collected_number)stats_array[31]); @@ -783,8 +1213,7 @@ static void dbengine_statistics_charts(void) { rd_fd_current = 
rrddim_add(st_fd, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
                 rd_fd_max = rrddim_add(st_fd, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
-            } else
-                rrdset_next(st_fd);
+            }
             rrddim_set_by_pointer(st_fd, rd_fd_current, (collected_number)stats_array[32]);
             /* Careful here, modify this accordingly if the File-Descriptor budget ever changes */
@@ -825,8 +1254,7 @@ static void dbengine_statistics_charts(void) {
                 rd_cache_metadata = rrddim_add(st_ram_usage, "cache metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE);
                 rd_pages_metadata = rrddim_add(st_ram_usage, "pages metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE);
                 rd_index_metadata = rrddim_add(st_ram_usage, "index metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE);
-            } else
-                rrdset_next(st_ram_usage);
+            }
             API_producers = (collected_number)stats_array[0];
             pages_on_disk = (collected_number)stats_array[2];
@@ -852,6 +1280,90 @@ static void dbengine_statistics_charts(void) {
 #endif
 }
+// Publishes three "strings" charts on localhost from the counters returned by
+// string_statistics(): operations (ops/s), entries vs references, and memory (bytes).
+// Each chart and its dimensions are created on the first call and cached in
+// function-local statics; every call then sets the values and completes the chart.
+static void update_strings_charts(void) {
+    static RRDSET *st_ops = NULL, *st_entries = NULL, *st_mem = NULL;
+    static RRDDIM *rd_ops_inserts = NULL, *rd_ops_deletes = NULL, *rd_ops_searches = NULL, *rd_ops_duplications = NULL, *rd_ops_releases = NULL;
+    static RRDDIM *rd_entries_entries = NULL, *rd_entries_refs = NULL;
+    static RRDDIM *rd_mem = NULL;
+
+    size_t inserts, deletes, searches, entries, references, memory, duplications, releases;
+
+    string_statistics(&inserts, &deletes, &searches, &entries, &references, &memory, &duplications, &releases);
+
+    if (unlikely(!st_ops)) {
+        st_ops = rrdset_create_localhost(
+            "netdata"
+            , "strings_ops"
+            , NULL
+            , "strings"
+            , NULL
+            , "Strings operations"
+            , "ops/s"
+            , "netdata"
+            , "stats"
+            , 910000
+            , localhost->rrd_update_every
+            , RRDSET_TYPE_LINE);
+
+        rd_ops_inserts = rrddim_add(st_ops, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+        rd_ops_deletes = rrddim_add(st_ops, "deletes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+        rd_ops_searches = rrddim_add(st_ops, "searches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+        rd_ops_duplications = rrddim_add(st_ops, "duplications", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+        rd_ops_releases = rrddim_add(st_ops, "releases", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set_by_pointer(st_ops, rd_ops_inserts, (collected_number)inserts);
+    rrddim_set_by_pointer(st_ops, rd_ops_deletes, (collected_number)deletes);
+    rrddim_set_by_pointer(st_ops, rd_ops_searches, (collected_number)searches);
+    rrddim_set_by_pointer(st_ops, rd_ops_duplications, (collected_number)duplications);
+    rrddim_set_by_pointer(st_ops, rd_ops_releases, (collected_number)releases);
+    rrdset_done(st_ops);
+
+    if (unlikely(!st_entries)) {
+        st_entries = rrdset_create_localhost(
+            "netdata"
+            , "strings_entries"
+            , NULL
+            , "strings"
+            , NULL
+            , "Strings entries"
+            , "entries"
+            , "netdata"
+            , "stats"
+            , 910001
+            , localhost->rrd_update_every
+            , RRDSET_TYPE_AREA);
+
+        rd_entries_entries = rrddim_add(st_entries, "entries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+        // NOTE(review): multiplier 1 / divisor -1 here, while the other negated dimensions
+        // in this function use multiplier -1 / divisor 1 - presumably equivalent; confirm intended.
+        rd_entries_refs = rrddim_add(st_entries, "references", NULL, 1, -1, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    rrddim_set_by_pointer(st_entries, rd_entries_entries, (collected_number)entries);
+    rrddim_set_by_pointer(st_entries, rd_entries_refs, (collected_number)references);
+    rrdset_done(st_entries);
+
+    if (unlikely(!st_mem)) {
+        st_mem = rrdset_create_localhost(
+            "netdata"
+            , "strings_memory"
+            , NULL
+            , "strings"
+            , NULL
+            , "Strings memory"
+            , "bytes"
+            , "netdata"
+            , "stats"
+            // own priority: 910001 is already used by strings_entries
+            , 910002
+            , localhost->rrd_update_every
+            , RRDSET_TYPE_AREA);
+
+        rd_mem = rrddim_add(st_mem, "memory", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    rrddim_set_by_pointer(st_mem, rd_mem, (collected_number)memory);
+    rrdset_done(st_mem);
+}
+
 static void update_heartbeat_charts() {
     static RRDSET *st_heartbeat = NULL;
     static RRDDIM *rd_heartbeat_min = NULL;
@@ -876,8 +1388,7 @@ static void update_heartbeat_charts() {
         rd_heartbeat_min = rrddim_add(st_heartbeat, "min", NULL, 1, 1,
RRD_ALGORITHM_ABSOLUTE); rd_heartbeat_max = rrddim_add(st_heartbeat, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_heartbeat_avg = rrddim_add(st_heartbeat, "average", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } else - rrdset_next(st_heartbeat); + } usec_t min, max, average; size_t count; @@ -892,24 +1403,529 @@ static void update_heartbeat_charts() { } // --------------------------------------------------------------------------------------------------------------------- +// dictionary statistics + +struct dictionary_categories { + struct dictionary_stats *stats; + const char *family; + const char *context_prefix; + int priority; + + RRDSET *st_dicts; + RRDDIM *rd_dicts_active; + RRDDIM *rd_dicts_deleted; + + RRDSET *st_items; + RRDDIM *rd_items_entries; + RRDDIM *rd_items_referenced; + RRDDIM *rd_items_pending_deletion; + + RRDSET *st_ops; + RRDDIM *rd_ops_creations; + RRDDIM *rd_ops_destructions; + RRDDIM *rd_ops_flushes; + RRDDIM *rd_ops_traversals; + RRDDIM *rd_ops_walkthroughs; + RRDDIM *rd_ops_garbage_collections; + RRDDIM *rd_ops_searches; + RRDDIM *rd_ops_inserts; + RRDDIM *rd_ops_resets; + RRDDIM *rd_ops_deletes; + + RRDSET *st_callbacks; + RRDDIM *rd_callbacks_inserts; + RRDDIM *rd_callbacks_conflicts; + RRDDIM *rd_callbacks_reacts; + RRDDIM *rd_callbacks_deletes; + + RRDSET *st_memory; + RRDDIM *rd_memory_indexed; + RRDDIM *rd_memory_values; + RRDDIM *rd_memory_dict; + + RRDSET *st_spins; + RRDDIM *rd_spins_use; + RRDDIM *rd_spins_search; + RRDDIM *rd_spins_insert; + RRDDIM *rd_spins_delete; + +} dictionary_categories[] = { + { .stats = &dictionary_stats_category_other, "dictionaries", "dictionaries", 900000 }, + + // terminator + { .stats = NULL, NULL, NULL, 0 }, +}; + +#define load_dictionary_stats_entry(x) total += (size_t)(stats.x = __atomic_load_n(&c->stats->x, __ATOMIC_RELAXED)) + +static void update_dictionary_category_charts(struct dictionary_categories *c) { + struct dictionary_stats stats; + stats.name = c->stats->name; + + // 
------------------------------------------------------------------------ + + size_t total = 0; + load_dictionary_stats_entry(dictionaries.active); + load_dictionary_stats_entry(dictionaries.deleted); + + if(c->st_dicts || total != 0) { + if (unlikely(!c->st_dicts)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.dictionaries", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.dictionaries", c->context_prefix); + + c->st_dicts = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionaries" + , "dictionaries" + , "netdata" + , "stats" + , c->priority + 0 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_dicts_active = rrddim_add(c->st_dicts, "active", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_dicts_deleted = rrddim_add(c->st_dicts, "deleted", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrdlabels_add(c->st_dicts->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_dicts, c->rd_dicts_active, (collected_number)stats.dictionaries.active); + rrddim_set_by_pointer(c->st_dicts, c->rd_dicts_deleted, (collected_number)stats.dictionaries.deleted); + rrdset_done(c->st_dicts); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(items.entries); + load_dictionary_stats_entry(items.referenced); + load_dictionary_stats_entry(items.pending_deletion); + + if(c->st_items || total != 0) { + if (unlikely(!c->st_items)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.items", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.items", c->context_prefix); + + c->st_items = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Items" + , "items" + , "netdata" + 
, "stats"
+                    , c->priority + 1
+                    , localhost->rrd_update_every
+                    , RRDSET_TYPE_LINE
+            );
+
+            c->rd_items_entries = rrddim_add(c->st_items, "active", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+            c->rd_items_pending_deletion = rrddim_add(c->st_items, "deleted", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE);
+            c->rd_items_referenced = rrddim_add(c->st_items, "referenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+            rrdlabels_add(c->st_items->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO);
+        }
+
+        // explicit collected_number casts, as in every other section of this function
+        rrddim_set_by_pointer(c->st_items, c->rd_items_entries, (collected_number)stats.items.entries);
+        rrddim_set_by_pointer(c->st_items, c->rd_items_pending_deletion, (collected_number)stats.items.pending_deletion);
+        rrddim_set_by_pointer(c->st_items, c->rd_items_referenced, (collected_number)stats.items.referenced);
+        rrdset_done(c->st_items);
+    }
+
+    // ------------------------------------------------------------------------
+
+    // operations chart: snapshot every ops counter; "total" stays 0 if none has been used yet
+    total = 0;
+    load_dictionary_stats_entry(ops.creations);
+    load_dictionary_stats_entry(ops.destructions);
+    load_dictionary_stats_entry(ops.flushes);
+    load_dictionary_stats_entry(ops.traversals);
+    load_dictionary_stats_entry(ops.walkthroughs);
+    load_dictionary_stats_entry(ops.garbage_collections);
+    load_dictionary_stats_entry(ops.searches);
+    load_dictionary_stats_entry(ops.inserts);
+    load_dictionary_stats_entry(ops.resets);
+    load_dictionary_stats_entry(ops.deletes);
+
+    // create the chart on the first non-zero total; once it exists, keep updating it
+    if(c->st_ops || total != 0) {
+        if (unlikely(!c->st_ops)) {
+            char id[RRD_ID_LENGTH_MAX + 1];
+            snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.ops", c->context_prefix, stats.name);
+
+            char context[RRD_ID_LENGTH_MAX + 1];
+            snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.ops", c->context_prefix);
+
+            c->st_ops = rrdset_create_localhost(
+                    "netdata"
+                    , id
+                    , NULL
+                    , c->family
+                    , context
+                    , "Dictionary Operations"
+                    , "ops/s"
+                    , "netdata"
+                    , "stats"
+                    , c->priority + 2
+                    , localhost->rrd_update_every
+                    , RRDSET_TYPE_LINE
+            );
+
+            c->rd_ops_creations = rrddim_add(c->st_ops, "creations", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+
c->rd_ops_destructions = rrddim_add(c->st_ops, "destructions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_flushes = rrddim_add(c->st_ops, "flushes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_traversals = rrddim_add(c->st_ops, "traversals", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_walkthroughs = rrddim_add(c->st_ops, "walkthroughs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_garbage_collections = rrddim_add(c->st_ops, "garbage_collections", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_searches = rrddim_add(c->st_ops, "searches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_inserts = rrddim_add(c->st_ops, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_resets = rrddim_add(c->st_ops, "resets", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_deletes = rrddim_add(c->st_ops, "deletes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrdlabels_add(c->st_ops->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_ops, c->rd_ops_creations, (collected_number)stats.ops.creations); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_destructions, (collected_number)stats.ops.destructions); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_flushes, (collected_number)stats.ops.flushes); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_traversals, (collected_number)stats.ops.traversals); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_walkthroughs, (collected_number)stats.ops.walkthroughs); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_garbage_collections, (collected_number)stats.ops.garbage_collections); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_searches, (collected_number)stats.ops.searches); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_inserts, (collected_number)stats.ops.inserts); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_resets, (collected_number)stats.ops.resets); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_deletes, (collected_number)stats.ops.deletes); + + rrdset_done(c->st_ops); + } 
+ + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(callbacks.inserts); + load_dictionary_stats_entry(callbacks.conflicts); + load_dictionary_stats_entry(callbacks.reacts); + load_dictionary_stats_entry(callbacks.deletes); + + if(c->st_callbacks || total != 0) { + if (unlikely(!c->st_callbacks)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.callbacks", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.callbacks", c->context_prefix); + + c->st_callbacks = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Callbacks" + , "callbacks/s" + , "netdata" + , "stats" + , c->priority + 3 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_callbacks_inserts = rrddim_add(c->st_callbacks, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_callbacks_deletes = rrddim_add(c->st_callbacks, "deletes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_callbacks_conflicts = rrddim_add(c->st_callbacks, "conflicts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_callbacks_reacts = rrddim_add(c->st_callbacks, "reacts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrdlabels_add(c->st_callbacks->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_inserts, (collected_number)stats.callbacks.inserts); + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_conflicts, (collected_number)stats.callbacks.conflicts); + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_reacts, (collected_number)stats.callbacks.reacts); + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_deletes, (collected_number)stats.callbacks.deletes); + + rrdset_done(c->st_callbacks); + } + + // ------------------------------------------------------------------------ + + total = 0; + 
load_dictionary_stats_entry(memory.indexed); + load_dictionary_stats_entry(memory.values); + load_dictionary_stats_entry(memory.dict); + + if(c->st_memory || total != 0) { + if (unlikely(!c->st_memory)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.memory", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.memory", c->context_prefix); + + c->st_memory = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Memory" + , "bytes" + , "netdata" + , "stats" + , c->priority + 4 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + c->rd_memory_indexed = rrddim_add(c->st_memory, "index", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_memory_values = rrddim_add(c->st_memory, "data", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_memory_dict = rrddim_add(c->st_memory, "structures", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrdlabels_add(c->st_memory->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_memory, c->rd_memory_indexed, (collected_number)stats.memory.indexed); + rrddim_set_by_pointer(c->st_memory, c->rd_memory_values, (collected_number)stats.memory.values); + rrddim_set_by_pointer(c->st_memory, c->rd_memory_dict, (collected_number)stats.memory.dict); + + rrdset_done(c->st_memory); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(spin_locks.use_spins); + load_dictionary_stats_entry(spin_locks.search_spins); + load_dictionary_stats_entry(spin_locks.insert_spins); + load_dictionary_stats_entry(spin_locks.delete_spins); + + if(c->st_spins || total != 0) { + if (unlikely(!c->st_spins)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.spins", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, 
"netdata.%s.category.spins", c->context_prefix); + + c->st_spins = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Spins" + , "count" + , "netdata" + , "stats" + , c->priority + 5 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_spins_use = rrddim_add(c->st_spins, "use", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_spins_search = rrddim_add(c->st_spins, "search", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_spins_insert = rrddim_add(c->st_spins, "insert", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_spins_delete = rrddim_add(c->st_spins, "delete", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrdlabels_add(c->st_spins->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_spins, c->rd_spins_use, (collected_number)stats.spin_locks.use_spins); + rrddim_set_by_pointer(c->st_spins, c->rd_spins_search, (collected_number)stats.spin_locks.search_spins); + rrddim_set_by_pointer(c->st_spins, c->rd_spins_insert, (collected_number)stats.spin_locks.insert_spins); + rrddim_set_by_pointer(c->st_spins, c->rd_spins_delete, (collected_number)stats.spin_locks.delete_spins); + + rrdset_done(c->st_spins); + } +} + +#ifdef NETDATA_TRACE_ALLOCATIONS + +struct memory_trace_data { + RRDSET *st_memory; + RRDSET *st_allocations; + RRDSET *st_avg_alloc; + RRDSET *st_ops; +}; + +static int do_memory_trace_item(void *item, void *data) { + struct memory_trace_data *tmp = data; + struct malloc_trace *p = item; + + // ------------------------------------------------------------------------ + + if(!p->rd_bytes) + p->rd_bytes = rrddim_add(tmp->st_memory, p->function, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + collected_number bytes = (collected_number)__atomic_load_n(&p->bytes, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_memory, p->rd_bytes, bytes); + + // ------------------------------------------------------------------------ + + if(!p->rd_allocations) + p->rd_allocations = 
rrddim_add(tmp->st_allocations, p->function, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + collected_number allocs = (collected_number)__atomic_load_n(&p->allocations, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_allocations, p->rd_allocations, allocs); + + // ------------------------------------------------------------------------ + + if(!p->rd_avg_alloc) + p->rd_avg_alloc = rrddim_add(tmp->st_avg_alloc, p->function, NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + + collected_number avg_alloc = (allocs)?(bytes * 100 / allocs):0; + rrddim_set_by_pointer(tmp->st_avg_alloc, p->rd_avg_alloc, avg_alloc); + + // ------------------------------------------------------------------------ + + if(!p->rd_ops) + p->rd_ops = rrddim_add(tmp->st_ops, p->function, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + collected_number ops = 0; + ops += (collected_number)__atomic_load_n(&p->malloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->calloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->realloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->strdup_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->free_calls, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_ops, p->rd_ops, ops); + + // ------------------------------------------------------------------------ + + return 1; +} +static void malloc_trace_statistics(void) { + static struct memory_trace_data tmp = { + .st_memory = NULL, + .st_allocations = NULL, + .st_avg_alloc = NULL, + .st_ops = NULL, + }; + + if(!tmp.st_memory) { + tmp.st_memory = rrdset_create_localhost( + "netdata" + , "memory_size" + , NULL + , "memory" + , "netdata.memory.size" + , "Netdata Memory Used by Function" + , "bytes" + , "netdata" + , "stats" + , 900000 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + if(!tmp.st_ops) { + tmp.st_ops = rrdset_create_localhost( + "netdata" + , "memory_operations" + , NULL + , "memory" + , 
"netdata.memory.operations" + , "Netdata Memory Operations by Function" + , "ops/s" + , "netdata" + , "stats" + , 900001 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + } + + if(!tmp.st_allocations) { + tmp.st_allocations = rrdset_create_localhost( + "netdata" + , "memory_allocations" + , NULL + , "memory" + , "netdata.memory.allocations" + , "Netdata Memory Allocations by Function" + , "allocations" + , "netdata" + , "stats" + , 900002 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + if(!tmp.st_avg_alloc) { + tmp.st_avg_alloc = rrdset_create_localhost( + "netdata" + , "memory_avg_alloc" + , NULL + , "memory" + , "netdata.memory.avg_alloc" + , "Netdata Average Allocation Size by Function" + , "bytes" + , "netdata" + , "stats" + , 900003 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + } + + malloc_trace_walkthrough(do_memory_trace_item, &tmp); + + rrdset_done(tmp.st_memory); + rrdset_done(tmp.st_ops); + rrdset_done(tmp.st_allocations); + rrdset_done(tmp.st_avg_alloc); +} +#endif + +static void dictionary_statistics(void) { + for(int i = 0; dictionary_categories[i].stats ;i++) { + update_dictionary_category_charts(&dictionary_categories[i]); + } +} + +// --------------------------------------------------------------------------------------------------------------------- // worker utilization #define WORKERS_MIN_PERCENT_DEFAULT 10000.0 -struct worker_job_type { - char name[WORKER_UTILIZATION_MAX_JOB_NAME_LENGTH + 1]; +struct worker_job_type_gs { + STRING *name; + STRING *units; + size_t jobs_started; usec_t busy_time; RRDDIM *rd_jobs_started; RRDDIM *rd_busy_time; + + WORKER_METRIC_TYPE type; + NETDATA_DOUBLE min_value; + NETDATA_DOUBLE max_value; + NETDATA_DOUBLE sum_value; + size_t count_value; + + RRDSET *st; + RRDDIM *rd_min; + RRDDIM *rd_max; + RRDDIM *rd_avg; }; struct worker_thread { pid_t pid; - int enabled; + bool enabled; - int cpu_enabled; + bool cpu_enabled; double cpu; kernel_uint_t utime; @@ -925,6 +1941,7 @@ 
struct worker_thread { usec_t busy_time; struct worker_thread *next; + struct worker_thread *prev; }; struct worker_utilization { @@ -935,8 +1952,9 @@ struct worker_utilization { char *name_lowercase; - struct worker_job_type per_job_type[WORKER_UTILIZATION_MAX_JOB_TYPES]; + struct worker_job_type_gs per_job_type[WORKER_UTILIZATION_MAX_JOB_TYPES]; + size_t workers_max_job_id; size_t workers_registered; size_t workers_busy; usec_t workers_total_busy_time; @@ -983,6 +2001,7 @@ static struct worker_utilization all_workers_utilization[] = { { .name = "WEB", .family = "workers web server", .priority = 1000000 }, { .name = "ACLKQUERY", .family = "workers aclk query", .priority = 1000000 }, { .name = "ACLKSYNC", .family = "workers aclk host sync", .priority = 1000000 }, + { .name = "METASYNC", .family = "workers metadata sync", .priority = 1000000 }, { .name = "PLUGINSD", .family = "workers plugins.d", .priority = 1000000 }, { .name = "STATSD", .family = "workers plugin statsd", .priority = 1000000 }, { .name = "STATSDFLUSH", .family = "workers plugin statsd flush", .priority = 1000000 }, @@ -996,7 +2015,9 @@ static struct worker_utilization all_workers_utilization[] = { { .name = "TC", .family = "workers plugin tc", .priority = 1000000 }, { .name = "TIMEX", .family = "workers plugin timex", .priority = 1000000 }, { .name = "IDLEJITTER", .family = "workers plugin idlejitter", .priority = 1000000 }, - { .name = "RRDCONTEXT", .family = "workers aclk contexts", .priority = 1000000 }, + { .name = "RRDCONTEXT", .family = "workers contexts", .priority = 1000000 }, + { .name = "REPLICATION", .family = "workers replication sender", .priority = 1000000 }, + { .name = "SERVICE", .family = "workers service", .priority = 1000000 }, // has to be terminated with a NULL { .name = NULL, .family = NULL } @@ -1027,21 +2048,21 @@ static void workers_total_cpu_utilization_chart(void) { RRDSET_TYPE_STACKED); } - rrdset_next(st); - for(i = 0; all_workers_utilization[i].name ;i++) { struct 
worker_utilization *wu = &all_workers_utilization[i]; if(!wu->workers_cpu_registered) continue; if(!wu->rd_total_cpu_utilizaton) - wu->rd_total_cpu_utilizaton = rrddim_add(st, wu->name_lowercase, NULL, 1, 10000ULL, RRD_ALGORITHM_ABSOLUTE); + wu->rd_total_cpu_utilizaton = rrddim_add(st, wu->name_lowercase, NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); - rrddim_set_by_pointer(st, wu->rd_total_cpu_utilizaton, (collected_number)((double)wu->workers_cpu_total * 10000.0)); + rrddim_set_by_pointer(st, wu->rd_total_cpu_utilizaton, (collected_number)((double)wu->workers_cpu_total * 100.0)); } rrdset_done(st); } +#define WORKER_CHART_DECIMAL_PRECISION 100 + static void workers_utilization_update_chart(struct worker_utilization *wu) { if(!wu->workers_registered) return; @@ -1079,28 +2100,26 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { // we add the min and max dimensions only when we have multiple workers if(unlikely(!wu->rd_workers_time_min && wu->workers_registered > 1)) - wu->rd_workers_time_min = rrddim_add(wu->st_workers_time, "min", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + wu->rd_workers_time_min = rrddim_add(wu->st_workers_time, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); if(unlikely(!wu->rd_workers_time_max && wu->workers_registered > 1)) - wu->rd_workers_time_max = rrddim_add(wu->st_workers_time, "max", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + wu->rd_workers_time_max = rrddim_add(wu->st_workers_time, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); if(unlikely(!wu->rd_workers_time_avg)) - wu->rd_workers_time_avg = rrddim_add(wu->st_workers_time, "average", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); - - rrdset_next(wu->st_workers_time); + wu->rd_workers_time_avg = rrddim_add(wu->st_workers_time, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); if(unlikely(wu->workers_min_busy_time == WORKERS_MIN_PERCENT_DEFAULT)) wu->workers_min_busy_time = 0.0; 
if(wu->rd_workers_time_min) - rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_min, (collected_number)((double)wu->workers_min_busy_time * 10000.0)); + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_min, (collected_number)((double)wu->workers_min_busy_time * WORKER_CHART_DECIMAL_PRECISION)); if(wu->rd_workers_time_max) - rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_max, (collected_number)((double)wu->workers_max_busy_time * 10000.0)); + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_max, (collected_number)((double)wu->workers_max_busy_time * WORKER_CHART_DECIMAL_PRECISION)); if(wu->workers_total_duration == 0) rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_avg, 0); else - rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_avg, (collected_number)((double)wu->workers_total_busy_time * 100.0 * 10000.0 / (double)wu->workers_total_duration)); + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_avg, (collected_number)((double)wu->workers_total_busy_time * 100.0 * WORKER_CHART_DECIMAL_PRECISION / (double)wu->workers_total_duration)); rrdset_done(wu->st_workers_time); @@ -1132,28 +2151,26 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { } if (unlikely(!wu->rd_workers_cpu_min && wu->workers_registered > 1)) - wu->rd_workers_cpu_min = rrddim_add(wu->st_workers_cpu, "min", NULL, 1, 10000ULL, RRD_ALGORITHM_ABSOLUTE); + wu->rd_workers_cpu_min = rrddim_add(wu->st_workers_cpu, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); if (unlikely(!wu->rd_workers_cpu_max && wu->workers_registered > 1)) - wu->rd_workers_cpu_max = rrddim_add(wu->st_workers_cpu, "max", NULL, 1, 10000ULL, RRD_ALGORITHM_ABSOLUTE); + wu->rd_workers_cpu_max = rrddim_add(wu->st_workers_cpu, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); if(unlikely(!wu->rd_workers_cpu_avg)) - wu->rd_workers_cpu_avg = rrddim_add(wu->st_workers_cpu, 
"average", NULL, 1, 10000ULL, RRD_ALGORITHM_ABSOLUTE); - - rrdset_next(wu->st_workers_cpu); + wu->rd_workers_cpu_avg = rrddim_add(wu->st_workers_cpu, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); if(unlikely(wu->workers_cpu_min == WORKERS_MIN_PERCENT_DEFAULT)) wu->workers_cpu_min = 0.0; if(wu->rd_workers_cpu_min) - rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_min, (collected_number)(wu->workers_cpu_min * 10000ULL)); + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_min, (collected_number)(wu->workers_cpu_min * WORKER_CHART_DECIMAL_PRECISION)); if(wu->rd_workers_cpu_max) - rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_max, (collected_number)(wu->workers_cpu_max * 10000ULL)); + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_max, (collected_number)(wu->workers_cpu_max * WORKER_CHART_DECIMAL_PRECISION)); if(wu->workers_cpu_registered == 0) rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_avg, 0); else - rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_avg, (collected_number)( wu->workers_cpu_total * 10000ULL / (NETDATA_DOUBLE)wu->workers_cpu_registered )); + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_avg, (collected_number)( wu->workers_cpu_total * WORKER_CHART_DECIMAL_PRECISION / (NETDATA_DOUBLE)wu->workers_cpu_registered )); rrdset_done(wu->st_workers_cpu); } @@ -1184,15 +2201,16 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { ); } - rrdset_next(wu->st_workers_jobs_per_job_type); - { size_t i; - for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { - if (wu->per_job_type[i].name[0]) { + for(i = 0; i <= wu->workers_max_job_id ;i++) { + if(unlikely(wu->per_job_type[i].type != WORKER_METRIC_IDLE_BUSY)) + continue; + + if (wu->per_job_type[i].name) { if(unlikely(!wu->per_job_type[i].rd_jobs_started)) - wu->per_job_type[i].rd_jobs_started = rrddim_add(wu->st_workers_jobs_per_job_type, 
wu->per_job_type[i].name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_jobs_started = rrddim_add(wu->st_workers_jobs_per_job_type, string2str(wu->per_job_type[i].name), NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rrddim_set_by_pointer(wu->st_workers_jobs_per_job_type, wu->per_job_type[i].rd_jobs_started, (collected_number)(wu->per_job_type[i].jobs_started)); } @@ -1226,15 +2244,16 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { ); } - rrdset_next(wu->st_workers_busy_per_job_type); - { size_t i; - for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { - if (wu->per_job_type[i].name[0]) { + for(i = 0; i <= wu->workers_max_job_id ;i++) { + if(unlikely(wu->per_job_type[i].type != WORKER_METRIC_IDLE_BUSY)) + continue; + + if (wu->per_job_type[i].name) { if(unlikely(!wu->per_job_type[i].rd_busy_time)) - wu->per_job_type[i].rd_busy_time = rrddim_add(wu->st_workers_busy_per_job_type, wu->per_job_type[i].name, NULL, 1, USEC_PER_MS, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_busy_time = rrddim_add(wu->st_workers_busy_per_job_type, string2str(wu->per_job_type[i].name), NULL, 1, USEC_PER_MS, RRD_ALGORITHM_ABSOLUTE); rrddim_set_by_pointer(wu->st_workers_busy_per_job_type, wu->per_job_type[i].rd_busy_time, (collected_number)(wu->per_job_type[i].busy_time)); } @@ -1271,13 +2290,123 @@ static void workers_utilization_update_chart(struct worker_utilization *wu) { wu->rd_workers_threads_free = rrddim_add(wu->st_workers_threads, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); wu->rd_workers_threads_busy = rrddim_add(wu->st_workers_threads, "busy", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } - else - rrdset_next(wu->st_workers_threads); rrddim_set_by_pointer(wu->st_workers_threads, wu->rd_workers_threads_free, (collected_number)(wu->workers_registered - wu->workers_busy)); rrddim_set_by_pointer(wu->st_workers_threads, wu->rd_workers_threads_busy, (collected_number)(wu->workers_busy)); rrdset_done(wu->st_workers_threads); } + + // 
---------------------------------------------------------------------- + // custom metric types WORKER_METRIC_ABSOLUTE + + { + size_t i; + for (i = 0; i <= wu->workers_max_job_id; i++) { + if(wu->per_job_type[i].type != WORKER_METRIC_ABSOLUTE) + continue; + + if(!wu->per_job_type[i].count_value) + continue; + + if(!wu->per_job_type[i].st) { + size_t job_name_len = string_strlen(wu->per_job_type[i].name); + if(job_name_len > RRD_ID_LENGTH_MAX) job_name_len = RRD_ID_LENGTH_MAX; + + char job_name_sanitized[job_name_len + 1]; + rrdset_strncpyz_name(job_name_sanitized, string2str(wu->per_job_type[i].name), job_name_len); + + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_%s_value_%s", wu->name_lowercase, job_name_sanitized); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.value.%s", wu->name_lowercase, job_name_sanitized); + + char title[1000 + 1]; + snprintf(title, 1000, "Netdata Workers %s Value of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name)); + + wu->per_job_type[i].st = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , title + , (wu->per_job_type[i].units)?string2str(wu->per_job_type[i].units):"value" + , "netdata" + , "stats" + , wu->priority + 5 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + wu->per_job_type[i].rd_min = rrddim_add(wu->per_job_type[i].st, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_max = rrddim_add(wu->per_job_type[i].st, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_avg = rrddim_add(wu->per_job_type[i].st, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_min, (collected_number)(wu->per_job_type[i].min_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, 
wu->per_job_type[i].rd_max, (collected_number)(wu->per_job_type[i].max_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_avg, (collected_number)(wu->per_job_type[i].sum_value / wu->per_job_type[i].count_value * WORKER_CHART_DECIMAL_PRECISION)); + + rrdset_done(wu->per_job_type[i].st); + } + } + + // ---------------------------------------------------------------------- + // custom metric types WORKER_METRIC_INCREMENTAL + + { + size_t i; + for (i = 0; i <= wu->workers_max_job_id ; i++) { + if(wu->per_job_type[i].type != WORKER_METRIC_INCREMENT && wu->per_job_type[i].type != WORKER_METRIC_INCREMENTAL_TOTAL) + continue; + + if(!wu->per_job_type[i].count_value) + continue; + + if(!wu->per_job_type[i].st) { + size_t job_name_len = string_strlen(wu->per_job_type[i].name); + if(job_name_len > RRD_ID_LENGTH_MAX) job_name_len = RRD_ID_LENGTH_MAX; + + char job_name_sanitized[job_name_len + 1]; + rrdset_strncpyz_name(job_name_sanitized, string2str(wu->per_job_type[i].name), job_name_len); + + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_%s_rate_%s", wu->name_lowercase, job_name_sanitized); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.rate.%s", wu->name_lowercase, job_name_sanitized); + + char title[1000 + 1]; + snprintf(title, 1000, "Netdata Workers %s Rate of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name)); + + wu->per_job_type[i].st = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , title + , (wu->per_job_type[i].units)?string2str(wu->per_job_type[i].units):"rate" + , "netdata" + , "stats" + , wu->priority + 5 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + wu->per_job_type[i].rd_min = rrddim_add(wu->per_job_type[i].st, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_max = rrddim_add(wu->per_job_type[i].st, 
"max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_avg = rrddim_add(wu->per_job_type[i].st, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_min, (collected_number)(wu->per_job_type[i].min_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_max, (collected_number)(wu->per_job_type[i].max_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_avg, (collected_number)(wu->per_job_type[i].sum_value / wu->per_job_type[i].count_value * WORKER_CHART_DECIMAL_PRECISION)); + + rrdset_done(wu->per_job_type[i].st); + } + } } static void workers_utilization_reset_statistics(struct worker_utilization *wu) { @@ -1304,12 +2433,17 @@ static void workers_utilization_reset_statistics(struct worker_utilization *wu) wu->per_job_type[i].jobs_started = 0; wu->per_job_type[i].busy_time = 0; + + wu->per_job_type[i].min_value = NAN; + wu->per_job_type[i].max_value = NAN; + wu->per_job_type[i].sum_value = NAN; + wu->per_job_type[i].count_value = 0; } struct worker_thread *wt; for(wt = wu->threads; wt ; wt = wt->next) { - wt->enabled = 0; - wt->cpu_enabled = 0; + wt->enabled = false; + wt->cpu_enabled = false; } } @@ -1337,31 +2471,33 @@ static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_ #endif } +static Pvoid_t workers_by_pid_JudyL_array = NULL; + static void workers_threads_cleanup(struct worker_utilization *wu) { - struct worker_thread *t; - - // free threads at the beginning of the linked list - while(wu->threads && !wu->threads->enabled) { - t = wu->threads; - wu->threads = t->next; - t->next = NULL; - freez(t); - } + netdata_thread_disable_cancelability(); - // free threads in the middle of the linked list - for(t = wu->threads; t && t->next ; t = t->next) { - if(t->next->enabled) continue; 
+ struct worker_thread *t = wu->threads; + while(t) { + struct worker_thread *next = t->next; - struct worker_thread *to_remove = t->next; - t->next = to_remove->next; - to_remove->next = NULL; - freez(to_remove); + if(!t->enabled) { + JudyLDel(&workers_by_pid_JudyL_array, t->pid, PJE0); + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(wu->threads, t, prev, next); + freez(t); + } + t = next; } -} -static struct worker_thread *worker_thread_find(struct worker_utilization *wu, pid_t pid) { - struct worker_thread *wt; - for(wt = wu->threads; wt && wt->pid != pid ; wt = wt->next) ; + netdata_thread_enable_cancelability(); + } + +static struct worker_thread *worker_thread_find(struct worker_utilization *wu __maybe_unused, pid_t pid) { + struct worker_thread *wt = NULL; + + Pvoid_t *PValue = JudyLGet(workers_by_pid_JudyL_array, pid, PJE0); + if(PValue) + wt = *PValue; + return wt; } @@ -1371,9 +2507,11 @@ static struct worker_thread *worker_thread_create(struct worker_utilization *wu, wt = (struct worker_thread *)callocz(1, sizeof(struct worker_thread)); wt->pid = pid; + Pvoid_t *PValue = JudyLIns(&workers_by_pid_JudyL_array, pid, PJE0); + *PValue = wt; + // link it - wt->next = wu->threads; - wu->threads = wt; + DOUBLE_LINKED_LIST_APPEND_UNSAFE(wu->threads, wt, prev, next); return wt; } @@ -1386,13 +2524,27 @@ static struct worker_thread *worker_thread_find_or_create(struct worker_utilizat return wt; } -static void worker_utilization_charts_callback(void *ptr, pid_t pid __maybe_unused, const char *thread_tag __maybe_unused, size_t utilization_usec __maybe_unused, size_t duration_usec __maybe_unused, size_t jobs_started __maybe_unused, size_t is_running __maybe_unused, const char **job_types_names __maybe_unused, size_t *job_types_jobs_started __maybe_unused, usec_t *job_types_busy_time __maybe_unused) { +static void worker_utilization_charts_callback(void *ptr + , pid_t pid __maybe_unused + , const char *thread_tag __maybe_unused + , size_t max_job_id __maybe_unused + , size_t 
utilization_usec __maybe_unused + , size_t duration_usec __maybe_unused + , size_t jobs_started __maybe_unused + , size_t is_running __maybe_unused + , STRING **job_types_names __maybe_unused + , STRING **job_types_units __maybe_unused + , WORKER_METRIC_TYPE *job_types_metric_types __maybe_unused + , size_t *job_types_jobs_started __maybe_unused + , usec_t *job_types_busy_time __maybe_unused + , NETDATA_DOUBLE *job_types_custom_metrics __maybe_unused + ) { struct worker_utilization *wu = (struct worker_utilization *)ptr; // find the worker_thread in the list struct worker_thread *wt = worker_thread_find_or_create(wu, pid); - wt->enabled = 1; + wt->enabled = true; wt->busy_time = utilization_usec; wt->jobs_started = jobs_started; @@ -1400,6 +2552,9 @@ static void worker_utilization_charts_callback(void *ptr, pid_t pid __maybe_unus wt->stime_old = wt->stime; wt->collected_time_old = wt->collected_time; + if(max_job_id > wu->workers_max_job_id) + wu->workers_max_job_id = max_job_id; + wu->workers_total_busy_time += utilization_usec; wu->workers_total_duration += duration_usec; wu->workers_total_jobs_started += jobs_started; @@ -1415,13 +2570,33 @@ static void worker_utilization_charts_callback(void *ptr, pid_t pid __maybe_unus // accumulate per job type statistics size_t i; - for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { + for(i = 0; i <= max_job_id ;i++) { + if(!wu->per_job_type[i].name && job_types_names[i]) + wu->per_job_type[i].name = string_dup(job_types_names[i]); + + if(!wu->per_job_type[i].units && job_types_units[i]) + wu->per_job_type[i].units = string_dup(job_types_units[i]); + + wu->per_job_type[i].type = job_types_metric_types[i]; + wu->per_job_type[i].jobs_started += job_types_jobs_started[i]; wu->per_job_type[i].busy_time += job_types_busy_time[i]; - // new job type found - if(unlikely(!wu->per_job_type[i].name[0] && job_types_names[i])) - strncpyz(wu->per_job_type[i].name, job_types_names[i], WORKER_UTILIZATION_MAX_JOB_NAME_LENGTH); + 
NETDATA_DOUBLE value = job_types_custom_metrics[i]; + if(netdata_double_isnumber(value)) { + if(!wu->per_job_type[i].count_value) { + wu->per_job_type[i].count_value = 1; + wu->per_job_type[i].min_value = value; + wu->per_job_type[i].max_value = value; + wu->per_job_type[i].sum_value = value; + } + else { + wu->per_job_type[i].count_value++; + wu->per_job_type[i].sum_value += value; + if(value < wu->per_job_type[i].min_value) wu->per_job_type[i].min_value = value; + if(value > wu->per_job_type[i].max_value) wu->per_job_type[i].max_value = value; + } + } } // find its CPU utilization @@ -1433,13 +2608,13 @@ static void worker_utilization_charts_callback(void *ptr, pid_t pid __maybe_unus double stime = (double)(wt->stime - wt->stime_old) / (double)system_hz * 100.0 * (double)USEC_PER_SEC / (double)delta; double cpu = utime + stime; wt->cpu = cpu; - wt->cpu_enabled = 1; + wt->cpu_enabled = true; wu->workers_cpu_total += cpu; if(cpu < wu->workers_cpu_min) wu->workers_cpu_min = cpu; if(cpu > wu->workers_cpu_max) wu->workers_cpu_max = cpu; } - wu->workers_cpu_registered += wt->cpu_enabled; + wu->workers_cpu_registered += (wt->cpu_enabled) ? 
1 : 0; } static void worker_utilization_charts(void) { @@ -1462,7 +2637,7 @@ static void worker_utilization_charts(void) { } static void worker_utilization_finish(void) { - int i; + int i, j; for(i = 0; all_workers_utilization[i].name ;i++) { struct worker_utilization *wu = &all_workers_utilization[i]; @@ -1471,9 +2646,18 @@ static void worker_utilization_finish(void) { wu->name_lowercase = NULL; } + for(j = 0; j < WORKER_UTILIZATION_MAX_JOB_TYPES ;j++) { + string_freez(wu->per_job_type[j].name); + wu->per_job_type[j].name = NULL; + + string_freez(wu->per_job_type[j].units); + wu->per_job_type[j].units = NULL; + } + // mark all threads as not enabled struct worker_thread *t; - for(t = wu->threads; t ; t = t->next) t->enabled = 0; + for(t = wu->threads; t ; t = t->next) + t->enabled = false; // let the cleanup job free them workers_threads_cleanup(wu); @@ -1481,6 +2665,19 @@ static void worker_utilization_finish(void) { } // --------------------------------------------------------------------------------------------------------------------- +// global statistics thread + + +static void global_statistics_register_workers(void) { + worker_register("STATS"); + worker_register_job_name(WORKER_JOB_GLOBAL, "global"); + worker_register_job_name(WORKER_JOB_REGISTRY, "registry"); + worker_register_job_name(WORKER_JOB_WORKERS, "workers"); + worker_register_job_name(WORKER_JOB_DBENGINE, "dbengine"); + worker_register_job_name(WORKER_JOB_STRINGS, "strings"); + worker_register_job_name(WORKER_JOB_DICTIONARIES, "dictionaries"); + worker_register_job_name(WORKER_JOB_MALLOC_TRACE, "malloc_trace"); +} static void global_statistics_cleanup(void *ptr) { @@ -1498,11 +2695,7 @@ static void global_statistics_cleanup(void *ptr) void *global_statistics_main(void *ptr) { - worker_register("STATS"); - worker_register_job_name(WORKER_JOB_GLOBAL, "global"); - worker_register_job_name(WORKER_JOB_REGISTRY, "registry"); - worker_register_job_name(WORKER_JOB_WORKERS, "workers"); - 
worker_register_job_name(WORKER_JOB_DBENGINE, "dbengine"); + global_statistics_register_workers(); netdata_thread_cleanup_push(global_statistics_cleanup, ptr); @@ -1523,22 +2716,78 @@ void *global_statistics_main(void *ptr) worker_is_idle(); heartbeat_next(&hb, step); - worker_is_busy(WORKER_JOB_WORKERS); - worker_utilization_charts(); - worker_is_busy(WORKER_JOB_GLOBAL); global_statistics_charts(); worker_is_busy(WORKER_JOB_REGISTRY); registry_statistics(); - worker_is_busy(WORKER_JOB_DBENGINE); - dbengine_statistics_charts(); + if(dbengine_enabled) { + worker_is_busy(WORKER_JOB_DBENGINE); + dbengine_statistics_charts(); + } worker_is_busy(WORKER_JOB_HEARTBEAT); update_heartbeat_charts(); + + worker_is_busy(WORKER_JOB_STRINGS); + update_strings_charts(); + + worker_is_busy(WORKER_JOB_DICTIONARIES); + dictionary_statistics(); + +#ifdef NETDATA_TRACE_ALLOCATIONS + worker_is_busy(WORKER_JOB_MALLOC_TRACE); + malloc_trace_statistics(); +#endif } netdata_thread_cleanup_pop(1); return NULL; } + + +// --------------------------------------------------------------------------------------------------------------------- +// workers thread + +static void global_statistics_workers_cleanup(void *ptr) +{ + worker_unregister(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + worker_utilization_finish(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *global_statistics_workers_main(void *ptr) +{ + global_statistics_register_workers(); + + netdata_thread_cleanup_push(global_statistics_workers_cleanup, ptr); + + int update_every = + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; + + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + while 
(!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, step); + + worker_is_busy(WORKER_JOB_WORKERS); + worker_utilization_charts(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + diff --git a/daemon/global_statistics.h b/daemon/global_statistics.h index 268b5319e..f7d6775cf 100644 --- a/daemon/global_statistics.h +++ b/daemon/global_statistics.h @@ -3,20 +3,28 @@ #ifndef NETDATA_GLOBAL_STATISTICS_H #define NETDATA_GLOBAL_STATISTICS_H 1 -#include "common.h" +#include "database/rrd.h" // ---------------------------------------------------------------------------- // global statistics -extern void rrdr_query_completed(uint64_t db_points_read, uint64_t result_points_generated); +void global_statistics_ml_query_completed(size_t points_read); +void global_statistics_exporters_query_completed(size_t points_read); +void global_statistics_backfill_query_completed(size_t points_read); +void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source); +void global_statistics_sqlite3_query_completed(bool success, bool busy, bool locked); +void global_statistics_sqlite3_row_completed(void); +void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array); -extern void finished_web_request_statistics(uint64_t dt, - uint64_t bytes_received, - uint64_t bytes_sent, - uint64_t content_size, - uint64_t compressed_content_size); +void global_statistics_web_request_completed(uint64_t dt, + uint64_t bytes_received, + uint64_t bytes_sent, + uint64_t content_size, + uint64_t compressed_content_size); -extern uint64_t web_client_connected(void); -extern void web_client_disconnected(void); +uint64_t global_statistics_web_client_connected(void); +void global_statistics_web_client_disconnected(void); + +extern bool global_statistics_enabled; #endif /* NETDATA_GLOBAL_STATISTICS_H */ diff --git a/daemon/main.c b/daemon/main.c index ada3c14f2..6b591385d 100644 --- 
a/daemon/main.c +++ b/daemon/main.c @@ -4,6 +4,7 @@ #include "buildinfo.h" #include "static_threads.h" +bool unittest_running = false; int netdata_zero_metrics_enabled; int netdata_anonymous_statistics_enabled; @@ -55,13 +56,19 @@ void netdata_cleanup_and_exit(int ret) { // free the database info("EXIT: freeing database memory..."); #ifdef ENABLE_DBENGINE - for(int tier = 0; tier < storage_tiers ; tier++) - rrdeng_prepare_exit(multidb_ctx[tier]); + if(dbengine_enabled) { + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_prepare_exit(multidb_ctx[tier]); + } #endif + metadata_sync_shutdown_prepare(); rrdhost_free_all(); + metadata_sync_shutdown(); #ifdef ENABLE_DBENGINE - for(int tier = 0; tier < storage_tiers ; tier++) - rrdeng_exit(multidb_ctx[tier]); + if(dbengine_enabled) { + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_exit(multidb_ctx[tier]); + } #endif } sql_close_context_database(); @@ -255,7 +262,8 @@ void cancel_main_threads() { for (i = 0; static_threads[i].name != NULL ; i++) freez(static_threads[i].thread); - free(static_threads); + + freez(static_threads); } struct option_def option_definitions[] = { @@ -303,7 +311,7 @@ int help(int exitcode) { " | '-' '-' '-' '-' real-time performance monitoring, done right! \n" " +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->\n" "\n" - " Copyright (C) 2016-2020, Netdata, Inc. <info@netdata.cloud>\n" + " Copyright (C) 2016-2022, Netdata, Inc. 
<info@netdata.cloud>\n" " Released under GNU General Public License v3 or later.\n" " All rights reserved.\n" "\n" @@ -314,7 +322,8 @@ int help(int exitcode) { " License : https://github.com/netdata/netdata/blob/master/LICENSE.md\n" "\n" " Twitter : https://twitter.com/linuxnetdata\n" - " Facebook : https://www.facebook.com/linuxnetdata/\n" + " LinkedIn : https://linkedin.com/company/netdata-cloud/\n" + " Facebook : https://facebook.com/linuxnetdata/\n" "\n" "\n" ); @@ -379,10 +388,10 @@ int help(int exitcode) { static void security_init(){ char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s/ssl/key.pem",netdata_configured_user_config_dir); - security_key = config_get(CONFIG_SECTION_WEB, "ssl key", filename); + netdata_ssl_security_key = config_get(CONFIG_SECTION_WEB, "ssl key", filename); snprintfz(filename, FILENAME_MAX, "%s/ssl/cert.pem",netdata_configured_user_config_dir); - security_cert = config_get(CONFIG_SECTION_WEB, "ssl certificate", filename); + netdata_ssl_security_cert = config_get(CONFIG_SECTION_WEB, "ssl certificate", filename); tls_version = config_get(CONFIG_SECTION_WEB, "tls version", "1.3"); tls_ciphers = config_get(CONFIG_SECTION_WEB, "tls ciphers", "none"); @@ -402,6 +411,9 @@ static void log_init(void) { snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir); stdaccess_filename = config_get(CONFIG_SECTION_LOGS, "access", filename); + snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir); + stdhealth_filename = config_get(CONFIG_SECTION_LOGS, "health", filename); + #ifdef ENABLE_ACLK aclklog_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "conversation log", CONFIG_BOOLEAN_NO); if (aclklog_enabled) { @@ -667,7 +679,7 @@ static void get_netdata_configured_variables() { // ------------------------------------------------------------------------ // get default Database Engine page cache size in MiB - db_engine_use_malloc = config_get_boolean(CONFIG_SECTION_DB, "dbengine page 
cache with malloc", CONFIG_BOOLEAN_NO); + db_engine_use_malloc = config_get_boolean(CONFIG_SECTION_DB, "dbengine page cache with malloc", CONFIG_BOOLEAN_YES); default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb); if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) { error("Invalid page cache size %d given. Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB); @@ -736,12 +748,6 @@ static void get_netdata_configured_variables() { } // -------------------------------------------------------------------- - // rrdcontext - - rrdcontext_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "rrdcontexts", rrdcontext_enabled); - - - // -------------------------------------------------------------------- // get various system parameters get_system_HZ(); @@ -801,12 +807,13 @@ int get_system_info(struct rrdhost_system_info *system_info) { info("Executing %s", script); - FILE *fp = mypopen(script, &command_pid); - if(fp) { + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input); + if(fp_child_output) { char line[200 + 1]; // Removed the double strlens, if the Coverity tainted string warning reappears I'll revert. // One time init code, but I'm curious about the warning... 
- while (fgets(line, 200, fp) != NULL) { + while (fgets(line, 200, fp_child_output) != NULL) { char *value=line; while (*value && *value != '=') value++; if (*value=='=') { @@ -827,7 +834,7 @@ int get_system_info(struct rrdhost_system_info *system_info) { } } } - mypclose(fp, command_pid); + netdata_pclose(fp_child_input, fp_child_output, command_pid); } freez(script); return 0; @@ -976,6 +983,8 @@ int main(int argc, char **argv) { } if(strcmp(optarg, "unittest") == 0) { + unittest_running = true; + if (unit_test_static_threads()) return 1; if (unit_test_buffer()) @@ -987,7 +996,6 @@ int main(int argc, char **argv) { // No call to load the config file on this code-path post_conf_load(&user); get_netdata_configured_variables(); - rrdcontext_enabled = CONFIG_BOOLEAN_NO; default_rrd_update_every = 1; default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; default_health_enabled = 0; @@ -1004,8 +1012,11 @@ int main(int argc, char **argv) { if(test_dbengine()) return 1; #endif if(test_sqlite()) return 1; + if(string_unittest(10000)) return 1; if (dictionary_unittest(10000)) return 1; + if(aral_unittest(10000)) + return 1; if (rrdlabels_unittest()) return 1; if (ctx_unittest()) @@ -1013,6 +1024,9 @@ int main(int argc, char **argv) { fprintf(stderr, "\n\nALL TESTS PASSED\n\n"); return 0; } + else if(strcmp(optarg, "escapetest") == 0) { + return command_argument_sanitization_tests(); + } #ifdef ENABLE_ML_TESTS else if(strcmp(optarg, "mltest") == 0) { return test_ml(argc, argv); @@ -1020,17 +1034,33 @@ int main(int argc, char **argv) { #endif #ifdef ENABLE_DBENGINE else if(strcmp(optarg, "mctest") == 0) { + unittest_running = true; return mc_unittest(); } else if(strcmp(optarg, "ctxtest") == 0) { + unittest_running = true; return ctx_unittest(); } else if(strcmp(optarg, "dicttest") == 0) { + unittest_running = true; return dictionary_unittest(10000); } + else if(strcmp(optarg, "araltest") == 0) { + unittest_running = true; + return aral_unittest(10000); + } + else if(strcmp(optarg, 
"stringtest") == 0) { + unittest_running = true; + return string_unittest(10000); + } else if(strcmp(optarg, "rrdlabelstest") == 0) { + unittest_running = true; return rrdlabels_unittest(); } + else if(strcmp(optarg, "metatest") == 0) { + unittest_running = true; + return metadata_unittest(); + } else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) { optarg += strlen(createdataset_string); unsigned history_seconds = strtoul(optarg, NULL, 0); @@ -1284,6 +1314,7 @@ int main(int argc, char **argv) { } #endif + if(!config_loaded) { load_netdata_conf(NULL, 0); @@ -1291,6 +1322,11 @@ int main(int argc, char **argv) { load_cloud_conf(0); } + char *nd_disable_cloud = getenv("NETDATA_DISABLE_CLOUD"); + if (nd_disable_cloud && !strncmp(nd_disable_cloud, "1", 1)) { + appconfig_set(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", "false"); + } + // ------------------------------------------------------------------------ // initialize netdata @@ -1303,6 +1339,12 @@ int main(int argc, char **argv) { i = (int)config_get_number(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for netdata", 1); if(i > 0) mallopt(M_ARENA_MAX, 1); + + +#ifdef NETDATA_INTERNAL_CHECKS + mallopt(M_PERTURB, 0x5A); + // mallopt(M_MXFAST, 0); +#endif #endif // initialize the system clocks @@ -1401,8 +1443,13 @@ int main(int argc, char **argv) { if(st->enabled && st->init_routine) st->init_routine(); - } + if(st->env_name) + setenv(st->env_name, st->enabled?"YES":"NO", 1); + + if(st->global_variable) + *st->global_variable = (st->enabled) ? 
true : false; + } // -------------------------------------------------------------------- // create the listening sockets diff --git a/daemon/main.h b/daemon/main.h index 63d6c5a09..a4e2b3aa7 100644 --- a/daemon/main.h +++ b/daemon/main.h @@ -22,9 +22,9 @@ struct option_def { const char *default_value; }; -extern void cancel_main_threads(void); -extern int killpid(pid_t pid); -extern void netdata_cleanup_and_exit(int ret) NORETURN; -extern void send_statistics(const char *action, const char *action_result, const char *action_data); +void cancel_main_threads(void); +int killpid(pid_t pid); +void netdata_cleanup_and_exit(int ret) NORETURN; +void send_statistics(const char *action, const char *action_result, const char *action_data); #endif /* NETDATA_MAIN_H */ diff --git a/daemon/service.c b/daemon/service.c index 61cc1281a..a7db7ceb7 100644 --- a/daemon/service.c +++ b/daemon/service.c @@ -5,12 +5,253 @@ /* Run service jobs every X seconds */ #define SERVICE_HEARTBEAT 10 +#define TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT (3600 / 2) +#define ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT 60 + +#define WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK 1 +#define WORKER_JOB_CLEANUP_OBSOLETE_CHARTS 2 +#define WORKER_JOB_ARCHIVE_CHART 3 +#define WORKER_JOB_ARCHIVE_CHART_DIMENSIONS 4 +#define WORKER_JOB_ARCHIVE_DIMENSION 5 +#define WORKER_JOB_CLEANUP_ORPHAN_HOSTS 6 +#define WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS 7 +#define WORKER_JOB_FREE_HOST 9 +#define WORKER_JOB_SAVE_HOST_CHARTS 10 +#define WORKER_JOB_DELETE_HOST_CHARTS 11 +#define WORKER_JOB_FREE_CHART 12 +#define WORKER_JOB_SAVE_CHART 13 +#define WORKER_JOB_DELETE_CHART 14 +#define WORKER_JOB_FREE_DIMENSION 15 + +static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) { + RRDSET *st = rd->rrdset; + + if(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED) || !rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) + return; + + worker_is_busy(WORKER_JOB_ARCHIVE_DIMENSION); + + rrddim_flag_set(rd, RRDDIM_FLAG_ARCHIVED); + rrddim_flag_clear(rd, 
RRDDIM_FLAG_OBSOLETE); + + const char *cache_filename = rrddim_cache_filename(rd); + if(cache_filename) { + info("Deleting dimension file '%s'.", cache_filename); + if (unlikely(unlink(cache_filename) == -1)) + error("Cannot delete dimension file '%s'", cache_filename); + } + + if (rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + rrddimvar_delete_all(rd); + + /* only a collector can mark a chart as obsolete, so we must remove the reference */ + + size_t tiers_available = 0, tiers_said_yes = 0; + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(rd->tiers[tier]) { + tiers_available++; + + if(rd->tiers[tier]->collect_ops->finalize(rd->tiers[tier]->db_collection_handle)) + tiers_said_yes++; + + rd->tiers[tier]->db_collection_handle = NULL; + } + } + + if (tiers_available == tiers_said_yes && tiers_said_yes) { + /* This metric has no data and no references */ + metaqueue_delete_dimension_uuid(&rd->metric_uuid); + } + else { + /* Do not delete this dimension */ + return; + } + } + + worker_is_busy(WORKER_JOB_FREE_DIMENSION); + rrddim_free(st, rd); +} + +static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) { + worker_is_busy(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS); + + RRDDIM *rd; + time_t now = now_realtime_sec(); + + bool done_all_dimensions = true; + + dfe_start_write(st->rrddim_root_index, rd) { + if(unlikely( + all_dimensions || + (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->last_collected_time.tv_sec + rrdset_free_obsolete_time < now)) + )) { + + if(dictionary_acquired_item_references(rd_dfe.item) == 1) { + info("Removing obsolete dimension '%s' (%s) of '%s' (%s).", rrddim_name(rd), rrddim_id(rd), rrdset_name(st), rrdset_id(st)); + svc_rrddim_obsolete_to_archive(rd); + } + else + done_all_dimensions = false; + } + else + done_all_dimensions = false; + } + dfe_done(rd); + + return done_all_dimensions; +} + +static void svc_rrdset_obsolete_to_archive(RRDSET *st) { + worker_is_busy(WORKER_JOB_ARCHIVE_CHART); + + 
if(!svc_rrdset_archive_obsolete_dimensions(st, true)) + return; + + rrdset_flag_set(st, RRDSET_FLAG_ARCHIVED); + rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE); + + rrdcalc_unlink_all_rrdset_alerts(st); + + rrdsetvar_release_and_delete_all(st); + + // has to be run after all dimensions are archived - or use-after-free will occur + rrdvar_delete_all(st->rrdvars); + + if(st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) { + if(rrdhost_option_check(st->rrdhost, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS)) { + worker_is_busy(WORKER_JOB_DELETE_CHART); + rrdset_delete_files(st); + } + else { + worker_is_busy(WORKER_JOB_SAVE_CHART); + rrdset_save(st); + } + + worker_is_busy(WORKER_JOB_FREE_CHART); + rrdset_free(st); + } +} + +static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) { + worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS); + + time_t now = now_realtime_sec(); + RRDSET *st; + rrdset_foreach_reentrant(st, host) { + if(rrdset_is_replicating(st)) + continue; + + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) + && st->last_accessed_time + rrdset_free_obsolete_time < now + && st->last_updated.tv_sec + rrdset_free_obsolete_time < now + && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now + )) { + svc_rrdset_obsolete_to_archive(st); + } + else if(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) { + rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); + svc_rrdset_archive_obsolete_dimensions(st, false); + } + } + rrdset_foreach_done(st); +} + +static void svc_rrdset_check_obsoletion(RRDHOST *host) { + worker_is_busy(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK); + + time_t now = now_realtime_sec(); + time_t last_entry_t; + RRDSET *st; + rrdset_foreach_read(st, host) { + if(rrdset_is_replicating(st)) + continue; + + last_entry_t = rrdset_last_entry_t(st); + + if(last_entry_t && last_entry_t < host->senders_connect_time && + host->senders_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * 
st->update_every + < now) + + rrdset_is_obsolete(st); + } + rrdset_foreach_done(st); +} + +static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() { + worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS); + + rrd_rdlock(); + + RRDHOST *host; + rrdhost_foreach_read(host) { + if(rrdhost_receiver_replicating_charts(host) || rrdhost_sender_replicating_charts(host)) + continue; + + if(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS)) { + rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); + svc_rrdhost_cleanup_obsolete_charts(host); + } + + if(host != localhost + && host->trigger_chart_obsoletion_check + && ( + ( + host->senders_last_chart_command + && host->senders_last_chart_command + host->health_delay_up_to < now_realtime_sec() + ) + || (host->senders_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec()) + ) + ) { + svc_rrdset_check_obsoletion(host); + host->trigger_chart_obsoletion_check = 0; + } + } + + rrd_unlock(); +} + +static void svc_rrdhost_cleanup_orphan_hosts(RRDHOST *protected_host) { + worker_is_busy(WORKER_JOB_CLEANUP_ORPHAN_HOSTS); + rrd_wrlock(); + + time_t now = now_realtime_sec(); + + RRDHOST *host; + +restart_after_removal: + rrdhost_foreach_write(host) { + if(!rrdhost_should_be_removed(host, protected_host, now)) + continue; + + info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid); + + if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) + /* don't delete multi-host DB host files */ + && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance)) + ) { + worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS); + rrdhost_delete_charts(host); + } + else { + worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS); + rrdhost_save_charts(host); + } + + worker_is_busy(WORKER_JOB_FREE_HOST); + rrdhost_free(host, 0); + 
goto restart_after_removal; + } + + rrd_unlock(); +} + static void service_main_cleanup(void *ptr) { struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; debug(D_SYSTEM, "Cleaning up..."); + worker_unregister(); static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; } @@ -20,6 +261,22 @@ static void service_main_cleanup(void *ptr) */ void *service_main(void *ptr) { + worker_register("SERVICE"); + worker_register_job_name(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK, "child chart obsoletion check"); + worker_register_job_name(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS, "cleanup obsolete charts"); + worker_register_job_name(WORKER_JOB_ARCHIVE_CHART, "archive chart"); + worker_register_job_name(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS, "archive chart dimensions"); + worker_register_job_name(WORKER_JOB_ARCHIVE_DIMENSION, "archive dimension"); + worker_register_job_name(WORKER_JOB_CLEANUP_ORPHAN_HOSTS, "cleanup orphan hosts"); + worker_register_job_name(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS, "cleanup obsolete charts on all hosts"); + worker_register_job_name(WORKER_JOB_FREE_HOST, "free host"); + worker_register_job_name(WORKER_JOB_SAVE_HOST_CHARTS, "save host charts"); + worker_register_job_name(WORKER_JOB_DELETE_HOST_CHARTS, "delete host charts"); + worker_register_job_name(WORKER_JOB_FREE_CHART, "free chart"); + worker_register_job_name(WORKER_JOB_SAVE_CHART, "save chart"); + worker_register_job_name(WORKER_JOB_DELETE_CHART, "delete chart"); + worker_register_job_name(WORKER_JOB_FREE_DIMENSION, "free dimension"); + netdata_thread_cleanup_push(service_main_cleanup, ptr); heartbeat_t hb; heartbeat_init(&hb); @@ -28,14 +285,11 @@ void *service_main(void *ptr) debug(D_SYSTEM, "Service thread starts"); while (!netdata_exit) { + worker_is_idle(); heartbeat_next(&hb, step); - rrd_cleanup_obsolete_charts(); - - rrd_wrlock(); - rrdhost_cleanup_orphan_hosts_nolock(localhost); - rrd_unlock(); - + 
svc_rrd_cleanup_obsolete_charts_from_all_hosts(); + svc_rrdhost_cleanup_orphan_hosts(localhost); } netdata_thread_cleanup_pop(1); diff --git a/daemon/signals.c b/daemon/signals.c index b991d46bf..c857a9b57 100644 --- a/daemon/signals.c +++ b/daemon/signals.c @@ -82,7 +82,7 @@ void signals_init(void) { // This prevents zombie processes when running in a container. if (getpid() == 1) { info("SIGNAL: Enabling reaper"); - myp_init(); + netdata_popen_tracking_init(); reaper_enabled = 1; } else { info("SIGNAL: Not enabling reaper"); @@ -139,7 +139,7 @@ void signals_reset(void) { } if (reaper_enabled == 1) - myp_free(); + netdata_popen_tracking_cleanup(); } // reap_child reaps the child identified by pid. @@ -198,7 +198,7 @@ static void reap_children() { } else if (i.si_pid == 0) { // No child exited. return; - } else if (myp_reap(i.si_pid) == 0) { + } else if (netdata_popen_tracking_pid_shoud_be_reaped(i.si_pid) == 0) { // myp managed, sleep for a short time to avoid busy wait while // this is handled by myp. 
usleep(10000); diff --git a/daemon/signals.h b/daemon/signals.h index 3fa2b0f43..12b1ed198 100644 --- a/daemon/signals.h +++ b/daemon/signals.h @@ -3,11 +3,11 @@ #ifndef NETDATA_SIGNALS_H #define NETDATA_SIGNALS_H 1 -extern void signals_init(void); -extern void signals_block(void); -extern void signals_unblock(void); -extern void signals_restore_SIGCHLD(void); -extern void signals_reset(void); -extern void signals_handle(void) NORETURN; +void signals_init(void); +void signals_block(void); +void signals_unblock(void); +void signals_restore_SIGCHLD(void); +void signals_reset(void); +void signals_handle(void) NORETURN; #endif //NETDATA_SIGNALS_H diff --git a/daemon/static_threads.c b/daemon/static_threads.c index 96e279906..b7730bc31 100644 --- a/daemon/static_threads.c +++ b/daemon/static_threads.c @@ -2,16 +2,19 @@ #include "common.h" -extern void *aclk_starter(void *ptr); -extern void *analytics_main(void *ptr); -extern void *checks_main(void *ptr); -extern void *cpuidlejitter_main(void *ptr); -extern void *global_statistics_main(void *ptr); -extern void *health_main(void *ptr); -extern void *pluginsd_main(void *ptr); -extern void *service_main(void *ptr); -extern void *statsd_main(void *ptr); -extern void *timex_main(void *ptr); +void *aclk_main(void *ptr); +void *analytics_main(void *ptr); +void *cpuidlejitter_main(void *ptr); +void *global_statistics_main(void *ptr); +void *global_statistics_workers_main(void *ptr); +void *health_main(void *ptr); +void *pluginsd_main(void *ptr); +void *service_main(void *ptr); +void *statsd_main(void *ptr); +void *timex_main(void *ptr); +void *replication_thread_main(void *ptr __maybe_unused); + +extern bool global_statistics_enabled; const struct netdata_static_thread static_threads_common[] = { { @@ -24,15 +27,6 @@ const struct netdata_static_thread static_threads_common[] = { .start_routine = timex_main }, { - .name = "PLUGIN[check]", - .config_section = CONFIG_SECTION_PLUGINS, - .config_name = "checks", - .enabled = 0, - 
.thread = NULL, - .init_routine = NULL, - .start_routine = checks_main - }, - { .name = "PLUGIN[idlejitter]", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "idlejitter", @@ -52,21 +46,25 @@ const struct netdata_static_thread static_threads_common[] = { }, { .name = "GLOBAL_STATS", - .config_section = NULL, - .config_name = NULL, + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "netdata monitoring", + .env_name = "NETDATA_INTERNALS_MONITORING", + .global_variable = &global_statistics_enabled, .enabled = 1, .thread = NULL, .init_routine = NULL, .start_routine = global_statistics_main }, { - .name = "HEALTH", - .config_section = NULL, - .config_name = NULL, + .name = "WORKERS_STATS", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "netdata monitoring", + .env_name = "NETDATA_INTERNALS_MONITORING", + .global_variable = &global_statistics_enabled, .enabled = 1, .thread = NULL, .init_routine = NULL, - .start_routine = health_main + .start_routine = global_statistics_workers_main }, { .name = "PLUGINSD", @@ -131,12 +129,12 @@ const struct netdata_static_thread static_threads_common[] = { .enabled = 1, .thread = NULL, .init_routine = NULL, - .start_routine = aclk_starter + .start_routine = aclk_main }, #endif { - .name = "rrdcontext", + .name = "RRDCONTEXT", .config_section = NULL, .config_name = NULL, .enabled = 1, @@ -145,7 +143,27 @@ const struct netdata_static_thread static_threads_common[] = { .start_routine = rrdcontext_main }, - {NULL, NULL, NULL, 0, NULL, NULL, NULL} + { + .name = "REPLICATION", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = replication_thread_main + }, + + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } }; struct netdata_static_thread * diff --git a/daemon/static_threads.h 
b/daemon/static_threads.h index dac615e76..9597da704 100644 --- a/daemon/static_threads.h +++ b/daemon/static_threads.h @@ -26,6 +26,12 @@ struct netdata_static_thread { // the threaded worker void *(*start_routine) (void *); + + // the environment variable to create + char *env_name; + + // global variable + bool *global_variable; }; #define NETDATA_MAIN_THREAD_RUNNING CONFIG_BOOLEAN_YES diff --git a/daemon/static_threads_linux.c b/daemon/static_threads_linux.c index 5f7a67768..260b2c176 100644 --- a/daemon/static_threads_linux.c +++ b/daemon/static_threads_linux.c @@ -46,15 +46,45 @@ const struct netdata_static_thread static_threads_linux[] = { .start_routine = cgroups_main }, - {NULL, NULL, NULL, 0, NULL, NULL, NULL} + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } }; const struct netdata_static_thread static_threads_freebsd[] = { - {NULL, NULL, NULL, 0, NULL, NULL, NULL} + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } }; const struct netdata_static_thread static_threads_macos[] = { - {NULL, NULL, NULL, 0, NULL, NULL, NULL} + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } }; struct netdata_static_thread *static_threads_get() { diff --git a/daemon/static_threads_macos.c b/daemon/static_threads_macos.c index ae34a1363..72c032454 100644 --- a/daemon/static_threads_macos.c +++ b/daemon/static_threads_macos.c @@ -12,18 +12,20 @@ const struct netdata_static_thread static_threads_macos[] = { .enabled = 1, .thread = NULL, .init_routine = NULL, - .start_routine = macos_main + .start_routine = macos_main, + .env_name = NULL, + .global_variable = NULL, 
}, - {NULL, NULL, NULL, 0, NULL, NULL, NULL} + {NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL} }; const struct netdata_static_thread static_threads_freebsd[] = { - {NULL, NULL, NULL, 0, NULL, NULL, NULL} + {NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL} }; const struct netdata_static_thread static_threads_linux[] = { - {NULL, NULL, NULL, 0, NULL, NULL, NULL} + {NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL} }; struct netdata_static_thread *static_threads_get() { diff --git a/daemon/system-info.sh b/daemon/system-info.sh index 101ccb0bf..68cdc4812 100755 --- a/daemon/system-info.sh +++ b/daemon/system-info.sh @@ -33,7 +33,8 @@ if [ -z "${VIRTUALIZATION}" ]; then if command -v systemd-detect-virt >/dev/null 2>&1; then VIRTUALIZATION="$(systemd-detect-virt -v)" VIRT_DETECTION="systemd-detect-virt" - CONTAINER=${CONTAINER:-$(systemd-detect-virt -c)} + CONTAINER_DETECT_TMP="$(systemd-detect-virt -c)" + [ -n "$CONTAINER_DETECT_TMP" ] && CONTAINER="$CONTAINER_DETECT_TMP" CONT_DETECTION="systemd-detect-virt" elif command -v lscpu >/dev/null 2>&1; then VIRTUALIZATION=$(lscpu | grep "Hypervisor vendor:" | cut -d: -f 2 | awk '{$1=$1};1') diff --git a/daemon/unit_test.c b/daemon/unit_test.c index 8ba251b9a..f69861869 100644 --- a/daemon/unit_test.c +++ b/daemon/unit_test.c @@ -2,6 +2,61 @@ #include "common.h" +static bool cmd_arg_sanitization_test(const char *expected, const char *src, char *dst, size_t dst_size) { + bool ok = sanitize_command_argument_string(dst, src, dst_size); + + if (!expected) + return ok == false; + + return strcmp(expected, dst) == 0; +} + +bool command_argument_sanitization_tests() { + char dst[1024]; + + for (size_t i = 0; i != 5; i++) { + const char *expected = i == 4 ? "'\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "'", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 9; i++) { + const char *expected = i == 8 ? 
"'\\'''\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "''", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 7; i++) { + const char *expected = i == 6 ? "'\\''a" : NULL; + if (cmd_arg_sanitization_test(expected, "'a", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 7; i++) { + const char *expected = i == 6 ? "a'\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "a'", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 22; i++) { + const char *expected = i == 21 ? "foo'\\''a'\\'''\\'''\\''b" : NULL; + if (cmd_arg_sanitization_test(expected, "--foo'a'''b", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n length: %zu\n", expected, dst, strlen(dst)); + return 1; + } + } + + return 0; +} + static int check_number_printing(void) { struct { NETDATA_DOUBLE n; @@ -1197,16 +1252,18 @@ int run_test(struct test *test) fprintf(stderr, " > %s: feeding position %lu\n", test->name, c+1); } - fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rd->name, test->feed[c].value); + fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rrddim_name(rd), test->feed[c].value); rrddim_set(st, "dim1", test->feed[c].value); last = test->feed[c].value; if(rd2) { - fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rd2->name, test->feed2[c]); + fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rrddim_name(rd2), test->feed2[c]); rrddim_set(st, "dim2", test->feed2[c]); } - rrdset_done(st); + struct timeval now; + now_realtime_timeval(&now); + rrdset_timed_done(st, now, false); // align the first entry to second boundary if(!c) { @@ -1231,7 +1288,7 @@ int run_test(struct test *test) int same = (roundndd(v * 10000000.0) == 
roundndd(n * 10000000.0))?1:0; fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT ", found " NETDATA_DOUBLE_FORMAT ", %s\n", - test->name, rd->name, c+1, + test->name, rrddim_name(rd), c+1, (int64_t)((rrdset_first_entry_t(st) + c * st->update_every) - time_start), n, v, (same)?"OK":"### E R R O R ###"); @@ -1243,7 +1300,7 @@ int run_test(struct test *test) same = (roundndd(v * 10000000.0) == roundndd(n * 10000000.0))?1:0; fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT ", found " NETDATA_DOUBLE_FORMAT ", %s\n", - test->name, rd2->name, c+1, + test->name, rrddim_name(rd2), c+1, (int64_t)((rrdset_first_entry_t(st) + c * st->update_every) - time_start), n, v, (same)?"OK":"### E R R O R ###"); if(!same) errors++; @@ -1258,39 +1315,39 @@ static int test_variable_renames(void) { fprintf(stderr, "Creating chart\n"); RRDSET *st = rrdset_create_localhost("chart", "ID", NULL, "family", "context", "Unit Testing", "a value", "unittest", NULL, 1, 1, RRDSET_TYPE_LINE); - fprintf(stderr, "Created chart with id '%s', name '%s'\n", st->id, st->name); + fprintf(stderr, "Created chart with id '%s', name '%s'\n", rrdset_id(st), rrdset_name(st)); fprintf(stderr, "Creating dimension DIM1\n"); RRDDIM *rd1 = rrddim_add(st, "DIM1", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rd1->id, rd1->name); + fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); fprintf(stderr, "Creating dimension DIM2\n"); RRDDIM *rd2 = rrddim_add(st, "DIM2", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rd2->id, rd2->name); + fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); fprintf(stderr, "Renaming chart to CHARTNAME1\n"); - rrdset_set_name(st, "CHARTNAME1"); - fprintf(stderr, "Renamed chart 
with id '%s' to name '%s'\n", st->id, st->name); + rrdset_reset_name(st, "CHARTNAME1"); + fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", rrdset_id(st), rrdset_name(st)); fprintf(stderr, "Renaming chart to CHARTNAME2\n"); - rrdset_set_name(st, "CHARTNAME2"); - fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", st->id, st->name); + rrdset_reset_name(st, "CHARTNAME2"); + fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", rrdset_id(st), rrdset_name(st)); fprintf(stderr, "Renaming dimension DIM1 to DIM1NAME1\n"); - rrddim_set_name(st, rd1, "DIM1NAME1"); - fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rd1->id, rd1->name); + rrddim_reset_name(st, rd1, "DIM1NAME1"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); fprintf(stderr, "Renaming dimension DIM1 to DIM1NAME2\n"); - rrddim_set_name(st, rd1, "DIM1NAME2"); - fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rd1->id, rd1->name); + rrddim_reset_name(st, rd1, "DIM1NAME2"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); fprintf(stderr, "Renaming dimension DIM2 to DIM2NAME1\n"); - rrddim_set_name(st, rd2, "DIM2NAME1"); - fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rd2->id, rd2->name); + rrddim_reset_name(st, rd2, "DIM2NAME1"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); fprintf(stderr, "Renaming dimension DIM2 to DIM2NAME2\n"); - rrddim_set_name(st, rd2, "DIM2NAME2"); - fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rd2->id, rd2->name); + rrddim_reset_name(st, rd2, "DIM2NAME2"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); BUFFER *buf = buffer_create(1); health_api_v1_chart_variables2json(st, buf); @@ -1447,9 +1504,8 @@ int unit_test(long delay, long shift) long increment = 1000; collected_number i 
= 0; - unsigned long c, dimensions = 0; + unsigned long c, dimensions = rrdset_number_of_dimensions(st); RRDDIM *rd; - for(rd = st->dimensions ; rd ; rd = rd->next) dimensions++; for(c = 0; c < 20 ;c++) { i += increment; @@ -1470,8 +1526,10 @@ int unit_test(long delay, long shift) } // prevent it from deleting the dimensions - for(rd = st->dimensions ; rd ; rd = rd->next) + rrddim_foreach_read(rd, st) { rd->last_collected_time.tv_sec = st->last_collected_time.tv_sec; + } + rrddim_foreach_done(rd); rrdset_done(st); } @@ -1486,10 +1544,10 @@ int unit_test(long delay, long shift) for(c = 0 ; c < st->counter ; c++) { fprintf(stderr, "\nPOSITION: c = %lu, EXPECTED VALUE %lu\n", c, (oincrement + c * increment + increment * (1000000 - shift) / 1000000 )* 10); - for(rd = st->dimensions ; rd ; rd = rd->next) { + rrddim_foreach_read(rd, st) { sn = rd->db[c]; cn = unpack_storage_number(sn); - fprintf(stderr, "\t %s " NETDATA_DOUBLE_FORMAT " (PACKED AS " STORAGE_NUMBER_FORMAT ") -> ", rd->id, cn, sn); + fprintf(stderr, "\t %s " NETDATA_DOUBLE_FORMAT " (PACKED AS " STORAGE_NUMBER_FORMAT ") -> ", rrddim_id(rd), cn, sn); if(rd == rdabs) v = ( oincrement @@ -1508,6 +1566,7 @@ int unit_test(long delay, long shift) ret = 1; } } + rrddim_foreach_done(rd); } if(ret) @@ -1527,19 +1586,19 @@ int test_sqlite(void) { return 1; } - rc = sqlite3_exec(db_meta, "CREATE TABLE IF NOT EXISTS mine (id1, id2);", 0, 0, NULL); + rc = sqlite3_exec_monitored(db_meta, "CREATE TABLE IF NOT EXISTS mine (id1, id2);", 0, 0, NULL); if (rc != SQLITE_OK) { fprintf(stderr,"Failed to test SQLite: Create table failed\n"); return 1; } - rc = sqlite3_exec(db_meta, "DELETE FROM MINE LIMIT 1;", 0, 0, NULL); + rc = sqlite3_exec_monitored(db_meta, "DELETE FROM MINE LIMIT 1;", 0, 0, NULL); if (rc != SQLITE_OK) { fprintf(stderr,"Failed to test SQLite: Delete with LIMIT failed\n"); return 1; } - rc = sqlite3_exec(db_meta, "UPDATE MINE SET id1=1 LIMIT 1;", 0, 0, NULL); + rc = sqlite3_exec_monitored(db_meta, "UPDATE MINE 
SET id1=1 LIMIT 1;", 0, 0, NULL); if (rc != SQLITE_OK) { fprintf(stderr,"Failed to test SQLite: Update with LIMIT failed\n"); return 1; @@ -1548,58 +1607,24 @@ int test_sqlite(void) { BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE); char *uuid_str = "0000_000"; - buffer_sprintf(sql, TABLE_ACLK_CHART, uuid_str); - rc = sqlite3_exec(db_meta, buffer_tostring(sql), 0, 0, NULL); - buffer_flush(sql); - if (rc != SQLITE_OK) - goto error; - - buffer_sprintf(sql, TABLE_ACLK_CHART_PAYLOAD, uuid_str); - rc = sqlite3_exec(db_meta, buffer_tostring(sql), 0, 0, NULL); - buffer_flush(sql); - if (rc != SQLITE_OK) - goto error; - - buffer_sprintf(sql, TABLE_ACLK_CHART_LATEST, uuid_str); - rc = sqlite3_exec(db_meta, buffer_tostring(sql), 0, 0, NULL); - if (rc != SQLITE_OK) - goto error; - buffer_flush(sql); - - buffer_sprintf(sql, INDEX_ACLK_CHART, uuid_str, uuid_str); - rc = sqlite3_exec(db_meta, buffer_tostring(sql), 0, 0, NULL); - if (rc != SQLITE_OK) - goto error; - buffer_flush(sql); - - buffer_sprintf(sql, INDEX_ACLK_CHART_LATEST, uuid_str, uuid_str); - rc = sqlite3_exec(db_meta, buffer_tostring(sql), 0, 0, NULL); - if (rc != SQLITE_OK) - goto error; - buffer_flush(sql); - - buffer_sprintf(sql, TRIGGER_ACLK_CHART_PAYLOAD, uuid_str, uuid_str, uuid_str); - rc = sqlite3_exec(db_meta, buffer_tostring(sql), 0, 0, NULL); - if (rc != SQLITE_OK) - goto error; - buffer_flush(sql); - buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str); - rc = sqlite3_exec(db_meta, buffer_tostring(sql), 0, 0, NULL); + rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL); if (rc != SQLITE_OK) goto error; buffer_flush(sql); buffer_sprintf(sql, INDEX_ACLK_ALERT, uuid_str, uuid_str); - rc = sqlite3_exec(db_meta, buffer_tostring(sql), 0, 0, NULL); + rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL); if (rc != SQLITE_OK) goto error; buffer_flush(sql); buffer_free(sql); fprintf(stderr,"SQLite is OK\n"); + rc = sqlite3_close_v2(db_meta); return 0; error: + rc = 
sqlite3_close_v2(db_meta); fprintf(stderr,"SQLite statement failed: %s\n", buffer_tostring(sql)); buffer_free(sql); fprintf(stderr,"SQLite tests failed\n"); @@ -1634,28 +1659,28 @@ int unit_test_bitmap256(void) { if (test_bitmap.data[0] == 0xffffffffffffffff) fprintf(stderr, "%s() INDEX 0 is fully set OK\n", __FUNCTION__); else { - fprintf(stderr, "%s() INDEX 0 is %lx expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); + fprintf(stderr, "%s() INDEX 0 is %"PRIu64" expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); return 1; } if (test_bitmap.data[1] == 0xffffffffffffffff) fprintf(stderr, "%s() INDEX 1 is fully set OK\n", __FUNCTION__); else { - fprintf(stderr, "%s() INDEX 1 is %lx expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); + fprintf(stderr, "%s() INDEX 1 is %"PRIu64" expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); return 1; } if (test_bitmap.data[2] == 0xffffffffffffffff) fprintf(stderr, "%s() INDEX 2 is fully set OK\n", __FUNCTION__); else { - fprintf(stderr, "%s() INDEX 2 is %lx expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); + fprintf(stderr, "%s() INDEX 2 is %"PRIu64" expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); return 1; } if (test_bitmap.data[3] == 0xffffffffffffffff) fprintf(stderr, "%s() INDEX 3 is fully set OK\n", __FUNCTION__); else { - fprintf(stderr, "%s() INDEX 3 is %lx expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); + fprintf(stderr, "%s() INDEX 3 is %"PRIu64" expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); return 1; } @@ -1704,28 +1729,28 @@ int unit_test_bitmap256(void) { if (test_bitmap.data[0] == 0x1111111111111111) fprintf(stderr, "%s() INDEX 0 is 0x1111111111111111 set OK\n", __FUNCTION__); else { - fprintf(stderr, "%s() INDEX 0 is %lx expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[0]); + fprintf(stderr, "%s() INDEX 0 is %"PRIu64" expected 0x1111111111111111\n", 
__FUNCTION__, test_bitmap.data[0]); return 1; } if (test_bitmap.data[1] == 0x1111111111111111) fprintf(stderr, "%s() INDEX 1 is 0x1111111111111111 set OK\n", __FUNCTION__); else { - fprintf(stderr, "%s() INDEX 1 is %lx expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[1]); + fprintf(stderr, "%s() INDEX 1 is %"PRIu64" expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[1]); return 1; } if (test_bitmap.data[2] == 0x1111111111111111) fprintf(stderr, "%s() INDEX 2 is 0x1111111111111111 set OK\n", __FUNCTION__); else { - fprintf(stderr, "%s() INDEX 2 is %lx expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[2]); + fprintf(stderr, "%s() INDEX 2 is %"PRIu64" expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[2]); return 1; } if (test_bitmap.data[3] == 0x1111111111111111) fprintf(stderr, "%s() INDEX 3 is 0x1111111111111111 set OK\n", __FUNCTION__); else { - fprintf(stderr, "%s() INDEX 3 is %lx expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[3]); + fprintf(stderr, "%s() INDEX 3 is %"PRIu64" expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[3]); return 1; } @@ -1771,6 +1796,9 @@ static RRDHOST *dbengine_rrdhost_find_or_create(char *name) , default_rrdpush_destination , default_rrdpush_api_key , default_rrdpush_send_charts_matching + , default_rrdpush_enable_replication + , default_rrdpush_seconds_to_replicate + , default_rrdpush_replication_step , NULL , 0 ); @@ -1826,7 +1854,10 @@ static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDI for (j = 0; j < DIMS; ++j) { rrddim_set_by_pointer_fake_time(rd[i][j], 69, 2 * API_RELATIVE_TIME_MAX); // set first value to 69 } - rrdset_done(st[i]); + + struct timeval now; + now_realtime_timeval(&now); + rrdset_timed_done(st[i], now, false); } // Fluh pages for subsequent real values for (i = 0 ; i < CHARTS ; ++i) { @@ -1850,6 +1881,8 @@ static time_t test_dbengine_create_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS // feed it with the 
test data for (i = 0 ; i < CHARTS ; ++i) { for (j = 0 ; j < DIMS ; ++j) { + rd[i][j]->tiers[0]->collect_ops->change_collection_frequency(rd[i][j]->tiers[0]->db_collection_handle, update_every); + rd[i][j]->last_collected_time.tv_sec = st[i]->last_collected_time.tv_sec = st[i]->last_updated.tv_sec = time_now; rd[i][j]->last_collected_time.tv_usec = @@ -1858,7 +1891,8 @@ static time_t test_dbengine_create_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS } for (c = 0; c < REGION_POINTS[current_region] ; ++c) { time_now += update_every; // time_now = start + (c + 1) * update_every - for (i = 0 ; i < CHARTS ; ++i) { + + for (i = 0 ; i < CHARTS ; ++i) { st[i]->usec_since_last_update = USEC_PER_SEC * update_every; for (j = 0; j < DIMS; ++j) { @@ -1866,7 +1900,12 @@ static time_t test_dbengine_create_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS j * REGION_POINTS[current_region] + c; rrddim_set_by_pointer_fake_time(rd[i][j], next, time_now); } - rrdset_done(st[i]); + + struct timeval now; + now.tv_sec = time_now; + now.tv_usec = 0; + + rrdset_timed_done(st[i], now, false); } } return time_now; //time_end @@ -1882,7 +1921,7 @@ static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DI int i, j, k, c, errors, update_every; collected_number last; NETDATA_DOUBLE value, expected; - struct rrddim_query_handle handle; + struct storage_engine_query_handle handle; size_t value_errors = 0, time_errors = 0; update_every = REGION_UPDATE_EVERY[current_region]; @@ -1893,13 +1932,13 @@ static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DI time_now = time_start + (c + 1) * update_every; for (i = 0 ; i < CHARTS ; ++i) { for (j = 0; j < DIMS; ++j) { - rd[i][j]->tiers[0]->query_ops.init(rd[i][j]->tiers[0]->db_metric_handle, &handle, time_now, time_now + QUERY_BATCH * update_every, TIER_QUERY_FETCH_SUM); + rd[i][j]->tiers[0]->query_ops->init(rd[i][j]->tiers[0]->db_metric_handle, &handle, time_now, time_now + QUERY_BATCH * update_every); for (k = 
0; k < QUERY_BATCH; ++k) { last = ((collected_number)i * DIMS) * REGION_POINTS[current_region] + j * REGION_POINTS[current_region] + c + k; expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE)last, SN_DEFAULT_FLAGS)); - STORAGE_POINT sp = rd[i][j]->tiers[0]->query_ops.next_metric(&handle); + STORAGE_POINT sp = rd[i][j]->tiers[0]->query_ops->next_metric(&handle); value = sp.sum; time_retrieved = sp.start_time; end_time = sp.end_time; @@ -1909,19 +1948,19 @@ static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DI if(!value_errors) fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT ", found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n", - st[i]->name, rd[i][j]->name, (unsigned long)time_now + k * update_every, expected, value); + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now + k * update_every, expected, value); value_errors++; errors++; } if(end_time != time_now + k * update_every) { if(!time_errors) fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, found timestamp %lu ### E R R O R ###\n", - st[i]->name, rd[i][j]->name, (unsigned long)time_now + k * update_every, (unsigned long)time_retrieved); + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now + k * update_every, (unsigned long)time_retrieved); time_errors++; errors++; } } - rd[i][j]->tiers[0]->query_ops.finalize(&handle); + rd[i][j]->tiers[0]->query_ops->finalize(&handle); } } } @@ -1940,7 +1979,7 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS] int current_region, time_t time_start, time_t time_end) { int update_every = REGION_UPDATE_EVERY[current_region]; - fprintf(stderr, "%s() running on region %d, start time %ld, end time %ld, update every %d...\n", __FUNCTION__, current_region, time_start, time_end, update_every); + fprintf(stderr, "%s() running on region %d, start time %lld, end time %lld, update every %d...\n", __FUNCTION__, 
current_region, (long long)time_start, (long long)time_end, update_every); uint8_t same; time_t time_now, time_retrieved; int i, j, errors, value_errors = 0, time_errors = 0; @@ -1952,22 +1991,25 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS] long points = (time_end - time_start) / update_every; for (i = 0 ; i < CHARTS ; ++i) { ONEWAYALLOC *owa = onewayalloc_create(0); - RRDR *r = rrd2rrdr(owa, st[i], points, time_start, time_end, - RRDR_GROUPING_AVERAGE, 0, RRDR_OPTION_NATURAL_POINTS, - NULL, NULL, NULL, 0, 0); - + RRDR *r = rrd2rrdr_legacy(owa, st[i], points, time_start, time_end, + RRDR_GROUPING_AVERAGE, 0, RRDR_OPTION_NATURAL_POINTS, + NULL, NULL, 0, 0, QUERY_SOURCE_UNITTEST); if (!r) { - fprintf(stderr, " DB-engine unittest %s: empty RRDR on region %d ### E R R O R ###\n", st[i]->name, current_region); + fprintf(stderr, " DB-engine unittest %s: empty RRDR on region %d ### E R R O R ###\n", rrdset_name(st[i]), current_region); return ++errors; } else { - assert(r->st == st[i]); - for (c = 0; c != rrdr_rows(r) ; ++c) { + assert(r->internal.qt->request.st == st[i]); + for (c = 0; c != (long)rrdr_rows(r) ; ++c) { RRDDIM *d; time_now = time_start + (c + 1) * update_every; time_retrieved = r->t[c]; // for each dimension - for (j = 0, d = r->st->dimensions ; d && j < r->d ; ++j, d = d->next) { + rrddim_foreach_read(d, r->internal.qt->request.st) { + if(unlikely(d_dfe.counter >= r->d)) break; // d_counter is provided by the dictionary dfe + + j = (int)d_dfe.counter; + NETDATA_DOUBLE *cn = &r->v[ c * r->d ]; value = cn[j]; assert(rd[i][j] == d); @@ -1980,16 +2022,17 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS] if(value_errors < 20) fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT ", RRDR found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n", - st[i]->name, rd[i][j]->name, (unsigned long)time_now, expected, value); + rrdset_name(st[i]), 
rrddim_name(rd[i][j]), (unsigned long)time_now, expected, value); value_errors++; } if(time_retrieved != time_now) { if(time_errors < 20) fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, found RRDR timestamp %lu ### E R R O R ###\n", - st[i]->name, rd[i][j]->name, (unsigned long)time_now, (unsigned long)time_retrieved); + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, (unsigned long)time_retrieved); time_errors++; } } + rrddim_foreach_done(d); } rrdr_free(owa, r); } @@ -2086,24 +2129,29 @@ int test_dbengine(void) long point_offset = (time_start[current_region] - time_start[0]) / update_every; for (i = 0 ; i < CHARTS ; ++i) { ONEWAYALLOC *owa = onewayalloc_create(0); - RRDR *r = rrd2rrdr(owa, st[i], points, time_start[0] + update_every, - time_end[REGIONS - 1], RRDR_GROUPING_AVERAGE, 0, - RRDR_OPTION_NATURAL_POINTS, NULL, NULL, NULL, 0, 0); + RRDR *r = rrd2rrdr_legacy(owa, st[i], points, time_start[0] + update_every, + time_end[REGIONS - 1], RRDR_GROUPING_AVERAGE, 0, + RRDR_OPTION_NATURAL_POINTS, NULL, NULL, 0, 0, QUERY_SOURCE_UNITTEST); + if (!r) { - fprintf(stderr, " DB-engine unittest %s: empty RRDR ### E R R O R ###\n", st[i]->name); + fprintf(stderr, " DB-engine unittest %s: empty RRDR ### E R R O R ###\n", rrdset_name(st[i])); ++errors; } else { long c; - assert(r->st == st[i]); + assert(r->internal.qt->request.st == st[i]); // test current region values only, since they must be left unchanged - for (c = point_offset ; c < point_offset + rrdr_rows(r) / REGIONS / 2 ; ++c) { + for (c = point_offset ; c < (long)(point_offset + rrdr_rows(r) / REGIONS / 2) ; ++c) { RRDDIM *d; time_t time_now = time_start[current_region] + (c - point_offset + 2) * update_every; time_t time_retrieved = r->t[c]; // for each dimension - for(j = 0, d = r->st->dimensions ; d && j < r->d ; ++j, d = d->next) { + rrddim_foreach_read(d, r->internal.qt->request.st) { + if(unlikely(d_dfe.counter >= r->d)) break; // d_counter is provided by the dictionary dfe + + j 
= (int)d_dfe.counter; + NETDATA_DOUBLE *cn = &r->v[ c * r->d ]; NETDATA_DOUBLE value = cn[j]; assert(rd[i][j] == d); @@ -2116,16 +2164,17 @@ int test_dbengine(void) if(!value_errors) fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT ", RRDR found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n", - st[i]->name, rd[i][j]->name, (unsigned long)time_now, expected, value); + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, expected, value); value_errors++; } if(time_retrieved != time_now) { if(!time_errors) fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, found RRDR timestamp %lu ### E R R O R ###\n", - st[i]->name, rd[i][j]->name, (unsigned long)time_now, (unsigned long)time_retrieved); + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, (unsigned long)time_retrieved); time_errors++; } } + rrddim_foreach_done(d); } rrdr_free(owa, r); } @@ -2133,9 +2182,9 @@ int test_dbengine(void) } error_out: rrd_wrlock(); - rrdeng_prepare_exit((struct rrdengine_instance *)host->storage_instance[0]); + rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].instance); rrdhost_delete_charts(host); - rrdeng_exit((struct rrdengine_instance *)host->storage_instance[0]); + rrdeng_exit((struct rrdengine_instance *)host->db[0].instance); rrd_unlock(); return errors + value_errors + time_errors; @@ -2313,7 +2362,7 @@ static void query_dbengine_chart(void *arg) time_t time_now, time_retrieved, end_time; collected_number generatedv; NETDATA_DOUBLE value, expected; - struct rrddim_query_handle handle; + struct storage_engine_query_handle handle; size_t value_errors = 0, time_errors = 0; do { @@ -2340,23 +2389,23 @@ static void query_dbengine_chart(void *arg) time_before = MIN(time_after + duration, time_max); /* up to 1 hour queries */ } - rd->tiers[0]->query_ops.init(rd->tiers[0]->db_metric_handle, &handle, time_after, time_before, TIER_QUERY_FETCH_SUM); + 
rd->tiers[0]->query_ops->init(rd->tiers[0]->db_metric_handle, &handle, time_after, time_before); ++thread_info->queries_nr; for (time_now = time_after ; time_now <= time_before ; time_now += update_every) { generatedv = generate_dbengine_chart_value(i, j, time_now); expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE) generatedv, SN_DEFAULT_FLAGS)); - if (unlikely(rd->tiers[0]->query_ops.is_finished(&handle))) { + if (unlikely(rd->tiers[0]->query_ops->is_finished(&handle))) { if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT ", found data gap, ### E R R O R ###\n", - st->name, rd->name, (unsigned long) time_now, expected); + rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected); ++thread_info->errors; } break; } - STORAGE_POINT sp = rd->tiers[0]->query_ops.next_metric(&handle); + STORAGE_POINT sp = rd->tiers[0]->query_ops->next_metric(&handle); value = sp.sum; time_retrieved = sp.start_time; end_time = sp.end_time; @@ -2365,7 +2414,7 @@ static void query_dbengine_chart(void *arg) if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT ", found data gap, ### E R R O R ###\n", - st->name, rd->name, (unsigned long) time_now, expected); + rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected); ++thread_info->errors; } break; @@ -2378,7 +2427,7 @@ static void query_dbengine_chart(void *arg) if(!value_errors) fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT ", found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n", - st->name, rd->name, (unsigned long) time_now, expected, value); + rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected, value); value_errors++; thread_info->errors++; } @@ -2388,13 +2437,13 
@@ static void query_dbengine_chart(void *arg) if(!time_errors) fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, found timestamp %lu ### E R R O R ###\n", - st->name, rd->name, (unsigned long) time_now, (unsigned long) time_retrieved); + rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, (unsigned long) time_retrieved); time_errors++; thread_info->errors++; } } } - rd->tiers[0]->query_ops.finalize(&handle); + rd->tiers[0]->query_ops->finalize(&handle); } while(!thread_info->done); if(value_errors) @@ -2515,7 +2564,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi test_duration = now_realtime_sec() - (time_start - HISTORY_SECONDS); if (!test_duration) test_duration = 1; - fprintf(stderr, "\nDB-engine stress test finished in %ld seconds.\n", test_duration); + fprintf(stderr, "\nDB-engine stress test finished in %lld seconds.\n", (long long)test_duration); unsigned long stored_metrics_nr = 0; for (i = 0 ; i < DSET_CHARTS ; ++i) { stored_metrics_nr += chart_threads[i]->stored_metrics_nr; @@ -2530,8 +2579,8 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi fprintf(stderr, "%lu metric data points were queried by %u reader threads.\n", queried_metrics_nr, QUERY_THREADS); fprintf(stderr, "Query starting time is randomly chosen from the beginning of the time-series up to the time of\n" "the latest data point, and ending time from 1 second up to 1 hour after the starting time.\n"); - fprintf(stderr, "Performance is %lu written data points/sec and %lu read data points/sec.\n", - stored_metrics_nr / test_duration, queried_metrics_nr / test_duration); + fprintf(stderr, "Performance is %lld written data points/sec and %lld read data points/sec.\n", + (long long)(stored_metrics_nr / test_duration), (long long)(queried_metrics_nr / test_duration)); for (i = 0 ; i < DSET_CHARTS ; ++i) { freez(chart_threads[i]); @@ -2542,9 +2591,9 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned 
DSET_CHARTS, unsi } freez(query_threads); rrd_wrlock(); - rrdeng_prepare_exit((struct rrdengine_instance *)host->storage_instance[0]); + rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].instance); rrdhost_delete_charts(host); - rrdeng_exit((struct rrdengine_instance *)host->storage_instance[0]); + rrdeng_exit((struct rrdengine_instance *)host->db[0].instance); rrd_unlock(); } diff --git a/daemon/unit_test.h b/daemon/unit_test.h index 2d2533afe..f79bd5c40 100644 --- a/daemon/unit_test.h +++ b/daemon/unit_test.h @@ -3,20 +3,24 @@ #ifndef NETDATA_UNIT_TEST_H #define NETDATA_UNIT_TEST_H 1 -extern int unit_test_storage(void); -extern int unit_test(long delay, long shift); -extern int run_all_mockup_tests(void); -extern int unit_test_str2ld(void); -extern int unit_test_buffer(void); -extern int unit_test_static_threads(void); -extern int test_sqlite(void); -extern int unit_test_bitmap256(void); +#include "stdbool.h" + +int unit_test_storage(void); +int unit_test(long delay, long shift); +int run_all_mockup_tests(void); +int unit_test_str2ld(void); +int unit_test_buffer(void); +int unit_test_static_threads(void); +int test_sqlite(void); +int unit_test_bitmap256(void); #ifdef ENABLE_DBENGINE -extern int test_dbengine(void); -extern void generate_dbengine_dataset(unsigned history_seconds); -extern void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS, +int test_dbengine(void); +void generate_dbengine_dataset(unsigned history_seconds); +void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS, unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB, unsigned DISK_SPACE_MB); #endif +bool command_argument_sanitization_tests(); + #endif /* NETDATA_UNIT_TEST_H */ |