From d079b656b4719739b2247dcd9d46e9bec793095a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 6 Feb 2023 17:11:34 +0100 Subject: Merging upstream version 1.38.0. Signed-off-by: Daniel Baumann --- daemon/README.md | 43 +- daemon/analytics.c | 24 +- daemon/commands.c | 28 +- daemon/commands.h | 1 + daemon/common.c | 35 + daemon/common.h | 3 + daemon/config/README.md | 70 +- daemon/event_loop.c | 59 ++ daemon/event_loop.h | 53 ++ daemon/global_statistics.c | 1946 +++++++++++++++++++++++++++++++++------ daemon/global_statistics.h | 29 + daemon/main.c | 608 ++++++++++-- daemon/main.h | 31 + daemon/service.c | 38 +- daemon/static_threads.c | 51 +- daemon/static_threads_freebsd.c | 2 +- daemon/static_threads_linux.c | 8 +- daemon/static_threads_macos.c | 2 +- daemon/system-info.sh | 5 +- daemon/unit_test.c | 96 +- 20 files changed, 2654 insertions(+), 478 deletions(-) create mode 100644 daemon/event_loop.c create mode 100644 daemon/event_loop.h (limited to 'daemon') diff --git a/daemon/README.md b/daemon/README.md index c5951c694..7a17506bb 100644 --- a/daemon/README.md +++ b/daemon/README.md @@ -1,7 +1,11 @@ # Netdata daemon @@ -11,7 +15,7 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/daemon/README.md - You can start Netdata by executing it with `/usr/sbin/netdata` (the installer will also start it). - You can stop Netdata by killing it with `killall netdata`. You can stop and start Netdata at any point. When - exiting, the [database engine](/database/engine/README.md) saves metrics to `/var/cache/netdata/dbengine/` so that + exiting, the [database engine](https://github.com/netdata/netdata/blob/master/database/engine/README.md) saves metrics to `/var/cache/netdata/dbengine/` so that it can continue when started again. Access to the web site, for all graphs, is by default on port `19999`, so go to: @@ -202,29 +206,26 @@ The command line options of the Netdata 1.10.0 version are the following: - USR2 Reload health configuration. ``` -You can send commands during runtime via [netdatacli](/cli/README.md). +You can send commands during runtime via [netdatacli](https://github.com/netdata/netdata/blob/master/cli/README.md). ## Log files -Netdata uses 3 log files: +Netdata uses 4 log files: 1. `error.log` -2. `access.log` -3. `debug.log` +2. `collector.log` +3. `access.log` +4. `debug.log` -Any of them can be disabled by setting it to `/dev/null` or `none` in `netdata.conf`. By default `error.log` and -`access.log` are enabled. `debug.log` is only enabled if debugging/tracing is also enabled (Netdata needs to be compiled -with debugging enabled). +Any of them can be disabled by setting it to `/dev/null` or `none` in `netdata.conf`. By default `error.log`, +`collector.log`, and `access.log` are enabled. `debug.log` is only enabled if debugging/tracing is also enabled +(Netdata needs to be compiled with debugging enabled). Log files are stored in `/var/log/netdata/` by default. ### error.log -The `error.log` is the `stderr` of the `netdata` daemon and all external plugins -run by `netdata`. - -So if any process, in the Netdata process tree, writes anything to its standard error, -it will appear in `error.log`. +The `error.log` is the `stderr` of the `netdata` daemon . For most Netdata programs (including standard external plugins shipped by netdata), the following lines may appear: @@ -239,6 +240,16 @@ program continues to run. When a Netdata program cannot run at all, a `FATAL` line is logged. +### collector.log + +The `collector.log` is the `stderr` of all [collectors](https://github.com/netdata/netdata/blob/master/collectors/COLLECTORS.md) + run by `netdata`. + +So if any process, in the Netdata process tree, writes anything to its standard error, +it will appear in `collector.log`. + +Data stored inside this file follows pattern already described for `error.log`. + ### access.log The `access.log` logs web requests. The format is: @@ -361,7 +372,7 @@ all programs), edit `netdata.conf` and set: process nice level = -1 ``` -then execute this to [restart Netdata](/docs/configure/start-stop-restart.md): +then execute this to [restart Netdata](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md): ```sh sudo systemctl restart netdata diff --git a/daemon/analytics.c b/daemon/analytics.c index 3d0e514d6..a2f52bc8f 100644 --- a/daemon/analytics.c +++ b/daemon/analytics.c @@ -223,9 +223,7 @@ void analytics_mirrored_hosts(void) if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) continue; - netdata_mutex_lock(&host->receiver_lock); - ((host->receiver || host == localhost) ? reachable++ : unreachable++); - netdata_mutex_unlock(&host->receiver_lock); + ((host == localhost || !rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN)) ? reachable++ : unreachable++); count++; } @@ -243,7 +241,7 @@ void analytics_exporters(void) { //when no exporters are available, an empty string will be sent //decide if something else is more suitable (but probably not null) - BUFFER *bi = buffer_create(1000); + BUFFER *bi = buffer_create(1000, NULL); analytics_exporting_connectors(bi); analytics_set_data_str(&analytics_data.netdata_exporting_connectors, (char *)buffer_tostring(bi)); buffer_free(bi); @@ -280,7 +278,7 @@ void analytics_collectors(void) RRDSET *st; DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); char name[500]; - BUFFER *bt = buffer_create(1000); + BUFFER *bt = buffer_create(1000, NULL); rrdset_foreach_read(st, localhost) { if(!rrdset_is_available_for_viewers(st)) @@ -335,7 +333,7 @@ void analytics_alarms_notifications(void) debug(D_ANALYTICS, "Executing %s", script); - BUFFER *b = buffer_create(1000); + BUFFER *b = buffer_create(1000, NULL); int cnt = 0; FILE *fp_child_input; FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input); @@ -382,7 +380,7 @@ void analytics_get_install_type(void) */ void analytics_https(void) { - BUFFER *b = buffer_create(30); + BUFFER *b = buffer_create(30, NULL); #ifdef ENABLE_HTTPS analytics_exporting_connectors_ssl(b); buffer_strcat(b, netdata_ssl_client_ctx && rrdhost_flag_check(localhost, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED) && localhost->sender->ssl.flags == NETDATA_SSL_HANDSHAKE_COMPLETE ? "streaming|" : "|"); @@ -554,7 +552,7 @@ void analytics_gather_mutable_meta_data(void) snprintfz(b, 6, "%d", analytics_data.dashboard_hits); analytics_set_data(&analytics_data.netdata_dashboard_used, b); - snprintfz(b, 6, "%zu", rrd_hosts_available); + snprintfz(b, 6, "%zu", rrdhost_hosts_available()); analytics_set_data(&analytics_data.netdata_config_hosts_available, b); } } @@ -587,12 +585,12 @@ void *analytics_main(void *ptr) debug(D_ANALYTICS, "Analytics thread starts"); //first delay after agent start - while (!netdata_exit && likely(sec <= ANALYTICS_INIT_SLEEP_SEC)) { + while (service_running(SERVICE_ANALYTICS) && likely(sec <= ANALYTICS_INIT_SLEEP_SEC)) { heartbeat_next(&hb, step_ut); sec++; } - if (unlikely(netdata_exit)) + if (unlikely(!service_running(SERVICE_ANALYTICS))) goto cleanup; analytics_gather_immutable_meta_data(); @@ -605,7 +603,7 @@ void *analytics_main(void *ptr) heartbeat_next(&hb, step_ut * 2); sec += 2; - if (unlikely(netdata_exit)) + if (unlikely(!service_running(SERVICE_ANALYTICS))) break; if (likely(sec < ANALYTICS_HEARTBEAT)) @@ -677,7 +675,7 @@ void set_late_global_environment() analytics_set_data_str(&analytics_data.netdata_config_release_channel, (char *)get_release_channel()); { - BUFFER *bi = buffer_create(1000); + BUFFER *bi = buffer_create(1000, NULL); analytics_build_info(bi); analytics_set_data_str(&analytics_data.netdata_buildinfo, (char *)buffer_tostring(bi)); buffer_free(bi); @@ -838,7 +836,7 @@ void set_global_environment() setenv("NETDATA_HOST_PREFIX", netdata_configured_host_prefix, 1); { - BUFFER *user_plugins_dirs = buffer_create(FILENAME_MAX); + BUFFER *user_plugins_dirs = buffer_create(FILENAME_MAX, NULL); for (size_t i = 1; i < PLUGINSD_MAX_DIRECTORIES && plugin_directories[i]; i++) { if (i > 1) diff --git a/daemon/commands.c b/daemon/commands.c index 6288ee59b..377a4002f 100644 --- a/daemon/commands.c +++ b/daemon/commands.c @@ -46,6 +46,7 @@ static cmd_status_t cmd_read_config_execute(char *args, char **message); static cmd_status_t cmd_write_config_execute(char *args, char **message); static cmd_status_t cmd_ping_execute(char *args, char **message); static cmd_status_t cmd_aclk_state(char *args, char **message); +static cmd_status_t cmd_version(char *args, char **message); static command_info_t command_info_array[] = { {"help", cmd_help_execute, CMD_TYPE_HIGH_PRIORITY}, // show help menu @@ -59,7 +60,8 @@ static command_info_t command_info_array[] = { {"read-config", cmd_read_config_execute, CMD_TYPE_CONCURRENT}, {"write-config", cmd_write_config_execute, CMD_TYPE_ORTHOGONAL}, {"ping", cmd_ping_execute, CMD_TYPE_ORTHOGONAL}, - {"aclk-state", cmd_aclk_state, CMD_TYPE_ORTHOGONAL} + {"aclk-state", cmd_aclk_state, CMD_TYPE_ORTHOGONAL}, + {"version", cmd_version, CMD_TYPE_ORTHOGONAL} }; /* Mutexes for commands of type CMD_TYPE_ORTHOGONAL */ @@ -124,7 +126,9 @@ static cmd_status_t cmd_help_execute(char *args, char **message) "ping\n" " Return with 'pong' if agent is alive.\n" "aclk-state [json]\n" - " Returns current state of ACLK and Cloud connection. (optionally in json)\n", + " Returns current state of ACLK and Cloud connection. (optionally in json).\n" + "version\n" + " Returns the netdata version.\n", MAX_COMMAND_LENGTH - 1); return CMD_STATUS_SUCCESS; } @@ -216,7 +220,7 @@ static cmd_status_t cmd_reload_labels_execute(char *args, char **message) info("COMMAND: reloading host labels."); reload_host_labels(); - BUFFER *wb = buffer_create(10); + BUFFER *wb = buffer_create(10, NULL); rrdlabels_log_to_buffer(localhost->rrdlabels, wb); (*message)=strdupz(buffer_tostring(wb)); buffer_free(wb); @@ -314,6 +318,18 @@ static cmd_status_t cmd_aclk_state(char *args, char **message) return CMD_STATUS_SUCCESS; } +static cmd_status_t cmd_version(char *args, char **message) +{ + (void)args; + + char version[MAX_COMMAND_LENGTH]; + snprintfz(version, MAX_COMMAND_LENGTH -1, "%s %s", program_name, program_version); + + *message = strdupz(version); + + return CMD_STATUS_SUCCESS; +} + static void cmd_lock_exclusive(unsigned index) { (void)index; @@ -454,9 +470,13 @@ static void after_schedule_command(uv_work_t *req, int status) static void schedule_command(uv_work_t *req) { - struct command_context *cmd_ctx = req->data; + register_libuv_worker_jobs(); + worker_is_busy(UV_EVENT_SCHEDULE_CMD); + struct command_context *cmd_ctx = req->data; cmd_ctx->status = execute_command(cmd_ctx->idx, cmd_ctx->args, &cmd_ctx->message); + + worker_is_idle(); } /* This will alter the state of the command_info_array.cmd_str diff --git a/daemon/commands.h b/daemon/commands.h index f0e38ce93..78bdcc779 100644 --- a/daemon/commands.h +++ b/daemon/commands.h @@ -25,6 +25,7 @@ typedef enum cmd { CMD_WRITE_CONFIG, CMD_PING, CMD_ACLK_STATE, + CMD_VERSION, CMD_TOTAL_COMMANDS } cmd_t; diff --git a/daemon/common.c b/daemon/common.c index 85d638631..6eae07cff 100644 --- a/daemon/common.c +++ b/daemon/common.c @@ -19,3 +19,38 @@ int32_t netdata_configured_utc_offset = 0; int netdata_ready; int netdata_cloud_setting; +long get_netdata_cpus(void) { + static long processors = 0; + + if(processors) + return processors; + + long cores_proc_stat = get_system_cpus_with_cache(false, true); + long cores_cpuset_v1 = (long)read_cpuset_cpus("/sys/fs/cgroup/cpuset/cpuset.cpus", cores_proc_stat); + long cores_cpuset_v2 = (long)read_cpuset_cpus("/sys/fs/cgroup/cpuset.cpus", cores_proc_stat); + + if(cores_cpuset_v2) + processors = cores_cpuset_v2; + else if(cores_cpuset_v1) + processors = cores_cpuset_v1; + else + processors = cores_proc_stat; + + long cores_user_configured = config_get_number(CONFIG_SECTION_GLOBAL, "cpu cores", processors); + + errno = 0; + internal_error(true, + "System CPUs: %ld, (" + "system: %ld, cgroups cpuset v1: %ld, cgroups cpuset v2: %ld, netdata.conf: %ld" + ")" + , processors + , cores_proc_stat + , cores_cpuset_v1 + , cores_cpuset_v2 + , cores_user_configured + ); + + processors = cores_user_configured; + + return processors; +} diff --git a/daemon/common.h b/daemon/common.h index f3d868661..ca4d5c954 100644 --- a/daemon/common.h +++ b/daemon/common.h @@ -4,6 +4,7 @@ #define NETDATA_COMMON_H 1 #include "libnetdata/libnetdata.h" +#include "event_loop.h" // ---------------------------------------------------------------------------- // shortcuts for the default netdata configuration @@ -105,4 +106,6 @@ extern int netdata_anonymous_statistics_enabled; extern int netdata_ready; extern int netdata_cloud_setting; +long get_netdata_cpus(void); + #endif /* NETDATA_COMMON_H */ diff --git a/daemon/config/README.md b/daemon/config/README.md index 7b4d27ecf..4a6d0bb80 100644 --- a/daemon/config/README.md +++ b/daemon/config/README.md @@ -1,7 +1,12 @@ # Daemon configuration @@ -21,24 +26,24 @@ adapt the general behavior of Netdata, in great detail. You can find all these s accessing the URL `https://netdata.server.hostname:19999/netdata.conf`. For example check the configuration file of [netdata.firehol.org](http://netdata.firehol.org/netdata.conf). HTTP access to this file is limited by default to [private IPs](https://en.wikipedia.org/wiki/Private_network), via -the [web server access lists](/web/server/README.md#access-lists). +the [web server access lists](https://github.com/netdata/netdata/blob/master/web/server/README.md#access-lists). `netdata.conf` has sections stated with `[section]`. You will see the following sections: -1. `[global]` to [configure](#global-section-options) the [Netdata daemon](/daemon/README.md). +1. `[global]` to [configure](#global-section-options) the [Netdata daemon](https://github.com/netdata/netdata/blob/master/daemon/README.md). 2. `[db]` to [configure](#db-section-options) the database of Netdata. 3. `[directories]` to [configure](#directories-section-options) the directories used by Netdata. 4. `[logs]` to [configure](#logs-section-options) the Netdata logging. 5. `[environment variables]` to [configure](#environment-variables-section-options) the environment variables used Netdata. -6. `[sqlite]` to [configure](#sqlite-section-options) the [Netdata daemon](/daemon/README.md) SQLite settings. -7. `[ml]` to configure settings for [machine learning](/ml/README.md). -8. `[health]` to [configure](#health-section-options) general settings for [health monitoring](/health/README.md). -9. `[web]` to [configure the web server](/web/server/README.md). -10. `[registry]` for the [Netdata registry](/registry/README.md). -11. `[global statistics]` for the [Netdata registry](/registry/README.md). -12. `[statsd]` for the general settings of the [stats.d.plugin](/collectors/statsd.plugin/README.md). -13. `[plugins]` to [configure](#plugins-section-options) which [collectors](/collectors/README.md) to use and PATH +6. `[sqlite]` to [configure](#sqlite-section-options) the [Netdata daemon](https://github.com/netdata/netdata/blob/master/daemon/README.md) SQLite settings. +7. `[ml]` to configure settings for [machine learning](https://github.com/netdata/netdata/blob/master/ml/README.md). +8. `[health]` to [configure](#health-section-options) general settings for [health monitoring](https://github.com/netdata/netdata/blob/master/health/README.md). +9. `[web]` to [configure the web server](https://github.com/netdata/netdata/blob/master/web/server/README.md). +10. `[registry]` for the [Netdata registry](https://github.com/netdata/netdata/blob/master/registry/README.md). +11. `[global statistics]` for the [Netdata registry](https://github.com/netdata/netdata/blob/master/registry/README.md). +12. `[statsd]` for the general settings of the [stats.d.plugin](https://github.com/netdata/netdata/blob/master/collectors/statsd.plugin/README.md). +13. `[plugins]` to [configure](#plugins-section-options) which [collectors](https://github.com/netdata/netdata/blob/master/collectors/README.md) to use and PATH settings. 14. `[plugin:NAME]` sections for each collector plugin, under the comment [Per plugin configuration](#per-plugin-configuration). @@ -49,7 +54,7 @@ comment on settings it does not currently use. ## Applying changes -After `netdata.conf` has been modified, Netdata needs to be [restarted](/docs/configure/start-stop-restart.md) for +After `netdata.conf` has been modified, Netdata needs to be [restarted](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for changes to apply: ```bash @@ -70,10 +75,10 @@ Please note that your data history will be lost if you have modified `history` p | setting | default | info | |:-------------------------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| process scheduling policy | `keep` | See [Netdata process scheduling policy](/daemon/README.md#netdata-process-scheduling-policy) | +| process scheduling policy | `keep` | See [Netdata process scheduling policy](https://github.com/netdata/netdata/blob/master/daemon/README.md#netdata-process-scheduling-policy) | | OOM score | `0` | | -| glibc malloc arena max for plugins | `1` | See [Virtual memory](/daemon/README.md#virtual-memory). | -| glibc malloc arena max for Netdata | `1` | See [Virtual memory](/daemon/README.md#virtual-memory). | +| glibc malloc arena max for plugins | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). | +| glibc malloc arena max for Netdata | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). | | hostname | auto-detected | The hostname of the computer running Netdata. | | host access prefix | empty | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). | | timezone | auto-detected | The timezone retrieved from the environment variable | @@ -84,22 +89,22 @@ Please note that your data history will be lost if you have modified `history` p | setting | default | info | |:---------------------------------------------:|:----------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`.
`save`: Netdata will save its round robin database on exit and load it on startup.
`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`).
`ram`: The round-robin database will be temporary and it will be lost when Netdata exits.
`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. | -| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](/database/README.md) for more information. | -| storage tiers | `1` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](/database/engine/README.md#tiering). You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. | +| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`.
`save`: Netdata will save its round robin database on exit and load it on startup.
`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`).
`ram`: The round-robin database will be temporary and it will be lost when Netdata exits.
`alloc`: Similar to `ram`, but can significantly reduce memory usage, when combined with a low retention and does not support KSM.
`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. Not to be used together with streaming. | +| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](https://github.com/netdata/netdata/blob/master/database/README.md) for more information. | +| storage tiers | `1` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. | | dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching for _Tier 0_ Netdata metric values. | | dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated for caching Netdata metric values of the **`N`** tier.
`N belongs to [1..4]` || | dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). | | dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. | | dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well.
`N belongs to [1..4]` | -| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](/docs/guides/configure/performance.md). These metrics stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](/database/engine/README.md#tiering). | +| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). These metrics stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). | | dbengine tier **`N`** update every iterations | `60` | The down sampling value of each tier from the previous one. For each Tier, the greater by one Tier has N (equal to 60 by default) less data points of any metric it collects. This setting can take values from `2` up to `255`.
`N belongs to [1..4]` | | dbengine tier **`N`** back fill | `New` | Specifies the strategy of recreating missing data on each Tier from the exact lower Tier.
`New`: Sees the latest point on each Tier and save new points to it only if the exact lower Tier has available points for it's observation window (`dbengine tier N update every iterations` window).
`none`: No back filling is applied.
`N belongs to [1..4]` | -| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/database/README.md#ksm) | -| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions | +| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](https://github.com/netdata/netdata/blob/master/database/README.md#ksm) | +| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions | | gap when lost iterations above | `1` | | | cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. | -| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions | +| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions | | delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. | | enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. | @@ -116,7 +121,7 @@ The multiplication of all the **enabled** tiers `dbengine tier N update every i |:-------------------:|:------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | config | `/etc/netdata` | The directory configuration files are kept. | | stock config | `/usr/lib/netdata/conf.d` | | -| log | `/var/log/netdata` | The directory in which the [log files](/daemon/README.md#log-files) are kept. | +| log | `/var/log/netdata` | The directory in which the [log files](https://github.com/netdata/netdata/blob/master/daemon/README.md#log-files) are kept. | | web | `/usr/share/netdata/web` | The directory the web static files are kept. | | cache | `/var/cache/netdata` | The directory the memory database will be stored if and when Netdata exits. Netdata will re-read the database when it will start again, to continue from the same point. | | lib | `/var/lib/netdata` | Contains the alarm log and the Netdata instance GUID. | @@ -125,14 +130,14 @@ The multiplication of all the **enabled** tiers `dbengine tier N update every i | plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. | | health config | `/etc/netdata/health.d` | The directory containing the user alarm configuration files, to override the stock configurations | | stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alarm configuration files for each collector | -| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](/registry/README.md) database and GUID that uniquely identifies each Netdata Agent | +| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](https://github.com/netdata/netdata/blob/master/registry/README.md) database and GUID that uniquely identifies each Netdata Agent | ### [logs] section options | setting | default | info | |:----------------------------------:|:-----------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](/daemon/README.md#debugging). | -| debug | `/var/log/netdata/debug.log` | The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](/daemon/README.md#debugging). | +| debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](https://github.com/netdata/netdata/blob/master/daemon/README.md#debugging). | +| debug | `/var/log/netdata/debug.log` | The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](https://github.com/netdata/netdata/blob/master/daemon/README.md#debugging). | | error | `/var/log/netdata/error.log` | The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. | | access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. | | facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. | @@ -163,9 +168,9 @@ The multiplication of all the **enabled** tiers `dbengine tier N update every i This section controls the general behavior of the health monitoring capabilities of Netdata. Specific alarms are configured in per-collector config files under the `health.d` directory. For more info, see [health -monitoring](/health/README.md). +monitoring](https://github.com/netdata/netdata/blob/master/health/README.md). -[Alarm notifications](/health/notifications/README.md) are configured in `health_alarm_notify.conf`. +[Alarm notifications](https://github.com/netdata/netdata/blob/master/health/notifications/README.md) are configured in `health_alarm_notify.conf`. | setting | default | info | |:----------------------------------------------:|:------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| @@ -175,10 +180,11 @@ monitoring](/health/README.md). | run at least every seconds | `10` | Controls how often all alarm conditions should be evaluated. | | postpone alarms during hibernation for seconds | `60` | Prevents false alarms. May need to be increased if you get alarms during hibernation. | | rotate log every lines | 2000 | Controls the number of alarm log entries stored in `/health-log.db`, where `` is the one configured in the [\[global\] section](#global-section-options) | +| enabled alarms | * | Defines which alarms to load from both user and stock directories. This is a [simple pattern](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md) list of alarm or template names. Can be used to disable specific alarms. For example, `enabled alarms = !oom_kill *` will load all alarms except `oom_kill`. | ### [web] section options -Refer to the [web server documentation](/web/server/README.md) +Refer to the [web server documentation](https://github.com/netdata/netdata/blob/master/web/server/README.md) ### [plugins] section options @@ -198,7 +204,7 @@ Additionally, there will be the following options: ### [registry] section options To understand what this section is and how it should be configured, please refer to -the [registry documentation](/registry/README.md). +the [registry documentation](https://github.com/netdata/netdata/blob/master/registry/README.md). ## Per-plugin configuration @@ -206,7 +212,7 @@ The configuration options for plugins appear in sections following the pattern ` ### Internal plugins -Most internal plugins will provide additional options. Check [Internal Plugins](/collectors/README.md) for more +Most internal plugins will provide additional options. Check [Internal Plugins](https://github.com/netdata/netdata/blob/master/collectors/README.md) for more information. Please note, that by default Netdata will enable monitoring metrics for disks, memory, and network only when they are @@ -222,7 +228,7 @@ External plugins will have only 2 options at `netdata.conf`: | setting | default | info | |:---------------:|:--------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------| -| update every | the value of `[global].update every` setting | The frequency in seconds the plugin should collect values. For more information check the [performance guide](/docs/guides/configure/performance.md). | +| update every | the value of `[global].update every` setting | The frequency in seconds the plugin should collect values. For more information check the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). | | command options | - | Additional command line options to pass to the plugin. | | External plugins that need additional configuration may support a dedicated file in `/etc/netdata`. Check their diff --git a/daemon/event_loop.c b/daemon/event_loop.c new file mode 100644 index 000000000..6f09cd654 --- /dev/null +++ b/daemon/event_loop.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include "event_loop.h" + +// Register workers +void register_libuv_worker_jobs() { + static __thread bool registered = false; + + if(likely(registered)) + return; + + registered = true; + + worker_register("LIBUV"); + + // generic + worker_register_job_name(UV_EVENT_WORKER_INIT, "worker init"); + + // query related + worker_register_job_name(UV_EVENT_DBENGINE_QUERY, "query"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_CACHE_LOOKUP, "extent cache"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_MMAP, "extent mmap"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_DECOMPRESSION, "extent decompression"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_LOOKUP, "page lookup"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_POPULATION, "page populate"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_ALLOCATION, "page allocate"); + + // flushing related + worker_register_job_name(UV_EVENT_DBENGINE_FLUSH_MAIN_CACHE, "flush main"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_WRITE, "extent write"); + worker_register_job_name(UV_EVENT_DBENGINE_FLUSHED_TO_OPEN, "flushed to open"); + + // datafile full + worker_register_job_name(UV_EVENT_DBENGINE_JOURNAL_INDEX_WAIT, "jv2 index wait"); + worker_register_job_name(UV_EVENT_DBENGINE_JOURNAL_INDEX, "jv2 indexing"); + + // db rotation related + worker_register_job_name(UV_EVENT_DBENGINE_DATAFILE_DELETE_WAIT, "datafile delete wait"); + worker_register_job_name(UV_EVENT_DBENGINE_DATAFILE_DELETE, "datafile deletion"); + worker_register_job_name(UV_EVENT_DBENGINE_FIND_ROTATED_METRICS, "find rotated metrics"); + worker_register_job_name(UV_EVENT_DBENGINE_FIND_REMAINING_RETENTION, "find remaining retention"); + worker_register_job_name(UV_EVENT_DBENGINE_POPULATE_MRG, "update retention"); + + // other dbengine events + worker_register_job_name(UV_EVENT_DBENGINE_EVICT_MAIN_CACHE, "evict main"); + worker_register_job_name(UV_EVENT_DBENGINE_BUFFERS_CLEANUP, "dbengine buffers cleanup"); + worker_register_job_name(UV_EVENT_DBENGINE_QUIESCE, "dbengine quiesce"); + worker_register_job_name(UV_EVENT_DBENGINE_SHUTDOWN, "dbengine shutdown"); + + // metadata + worker_register_job_name(UV_EVENT_METADATA_STORE, "metadata store host"); + worker_register_job_name(UV_EVENT_METADATA_CLEANUP, "metadata cleanup"); + + // netdatacli + worker_register_job_name(UV_EVENT_SCHEDULE_CMD, "schedule command"); + + uv_thread_set_name_np(pthread_self(), "LIBUV_WORKER"); +} diff --git a/daemon/event_loop.h b/daemon/event_loop.h new file mode 100644 index 000000000..0d3cc0d07 --- /dev/null +++ b/daemon/event_loop.h @@ -0,0 +1,53 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EVENT_LOOP_H +#define NETDATA_EVENT_LOOP_H + +enum event_loop_job { + UV_EVENT_JOB_NONE = 0, + + // generic + UV_EVENT_WORKER_INIT, + + // query related + UV_EVENT_DBENGINE_QUERY, + UV_EVENT_DBENGINE_EXTENT_CACHE_LOOKUP, + UV_EVENT_DBENGINE_EXTENT_MMAP, + UV_EVENT_DBENGINE_EXTENT_DECOMPRESSION, + UV_EVENT_DBENGINE_EXTENT_PAGE_LOOKUP, + UV_EVENT_DBENGINE_EXTENT_PAGE_POPULATION, + UV_EVENT_DBENGINE_EXTENT_PAGE_ALLOCATION, + + // flushing related + UV_EVENT_DBENGINE_FLUSH_MAIN_CACHE, + UV_EVENT_DBENGINE_EXTENT_WRITE, + UV_EVENT_DBENGINE_FLUSHED_TO_OPEN, + + // datafile full + UV_EVENT_DBENGINE_JOURNAL_INDEX_WAIT, + UV_EVENT_DBENGINE_JOURNAL_INDEX, + + // db rotation related + UV_EVENT_DBENGINE_DATAFILE_DELETE_WAIT, + UV_EVENT_DBENGINE_DATAFILE_DELETE, + UV_EVENT_DBENGINE_FIND_ROTATED_METRICS, // find the metrics that are rotated + UV_EVENT_DBENGINE_FIND_REMAINING_RETENTION, // find their remaining retention + UV_EVENT_DBENGINE_POPULATE_MRG, // update mrg + + // other dbengine events + UV_EVENT_DBENGINE_EVICT_MAIN_CACHE, + UV_EVENT_DBENGINE_BUFFERS_CLEANUP, + UV_EVENT_DBENGINE_QUIESCE, + UV_EVENT_DBENGINE_SHUTDOWN, + + // metadata + UV_EVENT_METADATA_STORE, + UV_EVENT_METADATA_CLEANUP, + + // netdatacli + UV_EVENT_SCHEDULE_CMD, +}; + +void register_libuv_worker_jobs(); + +#endif //NETDATA_EVENT_LOOP_H diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c index a4e9d321f..0dc3ee645 100644 --- a/daemon/global_statistics.c +++ b/daemon/global_statistics.c @@ -20,6 +20,11 @@ bool global_statistics_enabled = true; +struct netdata_buffers_statistics netdata_buffers_statistics = {}; + +static size_t dbengine_total_memory = 0; +size_t rrddim_db_memory_size = 0; + static struct global_statistics { uint16_t connected_clients; @@ -52,6 +57,7 @@ static struct global_statistics { uint64_t ml_queries_made; uint64_t ml_db_points_read; uint64_t ml_result_points_generated; + uint64_t ml_models_consulted; uint64_t exporters_queries_made; uint64_t exporters_db_points_read; @@ -88,6 +94,10 @@ void global_statistics_ml_query_completed(size_t points_read) { __atomic_fetch_add(&global_statistics.ml_db_points_read, points_read, __ATOMIC_RELAXED); } +void global_statistics_ml_models_consulted(size_t models_consulted) { + __atomic_fetch_add(&global_statistics.ml_models_consulted, models_consulted, __ATOMIC_RELAXED); +} + void global_statistics_exporters_query_completed(size_t points_read) { __atomic_fetch_add(&global_statistics.exporters_queries_made, 1, __ATOMIC_RELAXED); __atomic_fetch_add(&global_statistics.exporters_db_points_read, points_read, __ATOMIC_RELAXED); @@ -193,6 +203,7 @@ static inline void global_statistics_copy(struct global_statistics *gs, uint8_t gs->ml_queries_made = __atomic_load_n(&global_statistics.ml_queries_made, __ATOMIC_RELAXED); gs->ml_db_points_read = __atomic_load_n(&global_statistics.ml_db_points_read, __ATOMIC_RELAXED); gs->ml_result_points_generated = __atomic_load_n(&global_statistics.ml_result_points_generated, __ATOMIC_RELAXED); + gs->ml_models_consulted = __atomic_load_n(&global_statistics.ml_models_consulted, __ATOMIC_RELAXED); gs->exporters_queries_made = __atomic_load_n(&global_statistics.exporters_queries_made, __ATOMIC_RELAXED); gs->exporters_db_points_read = __atomic_load_n(&global_statistics.exporters_db_points_read, __ATOMIC_RELAXED); @@ -208,6 +219,9 @@ static inline void global_statistics_copy(struct global_statistics *gs, uint8_t } } +#define dictionary_stats_memory_total(stats) \ + ((stats).memory.dict + (stats).memory.values + (stats).memory.index) + static void global_statistics_charts(void) { static unsigned long long old_web_requests = 0, old_web_usec = 0, @@ -264,23 +278,182 @@ static void global_statistics_charts(void) { // ---------------------------------------------------------------- { - static RRDSET *st_uptime = NULL; - static RRDDIM *rd_uptime = NULL; + static RRDSET *st_memory = NULL; + static RRDDIM *rd_database = NULL; + static RRDDIM *rd_collectors = NULL; + static RRDDIM *rd_hosts = NULL; + static RRDDIM *rd_rrd = NULL; + static RRDDIM *rd_contexts = NULL; + static RRDDIM *rd_health = NULL; + static RRDDIM *rd_functions = NULL; + static RRDDIM *rd_labels = NULL; + static RRDDIM *rd_strings = NULL; + static RRDDIM *rd_streaming = NULL; + static RRDDIM *rd_replication = NULL; + static RRDDIM *rd_buffers = NULL; + static RRDDIM *rd_workers = NULL; + static RRDDIM *rd_aral = NULL; + static RRDDIM *rd_judy = NULL; + static RRDDIM *rd_other = NULL; + + if (unlikely(!st_memory)) { + st_memory = rrdset_create_localhost( + "netdata", + "memory", + NULL, + "netdata", + NULL, + "Netdata Memory", + "bytes", + "netdata", + "stats", + 130100, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_database = rrddim_add(st_memory, "db", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_collectors = rrddim_add(st_memory, "collectors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_hosts = rrddim_add(st_memory, "hosts", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_rrd = rrddim_add(st_memory, "rrdset rrddim", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_contexts = rrddim_add(st_memory, "contexts", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_health = rrddim_add(st_memory, "health", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_functions = rrddim_add(st_memory, "functions", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_labels = rrddim_add(st_memory, "labels", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_strings = rrddim_add(st_memory, "strings", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_streaming = rrddim_add(st_memory, "streaming", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_replication = rrddim_add(st_memory, "replication", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers = rrddim_add(st_memory, "buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_workers = rrddim_add(st_memory, "workers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_aral = rrddim_add(st_memory, "aral", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_judy = rrddim_add(st_memory, "judy", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_other = rrddim_add(st_memory, "other", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } - if (unlikely(!st_uptime)) { - st_uptime = rrdset_create_localhost( + size_t buffers = + netdata_buffers_statistics.query_targets_size + + netdata_buffers_statistics.rrdset_done_rda_size + + netdata_buffers_statistics.buffers_aclk + + netdata_buffers_statistics.buffers_api + + netdata_buffers_statistics.buffers_functions + + netdata_buffers_statistics.buffers_sqlite + + netdata_buffers_statistics.buffers_exporters + + netdata_buffers_statistics.buffers_health + + netdata_buffers_statistics.buffers_streaming + + netdata_buffers_statistics.cbuffers_streaming + + netdata_buffers_statistics.buffers_web + + replication_allocated_buffers() + + aral_by_size_overhead() + + judy_aral_overhead(); + + size_t strings = 0; + string_statistics(NULL, NULL, NULL, NULL, NULL, &strings, NULL, NULL); + + rrddim_set_by_pointer(st_memory, rd_database, (collected_number)dbengine_total_memory + (collected_number)rrddim_db_memory_size); + rrddim_set_by_pointer(st_memory, rd_collectors, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_collectors)); + rrddim_set_by_pointer(st_memory, rd_hosts, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdhost) + (collected_number)netdata_buffers_statistics.rrdhost_allocations_size); + rrddim_set_by_pointer(st_memory, rd_rrd, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdset_rrddim)); + rrddim_set_by_pointer(st_memory, rd_contexts, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdcontext)); + rrddim_set_by_pointer(st_memory, rd_health, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdhealth)); + rrddim_set_by_pointer(st_memory, rd_functions, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_functions)); + rrddim_set_by_pointer(st_memory, rd_labels, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdlabels)); + rrddim_set_by_pointer(st_memory, rd_strings, (collected_number)strings); + rrddim_set_by_pointer(st_memory, rd_streaming, (collected_number)netdata_buffers_statistics.rrdhost_senders + (collected_number)netdata_buffers_statistics.rrdhost_receivers); + rrddim_set_by_pointer(st_memory, rd_replication, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_replication) + (collected_number)replication_allocated_memory()); + rrddim_set_by_pointer(st_memory, rd_buffers, (collected_number)buffers); + rrddim_set_by_pointer(st_memory, rd_workers, (collected_number) workers_allocated_memory()); + rrddim_set_by_pointer(st_memory, rd_aral, (collected_number) aral_by_size_structures()); + rrddim_set_by_pointer(st_memory, rd_judy, (collected_number) judy_aral_structures()); + rrddim_set_by_pointer(st_memory, rd_other, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_other)); + + rrdset_done(st_memory); + } + + { + static RRDSET *st_memory_buffers = NULL; + static RRDDIM *rd_queries = NULL; + static RRDDIM *rd_collectors = NULL; + static RRDDIM *rd_buffers_aclk = NULL; + static RRDDIM *rd_buffers_api = NULL; + static RRDDIM *rd_buffers_functions = NULL; + static RRDDIM *rd_buffers_sqlite = NULL; + static RRDDIM *rd_buffers_exporters = NULL; + static RRDDIM *rd_buffers_health = NULL; + static RRDDIM *rd_buffers_streaming = NULL; + static RRDDIM *rd_cbuffers_streaming = NULL; + static RRDDIM *rd_buffers_replication = NULL; + static RRDDIM *rd_buffers_web = NULL; + static RRDDIM *rd_buffers_aral = NULL; + static RRDDIM *rd_buffers_judy = NULL; + + if (unlikely(!st_memory_buffers)) { + st_memory_buffers = rrdset_create_localhost( "netdata", - "uptime", + "memory_buffers", NULL, "netdata", NULL, - "Netdata uptime", - "seconds", + "Netdata Memory Buffers", + "bytes", "netdata", "stats", - 130100, + 130101, localhost->rrd_update_every, - RRDSET_TYPE_LINE); + RRDSET_TYPE_STACKED); + + rd_queries = rrddim_add(st_memory_buffers, "queries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_collectors = rrddim_add(st_memory_buffers, "collection", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_aclk = rrddim_add(st_memory_buffers, "aclk", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_api = rrddim_add(st_memory_buffers, "api", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_functions = rrddim_add(st_memory_buffers, "functions", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_sqlite = rrddim_add(st_memory_buffers, "sqlite", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_exporters = rrddim_add(st_memory_buffers, "exporters", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_health = rrddim_add(st_memory_buffers, "health", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_streaming = rrddim_add(st_memory_buffers, "streaming", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_cbuffers_streaming = rrddim_add(st_memory_buffers, "streaming cbuf", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_replication = rrddim_add(st_memory_buffers, "replication", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_web = rrddim_add(st_memory_buffers, "web", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_aral = rrddim_add(st_memory_buffers, "aral", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_judy = rrddim_add(st_memory_buffers, "judy", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_memory_buffers, rd_queries, (collected_number)netdata_buffers_statistics.query_targets_size + (collected_number) onewayalloc_allocated_memory()); + rrddim_set_by_pointer(st_memory_buffers, rd_collectors, (collected_number)netdata_buffers_statistics.rrdset_done_rda_size); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_aclk, (collected_number)netdata_buffers_statistics.buffers_aclk); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_api, (collected_number)netdata_buffers_statistics.buffers_api); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_functions, (collected_number)netdata_buffers_statistics.buffers_functions); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_sqlite, (collected_number)netdata_buffers_statistics.buffers_sqlite); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_exporters, (collected_number)netdata_buffers_statistics.buffers_exporters); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_health, (collected_number)netdata_buffers_statistics.buffers_health); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_streaming, (collected_number)netdata_buffers_statistics.buffers_streaming); + rrddim_set_by_pointer(st_memory_buffers, rd_cbuffers_streaming, (collected_number)netdata_buffers_statistics.cbuffers_streaming); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_replication, (collected_number)replication_allocated_buffers()); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_web, (collected_number)netdata_buffers_statistics.buffers_web); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_aral, (collected_number)aral_by_size_overhead()); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_judy, (collected_number)judy_aral_overhead()); + + rrdset_done(st_memory_buffers); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_uptime = NULL; + static RRDDIM *rd_uptime = NULL; + + if (unlikely(!st_uptime)) { + st_uptime = rrdset_create_localhost( + "netdata", + "uptime", + NULL, + "netdata", + NULL, + "Netdata uptime", + "seconds", + "netdata", + "stats", + 130150, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); rd_uptime = rrddim_add(st_uptime, "uptime", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } @@ -653,6 +826,34 @@ static void global_statistics_charts(void) { rrdset_done(st_points_stored); } + + { + static RRDSET *st = NULL; + static RRDDIM *rd = NULL; + + if (unlikely(!st)) { + st = rrdset_create_localhost( + "netdata" // type + , "ml_models_consulted" // id + , NULL // name + , NETDATA_ML_CHART_FAMILY // family + , NULL // context + , "KMeans models used for prediction" // title + , "models" // units + , NETDATA_ML_PLUGIN // plugin + , NETDATA_ML_MODULE_DETECTION // module + , NETDATA_ML_CHART_PRIO_MACHINE_LEARNING_STATUS // priority + , localhost->rrd_update_every // update_every + , RRDSET_TYPE_AREA // chart_type + ); + + rd = rrddim_add(st, "num_models_consulted", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st, rd, (collected_number) gs.ml_models_consulted); + + rrdset_done(st); + } } // ---------------------------------------------------------------------------- @@ -962,9 +1163,1332 @@ static void sqlite3_statistics_charts(void) { // ---------------------------------------------------------------- } -static void dbengine_statistics_charts(void) { #ifdef ENABLE_DBENGINE + +struct dbengine2_cache_pointers { + RRDSET *st_cache_hit_ratio; + RRDDIM *rd_hit_ratio_closest; + RRDDIM *rd_hit_ratio_exact; + + RRDSET *st_operations; + RRDDIM *rd_searches_closest; + RRDDIM *rd_searches_exact; + RRDDIM *rd_add_hot; + RRDDIM *rd_add_clean; + RRDDIM *rd_evictions; + RRDDIM *rd_flushes; + RRDDIM *rd_acquires; + RRDDIM *rd_releases; + RRDDIM *rd_acquires_for_deletion; + + RRDSET *st_pgc_memory; + RRDDIM *rd_pgc_memory_free; + RRDDIM *rd_pgc_memory_clean; + RRDDIM *rd_pgc_memory_hot; + RRDDIM *rd_pgc_memory_dirty; + RRDDIM *rd_pgc_memory_index; + RRDDIM *rd_pgc_memory_evicting; + RRDDIM *rd_pgc_memory_flushing; + + RRDSET *st_pgc_tm; + RRDDIM *rd_pgc_tm_current; + RRDDIM *rd_pgc_tm_wanted; + RRDDIM *rd_pgc_tm_hot_max; + RRDDIM *rd_pgc_tm_dirty_max; + RRDDIM *rd_pgc_tm_hot; + RRDDIM *rd_pgc_tm_dirty; + RRDDIM *rd_pgc_tm_referenced; + + RRDSET *st_pgc_pages; + RRDDIM *rd_pgc_pages_clean; + RRDDIM *rd_pgc_pages_hot; + RRDDIM *rd_pgc_pages_dirty; + RRDDIM *rd_pgc_pages_referenced; + + RRDSET *st_pgc_memory_changes; + RRDDIM *rd_pgc_memory_new_hot; + RRDDIM *rd_pgc_memory_new_clean; + RRDDIM *rd_pgc_memory_clean_evictions; + + RRDSET *st_pgc_memory_migrations; + RRDDIM *rd_pgc_memory_hot_to_dirty; + RRDDIM *rd_pgc_memory_dirty_to_clean; + + RRDSET *st_pgc_workers; + RRDDIM *rd_pgc_workers_evictors; + RRDDIM *rd_pgc_workers_flushers; + RRDDIM *rd_pgc_workers_adders; + RRDDIM *rd_pgc_workers_searchers; + RRDDIM *rd_pgc_workers_jv2_flushers; + RRDDIM *rd_pgc_workers_hot2dirty; + + RRDSET *st_pgc_memory_events; + RRDDIM *rd_pgc_memory_evictions_critical; + RRDDIM *rd_pgc_memory_evictions_aggressive; + RRDDIM *rd_pgc_memory_flushes_critical; + + RRDSET *st_pgc_waste; + RRDDIM *rd_pgc_waste_evictions_skipped; + RRDDIM *rd_pgc_waste_flushes_cancelled; + RRDDIM *rd_pgc_waste_insert_spins; + RRDDIM *rd_pgc_waste_evict_spins; + RRDDIM *rd_pgc_waste_release_spins; + RRDDIM *rd_pgc_waste_acquire_spins; + RRDDIM *rd_pgc_waste_delete_spins; + RRDDIM *rd_pgc_waste_flush_spins; + +}; + +static void dbengine2_cache_statistics_charts(struct dbengine2_cache_pointers *ptrs, struct pgc_statistics *pgc_stats, struct pgc_statistics *pgc_stats_old __maybe_unused, const char *name, int priority) { + + { + if (unlikely(!ptrs->st_cache_hit_ratio)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_hit_ratio", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Hit Ratio", name); + + ptrs->st_cache_hit_ratio = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "%", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_hit_ratio_closest = rrddim_add(ptrs->st_cache_hit_ratio, "closest", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_hit_ratio_exact = rrddim_add(ptrs->st_cache_hit_ratio, "exact", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + size_t closest_percent = 100 * 10000; + if(pgc_stats->searches_closest > pgc_stats_old->searches_closest) + closest_percent = (pgc_stats->searches_closest_hits - pgc_stats_old->searches_closest_hits) * 100 * 10000 / (pgc_stats->searches_closest - pgc_stats_old->searches_closest); + + size_t exact_percent = 100 * 10000; + if(pgc_stats->searches_exact > pgc_stats_old->searches_exact) + exact_percent = (pgc_stats->searches_exact_hits - pgc_stats_old->searches_exact_hits) * 100 * 10000 / (pgc_stats->searches_exact - pgc_stats_old->searches_exact); + + rrddim_set_by_pointer(ptrs->st_cache_hit_ratio, ptrs->rd_hit_ratio_closest, (collected_number)closest_percent); + rrddim_set_by_pointer(ptrs->st_cache_hit_ratio, ptrs->rd_hit_ratio_exact, (collected_number)exact_percent); + + rrdset_done(ptrs->st_cache_hit_ratio); + } + + { + if (unlikely(!ptrs->st_operations)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_operations", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Operations", name); + + ptrs->st_operations = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "ops/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_searches_closest = rrddim_add(ptrs->st_operations, "search closest", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_searches_exact = rrddim_add(ptrs->st_operations, "search exact", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_add_hot = rrddim_add(ptrs->st_operations, "add hot", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_add_clean = rrddim_add(ptrs->st_operations, "add clean", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_evictions = rrddim_add(ptrs->st_operations, "evictions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_flushes = rrddim_add(ptrs->st_operations, "flushes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_acquires = rrddim_add(ptrs->st_operations, "acquires", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_releases = rrddim_add(ptrs->st_operations, "releases", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_acquires_for_deletion = rrddim_add(ptrs->st_operations, "del acquires", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_searches_closest, (collected_number)pgc_stats->searches_closest); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_searches_exact, (collected_number)pgc_stats->searches_exact); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_add_hot, (collected_number)pgc_stats->queues.hot.added_entries); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_add_clean, (collected_number)(pgc_stats->added_entries - pgc_stats->queues.hot.added_entries)); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_evictions, (collected_number)pgc_stats->queues.clean.removed_entries); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_flushes, (collected_number)pgc_stats->flushes_completed); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_acquires, (collected_number)pgc_stats->acquires); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_releases, (collected_number)pgc_stats->releases); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_acquires_for_deletion, (collected_number)pgc_stats->acquires_for_deletion); + + rrdset_done(ptrs->st_operations); + } + + { + if (unlikely(!ptrs->st_pgc_memory)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_memory", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Memory", name); + + ptrs->st_pgc_memory = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "bytes", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + ptrs->rd_pgc_memory_free = rrddim_add(ptrs->st_pgc_memory, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_memory_hot = rrddim_add(ptrs->st_pgc_memory, "hot", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_memory_dirty = rrddim_add(ptrs->st_pgc_memory, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_memory_clean = rrddim_add(ptrs->st_pgc_memory, "clean", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_memory_index = rrddim_add(ptrs->st_pgc_memory, "index", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_memory_evicting = rrddim_add(ptrs->st_pgc_memory, "evicting", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_memory_flushing = rrddim_add(ptrs->st_pgc_memory, "flushing", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + collected_number free = (pgc_stats->current_cache_size > pgc_stats->wanted_cache_size) ? 0 : + (collected_number)(pgc_stats->wanted_cache_size - pgc_stats->current_cache_size); + + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_free, free); + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_hot, (collected_number)pgc_stats->queues.hot.size); + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_dirty, (collected_number)pgc_stats->queues.dirty.size); + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_clean, (collected_number)pgc_stats->queues.clean.size); + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_evicting, (collected_number)pgc_stats->evicting_size); + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_flushing, (collected_number)pgc_stats->flushing_size); + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_index, + (collected_number)(pgc_stats->size - pgc_stats->queues.clean.size - pgc_stats->queues.hot.size - pgc_stats->queues.dirty.size - pgc_stats->evicting_size - pgc_stats->flushing_size)); + + rrdset_done(ptrs->st_pgc_memory); + } + + { + if (unlikely(!ptrs->st_pgc_tm)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_target_memory", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Target Cache Memory", name); + + ptrs->st_pgc_tm = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "bytes", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_pgc_tm_current = rrddim_add(ptrs->st_pgc_tm, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_wanted = rrddim_add(ptrs->st_pgc_tm, "wanted", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_referenced = rrddim_add(ptrs->st_pgc_tm, "referenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_hot_max = rrddim_add(ptrs->st_pgc_tm, "hot max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_dirty_max = rrddim_add(ptrs->st_pgc_tm, "dirty max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_hot = rrddim_add(ptrs->st_pgc_tm, "hot", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_dirty = rrddim_add(ptrs->st_pgc_tm, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_current, (collected_number)pgc_stats->current_cache_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_wanted, (collected_number)pgc_stats->wanted_cache_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_referenced, (collected_number)pgc_stats->referenced_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_hot_max, (collected_number)pgc_stats->queues.hot.max_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_dirty_max, (collected_number)pgc_stats->queues.dirty.max_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_hot, (collected_number)pgc_stats->queues.hot.size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_dirty, (collected_number)pgc_stats->queues.dirty.size); + + rrdset_done(ptrs->st_pgc_tm); + } + + { + if (unlikely(!ptrs->st_pgc_pages)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_pages", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Pages", name); + + ptrs->st_pgc_pages = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "pages", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_pgc_pages_clean = rrddim_add(ptrs->st_pgc_pages, "clean", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_pages_hot = rrddim_add(ptrs->st_pgc_pages, "hot", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_pages_dirty = rrddim_add(ptrs->st_pgc_pages, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_pages_referenced = rrddim_add(ptrs->st_pgc_pages, "referenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_pages, ptrs->rd_pgc_pages_clean, (collected_number)pgc_stats->queues.clean.entries); + rrddim_set_by_pointer(ptrs->st_pgc_pages, ptrs->rd_pgc_pages_hot, (collected_number)pgc_stats->queues.hot.entries); + rrddim_set_by_pointer(ptrs->st_pgc_pages, ptrs->rd_pgc_pages_dirty, (collected_number)pgc_stats->queues.dirty.entries); + rrddim_set_by_pointer(ptrs->st_pgc_pages, ptrs->rd_pgc_pages_referenced, (collected_number)pgc_stats->referenced_entries); + + rrdset_done(ptrs->st_pgc_pages); + } + + { + if (unlikely(!ptrs->st_pgc_memory_changes)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_memory_changes", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Memory Changes", name); + + ptrs->st_pgc_memory_changes = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "bytes/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_AREA); + + ptrs->rd_pgc_memory_new_clean = rrddim_add(ptrs->st_pgc_memory_changes, "new clean", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_memory_clean_evictions = rrddim_add(ptrs->st_pgc_memory_changes, "evictions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_memory_new_hot = rrddim_add(ptrs->st_pgc_memory_changes, "new hot", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_memory_changes, ptrs->rd_pgc_memory_new_clean, (collected_number)(pgc_stats->added_size - pgc_stats->queues.hot.added_size)); + rrddim_set_by_pointer(ptrs->st_pgc_memory_changes, ptrs->rd_pgc_memory_clean_evictions, (collected_number)pgc_stats->queues.clean.removed_size); + rrddim_set_by_pointer(ptrs->st_pgc_memory_changes, ptrs->rd_pgc_memory_new_hot, (collected_number)pgc_stats->queues.hot.added_size); + + rrdset_done(ptrs->st_pgc_memory_changes); + } + + { + if (unlikely(!ptrs->st_pgc_memory_migrations)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_memory_migrations", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Memory Migrations", name); + + ptrs->st_pgc_memory_migrations = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "bytes/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_AREA); + + ptrs->rd_pgc_memory_dirty_to_clean = rrddim_add(ptrs->st_pgc_memory_migrations, "dirty to clean", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_memory_hot_to_dirty = rrddim_add(ptrs->st_pgc_memory_migrations, "hot to dirty", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_memory_migrations, ptrs->rd_pgc_memory_dirty_to_clean, (collected_number)pgc_stats->queues.dirty.removed_size); + rrddim_set_by_pointer(ptrs->st_pgc_memory_migrations, ptrs->rd_pgc_memory_hot_to_dirty, (collected_number)pgc_stats->queues.dirty.added_size); + + rrdset_done(ptrs->st_pgc_memory_migrations); + } + + { + if (unlikely(!ptrs->st_pgc_memory_events)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_events", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Events", name); + + ptrs->st_pgc_memory_events = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "events/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_AREA); + + ptrs->rd_pgc_memory_evictions_aggressive = rrddim_add(ptrs->st_pgc_memory_events, "evictions aggressive", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_memory_evictions_critical = rrddim_add(ptrs->st_pgc_memory_events, "evictions critical", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_memory_flushes_critical = rrddim_add(ptrs->st_pgc_memory_events, "flushes critical", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_memory_events, ptrs->rd_pgc_memory_evictions_aggressive, (collected_number)pgc_stats->events_cache_needs_space_aggressively); + rrddim_set_by_pointer(ptrs->st_pgc_memory_events, ptrs->rd_pgc_memory_evictions_critical, (collected_number)pgc_stats->events_cache_under_severe_pressure); + rrddim_set_by_pointer(ptrs->st_pgc_memory_events, ptrs->rd_pgc_memory_flushes_critical, (collected_number)pgc_stats->events_flush_critical); + + rrdset_done(ptrs->st_pgc_memory_events); + } + + { + if (unlikely(!ptrs->st_pgc_waste)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_waste_events", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Waste Events", name); + + ptrs->st_pgc_waste = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "events/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_pgc_waste_evictions_skipped = rrddim_add(ptrs->st_pgc_waste, "evictions skipped", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_flushes_cancelled = rrddim_add(ptrs->st_pgc_waste, "flushes cancelled", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_acquire_spins = rrddim_add(ptrs->st_pgc_waste, "acquire spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_release_spins = rrddim_add(ptrs->st_pgc_waste, "release spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_insert_spins = rrddim_add(ptrs->st_pgc_waste, "insert spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_delete_spins = rrddim_add(ptrs->st_pgc_waste, "delete spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_evict_spins = rrddim_add(ptrs->st_pgc_waste, "evict spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_flush_spins = rrddim_add(ptrs->st_pgc_waste, "flush spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_evictions_skipped, (collected_number)pgc_stats->evict_skipped); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_flushes_cancelled, (collected_number)pgc_stats->flushes_cancelled); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_acquire_spins, (collected_number)pgc_stats->acquire_spins); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_release_spins, (collected_number)pgc_stats->release_spins); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_insert_spins, (collected_number)pgc_stats->insert_spins); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_delete_spins, (collected_number)pgc_stats->delete_spins); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_evict_spins, (collected_number)pgc_stats->evict_spins); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_flush_spins, (collected_number)pgc_stats->flush_spins); + + rrdset_done(ptrs->st_pgc_waste); + } + + { + if (unlikely(!ptrs->st_pgc_workers)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_workers", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Workers", name); + + ptrs->st_pgc_workers = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "workers", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_pgc_workers_searchers = rrddim_add(ptrs->st_pgc_workers, "searchers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_workers_adders = rrddim_add(ptrs->st_pgc_workers, "adders", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_workers_evictors = rrddim_add(ptrs->st_pgc_workers, "evictors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_workers_flushers = rrddim_add(ptrs->st_pgc_workers, "flushers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_workers_hot2dirty = rrddim_add(ptrs->st_pgc_workers, "hot2dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_workers_jv2_flushers = rrddim_add(ptrs->st_pgc_workers, "jv2 flushers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_searchers, (collected_number)pgc_stats->workers_search); + rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_adders, (collected_number)pgc_stats->workers_add); + rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_evictors, (collected_number)pgc_stats->workers_evict); + rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_flushers, (collected_number)pgc_stats->workers_flush); + rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_hot2dirty, (collected_number)pgc_stats->workers_hot2dirty); + rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_jv2_flushers, (collected_number)pgc_stats->workers_jv2_flush); + + rrdset_done(ptrs->st_pgc_workers); + } +} + + +static void dbengine2_statistics_charts(void) { + if(!main_cache || !main_mrg) + return; + + static struct dbengine2_cache_pointers main_cache_ptrs = {}, open_cache_ptrs = {}, extent_cache_ptrs = {}; + static struct rrdeng_cache_efficiency_stats cache_efficiency_stats = {}, cache_efficiency_stats_old = {}; + static struct pgc_statistics pgc_main_stats = {}, pgc_main_stats_old = {}; (void)pgc_main_stats_old; + static struct pgc_statistics pgc_open_stats = {}, pgc_open_stats_old = {}; (void)pgc_open_stats_old; + static struct pgc_statistics pgc_extent_stats = {}, pgc_extent_stats_old = {}; (void)pgc_extent_stats_old; + static struct mrg_statistics mrg_stats = {}, mrg_stats_old = {}; (void)mrg_stats_old; + + pgc_main_stats_old = pgc_main_stats; + pgc_main_stats = pgc_get_statistics(main_cache); + dbengine2_cache_statistics_charts(&main_cache_ptrs, &pgc_main_stats, &pgc_main_stats_old, "main", 135100); + + pgc_open_stats_old = pgc_open_stats; + pgc_open_stats = pgc_get_statistics(open_cache); + dbengine2_cache_statistics_charts(&open_cache_ptrs, &pgc_open_stats, &pgc_open_stats_old, "open", 135200); + + pgc_extent_stats_old = pgc_extent_stats; + pgc_extent_stats = pgc_get_statistics(extent_cache); + dbengine2_cache_statistics_charts(&extent_cache_ptrs, &pgc_extent_stats, &pgc_extent_stats_old, "extent", 135300); + + cache_efficiency_stats_old = cache_efficiency_stats; + cache_efficiency_stats = rrdeng_get_cache_efficiency_stats(); + + mrg_stats_old = mrg_stats; + mrg_stats = mrg_get_statistics(main_mrg); + + struct rrdeng_buffer_sizes buffers = rrdeng_get_buffer_sizes(); + size_t buffers_total_size = buffers.handles + buffers.xt_buf + buffers.xt_io + buffers.pdc + buffers.descriptors + + buffers.opcodes + buffers.wal + buffers.workers + buffers.epdl + buffers.deol + buffers.pd + buffers.pgc + buffers.mrg; + +#ifdef PDC_USE_JULYL + buffers_total_size += buffers.julyl; +#endif + + dbengine_total_memory = pgc_main_stats.size + pgc_open_stats.size + pgc_extent_stats.size + mrg_stats.size + buffers_total_size; + + size_t priority = 135000; + + { + static RRDSET *st_pgc_memory = NULL; + static RRDDIM *rd_pgc_memory_main = NULL; + static RRDDIM *rd_pgc_memory_open = NULL; // open journal memory + static RRDDIM *rd_pgc_memory_extent = NULL; // extent compresses cache memory + static RRDDIM *rd_pgc_memory_metrics = NULL; // metric registry memory + static RRDDIM *rd_pgc_memory_buffers = NULL; + + if (unlikely(!st_pgc_memory)) { + st_pgc_memory = rrdset_create_localhost( + "netdata", + "dbengine_memory", + NULL, + "dbengine memory", + NULL, + "Netdata DB Memory", + "bytes", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_pgc_memory_main = rrddim_add(st_pgc_memory, "main cache", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_memory_open = rrddim_add(st_pgc_memory, "open cache", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_memory_extent = rrddim_add(st_pgc_memory, "extent cache", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_memory_metrics = rrddim_add(st_pgc_memory, "metrics registry", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_memory_buffers = rrddim_add(st_pgc_memory, "buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + priority++; + + + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_main, (collected_number)pgc_main_stats.size); + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_open, (collected_number)pgc_open_stats.size); + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_extent, (collected_number)pgc_extent_stats.size); + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_metrics, (collected_number)mrg_stats.size); + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_buffers, (collected_number)buffers_total_size); + + rrdset_done(st_pgc_memory); + } + + { + static RRDSET *st_pgc_buffers = NULL; + static RRDDIM *rd_pgc_buffers_pgc = NULL; + static RRDDIM *rd_pgc_buffers_mrg = NULL; + static RRDDIM *rd_pgc_buffers_opcodes = NULL; + static RRDDIM *rd_pgc_buffers_handles = NULL; + static RRDDIM *rd_pgc_buffers_descriptors = NULL; + static RRDDIM *rd_pgc_buffers_wal = NULL; + static RRDDIM *rd_pgc_buffers_workers = NULL; + static RRDDIM *rd_pgc_buffers_pdc = NULL; + static RRDDIM *rd_pgc_buffers_xt_io = NULL; + static RRDDIM *rd_pgc_buffers_xt_buf = NULL; + static RRDDIM *rd_pgc_buffers_epdl = NULL; + static RRDDIM *rd_pgc_buffers_deol = NULL; + static RRDDIM *rd_pgc_buffers_pd = NULL; +#ifdef PDC_USE_JULYL + static RRDDIM *rd_pgc_buffers_julyl = NULL; +#endif + + if (unlikely(!st_pgc_buffers)) { + st_pgc_buffers = rrdset_create_localhost( + "netdata", + "dbengine_buffers", + NULL, + "dbengine memory", + NULL, + "Netdata DB Buffers", + "bytes", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_pgc_buffers_pgc = rrddim_add(st_pgc_buffers, "pgc", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_mrg = rrddim_add(st_pgc_buffers, "mrg", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_opcodes = rrddim_add(st_pgc_buffers, "opcodes", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_handles = rrddim_add(st_pgc_buffers, "query handles", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_descriptors = rrddim_add(st_pgc_buffers, "descriptors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_wal = rrddim_add(st_pgc_buffers, "wal", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_workers = rrddim_add(st_pgc_buffers, "workers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_pdc = rrddim_add(st_pgc_buffers, "pdc", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_pd = rrddim_add(st_pgc_buffers, "pd", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_xt_io = rrddim_add(st_pgc_buffers, "extent io", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_xt_buf = rrddim_add(st_pgc_buffers, "extent buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_epdl = rrddim_add(st_pgc_buffers, "epdl", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_deol = rrddim_add(st_pgc_buffers, "deol", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); +#ifdef PDC_USE_JULYL + rd_pgc_buffers_julyl = rrddim_add(st_pgc_buffers, "julyl", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); +#endif + } + priority++; + + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pgc, (collected_number)buffers.pgc); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_mrg, (collected_number)buffers.mrg); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_opcodes, (collected_number)buffers.opcodes); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_handles, (collected_number)buffers.handles); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_descriptors, (collected_number)buffers.descriptors); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_wal, (collected_number)buffers.wal); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_workers, (collected_number)buffers.workers); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pdc, (collected_number)buffers.pdc); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pd, (collected_number)buffers.pd); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_io, (collected_number)buffers.xt_io); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_buf, (collected_number)buffers.xt_buf); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_epdl, (collected_number)buffers.epdl); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_deol, (collected_number)buffers.deol); +#ifdef PDC_USE_JULYL + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_julyl, (collected_number)buffers.julyl); +#endif + + rrdset_done(st_pgc_buffers); + } + +#ifdef PDC_USE_JULYL + { + static RRDSET *st_julyl_moved = NULL; + static RRDDIM *rd_julyl_moved = NULL; + + if (unlikely(!st_julyl_moved)) { + st_julyl_moved = rrdset_create_localhost( + "netdata", + "dbengine_julyl_moved", + NULL, + "dbengine memory", + NULL, + "Netdata JulyL Memory Moved", + "bytes/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_AREA); + + rd_julyl_moved = rrddim_add(st_julyl_moved, "moved", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_julyl_moved, rd_julyl_moved, (collected_number)julyl_bytes_moved()); + + rrdset_done(st_julyl_moved); + } +#endif + + { + static RRDSET *st_mrg_metrics = NULL; + static RRDDIM *rd_mrg_metrics = NULL; + static RRDDIM *rd_mrg_acquired = NULL; + static RRDDIM *rd_mrg_collected = NULL; + static RRDDIM *rd_mrg_with_retention = NULL; + static RRDDIM *rd_mrg_without_retention = NULL; + static RRDDIM *rd_mrg_multiple_writers = NULL; + + if (unlikely(!st_mrg_metrics)) { + st_mrg_metrics = rrdset_create_localhost( + "netdata", + "dbengine_metrics", + NULL, + "dbengine metrics", + NULL, + "Netdata Metrics in Metrics Registry", + "metrics", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_mrg_metrics = rrddim_add(st_mrg_metrics, "all", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_mrg_acquired = rrddim_add(st_mrg_metrics, "acquired", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_mrg_collected = rrddim_add(st_mrg_metrics, "collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_mrg_with_retention = rrddim_add(st_mrg_metrics, "with retention", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_mrg_without_retention = rrddim_add(st_mrg_metrics, "without retention", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_mrg_multiple_writers = rrddim_add(st_mrg_metrics, "multi-collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + priority++; + + rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_metrics, (collected_number)mrg_stats.entries); + rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_acquired, (collected_number)mrg_stats.entries_referenced); + rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_collected, (collected_number)mrg_stats.writers); + rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_with_retention, (collected_number)mrg_stats.entries_with_retention); + rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_without_retention, (collected_number)mrg_stats.entries - (collected_number)mrg_stats.entries_with_retention); + rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_multiple_writers, (collected_number)mrg_stats.writers_conflicts); + + rrdset_done(st_mrg_metrics); + } + + { + static RRDSET *st_mrg_ops = NULL; + static RRDDIM *rd_mrg_add = NULL; + static RRDDIM *rd_mrg_del = NULL; + static RRDDIM *rd_mrg_search = NULL; + + if (unlikely(!st_mrg_ops)) { + st_mrg_ops = rrdset_create_localhost( + "netdata", + "dbengine_metrics_registry_operations", + NULL, + "dbengine metrics", + NULL, + "Netdata Metrics Registry Operations", + "metrics", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_mrg_add = rrddim_add(st_mrg_ops, "add", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_mrg_del = rrddim_add(st_mrg_ops, "delete", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_mrg_search = rrddim_add(st_mrg_ops, "search", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_mrg_ops, rd_mrg_add, (collected_number)mrg_stats.additions); + rrddim_set_by_pointer(st_mrg_ops, rd_mrg_del, (collected_number)mrg_stats.deletions); + rrddim_set_by_pointer(st_mrg_ops, rd_mrg_search, (collected_number)mrg_stats.search_hits + (collected_number)mrg_stats.search_misses); + + rrdset_done(st_mrg_ops); + } + + { + static RRDSET *st_mrg_references = NULL; + static RRDDIM *rd_mrg_references = NULL; + + if (unlikely(!st_mrg_references)) { + st_mrg_references = rrdset_create_localhost( + "netdata", + "dbengine_metrics_registry_references", + NULL, + "dbengine metrics", + NULL, + "Netdata Metrics Registry References", + "references", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_mrg_references = rrddim_add(st_mrg_references, "references", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + priority++; + + rrddim_set_by_pointer(st_mrg_references, rd_mrg_references, (collected_number)mrg_stats.current_references); + + rrdset_done(st_mrg_references); + } + + { + static RRDSET *st_cache_hit_ratio = NULL; + static RRDDIM *rd_hit_ratio = NULL; + static RRDDIM *rd_main_cache_hit_ratio = NULL; + static RRDDIM *rd_extent_cache_hit_ratio = NULL; + static RRDDIM *rd_extent_merge_hit_ratio = NULL; + + if (unlikely(!st_cache_hit_ratio)) { + st_cache_hit_ratio = rrdset_create_localhost( + "netdata", + "dbengine_cache_hit_ratio", + NULL, + "dbengine query router", + NULL, + "Netdata Queries Cache Hit Ratio", + "%", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_hit_ratio = rrddim_add(st_cache_hit_ratio, "overall", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + rd_main_cache_hit_ratio = rrddim_add(st_cache_hit_ratio, "main cache", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + rd_extent_cache_hit_ratio = rrddim_add(st_cache_hit_ratio, "extent cache", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + rd_extent_merge_hit_ratio = rrddim_add(st_cache_hit_ratio, "extent merge", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + } + priority++; + + size_t delta_pages_total = cache_efficiency_stats.pages_total - cache_efficiency_stats_old.pages_total; + size_t delta_pages_to_load_from_disk = cache_efficiency_stats.pages_to_load_from_disk - cache_efficiency_stats_old.pages_to_load_from_disk; + size_t delta_extents_loaded_from_disk = cache_efficiency_stats.extents_loaded_from_disk - cache_efficiency_stats_old.extents_loaded_from_disk; + + size_t delta_pages_data_source_main_cache = cache_efficiency_stats.pages_data_source_main_cache - cache_efficiency_stats_old.pages_data_source_main_cache; + size_t delta_pages_pending_found_in_cache_at_pass4 = cache_efficiency_stats.pages_data_source_main_cache_at_pass4 - cache_efficiency_stats_old.pages_data_source_main_cache_at_pass4; + + size_t delta_pages_data_source_extent_cache = cache_efficiency_stats.pages_data_source_extent_cache - cache_efficiency_stats_old.pages_data_source_extent_cache; + size_t delta_pages_load_extent_merged = cache_efficiency_stats.pages_load_extent_merged - cache_efficiency_stats_old.pages_load_extent_merged; + + size_t pages_total_hit = delta_pages_total - delta_extents_loaded_from_disk; + + static size_t overall_hit_ratio = 100; + size_t main_cache_hit_ratio = 0, extent_cache_hit_ratio = 0, extent_merge_hit_ratio = 0; + if(delta_pages_total) { + if(pages_total_hit > delta_pages_total) + pages_total_hit = delta_pages_total; + + overall_hit_ratio = pages_total_hit * 100 * 10000 / delta_pages_total; + + size_t delta_pages_main_cache = delta_pages_data_source_main_cache + delta_pages_pending_found_in_cache_at_pass4; + if(delta_pages_main_cache > delta_pages_total) + delta_pages_main_cache = delta_pages_total; + + main_cache_hit_ratio = delta_pages_main_cache * 100 * 10000 / delta_pages_total; + } + + if(delta_pages_to_load_from_disk) { + if(delta_pages_data_source_extent_cache > delta_pages_to_load_from_disk) + delta_pages_data_source_extent_cache = delta_pages_to_load_from_disk; + + extent_cache_hit_ratio = delta_pages_data_source_extent_cache * 100 * 10000 / delta_pages_to_load_from_disk; + + if(delta_pages_load_extent_merged > delta_pages_to_load_from_disk) + delta_pages_load_extent_merged = delta_pages_to_load_from_disk; + + extent_merge_hit_ratio = delta_pages_load_extent_merged * 100 * 10000 / delta_pages_to_load_from_disk; + } + + rrddim_set_by_pointer(st_cache_hit_ratio, rd_hit_ratio, (collected_number)overall_hit_ratio); + rrddim_set_by_pointer(st_cache_hit_ratio, rd_main_cache_hit_ratio, (collected_number)main_cache_hit_ratio); + rrddim_set_by_pointer(st_cache_hit_ratio, rd_extent_cache_hit_ratio, (collected_number)extent_cache_hit_ratio); + rrddim_set_by_pointer(st_cache_hit_ratio, rd_extent_merge_hit_ratio, (collected_number)extent_merge_hit_ratio); + + rrdset_done(st_cache_hit_ratio); + } + + { + static RRDSET *st_queries = NULL; + static RRDDIM *rd_total = NULL; + static RRDDIM *rd_open = NULL; + static RRDDIM *rd_jv2 = NULL; + static RRDDIM *rd_planned_with_gaps = NULL; + static RRDDIM *rd_executed_with_gaps = NULL; + + if (unlikely(!st_queries)) { + st_queries = rrdset_create_localhost( + "netdata", + "dbengine_queries", + NULL, + "dbengine query router", + NULL, + "Netdata Queries", + "queries/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_total = rrddim_add(st_queries, "total", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_open = rrddim_add(st_queries, "open cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_jv2 = rrddim_add(st_queries, "journal v2", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_planned_with_gaps = rrddim_add(st_queries, "planned with gaps", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_executed_with_gaps = rrddim_add(st_queries, "executed with gaps", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_queries, rd_total, (collected_number)cache_efficiency_stats.queries); + rrddim_set_by_pointer(st_queries, rd_open, (collected_number)cache_efficiency_stats.queries_open); + rrddim_set_by_pointer(st_queries, rd_jv2, (collected_number)cache_efficiency_stats.queries_journal_v2); + rrddim_set_by_pointer(st_queries, rd_planned_with_gaps, (collected_number)cache_efficiency_stats.queries_planned_with_gaps); + rrddim_set_by_pointer(st_queries, rd_executed_with_gaps, (collected_number)cache_efficiency_stats.queries_executed_with_gaps); + + rrdset_done(st_queries); + } + + { + static RRDSET *st_queries_running = NULL; + static RRDDIM *rd_queries = NULL; + + if (unlikely(!st_queries_running)) { + st_queries_running = rrdset_create_localhost( + "netdata", + "dbengine_queries_running", + NULL, + "dbengine query router", + NULL, + "Netdata Queries Running", + "queries", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_queries = rrddim_add(st_queries_running, "queries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + priority++; + + rrddim_set_by_pointer(st_queries_running, rd_queries, (collected_number)cache_efficiency_stats.currently_running_queries); + + rrdset_done(st_queries_running); + } + + { + static RRDSET *st_query_pages_metadata_source = NULL; + static RRDDIM *rd_cache = NULL; + static RRDDIM *rd_open = NULL; + static RRDDIM *rd_jv2 = NULL; + + if (unlikely(!st_query_pages_metadata_source)) { + st_query_pages_metadata_source = rrdset_create_localhost( + "netdata", + "dbengine_query_pages_metadata_source", + NULL, + "dbengine query router", + NULL, + "Netdata Query Pages Metadata Source", + "pages/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_cache = rrddim_add(st_query_pages_metadata_source, "cache hit", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_jv2 = rrddim_add(st_query_pages_metadata_source, "journal v2 scan", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_open = rrddim_add(st_query_pages_metadata_source, "open journal", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_query_pages_metadata_source, rd_cache, (collected_number)cache_efficiency_stats.pages_meta_source_main_cache); + rrddim_set_by_pointer(st_query_pages_metadata_source, rd_jv2, (collected_number)cache_efficiency_stats.pages_meta_source_journal_v2); + rrddim_set_by_pointer(st_query_pages_metadata_source, rd_open, (collected_number)cache_efficiency_stats.pages_meta_source_open_cache); + + rrdset_done(st_query_pages_metadata_source); + } + + { + static RRDSET *st_query_pages_data_source = NULL; + static RRDDIM *rd_pages_main_cache = NULL; + static RRDDIM *rd_pages_disk = NULL; + static RRDDIM *rd_pages_extent_cache = NULL; + + if (unlikely(!st_query_pages_data_source)) { + st_query_pages_data_source = rrdset_create_localhost( + "netdata", + "dbengine_query_pages_data_source", + NULL, + "dbengine query router", + NULL, + "Netdata Query Pages to Data Source", + "pages/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_pages_main_cache = rrddim_add(st_query_pages_data_source, "main cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_disk = rrddim_add(st_query_pages_data_source, "disk", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_extent_cache = rrddim_add(st_query_pages_data_source, "extent cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_query_pages_data_source, rd_pages_main_cache, (collected_number)cache_efficiency_stats.pages_data_source_main_cache + (collected_number)cache_efficiency_stats.pages_data_source_main_cache_at_pass4); + rrddim_set_by_pointer(st_query_pages_data_source, rd_pages_disk, (collected_number)cache_efficiency_stats.pages_to_load_from_disk); + rrddim_set_by_pointer(st_query_pages_data_source, rd_pages_extent_cache, (collected_number)cache_efficiency_stats.pages_data_source_extent_cache); + + rrdset_done(st_query_pages_data_source); + } + + { + static RRDSET *st_query_next_page = NULL; + static RRDDIM *rd_pass4 = NULL; + static RRDDIM *rd_nowait_failed = NULL; + static RRDDIM *rd_wait_failed = NULL; + static RRDDIM *rd_wait_loaded = NULL; + static RRDDIM *rd_nowait_loaded = NULL; + + if (unlikely(!st_query_next_page)) { + st_query_next_page = rrdset_create_localhost( + "netdata", + "dbengine_query_next_page", + NULL, + "dbengine query router", + NULL, + "Netdata Query Next Page", + "pages/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_pass4 = rrddim_add(st_query_next_page, "pass4", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_wait_failed = rrddim_add(st_query_next_page, "failed slow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_nowait_failed = rrddim_add(st_query_next_page, "failed fast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_wait_loaded = rrddim_add(st_query_next_page, "loaded slow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_nowait_loaded = rrddim_add(st_query_next_page, "loaded fast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_query_next_page, rd_pass4, (collected_number)cache_efficiency_stats.pages_data_source_main_cache_at_pass4); + rrddim_set_by_pointer(st_query_next_page, rd_wait_failed, (collected_number)cache_efficiency_stats.page_next_wait_failed); + rrddim_set_by_pointer(st_query_next_page, rd_nowait_failed, (collected_number)cache_efficiency_stats.page_next_nowait_failed); + rrddim_set_by_pointer(st_query_next_page, rd_wait_loaded, (collected_number)cache_efficiency_stats.page_next_wait_loaded); + rrddim_set_by_pointer(st_query_next_page, rd_nowait_loaded, (collected_number)cache_efficiency_stats.page_next_nowait_loaded); + + rrdset_done(st_query_next_page); + } + + { + static RRDSET *st_query_page_issues = NULL; + static RRDDIM *rd_pages_zero_time = NULL; + static RRDDIM *rd_pages_past_time = NULL; + static RRDDIM *rd_pages_invalid_size = NULL; + static RRDDIM *rd_pages_fixed_update_every = NULL; + static RRDDIM *rd_pages_fixed_entries = NULL; + static RRDDIM *rd_pages_overlapping = NULL; + + if (unlikely(!st_query_page_issues)) { + st_query_page_issues = rrdset_create_localhost( + "netdata", + "dbengine_query_next_page_issues", + NULL, + "dbengine query router", + NULL, + "Netdata Query Next Page Issues", + "pages/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_pages_zero_time = rrddim_add(st_query_page_issues, "zero timestamp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_invalid_size = rrddim_add(st_query_page_issues, "invalid size", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_past_time = rrddim_add(st_query_page_issues, "past time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_overlapping = rrddim_add(st_query_page_issues, "overlapping", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_fixed_update_every = rrddim_add(st_query_page_issues, "update every fixed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_fixed_entries = rrddim_add(st_query_page_issues, "entries fixed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_query_page_issues, rd_pages_zero_time, (collected_number)cache_efficiency_stats.pages_zero_time_skipped); + rrddim_set_by_pointer(st_query_page_issues, rd_pages_invalid_size, (collected_number)cache_efficiency_stats.pages_invalid_size_skipped); + rrddim_set_by_pointer(st_query_page_issues, rd_pages_past_time, (collected_number)cache_efficiency_stats.pages_past_time_skipped); + rrddim_set_by_pointer(st_query_page_issues, rd_pages_overlapping, (collected_number)cache_efficiency_stats.pages_overlapping_skipped); + rrddim_set_by_pointer(st_query_page_issues, rd_pages_fixed_update_every, (collected_number)cache_efficiency_stats.pages_invalid_update_every_fixed); + rrddim_set_by_pointer(st_query_page_issues, rd_pages_fixed_entries, (collected_number)cache_efficiency_stats.pages_invalid_entries_fixed); + + rrdset_done(st_query_page_issues); + } + + { + static RRDSET *st_query_pages_from_disk = NULL; + static RRDDIM *rd_compressed = NULL; + static RRDDIM *rd_invalid = NULL; + static RRDDIM *rd_uncompressed = NULL; + static RRDDIM *rd_mmap_failed = NULL; + static RRDDIM *rd_unavailable = NULL; + static RRDDIM *rd_unroutable = NULL; + static RRDDIM *rd_not_found = NULL; + static RRDDIM *rd_cancelled = NULL; + static RRDDIM *rd_invalid_extent = NULL; + static RRDDIM *rd_extent_merged = NULL; + + if (unlikely(!st_query_pages_from_disk)) { + st_query_pages_from_disk = rrdset_create_localhost( + "netdata", + "dbengine_query_pages_disk_load", + NULL, + "dbengine query router", + NULL, + "Netdata Query Pages Loaded from Disk", + "pages/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_compressed = rrddim_add(st_query_pages_from_disk, "ok compressed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_invalid = rrddim_add(st_query_pages_from_disk, "fail invalid page", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_uncompressed = rrddim_add(st_query_pages_from_disk, "ok uncompressed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_mmap_failed = rrddim_add(st_query_pages_from_disk, "fail cant mmap", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_unavailable = rrddim_add(st_query_pages_from_disk, "fail unavailable", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_unroutable = rrddim_add(st_query_pages_from_disk, "fail unroutable", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_not_found = rrddim_add(st_query_pages_from_disk, "fail not found", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_invalid_extent = rrddim_add(st_query_pages_from_disk, "fail invalid extent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_extent_merged = rrddim_add(st_query_pages_from_disk, "extent merged", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cancelled = rrddim_add(st_query_pages_from_disk, "cancelled", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_query_pages_from_disk, rd_compressed, (collected_number)cache_efficiency_stats.pages_load_ok_compressed); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_invalid, (collected_number)cache_efficiency_stats.pages_load_fail_invalid_page_in_extent); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_uncompressed, (collected_number)cache_efficiency_stats.pages_load_ok_uncompressed); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_mmap_failed, (collected_number)cache_efficiency_stats.pages_load_fail_cant_mmap_extent); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_unavailable, (collected_number)cache_efficiency_stats.pages_load_fail_datafile_not_available); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_unroutable, (collected_number)cache_efficiency_stats.pages_load_fail_unroutable); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_not_found, (collected_number)cache_efficiency_stats.pages_load_fail_not_found); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_cancelled, (collected_number)cache_efficiency_stats.pages_load_fail_cancelled); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_invalid_extent, (collected_number)cache_efficiency_stats.pages_load_fail_invalid_extent); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_extent_merged, (collected_number)cache_efficiency_stats.pages_load_extent_merged); + + rrdset_done(st_query_pages_from_disk); + } + + { + static RRDSET *st_events = NULL; + static RRDDIM *rd_journal_v2_mapped = NULL; + static RRDDIM *rd_journal_v2_unmapped = NULL; + static RRDDIM *rd_datafile_creation = NULL; + static RRDDIM *rd_datafile_deletion = NULL; + static RRDDIM *rd_datafile_deletion_spin = NULL; + static RRDDIM *rd_jv2_indexing = NULL; + static RRDDIM *rd_retention = NULL; + + if (unlikely(!st_events)) { + st_events = rrdset_create_localhost( + "netdata", + "dbengine_events", + NULL, + "dbengine query router", + NULL, + "Netdata Database Events", + "events/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_journal_v2_mapped = rrddim_add(st_events, "journal v2 mapped", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_journal_v2_unmapped = rrddim_add(st_events, "journal v2 unmapped", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_datafile_creation = rrddim_add(st_events, "datafile creation", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_datafile_deletion = rrddim_add(st_events, "datafile deletion", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_datafile_deletion_spin = rrddim_add(st_events, "datafile deletion spin", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_jv2_indexing = rrddim_add(st_events, "journal v2 indexing", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_retention = rrddim_add(st_events, "retention", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_events, rd_journal_v2_mapped, (collected_number)cache_efficiency_stats.journal_v2_mapped); + rrddim_set_by_pointer(st_events, rd_journal_v2_unmapped, (collected_number)cache_efficiency_stats.journal_v2_unmapped); + rrddim_set_by_pointer(st_events, rd_datafile_creation, (collected_number)cache_efficiency_stats.datafile_creation_started); + rrddim_set_by_pointer(st_events, rd_datafile_deletion, (collected_number)cache_efficiency_stats.datafile_deletion_started); + rrddim_set_by_pointer(st_events, rd_datafile_deletion_spin, (collected_number)cache_efficiency_stats.datafile_deletion_spin); + rrddim_set_by_pointer(st_events, rd_jv2_indexing, (collected_number)cache_efficiency_stats.journal_v2_indexing_started); + rrddim_set_by_pointer(st_events, rd_retention, (collected_number)cache_efficiency_stats.metrics_retention_started); + + rrdset_done(st_events); + } + + { + static RRDSET *st_prep_timings = NULL; + static RRDDIM *rd_routing = NULL; + static RRDDIM *rd_main_cache = NULL; + static RRDDIM *rd_open_cache = NULL; + static RRDDIM *rd_journal_v2 = NULL; + static RRDDIM *rd_pass4 = NULL; + + if (unlikely(!st_prep_timings)) { + st_prep_timings = rrdset_create_localhost( + "netdata", + "dbengine_prep_timings", + NULL, + "dbengine query router", + NULL, + "Netdata Query Preparation Timings", + "usec/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_routing = rrddim_add(st_prep_timings, "routing", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_main_cache = rrddim_add(st_prep_timings, "main cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_open_cache = rrddim_add(st_prep_timings, "open cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_journal_v2 = rrddim_add(st_prep_timings, "journal v2", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pass4 = rrddim_add(st_prep_timings, "pass4", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_prep_timings, rd_routing, (collected_number)cache_efficiency_stats.prep_time_to_route); + rrddim_set_by_pointer(st_prep_timings, rd_main_cache, (collected_number)cache_efficiency_stats.prep_time_in_main_cache_lookup); + rrddim_set_by_pointer(st_prep_timings, rd_open_cache, (collected_number)cache_efficiency_stats.prep_time_in_open_cache_lookup); + rrddim_set_by_pointer(st_prep_timings, rd_journal_v2, (collected_number)cache_efficiency_stats.prep_time_in_journal_v2_lookup); + rrddim_set_by_pointer(st_prep_timings, rd_pass4, (collected_number)cache_efficiency_stats.prep_time_in_pass4_lookup); + + rrdset_done(st_prep_timings); + } + + { + static RRDSET *st_query_timings = NULL; + static RRDDIM *rd_init = NULL; + static RRDDIM *rd_prep_wait = NULL; + static RRDDIM *rd_next_page_disk_fast = NULL; + static RRDDIM *rd_next_page_disk_slow = NULL; + static RRDDIM *rd_next_page_preload_fast = NULL; + static RRDDIM *rd_next_page_preload_slow = NULL; + + if (unlikely(!st_query_timings)) { + st_query_timings = rrdset_create_localhost( + "netdata", + "dbengine_query_timings", + NULL, + "dbengine query router", + NULL, + "Netdata Query Timings", + "usec/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_init = rrddim_add(st_query_timings, "init", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_prep_wait = rrddim_add(st_query_timings, "prep wait", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_next_page_disk_fast = rrddim_add(st_query_timings, "next page disk fast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_next_page_disk_slow = rrddim_add(st_query_timings, "next page disk slow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_next_page_preload_fast = rrddim_add(st_query_timings, "next page preload fast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_next_page_preload_slow = rrddim_add(st_query_timings, "next page preload slow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_query_timings, rd_init, (collected_number)cache_efficiency_stats.query_time_init); + rrddim_set_by_pointer(st_query_timings, rd_prep_wait, (collected_number)cache_efficiency_stats.query_time_wait_for_prep); + rrddim_set_by_pointer(st_query_timings, rd_next_page_disk_fast, (collected_number)cache_efficiency_stats.query_time_to_fast_disk_next_page); + rrddim_set_by_pointer(st_query_timings, rd_next_page_disk_slow, (collected_number)cache_efficiency_stats.query_time_to_slow_disk_next_page); + rrddim_set_by_pointer(st_query_timings, rd_next_page_preload_fast, (collected_number)cache_efficiency_stats.query_time_to_fast_preload_next_page); + rrddim_set_by_pointer(st_query_timings, rd_next_page_preload_slow, (collected_number)cache_efficiency_stats.query_time_to_slow_preload_next_page); + + rrdset_done(st_query_timings); + } + if(netdata_rwlock_tryrdlock(&rrd_rwlock) == 0) { + priority = 135400; + RRDHOST *host; unsigned long long stats_array[RRDENG_NR_STATS] = {0}; unsigned long long local_stats_array[RRDENG_NR_STATS]; @@ -1012,21 +2536,22 @@ static void dbengine_statistics_charts(void) { if (unlikely(!st_compression)) { st_compression = rrdset_create_localhost( - "netdata", - "dbengine_compression_ratio", - NULL, - "dbengine", - NULL, - "Netdata DB engine data extents' compression savings ratio", - "percentage", - "netdata", - "stats", - 132000, - localhost->rrd_update_every, - RRDSET_TYPE_LINE); + "netdata", + "dbengine_compression_ratio", + NULL, + "dbengine io", + NULL, + "Netdata DB engine data extents' compression savings ratio", + "percentage", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); } + priority++; unsigned long long ratio; unsigned long long compressed_content_size = stats_array[12]; @@ -1045,136 +2570,6 @@ static void dbengine_statistics_charts(void) { // ---------------------------------------------------------------- - { - static RRDSET *st_pg_cache_hit_ratio = NULL; - static RRDDIM *rd_hit_ratio = NULL; - - if (unlikely(!st_pg_cache_hit_ratio)) { - st_pg_cache_hit_ratio = rrdset_create_localhost( - "netdata", - "page_cache_hit_ratio", - NULL, - "dbengine", - NULL, - "Netdata DB engine page cache hit ratio", - "percentage", - "netdata", - "stats", - 132003, - localhost->rrd_update_every, - RRDSET_TYPE_LINE); - - rd_hit_ratio = rrddim_add(st_pg_cache_hit_ratio, "ratio", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); - } - - static unsigned long long old_hits = 0; - static unsigned long long old_misses = 0; - unsigned long long hits = stats_array[7]; - unsigned long long misses = stats_array[8]; - unsigned long long hits_delta; - unsigned long long misses_delta; - unsigned long long ratio; - - hits_delta = hits - old_hits; - misses_delta = misses - old_misses; - old_hits = hits; - old_misses = misses; - - if (hits_delta + misses_delta) { - ratio = (hits_delta * 100 * 1000) / (hits_delta + misses_delta); - } else { - ratio = 0; - } - rrddim_set_by_pointer(st_pg_cache_hit_ratio, rd_hit_ratio, ratio); - - rrdset_done(st_pg_cache_hit_ratio); - } - - // ---------------------------------------------------------------- - - { - static RRDSET *st_pg_cache_pages = NULL; - static RRDDIM *rd_descriptors = NULL; - static RRDDIM *rd_populated = NULL; - static RRDDIM *rd_dirty = NULL; - static RRDDIM *rd_backfills = NULL; - static RRDDIM *rd_evictions = NULL; - static RRDDIM *rd_used_by_collectors = NULL; - - if (unlikely(!st_pg_cache_pages)) { - st_pg_cache_pages = rrdset_create_localhost( - "netdata", - "page_cache_stats", - NULL, - "dbengine", - NULL, - "Netdata dbengine page cache statistics", - "pages", - "netdata", - "stats", - 132004, - localhost->rrd_update_every, - RRDSET_TYPE_LINE); - - rd_descriptors = rrddim_add(st_pg_cache_pages, "descriptors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_populated = rrddim_add(st_pg_cache_pages, "populated", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_dirty = rrddim_add(st_pg_cache_pages, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_backfills = rrddim_add(st_pg_cache_pages, "backfills", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_evictions = rrddim_add(st_pg_cache_pages, "evictions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_used_by_collectors = - rrddim_add(st_pg_cache_pages, "used_by_collectors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - } - - rrddim_set_by_pointer(st_pg_cache_pages, rd_descriptors, (collected_number)stats_array[27]); - rrddim_set_by_pointer(st_pg_cache_pages, rd_populated, (collected_number)stats_array[3]); - rrddim_set_by_pointer(st_pg_cache_pages, rd_dirty, (collected_number)stats_array[0] + stats_array[4]); - rrddim_set_by_pointer(st_pg_cache_pages, rd_backfills, (collected_number)stats_array[9]); - rrddim_set_by_pointer(st_pg_cache_pages, rd_evictions, (collected_number)stats_array[10]); - rrddim_set_by_pointer(st_pg_cache_pages, rd_used_by_collectors, (collected_number)stats_array[0]); - rrdset_done(st_pg_cache_pages); - } - - // ---------------------------------------------------------------- - - { - static RRDSET *st_long_term_pages = NULL; - static RRDDIM *rd_total = NULL; - static RRDDIM *rd_insertions = NULL; - static RRDDIM *rd_deletions = NULL; - static RRDDIM *rd_flushing_pressure_deletions = NULL; - - if (unlikely(!st_long_term_pages)) { - st_long_term_pages = rrdset_create_localhost( - "netdata", - "dbengine_long_term_page_stats", - NULL, - "dbengine", - NULL, - "Netdata dbengine long-term page statistics", - "pages", - "netdata", - "stats", - 132005, - localhost->rrd_update_every, - RRDSET_TYPE_LINE); - - rd_total = rrddim_add(st_long_term_pages, "total", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_insertions = rrddim_add(st_long_term_pages, "insertions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_deletions = rrddim_add(st_long_term_pages, "deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_flushing_pressure_deletions = rrddim_add( - st_long_term_pages, "flushing_pressure_deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } - - rrddim_set_by_pointer(st_long_term_pages, rd_total, (collected_number)stats_array[2]); - rrddim_set_by_pointer(st_long_term_pages, rd_insertions, (collected_number)stats_array[5]); - rrddim_set_by_pointer(st_long_term_pages, rd_deletions, (collected_number)stats_array[6]); - rrddim_set_by_pointer( - st_long_term_pages, rd_flushing_pressure_deletions, (collected_number)stats_array[36]); - rrdset_done(st_long_term_pages); - } - - // ---------------------------------------------------------------- - { static RRDSET *st_io_stats = NULL; static RRDDIM *rd_reads = NULL; @@ -1182,22 +2577,23 @@ static void dbengine_statistics_charts(void) { if (unlikely(!st_io_stats)) { st_io_stats = rrdset_create_localhost( - "netdata", - "dbengine_io_throughput", - NULL, - "dbengine", - NULL, - "Netdata DB engine I/O throughput", - "MiB/s", - "netdata", - "stats", - 132006, - localhost->rrd_update_every, - RRDSET_TYPE_LINE); + "netdata", + "dbengine_io_throughput", + NULL, + "dbengine io", + NULL, + "Netdata DB engine I/O throughput", + "MiB/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); } + priority++; rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[17]); rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[15]); @@ -1213,22 +2609,23 @@ static void dbengine_statistics_charts(void) { if (unlikely(!st_io_stats)) { st_io_stats = rrdset_create_localhost( - "netdata", - "dbengine_io_operations", - NULL, - "dbengine", - NULL, - "Netdata DB engine I/O operations", - "operations/s", - "netdata", - "stats", - 132007, - localhost->rrd_update_every, - RRDSET_TYPE_LINE); + "netdata", + "dbengine_io_operations", + NULL, + "dbengine io", + NULL, + "Netdata DB engine I/O operations", + "operations/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); } + priority++; rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[18]); rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[16]); @@ -1245,24 +2642,25 @@ static void dbengine_statistics_charts(void) { if (unlikely(!st_errors)) { st_errors = rrdset_create_localhost( - "netdata", - "dbengine_global_errors", - NULL, - "dbengine", - NULL, - "Netdata DB engine errors", - "errors/s", - "netdata", - "stats", - 132008, - localhost->rrd_update_every, - RRDSET_TYPE_LINE); + "netdata", + "dbengine_global_errors", + NULL, + "dbengine io", + NULL, + "Netdata DB engine errors", + "errors/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); rd_io_errors = rrddim_add(st_errors, "io_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); rd_fs_errors = rrddim_add(st_errors, "fs_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); pg_cache_over_half_dirty_events = - rrddim_add(st_errors, "pg_cache_over_half_dirty_events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rrddim_add(st_errors, "pg_cache_over_half_dirty_events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); } + priority++; rrddim_set_by_pointer(st_errors, rd_io_errors, (collected_number)stats_array[30]); rrddim_set_by_pointer(st_errors, rd_fs_errors, (collected_number)stats_array[31]); @@ -1279,87 +2677,33 @@ static void dbengine_statistics_charts(void) { if (unlikely(!st_fd)) { st_fd = rrdset_create_localhost( - "netdata", - "dbengine_global_file_descriptors", - NULL, - "dbengine", - NULL, - "Netdata DB engine File Descriptors", - "descriptors", - "netdata", - "stats", - 132009, - localhost->rrd_update_every, - RRDSET_TYPE_LINE); + "netdata", + "dbengine_global_file_descriptors", + NULL, + "dbengine io", + NULL, + "Netdata DB engine File Descriptors", + "descriptors", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); rd_fd_current = rrddim_add(st_fd, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); rd_fd_max = rrddim_add(st_fd, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); } + priority++; rrddim_set_by_pointer(st_fd, rd_fd_current, (collected_number)stats_array[32]); /* Careful here, modify this accordingly if the File-Descriptor budget ever changes */ rrddim_set_by_pointer(st_fd, rd_fd_max, (collected_number)rlimit_nofile.rlim_cur / 4); rrdset_done(st_fd); } - - // ---------------------------------------------------------------- - - { - static RRDSET *st_ram_usage = NULL; - static RRDDIM *rd_cached = NULL; - static RRDDIM *rd_pinned = NULL; - static RRDDIM *rd_cache_metadata = NULL; - static RRDDIM *rd_index_metadata = NULL; - static RRDDIM *rd_pages_metadata = NULL; - - collected_number API_producers, populated_pages, cache_metadata, pages_on_disk, - page_cache_descriptors, index_metadata, pages_metadata; - - if (unlikely(!st_ram_usage)) { - st_ram_usage = rrdset_create_localhost( - "netdata", - "dbengine_ram", - NULL, - "dbengine", - NULL, - "Netdata DB engine RAM usage", - "MiB", - "netdata", - "stats", - 132010, - localhost->rrd_update_every, - RRDSET_TYPE_STACKED); - - rd_cached = rrddim_add(st_ram_usage, "cache", NULL, RRDENG_BLOCK_SIZE, 1024*1024, RRD_ALGORITHM_ABSOLUTE); - rd_pinned = rrddim_add(st_ram_usage, "collectors", NULL, RRDENG_BLOCK_SIZE, 1024*1024, RRD_ALGORITHM_ABSOLUTE); - rd_cache_metadata = rrddim_add(st_ram_usage, "cache metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE); - rd_pages_metadata = rrddim_add(st_ram_usage, "pages metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE); - rd_index_metadata = rrddim_add(st_ram_usage, "index metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE); - } - - API_producers = (collected_number)stats_array[0]; - pages_on_disk = (collected_number)stats_array[2]; - populated_pages = (collected_number)stats_array[3]; - page_cache_descriptors = (collected_number)stats_array[27]; - - cache_metadata = page_cache_descriptors * sizeof(struct page_cache_descr); - - pages_metadata = pages_on_disk * sizeof(struct rrdeng_page_descr); - - /* This is an empirical estimation for Judy array indexing and extent structures */ - index_metadata = pages_on_disk * 58; - - rrddim_set_by_pointer(st_ram_usage, rd_cached, populated_pages - API_producers); - rrddim_set_by_pointer(st_ram_usage, rd_pinned, API_producers); - rrddim_set_by_pointer(st_ram_usage, rd_cache_metadata, cache_metadata); - rrddim_set_by_pointer(st_ram_usage, rd_pages_metadata, pages_metadata); - rrddim_set_by_pointer(st_ram_usage, rd_index_metadata, index_metadata); - rrdset_done(st_ram_usage); - } } } -#endif } +#endif // ENABLE_DBENGINE static void update_strings_charts() { static RRDSET *st_ops = NULL, *st_entries = NULL, *st_mem = NULL; @@ -1486,6 +2830,15 @@ static void update_heartbeat_charts() { // --------------------------------------------------------------------------------------------------------------------- // dictionary statistics +struct dictionary_stats dictionary_stats_category_collectors = { .name = "collectors" }; +struct dictionary_stats dictionary_stats_category_rrdhost = { .name = "rrdhost" }; +struct dictionary_stats dictionary_stats_category_rrdset_rrddim = { .name = "rrdset_rrddim" }; +struct dictionary_stats dictionary_stats_category_rrdcontext = { .name = "context" }; +struct dictionary_stats dictionary_stats_category_rrdlabels = { .name = "labels" }; +struct dictionary_stats dictionary_stats_category_rrdhealth = { .name = "health" }; +struct dictionary_stats dictionary_stats_category_functions = { .name = "functions" }; +struct dictionary_stats dictionary_stats_category_replication = { .name = "replication" }; + struct dictionary_categories { struct dictionary_stats *stats; const char *family; @@ -1531,7 +2884,15 @@ struct dictionary_categories { RRDDIM *rd_spins_delete; } dictionary_categories[] = { - { .stats = &dictionary_stats_category_other, "dictionaries", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_collectors, "dictionaries collectors", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_rrdhost, "dictionaries hosts", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_rrdset_rrddim, "dictionaries rrd", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_rrdcontext, "dictionaries contexts", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_rrdlabels, "dictionaries labels", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_rrdhealth, "dictionaries health", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_functions, "dictionaries functions", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_replication, "dictionaries replication", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_other, "dictionaries other", "dictionaries", 900000 }, // terminator { .stats = NULL, NULL, NULL, 0 }, @@ -1741,7 +3102,7 @@ static void update_dictionary_category_charts(struct dictionary_categories *c) { // ------------------------------------------------------------------------ total = 0; - load_dictionary_stats_entry(memory.indexed); + load_dictionary_stats_entry(memory.index); load_dictionary_stats_entry(memory.values); load_dictionary_stats_entry(memory.dict); @@ -1775,7 +3136,7 @@ static void update_dictionary_category_charts(struct dictionary_categories *c) { rrdlabels_add(c->st_memory->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); } - rrddim_set_by_pointer(c->st_memory, c->rd_memory_indexed, (collected_number)stats.memory.indexed); + rrddim_set_by_pointer(c->st_memory, c->rd_memory_indexed, (collected_number)stats.memory.index); rrddim_set_by_pointer(c->st_memory, c->rd_memory_values, (collected_number)stats.memory.values); rrddim_set_by_pointer(c->st_memory, c->rd_memory_dict, (collected_number)stats.memory.dict); @@ -2079,6 +3440,7 @@ static struct worker_utilization all_workers_utilization[] = { { .name = "STREAMRCV", .family = "workers streaming receive", .priority = 1000000 }, { .name = "STREAMSND", .family = "workers streaming send", .priority = 1000000 }, { .name = "DBENGINE", .family = "workers dbengine instances", .priority = 1000000 }, + { .name = "LIBUV", .family = "workers libuv threadpool", .priority = 1000000 }, { .name = "WEB", .family = "workers web server", .priority = 1000000 }, { .name = "ACLKQUERY", .family = "workers aclk query", .priority = 1000000 }, { .name = "ACLKSYNC", .family = "workers aclk host sync", .priority = 1000000 }, @@ -2543,7 +3905,7 @@ static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_ // (re)open the procfile to the new filename bool set_quotes = (ff == NULL) ? true : false; - ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT); + ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_ERROR_ON_ERROR_LOG); if(unlikely(!ff)) return -1; if(set_quotes) @@ -2577,7 +3939,7 @@ static void workers_threads_cleanup(struct worker_utilization *wu) { if(!t->enabled) { JudyLDel(&workers_by_pid_JudyL_array, t->pid, PJE0); - DOUBLE_LINKED_LIST_REMOVE_UNSAFE(wu->threads, t, prev, next); + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(wu->threads, t, prev, next); freez(t); } t = next; @@ -2604,7 +3966,7 @@ static struct worker_thread *worker_thread_create(struct worker_utilization *wu, *PValue = wt; // link it - DOUBLE_LINKED_LIST_APPEND_UNSAFE(wu->threads, wt, prev, next); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(wu->threads, wt, prev, next); return wt; } @@ -2637,6 +3999,9 @@ static void worker_utilization_charts_callback(void *ptr // find the worker_thread in the list struct worker_thread *wt = worker_thread_find_or_create(wu, pid); + if(utilization_usec > duration_usec) + utilization_usec = duration_usec; + wt->enabled = true; wt->busy_time = utilization_usec; wt->jobs_started = jobs_started; @@ -2786,8 +4151,6 @@ static void global_statistics_cleanup(void *ptr) info("cleaning up..."); - worker_utilization_finish(); - static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; } @@ -2810,23 +4173,22 @@ void *global_statistics_main(void *ptr) // to make sure we are not close to any other thread hb.randomness = 0; - while (!netdata_exit) { + while (service_running(SERVICE_COLLECTORS)) { worker_is_idle(); heartbeat_next(&hb, step); worker_is_busy(WORKER_JOB_GLOBAL); global_statistics_charts(); - worker_is_busy(WORKER_JOB_SQLITE3); - sqlite3_statistics_charts(); - worker_is_busy(WORKER_JOB_REGISTRY); registry_statistics(); +#ifdef ENABLE_DBENGINE if(dbengine_enabled) { worker_is_busy(WORKER_JOB_DBENGINE); - dbengine_statistics_charts(); + dbengine2_statistics_charts(); } +#endif worker_is_busy(WORKER_JOB_HEARTBEAT); update_heartbeat_charts(); @@ -2880,7 +4242,7 @@ void *global_statistics_workers_main(void *ptr) heartbeat_t hb; heartbeat_init(&hb); - while (!netdata_exit) { + while (service_running(SERVICE_COLLECTORS)) { worker_is_idle(); heartbeat_next(&hb, step); @@ -2892,3 +4254,45 @@ void *global_statistics_workers_main(void *ptr) return NULL; } +// --------------------------------------------------------------------------------------------------------------------- +// sqlite3 thread + +static void global_statistics_sqlite3_cleanup(void *ptr) +{ + worker_unregister(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *global_statistics_sqlite3_main(void *ptr) +{ + global_statistics_register_workers(); + + netdata_thread_cleanup_push(global_statistics_sqlite3_cleanup, ptr); + + int update_every = + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; + + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + while (service_running(SERVICE_COLLECTORS)) { + worker_is_idle(); + heartbeat_next(&hb, step); + + worker_is_busy(WORKER_JOB_SQLITE3); + sqlite3_statistics_charts(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + diff --git a/daemon/global_statistics.h b/daemon/global_statistics.h index f7d6775cf..7bdb153dd 100644 --- a/daemon/global_statistics.h +++ b/daemon/global_statistics.h @@ -5,10 +5,39 @@ #include "database/rrd.h" +extern struct netdata_buffers_statistics { + size_t rrdhost_allocations_size; + size_t rrdhost_senders; + size_t rrdhost_receivers; + size_t query_targets_size; + size_t rrdset_done_rda_size; + size_t buffers_aclk; + size_t buffers_api; + size_t buffers_functions; + size_t buffers_sqlite; + size_t buffers_exporters; + size_t buffers_health; + size_t buffers_streaming; + size_t cbuffers_streaming; + size_t buffers_web; +} netdata_buffers_statistics; + +extern struct dictionary_stats dictionary_stats_category_collectors; +extern struct dictionary_stats dictionary_stats_category_rrdhost; +extern struct dictionary_stats dictionary_stats_category_rrdset_rrddim; +extern struct dictionary_stats dictionary_stats_category_rrdcontext; +extern struct dictionary_stats dictionary_stats_category_rrdlabels; +extern struct dictionary_stats dictionary_stats_category_rrdhealth; +extern struct dictionary_stats dictionary_stats_category_functions; +extern struct dictionary_stats dictionary_stats_category_replication; + +extern size_t rrddim_db_memory_size; + // ---------------------------------------------------------------------------- // global statistics void global_statistics_ml_query_completed(size_t points_read); +void global_statistics_ml_models_consulted(size_t models_consulted); void global_statistics_exporters_query_completed(size_t points_read); void global_statistics_backfill_query_completed(size_t points_read); void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source); diff --git a/daemon/main.c b/daemon/main.c index 6b591385d..7b2076f3f 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -4,10 +4,16 @@ #include "buildinfo.h" #include "static_threads.h" +#if defined(ENV32BIT) +#warning COMPILING 32BIT NETDATA +#endif + bool unittest_running = false; int netdata_zero_metrics_enabled; int netdata_anonymous_statistics_enabled; +int libuv_worker_threads = MIN_LIBUV_WORKER_THREADS; + struct netdata_static_thread *static_threads; struct config netdata_config = { @@ -23,69 +29,461 @@ struct config netdata_config = { } }; +typedef struct service_thread { + pid_t tid; + SERVICE_THREAD_TYPE type; + SERVICE_TYPE services; + char name[NETDATA_THREAD_NAME_MAX + 1]; + bool cancelled; + + union { + netdata_thread_t netdata_thread; + uv_thread_t uv_thread; + }; + + force_quit_t force_quit_callback; + request_quit_t request_quit_callback; + void *data; +} SERVICE_THREAD; + +struct service_globals { + SERVICE_TYPE running; + SPINLOCK lock; + Pvoid_t pid_judy; +} service_globals = { + .running = ~0, + .pid_judy = NULL, +}; + +SERVICE_THREAD *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update __maybe_unused) { + SERVICE_THREAD *sth = NULL; + pid_t tid = gettid(); + + netdata_spinlock_lock(&service_globals.lock); + Pvoid_t *PValue = JudyLIns(&service_globals.pid_judy, tid, PJE0); + if(!*PValue) { + sth = callocz(1, sizeof(SERVICE_THREAD)); + sth->tid = tid; + sth->type = thread_type; + sth->request_quit_callback = request_quit_callback; + sth->force_quit_callback = force_quit_callback; + sth->data = data; + os_thread_get_current_name_np(sth->name); + *PValue = sth; + + switch(thread_type) { + default: + case SERVICE_THREAD_TYPE_NETDATA: + sth->netdata_thread = netdata_thread_self(); + break; + + case SERVICE_THREAD_TYPE_EVENT_LOOP: + case SERVICE_THREAD_TYPE_LIBUV: + sth->uv_thread = uv_thread_self(); + break; + } + } + else { + sth = *PValue; + } + netdata_spinlock_unlock(&service_globals.lock); + + return sth; +} + +void service_exits(void) { + pid_t tid = gettid(); + + netdata_spinlock_lock(&service_globals.lock); + Pvoid_t *PValue = JudyLGet(service_globals.pid_judy, tid, PJE0); + if(PValue) { + freez(*PValue); + JudyLDel(&service_globals.pid_judy, tid, PJE0); + } + netdata_spinlock_unlock(&service_globals.lock); +} + +bool service_running(SERVICE_TYPE service) { + static __thread SERVICE_THREAD *sth = NULL; + + if(unlikely(!sth)) + sth = service_register(SERVICE_THREAD_TYPE_NETDATA, NULL, NULL, NULL, false); + + if(netdata_exit) + __atomic_store_n(&service_globals.running, 0, __ATOMIC_RELAXED); + + if(service == 0) + service = sth->services; + + sth->services |= service; + + return ((__atomic_load_n(&service_globals.running, __ATOMIC_RELAXED) & service) == service); +} + +void service_signal_exit(SERVICE_TYPE service) { + __atomic_and_fetch(&service_globals.running, ~(service), __ATOMIC_RELAXED); + + netdata_spinlock_lock(&service_globals.lock); + + Pvoid_t *PValue; + Word_t tid = 0; + bool first = true; + while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) { + SERVICE_THREAD *sth = *PValue; + + if((sth->services & service) && sth->request_quit_callback) { + netdata_spinlock_unlock(&service_globals.lock); + sth->request_quit_callback(sth->data); + netdata_spinlock_lock(&service_globals.lock); + continue; + } + } + + netdata_spinlock_unlock(&service_globals.lock); +} + +static void service_to_buffer(BUFFER *wb, SERVICE_TYPE service) { + if(service & SERVICE_MAINTENANCE) + buffer_strcat(wb, "MAINTENANCE "); + if(service & SERVICE_COLLECTORS) + buffer_strcat(wb, "COLLECTORS "); + if(service & SERVICE_ML_TRAINING) + buffer_strcat(wb, "ML_TRAINING "); + if(service & SERVICE_ML_PREDICTION) + buffer_strcat(wb, "ML_PREDICTION "); + if(service & SERVICE_REPLICATION) + buffer_strcat(wb, "REPLICATION "); + if(service & ABILITY_DATA_QUERIES) + buffer_strcat(wb, "DATA_QUERIES "); + if(service & ABILITY_WEB_REQUESTS) + buffer_strcat(wb, "WEB_REQUESTS "); + if(service & SERVICE_WEB_SERVER) + buffer_strcat(wb, "WEB_SERVER "); + if(service & SERVICE_ACLK) + buffer_strcat(wb, "ACLK "); + if(service & SERVICE_HEALTH) + buffer_strcat(wb, "HEALTH "); + if(service & SERVICE_STREAMING) + buffer_strcat(wb, "STREAMING "); + if(service & ABILITY_STREAMING_CONNECTIONS) + buffer_strcat(wb, "STREAMING_CONNECTIONS "); + if(service & SERVICE_CONTEXT) + buffer_strcat(wb, "CONTEXT "); + if(service & SERVICE_ANALYTICS) + buffer_strcat(wb, "ANALYTICS "); + if(service & SERVICE_EXPORTERS) + buffer_strcat(wb, "EXPORTERS "); +} + +static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) { + BUFFER *service_list = buffer_create(1024, NULL); + BUFFER *thread_list = buffer_create(1024, NULL); + usec_t started_ut = now_monotonic_usec(), ended_ut; + size_t running; + SERVICE_TYPE running_services = 0; + + // cancel the threads + running = 0; + running_services = 0; + { + buffer_flush(thread_list); + + netdata_spinlock_lock(&service_globals.lock); + + Pvoid_t *PValue; + Word_t tid = 0; + bool first = true; + while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) { + SERVICE_THREAD *sth = *PValue; + if(sth->services & service && sth->tid != gettid() && !sth->cancelled) { + sth->cancelled = true; + + switch(sth->type) { + default: + case SERVICE_THREAD_TYPE_NETDATA: + netdata_thread_cancel(sth->netdata_thread); + break; + + case SERVICE_THREAD_TYPE_EVENT_LOOP: + case SERVICE_THREAD_TYPE_LIBUV: + break; + } + + if(running) + buffer_strcat(thread_list, ", "); + + buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid); + + running++; + running_services |= sth->services & service; + + if(sth->force_quit_callback) { + netdata_spinlock_unlock(&service_globals.lock); + sth->force_quit_callback(sth->data); + netdata_spinlock_lock(&service_globals.lock); + continue; + } + } + } + + netdata_spinlock_unlock(&service_globals.lock); + } + + service_signal_exit(service); + + // signal them to stop + size_t last_running = 0; + size_t stale_time_ut = 0; + usec_t sleep_ut = 50 * USEC_PER_MS; + size_t log_countdown_ut = sleep_ut; + do { + if(running != last_running) + stale_time_ut = 0; + + last_running = running; + running = 0; + running_services = 0; + buffer_flush(thread_list); + + netdata_spinlock_lock(&service_globals.lock); + + Pvoid_t *PValue; + Word_t tid = 0; + bool first = true; + while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) { + SERVICE_THREAD *sth = *PValue; + if(sth->services & service && sth->tid != gettid()) { + if(running) + buffer_strcat(thread_list, ", "); + + buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid); + + running_services |= sth->services & service; + running++; + } + } + + netdata_spinlock_unlock(&service_globals.lock); + + if(running) { + log_countdown_ut -= (log_countdown_ut >= sleep_ut) ? sleep_ut : log_countdown_ut; + if(log_countdown_ut == 0 || running != last_running) { + log_countdown_ut = 20 * sleep_ut; + + buffer_flush(service_list); + service_to_buffer(service_list, running_services); + info("SERVICE CONTROL: waiting for the following %zu services [ %s] to exit: %s", + running, buffer_tostring(service_list), + running <= 10 ? buffer_tostring(thread_list) : ""); + } + + sleep_usec(sleep_ut); + stale_time_ut += sleep_ut; + } + + ended_ut = now_monotonic_usec(); + } while(running && (ended_ut - started_ut < timeout_ut || stale_time_ut < timeout_ut)); + + if(running) { + buffer_flush(service_list); + service_to_buffer(service_list, running_services); + info("SERVICE CONTROL: " + "the following %zu service(s) [ %s] take too long to exit: %s; " + "giving up on them...", + running, buffer_tostring(service_list), + buffer_tostring(thread_list)); + } + + buffer_free(thread_list); + buffer_free(service_list); + + return (running == 0); +} + +#define delta_shutdown_time(msg) \ + { \ + usec_t now_ut = now_monotonic_usec(); \ + if(prev_msg) \ + info("NETDATA SHUTDOWN: in %7llu ms, %s%s - next: %s", (now_ut - last_ut) / USEC_PER_MS, (timeout)?"(TIMEOUT) ":"", prev_msg, msg); \ + else \ + info("NETDATA SHUTDOWN: next: %s", msg); \ + last_ut = now_ut; \ + prev_msg = msg; \ + timeout = false; \ + } + void netdata_cleanup_and_exit(int ret) { - // enabling this, is wrong - // because the threads will be cancelled while cleaning up - // netdata_exit = 1; + usec_t started_ut = now_monotonic_usec(); + usec_t last_ut = started_ut; + const char *prev_msg = NULL; + bool timeout = false; error_log_limit_unlimited(); - info("EXIT: netdata prepares to exit with code %d...", ret); + info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret); send_statistics("EXIT", ret?"ERROR":"OK","-"); + delta_shutdown_time("create shutdown file"); + char agent_crash_file[FILENAME_MAX + 1]; char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir); (void) rename(agent_crash_file, agent_incomplete_shutdown_file); - // cleanup/save the database and exit - info("EXIT: cleaning up the database..."); +#ifdef ENABLE_DBENGINE + if(dbengine_enabled) { + delta_shutdown_time("dbengine exit mode"); + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_exit_mode(multidb_ctx[tier]); + } +#endif + + delta_shutdown_time("disable maintenance, new queries, new web requests, new streaming connections and aclk"); + + service_signal_exit( + SERVICE_MAINTENANCE + | ABILITY_DATA_QUERIES + | ABILITY_WEB_REQUESTS + | ABILITY_STREAMING_CONNECTIONS + | SERVICE_ACLK + ); + + delta_shutdown_time("stop replication, exporters, ML training, health and web servers threads"); + + timeout = !service_wait_exit( + SERVICE_REPLICATION + | SERVICE_EXPORTERS + | SERVICE_ML_TRAINING + | SERVICE_HEALTH + | SERVICE_WEB_SERVER + , 3 * USEC_PER_SEC); + + delta_shutdown_time("stop collectors and streaming threads"); + + timeout = !service_wait_exit( + SERVICE_COLLECTORS + | SERVICE_STREAMING + , 3 * USEC_PER_SEC); + + delta_shutdown_time("stop ML prediction and context threads"); + + timeout = !service_wait_exit( + SERVICE_ML_PREDICTION + | SERVICE_CONTEXT + , 3 * USEC_PER_SEC); + + delta_shutdown_time("stop maintenance thread"); + + timeout = !service_wait_exit( + SERVICE_MAINTENANCE + , 3 * USEC_PER_SEC); + + delta_shutdown_time("clean rrdhost database"); + rrdhost_cleanup_all(); - if(!ret) { - // exit cleanly + delta_shutdown_time("prepare metasync shutdown"); + + metadata_sync_shutdown_prepare(); - // stop everything - info("EXIT: stopping static threads..."); #ifdef ENABLE_ACLK - aclk_sync_exit_all(); + delta_shutdown_time("signal aclk sync to stop"); + aclk_sync_exit_all(); #endif - cancel_main_threads(); - // free the database - info("EXIT: freeing database memory..."); + delta_shutdown_time("stop aclk threads"); + + timeout = !service_wait_exit( + SERVICE_ACLK + , 3 * USEC_PER_SEC); + + delta_shutdown_time("stop all remaining worker threads"); + + timeout = !service_wait_exit(~0, 10 * USEC_PER_SEC); + + delta_shutdown_time("cancel main threads"); + + cancel_main_threads(); + + if(!ret) { + // exit cleanly + #ifdef ENABLE_DBENGINE if(dbengine_enabled) { + delta_shutdown_time("flush dbengine tiers"); for (size_t tier = 0; tier < storage_tiers; tier++) rrdeng_prepare_exit(multidb_ctx[tier]); } #endif - metadata_sync_shutdown_prepare(); - rrdhost_free_all(); + + // free the database + delta_shutdown_time("stop collection for all hosts"); + + // rrdhost_free_all(); + rrd_finalize_collection_for_all_hosts(); + + delta_shutdown_time("stop metasync threads"); + metadata_sync_shutdown(); + #ifdef ENABLE_DBENGINE if(dbengine_enabled) { + delta_shutdown_time("wait for dbengine collectors to finish"); + + size_t running = 1; + while(running) { + running = 0; + for (size_t tier = 0; tier < storage_tiers; tier++) + running += rrdeng_collectors_running(multidb_ctx[tier]); + + if(running) + sleep_usec(100 * USEC_PER_MS); + } + + delta_shutdown_time("wait for dbengine main cache to finish flushing"); + + while (pgc_hot_and_dirty_entries(main_cache)) { + pgc_flush_all_hot_and_dirty_pages(main_cache, PGC_SECTION_ALL); + sleep_usec(100 * USEC_PER_MS); + } + + delta_shutdown_time("stop dbengine tiers"); for (size_t tier = 0; tier < storage_tiers; tier++) rrdeng_exit(multidb_ctx[tier]); } #endif } + + delta_shutdown_time("close SQL context db"); + sql_close_context_database(); + + delta_shutdown_time("closed SQL main db"); + sql_close_database(); // unlink the pid if(pidfile[0]) { - info("EXIT: removing netdata PID file '%s'...", pidfile); + delta_shutdown_time("remove pid file"); + if(unlink(pidfile) != 0) error("EXIT: cannot unlink pidfile '%s'.", pidfile); } #ifdef ENABLE_HTTPS + delta_shutdown_time("free openssl structures"); security_clean_openssl(); #endif - info("EXIT: all done - netdata is now exiting - bye bye..."); + + delta_shutdown_time("remove incomplete shutdown file"); + (void) unlink(agent_incomplete_shutdown_file); + + delta_shutdown_time("exit"); + + usec_t ended_ut = now_monotonic_usec(); + info("NETDATA SHUTDOWN: completed in %llu ms - netdata is now exiting - bye bye...", (ended_ut - started_ut) / USEC_PER_MS); exit(ret); } @@ -225,6 +623,32 @@ int killpid(pid_t pid) { return ret; } +static void set_nofile_limit(struct rlimit *rl) { + // get the num files allowed + if(getrlimit(RLIMIT_NOFILE, rl) != 0) { + error("getrlimit(RLIMIT_NOFILE) failed"); + return; + } + + info("resources control: allowed file descriptors: soft = %zu, max = %zu", + (size_t) rl->rlim_cur, (size_t) rl->rlim_max); + + // make the soft/hard limits equal + rl->rlim_cur = rl->rlim_max; + if (setrlimit(RLIMIT_NOFILE, rl) != 0) { + error("setrlimit(RLIMIT_NOFILE, { %zu, %zu }) failed", (size_t)rl->rlim_cur, (size_t)rl->rlim_max); + } + + // sanity check to make sure we have enough file descriptors available to open + if (getrlimit(RLIMIT_NOFILE, rl) != 0) { + error("getrlimit(RLIMIT_NOFILE) failed"); + return; + } + + if (rl->rlim_cur < 1024) + error("Number of open file descriptors allowed for this process is too low (RLIMIT_NOFILE=%zu)", (size_t)rl->rlim_cur); +} + void cancel_main_threads() { error_log_limit_unlimited(); @@ -408,6 +832,9 @@ static void log_init(void) { snprintfz(filename, FILENAME_MAX, "%s/error.log", netdata_configured_log_dir); stderr_filename = config_get(CONFIG_SECTION_LOGS, "error", filename); + snprintfz(filename, FILENAME_MAX, "%s/collector.log", netdata_configured_log_dir); + stdcollector_filename = config_get(CONFIG_SECTION_LOGS, "collector", filename); + snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir); stdaccess_filename = config_get(CONFIG_SECTION_LOGS, "access", filename); @@ -679,8 +1106,9 @@ static void get_netdata_configured_variables() { // ------------------------------------------------------------------------ // get default Database Engine page cache size in MiB - db_engine_use_malloc = config_get_boolean(CONFIG_SECTION_DB, "dbengine page cache with malloc", CONFIG_BOOLEAN_YES); default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb); + db_engine_journal_check = config_get_boolean(CONFIG_SECTION_DB, "dbengine enable journal integrity check", CONFIG_BOOLEAN_NO); + if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) { error("Invalid page cache size %d given. Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB); default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB; @@ -731,14 +1159,14 @@ static void get_netdata_configured_variables() { // -------------------------------------------------------------------- - rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time); + rrdset_free_obsolete_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s); // Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short // cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at // https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information. - if (rrdset_free_obsolete_time < 10) { - rrdset_free_obsolete_time = 10; + if (rrdset_free_obsolete_time_s < 10) { + rrdset_free_obsolete_time_s = 10; info("The \"cleanup obsolete charts after seconds\" option was set to 10 seconds."); - config_set_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time); + config_set_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s); } gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above); @@ -746,12 +1174,13 @@ static void get_netdata_configured_variables() { gap_when_lost_iterations_above = 1; config_set_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above); } + gap_when_lost_iterations_above += 2; // -------------------------------------------------------------------- // get various system parameters get_system_HZ(); - get_system_cpus(); + get_system_cpus_uncached(); get_system_pid_max(); @@ -874,7 +1303,30 @@ void post_conf_load(char **user) appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL); } +#define delta_startup_time(msg) \ + { \ + usec_t now_ut = now_monotonic_usec(); \ + if(prev_msg) \ + info("NETDATA STARTUP: in %7llu ms, %s - next: %s", (now_ut - last_ut) / USEC_PER_MS, prev_msg, msg); \ + else \ + info("NETDATA STARTUP: next: %s", msg); \ + last_ut = now_ut; \ + prev_msg = msg; \ + } + +int pgc_unittest(void); +int mrg_unittest(void); +int julytest(void); + int main(int argc, char **argv) { + // initialize the system clocks + clocks_init(); + usec_t started_ut = now_monotonic_usec(); + usec_t last_ut = started_ut; + const char *prev_msg = NULL; + // Initialize stderror avoiding coredump when info() or error() is called + stderror = stderr; + int i; int config_loaded = 0; int dont_fork = 0; @@ -1001,7 +1453,7 @@ int main(int argc, char **argv) { default_health_enabled = 0; storage_tiers = 1; registry_init(); - if(rrd_init("unittest", NULL)) { + if(rrd_init("unittest", NULL, true)) { fprintf(stderr, "rrd_init failed for unittest\n"); return 1; } @@ -1027,11 +1479,6 @@ int main(int argc, char **argv) { else if(strcmp(optarg, "escapetest") == 0) { return command_argument_sanitization_tests(); } -#ifdef ENABLE_ML_TESTS - else if(strcmp(optarg, "mltest") == 0) { - return test_ml(argc, argv); - } -#endif #ifdef ENABLE_DBENGINE else if(strcmp(optarg, "mctest") == 0) { unittest_running = true; @@ -1061,6 +1508,18 @@ int main(int argc, char **argv) { unittest_running = true; return metadata_unittest(); } + else if(strcmp(optarg, "pgctest") == 0) { + unittest_running = true; + return pgc_unittest(); + } + else if(strcmp(optarg, "mrgtest") == 0) { + unittest_running = true; + return mrg_unittest(); + } + else if(strcmp(optarg, "julytest") == 0) { + unittest_running = true; + return julytest(); + } else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) { optarg += strlen(createdataset_string); unsigned history_seconds = strtoul(optarg, NULL, 0); @@ -1304,19 +1763,14 @@ int main(int argc, char **argv) { } } -#ifdef _SC_OPEN_MAX if (close_open_fds == true) { // close all open file descriptors, except the standard ones // the caller may have left open files (lxc-attach has this issue) - for(int fd = (int) (sysconf(_SC_OPEN_MAX) - 1); fd > 2; fd--) - if(fd_is_valid(fd)) - close(fd); + for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR); } -#endif - if(!config_loaded) - { + if(!config_loaded) { load_netdata_conf(NULL, 0); post_conf_load(&user); load_cloud_conf(0); @@ -1327,7 +1781,6 @@ int main(int argc, char **argv) { appconfig_set(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", "false"); } - // ------------------------------------------------------------------------ // initialize netdata { @@ -1347,12 +1800,29 @@ int main(int argc, char **argv) { #endif #endif - // initialize the system clocks - clocks_init(); + // set libuv worker threads + libuv_worker_threads = (int)get_netdata_cpus() * 2; - // prepare configuration environment variables for the plugins + if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS) + libuv_worker_threads = MIN_LIBUV_WORKER_THREADS; - setenv("UV_THREADPOOL_SIZE", config_get(CONFIG_SECTION_GLOBAL, "libuv worker threads", "16"), 1); + if(libuv_worker_threads > MAX_LIBUV_WORKER_THREADS) + libuv_worker_threads = MAX_LIBUV_WORKER_THREADS; + + + libuv_worker_threads = config_get_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads); + if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS) { + libuv_worker_threads = MIN_LIBUV_WORKER_THREADS; + config_set_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads); + } + + { + char buf[20 + 1]; + snprintfz(buf, 20, "%d", libuv_worker_threads); + setenv("UV_THREADPOOL_SIZE", buf, 1); + } + + // prepare configuration environment variables for the plugins get_netdata_configured_variables(); set_global_environment(); @@ -1396,6 +1866,8 @@ int main(int argc, char **argv) { // initialize the log files open_all_log_files(); + aral_judy_init(); + get_system_timezone(); // -------------------------------------------------------------------- @@ -1414,6 +1886,7 @@ int main(int argc, char **argv) { // -------------------------------------------------------------------- // Initialize ML configuration + delta_startup_time("initialize ML"); ml_init(); // -------------------------------------------------------------------- @@ -1422,19 +1895,18 @@ int main(int argc, char **argv) { // block signals while initializing threads. // this causes the threads to block signals. + delta_startup_time("initialize signals"); signals_block(); + signals_init(); // setup the signals we want to use - // setup the signals we want to use + // -------------------------------------------------------------------- + // check which threads are enabled and initialize them - signals_init(); + delta_startup_time("initialize static threads"); // setup threads configs default_stacksize = netdata_threads_init(); - - // -------------------------------------------------------------------- - // check which threads are enabled and initialize them - for (i = 0; static_threads[i].name != NULL ; i++) { struct netdata_static_thread *st = &static_threads[i]; @@ -1454,14 +1926,17 @@ int main(int argc, char **argv) { // -------------------------------------------------------------------- // create the listening sockets + delta_startup_time("initialize web server"); + web_client_api_v1_init(); web_server_threading_selection(); if(web_server_mode != WEB_SERVER_MODE_NONE) api_listen_sockets_setup(); - } + delta_startup_time("set resource limits"); + #ifdef NETDATA_INTERNAL_CHECKS if(debug_flags != 0) { struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; @@ -1473,11 +1948,9 @@ int main(int argc, char **argv) { } #endif /* NETDATA_INTERNAL_CHECKS */ - // get the max file limit - if(getrlimit(RLIMIT_NOFILE, &rlimit_nofile) != 0) - error("getrlimit(RLIMIT_NOFILE) failed"); - else - info("resources control: allowed file descriptors: soft = %zu, max = %zu", (size_t)rlimit_nofile.rlim_cur, (size_t)rlimit_nofile.rlim_max); + set_nofile_limit(&rlimit_nofile); + + delta_startup_time("become daemon"); // fork, switch user, create pid file, set process priority if(become_daemon(dont_fork, user) == -1) @@ -1485,12 +1958,18 @@ int main(int argc, char **argv) { info("netdata started on pid %d.", getpid()); + delta_startup_time("initialize threads after fork"); + netdata_threads_init_after_fork((size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)default_stacksize)); // initialize internal registry + delta_startup_time("initialize registry"); registry_init(); + // fork the spawn server + delta_startup_time("fork the spawn server"); spawn_init(); + /* * Libuv uv_spawn() uses SIGCHLD internally: * https://github.com/libuv/libuv/blob/cc51217a317e96510fbb284721d5e6bc2af31e33/src/unix/process.c#L485 @@ -1503,15 +1982,22 @@ int main(int argc, char **argv) { // ------------------------------------------------------------------------ // initialize rrd, registry, health, rrdpush, etc. + delta_startup_time("collecting system info"); + netdata_anonymous_statistics_enabled=-1; struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info)); + __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED); get_system_info(system_info); system_info->hops = 0; get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist); - if(rrd_init(netdata_configured_hostname, system_info)) + delta_startup_time("initialize RRD structures"); + + if(rrd_init(netdata_configured_hostname, system_info, false)) fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname); + delta_startup_time("check for incomplete shutdown"); + char agent_crash_file[FILENAME_MAX + 1]; char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir); @@ -1526,6 +2012,8 @@ int main(int argc, char **argv) { // ------------------------------------------------------------------------ // Claim netdata agent to a cloud endpoint + delta_startup_time("collect claiming info"); + if (claiming_pending_arguments) claim_agent(claiming_pending_arguments); load_claiming_state(); @@ -1536,11 +2024,14 @@ int main(int argc, char **argv) { error_log_limit_reset(); // Load host labels + delta_startup_time("collect host labels"); reload_host_labels(); // ------------------------------------------------------------------------ // spawn the threads + delta_startup_time("start the static threads"); + web_server_config_options(); netdata_zero_metrics_enabled = config_get_boolean_ondemand(CONFIG_SECTION_DB, "enable zero metrics", CONFIG_BOOLEAN_NO); @@ -1561,9 +2052,14 @@ int main(int argc, char **argv) { // ------------------------------------------------------------------------ // Initialize netdata agent command serving from cli and signals + delta_startup_time("initialize commands API"); + commands_init(); - info("netdata initialization completed. Enjoy real-time performance monitoring!"); + delta_startup_time("ready"); + + usec_t ready_ut = now_monotonic_usec(); + info("NETDATA STARTUP: completed in %llu ms. Enjoy real-time performance monitoring!", (ready_ut - started_ut) / USEC_PER_MS); netdata_ready = 1; send_statistics("START", "-", "-"); diff --git a/daemon/main.h b/daemon/main.h index a4e2b3aa7..8704d6097 100644 --- a/daemon/main.h +++ b/daemon/main.h @@ -27,4 +27,35 @@ int killpid(pid_t pid); void netdata_cleanup_and_exit(int ret) NORETURN; void send_statistics(const char *action, const char *action_result, const char *action_data); +typedef enum { + ABILITY_DATA_QUERIES = (1 << 0), + ABILITY_WEB_REQUESTS = (1 << 1), + ABILITY_STREAMING_CONNECTIONS = (1 << 2), + SERVICE_MAINTENANCE = (1 << 3), + SERVICE_COLLECTORS = (1 << 4), + SERVICE_ML_TRAINING = (1 << 5), + SERVICE_ML_PREDICTION = (1 << 6), + SERVICE_REPLICATION = (1 << 7), + SERVICE_WEB_SERVER = (1 << 8), + SERVICE_ACLK = (1 << 9), + SERVICE_HEALTH = (1 << 10), + SERVICE_STREAMING = (1 << 11), + SERVICE_CONTEXT = (1 << 12), + SERVICE_ANALYTICS = (1 << 13), + SERVICE_EXPORTERS = (1 << 14), +} SERVICE_TYPE; + +typedef enum { + SERVICE_THREAD_TYPE_NETDATA, + SERVICE_THREAD_TYPE_LIBUV, + SERVICE_THREAD_TYPE_EVENT_LOOP, +} SERVICE_THREAD_TYPE; + +typedef void (*force_quit_t)(void *data); +typedef void (*request_quit_t)(void *data); + +void service_exits(void); +bool service_running(SERVICE_TYPE service); +struct service_thread *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update __maybe_unused); + #endif /* NETDATA_MAIN_H */ diff --git a/daemon/service.c b/daemon/service.c index 6db2ef69f..9761abd02 100644 --- a/daemon/service.c +++ b/daemon/service.c @@ -22,6 +22,10 @@ #define WORKER_JOB_SAVE_CHART 13 #define WORKER_JOB_DELETE_CHART 14 #define WORKER_JOB_FREE_DIMENSION 15 +#define WORKER_JOB_PGC_MAIN_EVICT 16 +#define WORKER_JOB_PGC_MAIN_FLUSH 17 +#define WORKER_JOB_PGC_OPEN_EVICT 18 +#define WORKER_JOB_PGC_OPEN_FLUSH 19 static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) { RRDSET *st = rd->rrdset; @@ -48,13 +52,13 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) { size_t tiers_available = 0, tiers_said_no_retention = 0; for(size_t tier = 0; tier < storage_tiers ;tier++) { - if(rd->tiers[tier]) { + if(rd->tiers[tier].db_collection_handle) { tiers_available++; - if(rd->tiers[tier]->collect_ops->finalize(rd->tiers[tier]->db_collection_handle)) + if(rd->tiers[tier].collect_ops->finalize(rd->tiers[tier].db_collection_handle)) tiers_said_no_retention++; - rd->tiers[tier]->db_collection_handle = NULL; + rd->tiers[tier].db_collection_handle = NULL; } } @@ -83,7 +87,7 @@ static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensio dfe_start_write(st->rrddim_root_index, rd) { if(unlikely( all_dimensions || - (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->last_collected_time.tv_sec + rrdset_free_obsolete_time < now)) + (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now)) )) { if(dictionary_acquired_item_references(rd_dfe.item) == 1) { @@ -142,9 +146,9 @@ static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) { continue; if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) - && st->last_accessed_time + rrdset_free_obsolete_time < now - && st->last_updated.tv_sec + rrdset_free_obsolete_time < now - && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now + && st->last_accessed_time_s + rrdset_free_obsolete_time_s < now + && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now + && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now )) { svc_rrdset_obsolete_to_archive(st); } @@ -166,10 +170,10 @@ static void svc_rrdset_check_obsoletion(RRDHOST *host) { if(rrdset_is_replicating(st)) continue; - last_entry_t = rrdset_last_entry_t(st); + last_entry_t = rrdset_last_entry_s(st); - if(last_entry_t && last_entry_t < host->senders_connect_time && - host->senders_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every + if(last_entry_t && last_entry_t < host->child_connect_time && + host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every < now) rrdset_is_obsolete(st); @@ -196,10 +200,10 @@ static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() { && host->trigger_chart_obsoletion_check && ( ( - host->senders_last_chart_command - && host->senders_last_chart_command + host->health_delay_up_to < now_realtime_sec() + host->child_last_chart_command + && host->child_last_chart_command + host->health.health_delay_up_to < now_realtime_sec() ) - || (host->senders_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec()) + || (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec()) ) ) { svc_rrdset_check_obsoletion(host); @@ -238,7 +242,7 @@ restart_after_removal: } worker_is_busy(WORKER_JOB_FREE_HOST); - rrdhost_free(host, 0); + rrdhost_free___while_having_rrd_wrlock(host, false); goto restart_after_removal; } @@ -276,6 +280,10 @@ void *service_main(void *ptr) worker_register_job_name(WORKER_JOB_SAVE_CHART, "save chart"); worker_register_job_name(WORKER_JOB_DELETE_CHART, "delete chart"); worker_register_job_name(WORKER_JOB_FREE_DIMENSION, "free dimension"); + worker_register_job_name(WORKER_JOB_PGC_MAIN_EVICT, "main cache evictions"); + worker_register_job_name(WORKER_JOB_PGC_MAIN_FLUSH, "main cache flushes"); + worker_register_job_name(WORKER_JOB_PGC_OPEN_EVICT, "open cache evictions"); + worker_register_job_name(WORKER_JOB_PGC_OPEN_FLUSH, "open cache flushes"); netdata_thread_cleanup_push(service_main_cleanup, ptr); heartbeat_t hb; @@ -284,7 +292,7 @@ void *service_main(void *ptr) debug(D_SYSTEM, "Service thread starts"); - while (!netdata_exit) { + while (service_running(SERVICE_MAINTENANCE)) { worker_is_idle(); heartbeat_next(&hb, step); diff --git a/daemon/static_threads.c b/daemon/static_threads.c index b7730bc31..d93cfe9d0 100644 --- a/daemon/static_threads.c +++ b/daemon/static_threads.c @@ -7,6 +7,7 @@ void *analytics_main(void *ptr); void *cpuidlejitter_main(void *ptr); void *global_statistics_main(void *ptr); void *global_statistics_workers_main(void *ptr); +void *global_statistics_sqlite3_main(void *ptr); void *health_main(void *ptr); void *pluginsd_main(void *ptr); void *service_main(void *ptr); @@ -18,7 +19,7 @@ extern bool global_statistics_enabled; const struct netdata_static_thread static_threads_common[] = { { - .name = "PLUGIN[timex]", + .name = "P[timex]", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "timex", .enabled = 1, @@ -27,7 +28,7 @@ const struct netdata_static_thread static_threads_common[] = { .start_routine = timex_main }, { - .name = "PLUGIN[idlejitter]", + .name = "P[idlejitter]", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "idlejitter", .enabled = 1, @@ -35,6 +36,15 @@ const struct netdata_static_thread static_threads_common[] = { .init_routine = NULL, .start_routine = cpuidlejitter_main }, + { + .name = "HEALTH", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = health_main + }, { .name = "ANALYTICS", .config_section = NULL, @@ -45,7 +55,7 @@ const struct netdata_static_thread static_threads_common[] = { .start_routine = analytics_main }, { - .name = "GLOBAL_STATS", + .name = "STATS_GLOBAL", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "netdata monitoring", .env_name = "NETDATA_INTERNALS_MONITORING", @@ -56,7 +66,7 @@ const struct netdata_static_thread static_threads_common[] = { .start_routine = global_statistics_main }, { - .name = "WORKERS_STATS", + .name = "STATS_WORKERS", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "netdata monitoring", .env_name = "NETDATA_INTERNALS_MONITORING", @@ -66,6 +76,17 @@ const struct netdata_static_thread static_threads_common[] = { .init_routine = NULL, .start_routine = global_statistics_workers_main }, + { + .name = "STATS_SQLITE3", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "netdata monitoring", + .env_name = "NETDATA_INTERNALS_MONITORING", + .global_variable = &global_statistics_enabled, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = global_statistics_sqlite3_main + }, { .name = "PLUGINSD", .config_section = NULL, @@ -85,7 +106,7 @@ const struct netdata_static_thread static_threads_common[] = { .start_routine = service_main }, { - .name = "STATSD", + .name = "STATSD_FLUSH", .config_section = NULL, .config_name = NULL, .enabled = 1, @@ -103,7 +124,7 @@ const struct netdata_static_thread static_threads_common[] = { .start_routine = exporting_main }, { - .name = "STREAM", + .name = "SNDR[localhost]", .config_section = NULL, .config_name = NULL, .enabled = 0, @@ -112,7 +133,7 @@ const struct netdata_static_thread static_threads_common[] = { .start_routine = rrdpush_sender_thread }, { - .name = "WEB_SERVER[static1]", + .name = "WEB[1]", .config_section = NULL, .config_name = NULL, .enabled = 0, @@ -123,7 +144,7 @@ const struct netdata_static_thread static_threads_common[] = { #ifdef ENABLE_ACLK { - .name = "ACLK_Main", + .name = "ACLK_MAIN", .config_section = NULL, .config_name = NULL, .enabled = 1, @@ -144,13 +165,13 @@ const struct netdata_static_thread static_threads_common[] = { }, { - .name = "REPLICATION", - .config_section = NULL, - .config_name = NULL, - .enabled = 1, - .thread = NULL, - .init_routine = NULL, - .start_routine = replication_thread_main + .name = "REPLAY[1]", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = replication_thread_main }, // terminator diff --git a/daemon/static_threads_freebsd.c b/daemon/static_threads_freebsd.c index 48066bff5..cc150faf9 100644 --- a/daemon/static_threads_freebsd.c +++ b/daemon/static_threads_freebsd.c @@ -6,7 +6,7 @@ extern void *freebsd_main(void *ptr); const struct netdata_static_thread static_threads_freebsd[] = { { - .name = "PLUGIN[freebsd]", + .name = "P[freebsd]", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "freebsd", .enabled = 1, diff --git a/daemon/static_threads_linux.c b/daemon/static_threads_linux.c index 260b2c176..54307eccf 100644 --- a/daemon/static_threads_linux.c +++ b/daemon/static_threads_linux.c @@ -10,7 +10,7 @@ extern void *timex_main(void *ptr); const struct netdata_static_thread static_threads_linux[] = { { - .name = "PLUGIN[tc]", + .name = "P[tc]", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "tc", .enabled = 1, @@ -19,7 +19,7 @@ const struct netdata_static_thread static_threads_linux[] = { .start_routine = tc_main }, { - .name = "PLUGIN[diskspace]", + .name = "P[diskspace]", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "diskspace", .enabled = 1, @@ -28,7 +28,7 @@ const struct netdata_static_thread static_threads_linux[] = { .start_routine = diskspace_main }, { - .name = "PLUGIN[proc]", + .name = "P[proc]", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "proc", .enabled = 1, @@ -37,7 +37,7 @@ const struct netdata_static_thread static_threads_linux[] = { .start_routine = proc_main }, { - .name = "PLUGIN[cgroups]", + .name = "P[cgroups]", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "cgroups", .enabled = 1, diff --git a/daemon/static_threads_macos.c b/daemon/static_threads_macos.c index 72c032454..aaf7df6f6 100644 --- a/daemon/static_threads_macos.c +++ b/daemon/static_threads_macos.c @@ -6,7 +6,7 @@ extern void *macos_main(void *ptr); const struct netdata_static_thread static_threads_macos[] = { { - .name = "PLUGIN[macos]", + .name = "P[macos]", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "macos", .enabled = 1, diff --git a/daemon/system-info.sh b/daemon/system-info.sh index 68cdc4812..1e334a3d1 100755 --- a/daemon/system-info.sh +++ b/daemon/system-info.sh @@ -217,6 +217,9 @@ if [ -n "${lscpu}" ] && lscpu > /dev/null 2>&1; then LCPU_COUNT="$(echo "${lscpu_output}" | grep "^CPU(s):" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" CPU_VENDOR="$(echo "${lscpu_output}" | grep "^Vendor ID:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" CPU_MODEL="$(echo "${lscpu_output}" | grep "^Model name:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + if grep -q "^lxcfs /proc" /proc/self/mounts 2>/dev/null && count=$(grep -c ^processor /proc/cpuinfo 2>/dev/null); then + LCPU_COUNT="$count" + fi possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU max MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')" if [ -z "$possible_cpu_freq" ]; then possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')" @@ -437,7 +440,7 @@ CLOUD_INSTANCE_TYPE="unknown" CLOUD_INSTANCE_REGION="unknown" if [ "${VIRTUALIZATION}" != "none" ] && command -v curl > /dev/null 2>&1; then - # Returned HTTP status codes: GCP is 200, AWS is 200, DO is 404. + # Returned HTTP status codes: GCP is 200, AWS is 200, DO is 404. curl --fail -s -m 1 --noproxy "*" http://169.254.169.254 >/dev/null 2>&1 ret=$? # anything but operation timeout. diff --git a/daemon/unit_test.c b/daemon/unit_test.c index f69861869..52b55c4e5 100644 --- a/daemon/unit_test.c +++ b/daemon/unit_test.c @@ -439,7 +439,7 @@ int unit_test_str2ld() { } int unit_test_buffer() { - BUFFER *wb = buffer_create(1); + BUFFER *wb = buffer_create(1, NULL); char string[2048 + 1]; char final[9000 + 1]; int i; @@ -1289,7 +1289,7 @@ int run_test(struct test *test) fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT ", found " NETDATA_DOUBLE_FORMAT ", %s\n", test->name, rrddim_name(rd), c+1, - (int64_t)((rrdset_first_entry_t(st) + c * st->update_every) - time_start), + (int64_t)((rrdset_first_entry_s(st) + c * st->update_every) - time_start), n, v, (same)?"OK":"### E R R O R ###"); if(!same) errors++; @@ -1301,7 +1301,7 @@ int run_test(struct test *test) fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT ", found " NETDATA_DOUBLE_FORMAT ", %s\n", test->name, rrddim_name(rd2), c+1, - (int64_t)((rrdset_first_entry_t(st) + c * st->update_every) - time_start), + (int64_t)((rrdset_first_entry_s(st) + c * st->update_every) - time_start), n, v, (same)?"OK":"### E R R O R ###"); if(!same) errors++; } @@ -1349,7 +1349,7 @@ static int test_variable_renames(void) { rrddim_reset_name(st, rd2, "DIM2NAME2"); fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); - BUFFER *buf = buffer_create(1); + BUFFER *buf = buffer_create(1, NULL); health_api_v1_chart_variables2json(st, buf); fprintf(stderr, "%s", buffer_tostring(buf)); buffer_free(buf); @@ -1604,7 +1604,7 @@ int test_sqlite(void) { return 1; } - BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE); + BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE, NULL); char *uuid_str = "0000_000"; buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str); @@ -1775,7 +1775,6 @@ static inline void rrddim_set_by_pointer_fake_time(RRDDIM *rd, collected_number static RRDHOST *dbengine_rrdhost_find_or_create(char *name) { /* We don't want to drop metrics when generating load, we prefer to block data generation itself */ - rrdeng_drop_metrics_under_page_cache_pressure = 0; return rrdhost_find_or_create( name @@ -1859,10 +1858,10 @@ static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDI now_realtime_timeval(&now); rrdset_timed_done(st[i], now, false); } - // Fluh pages for subsequent real values + // Flush pages for subsequent real values for (i = 0 ; i < CHARTS ; ++i) { for (j = 0; j < DIMS; ++j) { - rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle); + rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0].db_collection_handle); } } } @@ -1881,7 +1880,7 @@ static time_t test_dbengine_create_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS // feed it with the test data for (i = 0 ; i < CHARTS ; ++i) { for (j = 0 ; j < DIMS ; ++j) { - rd[i][j]->tiers[0]->collect_ops->change_collection_frequency(rd[i][j]->tiers[0]->db_collection_handle, update_every); + rd[i][j]->tiers[0].collect_ops->change_collection_frequency(rd[i][j]->tiers[0].db_collection_handle, update_every); rd[i][j]->last_collected_time.tv_sec = st[i]->last_collected_time.tv_sec = st[i]->last_updated.tv_sec = time_now; @@ -1932,16 +1931,16 @@ static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DI time_now = time_start + (c + 1) * update_every; for (i = 0 ; i < CHARTS ; ++i) { for (j = 0; j < DIMS; ++j) { - rd[i][j]->tiers[0]->query_ops->init(rd[i][j]->tiers[0]->db_metric_handle, &handle, time_now, time_now + QUERY_BATCH * update_every); + rd[i][j]->tiers[0].query_ops->init(rd[i][j]->tiers[0].db_metric_handle, &handle, time_now, time_now + QUERY_BATCH * update_every, STORAGE_PRIORITY_NORMAL); for (k = 0; k < QUERY_BATCH; ++k) { last = ((collected_number)i * DIMS) * REGION_POINTS[current_region] + j * REGION_POINTS[current_region] + c + k; expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE)last, SN_DEFAULT_FLAGS)); - STORAGE_POINT sp = rd[i][j]->tiers[0]->query_ops->next_metric(&handle); + STORAGE_POINT sp = rd[i][j]->tiers[0].query_ops->next_metric(&handle); value = sp.sum; - time_retrieved = sp.start_time; - end_time = sp.end_time; + time_retrieved = sp.start_time_s; + end_time = sp.end_time_s; same = (roundndd(value) == roundndd(expected)) ? 1 : 0; if(!same) { @@ -1960,7 +1959,7 @@ static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DI errors++; } } - rd[i][j]->tiers[0]->query_ops->finalize(&handle); + rd[i][j]->tiers[0].query_ops->finalize(&handle); } } } @@ -1979,10 +1978,11 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS] int current_region, time_t time_start, time_t time_end) { int update_every = REGION_UPDATE_EVERY[current_region]; - fprintf(stderr, "%s() running on region %d, start time %lld, end time %lld, update every %d...\n", __FUNCTION__, current_region, (long long)time_start, (long long)time_end, update_every); + fprintf(stderr, "%s() running on region %d, start time %lld, end time %lld, update every %d, on %d dimensions...\n", + __FUNCTION__, current_region, (long long)time_start, (long long)time_end, update_every, CHARTS * DIMS); uint8_t same; time_t time_now, time_retrieved; - int i, j, errors, value_errors = 0, time_errors = 0; + int i, j, errors, value_errors = 0, time_errors = 0, value_right = 0, time_right = 0; long c; collected_number last; NETDATA_DOUBLE value, expected; @@ -1993,7 +1993,8 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS] ONEWAYALLOC *owa = onewayalloc_create(0); RRDR *r = rrd2rrdr_legacy(owa, st[i], points, time_start, time_end, RRDR_GROUPING_AVERAGE, 0, RRDR_OPTION_NATURAL_POINTS, - NULL, NULL, 0, 0, QUERY_SOURCE_UNITTEST); + NULL, NULL, 0, 0, + QUERY_SOURCE_UNITTEST, STORAGE_PRIORITY_NORMAL); if (!r) { fprintf(stderr, " DB-engine unittest %s: empty RRDR on region %d ### E R R O R ###\n", rrdset_name(st[i]), current_region); return ++errors; @@ -2020,17 +2021,22 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS] same = (roundndd(value) == roundndd(expected)) ? 1 : 0; if(!same) { if(value_errors < 20) - fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT + fprintf(stderr, " DB-engine unittest %s/%s: point #%ld, at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT ", RRDR found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n", - rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, expected, value); + rrdset_name(st[i]), rrddim_name(rd[i][j]), (long) c+1, (unsigned long)time_now, expected, value); value_errors++; } + else + value_right++; + if(time_retrieved != time_now) { if(time_errors < 20) - fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, found RRDR timestamp %lu ### E R R O R ###\n", - rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, (unsigned long)time_retrieved); + fprintf(stderr, " DB-engine unittest %s/%s: point #%ld at %lu secs, found RRDR timestamp %lu ### E R R O R ###\n", + rrdset_name(st[i]), rrddim_name(rd[i][j]), (long)c+1, (unsigned long)time_now, (unsigned long)time_retrieved); time_errors++; } + else + time_right++; } rrddim_foreach_done(d); } @@ -2040,10 +2046,10 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS] } if(value_errors) - fprintf(stderr, "%d value errors encountered\n", value_errors); + fprintf(stderr, "%d value errors encountered (%d were ok)\n", value_errors, value_right); if(time_errors) - fprintf(stderr, "%d time errors encountered\n", time_errors); + fprintf(stderr, "%d time errors encountered (%d were ok)\n", time_errors, value_right); return errors + value_errors + time_errors; } @@ -2051,7 +2057,7 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS] int test_dbengine(void) { fprintf(stderr, "%s() running...\n", __FUNCTION__ ); - int i, j, errors, value_errors = 0, time_errors = 0, update_every, current_region; + int i, j, errors = 0, value_errors = 0, time_errors = 0, update_every, current_region; RRDHOST *host = NULL; RRDSET *st[CHARTS]; RRDDIM *rd[CHARTS][DIMS]; @@ -2074,9 +2080,7 @@ int test_dbengine(void) time_start[current_region] = 2 * API_RELATIVE_TIME_MAX; time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]); - errors = test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); - if (errors) - goto error_out; + errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); current_region = 1; //this is the second region of data update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 3 seconds @@ -2084,7 +2088,7 @@ int test_dbengine(void) for (i = 0 ; i < CHARTS ; ++i) { st[i]->update_every = update_every; for (j = 0; j < DIMS; ++j) { - rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle); + rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0].db_collection_handle); } } @@ -2093,9 +2097,7 @@ int test_dbengine(void) time_start[current_region] += update_every - time_start[current_region] % update_every; time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]); - errors = test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); - if (errors) - goto error_out; + errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); current_region = 2; //this is the third region of data update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 1 seconds @@ -2103,7 +2105,7 @@ int test_dbengine(void) for (i = 0 ; i < CHARTS ; ++i) { st[i]->update_every = update_every; for (j = 0; j < DIMS; ++j) { - rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle); + rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0].db_collection_handle); } } @@ -2112,26 +2114,22 @@ int test_dbengine(void) time_start[current_region] += update_every - time_start[current_region] % update_every; time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]); - errors = test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); - if (errors) - goto error_out; + errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); for (current_region = 0 ; current_region < REGIONS ; ++current_region) { - errors = test_dbengine_check_rrdr(st, rd, current_region, time_start[current_region], time_end[current_region]); - if (errors) - goto error_out; + errors += test_dbengine_check_rrdr(st, rd, current_region, time_start[current_region], time_end[current_region]); } current_region = 1; update_every = REGION_UPDATE_EVERY[current_region]; // use the maximum update_every = 3 - errors = 0; long points = (time_end[REGIONS - 1] - time_start[0]) / update_every; // cover all time regions with RRDR long point_offset = (time_start[current_region] - time_start[0]) / update_every; for (i = 0 ; i < CHARTS ; ++i) { ONEWAYALLOC *owa = onewayalloc_create(0); RRDR *r = rrd2rrdr_legacy(owa, st[i], points, time_start[0] + update_every, time_end[REGIONS - 1], RRDR_GROUPING_AVERAGE, 0, - RRDR_OPTION_NATURAL_POINTS, NULL, NULL, 0, 0, QUERY_SOURCE_UNITTEST); + RRDR_OPTION_NATURAL_POINTS, NULL, NULL, 0, 0, + QUERY_SOURCE_UNITTEST, STORAGE_PRIORITY_NORMAL); if (!r) { fprintf(stderr, " DB-engine unittest %s: empty RRDR ### E R R O R ###\n", rrdset_name(st[i])); @@ -2180,7 +2178,7 @@ int test_dbengine(void) } onewayalloc_destroy(owa); } -error_out: + rrd_wrlock(); rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].instance); rrdhost_delete_charts(host); @@ -2269,7 +2267,7 @@ static void generate_dbengine_chart(void *arg) thread_info->time_max = time_current; } for (j = 0; j < DSET_DIMS; ++j) { - rrdeng_store_metric_finalize((rd[j])->tiers[0]->db_collection_handle); + rrdeng_store_metric_finalize((rd[j])->tiers[0].db_collection_handle); } } @@ -2329,7 +2327,7 @@ void generate_dbengine_dataset(unsigned history_seconds) } freez(thread_info); rrd_wrlock(); - rrdhost_free(host, 1); + rrdhost_free___while_having_rrd_wrlock(host, true); rrd_unlock(); } @@ -2389,13 +2387,13 @@ static void query_dbengine_chart(void *arg) time_before = MIN(time_after + duration, time_max); /* up to 1 hour queries */ } - rd->tiers[0]->query_ops->init(rd->tiers[0]->db_metric_handle, &handle, time_after, time_before); + rd->tiers[0].query_ops->init(rd->tiers[0].db_metric_handle, &handle, time_after, time_before, STORAGE_PRIORITY_NORMAL); ++thread_info->queries_nr; for (time_now = time_after ; time_now <= time_before ; time_now += update_every) { generatedv = generate_dbengine_chart_value(i, j, time_now); expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE) generatedv, SN_DEFAULT_FLAGS)); - if (unlikely(rd->tiers[0]->query_ops->is_finished(&handle))) { + if (unlikely(rd->tiers[0].query_ops->is_finished(&handle))) { if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT ", found data gap, ### E R R O R ###\n", @@ -2405,10 +2403,10 @@ static void query_dbengine_chart(void *arg) break; } - STORAGE_POINT sp = rd->tiers[0]->query_ops->next_metric(&handle); + STORAGE_POINT sp = rd->tiers[0].query_ops->next_metric(&handle); value = sp.sum; - time_retrieved = sp.start_time; - end_time = sp.end_time; + time_retrieved = sp.start_time_s; + end_time = sp.end_time_s; if (!netdata_double_isnumber(value)) { if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ @@ -2443,7 +2441,7 @@ static void query_dbengine_chart(void *arg) } } } - rd->tiers[0]->query_ops->finalize(&handle); + rd->tiers[0].query_ops->finalize(&handle); } while(!thread_info->done); if(value_errors) -- cgit v1.2.3