summaryrefslogtreecommitdiffstats
path: root/daemon
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2023-02-06 16:11:30 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2023-02-06 16:11:30 +0000
commitaa2fe8ccbfcb117efa207d10229eeeac5d0f97c7 (patch)
tree941cbdd387b41c1a81587c20a6df9f0e5e0ff7ab /daemon
parentAdding upstream version 1.37.1. (diff)
downloadnetdata-aa2fe8ccbfcb117efa207d10229eeeac5d0f97c7.tar.xz
netdata-aa2fe8ccbfcb117efa207d10229eeeac5d0f97c7.zip
Adding upstream version 1.38.0.upstream/1.38.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'daemon')
-rw-r--r--daemon/README.md43
-rw-r--r--daemon/analytics.c24
-rw-r--r--daemon/commands.c28
-rw-r--r--daemon/commands.h1
-rw-r--r--daemon/common.c35
-rw-r--r--daemon/common.h3
-rw-r--r--daemon/config/README.md70
-rw-r--r--daemon/event_loop.c59
-rw-r--r--daemon/event_loop.h53
-rw-r--r--daemon/global_statistics.c1946
-rw-r--r--daemon/global_statistics.h29
-rw-r--r--daemon/main.c608
-rw-r--r--daemon/main.h31
-rw-r--r--daemon/service.c38
-rw-r--r--daemon/static_threads.c51
-rw-r--r--daemon/static_threads_freebsd.c2
-rw-r--r--daemon/static_threads_linux.c8
-rw-r--r--daemon/static_threads_macos.c2
-rwxr-xr-xdaemon/system-info.sh5
-rw-r--r--daemon/unit_test.c96
20 files changed, 2654 insertions, 478 deletions
diff --git a/daemon/README.md b/daemon/README.md
index c5951c694..7a17506bb 100644
--- a/daemon/README.md
+++ b/daemon/README.md
@@ -1,7 +1,11 @@
<!--
title: "Netdata daemon"
-date: 2020-04-29
-custom_edit_url: https://github.com/netdata/netdata/edit/master/daemon/README.md
+date: "2020-04-29"
+custom_edit_url: "https://github.com/netdata/netdata/edit/master/daemon/README.md"
+sidebar_label: "Netdata daemon"
+learn_status: "Published"
+learn_topic_type: "References"
+learn_rel_path: "References/Configuration"
-->
# Netdata daemon
@@ -11,7 +15,7 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/daemon/README.md
- You can start Netdata by executing it with `/usr/sbin/netdata` (the installer will also start it).
- You can stop Netdata by killing it with `killall netdata`. You can stop and start Netdata at any point. When
- exiting, the [database engine](/database/engine/README.md) saves metrics to `/var/cache/netdata/dbengine/` so that
+ exiting, the [database engine](https://github.com/netdata/netdata/blob/master/database/engine/README.md) saves metrics to `/var/cache/netdata/dbengine/` so that
it can continue when started again.
Access to the web site, for all graphs, is by default on port `19999`, so go to:
@@ -202,29 +206,26 @@ The command line options of the Netdata 1.10.0 version are the following:
- USR2 Reload health configuration.
```
-You can send commands during runtime via [netdatacli](/cli/README.md).
+You can send commands during runtime via [netdatacli](https://github.com/netdata/netdata/blob/master/cli/README.md).
## Log files
-Netdata uses 3 log files:
+Netdata uses 4 log files:
1. `error.log`
-2. `access.log`
-3. `debug.log`
+2. `collector.log`
+3. `access.log`
+4. `debug.log`
-Any of them can be disabled by setting it to `/dev/null` or `none` in `netdata.conf`. By default `error.log` and
-`access.log` are enabled. `debug.log` is only enabled if debugging/tracing is also enabled (Netdata needs to be compiled
-with debugging enabled).
+Any of them can be disabled by setting it to `/dev/null` or `none` in `netdata.conf`. By default `error.log`,
+`collector.log`, and `access.log` are enabled. `debug.log` is only enabled if debugging/tracing is also enabled
+(Netdata needs to be compiled with debugging enabled).
Log files are stored in `/var/log/netdata/` by default.
### error.log
-The `error.log` is the `stderr` of the `netdata` daemon and all external plugins
-run by `netdata`.
-
-So if any process, in the Netdata process tree, writes anything to its standard error,
-it will appear in `error.log`.
+The `error.log` is the `stderr` of the `netdata` daemon .
For most Netdata programs (including standard external plugins shipped by netdata), the following lines may appear:
@@ -239,6 +240,16 @@ program continues to run.
When a Netdata program cannot run at all, a `FATAL` line is logged.
+### collector.log
+
+The `collector.log` is the `stderr` of all [collectors](https://github.com/netdata/netdata/blob/master/collectors/COLLECTORS.md)
+ run by `netdata`.
+
+So if any process, in the Netdata process tree, writes anything to its standard error,
+it will appear in `collector.log`.
+
+Data stored inside this file follows pattern already described for `error.log`.
+
### access.log
The `access.log` logs web requests. The format is:
@@ -361,7 +372,7 @@ all programs), edit `netdata.conf` and set:
process nice level = -1
```
-then execute this to [restart Netdata](/docs/configure/start-stop-restart.md):
+then execute this to [restart Netdata](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md):
```sh
sudo systemctl restart netdata
diff --git a/daemon/analytics.c b/daemon/analytics.c
index 3d0e514d6..a2f52bc8f 100644
--- a/daemon/analytics.c
+++ b/daemon/analytics.c
@@ -223,9 +223,7 @@ void analytics_mirrored_hosts(void)
if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))
continue;
- netdata_mutex_lock(&host->receiver_lock);
- ((host->receiver || host == localhost) ? reachable++ : unreachable++);
- netdata_mutex_unlock(&host->receiver_lock);
+ ((host == localhost || !rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN)) ? reachable++ : unreachable++);
count++;
}
@@ -243,7 +241,7 @@ void analytics_exporters(void)
{
//when no exporters are available, an empty string will be sent
//decide if something else is more suitable (but probably not null)
- BUFFER *bi = buffer_create(1000);
+ BUFFER *bi = buffer_create(1000, NULL);
analytics_exporting_connectors(bi);
analytics_set_data_str(&analytics_data.netdata_exporting_connectors, (char *)buffer_tostring(bi));
buffer_free(bi);
@@ -280,7 +278,7 @@ void analytics_collectors(void)
RRDSET *st;
DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED);
char name[500];
- BUFFER *bt = buffer_create(1000);
+ BUFFER *bt = buffer_create(1000, NULL);
rrdset_foreach_read(st, localhost) {
if(!rrdset_is_available_for_viewers(st))
@@ -335,7 +333,7 @@ void analytics_alarms_notifications(void)
debug(D_ANALYTICS, "Executing %s", script);
- BUFFER *b = buffer_create(1000);
+ BUFFER *b = buffer_create(1000, NULL);
int cnt = 0;
FILE *fp_child_input;
FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input);
@@ -382,7 +380,7 @@ void analytics_get_install_type(void)
*/
void analytics_https(void)
{
- BUFFER *b = buffer_create(30);
+ BUFFER *b = buffer_create(30, NULL);
#ifdef ENABLE_HTTPS
analytics_exporting_connectors_ssl(b);
buffer_strcat(b, netdata_ssl_client_ctx && rrdhost_flag_check(localhost, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED) && localhost->sender->ssl.flags == NETDATA_SSL_HANDSHAKE_COMPLETE ? "streaming|" : "|");
@@ -554,7 +552,7 @@ void analytics_gather_mutable_meta_data(void)
snprintfz(b, 6, "%d", analytics_data.dashboard_hits);
analytics_set_data(&analytics_data.netdata_dashboard_used, b);
- snprintfz(b, 6, "%zu", rrd_hosts_available);
+ snprintfz(b, 6, "%zu", rrdhost_hosts_available());
analytics_set_data(&analytics_data.netdata_config_hosts_available, b);
}
}
@@ -587,12 +585,12 @@ void *analytics_main(void *ptr)
debug(D_ANALYTICS, "Analytics thread starts");
//first delay after agent start
- while (!netdata_exit && likely(sec <= ANALYTICS_INIT_SLEEP_SEC)) {
+ while (service_running(SERVICE_ANALYTICS) && likely(sec <= ANALYTICS_INIT_SLEEP_SEC)) {
heartbeat_next(&hb, step_ut);
sec++;
}
- if (unlikely(netdata_exit))
+ if (unlikely(!service_running(SERVICE_ANALYTICS)))
goto cleanup;
analytics_gather_immutable_meta_data();
@@ -605,7 +603,7 @@ void *analytics_main(void *ptr)
heartbeat_next(&hb, step_ut * 2);
sec += 2;
- if (unlikely(netdata_exit))
+ if (unlikely(!service_running(SERVICE_ANALYTICS)))
break;
if (likely(sec < ANALYTICS_HEARTBEAT))
@@ -677,7 +675,7 @@ void set_late_global_environment()
analytics_set_data_str(&analytics_data.netdata_config_release_channel, (char *)get_release_channel());
{
- BUFFER *bi = buffer_create(1000);
+ BUFFER *bi = buffer_create(1000, NULL);
analytics_build_info(bi);
analytics_set_data_str(&analytics_data.netdata_buildinfo, (char *)buffer_tostring(bi));
buffer_free(bi);
@@ -838,7 +836,7 @@ void set_global_environment()
setenv("NETDATA_HOST_PREFIX", netdata_configured_host_prefix, 1);
{
- BUFFER *user_plugins_dirs = buffer_create(FILENAME_MAX);
+ BUFFER *user_plugins_dirs = buffer_create(FILENAME_MAX, NULL);
for (size_t i = 1; i < PLUGINSD_MAX_DIRECTORIES && plugin_directories[i]; i++) {
if (i > 1)
diff --git a/daemon/commands.c b/daemon/commands.c
index 6288ee59b..377a4002f 100644
--- a/daemon/commands.c
+++ b/daemon/commands.c
@@ -46,6 +46,7 @@ static cmd_status_t cmd_read_config_execute(char *args, char **message);
static cmd_status_t cmd_write_config_execute(char *args, char **message);
static cmd_status_t cmd_ping_execute(char *args, char **message);
static cmd_status_t cmd_aclk_state(char *args, char **message);
+static cmd_status_t cmd_version(char *args, char **message);
static command_info_t command_info_array[] = {
{"help", cmd_help_execute, CMD_TYPE_HIGH_PRIORITY}, // show help menu
@@ -59,7 +60,8 @@ static command_info_t command_info_array[] = {
{"read-config", cmd_read_config_execute, CMD_TYPE_CONCURRENT},
{"write-config", cmd_write_config_execute, CMD_TYPE_ORTHOGONAL},
{"ping", cmd_ping_execute, CMD_TYPE_ORTHOGONAL},
- {"aclk-state", cmd_aclk_state, CMD_TYPE_ORTHOGONAL}
+ {"aclk-state", cmd_aclk_state, CMD_TYPE_ORTHOGONAL},
+ {"version", cmd_version, CMD_TYPE_ORTHOGONAL}
};
/* Mutexes for commands of type CMD_TYPE_ORTHOGONAL */
@@ -124,7 +126,9 @@ static cmd_status_t cmd_help_execute(char *args, char **message)
"ping\n"
" Return with 'pong' if agent is alive.\n"
"aclk-state [json]\n"
- " Returns current state of ACLK and Cloud connection. (optionally in json)\n",
+ " Returns current state of ACLK and Cloud connection. (optionally in json).\n"
+ "version\n"
+ " Returns the netdata version.\n",
MAX_COMMAND_LENGTH - 1);
return CMD_STATUS_SUCCESS;
}
@@ -216,7 +220,7 @@ static cmd_status_t cmd_reload_labels_execute(char *args, char **message)
info("COMMAND: reloading host labels.");
reload_host_labels();
- BUFFER *wb = buffer_create(10);
+ BUFFER *wb = buffer_create(10, NULL);
rrdlabels_log_to_buffer(localhost->rrdlabels, wb);
(*message)=strdupz(buffer_tostring(wb));
buffer_free(wb);
@@ -314,6 +318,18 @@ static cmd_status_t cmd_aclk_state(char *args, char **message)
return CMD_STATUS_SUCCESS;
}
+static cmd_status_t cmd_version(char *args, char **message)
+{
+ (void)args;
+
+ char version[MAX_COMMAND_LENGTH];
+ snprintfz(version, MAX_COMMAND_LENGTH -1, "%s %s", program_name, program_version);
+
+ *message = strdupz(version);
+
+ return CMD_STATUS_SUCCESS;
+}
+
static void cmd_lock_exclusive(unsigned index)
{
(void)index;
@@ -454,9 +470,13 @@ static void after_schedule_command(uv_work_t *req, int status)
static void schedule_command(uv_work_t *req)
{
- struct command_context *cmd_ctx = req->data;
+ register_libuv_worker_jobs();
+ worker_is_busy(UV_EVENT_SCHEDULE_CMD);
+ struct command_context *cmd_ctx = req->data;
cmd_ctx->status = execute_command(cmd_ctx->idx, cmd_ctx->args, &cmd_ctx->message);
+
+ worker_is_idle();
}
/* This will alter the state of the command_info_array.cmd_str
diff --git a/daemon/commands.h b/daemon/commands.h
index f0e38ce93..78bdcc779 100644
--- a/daemon/commands.h
+++ b/daemon/commands.h
@@ -25,6 +25,7 @@ typedef enum cmd {
CMD_WRITE_CONFIG,
CMD_PING,
CMD_ACLK_STATE,
+ CMD_VERSION,
CMD_TOTAL_COMMANDS
} cmd_t;
diff --git a/daemon/common.c b/daemon/common.c
index 85d638631..6eae07cff 100644
--- a/daemon/common.c
+++ b/daemon/common.c
@@ -19,3 +19,38 @@ int32_t netdata_configured_utc_offset = 0;
int netdata_ready;
int netdata_cloud_setting;
+long get_netdata_cpus(void) {
+ static long processors = 0;
+
+ if(processors)
+ return processors;
+
+ long cores_proc_stat = get_system_cpus_with_cache(false, true);
+ long cores_cpuset_v1 = (long)read_cpuset_cpus("/sys/fs/cgroup/cpuset/cpuset.cpus", cores_proc_stat);
+ long cores_cpuset_v2 = (long)read_cpuset_cpus("/sys/fs/cgroup/cpuset.cpus", cores_proc_stat);
+
+ if(cores_cpuset_v2)
+ processors = cores_cpuset_v2;
+ else if(cores_cpuset_v1)
+ processors = cores_cpuset_v1;
+ else
+ processors = cores_proc_stat;
+
+ long cores_user_configured = config_get_number(CONFIG_SECTION_GLOBAL, "cpu cores", processors);
+
+ errno = 0;
+ internal_error(true,
+ "System CPUs: %ld, ("
+ "system: %ld, cgroups cpuset v1: %ld, cgroups cpuset v2: %ld, netdata.conf: %ld"
+ ")"
+ , processors
+ , cores_proc_stat
+ , cores_cpuset_v1
+ , cores_cpuset_v2
+ , cores_user_configured
+ );
+
+ processors = cores_user_configured;
+
+ return processors;
+}
diff --git a/daemon/common.h b/daemon/common.h
index f3d868661..ca4d5c954 100644
--- a/daemon/common.h
+++ b/daemon/common.h
@@ -4,6 +4,7 @@
#define NETDATA_COMMON_H 1
#include "libnetdata/libnetdata.h"
+#include "event_loop.h"
// ----------------------------------------------------------------------------
// shortcuts for the default netdata configuration
@@ -105,4 +106,6 @@ extern int netdata_anonymous_statistics_enabled;
extern int netdata_ready;
extern int netdata_cloud_setting;
+long get_netdata_cpus(void);
+
#endif /* NETDATA_COMMON_H */
diff --git a/daemon/config/README.md b/daemon/config/README.md
index 7b4d27ecf..4a6d0bb80 100644
--- a/daemon/config/README.md
+++ b/daemon/config/README.md
@@ -1,7 +1,12 @@
<!--
title: "Daemon configuration"
description: "The Netdata Agent's daemon is installed preconfigured to collect thousands of metrics every second, but is highly configurable for real-world workloads."
-custom_edit_url: https://github.com/netdata/netdata/edit/master/daemon/config/README.md
+custom_edit_url: "https://github.com/netdata/netdata/edit/master/daemon/config/README.md"
+sidebar_label: "Daemon"
+learn_status: "Published"
+learn_topic_type: "References"
+learn_rel_path: "References/Configuration"
+learn_doc_purpose: "Explain the daemon options, the log files, the process scheduling, virtual memory, explain how the netdata.conf is used and backlink to the netdata.conf file reference"
-->
# Daemon configuration
@@ -21,24 +26,24 @@ adapt the general behavior of Netdata, in great detail. You can find all these s
accessing the URL `https://netdata.server.hostname:19999/netdata.conf`. For example check the configuration file
of [netdata.firehol.org](http://netdata.firehol.org/netdata.conf). HTTP access to this file is limited by default to
[private IPs](https://en.wikipedia.org/wiki/Private_network), via
-the [web server access lists](/web/server/README.md#access-lists).
+the [web server access lists](https://github.com/netdata/netdata/blob/master/web/server/README.md#access-lists).
`netdata.conf` has sections stated with `[section]`. You will see the following sections:
-1. `[global]` to [configure](#global-section-options) the [Netdata daemon](/daemon/README.md).
+1. `[global]` to [configure](#global-section-options) the [Netdata daemon](https://github.com/netdata/netdata/blob/master/daemon/README.md).
2. `[db]` to [configure](#db-section-options) the database of Netdata.
3. `[directories]` to [configure](#directories-section-options) the directories used by Netdata.
4. `[logs]` to [configure](#logs-section-options) the Netdata logging.
5. `[environment variables]` to [configure](#environment-variables-section-options) the environment variables used
Netdata.
-6. `[sqlite]` to [configure](#sqlite-section-options) the [Netdata daemon](/daemon/README.md) SQLite settings.
-7. `[ml]` to configure settings for [machine learning](/ml/README.md).
-8. `[health]` to [configure](#health-section-options) general settings for [health monitoring](/health/README.md).
-9. `[web]` to [configure the web server](/web/server/README.md).
-10. `[registry]` for the [Netdata registry](/registry/README.md).
-11. `[global statistics]` for the [Netdata registry](/registry/README.md).
-12. `[statsd]` for the general settings of the [stats.d.plugin](/collectors/statsd.plugin/README.md).
-13. `[plugins]` to [configure](#plugins-section-options) which [collectors](/collectors/README.md) to use and PATH
+6. `[sqlite]` to [configure](#sqlite-section-options) the [Netdata daemon](https://github.com/netdata/netdata/blob/master/daemon/README.md) SQLite settings.
+7. `[ml]` to configure settings for [machine learning](https://github.com/netdata/netdata/blob/master/ml/README.md).
+8. `[health]` to [configure](#health-section-options) general settings for [health monitoring](https://github.com/netdata/netdata/blob/master/health/README.md).
+9. `[web]` to [configure the web server](https://github.com/netdata/netdata/blob/master/web/server/README.md).
+10. `[registry]` for the [Netdata registry](https://github.com/netdata/netdata/blob/master/registry/README.md).
+11. `[global statistics]` for the [Netdata registry](https://github.com/netdata/netdata/blob/master/registry/README.md).
+12. `[statsd]` for the general settings of the [stats.d.plugin](https://github.com/netdata/netdata/blob/master/collectors/statsd.plugin/README.md).
+13. `[plugins]` to [configure](#plugins-section-options) which [collectors](https://github.com/netdata/netdata/blob/master/collectors/README.md) to use and PATH
settings.
14. `[plugin:NAME]` sections for each collector plugin, under the
comment [Per plugin configuration](#per-plugin-configuration).
@@ -49,7 +54,7 @@ comment on settings it does not currently use.
## Applying changes
-After `netdata.conf` has been modified, Netdata needs to be [restarted](/docs/configure/start-stop-restart.md) for
+After `netdata.conf` has been modified, Netdata needs to be [restarted](https://github.com/netdata/netdata/blob/master/docs/configure/start-stop-restart.md) for
changes to apply:
```bash
@@ -70,10 +75,10 @@ Please note that your data history will be lost if you have modified `history` p
| setting | default | info |
|:-------------------------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| process scheduling policy | `keep` | See [Netdata process scheduling policy](/daemon/README.md#netdata-process-scheduling-policy) |
+| process scheduling policy | `keep` | See [Netdata process scheduling policy](https://github.com/netdata/netdata/blob/master/daemon/README.md#netdata-process-scheduling-policy) |
| OOM score | `0` | |
-| glibc malloc arena max for plugins | `1` | See [Virtual memory](/daemon/README.md#virtual-memory). |
-| glibc malloc arena max for Netdata | `1` | See [Virtual memory](/daemon/README.md#virtual-memory). |
+| glibc malloc arena max for plugins | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). |
+| glibc malloc arena max for Netdata | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/daemon/README.md#virtual-memory). |
| hostname | auto-detected | The hostname of the computer running Netdata. |
| host access prefix | empty | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). |
| timezone | auto-detected | The timezone retrieved from the environment variable |
@@ -84,22 +89,22 @@ Please note that your data history will be lost if you have modified `history` p
| setting | default | info |
|:---------------------------------------------:|:----------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`. <br />`save`: Netdata will save its round robin database on exit and load it on startup. <br />`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`). <br />`ram`: The round-robin database will be temporary and it will be lost when Netdata exits. <br />`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. |
-| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](/database/README.md) for more information. |
-| storage tiers | `1` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](/database/engine/README.md#tiering). You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. |
+| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`. <br />`save`: Netdata will save its round robin database on exit and load it on startup. <br />`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`). <br />`ram`: The round-robin database will be temporary and it will be lost when Netdata exits. <br />`alloc`: Similar to `ram`, but can significantly reduce memory usage, when combined with a low retention and does not support KSM. <br />`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. Not to be used together with streaming. |
+| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](https://github.com/netdata/netdata/blob/master/database/README.md) for more information. |
+| storage tiers | `1` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. |
| dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching for _Tier 0_ Netdata metric values. |
| dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated for caching Netdata metric values of the **`N`** tier. <br /> `N belongs to [1..4]` ||
| dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). |
| dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. |
| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well. <br /> `N belongs to [1..4]` |
-| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](/docs/guides/configure/performance.md). These metrics stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](/database/engine/README.md#tiering). |
+| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). These metrics stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/database/engine/README.md#tiering). |
| dbengine tier **`N`** update every iterations | `60` | The down sampling value of each tier from the previous one. For each Tier, the greater by one Tier has N (equal to 60 by default) less data points of any metric it collects. This setting can take values from `2` up to `255`. <br /> `N belongs to [1..4]` |
| dbengine tier **`N`** back fill | `New` | Specifies the strategy of recreating missing data on each Tier from the exact lower Tier. <br /> `New`: Sees the latest point on each Tier and save new points to it only if the exact lower Tier has available points for it's observation window (`dbengine tier N update every iterations` window). <br /> `none`: No back filling is applied. <br /> `N belongs to [1..4]` |
-| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/database/README.md#ksm) |
-| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions |
+| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](https://github.com/netdata/netdata/blob/master/database/README.md#ksm) |
+| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions |
| gap when lost iterations above | `1` | |
| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. |
-| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions |
+| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions |
| delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. |
| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. |
@@ -116,7 +121,7 @@ The multiplication of all the **enabled** tiers `dbengine tier N update every i
|:-------------------:|:------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| config | `/etc/netdata` | The directory configuration files are kept. |
| stock config | `/usr/lib/netdata/conf.d` | |
-| log | `/var/log/netdata` | The directory in which the [log files](/daemon/README.md#log-files) are kept. |
+| log | `/var/log/netdata` | The directory in which the [log files](https://github.com/netdata/netdata/blob/master/daemon/README.md#log-files) are kept. |
| web | `/usr/share/netdata/web` | The directory the web static files are kept. |
| cache | `/var/cache/netdata` | The directory the memory database will be stored if and when Netdata exits. Netdata will re-read the database when it will start again, to continue from the same point. |
| lib | `/var/lib/netdata` | Contains the alarm log and the Netdata instance GUID. |
@@ -125,14 +130,14 @@ The multiplication of all the **enabled** tiers `dbengine tier N update every i
| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. |
| health config | `/etc/netdata/health.d` | The directory containing the user alarm configuration files, to override the stock configurations |
| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alarm configuration files for each collector |
-| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](/registry/README.md) database and GUID that uniquely identifies each Netdata Agent |
+| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](https://github.com/netdata/netdata/blob/master/registry/README.md) database and GUID that uniquely identifies each Netdata Agent |
### [logs] section options
| setting | default | info |
|:----------------------------------:|:-----------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
-| debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](/daemon/README.md#debugging). |
-| debug | `/var/log/netdata/debug.log` | The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](/daemon/README.md#debugging). |
+| debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](https://github.com/netdata/netdata/blob/master/daemon/README.md#debugging). |
+| debug | `/var/log/netdata/debug.log` | The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](https://github.com/netdata/netdata/blob/master/daemon/README.md#debugging). |
| error | `/var/log/netdata/error.log` | The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. |
| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. |
| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. |
@@ -163,9 +168,9 @@ The multiplication of all the **enabled** tiers `dbengine tier N update every i
This section controls the general behavior of the health monitoring capabilities of Netdata.
Specific alarms are configured in per-collector config files under the `health.d` directory. For more info, see [health
-monitoring](/health/README.md).
+monitoring](https://github.com/netdata/netdata/blob/master/health/README.md).
-[Alarm notifications](/health/notifications/README.md) are configured in `health_alarm_notify.conf`.
+[Alarm notifications](https://github.com/netdata/netdata/blob/master/health/notifications/README.md) are configured in `health_alarm_notify.conf`.
| setting | default | info |
|:----------------------------------------------:|:------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -175,10 +180,11 @@ monitoring](/health/README.md).
| run at least every seconds | `10` | Controls how often all alarm conditions should be evaluated. |
| postpone alarms during hibernation for seconds | `60` | Prevents false alarms. May need to be increased if you get alarms during hibernation. |
| rotate log every lines | 2000 | Controls the number of alarm log entries stored in `<lib directory>/health-log.db`, where `<lib directory>` is the one configured in the [\[global\] section](#global-section-options) |
+| enabled alarms | * | Defines which alarms to load from both user and stock directories. This is a [simple pattern](https://github.com/netdata/netdata/blob/master/libnetdata/simple_pattern/README.md) list of alarm or template names. Can be used to disable specific alarms. For example, `enabled alarms = !oom_kill *` will load all alarms except `oom_kill`. |
### [web] section options
-Refer to the [web server documentation](/web/server/README.md)
+Refer to the [web server documentation](https://github.com/netdata/netdata/blob/master/web/server/README.md)
### [plugins] section options
@@ -198,7 +204,7 @@ Additionally, there will be the following options:
### [registry] section options
To understand what this section is and how it should be configured, please refer to
-the [registry documentation](/registry/README.md).
+the [registry documentation](https://github.com/netdata/netdata/blob/master/registry/README.md).
## Per-plugin configuration
@@ -206,7 +212,7 @@ The configuration options for plugins appear in sections following the pattern `
### Internal plugins
-Most internal plugins will provide additional options. Check [Internal Plugins](/collectors/README.md) for more
+Most internal plugins will provide additional options. Check [Internal Plugins](https://github.com/netdata/netdata/blob/master/collectors/README.md) for more
information.
Please note, that by default Netdata will enable monitoring metrics for disks, memory, and network only when they are
@@ -222,7 +228,7 @@ External plugins will have only 2 options at `netdata.conf`:
| setting | default | info |
|:---------------:|:--------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------|
-| update every | the value of `[global].update every` setting | The frequency in seconds the plugin should collect values. For more information check the [performance guide](/docs/guides/configure/performance.md). |
+| update every | the value of `[global].update every` setting | The frequency in seconds the plugin should collect values. For more information check the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). |
| command options | - | Additional command line options to pass to the plugin. | |
External plugins that need additional configuration may support a dedicated file in `/etc/netdata`. Check their
diff --git a/daemon/event_loop.c b/daemon/event_loop.c
new file mode 100644
index 000000000..6f09cd654
--- /dev/null
+++ b/daemon/event_loop.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include <daemon/main.h>
+#include "event_loop.h"
+
+// Register workers
+void register_libuv_worker_jobs() {
+ static __thread bool registered = false;
+
+ if(likely(registered))
+ return;
+
+ registered = true;
+
+ worker_register("LIBUV");
+
+ // generic
+ worker_register_job_name(UV_EVENT_WORKER_INIT, "worker init");
+
+ // query related
+ worker_register_job_name(UV_EVENT_DBENGINE_QUERY, "query");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_CACHE_LOOKUP, "extent cache");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_MMAP, "extent mmap");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_DECOMPRESSION, "extent decompression");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_LOOKUP, "page lookup");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_POPULATION, "page populate");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_ALLOCATION, "page allocate");
+
+ // flushing related
+ worker_register_job_name(UV_EVENT_DBENGINE_FLUSH_MAIN_CACHE, "flush main");
+ worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_WRITE, "extent write");
+ worker_register_job_name(UV_EVENT_DBENGINE_FLUSHED_TO_OPEN, "flushed to open");
+
+ // datafile full
+ worker_register_job_name(UV_EVENT_DBENGINE_JOURNAL_INDEX_WAIT, "jv2 index wait");
+ worker_register_job_name(UV_EVENT_DBENGINE_JOURNAL_INDEX, "jv2 indexing");
+
+ // db rotation related
+ worker_register_job_name(UV_EVENT_DBENGINE_DATAFILE_DELETE_WAIT, "datafile delete wait");
+ worker_register_job_name(UV_EVENT_DBENGINE_DATAFILE_DELETE, "datafile deletion");
+ worker_register_job_name(UV_EVENT_DBENGINE_FIND_ROTATED_METRICS, "find rotated metrics");
+ worker_register_job_name(UV_EVENT_DBENGINE_FIND_REMAINING_RETENTION, "find remaining retention");
+ worker_register_job_name(UV_EVENT_DBENGINE_POPULATE_MRG, "update retention");
+
+ // other dbengine events
+ worker_register_job_name(UV_EVENT_DBENGINE_EVICT_MAIN_CACHE, "evict main");
+ worker_register_job_name(UV_EVENT_DBENGINE_BUFFERS_CLEANUP, "dbengine buffers cleanup");
+ worker_register_job_name(UV_EVENT_DBENGINE_QUIESCE, "dbengine quiesce");
+ worker_register_job_name(UV_EVENT_DBENGINE_SHUTDOWN, "dbengine shutdown");
+
+ // metadata
+ worker_register_job_name(UV_EVENT_METADATA_STORE, "metadata store host");
+ worker_register_job_name(UV_EVENT_METADATA_CLEANUP, "metadata cleanup");
+
+ // netdatacli
+ worker_register_job_name(UV_EVENT_SCHEDULE_CMD, "schedule command");
+
+ uv_thread_set_name_np(pthread_self(), "LIBUV_WORKER");
+}
diff --git a/daemon/event_loop.h b/daemon/event_loop.h
new file mode 100644
index 000000000..0d3cc0d07
--- /dev/null
+++ b/daemon/event_loop.h
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_EVENT_LOOP_H
+#define NETDATA_EVENT_LOOP_H
+
+enum event_loop_job {
+ UV_EVENT_JOB_NONE = 0,
+
+ // generic
+ UV_EVENT_WORKER_INIT,
+
+ // query related
+ UV_EVENT_DBENGINE_QUERY,
+ UV_EVENT_DBENGINE_EXTENT_CACHE_LOOKUP,
+ UV_EVENT_DBENGINE_EXTENT_MMAP,
+ UV_EVENT_DBENGINE_EXTENT_DECOMPRESSION,
+ UV_EVENT_DBENGINE_EXTENT_PAGE_LOOKUP,
+ UV_EVENT_DBENGINE_EXTENT_PAGE_POPULATION,
+ UV_EVENT_DBENGINE_EXTENT_PAGE_ALLOCATION,
+
+ // flushing related
+ UV_EVENT_DBENGINE_FLUSH_MAIN_CACHE,
+ UV_EVENT_DBENGINE_EXTENT_WRITE,
+ UV_EVENT_DBENGINE_FLUSHED_TO_OPEN,
+
+ // datafile full
+ UV_EVENT_DBENGINE_JOURNAL_INDEX_WAIT,
+ UV_EVENT_DBENGINE_JOURNAL_INDEX,
+
+ // db rotation related
+ UV_EVENT_DBENGINE_DATAFILE_DELETE_WAIT,
+ UV_EVENT_DBENGINE_DATAFILE_DELETE,
+ UV_EVENT_DBENGINE_FIND_ROTATED_METRICS, // find the metrics that are rotated
+ UV_EVENT_DBENGINE_FIND_REMAINING_RETENTION, // find their remaining retention
+ UV_EVENT_DBENGINE_POPULATE_MRG, // update mrg
+
+ // other dbengine events
+ UV_EVENT_DBENGINE_EVICT_MAIN_CACHE,
+ UV_EVENT_DBENGINE_BUFFERS_CLEANUP,
+ UV_EVENT_DBENGINE_QUIESCE,
+ UV_EVENT_DBENGINE_SHUTDOWN,
+
+ // metadata
+ UV_EVENT_METADATA_STORE,
+ UV_EVENT_METADATA_CLEANUP,
+
+ // netdatacli
+ UV_EVENT_SCHEDULE_CMD,
+};
+
+void register_libuv_worker_jobs();
+
+#endif //NETDATA_EVENT_LOOP_H
diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c
index a4e9d321f..0dc3ee645 100644
--- a/daemon/global_statistics.c
+++ b/daemon/global_statistics.c
@@ -20,6 +20,11 @@
bool global_statistics_enabled = true;
+struct netdata_buffers_statistics netdata_buffers_statistics = {};
+
+static size_t dbengine_total_memory = 0;
+size_t rrddim_db_memory_size = 0;
+
static struct global_statistics {
uint16_t connected_clients;
@@ -52,6 +57,7 @@ static struct global_statistics {
uint64_t ml_queries_made;
uint64_t ml_db_points_read;
uint64_t ml_result_points_generated;
+ uint64_t ml_models_consulted;
uint64_t exporters_queries_made;
uint64_t exporters_db_points_read;
@@ -88,6 +94,10 @@ void global_statistics_ml_query_completed(size_t points_read) {
__atomic_fetch_add(&global_statistics.ml_db_points_read, points_read, __ATOMIC_RELAXED);
}
+void global_statistics_ml_models_consulted(size_t models_consulted) {
+ __atomic_fetch_add(&global_statistics.ml_models_consulted, models_consulted, __ATOMIC_RELAXED);
+}
+
void global_statistics_exporters_query_completed(size_t points_read) {
__atomic_fetch_add(&global_statistics.exporters_queries_made, 1, __ATOMIC_RELAXED);
__atomic_fetch_add(&global_statistics.exporters_db_points_read, points_read, __ATOMIC_RELAXED);
@@ -193,6 +203,7 @@ static inline void global_statistics_copy(struct global_statistics *gs, uint8_t
gs->ml_queries_made = __atomic_load_n(&global_statistics.ml_queries_made, __ATOMIC_RELAXED);
gs->ml_db_points_read = __atomic_load_n(&global_statistics.ml_db_points_read, __ATOMIC_RELAXED);
gs->ml_result_points_generated = __atomic_load_n(&global_statistics.ml_result_points_generated, __ATOMIC_RELAXED);
+ gs->ml_models_consulted = __atomic_load_n(&global_statistics.ml_models_consulted, __ATOMIC_RELAXED);
gs->exporters_queries_made = __atomic_load_n(&global_statistics.exporters_queries_made, __ATOMIC_RELAXED);
gs->exporters_db_points_read = __atomic_load_n(&global_statistics.exporters_db_points_read, __ATOMIC_RELAXED);
@@ -208,6 +219,9 @@ static inline void global_statistics_copy(struct global_statistics *gs, uint8_t
}
}
+#define dictionary_stats_memory_total(stats) \
+ ((stats).memory.dict + (stats).memory.values + (stats).memory.index)
+
static void global_statistics_charts(void) {
static unsigned long long old_web_requests = 0,
old_web_usec = 0,
@@ -264,23 +278,182 @@ static void global_statistics_charts(void) {
// ----------------------------------------------------------------
{
- static RRDSET *st_uptime = NULL;
- static RRDDIM *rd_uptime = NULL;
+ static RRDSET *st_memory = NULL;
+ static RRDDIM *rd_database = NULL;
+ static RRDDIM *rd_collectors = NULL;
+ static RRDDIM *rd_hosts = NULL;
+ static RRDDIM *rd_rrd = NULL;
+ static RRDDIM *rd_contexts = NULL;
+ static RRDDIM *rd_health = NULL;
+ static RRDDIM *rd_functions = NULL;
+ static RRDDIM *rd_labels = NULL;
+ static RRDDIM *rd_strings = NULL;
+ static RRDDIM *rd_streaming = NULL;
+ static RRDDIM *rd_replication = NULL;
+ static RRDDIM *rd_buffers = NULL;
+ static RRDDIM *rd_workers = NULL;
+ static RRDDIM *rd_aral = NULL;
+ static RRDDIM *rd_judy = NULL;
+ static RRDDIM *rd_other = NULL;
+
+ if (unlikely(!st_memory)) {
+ st_memory = rrdset_create_localhost(
+ "netdata",
+ "memory",
+ NULL,
+ "netdata",
+ NULL,
+ "Netdata Memory",
+ "bytes",
+ "netdata",
+ "stats",
+ 130100,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rd_database = rrddim_add(st_memory, "db", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_collectors = rrddim_add(st_memory, "collectors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_hosts = rrddim_add(st_memory, "hosts", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_rrd = rrddim_add(st_memory, "rrdset rrddim", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_contexts = rrddim_add(st_memory, "contexts", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_health = rrddim_add(st_memory, "health", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_functions = rrddim_add(st_memory, "functions", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_labels = rrddim_add(st_memory, "labels", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_strings = rrddim_add(st_memory, "strings", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_streaming = rrddim_add(st_memory, "streaming", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_replication = rrddim_add(st_memory, "replication", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_buffers = rrddim_add(st_memory, "buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_workers = rrddim_add(st_memory, "workers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_aral = rrddim_add(st_memory, "aral", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_judy = rrddim_add(st_memory, "judy", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_other = rrddim_add(st_memory, "other", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
- if (unlikely(!st_uptime)) {
- st_uptime = rrdset_create_localhost(
+ size_t buffers =
+ netdata_buffers_statistics.query_targets_size +
+ netdata_buffers_statistics.rrdset_done_rda_size +
+ netdata_buffers_statistics.buffers_aclk +
+ netdata_buffers_statistics.buffers_api +
+ netdata_buffers_statistics.buffers_functions +
+ netdata_buffers_statistics.buffers_sqlite +
+ netdata_buffers_statistics.buffers_exporters +
+ netdata_buffers_statistics.buffers_health +
+ netdata_buffers_statistics.buffers_streaming +
+ netdata_buffers_statistics.cbuffers_streaming +
+ netdata_buffers_statistics.buffers_web +
+ replication_allocated_buffers() +
+ aral_by_size_overhead() +
+ judy_aral_overhead();
+
+ size_t strings = 0;
+ string_statistics(NULL, NULL, NULL, NULL, NULL, &strings, NULL, NULL);
+
+ rrddim_set_by_pointer(st_memory, rd_database, (collected_number)dbengine_total_memory + (collected_number)rrddim_db_memory_size);
+ rrddim_set_by_pointer(st_memory, rd_collectors, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_collectors));
+ rrddim_set_by_pointer(st_memory, rd_hosts, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdhost) + (collected_number)netdata_buffers_statistics.rrdhost_allocations_size);
+ rrddim_set_by_pointer(st_memory, rd_rrd, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdset_rrddim));
+ rrddim_set_by_pointer(st_memory, rd_contexts, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdcontext));
+ rrddim_set_by_pointer(st_memory, rd_health, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdhealth));
+ rrddim_set_by_pointer(st_memory, rd_functions, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_functions));
+ rrddim_set_by_pointer(st_memory, rd_labels, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdlabels));
+ rrddim_set_by_pointer(st_memory, rd_strings, (collected_number)strings);
+ rrddim_set_by_pointer(st_memory, rd_streaming, (collected_number)netdata_buffers_statistics.rrdhost_senders + (collected_number)netdata_buffers_statistics.rrdhost_receivers);
+ rrddim_set_by_pointer(st_memory, rd_replication, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_replication) + (collected_number)replication_allocated_memory());
+ rrddim_set_by_pointer(st_memory, rd_buffers, (collected_number)buffers);
+ rrddim_set_by_pointer(st_memory, rd_workers, (collected_number) workers_allocated_memory());
+ rrddim_set_by_pointer(st_memory, rd_aral, (collected_number) aral_by_size_structures());
+ rrddim_set_by_pointer(st_memory, rd_judy, (collected_number) judy_aral_structures());
+ rrddim_set_by_pointer(st_memory, rd_other, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_other));
+
+ rrdset_done(st_memory);
+ }
+
+ {
+ static RRDSET *st_memory_buffers = NULL;
+ static RRDDIM *rd_queries = NULL;
+ static RRDDIM *rd_collectors = NULL;
+ static RRDDIM *rd_buffers_aclk = NULL;
+ static RRDDIM *rd_buffers_api = NULL;
+ static RRDDIM *rd_buffers_functions = NULL;
+ static RRDDIM *rd_buffers_sqlite = NULL;
+ static RRDDIM *rd_buffers_exporters = NULL;
+ static RRDDIM *rd_buffers_health = NULL;
+ static RRDDIM *rd_buffers_streaming = NULL;
+ static RRDDIM *rd_cbuffers_streaming = NULL;
+ static RRDDIM *rd_buffers_replication = NULL;
+ static RRDDIM *rd_buffers_web = NULL;
+ static RRDDIM *rd_buffers_aral = NULL;
+ static RRDDIM *rd_buffers_judy = NULL;
+
+ if (unlikely(!st_memory_buffers)) {
+ st_memory_buffers = rrdset_create_localhost(
"netdata",
- "uptime",
+ "memory_buffers",
NULL,
"netdata",
NULL,
- "Netdata uptime",
- "seconds",
+ "Netdata Memory Buffers",
+ "bytes",
"netdata",
"stats",
- 130100,
+ 130101,
localhost->rrd_update_every,
- RRDSET_TYPE_LINE);
+ RRDSET_TYPE_STACKED);
+
+ rd_queries = rrddim_add(st_memory_buffers, "queries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_collectors = rrddim_add(st_memory_buffers, "collection", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_buffers_aclk = rrddim_add(st_memory_buffers, "aclk", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_buffers_api = rrddim_add(st_memory_buffers, "api", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_buffers_functions = rrddim_add(st_memory_buffers, "functions", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_buffers_sqlite = rrddim_add(st_memory_buffers, "sqlite", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_buffers_exporters = rrddim_add(st_memory_buffers, "exporters", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_buffers_health = rrddim_add(st_memory_buffers, "health", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_buffers_streaming = rrddim_add(st_memory_buffers, "streaming", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_cbuffers_streaming = rrddim_add(st_memory_buffers, "streaming cbuf", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_buffers_replication = rrddim_add(st_memory_buffers, "replication", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_buffers_web = rrddim_add(st_memory_buffers, "web", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_buffers_aral = rrddim_add(st_memory_buffers, "aral", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_buffers_judy = rrddim_add(st_memory_buffers, "judy", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+
+ rrddim_set_by_pointer(st_memory_buffers, rd_queries, (collected_number)netdata_buffers_statistics.query_targets_size + (collected_number) onewayalloc_allocated_memory());
+ rrddim_set_by_pointer(st_memory_buffers, rd_collectors, (collected_number)netdata_buffers_statistics.rrdset_done_rda_size);
+ rrddim_set_by_pointer(st_memory_buffers, rd_buffers_aclk, (collected_number)netdata_buffers_statistics.buffers_aclk);
+ rrddim_set_by_pointer(st_memory_buffers, rd_buffers_api, (collected_number)netdata_buffers_statistics.buffers_api);
+ rrddim_set_by_pointer(st_memory_buffers, rd_buffers_functions, (collected_number)netdata_buffers_statistics.buffers_functions);
+ rrddim_set_by_pointer(st_memory_buffers, rd_buffers_sqlite, (collected_number)netdata_buffers_statistics.buffers_sqlite);
+ rrddim_set_by_pointer(st_memory_buffers, rd_buffers_exporters, (collected_number)netdata_buffers_statistics.buffers_exporters);
+ rrddim_set_by_pointer(st_memory_buffers, rd_buffers_health, (collected_number)netdata_buffers_statistics.buffers_health);
+ rrddim_set_by_pointer(st_memory_buffers, rd_buffers_streaming, (collected_number)netdata_buffers_statistics.buffers_streaming);
+ rrddim_set_by_pointer(st_memory_buffers, rd_cbuffers_streaming, (collected_number)netdata_buffers_statistics.cbuffers_streaming);
+ rrddim_set_by_pointer(st_memory_buffers, rd_buffers_replication, (collected_number)replication_allocated_buffers());
+ rrddim_set_by_pointer(st_memory_buffers, rd_buffers_web, (collected_number)netdata_buffers_statistics.buffers_web);
+ rrddim_set_by_pointer(st_memory_buffers, rd_buffers_aral, (collected_number)aral_by_size_overhead());
+ rrddim_set_by_pointer(st_memory_buffers, rd_buffers_judy, (collected_number)judy_aral_overhead());
+
+ rrdset_done(st_memory_buffers);
+ }
+
+ // ----------------------------------------------------------------
+
+ {
+ static RRDSET *st_uptime = NULL;
+ static RRDDIM *rd_uptime = NULL;
+
+ if (unlikely(!st_uptime)) {
+ st_uptime = rrdset_create_localhost(
+ "netdata",
+ "uptime",
+ NULL,
+ "netdata",
+ NULL,
+ "Netdata uptime",
+ "seconds",
+ "netdata",
+ "stats",
+ 130150,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
rd_uptime = rrddim_add(st_uptime, "uptime", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
}
@@ -653,6 +826,34 @@ static void global_statistics_charts(void) {
rrdset_done(st_points_stored);
}
+
+ {
+ static RRDSET *st = NULL;
+ static RRDDIM *rd = NULL;
+
+ if (unlikely(!st)) {
+ st = rrdset_create_localhost(
+ "netdata" // type
+ , "ml_models_consulted" // id
+ , NULL // name
+ , NETDATA_ML_CHART_FAMILY // family
+ , NULL // context
+ , "KMeans models used for prediction" // title
+ , "models" // units
+ , NETDATA_ML_PLUGIN // plugin
+ , NETDATA_ML_MODULE_DETECTION // module
+ , NETDATA_ML_CHART_PRIO_MACHINE_LEARNING_STATUS // priority
+ , localhost->rrd_update_every // update_every
+ , RRDSET_TYPE_AREA // chart_type
+ );
+
+ rd = rrddim_add(st, "num_models_consulted", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+
+ rrddim_set_by_pointer(st, rd, (collected_number) gs.ml_models_consulted);
+
+ rrdset_done(st);
+ }
}
// ----------------------------------------------------------------------------
@@ -962,9 +1163,1332 @@ static void sqlite3_statistics_charts(void) {
// ----------------------------------------------------------------
}
-static void dbengine_statistics_charts(void) {
#ifdef ENABLE_DBENGINE
+
+struct dbengine2_cache_pointers {
+ RRDSET *st_cache_hit_ratio;
+ RRDDIM *rd_hit_ratio_closest;
+ RRDDIM *rd_hit_ratio_exact;
+
+ RRDSET *st_operations;
+ RRDDIM *rd_searches_closest;
+ RRDDIM *rd_searches_exact;
+ RRDDIM *rd_add_hot;
+ RRDDIM *rd_add_clean;
+ RRDDIM *rd_evictions;
+ RRDDIM *rd_flushes;
+ RRDDIM *rd_acquires;
+ RRDDIM *rd_releases;
+ RRDDIM *rd_acquires_for_deletion;
+
+ RRDSET *st_pgc_memory;
+ RRDDIM *rd_pgc_memory_free;
+ RRDDIM *rd_pgc_memory_clean;
+ RRDDIM *rd_pgc_memory_hot;
+ RRDDIM *rd_pgc_memory_dirty;
+ RRDDIM *rd_pgc_memory_index;
+ RRDDIM *rd_pgc_memory_evicting;
+ RRDDIM *rd_pgc_memory_flushing;
+
+ RRDSET *st_pgc_tm;
+ RRDDIM *rd_pgc_tm_current;
+ RRDDIM *rd_pgc_tm_wanted;
+ RRDDIM *rd_pgc_tm_hot_max;
+ RRDDIM *rd_pgc_tm_dirty_max;
+ RRDDIM *rd_pgc_tm_hot;
+ RRDDIM *rd_pgc_tm_dirty;
+ RRDDIM *rd_pgc_tm_referenced;
+
+ RRDSET *st_pgc_pages;
+ RRDDIM *rd_pgc_pages_clean;
+ RRDDIM *rd_pgc_pages_hot;
+ RRDDIM *rd_pgc_pages_dirty;
+ RRDDIM *rd_pgc_pages_referenced;
+
+ RRDSET *st_pgc_memory_changes;
+ RRDDIM *rd_pgc_memory_new_hot;
+ RRDDIM *rd_pgc_memory_new_clean;
+ RRDDIM *rd_pgc_memory_clean_evictions;
+
+ RRDSET *st_pgc_memory_migrations;
+ RRDDIM *rd_pgc_memory_hot_to_dirty;
+ RRDDIM *rd_pgc_memory_dirty_to_clean;
+
+ RRDSET *st_pgc_workers;
+ RRDDIM *rd_pgc_workers_evictors;
+ RRDDIM *rd_pgc_workers_flushers;
+ RRDDIM *rd_pgc_workers_adders;
+ RRDDIM *rd_pgc_workers_searchers;
+ RRDDIM *rd_pgc_workers_jv2_flushers;
+ RRDDIM *rd_pgc_workers_hot2dirty;
+
+ RRDSET *st_pgc_memory_events;
+ RRDDIM *rd_pgc_memory_evictions_critical;
+ RRDDIM *rd_pgc_memory_evictions_aggressive;
+ RRDDIM *rd_pgc_memory_flushes_critical;
+
+ RRDSET *st_pgc_waste;
+ RRDDIM *rd_pgc_waste_evictions_skipped;
+ RRDDIM *rd_pgc_waste_flushes_cancelled;
+ RRDDIM *rd_pgc_waste_insert_spins;
+ RRDDIM *rd_pgc_waste_evict_spins;
+ RRDDIM *rd_pgc_waste_release_spins;
+ RRDDIM *rd_pgc_waste_acquire_spins;
+ RRDDIM *rd_pgc_waste_delete_spins;
+ RRDDIM *rd_pgc_waste_flush_spins;
+
+};
+
+static void dbengine2_cache_statistics_charts(struct dbengine2_cache_pointers *ptrs, struct pgc_statistics *pgc_stats, struct pgc_statistics *pgc_stats_old __maybe_unused, const char *name, int priority) {
+
+ {
+ if (unlikely(!ptrs->st_cache_hit_ratio)) {
+ BUFFER *id = buffer_create(100, NULL);
+ buffer_sprintf(id, "dbengine_%s_cache_hit_ratio", name);
+
+ BUFFER *family = buffer_create(100, NULL);
+ buffer_sprintf(family, "dbengine %s cache", name);
+
+ BUFFER *title = buffer_create(100, NULL);
+ buffer_sprintf(title, "Netdata %s Cache Hit Ratio", name);
+
+ ptrs->st_cache_hit_ratio = rrdset_create_localhost(
+ "netdata",
+ buffer_tostring(id),
+ NULL,
+ buffer_tostring(family),
+ NULL,
+ buffer_tostring(title),
+ "%",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ ptrs->rd_hit_ratio_closest = rrddim_add(ptrs->st_cache_hit_ratio, "closest", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_hit_ratio_exact = rrddim_add(ptrs->st_cache_hit_ratio, "exact", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE);
+
+ buffer_free(id);
+ buffer_free(family);
+ buffer_free(title);
+ priority++;
+ }
+
+ size_t closest_percent = 100 * 10000;
+ if(pgc_stats->searches_closest > pgc_stats_old->searches_closest)
+ closest_percent = (pgc_stats->searches_closest_hits - pgc_stats_old->searches_closest_hits) * 100 * 10000 / (pgc_stats->searches_closest - pgc_stats_old->searches_closest);
+
+ size_t exact_percent = 100 * 10000;
+ if(pgc_stats->searches_exact > pgc_stats_old->searches_exact)
+ exact_percent = (pgc_stats->searches_exact_hits - pgc_stats_old->searches_exact_hits) * 100 * 10000 / (pgc_stats->searches_exact - pgc_stats_old->searches_exact);
+
+ rrddim_set_by_pointer(ptrs->st_cache_hit_ratio, ptrs->rd_hit_ratio_closest, (collected_number)closest_percent);
+ rrddim_set_by_pointer(ptrs->st_cache_hit_ratio, ptrs->rd_hit_ratio_exact, (collected_number)exact_percent);
+
+ rrdset_done(ptrs->st_cache_hit_ratio);
+ }
+
+ {
+ if (unlikely(!ptrs->st_operations)) {
+ BUFFER *id = buffer_create(100, NULL);
+ buffer_sprintf(id, "dbengine_%s_cache_operations", name);
+
+ BUFFER *family = buffer_create(100, NULL);
+ buffer_sprintf(family, "dbengine %s cache", name);
+
+ BUFFER *title = buffer_create(100, NULL);
+ buffer_sprintf(title, "Netdata %s Cache Operations", name);
+
+ ptrs->st_operations = rrdset_create_localhost(
+ "netdata",
+ buffer_tostring(id),
+ NULL,
+ buffer_tostring(family),
+ NULL,
+ buffer_tostring(title),
+ "ops/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ ptrs->rd_searches_closest = rrddim_add(ptrs->st_operations, "search closest", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_searches_exact = rrddim_add(ptrs->st_operations, "search exact", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_add_hot = rrddim_add(ptrs->st_operations, "add hot", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_add_clean = rrddim_add(ptrs->st_operations, "add clean", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_evictions = rrddim_add(ptrs->st_operations, "evictions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_flushes = rrddim_add(ptrs->st_operations, "flushes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_acquires = rrddim_add(ptrs->st_operations, "acquires", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_releases = rrddim_add(ptrs->st_operations, "releases", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_acquires_for_deletion = rrddim_add(ptrs->st_operations, "del acquires", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+
+ buffer_free(id);
+ buffer_free(family);
+ buffer_free(title);
+ priority++;
+ }
+
+ rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_searches_closest, (collected_number)pgc_stats->searches_closest);
+ rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_searches_exact, (collected_number)pgc_stats->searches_exact);
+ rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_add_hot, (collected_number)pgc_stats->queues.hot.added_entries);
+ rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_add_clean, (collected_number)(pgc_stats->added_entries - pgc_stats->queues.hot.added_entries));
+ rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_evictions, (collected_number)pgc_stats->queues.clean.removed_entries);
+ rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_flushes, (collected_number)pgc_stats->flushes_completed);
+ rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_acquires, (collected_number)pgc_stats->acquires);
+ rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_releases, (collected_number)pgc_stats->releases);
+ rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_acquires_for_deletion, (collected_number)pgc_stats->acquires_for_deletion);
+
+ rrdset_done(ptrs->st_operations);
+ }
+
+ {
+ if (unlikely(!ptrs->st_pgc_memory)) {
+ BUFFER *id = buffer_create(100, NULL);
+ buffer_sprintf(id, "dbengine_%s_cache_memory", name);
+
+ BUFFER *family = buffer_create(100, NULL);
+ buffer_sprintf(family, "dbengine %s cache", name);
+
+ BUFFER *title = buffer_create(100, NULL);
+ buffer_sprintf(title, "Netdata %s Cache Memory", name);
+
+ ptrs->st_pgc_memory = rrdset_create_localhost(
+ "netdata",
+ buffer_tostring(id),
+ NULL,
+ buffer_tostring(family),
+ NULL,
+ buffer_tostring(title),
+ "bytes",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_STACKED);
+
+ ptrs->rd_pgc_memory_free = rrddim_add(ptrs->st_pgc_memory, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_memory_hot = rrddim_add(ptrs->st_pgc_memory, "hot", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_memory_dirty = rrddim_add(ptrs->st_pgc_memory, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_memory_clean = rrddim_add(ptrs->st_pgc_memory, "clean", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_memory_index = rrddim_add(ptrs->st_pgc_memory, "index", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_memory_evicting = rrddim_add(ptrs->st_pgc_memory, "evicting", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_memory_flushing = rrddim_add(ptrs->st_pgc_memory, "flushing", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+ buffer_free(id);
+ buffer_free(family);
+ buffer_free(title);
+ priority++;
+ }
+
+ collected_number free = (pgc_stats->current_cache_size > pgc_stats->wanted_cache_size) ? 0 :
+ (collected_number)(pgc_stats->wanted_cache_size - pgc_stats->current_cache_size);
+
+ rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_free, free);
+ rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_hot, (collected_number)pgc_stats->queues.hot.size);
+ rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_dirty, (collected_number)pgc_stats->queues.dirty.size);
+ rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_clean, (collected_number)pgc_stats->queues.clean.size);
+ rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_evicting, (collected_number)pgc_stats->evicting_size);
+ rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_flushing, (collected_number)pgc_stats->flushing_size);
+ rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_index,
+ (collected_number)(pgc_stats->size - pgc_stats->queues.clean.size - pgc_stats->queues.hot.size - pgc_stats->queues.dirty.size - pgc_stats->evicting_size - pgc_stats->flushing_size));
+
+ rrdset_done(ptrs->st_pgc_memory);
+ }
+
+ {
+ if (unlikely(!ptrs->st_pgc_tm)) {
+ BUFFER *id = buffer_create(100, NULL);
+ buffer_sprintf(id, "dbengine_%s_target_memory", name);
+
+ BUFFER *family = buffer_create(100, NULL);
+ buffer_sprintf(family, "dbengine %s cache", name);
+
+ BUFFER *title = buffer_create(100, NULL);
+ buffer_sprintf(title, "Netdata %s Target Cache Memory", name);
+
+ ptrs->st_pgc_tm = rrdset_create_localhost(
+ "netdata",
+ buffer_tostring(id),
+ NULL,
+ buffer_tostring(family),
+ NULL,
+ buffer_tostring(title),
+ "bytes",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ ptrs->rd_pgc_tm_current = rrddim_add(ptrs->st_pgc_tm, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_tm_wanted = rrddim_add(ptrs->st_pgc_tm, "wanted", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_tm_referenced = rrddim_add(ptrs->st_pgc_tm, "referenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_tm_hot_max = rrddim_add(ptrs->st_pgc_tm, "hot max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_tm_dirty_max = rrddim_add(ptrs->st_pgc_tm, "dirty max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_tm_hot = rrddim_add(ptrs->st_pgc_tm, "hot", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_tm_dirty = rrddim_add(ptrs->st_pgc_tm, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+ buffer_free(id);
+ buffer_free(family);
+ buffer_free(title);
+ priority++;
+ }
+
+ rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_current, (collected_number)pgc_stats->current_cache_size);
+ rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_wanted, (collected_number)pgc_stats->wanted_cache_size);
+ rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_referenced, (collected_number)pgc_stats->referenced_size);
+ rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_hot_max, (collected_number)pgc_stats->queues.hot.max_size);
+ rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_dirty_max, (collected_number)pgc_stats->queues.dirty.max_size);
+ rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_hot, (collected_number)pgc_stats->queues.hot.size);
+ rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_dirty, (collected_number)pgc_stats->queues.dirty.size);
+
+ rrdset_done(ptrs->st_pgc_tm);
+ }
+
+ {
+ if (unlikely(!ptrs->st_pgc_pages)) {
+ BUFFER *id = buffer_create(100, NULL);
+ buffer_sprintf(id, "dbengine_%s_cache_pages", name);
+
+ BUFFER *family = buffer_create(100, NULL);
+ buffer_sprintf(family, "dbengine %s cache", name);
+
+ BUFFER *title = buffer_create(100, NULL);
+ buffer_sprintf(title, "Netdata %s Cache Pages", name);
+
+ ptrs->st_pgc_pages = rrdset_create_localhost(
+ "netdata",
+ buffer_tostring(id),
+ NULL,
+ buffer_tostring(family),
+ NULL,
+ buffer_tostring(title),
+ "pages",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ ptrs->rd_pgc_pages_clean = rrddim_add(ptrs->st_pgc_pages, "clean", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_pages_hot = rrddim_add(ptrs->st_pgc_pages, "hot", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_pages_dirty = rrddim_add(ptrs->st_pgc_pages, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_pages_referenced = rrddim_add(ptrs->st_pgc_pages, "referenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+ buffer_free(id);
+ buffer_free(family);
+ buffer_free(title);
+ priority++;
+ }
+
+ rrddim_set_by_pointer(ptrs->st_pgc_pages, ptrs->rd_pgc_pages_clean, (collected_number)pgc_stats->queues.clean.entries);
+ rrddim_set_by_pointer(ptrs->st_pgc_pages, ptrs->rd_pgc_pages_hot, (collected_number)pgc_stats->queues.hot.entries);
+ rrddim_set_by_pointer(ptrs->st_pgc_pages, ptrs->rd_pgc_pages_dirty, (collected_number)pgc_stats->queues.dirty.entries);
+ rrddim_set_by_pointer(ptrs->st_pgc_pages, ptrs->rd_pgc_pages_referenced, (collected_number)pgc_stats->referenced_entries);
+
+ rrdset_done(ptrs->st_pgc_pages);
+ }
+
+ {
+ if (unlikely(!ptrs->st_pgc_memory_changes)) {
+ BUFFER *id = buffer_create(100, NULL);
+ buffer_sprintf(id, "dbengine_%s_cache_memory_changes", name);
+
+ BUFFER *family = buffer_create(100, NULL);
+ buffer_sprintf(family, "dbengine %s cache", name);
+
+ BUFFER *title = buffer_create(100, NULL);
+ buffer_sprintf(title, "Netdata %s Cache Memory Changes", name);
+
+ ptrs->st_pgc_memory_changes = rrdset_create_localhost(
+ "netdata",
+ buffer_tostring(id),
+ NULL,
+ buffer_tostring(family),
+ NULL,
+ buffer_tostring(title),
+ "bytes/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_AREA);
+
+ ptrs->rd_pgc_memory_new_clean = rrddim_add(ptrs->st_pgc_memory_changes, "new clean", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_pgc_memory_clean_evictions = rrddim_add(ptrs->st_pgc_memory_changes, "evictions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_pgc_memory_new_hot = rrddim_add(ptrs->st_pgc_memory_changes, "new hot", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+
+ buffer_free(id);
+ buffer_free(family);
+ buffer_free(title);
+ priority++;
+ }
+
+ rrddim_set_by_pointer(ptrs->st_pgc_memory_changes, ptrs->rd_pgc_memory_new_clean, (collected_number)(pgc_stats->added_size - pgc_stats->queues.hot.added_size));
+ rrddim_set_by_pointer(ptrs->st_pgc_memory_changes, ptrs->rd_pgc_memory_clean_evictions, (collected_number)pgc_stats->queues.clean.removed_size);
+ rrddim_set_by_pointer(ptrs->st_pgc_memory_changes, ptrs->rd_pgc_memory_new_hot, (collected_number)pgc_stats->queues.hot.added_size);
+
+ rrdset_done(ptrs->st_pgc_memory_changes);
+ }
+
+ {
+ if (unlikely(!ptrs->st_pgc_memory_migrations)) {
+ BUFFER *id = buffer_create(100, NULL);
+ buffer_sprintf(id, "dbengine_%s_cache_memory_migrations", name);
+
+ BUFFER *family = buffer_create(100, NULL);
+ buffer_sprintf(family, "dbengine %s cache", name);
+
+ BUFFER *title = buffer_create(100, NULL);
+ buffer_sprintf(title, "Netdata %s Cache Memory Migrations", name);
+
+ ptrs->st_pgc_memory_migrations = rrdset_create_localhost(
+ "netdata",
+ buffer_tostring(id),
+ NULL,
+ buffer_tostring(family),
+ NULL,
+ buffer_tostring(title),
+ "bytes/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_AREA);
+
+ ptrs->rd_pgc_memory_dirty_to_clean = rrddim_add(ptrs->st_pgc_memory_migrations, "dirty to clean", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_pgc_memory_hot_to_dirty = rrddim_add(ptrs->st_pgc_memory_migrations, "hot to dirty", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+
+ buffer_free(id);
+ buffer_free(family);
+ buffer_free(title);
+ priority++;
+ }
+
+ rrddim_set_by_pointer(ptrs->st_pgc_memory_migrations, ptrs->rd_pgc_memory_dirty_to_clean, (collected_number)pgc_stats->queues.dirty.removed_size);
+ rrddim_set_by_pointer(ptrs->st_pgc_memory_migrations, ptrs->rd_pgc_memory_hot_to_dirty, (collected_number)pgc_stats->queues.dirty.added_size);
+
+ rrdset_done(ptrs->st_pgc_memory_migrations);
+ }
+
+ {
+ if (unlikely(!ptrs->st_pgc_memory_events)) {
+ BUFFER *id = buffer_create(100, NULL);
+ buffer_sprintf(id, "dbengine_%s_cache_events", name);
+
+ BUFFER *family = buffer_create(100, NULL);
+ buffer_sprintf(family, "dbengine %s cache", name);
+
+ BUFFER *title = buffer_create(100, NULL);
+ buffer_sprintf(title, "Netdata %s Cache Events", name);
+
+ ptrs->st_pgc_memory_events = rrdset_create_localhost(
+ "netdata",
+ buffer_tostring(id),
+ NULL,
+ buffer_tostring(family),
+ NULL,
+ buffer_tostring(title),
+ "events/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_AREA);
+
+ ptrs->rd_pgc_memory_evictions_aggressive = rrddim_add(ptrs->st_pgc_memory_events, "evictions aggressive", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_pgc_memory_evictions_critical = rrddim_add(ptrs->st_pgc_memory_events, "evictions critical", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_pgc_memory_flushes_critical = rrddim_add(ptrs->st_pgc_memory_events, "flushes critical", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+
+ buffer_free(id);
+ buffer_free(family);
+ buffer_free(title);
+ priority++;
+ }
+
+ rrddim_set_by_pointer(ptrs->st_pgc_memory_events, ptrs->rd_pgc_memory_evictions_aggressive, (collected_number)pgc_stats->events_cache_needs_space_aggressively);
+ rrddim_set_by_pointer(ptrs->st_pgc_memory_events, ptrs->rd_pgc_memory_evictions_critical, (collected_number)pgc_stats->events_cache_under_severe_pressure);
+ rrddim_set_by_pointer(ptrs->st_pgc_memory_events, ptrs->rd_pgc_memory_flushes_critical, (collected_number)pgc_stats->events_flush_critical);
+
+ rrdset_done(ptrs->st_pgc_memory_events);
+ }
+
+ {
+ if (unlikely(!ptrs->st_pgc_waste)) {
+ BUFFER *id = buffer_create(100, NULL);
+ buffer_sprintf(id, "dbengine_%s_waste_events", name);
+
+ BUFFER *family = buffer_create(100, NULL);
+ buffer_sprintf(family, "dbengine %s cache", name);
+
+ BUFFER *title = buffer_create(100, NULL);
+ buffer_sprintf(title, "Netdata %s Waste Events", name);
+
+ ptrs->st_pgc_waste = rrdset_create_localhost(
+ "netdata",
+ buffer_tostring(id),
+ NULL,
+ buffer_tostring(family),
+ NULL,
+ buffer_tostring(title),
+ "events/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ ptrs->rd_pgc_waste_evictions_skipped = rrddim_add(ptrs->st_pgc_waste, "evictions skipped", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_pgc_waste_flushes_cancelled = rrddim_add(ptrs->st_pgc_waste, "flushes cancelled", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_pgc_waste_acquire_spins = rrddim_add(ptrs->st_pgc_waste, "acquire spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_pgc_waste_release_spins = rrddim_add(ptrs->st_pgc_waste, "release spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_pgc_waste_insert_spins = rrddim_add(ptrs->st_pgc_waste, "insert spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_pgc_waste_delete_spins = rrddim_add(ptrs->st_pgc_waste, "delete spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_pgc_waste_evict_spins = rrddim_add(ptrs->st_pgc_waste, "evict spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ ptrs->rd_pgc_waste_flush_spins = rrddim_add(ptrs->st_pgc_waste, "flush spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+
+ buffer_free(id);
+ buffer_free(family);
+ buffer_free(title);
+ priority++;
+ }
+
+ rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_evictions_skipped, (collected_number)pgc_stats->evict_skipped);
+ rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_flushes_cancelled, (collected_number)pgc_stats->flushes_cancelled);
+ rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_acquire_spins, (collected_number)pgc_stats->acquire_spins);
+ rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_release_spins, (collected_number)pgc_stats->release_spins);
+ rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_insert_spins, (collected_number)pgc_stats->insert_spins);
+ rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_delete_spins, (collected_number)pgc_stats->delete_spins);
+ rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_evict_spins, (collected_number)pgc_stats->evict_spins);
+ rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_flush_spins, (collected_number)pgc_stats->flush_spins);
+
+ rrdset_done(ptrs->st_pgc_waste);
+ }
+
+ {
+ if (unlikely(!ptrs->st_pgc_workers)) {
+ BUFFER *id = buffer_create(100, NULL);
+ buffer_sprintf(id, "dbengine_%s_cache_workers", name);
+
+ BUFFER *family = buffer_create(100, NULL);
+ buffer_sprintf(family, "dbengine %s cache", name);
+
+ BUFFER *title = buffer_create(100, NULL);
+ buffer_sprintf(title, "Netdata %s Cache Workers", name);
+
+ ptrs->st_pgc_workers = rrdset_create_localhost(
+ "netdata",
+ buffer_tostring(id),
+ NULL,
+ buffer_tostring(family),
+ NULL,
+ buffer_tostring(title),
+ "workers",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ ptrs->rd_pgc_workers_searchers = rrddim_add(ptrs->st_pgc_workers, "searchers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_workers_adders = rrddim_add(ptrs->st_pgc_workers, "adders", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_workers_evictors = rrddim_add(ptrs->st_pgc_workers, "evictors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_workers_flushers = rrddim_add(ptrs->st_pgc_workers, "flushers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_workers_hot2dirty = rrddim_add(ptrs->st_pgc_workers, "hot2dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ ptrs->rd_pgc_workers_jv2_flushers = rrddim_add(ptrs->st_pgc_workers, "jv2 flushers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+
+ buffer_free(id);
+ buffer_free(family);
+ buffer_free(title);
+ priority++;
+ }
+
+ rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_searchers, (collected_number)pgc_stats->workers_search);
+ rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_adders, (collected_number)pgc_stats->workers_add);
+ rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_evictors, (collected_number)pgc_stats->workers_evict);
+ rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_flushers, (collected_number)pgc_stats->workers_flush);
+ rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_hot2dirty, (collected_number)pgc_stats->workers_hot2dirty);
+ rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_jv2_flushers, (collected_number)pgc_stats->workers_jv2_flush);
+
+ rrdset_done(ptrs->st_pgc_workers);
+ }
+}
+
+
+static void dbengine2_statistics_charts(void) {
+ if(!main_cache || !main_mrg)
+ return;
+
+ static struct dbengine2_cache_pointers main_cache_ptrs = {}, open_cache_ptrs = {}, extent_cache_ptrs = {};
+ static struct rrdeng_cache_efficiency_stats cache_efficiency_stats = {}, cache_efficiency_stats_old = {};
+ static struct pgc_statistics pgc_main_stats = {}, pgc_main_stats_old = {}; (void)pgc_main_stats_old;
+ static struct pgc_statistics pgc_open_stats = {}, pgc_open_stats_old = {}; (void)pgc_open_stats_old;
+ static struct pgc_statistics pgc_extent_stats = {}, pgc_extent_stats_old = {}; (void)pgc_extent_stats_old;
+ static struct mrg_statistics mrg_stats = {}, mrg_stats_old = {}; (void)mrg_stats_old;
+
+ pgc_main_stats_old = pgc_main_stats;
+ pgc_main_stats = pgc_get_statistics(main_cache);
+ dbengine2_cache_statistics_charts(&main_cache_ptrs, &pgc_main_stats, &pgc_main_stats_old, "main", 135100);
+
+ pgc_open_stats_old = pgc_open_stats;
+ pgc_open_stats = pgc_get_statistics(open_cache);
+ dbengine2_cache_statistics_charts(&open_cache_ptrs, &pgc_open_stats, &pgc_open_stats_old, "open", 135200);
+
+ pgc_extent_stats_old = pgc_extent_stats;
+ pgc_extent_stats = pgc_get_statistics(extent_cache);
+ dbengine2_cache_statistics_charts(&extent_cache_ptrs, &pgc_extent_stats, &pgc_extent_stats_old, "extent", 135300);
+
+ cache_efficiency_stats_old = cache_efficiency_stats;
+ cache_efficiency_stats = rrdeng_get_cache_efficiency_stats();
+
+ mrg_stats_old = mrg_stats;
+ mrg_stats = mrg_get_statistics(main_mrg);
+
+ struct rrdeng_buffer_sizes buffers = rrdeng_get_buffer_sizes();
+ size_t buffers_total_size = buffers.handles + buffers.xt_buf + buffers.xt_io + buffers.pdc + buffers.descriptors +
+ buffers.opcodes + buffers.wal + buffers.workers + buffers.epdl + buffers.deol + buffers.pd + buffers.pgc + buffers.mrg;
+
+#ifdef PDC_USE_JULYL
+ buffers_total_size += buffers.julyl;
+#endif
+
+ dbengine_total_memory = pgc_main_stats.size + pgc_open_stats.size + pgc_extent_stats.size + mrg_stats.size + buffers_total_size;
+
+ size_t priority = 135000;
+
+ {
+ static RRDSET *st_pgc_memory = NULL;
+ static RRDDIM *rd_pgc_memory_main = NULL;
+ static RRDDIM *rd_pgc_memory_open = NULL; // open journal memory
+ static RRDDIM *rd_pgc_memory_extent = NULL; // extent compresses cache memory
+ static RRDDIM *rd_pgc_memory_metrics = NULL; // metric registry memory
+ static RRDDIM *rd_pgc_memory_buffers = NULL;
+
+ if (unlikely(!st_pgc_memory)) {
+ st_pgc_memory = rrdset_create_localhost(
+ "netdata",
+ "dbengine_memory",
+ NULL,
+ "dbengine memory",
+ NULL,
+ "Netdata DB Memory",
+ "bytes",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rd_pgc_memory_main = rrddim_add(st_pgc_memory, "main cache", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_memory_open = rrddim_add(st_pgc_memory, "open cache", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_memory_extent = rrddim_add(st_pgc_memory, "extent cache", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_memory_metrics = rrddim_add(st_pgc_memory, "metrics registry", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_memory_buffers = rrddim_add(st_pgc_memory, "buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+ priority++;
+
+
+ rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_main, (collected_number)pgc_main_stats.size);
+ rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_open, (collected_number)pgc_open_stats.size);
+ rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_extent, (collected_number)pgc_extent_stats.size);
+ rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_metrics, (collected_number)mrg_stats.size);
+ rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_buffers, (collected_number)buffers_total_size);
+
+ rrdset_done(st_pgc_memory);
+ }
+
+ {
+ static RRDSET *st_pgc_buffers = NULL;
+ static RRDDIM *rd_pgc_buffers_pgc = NULL;
+ static RRDDIM *rd_pgc_buffers_mrg = NULL;
+ static RRDDIM *rd_pgc_buffers_opcodes = NULL;
+ static RRDDIM *rd_pgc_buffers_handles = NULL;
+ static RRDDIM *rd_pgc_buffers_descriptors = NULL;
+ static RRDDIM *rd_pgc_buffers_wal = NULL;
+ static RRDDIM *rd_pgc_buffers_workers = NULL;
+ static RRDDIM *rd_pgc_buffers_pdc = NULL;
+ static RRDDIM *rd_pgc_buffers_xt_io = NULL;
+ static RRDDIM *rd_pgc_buffers_xt_buf = NULL;
+ static RRDDIM *rd_pgc_buffers_epdl = NULL;
+ static RRDDIM *rd_pgc_buffers_deol = NULL;
+ static RRDDIM *rd_pgc_buffers_pd = NULL;
+#ifdef PDC_USE_JULYL
+ static RRDDIM *rd_pgc_buffers_julyl = NULL;
+#endif
+
+ if (unlikely(!st_pgc_buffers)) {
+ st_pgc_buffers = rrdset_create_localhost(
+ "netdata",
+ "dbengine_buffers",
+ NULL,
+ "dbengine memory",
+ NULL,
+ "Netdata DB Buffers",
+ "bytes",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rd_pgc_buffers_pgc = rrddim_add(st_pgc_buffers, "pgc", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_mrg = rrddim_add(st_pgc_buffers, "mrg", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_opcodes = rrddim_add(st_pgc_buffers, "opcodes", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_handles = rrddim_add(st_pgc_buffers, "query handles", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_descriptors = rrddim_add(st_pgc_buffers, "descriptors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_wal = rrddim_add(st_pgc_buffers, "wal", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_workers = rrddim_add(st_pgc_buffers, "workers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_pdc = rrddim_add(st_pgc_buffers, "pdc", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_pd = rrddim_add(st_pgc_buffers, "pd", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_xt_io = rrddim_add(st_pgc_buffers, "extent io", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_xt_buf = rrddim_add(st_pgc_buffers, "extent buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_epdl = rrddim_add(st_pgc_buffers, "epdl", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_pgc_buffers_deol = rrddim_add(st_pgc_buffers, "deol", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+#ifdef PDC_USE_JULYL
+ rd_pgc_buffers_julyl = rrddim_add(st_pgc_buffers, "julyl", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+#endif
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pgc, (collected_number)buffers.pgc);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_mrg, (collected_number)buffers.mrg);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_opcodes, (collected_number)buffers.opcodes);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_handles, (collected_number)buffers.handles);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_descriptors, (collected_number)buffers.descriptors);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_wal, (collected_number)buffers.wal);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_workers, (collected_number)buffers.workers);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pdc, (collected_number)buffers.pdc);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pd, (collected_number)buffers.pd);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_io, (collected_number)buffers.xt_io);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_buf, (collected_number)buffers.xt_buf);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_epdl, (collected_number)buffers.epdl);
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_deol, (collected_number)buffers.deol);
+#ifdef PDC_USE_JULYL
+ rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_julyl, (collected_number)buffers.julyl);
+#endif
+
+ rrdset_done(st_pgc_buffers);
+ }
+
+#ifdef PDC_USE_JULYL
+ {
+ static RRDSET *st_julyl_moved = NULL;
+ static RRDDIM *rd_julyl_moved = NULL;
+
+ if (unlikely(!st_julyl_moved)) {
+ st_julyl_moved = rrdset_create_localhost(
+ "netdata",
+ "dbengine_julyl_moved",
+ NULL,
+ "dbengine memory",
+ NULL,
+ "Netdata JulyL Memory Moved",
+ "bytes/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_AREA);
+
+ rd_julyl_moved = rrddim_add(st_julyl_moved, "moved", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_julyl_moved, rd_julyl_moved, (collected_number)julyl_bytes_moved());
+
+ rrdset_done(st_julyl_moved);
+ }
+#endif
+
+ {
+ static RRDSET *st_mrg_metrics = NULL;
+ static RRDDIM *rd_mrg_metrics = NULL;
+ static RRDDIM *rd_mrg_acquired = NULL;
+ static RRDDIM *rd_mrg_collected = NULL;
+ static RRDDIM *rd_mrg_with_retention = NULL;
+ static RRDDIM *rd_mrg_without_retention = NULL;
+ static RRDDIM *rd_mrg_multiple_writers = NULL;
+
+ if (unlikely(!st_mrg_metrics)) {
+ st_mrg_metrics = rrdset_create_localhost(
+ "netdata",
+ "dbengine_metrics",
+ NULL,
+ "dbengine metrics",
+ NULL,
+ "Netdata Metrics in Metrics Registry",
+ "metrics",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ rd_mrg_metrics = rrddim_add(st_mrg_metrics, "all", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_mrg_acquired = rrddim_add(st_mrg_metrics, "acquired", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_mrg_collected = rrddim_add(st_mrg_metrics, "collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_mrg_with_retention = rrddim_add(st_mrg_metrics, "with retention", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_mrg_without_retention = rrddim_add(st_mrg_metrics, "without retention", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ rd_mrg_multiple_writers = rrddim_add(st_mrg_metrics, "multi-collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_metrics, (collected_number)mrg_stats.entries);
+ rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_acquired, (collected_number)mrg_stats.entries_referenced);
+ rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_collected, (collected_number)mrg_stats.writers);
+ rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_with_retention, (collected_number)mrg_stats.entries_with_retention);
+ rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_without_retention, (collected_number)mrg_stats.entries - (collected_number)mrg_stats.entries_with_retention);
+ rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_multiple_writers, (collected_number)mrg_stats.writers_conflicts);
+
+ rrdset_done(st_mrg_metrics);
+ }
+
+ {
+ static RRDSET *st_mrg_ops = NULL;
+ static RRDDIM *rd_mrg_add = NULL;
+ static RRDDIM *rd_mrg_del = NULL;
+ static RRDDIM *rd_mrg_search = NULL;
+
+ if (unlikely(!st_mrg_ops)) {
+ st_mrg_ops = rrdset_create_localhost(
+ "netdata",
+ "dbengine_metrics_registry_operations",
+ NULL,
+ "dbengine metrics",
+ NULL,
+ "Netdata Metrics Registry Operations",
+ "metrics",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ rd_mrg_add = rrddim_add(st_mrg_ops, "add", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_mrg_del = rrddim_add(st_mrg_ops, "delete", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_mrg_search = rrddim_add(st_mrg_ops, "search", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_mrg_ops, rd_mrg_add, (collected_number)mrg_stats.additions);
+ rrddim_set_by_pointer(st_mrg_ops, rd_mrg_del, (collected_number)mrg_stats.deletions);
+ rrddim_set_by_pointer(st_mrg_ops, rd_mrg_search, (collected_number)mrg_stats.search_hits + (collected_number)mrg_stats.search_misses);
+
+ rrdset_done(st_mrg_ops);
+ }
+
+ {
+ static RRDSET *st_mrg_references = NULL;
+ static RRDDIM *rd_mrg_references = NULL;
+
+ if (unlikely(!st_mrg_references)) {
+ st_mrg_references = rrdset_create_localhost(
+ "netdata",
+ "dbengine_metrics_registry_references",
+ NULL,
+ "dbengine metrics",
+ NULL,
+ "Netdata Metrics Registry References",
+ "references",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ rd_mrg_references = rrddim_add(st_mrg_references, "references", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_mrg_references, rd_mrg_references, (collected_number)mrg_stats.current_references);
+
+ rrdset_done(st_mrg_references);
+ }
+
+ {
+ static RRDSET *st_cache_hit_ratio = NULL;
+ static RRDDIM *rd_hit_ratio = NULL;
+ static RRDDIM *rd_main_cache_hit_ratio = NULL;
+ static RRDDIM *rd_extent_cache_hit_ratio = NULL;
+ static RRDDIM *rd_extent_merge_hit_ratio = NULL;
+
+ if (unlikely(!st_cache_hit_ratio)) {
+ st_cache_hit_ratio = rrdset_create_localhost(
+ "netdata",
+ "dbengine_cache_hit_ratio",
+ NULL,
+ "dbengine query router",
+ NULL,
+ "Netdata Queries Cache Hit Ratio",
+ "%",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ rd_hit_ratio = rrddim_add(st_cache_hit_ratio, "overall", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE);
+ rd_main_cache_hit_ratio = rrddim_add(st_cache_hit_ratio, "main cache", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE);
+ rd_extent_cache_hit_ratio = rrddim_add(st_cache_hit_ratio, "extent cache", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE);
+ rd_extent_merge_hit_ratio = rrddim_add(st_cache_hit_ratio, "extent merge", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE);
+ }
+ priority++;
+
+ size_t delta_pages_total = cache_efficiency_stats.pages_total - cache_efficiency_stats_old.pages_total;
+ size_t delta_pages_to_load_from_disk = cache_efficiency_stats.pages_to_load_from_disk - cache_efficiency_stats_old.pages_to_load_from_disk;
+ size_t delta_extents_loaded_from_disk = cache_efficiency_stats.extents_loaded_from_disk - cache_efficiency_stats_old.extents_loaded_from_disk;
+
+ size_t delta_pages_data_source_main_cache = cache_efficiency_stats.pages_data_source_main_cache - cache_efficiency_stats_old.pages_data_source_main_cache;
+ size_t delta_pages_pending_found_in_cache_at_pass4 = cache_efficiency_stats.pages_data_source_main_cache_at_pass4 - cache_efficiency_stats_old.pages_data_source_main_cache_at_pass4;
+
+ size_t delta_pages_data_source_extent_cache = cache_efficiency_stats.pages_data_source_extent_cache - cache_efficiency_stats_old.pages_data_source_extent_cache;
+ size_t delta_pages_load_extent_merged = cache_efficiency_stats.pages_load_extent_merged - cache_efficiency_stats_old.pages_load_extent_merged;
+
+ size_t pages_total_hit = delta_pages_total - delta_extents_loaded_from_disk;
+
+ static size_t overall_hit_ratio = 100;
+ size_t main_cache_hit_ratio = 0, extent_cache_hit_ratio = 0, extent_merge_hit_ratio = 0;
+ if(delta_pages_total) {
+ if(pages_total_hit > delta_pages_total)
+ pages_total_hit = delta_pages_total;
+
+ overall_hit_ratio = pages_total_hit * 100 * 10000 / delta_pages_total;
+
+ size_t delta_pages_main_cache = delta_pages_data_source_main_cache + delta_pages_pending_found_in_cache_at_pass4;
+ if(delta_pages_main_cache > delta_pages_total)
+ delta_pages_main_cache = delta_pages_total;
+
+ main_cache_hit_ratio = delta_pages_main_cache * 100 * 10000 / delta_pages_total;
+ }
+
+ if(delta_pages_to_load_from_disk) {
+ if(delta_pages_data_source_extent_cache > delta_pages_to_load_from_disk)
+ delta_pages_data_source_extent_cache = delta_pages_to_load_from_disk;
+
+ extent_cache_hit_ratio = delta_pages_data_source_extent_cache * 100 * 10000 / delta_pages_to_load_from_disk;
+
+ if(delta_pages_load_extent_merged > delta_pages_to_load_from_disk)
+ delta_pages_load_extent_merged = delta_pages_to_load_from_disk;
+
+ extent_merge_hit_ratio = delta_pages_load_extent_merged * 100 * 10000 / delta_pages_to_load_from_disk;
+ }
+
+ rrddim_set_by_pointer(st_cache_hit_ratio, rd_hit_ratio, (collected_number)overall_hit_ratio);
+ rrddim_set_by_pointer(st_cache_hit_ratio, rd_main_cache_hit_ratio, (collected_number)main_cache_hit_ratio);
+ rrddim_set_by_pointer(st_cache_hit_ratio, rd_extent_cache_hit_ratio, (collected_number)extent_cache_hit_ratio);
+ rrddim_set_by_pointer(st_cache_hit_ratio, rd_extent_merge_hit_ratio, (collected_number)extent_merge_hit_ratio);
+
+ rrdset_done(st_cache_hit_ratio);
+ }
+
+ {
+ static RRDSET *st_queries = NULL;
+ static RRDDIM *rd_total = NULL;
+ static RRDDIM *rd_open = NULL;
+ static RRDDIM *rd_jv2 = NULL;
+ static RRDDIM *rd_planned_with_gaps = NULL;
+ static RRDDIM *rd_executed_with_gaps = NULL;
+
+ if (unlikely(!st_queries)) {
+ st_queries = rrdset_create_localhost(
+ "netdata",
+ "dbengine_queries",
+ NULL,
+ "dbengine query router",
+ NULL,
+ "Netdata Queries",
+ "queries/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ rd_total = rrddim_add(st_queries, "total", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_open = rrddim_add(st_queries, "open cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_jv2 = rrddim_add(st_queries, "journal v2", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_planned_with_gaps = rrddim_add(st_queries, "planned with gaps", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_executed_with_gaps = rrddim_add(st_queries, "executed with gaps", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_queries, rd_total, (collected_number)cache_efficiency_stats.queries);
+ rrddim_set_by_pointer(st_queries, rd_open, (collected_number)cache_efficiency_stats.queries_open);
+ rrddim_set_by_pointer(st_queries, rd_jv2, (collected_number)cache_efficiency_stats.queries_journal_v2);
+ rrddim_set_by_pointer(st_queries, rd_planned_with_gaps, (collected_number)cache_efficiency_stats.queries_planned_with_gaps);
+ rrddim_set_by_pointer(st_queries, rd_executed_with_gaps, (collected_number)cache_efficiency_stats.queries_executed_with_gaps);
+
+ rrdset_done(st_queries);
+ }
+
+ {
+ static RRDSET *st_queries_running = NULL;
+ static RRDDIM *rd_queries = NULL;
+
+ if (unlikely(!st_queries_running)) {
+ st_queries_running = rrdset_create_localhost(
+ "netdata",
+ "dbengine_queries_running",
+ NULL,
+ "dbengine query router",
+ NULL,
+ "Netdata Queries Running",
+ "queries",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ rd_queries = rrddim_add(st_queries_running, "queries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_queries_running, rd_queries, (collected_number)cache_efficiency_stats.currently_running_queries);
+
+ rrdset_done(st_queries_running);
+ }
+
+ {
+ static RRDSET *st_query_pages_metadata_source = NULL;
+ static RRDDIM *rd_cache = NULL;
+ static RRDDIM *rd_open = NULL;
+ static RRDDIM *rd_jv2 = NULL;
+
+ if (unlikely(!st_query_pages_metadata_source)) {
+ st_query_pages_metadata_source = rrdset_create_localhost(
+ "netdata",
+ "dbengine_query_pages_metadata_source",
+ NULL,
+ "dbengine query router",
+ NULL,
+ "Netdata Query Pages Metadata Source",
+ "pages/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rd_cache = rrddim_add(st_query_pages_metadata_source, "cache hit", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_jv2 = rrddim_add(st_query_pages_metadata_source, "journal v2 scan", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_open = rrddim_add(st_query_pages_metadata_source, "open journal", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_query_pages_metadata_source, rd_cache, (collected_number)cache_efficiency_stats.pages_meta_source_main_cache);
+ rrddim_set_by_pointer(st_query_pages_metadata_source, rd_jv2, (collected_number)cache_efficiency_stats.pages_meta_source_journal_v2);
+ rrddim_set_by_pointer(st_query_pages_metadata_source, rd_open, (collected_number)cache_efficiency_stats.pages_meta_source_open_cache);
+
+ rrdset_done(st_query_pages_metadata_source);
+ }
+
+ {
+ static RRDSET *st_query_pages_data_source = NULL;
+ static RRDDIM *rd_pages_main_cache = NULL;
+ static RRDDIM *rd_pages_disk = NULL;
+ static RRDDIM *rd_pages_extent_cache = NULL;
+
+ if (unlikely(!st_query_pages_data_source)) {
+ st_query_pages_data_source = rrdset_create_localhost(
+ "netdata",
+ "dbengine_query_pages_data_source",
+ NULL,
+ "dbengine query router",
+ NULL,
+ "Netdata Query Pages to Data Source",
+ "pages/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rd_pages_main_cache = rrddim_add(st_query_pages_data_source, "main cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_pages_disk = rrddim_add(st_query_pages_data_source, "disk", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_pages_extent_cache = rrddim_add(st_query_pages_data_source, "extent cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_query_pages_data_source, rd_pages_main_cache, (collected_number)cache_efficiency_stats.pages_data_source_main_cache + (collected_number)cache_efficiency_stats.pages_data_source_main_cache_at_pass4);
+ rrddim_set_by_pointer(st_query_pages_data_source, rd_pages_disk, (collected_number)cache_efficiency_stats.pages_to_load_from_disk);
+ rrddim_set_by_pointer(st_query_pages_data_source, rd_pages_extent_cache, (collected_number)cache_efficiency_stats.pages_data_source_extent_cache);
+
+ rrdset_done(st_query_pages_data_source);
+ }
+
+ {
+ static RRDSET *st_query_next_page = NULL;
+ static RRDDIM *rd_pass4 = NULL;
+ static RRDDIM *rd_nowait_failed = NULL;
+ static RRDDIM *rd_wait_failed = NULL;
+ static RRDDIM *rd_wait_loaded = NULL;
+ static RRDDIM *rd_nowait_loaded = NULL;
+
+ if (unlikely(!st_query_next_page)) {
+ st_query_next_page = rrdset_create_localhost(
+ "netdata",
+ "dbengine_query_next_page",
+ NULL,
+ "dbengine query router",
+ NULL,
+ "Netdata Query Next Page",
+ "pages/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rd_pass4 = rrddim_add(st_query_next_page, "pass4", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_wait_failed = rrddim_add(st_query_next_page, "failed slow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_nowait_failed = rrddim_add(st_query_next_page, "failed fast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_wait_loaded = rrddim_add(st_query_next_page, "loaded slow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_nowait_loaded = rrddim_add(st_query_next_page, "loaded fast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_query_next_page, rd_pass4, (collected_number)cache_efficiency_stats.pages_data_source_main_cache_at_pass4);
+ rrddim_set_by_pointer(st_query_next_page, rd_wait_failed, (collected_number)cache_efficiency_stats.page_next_wait_failed);
+ rrddim_set_by_pointer(st_query_next_page, rd_nowait_failed, (collected_number)cache_efficiency_stats.page_next_nowait_failed);
+ rrddim_set_by_pointer(st_query_next_page, rd_wait_loaded, (collected_number)cache_efficiency_stats.page_next_wait_loaded);
+ rrddim_set_by_pointer(st_query_next_page, rd_nowait_loaded, (collected_number)cache_efficiency_stats.page_next_nowait_loaded);
+
+ rrdset_done(st_query_next_page);
+ }
+
+ {
+ static RRDSET *st_query_page_issues = NULL;
+ static RRDDIM *rd_pages_zero_time = NULL;
+ static RRDDIM *rd_pages_past_time = NULL;
+ static RRDDIM *rd_pages_invalid_size = NULL;
+ static RRDDIM *rd_pages_fixed_update_every = NULL;
+ static RRDDIM *rd_pages_fixed_entries = NULL;
+ static RRDDIM *rd_pages_overlapping = NULL;
+
+ if (unlikely(!st_query_page_issues)) {
+ st_query_page_issues = rrdset_create_localhost(
+ "netdata",
+ "dbengine_query_next_page_issues",
+ NULL,
+ "dbengine query router",
+ NULL,
+ "Netdata Query Next Page Issues",
+ "pages/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rd_pages_zero_time = rrddim_add(st_query_page_issues, "zero timestamp", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_pages_invalid_size = rrddim_add(st_query_page_issues, "invalid size", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_pages_past_time = rrddim_add(st_query_page_issues, "past time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_pages_overlapping = rrddim_add(st_query_page_issues, "overlapping", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_pages_fixed_update_every = rrddim_add(st_query_page_issues, "update every fixed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_pages_fixed_entries = rrddim_add(st_query_page_issues, "entries fixed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_query_page_issues, rd_pages_zero_time, (collected_number)cache_efficiency_stats.pages_zero_time_skipped);
+ rrddim_set_by_pointer(st_query_page_issues, rd_pages_invalid_size, (collected_number)cache_efficiency_stats.pages_invalid_size_skipped);
+ rrddim_set_by_pointer(st_query_page_issues, rd_pages_past_time, (collected_number)cache_efficiency_stats.pages_past_time_skipped);
+ rrddim_set_by_pointer(st_query_page_issues, rd_pages_overlapping, (collected_number)cache_efficiency_stats.pages_overlapping_skipped);
+ rrddim_set_by_pointer(st_query_page_issues, rd_pages_fixed_update_every, (collected_number)cache_efficiency_stats.pages_invalid_update_every_fixed);
+ rrddim_set_by_pointer(st_query_page_issues, rd_pages_fixed_entries, (collected_number)cache_efficiency_stats.pages_invalid_entries_fixed);
+
+ rrdset_done(st_query_page_issues);
+ }
+
+ {
+ static RRDSET *st_query_pages_from_disk = NULL;
+ static RRDDIM *rd_compressed = NULL;
+ static RRDDIM *rd_invalid = NULL;
+ static RRDDIM *rd_uncompressed = NULL;
+ static RRDDIM *rd_mmap_failed = NULL;
+ static RRDDIM *rd_unavailable = NULL;
+ static RRDDIM *rd_unroutable = NULL;
+ static RRDDIM *rd_not_found = NULL;
+ static RRDDIM *rd_cancelled = NULL;
+ static RRDDIM *rd_invalid_extent = NULL;
+ static RRDDIM *rd_extent_merged = NULL;
+
+ if (unlikely(!st_query_pages_from_disk)) {
+ st_query_pages_from_disk = rrdset_create_localhost(
+ "netdata",
+ "dbengine_query_pages_disk_load",
+ NULL,
+ "dbengine query router",
+ NULL,
+ "Netdata Query Pages Loaded from Disk",
+ "pages/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ rd_compressed = rrddim_add(st_query_pages_from_disk, "ok compressed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_invalid = rrddim_add(st_query_pages_from_disk, "fail invalid page", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_uncompressed = rrddim_add(st_query_pages_from_disk, "ok uncompressed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_mmap_failed = rrddim_add(st_query_pages_from_disk, "fail cant mmap", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_unavailable = rrddim_add(st_query_pages_from_disk, "fail unavailable", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_unroutable = rrddim_add(st_query_pages_from_disk, "fail unroutable", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_not_found = rrddim_add(st_query_pages_from_disk, "fail not found", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_invalid_extent = rrddim_add(st_query_pages_from_disk, "fail invalid extent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_extent_merged = rrddim_add(st_query_pages_from_disk, "extent merged", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_cancelled = rrddim_add(st_query_pages_from_disk, "cancelled", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_query_pages_from_disk, rd_compressed, (collected_number)cache_efficiency_stats.pages_load_ok_compressed);
+ rrddim_set_by_pointer(st_query_pages_from_disk, rd_invalid, (collected_number)cache_efficiency_stats.pages_load_fail_invalid_page_in_extent);
+ rrddim_set_by_pointer(st_query_pages_from_disk, rd_uncompressed, (collected_number)cache_efficiency_stats.pages_load_ok_uncompressed);
+ rrddim_set_by_pointer(st_query_pages_from_disk, rd_mmap_failed, (collected_number)cache_efficiency_stats.pages_load_fail_cant_mmap_extent);
+ rrddim_set_by_pointer(st_query_pages_from_disk, rd_unavailable, (collected_number)cache_efficiency_stats.pages_load_fail_datafile_not_available);
+ rrddim_set_by_pointer(st_query_pages_from_disk, rd_unroutable, (collected_number)cache_efficiency_stats.pages_load_fail_unroutable);
+ rrddim_set_by_pointer(st_query_pages_from_disk, rd_not_found, (collected_number)cache_efficiency_stats.pages_load_fail_not_found);
+ rrddim_set_by_pointer(st_query_pages_from_disk, rd_cancelled, (collected_number)cache_efficiency_stats.pages_load_fail_cancelled);
+ rrddim_set_by_pointer(st_query_pages_from_disk, rd_invalid_extent, (collected_number)cache_efficiency_stats.pages_load_fail_invalid_extent);
+ rrddim_set_by_pointer(st_query_pages_from_disk, rd_extent_merged, (collected_number)cache_efficiency_stats.pages_load_extent_merged);
+
+ rrdset_done(st_query_pages_from_disk);
+ }
+
+ {
+ static RRDSET *st_events = NULL;
+ static RRDDIM *rd_journal_v2_mapped = NULL;
+ static RRDDIM *rd_journal_v2_unmapped = NULL;
+ static RRDDIM *rd_datafile_creation = NULL;
+ static RRDDIM *rd_datafile_deletion = NULL;
+ static RRDDIM *rd_datafile_deletion_spin = NULL;
+ static RRDDIM *rd_jv2_indexing = NULL;
+ static RRDDIM *rd_retention = NULL;
+
+ if (unlikely(!st_events)) {
+ st_events = rrdset_create_localhost(
+ "netdata",
+ "dbengine_events",
+ NULL,
+ "dbengine query router",
+ NULL,
+ "Netdata Database Events",
+ "events/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
+
+ rd_journal_v2_mapped = rrddim_add(st_events, "journal v2 mapped", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_journal_v2_unmapped = rrddim_add(st_events, "journal v2 unmapped", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_datafile_creation = rrddim_add(st_events, "datafile creation", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_datafile_deletion = rrddim_add(st_events, "datafile deletion", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_datafile_deletion_spin = rrddim_add(st_events, "datafile deletion spin", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_jv2_indexing = rrddim_add(st_events, "journal v2 indexing", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_retention = rrddim_add(st_events, "retention", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_events, rd_journal_v2_mapped, (collected_number)cache_efficiency_stats.journal_v2_mapped);
+ rrddim_set_by_pointer(st_events, rd_journal_v2_unmapped, (collected_number)cache_efficiency_stats.journal_v2_unmapped);
+ rrddim_set_by_pointer(st_events, rd_datafile_creation, (collected_number)cache_efficiency_stats.datafile_creation_started);
+ rrddim_set_by_pointer(st_events, rd_datafile_deletion, (collected_number)cache_efficiency_stats.datafile_deletion_started);
+ rrddim_set_by_pointer(st_events, rd_datafile_deletion_spin, (collected_number)cache_efficiency_stats.datafile_deletion_spin);
+ rrddim_set_by_pointer(st_events, rd_jv2_indexing, (collected_number)cache_efficiency_stats.journal_v2_indexing_started);
+ rrddim_set_by_pointer(st_events, rd_retention, (collected_number)cache_efficiency_stats.metrics_retention_started);
+
+ rrdset_done(st_events);
+ }
+
+ {
+ static RRDSET *st_prep_timings = NULL;
+ static RRDDIM *rd_routing = NULL;
+ static RRDDIM *rd_main_cache = NULL;
+ static RRDDIM *rd_open_cache = NULL;
+ static RRDDIM *rd_journal_v2 = NULL;
+ static RRDDIM *rd_pass4 = NULL;
+
+ if (unlikely(!st_prep_timings)) {
+ st_prep_timings = rrdset_create_localhost(
+ "netdata",
+ "dbengine_prep_timings",
+ NULL,
+ "dbengine query router",
+ NULL,
+ "Netdata Query Preparation Timings",
+ "usec/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rd_routing = rrddim_add(st_prep_timings, "routing", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_main_cache = rrddim_add(st_prep_timings, "main cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_open_cache = rrddim_add(st_prep_timings, "open cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_journal_v2 = rrddim_add(st_prep_timings, "journal v2", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_pass4 = rrddim_add(st_prep_timings, "pass4", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_prep_timings, rd_routing, (collected_number)cache_efficiency_stats.prep_time_to_route);
+ rrddim_set_by_pointer(st_prep_timings, rd_main_cache, (collected_number)cache_efficiency_stats.prep_time_in_main_cache_lookup);
+ rrddim_set_by_pointer(st_prep_timings, rd_open_cache, (collected_number)cache_efficiency_stats.prep_time_in_open_cache_lookup);
+ rrddim_set_by_pointer(st_prep_timings, rd_journal_v2, (collected_number)cache_efficiency_stats.prep_time_in_journal_v2_lookup);
+ rrddim_set_by_pointer(st_prep_timings, rd_pass4, (collected_number)cache_efficiency_stats.prep_time_in_pass4_lookup);
+
+ rrdset_done(st_prep_timings);
+ }
+
+ {
+ static RRDSET *st_query_timings = NULL;
+ static RRDDIM *rd_init = NULL;
+ static RRDDIM *rd_prep_wait = NULL;
+ static RRDDIM *rd_next_page_disk_fast = NULL;
+ static RRDDIM *rd_next_page_disk_slow = NULL;
+ static RRDDIM *rd_next_page_preload_fast = NULL;
+ static RRDDIM *rd_next_page_preload_slow = NULL;
+
+ if (unlikely(!st_query_timings)) {
+ st_query_timings = rrdset_create_localhost(
+ "netdata",
+ "dbengine_query_timings",
+ NULL,
+ "dbengine query router",
+ NULL,
+ "Netdata Query Timings",
+ "usec/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_STACKED);
+
+ rd_init = rrddim_add(st_query_timings, "init", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_prep_wait = rrddim_add(st_query_timings, "prep wait", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_next_page_disk_fast = rrddim_add(st_query_timings, "next page disk fast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_next_page_disk_slow = rrddim_add(st_query_timings, "next page disk slow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_next_page_preload_fast = rrddim_add(st_query_timings, "next page preload fast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rd_next_page_preload_slow = rrddim_add(st_query_timings, "next page preload slow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ }
+ priority++;
+
+ rrddim_set_by_pointer(st_query_timings, rd_init, (collected_number)cache_efficiency_stats.query_time_init);
+ rrddim_set_by_pointer(st_query_timings, rd_prep_wait, (collected_number)cache_efficiency_stats.query_time_wait_for_prep);
+ rrddim_set_by_pointer(st_query_timings, rd_next_page_disk_fast, (collected_number)cache_efficiency_stats.query_time_to_fast_disk_next_page);
+ rrddim_set_by_pointer(st_query_timings, rd_next_page_disk_slow, (collected_number)cache_efficiency_stats.query_time_to_slow_disk_next_page);
+ rrddim_set_by_pointer(st_query_timings, rd_next_page_preload_fast, (collected_number)cache_efficiency_stats.query_time_to_fast_preload_next_page);
+ rrddim_set_by_pointer(st_query_timings, rd_next_page_preload_slow, (collected_number)cache_efficiency_stats.query_time_to_slow_preload_next_page);
+
+ rrdset_done(st_query_timings);
+ }
+
if(netdata_rwlock_tryrdlock(&rrd_rwlock) == 0) {
+ priority = 135400;
+
RRDHOST *host;
unsigned long long stats_array[RRDENG_NR_STATS] = {0};
unsigned long long local_stats_array[RRDENG_NR_STATS];
@@ -1012,21 +2536,22 @@ static void dbengine_statistics_charts(void) {
if (unlikely(!st_compression)) {
st_compression = rrdset_create_localhost(
- "netdata",
- "dbengine_compression_ratio",
- NULL,
- "dbengine",
- NULL,
- "Netdata DB engine data extents' compression savings ratio",
- "percentage",
- "netdata",
- "stats",
- 132000,
- localhost->rrd_update_every,
- RRDSET_TYPE_LINE);
+ "netdata",
+ "dbengine_compression_ratio",
+ NULL,
+ "dbengine io",
+ NULL,
+ "Netdata DB engine data extents' compression savings ratio",
+ "percentage",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE);
}
+ priority++;
unsigned long long ratio;
unsigned long long compressed_content_size = stats_array[12];
@@ -1046,158 +2571,29 @@ static void dbengine_statistics_charts(void) {
// ----------------------------------------------------------------
{
- static RRDSET *st_pg_cache_hit_ratio = NULL;
- static RRDDIM *rd_hit_ratio = NULL;
-
- if (unlikely(!st_pg_cache_hit_ratio)) {
- st_pg_cache_hit_ratio = rrdset_create_localhost(
- "netdata",
- "page_cache_hit_ratio",
- NULL,
- "dbengine",
- NULL,
- "Netdata DB engine page cache hit ratio",
- "percentage",
- "netdata",
- "stats",
- 132003,
- localhost->rrd_update_every,
- RRDSET_TYPE_LINE);
-
- rd_hit_ratio = rrddim_add(st_pg_cache_hit_ratio, "ratio", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE);
- }
-
- static unsigned long long old_hits = 0;
- static unsigned long long old_misses = 0;
- unsigned long long hits = stats_array[7];
- unsigned long long misses = stats_array[8];
- unsigned long long hits_delta;
- unsigned long long misses_delta;
- unsigned long long ratio;
-
- hits_delta = hits - old_hits;
- misses_delta = misses - old_misses;
- old_hits = hits;
- old_misses = misses;
-
- if (hits_delta + misses_delta) {
- ratio = (hits_delta * 100 * 1000) / (hits_delta + misses_delta);
- } else {
- ratio = 0;
- }
- rrddim_set_by_pointer(st_pg_cache_hit_ratio, rd_hit_ratio, ratio);
-
- rrdset_done(st_pg_cache_hit_ratio);
- }
-
- // ----------------------------------------------------------------
-
- {
- static RRDSET *st_pg_cache_pages = NULL;
- static RRDDIM *rd_descriptors = NULL;
- static RRDDIM *rd_populated = NULL;
- static RRDDIM *rd_dirty = NULL;
- static RRDDIM *rd_backfills = NULL;
- static RRDDIM *rd_evictions = NULL;
- static RRDDIM *rd_used_by_collectors = NULL;
-
- if (unlikely(!st_pg_cache_pages)) {
- st_pg_cache_pages = rrdset_create_localhost(
- "netdata",
- "page_cache_stats",
- NULL,
- "dbengine",
- NULL,
- "Netdata dbengine page cache statistics",
- "pages",
- "netdata",
- "stats",
- 132004,
- localhost->rrd_update_every,
- RRDSET_TYPE_LINE);
-
- rd_descriptors = rrddim_add(st_pg_cache_pages, "descriptors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- rd_populated = rrddim_add(st_pg_cache_pages, "populated", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- rd_dirty = rrddim_add(st_pg_cache_pages, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- rd_backfills = rrddim_add(st_pg_cache_pages, "backfills", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- rd_evictions = rrddim_add(st_pg_cache_pages, "evictions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
- rd_used_by_collectors =
- rrddim_add(st_pg_cache_pages, "used_by_collectors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- }
-
- rrddim_set_by_pointer(st_pg_cache_pages, rd_descriptors, (collected_number)stats_array[27]);
- rrddim_set_by_pointer(st_pg_cache_pages, rd_populated, (collected_number)stats_array[3]);
- rrddim_set_by_pointer(st_pg_cache_pages, rd_dirty, (collected_number)stats_array[0] + stats_array[4]);
- rrddim_set_by_pointer(st_pg_cache_pages, rd_backfills, (collected_number)stats_array[9]);
- rrddim_set_by_pointer(st_pg_cache_pages, rd_evictions, (collected_number)stats_array[10]);
- rrddim_set_by_pointer(st_pg_cache_pages, rd_used_by_collectors, (collected_number)stats_array[0]);
- rrdset_done(st_pg_cache_pages);
- }
-
- // ----------------------------------------------------------------
-
- {
- static RRDSET *st_long_term_pages = NULL;
- static RRDDIM *rd_total = NULL;
- static RRDDIM *rd_insertions = NULL;
- static RRDDIM *rd_deletions = NULL;
- static RRDDIM *rd_flushing_pressure_deletions = NULL;
-
- if (unlikely(!st_long_term_pages)) {
- st_long_term_pages = rrdset_create_localhost(
- "netdata",
- "dbengine_long_term_page_stats",
- NULL,
- "dbengine",
- NULL,
- "Netdata dbengine long-term page statistics",
- "pages",
- "netdata",
- "stats",
- 132005,
- localhost->rrd_update_every,
- RRDSET_TYPE_LINE);
-
- rd_total = rrddim_add(st_long_term_pages, "total", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
- rd_insertions = rrddim_add(st_long_term_pages, "insertions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
- rd_deletions = rrddim_add(st_long_term_pages, "deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
- rd_flushing_pressure_deletions = rrddim_add(
- st_long_term_pages, "flushing_pressure_deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
- }
-
- rrddim_set_by_pointer(st_long_term_pages, rd_total, (collected_number)stats_array[2]);
- rrddim_set_by_pointer(st_long_term_pages, rd_insertions, (collected_number)stats_array[5]);
- rrddim_set_by_pointer(st_long_term_pages, rd_deletions, (collected_number)stats_array[6]);
- rrddim_set_by_pointer(
- st_long_term_pages, rd_flushing_pressure_deletions, (collected_number)stats_array[36]);
- rrdset_done(st_long_term_pages);
- }
-
- // ----------------------------------------------------------------
-
- {
static RRDSET *st_io_stats = NULL;
static RRDDIM *rd_reads = NULL;
static RRDDIM *rd_writes = NULL;
if (unlikely(!st_io_stats)) {
st_io_stats = rrdset_create_localhost(
- "netdata",
- "dbengine_io_throughput",
- NULL,
- "dbengine",
- NULL,
- "Netdata DB engine I/O throughput",
- "MiB/s",
- "netdata",
- "stats",
- 132006,
- localhost->rrd_update_every,
- RRDSET_TYPE_LINE);
+ "netdata",
+ "dbengine_io_throughput",
+ NULL,
+ "dbengine io",
+ NULL,
+ "Netdata DB engine I/O throughput",
+ "MiB/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL);
}
+ priority++;
rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[17]);
rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[15]);
@@ -1213,22 +2609,23 @@ static void dbengine_statistics_charts(void) {
if (unlikely(!st_io_stats)) {
st_io_stats = rrdset_create_localhost(
- "netdata",
- "dbengine_io_operations",
- NULL,
- "dbengine",
- NULL,
- "Netdata DB engine I/O operations",
- "operations/s",
- "netdata",
- "stats",
- 132007,
- localhost->rrd_update_every,
- RRDSET_TYPE_LINE);
+ "netdata",
+ "dbengine_io_operations",
+ NULL,
+ "dbengine io",
+ NULL,
+ "Netdata DB engine I/O operations",
+ "operations/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
}
+ priority++;
rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[18]);
rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[16]);
@@ -1245,24 +2642,25 @@ static void dbengine_statistics_charts(void) {
if (unlikely(!st_errors)) {
st_errors = rrdset_create_localhost(
- "netdata",
- "dbengine_global_errors",
- NULL,
- "dbengine",
- NULL,
- "Netdata DB engine errors",
- "errors/s",
- "netdata",
- "stats",
- 132008,
- localhost->rrd_update_every,
- RRDSET_TYPE_LINE);
+ "netdata",
+ "dbengine_global_errors",
+ NULL,
+ "dbengine io",
+ NULL,
+ "Netdata DB engine errors",
+ "errors/s",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
rd_io_errors = rrddim_add(st_errors, "io_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
rd_fs_errors = rrddim_add(st_errors, "fs_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
pg_cache_over_half_dirty_events =
- rrddim_add(st_errors, "pg_cache_over_half_dirty_events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+ rrddim_add(st_errors, "pg_cache_over_half_dirty_events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
}
+ priority++;
rrddim_set_by_pointer(st_errors, rd_io_errors, (collected_number)stats_array[30]);
rrddim_set_by_pointer(st_errors, rd_fs_errors, (collected_number)stats_array[31]);
@@ -1279,87 +2677,33 @@ static void dbengine_statistics_charts(void) {
if (unlikely(!st_fd)) {
st_fd = rrdset_create_localhost(
- "netdata",
- "dbengine_global_file_descriptors",
- NULL,
- "dbengine",
- NULL,
- "Netdata DB engine File Descriptors",
- "descriptors",
- "netdata",
- "stats",
- 132009,
- localhost->rrd_update_every,
- RRDSET_TYPE_LINE);
+ "netdata",
+ "dbengine_global_file_descriptors",
+ NULL,
+ "dbengine io",
+ NULL,
+ "Netdata DB engine File Descriptors",
+ "descriptors",
+ "netdata",
+ "stats",
+ priority,
+ localhost->rrd_update_every,
+ RRDSET_TYPE_LINE);
rd_fd_current = rrddim_add(st_fd, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
rd_fd_max = rrddim_add(st_fd, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
}
+ priority++;
rrddim_set_by_pointer(st_fd, rd_fd_current, (collected_number)stats_array[32]);
/* Careful here, modify this accordingly if the File-Descriptor budget ever changes */
rrddim_set_by_pointer(st_fd, rd_fd_max, (collected_number)rlimit_nofile.rlim_cur / 4);
rrdset_done(st_fd);
}
-
- // ----------------------------------------------------------------
-
- {
- static RRDSET *st_ram_usage = NULL;
- static RRDDIM *rd_cached = NULL;
- static RRDDIM *rd_pinned = NULL;
- static RRDDIM *rd_cache_metadata = NULL;
- static RRDDIM *rd_index_metadata = NULL;
- static RRDDIM *rd_pages_metadata = NULL;
-
- collected_number API_producers, populated_pages, cache_metadata, pages_on_disk,
- page_cache_descriptors, index_metadata, pages_metadata;
-
- if (unlikely(!st_ram_usage)) {
- st_ram_usage = rrdset_create_localhost(
- "netdata",
- "dbengine_ram",
- NULL,
- "dbengine",
- NULL,
- "Netdata DB engine RAM usage",
- "MiB",
- "netdata",
- "stats",
- 132010,
- localhost->rrd_update_every,
- RRDSET_TYPE_STACKED);
-
- rd_cached = rrddim_add(st_ram_usage, "cache", NULL, RRDENG_BLOCK_SIZE, 1024*1024, RRD_ALGORITHM_ABSOLUTE);
- rd_pinned = rrddim_add(st_ram_usage, "collectors", NULL, RRDENG_BLOCK_SIZE, 1024*1024, RRD_ALGORITHM_ABSOLUTE);
- rd_cache_metadata = rrddim_add(st_ram_usage, "cache metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE);
- rd_pages_metadata = rrddim_add(st_ram_usage, "pages metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE);
- rd_index_metadata = rrddim_add(st_ram_usage, "index metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE);
- }
-
- API_producers = (collected_number)stats_array[0];
- pages_on_disk = (collected_number)stats_array[2];
- populated_pages = (collected_number)stats_array[3];
- page_cache_descriptors = (collected_number)stats_array[27];
-
- cache_metadata = page_cache_descriptors * sizeof(struct page_cache_descr);
-
- pages_metadata = pages_on_disk * sizeof(struct rrdeng_page_descr);
-
- /* This is an empirical estimation for Judy array indexing and extent structures */
- index_metadata = pages_on_disk * 58;
-
- rrddim_set_by_pointer(st_ram_usage, rd_cached, populated_pages - API_producers);
- rrddim_set_by_pointer(st_ram_usage, rd_pinned, API_producers);
- rrddim_set_by_pointer(st_ram_usage, rd_cache_metadata, cache_metadata);
- rrddim_set_by_pointer(st_ram_usage, rd_pages_metadata, pages_metadata);
- rrddim_set_by_pointer(st_ram_usage, rd_index_metadata, index_metadata);
- rrdset_done(st_ram_usage);
- }
}
}
-#endif
}
+#endif // ENABLE_DBENGINE
static void update_strings_charts() {
static RRDSET *st_ops = NULL, *st_entries = NULL, *st_mem = NULL;
@@ -1486,6 +2830,15 @@ static void update_heartbeat_charts() {
// ---------------------------------------------------------------------------------------------------------------------
// dictionary statistics
+struct dictionary_stats dictionary_stats_category_collectors = { .name = "collectors" };
+struct dictionary_stats dictionary_stats_category_rrdhost = { .name = "rrdhost" };
+struct dictionary_stats dictionary_stats_category_rrdset_rrddim = { .name = "rrdset_rrddim" };
+struct dictionary_stats dictionary_stats_category_rrdcontext = { .name = "context" };
+struct dictionary_stats dictionary_stats_category_rrdlabels = { .name = "labels" };
+struct dictionary_stats dictionary_stats_category_rrdhealth = { .name = "health" };
+struct dictionary_stats dictionary_stats_category_functions = { .name = "functions" };
+struct dictionary_stats dictionary_stats_category_replication = { .name = "replication" };
+
struct dictionary_categories {
struct dictionary_stats *stats;
const char *family;
@@ -1531,7 +2884,15 @@ struct dictionary_categories {
RRDDIM *rd_spins_delete;
} dictionary_categories[] = {
- { .stats = &dictionary_stats_category_other, "dictionaries", "dictionaries", 900000 },
+ { .stats = &dictionary_stats_category_collectors, "dictionaries collectors", "dictionaries", 900000 },
+ { .stats = &dictionary_stats_category_rrdhost, "dictionaries hosts", "dictionaries", 900000 },
+ { .stats = &dictionary_stats_category_rrdset_rrddim, "dictionaries rrd", "dictionaries", 900000 },
+ { .stats = &dictionary_stats_category_rrdcontext, "dictionaries contexts", "dictionaries", 900000 },
+ { .stats = &dictionary_stats_category_rrdlabels, "dictionaries labels", "dictionaries", 900000 },
+ { .stats = &dictionary_stats_category_rrdhealth, "dictionaries health", "dictionaries", 900000 },
+ { .stats = &dictionary_stats_category_functions, "dictionaries functions", "dictionaries", 900000 },
+ { .stats = &dictionary_stats_category_replication, "dictionaries replication", "dictionaries", 900000 },
+ { .stats = &dictionary_stats_category_other, "dictionaries other", "dictionaries", 900000 },
// terminator
{ .stats = NULL, NULL, NULL, 0 },
@@ -1741,7 +3102,7 @@ static void update_dictionary_category_charts(struct dictionary_categories *c) {
// ------------------------------------------------------------------------
total = 0;
- load_dictionary_stats_entry(memory.indexed);
+ load_dictionary_stats_entry(memory.index);
load_dictionary_stats_entry(memory.values);
load_dictionary_stats_entry(memory.dict);
@@ -1775,7 +3136,7 @@ static void update_dictionary_category_charts(struct dictionary_categories *c) {
rrdlabels_add(c->st_memory->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO);
}
- rrddim_set_by_pointer(c->st_memory, c->rd_memory_indexed, (collected_number)stats.memory.indexed);
+ rrddim_set_by_pointer(c->st_memory, c->rd_memory_indexed, (collected_number)stats.memory.index);
rrddim_set_by_pointer(c->st_memory, c->rd_memory_values, (collected_number)stats.memory.values);
rrddim_set_by_pointer(c->st_memory, c->rd_memory_dict, (collected_number)stats.memory.dict);
@@ -2079,6 +3440,7 @@ static struct worker_utilization all_workers_utilization[] = {
{ .name = "STREAMRCV", .family = "workers streaming receive", .priority = 1000000 },
{ .name = "STREAMSND", .family = "workers streaming send", .priority = 1000000 },
{ .name = "DBENGINE", .family = "workers dbengine instances", .priority = 1000000 },
+ { .name = "LIBUV", .family = "workers libuv threadpool", .priority = 1000000 },
{ .name = "WEB", .family = "workers web server", .priority = 1000000 },
{ .name = "ACLKQUERY", .family = "workers aclk query", .priority = 1000000 },
{ .name = "ACLKSYNC", .family = "workers aclk host sync", .priority = 1000000 },
@@ -2543,7 +3905,7 @@ static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_
// (re)open the procfile to the new filename
bool set_quotes = (ff == NULL) ? true : false;
- ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT);
+ ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_ERROR_ON_ERROR_LOG);
if(unlikely(!ff)) return -1;
if(set_quotes)
@@ -2577,7 +3939,7 @@ static void workers_threads_cleanup(struct worker_utilization *wu) {
if(!t->enabled) {
JudyLDel(&workers_by_pid_JudyL_array, t->pid, PJE0);
- DOUBLE_LINKED_LIST_REMOVE_UNSAFE(wu->threads, t, prev, next);
+ DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(wu->threads, t, prev, next);
freez(t);
}
t = next;
@@ -2604,7 +3966,7 @@ static struct worker_thread *worker_thread_create(struct worker_utilization *wu,
*PValue = wt;
// link it
- DOUBLE_LINKED_LIST_APPEND_UNSAFE(wu->threads, wt, prev, next);
+ DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(wu->threads, wt, prev, next);
return wt;
}
@@ -2637,6 +3999,9 @@ static void worker_utilization_charts_callback(void *ptr
// find the worker_thread in the list
struct worker_thread *wt = worker_thread_find_or_create(wu, pid);
+ if(utilization_usec > duration_usec)
+ utilization_usec = duration_usec;
+
wt->enabled = true;
wt->busy_time = utilization_usec;
wt->jobs_started = jobs_started;
@@ -2786,8 +4151,6 @@ static void global_statistics_cleanup(void *ptr)
info("cleaning up...");
- worker_utilization_finish();
-
static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
}
@@ -2810,23 +4173,22 @@ void *global_statistics_main(void *ptr)
// to make sure we are not close to any other thread
hb.randomness = 0;
- while (!netdata_exit) {
+ while (service_running(SERVICE_COLLECTORS)) {
worker_is_idle();
heartbeat_next(&hb, step);
worker_is_busy(WORKER_JOB_GLOBAL);
global_statistics_charts();
- worker_is_busy(WORKER_JOB_SQLITE3);
- sqlite3_statistics_charts();
-
worker_is_busy(WORKER_JOB_REGISTRY);
registry_statistics();
+#ifdef ENABLE_DBENGINE
if(dbengine_enabled) {
worker_is_busy(WORKER_JOB_DBENGINE);
- dbengine_statistics_charts();
+ dbengine2_statistics_charts();
}
+#endif
worker_is_busy(WORKER_JOB_HEARTBEAT);
update_heartbeat_charts();
@@ -2880,7 +4242,7 @@ void *global_statistics_workers_main(void *ptr)
heartbeat_t hb;
heartbeat_init(&hb);
- while (!netdata_exit) {
+ while (service_running(SERVICE_COLLECTORS)) {
worker_is_idle();
heartbeat_next(&hb, step);
@@ -2892,3 +4254,45 @@ void *global_statistics_workers_main(void *ptr)
return NULL;
}
+// ---------------------------------------------------------------------------------------------------------------------
+// sqlite3 thread
+
+static void global_statistics_sqlite3_cleanup(void *ptr)
+{
+ worker_unregister();
+
+ struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
+ static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
+
+ info("cleaning up...");
+
+ static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
+}
+
+void *global_statistics_sqlite3_main(void *ptr)
+{
+ global_statistics_register_workers();
+
+ netdata_thread_cleanup_push(global_statistics_sqlite3_cleanup, ptr);
+
+ int update_every =
+ (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every);
+ if (update_every < localhost->rrd_update_every)
+ update_every = localhost->rrd_update_every;
+
+ usec_t step = update_every * USEC_PER_SEC;
+ heartbeat_t hb;
+ heartbeat_init(&hb);
+
+ while (service_running(SERVICE_COLLECTORS)) {
+ worker_is_idle();
+ heartbeat_next(&hb, step);
+
+ worker_is_busy(WORKER_JOB_SQLITE3);
+ sqlite3_statistics_charts();
+ }
+
+ netdata_thread_cleanup_pop(1);
+ return NULL;
+}
+
diff --git a/daemon/global_statistics.h b/daemon/global_statistics.h
index f7d6775cf..7bdb153dd 100644
--- a/daemon/global_statistics.h
+++ b/daemon/global_statistics.h
@@ -5,10 +5,39 @@
#include "database/rrd.h"
+extern struct netdata_buffers_statistics {
+ size_t rrdhost_allocations_size;
+ size_t rrdhost_senders;
+ size_t rrdhost_receivers;
+ size_t query_targets_size;
+ size_t rrdset_done_rda_size;
+ size_t buffers_aclk;
+ size_t buffers_api;
+ size_t buffers_functions;
+ size_t buffers_sqlite;
+ size_t buffers_exporters;
+ size_t buffers_health;
+ size_t buffers_streaming;
+ size_t cbuffers_streaming;
+ size_t buffers_web;
+} netdata_buffers_statistics;
+
+extern struct dictionary_stats dictionary_stats_category_collectors;
+extern struct dictionary_stats dictionary_stats_category_rrdhost;
+extern struct dictionary_stats dictionary_stats_category_rrdset_rrddim;
+extern struct dictionary_stats dictionary_stats_category_rrdcontext;
+extern struct dictionary_stats dictionary_stats_category_rrdlabels;
+extern struct dictionary_stats dictionary_stats_category_rrdhealth;
+extern struct dictionary_stats dictionary_stats_category_functions;
+extern struct dictionary_stats dictionary_stats_category_replication;
+
+extern size_t rrddim_db_memory_size;
+
// ----------------------------------------------------------------------------
// global statistics
void global_statistics_ml_query_completed(size_t points_read);
+void global_statistics_ml_models_consulted(size_t models_consulted);
void global_statistics_exporters_query_completed(size_t points_read);
void global_statistics_backfill_query_completed(size_t points_read);
void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source);
diff --git a/daemon/main.c b/daemon/main.c
index 6b591385d..7b2076f3f 100644
--- a/daemon/main.c
+++ b/daemon/main.c
@@ -4,10 +4,16 @@
#include "buildinfo.h"
#include "static_threads.h"
+#if defined(ENV32BIT)
+#warning COMPILING 32BIT NETDATA
+#endif
+
bool unittest_running = false;
int netdata_zero_metrics_enabled;
int netdata_anonymous_statistics_enabled;
+int libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
+
struct netdata_static_thread *static_threads;
struct config netdata_config = {
@@ -23,69 +29,461 @@ struct config netdata_config = {
}
};
+typedef struct service_thread {
+ pid_t tid;
+ SERVICE_THREAD_TYPE type;
+ SERVICE_TYPE services;
+ char name[NETDATA_THREAD_NAME_MAX + 1];
+ bool cancelled;
+
+ union {
+ netdata_thread_t netdata_thread;
+ uv_thread_t uv_thread;
+ };
+
+ force_quit_t force_quit_callback;
+ request_quit_t request_quit_callback;
+ void *data;
+} SERVICE_THREAD;
+
+struct service_globals {
+ SERVICE_TYPE running;
+ SPINLOCK lock;
+ Pvoid_t pid_judy;
+} service_globals = {
+ .running = ~0,
+ .pid_judy = NULL,
+};
+
+SERVICE_THREAD *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update __maybe_unused) {
+ SERVICE_THREAD *sth = NULL;
+ pid_t tid = gettid();
+
+ netdata_spinlock_lock(&service_globals.lock);
+ Pvoid_t *PValue = JudyLIns(&service_globals.pid_judy, tid, PJE0);
+ if(!*PValue) {
+ sth = callocz(1, sizeof(SERVICE_THREAD));
+ sth->tid = tid;
+ sth->type = thread_type;
+ sth->request_quit_callback = request_quit_callback;
+ sth->force_quit_callback = force_quit_callback;
+ sth->data = data;
+ os_thread_get_current_name_np(sth->name);
+ *PValue = sth;
+
+ switch(thread_type) {
+ default:
+ case SERVICE_THREAD_TYPE_NETDATA:
+ sth->netdata_thread = netdata_thread_self();
+ break;
+
+ case SERVICE_THREAD_TYPE_EVENT_LOOP:
+ case SERVICE_THREAD_TYPE_LIBUV:
+ sth->uv_thread = uv_thread_self();
+ break;
+ }
+ }
+ else {
+ sth = *PValue;
+ }
+ netdata_spinlock_unlock(&service_globals.lock);
+
+ return sth;
+}
+
+void service_exits(void) {
+ pid_t tid = gettid();
+
+ netdata_spinlock_lock(&service_globals.lock);
+ Pvoid_t *PValue = JudyLGet(service_globals.pid_judy, tid, PJE0);
+ if(PValue) {
+ freez(*PValue);
+ JudyLDel(&service_globals.pid_judy, tid, PJE0);
+ }
+ netdata_spinlock_unlock(&service_globals.lock);
+}
+
+bool service_running(SERVICE_TYPE service) {
+ static __thread SERVICE_THREAD *sth = NULL;
+
+ if(unlikely(!sth))
+ sth = service_register(SERVICE_THREAD_TYPE_NETDATA, NULL, NULL, NULL, false);
+
+ if(netdata_exit)
+ __atomic_store_n(&service_globals.running, 0, __ATOMIC_RELAXED);
+
+ if(service == 0)
+ service = sth->services;
+
+ sth->services |= service;
+
+ return ((__atomic_load_n(&service_globals.running, __ATOMIC_RELAXED) & service) == service);
+}
+
+void service_signal_exit(SERVICE_TYPE service) {
+ __atomic_and_fetch(&service_globals.running, ~(service), __ATOMIC_RELAXED);
+
+ netdata_spinlock_lock(&service_globals.lock);
+
+ Pvoid_t *PValue;
+ Word_t tid = 0;
+ bool first = true;
+ while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
+ SERVICE_THREAD *sth = *PValue;
+
+ if((sth->services & service) && sth->request_quit_callback) {
+ netdata_spinlock_unlock(&service_globals.lock);
+ sth->request_quit_callback(sth->data);
+ netdata_spinlock_lock(&service_globals.lock);
+ continue;
+ }
+ }
+
+ netdata_spinlock_unlock(&service_globals.lock);
+}
+
+static void service_to_buffer(BUFFER *wb, SERVICE_TYPE service) {
+ if(service & SERVICE_MAINTENANCE)
+ buffer_strcat(wb, "MAINTENANCE ");
+ if(service & SERVICE_COLLECTORS)
+ buffer_strcat(wb, "COLLECTORS ");
+ if(service & SERVICE_ML_TRAINING)
+ buffer_strcat(wb, "ML_TRAINING ");
+ if(service & SERVICE_ML_PREDICTION)
+ buffer_strcat(wb, "ML_PREDICTION ");
+ if(service & SERVICE_REPLICATION)
+ buffer_strcat(wb, "REPLICATION ");
+ if(service & ABILITY_DATA_QUERIES)
+ buffer_strcat(wb, "DATA_QUERIES ");
+ if(service & ABILITY_WEB_REQUESTS)
+ buffer_strcat(wb, "WEB_REQUESTS ");
+ if(service & SERVICE_WEB_SERVER)
+ buffer_strcat(wb, "WEB_SERVER ");
+ if(service & SERVICE_ACLK)
+ buffer_strcat(wb, "ACLK ");
+ if(service & SERVICE_HEALTH)
+ buffer_strcat(wb, "HEALTH ");
+ if(service & SERVICE_STREAMING)
+ buffer_strcat(wb, "STREAMING ");
+ if(service & ABILITY_STREAMING_CONNECTIONS)
+ buffer_strcat(wb, "STREAMING_CONNECTIONS ");
+ if(service & SERVICE_CONTEXT)
+ buffer_strcat(wb, "CONTEXT ");
+ if(service & SERVICE_ANALYTICS)
+ buffer_strcat(wb, "ANALYTICS ");
+ if(service & SERVICE_EXPORTERS)
+ buffer_strcat(wb, "EXPORTERS ");
+}
+
+static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) {
+ BUFFER *service_list = buffer_create(1024, NULL);
+ BUFFER *thread_list = buffer_create(1024, NULL);
+ usec_t started_ut = now_monotonic_usec(), ended_ut;
+ size_t running;
+ SERVICE_TYPE running_services = 0;
+
+ // cancel the threads
+ running = 0;
+ running_services = 0;
+ {
+ buffer_flush(thread_list);
+
+ netdata_spinlock_lock(&service_globals.lock);
+
+ Pvoid_t *PValue;
+ Word_t tid = 0;
+ bool first = true;
+ while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
+ SERVICE_THREAD *sth = *PValue;
+ if(sth->services & service && sth->tid != gettid() && !sth->cancelled) {
+ sth->cancelled = true;
+
+ switch(sth->type) {
+ default:
+ case SERVICE_THREAD_TYPE_NETDATA:
+ netdata_thread_cancel(sth->netdata_thread);
+ break;
+
+ case SERVICE_THREAD_TYPE_EVENT_LOOP:
+ case SERVICE_THREAD_TYPE_LIBUV:
+ break;
+ }
+
+ if(running)
+ buffer_strcat(thread_list, ", ");
+
+ buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid);
+
+ running++;
+ running_services |= sth->services & service;
+
+ if(sth->force_quit_callback) {
+ netdata_spinlock_unlock(&service_globals.lock);
+ sth->force_quit_callback(sth->data);
+ netdata_spinlock_lock(&service_globals.lock);
+ continue;
+ }
+ }
+ }
+
+ netdata_spinlock_unlock(&service_globals.lock);
+ }
+
+ service_signal_exit(service);
+
+ // signal them to stop
+ size_t last_running = 0;
+ size_t stale_time_ut = 0;
+ usec_t sleep_ut = 50 * USEC_PER_MS;
+ size_t log_countdown_ut = sleep_ut;
+ do {
+ if(running != last_running)
+ stale_time_ut = 0;
+
+ last_running = running;
+ running = 0;
+ running_services = 0;
+ buffer_flush(thread_list);
+
+ netdata_spinlock_lock(&service_globals.lock);
+
+ Pvoid_t *PValue;
+ Word_t tid = 0;
+ bool first = true;
+ while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
+ SERVICE_THREAD *sth = *PValue;
+ if(sth->services & service && sth->tid != gettid()) {
+ if(running)
+ buffer_strcat(thread_list, ", ");
+
+ buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid);
+
+ running_services |= sth->services & service;
+ running++;
+ }
+ }
+
+ netdata_spinlock_unlock(&service_globals.lock);
+
+ if(running) {
+ log_countdown_ut -= (log_countdown_ut >= sleep_ut) ? sleep_ut : log_countdown_ut;
+ if(log_countdown_ut == 0 || running != last_running) {
+ log_countdown_ut = 20 * sleep_ut;
+
+ buffer_flush(service_list);
+ service_to_buffer(service_list, running_services);
+ info("SERVICE CONTROL: waiting for the following %zu services [ %s] to exit: %s",
+ running, buffer_tostring(service_list),
+ running <= 10 ? buffer_tostring(thread_list) : "");
+ }
+
+ sleep_usec(sleep_ut);
+ stale_time_ut += sleep_ut;
+ }
+
+ ended_ut = now_monotonic_usec();
+ } while(running && (ended_ut - started_ut < timeout_ut || stale_time_ut < timeout_ut));
+
+ if(running) {
+ buffer_flush(service_list);
+ service_to_buffer(service_list, running_services);
+ info("SERVICE CONTROL: "
+ "the following %zu service(s) [ %s] take too long to exit: %s; "
+ "giving up on them...",
+ running, buffer_tostring(service_list),
+ buffer_tostring(thread_list));
+ }
+
+ buffer_free(thread_list);
+ buffer_free(service_list);
+
+ return (running == 0);
+}
+
+#define delta_shutdown_time(msg) \
+ { \
+ usec_t now_ut = now_monotonic_usec(); \
+ if(prev_msg) \
+ info("NETDATA SHUTDOWN: in %7llu ms, %s%s - next: %s", (now_ut - last_ut) / USEC_PER_MS, (timeout)?"(TIMEOUT) ":"", prev_msg, msg); \
+ else \
+ info("NETDATA SHUTDOWN: next: %s", msg); \
+ last_ut = now_ut; \
+ prev_msg = msg; \
+ timeout = false; \
+ }
+
void netdata_cleanup_and_exit(int ret) {
- // enabling this, is wrong
- // because the threads will be cancelled while cleaning up
- // netdata_exit = 1;
+ usec_t started_ut = now_monotonic_usec();
+ usec_t last_ut = started_ut;
+ const char *prev_msg = NULL;
+ bool timeout = false;
error_log_limit_unlimited();
- info("EXIT: netdata prepares to exit with code %d...", ret);
+ info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret);
send_statistics("EXIT", ret?"ERROR":"OK","-");
+ delta_shutdown_time("create shutdown file");
+
char agent_crash_file[FILENAME_MAX + 1];
char agent_incomplete_shutdown_file[FILENAME_MAX + 1];
snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir);
snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir);
(void) rename(agent_crash_file, agent_incomplete_shutdown_file);
- // cleanup/save the database and exit
- info("EXIT: cleaning up the database...");
+#ifdef ENABLE_DBENGINE
+ if(dbengine_enabled) {
+ delta_shutdown_time("dbengine exit mode");
+ for (size_t tier = 0; tier < storage_tiers; tier++)
+ rrdeng_exit_mode(multidb_ctx[tier]);
+ }
+#endif
+
+ delta_shutdown_time("disable maintenance, new queries, new web requests, new streaming connections and aclk");
+
+ service_signal_exit(
+ SERVICE_MAINTENANCE
+ | ABILITY_DATA_QUERIES
+ | ABILITY_WEB_REQUESTS
+ | ABILITY_STREAMING_CONNECTIONS
+ | SERVICE_ACLK
+ );
+
+ delta_shutdown_time("stop replication, exporters, ML training, health and web servers threads");
+
+ timeout = !service_wait_exit(
+ SERVICE_REPLICATION
+ | SERVICE_EXPORTERS
+ | SERVICE_ML_TRAINING
+ | SERVICE_HEALTH
+ | SERVICE_WEB_SERVER
+ , 3 * USEC_PER_SEC);
+
+ delta_shutdown_time("stop collectors and streaming threads");
+
+ timeout = !service_wait_exit(
+ SERVICE_COLLECTORS
+ | SERVICE_STREAMING
+ , 3 * USEC_PER_SEC);
+
+ delta_shutdown_time("stop ML prediction and context threads");
+
+ timeout = !service_wait_exit(
+ SERVICE_ML_PREDICTION
+ | SERVICE_CONTEXT
+ , 3 * USEC_PER_SEC);
+
+ delta_shutdown_time("stop maintenance thread");
+
+ timeout = !service_wait_exit(
+ SERVICE_MAINTENANCE
+ , 3 * USEC_PER_SEC);
+
+ delta_shutdown_time("clean rrdhost database");
+
rrdhost_cleanup_all();
- if(!ret) {
- // exit cleanly
+ delta_shutdown_time("prepare metasync shutdown");
+
+ metadata_sync_shutdown_prepare();
- // stop everything
- info("EXIT: stopping static threads...");
#ifdef ENABLE_ACLK
- aclk_sync_exit_all();
+ delta_shutdown_time("signal aclk sync to stop");
+ aclk_sync_exit_all();
#endif
- cancel_main_threads();
- // free the database
- info("EXIT: freeing database memory...");
+ delta_shutdown_time("stop aclk threads");
+
+ timeout = !service_wait_exit(
+ SERVICE_ACLK
+ , 3 * USEC_PER_SEC);
+
+ delta_shutdown_time("stop all remaining worker threads");
+
+ timeout = !service_wait_exit(~0, 10 * USEC_PER_SEC);
+
+ delta_shutdown_time("cancel main threads");
+
+ cancel_main_threads();
+
+ if(!ret) {
+ // exit cleanly
+
#ifdef ENABLE_DBENGINE
if(dbengine_enabled) {
+ delta_shutdown_time("flush dbengine tiers");
for (size_t tier = 0; tier < storage_tiers; tier++)
rrdeng_prepare_exit(multidb_ctx[tier]);
}
#endif
- metadata_sync_shutdown_prepare();
- rrdhost_free_all();
+
+ // free the database
+ delta_shutdown_time("stop collection for all hosts");
+
+ // rrdhost_free_all();
+ rrd_finalize_collection_for_all_hosts();
+
+ delta_shutdown_time("stop metasync threads");
+
metadata_sync_shutdown();
+
#ifdef ENABLE_DBENGINE
if(dbengine_enabled) {
+ delta_shutdown_time("wait for dbengine collectors to finish");
+
+ size_t running = 1;
+ while(running) {
+ running = 0;
+ for (size_t tier = 0; tier < storage_tiers; tier++)
+ running += rrdeng_collectors_running(multidb_ctx[tier]);
+
+ if(running)
+ sleep_usec(100 * USEC_PER_MS);
+ }
+
+ delta_shutdown_time("wait for dbengine main cache to finish flushing");
+
+ while (pgc_hot_and_dirty_entries(main_cache)) {
+ pgc_flush_all_hot_and_dirty_pages(main_cache, PGC_SECTION_ALL);
+ sleep_usec(100 * USEC_PER_MS);
+ }
+
+ delta_shutdown_time("stop dbengine tiers");
for (size_t tier = 0; tier < storage_tiers; tier++)
rrdeng_exit(multidb_ctx[tier]);
}
#endif
}
+
+ delta_shutdown_time("close SQL context db");
+
sql_close_context_database();
+
+ delta_shutdown_time("closed SQL main db");
+
sql_close_database();
// unlink the pid
if(pidfile[0]) {
- info("EXIT: removing netdata PID file '%s'...", pidfile);
+ delta_shutdown_time("remove pid file");
+
if(unlink(pidfile) != 0)
error("EXIT: cannot unlink pidfile '%s'.", pidfile);
}
#ifdef ENABLE_HTTPS
+ delta_shutdown_time("free openssl structures");
security_clean_openssl();
#endif
- info("EXIT: all done - netdata is now exiting - bye bye...");
+
+ delta_shutdown_time("remove incomplete shutdown file");
+
(void) unlink(agent_incomplete_shutdown_file);
+
+ delta_shutdown_time("exit");
+
+ usec_t ended_ut = now_monotonic_usec();
+ info("NETDATA SHUTDOWN: completed in %llu ms - netdata is now exiting - bye bye...", (ended_ut - started_ut) / USEC_PER_MS);
exit(ret);
}
@@ -225,6 +623,32 @@ int killpid(pid_t pid) {
return ret;
}
+static void set_nofile_limit(struct rlimit *rl) {
+ // get the num files allowed
+ if(getrlimit(RLIMIT_NOFILE, rl) != 0) {
+ error("getrlimit(RLIMIT_NOFILE) failed");
+ return;
+ }
+
+ info("resources control: allowed file descriptors: soft = %zu, max = %zu",
+ (size_t) rl->rlim_cur, (size_t) rl->rlim_max);
+
+ // make the soft/hard limits equal
+ rl->rlim_cur = rl->rlim_max;
+ if (setrlimit(RLIMIT_NOFILE, rl) != 0) {
+ error("setrlimit(RLIMIT_NOFILE, { %zu, %zu }) failed", (size_t)rl->rlim_cur, (size_t)rl->rlim_max);
+ }
+
+ // sanity check to make sure we have enough file descriptors available to open
+ if (getrlimit(RLIMIT_NOFILE, rl) != 0) {
+ error("getrlimit(RLIMIT_NOFILE) failed");
+ return;
+ }
+
+ if (rl->rlim_cur < 1024)
+ error("Number of open file descriptors allowed for this process is too low (RLIMIT_NOFILE=%zu)", (size_t)rl->rlim_cur);
+}
+
void cancel_main_threads() {
error_log_limit_unlimited();
@@ -408,6 +832,9 @@ static void log_init(void) {
snprintfz(filename, FILENAME_MAX, "%s/error.log", netdata_configured_log_dir);
stderr_filename = config_get(CONFIG_SECTION_LOGS, "error", filename);
+ snprintfz(filename, FILENAME_MAX, "%s/collector.log", netdata_configured_log_dir);
+ stdcollector_filename = config_get(CONFIG_SECTION_LOGS, "collector", filename);
+
snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir);
stdaccess_filename = config_get(CONFIG_SECTION_LOGS, "access", filename);
@@ -679,8 +1106,9 @@ static void get_netdata_configured_variables() {
// ------------------------------------------------------------------------
// get default Database Engine page cache size in MiB
- db_engine_use_malloc = config_get_boolean(CONFIG_SECTION_DB, "dbengine page cache with malloc", CONFIG_BOOLEAN_YES);
default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb);
+ db_engine_journal_check = config_get_boolean(CONFIG_SECTION_DB, "dbengine enable journal integrity check", CONFIG_BOOLEAN_NO);
+
if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) {
error("Invalid page cache size %d given. Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB);
default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
@@ -731,14 +1159,14 @@ static void get_netdata_configured_variables() {
// --------------------------------------------------------------------
- rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time);
+ rrdset_free_obsolete_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s);
// Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short
// cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at
// https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information.
- if (rrdset_free_obsolete_time < 10) {
- rrdset_free_obsolete_time = 10;
+ if (rrdset_free_obsolete_time_s < 10) {
+ rrdset_free_obsolete_time_s = 10;
info("The \"cleanup obsolete charts after seconds\" option was set to 10 seconds.");
- config_set_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time);
+ config_set_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s);
}
gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above);
@@ -746,12 +1174,13 @@ static void get_netdata_configured_variables() {
gap_when_lost_iterations_above = 1;
config_set_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above);
}
+ gap_when_lost_iterations_above += 2;
// --------------------------------------------------------------------
// get various system parameters
get_system_HZ();
- get_system_cpus();
+ get_system_cpus_uncached();
get_system_pid_max();
@@ -874,7 +1303,30 @@ void post_conf_load(char **user)
appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL);
}
+#define delta_startup_time(msg) \
+ { \
+ usec_t now_ut = now_monotonic_usec(); \
+ if(prev_msg) \
+ info("NETDATA STARTUP: in %7llu ms, %s - next: %s", (now_ut - last_ut) / USEC_PER_MS, prev_msg, msg); \
+ else \
+ info("NETDATA STARTUP: next: %s", msg); \
+ last_ut = now_ut; \
+ prev_msg = msg; \
+ }
+
+int pgc_unittest(void);
+int mrg_unittest(void);
+int julytest(void);
+
int main(int argc, char **argv) {
+ // initialize the system clocks
+ clocks_init();
+ usec_t started_ut = now_monotonic_usec();
+ usec_t last_ut = started_ut;
+ const char *prev_msg = NULL;
+ // Initialize stderror avoiding coredump when info() or error() is called
+ stderror = stderr;
+
int i;
int config_loaded = 0;
int dont_fork = 0;
@@ -1001,7 +1453,7 @@ int main(int argc, char **argv) {
default_health_enabled = 0;
storage_tiers = 1;
registry_init();
- if(rrd_init("unittest", NULL)) {
+ if(rrd_init("unittest", NULL, true)) {
fprintf(stderr, "rrd_init failed for unittest\n");
return 1;
}
@@ -1027,11 +1479,6 @@ int main(int argc, char **argv) {
else if(strcmp(optarg, "escapetest") == 0) {
return command_argument_sanitization_tests();
}
-#ifdef ENABLE_ML_TESTS
- else if(strcmp(optarg, "mltest") == 0) {
- return test_ml(argc, argv);
- }
-#endif
#ifdef ENABLE_DBENGINE
else if(strcmp(optarg, "mctest") == 0) {
unittest_running = true;
@@ -1061,6 +1508,18 @@ int main(int argc, char **argv) {
unittest_running = true;
return metadata_unittest();
}
+ else if(strcmp(optarg, "pgctest") == 0) {
+ unittest_running = true;
+ return pgc_unittest();
+ }
+ else if(strcmp(optarg, "mrgtest") == 0) {
+ unittest_running = true;
+ return mrg_unittest();
+ }
+ else if(strcmp(optarg, "julytest") == 0) {
+ unittest_running = true;
+ return julytest();
+ }
else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) {
optarg += strlen(createdataset_string);
unsigned history_seconds = strtoul(optarg, NULL, 0);
@@ -1304,19 +1763,14 @@ int main(int argc, char **argv) {
}
}
-#ifdef _SC_OPEN_MAX
if (close_open_fds == true) {
// close all open file descriptors, except the standard ones
// the caller may have left open files (lxc-attach has this issue)
- for(int fd = (int) (sysconf(_SC_OPEN_MAX) - 1); fd > 2; fd--)
- if(fd_is_valid(fd))
- close(fd);
+ for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR);
}
-#endif
- if(!config_loaded)
- {
+ if(!config_loaded) {
load_netdata_conf(NULL, 0);
post_conf_load(&user);
load_cloud_conf(0);
@@ -1327,7 +1781,6 @@ int main(int argc, char **argv) {
appconfig_set(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", "false");
}
-
// ------------------------------------------------------------------------
// initialize netdata
{
@@ -1347,12 +1800,29 @@ int main(int argc, char **argv) {
#endif
#endif
- // initialize the system clocks
- clocks_init();
+ // set libuv worker threads
+ libuv_worker_threads = (int)get_netdata_cpus() * 2;
- // prepare configuration environment variables for the plugins
+ if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS)
+ libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
- setenv("UV_THREADPOOL_SIZE", config_get(CONFIG_SECTION_GLOBAL, "libuv worker threads", "16"), 1);
+ if(libuv_worker_threads > MAX_LIBUV_WORKER_THREADS)
+ libuv_worker_threads = MAX_LIBUV_WORKER_THREADS;
+
+
+ libuv_worker_threads = config_get_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads);
+ if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS) {
+ libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
+ config_set_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads);
+ }
+
+ {
+ char buf[20 + 1];
+ snprintfz(buf, 20, "%d", libuv_worker_threads);
+ setenv("UV_THREADPOOL_SIZE", buf, 1);
+ }
+
+ // prepare configuration environment variables for the plugins
get_netdata_configured_variables();
set_global_environment();
@@ -1396,6 +1866,8 @@ int main(int argc, char **argv) {
// initialize the log files
open_all_log_files();
+ aral_judy_init();
+
get_system_timezone();
// --------------------------------------------------------------------
@@ -1414,6 +1886,7 @@ int main(int argc, char **argv) {
// --------------------------------------------------------------------
// Initialize ML configuration
+ delta_startup_time("initialize ML");
ml_init();
// --------------------------------------------------------------------
@@ -1422,19 +1895,18 @@ int main(int argc, char **argv) {
// block signals while initializing threads.
// this causes the threads to block signals.
+ delta_startup_time("initialize signals");
signals_block();
+ signals_init(); // setup the signals we want to use
- // setup the signals we want to use
+ // --------------------------------------------------------------------
+ // check which threads are enabled and initialize them
- signals_init();
+ delta_startup_time("initialize static threads");
// setup threads configs
default_stacksize = netdata_threads_init();
-
- // --------------------------------------------------------------------
- // check which threads are enabled and initialize them
-
for (i = 0; static_threads[i].name != NULL ; i++) {
struct netdata_static_thread *st = &static_threads[i];
@@ -1454,14 +1926,17 @@ int main(int argc, char **argv) {
// --------------------------------------------------------------------
// create the listening sockets
+ delta_startup_time("initialize web server");
+
web_client_api_v1_init();
web_server_threading_selection();
if(web_server_mode != WEB_SERVER_MODE_NONE)
api_listen_sockets_setup();
-
}
+ delta_startup_time("set resource limits");
+
#ifdef NETDATA_INTERNAL_CHECKS
if(debug_flags != 0) {
struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
@@ -1473,11 +1948,9 @@ int main(int argc, char **argv) {
}
#endif /* NETDATA_INTERNAL_CHECKS */
- // get the max file limit
- if(getrlimit(RLIMIT_NOFILE, &rlimit_nofile) != 0)
- error("getrlimit(RLIMIT_NOFILE) failed");
- else
- info("resources control: allowed file descriptors: soft = %zu, max = %zu", (size_t)rlimit_nofile.rlim_cur, (size_t)rlimit_nofile.rlim_max);
+ set_nofile_limit(&rlimit_nofile);
+
+ delta_startup_time("become daemon");
// fork, switch user, create pid file, set process priority
if(become_daemon(dont_fork, user) == -1)
@@ -1485,12 +1958,18 @@ int main(int argc, char **argv) {
info("netdata started on pid %d.", getpid());
+ delta_startup_time("initialize threads after fork");
+
netdata_threads_init_after_fork((size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)default_stacksize));
// initialize internal registry
+ delta_startup_time("initialize registry");
registry_init();
+
// fork the spawn server
+ delta_startup_time("fork the spawn server");
spawn_init();
+
/*
* Libuv uv_spawn() uses SIGCHLD internally:
* https://github.com/libuv/libuv/blob/cc51217a317e96510fbb284721d5e6bc2af31e33/src/unix/process.c#L485
@@ -1503,15 +1982,22 @@ int main(int argc, char **argv) {
// ------------------------------------------------------------------------
// initialize rrd, registry, health, rrdpush, etc.
+ delta_startup_time("collecting system info");
+
netdata_anonymous_statistics_enabled=-1;
struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info));
+ __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED);
get_system_info(system_info);
system_info->hops = 0;
get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist);
- if(rrd_init(netdata_configured_hostname, system_info))
+ delta_startup_time("initialize RRD structures");
+
+ if(rrd_init(netdata_configured_hostname, system_info, false))
fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname);
+ delta_startup_time("check for incomplete shutdown");
+
char agent_crash_file[FILENAME_MAX + 1];
char agent_incomplete_shutdown_file[FILENAME_MAX + 1];
snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir);
@@ -1526,6 +2012,8 @@ int main(int argc, char **argv) {
// ------------------------------------------------------------------------
// Claim netdata agent to a cloud endpoint
+ delta_startup_time("collect claiming info");
+
if (claiming_pending_arguments)
claim_agent(claiming_pending_arguments);
load_claiming_state();
@@ -1536,11 +2024,14 @@ int main(int argc, char **argv) {
error_log_limit_reset();
// Load host labels
+ delta_startup_time("collect host labels");
reload_host_labels();
// ------------------------------------------------------------------------
// spawn the threads
+ delta_startup_time("start the static threads");
+
web_server_config_options();
netdata_zero_metrics_enabled = config_get_boolean_ondemand(CONFIG_SECTION_DB, "enable zero metrics", CONFIG_BOOLEAN_NO);
@@ -1561,9 +2052,14 @@ int main(int argc, char **argv) {
// ------------------------------------------------------------------------
// Initialize netdata agent command serving from cli and signals
+ delta_startup_time("initialize commands API");
+
commands_init();
- info("netdata initialization completed. Enjoy real-time performance monitoring!");
+ delta_startup_time("ready");
+
+ usec_t ready_ut = now_monotonic_usec();
+ info("NETDATA STARTUP: completed in %llu ms. Enjoy real-time performance monitoring!", (ready_ut - started_ut) / USEC_PER_MS);
netdata_ready = 1;
send_statistics("START", "-", "-");
diff --git a/daemon/main.h b/daemon/main.h
index a4e2b3aa7..8704d6097 100644
--- a/daemon/main.h
+++ b/daemon/main.h
@@ -27,4 +27,35 @@ int killpid(pid_t pid);
void netdata_cleanup_and_exit(int ret) NORETURN;
void send_statistics(const char *action, const char *action_result, const char *action_data);
+typedef enum {
+ ABILITY_DATA_QUERIES = (1 << 0),
+ ABILITY_WEB_REQUESTS = (1 << 1),
+ ABILITY_STREAMING_CONNECTIONS = (1 << 2),
+ SERVICE_MAINTENANCE = (1 << 3),
+ SERVICE_COLLECTORS = (1 << 4),
+ SERVICE_ML_TRAINING = (1 << 5),
+ SERVICE_ML_PREDICTION = (1 << 6),
+ SERVICE_REPLICATION = (1 << 7),
+ SERVICE_WEB_SERVER = (1 << 8),
+ SERVICE_ACLK = (1 << 9),
+ SERVICE_HEALTH = (1 << 10),
+ SERVICE_STREAMING = (1 << 11),
+ SERVICE_CONTEXT = (1 << 12),
+ SERVICE_ANALYTICS = (1 << 13),
+ SERVICE_EXPORTERS = (1 << 14),
+} SERVICE_TYPE;
+
+typedef enum {
+ SERVICE_THREAD_TYPE_NETDATA,
+ SERVICE_THREAD_TYPE_LIBUV,
+ SERVICE_THREAD_TYPE_EVENT_LOOP,
+} SERVICE_THREAD_TYPE;
+
+typedef void (*force_quit_t)(void *data);
+typedef void (*request_quit_t)(void *data);
+
+void service_exits(void);
+bool service_running(SERVICE_TYPE service);
+struct service_thread *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update __maybe_unused);
+
#endif /* NETDATA_MAIN_H */
diff --git a/daemon/service.c b/daemon/service.c
index 6db2ef69f..9761abd02 100644
--- a/daemon/service.c
+++ b/daemon/service.c
@@ -22,6 +22,10 @@
#define WORKER_JOB_SAVE_CHART 13
#define WORKER_JOB_DELETE_CHART 14
#define WORKER_JOB_FREE_DIMENSION 15
+#define WORKER_JOB_PGC_MAIN_EVICT 16
+#define WORKER_JOB_PGC_MAIN_FLUSH 17
+#define WORKER_JOB_PGC_OPEN_EVICT 18
+#define WORKER_JOB_PGC_OPEN_FLUSH 19
static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) {
RRDSET *st = rd->rrdset;
@@ -48,13 +52,13 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) {
size_t tiers_available = 0, tiers_said_no_retention = 0;
for(size_t tier = 0; tier < storage_tiers ;tier++) {
- if(rd->tiers[tier]) {
+ if(rd->tiers[tier].db_collection_handle) {
tiers_available++;
- if(rd->tiers[tier]->collect_ops->finalize(rd->tiers[tier]->db_collection_handle))
+ if(rd->tiers[tier].collect_ops->finalize(rd->tiers[tier].db_collection_handle))
tiers_said_no_retention++;
- rd->tiers[tier]->db_collection_handle = NULL;
+ rd->tiers[tier].db_collection_handle = NULL;
}
}
@@ -83,7 +87,7 @@ static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensio
dfe_start_write(st->rrddim_root_index, rd) {
if(unlikely(
all_dimensions ||
- (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->last_collected_time.tv_sec + rrdset_free_obsolete_time < now))
+ (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now))
)) {
if(dictionary_acquired_item_references(rd_dfe.item) == 1) {
@@ -142,9 +146,9 @@ static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) {
continue;
if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)
- && st->last_accessed_time + rrdset_free_obsolete_time < now
- && st->last_updated.tv_sec + rrdset_free_obsolete_time < now
- && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now
+ && st->last_accessed_time_s + rrdset_free_obsolete_time_s < now
+ && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now
+ && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now
)) {
svc_rrdset_obsolete_to_archive(st);
}
@@ -166,10 +170,10 @@ static void svc_rrdset_check_obsoletion(RRDHOST *host) {
if(rrdset_is_replicating(st))
continue;
- last_entry_t = rrdset_last_entry_t(st);
+ last_entry_t = rrdset_last_entry_s(st);
- if(last_entry_t && last_entry_t < host->senders_connect_time &&
- host->senders_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every
+ if(last_entry_t && last_entry_t < host->child_connect_time &&
+ host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every
< now)
rrdset_is_obsolete(st);
@@ -196,10 +200,10 @@ static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() {
&& host->trigger_chart_obsoletion_check
&& (
(
- host->senders_last_chart_command
- && host->senders_last_chart_command + host->health_delay_up_to < now_realtime_sec()
+ host->child_last_chart_command
+ && host->child_last_chart_command + host->health.health_delay_up_to < now_realtime_sec()
)
- || (host->senders_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec())
+ || (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec())
)
) {
svc_rrdset_check_obsoletion(host);
@@ -238,7 +242,7 @@ restart_after_removal:
}
worker_is_busy(WORKER_JOB_FREE_HOST);
- rrdhost_free(host, 0);
+ rrdhost_free___while_having_rrd_wrlock(host, false);
goto restart_after_removal;
}
@@ -276,6 +280,10 @@ void *service_main(void *ptr)
worker_register_job_name(WORKER_JOB_SAVE_CHART, "save chart");
worker_register_job_name(WORKER_JOB_DELETE_CHART, "delete chart");
worker_register_job_name(WORKER_JOB_FREE_DIMENSION, "free dimension");
+ worker_register_job_name(WORKER_JOB_PGC_MAIN_EVICT, "main cache evictions");
+ worker_register_job_name(WORKER_JOB_PGC_MAIN_FLUSH, "main cache flushes");
+ worker_register_job_name(WORKER_JOB_PGC_OPEN_EVICT, "open cache evictions");
+ worker_register_job_name(WORKER_JOB_PGC_OPEN_FLUSH, "open cache flushes");
netdata_thread_cleanup_push(service_main_cleanup, ptr);
heartbeat_t hb;
@@ -284,7 +292,7 @@ void *service_main(void *ptr)
debug(D_SYSTEM, "Service thread starts");
- while (!netdata_exit) {
+ while (service_running(SERVICE_MAINTENANCE)) {
worker_is_idle();
heartbeat_next(&hb, step);
diff --git a/daemon/static_threads.c b/daemon/static_threads.c
index b7730bc31..d93cfe9d0 100644
--- a/daemon/static_threads.c
+++ b/daemon/static_threads.c
@@ -7,6 +7,7 @@ void *analytics_main(void *ptr);
void *cpuidlejitter_main(void *ptr);
void *global_statistics_main(void *ptr);
void *global_statistics_workers_main(void *ptr);
+void *global_statistics_sqlite3_main(void *ptr);
void *health_main(void *ptr);
void *pluginsd_main(void *ptr);
void *service_main(void *ptr);
@@ -18,7 +19,7 @@ extern bool global_statistics_enabled;
const struct netdata_static_thread static_threads_common[] = {
{
- .name = "PLUGIN[timex]",
+ .name = "P[timex]",
.config_section = CONFIG_SECTION_PLUGINS,
.config_name = "timex",
.enabled = 1,
@@ -27,7 +28,7 @@ const struct netdata_static_thread static_threads_common[] = {
.start_routine = timex_main
},
{
- .name = "PLUGIN[idlejitter]",
+ .name = "P[idlejitter]",
.config_section = CONFIG_SECTION_PLUGINS,
.config_name = "idlejitter",
.enabled = 1,
@@ -36,6 +37,15 @@ const struct netdata_static_thread static_threads_common[] = {
.start_routine = cpuidlejitter_main
},
{
+ .name = "HEALTH",
+ .config_section = NULL,
+ .config_name = NULL,
+ .enabled = 1,
+ .thread = NULL,
+ .init_routine = NULL,
+ .start_routine = health_main
+ },
+ {
.name = "ANALYTICS",
.config_section = NULL,
.config_name = NULL,
@@ -45,7 +55,7 @@ const struct netdata_static_thread static_threads_common[] = {
.start_routine = analytics_main
},
{
- .name = "GLOBAL_STATS",
+ .name = "STATS_GLOBAL",
.config_section = CONFIG_SECTION_PLUGINS,
.config_name = "netdata monitoring",
.env_name = "NETDATA_INTERNALS_MONITORING",
@@ -56,7 +66,7 @@ const struct netdata_static_thread static_threads_common[] = {
.start_routine = global_statistics_main
},
{
- .name = "WORKERS_STATS",
+ .name = "STATS_WORKERS",
.config_section = CONFIG_SECTION_PLUGINS,
.config_name = "netdata monitoring",
.env_name = "NETDATA_INTERNALS_MONITORING",
@@ -67,6 +77,17 @@ const struct netdata_static_thread static_threads_common[] = {
.start_routine = global_statistics_workers_main
},
{
+ .name = "STATS_SQLITE3",
+ .config_section = CONFIG_SECTION_PLUGINS,
+ .config_name = "netdata monitoring",
+ .env_name = "NETDATA_INTERNALS_MONITORING",
+ .global_variable = &global_statistics_enabled,
+ .enabled = 1,
+ .thread = NULL,
+ .init_routine = NULL,
+ .start_routine = global_statistics_sqlite3_main
+ },
+ {
.name = "PLUGINSD",
.config_section = NULL,
.config_name = NULL,
@@ -85,7 +106,7 @@ const struct netdata_static_thread static_threads_common[] = {
.start_routine = service_main
},
{
- .name = "STATSD",
+ .name = "STATSD_FLUSH",
.config_section = NULL,
.config_name = NULL,
.enabled = 1,
@@ -103,7 +124,7 @@ const struct netdata_static_thread static_threads_common[] = {
.start_routine = exporting_main
},
{
- .name = "STREAM",
+ .name = "SNDR[localhost]",
.config_section = NULL,
.config_name = NULL,
.enabled = 0,
@@ -112,7 +133,7 @@ const struct netdata_static_thread static_threads_common[] = {
.start_routine = rrdpush_sender_thread
},
{
- .name = "WEB_SERVER[static1]",
+ .name = "WEB[1]",
.config_section = NULL,
.config_name = NULL,
.enabled = 0,
@@ -123,7 +144,7 @@ const struct netdata_static_thread static_threads_common[] = {
#ifdef ENABLE_ACLK
{
- .name = "ACLK_Main",
+ .name = "ACLK_MAIN",
.config_section = NULL,
.config_name = NULL,
.enabled = 1,
@@ -144,13 +165,13 @@ const struct netdata_static_thread static_threads_common[] = {
},
{
- .name = "REPLICATION",
- .config_section = NULL,
- .config_name = NULL,
- .enabled = 1,
- .thread = NULL,
- .init_routine = NULL,
- .start_routine = replication_thread_main
+ .name = "REPLAY[1]",
+ .config_section = NULL,
+ .config_name = NULL,
+ .enabled = 1,
+ .thread = NULL,
+ .init_routine = NULL,
+ .start_routine = replication_thread_main
},
// terminator
diff --git a/daemon/static_threads_freebsd.c b/daemon/static_threads_freebsd.c
index 48066bff5..cc150faf9 100644
--- a/daemon/static_threads_freebsd.c
+++ b/daemon/static_threads_freebsd.c
@@ -6,7 +6,7 @@ extern void *freebsd_main(void *ptr);
const struct netdata_static_thread static_threads_freebsd[] = {
{
- .name = "PLUGIN[freebsd]",
+ .name = "P[freebsd]",
.config_section = CONFIG_SECTION_PLUGINS,
.config_name = "freebsd",
.enabled = 1,
diff --git a/daemon/static_threads_linux.c b/daemon/static_threads_linux.c
index 260b2c176..54307eccf 100644
--- a/daemon/static_threads_linux.c
+++ b/daemon/static_threads_linux.c
@@ -10,7 +10,7 @@ extern void *timex_main(void *ptr);
const struct netdata_static_thread static_threads_linux[] = {
{
- .name = "PLUGIN[tc]",
+ .name = "P[tc]",
.config_section = CONFIG_SECTION_PLUGINS,
.config_name = "tc",
.enabled = 1,
@@ -19,7 +19,7 @@ const struct netdata_static_thread static_threads_linux[] = {
.start_routine = tc_main
},
{
- .name = "PLUGIN[diskspace]",
+ .name = "P[diskspace]",
.config_section = CONFIG_SECTION_PLUGINS,
.config_name = "diskspace",
.enabled = 1,
@@ -28,7 +28,7 @@ const struct netdata_static_thread static_threads_linux[] = {
.start_routine = diskspace_main
},
{
- .name = "PLUGIN[proc]",
+ .name = "P[proc]",
.config_section = CONFIG_SECTION_PLUGINS,
.config_name = "proc",
.enabled = 1,
@@ -37,7 +37,7 @@ const struct netdata_static_thread static_threads_linux[] = {
.start_routine = proc_main
},
{
- .name = "PLUGIN[cgroups]",
+ .name = "P[cgroups]",
.config_section = CONFIG_SECTION_PLUGINS,
.config_name = "cgroups",
.enabled = 1,
diff --git a/daemon/static_threads_macos.c b/daemon/static_threads_macos.c
index 72c032454..aaf7df6f6 100644
--- a/daemon/static_threads_macos.c
+++ b/daemon/static_threads_macos.c
@@ -6,7 +6,7 @@ extern void *macos_main(void *ptr);
const struct netdata_static_thread static_threads_macos[] = {
{
- .name = "PLUGIN[macos]",
+ .name = "P[macos]",
.config_section = CONFIG_SECTION_PLUGINS,
.config_name = "macos",
.enabled = 1,
diff --git a/daemon/system-info.sh b/daemon/system-info.sh
index 68cdc4812..1e334a3d1 100755
--- a/daemon/system-info.sh
+++ b/daemon/system-info.sh
@@ -217,6 +217,9 @@ if [ -n "${lscpu}" ] && lscpu > /dev/null 2>&1; then
LCPU_COUNT="$(echo "${lscpu_output}" | grep "^CPU(s):" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
CPU_VENDOR="$(echo "${lscpu_output}" | grep "^Vendor ID:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
CPU_MODEL="$(echo "${lscpu_output}" | grep "^Model name:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')"
+ if grep -q "^lxcfs /proc" /proc/self/mounts 2>/dev/null && count=$(grep -c ^processor /proc/cpuinfo 2>/dev/null); then
+ LCPU_COUNT="$count"
+ fi
possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU max MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')"
if [ -z "$possible_cpu_freq" ]; then
possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')"
@@ -437,7 +440,7 @@ CLOUD_INSTANCE_TYPE="unknown"
CLOUD_INSTANCE_REGION="unknown"
if [ "${VIRTUALIZATION}" != "none" ] && command -v curl > /dev/null 2>&1; then
- # Returned HTTP status codes: GCP is 200, AWS is 200, DO is 404.
+ # Returned HTTP status codes: GCP is 200, AWS is 200, DO is 404.
curl --fail -s -m 1 --noproxy "*" http://169.254.169.254 >/dev/null 2>&1
ret=$?
# anything but operation timeout.
diff --git a/daemon/unit_test.c b/daemon/unit_test.c
index f69861869..52b55c4e5 100644
--- a/daemon/unit_test.c
+++ b/daemon/unit_test.c
@@ -439,7 +439,7 @@ int unit_test_str2ld() {
}
int unit_test_buffer() {
- BUFFER *wb = buffer_create(1);
+ BUFFER *wb = buffer_create(1, NULL);
char string[2048 + 1];
char final[9000 + 1];
int i;
@@ -1289,7 +1289,7 @@ int run_test(struct test *test)
fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT
", found " NETDATA_DOUBLE_FORMAT ", %s\n",
test->name, rrddim_name(rd), c+1,
- (int64_t)((rrdset_first_entry_t(st) + c * st->update_every) - time_start),
+ (int64_t)((rrdset_first_entry_s(st) + c * st->update_every) - time_start),
n, v, (same)?"OK":"### E R R O R ###");
if(!same) errors++;
@@ -1301,7 +1301,7 @@ int run_test(struct test *test)
fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT
", found " NETDATA_DOUBLE_FORMAT ", %s\n",
test->name, rrddim_name(rd2), c+1,
- (int64_t)((rrdset_first_entry_t(st) + c * st->update_every) - time_start),
+ (int64_t)((rrdset_first_entry_s(st) + c * st->update_every) - time_start),
n, v, (same)?"OK":"### E R R O R ###");
if(!same) errors++;
}
@@ -1349,7 +1349,7 @@ static int test_variable_renames(void) {
rrddim_reset_name(st, rd2, "DIM2NAME2");
fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2));
- BUFFER *buf = buffer_create(1);
+ BUFFER *buf = buffer_create(1, NULL);
health_api_v1_chart_variables2json(st, buf);
fprintf(stderr, "%s", buffer_tostring(buf));
buffer_free(buf);
@@ -1604,7 +1604,7 @@ int test_sqlite(void) {
return 1;
}
- BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE);
+ BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE, NULL);
char *uuid_str = "0000_000";
buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str);
@@ -1775,7 +1775,6 @@ static inline void rrddim_set_by_pointer_fake_time(RRDDIM *rd, collected_number
static RRDHOST *dbengine_rrdhost_find_or_create(char *name)
{
/* We don't want to drop metrics when generating load, we prefer to block data generation itself */
- rrdeng_drop_metrics_under_page_cache_pressure = 0;
return rrdhost_find_or_create(
name
@@ -1859,10 +1858,10 @@ static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDI
now_realtime_timeval(&now);
rrdset_timed_done(st[i], now, false);
}
- // Fluh pages for subsequent real values
+ // Flush pages for subsequent real values
for (i = 0 ; i < CHARTS ; ++i) {
for (j = 0; j < DIMS; ++j) {
- rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle);
+ rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0].db_collection_handle);
}
}
}
@@ -1881,7 +1880,7 @@ static time_t test_dbengine_create_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS
// feed it with the test data
for (i = 0 ; i < CHARTS ; ++i) {
for (j = 0 ; j < DIMS ; ++j) {
- rd[i][j]->tiers[0]->collect_ops->change_collection_frequency(rd[i][j]->tiers[0]->db_collection_handle, update_every);
+ rd[i][j]->tiers[0].collect_ops->change_collection_frequency(rd[i][j]->tiers[0].db_collection_handle, update_every);
rd[i][j]->last_collected_time.tv_sec =
st[i]->last_collected_time.tv_sec = st[i]->last_updated.tv_sec = time_now;
@@ -1932,16 +1931,16 @@ static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DI
time_now = time_start + (c + 1) * update_every;
for (i = 0 ; i < CHARTS ; ++i) {
for (j = 0; j < DIMS; ++j) {
- rd[i][j]->tiers[0]->query_ops->init(rd[i][j]->tiers[0]->db_metric_handle, &handle, time_now, time_now + QUERY_BATCH * update_every);
+ rd[i][j]->tiers[0].query_ops->init(rd[i][j]->tiers[0].db_metric_handle, &handle, time_now, time_now + QUERY_BATCH * update_every, STORAGE_PRIORITY_NORMAL);
for (k = 0; k < QUERY_BATCH; ++k) {
last = ((collected_number)i * DIMS) * REGION_POINTS[current_region] +
j * REGION_POINTS[current_region] + c + k;
expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE)last, SN_DEFAULT_FLAGS));
- STORAGE_POINT sp = rd[i][j]->tiers[0]->query_ops->next_metric(&handle);
+ STORAGE_POINT sp = rd[i][j]->tiers[0].query_ops->next_metric(&handle);
value = sp.sum;
- time_retrieved = sp.start_time;
- end_time = sp.end_time;
+ time_retrieved = sp.start_time_s;
+ end_time = sp.end_time_s;
same = (roundndd(value) == roundndd(expected)) ? 1 : 0;
if(!same) {
@@ -1960,7 +1959,7 @@ static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DI
errors++;
}
}
- rd[i][j]->tiers[0]->query_ops->finalize(&handle);
+ rd[i][j]->tiers[0].query_ops->finalize(&handle);
}
}
}
@@ -1979,10 +1978,11 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS]
int current_region, time_t time_start, time_t time_end)
{
int update_every = REGION_UPDATE_EVERY[current_region];
- fprintf(stderr, "%s() running on region %d, start time %lld, end time %lld, update every %d...\n", __FUNCTION__, current_region, (long long)time_start, (long long)time_end, update_every);
+ fprintf(stderr, "%s() running on region %d, start time %lld, end time %lld, update every %d, on %d dimensions...\n",
+ __FUNCTION__, current_region, (long long)time_start, (long long)time_end, update_every, CHARTS * DIMS);
uint8_t same;
time_t time_now, time_retrieved;
- int i, j, errors, value_errors = 0, time_errors = 0;
+ int i, j, errors, value_errors = 0, time_errors = 0, value_right = 0, time_right = 0;
long c;
collected_number last;
NETDATA_DOUBLE value, expected;
@@ -1993,7 +1993,8 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS]
ONEWAYALLOC *owa = onewayalloc_create(0);
RRDR *r = rrd2rrdr_legacy(owa, st[i], points, time_start, time_end,
RRDR_GROUPING_AVERAGE, 0, RRDR_OPTION_NATURAL_POINTS,
- NULL, NULL, 0, 0, QUERY_SOURCE_UNITTEST);
+ NULL, NULL, 0, 0,
+ QUERY_SOURCE_UNITTEST, STORAGE_PRIORITY_NORMAL);
if (!r) {
fprintf(stderr, " DB-engine unittest %s: empty RRDR on region %d ### E R R O R ###\n", rrdset_name(st[i]), current_region);
return ++errors;
@@ -2020,17 +2021,22 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS]
same = (roundndd(value) == roundndd(expected)) ? 1 : 0;
if(!same) {
if(value_errors < 20)
- fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT
+ fprintf(stderr, " DB-engine unittest %s/%s: point #%ld, at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT
", RRDR found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n",
- rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, expected, value);
+ rrdset_name(st[i]), rrddim_name(rd[i][j]), (long) c+1, (unsigned long)time_now, expected, value);
value_errors++;
}
+ else
+ value_right++;
+
if(time_retrieved != time_now) {
if(time_errors < 20)
- fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, found RRDR timestamp %lu ### E R R O R ###\n",
- rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, (unsigned long)time_retrieved);
+ fprintf(stderr, " DB-engine unittest %s/%s: point #%ld at %lu secs, found RRDR timestamp %lu ### E R R O R ###\n",
+ rrdset_name(st[i]), rrddim_name(rd[i][j]), (long)c+1, (unsigned long)time_now, (unsigned long)time_retrieved);
time_errors++;
}
+ else
+ time_right++;
}
rrddim_foreach_done(d);
}
@@ -2040,10 +2046,10 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS]
}
if(value_errors)
- fprintf(stderr, "%d value errors encountered\n", value_errors);
+ fprintf(stderr, "%d value errors encountered (%d were ok)\n", value_errors, value_right);
if(time_errors)
- fprintf(stderr, "%d time errors encountered\n", time_errors);
+ fprintf(stderr, "%d time errors encountered (%d were ok)\n", time_errors, value_right);
return errors + value_errors + time_errors;
}
@@ -2051,7 +2057,7 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS]
int test_dbengine(void)
{
fprintf(stderr, "%s() running...\n", __FUNCTION__ );
- int i, j, errors, value_errors = 0, time_errors = 0, update_every, current_region;
+ int i, j, errors = 0, value_errors = 0, time_errors = 0, update_every, current_region;
RRDHOST *host = NULL;
RRDSET *st[CHARTS];
RRDDIM *rd[CHARTS][DIMS];
@@ -2074,9 +2080,7 @@ int test_dbengine(void)
time_start[current_region] = 2 * API_RELATIVE_TIME_MAX;
time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]);
- errors = test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
- if (errors)
- goto error_out;
+ errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
current_region = 1; //this is the second region of data
update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 3 seconds
@@ -2084,7 +2088,7 @@ int test_dbengine(void)
for (i = 0 ; i < CHARTS ; ++i) {
st[i]->update_every = update_every;
for (j = 0; j < DIMS; ++j) {
- rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle);
+ rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0].db_collection_handle);
}
}
@@ -2093,9 +2097,7 @@ int test_dbengine(void)
time_start[current_region] += update_every - time_start[current_region] % update_every;
time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]);
- errors = test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
- if (errors)
- goto error_out;
+ errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
current_region = 2; //this is the third region of data
update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 1 seconds
@@ -2103,7 +2105,7 @@ int test_dbengine(void)
for (i = 0 ; i < CHARTS ; ++i) {
st[i]->update_every = update_every;
for (j = 0; j < DIMS; ++j) {
- rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle);
+ rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0].db_collection_handle);
}
}
@@ -2112,26 +2114,22 @@ int test_dbengine(void)
time_start[current_region] += update_every - time_start[current_region] % update_every;
time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]);
- errors = test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
- if (errors)
- goto error_out;
+ errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
for (current_region = 0 ; current_region < REGIONS ; ++current_region) {
- errors = test_dbengine_check_rrdr(st, rd, current_region, time_start[current_region], time_end[current_region]);
- if (errors)
- goto error_out;
+ errors += test_dbengine_check_rrdr(st, rd, current_region, time_start[current_region], time_end[current_region]);
}
current_region = 1;
update_every = REGION_UPDATE_EVERY[current_region]; // use the maximum update_every = 3
- errors = 0;
long points = (time_end[REGIONS - 1] - time_start[0]) / update_every; // cover all time regions with RRDR
long point_offset = (time_start[current_region] - time_start[0]) / update_every;
for (i = 0 ; i < CHARTS ; ++i) {
ONEWAYALLOC *owa = onewayalloc_create(0);
RRDR *r = rrd2rrdr_legacy(owa, st[i], points, time_start[0] + update_every,
time_end[REGIONS - 1], RRDR_GROUPING_AVERAGE, 0,
- RRDR_OPTION_NATURAL_POINTS, NULL, NULL, 0, 0, QUERY_SOURCE_UNITTEST);
+ RRDR_OPTION_NATURAL_POINTS, NULL, NULL, 0, 0,
+ QUERY_SOURCE_UNITTEST, STORAGE_PRIORITY_NORMAL);
if (!r) {
fprintf(stderr, " DB-engine unittest %s: empty RRDR ### E R R O R ###\n", rrdset_name(st[i]));
@@ -2180,7 +2178,7 @@ int test_dbengine(void)
}
onewayalloc_destroy(owa);
}
-error_out:
+
rrd_wrlock();
rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].instance);
rrdhost_delete_charts(host);
@@ -2269,7 +2267,7 @@ static void generate_dbengine_chart(void *arg)
thread_info->time_max = time_current;
}
for (j = 0; j < DSET_DIMS; ++j) {
- rrdeng_store_metric_finalize((rd[j])->tiers[0]->db_collection_handle);
+ rrdeng_store_metric_finalize((rd[j])->tiers[0].db_collection_handle);
}
}
@@ -2329,7 +2327,7 @@ void generate_dbengine_dataset(unsigned history_seconds)
}
freez(thread_info);
rrd_wrlock();
- rrdhost_free(host, 1);
+ rrdhost_free___while_having_rrd_wrlock(host, true);
rrd_unlock();
}
@@ -2389,13 +2387,13 @@ static void query_dbengine_chart(void *arg)
time_before = MIN(time_after + duration, time_max); /* up to 1 hour queries */
}
- rd->tiers[0]->query_ops->init(rd->tiers[0]->db_metric_handle, &handle, time_after, time_before);
+ rd->tiers[0].query_ops->init(rd->tiers[0].db_metric_handle, &handle, time_after, time_before, STORAGE_PRIORITY_NORMAL);
++thread_info->queries_nr;
for (time_now = time_after ; time_now <= time_before ; time_now += update_every) {
generatedv = generate_dbengine_chart_value(i, j, time_now);
expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE) generatedv, SN_DEFAULT_FLAGS));
- if (unlikely(rd->tiers[0]->query_ops->is_finished(&handle))) {
+ if (unlikely(rd->tiers[0].query_ops->is_finished(&handle))) {
if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT
", found data gap, ### E R R O R ###\n",
@@ -2405,10 +2403,10 @@ static void query_dbengine_chart(void *arg)
break;
}
- STORAGE_POINT sp = rd->tiers[0]->query_ops->next_metric(&handle);
+ STORAGE_POINT sp = rd->tiers[0].query_ops->next_metric(&handle);
value = sp.sum;
- time_retrieved = sp.start_time;
- end_time = sp.end_time;
+ time_retrieved = sp.start_time_s;
+ end_time = sp.end_time_s;
if (!netdata_double_isnumber(value)) {
if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
@@ -2443,7 +2441,7 @@ static void query_dbengine_chart(void *arg)
}
}
}
- rd->tiers[0]->query_ops->finalize(&handle);
+ rd->tiers[0].query_ops->finalize(&handle);
} while(!thread_info->done);
if(value_errors)