diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-06-09 04:52:39 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2022-06-09 04:52:39 +0000 |
commit | 89f3604407aff8f4cb2ed958252c61e23c767e24 (patch) | |
tree | 7fbf408102cab051557d38193524d8c6e991d070 /daemon | |
parent | Adding upstream version 1.34.1. (diff) | |
download | netdata-upstream/1.35.0.tar.xz netdata-upstream/1.35.0.zip |
Adding upstream version 1.35.0.upstream/1.35.0
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'daemon')
-rw-r--r-- | daemon/analytics.c | 13 | ||||
-rw-r--r-- | daemon/common.h | 5 | ||||
-rw-r--r-- | daemon/config/README.md | 303 | ||||
-rw-r--r-- | daemon/global_statistics.c | 1489 | ||||
-rw-r--r-- | daemon/main.c | 142 | ||||
-rwxr-xr-x | daemon/system-info.sh | 123 | ||||
-rw-r--r-- | daemon/unit_test.c | 16 |
7 files changed, 1390 insertions, 701 deletions
diff --git a/daemon/analytics.c b/daemon/analytics.c index 0af41fdda..6c02561d0 100644 --- a/daemon/analytics.c +++ b/daemon/analytics.c @@ -249,8 +249,9 @@ void analytics_exporters(void) buffer_free(bi); } -int collector_counter_callb(void *entry, void *data) -{ +int collector_counter_callb(const char *name, void *entry, void *data) { + (void)name; + struct array_printer *ap = (struct array_printer *)data; struct collector *col = (struct collector *)entry; @@ -296,7 +297,7 @@ void analytics_collectors(void) ap.c = 0; ap.both = bt; - dictionary_get_all(dict, collector_counter_callb, &ap); + dictionary_walkthrough_read(dict, collector_counter_callb, &ap); dictionary_destroy(dict); analytics_set_data(&analytics_data.netdata_collectors, (char *)buffer_tostring(ap.both)); @@ -701,7 +702,7 @@ void get_system_timezone(void) // http://stackoverflow.com/questions/4554271/how-to-avoid-excessive-stat-etc-localtime-calls-in-strftime-on-linux const char *tz = getenv("TZ"); if (!tz || !*tz) - setenv("TZ", config_get(CONFIG_SECTION_GLOBAL, "TZ environment variable", ":/etc/localtime"), 0); + setenv("TZ", config_get(CONFIG_SECTION_ENV_VARS, "TZ", ":/etc/localtime"), 0); char buffer[FILENAME_MAX + 1] = ""; const char *timezone = NULL; @@ -908,13 +909,13 @@ void set_global_environment() if (!p) p = "/bin:/usr/bin"; snprintfz(path, 1024, "%s:%s", p, "/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"); - setenv("PATH", config_get(CONFIG_SECTION_PLUGINS, "PATH environment variable", path), 1); + setenv("PATH", config_get(CONFIG_SECTION_ENV_VARS, "PATH", path), 1); // python options p = getenv("PYTHONPATH"); if (!p) p = ""; - setenv("PYTHONPATH", config_get(CONFIG_SECTION_PLUGINS, "PYTHONPATH environment variable", p), 1); + setenv("PYTHONPATH", config_get(CONFIG_SECTION_ENV_VARS, "PYTHONPATH", p), 1); // disable buffering for python plugins setenv("PYTHONUNBUFFERED", "1", 1); diff --git a/daemon/common.h b/daemon/common.h index e11a6d6b6..da96e2ac1 100644 --- a/daemon/common.h +++ b/daemon/common.h @@ -27,6 +27,8 @@ #define config_generate(buffer, only_changed) appconfig_generate(&netdata_config, buffer, only_changed) +#define config_section_option_destroy(section, name) appconfig_section_option_destroy_non_loaded(&netdata_config, section, name) + // ---------------------------------------------------------------------------- // netdata include files @@ -82,6 +84,9 @@ #include "commands.h" #include "analytics.h" +// metric correlations +#include "database/metric_correlations.h" + // global netdata daemon variables extern char *netdata_configured_hostname; extern char *netdata_configured_user_config_dir; diff --git a/daemon/config/README.md b/daemon/config/README.md index c3c639923..72f688543 100644 --- a/daemon/config/README.md +++ b/daemon/config/README.md @@ -6,24 +6,45 @@ custom_edit_url: https://github.com/netdata/netdata/edit/master/daemon/config/RE # Daemon configuration -<details markdown="1"><summary>The daemon configuration file is read from `/etc/netdata/netdata.conf`.</summary> -Depending on your installation method, Netdata will have been installed either directly under `/`, or under `/opt/netdata`. The paths mentioned here and in the documentation in general assume that your installation is under `/`. If it is not, you will find the exact same paths under `/opt/netdata` as well. (i.e. `/etc/netdata` will be `/opt/netdata/etc/netdata`).</details> +<details> +<summary>The daemon configuration file is read from /etc/netdata/netdata.conf.</summary> -This config file **is not needed by default**. Netdata works fine out of the box without it. But it does allow you to adapt the general behavior of Netdata, in great detail. You can find all these settings, with their default values, by accessing the URL `https://netdata.server.hostname:19999/netdata.conf`. For example check the configuration file of [netdata.firehol.org](http://netdata.firehol.org/netdata.conf). HTTP access to this file is limited by default to private IPs, via the [web server access lists](/web/server/README.md#access-lists). +Depending on your installation method, Netdata will have been installed either directly under `/`, or +under `/opt/netdata`. The paths mentioned here and in the documentation in general assume that your installation is +under `/`. If it is not, you will find the exact same paths under `/opt/netdata` as well. (i.e. `/etc/netdata` will +be `/opt/netdata/etc/netdata`). + +</details> + +This config file **is not needed by default**. Netdata works fine out of the box without it. But it does allow you to +adapt the general behavior of Netdata, in great detail. You can find all these settings, with their default values, by +accessing the URL `https://netdata.server.hostname:19999/netdata.conf`. For example check the configuration file +of [netdata.firehol.org](http://netdata.firehol.org/netdata.conf). HTTP access to this file is limited by default to +[private IPs](https://en.wikipedia.org/wiki/Private_network), via +the [web server access lists](/web/server/README.md#access-lists). `netdata.conf` has sections stated with `[section]`. You will see the following sections: -1. `[global]` to [configure](#global-section-options) the [Netdata daemon](/daemon/README.md). -2. `[web]` to [configure the web server](/web/server/README.md). -3. `[plugins]` to [configure](#plugins-section-options) which [collectors](/collectors/README.md) to use and PATH +1. `[global]` to [configure](#global-section-options) the [Netdata daemon](/daemon/README.md). +2. `[directories]` to [configure](#directories-section-options) the directories used by Netdata. +3. `[logs]` to [configure](#logs-section-options) the Netdata logging. +4. `[environment variables]` to [configure](#environment-variables-section-options) the environment variables used + Netdata. +5. `[sqlite]` to [configure](#sqlite-section-options) the [Netdata daemon](/daemon/README.md) SQLite settings. +6. `[ml]` to configure settings for [machine learning](/ml/README.md). +7. `[health]` to [configure](#health-section-options) general settings for [health monitoring](/health/README.md). +8. `[web]` to [configure the web server](/web/server/README.md). +9. `[registry]` for the [Netdata registry](/registry/README.md). +10. `[global statistics]` for the [Netdata registry](/registry/README.md). +11. `[statsd]` for the general settings of the [stats.d.plugin](/collectors/statsd.plugin/README.md). +12. `[plugins]` to [configure](#plugins-section-options) which [collectors](/collectors/README.md) to use and PATH settings. -4. `[health]` to [configure](#health-section-options) general settings for [health monitoring](/health/README.md) -5. `[registry]` for the [Netdata registry](/registry/README.md). -6. `[statsd]` for the general settings of the [stats.d.plugin](/collectors/statsd.plugin/README.md). -7. `[plugin:NAME]` sections for each collector plugin, under the comment [Per plugin configuration](#per-plugin-configuration). -8. `[CHART_NAME]` sections for each chart defined, under the comment [Per chart configuration](#per-chart-configuration). +13. `[plugin:NAME]` sections for each collector plugin, under the + comment [Per plugin configuration](#per-plugin-configuration). -The configuration file is a `name = value` dictionary. Netdata will not complain if you set options unknown to it. When you check the running configuration by accessing the URL `/netdata.conf` on your Netdata server, Netdata will add a comment on settings it does not currently use. +The configuration file is a `name = value` dictionary. Netdata will not complain if you set options unknown to it. When +you check the running configuration by accessing the URL `/netdata.conf` on your Netdata server, Netdata will add a +comment on settings it does not currently use. ## Applying changes @@ -46,87 +67,120 @@ Please note that your data history will be lost if you have modified `history` p ### [global] section options -| setting|default|info| -|:-----:|:-----:|:---| -| process scheduling policy|`keep`|See [Netdata process scheduling policy](/daemon/README.md#netdata-process-scheduling-policy)| -| OOM score|`0`|| -| glibc malloc arena max for plugins|`1`|See [Virtual memory](/daemon/README.md#virtual-memory).| -| glibc malloc arena max for Netdata|`1`|See [Virtual memory](/daemon/README.md#virtual-memory).| -| hostname|auto-detected|The hostname of the computer running Netdata.| -| history|`3996`| Used with `memory mode = save/map/ram/alloc`, not the default `memory mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. This setting can also be configured per chart. Check [Memory Requirements](/database/README.md) for more information. | -| update every|`1`|The frequency in seconds, for data collection. For more information see the [performance guide](/docs/guides/configure/performance.md).| -| config directory|`/etc/netdata`|The directory configuration files are kept.| -| stock config directory|`/usr/lib/netdata/conf.d`|| -| log directory|`/var/log/netdata`|The directory in which the [log files](/daemon/README.md#log-files) are kept.| -| web files directory|`/usr/share/netdata/web`|The directory the web static files are kept.| -| cache directory|`/var/cache/netdata`|The directory the memory database will be stored if and when Netdata exits. Netdata will re-read the database when it will start again, to continue from the same point.| -| lib directory|`/var/lib/netdata`|Contains the alarm log and the Netdata instance guid.| -| home directory|`/var/cache/netdata`|Contains the db files for the collected metrics| -| plugins directory|`"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"`|The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes.| -| memory mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `page cache size` and `dbengine disk space`. <br />`save`: Netdata will save its round robin database on exit and load it on startup. <br />`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`). <br />`ram`: The round-robin database will be temporary and it will be lost when Netdata exits. <br />`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. | -| page cache size | 32 | Determines the amount of RAM in MiB that is dedicated to caching Netdata metric values. | -| dbengine disk space | 256 | Determines the amount of disk space in MiB that is dedicated to storing Netdata metric values and all related metadata describing them. | -| dbengine multihost disk space | 256 | Same functionality as `dbengine disk space`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. | -| host access prefix| |This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43).| -| memory deduplication (ksm)|`yes`|When set to `yes`, Netdata will offer its in-memory round robin database to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/database/README.md#ksm)| -| TZ environment variable|`:/etc/localtime`|Where to find the timezone| -| timezone|auto-detected|The timezone retrieved from the environment variable| -| debug flags|`0x0000000000000000`|Bitmap of debug options to enable. For more information check [Tracing Options](/daemon/README.md#debugging).| -| debug log|`/var/log/netdata/debug.log`|The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](/daemon/README.md#debugging).| -| error log|`/var/log/netdata/error.log`|The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log.| -| access log|`/var/log/netdata/access.log`|The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log.| -| errors flood protection period|`1200`|Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`.| -| errors to trigger flood protection|`200`|Number of errors written to the log in `errors flood protection period` sec before flood protection is activated.| -| run as user|`netdata`|The user Netdata will run as.| -| pthread stack size|auto-detected|| -| cleanup obsolete charts after seconds|`3600`|See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions| -| gap when lost iterations above|`1`|| -| cleanup orphan hosts after seconds|`3600`|How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data.| -| delete obsolete charts files|`yes`|See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions| -| delete orphan hosts files|`yes`|Set to `no` to disable non-responsive host removal.| -| enable zero metrics|`no`|Set to `yes` to show charts when all their metrics are zero.| +| setting | default | info | +|:-------------------------------------:|:------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| process scheduling policy | `keep` | See [Netdata process scheduling policy](/daemon/README.md#netdata-process-scheduling-policy) | +| OOM score | `0` | | +| glibc malloc arena max for plugins | `1` | See [Virtual memory](/daemon/README.md#virtual-memory). | +| glibc malloc arena max for Netdata | `1` | See [Virtual memory](/daemon/README.md#virtual-memory). | +| hostname | auto-detected | The hostname of the computer running Netdata. | +| history | `3996` | Used with `memory mode = save/map/ram/alloc`, not the default `memory mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. This setting can also be configured per chart. Check [Memory Requirements](/database/README.md) for more information. | +| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](/docs/guides/configure/performance.md). | +| memory mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `page cache size` and `dbengine disk space`. <br />`save`: Netdata will save its round robin database on exit and load it on startup. <br />`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`). <br />`ram`: The round-robin database will be temporary and it will be lost when Netdata exits. <br />`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. | +| page cache size | 32 | Determines the amount of RAM in MiB that is dedicated to caching Netdata metric values. | +| dbengine disk space | 256 | Determines the amount of disk space in MiB that is dedicated to storing Netdata metric values and all related metadata describing them. | +| dbengine multihost disk space | 256 | Same functionality as `dbengine disk space`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. | +| host access prefix | | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). | +| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/database/README.md#ksm) | +| timezone | auto-detected | The timezone retrieved from the environment variable | +| run as user | `netdata` | The user Netdata will run as. | +| pthread stack size | auto-detected | | +| cleanup obsolete charts after seconds | `3600` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions | +| gap when lost iterations above | `1` | | +| cleanup orphan hosts after seconds | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. | +| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions | +| delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. | +| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. | + +### [directories] section options + +| setting | default | info | +|:-------------------:|:------------------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| config | `/etc/netdata` | The directory configuration files are kept. | +| stock config | `/usr/lib/netdata/conf.d` | | +| log | `/var/log/netdata` | The directory in which the [log files](/daemon/README.md#log-files) are kept. | +| web | `/usr/share/netdata/web` | The directory the web static files are kept. | +| cache | `/var/cache/netdata` | The directory the memory database will be stored if and when Netdata exits. Netdata will re-read the database when it will start again, to continue from the same point. | +| lib | `/var/lib/netdata` | Contains the alarm log and the Netdata instance GUID. | +| home | `/var/cache/netdata` | Contains the db files for the collected metrics. | +| lock | `/var/lib/netdata/lock` | Contains the data collectors lock files. | +| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. | +| health config | `/etc/netdata/health.d` | The directory containing the user alarm configuration files, to override the stock configurations | +| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alarm configuration files for each collector | +| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](/registry/README.md) database and GUID that uniquely identifies each Netdata Agent | + +### [logs] section options + +| setting | default | info | +|:----------------------------------:|:-----------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](/daemon/README.md#debugging). | +| debug | `/var/log/netdata/debug.log` | The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](/daemon/README.md#debugging). | +| error | `/var/log/netdata/error.log` | The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. | +| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. | +| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. | +| errors flood protection period | `1200` | Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`. | +| errors to trigger flood protection | `200` | Number of errors written to the log in `errors flood protection period` sec before flood protection is activated. | + +### [environment variables] section options + +| setting | default | info | +|:----------:|:-----------------:|:-----------------------------------------------------------| +| TZ | `:/etc/localtime` | Where to find the timezone | +| PATH | `auto-detected` | Specifies the directories to be searched to find a command | +| PYTHONPATH | | Used to set a custom python path | + +### [sqlite] section options + +| setting | default | info | +|:------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| auto vacuum | `INCREMENTAL` | The [auto-vacuum status](https://www.sqlite.org/pragma.html#pragma_auto_vacuum) in the database | +| synchronous | `NORMAL` | The setting of the ["synchronous"](https://www.sqlite.org/pragma.html#pragma_synchronous) flag | +| journal mode | `WAL` | The [journal mode](https://www.sqlite.org/pragma.html#pragma_journal_mode) for databases | +| temp store | `MEMORY` | Used to determine where [temporary tables and indices are stored](https://www.sqlite.org/pragma.html#pragma_temp_store) | +| journal size limit | `16777216` | Used to set a new [limit in bytes for the database](https://www.sqlite.org/pragma.html#pragma_journal_size_limit) | +| cache size | `-2000` | Used to [suggest the maximum number of database disk pages](https://www.sqlite.org/pragma.html#pragma_cache_size) that SQLite will hold in memory at once per open database file | -### [web] section options +### [health] section options -Refer to the [web server documentation](/web/server/README.md) +This section controls the general behavior of the health monitoring capabilities of Netdata. -### [plugins] section options +Specific alarms are configured in per-collector config files under the `health.d` directory. For more info, see [health +monitoring](/health/README.md). -In this section you will see be a boolean (`yes`/`no`) option for each plugin (e.g. tc, cgroups, apps, proc etc.). Note that the configuration options in this section for the orchestrator plugins `python.d`, `charts.d` and `node.d` control **all the modules** written for that orchestrator. For instance, setting `python.d = no` means that all Python modules under `collectors/python.d.plugin` will be disabled. +[Alarm notifications](/health/notifications/README.md) are configured in `health_alarm_notify.conf`. -Additionally, there will be the following options: +| setting | default | info | +|:----------------------------------------------:|:------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| enabled | `yes` | Set to `no` to disable all alarms and notifications | +| in memory max health log entries | 1000 | Size of the alarm history held in RAM | +| script to execute on alarm | `/usr/libexec/netdata/plugins.d/alarm-notify.sh` | The script that sends alarm notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). | +| run at least every seconds | `10` | Controls how often all alarm conditions should be evaluated. | +| postpone alarms during hibernation for seconds | `60` | Prevents false alarms. May need to be increased if you get alarms during hibernation. | +| rotate log every lines | 2000 | Controls the number of alarm log entries stored in `<lib directory>/health-log.db`, where `<lib directory>` is the one configured in the [\[global\] section](#global-section-options) | -| setting|default|info| -|:-----:|:-----:|:---| -| PATH environment variable|`auto-detected`|| -| PYTHONPATH environment variable||Used to set a custom python path| -| enable running new plugins|`yes`|When set to `yes`, Netdata will enable detected plugins, even if they are not configured explicitly. Setting this to `no` will only enable plugins explicitly configured in this file with a `yes`| -| check for new plugins every|60|The time in seconds to check for new plugins in the plugins directory. This allows having other applications dynamically creating plugins for Netdata.| -| checks|`no`|This is a debugging plugin for the internal latency| +### [web] section options -### [health] section options +Refer to the [web server documentation](/web/server/README.md) -This section controls the general behavior of the health monitoring capabilities of Netdata. +### [plugins] section options -Specific alarms are configured in per-collector config files under the `health.d` directory. For more info, see [health -monitoring](/health/README.md). +In this section you will see be a boolean (`yes`/`no`) option for each plugin (e.g. tc, cgroups, apps, proc etc.). Note +that the configuration options in this section for the orchestrator plugins `python.d` and `charts.d` control **all the +modules** written for that orchestrator. For instance, setting `python.d = no` means that all Python modules +under `collectors/python.d.plugin` will be disabled. -[Alarm notifications](/health/notifications/README.md) are configured in `health_alarm_notify.conf`. +Additionally, there will be the following options: -| setting|default|info| -|:-----:|:-----:|:---| -| enabled|`yes`|Set to `no` to disable all alarms and notifications| -| in memory max health log entries|1000|Size of the alarm history held in RAM| -| script to execute on alarm|`/usr/libexec/netdata/plugins.d/alarm-notify.sh`|The script that sends alarm notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`).| -| stock health configuration directory|`/usr/lib/netdata/conf.d/health.d`|Contains the stock alarm configuration files for each collector| -| health configuration directory|`/etc/netdata/health.d`|The directory containing the user alarm configuration files, to override the stock configurations| -| run at least every seconds|`10`|Controls how often all alarm conditions should be evaluated.| -| postpone alarms during hibernation for seconds|`60`|Prevents false alarms. May need to be increased if you get alarms during hibernation.| -| rotate log every lines|2000|Controls the number of alarm log entries stored in `<lib directory>/health-log.db`, where `<lib directory>` is the one configured in the [\[global\] section](#global-section-options)| +| setting | default | info | +|:-------------------------------:|:---------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| enable running new plugins | `yes` | When set to `yes`, Netdata will enable detected plugins, even if they are not configured explicitly. Setting this to `no` will only enable plugins explicitly configured in this file with a `yes` | +| check for new plugins every | 60 | The time in seconds to check for new plugins in the plugins directory. This allows having other applications dynamically creating plugins for Netdata. | +| checks | `no` | This is a debugging plugin for the internal latency | ### [registry] section options -To understand what this section is and how it should be configured, please refer to the [registry documentation](/registry/README.md). +To understand what this section is and how it should be configured, please refer to +the [registry documentation](/registry/README.md). ## Per-plugin configuration @@ -137,89 +191,22 @@ The configuration options for plugins appear in sections following the pattern ` Most internal plugins will provide additional options. Check [Internal Plugins](/collectors/README.md) for more information. -Please note, that by default Netdata will enable monitoring metrics for disks, memory, and network only when they are not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them to appear though). Use `yes` instead of `auto` in plugin configuration sections to enable these charts permanently. You can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics for all internal Netdata plugins. +Please note, that by default Netdata will enable monitoring metrics for disks, memory, and network only when they are +not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, +will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them +to appear though). Use `yes` instead of `auto` in plugin configuration sections to enable these charts permanently. You +can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics +for all internal Netdata plugins. ### External plugins External plugins will have only 2 options at `netdata.conf`: -| setting | default | info | -| :-----:|:-----:|:---| -| update every | the value of `[global].update every` setting|The frequency in seconds the plugin should collect values. For more information check the [performance guide](/docs/guides/configure/performance.md).| -| command options | _empty_ | Additional command line options to pass to the plugin.| - -External plugins that need additional configuration may support a dedicated file in `/etc/netdata`. Check their documentation. - -## Per-chart configuration - -In this area of `netdata.conf` you can find configuration options for individual charts. They appear in sections -following the pattern `[NAME]`. - -Using the settings and values under these sections, you can control all aspects of a specific chart. You can change its -title, make it appear higher in Netdata's [menu](/web/gui/README.md#metrics-menus), tweak its dimensions, and much more. - -To find the name of a given chart, and thus the name of its section in `netdata.conf`, look at the top-left corner of a -chart: - -![Finding the unique ID of a -chart](https://user-images.githubusercontent.com/1153921/67443082-43b16e80-f5b8-11e9-8d33-d6ee052c6678.png) - -Every per-chart configuration section has several common settings, which are listed in the table just below. Beneath -that is information about lines that begin with `dim`, which affect a chart's dimensions. - -| Setting | Function | -| :---------------- | :-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | -| `history` | Override the `history` setting in the [[global] options](#global-section-options) for this particular chart. Should be less than or equal to the global `history` setting. | -| `enabled` | A boolean (`yes` or `no`) that explicitly enables or disables the chart in question. | -| `cache directory` | The directory where cache files for this plugin, if needed, are stored. | -| `chart type` | Defines what type of chart to display. It can be `line`, `area`, or `stacked`. If empty or missing, `line` will be used. | -| `type` | Uniquely identify which [metrics menu](/web/gui/README.md#metrics-menus) on the Netdata dashboard this chart should appear under. Some examples include `system` (**System**), `disk` (**Disks**), `net` (**Network Interfaces**), and `netdata` (**Netdata Monitoring**). | -| `family` | Change the chart's [family](/web/README.md#families) from its default. For example, you could force a disk space chart to collect metrics for family `sdb` instead of family `sda`. | -| `units` | Text for the label of the vertical axis of the chart. This means all dimensions should have the same unit of measurement. | -| `context` | Change the default [context](/web/README.md#contexts) of the chart. Changing this setting will affect what metrics and metrics the chart displays, and which alarms are attached to it. | -| `priority` | Define where the chart should appear on the Netdata dashboard. Lower values equal higher priority, so a priority of `1` will place the chart highest, while a priority of `9999999` would place the chart at the bottom of the Netdata dashboard. | -| `name` | The name of the chart that appears in the top-left corner, after the chart's title. You can also use this name when writing [health entities](/health/REFERENCE.md#health-entity-reference). | -| `title` | The text that appears above the chart in the Netdata dashboard. | - -### Dimension settings - -You may notice some settings that begin with `dim` beneath the ones defined in the table above. These settings determine -which dimensions appear on the given chart and how Netdata calculates them. - -Each dimension setting has the following structure: `dim [DIMENSION ID] [OPTION] = [VALUE]`. The available options are `name`, `algorithm`, `multiplier`, and `divisor`. - -| Setting | Function | -| :----------- | :------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `name` | The name of the dimension as it will appear in the legend of the chart. If left empty, or is missing, Netdata will use the `[DIMENSION ID]` instead. | -| `algorithm` | Can be `absolute`, `incremental`, `percentage-of-absolute-row`, or `percentage-of-incremental-row`. If this setting is empty, invalid, or missing, Netdata will use `absolute`. See the list beneath this table for descriptions of what each algorithm does. | -| `multiplier` | An integer value by which to multiply the collected value. If empty or missing, Netdata will use `1`. This setting is often used with the value `1024` to convert metabytes to kilobytes, kilobytes to bytes, and so on. | -| `divisor` | An integer value by which to divide the collected value. If empty or missing, Netdata will use `1`. This setting is often used with the value `1024` to convert bytes to kilobytes, kilobytes to megabytes, and so on. | - -Here are the options for the `algorithm` setting: - -- `absolute`: The value is drawn as-is (interpolated to second boundary). -- `incremental`: To be used when the value always increases over time, such as the I/O on a disk. Netdata takes the - difference between the current metric and the past metric to calculate a per-second figure. -- `percentage-of-absolute-row`: The % of this value compared to the total of all dimensions. -- `percentage-of-incremental-row`: The % of this value compared to the incremental total of all dimensions. - -For example, the `system.io` chart has the following default settings: - -```conf - # dim in name = in - # dim in algorithm = incremental - # dim in multiplier = 1 - # dim in divisor = 1 - # dim out name = out - # dim out algorithm = incremental - # dim out multiplier = -1 - # dim out divisor = 1 -``` - -These `dim` settings produce two dimensions, `in` and `out`, both of which use the `incremental` algorithm. By -multiplying the value of `out` by -1, Netdata creates the negative values seen in the following area chart: - -![The system.io chart on a macOS -laptop](https://user-images.githubusercontent.com/1153921/69286708-2cfb3900-0bb1-11ea-9fcd-dd8fbb2adf11.png) +| setting | default | info | +|:---------------:|:--------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------| +| update every | the value of `[global].update every` setting | The frequency in seconds the plugin should collect values. For more information check the [performance guide](/docs/guides/configure/performance.md). | +| command options | - | Additional command line options to pass to the plugin. | | +External plugins that need additional configuration may support a dedicated file in `/etc/netdata`. Check their +documentation. diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c index d5cc03159..98bf0bf9a 100644 --- a/daemon/global_statistics.c +++ b/daemon/global_statistics.c @@ -4,7 +4,15 @@ #define GLOBAL_STATS_RESET_WEB_USEC_MAX 0x01 -#define CONFIG_SECTION_GLOBAL_STATISTICS "global statistics" +#define WORKER_JOB_GLOBAL 0 +#define WORKER_JOB_REGISTRY 1 +#define WORKER_JOB_WORKERS 2 +#define WORKER_JOB_DBENGINE 3 +#define WORKER_JOB_HEARTBEAT 4 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 5 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 5 +#endif static struct global_statistics { volatile uint16_t connected_clients; @@ -37,37 +45,10 @@ static struct global_statistics { .rrdr_result_points_generated = 0, }; -#if defined(HAVE_C___ATOMIC) -#else -netdata_mutex_t global_statistics_mutex = NETDATA_MUTEX_INITIALIZER; - -static inline void global_statistics_lock(void) { - netdata_mutex_lock(&global_statistics_mutex); -} - -static inline void global_statistics_unlock(void) { - netdata_mutex_unlock(&global_statistics_mutex); -} -#endif - - void rrdr_query_completed(uint64_t db_points_read, uint64_t result_points_generated) { -#if defined(HAVE_C___ATOMIC) __atomic_fetch_add(&global_statistics.rrdr_queries_made, 1, __ATOMIC_SEQ_CST); __atomic_fetch_add(&global_statistics.rrdr_db_points_read, db_points_read, __ATOMIC_SEQ_CST); __atomic_fetch_add(&global_statistics.rrdr_result_points_generated, result_points_generated, __ATOMIC_SEQ_CST); -#else - #warning NOT using atomic operations - using locks for global statistics - if (web_server_is_multithreaded) - global_statistics_lock(); - - global_statistics.rrdr_queries_made++; - global_statistics.rrdr_db_points_read += db_points_read; - global_statistics.rrdr_result_points_generated += result_points_generated; - - if (web_server_is_multithreaded) - global_statistics_unlock(); -#endif } void finished_web_request_statistics(uint64_t dt, @@ -75,7 +56,6 @@ void finished_web_request_statistics(uint64_t dt, uint64_t bytes_sent, uint64_t content_size, uint64_t compressed_content_size) { -#if defined(HAVE_C___ATOMIC) uint64_t old_web_usec_max = global_statistics.web_usec_max; while(dt > old_web_usec_max) __atomic_compare_exchange(&global_statistics.web_usec_max, &old_web_usec_max, &dt, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); @@ -86,61 +66,19 @@ void finished_web_request_statistics(uint64_t dt, __atomic_fetch_add(&global_statistics.bytes_sent, bytes_sent, __ATOMIC_SEQ_CST); __atomic_fetch_add(&global_statistics.content_size, content_size, __ATOMIC_SEQ_CST); __atomic_fetch_add(&global_statistics.compressed_content_size, compressed_content_size, __ATOMIC_SEQ_CST); -#else -#warning NOT using atomic operations - using locks for global statistics - if (web_server_is_multithreaded) - global_statistics_lock(); - - if (dt > global_statistics.web_usec_max) - global_statistics.web_usec_max = dt; - - global_statistics.web_requests++; - global_statistics.web_usec += dt; - global_statistics.bytes_received += bytes_received; - global_statistics.bytes_sent += bytes_sent; - global_statistics.content_size += content_size; - global_statistics.compressed_content_size += compressed_content_size; - - if (web_server_is_multithreaded) - global_statistics_unlock(); -#endif } uint64_t web_client_connected(void) { -#if defined(HAVE_C___ATOMIC) __atomic_fetch_add(&global_statistics.connected_clients, 1, __ATOMIC_SEQ_CST); - uint64_t id = __atomic_fetch_add(&global_statistics.web_client_count, 1, __ATOMIC_SEQ_CST); -#else - if (web_server_is_multithreaded) - global_statistics_lock(); - - global_statistics.connected_clients++; - uint64_t id = global_statistics.web_client_count++; - - if (web_server_is_multithreaded) - global_statistics_unlock(); -#endif - - return id; + return __atomic_fetch_add(&global_statistics.web_client_count, 1, __ATOMIC_SEQ_CST); } void web_client_disconnected(void) { -#if defined(HAVE_C___ATOMIC) __atomic_fetch_sub(&global_statistics.connected_clients, 1, __ATOMIC_SEQ_CST); -#else - if (web_server_is_multithreaded) - global_statistics_lock(); - - global_statistics.connected_clients--; - - if (web_server_is_multithreaded) - global_statistics_unlock(); -#endif } static inline void global_statistics_copy(struct global_statistics *gs, uint8_t options) { -#if defined(HAVE_C___ATOMIC) gs->connected_clients = __atomic_fetch_add(&global_statistics.connected_clients, 0, __ATOMIC_SEQ_CST); gs->web_requests = __atomic_fetch_add(&global_statistics.web_requests, 0, __ATOMIC_SEQ_CST); gs->web_usec = __atomic_fetch_add(&global_statistics.web_usec, 0, __ATOMIC_SEQ_CST); @@ -160,16 +98,6 @@ static inline void global_statistics_copy(struct global_statistics *gs, uint8_t __atomic_compare_exchange(&global_statistics.web_usec_max, (uint64_t *) &gs->web_usec_max, &n, 1, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST); } -#else - global_statistics_lock(); - - memcpy(gs, (const void *)&global_statistics, sizeof(struct global_statistics)); - - if (options & GLOBAL_STATS_RESET_WEB_USEC_MAX) - global_statistics.web_usec_max = 0; - - global_statistics_unlock(); -#endif } static void global_statistics_charts(void) { @@ -266,7 +194,7 @@ static void global_statistics_charts(void) { "netdata" , "clients" , NULL - , "netdata" + , "api" , NULL , "Netdata Web Clients" , "connected clients" @@ -297,7 +225,7 @@ static void global_statistics_charts(void) { "netdata" , "requests" , NULL - , "netdata" + , "api" , NULL , "Netdata Web Requests" , "requests/s" @@ -329,13 +257,13 @@ static void global_statistics_charts(void) { "netdata" , "net" , NULL - , "netdata" + , "api" , NULL , "Netdata Network Traffic" , "kilobits/s" , "netdata" , "stats" - , 130000 + , 130400 , localhost->rrd_update_every , RRDSET_TYPE_AREA ); @@ -363,13 +291,13 @@ static void global_statistics_charts(void) { "netdata" , "response_time" , NULL - , "netdata" + , "api" , NULL , "Netdata API Response Time" , "milliseconds/request" , "netdata" , "stats" - , 130400 + , 130500 , localhost->rrd_update_every , RRDSET_TYPE_LINE ); @@ -412,13 +340,13 @@ static void global_statistics_charts(void) { "netdata" , "compression_ratio" , NULL - , "netdata" + , "api" , NULL , "Netdata API Responses Compression Savings Ratio" , "percentage" , "netdata" , "stats" - , 130500 + , 130600 , localhost->rrd_update_every , RRDSET_TYPE_LINE ); @@ -465,7 +393,7 @@ static void global_statistics_charts(void) { , "queries/s" , "netdata" , "stats" - , 130500 + , 131000 , localhost->rrd_update_every , RRDSET_TYPE_LINE ); @@ -498,7 +426,7 @@ static void global_statistics_charts(void) { , "points/s" , "netdata" , "stats" - , 130501 + , 131001 , localhost->rrd_update_every , RRDSET_TYPE_AREA ); @@ -516,450 +444,1099 @@ static void global_statistics_charts(void) { } // ---------------------------------------------------------------- +} +static void dbengine_statistics_charts(void) { #ifdef ENABLE_DBENGINE - RRDHOST *host; - unsigned long long stats_array[RRDENG_NR_STATS] = {0}; - unsigned long long local_stats_array[RRDENG_NR_STATS]; - unsigned dbengine_contexts = 0, counted_multihost_db = 0, i; - - rrd_rdlock(); - rrdhost_foreach_read(host) { - if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && !rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) { - if (&multidb_ctx == host->rrdeng_ctx) { - if (counted_multihost_db) - continue; /* Only count multi-host DB once */ - counted_multihost_db = 1; - } - ++dbengine_contexts; - /* get localhost's DB engine's statistics */ - rrdeng_get_37_statistics(host->rrdeng_ctx, local_stats_array); - for (i = 0 ; i < RRDENG_NR_STATS ; ++i) { - /* aggregate statistics across hosts */ - stats_array[i] += local_stats_array[i]; + if(netdata_rwlock_tryrdlock(&rrd_rwlock) == 0) { + RRDHOST *host; + unsigned long long stats_array[RRDENG_NR_STATS] = {0}; + unsigned long long local_stats_array[RRDENG_NR_STATS]; + unsigned dbengine_contexts = 0, counted_multihost_db = 0, i; + + rrdhost_foreach_read(host) { + if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && !rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) { + if (&multidb_ctx == host->rrdeng_ctx) { + if (counted_multihost_db) + continue; /* Only count multi-host DB once */ + counted_multihost_db = 1; + } + ++dbengine_contexts; + /* get localhost's DB engine's statistics */ + rrdeng_get_37_statistics(host->rrdeng_ctx, local_stats_array); + for (i = 0; i < RRDENG_NR_STATS; ++i) { + /* aggregate statistics across hosts */ + stats_array[i] += local_stats_array[i]; + } } } - } - rrd_unlock(); - - if (dbengine_contexts) { - /* deduplicate global statistics by getting the ones from the last context */ - stats_array[30] = local_stats_array[30]; - stats_array[31] = local_stats_array[31]; - stats_array[32] = local_stats_array[32]; - stats_array[34] = local_stats_array[34]; - stats_array[36] = local_stats_array[36]; - - // ---------------------------------------------------------------- - - { - static RRDSET *st_compression = NULL; - static RRDDIM *rd_savings = NULL; - - if (unlikely(!st_compression)) { - st_compression = rrdset_create_localhost( - "netdata" - , "dbengine_compression_ratio" - , NULL - , "dbengine" - , NULL - , "Netdata DB engine data extents' compression savings ratio" - , "percentage" - , "netdata" - , "stats" - , 130502 - , localhost->rrd_update_every - , RRDSET_TYPE_LINE - ); - - rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + rrd_unlock(); + + if (dbengine_contexts) { + /* deduplicate global statistics by getting the ones from the last context */ + stats_array[30] = local_stats_array[30]; + stats_array[31] = local_stats_array[31]; + stats_array[32] = local_stats_array[32]; + stats_array[34] = local_stats_array[34]; + stats_array[36] = local_stats_array[36]; + + // ---------------------------------------------------------------- + + { + static RRDSET *st_compression = NULL; + static RRDDIM *rd_savings = NULL; + + if (unlikely(!st_compression)) { + st_compression = rrdset_create_localhost( + "netdata", + "dbengine_compression_ratio", + NULL, + "dbengine", + NULL, + "Netdata DB engine data extents' compression savings ratio", + "percentage", + "netdata", + "stats", + 132000, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } else + rrdset_next(st_compression); + + unsigned long long ratio; + unsigned long long compressed_content_size = stats_array[12]; + unsigned long long content_size = stats_array[11]; + + if (content_size) { + // allow negative savings + ratio = ((content_size - compressed_content_size) * 100 * 1000) / content_size; + } else { + ratio = 0; + } + rrddim_set_by_pointer(st_compression, rd_savings, ratio); + + rrdset_done(st_compression); } - else - rrdset_next(st_compression); - - unsigned long long ratio; - unsigned long long compressed_content_size = stats_array[12]; - unsigned long long content_size = stats_array[11]; - - if (content_size) { - // allow negative savings - ratio = ((content_size - compressed_content_size) * 100 * 1000) / content_size; - } else { - ratio = 0; - } - rrddim_set_by_pointer(st_compression, rd_savings, ratio); - rrdset_done(st_compression); - } + // ---------------------------------------------------------------- + + { + static RRDSET *st_pg_cache_hit_ratio = NULL; + static RRDDIM *rd_hit_ratio = NULL; + + if (unlikely(!st_pg_cache_hit_ratio)) { + st_pg_cache_hit_ratio = rrdset_create_localhost( + "netdata", + "page_cache_hit_ratio", + NULL, + "dbengine", + NULL, + "Netdata DB engine page cache hit ratio", + "percentage", + "netdata", + "stats", + 132003, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_hit_ratio = rrddim_add(st_pg_cache_hit_ratio, "ratio", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } else + rrdset_next(st_pg_cache_hit_ratio); + + static unsigned long long old_hits = 0; + static unsigned long long old_misses = 0; + unsigned long long hits = stats_array[7]; + unsigned long long misses = stats_array[8]; + unsigned long long hits_delta; + unsigned long long misses_delta; + unsigned long long ratio; + + hits_delta = hits - old_hits; + misses_delta = misses - old_misses; + old_hits = hits; + old_misses = misses; + + if (hits_delta + misses_delta) { + ratio = (hits_delta * 100 * 1000) / (hits_delta + misses_delta); + } else { + ratio = 0; + } + rrddim_set_by_pointer(st_pg_cache_hit_ratio, rd_hit_ratio, ratio); + + rrdset_done(st_pg_cache_hit_ratio); + } - // ---------------------------------------------------------------- - - { - static RRDSET *st_pg_cache_hit_ratio = NULL; - static RRDDIM *rd_hit_ratio = NULL; - - if (unlikely(!st_pg_cache_hit_ratio)) { - st_pg_cache_hit_ratio = rrdset_create_localhost( - "netdata" - , "page_cache_hit_ratio" - , NULL - , "dbengine" - , NULL - , "Netdata DB engine page cache hit ratio" - , "percentage" - , "netdata" - , "stats" - , 130503 - , localhost->rrd_update_every - , RRDSET_TYPE_LINE - ); - - rd_hit_ratio = rrddim_add(st_pg_cache_hit_ratio, "ratio", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + // ---------------------------------------------------------------- + + { + static RRDSET *st_pg_cache_pages = NULL; + static RRDDIM *rd_descriptors = NULL; + static RRDDIM *rd_populated = NULL; + static RRDDIM *rd_dirty = NULL; + static RRDDIM *rd_backfills = NULL; + static RRDDIM *rd_evictions = NULL; + static RRDDIM *rd_used_by_collectors = NULL; + + if (unlikely(!st_pg_cache_pages)) { + st_pg_cache_pages = rrdset_create_localhost( + "netdata", + "page_cache_stats", + NULL, + "dbengine", + NULL, + "Netdata dbengine page cache statistics", + "pages", + "netdata", + "stats", + 132004, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_descriptors = rrddim_add(st_pg_cache_pages, "descriptors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_populated = rrddim_add(st_pg_cache_pages, "populated", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_dirty = rrddim_add(st_pg_cache_pages, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_backfills = rrddim_add(st_pg_cache_pages, "backfills", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_evictions = rrddim_add(st_pg_cache_pages, "evictions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_used_by_collectors = + rrddim_add(st_pg_cache_pages, "used_by_collectors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } else + rrdset_next(st_pg_cache_pages); + + rrddim_set_by_pointer(st_pg_cache_pages, rd_descriptors, (collected_number)stats_array[27]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_populated, (collected_number)stats_array[3]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_dirty, (collected_number)stats_array[0] + stats_array[4]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_backfills, (collected_number)stats_array[9]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_evictions, (collected_number)stats_array[10]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_used_by_collectors, (collected_number)stats_array[0]); + rrdset_done(st_pg_cache_pages); } - else - rrdset_next(st_pg_cache_hit_ratio); - - static unsigned long long old_hits = 0; - static unsigned long long old_misses = 0; - unsigned long long hits = stats_array[7]; - unsigned long long misses = stats_array[8]; - unsigned long long hits_delta; - unsigned long long misses_delta; - unsigned long long ratio; - - hits_delta = hits - old_hits; - misses_delta = misses - old_misses; - old_hits = hits; - old_misses = misses; - - if (hits_delta + misses_delta) { - ratio = (hits_delta * 100 * 1000) / (hits_delta + misses_delta); - } else { - ratio = 0; + + // ---------------------------------------------------------------- + + { + static RRDSET *st_long_term_pages = NULL; + static RRDDIM *rd_total = NULL; + static RRDDIM *rd_insertions = NULL; + static RRDDIM *rd_deletions = NULL; + static RRDDIM *rd_flushing_pressure_deletions = NULL; + + if (unlikely(!st_long_term_pages)) { + st_long_term_pages = rrdset_create_localhost( + "netdata", + "dbengine_long_term_page_stats", + NULL, + "dbengine", + NULL, + "Netdata dbengine long-term page statistics", + "pages", + "netdata", + "stats", + 132005, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_total = rrddim_add(st_long_term_pages, "total", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_insertions = rrddim_add(st_long_term_pages, "insertions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_deletions = rrddim_add(st_long_term_pages, "deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_flushing_pressure_deletions = rrddim_add( + st_long_term_pages, "flushing_pressure_deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } else + rrdset_next(st_long_term_pages); + + rrddim_set_by_pointer(st_long_term_pages, rd_total, (collected_number)stats_array[2]); + rrddim_set_by_pointer(st_long_term_pages, rd_insertions, (collected_number)stats_array[5]); + rrddim_set_by_pointer(st_long_term_pages, rd_deletions, (collected_number)stats_array[6]); + rrddim_set_by_pointer( + st_long_term_pages, rd_flushing_pressure_deletions, (collected_number)stats_array[36]); + rrdset_done(st_long_term_pages); } - rrddim_set_by_pointer(st_pg_cache_hit_ratio, rd_hit_ratio, ratio); - rrdset_done(st_pg_cache_hit_ratio); - } + // ---------------------------------------------------------------- + + { + static RRDSET *st_io_stats = NULL; + static RRDDIM *rd_reads = NULL; + static RRDDIM *rd_writes = NULL; + + if (unlikely(!st_io_stats)) { + st_io_stats = rrdset_create_localhost( + "netdata", + "dbengine_io_throughput", + NULL, + "dbengine", + NULL, + "Netdata DB engine I/O throughput", + "MiB/s", + "netdata", + "stats", + 132006, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + } else + rrdset_next(st_io_stats); + + rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[17]); + rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[15]); + rrdset_done(st_io_stats); + } - // ---------------------------------------------------------------- - - { - static RRDSET *st_pg_cache_pages = NULL; - static RRDDIM *rd_descriptors = NULL; - static RRDDIM *rd_populated = NULL; - static RRDDIM *rd_dirty = NULL; - static RRDDIM *rd_backfills = NULL; - static RRDDIM *rd_evictions = NULL; - static RRDDIM *rd_used_by_collectors = NULL; - - if (unlikely(!st_pg_cache_pages)) { - st_pg_cache_pages = rrdset_create_localhost( - "netdata" - , "page_cache_stats" - , NULL - , "dbengine" - , NULL - , "Netdata dbengine page cache statistics" - , "pages" - , "netdata" - , "stats" - , 130504 - , localhost->rrd_update_every - , RRDSET_TYPE_LINE - ); - - rd_descriptors = rrddim_add(st_pg_cache_pages, "descriptors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_populated = rrddim_add(st_pg_cache_pages, "populated", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_dirty = rrddim_add(st_pg_cache_pages, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_backfills = rrddim_add(st_pg_cache_pages, "backfills", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_evictions = rrddim_add(st_pg_cache_pages, "evictions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_used_by_collectors = rrddim_add(st_pg_cache_pages, "used_by_collectors", NULL, 1, 1, - RRD_ALGORITHM_ABSOLUTE); + // ---------------------------------------------------------------- + + { + static RRDSET *st_io_stats = NULL; + static RRDDIM *rd_reads = NULL; + static RRDDIM *rd_writes = NULL; + + if (unlikely(!st_io_stats)) { + st_io_stats = rrdset_create_localhost( + "netdata", + "dbengine_io_operations", + NULL, + "dbengine", + NULL, + "Netdata DB engine I/O operations", + "operations/s", + "netdata", + "stats", + 132007, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } else + rrdset_next(st_io_stats); + + rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[18]); + rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[16]); + rrdset_done(st_io_stats); } - else - rrdset_next(st_pg_cache_pages); - - rrddim_set_by_pointer(st_pg_cache_pages, rd_descriptors, (collected_number)stats_array[27]); - rrddim_set_by_pointer(st_pg_cache_pages, rd_populated, (collected_number)stats_array[3]); - rrddim_set_by_pointer(st_pg_cache_pages, rd_dirty, (collected_number)stats_array[0] + stats_array[4]); - rrddim_set_by_pointer(st_pg_cache_pages, rd_backfills, (collected_number)stats_array[9]); - rrddim_set_by_pointer(st_pg_cache_pages, rd_evictions, (collected_number)stats_array[10]); - rrddim_set_by_pointer(st_pg_cache_pages, rd_used_by_collectors, (collected_number)stats_array[0]); - rrdset_done(st_pg_cache_pages); - } - // ---------------------------------------------------------------- + // ---------------------------------------------------------------- + + { + static RRDSET *st_errors = NULL; + static RRDDIM *rd_fs_errors = NULL; + static RRDDIM *rd_io_errors = NULL; + static RRDDIM *pg_cache_over_half_dirty_events = NULL; + + if (unlikely(!st_errors)) { + st_errors = rrdset_create_localhost( + "netdata", + "dbengine_global_errors", + NULL, + "dbengine", + NULL, + "Netdata DB engine errors", + "errors/s", + "netdata", + "stats", + 132008, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_io_errors = rrddim_add(st_errors, "io_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_fs_errors = rrddim_add(st_errors, "fs_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + pg_cache_over_half_dirty_events = + rrddim_add(st_errors, "pg_cache_over_half_dirty_events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } else + rrdset_next(st_errors); + + rrddim_set_by_pointer(st_errors, rd_io_errors, (collected_number)stats_array[30]); + rrddim_set_by_pointer(st_errors, rd_fs_errors, (collected_number)stats_array[31]); + rrddim_set_by_pointer(st_errors, pg_cache_over_half_dirty_events, (collected_number)stats_array[34]); + rrdset_done(st_errors); + } - { - static RRDSET *st_long_term_pages = NULL; - static RRDDIM *rd_total = NULL; - static RRDDIM *rd_insertions = NULL; - static RRDDIM *rd_deletions = NULL; - static RRDDIM *rd_flushing_pressure_deletions = NULL; + // ---------------------------------------------------------------- + + { + static RRDSET *st_fd = NULL; + static RRDDIM *rd_fd_current = NULL; + static RRDDIM *rd_fd_max = NULL; + + if (unlikely(!st_fd)) { + st_fd = rrdset_create_localhost( + "netdata", + "dbengine_global_file_descriptors", + NULL, + "dbengine", + NULL, + "Netdata DB engine File Descriptors", + "descriptors", + "netdata", + "stats", + 132009, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_fd_current = rrddim_add(st_fd, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_fd_max = rrddim_add(st_fd, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } else + rrdset_next(st_fd); + + rrddim_set_by_pointer(st_fd, rd_fd_current, (collected_number)stats_array[32]); + /* Careful here, modify this accordingly if the File-Descriptor budget ever changes */ + rrddim_set_by_pointer(st_fd, rd_fd_max, (collected_number)rlimit_nofile.rlim_cur / 4); + rrdset_done(st_fd); + } - if (unlikely(!st_long_term_pages)) { - st_long_term_pages = rrdset_create_localhost( - "netdata" - , "dbengine_long_term_page_stats" - , NULL - , "dbengine" - , NULL - , "Netdata dbengine long-term page statistics" - , "pages" - , "netdata" - , "stats" - , 130505 - , localhost->rrd_update_every - , RRDSET_TYPE_LINE - ); - - rd_total = rrddim_add(st_long_term_pages, "total", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_insertions = rrddim_add(st_long_term_pages, "insertions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_deletions = rrddim_add(st_long_term_pages, "deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_flushing_pressure_deletions = rrddim_add(st_long_term_pages, "flushing_pressure_deletions", NULL, -1, - 1, RRD_ALGORITHM_INCREMENTAL); + // ---------------------------------------------------------------- + + { + static RRDSET *st_ram_usage = NULL; + static RRDDIM *rd_cached = NULL; + static RRDDIM *rd_pinned = NULL; + static RRDDIM *rd_cache_metadata = NULL; + static RRDDIM *rd_index_metadata = NULL; + static RRDDIM *rd_pages_metadata = NULL; + + collected_number cached_pages, pinned_pages, API_producers, populated_pages, cache_metadata, pages_on_disk, + page_cache_descriptors, index_metadata, pages_metadata; + + if (unlikely(!st_ram_usage)) { + st_ram_usage = rrdset_create_localhost( + "netdata", + "dbengine_ram", + NULL, + "dbengine", + NULL, + "Netdata DB engine RAM usage", + "MiB", + "netdata", + "stats", + 132010, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_cached = rrddim_add(st_ram_usage, "cache", NULL, RRDENG_BLOCK_SIZE, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + rd_pinned = rrddim_add(st_ram_usage, "collectors", NULL, RRDENG_BLOCK_SIZE, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + rd_cache_metadata = rrddim_add(st_ram_usage, "cache metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + rd_pages_metadata = rrddim_add(st_ram_usage, "pages metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + rd_index_metadata = rrddim_add(st_ram_usage, "index metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + } else + rrdset_next(st_ram_usage); + + API_producers = (collected_number)stats_array[0]; + pages_on_disk = (collected_number)stats_array[2]; + populated_pages = (collected_number)stats_array[3]; + page_cache_descriptors = (collected_number)stats_array[27]; + + if (API_producers * 2 > populated_pages) { + pinned_pages = API_producers; + } else { + pinned_pages = API_producers * 2; + } + cached_pages = populated_pages - pinned_pages; + + cache_metadata = page_cache_descriptors * sizeof(struct page_cache_descr); + + pages_metadata = pages_on_disk * sizeof(struct rrdeng_page_descr); + + /* This is an empirical estimation for Judy array indexing and extent structures */ + index_metadata = pages_on_disk * 58; + + rrddim_set_by_pointer(st_ram_usage, rd_cached, cached_pages); + rrddim_set_by_pointer(st_ram_usage, rd_pinned, pinned_pages); + rrddim_set_by_pointer(st_ram_usage, rd_cache_metadata, cache_metadata); + rrddim_set_by_pointer(st_ram_usage, rd_pages_metadata, pages_metadata); + rrddim_set_by_pointer(st_ram_usage, rd_index_metadata, index_metadata); + rrdset_done(st_ram_usage); } - else - rrdset_next(st_long_term_pages); - - rrddim_set_by_pointer(st_long_term_pages, rd_total, (collected_number)stats_array[2]); - rrddim_set_by_pointer(st_long_term_pages, rd_insertions, (collected_number)stats_array[5]); - rrddim_set_by_pointer(st_long_term_pages, rd_deletions, (collected_number)stats_array[6]); - rrddim_set_by_pointer(st_long_term_pages, rd_flushing_pressure_deletions, - (collected_number)stats_array[36]); - rrdset_done(st_long_term_pages); } + } +#endif +} + +static void update_heartbeat_charts() { + static RRDSET *st_heartbeat = NULL; + static RRDDIM *rd_heartbeat_min = NULL; + static RRDDIM *rd_heartbeat_max = NULL; + static RRDDIM *rd_heartbeat_avg = NULL; + + if (unlikely(!st_heartbeat)) { + st_heartbeat = rrdset_create_localhost( + "netdata" + , "heartbeat" + , NULL + , "heartbeat" + , NULL + , "System clock jitter" + , "microseconds" + , "netdata" + , "stats" + , 900000 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA); + + rd_heartbeat_min = rrddim_add(st_heartbeat, "min", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_heartbeat_max = rrddim_add(st_heartbeat, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_heartbeat_avg = rrddim_add(st_heartbeat, "average", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } else + rrdset_next(st_heartbeat); + + usec_t min, max, average; + size_t count; + + heartbeat_statistics(&min, &max, &average, &count); + + rrddim_set_by_pointer(st_heartbeat, rd_heartbeat_min, (collected_number)min); + rrddim_set_by_pointer(st_heartbeat, rd_heartbeat_max, (collected_number)max); + rrddim_set_by_pointer(st_heartbeat, rd_heartbeat_avg, (collected_number)average); + + rrdset_done(st_heartbeat); +} + +// --------------------------------------------------------------------------------------------------------------------- +// worker utilization + +#define WORKERS_MIN_PERCENT_DEFAULT 10000.0 + +struct worker_job_type { + char name[WORKER_UTILIZATION_MAX_JOB_NAME_LENGTH + 1]; + size_t jobs_started; + usec_t busy_time; + + RRDDIM *rd_jobs_started; + RRDDIM *rd_busy_time; +}; + +struct worker_thread { + pid_t pid; + int enabled; + + int cpu_enabled; + double cpu; + + kernel_uint_t utime; + kernel_uint_t stime; + + kernel_uint_t utime_old; + kernel_uint_t stime_old; + + usec_t collected_time; + usec_t collected_time_old; + + size_t jobs_started; + usec_t busy_time; + + struct worker_thread *next; +}; + +struct worker_utilization { + const char *name; + const char *family; + size_t priority; + uint32_t flags; + + char *name_lowercase; + + struct worker_job_type per_job_type[WORKER_UTILIZATION_MAX_JOB_TYPES]; + + size_t workers_registered; + size_t workers_busy; + usec_t workers_total_busy_time; + usec_t workers_total_duration; + size_t workers_total_jobs_started; + double workers_min_busy_time; + double workers_max_busy_time; + + size_t workers_cpu_registered; + double workers_cpu_min; + double workers_cpu_max; + double workers_cpu_total; + + struct worker_thread *threads; + + RRDSET *st_workers_time; + RRDDIM *rd_workers_time_avg; + RRDDIM *rd_workers_time_min; + RRDDIM *rd_workers_time_max; + + RRDSET *st_workers_cpu; + RRDDIM *rd_workers_cpu_avg; + RRDDIM *rd_workers_cpu_min; + RRDDIM *rd_workers_cpu_max; + + RRDSET *st_workers_threads; + RRDDIM *rd_workers_threads_free; + RRDDIM *rd_workers_threads_busy; + + RRDSET *st_workers_jobs_per_job_type; + RRDSET *st_workers_busy_per_job_type; + + RRDDIM *rd_total_cpu_utilizaton; +}; + +static struct worker_utilization all_workers_utilization[] = { + { .name = "STATS", .family = "workers global statistics", .priority = 1000000 }, + { .name = "HEALTH", .family = "workers health alarms", .priority = 1000000 }, + { .name = "MLTRAIN", .family = "workers ML training", .priority = 1000000 }, + { .name = "MLDETECT", .family = "workers ML detection", .priority = 1000000 }, + { .name = "STREAMRCV", .family = "workers streaming receive", .priority = 1000000 }, + { .name = "STREAMSND", .family = "workers streaming send", .priority = 1000000 }, + { .name = "DBENGINE", .family = "workers dbengine instances", .priority = 1000000 }, + { .name = "WEB", .family = "workers web server", .priority = 1000000 }, + { .name = "ACLKQUERY", .family = "workers aclk query", .priority = 1000000 }, + { .name = "ACLKSYNC", .family = "workers aclk host sync", .priority = 1000000 }, + { .name = "PLUGINSD", .family = "workers plugins.d", .priority = 1000000 }, + { .name = "STATSD", .family = "workers plugin statsd", .priority = 1000000 }, + { .name = "STATSDFLUSH", .family = "workers plugin statsd flush", .priority = 1000000 }, + { .name = "PROC", .family = "workers plugin proc", .priority = 1000000 }, + { .name = "NETDEV", .family = "workers plugin proc netdev", .priority = 1000000 }, + { .name = "FREEBSD", .family = "workers plugin freebsd", .priority = 1000000 }, + { .name = "MACOS", .family = "workers plugin macos", .priority = 1000000 }, + { .name = "CGROUPS", .family = "workers plugin cgroups", .priority = 1000000 }, + { .name = "CGROUPSDISC", .family = "workers plugin cgroups find", .priority = 1000000 }, + { .name = "DISKSPACE", .family = "workers plugin diskspace", .priority = 1000000 }, + { .name = "TC", .family = "workers plugin tc", .priority = 1000000 }, + { .name = "TIMEX", .family = "workers plugin timex", .priority = 1000000 }, + { .name = "IDLEJITTER", .family = "workers plugin idlejitter", .priority = 1000000 }, + + // has to be terminated with a NULL + { .name = NULL, .family = NULL } +}; + +static void workers_total_cpu_utilization_chart(void) { + size_t i, cpu_enabled = 0; + for(i = 0; all_workers_utilization[i].name ;i++) + if(all_workers_utilization[i].workers_cpu_registered) cpu_enabled++; + + if(!cpu_enabled) return; + + static RRDSET *st = NULL; + + if(!st) { + st = rrdset_create_localhost( + "netdata", + "workers_cpu", + NULL, + "workers", + "netdata.workers.cpu_total", + "Netdata Workers CPU Utilization (100% = 1 core)", + "%", + "netdata", + "stats", + 999000, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + } + + rrdset_next(st); + + for(i = 0; all_workers_utilization[i].name ;i++) { + struct worker_utilization *wu = &all_workers_utilization[i]; + if(!wu->workers_cpu_registered) continue; + + if(!wu->rd_total_cpu_utilizaton) + wu->rd_total_cpu_utilizaton = rrddim_add(st, wu->name_lowercase, NULL, 1, 10000ULL, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st, wu->rd_total_cpu_utilizaton, (collected_number)((double)wu->workers_cpu_total * 10000.0)); + } + + rrdset_done(st); +} + +static void workers_utilization_update_chart(struct worker_utilization *wu) { + if(!wu->workers_registered) return; + + //fprintf(stderr, "%-12s WORKER UTILIZATION: %-3.2f%%, %zu jobs done, %zu running, on %zu workers, min %-3.02f%%, max %-3.02f%%.\n", + // wu->name, + // (double)wu->workers_total_busy_time * 100.0 / (double)wu->workers_total_duration, + // wu->workers_total_jobs_started, wu->workers_busy, wu->workers_registered, + // wu->workers_min_busy_time, wu->workers_max_busy_time); + + // ---------------------------------------------------------------------- + + if(unlikely(!wu->st_workers_time)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_time_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.time", wu->name_lowercase); + + wu->st_workers_time = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Busy Time (100% = all workers busy)" + , "%" + , "netdata" + , "stats" + , wu->priority + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + } + + // we add the min and max dimensions only when we have multiple workers + + if(unlikely(!wu->rd_workers_time_min && wu->workers_registered > 1)) + wu->rd_workers_time_min = rrddim_add(wu->st_workers_time, "min", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(!wu->rd_workers_time_max && wu->workers_registered > 1)) + wu->rd_workers_time_max = rrddim_add(wu->st_workers_time, "max", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(!wu->rd_workers_time_avg)) + wu->rd_workers_time_avg = rrddim_add(wu->st_workers_time, "average", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + + rrdset_next(wu->st_workers_time); + + if(unlikely(wu->workers_min_busy_time == WORKERS_MIN_PERCENT_DEFAULT)) wu->workers_min_busy_time = 0.0; + + if(wu->rd_workers_time_min) + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_min, (collected_number)((double)wu->workers_min_busy_time * 10000.0)); + + if(wu->rd_workers_time_max) + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_max, (collected_number)((double)wu->workers_max_busy_time * 10000.0)); - // ---------------------------------------------------------------- + if(wu->workers_total_duration == 0) + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_avg, 0); + else + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_avg, (collected_number)((double)wu->workers_total_busy_time * 100.0 * 10000.0 / (double)wu->workers_total_duration)); - { - static RRDSET *st_io_stats = NULL; - static RRDDIM *rd_reads = NULL; - static RRDDIM *rd_writes = NULL; + rrdset_done(wu->st_workers_time); - if (unlikely(!st_io_stats)) { - st_io_stats = rrdset_create_localhost( + // ---------------------------------------------------------------------- + +#ifdef __linux__ + if(wu->workers_cpu_registered || wu->st_workers_cpu) { + if(unlikely(!wu->st_workers_cpu)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_cpu_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.cpu", wu->name_lowercase); + + wu->st_workers_cpu = rrdset_create_localhost( "netdata" - , "dbengine_io_throughput" - , NULL - , "dbengine" + , name , NULL - , "Netdata DB engine I/O throughput" - , "MiB/s" + , wu->family + , context + , "Netdata Workers CPU Utilization (100% = all workers busy)" + , "%" , "netdata" , "stats" - , 130506 + , wu->priority + 1 , localhost->rrd_update_every - , RRDSET_TYPE_LINE - ); + , RRDSET_TYPE_AREA + ); + } - rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); - } - else - rrdset_next(st_io_stats); + if (unlikely(!wu->rd_workers_cpu_min && wu->workers_registered > 1)) + wu->rd_workers_cpu_min = rrddim_add(wu->st_workers_cpu, "min", NULL, 1, 10000ULL, RRD_ALGORITHM_ABSOLUTE); - rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[17]); - rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[15]); - rrdset_done(st_io_stats); - } + if (unlikely(!wu->rd_workers_cpu_max && wu->workers_registered > 1)) + wu->rd_workers_cpu_max = rrddim_add(wu->st_workers_cpu, "max", NULL, 1, 10000ULL, RRD_ALGORITHM_ABSOLUTE); - // ---------------------------------------------------------------- - - { - static RRDSET *st_io_stats = NULL; - static RRDDIM *rd_reads = NULL; - static RRDDIM *rd_writes = NULL; - - if (unlikely(!st_io_stats)) { - st_io_stats = rrdset_create_localhost( - "netdata" - , "dbengine_io_operations" - , NULL - , "dbengine" - , NULL - , "Netdata DB engine I/O operations" - , "operations/s" - , "netdata" - , "stats" - , 130507 - , localhost->rrd_update_every - , RRDSET_TYPE_LINE - ); - - rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); - } - else - rrdset_next(st_io_stats); + if(unlikely(!wu->rd_workers_cpu_avg)) + wu->rd_workers_cpu_avg = rrddim_add(wu->st_workers_cpu, "average", NULL, 1, 10000ULL, RRD_ALGORITHM_ABSOLUTE); - rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[18]); - rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[16]); - rrdset_done(st_io_stats); - } + rrdset_next(wu->st_workers_cpu); - // ---------------------------------------------------------------- - - { - static RRDSET *st_errors = NULL; - static RRDDIM *rd_fs_errors = NULL; - static RRDDIM *rd_io_errors = NULL; - static RRDDIM *pg_cache_over_half_dirty_events = NULL; - - if (unlikely(!st_errors)) { - st_errors = rrdset_create_localhost( - "netdata" - , "dbengine_global_errors" - , NULL - , "dbengine" - , NULL - , "Netdata DB engine errors" - , "errors/s" - , "netdata" - , "stats" - , 130508 - , localhost->rrd_update_every - , RRDSET_TYPE_LINE - ); - - rd_io_errors = rrddim_add(st_errors, "io_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - rd_fs_errors = rrddim_add(st_errors, "fs_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); - pg_cache_over_half_dirty_events = rrddim_add(st_errors, "pg_cache_over_half_dirty_events", NULL, 1, 1, - RRD_ALGORITHM_INCREMENTAL); - } - else - rrdset_next(st_errors); + if(unlikely(wu->workers_cpu_min == WORKERS_MIN_PERCENT_DEFAULT)) wu->workers_cpu_min = 0.0; - rrddim_set_by_pointer(st_errors, rd_io_errors, (collected_number)stats_array[30]); - rrddim_set_by_pointer(st_errors, rd_fs_errors, (collected_number)stats_array[31]); - rrddim_set_by_pointer(st_errors, pg_cache_over_half_dirty_events, (collected_number)stats_array[34]); - rrdset_done(st_errors); - } + if(wu->rd_workers_cpu_min) + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_min, (collected_number)(wu->workers_cpu_min * 10000ULL)); + + if(wu->rd_workers_cpu_max) + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_max, (collected_number)(wu->workers_cpu_max * 10000ULL)); - // ---------------------------------------------------------------- - - { - static RRDSET *st_fd = NULL; - static RRDDIM *rd_fd_current = NULL; - static RRDDIM *rd_fd_max = NULL; - - if (unlikely(!st_fd)) { - st_fd = rrdset_create_localhost( - "netdata" - , "dbengine_global_file_descriptors" - , NULL - , "dbengine" - , NULL - , "Netdata DB engine File Descriptors" - , "descriptors" - , "netdata" - , "stats" - , 130509 - , localhost->rrd_update_every - , RRDSET_TYPE_LINE - ); - - rd_fd_current = rrddim_add(st_fd, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); - rd_fd_max = rrddim_add(st_fd, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + if(wu->workers_cpu_registered == 0) + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_avg, 0); + else + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_avg, (collected_number)( wu->workers_cpu_total * 10000ULL / (calculated_number)wu->workers_cpu_registered )); + + rrdset_done(wu->st_workers_cpu); + } +#endif + + // ---------------------------------------------------------------------- + + if(unlikely(!wu->st_workers_jobs_per_job_type)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_jobs_by_type_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.jobs_started_by_type", wu->name_lowercase); + + wu->st_workers_jobs_per_job_type = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Jobs Started by Type" + , "jobs" + , "netdata" + , "stats" + , wu->priority + 2 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + rrdset_next(wu->st_workers_jobs_per_job_type); + + { + size_t i; + for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { + if (wu->per_job_type[i].name[0]) { + + if(unlikely(!wu->per_job_type[i].rd_jobs_started)) + wu->per_job_type[i].rd_jobs_started = rrddim_add(wu->st_workers_jobs_per_job_type, wu->per_job_type[i].name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(wu->st_workers_jobs_per_job_type, wu->per_job_type[i].rd_jobs_started, (collected_number)(wu->per_job_type[i].jobs_started)); } - else - rrdset_next(st_fd); + } + } + + rrdset_done(wu->st_workers_jobs_per_job_type); + + // ---------------------------------------------------------------------- + + if(unlikely(!wu->st_workers_busy_per_job_type)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_busy_time_by_type_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.time_by_type", wu->name_lowercase); + + wu->st_workers_busy_per_job_type = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Busy Time by Type" + , "ms" + , "netdata" + , "stats" + , wu->priority + 3 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + rrdset_next(wu->st_workers_busy_per_job_type); + + { + size_t i; + for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { + if (wu->per_job_type[i].name[0]) { - rrddim_set_by_pointer(st_fd, rd_fd_current, (collected_number)stats_array[32]); - /* Careful here, modify this accordingly if the File-Descriptor budget ever changes */ - rrddim_set_by_pointer(st_fd, rd_fd_max, (collected_number)rlimit_nofile.rlim_cur / 4); - rrdset_done(st_fd); + if(unlikely(!wu->per_job_type[i].rd_busy_time)) + wu->per_job_type[i].rd_busy_time = rrddim_add(wu->st_workers_busy_per_job_type, wu->per_job_type[i].name, NULL, 1, USEC_PER_MS, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(wu->st_workers_busy_per_job_type, wu->per_job_type[i].rd_busy_time, (collected_number)(wu->per_job_type[i].busy_time)); + } } + } - // ---------------------------------------------------------------- + rrdset_done(wu->st_workers_busy_per_job_type); - { - static RRDSET *st_ram_usage = NULL; - static RRDDIM *rd_cached = NULL; - static RRDDIM *rd_pinned = NULL; - static RRDDIM *rd_metadata = NULL; + // ---------------------------------------------------------------------- - collected_number cached_pages, pinned_pages, API_producers, populated_pages, metadata, pages_on_disk, - page_cache_descriptors; + if(wu->st_workers_threads || wu->workers_registered > 1) { + if(unlikely(!wu->st_workers_threads)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_threads_%s", wu->name_lowercase); - if (unlikely(!st_ram_usage)) { - st_ram_usage = rrdset_create_localhost( + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.threads", wu->name_lowercase); + + wu->st_workers_threads = rrdset_create_localhost( "netdata" - , "dbengine_ram" - , NULL - , "dbengine" + , name , NULL - , "Netdata DB engine RAM usage" - , "MiB" + , wu->family + , context + , "Netdata Workers Threads" + , "threads" , "netdata" , "stats" - , 130510 + , wu->priority + 4 , localhost->rrd_update_every , RRDSET_TYPE_STACKED - ); + ); - rd_cached = rrddim_add(st_ram_usage, "cache", NULL, 1, 256, RRD_ALGORITHM_ABSOLUTE); - rd_pinned = rrddim_add(st_ram_usage, "collectors", NULL, 1, 256, RRD_ALGORITHM_ABSOLUTE); - rd_metadata = rrddim_add(st_ram_usage, "metadata", NULL, 1, 1048576, RRD_ALGORITHM_ABSOLUTE); - } - else - rrdset_next(st_ram_usage); - - API_producers = (collected_number)stats_array[0]; - pages_on_disk = (collected_number)stats_array[2]; - populated_pages = (collected_number)stats_array[3]; - page_cache_descriptors = (collected_number)stats_array[27]; - - if (API_producers * 2 > populated_pages) { - pinned_pages = API_producers; - } else{ - pinned_pages = API_producers * 2; - } - cached_pages = populated_pages - pinned_pages; + wu->rd_workers_threads_free = rrddim_add(wu->st_workers_threads, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + wu->rd_workers_threads_busy = rrddim_add(wu->st_workers_threads, "busy", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + else + rrdset_next(wu->st_workers_threads); - metadata = page_cache_descriptors * sizeof(struct page_cache_descr); - metadata += pages_on_disk * sizeof(struct rrdeng_page_descr); - /* This is an empirical estimation for Judy array indexing and extent structures */ - metadata += pages_on_disk * 58; + rrddim_set_by_pointer(wu->st_workers_threads, wu->rd_workers_threads_free, (collected_number)(wu->workers_registered - wu->workers_busy)); + rrddim_set_by_pointer(wu->st_workers_threads, wu->rd_workers_threads_busy, (collected_number)(wu->workers_busy)); + rrdset_done(wu->st_workers_threads); + } +} - rrddim_set_by_pointer(st_ram_usage, rd_cached, cached_pages); - rrddim_set_by_pointer(st_ram_usage, rd_pinned, pinned_pages); - rrddim_set_by_pointer(st_ram_usage, rd_metadata, metadata); - rrdset_done(st_ram_usage); +static void workers_utilization_reset_statistics(struct worker_utilization *wu) { + wu->workers_registered = 0; + wu->workers_busy = 0; + wu->workers_total_busy_time = 0; + wu->workers_total_duration = 0; + wu->workers_total_jobs_started = 0; + wu->workers_min_busy_time = WORKERS_MIN_PERCENT_DEFAULT; + wu->workers_max_busy_time = 0; + + wu->workers_cpu_registered = 0; + wu->workers_cpu_min = WORKERS_MIN_PERCENT_DEFAULT; + wu->workers_cpu_max = 0; + wu->workers_cpu_total = 0; + + size_t i; + for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { + if(unlikely(!wu->name_lowercase)) { + wu->name_lowercase = strdupz(wu->name); + char *s = wu->name_lowercase; + for( ; *s ; s++) *s = tolower(*s); } + + wu->per_job_type[i].jobs_started = 0; + wu->per_job_type[i].busy_time = 0; } + + struct worker_thread *wt; + for(wt = wu->threads; wt ; wt = wt->next) { + wt->enabled = 0; + wt->cpu_enabled = 0; + } +} + +static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_uint_t *utime __maybe_unused, kernel_uint_t *stime __maybe_unused) { +#ifdef __linux__ + char filename[200 + 1]; + snprintfz(filename, 200, "/proc/self/task/%d/stat", pid); + + procfile *ff = procfile_open(filename, " ", PROCFILE_FLAG_NO_ERROR_ON_FILE_IO); + if(!ff) return -1; + + ff = procfile_readall(ff); + if(!ff) return -1; + + *utime = str2kernel_uint_t(procfile_lineword(ff, 0, 13)); + *stime = str2kernel_uint_t(procfile_lineword(ff, 0, 14)); + + procfile_close(ff); + return 0; +#else + // TODO: add here cpu time detection per thread, for FreeBSD and MacOS + *utime = 0; + *stime = 0; + return 1; #endif +} +static void workers_threads_cleanup(struct worker_utilization *wu) { + struct worker_thread *t; + + // free threads at the beginning of the linked list + while(wu->threads && !wu->threads->enabled) { + t = wu->threads; + wu->threads = t->next; + t->next = NULL; + freez(t); + } + + // free threads in the middle of the linked list + for(t = wu->threads; t && t->next ; t = t->next) { + if(t->next->enabled) continue; + + struct worker_thread *to_remove = t->next; + t->next = to_remove->next; + to_remove->next = NULL; + freez(to_remove); + } +} + +static struct worker_thread *worker_thread_find(struct worker_utilization *wu, pid_t pid) { + struct worker_thread *wt; + for(wt = wu->threads; wt && wt->pid != pid ; wt = wt->next) ; + return wt; +} + +static struct worker_thread *worker_thread_create(struct worker_utilization *wu, pid_t pid) { + struct worker_thread *wt; + + wt = (struct worker_thread *)callocz(1, sizeof(struct worker_thread)); + wt->pid = pid; + + // link it + wt->next = wu->threads; + wu->threads = wt; + + return wt; +} + +static struct worker_thread *worker_thread_find_or_create(struct worker_utilization *wu, pid_t pid) { + struct worker_thread *wt; + wt = worker_thread_find(wu, pid); + if(!wt) wt = worker_thread_create(wu, pid); + + return wt; +} + +static void worker_utilization_charts_callback(void *ptr, pid_t pid __maybe_unused, const char *thread_tag __maybe_unused, size_t utilization_usec __maybe_unused, size_t duration_usec __maybe_unused, size_t jobs_started __maybe_unused, size_t is_running __maybe_unused, const char **job_types_names __maybe_unused, size_t *job_types_jobs_started __maybe_unused, usec_t *job_types_busy_time __maybe_unused) { + struct worker_utilization *wu = (struct worker_utilization *)ptr; + + // find the worker_thread in the list + struct worker_thread *wt = worker_thread_find_or_create(wu, pid); + + wt->enabled = 1; + wt->busy_time = utilization_usec; + wt->jobs_started = jobs_started; + + wt->utime_old = wt->utime; + wt->stime_old = wt->stime; + wt->collected_time_old = wt->collected_time; + + wu->workers_total_busy_time += utilization_usec; + wu->workers_total_duration += duration_usec; + wu->workers_total_jobs_started += jobs_started; + wu->workers_busy += is_running; + wu->workers_registered++; + + double util = (double)utilization_usec * 100.0 / (double)duration_usec; + if(util > wu->workers_max_busy_time) + wu->workers_max_busy_time = util; + + if(util < wu->workers_min_busy_time) + wu->workers_min_busy_time = util; + + // accumulate per job type statistics + size_t i; + for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { + wu->per_job_type[i].jobs_started += job_types_jobs_started[i]; + wu->per_job_type[i].busy_time += job_types_busy_time[i]; + + // new job type found + if(unlikely(!wu->per_job_type[i].name[0] && job_types_names[i])) + strncpyz(wu->per_job_type[i].name, job_types_names[i], WORKER_UTILIZATION_MAX_JOB_NAME_LENGTH); + } + + // find its CPU utilization + if((!read_thread_cpu_time_from_proc_stat(pid, &wt->utime, &wt->stime))) { + wt->collected_time = now_realtime_usec(); + usec_t delta = wt->collected_time - wt->collected_time_old; + + double utime = (double)(wt->utime - wt->utime_old) / (double)system_hz * 100.0 * (double)USEC_PER_SEC / (double)delta; + double stime = (double)(wt->stime - wt->stime_old) / (double)system_hz * 100.0 * (double)USEC_PER_SEC / (double)delta; + double cpu = utime + stime; + wt->cpu = cpu; + wt->cpu_enabled = 1; + + wu->workers_cpu_total += cpu; + if(cpu < wu->workers_cpu_min) wu->workers_cpu_min = cpu; + if(cpu > wu->workers_cpu_max) wu->workers_cpu_max = cpu; + } + wu->workers_cpu_registered += wt->cpu_enabled; +} + +static void worker_utilization_charts(void) { + static size_t iterations = 0; + iterations++; + + int i; + for(i = 0; all_workers_utilization[i].name ;i++) { + workers_utilization_reset_statistics(&all_workers_utilization[i]); + workers_foreach(all_workers_utilization[i].name, worker_utilization_charts_callback, &all_workers_utilization[i]); + + // skip the first iteration, so that we don't accumulate startup utilization to our charts + if(likely(iterations > 1)) + workers_utilization_update_chart(&all_workers_utilization[i]); + + workers_threads_cleanup(&all_workers_utilization[i]); + } + + workers_total_cpu_utilization_chart(); } +static void worker_utilization_finish(void) { + int i; + for(i = 0; all_workers_utilization[i].name ;i++) { + struct worker_utilization *wu = &all_workers_utilization[i]; + + if(wu->name_lowercase) { + freez(wu->name_lowercase); + wu->name_lowercase = NULL; + } + + // mark all threads as not enabled + struct worker_thread *t; + for(t = wu->threads; t ; t = t->next) t->enabled = 0; + + // let the cleanup job free them + workers_threads_cleanup(wu); + } +} + +// --------------------------------------------------------------------------------------------------------------------- + static void global_statistics_cleanup(void *ptr) { + worker_unregister(); + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; info("cleaning up..."); + worker_utilization_finish(); + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; } void *global_statistics_main(void *ptr) { + worker_register("STATS"); + worker_register_job_name(WORKER_JOB_GLOBAL, "global"); + worker_register_job_name(WORKER_JOB_REGISTRY, "registry"); + worker_register_job_name(WORKER_JOB_WORKERS, "workers"); + worker_register_job_name(WORKER_JOB_DBENGINE, "dbengine"); + netdata_thread_cleanup_push(global_statistics_cleanup, ptr); int update_every = - (int)config_get_number("CONFIG_SECTION_GLOBAL_STATISTICS", "update every", localhost->rrd_update_every); + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); if (update_every < localhost->rrd_update_every) update_every = localhost->rrd_update_every; usec_t step = update_every * USEC_PER_SEC; heartbeat_t hb; heartbeat_init(&hb); + + // keep the randomness at zero + // to make sure we are not close to any other thread + hb.randomness = 0; + while (!netdata_exit) { + worker_is_idle(); heartbeat_next(&hb, step); + worker_is_busy(WORKER_JOB_WORKERS); + worker_utilization_charts(); + + worker_is_busy(WORKER_JOB_GLOBAL); global_statistics_charts(); + + worker_is_busy(WORKER_JOB_REGISTRY); registry_statistics(); + + worker_is_busy(WORKER_JOB_DBENGINE); + dbengine_statistics_charts(); + + worker_is_busy(WORKER_JOB_HEARTBEAT); + update_heartbeat_charts(); } netdata_thread_cleanup_pop(1); diff --git a/daemon/main.c b/daemon/main.c index c18778974..e10d38b40 100644 --- a/daemon/main.c +++ b/daemon/main.c @@ -340,12 +340,12 @@ int help(int exitcode) { " -W sqlite-compact Reclaim metadata database unused space and exit.\n\n" #ifdef ENABLE_DBENGINE " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n" - " -W stresstest=A,B,C,D,E,F\n" + " -W stresstest=A,B,C,D,E,F,G\n" " Run a DB engine stress test for A seconds,\n" " with B writers and C readers, with a ramp up\n" " time of D seconds for writers, a page cache\n" " size of E MiB, an optional disk space limit\n" - " of F MiB and exit.\n\n" + " of F MiB, G libuv workers (default 16) and exit.\n\n" #endif " -W set section option value\n" " set netdata.conf option from the command line.\n\n" @@ -385,24 +385,24 @@ static void security_init(){ static void log_init(void) { char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s/debug.log", netdata_configured_log_dir); - stdout_filename = config_get(CONFIG_SECTION_GLOBAL, "debug log", filename); + stdout_filename = config_get(CONFIG_SECTION_LOGS, "debug", filename); snprintfz(filename, FILENAME_MAX, "%s/error.log", netdata_configured_log_dir); - stderr_filename = config_get(CONFIG_SECTION_GLOBAL, "error log", filename); + stderr_filename = config_get(CONFIG_SECTION_LOGS, "error", filename); snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir); - stdaccess_filename = config_get(CONFIG_SECTION_GLOBAL, "access log", filename); + stdaccess_filename = config_get(CONFIG_SECTION_LOGS, "access", filename); char deffacility[8]; snprintfz(deffacility,7,"%s","daemon"); - facility_log = config_get(CONFIG_SECTION_GLOBAL, "facility log", deffacility); + facility_log = config_get(CONFIG_SECTION_LOGS, "facility", deffacility); - error_log_throttle_period = config_get_number(CONFIG_SECTION_GLOBAL, "errors flood protection period", error_log_throttle_period); - error_log_errors_per_period = (unsigned long)config_get_number(CONFIG_SECTION_GLOBAL, "errors to trigger flood protection", (long long int)error_log_errors_per_period); + error_log_throttle_period = config_get_number(CONFIG_SECTION_LOGS, "errors flood protection period", error_log_throttle_period); + error_log_errors_per_period = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "errors to trigger flood protection", (long long int)error_log_errors_per_period); error_log_errors_per_period_backup = error_log_errors_per_period; - setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get(CONFIG_SECTION_GLOBAL, "errors flood protection period" , ""), 1); - setenv("NETDATA_ERRORS_PER_PERIOD", config_get(CONFIG_SECTION_GLOBAL, "errors to trigger flood protection", ""), 1); + setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors flood protection period" , ""), 1); + setenv("NETDATA_ERRORS_PER_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors to trigger flood protection", ""), 1); } char *initialize_lock_directory_path(char *prefix) @@ -410,7 +410,7 @@ char *initialize_lock_directory_path(char *prefix) char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s/lock", prefix); - return config_get(CONFIG_SECTION_GLOBAL, "lock directory", filename); + return config_get(CONFIG_SECTION_DIRECTORIES, "lock", filename); } static void backwards_compatible_config() { @@ -447,6 +447,75 @@ static void backwards_compatible_config() { config_move(CONFIG_SECTION_GLOBAL, "web compression level", CONFIG_SECTION_WEB, "gzip compression level"); + + config_move(CONFIG_SECTION_GLOBAL, "config directory", + CONFIG_SECTION_DIRECTORIES, "config"); + + config_move(CONFIG_SECTION_GLOBAL, "stock config directory", + CONFIG_SECTION_DIRECTORIES, "stock config"); + + config_move(CONFIG_SECTION_GLOBAL, "log directory", + CONFIG_SECTION_DIRECTORIES, "log"); + + config_move(CONFIG_SECTION_GLOBAL, "web files directory", + CONFIG_SECTION_DIRECTORIES, "web"); + + config_move(CONFIG_SECTION_GLOBAL, "cache directory", + CONFIG_SECTION_DIRECTORIES, "cache"); + + config_move(CONFIG_SECTION_GLOBAL, "lib directory", + CONFIG_SECTION_DIRECTORIES, "lib"); + + config_move(CONFIG_SECTION_GLOBAL, "home directory", + CONFIG_SECTION_DIRECTORIES, "home"); + + config_move(CONFIG_SECTION_GLOBAL, "lock directory", + CONFIG_SECTION_DIRECTORIES, "lock"); + + config_move(CONFIG_SECTION_GLOBAL, "plugins directory", + CONFIG_SECTION_DIRECTORIES, "plugins"); + + config_move(CONFIG_SECTION_HEALTH, "health configuration directory", + CONFIG_SECTION_DIRECTORIES, "health config"); + + config_move(CONFIG_SECTION_HEALTH, "stock health configuration directory", + CONFIG_SECTION_DIRECTORIES, "stock health config"); + + config_move(CONFIG_SECTION_REGISTRY, "registry db directory", + CONFIG_SECTION_DIRECTORIES, "registry"); + + config_move(CONFIG_SECTION_GLOBAL, "debug log", + CONFIG_SECTION_LOGS, "debug"); + + config_move(CONFIG_SECTION_GLOBAL, "error log", + CONFIG_SECTION_LOGS, "error"); + + config_move(CONFIG_SECTION_GLOBAL, "access log", + CONFIG_SECTION_LOGS, "access"); + + config_move(CONFIG_SECTION_GLOBAL, "facility log", + CONFIG_SECTION_LOGS, "facility"); + + config_move(CONFIG_SECTION_GLOBAL, "errors flood protection period", + CONFIG_SECTION_LOGS, "errors flood protection period"); + + config_move(CONFIG_SECTION_GLOBAL, "errors to trigger flood protection", + CONFIG_SECTION_LOGS, "errors to trigger flood protection"); + + config_move(CONFIG_SECTION_GLOBAL, "debug flags", + CONFIG_SECTION_LOGS, "debug flags"); + + config_move(CONFIG_SECTION_GLOBAL, "TZ environment variable", + CONFIG_SECTION_ENV_VARS, "TZ"); + + config_move(CONFIG_SECTION_PLUGINS, "PATH environment variable", + CONFIG_SECTION_ENV_VARS, "PATH"); + + config_move(CONFIG_SECTION_PLUGINS, "PYTHONPATH environment variable", + CONFIG_SECTION_ENV_VARS, "PYTHONPATH"); + + config_move(CONFIG_SECTION_STATSD, "enabled", + CONFIG_SECTION_PLUGINS, "statsd"); } static void get_netdata_configured_variables() { @@ -491,14 +560,14 @@ static void get_netdata_configured_variables() { // ------------------------------------------------------------------------ // get system paths - netdata_configured_user_config_dir = config_get(CONFIG_SECTION_GLOBAL, "config directory", netdata_configured_user_config_dir); - netdata_configured_stock_config_dir = config_get(CONFIG_SECTION_GLOBAL, "stock config directory", netdata_configured_stock_config_dir); - netdata_configured_log_dir = config_get(CONFIG_SECTION_GLOBAL, "log directory", netdata_configured_log_dir); - netdata_configured_web_dir = config_get(CONFIG_SECTION_GLOBAL, "web files directory", netdata_configured_web_dir); - netdata_configured_cache_dir = config_get(CONFIG_SECTION_GLOBAL, "cache directory", netdata_configured_cache_dir); - netdata_configured_varlib_dir = config_get(CONFIG_SECTION_GLOBAL, "lib directory", netdata_configured_varlib_dir); + netdata_configured_user_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "config", netdata_configured_user_config_dir); + netdata_configured_stock_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "stock config", netdata_configured_stock_config_dir); + netdata_configured_log_dir = config_get(CONFIG_SECTION_DIRECTORIES, "log", netdata_configured_log_dir); + netdata_configured_web_dir = config_get(CONFIG_SECTION_DIRECTORIES, "web", netdata_configured_web_dir); + netdata_configured_cache_dir = config_get(CONFIG_SECTION_DIRECTORIES, "cache", netdata_configured_cache_dir); + netdata_configured_varlib_dir = config_get(CONFIG_SECTION_DIRECTORIES, "lib", netdata_configured_varlib_dir); char *env_home=getenv("HOME"); - netdata_configured_home_dir = config_get(CONFIG_SECTION_GLOBAL, "home directory", env_home?env_home:netdata_configured_home_dir); + netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", env_home?env_home:netdata_configured_home_dir); netdata_configured_lock_dir = initialize_lock_directory_path(netdata_configured_varlib_dir); @@ -554,6 +623,10 @@ static void get_netdata_configured_variables() { #endif // -------------------------------------------------------------------- + // metric correlations + enable_metric_correlations = config_get_boolean(CONFIG_SECTION_GLOBAL, "enable metric correlations", enable_metric_correlations); + + // -------------------------------------------------------------------- // get various system parameters get_system_HZ(); @@ -563,7 +636,7 @@ static void get_netdata_configured_variables() { } -static int load_netdata_conf(char *filename, char overwrite_used) { +int load_netdata_conf(char *filename, char overwrite_used) { errno = 0; int ret = 0; @@ -773,17 +846,17 @@ int main(int argc, char **argv) { char* stresstest_string = "stresstest="; #endif if(strcmp(optarg, "sqlite-check") == 0) { - sql_init_database(DB_CHECK_INTEGRITY); + sql_init_database(DB_CHECK_INTEGRITY, 0); return 0; } if(strcmp(optarg, "sqlite-fix") == 0) { - sql_init_database(DB_CHECK_FIX_DB); + sql_init_database(DB_CHECK_FIX_DB, 0); return 0; } if(strcmp(optarg, "sqlite-compact") == 0) { - sql_init_database(DB_CHECK_RECLAIM_SPACE); + sql_init_database(DB_CHECK_RECLAIM_SPACE, 0); return 0; } @@ -822,6 +895,9 @@ int main(int argc, char **argv) { } #endif #ifdef ENABLE_DBENGINE + else if(strcmp(optarg, "dicttest") == 0) { + return dictionary_unittest(10000); + } else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) { optarg += strlen(createdataset_string); unsigned history_seconds = strtoul(optarg, NULL, 0); @@ -831,7 +907,7 @@ int main(int argc, char **argv) { else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) { char *endptr; unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0, - page_cache_mb = 0, disk_space_mb = 0; + page_cache_mb = 0, disk_space_mb = 0, workers = 16; optarg += strlen(stresstest_string); test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0); @@ -845,7 +921,15 @@ int main(int argc, char **argv) { page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0); if (',' == *endptr) disk_space_mb = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + workers = (unsigned)strtoul(endptr + 1, &endptr, 0); + + if (workers > 1024) + workers = 1024; + char workers_str[16]; + snprintf(workers_str, 15, "%u", workers); + setenv("UV_THREADPOOL_SIZE", workers_str, 1); dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds, page_cache_mb, disk_space_mb); return 0; @@ -900,7 +984,7 @@ int main(int argc, char **argv) { } else if(strncmp(optarg, debug_flags_string, strlen(debug_flags_string)) == 0) { optarg += strlen(debug_flags_string); - config_set(CONFIG_SECTION_GLOBAL, "debug flags", optarg); + config_set(CONFIG_SECTION_LOGS, "debug flags", optarg); debug_flags = strtoull(optarg, NULL, 0); } else if(strcmp(optarg, "set") == 0) { @@ -1087,11 +1171,13 @@ int main(int argc, char **argv) { if(i > 0) mallopt(M_ARENA_MAX, 1); #endif - test_clock_boottime(); - test_clock_monotonic_coarse(); + + // initialize the system clocks + clocks_init(); // prepare configuration environment variables for the plugins + setenv("UV_THREADPOOL_SIZE", config_get(CONFIG_SECTION_GLOBAL, "libuv worker threads", "16"), 1); get_netdata_configured_variables(); set_global_environment(); @@ -1109,7 +1195,7 @@ int main(int argc, char **argv) { // -------------------------------------------------------------------- // get the debugging flags from the configuration file - char *flags = config_get(CONFIG_SECTION_GLOBAL, "debug flags", "0x0000000000000000"); + char *flags = config_get(CONFIG_SECTION_LOGS, "debug flags", "0x0000000000000000"); setenv("NETDATA_DEBUG_FLAGS", flags, 1); debug_flags = strtoull(flags, NULL, 0); @@ -1229,7 +1315,7 @@ int main(int argc, char **argv) { // initialize rrd, registry, health, rrdpush, etc. netdata_anonymous_statistics_enabled=-1; - struct rrdhost_system_info *system_info = calloc(1, sizeof(struct rrdhost_system_info)); + struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info)); get_system_info(system_info); system_info->hops = 0; get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist); diff --git a/daemon/system-info.sh b/daemon/system-info.sh index 7fb2f25b5..12553e3da 100755 --- a/daemon/system-info.sh +++ b/daemon/system-info.sh @@ -10,6 +10,18 @@ ARCHITECTURE="$(uname -m)" # ------------------------------------------------------------------------------------------------- # detect the virtualization and possibly the container technology +# systemd-detect-virt: https://github.com/systemd/systemd/blob/df423851fcc05cf02281d11aab6aee7b476c1c3b/src/basic/virt.c#L999 +# lscpu: https://github.com/util-linux/util-linux/blob/581b77da7aa4a5205902857184d555bed367e3e0/sys-utils/lscpu.c#L52 +virtualization_normalize_name() { + vname="$1" + case "$vname" in + "User-mode Linux") vname="uml" ;; + "Windows Subsystem for Linux") vname="wsl" ;; + esac + + echo "$vname" | tr '[:upper:]' '[:lower:]' | sed 's/ /-/g' +} + CONTAINER="unknown" CONT_DETECTION="none" CONTAINER_IS_OFFICIAL_IMAGE="${NETDATA_OFFICIAL_IMAGE:-false}" @@ -18,25 +30,31 @@ if [ -z "${VIRTUALIZATION}" ]; then VIRTUALIZATION="unknown" VIRT_DETECTION="none" - if [ -n "$(command -v systemd-detect-virt 2> /dev/null)" ]; then + if command -v systemd-detect-virt >/dev/null 2>&1; then VIRTUALIZATION="$(systemd-detect-virt -v)" VIRT_DETECTION="systemd-detect-virt" CONTAINER=${CONTAINER:-$(systemd-detect-virt -c)} CONT_DETECTION="systemd-detect-virt" - else - if grep -q "^flags.*hypervisor" /proc/cpuinfo 2> /dev/null; then - VIRTUALIZATION="hypervisor" - VIRT_DETECTION="/proc/cpuinfo" - elif [ -n "$(command -v dmidecode)" ] && dmidecode -s system-product-name 2> /dev/null | grep -q "VMware\|Virtual\|KVM\|Bochs"; then - VIRTUALIZATION="$(dmidecode -s system-product-name)" - VIRT_DETECTION="dmidecode" - else - VIRTUALIZATION="none" - fi + elif command -v lscpu >/dev/null 2>&1; then + VIRTUALIZATION=$(lscpu | grep "Hypervisor vendor:" | cut -d: -f 2 | awk '{$1=$1};1') + [ -n "$VIRTUALIZATION" ] && VIRT_DETECTION="lscpu" + [ -z "$VIRTUALIZATION" ] && lscpu | grep -q "Virtualization:" && VIRTUALIZATION="none" + elif command -v dmidecode >/dev/null 2>&1; then + VIRTUALIZATION=$(dmidecode -s system-product-name 2>/dev/null | grep "VMware\|Virtual\|KVM\|Bochs") + [ -n "$VIRTUALIZATION" ] && VIRT_DETECTION="dmidecode" + fi + + if [ -z "${VIRTUALIZATION}" ] && [ "${KERNEL_NAME}" = "FreeBSD" ]; then + VIRTUALIZATION=$(sysctl kern.vm_guest 2>/dev/null | cut -d: -f 2 | awk '{$1=$1};1') + [ -n "$VIRTUALIZATION" ] && VIRT_DETECTION="sysctl" fi + if [ -z "${VIRTUALIZATION}" ]; then # Output from the command is outside of spec VIRTUALIZATION="unknown" + VIRT_DETECTION="none" + elif [ "$VIRTUALIZATION" != "none" ] && [ "$VIRTUALIZATION" != "unknown" ]; then + VIRTUALIZATION=$(virtualization_normalize_name $VIRTUALIZATION) fi else # Passed from outside - probably in docker run @@ -332,18 +350,23 @@ DISK_SIZE="unknown" DISK_DETECTION="none" if [ "${KERNEL_NAME}" = "Darwin" ]; then - types='hfs' + if DISK_SIZE=$(diskutil info / 2>/dev/null | awk '/Disk Size/ {total += substr($5,2,length($5))} END { print total }') && + [ -n "$DISK_SIZE" ] && [ "$DISK_SIZE" != "0" ]; then + DISK_DETECTION="diskutil" + else + types='hfs' - if (lsvfs | grep -q apfs); then - types="${types},apfs" - fi + if (lsvfs | grep -q apfs); then + types="${types},apfs" + fi - if (lsvfs | grep -q ufs); then - types="${types},ufs" - fi + if (lsvfs | grep -q ufs); then + types="${types},ufs" + fi - DISK_DETECTION="df" - DISK_SIZE=$(($(/bin/df -k -t ${types} | tail -n +2 | sed -E 's/\/dev\/disk([[:digit:]]*)s[[:digit:]]*/\/dev\/disk\1/g' | sort -k 1 | awk -F ' ' '{s=$NF;for(i=NF-1;i>=1;i--)s=s FS $i;print s}' | uniq -f 9 | awk '{print $8}' | tr '\n' '+' | rev | cut -f 2- -d '+' | rev) * 1024)) + DISK_DETECTION="df" + DISK_SIZE=$(($(/bin/df -k -t ${types} | tail -n +2 | sed -E 's/\/dev\/disk([[:digit:]]*)s[[:digit:]]*/\/dev\/disk\1/g' | sort -k 1 | awk -F ' ' '{s=$NF;for(i=NF-1;i>=1;i--)s=s FS $i;print s}' | uniq -f 9 | awk '{print $8}' | tr '\n' '+' | rev | cut -f 2- -d '+' | rev) * 1024)) + fi elif [ "${KERNEL_NAME}" = FreeBSD ]; then types='ufs' @@ -411,37 +434,43 @@ CLOUD_INSTANCE_TYPE="unknown" CLOUD_INSTANCE_REGION="unknown" if [ "${VIRTUALIZATION}" != "none" ] && command -v curl > /dev/null 2>&1; then - # Try AWS IMDSv2 - if [ "${CLOUD_TYPE}" = "unknown" ]; then - AWS_IMDS_TOKEN="$(curl --fail -s -m 5 --noproxy "*" -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")" - if [ -n "${AWS_IMDS_TOKEN}" ]; then - CLOUD_TYPE="AWS" - CLOUD_INSTANCE_TYPE="$(curl --fail -s -m 5 --noproxy "*" -H "X-aws-ec2-metadata-token: $AWS_IMDS_TOKEN" -v "http://169.254.169.254/latest/meta-data/instance-type" 2> /dev/null)" - CLOUD_INSTANCE_REGION="$(curl --fail -s -m 5 --noproxy "*" -H "X-aws-ec2-metadata-token: $AWS_IMDS_TOKEN" -v "http://169.254.169.254/latest/meta-data/placement/region" 2> /dev/null)" + # Returned HTTP status codes: GCP is 200, AWS is 200, DO is 404. + curl --fail -s -m 1 --noproxy "*" http://169.254.169.254 >/dev/null 2>&1 + ret=$? + # anything but operation timeout. + if [ "$ret" != 28 ]; then + # Try AWS IMDSv2 + if [ "${CLOUD_TYPE}" = "unknown" ]; then + AWS_IMDS_TOKEN="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")" + if [ -n "${AWS_IMDS_TOKEN}" ]; then + CLOUD_TYPE="AWS" + CLOUD_INSTANCE_TYPE="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "X-aws-ec2-metadata-token: $AWS_IMDS_TOKEN" -v "http://169.254.169.254/latest/meta-data/instance-type" 2>/dev/null)" + CLOUD_INSTANCE_REGION="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "X-aws-ec2-metadata-token: $AWS_IMDS_TOKEN" -v "http://169.254.169.254/latest/meta-data/placement/region" 2>/dev/null)" + fi fi - fi - # Try GCE computeMetadata v1 - if [ "${CLOUD_TYPE}" = "unknown" ]; then - if [ -n "$(curl --fail -s -m 5 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1")" ]; then - CLOUD_TYPE="GCP" - CLOUD_INSTANCE_TYPE="$(curl --fail -s -m 5 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/machine-type")" - [ -n "$CLOUD_INSTANCE_TYPE" ] && CLOUD_INSTANCE_TYPE=$(basename "$CLOUD_INSTANCE_TYPE") - CLOUD_INSTANCE_REGION="$(curl --fail -s -m 5 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/zone")" - [ -n "$CLOUD_INSTANCE_REGION" ] && CLOUD_INSTANCE_REGION=$(basename "$CLOUD_INSTANCE_REGION") && CLOUD_INSTANCE_REGION=${CLOUD_INSTANCE_REGION%-*} + # Try GCE computeMetadata v1 + if [ "${CLOUD_TYPE}" = "unknown" ]; then + if [ -n "$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1")" ]; then + CLOUD_TYPE="GCP" + CLOUD_INSTANCE_TYPE="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/machine-type")" + [ -n "$CLOUD_INSTANCE_TYPE" ] && CLOUD_INSTANCE_TYPE=$(basename "$CLOUD_INSTANCE_TYPE") + CLOUD_INSTANCE_REGION="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/zone")" + [ -n "$CLOUD_INSTANCE_REGION" ] && CLOUD_INSTANCE_REGION=$(basename "$CLOUD_INSTANCE_REGION") && CLOUD_INSTANCE_REGION=${CLOUD_INSTANCE_REGION%-*} + fi fi - fi - # TODO: needs to be tested in Microsoft Azure - # Try Azure IMDS - # if [ "${CLOUD_TYPE}" = "unknown" ]; then - # AZURE_IMDS_DATA="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance?version=2021-10-01")" - # if [ -n "${AZURE_IMDS_DATA}" ]; then - # CLOUD_TYPE="Azure" - # CLOUD_INSTANCE_TYPE="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance/compute/vmSize?version=2021-10-01&format=text")" - # CLOUD_INSTANCE_REGION="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance/compute/location?version=2021-10-01&format=text")" - # fi - # fi + # TODO: needs to be tested in Microsoft Azure + # Try Azure IMDS + # if [ "${CLOUD_TYPE}" = "unknown" ]; then + # AZURE_IMDS_DATA="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance?version=2021-10-01")" + # if [ -n "${AZURE_IMDS_DATA}" ]; then + # CLOUD_TYPE="Azure" + # CLOUD_INSTANCE_TYPE="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance/compute/vmSize?version=2021-10-01&format=text")" + # CLOUD_INSTANCE_REGION="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance/compute/location?version=2021-10-01&format=text")" + # fi + # fi + fi fi echo "NETDATA_CONTAINER_OS_NAME=${CONTAINER_NAME}" diff --git a/daemon/unit_test.c b/daemon/unit_test.c index 7a52735d5..35f8613a2 100644 --- a/daemon/unit_test.c +++ b/daemon/unit_test.c @@ -1732,7 +1732,8 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS] update_every = REGION_UPDATE_EVERY[current_region]; long points = (time_end - time_start) / update_every; for (i = 0 ; i < CHARTS ; ++i) { - RRDR *r = rrd2rrdr(st[i], points, time_start + update_every, time_end, RRDR_GROUPING_AVERAGE, 0, 0, NULL, NULL, 0); + ONEWAYALLOC *owa = onewayalloc_create(0); + RRDR *r = rrd2rrdr(owa, st[i], points, time_start + update_every, time_end, RRDR_GROUPING_AVERAGE, 0, 0, NULL, NULL, 0); if (!r) { fprintf(stderr, " DB-engine unittest %s: empty RRDR ### E R R O R ###\n", st[i]->name); return ++errors; @@ -1766,8 +1767,9 @@ static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS] } } } - rrdr_free(r); + rrdr_free(owa, r); } + onewayalloc_destroy(owa); } return errors; } @@ -1851,7 +1853,8 @@ int test_dbengine(void) long points = (time_end[REGIONS - 1] - time_start[0]) / update_every; // cover all time regions with RRDR long point_offset = (time_start[current_region] - time_start[0]) / update_every; for (i = 0 ; i < CHARTS ; ++i) { - RRDR *r = rrd2rrdr(st[i], points, time_start[0] + update_every, time_end[REGIONS - 1], RRDR_GROUPING_AVERAGE, 0, 0, NULL, NULL, 0); + ONEWAYALLOC *owa = onewayalloc_create(0); + RRDR *r = rrd2rrdr(owa, st[i], points, time_start[0] + update_every, time_end[REGIONS - 1], RRDR_GROUPING_AVERAGE, 0, 0, NULL, NULL, 0); if (!r) { fprintf(stderr, " DB-engine unittest %s: empty RRDR ### E R R O R ###\n", st[i]->name); ++errors; @@ -1888,8 +1891,9 @@ int test_dbengine(void) } } } - rrdr_free(r); + rrdr_free(owa, r); } + onewayalloc_destroy(owa); } error_out: rrd_wrlock(); @@ -2186,6 +2190,7 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi fprintf(stderr, "Initializing localhost with hostname 'dbengine-stress-test'\n"); + (void) sql_init_database(DB_CHECK_NONE, 1); host = dbengine_rrdhost_find_or_create("dbengine-stress-test"); if (NULL == host) return; @@ -2262,9 +2267,8 @@ void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsi for (i = 0 ; i < DSET_CHARTS ; ++i) { stored_metrics_nr += chart_threads[i]->stored_metrics_nr; } - unsigned long queries_nr = 0, queried_metrics_nr = 0; + unsigned long queried_metrics_nr = 0; for (i = 0 ; i < QUERY_THREADS ; ++i) { - queries_nr += query_threads[i]->queries_nr; queried_metrics_nr += query_threads[i]->queried_metrics_nr; } fprintf(stderr, "%u metrics were stored (dataset size of %lu MiB) in %u charts by 1 writer thread per chart.\n", |