From 836b47cb7e99a977c5a23b059ca1d0b5065d310e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Wed, 24 Jul 2024 11:54:23 +0200 Subject: Merging upstream version 1.46.3. Signed-off-by: Daniel Baumann --- src/daemon/README.md | 438 +++ src/daemon/analytics.c | 1083 ++++++++ src/daemon/analytics.h | 100 + src/daemon/anonymous-statistics.sh.in | 197 ++ src/daemon/buildinfo.c | 1520 +++++++++++ src/daemon/buildinfo.h | 16 + src/daemon/commands.c | 814 ++++++ src/daemon/commands.h | 80 + src/daemon/common.c | 197 ++ src/daemon/common.h | 141 + src/daemon/config/README.md | 231 ++ src/daemon/config/dyncfg-echo.c | 175 ++ src/daemon/config/dyncfg-files.c | 264 ++ src/daemon/config/dyncfg-inline.c | 66 + src/daemon/config/dyncfg-intercept.c | 429 +++ src/daemon/config/dyncfg-internals.h | 145 + src/daemon/config/dyncfg-tree.c | 292 ++ src/daemon/config/dyncfg-unittest.c | 799 ++++++ src/daemon/config/dyncfg.c | 454 ++++ src/daemon/config/dyncfg.h | 34 + src/daemon/daemon.c | 511 ++++ src/daemon/daemon.h | 16 + src/daemon/event_loop.c | 66 + src/daemon/event_loop.h | 55 + src/daemon/get-kubernetes-labels.sh.in | 59 + src/daemon/global_statistics.c | 4370 ++++++++++++++++++++++++++++++ src/daemon/global_statistics.h | 64 + src/daemon/main.c | 2370 ++++++++++++++++ src/daemon/main.h | 44 + src/daemon/metrics.csv | 254 ++ src/daemon/pipename.c | 17 + src/daemon/pipename.h | 8 + src/daemon/sentry-native/sentry-native.c | 62 + src/daemon/sentry-native/sentry-native.h | 11 + src/daemon/service.c | 328 +++ src/daemon/signals.c | 244 ++ src/daemon/signals.h | 13 + src/daemon/static_threads.c | 226 ++ src/daemon/static_threads.h | 16 + src/daemon/static_threads_freebsd.c | 33 + src/daemon/static_threads_linux.c | 73 + src/daemon/static_threads_macos.c | 35 + src/daemon/static_threads_windows.c | 33 + src/daemon/system-info.sh | 522 ++++ src/daemon/unit_test.c | 1695 ++++++++++++ src/daemon/unit_test.h | 26 + src/daemon/watcher.c | 178 ++ src/daemon/watcher.h | 54 + 48 files 
changed, 18858 insertions(+) create mode 100644 src/daemon/README.md create mode 100644 src/daemon/analytics.c create mode 100644 src/daemon/analytics.h create mode 100755 src/daemon/anonymous-statistics.sh.in create mode 100644 src/daemon/buildinfo.c create mode 100644 src/daemon/buildinfo.h create mode 100644 src/daemon/commands.c create mode 100644 src/daemon/commands.h create mode 100644 src/daemon/common.c create mode 100644 src/daemon/common.h create mode 100644 src/daemon/config/README.md create mode 100644 src/daemon/config/dyncfg-echo.c create mode 100644 src/daemon/config/dyncfg-files.c create mode 100644 src/daemon/config/dyncfg-inline.c create mode 100644 src/daemon/config/dyncfg-intercept.c create mode 100644 src/daemon/config/dyncfg-internals.h create mode 100644 src/daemon/config/dyncfg-tree.c create mode 100644 src/daemon/config/dyncfg-unittest.c create mode 100644 src/daemon/config/dyncfg.c create mode 100644 src/daemon/config/dyncfg.h create mode 100644 src/daemon/daemon.c create mode 100644 src/daemon/daemon.h create mode 100644 src/daemon/event_loop.c create mode 100644 src/daemon/event_loop.h create mode 100755 src/daemon/get-kubernetes-labels.sh.in create mode 100644 src/daemon/global_statistics.c create mode 100644 src/daemon/global_statistics.h create mode 100644 src/daemon/main.c create mode 100644 src/daemon/main.h create mode 100644 src/daemon/metrics.csv create mode 100644 src/daemon/pipename.c create mode 100644 src/daemon/pipename.h create mode 100644 src/daemon/sentry-native/sentry-native.c create mode 100644 src/daemon/sentry-native/sentry-native.h create mode 100644 src/daemon/service.c create mode 100644 src/daemon/signals.c create mode 100644 src/daemon/signals.h create mode 100644 src/daemon/static_threads.c create mode 100644 src/daemon/static_threads.h create mode 100644 src/daemon/static_threads_freebsd.c create mode 100644 src/daemon/static_threads_linux.c create mode 100644 src/daemon/static_threads_macos.c create mode 
100644 src/daemon/static_threads_windows.c create mode 100755 src/daemon/system-info.sh create mode 100644 src/daemon/unit_test.c create mode 100644 src/daemon/unit_test.h create mode 100644 src/daemon/watcher.c create mode 100644 src/daemon/watcher.h (limited to 'src/daemon') diff --git a/src/daemon/README.md b/src/daemon/README.md new file mode 100644 index 000000000..bc2ec7757 --- /dev/null +++ b/src/daemon/README.md @@ -0,0 +1,438 @@ +# Netdata daemon + +The Netdata daemon is practically a synonym for the Netdata Agent, as it controls its +entire operation. We support various methods to +[start, stop, or restart the daemon](/packaging/installer/README.md#maintaining-a-netdata-agent-installation). + +This document provides some basic information on the command line options, log files, and how to debug and troubleshoot + +## Command line options + +Normally you don't need to supply any command line arguments to netdata. + +If you do though, they override the configuration equivalent options. + +To get a list of all command line parameters supported, run: + +```sh +netdata -h +``` + +The program will print the supported command line parameters. + +The command line options of the Netdata 1.10.0 version are the following: + +```sh + ^ + |.-. .-. .-. .-. . netdata + | '-' '-' '-' '-' real-time performance monitoring, done right! + +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+---> + + Copyright (C) 2016-2022, Netdata, Inc. + Released under GNU General Public License v3 or later. + All rights reserved. 
+ + Home Page : https://netdata.cloud + Source Code: https://github.com/netdata/netdata + Docs : https://learn.netdata.cloud + Support : https://github.com/netdata/netdata/issues + License : https://github.com/netdata/netdata/blob/master/LICENSE.md + + Twitter : https://twitter.com/netdatahq + LinkedIn : https://linkedin.com/company/netdata-cloud/ + Facebook : https://facebook.com/linuxnetdata/ + + + SYNOPSIS: netdata [options] + + Options: + + -c filename Configuration file to load. + Default: /etc/netdata/netdata.conf + + -D Do not fork. Run in the foreground. + Default: run in the background + + -h Display this help message. + + -P filename File to save a pid while running. + Default: do not save pid to a file + + -i IP The IP address to listen to. + Default: all IP addresses IPv4 and IPv6 + + -p port API/Web port to use. + Default: 19999 + + -s path Prefix for /proc and /sys (for containers). + Default: no prefix + + -t seconds The internal clock of netdata. + Default: 1 + + -u username Run as user. + Default: netdata + + -v Print netdata version and exit. + + -V Print netdata version and exit. + + -W options See Advanced options below. + + + Advanced options: + + -W stacksize=N Set the stacksize (in bytes). + + -W debug_flags=N Set runtime tracing to debug.log. + + -W unittest Run internal unittests and exit. + + -W createdataset=N Create a DB engine dataset of N seconds and exit. + + -W set section option value + set netdata.conf option from the command line. + + -W buildinfo Print the version, the configure options, + a list of optional features, and whether they + are enabled or not. + + -W buildinfojson Print the version, the configure options, + a list of optional features, and whether they + are enabled or not, in JSON format. + + -W simple-pattern pattern string + Check if string matches pattern and exit. + + -W "claim -token=TOKEN -rooms=ROOM1,ROOM2 url=https://app.netdata.cloud" + Connect the agent to the workspace Rooms pointed to by TOKEN and ROOM*. 
+ + Signals netdata handles: + + - HUP Close and reopen log files. + - USR2 Reload health configuration. +``` + +You can send commands during runtime via [netdatacli](/src/cli/README.md). + +## Log files + +Netdata uses 4 log files: + +1. `error.log` +2. `collector.log` +3. `access.log` +4. `debug.log` + +Any of them can be disabled by setting it to `/dev/null` or `none` in `netdata.conf`. By default `error.log`, +`collector.log`, and `access.log` are enabled. `debug.log` is only enabled if debugging/tracing is also enabled +(Netdata needs to be compiled with debugging enabled). + +Log files are stored in `/var/log/netdata/` by default. + +### error.log + +The `error.log` is the `stderr` of the `netdata` daemon. + +For most Netdata programs (including standard external plugins shipped by netdata), the following lines may appear: + +| tag | description | +|:-:|:----------| +| `INFO` | Something important the user should know. | +| `ERROR` | Something that might disable a part of netdata.
The log line includes `errno` (if it is not zero). | +| `FATAL` | Something prevented a program from running.
The log line includes `errno` (if it is not zero) and the program exited. | + +The `FATAL` and `ERROR` messages will always appear in the logs, and `INFO` can be filtered using the [severity level](/src/daemon/config/README.md#logs-section-options) option. + +So, when auto-detection of data collection fails, `ERROR` lines are logged and the relevant modules are disabled, but the +program continues to run. + +When a Netdata program cannot run at all, a `FATAL` line is logged. + +### collector.log + +The `collector.log` is the `stderr` of all [collectors](/src/collectors/COLLECTORS.md) + run by `netdata`. + +So if any process, in the Netdata process tree, writes anything to its standard error, +it will appear in `collector.log`. + +Data stored inside this file follows the pattern already described for `error.log`. + +### access.log + +The `access.log` logs web requests. The format is: + +```txt +DATE: ID: (sent/all = SENT_BYTES/ALL_BYTES bytes PERCENT_COMPRESSION%, prep/sent/total PREP_TIME/SENT_TIME/TOTAL_TIME ms): ACTION CODE URL +``` + +where: + +- `ID` is the client ID. Client IDs are auto-incremented every time a client connects to netdata. +- `SENT_BYTES` is the number of bytes sent to the client, without the HTTP response header. +- `ALL_BYTES` is the number of bytes of the response, before compression. +- `PERCENT_COMPRESSION` is the percentage of traffic saved due to compression. +- `PREP_TIME` is the time in milliseconds needed to prepare the response. +- `SENT_TIME` is the time in milliseconds needed to send the response to the client. +- `TOTAL_TIME` is the total time the request was inside Netdata (from the first byte of the request to the last byte + of the response). +- `ACTION` can be `filecopy`, `options` (used in CORS), `data` (API call). + +### debug.log + +See [debugging](#debugging). 
+ +## Netdata process scheduling policy + +By default Netdata versions prior to 1.34.0 run with the `idle` process scheduling policy, so that they use CPU +resources only when there is idle CPU to spare. On very busy servers (or weak servers), this can lead to gaps in +the charts. + +Starting with version 1.34.0, Netdata instead uses the `batch` scheduling policy by default. This largely eliminates +issues with gaps in charts on busy systems while still keeping the impact on the rest of the system low. + +You can set Netdata scheduling policy in `netdata.conf`, like this: + +```conf +[global] + process scheduling policy = idle +``` + +You can use the following: + +| policy | description | +| :-----------------------: | :---------- | +| `idle` | use CPU only when there is spare - this is lower than nice 19 - it was the default for Netdata before version 1.34.0 and it is so low that Netdata will run in "slow motion" under extreme system load, resulting in short (1-2 seconds) gaps in the charts. | +| `other`
or
`nice` | this is the default policy for all processes under Linux. It provides dynamic priorities based on the `nice` level of each process. Check below for setting this `nice` level for netdata. | +| `batch` | This policy is similar to `other` in that it schedules the thread according to its dynamic priority (based on the `nice` value). The difference is that this policy will cause the scheduler to always assume that the thread is CPU-intensive. Consequently, the scheduler will apply a small scheduling penalty with respect to wake-up behavior, so that this thread is mildly disfavored in scheduling decisions. | +| `fifo` | `fifo` can be used only with static priorities higher than 0, which means that when a `fifo` thread becomes runnable, it will always immediately preempt any currently running `other`, `batch`, or `idle` thread. `fifo` is a simple scheduling algorithm without time slicing. | +| `rr` | a simple enhancement of `fifo`. Everything described above for `fifo` also applies to `rr`, except that each thread is allowed to run only for a maximum time quantum. | +| `keep`
or
`none` | do not set scheduling policy, priority or nice level - i.e. keep running with whatever it is set already (e.g. by systemd). | + +For more information see `man sched`. + +### Scheduling priority for `rr` and `fifo` + +Once the policy is set to one of `rr` or `fifo`, the following will appear: + +```conf +[global] + process scheduling priority = 0 +``` + +These priorities are usually from 0 to 99. Higher numbers make the process more +important. + +### nice level for policies `other` or `batch` + +When the policy is set to `other`, `nice`, or `batch`, the following will appear: + +```conf +[global] + process nice level = 19 +``` + +## Scheduling settings and systemd + +Netdata will not be able to set its scheduling policy and priority to more important values when it is started as the +`netdata` user (systemd case). + +You can set these settings at `/etc/systemd/system/netdata.service`: + +```sh +[Service] +# By default Netdata switches to scheduling policy idle, which makes it use CPU, only +# when there is spare available. +# Valid policies: other (the system default) | batch | idle | fifo | rr +#CPUSchedulingPolicy=other + +# This sets the maximum scheduling priority Netdata can set (for policies: rr and fifo). +# Netdata (via [global].process scheduling priority in netdata.conf) can only lower this value. +# Priority gets values 1 (lowest) to 99 (highest). +#CPUSchedulingPriority=1 + +# For scheduling policy 'other' and 'batch', this sets the lowest niceness of netdata. +# Netdata (via [global].process nice level in netdata.conf) can only increase the value set here. +#Nice=0 +``` + +Run `systemctl daemon-reload` to reload these changes. + +Now, tell Netdata to keep these settings, as set by systemd, by editing +`netdata.conf` and setting: + +```conf +[global] + process scheduling policy = keep +``` + +Using the above, whatever scheduling settings you have set at `netdata.service` +will be maintained by netdata. 
+ +### Example 1: Netdata with nice -1 on non-systemd systems + +On a system that is not based on systemd, to make Netdata run with nice level -1 (a little bit higher priority than the default for +all programs), edit `netdata.conf` and set: + +```conf +[global] + process scheduling policy = other + process nice level = -1 +``` + +then execute this to [restart Netdata](/packaging/installer/README.md#maintaining-a-netdata-agent-installation): + +```sh +sudo systemctl restart netdata +``` + +### Example 2: Netdata with nice -1 on systemd systems + +On a system that is based on systemd, to make Netdata run with nice level -1 (a little bit higher priority than the default for all +programs), edit `netdata.conf` and set: + +```conf +[global] + process scheduling policy = keep +``` + +edit `/etc/systemd/system/netdata.service` and set: + +```sh +[Service] +CPUSchedulingPolicy=other +Nice=-1 +``` + +then execute: + +```sh +sudo systemctl daemon-reload +sudo systemctl restart netdata +``` + +## Virtual memory + +You may notice that netdata's virtual memory size, as reported by `ps` or `/proc/pid/status` (or even netdata's +applications virtual memory chart) is unrealistically high. + +For example, it may be reported to be 150+MB, even if the resident memory size is just 25MB. Similar values may be +reported for Netdata plugins too. + +Check this for example: A Netdata installation with default settings on Ubuntu +16.04LTS. The top chart is **real memory used**, while the bottom one is +**virtual memory**: + +![image](https://cloud.githubusercontent.com/assets/2662304/19013772/5eb7173e-87e3-11e6-8f2b-a2ccfeb06faf.png) + +### Why does this happen? + +The system memory allocator allocates virtual memory arenas, per thread running. On Linux systems this defaults to 16MB +per thread on 64 bit machines. So, if you get the difference between real and virtual memory and divide it by 16MB you +will roughly get the number of threads running. + +The system does this for speed. 
Having a separate memory arena for each thread allows the threads to run in parallel in +multi-core systems, without any locks between them. + +This behaviour is system specific. For example, the chart above when running +Netdata on Alpine Linux (that uses **musl** instead of **glibc**) is this: + +![image](https://cloud.githubusercontent.com/assets/2662304/19013807/7cf5878e-87e4-11e6-9651-082e68701eab.png) + +### Can we do anything to lower it? + +Since Netdata already uses minimal memory allocations while it runs (i.e. it adapts its memory on start, so that while +it repeatedly collects data it does not do memory allocations), it already instructs the system memory allocator to +minimize the memory arenas for each thread. We have also added [2 configuration +options](https://github.com/netdata/netdata/blob/5645b1ee35248d94e6931b64a8688f7f0d865ec6/src/main.c#L410-L418) to allow +you to tweak these settings: `glibc malloc arena max for plugins` and `glibc malloc arena max for netdata`. + +However, even if we instructed the memory allocator to use just one arena, it +seems it allocates an arena per thread. + +Netdata also supports `jemalloc` and `tcmalloc`, however both behave exactly the +same as the glibc memory allocator in this respect. + +### Is this a problem? + +No, it is not. + +Linux reserves real memory (physical RAM) in pages (on x86 machines pages are 4KB each). So even if the system memory +allocator is allocating huge amounts of virtual memory, only the 4KB pages that are actually used are reserving physical +RAM. The **real memory** chart on Netdata application section, shows the amount of physical memory these pages occupy (it +accounts for whole pages, even if only parts of them are actually used). + +## Debugging + +When you compile Netdata with debugging: + +1. compiler optimizations for your CPU are disabled (Netdata will run somewhat slower) + +2. a lot of code is added all over netdata, to log debug messages to `/var/log/netdata/debug.log`. 
However, nothing is + printed by default. Netdata allows you to select which sections of Netdata you want to trace. Tracing is activated + via the config option `debug flags`. It accepts a hex number, to enable or disable specific sections. You can find + the options supported at [log.h](https://raw.githubusercontent.com/netdata/netdata/master/src/libnetdata/log/log.h). + They are the `D_*` defines. The value `0xffffffffffffffff` will enable all possible debug flags. + +Once Netdata is compiled with debugging and tracing is enabled for a few sections, the file `/var/log/netdata/debug.log` +will contain the messages. + +> Do not forget to disable tracing (`debug flags = 0`) when you are done tracing. The file `debug.log` can grow too +> fast. + +### Compiling Netdata with debugging + +To compile Netdata with debugging, use this: + +```sh +# step into the Netdata source directory +cd /usr/src/netdata.git + +# run the installer with debugging enabled +CFLAGS="-O1 -ggdb -DNETDATA_INTERNAL_CHECKS=1" ./netdata-installer.sh +``` + +The above will compile and install Netdata with debugging info embedded. You can now use `debug flags` to set the +section(s) you need to trace. + +### Debugging crashes + +We have done our best to make Netdata crash free. If, however, Netdata crashes on your system, it would be very helpful +to provide stack traces of the crash. Without them, it will be almost impossible to find the issue (the code base is +too large to find such an issue by just observing it). + +To provide stack traces, **you need to have Netdata compiled with debugging**. There is no need to enable any tracing +(`debug flags`). + +Then you need to be in one of the following 2 cases: + +1. Netdata crashes and you have a core dump + +2. you can reproduce the crash + +If you are not in one of these cases, you need to find a way to be (i.e. if your system does not produce core dumps, check your +distro documentation to enable them). 
+ +### Netdata crashes and you have a core dump + +> you need to have Netdata compiled with debugging info for this to work (check above) + +Run the following command and post the output on a github issue. + +```sh +gdb $(which netdata) /path/to/core/dump +``` + +### You can reproduce a Netdata crash on your system + +> you need to have Netdata compiled with debugging info for this to work (check above) + +Install the package `valgrind` and run: + +```sh +valgrind $(which netdata) -D +``` + +Netdata will start and it will be a lot slower. Now reproduce the crash and `valgrind` will dump on your console the +stack trace. Open a new github issue and post the output. diff --git a/src/daemon/analytics.c b/src/daemon/analytics.c new file mode 100644 index 000000000..33f6f357f --- /dev/null +++ b/src/daemon/analytics.c @@ -0,0 +1,1083 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" +#include "buildinfo.h" + +struct analytics_data analytics_data; +extern void analytics_exporting_connectors (BUFFER *b); +extern void analytics_exporting_connectors_ssl (BUFFER *b); +extern void analytics_build_info (BUFFER *b); + +struct collector { + const char *plugin; + const char *module; +}; + +struct array_printer { + int c; + BUFFER *both; +}; + +/* + * Debug logging + */ +void analytics_log_data(void) +{ + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_STREAM_ENABLED : [%s]", analytics_data.netdata_config_stream_enabled); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_MEMORY_MODE : [%s]", analytics_data.netdata_config_memory_mode); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_EXPORTING_ENABLED : [%s]", analytics_data.netdata_config_exporting_enabled); + netdata_log_debug(D_ANALYTICS, "NETDATA_EXPORTING_CONNECTORS : [%s]", analytics_data.netdata_exporting_connectors); + netdata_log_debug(D_ANALYTICS, "NETDATA_ALLMETRICS_PROMETHEUS_USED : [%s]", analytics_data.netdata_allmetrics_prometheus_used); + netdata_log_debug(D_ANALYTICS, 
"NETDATA_ALLMETRICS_SHELL_USED : [%s]", analytics_data.netdata_allmetrics_shell_used); + netdata_log_debug(D_ANALYTICS, "NETDATA_ALLMETRICS_JSON_USED : [%s]", analytics_data.netdata_allmetrics_json_used); + netdata_log_debug(D_ANALYTICS, "NETDATA_DASHBOARD_USED : [%s]", analytics_data.netdata_dashboard_used); + netdata_log_debug(D_ANALYTICS, "NETDATA_COLLECTORS : [%s]", analytics_data.netdata_collectors); + netdata_log_debug(D_ANALYTICS, "NETDATA_COLLECTORS_COUNT : [%s]", analytics_data.netdata_collectors_count); + netdata_log_debug(D_ANALYTICS, "NETDATA_BUILDINFO : [%s]", analytics_data.netdata_buildinfo); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_PAGE_CACHE_SIZE : [%s]", analytics_data.netdata_config_page_cache_size); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_MULTIDB_DISK_QUOTA : [%s]", analytics_data.netdata_config_multidb_disk_quota); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_HTTPS_ENABLED : [%s]", analytics_data.netdata_config_https_enabled); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_WEB_ENABLED : [%s]", analytics_data.netdata_config_web_enabled); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_RELEASE_CHANNEL : [%s]", analytics_data.netdata_config_release_channel); + netdata_log_debug(D_ANALYTICS, "NETDATA_MIRRORED_HOST_COUNT : [%s]", analytics_data.netdata_mirrored_host_count); + netdata_log_debug(D_ANALYTICS, "NETDATA_MIRRORED_HOSTS_REACHABLE : [%s]", analytics_data.netdata_mirrored_hosts_reachable); + netdata_log_debug(D_ANALYTICS, "NETDATA_MIRRORED_HOSTS_UNREACHABLE : [%s]", analytics_data.netdata_mirrored_hosts_unreachable); + netdata_log_debug(D_ANALYTICS, "NETDATA_NOTIFICATION_METHODS : [%s]", analytics_data.netdata_notification_methods); + netdata_log_debug(D_ANALYTICS, "NETDATA_ALARMS_NORMAL : [%s]", analytics_data.netdata_alarms_normal); + netdata_log_debug(D_ANALYTICS, "NETDATA_ALARMS_WARNING : [%s]", analytics_data.netdata_alarms_warning); + netdata_log_debug(D_ANALYTICS, "NETDATA_ALARMS_CRITICAL : [%s]", 
analytics_data.netdata_alarms_critical); + netdata_log_debug(D_ANALYTICS, "NETDATA_CHARTS_COUNT : [%s]", analytics_data.netdata_charts_count); + netdata_log_debug(D_ANALYTICS, "NETDATA_METRICS_COUNT : [%s]", analytics_data.netdata_metrics_count); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_IS_PARENT : [%s]", analytics_data.netdata_config_is_parent); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_HOSTS_AVAILABLE : [%s]", analytics_data.netdata_config_hosts_available); + netdata_log_debug(D_ANALYTICS, "NETDATA_HOST_CLOUD_AVAILABLE : [%s]", analytics_data.netdata_host_cloud_available); + netdata_log_debug(D_ANALYTICS, "NETDATA_HOST_ACLK_AVAILABLE : [%s]", analytics_data.netdata_host_aclk_available); + netdata_log_debug(D_ANALYTICS, "NETDATA_HOST_ACLK_PROTOCOL : [%s]", analytics_data.netdata_host_aclk_protocol); + netdata_log_debug(D_ANALYTICS, "NETDATA_HOST_ACLK_IMPLEMENTATION : [%s]", analytics_data.netdata_host_aclk_implementation); + netdata_log_debug(D_ANALYTICS, "NETDATA_HOST_AGENT_CLAIMED : [%s]", analytics_data.netdata_host_agent_claimed); + netdata_log_debug(D_ANALYTICS, "NETDATA_HOST_CLOUD_ENABLED : [%s]", analytics_data.netdata_host_cloud_enabled); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_HTTPS_AVAILABLE : [%s]", analytics_data.netdata_config_https_available); + netdata_log_debug(D_ANALYTICS, "NETDATA_INSTALL_TYPE : [%s]", analytics_data.netdata_install_type); + netdata_log_debug(D_ANALYTICS, "NETDATA_PREBUILT_DISTRO : [%s]", analytics_data.netdata_prebuilt_distro); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_IS_PRIVATE_REGISTRY : [%s]", analytics_data.netdata_config_is_private_registry); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_USE_PRIVATE_REGISTRY: [%s]", analytics_data.netdata_config_use_private_registry); + netdata_log_debug(D_ANALYTICS, "NETDATA_CONFIG_OOM_SCORE : [%s]", analytics_data.netdata_config_oom_score); +} + +/* + * Free data + */ +void analytics_free_data(void) +{ + 
freez(analytics_data.netdata_config_stream_enabled); + freez(analytics_data.netdata_config_memory_mode); + freez(analytics_data.netdata_config_exporting_enabled); + freez(analytics_data.netdata_exporting_connectors); + freez(analytics_data.netdata_allmetrics_prometheus_used); + freez(analytics_data.netdata_allmetrics_shell_used); + freez(analytics_data.netdata_allmetrics_json_used); + freez(analytics_data.netdata_dashboard_used); + freez(analytics_data.netdata_collectors); + freez(analytics_data.netdata_collectors_count); + freez(analytics_data.netdata_buildinfo); + freez(analytics_data.netdata_config_page_cache_size); + freez(analytics_data.netdata_config_multidb_disk_quota); + freez(analytics_data.netdata_config_https_enabled); + freez(analytics_data.netdata_config_web_enabled); + freez(analytics_data.netdata_config_release_channel); + freez(analytics_data.netdata_mirrored_host_count); + freez(analytics_data.netdata_mirrored_hosts_reachable); + freez(analytics_data.netdata_mirrored_hosts_unreachable); + freez(analytics_data.netdata_notification_methods); + freez(analytics_data.netdata_alarms_normal); + freez(analytics_data.netdata_alarms_warning); + freez(analytics_data.netdata_alarms_critical); + freez(analytics_data.netdata_charts_count); + freez(analytics_data.netdata_metrics_count); + freez(analytics_data.netdata_config_is_parent); + freez(analytics_data.netdata_config_hosts_available); + freez(analytics_data.netdata_host_cloud_available); + freez(analytics_data.netdata_host_aclk_available); + freez(analytics_data.netdata_host_aclk_protocol); + freez(analytics_data.netdata_host_aclk_implementation); + freez(analytics_data.netdata_host_agent_claimed); + freez(analytics_data.netdata_host_cloud_enabled); + freez(analytics_data.netdata_config_https_available); + freez(analytics_data.netdata_install_type); + freez(analytics_data.netdata_config_is_private_registry); + freez(analytics_data.netdata_config_use_private_registry); + 
freez(analytics_data.netdata_config_oom_score); + freez(analytics_data.netdata_prebuilt_distro); + freez(analytics_data.netdata_fail_reason); +} + +/* + * Set a numeric/boolean data with a value + */ +void analytics_set_data(char **name, char *value) +{ + spinlock_lock(&analytics_data.spinlock); + if (*name) { + analytics_data.data_length -= strlen(*name); + freez(*name); + } + *name = strdupz(value); + analytics_data.data_length += strlen(*name); + spinlock_unlock(&analytics_data.spinlock); +} + +/* + * Set a string data with a value + */ +void analytics_set_data_str(char **name, const char *value) +{ + size_t value_string_len; + spinlock_lock(&analytics_data.spinlock); + if (*name) { + analytics_data.data_length -= strlen(*name); + freez(*name); + } + value_string_len = strlen(value) + 4; + *name = mallocz(sizeof(char) * value_string_len); + snprintfz(*name, value_string_len - 1, "\"%s\"", value); + analytics_data.data_length += strlen(*name); + spinlock_unlock(&analytics_data.spinlock); +} + +/* + * Log hits on the allmetrics page, with prometheus parameter + */ +void analytics_log_prometheus(void) +{ + if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.prometheus_hits < ANALYTICS_MAX_PROMETHEUS_HITS)) { + analytics_data.prometheus_hits++; + char b[21]; + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.prometheus_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, b); + } +} + +/* + * Log hits on the allmetrics page, with shell parameter (or default) + */ +void analytics_log_shell(void) +{ + if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.shell_hits < ANALYTICS_MAX_SHELL_HITS)) { + analytics_data.shell_hits++; + char b[21]; + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.shell_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, b); + } +} + +/* + * Log hits on the allmetrics page, with json parameter + */ +void analytics_log_json(void) +{ + if 
(netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.json_hits < ANALYTICS_MAX_JSON_HITS)) { + analytics_data.json_hits++; + char b[21]; + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.json_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_json_used, b); + } +} + +/* + * Log hits on the dashboard, (when calling HELLO). + */ +void analytics_log_dashboard(void) +{ + if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.dashboard_hits < ANALYTICS_MAX_DASHBOARD_HITS)) { + analytics_data.dashboard_hits++; + char b[21]; + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.dashboard_hits); + analytics_set_data(&analytics_data.netdata_dashboard_used, b); + } +} + +/* + * Called when setting the oom score + */ +void analytics_report_oom_score(long long int score){ + char b[21]; + snprintfz(b, sizeof(b) - 1, "%lld", score); + analytics_set_data(&analytics_data.netdata_config_oom_score, b); +} + +void analytics_mirrored_hosts(void) +{ + RRDHOST *host; + size_t count = 0; + size_t reachable = 0; + size_t unreachable = 0; + char b[21]; + + rrd_rdlock(); + rrdhost_foreach_read(host) + { + if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) + continue; + + ((host == localhost || !rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN)) ? 
reachable++ : unreachable++); + + count++; + } + rrd_rdunlock(); + + snprintfz(b, sizeof(b) - 1, "%zu", count); + analytics_set_data(&analytics_data.netdata_mirrored_host_count, b); + snprintfz(b, sizeof(b) - 1, "%zu", reachable); + analytics_set_data(&analytics_data.netdata_mirrored_hosts_reachable, b); + snprintfz(b, sizeof(b) - 1, "%zu", unreachable); + analytics_set_data(&analytics_data.netdata_mirrored_hosts_unreachable, b); +} + +void analytics_exporters(void) +{ + //when no exporters are available, an empty string will be sent + //decide if something else is more suitable (but probably not null) + BUFFER *bi = buffer_create(1000, NULL); + analytics_exporting_connectors(bi); + analytics_set_data_str(&analytics_data.netdata_exporting_connectors, (char *)buffer_tostring(bi)); + buffer_free(bi); +} + +int collector_counter_callb(const DICTIONARY_ITEM *item __maybe_unused, void *entry, void *data) { + + struct array_printer *ap = (struct array_printer *)data; + struct collector *col = (struct collector *)entry; + + BUFFER *bt = ap->both; + + if (likely(ap->c)) { + buffer_strcat(bt, ","); + } + + buffer_strcat(bt, "{"); + buffer_strcat(bt, " \"plugin\": \""); + buffer_strcat(bt, col->plugin); + buffer_strcat(bt, "\", \"module\":\""); + buffer_strcat(bt, col->module); + buffer_strcat(bt, "\" }"); + + (ap->c)++; + + return 0; +} + +/* + * Create a JSON array of available collectors, same as in api/v1/info + */ +void analytics_collectors(void) +{ + RRDSET *st; + DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); + char name[500]; + BUFFER *bt = buffer_create(1000, NULL); + + rrdset_foreach_read(st, localhost) { + if(!rrdset_is_available_for_viewers(st)) + continue; + + struct collector col = { + .plugin = rrdset_plugin_name(st), + .module = rrdset_module_name(st) + }; + snprintfz(name, sizeof(name) - 1, "%s:%s", col.plugin, col.module); + dictionary_set(dict, name, &col, sizeof(struct collector)); + } + rrdset_foreach_done(st); + + struct 
array_printer ap; + ap.c = 0; + ap.both = bt; + + dictionary_walkthrough_read(dict, collector_counter_callb, &ap); + dictionary_destroy(dict); + + analytics_set_data(&analytics_data.netdata_collectors, (char *)buffer_tostring(ap.both)); + + { + char b[21]; + snprintfz(b, sizeof(b) - 1, "%d", ap.c); + analytics_set_data(&analytics_data.netdata_collectors_count, b); + } + + buffer_free(bt); +} + +/* + * Run alarm-notify.sh script using the dump_methods parameter + * SEND_CUSTOM is always available + */ +void analytics_alarms_notifications(void) +{ + char *script; + script = mallocz( + sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("alarm-notify.sh dump_methods") + 2)); + sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "alarm-notify.sh"); + if (unlikely(access(script, R_OK) != 0)) { + netdata_log_info("Alarm notify script %s not found.", script); + freez(script); + return; + } + + strcat(script, " dump_methods"); + + pid_t command_pid; + + netdata_log_debug(D_ANALYTICS, "Executing %s", script); + + BUFFER *b = buffer_create(1000, NULL); + int cnt = 0; + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input); + if (fp_child_output) { + char line[200 + 1]; + + while (fgets(line, 200, fp_child_output) != NULL) { + char *end = line; + while (*end && *end != '\n') + end++; + *end = '\0'; + + if (likely(cnt)) + buffer_strcat(b, "|"); + + buffer_strcat(b, line); + + cnt++; + } + netdata_pclose(fp_child_input, fp_child_output, command_pid); + } + freez(script); + + analytics_set_data_str(&analytics_data.netdata_notification_methods, (char *)buffer_tostring(b)); + + buffer_free(b); +} + +static void analytics_get_install_type(struct rrdhost_system_info *system_info) +{ + if (system_info->install_type == NULL) { + analytics_set_data_str(&analytics_data.netdata_install_type, "unknown"); + } else { + analytics_set_data_str(&analytics_data.netdata_install_type, system_info->install_type); + } + 
+ if (system_info->prebuilt_dist != NULL) { + analytics_set_data_str(&analytics_data.netdata_prebuilt_distro, system_info->prebuilt_dist); + } +} + +/* + * Pick up if https is actually used + */ +void analytics_https(void) +{ + BUFFER *b = buffer_create(30, NULL); +#ifdef ENABLE_HTTPS + analytics_exporting_connectors_ssl(b); + + buffer_strcat(b, netdata_ssl_streaming_sender_ctx && + rrdhost_flag_check(localhost, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED) && + SSL_connection(&localhost->sender->ssl) ? "streaming|" : "|"); + + buffer_strcat(b, netdata_ssl_web_server_ctx ? "web" : ""); +#else + buffer_strcat(b, "||"); +#endif + + analytics_set_data_str(&analytics_data.netdata_config_https_available, (char *)buffer_tostring(b)); + buffer_free(b); +} + +void analytics_charts(void) +{ + RRDSET *st; + size_t c = 0; + + rrdset_foreach_read(st, localhost) + if(rrdset_is_available_for_viewers(st)) c++; + rrdset_foreach_done(st); + + analytics_data.charts_count = c; + { + char b[21]; + snprintfz(b, sizeof(b) - 1, "%zu", c); + analytics_set_data(&analytics_data.netdata_charts_count, b); + } +} + +void analytics_metrics(void) +{ + RRDSET *st; + size_t dimensions = 0; + rrdset_foreach_read(st, localhost) { + if (rrdset_is_available_for_viewers(st)) { + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + if (rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN) || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) + continue; + dimensions++; + } + rrddim_foreach_done(rd); + } + } + rrdset_foreach_done(st); + + analytics_data.metrics_count = dimensions; + { + char b[21]; + snprintfz(b, sizeof(b) - 1, "%zu", dimensions); + analytics_set_data(&analytics_data.netdata_metrics_count, b); + } +} + +void analytics_alarms(void) +{ + size_t alarm_warn = 0, alarm_crit = 0, alarm_normal = 0; + char b[21]; + RRDCALC *rc; + foreach_rrdcalc_in_rrdhost_read(localhost, rc) { + if (unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec)) + continue; + + switch (rc->status) { + case RRDCALC_STATUS_WARNING: + 
alarm_warn++; + break; + case RRDCALC_STATUS_CRITICAL: + alarm_crit++; + break; + default: + alarm_normal++; + } + } + foreach_rrdcalc_in_rrdhost_done(rc); + + snprintfz(b, sizeof(b) - 1, "%zu", alarm_normal); + analytics_set_data(&analytics_data.netdata_alarms_normal, b); + snprintfz(b, sizeof(b) - 1, "%zu", alarm_warn); + analytics_set_data(&analytics_data.netdata_alarms_warning, b); + snprintfz(b, sizeof(b) - 1, "%zu", alarm_crit); + analytics_set_data(&analytics_data.netdata_alarms_critical, b); +} + +/* + * Misc attributes to get (run from start) + */ +void analytics_misc(void) +{ + spinlock_init(&analytics_data.spinlock); + +#ifdef ENABLE_ACLK + analytics_set_data(&analytics_data.netdata_host_cloud_available, "true"); + analytics_set_data_str(&analytics_data.netdata_host_aclk_implementation, "Next Generation"); +#else + analytics_set_data(&analytics_data.netdata_host_cloud_available, "false"); + analytics_set_data_str(&analytics_data.netdata_host_aclk_implementation, ""); +#endif + + analytics_data.exporting_enabled = appconfig_get_boolean(&exporting_config, CONFIG_SECTION_EXPORTING, "enabled", CONFIG_BOOLEAN_NO); + analytics_set_data(&analytics_data.netdata_config_exporting_enabled, analytics_data.exporting_enabled ? "true" : "false"); + + analytics_set_data(&analytics_data.netdata_config_is_private_registry, "false"); + analytics_set_data(&analytics_data.netdata_config_use_private_registry, "false"); + + if (strcmp( + config_get(CONFIG_SECTION_REGISTRY, "registry to announce", "https://registry.my-netdata.io"), + "https://registry.my-netdata.io") != 0) + analytics_set_data(&analytics_data.netdata_config_use_private_registry, "true"); + + //do we need both registry to announce and enabled to indicate that this is a private registry ? 
+ if (config_get_boolean(CONFIG_SECTION_REGISTRY, "enabled", CONFIG_BOOLEAN_NO) && + web_server_mode != WEB_SERVER_MODE_NONE) + analytics_set_data(&analytics_data.netdata_config_is_private_registry, "true"); +} + +void analytics_aclk(void) +{ +#ifdef ENABLE_ACLK + if (aclk_connected) { + analytics_set_data(&analytics_data.netdata_host_aclk_available, "true"); + analytics_set_data_str(&analytics_data.netdata_host_aclk_protocol, "New"); + } + else +#endif + analytics_set_data(&analytics_data.netdata_host_aclk_available, "false"); +} + +/* + * Get the meta data, called from the thread once after the original delay + * These are values that won't change during agent runtime, and therefore + * don't try to read them on each META event send + */ +void analytics_gather_immutable_meta_data(void) +{ + analytics_misc(); + analytics_exporters(); + analytics_https(); +} + +/* + * Get the meta data, called from the thread on every heartbeat, and right before the EXIT event + * These are values that can change between agent restarts, and therefore + * try to read them on each META event send + */ +void analytics_gather_mutable_meta_data(void) +{ + analytics_collectors(); + analytics_alarms(); + analytics_charts(); + analytics_metrics(); + analytics_aclk(); + analytics_mirrored_hosts(); + analytics_alarms_notifications(); + + analytics_set_data( + &analytics_data.netdata_config_is_parent, (rrdhost_hosts_available() > 1 || configured_as_parent()) ? "true" : "false"); + + char *claim_id = get_agent_claimid(); + analytics_set_data(&analytics_data.netdata_host_agent_claimed, claim_id ? 
"true" : "false"); + freez(claim_id); + + { + char b[21]; + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.prometheus_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, b); + + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.shell_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, b); + + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.json_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_json_used, b); + + snprintfz(b, sizeof(b) - 1, "%zu", analytics_data.dashboard_hits); + analytics_set_data(&analytics_data.netdata_dashboard_used, b); + + snprintfz(b, sizeof(b) - 1, "%zu", rrdhost_hosts_available()); + analytics_set_data(&analytics_data.netdata_config_hosts_available, b); + } +} + +void analytics_main_cleanup(void *pptr) +{ + struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr); + if(!static_thread) return; + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + netdata_log_debug(D_ANALYTICS, "Cleaning up..."); + analytics_free_data(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +/* + * The analytics thread. 
Sleep for ANALYTICS_INIT_SLEEP_SEC, + * gather the data, and then go to a loop where every ANALYTICS_HEARTBEAT + * it will send a new META event after gathering data that could be changed + * while the agent is running + */ +void *analytics_main(void *ptr) +{ + CLEANUP_FUNCTION_REGISTER(analytics_main_cleanup) cleanup_ptr = ptr; + unsigned int sec = 0; + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step_ut = USEC_PER_SEC; + + netdata_log_debug(D_ANALYTICS, "Analytics thread starts"); + + //first delay after agent start + while (service_running(SERVICE_ANALYTICS) && likely(sec <= ANALYTICS_INIT_SLEEP_SEC)) { + heartbeat_next(&hb, step_ut); + sec++; + } + + if (unlikely(!service_running(SERVICE_ANALYTICS))) + goto cleanup; + + analytics_gather_immutable_meta_data(); + analytics_gather_mutable_meta_data(); + + analytics_statistic_t statistic = { "META_START", "-", "-" }; + analytics_statistic_send(&statistic); + analytics_log_data(); + + sec = 0; + while (1) { + heartbeat_next(&hb, step_ut * 2); + sec += 2; + + if (unlikely(!service_running(SERVICE_ANALYTICS))) + break; + + if (likely(sec < ANALYTICS_HEARTBEAT)) + continue; + + analytics_gather_mutable_meta_data(); + + analytics_statistic_t stt = { "META", "-", "-" }; + analytics_statistic_send(&stt); + analytics_log_data(); + + sec = 0; + } + +cleanup: + return NULL; +} + +static const char *verify_required_directory(const char *dir) +{ + if (chdir(dir) == -1) + fatal("Cannot change directory to '%s'", dir); + + DIR *d = opendir(dir); + if (!d) + fatal("Cannot examine the contents of directory '%s'", dir); + closedir(d); + + return dir; +} + +static const char *verify_or_create_required_directory(const char *dir) { + int result; + + result = mkdir(dir, 0755); + + if (result != 0 && errno != EEXIST) + fatal("Cannot create required directory '%s'", dir); + + return verify_required_directory(dir); +} + +/* + * This is called after the rrdinit + * These values will be sent on the START event + */ +void 
set_late_global_environment(struct rrdhost_system_info *system_info) +{ + analytics_set_data(&analytics_data.netdata_config_stream_enabled, default_rrdpush_enabled ? "true" : "false"); + analytics_set_data_str(&analytics_data.netdata_config_memory_mode, (char *)rrd_memory_mode_name(default_rrd_memory_mode)); + +#ifdef DISABLE_CLOUD + analytics_set_data(&analytics_data.netdata_host_cloud_enabled, "false"); +#else + analytics_set_data( + &analytics_data.netdata_host_cloud_enabled, + appconfig_get_boolean_ondemand(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", netdata_cloud_enabled) ? "true" : "false"); +#endif + +#ifdef ENABLE_DBENGINE + { + char b[16]; + snprintfz(b, sizeof(b) - 1, "%d", default_rrdeng_page_cache_mb); + analytics_set_data(&analytics_data.netdata_config_page_cache_size, b); + + snprintfz(b, sizeof(b) - 1, "%d", default_multidb_disk_quota_mb); + analytics_set_data(&analytics_data.netdata_config_multidb_disk_quota, b); + } +#endif + +#ifdef ENABLE_HTTPS + analytics_set_data(&analytics_data.netdata_config_https_enabled, "true"); +#else + analytics_set_data(&analytics_data.netdata_config_https_enabled, "false"); +#endif + + if (web_server_mode == WEB_SERVER_MODE_NONE) + analytics_set_data(&analytics_data.netdata_config_web_enabled, "false"); + else + analytics_set_data(&analytics_data.netdata_config_web_enabled, "true"); + + analytics_set_data_str(&analytics_data.netdata_config_release_channel, (char *)get_release_channel()); + + { + BUFFER *bi = buffer_create(1000, NULL); + analytics_build_info(bi); + analytics_set_data_str(&analytics_data.netdata_buildinfo, (char *)buffer_tostring(bi)); + buffer_free(bi); + } + + analytics_get_install_type(system_info); +} + +void get_system_timezone(void) +{ + // avoid flood calls to stat(/etc/localtime) + // http://stackoverflow.com/questions/4554271/how-to-avoid-excessive-stat-etc-localtime-calls-in-strftime-on-linux + const char *tz = getenv("TZ"); + if (!tz || !*tz) + setenv("TZ", 
config_get(CONFIG_SECTION_ENV_VARS, "TZ", ":/etc/localtime"), 0); + + char buffer[FILENAME_MAX + 1] = ""; + const char *timezone = NULL; + ssize_t ret; + + // use the TZ variable + if (tz && *tz && *tz != ':') { + timezone = tz; + netdata_log_info("TIMEZONE: using TZ variable '%s'", timezone); + } + + // use the contents of /etc/timezone + if (!timezone && !read_txt_file("/etc/timezone", buffer, sizeof(buffer))) { + timezone = buffer; + netdata_log_info("TIMEZONE: using the contents of /etc/timezone"); + } + + // read the link /etc/localtime + if (!timezone) { + ret = readlink("/etc/localtime", buffer, FILENAME_MAX); + + if (ret > 0) { + buffer[ret] = '\0'; + + char *cmp = "/usr/share/zoneinfo/"; + size_t cmp_len = strlen(cmp); + + char *s = strstr(buffer, cmp); + if (s && s[cmp_len]) { + timezone = &s[cmp_len]; + netdata_log_info("TIMEZONE: using the link of /etc/localtime: '%s'", timezone); + } + } else + buffer[0] = '\0'; + } + + // find the timezone from strftime() + if (!timezone) { + time_t t; + struct tm *tmp, tmbuf; + + t = now_realtime_sec(); + tmp = localtime_r(&t, &tmbuf); + + if (tmp != NULL) { + if (strftime(buffer, FILENAME_MAX, "%Z", tmp) == 0) + buffer[0] = '\0'; + else { + buffer[FILENAME_MAX] = '\0'; + timezone = buffer; + netdata_log_info("TIMEZONE: using strftime(): '%s'", timezone); + } + } + } + + if (timezone && *timezone) { + // make sure it does not have illegal characters + // netdata_log_info("TIMEZONE: fixing '%s'", timezone); + + size_t len = strlen(timezone); + char tmp[len + 1]; + char *d = tmp; + *d = '\0'; + + while (*timezone) { + if (isalnum((uint8_t)*timezone) || *timezone == '_' || *timezone == '/') + *d++ = *timezone++; + else + timezone++; + } + *d = '\0'; + strncpyz(buffer, tmp, len); + timezone = buffer; + netdata_log_info("TIMEZONE: fixed as '%s'", timezone); + } + + if (!timezone || !*timezone) + timezone = "unknown"; + + netdata_configured_timezone = config_get(CONFIG_SECTION_GLOBAL, "timezone", timezone); + + //get the 
utc offset, and the timezone as returned by strftime + //will be sent to the cloud + //Note: This will need an agent restart to get new offset on time change (dst, etc). + { + time_t t; + struct tm *tmp, tmbuf; + char zone[FILENAME_MAX + 1]; + char sign[2], hh[3], mm[3]; + + t = now_realtime_sec(); + tmp = localtime_r(&t, &tmbuf); + + if (tmp != NULL) { + if (strftime(zone, FILENAME_MAX, "%Z", tmp) == 0) { + netdata_configured_abbrev_timezone = strdupz("UTC"); + } else + netdata_configured_abbrev_timezone = strdupz(zone); + + if (strftime(zone, FILENAME_MAX, "%z", tmp) == 0) { + netdata_configured_utc_offset = 0; + } else { + sign[0] = zone[0] == '-' || zone[0] == '+' ? zone[0] : '0'; + sign[1] = '\0'; + hh[0] = isdigit((uint8_t)zone[1]) ? zone[1] : '0'; + hh[1] = isdigit((uint8_t)zone[2]) ? zone[2] : '0'; + hh[2] = '\0'; + mm[0] = isdigit((uint8_t)zone[3]) ? zone[3] : '0'; + mm[1] = isdigit((uint8_t)zone[4]) ? zone[4] : '0'; + mm[2] = '\0'; + + netdata_configured_utc_offset = (str2i(hh) * 3600) + (str2i(mm) * 60); + netdata_configured_utc_offset = + sign[0] == '-' ? 
-netdata_configured_utc_offset : netdata_configured_utc_offset; + } + } else { + netdata_configured_abbrev_timezone = strdupz("UTC"); + netdata_configured_utc_offset = 0; + } + } +} + +void set_global_environment() { + { + char b[16]; + snprintfz(b, sizeof(b) - 1, "%d", default_rrd_update_every); + setenv("NETDATA_UPDATE_EVERY", b, 1); + } + + setenv("NETDATA_VERSION", NETDATA_VERSION, 1); + setenv("NETDATA_HOSTNAME", netdata_configured_hostname, 1); + setenv("NETDATA_CONFIG_DIR", verify_required_directory(netdata_configured_user_config_dir), 1); + setenv("NETDATA_USER_CONFIG_DIR", verify_required_directory(netdata_configured_user_config_dir), 1); + setenv("NETDATA_STOCK_CONFIG_DIR", verify_required_directory(netdata_configured_stock_config_dir), 1); + setenv("NETDATA_PLUGINS_DIR", verify_required_directory(netdata_configured_primary_plugins_dir), 1); + setenv("NETDATA_WEB_DIR", verify_required_directory(netdata_configured_web_dir), 1); + setenv("NETDATA_CACHE_DIR", verify_or_create_required_directory(netdata_configured_cache_dir), 1); + setenv("NETDATA_LIB_DIR", verify_or_create_required_directory(netdata_configured_varlib_dir), 1); + setenv("NETDATA_LOCK_DIR", verify_or_create_required_directory(netdata_configured_lock_dir), 1); + setenv("NETDATA_LOG_DIR", verify_or_create_required_directory(netdata_configured_log_dir), 1); + setenv("NETDATA_HOST_PREFIX", netdata_configured_host_prefix, 1); + + { + BUFFER *user_plugins_dirs = buffer_create(FILENAME_MAX, NULL); + + for (size_t i = 1; i < PLUGINSD_MAX_DIRECTORIES && plugin_directories[i]; i++) { + if (i > 1) + buffer_strcat(user_plugins_dirs, " "); + buffer_strcat(user_plugins_dirs, plugin_directories[i]); + } + + setenv("NETDATA_USER_PLUGINS_DIRS", buffer_tostring(user_plugins_dirs), 1); + + buffer_free(user_plugins_dirs); + } + + analytics_data.data_length = 0; + analytics_set_data(&analytics_data.netdata_config_stream_enabled, "null"); + analytics_set_data(&analytics_data.netdata_config_memory_mode, "null"); + 
analytics_set_data(&analytics_data.netdata_config_exporting_enabled, "null"); + analytics_set_data(&analytics_data.netdata_exporting_connectors, "null"); + analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, "null"); + analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, "null"); + analytics_set_data(&analytics_data.netdata_allmetrics_json_used, "null"); + analytics_set_data(&analytics_data.netdata_dashboard_used, "null"); + analytics_set_data(&analytics_data.netdata_collectors, "null"); + analytics_set_data(&analytics_data.netdata_collectors_count, "null"); + analytics_set_data(&analytics_data.netdata_buildinfo, "null"); + analytics_set_data(&analytics_data.netdata_config_page_cache_size, "null"); + analytics_set_data(&analytics_data.netdata_config_multidb_disk_quota, "null"); + analytics_set_data(&analytics_data.netdata_config_https_enabled, "null"); + analytics_set_data(&analytics_data.netdata_config_web_enabled, "null"); + analytics_set_data(&analytics_data.netdata_config_release_channel, "null"); + analytics_set_data(&analytics_data.netdata_mirrored_host_count, "null"); + analytics_set_data(&analytics_data.netdata_mirrored_hosts_reachable, "null"); + analytics_set_data(&analytics_data.netdata_mirrored_hosts_unreachable, "null"); + analytics_set_data(&analytics_data.netdata_notification_methods, "null"); + analytics_set_data(&analytics_data.netdata_alarms_normal, "null"); + analytics_set_data(&analytics_data.netdata_alarms_warning, "null"); + analytics_set_data(&analytics_data.netdata_alarms_critical, "null"); + analytics_set_data(&analytics_data.netdata_charts_count, "null"); + analytics_set_data(&analytics_data.netdata_metrics_count, "null"); + analytics_set_data(&analytics_data.netdata_config_is_parent, "null"); + analytics_set_data(&analytics_data.netdata_config_hosts_available, "null"); + analytics_set_data(&analytics_data.netdata_host_cloud_available, "null"); + 
analytics_set_data(&analytics_data.netdata_host_aclk_implementation, "null"); + analytics_set_data(&analytics_data.netdata_host_aclk_available, "null"); + analytics_set_data(&analytics_data.netdata_host_aclk_protocol, "null"); + analytics_set_data(&analytics_data.netdata_host_agent_claimed, "null"); + analytics_set_data(&analytics_data.netdata_host_cloud_enabled, "null"); + analytics_set_data(&analytics_data.netdata_config_https_available, "null"); + analytics_set_data(&analytics_data.netdata_install_type, "null"); + analytics_set_data(&analytics_data.netdata_config_is_private_registry, "null"); + analytics_set_data(&analytics_data.netdata_config_use_private_registry, "null"); + analytics_set_data(&analytics_data.netdata_config_oom_score, "null"); + analytics_set_data(&analytics_data.netdata_prebuilt_distro, "null"); + analytics_set_data(&analytics_data.netdata_fail_reason, "null"); + + analytics_data.prometheus_hits = 0; + analytics_data.shell_hits = 0; + analytics_data.json_hits = 0; + analytics_data.dashboard_hits = 0; + analytics_data.charts_count = 0; + analytics_data.metrics_count = 0; + analytics_data.exporting_enabled = false; + + char *default_port = appconfig_get(&netdata_config, CONFIG_SECTION_WEB, "default port", NULL); + int clean = 0; + if (!default_port) { + default_port = strdupz("19999"); + clean = 1; + } + + setenv("NETDATA_LISTEN_PORT", default_port, 1); + if (clean) + freez(default_port); + + // set the path we need + char path[4096], *p = getenv("PATH"); + if (!p) p = "/bin:/usr/bin"; + snprintfz(path, sizeof(path), "%s:%s", p, "/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin"); + setenv("PATH", config_get(CONFIG_SECTION_ENV_VARS, "PATH", path), 1); + + // python options + p = getenv("PYTHONPATH"); + if (!p) p = ""; + setenv("PYTHONPATH", config_get(CONFIG_SECTION_ENV_VARS, "PYTHONPATH", p), 1); + + // disable buffering for python plugins + setenv("PYTHONUNBUFFERED", "1", 1); + + // switch to standard locale for plugins + setenv("LC_ALL", "C", 
1); +} + +void analytics_statistic_send(const analytics_statistic_t *statistic) { + if (!statistic) + return; + + static char *as_script; + + if (netdata_anonymous_statistics_enabled == -1) { + char *optout_file = mallocz( + sizeof(char) * + (strlen(netdata_configured_user_config_dir) + strlen(".opt-out-from-anonymous-statistics") + 2)); + + sprintf(optout_file, "%s/%s", netdata_configured_user_config_dir, ".opt-out-from-anonymous-statistics"); + + if (likely(access(optout_file, R_OK) != 0)) { + as_script = mallocz( + sizeof(char) * + (strlen(netdata_configured_primary_plugins_dir) + strlen("anonymous-statistics.sh") + 2)); + + sprintf(as_script, "%s/%s", netdata_configured_primary_plugins_dir, "anonymous-statistics.sh"); + + if (unlikely(access(as_script, R_OK) != 0)) { + netdata_anonymous_statistics_enabled = 0; + + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "Statistics script '%s' not found.", + as_script); + + freez(as_script); + } + else + netdata_anonymous_statistics_enabled = 1; + } + else { + netdata_anonymous_statistics_enabled = 0; + as_script = NULL; + } + + freez(optout_file); + } + + if (!netdata_anonymous_statistics_enabled || !statistic->action) + return; + + const char *action_result = statistic->result; + const char *action_data = statistic->data; + + if (!statistic->result) + action_result = ""; + if (!statistic->data) + action_data = ""; + + char *command_to_run = mallocz( + sizeof(char) * (strlen(statistic->action) + strlen(action_result) + strlen(action_data) + strlen(as_script) + + analytics_data.data_length + (ANALYTICS_NO_OF_ITEMS * 3) + 15)); + pid_t command_pid; + + sprintf( + command_to_run, + "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ", + as_script, + statistic->action, + action_result, + action_data, + analytics_data.netdata_config_stream_enabled, + 
analytics_data.netdata_config_memory_mode, + analytics_data.netdata_config_exporting_enabled, + analytics_data.netdata_exporting_connectors, + analytics_data.netdata_allmetrics_prometheus_used, + analytics_data.netdata_allmetrics_shell_used, + analytics_data.netdata_allmetrics_json_used, + analytics_data.netdata_dashboard_used, + analytics_data.netdata_collectors, + analytics_data.netdata_collectors_count, + analytics_data.netdata_buildinfo, + analytics_data.netdata_config_page_cache_size, + analytics_data.netdata_config_multidb_disk_quota, + analytics_data.netdata_config_https_enabled, + analytics_data.netdata_config_web_enabled, + analytics_data.netdata_config_release_channel, + analytics_data.netdata_mirrored_host_count, + analytics_data.netdata_mirrored_hosts_reachable, + analytics_data.netdata_mirrored_hosts_unreachable, + analytics_data.netdata_notification_methods, + analytics_data.netdata_alarms_normal, + analytics_data.netdata_alarms_warning, + analytics_data.netdata_alarms_critical, + analytics_data.netdata_charts_count, + analytics_data.netdata_metrics_count, + analytics_data.netdata_config_is_parent, + analytics_data.netdata_config_hosts_available, + analytics_data.netdata_host_cloud_available, + analytics_data.netdata_host_aclk_available, + analytics_data.netdata_host_aclk_protocol, + analytics_data.netdata_host_aclk_implementation, + analytics_data.netdata_host_agent_claimed, + analytics_data.netdata_host_cloud_enabled, + analytics_data.netdata_config_https_available, + analytics_data.netdata_install_type, + analytics_data.netdata_config_is_private_registry, + analytics_data.netdata_config_use_private_registry, + analytics_data.netdata_config_oom_score, + analytics_data.netdata_prebuilt_distro, + analytics_data.netdata_fail_reason); + + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "%s '%s' '%s' '%s'", + as_script, statistic->action, action_result, action_data); + + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(command_to_run, &command_pid, 
&fp_child_input); + if (fp_child_output) { + char buffer[4 + 1]; + char *s = fgets(buffer, 4, fp_child_output); + int exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid); + if (exit_code) + + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "Statistics script returned error: %d", + exit_code); + + if (s && strncmp(buffer, "200", 3) != 0) + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "Statistics script returned http code: %s", + buffer); + + } + else + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "Failed to run statistics script: %s.", + as_script); + + freez(command_to_run); +} diff --git a/src/daemon/analytics.h b/src/daemon/analytics.h new file mode 100644 index 000000000..501eb7b55 --- /dev/null +++ b/src/daemon/analytics.h @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_ANALYTICS_H +#define NETDATA_ANALYTICS_H 1 + +#include "daemon/common.h" + +/* Max number of seconds before the first META analytics is sent */ +#define ANALYTICS_INIT_SLEEP_SEC 120 + +/* Send a META event every X seconds */ +#define ANALYTICS_HEARTBEAT (6 * 3600) + +/* Maximum number of hits to log */ +#define ANALYTICS_MAX_PROMETHEUS_HITS 255 +#define ANALYTICS_MAX_SHELL_HITS 255 +#define ANALYTICS_MAX_JSON_HITS 255 +#define ANALYTICS_MAX_DASHBOARD_HITS 255 + +/* Needed to calculate the space needed for parameters */ +#define ANALYTICS_NO_OF_ITEMS 40 + +struct analytics_data { + char *netdata_config_stream_enabled; + char *netdata_config_memory_mode; + char *netdata_exporting_connectors; + char *netdata_config_exporting_enabled; + char *netdata_allmetrics_prometheus_used; + char *netdata_allmetrics_shell_used; + char *netdata_allmetrics_json_used; + char *netdata_dashboard_used; + char *netdata_collectors; + char *netdata_collectors_count; + char *netdata_buildinfo; + char *netdata_config_page_cache_size; + char *netdata_config_multidb_disk_quota; + char *netdata_config_https_enabled; + char *netdata_config_web_enabled; + char *netdata_config_release_channel; + char 
*netdata_mirrored_host_count; + char *netdata_mirrored_hosts_reachable; + char *netdata_mirrored_hosts_unreachable; + char *netdata_notification_methods; + char *netdata_alarms_normal; + char *netdata_alarms_warning; + char *netdata_alarms_critical; + char *netdata_charts_count; + char *netdata_metrics_count; + char *netdata_config_is_parent; + char *netdata_config_hosts_available; + char *netdata_host_cloud_available; + char *netdata_host_aclk_available; + char *netdata_host_aclk_protocol; + char *netdata_host_aclk_implementation; + char *netdata_host_agent_claimed; + char *netdata_host_cloud_enabled; + char *netdata_config_https_available; + char *netdata_install_type; + char *netdata_config_is_private_registry; + char *netdata_config_use_private_registry; + char *netdata_config_oom_score; + char *netdata_prebuilt_distro; + char *netdata_fail_reason; + + size_t data_length; + + size_t prometheus_hits; + size_t shell_hits; + size_t json_hits; + size_t dashboard_hits; + + size_t charts_count; + size_t metrics_count; + SPINLOCK spinlock; + + bool exporting_enabled; +}; + +void set_late_global_environment(struct rrdhost_system_info *system_info); +void analytics_free_data(void); +void set_global_environment(void); +void analytics_log_shell(void); +void analytics_log_json(void); +void analytics_log_prometheus(void); +void analytics_log_dashboard(void); +void analytics_gather_mutable_meta_data(void); +void analytics_report_oom_score(long long int score); +void get_system_timezone(void); + +typedef struct { + const char *action; + const char *result; + const char *data; +} analytics_statistic_t; + +void analytics_statistic_send(const analytics_statistic_t *statistic); + +extern struct analytics_data analytics_data; + +#endif //NETDATA_ANALYTICS_H diff --git a/src/daemon/anonymous-statistics.sh.in b/src/daemon/anonymous-statistics.sh.in new file mode 100755 index 000000000..d12e7e32a --- /dev/null +++ b/src/daemon/anonymous-statistics.sh.in @@ -0,0 +1,197 @@ 
#!/usr/bin/env sh

# Send one anonymous statistics event to Netdata.
#
# Positional arguments:
#   $1       ACTION         one of the actions listed below
#   $2       ACTION_RESULT  see below
#   $3       ACTION_DATA    see below
#   $4..$43  extra properties. They are emitted UNQUOTED into the JSON body, so the
#            caller must pass them already JSON-encoded (a quoted string, a number,
#            true/false/null, ...). Missing ones are sent as JSON null.
#
# Valid actions:
#   - FATAL - netdata exited due to a fatal condition
#        ACTION_RESULT -- program name and thread tag
#        ACTION_DATA   -- fmt, args passed to fatal
#   - START - netdata started
#        ACTION_DATA   -- "nan" in the original note (presumably: not applicable)
#   - EXIT  - installation action
#        ACTION_DATA   -- return value of the action (the original note was truncated)
#
# Host/container details (NETDATA_HOST_OS_NAME, NETDATA_SYSTEM_*, NETDATA_CONTAINER_*,
# NETDATA_VERSION, NETDATA_REGISTRY_UNIQUE_ID, ...) are read from the environment.

ACTION="${1}"
ACTION_RESULT="${2}"

# ACTION_DATA is free text placed inside a JSON string: escape backslashes, swap
# double quotes for single quotes and flatten control characters, so the message
# can never break the request body. printf is used because echo may interpret
# backslash sequences in some shells.
ACTION_DATA="$(printf '%s\n' "${3}" |
  sed -e 's/\\/\\\\/g' |
  tr '"' "'" |
  tr '\n\r\t' '   ' |
  tr -d '\000-\037')"
# the newline added for sed became a single trailing space; drop it
ACTION_DATA="${ACTION_DATA% }"

# -------------------------------------------------------------------------------------------------
# check opt-out

# Any non-empty DISABLE_TELEMETRY / DO_NOT_TRACK value opts out (this includes "0",
# exactly as before); the opt-out marker file does the same.
if [ -f "@configdir_POST@/.opt-out-from-anonymous-statistics" ] ||
  [ -n "${DISABLE_TELEMETRY}" ] ||
  [ -n "${DO_NOT_TRACK}" ]; then
  exit 0
fi

# -------------------------------------------------------------------------------------------------
# Get the extra variables

NETDATA_CONFIG_STREAM_ENABLED="${4:-null}"
NETDATA_CONFIG_MEMORY_MODE="${5:-null}"
NETDATA_CONFIG_EXPORTING_ENABLED="${6:-null}"
NETDATA_EXPORTING_CONNECTORS="${7:-null}"
NETDATA_ALLMETRICS_PROMETHEUS_USED="${8:-null}"
NETDATA_ALLMETRICS_SHELL_USED="${9:-null}"
NETDATA_ALLMETRICS_JSON_USED="${10:-null}"
NETDATA_DASHBOARD_USED="${11:-null}"
# list items placed inside "host_collectors": [ ... ]; an empty value is a valid empty list
NETDATA_COLLECTORS="${12}"
NETDATA_COLLECTORS_COUNT="${13:-null}"
NETDATA_BUILDINFO="${14:-null}"
NETDATA_CONFIG_PAGE_CACHE_SIZE="${15:-null}"
NETDATA_CONFIG_MULTIDB_DISK_QUOTA="${16:-null}"
NETDATA_CONFIG_HTTPS_ENABLED="${17:-null}"
NETDATA_CONFIG_WEB_ENABLED="${18:-null}"
NETDATA_CONFIG_RELEASE_CHANNEL="${19:-null}"
NETDATA_MIRRORED_HOST_COUNT="${20:-null}"
NETDATA_MIRRORED_HOSTS_REACHABLE="${21:-null}"
NETDATA_MIRRORED_HOSTS_UNREACHABLE="${22:-null}"
NETDATA_NOTIFICATION_METHODS="${23:-null}"
NETDATA_ALARMS_NORMAL="${24:-null}"
NETDATA_ALARMS_WARNING="${25:-null}"
NETDATA_ALARMS_CRITICAL="${26:-null}"
NETDATA_CHARTS_COUNT="${27:-null}"
NETDATA_METRICS_COUNT="${28:-null}"
NETDATA_CONFIG_IS_PARENT="${29:-null}"
NETDATA_CONFIG_HOSTS_AVAILABLE="${30:-null}"
NETDATA_HOST_CLOUD_AVAILABLE="${31:-null}"
NETDATA_HOST_ACLK_AVAILABLE="${32:-null}"
NETDATA_HOST_ACLK_PROTOCOL="${33:-null}"
NETDATA_HOST_ACLK_IMPLEMENTATION="${34:-null}"
NETDATA_HOST_AGENT_CLAIMED="${35:-null}"
NETDATA_HOST_CLOUD_ENABLED="${36:-null}"
NETDATA_CONFIG_HTTPS_AVAILABLE="${37:-null}"
NETDATA_INSTALL_TYPE="${38:-null}"
NETDATA_IS_PRIVATE_REGISTRY="${39:-null}"
NETDATA_USE_PRIVATE_REGISTRY="${40:-null}"
NETDATA_CONFIG_OOM_SCORE="${41:-null}"
NETDATA_PREBUILT_DISTRO="${42:-null}"
NETDATA_FAIL_REASON="${43:-null}"

[ -z "$NETDATA_REGISTRY_UNIQUE_ID" ] && NETDATA_REGISTRY_UNIQUE_ID="00000000-0000-0000-0000-000000000000"

KERNEL_NAME="$(uname -s)"
# `command -v` prints a usable path (or nothing); `type` output is prose and must
# never be executed as a path.
MD5_PATH="$(command -v md5sum 2> /dev/null)"

# Derive an identifier for this system: platform UUID on macOS, otherwise the MD5 of
# a machine-id file, otherwise a freshly generated UUID.
if [ "${KERNEL_NAME}" = Darwin ] && command -v ioreg > /dev/null 2>&1; then
  SYSTEM_DISTINCT_ID="macos-$(ioreg -rd1 -c IOPlatformExpertDevice | awk '/IOPlatformUUID/ { split($0, line, "\""); printf("%s\n", line[4]); }')"
elif [ -f /etc/machine-id ] && [ -n "$MD5_PATH" ]; then
  SYSTEM_DISTINCT_ID="machine-$($MD5_PATH < /etc/machine-id | cut -f1 -d" ")"
elif [ -f /var/db/dbus/machine-id ] && [ -n "$MD5_PATH" ]; then
  SYSTEM_DISTINCT_ID="dbus-$($MD5_PATH < /var/db/dbus/machine-id | cut -f1 -d" ")"
elif [ -f /var/lib/dbus/machine-id ] && [ -n "$MD5_PATH" ]; then
  SYSTEM_DISTINCT_ID="dbus-$($MD5_PATH < /var/lib/dbus/machine-id | cut -f1 -d" ")"
elif command -v uuidgen > /dev/null 2>&1; then
  SYSTEM_DISTINCT_ID="uuid-$(uuidgen | tr '[:upper:]' '[:lower:]')"
else
  SYSTEM_DISTINCT_ID="null"
fi

# define body of request to be sent
REQ_BODY="$(cat << EOF
{
    "event": "${ACTION} ${ACTION_RESULT}",
    "properties": {
        "distinct_id": "${NETDATA_REGISTRY_UNIQUE_ID}",
        "\$current_url": "agent backend",
        "\$pathname": "netdata-backend",
        "\$host": "backend.netdata.io",
        "\$ip": "127.0.0.1",
        "event_source": "agent backend",
        "action": "${ACTION}",
        "action_result": "${ACTION_RESULT}",
        "action_data": "${ACTION_DATA}",
        "netdata_machine_guid": "${NETDATA_REGISTRY_UNIQUE_ID}",
        "netdata_version": "${NETDATA_VERSION}",
        "netdata_buildinfo": ${NETDATA_BUILDINFO},
        "netdata_release_channel": ${NETDATA_CONFIG_RELEASE_CHANNEL},
        "netdata_install_type": ${NETDATA_INSTALL_TYPE},
        "netdata_prebuilt_distro": ${NETDATA_PREBUILT_DISTRO},
        "host_os_name": "${NETDATA_HOST_OS_NAME}",
        "host_os_id": "${NETDATA_HOST_OS_ID}",
        "host_os_id_like": "${NETDATA_HOST_OS_ID_LIKE}",
        "host_os_version": "${NETDATA_HOST_OS_VERSION}",
        "host_os_version_id": "${NETDATA_HOST_OS_VERSION_ID}",
        "host_os_detection": "${NETDATA_HOST_OS_DETECTION}",
        "host_is_k8s_node": "${NETDATA_HOST_IS_K8S_NODE}",
        "system_kernel_name": "${NETDATA_SYSTEM_KERNEL_NAME}",
        "system_kernel_version": "${NETDATA_SYSTEM_KERNEL_VERSION}",
        "system_architecture": "${NETDATA_SYSTEM_ARCHITECTURE}",
        "system_virtualization": "${NETDATA_SYSTEM_VIRTUALIZATION}",
        "system_virt_detection": "${NETDATA_SYSTEM_VIRT_DETECTION}",
        "system_container": "${NETDATA_SYSTEM_CONTAINER}",
        "system_container_detection": "${NETDATA_SYSTEM_CONTAINER_DETECTION}",
        "system_distinct_id": "${SYSTEM_DISTINCT_ID}",
        "container_os_name": "${NETDATA_CONTAINER_OS_NAME}",
        "container_os_id": "${NETDATA_CONTAINER_OS_ID}",
        "container_os_id_like": "${NETDATA_CONTAINER_OS_ID_LIKE}",
        "container_os_version": "${NETDATA_CONTAINER_OS_VERSION}",
        "container_os_version_id": "${NETDATA_CONTAINER_OS_VERSION_ID}",
        "container_os_detection": "${NETDATA_CONTAINER_OS_DETECTION}",
        "container_is_official_image": ${NETDATA_CONTAINER_IS_OFFICIAL_IMAGE:-null},
        "system_cpu_detection": "${NETDATA_SYSTEM_CPU_DETECTION}",
        "system_cpu_freq": "${NETDATA_SYSTEM_CPU_FREQ}",
        "system_cpu_logical_cpu_count": "${NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT}",
        "system_cpu_model": "${NETDATA_SYSTEM_CPU_MODEL}",
        "system_cpu_vendor": "${NETDATA_SYSTEM_CPU_VENDOR}",
        "system_disk_detection": "${NETDATA_SYSTEM_DISK_DETECTION}",
        "system_ram_detection": "${NETDATA_SYSTEM_RAM_DETECTION}",
        "system_total_disk_size": "${NETDATA_SYSTEM_TOTAL_DISK_SIZE}",
        "system_total_ram": "${NETDATA_SYSTEM_TOTAL_RAM}",
        "config_stream_enabled": ${NETDATA_CONFIG_STREAM_ENABLED},
        "config_memory_mode": ${NETDATA_CONFIG_MEMORY_MODE},
        "config_page_cache_size": ${NETDATA_CONFIG_PAGE_CACHE_SIZE},
        "config_multidb_disk_quota": ${NETDATA_CONFIG_MULTIDB_DISK_QUOTA},
        "config_https_enabled": ${NETDATA_CONFIG_HTTPS_ENABLED},
        "config_https_available": ${NETDATA_CONFIG_HTTPS_AVAILABLE},
        "config_web_enabled": ${NETDATA_CONFIG_WEB_ENABLED},
        "config_exporting_enabled": ${NETDATA_CONFIG_EXPORTING_ENABLED},
        "config_is_parent": ${NETDATA_CONFIG_IS_PARENT},
        "config_is_private_registry": ${NETDATA_IS_PRIVATE_REGISTRY},
        "config_private_registry_used": ${NETDATA_USE_PRIVATE_REGISTRY},
        "config_hosts_available": ${NETDATA_CONFIG_HOSTS_AVAILABLE},
        "config_oom_score": ${NETDATA_CONFIG_OOM_SCORE},
        "alarms_normal": ${NETDATA_ALARMS_NORMAL},
        "alarms_warning": ${NETDATA_ALARMS_WARNING},
        "alarms_critical": ${NETDATA_ALARMS_CRITICAL},
        "host_charts_count": ${NETDATA_CHARTS_COUNT},
        "host_metrics_count": ${NETDATA_METRICS_COUNT},
        "host_collectors":[
            ${NETDATA_COLLECTORS}
        ],
        "host_collectors_count": ${NETDATA_COLLECTORS_COUNT},
        "host_notification_methods": ${NETDATA_NOTIFICATION_METHODS},
        "host_allmetrics_prometheus_used": ${NETDATA_ALLMETRICS_PROMETHEUS_USED},
        "host_allmetrics_shell_used": ${NETDATA_ALLMETRICS_SHELL_USED},
        "host_allmetrics_json_used": ${NETDATA_ALLMETRICS_JSON_USED},
        "host_dashboard_used": ${NETDATA_DASHBOARD_USED},
        "host_cloud_available": ${NETDATA_HOST_CLOUD_AVAILABLE},
        "host_cloud_enabled": ${NETDATA_HOST_CLOUD_ENABLED},
        "host_agent_claimed": ${NETDATA_HOST_AGENT_CLAIMED},
        "host_aclk_available": ${NETDATA_HOST_ACLK_AVAILABLE},
        "host_aclk_protocol": ${NETDATA_HOST_ACLK_PROTOCOL},
        "host_aclk_implementation": ${NETDATA_HOST_ACLK_IMPLEMENTATION},
        "mirrored_host_count": ${NETDATA_MIRRORED_HOST_COUNT},
        "mirrored_hosts_reachable": ${NETDATA_MIRRORED_HOSTS_REACHABLE},
        "mirrored_hosts_unreachable": ${NETDATA_MIRRORED_HOSTS_UNREACHABLE},
        "exporting_connectors": ${NETDATA_EXPORTING_CONNECTORS},
        "netdata_fail_reason": ${NETDATA_FAIL_REASON}
    }
}
EOF
)"

# send the anonymous statistics to Netdata; the HTTP status code is printed on stdout
ANALYTICS_URL="https://us-east1-netdata-analytics-bi.cloudfunctions.net/ingest_agent_events"

if command -v curl > /dev/null 2>&1; then
  curl --silent -o /dev/null --write-out '%{http_code}' -X POST --max-time 2 \
    --header "Content-Type: application/json" \
    -d "${REQ_BODY}" \
    "${ANALYTICS_URL}"
elif command -v wget > /dev/null 2>&1; then
  # NOTE(review): --no-check-certificate disables TLS verification for this upload.
  # It is kept as-is (presumably for hosts with outdated CA bundles) - confirm it is
  # still wanted.
  # wget indents the response status line, so accept any amount of leading blanks.
  wget -q -O - --no-check-certificate \
    --server-response \
    --method POST \
    --timeout=1 \
    --header 'Content-Type: application/json' \
    --body-data "${REQ_BODY}" \
    "${ANALYTICS_URL}" 2>&1 | awk '/^ *HTTP\//{print $2}'
fi
// Index of every entry of the BUILD_INFO[] table.
// The enumerator order defines the numeric slot ids, so new slots must be added
// without disturbing BIB_TERMINATOR, which has to stay last.
typedef enum __attribute__((packed)) {
    // packaging
    BIB_PACKAGING_NETDATA_VERSION = 0,
    BIB_PACKAGING_INSTALL_TYPE,
    BIB_PACKAGING_ARCHITECTURE,
    BIB_PACKAGING_DISTRO,
    BIB_PACKAGING_CONFIGURE_OPTIONS,

    // directories
    BIB_DIR_USER_CONFIG,
    BIB_DIR_STOCK_CONFIG,
    BIB_DIR_CACHE,
    BIB_DIR_LIB,
    BIB_DIR_PLUGINS,
    BIB_DIR_WEB,
    BIB_DIR_LOG,
    BIB_DIR_LOCK,
    BIB_DIR_HOME,

    // operating system
    BIB_OS_KERNEL_NAME,
    BIB_OS_KERNEL_VERSION,
    BIB_OS_NAME,
    BIB_OS_ID,
    BIB_OS_ID_LIKE,
    BIB_OS_VERSION,
    BIB_OS_VERSION_ID,
    BIB_OS_DETECTION,

    // hardware
    BIB_HW_CPU_CORES,
    BIB_HW_CPU_FREQUENCY,
    BIB_HW_RAM_SIZE,
    BIB_HW_DISK_SPACE,
    BIB_HW_ARCHITECTURE,
    BIB_HW_VIRTUALIZATION,
    BIB_HW_VIRTUALIZATION_DETECTION,

    // container
    BIB_CONTAINER_NAME,
    BIB_CONTAINER_DETECTION,
    BIB_CONTAINER_ORCHESTRATOR,
    BIB_CONTAINER_OS_NAME,
    BIB_CONTAINER_OS_ID,
    BIB_CONTAINER_OS_ID_LIKE,
    BIB_CONTAINER_OS_VERSION,
    BIB_CONTAINER_OS_VERSION_ID,
    BIB_CONTAINER_OS_DETECTION,

    // features
    BIB_FEATURE_BUILT_FOR,
    BIB_FEATURE_CLOUD,
    BIB_FEATURE_HEALTH,
    BIB_FEATURE_STREAMING,
    BIB_FEATURE_BACKFILLING,
    BIB_FEATURE_REPLICATION,
    BIB_FEATURE_STREAMING_COMPRESSION,
    BIB_FEATURE_CONTEXTS,
    BIB_FEATURE_TIERING,
    BIB_FEATURE_ML,

    // database modes
    BIB_DB_DBENGINE,
    BIB_DB_ALLOC,
    BIB_DB_RAM,
    BIB_DB_NONE,

    // connectivity
    BIB_CONNECTIVITY_ACLK,
    BIB_CONNECTIVITY_HTTPD_STATIC,
    BIB_CONNECTIVITY_HTTPD_H2O,
    BIB_CONNECTIVITY_WEBRTC,
    BIB_CONNECTIVITY_NATIVE_HTTPS,
    BIB_CONNECTIVITY_TLS_HOST_VERIFY,

    // libraries
    BIB_LIB_LZ4,
    BIB_LIB_ZSTD,
    BIB_LIB_ZLIB,
    BIB_LIB_BROTLI,
    BIB_LIB_PROTOBUF,
    BIB_LIB_OPENSSL,
    BIB_LIB_LIBDATACHANNEL,
    BIB_LIB_JSONC,
    BIB_LIB_LIBCAP,
    BIB_LIB_LIBCRYPTO,
    BIB_LIB_LIBYAML,

    // plugins
    BIB_PLUGIN_APPS,
    BIB_PLUGIN_LINUX_CGROUPS,
    BIB_PLUGIN_LINUX_CGROUP_NETWORK,
    BIB_PLUGIN_LINUX_PROC,
    BIB_PLUGIN_LINUX_TC,
    BIB_PLUGIN_LINUX_DISKSPACE,
    BIB_PLUGIN_FREEBSD,
    BIB_PLUGIN_MACOS,
    BIB_PLUGIN_STATSD,
    BIB_PLUGIN_TIMEX,
    BIB_PLUGIN_IDLEJITTER,
    BIB_PLUGIN_BASH,
    BIB_PLUGIN_DEBUGFS,
    BIB_PLUGIN_CUPS,
    BIB_PLUGIN_EBPF,
    BIB_PLUGIN_FREEIPMI,
    BIB_PLUGIN_NFACCT,
    BIB_PLUGIN_PERF,
    BIB_PLUGIN_SLABINFO,
    BIB_PLUGIN_XEN,
    BIB_PLUGIN_XEN_VBD_ERROR,
    BIB_PLUGIN_LOGS_MANAGEMENT,

    // exporters
    BIB_EXPORT_AWS_KINESIS,
    BIB_EXPORT_GCP_PUBSUB,
    BIB_EXPORT_MONGOC,
    BIB_EXPORT_PROMETHEUS_EXPORTER,
    BIB_EXPORT_PROMETHEUS_REMOTE_WRITE,
    BIB_EXPORT_GRAPHITE,
    BIB_EXPORT_GRAPHITE_HTTP,
    BIB_EXPORT_JSON,
    BIB_EXPORT_JSON_HTTP,
    BIB_EXPORT_OPENTSDB,
    BIB_EXPORT_OPENTSDB_HTTP,
    BIB_EXPORT_ALLMETRICS,
    BIB_EXPORT_SHELL,

    // debugging / development
    BIB_DEVEL_TRACE_ALLOCATIONS,
    BIB_DEVELOPER_MODE,

    // leave this last
    BIB_TERMINATOR,
} BUILD_INFO_SLOT;

// Section every BUILD_INFO[] entry is filed under (its .category field).
typedef enum __attribute__((packed)) {
    BIC_PACKAGING = 0,
    BIC_DIRECTORIES,
    BIC_OPERATING_SYSTEM,
    BIC_HARDWARE,
    BIC_CONTAINER,
    BIC_FEATURE,
    BIC_DATABASE,
    BIC_CONNECTIVITY,
    BIC_LIBS,
    BIC_PLUGINS,
    BIC_EXPORTERS,
    BIC_DEBUG_DEVEL
} BUILD_INFO_CATEGORY;

// Kind of a BUILD_INFO[] entry (its .type field).
typedef enum __attribute__((packed)) {
    BIT_BOOLEAN = 0,
    BIT_STRING,
} BUILD_INFO_TYPE;
category; + BUILD_INFO_TYPE type; + const char *analytics; + const char *print; + const char *json; + bool status; + const char *value; +} BUILD_INFO[] = { + [BIB_PACKAGING_NETDATA_VERSION] = { + .category = BIC_PACKAGING, + .type = BIT_STRING, + .analytics = NULL, + .print = "Netdata Version", + .json = "version", + .value = "unknown", + }, + [BIB_PACKAGING_INSTALL_TYPE] = { + .category = BIC_PACKAGING, + .type = BIT_STRING, + .analytics = NULL, + .print = "Installation Type", + .json = "type", + .value = "unknown", + }, + [BIB_PACKAGING_ARCHITECTURE] = { + .category = BIC_PACKAGING, + .type = BIT_STRING, + .analytics = NULL, + .print = "Package Architecture", + .json = "arch", + .value = "unknown", + }, + [BIB_PACKAGING_DISTRO] = { + .category = BIC_PACKAGING, + .type = BIT_STRING, + .analytics = NULL, + .print = "Package Distro", + .json = "distro", + .value = "unknown", + }, + [BIB_PACKAGING_CONFIGURE_OPTIONS] = { + .category = BIC_PACKAGING, + .type = BIT_STRING, + .analytics = NULL, + .print = "Configure Options", + .json = "configure", + .value = "unknown", + }, + [BIB_DIR_USER_CONFIG] = { + .category = BIC_DIRECTORIES, + .type = BIT_STRING, + .analytics = NULL, + .print = "User Configurations", + .json = "user_config", + .value = CONFIG_DIR, + }, + [BIB_DIR_STOCK_CONFIG] = { + .category = BIC_DIRECTORIES, + .type = BIT_STRING, + .analytics = NULL, + .print = "Stock Configurations", + .json = "stock_config", + .value = LIBCONFIG_DIR, + }, + [BIB_DIR_CACHE] = { + .category = BIC_DIRECTORIES, + .type = BIT_STRING, + .analytics = NULL, + .print = "Ephemeral Databases (metrics data, metadata)", + .json = "ephemeral_db", + .value = CACHE_DIR, + }, + [BIB_DIR_LIB] = { + .category = BIC_DIRECTORIES, + .type = BIT_STRING, + .analytics = NULL, + .print = "Permanent Databases", + .json = "permanent_db", + .value = VARLIB_DIR, + }, + [BIB_DIR_PLUGINS] = { + .category = BIC_DIRECTORIES, + .type = BIT_STRING, + .analytics = NULL, + .print = "Plugins", + .json = 
"plugins", + .value = PLUGINS_DIR, + }, + [BIB_DIR_WEB] = { + .category = BIC_DIRECTORIES, + .type = BIT_STRING, + .analytics = NULL, + .print = "Static Web Files", + .json = "web", + .value = WEB_DIR, + }, + [BIB_DIR_LOG] = { + .category = BIC_DIRECTORIES, + .type = BIT_STRING, + .analytics = NULL, + .print = "Log Files", + .json = "logs", + .value = LOG_DIR, + }, + [BIB_DIR_LOCK] = { + .category = BIC_DIRECTORIES, + .type = BIT_STRING, + .analytics = NULL, + .print = "Lock Files", + .json = "locks", + .value = VARLIB_DIR "/lock", + }, + [BIB_DIR_HOME] = { + .category = BIC_DIRECTORIES, + .type = BIT_STRING, + .analytics = NULL, + .print = "Home", + .json = "home", + .value = VARLIB_DIR, + }, + [BIB_OS_KERNEL_NAME] = { + .category = BIC_OPERATING_SYSTEM, + .type = BIT_STRING, + .analytics = NULL, + .print = "Kernel", + .json = "kernel", + .value = "unknown", + }, + [BIB_OS_KERNEL_VERSION] = { + .category = BIC_OPERATING_SYSTEM, + .type = BIT_STRING, + .analytics = NULL, + .print = "Kernel Version", + .json = "kernel_version", + .value = "unknown", + }, + [BIB_OS_NAME] = { + .category = BIC_OPERATING_SYSTEM, + .type = BIT_STRING, + .analytics = NULL, + .print = "Operating System", + .json = "os", + .value = "unknown", + }, + [BIB_OS_ID] = { + .category = BIC_OPERATING_SYSTEM, + .type = BIT_STRING, + .analytics = NULL, + .print = "Operating System ID", + .json = "id", + .value = "unknown", + }, + [BIB_OS_ID_LIKE] = { + .category = BIC_OPERATING_SYSTEM, + .type = BIT_STRING, + .analytics = NULL, + .print = "Operating System ID Like", + .json = "id_like", + .value = "unknown", + }, + [BIB_OS_VERSION] = { + .category = BIC_OPERATING_SYSTEM, + .type = BIT_STRING, + .analytics = NULL, + .print = "Operating System Version", + .json = "version", + .value = "unknown", + }, + [BIB_OS_VERSION_ID] = { + .category = BIC_OPERATING_SYSTEM, + .type = BIT_STRING, + .analytics = NULL, + .print = "Operating System Version ID", + .json = "version_id", + .value = "unknown", + }, + 
[BIB_OS_DETECTION] = { + .category = BIC_OPERATING_SYSTEM, + .type = BIT_STRING, + .analytics = NULL, + .print = "Detection", + .json = "detection", + .value = "unknown", + }, + [BIB_HW_CPU_CORES] = { + .category = BIC_HARDWARE, + .type = BIT_STRING, + .analytics = NULL, + .print = "CPU Cores", + .json = "cpu_cores", + .value = "unknown", + }, + [BIB_HW_CPU_FREQUENCY] = { + .category = BIC_HARDWARE, + .type = BIT_STRING, + .analytics = NULL, + .print = "CPU Frequency", + .json = "cpu_frequency", + .value = "unknown", + }, + [BIB_HW_ARCHITECTURE] = { + .category = BIC_HARDWARE, + .type = BIT_STRING, + .analytics = NULL, + .print = "CPU Architecture", + .json = "cpu_architecture", + .value = "unknown", + }, + [BIB_HW_RAM_SIZE] = { + .category = BIC_HARDWARE, + .type = BIT_STRING, + .analytics = NULL, + .print = "RAM Bytes", + .json = "ram", + .value = "unknown", + }, + [BIB_HW_DISK_SPACE] = { + .category = BIC_HARDWARE, + .type = BIT_STRING, + .analytics = NULL, + .print = "Disk Capacity", + .json = "disk", + .value = "unknown", + }, + [BIB_HW_VIRTUALIZATION] = { + .category = BIC_HARDWARE, + .type = BIT_STRING, + .analytics = NULL, + .print = "Virtualization Technology", + .json = "virtualization", + .value = "unknown", + }, + [BIB_HW_VIRTUALIZATION_DETECTION] = { + .category = BIC_HARDWARE, + .type = BIT_STRING, + .analytics = NULL, + .print = "Virtualization Detection", + .json = "virtualization_detection", + .value = "unknown", + }, + [BIB_CONTAINER_NAME] = { + .category = BIC_CONTAINER, + .type = BIT_STRING, + .analytics = NULL, + .print = "Container", + .json = "container", + .value = "unknown", + }, + [BIB_CONTAINER_DETECTION] = { + .category = BIC_CONTAINER, + .type = BIT_STRING, + .analytics = NULL, + .print = "Container Detection", + .json = "container_detection", + .value = "unknown", + }, + [BIB_CONTAINER_ORCHESTRATOR] = { + .category = BIC_CONTAINER, + .type = BIT_STRING, + .analytics = NULL, + .print = "Container Orchestrator", + .json = "orchestrator", 
+ .value = "unknown", + }, + [BIB_CONTAINER_OS_NAME] = { + .category = BIC_CONTAINER, + .type = BIT_STRING, + .analytics = NULL, + .print = "Container Operating System", + .json = "os", + .value = "unknown", + }, + [BIB_CONTAINER_OS_ID] = { + .category = BIC_CONTAINER, + .type = BIT_STRING, + .analytics = NULL, + .print = "Container Operating System ID", + .json = "os_id", + .value = "unknown", + }, + [BIB_CONTAINER_OS_ID_LIKE] = { + .category = BIC_CONTAINER, + .type = BIT_STRING, + .analytics = NULL, + .print = "Container Operating System ID Like", + .json = "os_id_like", + .value = "unknown", + }, + [BIB_CONTAINER_OS_VERSION] = { + .category = BIC_CONTAINER, + .type = BIT_STRING, + .analytics = NULL, + .print = "Container Operating System Version", + .json = "version", + .value = "unknown", + }, + [BIB_CONTAINER_OS_VERSION_ID] = { + .category = BIC_CONTAINER, + .type = BIT_STRING, + .analytics = NULL, + .print = "Container Operating System Version ID", + .json = "version_id", + .value = "unknown", + }, + [BIB_CONTAINER_OS_DETECTION] = { + .category = BIC_CONTAINER, + .type = BIT_STRING, + .analytics = NULL, + .print = "Container Operating System Detection", + .json = "detection", + .value = "unknown", + }, + [BIB_FEATURE_BUILT_FOR] = { + .category = BIC_FEATURE, + .type = BIT_STRING, + .analytics = NULL, + .print = "Built For", + .json = "built-for", + .value = "unknown", + }, + [BIB_FEATURE_CLOUD] = { + .category = BIC_FEATURE, + .type = BIT_BOOLEAN, + .analytics = "Netdata Cloud", + .print = "Netdata Cloud", + .json = "cloud", + .value = NULL, + }, + [BIB_FEATURE_HEALTH] = { + .category = BIC_FEATURE, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Health (trigger alerts and send notifications)", + .json = "health", + .value = NULL, + }, + [BIB_FEATURE_STREAMING] = { + .category = BIC_FEATURE, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Streaming (stream metrics to parent Netdata servers)", + .json = "streaming", + .value = NULL, + }, + 
[BIB_FEATURE_BACKFILLING] = { + .category = BIC_FEATURE, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Back-filling (of higher database tiers)", + .json = "back-filling", + .value = NULL, + }, + [BIB_FEATURE_REPLICATION] = { + .category = BIC_FEATURE, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Replication (fill the gaps of parent Netdata servers)", + .json = "replication", + .value = NULL, + }, + [BIB_FEATURE_STREAMING_COMPRESSION] = { + .category = BIC_FEATURE, + .type = BIT_BOOLEAN, + .analytics = "Stream Compression", + .print = "Streaming and Replication Compression", + .json = "stream-compression", + .value = NULL, + }, + [BIB_FEATURE_CONTEXTS] = { + .category = BIC_FEATURE, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Contexts (index all active and archived metrics)", + .json = "contexts", + .value = NULL, + }, + [BIB_FEATURE_TIERING] = { + .category = BIC_FEATURE, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Tiering (multiple dbs with different metrics resolution)", + .json = "tiering", + .value = TOSTRING(RRD_STORAGE_TIERS), + }, + [BIB_FEATURE_ML] = { + .category = BIC_FEATURE, + .type = BIT_BOOLEAN, + .analytics = "Machine Learning", + .print = "Machine Learning", + .json = "ml", + .value = NULL, + }, + [BIB_DB_DBENGINE] = { + .category = BIC_DATABASE, + .type = BIT_BOOLEAN, + .analytics = "dbengine", + .print = "dbengine (compression)", + .json = "dbengine", + .value = NULL, + }, + [BIB_DB_ALLOC] = { + .category = BIC_DATABASE, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "alloc", + .json = "alloc", + .value = NULL, + }, + [BIB_DB_RAM] = { + .category = BIC_DATABASE, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "ram", + .json = "ram", + .value = NULL, + }, + [BIB_DB_NONE] = { + .category = BIC_DATABASE, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "none", + .json = "none", + .value = NULL, + }, + [BIB_CONNECTIVITY_ACLK] = { + .category = BIC_CONNECTIVITY, + .type = BIT_BOOLEAN, + 
.analytics = NULL, + .print = "ACLK (Agent-Cloud Link: MQTT over WebSockets over TLS)", + .json = "aclk", + .value = NULL, + }, + [BIB_CONNECTIVITY_HTTPD_STATIC] = { + .category = BIC_CONNECTIVITY, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "static (Netdata internal web server)", + .json = "static", + .value = NULL, + }, + [BIB_CONNECTIVITY_HTTPD_H2O] = { + .category = BIC_CONNECTIVITY, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "h2o (web server)", + .json = "h2o", + .value = NULL, + }, + [BIB_CONNECTIVITY_WEBRTC] = { + .category = BIC_CONNECTIVITY, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "WebRTC (experimental)", + .json = "webrtc", + .value = NULL, + }, + [BIB_CONNECTIVITY_NATIVE_HTTPS] = { + .category = BIC_CONNECTIVITY, + .type = BIT_BOOLEAN, + .analytics = "Native HTTPS", + .print = "Native HTTPS (TLS Support)", + .json = "native-https", + .value = NULL, + }, + [BIB_CONNECTIVITY_TLS_HOST_VERIFY] = { + .category = BIC_CONNECTIVITY, + .type = BIT_BOOLEAN, + .analytics = "TLS Host Verification", + .print = "TLS Host Verification", + .json = "tls-host-verify", + .value = NULL, + }, + [BIB_LIB_LZ4] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "LZ4 (extremely fast lossless compression algorithm)", + .json = "lz4", + .value = NULL, + }, + [BIB_LIB_ZSTD] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "ZSTD (fast, lossless compression algorithm)", + .json = "zstd", + .value = NULL, + }, + [BIB_LIB_ZLIB] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = "zlib", + .print = "zlib (lossless data-compression library)", + .json = "zlib", + .value = NULL, + }, + [BIB_LIB_BROTLI] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Brotli (generic-purpose lossless compression algorithm)", + .json = "brotli", + .value = NULL, + }, + [BIB_LIB_PROTOBUF] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = 
"protobuf", + .print = "protobuf (platform-neutral data serialization protocol)", + .json = "protobuf", + .value = NULL, + }, + [BIB_LIB_OPENSSL] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "OpenSSL (cryptography)", + .json = "openssl", + .value = NULL, + }, + [BIB_LIB_LIBDATACHANNEL] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "libdatachannel (stand-alone WebRTC data channels)", + .json = "libdatachannel", + .value = NULL, + }, + [BIB_LIB_JSONC] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = "JSON-C", + .print = "JSON-C (lightweight JSON manipulation)", + .json = "jsonc", + .value = NULL, + }, + [BIB_LIB_LIBCAP] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = "libcap", + .print = "libcap (Linux capabilities system operations)", + .json = "libcap", + .value = NULL, + }, + [BIB_LIB_LIBCRYPTO] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = "libcrypto", + .print = "libcrypto (cryptographic functions)", + .json = "libcrypto", + .value = NULL, + }, + [BIB_LIB_LIBYAML] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = "libyaml", + .print = "libyaml (library for parsing and emitting YAML)", + .json = "libyaml", + .value = NULL, + }, + [BIB_PLUGIN_APPS] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "apps", + .print = "apps (monitor processes)", + .json = "apps", + .value = NULL, + }, + [BIB_PLUGIN_LINUX_CGROUPS] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "cgroups (monitor containers and VMs)", + .json = "cgroups", + .value = NULL, + }, + [BIB_PLUGIN_LINUX_CGROUP_NETWORK] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "cgroup Network Tracking", + .print = "cgroup-network (associate interfaces to CGROUPS)", + .json = "cgroup-network", + .value = NULL, + }, + [BIB_PLUGIN_LINUX_PROC] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + 
.analytics = NULL, + .print = "proc (monitor Linux systems)", + .json = "proc", + .value = NULL, + }, + [BIB_PLUGIN_LINUX_TC] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "tc (monitor Linux network QoS)", + .json = "tc", + .value = NULL, + }, + [BIB_PLUGIN_LINUX_DISKSPACE] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "diskspace (monitor Linux mount points)", + .json = "diskspace", + .value = NULL, + }, + [BIB_PLUGIN_FREEBSD] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "freebsd (monitor FreeBSD systems)", + .json = "freebsd", + .value = NULL, + }, + [BIB_PLUGIN_MACOS] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "macos (monitor MacOS systems)", + .json = "macos", + .value = NULL, + }, + [BIB_PLUGIN_STATSD] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "statsd (collect custom application metrics)", + .json = "statsd", + .value = NULL, + }, + [BIB_PLUGIN_TIMEX] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "timex (check system clock synchronization)", + .json = "timex", + .value = NULL, + }, + [BIB_PLUGIN_IDLEJITTER] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "idlejitter (check system latency and jitter)", + .json = "idlejitter", + .value = NULL, + }, + [BIB_PLUGIN_BASH] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "bash (support shell data collection jobs - charts.d)", + .json = "charts.d", + .value = NULL, + }, + [BIB_PLUGIN_DEBUGFS] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "debugfs", + .print = "debugfs (kernel debugging metrics)", + .json = "debugfs", + .value = NULL, + }, + [BIB_PLUGIN_CUPS] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "CUPS", + .print = "cups (monitor printers and print 
jobs)", + .json = "cups", + .value = NULL, + }, + [BIB_PLUGIN_EBPF] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "EBPF", + .print = "ebpf (monitor system calls)", + .json = "ebpf", + .value = NULL, + }, + [BIB_PLUGIN_FREEIPMI] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "IPMI", + .print = "freeipmi (monitor enterprise server H/W)", + .json = "freeipmi", + .value = NULL, + }, + [BIB_PLUGIN_NFACCT] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "NFACCT", + .print = "nfacct (gather netfilter accounting)", + .json = "nfacct", + .value = NULL, + }, + [BIB_PLUGIN_PERF] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "perf", + .print = "perf (collect kernel performance events)", + .json = "perf", + .value = NULL, + }, + [BIB_PLUGIN_SLABINFO] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "slabinfo", + .print = "slabinfo (monitor kernel object caching)", + .json = "slabinfo", + .value = NULL, + }, + [BIB_PLUGIN_XEN] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "Xen", + .print = "Xen", + .json = "xen", + .value = NULL, + }, + [BIB_PLUGIN_XEN_VBD_ERROR] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "Xen VBD Error Tracking", + .print = "Xen VBD Error Tracking", + .json = "xen-vbd-error", + .value = NULL, + }, + [BIB_PLUGIN_LOGS_MANAGEMENT] = { + .category = BIC_PLUGINS, + .type = BIT_BOOLEAN, + .analytics = "Logs Management", + .print = "Logs Management", + .json = "logs-management", + .value = NULL, + }, + [BIB_EXPORT_MONGOC] = { + .category = BIC_EXPORTERS, + .type = BIT_BOOLEAN, + .analytics = "MongoDB", + .print = "MongoDB", + .json = "mongodb", + .value = NULL, + }, + [BIB_EXPORT_GRAPHITE] = { + .category = BIC_EXPORTERS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Graphite", + .json = "graphite", + .value = NULL, + }, + [BIB_EXPORT_GRAPHITE_HTTP] = { + .category = BIC_EXPORTERS, + .type = 
BIT_BOOLEAN, + .analytics = NULL, + .print = "Graphite HTTP / HTTPS", + .json = "graphite:http", + .value = NULL, + }, + [BIB_EXPORT_JSON] = { + .category = BIC_EXPORTERS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "JSON", + .json = "json", + .value = NULL, + }, + [BIB_EXPORT_JSON_HTTP] = { + .category = BIC_EXPORTERS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "JSON HTTP / HTTPS", + .json = "json:http", + .value = NULL, + }, + [BIB_EXPORT_OPENTSDB] = { + .category = BIC_EXPORTERS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "OpenTSDB", + .json = "opentsdb", + .value = NULL, + }, + [BIB_EXPORT_OPENTSDB_HTTP] = { + .category = BIC_EXPORTERS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "OpenTSDB HTTP / HTTPS", + .json = "opentsdb:http", + .value = NULL, + }, + [BIB_EXPORT_ALLMETRICS] = { + .category = BIC_EXPORTERS, + .analytics = NULL, + .type = BIT_BOOLEAN, + .print = "All Metrics API", + .json = "allmetrics", + .value = NULL, + }, + [BIB_EXPORT_SHELL] = { + .category = BIC_EXPORTERS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Shell (use metrics in shell scripts)", + .json = "shell", + .value = NULL, + }, + [BIB_EXPORT_PROMETHEUS_EXPORTER] = { + .category = BIC_EXPORTERS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Prometheus (OpenMetrics) Exporter", + .json = "openmetrics", + .value = NULL, + }, + [BIB_EXPORT_PROMETHEUS_REMOTE_WRITE] = { + .category = BIC_EXPORTERS, + .type = BIT_BOOLEAN, + .analytics = "Prometheus Remote Write", + .print = "Prometheus Remote Write", + .json = "prom-remote-write", + .value = NULL, + }, + [BIB_EXPORT_AWS_KINESIS] = { + .category = BIC_EXPORTERS, + .type = BIT_BOOLEAN, + .analytics = "AWS Kinesis", + .print = "AWS Kinesis", + .json = "kinesis", + .value = NULL, + }, + [BIB_EXPORT_GCP_PUBSUB] = { + .category = BIC_EXPORTERS, + .type = BIT_BOOLEAN, + .analytics = "GCP PubSub", + .print = "GCP PubSub", + .json = "pubsub", + .value = NULL, + }, + 
[BIB_DEVEL_TRACE_ALLOCATIONS] = { + .category = BIC_DEBUG_DEVEL, + .type = BIT_BOOLEAN, + .analytics = "DebugTraceAlloc", + .print = "Trace All Netdata Allocations (with charts)", + .json = "trace-allocations", + .value = NULL, + }, + [BIB_DEVELOPER_MODE] = { + .category = BIC_DEBUG_DEVEL, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Developer Mode (more runtime checks, slower)", + .json = "dev-mode", + .value = NULL, + }, + + // leave this last + [BIB_TERMINATOR] = { + .category = 0, + .type = 0, + .analytics = NULL, + .print = NULL, + .json = NULL, + .value = NULL, + }, +}; + +static void build_info_set_value(BUILD_INFO_SLOT slot, const char *value) { + BUILD_INFO[slot].value = value; +} + +static void build_info_append_value(BUILD_INFO_SLOT slot, const char *value) { + size_t size = BUILD_INFO[slot].value ? strlen(BUILD_INFO[slot].value) + 1 : 0; + size += strlen(value); + char buf[size + 1]; + + if(BUILD_INFO[slot].value) { + strcpy(buf, BUILD_INFO[slot].value); + strcat(buf, " "); + strcat(buf, value); + } + else + strcpy(buf, value); + + freez((void *)BUILD_INFO[slot].value); + BUILD_INFO[slot].value = strdupz(buf); +} + +static void build_info_set_value_strdupz(BUILD_INFO_SLOT slot, const char *value) { + if(!value) value = ""; + build_info_set_value(slot, strdupz(value)); +} + +static void build_info_set_status(BUILD_INFO_SLOT slot, bool status) { + BUILD_INFO[slot].status = status; +} + +__attribute__((constructor)) void initialize_build_info(void) { + build_info_set_value(BIB_PACKAGING_NETDATA_VERSION, NETDATA_VERSION); + build_info_set_value(BIB_PACKAGING_CONFIGURE_OPTIONS, CONFIGURE_COMMAND); + +#ifdef OS_LINUX + build_info_set_status(BIB_FEATURE_BUILT_FOR, true); + build_info_set_value(BIB_FEATURE_BUILT_FOR, "Linux"); + build_info_set_status(BIB_PLUGIN_LINUX_CGROUPS, true); + build_info_set_status(BIB_PLUGIN_LINUX_PROC, true); + build_info_set_status(BIB_PLUGIN_LINUX_DISKSPACE, true); + build_info_set_status(BIB_PLUGIN_LINUX_TC, true); 
+#endif +#ifdef OS_FREEBSD + build_info_set_status(BIB_FEATURE_BUILT_FOR, true); + build_info_set_value(BIB_FEATURE_BUILT_FOR, "FreeBSD"); + build_info_set_status(BIB_PLUGIN_FREEBSD, true); +#endif +#ifdef OS_MACOS + build_info_set_status(BIB_FEATURE_BUILT_FOR, true); + build_info_set_value(BIB_FEATURE_BUILT_FOR, "MacOS"); + build_info_set_status(BIB_PLUGIN_MACOS, true); +#endif +#ifdef OS_WINDOWS + build_info_set_status(BIB_FEATURE_BUILT_FOR, true); +#if defined(__CYGWIN__) && defined(__MSYS__) + build_info_set_value(BIB_FEATURE_BUILT_FOR, "Windows (MSYS)"); +#elif defined(__CYGWIN__) + build_info_set_value(BIB_FEATURE_BUILT_FOR, "Windows (CYGWIN)"); +#else + build_info_set_value(BIB_FEATURE_BUILT_FOR, "Windows"); +#endif +#endif + +#ifdef ENABLE_ACLK + build_info_set_status(BIB_FEATURE_CLOUD, true); + build_info_set_status(BIB_CONNECTIVITY_ACLK, true); +#else + build_info_set_status(BIB_FEATURE_CLOUD, false); +#ifdef DISABLE_CLOUD + build_info_set_value(BIB_FEATURE_CLOUD, "disabled"); +#else + build_info_set_value(BIB_FEATURE_CLOUD, "unavailable"); +#endif +#endif + + build_info_set_status(BIB_FEATURE_HEALTH, true); + build_info_set_status(BIB_FEATURE_STREAMING, true); + build_info_set_status(BIB_FEATURE_BACKFILLING, true); + build_info_set_status(BIB_FEATURE_REPLICATION, true); + + build_info_set_status(BIB_FEATURE_STREAMING_COMPRESSION, true); + +#ifdef ENABLE_ZSTD + build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "zstd"); +#endif +#ifdef ENABLE_LZ4 + build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "lz4"); +#endif + build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "gzip"); +#ifdef ENABLE_BROTLI + build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "brotli"); +#endif + + build_info_set_status(BIB_FEATURE_CONTEXTS, true); + build_info_set_status(BIB_FEATURE_TIERING, true); + +#ifdef ENABLE_ML + build_info_set_status(BIB_FEATURE_ML, true); +#endif + +#ifdef ENABLE_DBENGINE + build_info_set_status(BIB_DB_DBENGINE, true); 
+#ifdef ENABLE_ZSTD + build_info_append_value(BIB_DB_DBENGINE, "zstd"); +#endif +#ifdef ENABLE_LZ4 + build_info_append_value(BIB_DB_DBENGINE, "lz4"); +#endif +#endif + build_info_set_status(BIB_DB_ALLOC, true); + build_info_set_status(BIB_DB_RAM, true); + build_info_set_status(BIB_DB_NONE, true); + + build_info_set_status(BIB_CONNECTIVITY_HTTPD_STATIC, true); +#ifdef ENABLE_H2O + build_info_set_status(BIB_CONNECTIVITY_HTTPD_H2O, true); +#endif +#ifdef ENABLE_WEBRTC + build_info_set_status(BIB_CONNECTIVITY_WEBRTC, true); +#endif +#ifdef ENABLE_HTTPS + build_info_set_status(BIB_CONNECTIVITY_NATIVE_HTTPS, true); +#endif +#if defined(HAVE_X509_VERIFY_PARAM_set1_host) && HAVE_X509_VERIFY_PARAM_set1_host == 1 + build_info_set_status(BIB_CONNECTIVITY_TLS_HOST_VERIFY, true); +#endif + +#ifdef ENABLE_LZ4 + build_info_set_status(BIB_LIB_LZ4, true); +#endif +#ifdef ENABLE_ZSTD + build_info_set_status(BIB_LIB_ZSTD, true); +#endif +#ifdef ENABLE_BROTLI + build_info_set_status(BIB_LIB_BROTLI, true); +#endif + + build_info_set_status(BIB_LIB_ZLIB, true); + +#ifdef HAVE_DLIB + build_info_set_status(BIB_LIB_DLIB, true); + build_info_set_value(BIB_LIB_DLIB, "bundled"); +#endif + +#ifdef HAVE_PROTOBUF + build_info_set_status(BIB_LIB_PROTOBUF, true); +#ifdef BUNDLED_PROTOBUF + build_info_set_value(BIB_LIB_PROTOBUF, "bundled"); +#else + build_info_set_value(BIB_LIB_PROTOBUF, "system"); +#endif +#endif + +#ifdef HAVE_LIBDATACHANNEL + build_info_set_status(BIB_LIB_LIBDATACHANNEL, true); +#endif +#ifdef ENABLE_OPENSSL + build_info_set_status(BIB_LIB_OPENSSL, true); +#endif +#ifdef ENABLE_JSONC + build_info_set_status(BIB_LIB_JSONC, true); +#endif +#ifdef HAVE_CAPABILITY + build_info_set_status(BIB_LIB_LIBCAP, true); +#endif +#ifdef HAVE_CRYPTO + build_info_set_status(BIB_LIB_LIBCRYPTO, true); +#endif +#ifdef HAVE_LIBYAML + build_info_set_status(BIB_LIB_LIBYAML, true); +#endif + +#ifdef ENABLE_PLUGIN_APPS + build_info_set_status(BIB_PLUGIN_APPS, true); +#endif +#ifdef HAVE_SETNS + 
build_info_set_status(BIB_PLUGIN_LINUX_CGROUP_NETWORK, true); +#endif + + build_info_set_status(BIB_PLUGIN_STATSD, true); + build_info_set_status(BIB_PLUGIN_TIMEX, true); + build_info_set_status(BIB_PLUGIN_IDLEJITTER, true); + build_info_set_status(BIB_PLUGIN_BASH, true); + +#ifdef ENABLE_PLUGIN_DEBUGFS + build_info_set_status(BIB_PLUGIN_DEBUGFS, true); +#endif +#ifdef ENABLE_PLUGIN_CUPS + build_info_set_status(BIB_PLUGIN_CUPS, true); +#endif +#ifdef ENABLE_PLUGIN_EBPF + build_info_set_status(BIB_PLUGIN_EBPF, true); +#endif +#ifdef ENABLE_PLUGIN_FREEIPMI + build_info_set_status(BIB_PLUGIN_FREEIPMI, true); +#endif +#ifdef ENABLE_PLUGIN_NFACCT + build_info_set_status(BIB_PLUGIN_NFACCT, true); +#endif +#ifdef ENABLE_PLUGIN_PERF + build_info_set_status(BIB_PLUGIN_PERF, true); +#endif +#ifdef ENABLE_PLUGIN_SLABINFO + build_info_set_status(BIB_PLUGIN_SLABINFO, true); +#endif +#ifdef ENABLE_PLUGIN_XENSTAT + build_info_set_status(BIB_PLUGIN_XEN, true); +#endif +#ifdef HAVE_XENSTAT_VBD_ERROR + build_info_set_status(BIB_PLUGIN_XEN_VBD_ERROR, true); +#endif +#ifdef ENABLE_LOGSMANAGEMENT + build_info_set_status(BIB_PLUGIN_LOGS_MANAGEMENT, true); +#endif + + build_info_set_status(BIB_EXPORT_PROMETHEUS_EXPORTER, true); + build_info_set_status(BIB_EXPORT_GRAPHITE, true); + build_info_set_status(BIB_EXPORT_GRAPHITE_HTTP, true); + build_info_set_status(BIB_EXPORT_JSON, true); + build_info_set_status(BIB_EXPORT_JSON_HTTP, true); + build_info_set_status(BIB_EXPORT_OPENTSDB, true); + build_info_set_status(BIB_EXPORT_OPENTSDB_HTTP, true); + build_info_set_status(BIB_EXPORT_ALLMETRICS, true); + build_info_set_status(BIB_EXPORT_SHELL, true); + +#ifdef HAVE_KINESIS + build_info_set_status(BIB_EXPORT_AWS_KINESIS, true); +#endif +#ifdef ENABLE_EXPORTING_PUBSUB + build_info_set_status(BIB_EXPORT_GCP_PUBSUB, true); +#endif +#ifdef HAVE_MONGOC + build_info_set_status(BIB_EXPORT_MONGOC, true); +#endif +#ifdef ENABLE_PROMETHEUS_REMOTE_WRITE + 
build_info_set_status(BIB_EXPORT_PROMETHEUS_REMOTE_WRITE, true);
+#endif
+
+#ifdef NETDATA_TRACE_ALLOCATIONS
+    build_info_set_status(BIB_DEVEL_TRACE_ALLOCATIONS, true);
+#endif
+
+#if defined(NETDATA_DEV_MODE) || defined(NETDATA_INTERNAL_CHECKS)
+    build_info_set_status(BIB_DEVELOPER_MODE, true);
+#endif
+}
+
+// ----------------------------------------------------------------------------
+// system info
+
+int get_system_info(struct rrdhost_system_info *system_info);
+
+// Copy the OS / hardware / container description into the build-info table.
+// Runs its body once: the first caller to take the spinlock fills the table and
+// sets 'populated' as the very last step; later callers return immediately.
+// Uses localhost->system_info when available, otherwise collects a temporary
+// rrdhost_system_info with get_system_info() and frees it before returning.
+static void populate_system_info(void) {
+    static bool populated = false;
+    static SPINLOCK spinlock = NETDATA_SPINLOCK_INITIALIZER;
+
+    if(populated)
+        return;
+
+    spinlock_lock(&spinlock);
+
+    // re-check under the lock: another thread may have finished meanwhile
+    if(populated) {
+        spinlock_unlock(&spinlock);
+        return;
+    }
+
+    struct rrdhost_system_info *system_info;
+    bool free_system_info = false;
+
+    if(localhost && localhost->system_info) {
+        system_info = localhost->system_info;
+    }
+    else {
+        system_info = callocz(1, sizeof(struct rrdhost_system_info));
+        get_system_info(system_info);
+        free_system_info = true;
+    }
+
+    build_info_set_value_strdupz(BIB_OS_KERNEL_NAME, system_info->kernel_name);
+    build_info_set_value_strdupz(BIB_OS_KERNEL_VERSION, system_info->kernel_version);
+    build_info_set_value_strdupz(BIB_OS_NAME, system_info->host_os_name);
+    build_info_set_value_strdupz(BIB_OS_ID, system_info->host_os_id);
+    build_info_set_value_strdupz(BIB_OS_ID_LIKE, system_info->host_os_id_like);
+    build_info_set_value_strdupz(BIB_OS_VERSION, system_info->host_os_version);
+    // NOTE(review): BIB_OS_VERSION_ID is filled from container_os_version_id, the same
+    // field used for BIB_CONTAINER_OS_VERSION_ID below - confirm this is intended.
+    build_info_set_value_strdupz(BIB_OS_VERSION_ID, system_info->container_os_version_id);
+    build_info_set_value_strdupz(BIB_OS_DETECTION, system_info->host_os_detection);
+    build_info_set_value_strdupz(BIB_HW_CPU_CORES, system_info->host_cores);
+    build_info_set_value_strdupz(BIB_HW_CPU_FREQUENCY, system_info->host_cpu_freq);
+    build_info_set_value_strdupz(BIB_HW_RAM_SIZE, system_info->host_ram_total);
+    build_info_set_value_strdupz(BIB_HW_DISK_SPACE, system_info->host_disk_space);
+    build_info_set_value_strdupz(BIB_HW_ARCHITECTURE, system_info->architecture);
+    build_info_set_value_strdupz(BIB_HW_VIRTUALIZATION, system_info->virtualization);
+    build_info_set_value_strdupz(BIB_HW_VIRTUALIZATION_DETECTION, system_info->virt_detection);
+    build_info_set_value_strdupz(BIB_CONTAINER_NAME, system_info->container);
+    build_info_set_value_strdupz(BIB_CONTAINER_DETECTION, system_info->container_detection);
+
+    if(system_info->is_k8s_node && !strcmp(system_info->is_k8s_node, "true"))
+        build_info_set_value_strdupz(BIB_CONTAINER_ORCHESTRATOR, "kubernetes");
+    else
+        build_info_set_value_strdupz(BIB_CONTAINER_ORCHESTRATOR, "none");
+
+    build_info_set_value_strdupz(BIB_CONTAINER_OS_NAME, system_info->container_os_name);
+    build_info_set_value_strdupz(BIB_CONTAINER_OS_ID, system_info->container_os_id);
+    build_info_set_value_strdupz(BIB_CONTAINER_OS_ID_LIKE, system_info->container_os_id_like);
+    build_info_set_value_strdupz(BIB_CONTAINER_OS_VERSION, system_info->container_os_version);
+    build_info_set_value_strdupz(BIB_CONTAINER_OS_VERSION_ID, system_info->container_os_version_id);
+    build_info_set_value_strdupz(BIB_CONTAINER_OS_DETECTION, system_info->container_os_detection);
+
+    if(free_system_info)
+        rrdhost_system_info_free(system_info);
+
+    populated = true;
+    spinlock_unlock(&spinlock);
+}
+
+// ----------------------------------------------------------------------------
+// packaging info
+
+// Extract the value of a KEY='value' line, in place.
+//   buffer - the whole line, starting with 'key' followed by two characters
+//            (the callers in this file match "KEY='" before calling)
+//   key    - the key name, without the "='" suffix
+// Returns a pointer inside 'buffer' to the value with leading and trailing
+// single quotes removed (trailing quotes are overwritten with '\0').
+// A line that ends right after the key (or is shorter than key + 2) yields "".
+char *get_value_from_key(char *buffer, char *key) {
+    size_t skip = strlen(key) + 2;          // key, '=' and the opening quote
+    size_t buffer_len = strlen(buffer);
+    if (skip > buffer_len)
+        skip = buffer_len;                  // never step past the terminator
+
+    char *s = buffer + skip;
+    while (*s == '\'')
+        s++;
+
+    // strip trailing quotes, but never touch the first character of the value
+    char *t = s + strlen(s);
+    while (t - 1 > s && t[-1] == '\'')
+        *--t = '\0';
+
+    return s;
+}
+
+// Read <user config dir>/.install-type and return newly allocated copies of the
+// INSTALL_TYPE, PREBUILT_ARCH and PREBUILT_DISTRO values through the three
+// output pointers. An output is only written when its key is found, so callers
+// must initialise them; a missing file leaves all three untouched.
+void get_install_type(char **install_type, char **prebuilt_arch, char **prebuilt_dist) {
+    char *install_type_filename;
+
+    int install_type_filename_len = (strlen(netdata_configured_user_config_dir) + strlen(".install-type") + 3);
+    install_type_filename = mallocz(sizeof(char) * install_type_filename_len);
+    snprintfz(install_type_filename, install_type_filename_len - 1, "%s/%s", netdata_configured_user_config_dir, ".install-type");
+
+    FILE *fp = fopen(install_type_filename, "r");
+    if (fp) {
+        char *s, buf[256 + 1];
+        size_t len = 0;
+
+        while ((s = fgets_trim_len(buf, 256, fp, &len))) {
+            if (!strncmp(buf, "INSTALL_TYPE='", 14))
+                *install_type = strdupz((char *)get_value_from_key(buf, "INSTALL_TYPE"));
+            else if (!strncmp(buf, "PREBUILT_ARCH='", 15))
+                *prebuilt_arch = strdupz((char *)get_value_from_key(buf, "PREBUILT_ARCH"));
+            else if (!strncmp(buf, "PREBUILT_DISTRO='", 17))
+                *prebuilt_dist = strdupz((char *)get_value_from_key(buf, "PREBUILT_DISTRO"));
+        }
+        fclose(fp);
+    }
+    freez(install_type_filename);
+}
+
+// Cached result of get_install_type(), guarded by its own spinlock.
+static struct {
+    SPINLOCK spinlock;
+    bool populated;
+    char *install_type;
+    char *prebuilt_arch;
+    char *prebuilt_distro;
+} BUILD_PACKAGING_INFO = { 0 };
+
+// Fill the packaging rows of the build-info table once. Values missing from
+// .install-type are reported as "unknown". 'populated' is raised only after the
+// table has been written, so the unlocked fast path never sees unset values.
+static void populate_packaging_info() {
+    if(!BUILD_PACKAGING_INFO.populated) {
+        spinlock_lock(&BUILD_PACKAGING_INFO.spinlock);
+        if(!BUILD_PACKAGING_INFO.populated) {
+            get_install_type(&BUILD_PACKAGING_INFO.install_type, &BUILD_PACKAGING_INFO.prebuilt_arch, &BUILD_PACKAGING_INFO.prebuilt_distro);
+
+            if(!BUILD_PACKAGING_INFO.install_type)
+                BUILD_PACKAGING_INFO.install_type = "unknown";
+
+            if(!BUILD_PACKAGING_INFO.prebuilt_arch)
+                BUILD_PACKAGING_INFO.prebuilt_arch = "unknown";
+
+            if(!BUILD_PACKAGING_INFO.prebuilt_distro)
+                BUILD_PACKAGING_INFO.prebuilt_distro = "unknown";
+
+            build_info_set_value(BIB_PACKAGING_INSTALL_TYPE, strdupz(BUILD_PACKAGING_INFO.install_type));
+            build_info_set_value(BIB_PACKAGING_ARCHITECTURE, strdupz(BUILD_PACKAGING_INFO.prebuilt_arch));
+            build_info_set_value(BIB_PACKAGING_DISTRO, strdupz(BUILD_PACKAGING_INFO.prebuilt_distro));
+
+            BUILD_PACKAGING_INFO.populated = true;
+        }
+        spinlock_unlock(&BUILD_PACKAGING_INFO.spinlock);
+    }
+}
+
+// ----------------------------------------------------------------------------
+
+static void populate_directories(void) { +
build_info_set_value(BIB_DIR_USER_CONFIG, netdata_configured_user_config_dir);
+    build_info_set_value(BIB_DIR_STOCK_CONFIG, netdata_configured_stock_config_dir);
+    build_info_set_value(BIB_DIR_CACHE, netdata_configured_cache_dir);
+    build_info_set_value(BIB_DIR_LIB, netdata_configured_varlib_dir);
+    build_info_set_value(BIB_DIR_PLUGINS, netdata_configured_primary_plugins_dir);
+    build_info_set_value(BIB_DIR_WEB, netdata_configured_web_dir);
+    build_info_set_value(BIB_DIR_LOG, netdata_configured_log_dir);
+    build_info_set_value(BIB_DIR_LOCK, netdata_configured_lock_dir);
+    build_info_set_value(BIB_DIR_HOME, netdata_configured_home_dir);
+}
+
+// ----------------------------------------------------------------------------
+
+// Emit one JSON object named 'key' holding every BUILD_INFO row of 'category'
+// that has a json name: rows with a value become strings, the rest booleans
+// carrying the row status.
+static void print_build_info_category_to_json(BUFFER *b, BUILD_INFO_CATEGORY category, const char *key) {
+    buffer_json_member_add_object(b, key);
+    for(size_t i = 0; i < BIB_TERMINATOR ;i++) {
+        if(BUILD_INFO[i].category == category && BUILD_INFO[i].json) {
+            if(BUILD_INFO[i].value)
+                buffer_json_member_add_string(b, BUILD_INFO[i].json, BUILD_INFO[i].value);
+            else
+                buffer_json_member_add_boolean(b, BUILD_INFO[i].json, BUILD_INFO[i].status);
+        }
+    }
+    buffer_json_object_close(b); // key
+}
+
+// Print to stdout a titled section with every BUILD_INFO row of 'category' that
+// has a print label. Labels are followed by a run of underscores so the ':'
+// separators line up; BIT_STRING rows show their value (or "unknown"), other
+// rows show YES/NO plus the optional value in parentheses.
+static void print_build_info_category_to_console(BUILD_INFO_CATEGORY category, const char *title) {
+    printf("%s:\n", title);
+    for(size_t i = 0; i < BIB_TERMINATOR ;i++) {
+        if(BUILD_INFO[i].category == category && BUILD_INFO[i].print) {
+            const char *v = BUILD_INFO[i].status ? "YES" : "NO";
+            const char *k = BUILD_INFO[i].print;
+            const char *d = BUILD_INFO[i].value;
+
+            // 59 - strlen(k) underscores, none when the label is already that long;
+            // compared in size_t so a long label cannot wrap around
+            char padding[60];
+            size_t label_length = strlen(k);
+            size_t padding_length = (label_length < sizeof(padding) - 1) ? (sizeof(padding) - 1 - label_length) : 0;
+
+            memset(padding, '_', padding_length);
+            padding[padding_length] = '\0';
+
+            if(BUILD_INFO[i].type == BIT_STRING)
+                printf(" %s %s : %s\n", k, padding, d?d:"unknown");
+            else
+                printf(" %s %s : %s%s%s%s\n", k, padding, v,
+                       d?" (":"", d?d:"", d?")":"");
+        }
+    }
+}
+
+// Print the complete build information to stdout as text, one section per category.
+void print_build_info(void) {
+    populate_packaging_info();
+    populate_system_info();
+    populate_directories();
+
+    print_build_info_category_to_console(BIC_PACKAGING, "Packaging");
+    print_build_info_category_to_console(BIC_DIRECTORIES, "Default Directories");
+    print_build_info_category_to_console(BIC_OPERATING_SYSTEM, "Operating System");
+    print_build_info_category_to_console(BIC_HARDWARE, "Hardware");
+    print_build_info_category_to_console(BIC_CONTAINER, "Container");
+    print_build_info_category_to_console(BIC_FEATURE, "Features");
+    print_build_info_category_to_console(BIC_DATABASE, "Database Engines");
+    print_build_info_category_to_console(BIC_CONNECTIVITY, "Connectivity Capabilities");
+    print_build_info_category_to_console(BIC_LIBS, "Libraries");
+    print_build_info_category_to_console(BIC_PLUGINS, "Plugins");
+    print_build_info_category_to_console(BIC_EXPORTERS, "Exporters");
+    print_build_info_category_to_console(BIC_DEBUG_DEVEL, "Debug/Developer Features");
+}
+
+// Append the build information to an already initialised JSON buffer,
+// one member object per category.
+void build_info_to_json_object(BUFFER *b) {
+    populate_packaging_info();
+    populate_system_info();
+    populate_directories();
+
+    print_build_info_category_to_json(b, BIC_PACKAGING, "package");
+    print_build_info_category_to_json(b, BIC_DIRECTORIES, "directories");
+    print_build_info_category_to_json(b, BIC_OPERATING_SYSTEM, "os");
+    print_build_info_category_to_json(b, BIC_HARDWARE, "hw");
+    print_build_info_category_to_json(b, BIC_CONTAINER, "container");
+    print_build_info_category_to_json(b, BIC_FEATURE, "features");
+    print_build_info_category_to_json(b, BIC_DATABASE, "databases");
+    print_build_info_category_to_json(b, BIC_CONNECTIVITY, "connectivity");
+    print_build_info_category_to_json(b, BIC_LIBS, "libs");
+    print_build_info_category_to_json(b, BIC_PLUGINS, "plugins");
+    print_build_info_category_to_json(b, BIC_EXPORTERS, "exporters");
+    print_build_info_category_to_json(b, BIC_DEBUG_DEVEL, "debug-n-devel");
+}
+
+void print_build_info_json(void)
{
+    populate_packaging_info();
+    populate_system_info();
+    populate_directories();
+
+    BUFFER *b = buffer_create(0, NULL);
+    buffer_json_initialize(b, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT);
+
+    build_info_to_json_object(b);
+
+    buffer_json_finalize(b);
+    printf("%s\n", buffer_tostring(b));
+    buffer_free(b);
+}
+
+// Append to 'b' the analytics tag of every BUILD_INFO row that has one and
+// whose status is set, separated by '|'.
+void analytics_build_info(BUFFER *b) {
+    populate_packaging_info();
+    populate_system_info();
+    populate_directories();
+
+    size_t added = 0;
+    for(size_t i = 0; i < BIB_TERMINATOR ;i++) {
+        if(BUILD_INFO[i].analytics && BUILD_INFO[i].status) {
+
+            if(added)
+                buffer_strcat(b, "|");
+
+            buffer_strcat (b, BUILD_INFO[i].analytics);
+            added++;
+        }
+    }
+}
+
diff --git a/src/daemon/buildinfo.h b/src/daemon/buildinfo.h
new file mode 100644
index 000000000..1bb1c9760
--- /dev/null
+++ b/src/daemon/buildinfo.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_BUILDINFO_H
+#define NETDATA_BUILDINFO_H 1
+
+void print_build_info(void);
+
+void print_build_info_json(void);
+
+char *get_value_from_key(char *buffer, char *key);
+
+void get_install_type(char **install_type, char **prebuilt_arch, char **prebuilt_dist);
+
+void build_info_to_json_object(BUFFER *b);
+
+#endif // NETDATA_BUILDINFO_H
diff --git a/src/daemon/commands.c b/src/daemon/commands.c
new file mode 100644
index 000000000..70ba11d42
--- /dev/null
+++ b/src/daemon/commands.c
@@ -0,0 +1,814 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common.h"
+
+/* State of the command server: its thread, event loop and listening pipe */
+static uv_thread_t thread;
+static uv_loop_t* loop;
+static uv_async_t async;
+static struct completion completion;
+static uv_pipe_t server_pipe;
+
+/* Reply prefix character, indexed by cmd_status_t */
+char cmd_prefix_by_status[] = {
+        CMD_PREFIX_INFO,
+        CMD_PREFIX_ERROR,
+        CMD_PREFIX_ERROR
+};
+
+static int command_server_initialized = 0;
+static int command_thread_error;
+static int command_thread_shutdown;
+static unsigned clients = 0;
+
+/*
+ * Per-connection state. The libuv pipe handle is the first member so the same
+ * pointer can be cast between uv_pipe_t * and struct command_context *.
+ */
+struct command_context {
+    /* embedded client pipe structure at address 0 */
+    uv_pipe_t client;
+
+    uv_work_t work;
+    uv_write_t write_req;
+    cmd_t idx;
+    char *args;
+    char *message;
+    cmd_status_t status;
+    char command_string[MAX_COMMAND_LENGTH];
+    unsigned command_string_size;
+};
+
+/* Forward declarations */
+static cmd_status_t cmd_help_execute(char *args, char **message);
+static cmd_status_t cmd_reload_health_execute(char *args, char **message);
+static cmd_status_t cmd_reopen_logs_execute(char *args, char **message);
+static cmd_status_t cmd_exit_execute(char *args, char **message);
+static cmd_status_t cmd_fatal_execute(char *args, char **message);
+static cmd_status_t cmd_reload_claiming_state_execute(char *args, char **message);
+static cmd_status_t cmd_reload_labels_execute(char *args, char **message);
+static cmd_status_t cmd_read_config_execute(char *args, char **message);
+static cmd_status_t cmd_write_config_execute(char *args, char **message);
+static cmd_status_t cmd_ping_execute(char *args, char **message);
+static cmd_status_t cmd_aclk_state(char *args, char **message);
+static cmd_status_t cmd_version(char *args, char **message);
+static cmd_status_t cmd_dumpconfig(char *args, char **message);
+#ifdef ENABLE_ACLK
+static cmd_status_t cmd_remove_node(char *args, char **message);
+#endif
+
+/*
+ * Command table: name, handler and locking type. sanity_check() enforces at
+ * build time that it has exactly CMD_TOTAL_COMMANDS entries, and the table is
+ * indexed with cmd_t values, so the row order must follow enum cmd.
+ */
+static command_info_t command_info_array[] = {
+        {"help", cmd_help_execute, CMD_TYPE_HIGH_PRIORITY},                  // show help menu
+        {"reload-health", cmd_reload_health_execute, CMD_TYPE_ORTHOGONAL},   // reload health configuration
+        {"reopen-logs", cmd_reopen_logs_execute, CMD_TYPE_ORTHOGONAL},       // Close and reopen log files
+        {"shutdown-agent", cmd_exit_execute, CMD_TYPE_EXCLUSIVE},            // exit cleanly
+        {"fatal-agent", cmd_fatal_execute, CMD_TYPE_HIGH_PRIORITY},          // exit with fatal error
+        {"reload-claiming-state", cmd_reload_claiming_state_execute, CMD_TYPE_ORTHOGONAL}, // reload claiming state
+        {"reload-labels", cmd_reload_labels_execute, CMD_TYPE_ORTHOGONAL},   // reload the labels
+        {"read-config", cmd_read_config_execute, CMD_TYPE_CONCURRENT},
+        {"write-config", cmd_write_config_execute, CMD_TYPE_ORTHOGONAL},
+        {"ping", cmd_ping_execute, CMD_TYPE_ORTHOGONAL},
+        {"aclk-state", cmd_aclk_state, CMD_TYPE_ORTHOGONAL},
+        {"version", cmd_version, CMD_TYPE_ORTHOGONAL},
+        {"dumpconfig", cmd_dumpconfig, CMD_TYPE_ORTHOGONAL},
+#ifdef ENABLE_ACLK
+        {"remove-stale-node", cmd_remove_node, CMD_TYPE_ORTHOGONAL}
+#endif
+};
+
+/* Mutexes for commands of type CMD_TYPE_ORTHOGONAL */
+static uv_mutex_t command_lock_array[CMD_TOTAL_COMMANDS];
+/* Commands of type CMD_TYPE_EXCLUSIVE are writers */
+static uv_rwlock_t exclusive_rwlock;
+/*
+ * Locking order:
+ * 1. exclusive_rwlock
+ * 2. command_lock_array[]
+ */
+
+/* Forward declarations */
+static void cmd_lock_exclusive(unsigned index);
+static void cmd_lock_orthogonal(unsigned index);
+static void cmd_lock_idempotent(unsigned index);
+static void cmd_lock_high_priority(unsigned index);
+
+/* Lock routine per command type; presumably ordered like enum cmd_type in commands.h - confirm */
+static command_lock_t *cmd_lock_by_type[] = {
+        cmd_lock_exclusive,
+        cmd_lock_orthogonal,
+        cmd_lock_idempotent,
+        cmd_lock_high_priority
+};
+
+/* Forward declarations */
+static void cmd_unlock_exclusive(unsigned index);
+static void cmd_unlock_orthogonal(unsigned index);
+static void cmd_unlock_idempotent(unsigned index);
+static void cmd_unlock_high_priority(unsigned index);
+
+/* Unlock routine per command type, same order as cmd_lock_by_type */
+static command_lock_t *cmd_unlock_by_type[] = {
+        cmd_unlock_exclusive,
+        cmd_unlock_orthogonal,
+        cmd_unlock_idempotent,
+        cmd_unlock_high_priority
+};
+
+/*
+ * "help": returns the usage text in a newly allocated buffer of
+ * MAX_COMMAND_LENGTH bytes stored in *message.
+ * NOTE(review): the text lists "save-database", which has no entry in
+ * command_info_array, and does not mention read-config / write-config.
+ */
+static cmd_status_t cmd_help_execute(char *args, char **message)
+{
+    (void)args;
+
+    *message = mallocz(MAX_COMMAND_LENGTH);
+    strncpyz(*message,
+             "\nThe commands are (arguments are in brackets):\n"
+             "help\n"
+             " Show this help menu.\n"
+             "reload-health\n"
+             " Reload health configuration.\n"
+             "reload-labels\n"
+             " Reload all labels.\n"
+             "save-database\n"
+             " Save internal DB to disk for memory mode save.\n"
+             "reopen-logs\n"
+             " Close and reopen log files.\n"
+             "shutdown-agent\n"
+             " Cleanup and exit the netdata agent.\n"
+             "fatal-agent\n"
+             " Log the state and halt the netdata agent.\n"
+             "reload-claiming-state\n"
+             " Reload agent claiming state from disk.\n"
+             "ping\n"
+             " Return with 'pong' if agent is alive.\n"
+             "aclk-state [json]\n"
+             " Returns current state of ACLK and Cloud connection. (optionally in json).\n"
+             "dumpconfig\n"
+             " Returns the current netdata.conf on stdout.\n"
+#ifdef ENABLE_ACLK
+             "remove-stale-node node_id|machine_guid\n"
+             " Unregisters and removes a node from the cloud.\n"
+#endif
+             "version\n"
+             " Returns the netdata version.\n",
+             MAX_COMMAND_LENGTH - 1);
+    return CMD_STATUS_SUCCESS;
+}
+
+/* "reload-health": reloads the health configuration with log limits lifted; no reply text */
+static cmd_status_t cmd_reload_health_execute(char *args, char **message)
+{
+    (void)args;
+    (void)message;
+
+    nd_log_limits_unlimited();
+    netdata_log_info("COMMAND: Reloading HEALTH configuration.");
+    health_plugin_reload();
+    nd_log_limits_reset();
+
+    return CMD_STATUS_SUCCESS;
+}
+
+/* "reopen-logs": closes and reopens the log files; no reply text */
+static cmd_status_t cmd_reopen_logs_execute(char *args, char **message)
+{
+    (void)args;
+    (void)message;
+
+    nd_log_limits_unlimited();
+    nd_log_reopen_log_files();
+    nd_log_limits_reset();
+
+    return CMD_STATUS_SUCCESS;
+}
+
+/* "shutdown-agent": runs the agent cleanup and then calls exit(0) */
+static cmd_status_t cmd_exit_execute(char *args, char **message)
+{
+    (void)args;
+    (void)message;
+
+    nd_log_limits_unlimited();
+    netdata_log_info("COMMAND: Cleaning up to exit.");
+    netdata_cleanup_and_exit(0, NULL, NULL, NULL);
+    exit(0);
+
+    return CMD_STATUS_SUCCESS;
+}
+
+/* "fatal-agent": stops the agent through fatal() */
+static cmd_status_t cmd_fatal_execute(char *args, char **message)
+{
+    (void)args;
+    (void)message;
+
+    fatal("COMMAND: netdata now exits.");
+
+    return CMD_STATUS_SUCCESS;
+}
+
+/*
+ * "reload-claiming-state": reloads the claiming state. When the agent is built
+ * with DISABLE_CLOUD or without ENABLE_ACLK it replies with an explanation and
+ * fails instead.
+ */
+static cmd_status_t cmd_reload_claiming_state_execute(char *args, char **message)
+{
+    (void)args;
+    (void)message;
+#if defined(DISABLE_CLOUD) || !defined(ENABLE_ACLK)
+    netdata_log_info("The claiming feature has been explicitly disabled");
+    *message = strdupz("This agent cannot be claimed, it was built without support for Cloud");
+    return CMD_STATUS_FAILURE;
+#endif
+    netdata_log_info("COMMAND: Reloading Agent Claiming configuration."); +
claim_reload_all(); + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_reload_labels_execute(char *args, char **message) +{ + (void)args; + netdata_log_info("COMMAND: reloading host labels."); + reload_host_labels(); + aclk_queue_node_info(localhost, 1); + + BUFFER *wb = buffer_create(10, NULL); + rrdlabels_log_to_buffer(localhost->rrdlabels, wb); + (*message)=strdupz(buffer_tostring(wb)); + buffer_free(wb); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_read_config_execute(char *args, char **message) +{ + size_t n = strlen(args); + char *separator = strchr(args,'|'); + if (separator == NULL) + return CMD_STATUS_FAILURE; + char *separator2 = strchr(separator + 1,'|'); + if (separator2 == NULL) + return CMD_STATUS_FAILURE; + + char *temp = callocz(n + 1, 1); + strcpy(temp, args); + size_t offset = separator - args; + temp[offset] = 0; + size_t offset2 = separator2 - args; + temp[offset2] = 0; + + const char *conf_file = temp; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? 
&netdata_config : &cloud_config; + + char *value = appconfig_get(tmp_config, temp + offset + 1, temp + offset2 + 1, NULL); + if (value == NULL) + { + netdata_log_error("Cannot execute read-config conf_file=%s section=%s / key=%s because no value set", + conf_file, + temp + offset + 1, + temp + offset2 + 1); + freez(temp); + return CMD_STATUS_FAILURE; + } + else + { + (*message) = strdupz(value); + freez(temp); + return CMD_STATUS_SUCCESS; + } + +} + +static cmd_status_t cmd_write_config_execute(char *args, char **message) +{ + UNUSED(message); + netdata_log_info("write-config %s", args); + size_t n = strlen(args); + char *separator = strchr(args,'|'); + if (separator == NULL) + return CMD_STATUS_FAILURE; + char *separator2 = strchr(separator + 1,'|'); + if (separator2 == NULL) + return CMD_STATUS_FAILURE; + char *separator3 = strchr(separator2 + 1,'|'); + if (separator3 == NULL) + return CMD_STATUS_FAILURE; + char *temp = callocz(n + 1, 1); + strcpy(temp, args); + size_t offset = separator - args; + temp[offset] = 0; + size_t offset2 = separator2 - args; + temp[offset2] = 0; + size_t offset3 = separator3 - args; + temp[offset3] = 0; + + const char *conf_file = temp; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? 
&netdata_config : &cloud_config; + + appconfig_set(tmp_config, temp + offset + 1, temp + offset2 + 1, temp + offset3 + 1); + netdata_log_info("write-config conf_file=%s section=%s key=%s value=%s",conf_file, temp + offset + 1, temp + offset2 + 1, + temp + offset3 + 1); + freez(temp); + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_ping_execute(char *args, char **message) +{ + (void)args; + + *message = strdupz("pong"); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_aclk_state(char *args, char **message) +{ + netdata_log_info("COMMAND: Reopening aclk/cloud state."); + if (strstr(args, "json")) + *message = aclk_state_json(); + else + *message = aclk_state(); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_version(char *args, char **message) +{ + (void)args; + + char version[MAX_COMMAND_LENGTH]; + snprintfz(version, MAX_COMMAND_LENGTH -1, "%s %s", program_name, NETDATA_VERSION); + + *message = strdupz(version); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_dumpconfig(char *args, char **message) +{ + (void)args; + + BUFFER *wb = buffer_create(1024, NULL); + config_generate(wb, 0); + *message = strdupz(buffer_tostring(wb)); + buffer_free(wb); + return CMD_STATUS_SUCCESS; +} + +#ifdef ENABLE_ACLK +static cmd_status_t cmd_remove_node(char *args, char **message) +{ + (void)args; + + BUFFER *wb = buffer_create(1024, NULL); + if (strlen(args) == 0) { + buffer_sprintf(wb, "Please specify a machine or node UUID"); + goto done; + } + + RRDHOST *host = NULL; + host = rrdhost_find_by_guid(args); + if (!host) + host = find_host_by_node_id(args); + + if (!host) + buffer_sprintf(wb, "Node with machine or node UUID \"%s\" not found", args); + else { + + if (host == localhost) { + buffer_sprintf(wb, "You cannot unregister the parent node"); + goto done; + } + + if (rrdhost_is_online(host)) { + buffer_sprintf(wb, "Cannot unregister a live node"); + goto done; + } + + if (!rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST)) { 
+ rrdhost_option_set(host, RRDHOST_OPTION_EPHEMERAL_HOST); + sql_set_host_label(&host->host_uuid, "_is_ephemeral", "true"); + aclk_host_state_update(host, 0, 0); + unregister_node(host->machine_guid); + freez(host->node_id); + host->node_id = NULL; + buffer_sprintf(wb, "Unregistering node with machine guid %s, hostname = %s", host->machine_guid, rrdhost_hostname(host)); + } + else + buffer_sprintf(wb, "Node with machine guid %s, hostname = %s is already unregistered", host->machine_guid, rrdhost_hostname(host)); + } + +done: + *message = strdupz(buffer_tostring(wb)); + buffer_free(wb); + return CMD_STATUS_SUCCESS; +} +#endif + +static void cmd_lock_exclusive(unsigned index) +{ + (void)index; + + uv_rwlock_wrlock(&exclusive_rwlock); +} + +static void cmd_lock_orthogonal(unsigned index) +{ + uv_rwlock_rdlock(&exclusive_rwlock); + uv_mutex_lock(&command_lock_array[index]); +} + +static void cmd_lock_idempotent(unsigned index) +{ + (void)index; + + uv_rwlock_rdlock(&exclusive_rwlock); +} + +static void cmd_lock_high_priority(unsigned index) +{ + (void)index; +} + +static void cmd_unlock_exclusive(unsigned index) +{ + (void)index; + + uv_rwlock_wrunlock(&exclusive_rwlock); +} + +static void cmd_unlock_orthogonal(unsigned index) +{ + uv_rwlock_rdunlock(&exclusive_rwlock); + uv_mutex_unlock(&command_lock_array[index]); +} + +static void cmd_unlock_idempotent(unsigned index) +{ + (void)index; + + uv_rwlock_rdunlock(&exclusive_rwlock); +} + +static void cmd_unlock_high_priority(unsigned index) +{ + (void)index; +} + +static void pipe_close_cb(uv_handle_t* handle) +{ + /* Also frees command context */ + freez(handle); +} + +static void pipe_write_cb(uv_write_t* req, int status) +{ + (void)status; + uv_pipe_t *client = req->data; + + uv_close((uv_handle_t *)client, pipe_close_cb); + --clients; + buffer_free(client->data); + // netdata_log_info("Command Clients = %u", clients); +} + +static inline void add_char_to_command_reply(BUFFER *reply_string, unsigned 
*reply_string_size, char character) +{ + buffer_fast_charcat(reply_string, character); + *reply_string_size +=1; +} + +static inline void add_string_to_command_reply(BUFFER *reply_string, unsigned *reply_string_size, char *str) +{ + unsigned len; + + len = strlen(str); + buffer_fast_strcat(reply_string, str, len); + *reply_string_size += len; +} + +static void send_command_reply(struct command_context *cmd_ctx, cmd_status_t status, char *message) +{ + int ret; + BUFFER *reply_string = buffer_create(128, NULL); + + char exit_status_string[MAX_EXIT_STATUS_LENGTH + 1] = {'\0', }; + unsigned reply_string_size = 0; + uv_buf_t write_buf; + uv_stream_t *client = (uv_stream_t *)(uv_pipe_t *)cmd_ctx; + + snprintfz(exit_status_string, MAX_EXIT_STATUS_LENGTH, "%u", status); + add_char_to_command_reply(reply_string, &reply_string_size, CMD_PREFIX_EXIT_CODE); + add_string_to_command_reply(reply_string, &reply_string_size, exit_status_string); + add_char_to_command_reply(reply_string, &reply_string_size, '\0'); + + if (message) { + add_char_to_command_reply(reply_string, &reply_string_size, cmd_prefix_by_status[status]); + add_string_to_command_reply(reply_string, &reply_string_size, message); + } + + cmd_ctx->write_req.data = client; + client->data = reply_string; + write_buf.base = reply_string->buffer; + write_buf.len = reply_string_size; + ret = uv_write(&cmd_ctx->write_req, (uv_stream_t *)client, &write_buf, 1, pipe_write_cb); + if (ret) { + netdata_log_error("uv_write(): %s", uv_strerror(ret)); + } +} + +cmd_status_t execute_command(cmd_t idx, char *args, char **message) +{ + cmd_status_t status; + cmd_type_t type = command_info_array[idx].type; + + cmd_lock_by_type[type](idx); + status = command_info_array[idx].func(args, message); + cmd_unlock_by_type[type](idx); + + return status; +} + +static void after_schedule_command(uv_work_t *req, int status) +{ + struct command_context *cmd_ctx = req->data; + + (void)status; + + send_command_reply(cmd_ctx, cmd_ctx->status, 
cmd_ctx->message);
+    if (cmd_ctx->message)
+        freez(cmd_ctx->message);
+}
+
+/* Work callback queued by parse_commands(): executes the parsed command and stores its status and reply in the context */
+static void schedule_command(uv_work_t *req)
+{
+    register_libuv_worker_jobs();
+    worker_is_busy(UV_EVENT_SCHEDULE_CMD);
+
+    struct command_context *cmd_ctx = req->data;
+    cmd_ctx->status = execute_command(cmd_ctx->idx, cmd_ctx->args, &cmd_ctx->message);
+
+    worker_is_idle();
+}
+
+/* This will alter the state of the command_info_array.cmd_str
+*/
+/*
+ * NOTE(review): what the code below modifies is cmd_ctx->command_string
+ * (trailing white-space is overwritten with '\0'); command_info_array is only read.
+ *
+ * Finds the first table entry whose name is a prefix of the received text,
+ * trims the white-space around what follows it (the arguments) and queues the
+ * command on the libuv work queue. The exit command is executed right here
+ * instead. When no entry matches, an error reply is sent.
+ */
+static void parse_commands(struct command_context *cmd_ctx)
+{
+    char *message = NULL, *pos, *lstrip, *rstrip;
+    cmd_t i;
+    cmd_status_t status;
+
+    status = CMD_STATUS_FAILURE;
+
+    /* Skip white-space characters */
+    for (pos = cmd_ctx->command_string ; isspace((uint8_t)*pos) && ('\0' != *pos) ; ++pos) ;
+    for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) {
+        if (!strncmp(pos, command_info_array[i].cmd_str, strlen(command_info_array[i].cmd_str))) {
+            if (CMD_EXIT == i) {
+                /* musl C does not like libuv workqueues calling exit() */
+                execute_command(CMD_EXIT, NULL, NULL);
+            }
+            /* lstrip: first non-space after the command name; rstrip: zero out trailing spaces */
+            for (lstrip=pos + strlen(command_info_array[i].cmd_str); isspace((uint8_t)*lstrip) && ('\0' != *lstrip); ++lstrip) ;
+            for (rstrip=lstrip+strlen(lstrip)-1; rstrip>lstrip && isspace((uint8_t)*rstrip); *(rstrip--) = 0 ) ;
+
+            cmd_ctx->work.data = cmd_ctx;
+            cmd_ctx->idx = i;
+            cmd_ctx->args = lstrip;
+            cmd_ctx->message = NULL;
+
+            fatal_assert(0 == uv_queue_work(loop, &cmd_ctx->work, schedule_command, after_schedule_command));
+            break;
+        }
+    }
+    if (CMD_TOTAL_COMMANDS == i) {
+        /* no command found */
+        message = strdupz("Illegal command. Please type \"help\" for instructions.");
+        send_command_reply(cmd_ctx, status, message);
+        freez(message);
+    }
+}
+
+/*
+ * Read callback of a client pipe. Data is accumulated in
+ * cmd_ctx->command_string (anything beyond MAX_COMMAND_LENGTH - 1 bytes is
+ * dropped); on EOF the accumulated text is parsed. On EOF the client is left
+ * open here and is closed by pipe_write_cb() once the reply has been written;
+ * on any other read error it is closed here.
+ */
+static void pipe_read_cb(uv_stream_t *client, ssize_t nread, const uv_buf_t *buf)
+{
+    struct command_context *cmd_ctx = (struct command_context *)client;
+
+    if (0 == nread) {
+        netdata_log_info("%s: Zero bytes read by command pipe.", __func__);
+    } else if (UV_EOF == nread) {
+        netdata_log_info("EOF found in command pipe.");
+        parse_commands(cmd_ctx);
+    } else if (nread < 0) {
+        netdata_log_error("%s: %s", __func__, uv_strerror(nread));
+    }
+
+    if (nread < 0) { /* stop stream due to EOF or error */
+        (void)uv_read_stop((uv_stream_t *)client);
+    } else if (nread) {
+        size_t to_copy;
+
+        to_copy = MIN((size_t) nread, MAX_COMMAND_LENGTH - 1 - cmd_ctx->command_string_size);
+        memcpy(cmd_ctx->command_string + cmd_ctx->command_string_size, buf->base, to_copy);
+        cmd_ctx->command_string_size += to_copy;
+        cmd_ctx->command_string[cmd_ctx->command_string_size] = '\0';
+    }
+    /* the read buffer was allocated by alloc_cb() */
+    if (buf && buf->len) {
+        freez(buf->base);
+    }
+
+    if (nread < 0 && UV_EOF != nread) {
+        uv_close((uv_handle_t *)client, pipe_close_cb);
+        --clients;
+        // netdata_log_info("Command Clients = %u", clients);
+    }
+}
+
+/* Allocation callback for reads: hands libuv a buffer of the suggested size; pipe_read_cb() frees it */
+static void alloc_cb(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf)
+{
+    (void)handle;
+
+    buf->base = mallocz(suggested_size);
+    buf->len = suggested_size;
+}
+
+/*
+ * Connection callback of the listening pipe: allocates a command context,
+ * accepts the client into the pipe embedded in it and starts reading.
+ * NOTE(review): a non-zero 'status' trips fatal_assert().
+ */
+static void connection_cb(uv_stream_t *server, int status)
+{
+    int ret;
+    uv_pipe_t *client;
+    struct command_context *cmd_ctx;
+    fatal_assert(status == 0);
+
+    /* combined allocation of client pipe and command context */
+    cmd_ctx = mallocz(sizeof(*cmd_ctx));
+    client = (uv_pipe_t *)cmd_ctx;
+    ret = uv_pipe_init(server->loop, client, 1);
+    if (ret) {
+        netdata_log_error("uv_pipe_init(): %s", uv_strerror(ret));
+        freez(cmd_ctx);
+        return;
+    }
+    ret = uv_accept(server, (uv_stream_t *)client);
+    if (ret) {
+        netdata_log_error("uv_accept(): %s", uv_strerror(ret));
+        uv_close((uv_handle_t *)client, pipe_close_cb);
+        return;
+    }
+
+    ++clients;
+    // netdata_log_info("Command Clients = %u", clients);
+    /* Start parsing a new command */
+    cmd_ctx->command_string_size = 0;
+    cmd_ctx->command_string[0] = '\0';
+
+    ret = uv_read_start((uv_stream_t*)client, alloc_cb, pipe_read_cb);
+    if (ret) {
+        netdata_log_error("uv_read_start(): %s", uv_strerror(ret));
+        uv_close((uv_handle_t *)client, pipe_close_cb);
+        --clients;
+        // netdata_log_info("Command Clients = %u", clients);
+        return;
+    }
+}
+
+/* Async wake-up: stops the event loop so command_thread() can re-check its shutdown flag */
+static void async_cb(uv_async_t *handle)
+{
+    uv_stop(handle->loop);
+}
+
+/*
+ * Thread of the command server: creates the event loop, the async wake-up
+ * handle and the listening pipe, reports the outcome through
+ * command_thread_error and 'completion', then runs the loop until
+ * command_thread_shutdown is set.
+ */
+static void command_thread(void *arg) {
+    uv_thread_set_name_np("DAEMON_COMMAND");
+
+    int ret;
+    uv_fs_t req;
+
+    (void) arg;
+    loop = mallocz(sizeof(uv_loop_t));
+    ret = uv_loop_init(loop);
+    if (ret) {
+        netdata_log_error("uv_loop_init(): %s", uv_strerror(ret));
+        command_thread_error = ret;
+        goto error_after_loop_init;
+    }
+    loop->data = NULL;
+
+    ret = uv_async_init(loop, &async, async_cb);
+    if (ret) {
+        netdata_log_error("uv_async_init(): %s", uv_strerror(ret));
+        command_thread_error = ret;
+        goto error_after_async_init;
+    }
+    async.data = NULL;
+
+    ret = uv_pipe_init(loop, &server_pipe, 0);
+    if (ret) {
+        netdata_log_error("uv_pipe_init(): %s", uv_strerror(ret));
+        command_thread_error = ret;
+        goto error_after_pipe_init;
+    }
+
+    const char *pipename = daemon_pipename();
+
+    /* remove a leftover pipe from a previous run; the result is ignored */
+    (void)uv_fs_unlink(loop, &req, pipename, NULL);
+    uv_fs_req_cleanup(&req);
+    ret = uv_pipe_bind(&server_pipe, pipename);
+    if (ret) {
+        netdata_log_error("uv_pipe_bind(): %s", uv_strerror(ret));
+        command_thread_error = ret;
+        goto error_after_pipe_bind;
+    }
+
+    ret = uv_listen((uv_stream_t *)&server_pipe, SOMAXCONN, connection_cb);
+    if (ret) {
+        /* Fallback to backlog of 1 */
+        netdata_log_info("uv_listen() failed with backlog = %d, falling back to backlog = 1.", SOMAXCONN);
+        ret = uv_listen((uv_stream_t *)&server_pipe, 1, connection_cb);
+    }
+    if (ret) {
+        netdata_log_error("uv_listen(): %s", uv_strerror(ret)); +
command_thread_error = ret; + goto error_after_uv_listen; + } + + command_thread_error = 0; + command_thread_shutdown = 0; + /* wake up initialization thread */ + completion_mark_complete(&completion); + + while (command_thread_shutdown == 0) { + uv_run(loop, UV_RUN_DEFAULT); + } + /* cleanup operations of the event loop */ + netdata_log_info("Shutting down command event loop."); + uv_close((uv_handle_t *)&async, NULL); + uv_close((uv_handle_t*)&server_pipe, NULL); + uv_run(loop, UV_RUN_DEFAULT); /* flush all libuv handles */ + + netdata_log_info("Shutting down command loop complete."); + fatal_assert(0 == uv_loop_close(loop)); + freez(loop); + + return; + +error_after_uv_listen: +error_after_pipe_bind: + uv_close((uv_handle_t*)&server_pipe, NULL); +error_after_pipe_init: + uv_close((uv_handle_t *)&async, NULL); +error_after_async_init: + uv_run(loop, UV_RUN_DEFAULT); /* flush all libuv handles */ + fatal_assert(0 == uv_loop_close(loop)); +error_after_loop_init: + freez(loop); + + /* wake up initialization thread */ + completion_mark_complete(&completion); +} + +static void sanity_check(void) +{ + /* The size of command_info_array must be CMD_TOTAL_COMMANDS elements */ + BUILD_BUG_ON(CMD_TOTAL_COMMANDS != sizeof(command_info_array) / sizeof(command_info_array[0])); +} + +void commands_init(void) +{ + cmd_t i; + int error; + + sanity_check(); + if (command_server_initialized) + return; + + netdata_log_info("Initializing command server."); + for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) { + fatal_assert(0 == uv_mutex_init(&command_lock_array[i])); + } + fatal_assert(0 == uv_rwlock_init(&exclusive_rwlock)); + + completion_init(&completion); + error = uv_thread_create(&thread, command_thread, NULL); + if (error) { + netdata_log_error("uv_thread_create(): %s", uv_strerror(error)); + goto after_error; + } + /* wait for worker thread to initialize */ + completion_wait_for(&completion); + completion_destroy(&completion); + + if (command_thread_error) { + error = 
uv_thread_join(&thread); + if (error) { + netdata_log_error("uv_thread_create(): %s", uv_strerror(error)); + } + goto after_error; + } + + command_server_initialized = 1; + return; + +after_error: + netdata_log_error("Failed to initialize command server. The netdata cli tool will be unable to send commands."); +} + +void commands_exit(void) +{ + cmd_t i; + + if (!command_server_initialized) + return; + + command_thread_shutdown = 1; + netdata_log_info("Shutting down command server."); + /* wake up event loop */ + fatal_assert(0 == uv_async_send(&async)); + fatal_assert(0 == uv_thread_join(&thread)); + + for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) { + uv_mutex_destroy(&command_lock_array[i]); + } + uv_rwlock_destroy(&exclusive_rwlock); + netdata_log_info("Command server has stopped."); + command_server_initialized = 0; +} diff --git a/src/daemon/commands.h b/src/daemon/commands.h new file mode 100644 index 000000000..14c2ec49e --- /dev/null +++ b/src/daemon/commands.h @@ -0,0 +1,80 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_COMMANDS_H +#define NETDATA_COMMANDS_H 1 + +#define MAX_COMMAND_LENGTH (8192) +#define MAX_EXIT_STATUS_LENGTH 23 /* Can't ever be bigger than "X-18446744073709551616" */ + +typedef enum cmd { + CMD_HELP = 0, + CMD_RELOAD_HEALTH, + CMD_REOPEN_LOGS, + CMD_EXIT, + CMD_FATAL, + CMD_RELOAD_CLAIMING_STATE, + CMD_RELOAD_LABELS, + CMD_READ_CONFIG, + CMD_WRITE_CONFIG, + CMD_PING, + CMD_ACLK_STATE, + CMD_VERSION, + CMD_DUMPCONFIG, +#ifdef ENABLE_ACLK + CMD_REMOVE_NODE, +#endif + CMD_TOTAL_COMMANDS +} cmd_t; + +typedef enum cmd_status { + CMD_STATUS_SUCCESS = 0, + CMD_STATUS_FAILURE, + CMD_STATUS_BUSY +} cmd_status_t; + +#define CMD_PREFIX_INFO 'O' /* Following string should go to cli stdout */ +#define CMD_PREFIX_ERROR 'E' /* Following string should go to cli stderr */ +#define CMD_PREFIX_EXIT_CODE 'X' /* Following string is cli integer exit code */ + +typedef enum cmd_type { + /* + * No other command is allowed to run at the same 
time (except for CMD_TYPE_HIGH_PRIORITY). + */ + CMD_TYPE_EXCLUSIVE = 0, + /* + * Other commands are allowed to run concurrently (except for CMD_TYPE_EXCLUSIVE) but calls to this command are + * serialized. + */ + CMD_TYPE_ORTHOGONAL, + /* + * Other commands are allowed to run concurrently (except for CMD_TYPE_EXCLUSIVE) as are calls to this command. + */ + CMD_TYPE_CONCURRENT, + /* + * Those commands are always allowed to run. + */ + CMD_TYPE_HIGH_PRIORITY +} cmd_type_t; + +/** + * Executes a command and returns the status. + * + * @param args a string that may contain additional parameters to be parsed + * @param message allocate and return a message if need be (up to MAX_COMMAND_LENGTH bytes) + * @return CMD_STATUS_SUCCESS or CMD_STATUS_FAILURE (see cmd_status_t) + */ +typedef cmd_status_t (command_action_t) (char *args, char **message); + +typedef struct command_info { + char *cmd_str; // the command string + command_action_t *func; // the function that executes the command + cmd_type_t type; // Concurrency control information for the command +} command_info_t; + +typedef void (command_lock_t) (unsigned index); + +cmd_status_t execute_command(cmd_t idx, char *args, char **message); +void commands_init(void); +void commands_exit(void); + +#endif //NETDATA_COMMANDS_H diff --git a/src/daemon/common.c b/src/daemon/common.c new file mode 100644 index 000000000..a64d53585 --- /dev/null +++ b/src/daemon/common.c @@ -0,0 +1,197 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +char *netdata_configured_hostname = NULL; +char *netdata_configured_user_config_dir = CONFIG_DIR; +char *netdata_configured_stock_config_dir = LIBCONFIG_DIR; +char *netdata_configured_log_dir = LOG_DIR; +char *netdata_configured_primary_plugins_dir = PLUGINS_DIR; +char *netdata_configured_web_dir = WEB_DIR; +char *netdata_configured_cache_dir = CACHE_DIR; +char *netdata_configured_varlib_dir = VARLIB_DIR; +char *netdata_configured_lock_dir = VARLIB_DIR "/lock"; +char *netdata_configured_home_dir = 
VARLIB_DIR; +char *netdata_configured_host_prefix = NULL; +char *netdata_configured_timezone = NULL; +char *netdata_configured_abbrev_timezone = NULL; +int32_t netdata_configured_utc_offset = 0; + +bool netdata_ready = false; + +#if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK ) +int netdata_cloud_enabled = CONFIG_BOOLEAN_NO; +#else +int netdata_cloud_enabled = CONFIG_BOOLEAN_AUTO; +#endif +/* Returns the number of CPUs netdata works with; computed on the first call and cached in a function-local static. Preference order: cgroups cpuset v2, cgroups cpuset v1, system count. That value is the default of the "cpu cores" setting, whose configured value is what is finally returned (never less than 1). */ +long get_netdata_cpus(void) { + static long processors = 0; + + if(processors) + return processors; + + long cores_proc_stat = os_get_system_cpus_cached(false, true); + long cores_cpuset_v1 = (long)os_read_cpuset_cpus("/sys/fs/cgroup/cpuset/cpuset.cpus", cores_proc_stat); + long cores_cpuset_v2 = (long)os_read_cpuset_cpus("/sys/fs/cgroup/cpuset.cpus", cores_proc_stat); + + if(cores_cpuset_v2) + processors = cores_cpuset_v2; + else if(cores_cpuset_v1) + processors = cores_cpuset_v1; + else + processors = cores_proc_stat; + + long cores_user_configured = config_get_number(CONFIG_SECTION_GLOBAL, "cpu cores", processors); + + errno = 0; + internal_error(true, + "System CPUs: %ld, (" + "system: %ld, cgroups cpuset v1: %ld, cgroups cpuset v2: %ld, netdata.conf: %ld" + ")" + , processors + , cores_proc_stat + , cores_cpuset_v1 + , cores_cpuset_v2 + , cores_user_configured + ); + + processors = cores_user_configured; + + if(processors < 1) + processors = 1; + + return processors; +} +/* Maps a CLOUD_STATUS value to its lowercase name; unknown values map to "unavailable". */ +const char *cloud_status_to_string(CLOUD_STATUS status) { + switch(status) { + default: + case CLOUD_STATUS_UNAVAILABLE: + return "unavailable"; + + case CLOUD_STATUS_AVAILABLE: + return "available"; + + case CLOUD_STATUS_DISABLED: + return "disabled"; + + case CLOUD_STATUS_BANNED: + return "banned"; + + case CLOUD_STATUS_OFFLINE: + return "offline"; + + case CLOUD_STATUS_ONLINE: + return "online"; + } +} +/* Derives the current CLOUD_STATUS from the ACLK runtime flags, netdata_cloud_enabled and the presence of a claim id; always CLOUD_STATUS_UNAVAILABLE when ACLK is compiled out. */ +CLOUD_STATUS cloud_status(void) { +#ifdef ENABLE_ACLK + if(aclk_disable_runtime) + return CLOUD_STATUS_BANNED; + + if(aclk_connected) + return CLOUD_STATUS_ONLINE; + + if(netdata_cloud_enabled 
== CONFIG_BOOLEAN_YES) { + char *agent_id = get_agent_claimid(); + bool claimed = agent_id != NULL; + freez(agent_id); + + if(claimed) + return CLOUD_STATUS_OFFLINE; + } + + if(netdata_cloud_enabled != CONFIG_BOOLEAN_NO) + return CLOUD_STATUS_AVAILABLE; + + return CLOUD_STATUS_DISABLED; +#else + return CLOUD_STATUS_UNAVAILABLE; +#endif +} +/* Later of last_conn_time_mqtt and last_disconnect_time; netdata_start_time when both are zero or when ACLK is compiled out. */ +time_t cloud_last_change(void) { +#ifdef ENABLE_ACLK + time_t ret = MAX(last_conn_time_mqtt, last_disconnect_time); + if(!ret) ret = netdata_start_time; + return ret; +#else + return netdata_start_time; +#endif +} +/* Returns next_connection_attempt, or 0 when ACLK is compiled out. */ +time_t cloud_next_connection_attempt(void) { +#ifdef ENABLE_ACLK + return next_connection_attempt; +#else + return 0; +#endif +} +/* Returns aclk_connection_counter, or 0 when ACLK is compiled out. */ +size_t cloud_connection_id(void) { +#ifdef ENABLE_ACLK + return aclk_connection_counter; +#else + return 0; +#endif +} +/* Short reason string for the agent not being online: "disabled", "banned" or the result of aclk_status_to_string(). Defined with (void) to match the prototype in common.h. */ +const char *cloud_offline_reason(void) { +#ifdef ENABLE_ACLK + if(!netdata_cloud_enabled) + return "disabled"; + + if(aclk_disable_runtime) + return "banned"; + + return aclk_status_to_string(); +#else + return "disabled"; +#endif +} +/* Returns aclk_cloud_base_url, or NULL when ACLK is compiled out. Defined with (void) to match the prototype in common.h. */ +const char *cloud_base_url(void) { +#ifdef ENABLE_ACLK + return aclk_cloud_base_url; +#else + return NULL; +#endif +} +/* Appends a "cloud" object to wb (id, status, since, age; reason when not online; next_check and next_in when offline with a future attempt; url and claim_id when available) and returns the status it reported. */ +CLOUD_STATUS buffer_json_cloud_status(BUFFER *wb, time_t now_s) { + CLOUD_STATUS status = cloud_status(); + + buffer_json_member_add_object(wb, "cloud"); + { + size_t id = cloud_connection_id(); + time_t last_change = cloud_last_change(); + time_t next_connect = cloud_next_connection_attempt(); + buffer_json_member_add_uint64(wb, "id", id); + buffer_json_member_add_string(wb, "status", cloud_status_to_string(status)); + buffer_json_member_add_time_t(wb, "since", last_change); + buffer_json_member_add_time_t(wb, "age", now_s - last_change); + + if (status != CLOUD_STATUS_ONLINE) + buffer_json_member_add_string(wb, "reason", cloud_offline_reason()); + + if (status == CLOUD_STATUS_OFFLINE && next_connect > now_s) { + buffer_json_member_add_time_t(wb, "next_check", next_connect); + buffer_json_member_add_time_t(wb, "next_in", 
next_connect - now_s); + } + + if (cloud_base_url()) + buffer_json_member_add_string(wb, "url", cloud_base_url()); + + char *claim_id = get_agent_claimid(); + if(claim_id) { + buffer_json_member_add_string(wb, "claim_id", claim_id); + freez(claim_id); + } + } + buffer_json_object_close(wb); // cloud + + return status; +} diff --git a/src/daemon/common.h b/src/daemon/common.h new file mode 100644 index 000000000..102ec81e2 --- /dev/null +++ b/src/daemon/common.h @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_COMMON_H +#define NETDATA_COMMON_H 1 + +#include "libnetdata/libnetdata.h" +#include "event_loop.h" + +// ---------------------------------------------------------------------------- +// shortcuts for the default netdata configuration + +#define config_load(filename, overwrite_used, section) appconfig_load(&netdata_config, filename, overwrite_used, section) +#define config_get(section, name, default_value) appconfig_get(&netdata_config, section, name, default_value) +#define config_get_number(section, name, value) appconfig_get_number(&netdata_config, section, name, value) +#define config_get_float(section, name, value) appconfig_get_float(&netdata_config, section, name, value) +#define config_get_boolean(section, name, value) appconfig_get_boolean(&netdata_config, section, name, value) +#define config_get_boolean_ondemand(section, name, value) appconfig_get_boolean_ondemand(&netdata_config, section, name, value) +#define config_get_duration(section, name, value) appconfig_get_duration(&netdata_config, section, name, value) + +#define config_set(section, name, default_value) appconfig_set(&netdata_config, section, name, default_value) +#define config_set_default(section, name, value) appconfig_set_default(&netdata_config, section, name, value) +#define config_set_number(section, name, value) appconfig_set_number(&netdata_config, section, name, value) +#define config_set_float(section, name, value) 
appconfig_set_float(&netdata_config, section, name, value) +#define config_set_boolean(section, name, value) appconfig_set_boolean(&netdata_config, section, name, value) + +#define config_exists(section, name) appconfig_exists(&netdata_config, section, name) +#define config_move(section_old, name_old, section_new, name_new) appconfig_move(&netdata_config, section_old, name_old, section_new, name_new) + +#define config_generate(buffer, only_changed) appconfig_generate(&netdata_config, buffer, only_changed) + +#define config_section_destroy(section) appconfig_section_destroy_non_loaded(&netdata_config, section) +#define config_section_option_destroy(section, name) appconfig_section_option_destroy_non_loaded(&netdata_config, section, name) + +// ---------------------------------------------------------------------------- +// netdata include files + +#include "daemon/config/dyncfg.h" + +#include "global_statistics.h" + +// health monitoring and alarm notifications +#include "health/health.h" + +// the netdata database +#include "database/rrd.h" + +// the netdata webserver(s) +#include "web/server/web_server.h" + +// the new h2o based netdata webserver +#ifdef ENABLE_H2O +#include "web/server/h2o/http_server.h" +#endif + +// streaming metrics between netdata servers +#include "streaming/rrdpush.h" + + +// anomaly detection +#include "ml/ml.h" + +// the netdata registry +// the registry is actually an API feature +#include "registry/registry.h" + +// exporting engine for archiving the metrics +#include "exporting/exporting_engine.h" + +// the netdata API +#include "web/server/web_client.h" +#include "web/rtc/webrtc.h" + +// all data collection plugins +#include "collectors/all.h" + +// netdata unit tests +#include "unit_test.h" + +// netdata agent claiming +#include "claim/claim.h" + +// netdata agent cloud link +#include "aclk/aclk.h" + +// global GUID map functions + +// netdata agent spawn server +#include "spawn/spawn.h" + +// the netdata daemon +#include "daemon.h" 
+#include "main.h" +#include "static_threads.h" +#include "signals.h" +#include "commands.h" +#include "pipename.h" +#include "analytics.h" + +// global netdata daemon variables +extern char *netdata_configured_hostname; +extern char *netdata_configured_user_config_dir; +extern char *netdata_configured_stock_config_dir; +extern char *netdata_configured_log_dir; +extern char *netdata_configured_primary_plugins_dir; +extern char *netdata_configured_web_dir; +extern char *netdata_configured_cache_dir; +extern char *netdata_configured_varlib_dir; +extern char *netdata_configured_lock_dir; +extern char *netdata_configured_home_dir; +extern char *netdata_configured_host_prefix; +extern char *netdata_configured_timezone; +extern char *netdata_configured_abbrev_timezone; +extern int32_t netdata_configured_utc_offset; +extern int netdata_anonymous_statistics_enabled; + +extern bool netdata_ready; +extern int netdata_cloud_enabled; + +extern time_t netdata_start_time; + +long get_netdata_cpus(void); + +typedef enum __attribute__((packed)) { + CLOUD_STATUS_UNAVAILABLE = 0, // cloud and aclk functionality is not available on this agent + CLOUD_STATUS_AVAILABLE, // cloud and aclk functionality is available, but the agent is not claimed + CLOUD_STATUS_DISABLED, // cloud and aclk functionality is available, but it is disabled + CLOUD_STATUS_BANNED, // the agent has been banned from cloud + CLOUD_STATUS_OFFLINE, // the agent tries to connect to cloud, but cannot do it + CLOUD_STATUS_ONLINE, // the agent is connected to cloud +} CLOUD_STATUS; + +const char *cloud_status_to_string(CLOUD_STATUS status); +CLOUD_STATUS cloud_status(void); +time_t cloud_last_change(void); +time_t cloud_next_connection_attempt(void); +size_t cloud_connection_id(void); +const char *cloud_offline_reason(void); +const char *cloud_base_url(void); +CLOUD_STATUS buffer_json_cloud_status(BUFFER *wb, time_t now_s); + +#endif /* NETDATA_COMMON_H */ diff --git a/src/daemon/config/README.md 
b/src/daemon/config/README.md new file mode 100644 index 000000000..c59f55620 --- /dev/null +++ b/src/daemon/config/README.md @@ -0,0 +1,231 @@ + + +# Daemon configuration + +
+The daemon configuration file is read from /etc/netdata/netdata.conf. + +Depending on your installation method, Netdata will have been installed either directly under `/`, or +under `/opt/netdata`. The paths mentioned here and in the documentation in general assume that your installation is +under `/`. If it is not, you will find the exact same paths under `/opt/netdata` as well. (i.e. `/etc/netdata` will +be `/opt/netdata/etc/netdata`). + +
+ +This config file **is not needed by default**. Netdata works fine out of the box without it. But it does allow you to +adapt the general behavior of Netdata, in great detail. You can find all these settings, with their default values, by +accessing the URL `https://netdata.server.hostname:19999/netdata.conf`. For example, check the configuration file +of [netdata.firehol.org](http://netdata.firehol.org/netdata.conf). HTTP access to this file is limited by default to +[private IPs](https://en.wikipedia.org/wiki/Private_network), via +the [web server access lists](/src/web/server/README.md#access-lists). + +`netdata.conf` has sections denoted by `[section]`. You will see the following sections: + +1. `[global]` to [configure](#global-section-options) the [Netdata daemon](/src/daemon/README.md). +2. `[db]` to [configure](#db-section-options) the database of Netdata. +3. `[directories]` to [configure](#directories-section-options) the directories used by Netdata. +4. `[logs]` to [configure](#logs-section-options) the Netdata logging. +5. `[environment variables]` to [configure](#environment-variables-section-options) the environment variables used by + Netdata. +6. `[sqlite]` to [configure](#sqlite-section-options) the [Netdata daemon](/src/daemon/README.md) SQLite settings. +7. `[ml]` to configure settings for [machine learning](/src/ml/README.md). +8. `[health]` to [configure](#health-section-options) general settings for [health monitoring](/src/health/README.md). +9. `[web]` to [configure the web server](/src/web/server/README.md). +10. `[registry]` for the [Netdata registry](/src/registry/README.md). +11. `[global statistics]` for the [Netdata registry](/src/registry/README.md). +12. `[statsd]` for the general settings of the [stats.d.plugin](/src/collectors/statsd.plugin/README.md). +13. `[plugins]` to [configure](#plugins-section-options) which [collectors](/src/collectors/README.md) to use and PATH + settings. +14. 
`[plugin:NAME]` sections for each collector plugin, under the + comment [Per plugin configuration](#per-plugin-configuration). + +The configuration file is a `name = value` dictionary. Netdata will not complain if you set options unknown to it. When +you check the running configuration by accessing the URL `/netdata.conf` on your Netdata server, Netdata will add a +comment on settings it does not currently use. + +## Applying changes + +After `netdata.conf` has been modified, Netdata needs to be [restarted](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for +changes to apply: + +```bash +sudo systemctl restart netdata +``` + +If the above does not work, try the following: + +```bash +sudo killall netdata; sleep 10; sudo netdata +``` + +Please note that your data history will be lost if you have modified the `history` parameter in the `[global]` section. + +## Sections + +### [global] section options + +| setting | default | info | +|:----------------------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| process scheduling policy | `keep` | See [Netdata process scheduling policy](/src/daemon/README.md#netdata-process-scheduling-policy) | +| OOM score | `0` | | +| glibc malloc arena max for plugins | `1` | See [Virtual memory](/src/daemon/README.md#virtual-memory). | +| glibc malloc arena max for Netdata | `1` | See [Virtual memory](/src/daemon/README.md#virtual-memory). | +| hostname | auto-detected | The hostname of the computer running Netdata. | +| host access prefix | empty | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this to work. Check [issue 43](https://github.com/netdata/netdata/issues/43). 
| +| timezone | auto-detected | The timezone retrieved from the environment variable | +| run as user | `netdata` | The user Netdata will run as. | +| pthread stack size | auto-detected | | + +### [db] section options + +| setting | default | info | +|:---------------------------------------------:|:----------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`.
`ram`: The round-robin database will be temporary and it will be lost when Netdata exits.
`alloc`: Similar to `ram`, but can significantly reduce memory usage, when combined with a low retention and does not support KSM.
`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. Not to be used together with streaming. | +| retention | `3600` | Used with `mode = ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](/src/database/README.md) for more information. | +| storage tiers | `3` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](/src/database/engine/README.md#tiering). You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. | +| dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching for _Tier 0_ Netdata metric values. | +| dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated for caching Netdata metric values of the **`N`** tier.
`N belongs to [1..4]` | +| dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). | +| dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. | +| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well.
`N belongs to [1..4]` | +| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](/docs/netdata-agent/configuration/optimize-the-netdata-agents-performance.md). These metrics are stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](/src/database/engine/README.md#tiering). | +| dbengine tier **`N`** update every iterations | `60` | The down sampling value of each tier from the previous one. For each Tier, the greater by one Tier has N (equal to 60 by default) fewer data points of any metric it collects. This setting can take values from `2` up to `255`.
`N belongs to [1..4]` | +| dbengine tier **`N`** back fill | `New` | Specifies the strategy of recreating missing data on each Tier from the exact lower Tier.
`New`: Sees the latest point on each Tier and saves new points to it only if the exact lower Tier has available points for its observation window (`dbengine tier N update every iterations` window).
`none`: No back filling is applied.
`N belongs to [1..4]` | +| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/src/database/README.md#ksm) | +| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](/src/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions | +| gap when lost iterations above | `1` | | +| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. | +| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. | + +> ### Info +> +>The multiplication of all the **enabled** tiers `dbengine tier N update every iterations` values must be less than `65535`. + +### [directories] section options + +| setting | default | info | +|:-------------------:|:------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| config | `/etc/netdata` | The directory configuration files are kept. | +| stock config | `/usr/lib/netdata/conf.d` | | +| log | `/var/log/netdata` | The directory in which the [log files](/src/daemon/README.md#log-files) are kept. | +| web | `/usr/share/netdata/web` | The directory the web static files are kept. | +| cache | `/var/cache/netdata` | The directory the memory database will be stored if and when Netdata exits. Netdata will re-read the database when it will start again, to continue from the same point. | +| lib | `/var/lib/netdata` | Contains the alert log and the Netdata instance GUID. 
| +| home | `/var/cache/netdata` | Contains the db files for the collected metrics. | +| lock | `/var/lib/netdata/lock` | Contains the data collectors lock files. | +| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. | +| health config | `/etc/netdata/health.d` | The directory containing the user alert configuration files, to override the stock configurations | +| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alert configuration files for each collector | +| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](/src/registry/README.md) database and GUID that uniquely identifies each Netdata Agent | + +### [logs] section options + +| setting | default | info | +|:----------------------------------:|:-----------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](/src/daemon/README.md#debugging). | +| debug | `/var/log/netdata/debug.log` | The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](/src/daemon/README.md#debugging). | +| error | `/var/log/netdata/error.log` | The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). 
You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. | +| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. | +| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. | +| errors flood protection period | `1200` | Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`. | +| errors to trigger flood protection | `200` | Number of errors written to the log in `errors flood protection period` sec before flood protection is activated. | +| severity level | `info` | Controls which log messages are logged, with error being the most important. Supported values: `info` and `error`. | + +### [environment variables] section options + +| setting | default | info | +|:----------:|:-----------------:|:-----------------------------------------------------------| +| TZ | `:/etc/localtime` | Where to find the timezone | +| PATH | `auto-detected` | Specifies the directories to be searched to find a command | +| PYTHONPATH | | Used to set a custom python path | + +### [sqlite] section options + +| setting | default | info | +|:------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| auto vacuum | `INCREMENTAL` | The [auto-vacuum status](https://www.sqlite.org/pragma.html#pragma_auto_vacuum) in the database | +| synchronous | `NORMAL` | The setting of the ["synchronous"](https://www.sqlite.org/pragma.html#pragma_synchronous) flag | +| journal mode | `WAL` | The [journal mode](https://www.sqlite.org/pragma.html#pragma_journal_mode) for databases | +| temp store | `MEMORY` | Used to determine where [temporary tables 
and indices are stored](https://www.sqlite.org/pragma.html#pragma_temp_store) | +| journal size limit | `16777216` | Used to set a new [limit in bytes for the database](https://www.sqlite.org/pragma.html#pragma_journal_size_limit) | +| cache size | `-2000` | Used to [suggest the maximum number of database disk pages](https://www.sqlite.org/pragma.html#pragma_cache_size) that SQLite will hold in memory at once per open database file | + +### [health] section options + +This section controls the general behavior of the health monitoring capabilities of Netdata. + +Specific alerts are configured in per-collector config files under the `health.d` directory. For more info, see [health +monitoring](/src/health/README.md). + +[Alert notifications](/src/health/notifications/README.md) are configured in `health_alarm_notify.conf`. + +| setting | default | info | +|:----------------------------------------------:|:------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| enabled | `yes` | Set to `no` to disable all alerts and notifications | +| in memory max health log entries | 1000 | Size of the alert history held in RAM | +| script to execute on alarm | `/usr/libexec/netdata/plugins.d/alarm-notify.sh` | The script that sends alert notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). | +| run at least every seconds | `10` | Controls how often all alert conditions should be evaluated. | +| postpone alarms during hibernation for seconds | `60` | Prevents false alerts. May need to be increased if you get alerts during hibernation. 
| +| health log history | `432000` | Specifies the history of alert events (in seconds) kept in the agent's sqlite database. | +| enabled alarms | * | Defines which alerts to load from both user and stock directories. This is a [simple pattern](/src/libnetdata/simple_pattern/README.md) list of alert or template names. Can be used to disable specific alerts. For example, `enabled alarms = !oom_kill *` will load all alerts except `oom_kill`. | + +### [web] section options + +Refer to the [web server documentation](/src/web/server/README.md) + +### [plugins] section options + +In this section you will see a boolean (`yes`/`no`) option for each plugin (e.g. tc, cgroups, apps, proc etc.). Note +that the configuration options in this section for the orchestrator plugins `python.d` and `charts.d` control **all the +modules** written for that orchestrator. For instance, setting `python.d = no` means that all Python modules +under `collectors/python.d.plugin` will be disabled. + +Additionally, there will be the following options: + +| setting | default | info | +|:-------------------------------:|:---------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| enable running new plugins | `yes` | When set to `yes`, Netdata will enable detected plugins, even if they are not configured explicitly. Setting this to `no` will only enable plugins explicitly configured in this file with a `yes` | +| check for new plugins every | 60 | The time in seconds to check for new plugins in the plugins directory. This allows having other applications dynamically creating plugins for Netdata.
| +| checks | `no` | This is a debugging plugin for the internal latency | + +### [registry] section options + +To understand what this section is and how it should be configured, please refer to +the [registry documentation](/src/registry/README.md). + +## Per-plugin configuration + +The configuration options for plugins appear in sections following the pattern `[plugin:NAME]`. + +### Internal plugins + +Most internal plugins will provide additional options. Check [Internal Plugins](/src/collectors/README.md) for more +information. + +Please note, that by default Netdata will enable monitoring metrics for disks, memory, and network only when they are +not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, +will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them +to appear though). Use `yes` instead of `auto` in plugin configuration sections to enable these charts permanently. You +can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics +for all internal Netdata plugins. + +### External plugins + +External plugins will have only 2 options at `netdata.conf`: + +| setting | default | info | +|:---------------:|:--------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| update every | the value of `[global].update every` setting | The frequency in seconds the plugin should collect values. For more information check the [performance guide](/docs/netdata-agent/configuration/optimize-the-netdata-agents-performance.md). | +| command options | - | Additional command line options to pass to the plugin. 
// Per-request state of an echo: allocated by the dyncfg_echo*() senders,
// passed to rrd_function_run() as the callback data, and released in full
// by dyncfg_echo_cb() once the plugin has responded.
struct dyncfg_echo {
    const DICTIONARY_ITEM *item;    // duplicated reference to the node in dyncfg_globals.nodes; released by dyncfg_echo_cb()
    DYNCFG *df; // for additions this is the job, not the template
    BUFFER *wb;                     // response buffer given to rrd_function_run(); freed by dyncfg_echo_cb()
    DYNCFG_CMDS cmd;                // the command that was echoed; selects the status update in dyncfg_echo_cb()
    const char *cmd_str;            // heap copy (strdupz) of the command text; used for logging, freed by dyncfg_echo_cb()
};
DYNCFG_CMD_ADD) + e->df->dyncfg.plugin_rejected = true; + } + + buffer_free(e->wb); + dictionary_acquired_item_release(dyncfg_globals.nodes, e->item); + + e->wb = NULL; + e->df = NULL; + e->item = NULL; + freez((void *)e->cmd_str); + e->cmd_str = NULL; + freez(e); +} + +// ---------------------------------------------------------------------------- + +void dyncfg_echo(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id __maybe_unused, DYNCFG_CMDS cmd) { + RRDHOST *host = dyncfg_rrdhost(df); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host of configuration id '%s'", id); + return; + } + + if(!(df->cmds & cmd)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: attempted to echo a cmd that is not supported"); + return; + } + + const char *cmd_str = dyncfg_id2cmd_one(cmd); + if(!cmd_str) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: command given does not resolve to a known command"); + return; + } + + struct dyncfg_echo *e = callocz(1, sizeof(struct dyncfg_echo)); + e->item = dictionary_acquired_item_dup(dyncfg_globals.nodes, item); + e->wb = buffer_create(0, NULL); + e->df = df; + e->cmd = cmd; + e->cmd_str = strdupz(cmd_str); + + char buf[string_strlen(df->function) + strlen(e->cmd_str) + 20]; + snprintfz(buf, sizeof(buf), "%s %s", string2str(df->function), e->cmd_str); + + rrd_function_run( + host, e->wb, 10, + HTTP_ACCESS_ALL, buf, false, NULL, + dyncfg_echo_cb, e, + NULL, NULL, + NULL, NULL, + NULL, string2str(df->dyncfg.source)); +} + +// ---------------------------------------------------------------------------- + +void dyncfg_echo_update(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id) { + RRDHOST *host = dyncfg_rrdhost(df); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host of configuration id '%s'", id); + return; + } + + if(!df->dyncfg.payload) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: requested to send an update to '%s', but there is no payload", id); + return; + } + + struct dyncfg_echo *e = callocz(1, 
sizeof(struct dyncfg_echo)); + e->item = dictionary_acquired_item_dup(dyncfg_globals.nodes, item); + e->wb = buffer_create(0, NULL); + e->df = df; + e->cmd = DYNCFG_CMD_UPDATE; + e->cmd_str = strdupz("update"); + + char buf[string_strlen(df->function) + strlen(e->cmd_str) + 20]; + snprintfz(buf, sizeof(buf), "%s %s", string2str(df->function), e->cmd_str); + + rrd_function_run( + host, e->wb, 10, + HTTP_ACCESS_ALL, buf, false, NULL, + dyncfg_echo_cb, e, + NULL, NULL, + NULL, NULL, + df->dyncfg.payload, string2str(df->dyncfg.source)); +} + +// ---------------------------------------------------------------------------- + +static void dyncfg_echo_payload_add(const DICTIONARY_ITEM *item_template __maybe_unused, const DICTIONARY_ITEM *item_job, DYNCFG *df_template, DYNCFG *df_job, const char *id_template, const char *cmd) { + RRDHOST *host = dyncfg_rrdhost(df_template); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host of configuration id '%s'", id_template); + return; + } + + if(!df_job->dyncfg.payload) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: requested to send a '%s' to '%s', but there is no payload", + cmd, id_template); + return; + } + + struct dyncfg_echo *e = callocz(1, sizeof(struct dyncfg_echo)); + e->item = dictionary_acquired_item_dup(dyncfg_globals.nodes, item_job); + e->wb = buffer_create(0, NULL); + e->df = df_job; + e->cmd = DYNCFG_CMD_ADD; + e->cmd_str = strdupz(cmd); + + char buf[string_strlen(df_template->function) + strlen(cmd) + 20]; + snprintfz(buf, sizeof(buf), "%s %s", string2str(df_template->function), cmd); + + rrd_function_run( + host, e->wb, 10, + HTTP_ACCESS_ALL, buf, false, NULL, + dyncfg_echo_cb, e, + NULL, NULL, + NULL, NULL, + df_job->dyncfg.payload, string2str(df_job->dyncfg.source)); +} + +void dyncfg_echo_add(const DICTIONARY_ITEM *item_template, const DICTIONARY_ITEM *item_job, DYNCFG *df_template, DYNCFG *df_job, const char *template_id, const char *job_name) { + char buf[strlen(job_name) + 20]; + 
snprintfz(buf, sizeof(buf), "add %s", job_name); + dyncfg_echo_payload_add(item_template, item_job, df_template, df_job, template_id, buf); +} + diff --git a/src/daemon/config/dyncfg-files.c b/src/daemon/config/dyncfg-files.c new file mode 100644 index 000000000..81b56918f --- /dev/null +++ b/src/daemon/config/dyncfg-files.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +void dyncfg_file_delete(const char *id) { + CLEAN_CHAR_P *escaped_id = dyncfg_escape_id_for_filename(id); + char filename[FILENAME_MAX]; + snprintfz(filename, sizeof(filename), "%s/%s.dyncfg", dyncfg_globals.dir, escaped_id); + unlink(filename); +} + +void dyncfg_file_save(const char *id, DYNCFG *df) { + CLEAN_CHAR_P *escaped_id = dyncfg_escape_id_for_filename(id); + char filename[FILENAME_MAX]; + snprintfz(filename, sizeof(filename), "%s/%s.dyncfg", dyncfg_globals.dir, escaped_id); + + FILE *fp = fopen(filename, "w"); + if(!fp) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot create file '%s'", filename); + return; + } + + df->dyncfg.modified_ut = now_realtime_usec(); + if(!df->dyncfg.created_ut) + df->dyncfg.created_ut = df->dyncfg.modified_ut; + + fprintf(fp, "version=%zu\n", DYNCFG_VERSION); + fprintf(fp, "id=%s\n", id); + + if(df->template) + fprintf(fp, "template=%s\n", string2str(df->template)); + + char uuid_str[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(df->host_uuid.uuid, uuid_str); + fprintf(fp, "host=%s\n", uuid_str); + + fprintf(fp, "path=%s\n", string2str(df->path)); + fprintf(fp, "type=%s\n", dyncfg_id2type(df->type)); + + fprintf(fp, "source_type=%s\n", dyncfg_id2source_type(df->dyncfg.source_type)); + fprintf(fp, "source=%s\n", string2str(df->dyncfg.source)); + + fprintf(fp, "created=%"PRIu64"\n", df->dyncfg.created_ut); + fprintf(fp, "modified=%"PRIu64"\n", df->dyncfg.modified_ut); + fprintf(fp, "sync=%s\n", df->sync ? 
"true" : "false"); + fprintf(fp, "user_disabled=%s\n", df->dyncfg.user_disabled ? "true" : "false"); + fprintf(fp, "saves=%"PRIu32"\n", ++df->dyncfg.saves); + + fprintf(fp, "cmds="); + dyncfg_cmds2fp(df->cmds, fp); + fprintf(fp, "\n"); + + if(df->dyncfg.payload && buffer_strlen(df->dyncfg.payload) > 0) { + fprintf(fp, "content_type=%s\n", content_type_id2string(df->dyncfg.payload->content_type)); + fprintf(fp, "content_length=%zu\n", buffer_strlen(df->dyncfg.payload)); + fprintf(fp, "---\n"); + fwrite(buffer_tostring(df->dyncfg.payload), 1, buffer_strlen(df->dyncfg.payload), fp); + } + + fclose(fp); +} + +void dyncfg_file_load(const char *d_name) { + char filename[PATH_MAX]; + snprintf(filename, sizeof(filename), "%s/%s", dyncfg_globals.dir, d_name); + + FILE *fp = fopen(filename, "r"); + if (!fp) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot open file '%s'", filename); + return; + } + + DYNCFG tmp = { 0 }; + + char line[PLUGINSD_LINE_MAX]; + CLEAN_CHAR_P *id = NULL; + + HTTP_CONTENT_TYPE content_type = CT_NONE; + size_t content_length = 0; + bool read_payload = false; + + while (fgets(line, sizeof(line), fp)) { + if(strcmp(line, "---\n") == 0) { + read_payload = true; + break; + } + + char *value = strchr(line, '='); + if(!value) continue; + + *value++ = '\0'; + + value = trim(value); + if(!value) continue; + + char *key = trim(line); + if(!key) continue; + + // Parse key-value pairs + if (strcmp(key, "version") == 0) { + size_t version = strtoull(value, NULL, 10); + + if(version > DYNCFG_VERSION) + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "DYNCFG: configuration file '%s' has version %zu, which is newer than our version %zu", + filename, version, DYNCFG_VERSION); + + } else if (strcmp(key, "id") == 0) { + freez(id); + id = strdupz(value); + } else if (strcmp(key, "template") == 0) { + tmp.template = string_strdupz(value); + } else if (strcmp(key, "host") == 0) { + uuid_parse_flexi(value, tmp.host_uuid.uuid); + } else if (strcmp(key, "path") == 0) { + tmp.path = 
string_strdupz(value); + } else if (strcmp(key, "type") == 0) { + tmp.type = dyncfg_type2id(value); + } else if (strcmp(key, "source_type") == 0) { + tmp.dyncfg.source_type = dyncfg_source_type2id(value); + } else if (strcmp(key, "source") == 0) { + tmp.dyncfg.source = string_strdupz(value); + } else if (strcmp(key, "created") == 0) { + tmp.dyncfg.created_ut = strtoull(value, NULL, 10); + } else if (strcmp(key, "modified") == 0) { + tmp.dyncfg.modified_ut = strtoull(value, NULL, 10); + } else if (strcmp(key, "sync") == 0) { + tmp.sync = (strcmp(value, "true") == 0); + } else if (strcmp(key, "user_disabled") == 0) { + tmp.dyncfg.user_disabled = (strcmp(value, "true") == 0); + } else if (strcmp(key, "saves") == 0) { + tmp.dyncfg.saves = strtoull(value, NULL, 10); + } else if (strcmp(key, "content_type") == 0) { + content_type = content_type_string2id(value); + } else if (strcmp(key, "content_length") == 0) { + content_length = strtoull(value, NULL, 10); + } else if (strcmp(key, "cmds") == 0) { + tmp.cmds = dyncfg_cmds2id(value); + } + } + + if (read_payload) { + // Determine the actual size of the remaining file content + long saved_position = ftell(fp); // Save current position + fseek(fp, 0, SEEK_END); + long total_size = ftell(fp); // Total size of the file + size_t actual_size = total_size - saved_position; // Calculate remaining content size + fseek(fp, saved_position, SEEK_SET); // Reset file pointer to the beginning of the payload + + // Use actual_size instead of content_length to handle the whole remaining file + tmp.dyncfg.payload = buffer_create(actual_size, NULL); + tmp.dyncfg.payload->content_type = content_type; + + buffer_need_bytes(tmp.dyncfg.payload, actual_size); + tmp.dyncfg.payload->len = fread(tmp.dyncfg.payload->buffer, 1, actual_size, fp); + + if (content_length != tmp.dyncfg.payload->len) { + nd_log(NDLS_DAEMON, NDLP_WARNING, + "DYNCFG: content_length %zu does not match actual payload size %zu for file '%s'", + content_length, actual_size, 
filename); + } + } + + fclose(fp); + + if(!id) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: configuration file '%s' does not include a unique id. Ignoring it.", + filename); + + dyncfg_cleanup(&tmp); + return; + } + + tmp.dyncfg.status = DYNCFG_STATUS_ORPHAN; + tmp.dyncfg.restart_required = false; + + dyncfg_set_current_from_dyncfg(&tmp); + + dictionary_set(dyncfg_globals.nodes, id, &tmp, sizeof(tmp)); + + // check if we need to rename the file + CLEAN_CHAR_P *fixed_id = dyncfg_escape_id_for_filename(id); + char fixed_filename[PATH_MAX]; + snprintf(fixed_filename, sizeof(fixed_filename), "%s/%s.dyncfg", dyncfg_globals.dir, fixed_id); + + if(strcmp(filename, fixed_filename) != 0) { + if(rename(filename, fixed_filename) != 0) + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: cannot rename file '%s' into '%s'. Saving a new configuraton may not overwrite the old one.", + filename, fixed_filename); + } +} + +void dyncfg_load_all(void) { + DIR *dir = opendir(dyncfg_globals.dir); + if (!dir) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot open directory '%s'", dyncfg_globals.dir); + return; + } + + struct dirent *entry; + while ((entry = readdir(dir)) != NULL) { + if ((entry->d_type == DT_REG || entry->d_type == DT_LNK) && strendswith(entry->d_name, ".dyncfg")) + dyncfg_file_load(entry->d_name); + } + + closedir(dir); +} + +// ---------------------------------------------------------------------------- +// schemas loading + +static bool dyncfg_read_file_to_buffer(const char *filename, BUFFER *dst) { + int fd = open(filename, O_RDONLY | O_CLOEXEC, 0666); + if(unlikely(fd == -1)) + return false; + + struct stat st = { 0 }; + if(fstat(fd, &st) != 0) { + close(fd); + return false; + } + + buffer_flush(dst); + buffer_need_bytes(dst, st.st_size + 1); // +1 for the terminating zero + + ssize_t r = read(fd, (char*)dst->buffer, st.st_size); + if(unlikely(r == -1)) { + close(fd); + return false; + } + dst->len = r; + dst->buffer[dst->len] = '\0'; + + close(fd); + return true; +} + 
+static bool dyncfg_get_schema_from(const char *dir, const char *id, BUFFER *dst) { + char filename[FILENAME_MAX + 1]; + + CLEAN_CHAR_P *escaped_id = dyncfg_escape_id_for_filename(id); + snprintfz(filename, sizeof(filename), "%s/schema.d/%s.json", dir, escaped_id); + if(dyncfg_read_file_to_buffer(filename, dst)) + return true; + + snprintfz(filename, sizeof(filename), "%s/schema.d/%s.json", dir, id); + if(dyncfg_read_file_to_buffer(filename, dst)) + return true; + + return false; +} + +bool dyncfg_get_schema(const char *id, BUFFER *dst) { + if(dyncfg_get_schema_from(netdata_configured_user_config_dir, id, dst)) + return true; + + if(dyncfg_get_schema_from(netdata_configured_stock_config_dir, id, dst)) + return true; + + return false; +} diff --git a/src/daemon/config/dyncfg-inline.c b/src/daemon/config/dyncfg-inline.c new file mode 100644 index 000000000..bed912e57 --- /dev/null +++ b/src/daemon/config/dyncfg-inline.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg.h" + +static DICTIONARY *dyncfg_nodes = NULL; + +static int dyncfg_inline_callback(struct rrd_function_execute *rfe, void *data __maybe_unused) { + char tr[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(*rfe->transaction, tr); + + bool cancelled = rfe->is_cancelled.cb ? 
rfe->is_cancelled.cb(rfe->is_cancelled.data) : false; + + int code; + if(cancelled) + code = HTTP_RESP_CLIENT_CLOSED_REQUEST; + else + code = dyncfg_node_find_and_call(dyncfg_nodes, tr, rfe->function, rfe->stop_monotonic_ut, &cancelled, + rfe->payload, rfe->user_access, rfe->source, rfe->result.wb); + + if(code == HTTP_RESP_CLIENT_CLOSED_REQUEST || (rfe->is_cancelled.cb && rfe->is_cancelled.cb(rfe->is_cancelled.data))) { + buffer_flush(rfe->result.wb); + code = HTTP_RESP_CLIENT_CLOSED_REQUEST; + } + + if(rfe->result.cb) + rfe->result.cb(rfe->result.wb, code, rfe->result.data); + + return code; +} + +bool dyncfg_add(RRDHOST *host, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, + DYNCFG_CMDS cmds, HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + dyncfg_cb_t cb, void *data) { + + struct dyncfg_node tmp = { + .cmds = cmds, + .type = type, + .cb = cb, + .data = data, + }; + dictionary_set(dyncfg_nodes, id, &tmp, sizeof(tmp)); + + if(!dyncfg_add_low_level(host, id, path, status, type, source_type, source, cmds, + 0, 0, true, view_access, edit_access, + dyncfg_inline_callback, NULL)) { + dictionary_del(dyncfg_nodes, id); + return false; + } + + return true; +} + +void dyncfg_del(RRDHOST *host, const char *id) { + dictionary_del(dyncfg_nodes, id); + dyncfg_del_low_level(host, id); +} + +void dyncfg_status(RRDHOST *host, const char *id, DYNCFG_STATUS status) { + dyncfg_status_low_level(host, id, status); +} + +void dyncfg_init(bool load_saved) { + dyncfg_nodes = dyncfg_nodes_dictionary_create(); + dyncfg_init_low_level(load_saved); +} diff --git a/src/daemon/config/dyncfg-intercept.c b/src/daemon/config/dyncfg-intercept.c new file mode 100644 index 000000000..65f8383ed --- /dev/null +++ b/src/daemon/config/dyncfg-intercept.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +// 
---------------------------------------------------------------------------- +// we intercept the config function calls of the plugin + +struct dyncfg_call { + BUFFER *payload; + char *function; + char *id; + char *add_name; + char *source; + DYNCFG_CMDS cmd; + rrd_function_result_callback_t result_cb; + void *result_cb_data; + bool from_dyncfg_echo; +}; + +static void dyncfg_function_intercept_job_successfully_added(DYNCFG *df_template, int code, struct dyncfg_call *dc) { + char id[strlen(dc->id) + 1 + strlen(dc->add_name) + 1]; + snprintfz(id, sizeof(id), "%s:%s", dc->id, dc->add_name); + + RRDHOST *host = dyncfg_rrdhost(df_template); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: cannot add job '%s' because host is missing", id); + } + else { + const DICTIONARY_ITEM *item = dyncfg_add_internal( + host, + id, + string2str(df_template->path), + dyncfg_status_from_successful_response(code), + DYNCFG_TYPE_JOB, + DYNCFG_SOURCE_TYPE_DYNCFG, + dc->source, + (df_template->cmds & ~DYNCFG_CMD_ADD) | DYNCFG_CMD_GET | DYNCFG_CMD_UPDATE | DYNCFG_CMD_TEST | + DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE | DYNCFG_CMD_REMOVE, + 0, + 0, + df_template->sync, + df_template->view_access, + df_template->edit_access, + df_template->execute_cb, + df_template->execute_cb_data, + false); + + // adding does not create df->dyncfg + // we have to do it here + + DYNCFG *df = dictionary_acquired_item_value(item); + SWAP(df->dyncfg.payload, dc->payload); + dyncfg_set_dyncfg_source_from_txt(df, dc->source); + df->dyncfg.user_disabled = false; + df->dyncfg.source_type = DYNCFG_SOURCE_TYPE_DYNCFG; + df->dyncfg.status = dyncfg_status_from_successful_response(code); + + dyncfg_file_save(id, df); // updates also the df->dyncfg timestamps + dyncfg_update_status_on_successful_add_or_update(df, code); + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + } +} + +static void dyncfg_function_intercept_job_successfully_updated(DYNCFG *df, int code, struct dyncfg_call *dc) { + 
df->dyncfg.status = dyncfg_status_from_successful_response(code); + df->dyncfg.source_type = DYNCFG_SOURCE_TYPE_DYNCFG; + SWAP(df->dyncfg.payload, dc->payload); + dyncfg_set_dyncfg_source_from_txt(df, dc->source); + + dyncfg_update_status_on_successful_add_or_update(df, code); +} + +void dyncfg_function_intercept_result_cb(BUFFER *wb, int code, void *result_cb_data) { + struct dyncfg_call *dc = result_cb_data; + + bool called_from_dyncfg_echo = dc->from_dyncfg_echo; + + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item_advanced(dyncfg_globals.nodes, dc->id, -1); + if(item) { + DYNCFG *df = dictionary_acquired_item_value(item); + bool old_user_disabled = df->dyncfg.user_disabled; + bool save_required = false; + + if (!called_from_dyncfg_echo) { + // the command was sent by a user + + if (DYNCFG_RESP_SUCCESS(code)) { + if (dc->cmd == DYNCFG_CMD_ADD) { + dyncfg_function_intercept_job_successfully_added(df, code, dc); + } else if (dc->cmd == DYNCFG_CMD_UPDATE) { + dyncfg_function_intercept_job_successfully_updated(df, code, dc); + save_required = true; + } + else if (dc->cmd == DYNCFG_CMD_ENABLE) { + df->dyncfg.user_disabled = false; + } + else if (dc->cmd == DYNCFG_CMD_DISABLE) { + df->dyncfg.user_disabled = true; + } + else if (dc->cmd == DYNCFG_CMD_REMOVE) { + dyncfg_file_delete(dc->id); + dictionary_del(dyncfg_globals.nodes, dc->id); + } + + if (save_required || old_user_disabled != df->dyncfg.user_disabled) + dyncfg_file_save(dc->id, df); + } + else + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: plugin returned code %d to user initiated call: %s", code, dc->function); + } + else { + // the command was sent by dyncfg + // these are handled by the echo callback, we don't need to do anything here + ; + } + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + } + + if(dc->result_cb) + dc->result_cb(wb, code, dc->result_cb_data); + + buffer_free(dc->payload); + freez(dc->function); + freez(dc->id); + freez(dc->source); + freez(dc->add_name); + 
// Apply command `c` to every job of the template `template_id`, by echoing
// a command to the plugin for each one of them (see dyncfg_echo()).
//
// rfe         - the running function request; only its progress callback is used here
// template_id - the id of the template whose jobs are to be processed
// c           - the command received for the template
//
// The nodes are walked twice: first to count the matching jobs (so that
// progress can be reported against a total), then to send the commands.
static void dyncfg_apply_action_on_all_template_jobs(struct rrd_function_execute *rfe, const char *template_id, DYNCFG_CMDS c) {
    // NOTE(review): jobs are matched by comparing STRING pointers, which presumably
    // relies on STRING values being interned - confirm against the STRING API
    STRING *template = string_strdupz(template_id);
    DYNCFG *df;

    // pass 1: count the jobs of this template
    size_t all = 0, done = 0;
    dfe_start_read(dyncfg_globals.nodes, df) {
        if(df->template == template && df->type == DYNCFG_TYPE_JOB)
            all++;
    }
    dfe_done(df);

    // report 0 out of `all` before starting
    if(rfe->progress.cb)
        rfe->progress.cb(rfe->progress.data, done, all);

    // pass 2: send the command to each job
    dfe_start_reentrant(dyncfg_globals.nodes, df) {
        if(df->template == template && df->type == DYNCFG_TYPE_JOB) {
            DYNCFG_CMDS cmd_to_send_to_plugin = c;

            // enabling a template does not enable the jobs the user has disabled:
            // these receive a disable instead
            if(c == DYNCFG_CMD_ENABLE)
                cmd_to_send_to_plugin = df->dyncfg.user_disabled ? DYNCFG_CMD_DISABLE : DYNCFG_CMD_ENABLE;
            else if(c == DYNCFG_CMD_DISABLE)
                cmd_to_send_to_plugin = DYNCFG_CMD_DISABLE;

            // df_dfe is the iteration state declared by the dfe_start_*() macro
            dyncfg_echo(df_dfe.item, df, df_dfe.name, cmd_to_send_to_plugin);

            if(rfe->progress.cb)
                rfe->progress.cb(rfe->progress.data, ++done, all);
        }
    }
    dfe_done(df);

    string_freez(template);
}
dictionary_acquired_item_release(dyncfg_globals.nodes, item); + return NULL; + } + + return item; +} + +int dyncfg_function_intercept_cb(struct rrd_function_execute *rfe, void *data __maybe_unused) { + + // IMPORTANT: this function MUST call the result_cb even on failures + + bool called_from_dyncfg_echo = rrd_function_has_this_original_result_callback(rfe->transaction, dyncfg_echo_cb); + bool has_payload = rfe->payload && buffer_strlen(rfe->payload) ? true : false; + bool make_the_call_to_plugin = true; + + int rc = HTTP_RESP_INTERNAL_SERVER_ERROR; + DYNCFG_CMDS cmd; + const DICTIONARY_ITEM *item = NULL; + + char buf[strlen(rfe->function) + 1]; + memcpy(buf, rfe->function, sizeof(buf)); + + char *words[20]; + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, 20); + + size_t i = 0; + char *config = get_word(words, num_words, i++); + char *id = get_word(words, num_words, i++); + char *cmd_str = get_word(words, num_words, i++); + char *add_name = get_word(words, num_words, i++); + + if(!config || !*config || strcmp(config, PLUGINSD_FUNCTION_CONFIG) != 0) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this is not a dyncfg request"); + + cmd = dyncfg_cmds2id(cmd_str); + if(cmd == DYNCFG_CMD_NONE) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: invalid command received"); + + if(cmd == DYNCFG_CMD_ADD || cmd == DYNCFG_CMD_TEST || cmd == DYNCFG_CMD_USERCONFIG) { + if(cmd == DYNCFG_CMD_TEST && (!add_name || !*add_name)) { + // backwards compatibility for TEST without a name + char *colon = strrchr(id, ':'); + if(colon) { + *colon = '\0'; + add_name = ++colon; + } + else + add_name = "test"; + } + + if(!add_name || !*add_name) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this action requires a name"); + + if(!called_from_dyncfg_echo) { + char nid[strlen(id) + strlen(add_name) + 2]; + snprintfz(nid, 
sizeof(nid), "%s:%s", id, add_name); + + if (cmd == DYNCFG_CMD_ADD && dictionary_get(dyncfg_globals.nodes, nid)) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: a configuration with this name already exists"); + } + } + + if((cmd == DYNCFG_CMD_ADD || cmd == DYNCFG_CMD_UPDATE || cmd == DYNCFG_CMD_TEST || cmd == DYNCFG_CMD_USERCONFIG) && !has_payload) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this action requires a payload"); + + if((cmd != DYNCFG_CMD_ADD && cmd != DYNCFG_CMD_UPDATE && cmd != DYNCFG_CMD_TEST && cmd != DYNCFG_CMD_USERCONFIG) && has_payload) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this action does not require a payload"); + + item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(!item) { + if(cmd == DYNCFG_CMD_TEST || cmd == DYNCFG_CMD_USERCONFIG) { + // this may be a test on a new job + item = dyncfg_get_template_of_new_job(id); + } + + if(!item) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_NOT_FOUND, + "dyncfg functions intercept: id is not found"); + } + + DYNCFG *df = dictionary_acquired_item_value(item); + + // 1. 
check the permissions of the request + + switch(cmd) { + case DYNCFG_CMD_GET: + case DYNCFG_CMD_SCHEMA: + case DYNCFG_CMD_USERCONFIG: + if(!http_access_user_has_enough_access_level_for_endpoint(rfe->user_access, df->view_access)) { + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_FORBIDDEN, + "dyncfg: you don't have enough view permissions to execute this command"); + } + break; + + case DYNCFG_CMD_ENABLE: + case DYNCFG_CMD_DISABLE: + case DYNCFG_CMD_ADD: + case DYNCFG_CMD_TEST: + case DYNCFG_CMD_UPDATE: + case DYNCFG_CMD_REMOVE: + case DYNCFG_CMD_RESTART: + if(!http_access_user_has_enough_access_level_for_endpoint(rfe->user_access, df->edit_access)) { + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_FORBIDDEN, + "dyncfg: you don't have enough edit permissions to execute this command"); + } + break; + + default: { + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_INTERNAL_SERVER_ERROR, + "dyncfg: permissions for this command are not set"); + } + break; + } + + // 2. 
validate the request parameters + + if(make_the_call_to_plugin) { + if (!(df->cmds & cmd)) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: this command is not supported by the configuration node: %s", rfe->function); + + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this command is not supported by this configuration node"); + } + else if (cmd == DYNCFG_CMD_ADD) { + if (df->type != DYNCFG_TYPE_TEMPLATE) { + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: add command is only allowed in templates"); + + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: add command can only be applied on templates, not %s: %s", + dyncfg_id2type(df->type), rfe->function); + } + } + else if ( + cmd == DYNCFG_CMD_ENABLE && df->type == DYNCFG_TYPE_JOB && + dyncfg_is_user_disabled(string2str(df->template))) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: cannot enable a job of a disabled template: %s", + rfe->function); + + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this job belongs to disabled template"); + } + } + + // 3. 
check if it is one of the commands we should execute + + if(make_the_call_to_plugin) { + if (cmd & (DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE | DYNCFG_CMD_RESTART) && df->type == DYNCFG_TYPE_TEMPLATE) { + if (!called_from_dyncfg_echo) { + bool old_user_disabled = df->dyncfg.user_disabled; + if (cmd == DYNCFG_CMD_ENABLE) + df->dyncfg.user_disabled = false; + else if (cmd == DYNCFG_CMD_DISABLE) + df->dyncfg.user_disabled = true; + + if (df->dyncfg.user_disabled != old_user_disabled) + dyncfg_file_save(id, df); + } + + dyncfg_apply_action_on_all_template_jobs(rfe, id, cmd); + + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_OK, "applied to all template job"); + make_the_call_to_plugin = false; + } + else if (cmd == DYNCFG_CMD_SCHEMA) { + bool loaded = false; + if (df->type == DYNCFG_TYPE_JOB) { + if (df->template) + loaded = dyncfg_get_schema(string2str(df->template), rfe->result.wb); + } else + loaded = dyncfg_get_schema(id, rfe->result.wb); + + if (loaded) { + rfe->result.wb->content_type = CT_APPLICATION_JSON; + rfe->result.wb->expires = now_realtime_sec(); + rc = HTTP_RESP_OK; + make_the_call_to_plugin = false; + } + } + } + + // 4. execute the command + + if(make_the_call_to_plugin) { + struct dyncfg_call *dc = callocz(1, sizeof(*dc)); + dc->function = strdupz(rfe->function); + dc->id = strdupz(id); + dc->source = rfe->source ? strdupz(rfe->source) : NULL; + dc->add_name = (add_name) ? 
strdupz(add_name) : NULL; + dc->cmd = cmd; + dc->result_cb = rfe->result.cb; + dc->result_cb_data = rfe->result.data; + dc->payload = buffer_dup(rfe->payload); + dc->from_dyncfg_echo = called_from_dyncfg_echo; + + rfe->result.cb = dyncfg_function_intercept_result_cb; + rfe->result.data = dc; + + rc = df->execute_cb(rfe, df->execute_cb_data); + } + else if(rfe->result.cb) + rfe->result.cb(rfe->result.wb, rc, rfe->result.data); + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + return rc; +} + diff --git a/src/daemon/config/dyncfg-internals.h b/src/daemon/config/dyncfg-internals.h new file mode 100644 index 000000000..1722ae792 --- /dev/null +++ b/src/daemon/config/dyncfg-internals.h @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DYNCFG_INTERNALS_H +#define NETDATA_DYNCFG_INTERNALS_H + +#include "../common.h" +#include "database/rrd.h" +#include "database/rrdfunctions.h" +#include "database/rrdfunctions-internals.h" +#include "database/rrdcollector-internals.h" + +typedef struct dyncfg { + ND_UUID host_uuid; + STRING *function; + STRING *template; + STRING *path; + DYNCFG_CMDS cmds; + DYNCFG_TYPE type; + + HTTP_ACCESS view_access; + HTTP_ACCESS edit_access; + + struct { + DYNCFG_STATUS status; + DYNCFG_SOURCE_TYPE source_type; + STRING *source; + usec_t created_ut; + usec_t modified_ut; + } current; + + struct { + uint32_t saves; + bool restart_required; + bool plugin_rejected; + bool user_disabled; + DYNCFG_STATUS status; + DYNCFG_SOURCE_TYPE source_type; + STRING *source; + BUFFER *payload; + usec_t created_ut; + usec_t modified_ut; + } dyncfg; + + bool sync; + rrd_function_execute_cb_t execute_cb; + void *execute_cb_data; +} DYNCFG; + +struct dyncfg_globals { + const char *dir; + DICTIONARY *nodes; +}; + +extern struct dyncfg_globals dyncfg_globals; + +void dyncfg_load_all(void); +void dyncfg_file_load(const char *filename); +void dyncfg_file_save(const char *id, DYNCFG *df); +void dyncfg_file_delete(const char 
*id); + +bool dyncfg_get_schema(const char *id, BUFFER *dst); + +void dyncfg_echo_cb(BUFFER *wb, int code, void *result_cb_data); +void dyncfg_echo(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id, DYNCFG_CMDS cmd); +void dyncfg_echo_update(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id); +void dyncfg_echo_add(const DICTIONARY_ITEM *item_template, const DICTIONARY_ITEM *item_job, DYNCFG *df_template, DYNCFG *df_job, const char *template_id, const char *job_name); + +const DICTIONARY_ITEM *dyncfg_add_internal(RRDHOST *host, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, + const char *source, DYNCFG_CMDS cmds, + usec_t created_ut, usec_t modified_ut, + bool sync, HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data, + bool overwrite_cb); + +int dyncfg_function_intercept_cb(struct rrd_function_execute *rfe, void *data); +void dyncfg_cleanup(DYNCFG *v); + +const DICTIONARY_ITEM *dyncfg_get_template_of_new_job(const char *job_id); + +bool dyncfg_is_user_disabled(const char *id); + +RRDHOST *dyncfg_rrdhost_by_uuid(ND_UUID *uuid); +RRDHOST *dyncfg_rrdhost(DYNCFG *df); + +static inline void dyncfg_copy_dyncfg_source_to_current(DYNCFG *df) { + STRING *old = df->current.source; + df->current.source = string_dup(df->dyncfg.source); + string_freez(old); +} + +static inline void dyncfg_set_dyncfg_source_from_txt(DYNCFG *df, const char *source) { + STRING *old = df->dyncfg.source; + df->dyncfg.source = string_strdupz(source); + string_freez(old); +} + +static inline void dyncfg_set_current_from_dyncfg(DYNCFG *df) { + df->current.status = df->dyncfg.status; + df->current.source_type = df->dyncfg.source_type; + + dyncfg_copy_dyncfg_source_to_current(df); + + if(df->dyncfg.created_ut < df->current.created_ut) + df->current.created_ut = df->dyncfg.created_ut; + + if(df->dyncfg.modified_ut > df->current.modified_ut) + df->current.modified_ut = 
df->dyncfg.modified_ut; +} + +static inline void dyncfg_update_status_on_successful_add_or_update(DYNCFG *df, int code) { + df->dyncfg.plugin_rejected = false; + + if (code == DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED) + df->dyncfg.restart_required = true; + else + df->dyncfg.restart_required = false; + + dyncfg_set_current_from_dyncfg(df); +} + +static inline DYNCFG_STATUS dyncfg_status_from_successful_response(int code) { + DYNCFG_STATUS status = DYNCFG_STATUS_ACCEPTED; + + switch(code) { + default: + case DYNCFG_RESP_ACCEPTED: + case DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED: + status = DYNCFG_STATUS_ACCEPTED; + break; + + case DYNCFG_RESP_ACCEPTED_DISABLED: + status = DYNCFG_STATUS_DISABLED; + break; + + case DYNCFG_RESP_RUNNING: + status = DYNCFG_STATUS_RUNNING; + break; + + } + + return status; +} + +#endif //NETDATA_DYNCFG_INTERNALS_H diff --git a/src/daemon/config/dyncfg-tree.c b/src/daemon/config/dyncfg-tree.c new file mode 100644 index 000000000..77d031fa0 --- /dev/null +++ b/src/daemon/config/dyncfg-tree.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +static int dyncfg_tree_compar(const void *a, const void *b) { + const DICTIONARY_ITEM *item1 = *(const DICTIONARY_ITEM **)a; + const DICTIONARY_ITEM *item2 = *(const DICTIONARY_ITEM **)b; + + DYNCFG *df1 = dictionary_acquired_item_value(item1); + DYNCFG *df2 = dictionary_acquired_item_value(item2); + + int rc = string_cmp(df1->path, df2->path); + if(rc == 0) + rc = strcmp(dictionary_acquired_item_name(item1), dictionary_acquired_item_name(item2)); + + return rc; +} + +static void dyncfg_to_json(DYNCFG *df, const char *id, BUFFER *wb) { + buffer_json_member_add_object(wb, id); + { + buffer_json_member_add_string(wb, "type", dyncfg_id2type(df->type)); + + if(df->type == DYNCFG_TYPE_JOB) + buffer_json_member_add_string(wb, "template", string2str(df->template)); + + buffer_json_member_add_string(wb, "status", 
dyncfg_id2status(df->current.status)); + dyncfg_cmds2json_array(df->current.status == DYNCFG_STATUS_ORPHAN ? DYNCFG_CMD_REMOVE : df->cmds, "cmds", wb); + buffer_json_member_add_object(wb, "access"); + { + http_access2buffer_json_array(wb, "view", df->view_access); + http_access2buffer_json_array(wb, "edit", df->edit_access); + } + buffer_json_object_close(wb); + buffer_json_member_add_string(wb, "source_type", dyncfg_id2source_type(df->current.source_type)); + buffer_json_member_add_string(wb, "source", string2str(df->current.source)); + buffer_json_member_add_boolean(wb, "sync", df->sync); + buffer_json_member_add_boolean(wb, "user_disabled", df->dyncfg.user_disabled); + buffer_json_member_add_boolean(wb, "restart_required", df->dyncfg.restart_required); + buffer_json_member_add_boolean(wb, "plugin_rejected", df->dyncfg.plugin_rejected); + buffer_json_member_add_object(wb, "payload"); + { + if (df->dyncfg.payload && buffer_strlen(df->dyncfg.payload)) { + buffer_json_member_add_boolean(wb, "available", true); + buffer_json_member_add_string(wb, "status", dyncfg_id2status(df->dyncfg.status)); + buffer_json_member_add_string(wb, "source_type", dyncfg_id2source_type(df->dyncfg.source_type)); + buffer_json_member_add_string(wb, "source", string2str(df->dyncfg.source)); + buffer_json_member_add_uint64(wb, "created_ut", df->dyncfg.created_ut); + buffer_json_member_add_uint64(wb, "modified_ut", df->dyncfg.modified_ut); + buffer_json_member_add_string(wb, "content_type", content_type_id2string(df->dyncfg.payload->content_type)); + buffer_json_member_add_uint64(wb, "content_length", df->dyncfg.payload->len); + } else + buffer_json_member_add_boolean(wb, "available", false); + } + buffer_json_object_close(wb); // payload + buffer_json_member_add_uint64(wb, "saves", df->dyncfg.saves); + buffer_json_member_add_uint64(wb, "created_ut", df->current.created_ut); + buffer_json_member_add_uint64(wb, "modified_ut", df->current.modified_ut); + } + buffer_json_object_close(wb); +} + 
+static void dyncfg_tree_for_host(RRDHOST *host, BUFFER *wb, const char *path, const char *id) { + size_t entries = dictionary_entries(dyncfg_globals.nodes); + size_t used = 0; + const DICTIONARY_ITEM *items[entries]; + size_t restart_required = 0, plugin_rejected = 0, status_incomplete = 0, status_failed = 0; + + STRING *template = NULL; + if(id && *id) + template = string_strdupz(id); + + ND_UUID host_uuid = uuid2UUID(host->host_uuid); + + size_t path_len = strlen(path); + DYNCFG *df; + dfe_start_read(dyncfg_globals.nodes, df) { + if(!UUIDeq(df->host_uuid, host_uuid)) + continue; + + if(strncmp(string2str(df->path), path, path_len) != 0) + continue; + + if(!rrd_function_available(host, string2str(df->function))) + df->current.status = DYNCFG_STATUS_ORPHAN; + + if((id && strcmp(id, df_dfe.name) != 0) && (template && df->template != template)) + continue; + + items[used++] = dictionary_acquired_item_dup(dyncfg_globals.nodes, df_dfe.item); + } + dfe_done(df); + + if(used > 1) + qsort(items, used, sizeof(const DICTIONARY_ITEM *), dyncfg_tree_compar); + + buffer_flush(wb); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); + + buffer_json_member_add_uint64(wb, "version", 1); + + buffer_json_member_add_object(wb, "tree"); + { + STRING *last_path = NULL; + for (size_t i = 0; i < used; i++) { + df = dictionary_acquired_item_value(items[i]); + if (df->path != last_path) { + last_path = df->path; + + if (i) + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, string2str(last_path)); + } + + dyncfg_to_json(df, dictionary_acquired_item_name(items[i]), wb); + + if (df->dyncfg.plugin_rejected) + plugin_rejected++; + + if(df->current.status != DYNCFG_STATUS_ORPHAN) { + if (df->dyncfg.restart_required) + restart_required++; + + if (df->current.status == DYNCFG_STATUS_FAILED) + status_failed++; + + if (df->current.status == DYNCFG_STATUS_INCOMPLETE) + status_incomplete++; + } + } + + if (used) + buffer_json_object_close(wb); + } + 
buffer_json_object_close(wb); // tree + + buffer_json_member_add_object(wb, "attention"); + { + buffer_json_member_add_boolean(wb, "degraded", restart_required + plugin_rejected + status_failed + status_incomplete > 0); + buffer_json_member_add_uint64(wb, "restart_required", restart_required); + buffer_json_member_add_uint64(wb, "plugin_rejected", plugin_rejected); + buffer_json_member_add_uint64(wb, "status_failed", status_failed); + buffer_json_member_add_uint64(wb, "status_incomplete", status_incomplete); + } + buffer_json_object_close(wb); // attention + + buffer_json_agents_v2(wb, NULL, 0, false, false); + + buffer_json_finalize(wb); + + for(size_t i = 0; i < used ;i++) + dictionary_acquired_item_release(dyncfg_globals.nodes, items[i]); +} + +static int dyncfg_config_execute_cb(struct rrd_function_execute *rfe, void *data) { + RRDHOST *host = data; + int code; + + char buf[strlen(rfe->function) + 1]; + memcpy(buf, rfe->function, sizeof(buf)); + + char *words[MAX_FUNCTION_PARAMETERS]; // an array of pointers for the words in this line + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, MAX_FUNCTION_PARAMETERS); + + const char *config = get_word(words, num_words, 0); + const char *action = get_word(words, num_words, 1); + const char *path = get_word(words, num_words, 2); + const char *id = get_word(words, num_words, 3); + + if(!config || !*config || strcmp(config, PLUGINSD_FUNCTION_CONFIG) != 0) { + char *msg = "invalid function call, expected: config"; + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG TREE: function call '%s': %s", rfe->function, msg); + code = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(!action || !*action) { + char *msg = "invalid function call, expected: config tree"; + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG TREE: function call '%s': %s", rfe->function, msg); + code = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(strcmp(action, "tree") == 
0) { + if(!path || !*path) + path = "/"; + + if(!id || !*id) + id = NULL; + else if(!dyncfg_is_valid_id(id)) { + char *msg = "invalid id given"; + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG TREE: function call '%s': %s", rfe->function, msg); + code = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + code = HTTP_RESP_OK; + dyncfg_tree_for_host(host, rfe->result.wb, path, id); + } + else { + const char *name = id; + id = action; + action = path; + path = NULL; + + DYNCFG_CMDS cmd = dyncfg_cmds2id(action); + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(!item) { + item = dyncfg_get_template_of_new_job(id); + + if(item && (!name || !*name)) { + const char *n = dictionary_acquired_item_name(item); + if(strncmp(id, n, strlen(n)) == 0 && id[strlen(n)] == ':') + name = &id[strlen(n) + 1]; + } + } + + if(item) { + DYNCFG *df = dictionary_acquired_item_value(item); + + if(!rrd_function_available(host, string2str(df->function))) + df->current.status = DYNCFG_STATUS_ORPHAN; + + if(cmd == DYNCFG_CMD_REMOVE) { + bool delete = (df->current.status == DYNCFG_STATUS_ORPHAN); + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + item = NULL; + + if(delete) { + if(!http_access_user_has_enough_access_level_for_endpoint(rfe->user_access, df->edit_access)) { + code = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_FORBIDDEN, + "dyncfg: you don't have enough edit permissions to execute this command"); + goto cleanup; + } + + dictionary_del(dyncfg_globals.nodes, id); + dyncfg_file_delete(id); + code = dyncfg_default_response(rfe->result.wb, 200, ""); + goto cleanup; + } + } + else if((cmd == DYNCFG_CMD_USERCONFIG || cmd == DYNCFG_CMD_TEST) && df->current.status != DYNCFG_STATUS_ORPHAN) { + const char *old_rfe_function = rfe->function; + char buf2[2048]; + snprintfz(buf2, sizeof(buf2), "config %s %s %s", dictionary_acquired_item_name(item), action, name?name:""); + rfe->function = buf2; + 
dictionary_acquired_item_release(dyncfg_globals.nodes, item); + item = NULL; + code = dyncfg_function_intercept_cb(rfe, data); + rfe->function = old_rfe_function; + return code; + } + + if(item) + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + } + + code = HTTP_RESP_NOT_FOUND; + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: unknown config id '%s' in call: '%s'. " + "This can happen if the plugin that registered the dynamic configuration is not running now.", + id, rfe->function); + + rrd_call_function_error( + rfe->result.wb, + "unknown config id given", code); + } + +cleanup: + if(rfe->result.cb) + rfe->result.cb(rfe->result.wb, code, rfe->result.data); + + return code; +} + +// ---------------------------------------------------------------------------- +// this adds a 'config' function to all leaf nodes (localhost and virtual nodes) +// which is used to serve the tree and act as a catch-all for all config calls +// for which there is no id overloaded. + +void dyncfg_host_init(RRDHOST *host) { + // IMPORTANT: + // This function needs to be async, although it is internal. + // The reason is that it can call by itself another function that may or may not be internal (sync). 
+ + rrd_function_add(host, NULL, PLUGINSD_FUNCTION_CONFIG, 120, + 1000, "Dynamic configuration", "config", HTTP_ACCESS_ANONYMOUS_DATA, + false, dyncfg_config_execute_cb, host); +} diff --git a/src/daemon/config/dyncfg-unittest.c b/src/daemon/config/dyncfg-unittest.c new file mode 100644 index 000000000..775dc7cbd --- /dev/null +++ b/src/daemon/config/dyncfg-unittest.c @@ -0,0 +1,799 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +// ---------------------------------------------------------------------------- +// unit test + +#define LINE_FILE_STR TOSTRING(__LINE__) "@" __FILE__ + +struct dyncfg_unittest { + bool enabled; + size_t errors; + + DICTIONARY *nodes; + + SPINLOCK spinlock; + struct dyncfg_unittest_action *queue; +} dyncfg_unittest_data = { 0 }; + +typedef struct { + bool enabled; + bool removed; + struct { + double dbl; + bool bln; + } value; +} TEST_CFG; + +typedef struct { + const char *id; + const char *source; + bool sync; + DYNCFG_TYPE type; + DYNCFG_CMDS cmds; + DYNCFG_SOURCE_TYPE source_type; + + TEST_CFG current; + TEST_CFG expected; + + bool received; + bool finished; + + size_t last_saves; + bool needs_save; +} TEST; + +struct dyncfg_unittest_action { + TEST *t; + BUFFER *result; + BUFFER *payload; + DYNCFG_CMDS cmd; + const char *add_name; + const char *source; + + rrd_function_result_callback_t result_cb; + void *result_cb_data; + + struct dyncfg_unittest_action *prev, *next; +}; + +static void dyncfg_unittest_register_error(const char *id, const char *msg) { + if(msg) + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: error on id '%s': %s", id ? 
id : "", msg); + + __atomic_add_fetch(&dyncfg_unittest_data.errors, 1, __ATOMIC_RELAXED); +} + +static int dyncfg_unittest_execute_cb(struct rrd_function_execute *rfe, void *data); + +bool dyncfg_unittest_parse_payload(BUFFER *payload, TEST *t, DYNCFG_CMDS cmd, const char *add_name, const char *source) { + CLEAN_JSON_OBJECT *jobj = json_tokener_parse(buffer_tostring(payload)); + if(!jobj) { + dyncfg_unittest_register_error(t->id, "cannot parse json payload"); + return false; + } + + struct json_object *json_double; + struct json_object *json_boolean; + + json_object_object_get_ex(jobj, "double", &json_double); + double value_double = json_object_get_double(json_double); + + json_object_object_get_ex(jobj, "boolean", &json_boolean); + int value_boolean = json_object_get_boolean(json_boolean); + + if(cmd == DYNCFG_CMD_UPDATE) { + t->current.value.dbl = value_double; + t->current.value.bln = value_boolean; + } + else if(cmd == DYNCFG_CMD_ADD) { + char buf[strlen(t->id) + strlen(add_name) + 20]; + snprintfz(buf, sizeof(buf), "%s:%s", t->id, add_name); + TEST tmp = { + .id = strdupz(buf), + .source = strdupz(source), + .cmds = (t->cmds & ~DYNCFG_CMD_ADD) | DYNCFG_CMD_GET | DYNCFG_CMD_REMOVE | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE | DYNCFG_CMD_TEST, + .sync = t->sync, + .type = DYNCFG_TYPE_JOB, + .source_type = DYNCFG_SOURCE_TYPE_DYNCFG, + .received = true, + .finished = true, + .current = + {.enabled = true, + .removed = false, + .value = + { + .dbl = value_double, + .bln = value_boolean, + }}, + .expected = { + .enabled = true, + .removed = false, + .value = { + .dbl = 3.14, + .bln = true, + } + }, + .needs_save = true, + }; + const DICTIONARY_ITEM *item = dictionary_set_and_acquire_item(dyncfg_unittest_data.nodes, buf, &tmp, sizeof(tmp)); + TEST *t2 = dictionary_acquired_item_value(item); + dictionary_acquired_item_release(dyncfg_unittest_data.nodes, item); + + dyncfg_add_low_level(localhost, t2->id, "/unittests", + DYNCFG_STATUS_RUNNING, 
t2->type, t2->source_type, t2->source, + t2->cmds, 0, 0, t2->sync, + HTTP_ACCESS_NONE, HTTP_ACCESS_NONE, + dyncfg_unittest_execute_cb, t2); + } + else { + dyncfg_unittest_register_error(t->id, "invalid command received to parse payload"); + return false; + } + + return true; +} + +static int dyncfg_unittest_action(struct dyncfg_unittest_action *a) { + TEST *t = a->t; + + int rc = HTTP_RESP_OK; + + if(a->cmd == DYNCFG_CMD_ENABLE) + t->current.enabled = true; + else if(a->cmd == DYNCFG_CMD_DISABLE) + t->current.enabled = false; + else if(a->cmd == DYNCFG_CMD_ADD || a->cmd == DYNCFG_CMD_UPDATE) + rc = dyncfg_unittest_parse_payload(a->payload, a->t, a->cmd, a->add_name, a->source) ? HTTP_RESP_OK : HTTP_RESP_BAD_REQUEST; + else if(a->cmd == DYNCFG_CMD_REMOVE) + t->current.removed = true; + else + rc = HTTP_RESP_BAD_REQUEST; + + dyncfg_default_response(a->result, rc, NULL); + + a->result_cb(a->result, rc, a->result_cb_data); + + buffer_free(a->payload); + freez((void *)a->add_name); + freez(a); + + __atomic_store_n(&t->finished, true, __ATOMIC_RELAXED); + + return rc; +} + +static void *dyncfg_unittest_thread_action(void *ptr) { + while(!nd_thread_signaled_to_cancel()) { + struct dyncfg_unittest_action *a = NULL; + spinlock_lock(&dyncfg_unittest_data.spinlock); + a = dyncfg_unittest_data.queue; + if(a) + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(dyncfg_unittest_data.queue, a, prev, next); + spinlock_unlock(&dyncfg_unittest_data.spinlock); + + if(a) + dyncfg_unittest_action(a); + else + sleep_usec(10 * USEC_PER_MS); + } + + return ptr; +} + +static int dyncfg_unittest_execute_cb(struct rrd_function_execute *rfe, void *data) { + + int rc; + bool run_the_callback = true; + TEST *t = data; + + t->received = true; + + char buf[strlen(rfe->function) + 1]; + memcpy(buf, rfe->function, sizeof(buf)); + + char *words[MAX_FUNCTION_PARAMETERS]; // an array of pointers for the words in this line + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, MAX_FUNCTION_PARAMETERS); + 
+ const char *config = get_word(words, num_words, 0); + const char *id = get_word(words, num_words, 1); + const char *action = get_word(words, num_words, 2); + const char *add_name = get_word(words, num_words, 3); + + if(!config || !*config || strcmp(config, PLUGINSD_FUNCTION_CONFIG) != 0) { + char *msg = "did not receive a config call"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(!id || !*id) { + char *msg = "did not receive an id"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(t->type != DYNCFG_TYPE_TEMPLATE && strcmp(t->id, id) != 0) { + char *msg = "id received is not the expected"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(!action || !*action) { + char *msg = "did not receive an action"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + DYNCFG_CMDS cmd = dyncfg_cmds2id(action); + if(cmd == DYNCFG_CMD_NONE) { + char *msg = "action received is not known"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(!(t->cmds & cmd)) { + char *msg = "received a command that is not supported"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(t->current.removed && cmd != DYNCFG_CMD_ADD) { + char *msg = "received a command for a removed entry"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + struct dyncfg_unittest_action *a = callocz(1, sizeof(*a)); + a->t = t; + a->add_name = add_name ? 
strdupz(add_name) : NULL; + a->source = rfe->source, + a->result = rfe->result.wb; + a->payload = buffer_dup(rfe->payload); + a->cmd = cmd; + a->result_cb = rfe->result.cb; + a->result_cb_data = rfe->result.data; + + run_the_callback = false; + + if(t->sync) + rc = dyncfg_unittest_action(a); + else { + spinlock_lock(&dyncfg_unittest_data.spinlock); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(dyncfg_unittest_data.queue, a, prev, next); + spinlock_unlock(&dyncfg_unittest_data.spinlock); + rc = HTTP_RESP_OK; + } + +cleanup: + if(run_the_callback) { + __atomic_store_n(&t->finished, true, __ATOMIC_RELAXED); + + if (rfe->result.cb) + rfe->result.cb(rfe->result.wb, rc, rfe->result.data); + } + + return rc; +} + +static bool dyncfg_unittest_check(TEST *t, DYNCFG_CMDS c, const char *cmd, bool received) { + size_t errors = 0; + + fprintf(stderr, "CHECK '%s' after cmd '%s'...", t->id, cmd); + + if(t->received != received) { + fprintf(stderr, "\n - received flag found '%s', expected '%s'", + t->received?"true":"false", + received?"true":"false"); + errors++; + goto cleanup; + } + + if(!received) + goto cleanup; + + usec_t give_up_ut = now_monotonic_usec() + 2 * USEC_PER_SEC; + while(!__atomic_load_n(&t->finished, __ATOMIC_RELAXED)) { + tinysleep(); + + if(now_monotonic_usec() > give_up_ut) { + fprintf(stderr, "\n - gave up waiting for the plugin to process this!"); + errors++; + goto cleanup; + } + } + + if(t->type != DYNCFG_TYPE_TEMPLATE && t->current.enabled != t->expected.enabled) { + fprintf(stderr, "\n - enabled flag found '%s', expected '%s'", + t->current.enabled?"true":"false", + t->expected.enabled?"true":"false"); + errors++; + } + if(t->current.removed != t->expected.removed) { + fprintf(stderr, "\n - removed flag found '%s', expected '%s'", + t->current.removed?"true":"false", + t->expected.removed?"true":"false"); + errors++; + } + if(t->current.value.bln != t->expected.value.bln) { + fprintf(stderr, "\n - boolean value found '%s', expected '%s'", + 
t->current.value.bln?"true":"false", + t->expected.value.bln?"true":"false"); + errors++; + } + if(t->current.value.dbl != t->expected.value.dbl) { + fprintf(stderr, "\n - double value found '%f', expected '%f'", + t->current.value.dbl, t->expected.value.dbl); + errors++; + } + + DYNCFG *df = dictionary_get(dyncfg_globals.nodes, t->id); + if(!df) { + fprintf(stderr, "\n - not found in DYNCFG nodes dictionary!"); + errors++; + } + else if(df->cmds != t->cmds) { + fprintf(stderr, "\n - has different cmds in DYNCFG nodes dictionary; found: "); + dyncfg_cmds2fp(df->cmds, stderr); + fprintf(stderr, ", expected: "); + dyncfg_cmds2fp(t->cmds, stderr); + fprintf(stderr, "\n"); + errors++; + } + else if(df->type == DYNCFG_TYPE_JOB && df->current.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && !df->dyncfg.saves) { + fprintf(stderr, "\n - DYNCFG job has no saves!"); + errors++; + } + else if(df->type == DYNCFG_TYPE_JOB && df->current.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && (!df->dyncfg.payload || !buffer_strlen(df->dyncfg.payload))) { + fprintf(stderr, "\n - DYNCFG job has no payload!"); + errors++; + } + else if(df->dyncfg.user_disabled && !df->dyncfg.saves) { + fprintf(stderr, "\n - DYNCFG disabled config has no saves!"); + errors++; + } + else if((c & (DYNCFG_CMD_ADD | DYNCFG_CMD_UPDATE)) && t->source && string_strcmp(df->current.source, t->source) != 0) { + fprintf(stderr, "\n - source does not match!"); + errors++; + } + else if((c & (DYNCFG_CMD_ADD | DYNCFG_CMD_UPDATE)) && df->current.source && !t->source) { + fprintf(stderr, "\n - there is a source but it shouldn't be any!"); + errors++; + } + else if(t->needs_save && df->dyncfg.saves <= t->last_saves) { + fprintf(stderr, "\n - should be saved, but it is not saved!"); + errors++; + } + else if(!t->needs_save && df->dyncfg.saves > t->last_saves) { + fprintf(stderr, "\n - should be not be saved, but it saved!"); + errors++; + } + +cleanup: + if(errors) { + fprintf(stderr, "\n >>> FAILED\n\n"); + 
dyncfg_unittest_register_error(NULL, NULL); + return false; + } + + fprintf(stderr, " OK\n"); + return true; +} + +static void dyncfg_unittest_reset(void) { + TEST *t; + dfe_start_read(dyncfg_unittest_data.nodes, t) { + t->received = t->finished = false; + t->needs_save = false; + + DYNCFG *df = dictionary_get(dyncfg_globals.nodes, t->id); + if(!df) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: cannot find id '%s'", t->id); + dyncfg_unittest_register_error(NULL, NULL); + } + else + t->last_saves = df->dyncfg.saves; + } + dfe_done(t); +} + +void should_be_saved(TEST *t, DYNCFG_CMDS c) { + DYNCFG *df; + + if(t->type == DYNCFG_TYPE_TEMPLATE) { + df = dictionary_get(dyncfg_globals.nodes, t->id); + t->current.enabled = !df->dyncfg.user_disabled; + } + + t->needs_save = + c == DYNCFG_CMD_UPDATE || + (t->current.enabled && c == DYNCFG_CMD_DISABLE) || + (!t->current.enabled && c == DYNCFG_CMD_ENABLE); +} + +static int dyncfg_unittest_run(const char *cmd, BUFFER *wb, const char *payload, const char *source) { + dyncfg_unittest_reset(); + + char buf[strlen(cmd) + 1]; + memcpy(buf, cmd, sizeof(buf)); + + char *words[MAX_FUNCTION_PARAMETERS]; // an array of pointers for the words in this line + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, MAX_FUNCTION_PARAMETERS); + + // const char *config = get_word(words, num_words, 0); + const char *id = get_word(words, num_words, 1); + char *action = get_word(words, num_words, 2); + const char *add_name = get_word(words, num_words, 3); + + DYNCFG_CMDS c = dyncfg_cmds2id(action); + + TEST *t = dictionary_get(dyncfg_unittest_data.nodes, id); + if(!t) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: cannot find id '%s' from cmd: %s", id, cmd); + dyncfg_unittest_register_error(NULL, NULL); + return HTTP_RESP_NOT_FOUND; + } + + if(t->type == DYNCFG_TYPE_TEMPLATE) + t->received = t->finished = true; + + if(c == DYNCFG_CMD_DISABLE) + t->expected.enabled = false; + if(c == DYNCFG_CMD_ENABLE) + t->expected.enabled = 
true; + if(c == DYNCFG_CMD_UPDATE) + memset(&t->current.value, 0, sizeof(t->current.value)); + + if(c & (DYNCFG_CMD_UPDATE) || (c & (DYNCFG_CMD_DISABLE|DYNCFG_CMD_ENABLE) && t->type != DYNCFG_TYPE_TEMPLATE)) { + freez((void *)t->source); + t->source = strdupz(source); + } + + buffer_flush(wb); + + CLEAN_BUFFER *pld = NULL; + + if(payload) { + pld = buffer_create(1024, NULL); + buffer_strcat(pld, payload); + } + + should_be_saved(t, c); + + int rc = rrd_function_run(localhost, wb, 10, HTTP_ACCESS_ALL, cmd, + true, NULL, + NULL, NULL, + NULL, NULL, + NULL, NULL, + pld, source); + if(!DYNCFG_RESP_SUCCESS(rc)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: failed to run: %s; returned code %d", cmd, rc); + dyncfg_unittest_register_error(NULL, NULL); + } + + dyncfg_unittest_check(t, c, cmd, true); + + if(rc == HTTP_RESP_OK && t->type == DYNCFG_TYPE_TEMPLATE) { + if(c == DYNCFG_CMD_ADD) { + char buf2[strlen(id) + strlen(add_name) + 2]; + snprintfz(buf2, sizeof(buf2), "%s:%s", id, add_name); + TEST *tt = dictionary_get(dyncfg_unittest_data.nodes, buf2); + if (!tt) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG UNITTEST: failed to find newly added id '%s' of command: %s", + id, cmd); + dyncfg_unittest_register_error(NULL, NULL); + } + dyncfg_unittest_check(tt, c, cmd, true); + } + else { + STRING *template = string_strdupz(t->id); + DYNCFG *df; + dfe_start_read(dyncfg_globals.nodes, df) { + if(df->type == DYNCFG_TYPE_JOB && df->template == template) { + TEST *tt = dictionary_get(dyncfg_unittest_data.nodes, df_dfe.name); + if (!tt) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG UNITTEST: failed to find id '%s' while running command: %s", df_dfe.name, cmd); + dyncfg_unittest_register_error(NULL, NULL); + } + else { + if(c == DYNCFG_CMD_DISABLE) + tt->expected.enabled = false; + if(c == DYNCFG_CMD_ENABLE) + tt->expected.enabled = true; + dyncfg_unittest_check(tt, c, cmd, true); + } + } + } + dfe_done(df); + string_freez(template); + } + } + + return rc; +} + +static void 
dyncfg_unittest_cleanup_files(void) { + char path[FILENAME_MAX]; + snprintfz(path, sizeof(path) - 1, "%s/%s", netdata_configured_varlib_dir, "config"); + + DIR *dir = opendir(path); + if (!dir) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: cannot open directory '%s'", path); + return; + } + + struct dirent *entry; + char filename[FILENAME_MAX + sizeof(entry->d_name)]; + while ((entry = readdir(dir)) != NULL) { + if ((entry->d_type == DT_REG || entry->d_type == DT_LNK) && strstartswith(entry->d_name, "unittest:") && strendswith(entry->d_name, ".dyncfg")) { + snprintf(filename, sizeof(filename), "%s/%s", path, entry->d_name); + nd_log(NDLS_DAEMON, NDLP_INFO, "DYNCFG UNITTEST: deleting file '%s'", filename); + unlink(filename); + } + } + + closedir(dir); +} + +static TEST *dyncfg_unittest_add(TEST t) { + dyncfg_unittest_reset(); + + TEST *ret = dictionary_set(dyncfg_unittest_data.nodes, t.id, &t, sizeof(t)); + + if(!dyncfg_add_low_level(localhost, t.id, "/unittests", DYNCFG_STATUS_RUNNING, t.type, + t.source_type, t.source, + t.cmds, 0, 0, t.sync, + HTTP_ACCESS_NONE, HTTP_ACCESS_NONE, + dyncfg_unittest_execute_cb, ret)) { + dyncfg_unittest_register_error(t.id, "addition of job failed"); + } + + dyncfg_unittest_check(ret, DYNCFG_CMD_NONE, "plugin create", t.type != DYNCFG_TYPE_TEMPLATE); + + return ret; +} + +void dyncfg_unittest_delete_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + TEST *v = value; + freez((void *)v->id); + freez((void *)v->source); +} + +int dyncfg_unittest(void) { + dyncfg_unittest_data.nodes = dictionary_create(DICT_OPTION_NONE); + dictionary_register_delete_callback(dyncfg_unittest_data.nodes, dyncfg_unittest_delete_cb, NULL); + + dyncfg_unittest_cleanup_files(); + rrd_functions_inflight_init(); + dyncfg_init(false); + + // ------------------------------------------------------------------------ + // create the thread for testing async communication + + ND_THREAD *thread = 
nd_thread_create("unittest", NETDATA_THREAD_OPTION_JOINABLE, dyncfg_unittest_thread_action, NULL); + + // ------------------------------------------------------------------------ + // single + + TEST *single1 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:single1"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_SINGLE, + .cmds = DYNCFG_CMD_GET | DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, + .sync = true, + .current = { + .enabled = true, + }, + .expected = { + .enabled = true, + } + }); (void)single1; + + TEST *single2 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:single2"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_SINGLE, + .cmds = DYNCFG_CMD_GET | DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, + .sync = false, + .current = { + .enabled = true, + }, + .expected = { + .enabled = true, + } + }); (void)single2; + + // ------------------------------------------------------------------------ + // template + + TEST *template1 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:template1"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_TEMPLATE, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_ADD | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, + .sync = true, + }); (void)template1; + + TEST *template2 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:template2"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_TEMPLATE, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_ADD | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, + .sync = false, + }); (void)template2; + + // ------------------------------------------------------------------------ + // job + + TEST *user1 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:template1:user1"), + .source 
= strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = true, + .current = { + .enabled = true, + }, + .expected = { + .enabled = true, + } + }); (void)user1; + + TEST *user2 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:template2:user2"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = false, + .expected = { + .enabled = true, + } + }); (void)user2; + + // ------------------------------------------------------------------------ + + int rc; (void)rc; + BUFFER *wb = buffer_create(0, NULL); + + // ------------------------------------------------------------------------ + // dynamic job + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 add dyn1", wb, "{\"double\":3.14,\"boolean\":true}", LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 add dyn2", wb, "{\"double\":3.14,\"boolean\":true}", LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 add dyn3", wb, "{\"double\":3.14,\"boolean\":true}", LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 add dyn4", wb, "{\"double\":3.14,\"boolean\":true}", LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // saving of user_disabled + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:single1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:single2 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:user1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:user2 
disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:dyn1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:dyn2 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:dyn3 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:dyn4 disable", wb, NULL, LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // enabling + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:single1 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:single2 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:user1 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:user2 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:dyn1 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:dyn2 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:dyn3 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:dyn4 enable", wb, NULL, LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // disabling template + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 disable", wb, NULL, LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // enabling template + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 enable", 
wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 enable", wb, NULL, LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // adding job on disabled template + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 disable", wb, NULL, LINE_FILE_STR); + + TEST *user3 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:template1:user3"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = true, + .expected = { + .enabled = false, + } + }); (void)user3; + + TEST *user4 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:template2:user4"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = false, + .expected = { + .enabled = false, + } + }); (void)user4; + + TEST *user5 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:template1:user5"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = true, + .expected = { + .enabled = false, + } + }); (void)user5; + + TEST *user6 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:template2:user6"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = false, + .expected = { + .enabled = false, + } + }); (void)user6; + +// dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " 
unittest:sync:template1:user5 disable", wb, NULL, LINE_FILE_STR); +// dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:user6 disable", wb, NULL, LINE_FILE_STR); + +// // ------------------------------------------------------------------------ +// // enable template with disabled jobs +// +// user3->expected.enabled = true; +// user5->expected.enabled = false; +// dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 enable", wb, NULL, LINE_FILE_STR); +// +// user4->expected.enabled = true; +// user6->expected.enabled = false; +// dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 enable", wb, NULL, LINE_FILE_STR); + + +// // ------------------------------------------------------------------------ +// +// rc = dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " tree", wb, NULL); +// if(rc == HTTP_RESP_OK) +// fprintf(stderr, "%s\n", buffer_tostring(wb)); + + nd_thread_signal_cancel(thread); + nd_thread_join(thread); + dyncfg_unittest_cleanup_files(); + dictionary_destroy(dyncfg_unittest_data.nodes); + buffer_free(wb); + return __atomic_load_n(&dyncfg_unittest_data.errors, __ATOMIC_RELAXED) > 0 ? 
1 : 0; +} diff --git a/src/daemon/config/dyncfg.c b/src/daemon/config/dyncfg.c new file mode 100644 index 000000000..2f484d1ed --- /dev/null +++ b/src/daemon/config/dyncfg.c @@ -0,0 +1,454 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +struct dyncfg_globals dyncfg_globals = { 0 }; + +RRDHOST *dyncfg_rrdhost_by_uuid(ND_UUID *uuid) { + char uuid_str[UUID_STR_LEN]; + uuid_unparse_lower(uuid->uuid, uuid_str); + + RRDHOST *host = rrdhost_find_by_guid(uuid_str); + if(!host) + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host with UUID '%s'", uuid_str); + + return host; +} + +RRDHOST *dyncfg_rrdhost(DYNCFG *df) { + return dyncfg_rrdhost_by_uuid(&df->host_uuid); +} + +void dyncfg_cleanup(DYNCFG *v) { + string_freez(v->dyncfg.source); + v->dyncfg.source = NULL; + + buffer_free(v->dyncfg.payload); + v->dyncfg.payload = NULL; + + string_freez(v->path); + v->path = NULL; + + string_freez(v->current.source); + v->current.source = NULL; + + string_freez(v->function); + v->function = NULL; + + string_freez(v->template); + v->template = NULL; +} + +static void dyncfg_normalize(DYNCFG *df) { + usec_t now_ut = now_realtime_usec(); + + if(!df->current.created_ut) + df->current.created_ut = now_ut; + + if(!df->current.modified_ut) + df->current.modified_ut = now_ut; +} + +static void dyncfg_delete_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + DYNCFG *df = value; + dyncfg_cleanup(df); +} + +static void dyncfg_insert_cb(const DICTIONARY_ITEM *item, void *value, void *data __maybe_unused) { + DYNCFG *df = value; + dyncfg_normalize(df); + + const char *id = dictionary_acquired_item_name(item); + char buf[strlen(id) + 20]; + snprintfz(buf, sizeof(buf), PLUGINSD_FUNCTION_CONFIG " %s", id); + df->function = string_strdupz(buf); + + if(df->type == DYNCFG_TYPE_JOB && !df->template) { + const char *last_colon = strrchr(id, ':'); + if(last_colon) + df->template = string_strndupz(id, 
last_colon - id); + else + nd_log(NDLS_DAEMON, NDLP_WARNING, + "DYNCFG: id '%s' is a job, but does not contain a colon to find the template", id); + } +} + +static void dyncfg_react_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + DYNCFG *df = value; (void)df; + ; +} + +static bool dyncfg_conflict_cb(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data) { + bool *overwrite_cb_ptr = data; + bool overwrite_cb = (overwrite_cb_ptr && *overwrite_cb_ptr); + + DYNCFG *v = old_value; + DYNCFG *nv = new_value; + + size_t changes = 0; + + dyncfg_normalize(nv); + + if(!UUIDeq(v->host_uuid, nv->host_uuid)) { + SWAP(v->host_uuid, nv->host_uuid); + changes++; + } + + if(v->path != nv->path) { + SWAP(v->path, nv->path); + changes++; + } + + if(v->cmds != nv->cmds) { + SWAP(v->cmds, nv->cmds); + changes++; + } + + if(v->type != nv->type) { + SWAP(v->type, nv->type); + changes++; + } + + if(v->view_access != nv->view_access) { + SWAP(v->view_access, nv->view_access); + changes++; + } + + if(v->edit_access != nv->edit_access) { + SWAP(v->edit_access, nv->edit_access); + changes++; + } + + if(v->current.status != nv->current.status) { + SWAP(v->current.status, nv->current.status); + changes++; + } + + if (v->current.source_type != nv->current.source_type) { + SWAP(v->current.source_type, nv->current.source_type); + changes++; + } + + if (v->current.source != nv->current.source) { + SWAP(v->current.source, nv->current.source); + changes++; + } + + if(nv->current.created_ut < v->current.created_ut) { + SWAP(v->current.created_ut, nv->current.created_ut); + changes++; + } + + if(nv->current.modified_ut > v->current.modified_ut) { + SWAP(v->current.modified_ut, nv->current.modified_ut); + changes++; + } + + if(!v->execute_cb || (overwrite_cb && nv->execute_cb && (v->execute_cb != nv->execute_cb || v->execute_cb_data != nv->execute_cb_data))) { + v->sync = nv->sync, + v->execute_cb = nv->execute_cb; + 
v->execute_cb_data = nv->execute_cb_data; + changes++; + } + + dyncfg_cleanup(nv); + + return changes > 0; +} + +// ---------------------------------------------------------------------------- + +void dyncfg_init_low_level(bool load_saved) { + if(!dyncfg_globals.nodes) { + dyncfg_globals.nodes = dictionary_create_advanced(DICT_OPTION_FIXED_SIZE | DICT_OPTION_DONT_OVERWRITE_VALUE, NULL, sizeof(DYNCFG)); + dictionary_register_insert_callback(dyncfg_globals.nodes, dyncfg_insert_cb, NULL); + dictionary_register_react_callback(dyncfg_globals.nodes, dyncfg_react_cb, NULL); + dictionary_register_conflict_callback(dyncfg_globals.nodes, dyncfg_conflict_cb, NULL); + dictionary_register_delete_callback(dyncfg_globals.nodes, dyncfg_delete_cb, NULL); + + char path[PATH_MAX]; + snprintfz(path, sizeof(path), "%s/%s", netdata_configured_varlib_dir, "config"); + + if(mkdir(path, 0755) == -1) { + if(errno != EEXIST) + nd_log(NDLS_DAEMON, NDLP_CRIT, "DYNCFG: failed to create dynamic configuration directory '%s'", path); + } + + dyncfg_globals.dir = strdupz(path); + + if(load_saved) + dyncfg_load_all(); + } +} + +// ---------------------------------------------------------------------------- + +const DICTIONARY_ITEM *dyncfg_add_internal(RRDHOST *host, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, + const char *source, DYNCFG_CMDS cmds, + usec_t created_ut, usec_t modified_ut, + bool sync, HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data, + bool overwrite_cb) { + DYNCFG tmp = { + .host_uuid = uuid2UUID(host->host_uuid), + .path = string_strdupz(path), + .cmds = cmds, + .type = type, + .view_access = view_access, + .edit_access = edit_access, + .current = { + .status = status, + .source_type = source_type, + .source = string_strdupz(source), + .created_ut = created_ut, + .modified_ut = modified_ut, + }, + .sync = sync, + .dyncfg = { 0 }, + .execute_cb = 
execute_cb, + .execute_cb_data = execute_cb_data, + }; + + return dictionary_set_and_acquire_item_advanced(dyncfg_globals.nodes, id, -1, &tmp, sizeof(tmp), &overwrite_cb); +} + +static void dyncfg_send_updates(const char *id) { + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item_advanced(dyncfg_globals.nodes, id, -1); + if(!item) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: asked to update plugin for configuration '%s', but it is not found.", id); + return; + } + + DYNCFG *df = dictionary_acquired_item_value(item); + + if(df->type == DYNCFG_TYPE_SINGLE || df->type == DYNCFG_TYPE_JOB) { + if (df->cmds & DYNCFG_CMD_UPDATE && df->dyncfg.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && df->dyncfg.payload && buffer_strlen(df->dyncfg.payload)) + dyncfg_echo_update(item, df, id); + } + else if(df->type == DYNCFG_TYPE_TEMPLATE && (df->cmds & DYNCFG_CMD_ADD)) { + STRING *template = string_strdupz(id); + + size_t len = strlen(id); + DYNCFG *df_job; + dfe_start_reentrant(dyncfg_globals.nodes, df_job) { + const char *id_template = df_job_dfe.name; + if(df_job->type == DYNCFG_TYPE_JOB && // it is a job + df_job->current.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && // it is dynamically configured + df_job->template == template && // it has the same template name + strncmp(id_template, id, len) == 0 && // the template name matches (redundant) + id_template[len] == ':' && // immediately after the template there is ':' + id_template[len + 1]) { // and there is something else after the ':' + dyncfg_echo_add(item, df_job_dfe.item, df, df_job, id, &id_template[len + 1]); + } + } + dfe_done(df_job); + + string_freez(template); + } + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); +} + +bool dyncfg_is_user_disabled(const char *id) { + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(!item) + return false; + + DYNCFG *df = dictionary_acquired_item_value(item); + bool ret = df->dyncfg.user_disabled; + 
dictionary_acquired_item_release(dyncfg_globals.nodes, item); + return ret; +} + +bool dyncfg_job_has_registered_template(const char *id) { + char buf[strlen(id) + 1]; + memcpy(buf, id, sizeof(buf)); + char *colon = strrchr(buf, ':'); + if(!colon) + return false; + + *colon = '\0'; + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, buf); + if(!item) + return false; + + DYNCFG *df = dictionary_acquired_item_value(item); + bool ret = df->type == DYNCFG_TYPE_TEMPLATE; + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + return ret; +} + +bool dyncfg_add_low_level(RRDHOST *host, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, + DYNCFG_CMDS cmds, usec_t created_ut, usec_t modified_ut, bool sync, + HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data) { + + if(view_access == HTTP_ACCESS_NONE) + view_access = HTTP_ACCESS_SIGNED_ID | HTTP_ACCESS_SAME_SPACE | HTTP_ACCESS_VIEW_AGENT_CONFIG; + + if(edit_access == HTTP_ACCESS_NONE) + edit_access = HTTP_ACCESS_SIGNED_ID | HTTP_ACCESS_SAME_SPACE | HTTP_ACCESS_EDIT_AGENT_CONFIG | HTTP_ACCESS_COMMERCIAL_SPACE; + + if(!dyncfg_is_valid_id(id)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id); + return false; + } + + if(type == DYNCFG_TYPE_JOB && !dyncfg_job_has_registered_template(id)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: job id '%s' does not have a registered template. 
Ignoring dynamic configuration for it.", id); + return false; + } + + DYNCFG_CMDS old_cmds = cmds; + + // all configurations support schema + cmds |= DYNCFG_CMD_SCHEMA; + + // if there is either enable or disable, both are supported + if(cmds & (DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE)) + cmds |= DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE; + + // add + if(type == DYNCFG_TYPE_TEMPLATE) { + // templates must always support "add" + cmds |= DYNCFG_CMD_ADD; + } + else { + // only templates can have "add" + cmds &= ~DYNCFG_CMD_ADD; + } + + // remove + if(source_type != DYNCFG_SOURCE_TYPE_DYNCFG || type != DYNCFG_TYPE_JOB) { + // remove is only available for dyncfg jobs + cmds &= ~DYNCFG_CMD_REMOVE; + } + + // data + if(type == DYNCFG_TYPE_TEMPLATE) { + // templates do not have data + cmds &= ~(DYNCFG_CMD_GET | DYNCFG_CMD_UPDATE); + } + + if(cmds != old_cmds) { + CLEAN_BUFFER *t = buffer_create(1024, NULL); + buffer_sprintf(t, "DYNCFG: id '%s' was declared with cmds: ", id); + dyncfg_cmds2buffer(old_cmds, t); + buffer_strcat(t, ", but they have sanitized to: "); + dyncfg_cmds2buffer(cmds, t); + nd_log(NDLS_DAEMON, NDLP_NOTICE, "%s", buffer_tostring(t)); + } + + const DICTIONARY_ITEM *item = dyncfg_add_internal(host, id, path, status, type, source_type, source, cmds, + created_ut, modified_ut, sync, view_access, edit_access, + execute_cb, execute_cb_data, true); + DYNCFG *df = dictionary_acquired_item_value(item); + +// if(df->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && !df->saves) +// nd_log(NDLS_DAEMON, NDLP_WARNING, "DYNCFG: configuration '%s' is created with source type dyncfg, but we don't have a saved configuration for it", id); + + rrd_collector_started(); + rrd_function_add( + host, + NULL, + string2str(df->function), + 120, + 1000, + "Dynamic configuration", + "config", + (view_access & edit_access), + sync, + dyncfg_function_intercept_cb, + NULL); + + if(df->type != DYNCFG_TYPE_TEMPLATE && (df->cmds & (DYNCFG_CMD_ENABLE|DYNCFG_CMD_DISABLE))) { + DYNCFG_CMDS 
status_to_send_to_plugin = + (df->dyncfg.user_disabled || df->current.status == DYNCFG_STATUS_DISABLED) ? DYNCFG_CMD_DISABLE : DYNCFG_CMD_ENABLE; + + if (status_to_send_to_plugin == DYNCFG_CMD_ENABLE && dyncfg_is_user_disabled(string2str(df->template))) + status_to_send_to_plugin = DYNCFG_CMD_DISABLE; + + dyncfg_echo(item, df, id, status_to_send_to_plugin); + } + + if(!(df->current.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && df->type == DYNCFG_TYPE_JOB)) + dyncfg_send_updates(id); + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + + return true; +} + +void dyncfg_del_low_level(RRDHOST *host, const char *id) { + if(!dyncfg_is_valid_id(id)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id); + return; + } + + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(item) { + DYNCFG *df = dictionary_acquired_item_value(item); + rrd_function_del(host, NULL, string2str(df->function)); + + bool garbage_collect = false; + if(df->dyncfg.saves == 0) { + dictionary_del(dyncfg_globals.nodes, id); + garbage_collect = true; + } + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + + if(garbage_collect) + dictionary_garbage_collect(dyncfg_globals.nodes); + } +} + +void dyncfg_status_low_level(RRDHOST *host __maybe_unused, const char *id, DYNCFG_STATUS status) { + if(!dyncfg_is_valid_id(id)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id); + return; + } + + if(status == DYNCFG_STATUS_NONE) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: status provided to id '%s' is invalid. 
Ignoring it.", id); + return; + } + + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(item) { + DYNCFG *df = dictionary_acquired_item_value(item); + df->current.status = status; + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + } +} + +// ---------------------------------------------------------------------------- + +void dyncfg_add_streaming(BUFFER *wb) { + // when sending config functions to parents, we send only 1 function called 'config'; + // the parent will send the command to the child, and the child will validate it; + // this way the parent does not need to receive removals of config functions; + + buffer_sprintf(wb + , PLUGINSD_KEYWORD_FUNCTION " GLOBAL " PLUGINSD_FUNCTION_CONFIG " %d \"%s\" \"%s\" "HTTP_ACCESS_FORMAT" %d\n" + , 120 + , "Dynamic configuration" + , "config" + , (unsigned)HTTP_ACCESS_ANONYMOUS_DATA + , 1000 + ); +} + +bool dyncfg_available_for_rrdhost(RRDHOST *host) { + if(host == localhost || rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST)) + return true; + + return rrd_function_available(host, PLUGINSD_FUNCTION_CONFIG); +} + +// ---------------------------------------------------------------------------- + diff --git a/src/daemon/config/dyncfg.h b/src/daemon/config/dyncfg.h new file mode 100644 index 000000000..539eddbfb --- /dev/null +++ b/src/daemon/config/dyncfg.h @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DYNCFG_H +#define NETDATA_DYNCFG_H + +#include "../common.h" +#include "database/rrd.h" +#include "database/rrdfunctions.h" + +void dyncfg_add_streaming(BUFFER *wb); +bool dyncfg_available_for_rrdhost(RRDHOST *host); +void dyncfg_host_init(RRDHOST *host); + +// low-level API used by plugins.d and high-level API +bool dyncfg_add_low_level(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, + DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, + usec_t created_ut, usec_t 
modified_ut, bool sync, + HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data); +void dyncfg_del_low_level(RRDHOST *host, const char *id); +void dyncfg_status_low_level(RRDHOST *host, const char *id, DYNCFG_STATUS status); +void dyncfg_init_low_level(bool load_saved); + +// high-level API for internal modules +bool dyncfg_add(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, + DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, + HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + dyncfg_cb_t cb, void *data); +void dyncfg_del(RRDHOST *host, const char *id); +void dyncfg_status(RRDHOST *host, const char *id, DYNCFG_STATUS status); + +void dyncfg_init(bool load_saved); + +#endif //NETDATA_DYNCFG_H diff --git a/src/daemon/daemon.c b/src/daemon/daemon.c new file mode 100644 index 000000000..f77b748a8 --- /dev/null +++ b/src/daemon/daemon.c @@ -0,0 +1,511 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" +#include + +char pidfile[FILENAME_MAX + 1] = ""; +char claiming_directory[FILENAME_MAX + 1]; +char netdata_exe_path[FILENAME_MAX + 1]; +char netdata_exe_file[FILENAME_MAX + 1]; + +void get_netdata_execution_path(void) { + int ret; + size_t exepath_size = 0; + struct passwd *passwd = NULL; + char *user = NULL; + + passwd = getpwuid(getuid()); + user = (passwd && passwd->pw_name) ? 
passwd->pw_name : ""; + + exepath_size = sizeof(netdata_exe_file) - 1; + ret = uv_exepath(netdata_exe_file, &exepath_size); + if (0 != ret) { + netdata_log_error("uv_exepath(\"%s\", %u) (user: %s) failed (%s).", netdata_exe_file, (unsigned)exepath_size, user, + uv_strerror(ret)); + fatal("Cannot start netdata without getting execution path."); + } + + netdata_exe_file[exepath_size] = '\0'; + + // macOS's dirname(3) does not modify passed string + char *tmpdir = strdupz(netdata_exe_file); + strcpy(netdata_exe_path, dirname(tmpdir)); + freez(tmpdir); +} + +static void fix_directory_file_permissions(const char *dirname, uid_t uid, gid_t gid, bool recursive) +{ + char filename[FILENAME_MAX + 1]; + + DIR *dir = opendir(dirname); + if (!dir) + return; + + struct dirent *de = NULL; + + while ((de = readdir(dir))) { + if (de->d_type == DT_DIR && (!strcmp(de->d_name, ".") || !strcmp(de->d_name, ".."))) + continue; + + (void) snprintfz(filename, FILENAME_MAX, "%s/%s", dirname, de->d_name); + if (de->d_type == DT_REG || recursive) { + if (chown(filename, uid, gid) == -1) + netdata_log_error("Cannot chown %s '%s' to %u:%u", de->d_type == DT_DIR ? 
"directory" : "file", filename, (unsigned int)uid, (unsigned int)gid); + } + + if (de->d_type == DT_DIR && recursive) + fix_directory_file_permissions(filename, uid, gid, recursive); + } + + closedir(dir); +} + +static void change_dir_ownership(const char *dir, uid_t uid, gid_t gid, bool recursive) +{ + if (chown(dir, uid, gid) == -1) + netdata_log_error("Cannot chown directory '%s' to %u:%u", dir, (unsigned int)uid, (unsigned int)gid); + + fix_directory_file_permissions(dir, uid, gid, recursive); +} + +static void clean_directory(char *dirname) +{ + DIR *dir = opendir(dirname); + if(!dir) return; + + int dir_fd = dirfd(dir); + struct dirent *de = NULL; + + while((de = readdir(dir))) + if(de->d_type == DT_REG) + if (unlinkat(dir_fd, de->d_name, 0)) + netdata_log_error("Cannot delete %s/%s", dirname, de->d_name); + + closedir(dir); +} + +static void prepare_required_directories(uid_t uid, gid_t gid) { + change_dir_ownership(netdata_configured_cache_dir, uid, gid, true); + change_dir_ownership(netdata_configured_varlib_dir, uid, gid, false); + change_dir_ownership(netdata_configured_lock_dir, uid, gid, false); + change_dir_ownership(netdata_configured_log_dir, uid, gid, false); + change_dir_ownership(claiming_directory, uid, gid, false); + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/registry", netdata_configured_varlib_dir); + change_dir_ownership(filename, uid, gid, false); + + clean_directory(netdata_configured_lock_dir); +} + +static int become_user(const char *username, int pid_fd) { + int am_i_root = (getuid() == 0)?1:0; + + struct passwd *pw = getpwnam(username); + if(!pw) { + netdata_log_error("User %s is not present.", username); + return -1; + } + + uid_t uid = pw->pw_uid; + gid_t gid = pw->pw_gid; + + prepare_required_directories(uid, gid); + + if(pidfile[0]) { + if(chown(pidfile, uid, gid) == -1) + netdata_log_error("Cannot chown '%s' to %u:%u", pidfile, (unsigned int)uid, (unsigned int)gid); + } + + int ngroups = 
(int)sysconf(_SC_NGROUPS_MAX); + gid_t *supplementary_groups = NULL; + if(ngroups > 0) { + supplementary_groups = mallocz(sizeof(gid_t) * ngroups); + if(os_getgrouplist(username, gid, supplementary_groups, &ngroups) == -1) { + if(am_i_root) + netdata_log_error("Cannot get supplementary groups of user '%s'.", username); + + ngroups = 0; + } + } + + nd_log_chown_log_files(uid, gid); + chown_open_file(STDOUT_FILENO, uid, gid); + chown_open_file(STDERR_FILENO, uid, gid); + chown_open_file(pid_fd, uid, gid); + + if(supplementary_groups && ngroups > 0) { + if(setgroups((size_t)ngroups, supplementary_groups) == -1) { + if(am_i_root) + netdata_log_error("Cannot set supplementary groups for user '%s'", username); + } + ngroups = 0; + } + + if(supplementary_groups) + freez(supplementary_groups); + + if(os_setresgid(gid, gid, gid) != 0) { + netdata_log_error("Cannot switch to user's %s group (gid: %u).", username, gid); + return -1; + } + + if(os_setresuid(uid, uid, uid) != 0) { + netdata_log_error("Cannot switch to user %s (uid: %u).", username, uid); + return -1; + } + + if(setgid(gid) != 0) { + netdata_log_error("Cannot switch to user's %s group (gid: %u).", username, gid); + return -1; + } + if(setegid(gid) != 0) { + netdata_log_error("Cannot effectively switch to user's %s group (gid: %u).", username, gid); + return -1; + } + if(setuid(uid) != 0) { + netdata_log_error("Cannot switch to user %s (uid: %u).", username, uid); + return -1; + } + if(seteuid(uid) != 0) { + netdata_log_error("Cannot effectively switch to user %s (uid: %u).", username, uid); + return -1; + } + + return(0); +} + +#ifndef OOM_SCORE_ADJ_MAX +#define OOM_SCORE_ADJ_MAX (1000) +#endif +#ifndef OOM_SCORE_ADJ_MIN +#define OOM_SCORE_ADJ_MIN (-1000) +#endif + +static void oom_score_adj(void) { + char buf[30 + 1]; + long long int old_score, wanted_score = 0, final_score = 0; + + // read the existing score + if(read_single_signed_number_file("/proc/self/oom_score_adj", &old_score)) { + 
netdata_log_error("Out-Of-Memory (OOM) score setting is not supported on this system."); + return; + } + + if (old_score != 0) { + wanted_score = old_score; + analytics_report_oom_score(old_score); + } + + // check the environment + char *s = getenv("OOMScoreAdjust"); + if(!s || !*s) { + snprintfz(buf, sizeof(buf) - 1, "%d", (int)wanted_score); + s = buf; + } + + // check netdata.conf configuration + s = config_get(CONFIG_SECTION_GLOBAL, "OOM score", s); + if(s && *s && (isdigit((uint8_t)*s) || *s == '-' || *s == '+')) + wanted_score = atoll(s); + else if(s && !strcmp(s, "keep")) { + netdata_log_info("Out-Of-Memory (OOM) kept as-is (running with %d)", (int) old_score); + return; + } + else { + netdata_log_info("Out-Of-Memory (OOM) score not changed due to non-numeric setting: '%s' (running with %d)", s, (int)old_score); + return; + } + + if(wanted_score < OOM_SCORE_ADJ_MIN) { + netdata_log_error("Wanted Out-Of-Memory (OOM) score %d is too small. Using %d", (int)wanted_score, (int)OOM_SCORE_ADJ_MIN); + wanted_score = OOM_SCORE_ADJ_MIN; + } + + if(wanted_score > OOM_SCORE_ADJ_MAX) { + netdata_log_error("Wanted Out-Of-Memory (OOM) score %d is too big. 
Using %d", (int)wanted_score, (int)OOM_SCORE_ADJ_MAX); + wanted_score = OOM_SCORE_ADJ_MAX; + } + + if(old_score == wanted_score) { + netdata_log_info("Out-Of-Memory (OOM) score is already set to the wanted value %d", (int)old_score); + return; + } + + int written = 0; + int fd = open("/proc/self/oom_score_adj", O_WRONLY | O_CLOEXEC); + if(fd != -1) { + snprintfz(buf, sizeof(buf) - 1, "%d", (int)wanted_score); + ssize_t len = strlen(buf); + if(len > 0 && write(fd, buf, (size_t)len) == len) written = 1; + close(fd); + + if(written) { + if(read_single_signed_number_file("/proc/self/oom_score_adj", &final_score)) + netdata_log_error("Adjusted my Out-Of-Memory (OOM) score to %d, but cannot verify it.", (int)wanted_score); + else if(final_score == wanted_score) + netdata_log_info("Adjusted my Out-Of-Memory (OOM) score from %d to %d.", (int)old_score, (int)final_score); + else + netdata_log_error("Adjusted my Out-Of-Memory (OOM) score from %d to %d, but it has been set to %d.", (int)old_score, (int)wanted_score, (int)final_score); + analytics_report_oom_score(final_score); + } + else + netdata_log_error("Failed to adjust my Out-Of-Memory (OOM) score to %d. Running with %d. (systemd systems may change it via netdata.service)", (int)wanted_score, (int)old_score); + } + else + netdata_log_error("Failed to adjust my Out-Of-Memory (OOM) score. 
/*
 * Apply the "process nice level" option of the [global] configuration
 * section (default: 19) to this process via nice().
 *
 * The body is compiled only when HAVE_NICE is defined; otherwise the
 * function does nothing.
 */
static void process_nice_level(void) {
#ifdef HAVE_NICE
    int nice_level = (int)config_get_number(CONFIG_SECTION_GLOBAL, "process nice level", 19);

    // nice() returns the resulting nice value, and -1 is a valid one.
    // A failure can only be told apart by clearing errno before the call
    // and checking it afterwards.
    errno = 0;
    if(nice(nice_level) == -1 && errno != 0)
        netdata_log_error("Cannot set netdata CPU nice level to %d.", nice_level);
    else
        netdata_log_debug(D_SYSTEM, "Set netdata nice level to %d.", nice_level);
#endif // HAVE_NICE
}
#ifdef HAVE_SCHED_SETSCHEDULER

/*
 * Select the scheduling policy and priority of this process.
 *
 * The policy name comes from "process scheduling policy" in the [global]
 * configuration section and defaults to the first entry of
 * scheduler_defaults[]. For entries flagged SCHED_FLAG_PRIORITY_CONFIGURABLE
 * the priority is read from "process scheduling priority", and it is clamped
 * to the range reported by sched_get_priority_min()/max() when available.
 *
 * Outcomes:
 *  - SCHED_FLAG_KEEP_AS_IS entries: nothing is changed, the current state is
 *    only reported.
 *  - unknown policy name, or sched_setscheduler() failure: fall back to
 *    process_nice_level().
 *  - success on an entry flagged SCHED_FLAG_USE_NICE: process_nice_level()
 *    is applied on top of the new policy.
 *
 * The resulting state is reported with sched_getscheduler_report() when that
 * function is compiled in.
 */
static void sched_setscheduler_set(void) {

    if(scheduler_defaults[0].name) {
        const char *name = scheduler_defaults[0].name;
        int policy = scheduler_defaults[0].policy, priority = scheduler_defaults[0].priority;
        uint8_t flags = scheduler_defaults[0].flags;
        int found = 0;

        // read the configuration
        name = config_get(CONFIG_SECTION_GLOBAL, "process scheduling policy", name);
        int i;
        for(i = 0 ; scheduler_defaults[i].name ; i++) {
            if(!strcmp(name, scheduler_defaults[i].name)) {
                found = 1;
                policy = scheduler_defaults[i].policy;
                priority = scheduler_defaults[i].priority;
                flags = scheduler_defaults[i].flags;

                if(flags & SCHED_FLAG_KEEP_AS_IS)
                    goto report;

                if(flags & SCHED_FLAG_PRIORITY_CONFIGURABLE)
                    priority = (int)config_get_number(CONFIG_SECTION_GLOBAL, "process scheduling priority", priority);

#ifdef HAVE_SCHED_GET_PRIORITY_MIN
                errno = 0;
                // query the limit once and reuse it for the check, the log and the clamp
                int priority_min = sched_get_priority_min(policy);
                if(priority < priority_min) {
                    netdata_log_error("scheduler %s (%d) priority %d is below the minimum %d. Using the minimum.", name, policy, priority, priority_min);
                    priority = priority_min;
                }
#endif
#ifdef HAVE_SCHED_GET_PRIORITY_MAX
                errno = 0;
                int priority_max = sched_get_priority_max(policy);
                if(priority > priority_max) {
                    netdata_log_error("scheduler %s (%d) priority %d is above the maximum %d. Using the maximum.", name, policy, priority, priority_max);
                    priority = priority_max;
                }
#endif
                break;
            }
        }

        if(!found) {
            netdata_log_error("Unknown scheduling policy '%s' - falling back to nice", name);
            goto fallback;
        }

        const struct sched_param param = {
            .sched_priority = priority
        };

        errno = 0;
        i = sched_setscheduler(0, policy, &param);
        if(i != 0) {
            // no goto here: execution continues into the nice() fallback below
            netdata_log_error("Cannot adjust netdata scheduling policy to %s (%d), with priority %d. Falling back to nice.",
                  name,
                  policy,
                  priority);
        }
        else {
            netdata_log_info("Adjusted netdata scheduling policy to %s (%d), with priority %d.", name, policy, priority);
            if(!(flags & SCHED_FLAG_USE_NICE))
                goto report;
        }
    }

fallback:
    process_nice_level();

report:
    // sched_getscheduler_report() only exists when HAVE_SCHED_GETSCHEDULER is defined
#ifdef HAVE_SCHED_GETSCHEDULER
    sched_getscheduler_report();
#endif
    return;
}
#else /* HAVE_SCHED_SETSCHEDULER */
/*
 * Without sched_setscheduler() the only available adjustment is the nice level.
 */
static void sched_setscheduler_set(void) {
    process_nice_level();
}
#endif /* HAVE_SCHED_SETSCHEDULER */
if(ftruncate(pidfd, 0) != 0) + netdata_log_error("Cannot truncate pidfile '%s'.", pidfile); + + char b[100]; + sprintf(b, "%d\n", getpid()); + ssize_t i = write(pidfd, b, strlen(b)); + if(i <= 0) + netdata_log_error("Cannot write pidfile '%s'.", pidfile); + } + else + netdata_log_error("Failed to open pidfile '%s'.", pidfile); + } + + // Set new file permissions + umask(0007); + + // adjust my Out-Of-Memory score + oom_score_adj(); + + // never become a problem + sched_setscheduler_set(); + + // Set claiming directory based on user config directory with correct ownership + snprintfz(claiming_directory, FILENAME_MAX, "%s/cloud.d", netdata_configured_varlib_dir); + + if(user && *user) { + if(become_user(user, pidfd) != 0) { + netdata_log_error("Cannot become user '%s'. Continuing as we are.", user); + } + else + netdata_log_debug(D_SYSTEM, "Successfully became user '%s'.", user); + } + else { + prepare_required_directories(getuid(), getgid()); + } + + if(pidfd != -1) + close(pidfd); + + return(0); +} diff --git a/src/daemon/daemon.h b/src/daemon/daemon.h new file mode 100644 index 000000000..1f8837fd6 --- /dev/null +++ b/src/daemon/daemon.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DAEMON_H +#define NETDATA_DAEMON_H 1 + +int become_daemon(int dont_fork, const char *user); + +void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data); + +void get_netdata_execution_path(void); + +extern char pidfile[]; +extern char netdata_exe_file[]; +extern char netdata_exe_path[]; + +#endif /* NETDATA_DAEMON_H */ diff --git a/src/daemon/event_loop.c b/src/daemon/event_loop.c new file mode 100644 index 000000000..d1908ec15 --- /dev/null +++ b/src/daemon/event_loop.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include "event_loop.h" + +// Register workers +void register_libuv_worker_jobs() { + static __thread bool registered = false; + + if(likely(registered)) + 
return; + + registered = true; + + worker_register("LIBUV"); + + // generic + worker_register_job_name(UV_EVENT_WORKER_INIT, "worker init"); + + // query related + worker_register_job_name(UV_EVENT_DBENGINE_QUERY, "query"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_CACHE_LOOKUP, "extent cache"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_MMAP, "extent mmap"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_DECOMPRESSION, "extent decompression"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_LOOKUP, "page lookup"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_POPULATION, "page populate"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_PAGE_ALLOCATION, "page allocate"); + + // flushing related + worker_register_job_name(UV_EVENT_DBENGINE_FLUSH_MAIN_CACHE, "flush main"); + worker_register_job_name(UV_EVENT_DBENGINE_EXTENT_WRITE, "extent write"); + worker_register_job_name(UV_EVENT_DBENGINE_FLUSHED_TO_OPEN, "flushed to open"); + + // datafile full + worker_register_job_name(UV_EVENT_DBENGINE_JOURNAL_INDEX_WAIT, "jv2 index wait"); + worker_register_job_name(UV_EVENT_DBENGINE_JOURNAL_INDEX, "jv2 indexing"); + + // db rotation related + worker_register_job_name(UV_EVENT_DBENGINE_DATAFILE_DELETE_WAIT, "datafile delete wait"); + worker_register_job_name(UV_EVENT_DBENGINE_DATAFILE_DELETE, "datafile deletion"); + worker_register_job_name(UV_EVENT_DBENGINE_FIND_ROTATED_METRICS, "find rotated metrics"); + worker_register_job_name(UV_EVENT_DBENGINE_FIND_REMAINING_RETENTION, "find remaining retention"); + worker_register_job_name(UV_EVENT_DBENGINE_POPULATE_MRG, "update retention"); + + // other dbengine events + worker_register_job_name(UV_EVENT_DBENGINE_EVICT_MAIN_CACHE, "evict main"); + worker_register_job_name(UV_EVENT_DBENGINE_BUFFERS_CLEANUP, "dbengine buffers cleanup"); + worker_register_job_name(UV_EVENT_DBENGINE_QUIESCE, "dbengine quiesce"); + worker_register_job_name(UV_EVENT_DBENGINE_SHUTDOWN, "dbengine shutdown"); + + 
// metadata + worker_register_job_name(UV_EVENT_HOST_CONTEXT_LOAD, "metadata load host context"); + worker_register_job_name(UV_EVENT_METADATA_STORE, "metadata store host"); + worker_register_job_name(UV_EVENT_METADATA_CLEANUP, "metadata cleanup"); + worker_register_job_name(UV_EVENT_METADATA_ML_LOAD, "metadata load ml models"); + + // netdatacli + worker_register_job_name(UV_EVENT_SCHEDULE_CMD, "schedule command"); + + static int workers = 0; + int worker_id = __atomic_add_fetch(&workers, 1, __ATOMIC_RELAXED); + + char buf[NETDATA_THREAD_TAG_MAX + 1]; + snprintfz(buf, NETDATA_THREAD_TAG_MAX, "UV_WORKER[%d]", worker_id); + uv_thread_set_name_np(buf); +} diff --git a/src/daemon/event_loop.h b/src/daemon/event_loop.h new file mode 100644 index 000000000..c1821c646 --- /dev/null +++ b/src/daemon/event_loop.h @@ -0,0 +1,55 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EVENT_LOOP_H +#define NETDATA_EVENT_LOOP_H + +enum event_loop_job { + UV_EVENT_JOB_NONE = 0, + + // generic + UV_EVENT_WORKER_INIT, + + // query related + UV_EVENT_DBENGINE_QUERY, + UV_EVENT_DBENGINE_EXTENT_CACHE_LOOKUP, + UV_EVENT_DBENGINE_EXTENT_MMAP, + UV_EVENT_DBENGINE_EXTENT_DECOMPRESSION, + UV_EVENT_DBENGINE_EXTENT_PAGE_LOOKUP, + UV_EVENT_DBENGINE_EXTENT_PAGE_POPULATION, + UV_EVENT_DBENGINE_EXTENT_PAGE_ALLOCATION, + + // flushing related + UV_EVENT_DBENGINE_FLUSH_MAIN_CACHE, + UV_EVENT_DBENGINE_EXTENT_WRITE, + UV_EVENT_DBENGINE_FLUSHED_TO_OPEN, + + // datafile full + UV_EVENT_DBENGINE_JOURNAL_INDEX_WAIT, + UV_EVENT_DBENGINE_JOURNAL_INDEX, + + // db rotation related + UV_EVENT_DBENGINE_DATAFILE_DELETE_WAIT, + UV_EVENT_DBENGINE_DATAFILE_DELETE, + UV_EVENT_DBENGINE_FIND_ROTATED_METRICS, // find the metrics that are rotated + UV_EVENT_DBENGINE_FIND_REMAINING_RETENTION, // find their remaining retention + UV_EVENT_DBENGINE_POPULATE_MRG, // update mrg + + // other dbengine events + UV_EVENT_DBENGINE_EVICT_MAIN_CACHE, + UV_EVENT_DBENGINE_BUFFERS_CLEANUP, + UV_EVENT_DBENGINE_QUIESCE, + 
#!/usr/bin/env bash
me="$(basename "${0}")"

# Checks if netdata is running in a kubernetes pod and fetches:
# - pod's labels
# - kubernetes cluster name (GKE only)
#
# Output: one "name:value" label per line on stdout.
# Exit status: 0 when not running in a pod (nothing is printed) or on success,
#              1 when jq is missing, a kubernetes API call fails or its
#              response cannot be parsed.

if [ -z "${KUBERNETES_SERVICE_HOST}" ] || [ -z "${KUBERNETES_PORT_443_TCP_PORT}" ] || [ -z "${MY_POD_NAMESPACE}" ] || [ -z "${MY_POD_NAME}" ]; then
  exit 0
fi

if ! command -v jq >/dev/null 2>&1; then
  echo >&2 "${me}: jq command not available. Please install jq to get host labels for kubernetes pods."
  exit 1
fi

TOKEN="$(< /var/run/secrets/kubernetes.io/serviceaccount/token)"
HEADER="Authorization: Bearer $TOKEN"
HOST="$KUBERNETES_SERVICE_HOST:$KUBERNETES_PORT_443_TCP_PORT"

URL="https://$HOST/api/v1/namespaces/$MY_POD_NAMESPACE/pods/$MY_POD_NAME"
if ! POD_DATA=$(curl --fail -sSk -H "$HEADER" "$URL" 2>&1); then
  echo >&2 "${me}: error on curl '${URL}': ${POD_DATA}."
  exit 1
fi

URL="https://$HOST/api/v1/namespaces/kube-system"
if ! KUBE_SYSTEM_NS_DATA=$(curl --fail -sSk -H "$HEADER" "$URL" 2>&1); then
  echo >&2 "${me}: error on curl '${URL}': ${KUBE_SYSTEM_NS_DATA}."
  exit 1
fi

# Run jq on its own so that its exit status and error output are what gets
# checked. Inside a "jq | grep | tr" pipeline the status is the one of tr,
# which hides any jq failure.
if ! POD_LABELS_JSON=$(jq -r '.metadata.labels' <<< "$POD_DATA" 2>&1); then
  echo >&2 "${me}: error on 'jq' parse pod data: ${POD_LABELS_JSON}."
  exit 1
fi

# Keep only the "key": "value" lines of the JSON object and strip the JSON
# punctuation, leaving one key:value pair per line.
POD_LABELS=$(grep ':' <<< "$POD_LABELS_JSON" | tr -d '," ')

if ! KUBE_SYSTEM_NS_UID=$(jq -r '.metadata.uid' <<< "$KUBE_SYSTEM_NS_DATA" 2>&1); then
  echo >&2 "${me}: error on 'jq' parse kube_system_ns: ${KUBE_SYSTEM_NS_UID}."
  exit 1
fi

LABELS="$POD_LABELS\nk8s_cluster_id:$KUBE_SYSTEM_NS_UID"

GCP_META_HEADER="Metadata-Flavor: Google"
GCP_META_URL="http://metadata/computeMetadata/v1"
GKE_CLUSTER_NAME=""

# The cluster name is added only when all three metadata values are available.
if id=$(curl --fail -s -m 5 --noproxy "*" -H "$GCP_META_HEADER" "$GCP_META_URL/project/project-id"); then
  loc=$(curl --fail -s -m 5 --noproxy "*" -H "$GCP_META_HEADER" "$GCP_META_URL/instance/attributes/cluster-location")
  name=$(curl --fail -s -m 5 --noproxy "*" -H "$GCP_META_HEADER" "$GCP_META_URL/instance/attributes/cluster-name")
  [ -n "$id" ] && [ -n "$loc" ] && [ -n "$name" ] && GKE_CLUSTER_NAME="gke_${id}_${loc}_${name}"
fi

[ -n "$GKE_CLUSTER_NAME" ] && LABELS+="\nk8s_cluster_name:$GKE_CLUSTER_NAME"

echo -e "$LABELS"

exit 0
+ uint64_t api_data_result_points_generated; + + uint64_t api_weights_queries_made; + uint64_t api_weights_db_points_read; + uint64_t api_weights_result_points_generated; + + uint64_t api_badges_queries_made; + uint64_t api_badges_db_points_read; + uint64_t api_badges_result_points_generated; + + uint64_t health_queries_made; + uint64_t health_db_points_read; + uint64_t health_result_points_generated; + + uint64_t ml_queries_made; + uint64_t ml_db_points_read; + uint64_t ml_result_points_generated; + uint64_t ml_models_consulted; + + uint64_t exporters_queries_made; + uint64_t exporters_db_points_read; + + uint64_t backfill_queries_made; + uint64_t backfill_db_points_read; + + uint64_t tier0_hot_gorilla_buffers; + + uint64_t tier0_disk_compressed_bytes; + uint64_t tier0_disk_uncompressed_bytes; + + uint64_t db_points_stored_per_tier[RRD_STORAGE_TIERS]; + +} global_statistics = { + .connected_clients = 0, + .web_requests = 0, + .web_usec = 0, + .bytes_received = 0, + .bytes_sent = 0, + .content_size = 0, + .compressed_content_size = 0, + .web_client_count = 1, + + .api_data_queries_made = 0, + .api_data_db_points_read = 0, + .api_data_result_points_generated = 0, + + .tier0_hot_gorilla_buffers = 0, + .tier0_disk_compressed_bytes = 0, + .tier0_disk_uncompressed_bytes = 0, +}; + +void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array) { + for(size_t tier = 0; tier < storage_tiers ;tier++) { + __atomic_fetch_add(&global_statistics.db_points_stored_per_tier[tier], points_read_per_tier_array[tier], __ATOMIC_RELAXED); + points_read_per_tier_array[tier] = 0; + } +} + +void global_statistics_ml_query_completed(size_t points_read) { + __atomic_fetch_add(&global_statistics.ml_queries_made, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.ml_db_points_read, points_read, __ATOMIC_RELAXED); +} + +void global_statistics_ml_models_consulted(size_t models_consulted) { + __atomic_fetch_add(&global_statistics.ml_models_consulted, 
models_consulted, __ATOMIC_RELAXED); +} + +void global_statistics_exporters_query_completed(size_t points_read) { + __atomic_fetch_add(&global_statistics.exporters_queries_made, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.exporters_db_points_read, points_read, __ATOMIC_RELAXED); +} + +void global_statistics_backfill_query_completed(size_t points_read) { + __atomic_fetch_add(&global_statistics.backfill_queries_made, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.backfill_db_points_read, points_read, __ATOMIC_RELAXED); +} + +void global_statistics_gorilla_buffer_add_hot() { + __atomic_fetch_add(&global_statistics.tier0_hot_gorilla_buffers, 1, __ATOMIC_RELAXED); +} + +void global_statistics_tier0_disk_compressed_bytes(uint32_t size) { + __atomic_fetch_add(&global_statistics.tier0_disk_compressed_bytes, size, __ATOMIC_RELAXED); +} + +void global_statistics_tier0_disk_uncompressed_bytes(uint32_t size) { + __atomic_fetch_add(&global_statistics.tier0_disk_uncompressed_bytes, size, __ATOMIC_RELAXED); +} + +void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source) { + switch(query_source) { + case QUERY_SOURCE_API_DATA: + __atomic_fetch_add(&global_statistics.api_data_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_data_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_data_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_ML: + __atomic_fetch_add(&global_statistics.ml_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.ml_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.ml_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_API_WEIGHTS: + __atomic_fetch_add(&global_statistics.api_weights_queries_made, 
queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_weights_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_weights_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_API_BADGE: + __atomic_fetch_add(&global_statistics.api_badges_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_badges_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_badges_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_HEALTH: + __atomic_fetch_add(&global_statistics.health_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.health_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.health_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + default: + case QUERY_SOURCE_UNITTEST: + case QUERY_SOURCE_UNKNOWN: + break; + } +} + +void global_statistics_web_request_completed(uint64_t dt, + uint64_t bytes_received, + uint64_t bytes_sent, + uint64_t content_size, + uint64_t compressed_content_size) { + uint64_t old_web_usec_max = global_statistics.web_usec_max; + while(dt > old_web_usec_max) + __atomic_compare_exchange(&global_statistics.web_usec_max, &old_web_usec_max, &dt, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + + __atomic_fetch_add(&global_statistics.web_requests, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.web_usec, dt, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.bytes_received, bytes_received, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.bytes_sent, bytes_sent, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.content_size, content_size, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.compressed_content_size, compressed_content_size, __ATOMIC_RELAXED); +} + +uint64_t 
global_statistics_web_client_connected(void) { + __atomic_fetch_add(&global_statistics.connected_clients, 1, __ATOMIC_RELAXED); + return __atomic_fetch_add(&global_statistics.web_client_count, 1, __ATOMIC_RELAXED); +} + +void global_statistics_web_client_disconnected(void) { + __atomic_fetch_sub(&global_statistics.connected_clients, 1, __ATOMIC_RELAXED); +} + +static inline void global_statistics_copy(struct global_statistics *gs, uint8_t options) { + gs->connected_clients = __atomic_load_n(&global_statistics.connected_clients, __ATOMIC_RELAXED); + gs->web_requests = __atomic_load_n(&global_statistics.web_requests, __ATOMIC_RELAXED); + gs->web_usec = __atomic_load_n(&global_statistics.web_usec, __ATOMIC_RELAXED); + gs->web_usec_max = __atomic_load_n(&global_statistics.web_usec_max, __ATOMIC_RELAXED); + gs->bytes_received = __atomic_load_n(&global_statistics.bytes_received, __ATOMIC_RELAXED); + gs->bytes_sent = __atomic_load_n(&global_statistics.bytes_sent, __ATOMIC_RELAXED); + gs->content_size = __atomic_load_n(&global_statistics.content_size, __ATOMIC_RELAXED); + gs->compressed_content_size = __atomic_load_n(&global_statistics.compressed_content_size, __ATOMIC_RELAXED); + gs->web_client_count = __atomic_load_n(&global_statistics.web_client_count, __ATOMIC_RELAXED); + + gs->api_data_queries_made = __atomic_load_n(&global_statistics.api_data_queries_made, __ATOMIC_RELAXED); + gs->api_data_db_points_read = __atomic_load_n(&global_statistics.api_data_db_points_read, __ATOMIC_RELAXED); + gs->api_data_result_points_generated = __atomic_load_n(&global_statistics.api_data_result_points_generated, __ATOMIC_RELAXED); + + gs->api_weights_queries_made = __atomic_load_n(&global_statistics.api_weights_queries_made, __ATOMIC_RELAXED); + gs->api_weights_db_points_read = __atomic_load_n(&global_statistics.api_weights_db_points_read, __ATOMIC_RELAXED); + gs->api_weights_result_points_generated = __atomic_load_n(&global_statistics.api_weights_result_points_generated, 
__ATOMIC_RELAXED); + + gs->api_badges_queries_made = __atomic_load_n(&global_statistics.api_badges_queries_made, __ATOMIC_RELAXED); + gs->api_badges_db_points_read = __atomic_load_n(&global_statistics.api_badges_db_points_read, __ATOMIC_RELAXED); + gs->api_badges_result_points_generated = __atomic_load_n(&global_statistics.api_badges_result_points_generated, __ATOMIC_RELAXED); + + gs->health_queries_made = __atomic_load_n(&global_statistics.health_queries_made, __ATOMIC_RELAXED); + gs->health_db_points_read = __atomic_load_n(&global_statistics.health_db_points_read, __ATOMIC_RELAXED); + gs->health_result_points_generated = __atomic_load_n(&global_statistics.health_result_points_generated, __ATOMIC_RELAXED); + + gs->ml_queries_made = __atomic_load_n(&global_statistics.ml_queries_made, __ATOMIC_RELAXED); + gs->ml_db_points_read = __atomic_load_n(&global_statistics.ml_db_points_read, __ATOMIC_RELAXED); + gs->ml_result_points_generated = __atomic_load_n(&global_statistics.ml_result_points_generated, __ATOMIC_RELAXED); + gs->ml_models_consulted = __atomic_load_n(&global_statistics.ml_models_consulted, __ATOMIC_RELAXED); + + gs->exporters_queries_made = __atomic_load_n(&global_statistics.exporters_queries_made, __ATOMIC_RELAXED); + gs->exporters_db_points_read = __atomic_load_n(&global_statistics.exporters_db_points_read, __ATOMIC_RELAXED); + gs->backfill_queries_made = __atomic_load_n(&global_statistics.backfill_queries_made, __ATOMIC_RELAXED); + gs->backfill_db_points_read = __atomic_load_n(&global_statistics.backfill_db_points_read, __ATOMIC_RELAXED); + + gs->tier0_hot_gorilla_buffers = __atomic_load_n(&global_statistics.tier0_hot_gorilla_buffers, __ATOMIC_RELAXED); + + gs->tier0_disk_compressed_bytes = __atomic_load_n(&global_statistics.tier0_disk_compressed_bytes, __ATOMIC_RELAXED); + gs->tier0_disk_uncompressed_bytes = __atomic_load_n(&global_statistics.tier0_disk_uncompressed_bytes, __ATOMIC_RELAXED); + + for(size_t tier = 0; tier < storage_tiers ;tier++) + 
gs->db_points_stored_per_tier[tier] = __atomic_load_n(&global_statistics.db_points_stored_per_tier[tier], __ATOMIC_RELAXED); + + if(options & GLOBAL_STATS_RESET_WEB_USEC_MAX) { + uint64_t n = 0; + __atomic_compare_exchange(&global_statistics.web_usec_max, (uint64_t *) &gs->web_usec_max, &n, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } +} + +#define dictionary_stats_memory_total(stats) \ + ((stats).memory.dict + (stats).memory.values + (stats).memory.index) + +static void global_statistics_charts(void) { + static unsigned long long old_web_requests = 0, + old_web_usec = 0, + old_content_size = 0, + old_compressed_content_size = 0; + + static collected_number compression_ratio = -1, + average_response_time = -1; + + static time_t netdata_boottime_time = 0; + if (!netdata_boottime_time) + netdata_boottime_time = now_boottime_sec(); + time_t netdata_uptime = now_boottime_sec() - netdata_boottime_time; + + struct global_statistics gs; + struct rusage me; + + struct replication_query_statistics replication = replication_get_query_statistics(); + global_statistics_copy(&gs, GLOBAL_STATS_RESET_WEB_USEC_MAX); + getrusage(RUSAGE_SELF, &me); + + // ---------------------------------------------------------------- + + { + static RRDSET *st_cpu = NULL; + static RRDDIM *rd_cpu_user = NULL, + *rd_cpu_system = NULL; + + if (unlikely(!st_cpu)) { + st_cpu = rrdset_create_localhost( + "netdata" + , "server_cpu" + , NULL + , "netdata" + , NULL + , "Netdata CPU usage" + , "milliseconds/s" + , "netdata" + , "stats" + , 130000 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_cpu_user = rrddim_add(st_cpu, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + rd_cpu_system = rrddim_add(st_cpu, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_cpu, rd_cpu_user, me.ru_utime.tv_sec * 1000000ULL + me.ru_utime.tv_usec); + rrddim_set_by_pointer(st_cpu, rd_cpu_system, me.ru_stime.tv_sec * 1000000ULL + me.ru_stime.tv_usec); + rrdset_done(st_cpu); 
+ } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_memory = NULL; + static RRDDIM *rd_database = NULL; + static RRDDIM *rd_collectors = NULL; + static RRDDIM *rd_hosts = NULL; + static RRDDIM *rd_rrd = NULL; + static RRDDIM *rd_contexts = NULL; + static RRDDIM *rd_health = NULL; + static RRDDIM *rd_functions = NULL; + static RRDDIM *rd_labels = NULL; + static RRDDIM *rd_strings = NULL; + static RRDDIM *rd_streaming = NULL; + static RRDDIM *rd_replication = NULL; + static RRDDIM *rd_buffers = NULL; + static RRDDIM *rd_workers = NULL; + static RRDDIM *rd_aral = NULL; + static RRDDIM *rd_judy = NULL; + static RRDDIM *rd_other = NULL; + + if (unlikely(!st_memory)) { + st_memory = rrdset_create_localhost( + "netdata", + "memory", + NULL, + "netdata", + NULL, + "Netdata Memory", + "bytes", + "netdata", + "stats", + 130100, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_database = rrddim_add(st_memory, "db", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_collectors = rrddim_add(st_memory, "collectors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_hosts = rrddim_add(st_memory, "hosts", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_rrd = rrddim_add(st_memory, "rrdset rrddim", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_contexts = rrddim_add(st_memory, "contexts", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_health = rrddim_add(st_memory, "health", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_functions = rrddim_add(st_memory, "functions", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_labels = rrddim_add(st_memory, "labels", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_strings = rrddim_add(st_memory, "strings", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_streaming = rrddim_add(st_memory, "streaming", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_replication = rrddim_add(st_memory, "replication", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers = rrddim_add(st_memory, "buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_workers = 
rrddim_add(st_memory, "workers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_aral = rrddim_add(st_memory, "aral", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_judy = rrddim_add(st_memory, "judy", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_other = rrddim_add(st_memory, "other", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + size_t buffers = + netdata_buffers_statistics.query_targets_size + + netdata_buffers_statistics.rrdset_done_rda_size + + netdata_buffers_statistics.buffers_aclk + + netdata_buffers_statistics.buffers_api + + netdata_buffers_statistics.buffers_functions + + netdata_buffers_statistics.buffers_sqlite + + netdata_buffers_statistics.buffers_exporters + + netdata_buffers_statistics.buffers_health + + netdata_buffers_statistics.buffers_streaming + + netdata_buffers_statistics.cbuffers_streaming + + netdata_buffers_statistics.buffers_web + + replication_allocated_buffers() + + aral_by_size_overhead() + + judy_aral_overhead(); + + size_t strings = 0; + string_statistics(NULL, NULL, NULL, NULL, NULL, &strings, NULL, NULL); + + rrddim_set_by_pointer(st_memory, rd_database, (collected_number)dbengine_total_memory + (collected_number)rrddim_db_memory_size); + rrddim_set_by_pointer(st_memory, rd_collectors, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_collectors)); + rrddim_set_by_pointer(st_memory, rd_hosts, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdhost) + (collected_number)netdata_buffers_statistics.rrdhost_allocations_size); + rrddim_set_by_pointer(st_memory, rd_rrd, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdset_rrddim)); + rrddim_set_by_pointer(st_memory, rd_contexts, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdcontext)); + rrddim_set_by_pointer(st_memory, rd_health, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdhealth)); + rrddim_set_by_pointer(st_memory, rd_functions, 
(collected_number)dictionary_stats_memory_total(dictionary_stats_category_functions)); + rrddim_set_by_pointer(st_memory, rd_labels, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_rrdlabels)); + rrddim_set_by_pointer(st_memory, rd_strings, (collected_number)strings); + rrddim_set_by_pointer(st_memory, rd_streaming, (collected_number)netdata_buffers_statistics.rrdhost_senders + (collected_number)netdata_buffers_statistics.rrdhost_receivers); + rrddim_set_by_pointer(st_memory, rd_replication, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_replication) + (collected_number)replication_allocated_memory()); + rrddim_set_by_pointer(st_memory, rd_buffers, (collected_number)buffers); + rrddim_set_by_pointer(st_memory, rd_workers, (collected_number) workers_allocated_memory()); + rrddim_set_by_pointer(st_memory, rd_aral, (collected_number) aral_by_size_structures()); + rrddim_set_by_pointer(st_memory, rd_judy, (collected_number) judy_aral_structures()); + rrddim_set_by_pointer(st_memory, rd_other, (collected_number)dictionary_stats_memory_total(dictionary_stats_category_other)); + + rrdset_done(st_memory); + } + + { + static RRDSET *st_memory_buffers = NULL; + static RRDDIM *rd_queries = NULL; + static RRDDIM *rd_collectors = NULL; + static RRDDIM *rd_buffers_aclk = NULL; + static RRDDIM *rd_buffers_api = NULL; + static RRDDIM *rd_buffers_functions = NULL; + static RRDDIM *rd_buffers_sqlite = NULL; + static RRDDIM *rd_buffers_exporters = NULL; + static RRDDIM *rd_buffers_health = NULL; + static RRDDIM *rd_buffers_streaming = NULL; + static RRDDIM *rd_cbuffers_streaming = NULL; + static RRDDIM *rd_buffers_replication = NULL; + static RRDDIM *rd_buffers_web = NULL; + static RRDDIM *rd_buffers_aral = NULL; + static RRDDIM *rd_buffers_judy = NULL; + + if (unlikely(!st_memory_buffers)) { + st_memory_buffers = rrdset_create_localhost( + "netdata", + "memory_buffers", + NULL, + "netdata", + NULL, + "Netdata Memory Buffers", + 
"bytes", + "netdata", + "stats", + 130101, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_queries = rrddim_add(st_memory_buffers, "queries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_collectors = rrddim_add(st_memory_buffers, "collection", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_aclk = rrddim_add(st_memory_buffers, "aclk", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_api = rrddim_add(st_memory_buffers, "api", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_functions = rrddim_add(st_memory_buffers, "functions", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_sqlite = rrddim_add(st_memory_buffers, "sqlite", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_exporters = rrddim_add(st_memory_buffers, "exporters", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_health = rrddim_add(st_memory_buffers, "health", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_streaming = rrddim_add(st_memory_buffers, "streaming", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_cbuffers_streaming = rrddim_add(st_memory_buffers, "streaming cbuf", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_replication = rrddim_add(st_memory_buffers, "replication", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_web = rrddim_add(st_memory_buffers, "web", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_aral = rrddim_add(st_memory_buffers, "aral", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_buffers_judy = rrddim_add(st_memory_buffers, "judy", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_memory_buffers, rd_queries, (collected_number)netdata_buffers_statistics.query_targets_size + (collected_number) onewayalloc_allocated_memory()); + rrddim_set_by_pointer(st_memory_buffers, rd_collectors, (collected_number)netdata_buffers_statistics.rrdset_done_rda_size); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_aclk, (collected_number)netdata_buffers_statistics.buffers_aclk); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_api, 
(collected_number)netdata_buffers_statistics.buffers_api); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_functions, (collected_number)netdata_buffers_statistics.buffers_functions); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_sqlite, (collected_number)netdata_buffers_statistics.buffers_sqlite); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_exporters, (collected_number)netdata_buffers_statistics.buffers_exporters); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_health, (collected_number)netdata_buffers_statistics.buffers_health); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_streaming, (collected_number)netdata_buffers_statistics.buffers_streaming); + rrddim_set_by_pointer(st_memory_buffers, rd_cbuffers_streaming, (collected_number)netdata_buffers_statistics.cbuffers_streaming); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_replication, (collected_number)replication_allocated_buffers()); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_web, (collected_number)netdata_buffers_statistics.buffers_web); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_aral, (collected_number)aral_by_size_overhead()); + rrddim_set_by_pointer(st_memory_buffers, rd_buffers_judy, (collected_number)judy_aral_overhead()); + + rrdset_done(st_memory_buffers); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_uptime = NULL; + static RRDDIM *rd_uptime = NULL; + + if (unlikely(!st_uptime)) { + st_uptime = rrdset_create_localhost( + "netdata", + "uptime", + NULL, + "netdata", + NULL, + "Netdata uptime", + "seconds", + "netdata", + "stats", + 130150, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_uptime = rrddim_add(st_uptime, "uptime", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_uptime, rd_uptime, netdata_uptime); + rrdset_done(st_uptime); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_clients = 
NULL; + static RRDDIM *rd_clients = NULL; + + if (unlikely(!st_clients)) { + st_clients = rrdset_create_localhost( + "netdata" + , "clients" + , NULL + , "api" + , NULL + , "Netdata Web Clients" + , "connected clients" + , "netdata" + , "stats" + , 130200 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_clients = rrddim_add(st_clients, "clients", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_clients, rd_clients, gs.connected_clients); + rrdset_done(st_clients); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_reqs = NULL; + static RRDDIM *rd_requests = NULL; + + if (unlikely(!st_reqs)) { + st_reqs = rrdset_create_localhost( + "netdata" + , "requests" + , NULL + , "api" + , NULL + , "Netdata Web Requests" + , "requests/s" + , "netdata" + , "stats" + , 130300 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_requests = rrddim_add(st_reqs, "requests", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_reqs, rd_requests, (collected_number) gs.web_requests); + rrdset_done(st_reqs); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_bytes = NULL; + static RRDDIM *rd_in = NULL, + *rd_out = NULL; + + if (unlikely(!st_bytes)) { + st_bytes = rrdset_create_localhost( + "netdata" + , "net" + , NULL + , "api" + , NULL + , "Netdata Network Traffic" + , "kilobits/s" + , "netdata" + , "stats" + , 130400 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st_bytes, "in", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st_bytes, "out", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_bytes, rd_in, (collected_number) gs.bytes_received); + rrddim_set_by_pointer(st_bytes, rd_out, (collected_number) gs.bytes_sent); + rrdset_done(st_bytes); + } + + // 
---------------------------------------------------------------- + + { + static RRDSET *st_duration = NULL; + static RRDDIM *rd_average = NULL, + *rd_max = NULL; + + if (unlikely(!st_duration)) { + st_duration = rrdset_create_localhost( + "netdata" + , "response_time" + , NULL + , "api" + , NULL + , "Netdata API Response Time" + , "milliseconds/request" + , "netdata" + , "stats" + , 130500 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_average = rrddim_add(st_duration, "average", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + rd_max = rrddim_add(st_duration, "max", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + uint64_t gweb_usec = gs.web_usec; + uint64_t gweb_requests = gs.web_requests; + + uint64_t web_usec = (gweb_usec >= old_web_usec) ? gweb_usec - old_web_usec : 0; + uint64_t web_requests = (gweb_requests >= old_web_requests) ? gweb_requests - old_web_requests : 0; + + old_web_usec = gweb_usec; + old_web_requests = gweb_requests; + + if (web_requests) + average_response_time = (collected_number) (web_usec / web_requests); + + if (unlikely(average_response_time != -1)) + rrddim_set_by_pointer(st_duration, rd_average, average_response_time); + else + rrddim_set_by_pointer(st_duration, rd_average, 0); + + rrddim_set_by_pointer(st_duration, rd_max, ((gs.web_usec_max)?(collected_number)gs.web_usec_max:average_response_time)); + rrdset_done(st_duration); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_compression = NULL; + static RRDDIM *rd_savings = NULL; + + if (unlikely(!st_compression)) { + st_compression = rrdset_create_localhost( + "netdata" + , "compression_ratio" + , NULL + , "api" + , NULL + , "Netdata API Responses Compression Savings Ratio" + , "percentage" + , "netdata" + , "stats" + , 130600 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + // since we don't lock here to read 
the global statistics + // read the smaller value first + unsigned long long gcompressed_content_size = gs.compressed_content_size; + unsigned long long gcontent_size = gs.content_size; + + unsigned long long compressed_content_size = gcompressed_content_size - old_compressed_content_size; + unsigned long long content_size = gcontent_size - old_content_size; + + old_compressed_content_size = gcompressed_content_size; + old_content_size = gcontent_size; + + if (content_size && content_size >= compressed_content_size) + compression_ratio = ((content_size - compressed_content_size) * 100 * 1000) / content_size; + + if (compression_ratio != -1) + rrddim_set_by_pointer(st_compression, rd_savings, compression_ratio); + + rrdset_done(st_compression); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_queries = NULL; + static RRDDIM *rd_api_data_queries = NULL; + static RRDDIM *rd_api_weights_queries = NULL; + static RRDDIM *rd_api_badges_queries = NULL; + static RRDDIM *rd_health_queries = NULL; + static RRDDIM *rd_ml_queries = NULL; + static RRDDIM *rd_exporters_queries = NULL; + static RRDDIM *rd_backfill_queries = NULL; + static RRDDIM *rd_replication_queries = NULL; + + if (unlikely(!st_queries)) { + st_queries = rrdset_create_localhost( + "netdata" + , "queries" + , NULL + , "queries" + , NULL + , "Netdata DB Queries" + , "queries/s" + , "netdata" + , "stats" + , 131000 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_api_data_queries = rrddim_add(st_queries, "/api/v1/data", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_weights_queries = rrddim_add(st_queries, "/api/v1/weights", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_badges_queries = rrddim_add(st_queries, "/api/v1/badge", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_health_queries = rrddim_add(st_queries, "health", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ml_queries = rrddim_add(st_queries, "ml", NULL, 1, 1, 
RRD_ALGORITHM_INCREMENTAL); + rd_exporters_queries = rrddim_add(st_queries, "exporters", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_backfill_queries = rrddim_add(st_queries, "backfill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_replication_queries = rrddim_add(st_queries, "replication", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_queries, rd_api_data_queries, (collected_number)gs.api_data_queries_made); + rrddim_set_by_pointer(st_queries, rd_api_weights_queries, (collected_number)gs.api_weights_queries_made); + rrddim_set_by_pointer(st_queries, rd_api_badges_queries, (collected_number)gs.api_badges_queries_made); + rrddim_set_by_pointer(st_queries, rd_health_queries, (collected_number)gs.health_queries_made); + rrddim_set_by_pointer(st_queries, rd_ml_queries, (collected_number)gs.ml_queries_made); + rrddim_set_by_pointer(st_queries, rd_exporters_queries, (collected_number)gs.exporters_queries_made); + rrddim_set_by_pointer(st_queries, rd_backfill_queries, (collected_number)gs.backfill_queries_made); + rrddim_set_by_pointer(st_queries, rd_replication_queries, (collected_number)replication.queries_finished); + + rrdset_done(st_queries); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_points_read = NULL; + static RRDDIM *rd_api_data_points_read = NULL; + static RRDDIM *rd_api_weights_points_read = NULL; + static RRDDIM *rd_api_badges_points_read = NULL; + static RRDDIM *rd_health_points_read = NULL; + static RRDDIM *rd_ml_points_read = NULL; + static RRDDIM *rd_exporters_points_read = NULL; + static RRDDIM *rd_backfill_points_read = NULL; + static RRDDIM *rd_replication_points_read = NULL; + + if (unlikely(!st_points_read)) { + st_points_read = rrdset_create_localhost( + "netdata" + , "db_points_read" + , NULL + , "queries" + , NULL + , "Netdata DB Points Query Read" + , "points/s" + , "netdata" + , "stats" + , 131001 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + 
rd_api_data_points_read = rrddim_add(st_points_read, "/api/v1/data", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_weights_points_read = rrddim_add(st_points_read, "/api/v1/weights", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_badges_points_read = rrddim_add(st_points_read, "/api/v1/badge", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_health_points_read = rrddim_add(st_points_read, "health", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ml_points_read = rrddim_add(st_points_read, "ml", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_exporters_points_read = rrddim_add(st_points_read, "exporters", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_backfill_points_read = rrddim_add(st_points_read, "backfill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_replication_points_read = rrddim_add(st_points_read, "replication", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_points_read, rd_api_data_points_read, (collected_number)gs.api_data_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_api_weights_points_read, (collected_number)gs.api_weights_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_api_badges_points_read, (collected_number)gs.api_badges_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_health_points_read, (collected_number)gs.health_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_ml_points_read, (collected_number)gs.ml_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_exporters_points_read, (collected_number)gs.exporters_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_backfill_points_read, (collected_number)gs.backfill_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_replication_points_read, (collected_number)replication.points_read); + + rrdset_done(st_points_read); + } + + // ---------------------------------------------------------------- + + if(gs.api_data_result_points_generated || replication.points_generated) { + static RRDSET *st_points_generated = NULL; + 
static RRDDIM *rd_api_data_points_generated = NULL; + static RRDDIM *rd_api_weights_points_generated = NULL; + static RRDDIM *rd_api_badges_points_generated = NULL; + static RRDDIM *rd_health_points_generated = NULL; + static RRDDIM *rd_ml_points_generated = NULL; + static RRDDIM *rd_replication_points_generated = NULL; + + if (unlikely(!st_points_generated)) { + st_points_generated = rrdset_create_localhost( + "netdata" + , "db_points_results" + , NULL + , "queries" + , NULL + , "Netdata Points in Query Results" + , "points/s" + , "netdata" + , "stats" + , 131002 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_api_data_points_generated = rrddim_add(st_points_generated, "/api/v1/data", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_weights_points_generated = rrddim_add(st_points_generated, "/api/v1/weights", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_badges_points_generated = rrddim_add(st_points_generated, "/api/v1/badge", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_health_points_generated = rrddim_add(st_points_generated, "health", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ml_points_generated = rrddim_add(st_points_generated, "ml", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_replication_points_generated = rrddim_add(st_points_generated, "replication", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_points_generated, rd_api_data_points_generated, (collected_number)gs.api_data_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_api_weights_points_generated, (collected_number)gs.api_weights_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_api_badges_points_generated, (collected_number)gs.api_badges_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_health_points_generated, (collected_number)gs.health_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_ml_points_generated, 
(collected_number)gs.ml_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_replication_points_generated, (collected_number)replication.points_generated); + + rrdset_done(st_points_generated); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_points_stored = NULL; + static RRDDIM *rds[RRD_STORAGE_TIERS] = {}; + + if (unlikely(!st_points_stored)) { + st_points_stored = rrdset_create_localhost( + "netdata" + , "db_points_stored" + , NULL + , "queries" + , NULL + , "Netdata DB Points Stored" + , "points/s" + , "netdata" + , "stats" + , 131003 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + char buf[30 + 1]; + snprintfz(buf, sizeof(buf) - 1, "tier%zu", tier); + rds[tier] = rrddim_add(st_points_stored, buf, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + } + + for(size_t tier = 0; tier < storage_tiers ;tier++) + rrddim_set_by_pointer(st_points_stored, rds[tier], (collected_number)gs.db_points_stored_per_tier[tier]); + + rrdset_done(st_points_stored); + } + + ml_update_global_statistics_charts(gs.ml_models_consulted); + + // ---------------------------------------------------------------- + +#ifdef ENABLE_DBENGINE + if (tier_page_type[0] == RRDENG_PAGE_TYPE_GORILLA_32BIT) + { + static RRDSET *st_tier0_gorilla_pages = NULL; + static RRDDIM *rd_num_gorilla_pages = NULL; + + if (unlikely(!st_tier0_gorilla_pages)) { + st_tier0_gorilla_pages = rrdset_create_localhost( + "netdata" + , "tier0_gorilla_pages" + , NULL + , "tier0_gorilla_pages" + , NULL + , "Number of gorilla_pages" + , "count" + , "netdata" + , "stats" + , 131004 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_num_gorilla_pages = rrddim_add(st_tier0_gorilla_pages, "count", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_tier0_gorilla_pages, rd_num_gorilla_pages, (collected_number)gs.tier0_hot_gorilla_buffers); + + 
rrdset_done(st_tier0_gorilla_pages); + } + + if (tier_page_type[0] == RRDENG_PAGE_TYPE_GORILLA_32BIT) + { + static RRDSET *st_tier0_compression_info = NULL; + + static RRDDIM *rd_compressed_bytes = NULL; + static RRDDIM *rd_uncompressed_bytes = NULL; + + if (unlikely(!st_tier0_compression_info)) { + st_tier0_compression_info = rrdset_create_localhost( + "netdata" + , "tier0_compression_info" + , NULL + , "tier0_compression_info" + , NULL + , "Tier 0 compression info" + , "bytes" + , "netdata" + , "stats" + , 131005 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_compressed_bytes = rrddim_add(st_tier0_compression_info, "compressed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_uncompressed_bytes = rrddim_add(st_tier0_compression_info, "uncompressed", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_tier0_compression_info, rd_compressed_bytes, (collected_number)gs.tier0_disk_compressed_bytes); + rrddim_set_by_pointer(st_tier0_compression_info, rd_uncompressed_bytes, (collected_number)gs.tier0_disk_uncompressed_bytes); + + rrdset_done(st_tier0_compression_info); + } +#endif +} + +// ---------------------------------------------------------------------------- +// sqlite3 statistics + +struct sqlite3_statistics { + uint64_t sqlite3_queries_made; + uint64_t sqlite3_queries_ok; + uint64_t sqlite3_queries_failed; + uint64_t sqlite3_queries_failed_busy; + uint64_t sqlite3_queries_failed_locked; + uint64_t sqlite3_rows; + uint64_t sqlite3_metadata_cache_hit; + uint64_t sqlite3_context_cache_hit; + uint64_t sqlite3_metadata_cache_miss; + uint64_t sqlite3_context_cache_miss; + uint64_t sqlite3_metadata_cache_spill; + uint64_t sqlite3_context_cache_spill; + uint64_t sqlite3_metadata_cache_write; + uint64_t sqlite3_context_cache_write; + +} sqlite3_statistics = { }; + +void global_statistics_sqlite3_query_completed(bool success, bool busy, bool locked) { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_made, 1, __ATOMIC_RELAXED); + + 
if(success) { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_ok, 1, __ATOMIC_RELAXED); + } + else { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed, 1, __ATOMIC_RELAXED); + + if(busy) + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed_busy, 1, __ATOMIC_RELAXED); + + if(locked) + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed_locked, 1, __ATOMIC_RELAXED); + } +} + +void global_statistics_sqlite3_row_completed(void) { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_rows, 1, __ATOMIC_RELAXED); +} + +static inline void sqlite3_statistics_copy(struct sqlite3_statistics *gs) { + static usec_t last_run = 0; + + gs->sqlite3_queries_made = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_made, __ATOMIC_RELAXED); + gs->sqlite3_queries_ok = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_ok, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed_busy = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed_busy, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed_locked = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed_locked, __ATOMIC_RELAXED); + gs->sqlite3_rows = __atomic_load_n(&sqlite3_statistics.sqlite3_rows, __ATOMIC_RELAXED); + + usec_t timeout = default_rrd_update_every * USEC_PER_SEC + default_rrd_update_every * USEC_PER_SEC / 3; + usec_t now = now_monotonic_usec(); + if(!last_run) + last_run = now; + usec_t delta = now - last_run; + bool query_sqlite3 = delta < timeout; + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_hit = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_HIT); + else { + gs->sqlite3_metadata_cache_hit = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_hit = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_HIT); + else { + 
gs->sqlite3_context_cache_hit = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_miss = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_MISS); + else { + gs->sqlite3_metadata_cache_miss = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_miss = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_MISS); + else { + gs->sqlite3_context_cache_miss = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_spill = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL); + else { + gs->sqlite3_metadata_cache_spill = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_spill = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL); + else { + gs->sqlite3_context_cache_spill = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_write = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE); + else { + gs->sqlite3_metadata_cache_write = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_write = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE); + else { + gs->sqlite3_context_cache_write = UINT64_MAX; + query_sqlite3 = false; + } + + last_run = now_monotonic_usec(); +} + +static void sqlite3_statistics_charts(void) { + struct sqlite3_statistics gs; + sqlite3_statistics_copy(&gs); + + if(gs.sqlite3_queries_made) { + static RRDSET *st_sqlite3_queries = NULL; + static RRDDIM *rd_queries = NULL; + + if (unlikely(!st_sqlite3_queries)) { + st_sqlite3_queries = rrdset_create_localhost( + "netdata" + , "sqlite3_queries" + , NULL 
+ , "sqlite3" + , NULL + , "Netdata SQLite3 Queries" + , "queries/s" + , "netdata" + , "stats" + , 131100 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_queries = rrddim_add(st_sqlite3_queries, "queries", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_sqlite3_queries, rd_queries, (collected_number)gs.sqlite3_queries_made); + + rrdset_done(st_sqlite3_queries); + } + + // ---------------------------------------------------------------- + + if(gs.sqlite3_queries_ok || gs.sqlite3_queries_failed) { + static RRDSET *st_sqlite3_queries_by_status = NULL; + static RRDDIM *rd_ok = NULL, *rd_failed = NULL, *rd_busy = NULL, *rd_locked = NULL; + + if (unlikely(!st_sqlite3_queries_by_status)) { + st_sqlite3_queries_by_status = rrdset_create_localhost( + "netdata" + , "sqlite3_queries_by_status" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 Queries by status" + , "queries/s" + , "netdata" + , "stats" + , 131101 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_ok = rrddim_add(st_sqlite3_queries_by_status, "ok", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_failed = rrddim_add(st_sqlite3_queries_by_status, "failed", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_busy = rrddim_add(st_sqlite3_queries_by_status, "busy", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_locked = rrddim_add(st_sqlite3_queries_by_status, "locked", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_ok, (collected_number)gs.sqlite3_queries_made); + rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_failed, (collected_number)gs.sqlite3_queries_failed); + rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_busy, (collected_number)gs.sqlite3_queries_failed_busy); + rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_locked, (collected_number)gs.sqlite3_queries_failed_locked); + + rrdset_done(st_sqlite3_queries_by_status); + } + + // 
---------------------------------------------------------------- + + if(gs.sqlite3_rows) { + static RRDSET *st_sqlite3_rows = NULL; + static RRDDIM *rd_rows = NULL; + + if (unlikely(!st_sqlite3_rows)) { + st_sqlite3_rows = rrdset_create_localhost( + "netdata" + , "sqlite3_rows" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 Rows" + , "rows/s" + , "netdata" + , "stats" + , 131102 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_rows = rrddim_add(st_sqlite3_rows, "ok", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_sqlite3_rows, rd_rows, (collected_number)gs.sqlite3_rows); + + rrdset_done(st_sqlite3_rows); + } + + if(gs.sqlite3_metadata_cache_hit) { + static RRDSET *st_sqlite3_cache = NULL; + static RRDDIM *rd_cache_hit = NULL; + static RRDDIM *rd_cache_miss= NULL; + static RRDDIM *rd_cache_spill= NULL; + static RRDDIM *rd_cache_write= NULL; + + if (unlikely(!st_sqlite3_cache)) { + st_sqlite3_cache = rrdset_create_localhost( + "netdata" + , "sqlite3_metatada_cache" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 metadata cache" + , "ops/s" + , "netdata" + , "stats" + , 131103 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_cache_hit = rrddim_add(st_sqlite3_cache, "cache_hit", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_miss = rrddim_add(st_sqlite3_cache, "cache_miss", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_spill = rrddim_add(st_sqlite3_cache, "cache_spill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_write = rrddim_add(st_sqlite3_cache, "cache_write", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + if(gs.sqlite3_metadata_cache_hit != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_metadata_cache_hit); + + if(gs.sqlite3_metadata_cache_miss != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_metadata_cache_miss); + + if(gs.sqlite3_metadata_cache_spill != UINT64_MAX) + 
rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_metadata_cache_spill); + + if(gs.sqlite3_metadata_cache_write != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_metadata_cache_write); + + rrdset_done(st_sqlite3_cache); + } + + if(gs.sqlite3_context_cache_hit) { + static RRDSET *st_sqlite3_cache = NULL; + static RRDDIM *rd_cache_hit = NULL; + static RRDDIM *rd_cache_miss= NULL; + static RRDDIM *rd_cache_spill= NULL; + static RRDDIM *rd_cache_write= NULL; + + if (unlikely(!st_sqlite3_cache)) { + st_sqlite3_cache = rrdset_create_localhost( + "netdata" + , "sqlite3_context_cache" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 context cache" + , "ops/s" + , "netdata" + , "stats" + , 131104 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_cache_hit = rrddim_add(st_sqlite3_cache, "cache_hit", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_miss = rrddim_add(st_sqlite3_cache, "cache_miss", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_spill = rrddim_add(st_sqlite3_cache, "cache_spill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_write = rrddim_add(st_sqlite3_cache, "cache_write", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + if(gs.sqlite3_context_cache_hit != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_context_cache_hit); + + if(gs.sqlite3_context_cache_miss != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_context_cache_miss); + + if(gs.sqlite3_context_cache_spill != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_context_cache_spill); + + if(gs.sqlite3_context_cache_write != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_context_cache_write); + + rrdset_done(st_sqlite3_cache); + } + + // 
---------------------------------------------------------------- +} + +#ifdef ENABLE_DBENGINE + +struct dbengine2_cache_pointers { + RRDSET *st_cache_hit_ratio; + RRDDIM *rd_hit_ratio_closest; + RRDDIM *rd_hit_ratio_exact; + + RRDSET *st_operations; + RRDDIM *rd_searches_closest; + RRDDIM *rd_searches_exact; + RRDDIM *rd_add_hot; + RRDDIM *rd_add_clean; + RRDDIM *rd_evictions; + RRDDIM *rd_flushes; + RRDDIM *rd_acquires; + RRDDIM *rd_releases; + RRDDIM *rd_acquires_for_deletion; + + RRDSET *st_pgc_memory; + RRDDIM *rd_pgc_memory_free; + RRDDIM *rd_pgc_memory_clean; + RRDDIM *rd_pgc_memory_hot; + RRDDIM *rd_pgc_memory_dirty; + RRDDIM *rd_pgc_memory_index; + RRDDIM *rd_pgc_memory_evicting; + RRDDIM *rd_pgc_memory_flushing; + + RRDSET *st_pgc_tm; + RRDDIM *rd_pgc_tm_current; + RRDDIM *rd_pgc_tm_wanted; + RRDDIM *rd_pgc_tm_hot_max; + RRDDIM *rd_pgc_tm_dirty_max; + RRDDIM *rd_pgc_tm_hot; + RRDDIM *rd_pgc_tm_dirty; + RRDDIM *rd_pgc_tm_referenced; + + RRDSET *st_pgc_pages; + RRDDIM *rd_pgc_pages_clean; + RRDDIM *rd_pgc_pages_hot; + RRDDIM *rd_pgc_pages_dirty; + RRDDIM *rd_pgc_pages_referenced; + + RRDSET *st_pgc_memory_changes; + RRDDIM *rd_pgc_memory_new_hot; + RRDDIM *rd_pgc_memory_new_clean; + RRDDIM *rd_pgc_memory_clean_evictions; + + RRDSET *st_pgc_memory_migrations; + RRDDIM *rd_pgc_memory_hot_to_dirty; + RRDDIM *rd_pgc_memory_dirty_to_clean; + + RRDSET *st_pgc_workers; + RRDDIM *rd_pgc_workers_evictors; + RRDDIM *rd_pgc_workers_flushers; + RRDDIM *rd_pgc_workers_adders; + RRDDIM *rd_pgc_workers_searchers; + RRDDIM *rd_pgc_workers_jv2_flushers; + RRDDIM *rd_pgc_workers_hot2dirty; + + RRDSET *st_pgc_memory_events; + RRDDIM *rd_pgc_memory_evictions_critical; + RRDDIM *rd_pgc_memory_evictions_aggressive; + RRDDIM *rd_pgc_memory_flushes_critical; + + RRDSET *st_pgc_waste; + RRDDIM *rd_pgc_waste_evictions_skipped; + RRDDIM *rd_pgc_waste_flushes_cancelled; + RRDDIM *rd_pgc_waste_insert_spins; + RRDDIM *rd_pgc_waste_evict_spins; + RRDDIM *rd_pgc_waste_release_spins; + 
RRDDIM *rd_pgc_waste_acquire_spins; + RRDDIM *rd_pgc_waste_delete_spins; + RRDDIM *rd_pgc_waste_flush_spins; + +}; + +static void dbengine2_cache_statistics_charts(struct dbengine2_cache_pointers *ptrs, struct pgc_statistics *pgc_stats, struct pgc_statistics *pgc_stats_old __maybe_unused, const char *name, int priority) { + + { + if (unlikely(!ptrs->st_cache_hit_ratio)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_hit_ratio", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Hit Ratio", name); + + ptrs->st_cache_hit_ratio = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "%", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_hit_ratio_closest = rrddim_add(ptrs->st_cache_hit_ratio, "closest", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_hit_ratio_exact = rrddim_add(ptrs->st_cache_hit_ratio, "exact", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + size_t closest_percent = 100 * 10000; + if(pgc_stats->searches_closest > pgc_stats_old->searches_closest) + closest_percent = (pgc_stats->searches_closest_hits - pgc_stats_old->searches_closest_hits) * 100 * 10000 / (pgc_stats->searches_closest - pgc_stats_old->searches_closest); + + size_t exact_percent = 100 * 10000; + if(pgc_stats->searches_exact > pgc_stats_old->searches_exact) + exact_percent = (pgc_stats->searches_exact_hits - pgc_stats_old->searches_exact_hits) * 100 * 10000 / (pgc_stats->searches_exact - pgc_stats_old->searches_exact); + + rrddim_set_by_pointer(ptrs->st_cache_hit_ratio, ptrs->rd_hit_ratio_closest, (collected_number)closest_percent); + rrddim_set_by_pointer(ptrs->st_cache_hit_ratio, 
ptrs->rd_hit_ratio_exact, (collected_number)exact_percent); + + rrdset_done(ptrs->st_cache_hit_ratio); + } + + { + if (unlikely(!ptrs->st_operations)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_operations", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Operations", name); + + ptrs->st_operations = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "ops/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_searches_closest = rrddim_add(ptrs->st_operations, "search closest", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_searches_exact = rrddim_add(ptrs->st_operations, "search exact", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_add_hot = rrddim_add(ptrs->st_operations, "add hot", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_add_clean = rrddim_add(ptrs->st_operations, "add clean", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_evictions = rrddim_add(ptrs->st_operations, "evictions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_flushes = rrddim_add(ptrs->st_operations, "flushes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_acquires = rrddim_add(ptrs->st_operations, "acquires", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_releases = rrddim_add(ptrs->st_operations, "releases", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_acquires_for_deletion = rrddim_add(ptrs->st_operations, "del acquires", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_searches_closest, (collected_number)pgc_stats->searches_closest); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_searches_exact, 
(collected_number)pgc_stats->searches_exact); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_add_hot, (collected_number)pgc_stats->queues.hot.added_entries); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_add_clean, (collected_number)(pgc_stats->added_entries - pgc_stats->queues.hot.added_entries)); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_evictions, (collected_number)pgc_stats->queues.clean.removed_entries); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_flushes, (collected_number)pgc_stats->flushes_completed); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_acquires, (collected_number)pgc_stats->acquires); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_releases, (collected_number)pgc_stats->releases); + rrddim_set_by_pointer(ptrs->st_operations, ptrs->rd_acquires_for_deletion, (collected_number)pgc_stats->acquires_for_deletion); + + rrdset_done(ptrs->st_operations); + } + + { + if (unlikely(!ptrs->st_pgc_memory)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_memory", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Memory", name); + + ptrs->st_pgc_memory = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "bytes", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + ptrs->rd_pgc_memory_free = rrddim_add(ptrs->st_pgc_memory, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_memory_hot = rrddim_add(ptrs->st_pgc_memory, "hot", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_memory_dirty = rrddim_add(ptrs->st_pgc_memory, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_memory_clean = rrddim_add(ptrs->st_pgc_memory, "clean", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_memory_index = 
rrddim_add(ptrs->st_pgc_memory, "index", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_memory_evicting = rrddim_add(ptrs->st_pgc_memory, "evicting", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_memory_flushing = rrddim_add(ptrs->st_pgc_memory, "flushing", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + collected_number free = (pgc_stats->current_cache_size > pgc_stats->wanted_cache_size) ? 0 : + (collected_number)(pgc_stats->wanted_cache_size - pgc_stats->current_cache_size); + + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_free, free); + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_hot, (collected_number)pgc_stats->queues.hot.size); + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_dirty, (collected_number)pgc_stats->queues.dirty.size); + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_clean, (collected_number)pgc_stats->queues.clean.size); + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_evicting, (collected_number)pgc_stats->evicting_size); + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_flushing, (collected_number)pgc_stats->flushing_size); + rrddim_set_by_pointer(ptrs->st_pgc_memory, ptrs->rd_pgc_memory_index, + (collected_number)(pgc_stats->size - pgc_stats->queues.clean.size - pgc_stats->queues.hot.size - pgc_stats->queues.dirty.size - pgc_stats->evicting_size - pgc_stats->flushing_size)); + + rrdset_done(ptrs->st_pgc_memory); + } + + { + if (unlikely(!ptrs->st_pgc_tm)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_target_memory", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Target Cache Memory", name); + + ptrs->st_pgc_tm = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + 
buffer_tostring(family), + NULL, + buffer_tostring(title), + "bytes", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_pgc_tm_current = rrddim_add(ptrs->st_pgc_tm, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_wanted = rrddim_add(ptrs->st_pgc_tm, "wanted", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_referenced = rrddim_add(ptrs->st_pgc_tm, "referenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_hot_max = rrddim_add(ptrs->st_pgc_tm, "hot max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_dirty_max = rrddim_add(ptrs->st_pgc_tm, "dirty max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_hot = rrddim_add(ptrs->st_pgc_tm, "hot", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_tm_dirty = rrddim_add(ptrs->st_pgc_tm, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_current, (collected_number)pgc_stats->current_cache_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_wanted, (collected_number)pgc_stats->wanted_cache_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_referenced, (collected_number)pgc_stats->referenced_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_hot_max, (collected_number)pgc_stats->queues.hot.max_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_dirty_max, (collected_number)pgc_stats->queues.dirty.max_size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_hot, (collected_number)pgc_stats->queues.hot.size); + rrddim_set_by_pointer(ptrs->st_pgc_tm, ptrs->rd_pgc_tm_dirty, (collected_number)pgc_stats->queues.dirty.size); + + rrdset_done(ptrs->st_pgc_tm); + } + + { + if (unlikely(!ptrs->st_pgc_pages)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_pages", name); + + BUFFER *family = buffer_create(100, 
NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Pages", name); + + ptrs->st_pgc_pages = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "pages", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_pgc_pages_clean = rrddim_add(ptrs->st_pgc_pages, "clean", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_pages_hot = rrddim_add(ptrs->st_pgc_pages, "hot", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_pages_dirty = rrddim_add(ptrs->st_pgc_pages, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_pages_referenced = rrddim_add(ptrs->st_pgc_pages, "referenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_pages, ptrs->rd_pgc_pages_clean, (collected_number)pgc_stats->queues.clean.entries); + rrddim_set_by_pointer(ptrs->st_pgc_pages, ptrs->rd_pgc_pages_hot, (collected_number)pgc_stats->queues.hot.entries); + rrddim_set_by_pointer(ptrs->st_pgc_pages, ptrs->rd_pgc_pages_dirty, (collected_number)pgc_stats->queues.dirty.entries); + rrddim_set_by_pointer(ptrs->st_pgc_pages, ptrs->rd_pgc_pages_referenced, (collected_number)pgc_stats->referenced_entries); + + rrdset_done(ptrs->st_pgc_pages); + } + + { + if (unlikely(!ptrs->st_pgc_memory_changes)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_memory_changes", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Memory Changes", name); + + ptrs->st_pgc_memory_changes = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + 
"bytes/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_AREA); + + ptrs->rd_pgc_memory_new_clean = rrddim_add(ptrs->st_pgc_memory_changes, "new clean", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_memory_clean_evictions = rrddim_add(ptrs->st_pgc_memory_changes, "evictions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_memory_new_hot = rrddim_add(ptrs->st_pgc_memory_changes, "new hot", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_memory_changes, ptrs->rd_pgc_memory_new_clean, (collected_number)(pgc_stats->added_size - pgc_stats->queues.hot.added_size)); + rrddim_set_by_pointer(ptrs->st_pgc_memory_changes, ptrs->rd_pgc_memory_clean_evictions, (collected_number)pgc_stats->queues.clean.removed_size); + rrddim_set_by_pointer(ptrs->st_pgc_memory_changes, ptrs->rd_pgc_memory_new_hot, (collected_number)pgc_stats->queues.hot.added_size); + + rrdset_done(ptrs->st_pgc_memory_changes); + } + + { + if (unlikely(!ptrs->st_pgc_memory_migrations)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_memory_migrations", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Memory Migrations", name); + + ptrs->st_pgc_memory_migrations = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "bytes/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_AREA); + + ptrs->rd_pgc_memory_dirty_to_clean = rrddim_add(ptrs->st_pgc_memory_migrations, "dirty to clean", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_memory_hot_to_dirty = rrddim_add(ptrs->st_pgc_memory_migrations, "hot to dirty", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + + 
buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_memory_migrations, ptrs->rd_pgc_memory_dirty_to_clean, (collected_number)pgc_stats->queues.dirty.removed_size); + rrddim_set_by_pointer(ptrs->st_pgc_memory_migrations, ptrs->rd_pgc_memory_hot_to_dirty, (collected_number)pgc_stats->queues.dirty.added_size); + + rrdset_done(ptrs->st_pgc_memory_migrations); + } + + { + if (unlikely(!ptrs->st_pgc_memory_events)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_events", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Events", name); + + ptrs->st_pgc_memory_events = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "events/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_AREA); + + ptrs->rd_pgc_memory_evictions_aggressive = rrddim_add(ptrs->st_pgc_memory_events, "evictions aggressive", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_memory_evictions_critical = rrddim_add(ptrs->st_pgc_memory_events, "evictions critical", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_memory_flushes_critical = rrddim_add(ptrs->st_pgc_memory_events, "flushes critical", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_memory_events, ptrs->rd_pgc_memory_evictions_aggressive, (collected_number)pgc_stats->events_cache_needs_space_aggressively); + rrddim_set_by_pointer(ptrs->st_pgc_memory_events, ptrs->rd_pgc_memory_evictions_critical, (collected_number)pgc_stats->events_cache_under_severe_pressure); + rrddim_set_by_pointer(ptrs->st_pgc_memory_events, ptrs->rd_pgc_memory_flushes_critical, 
(collected_number)pgc_stats->events_flush_critical); + + rrdset_done(ptrs->st_pgc_memory_events); + } + + { + if (unlikely(!ptrs->st_pgc_waste)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_waste_events", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Waste Events", name); + + ptrs->st_pgc_waste = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "events/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_pgc_waste_evictions_skipped = rrddim_add(ptrs->st_pgc_waste, "evictions skipped", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_flushes_cancelled = rrddim_add(ptrs->st_pgc_waste, "flushes cancelled", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_acquire_spins = rrddim_add(ptrs->st_pgc_waste, "acquire spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_release_spins = rrddim_add(ptrs->st_pgc_waste, "release spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_insert_spins = rrddim_add(ptrs->st_pgc_waste, "insert spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_delete_spins = rrddim_add(ptrs->st_pgc_waste, "delete spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_evict_spins = rrddim_add(ptrs->st_pgc_waste, "evict spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + ptrs->rd_pgc_waste_flush_spins = rrddim_add(ptrs->st_pgc_waste, "flush spins", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_evictions_skipped, (collected_number)pgc_stats->evict_skipped); + rrddim_set_by_pointer(ptrs->st_pgc_waste, 
ptrs->rd_pgc_waste_flushes_cancelled, (collected_number)pgc_stats->flushes_cancelled); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_acquire_spins, (collected_number)pgc_stats->acquire_spins); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_release_spins, (collected_number)pgc_stats->release_spins); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_insert_spins, (collected_number)pgc_stats->insert_spins); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_delete_spins, (collected_number)pgc_stats->delete_spins); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_evict_spins, (collected_number)pgc_stats->evict_spins); + rrddim_set_by_pointer(ptrs->st_pgc_waste, ptrs->rd_pgc_waste_flush_spins, (collected_number)pgc_stats->flush_spins); + + rrdset_done(ptrs->st_pgc_waste); + } + + { + if (unlikely(!ptrs->st_pgc_workers)) { + BUFFER *id = buffer_create(100, NULL); + buffer_sprintf(id, "dbengine_%s_cache_workers", name); + + BUFFER *family = buffer_create(100, NULL); + buffer_sprintf(family, "dbengine %s cache", name); + + BUFFER *title = buffer_create(100, NULL); + buffer_sprintf(title, "Netdata %s Cache Workers", name); + + ptrs->st_pgc_workers = rrdset_create_localhost( + "netdata", + buffer_tostring(id), + NULL, + buffer_tostring(family), + NULL, + buffer_tostring(title), + "workers", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + ptrs->rd_pgc_workers_searchers = rrddim_add(ptrs->st_pgc_workers, "searchers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_workers_adders = rrddim_add(ptrs->st_pgc_workers, "adders", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_workers_evictors = rrddim_add(ptrs->st_pgc_workers, "evictors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_workers_flushers = rrddim_add(ptrs->st_pgc_workers, "flushers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_workers_hot2dirty = rrddim_add(ptrs->st_pgc_workers, 
"hot2dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + ptrs->rd_pgc_workers_jv2_flushers = rrddim_add(ptrs->st_pgc_workers, "jv2 flushers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + buffer_free(id); + buffer_free(family); + buffer_free(title); + priority++; + } + + rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_searchers, (collected_number)pgc_stats->workers_search); + rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_adders, (collected_number)pgc_stats->workers_add); + rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_evictors, (collected_number)pgc_stats->workers_evict); + rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_flushers, (collected_number)pgc_stats->workers_flush); + rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_hot2dirty, (collected_number)pgc_stats->workers_hot2dirty); + rrddim_set_by_pointer(ptrs->st_pgc_workers, ptrs->rd_pgc_workers_jv2_flushers, (collected_number)pgc_stats->workers_jv2_flush); + + rrdset_done(ptrs->st_pgc_workers); + } +} + + +static void dbengine2_statistics_charts(void) { + if(!main_cache || !main_mrg) + return; + + static struct dbengine2_cache_pointers main_cache_ptrs = {}, open_cache_ptrs = {}, extent_cache_ptrs = {}; + static struct rrdeng_cache_efficiency_stats cache_efficiency_stats = {}, cache_efficiency_stats_old = {}; + static struct pgc_statistics pgc_main_stats = {}, pgc_main_stats_old = {}; (void)pgc_main_stats_old; + static struct pgc_statistics pgc_open_stats = {}, pgc_open_stats_old = {}; (void)pgc_open_stats_old; + static struct pgc_statistics pgc_extent_stats = {}, pgc_extent_stats_old = {}; (void)pgc_extent_stats_old; + static struct mrg_statistics mrg_stats = {}, mrg_stats_old = {}; (void)mrg_stats_old; + + pgc_main_stats_old = pgc_main_stats; + pgc_main_stats = pgc_get_statistics(main_cache); + dbengine2_cache_statistics_charts(&main_cache_ptrs, &pgc_main_stats, &pgc_main_stats_old, "main", 135100); + + pgc_open_stats_old = 
pgc_open_stats; + pgc_open_stats = pgc_get_statistics(open_cache); + dbengine2_cache_statistics_charts(&open_cache_ptrs, &pgc_open_stats, &pgc_open_stats_old, "open", 135200); + + pgc_extent_stats_old = pgc_extent_stats; + pgc_extent_stats = pgc_get_statistics(extent_cache); + dbengine2_cache_statistics_charts(&extent_cache_ptrs, &pgc_extent_stats, &pgc_extent_stats_old, "extent", 135300); + + cache_efficiency_stats_old = cache_efficiency_stats; + cache_efficiency_stats = rrdeng_get_cache_efficiency_stats(); + + mrg_stats_old = mrg_stats; + mrg_get_statistics(main_mrg, &mrg_stats); + + struct rrdeng_buffer_sizes buffers = rrdeng_get_buffer_sizes(); + size_t buffers_total_size = buffers.handles + buffers.xt_buf + buffers.xt_io + buffers.pdc + buffers.descriptors + + buffers.opcodes + buffers.wal + buffers.workers + buffers.epdl + buffers.deol + buffers.pd + buffers.pgc + buffers.mrg; + +#ifdef PDC_USE_JULYL + buffers_total_size += buffers.julyl; +#endif + + dbengine_total_memory = pgc_main_stats.size + pgc_open_stats.size + pgc_extent_stats.size + mrg_stats.size + buffers_total_size; + + size_t priority = 135000; + + { + static RRDSET *st_pgc_memory = NULL; + static RRDDIM *rd_pgc_memory_main = NULL; + static RRDDIM *rd_pgc_memory_open = NULL; // open journal memory + static RRDDIM *rd_pgc_memory_extent = NULL; // extent compresses cache memory + static RRDDIM *rd_pgc_memory_metrics = NULL; // metric registry memory + static RRDDIM *rd_pgc_memory_buffers = NULL; + + if (unlikely(!st_pgc_memory)) { + st_pgc_memory = rrdset_create_localhost( + "netdata", + "dbengine_memory", + NULL, + "dbengine memory", + NULL, + "Netdata DB Memory", + "bytes", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_pgc_memory_main = rrddim_add(st_pgc_memory, "main cache", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_memory_open = rrddim_add(st_pgc_memory, "open cache", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_memory_extent = 
rrddim_add(st_pgc_memory, "extent cache", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_memory_metrics = rrddim_add(st_pgc_memory, "metrics registry", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_memory_buffers = rrddim_add(st_pgc_memory, "buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + priority++; + + + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_main, (collected_number)pgc_main_stats.size); + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_open, (collected_number)pgc_open_stats.size); + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_extent, (collected_number)pgc_extent_stats.size); + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_metrics, (collected_number)mrg_stats.size); + rrddim_set_by_pointer(st_pgc_memory, rd_pgc_memory_buffers, (collected_number)buffers_total_size); + + rrdset_done(st_pgc_memory); + } + + { + static RRDSET *st_pgc_buffers = NULL; + static RRDDIM *rd_pgc_buffers_pgc = NULL; + static RRDDIM *rd_pgc_buffers_mrg = NULL; + static RRDDIM *rd_pgc_buffers_opcodes = NULL; + static RRDDIM *rd_pgc_buffers_handles = NULL; + static RRDDIM *rd_pgc_buffers_descriptors = NULL; + static RRDDIM *rd_pgc_buffers_wal = NULL; + static RRDDIM *rd_pgc_buffers_workers = NULL; + static RRDDIM *rd_pgc_buffers_pdc = NULL; + static RRDDIM *rd_pgc_buffers_xt_io = NULL; + static RRDDIM *rd_pgc_buffers_xt_buf = NULL; + static RRDDIM *rd_pgc_buffers_epdl = NULL; + static RRDDIM *rd_pgc_buffers_deol = NULL; + static RRDDIM *rd_pgc_buffers_pd = NULL; +#ifdef PDC_USE_JULYL + static RRDDIM *rd_pgc_buffers_julyl = NULL; +#endif + + if (unlikely(!st_pgc_buffers)) { + st_pgc_buffers = rrdset_create_localhost( + "netdata", + "dbengine_buffers", + NULL, + "dbengine memory", + NULL, + "Netdata DB Buffers", + "bytes", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_pgc_buffers_pgc = rrddim_add(st_pgc_buffers, "pgc", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_mrg = rrddim_add(st_pgc_buffers, "mrg", 
NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_opcodes = rrddim_add(st_pgc_buffers, "opcodes", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_handles = rrddim_add(st_pgc_buffers, "query handles", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_descriptors = rrddim_add(st_pgc_buffers, "descriptors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_wal = rrddim_add(st_pgc_buffers, "wal", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_workers = rrddim_add(st_pgc_buffers, "workers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_pdc = rrddim_add(st_pgc_buffers, "pdc", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_pd = rrddim_add(st_pgc_buffers, "pd", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_xt_io = rrddim_add(st_pgc_buffers, "extent io", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_xt_buf = rrddim_add(st_pgc_buffers, "extent buffers", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_epdl = rrddim_add(st_pgc_buffers, "epdl", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_pgc_buffers_deol = rrddim_add(st_pgc_buffers, "deol", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); +#ifdef PDC_USE_JULYL + rd_pgc_buffers_julyl = rrddim_add(st_pgc_buffers, "julyl", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); +#endif + } + priority++; + + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pgc, (collected_number)buffers.pgc); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_mrg, (collected_number)buffers.mrg); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_opcodes, (collected_number)buffers.opcodes); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_handles, (collected_number)buffers.handles); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_descriptors, (collected_number)buffers.descriptors); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_wal, (collected_number)buffers.wal); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_workers, (collected_number)buffers.workers); + 
rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pdc, (collected_number)buffers.pdc); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_pd, (collected_number)buffers.pd); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_io, (collected_number)buffers.xt_io); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_xt_buf, (collected_number)buffers.xt_buf); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_epdl, (collected_number)buffers.epdl); + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_deol, (collected_number)buffers.deol); +#ifdef PDC_USE_JULYL + rrddim_set_by_pointer(st_pgc_buffers, rd_pgc_buffers_julyl, (collected_number)buffers.julyl); +#endif + + rrdset_done(st_pgc_buffers); + } + +#ifdef PDC_USE_JULYL + { + static RRDSET *st_julyl_moved = NULL; + static RRDDIM *rd_julyl_moved = NULL; + + if (unlikely(!st_julyl_moved)) { + st_julyl_moved = rrdset_create_localhost( + "netdata", + "dbengine_julyl_moved", + NULL, + "dbengine memory", + NULL, + "Netdata JulyL Memory Moved", + "bytes/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_AREA); + + rd_julyl_moved = rrddim_add(st_julyl_moved, "moved", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_julyl_moved, rd_julyl_moved, (collected_number)julyl_bytes_moved()); + + rrdset_done(st_julyl_moved); + } +#endif + + { + static RRDSET *st_mrg_metrics = NULL; + static RRDDIM *rd_mrg_metrics = NULL; + static RRDDIM *rd_mrg_acquired = NULL; + static RRDDIM *rd_mrg_collected = NULL; + static RRDDIM *rd_mrg_multiple_writers = NULL; + + if (unlikely(!st_mrg_metrics)) { + st_mrg_metrics = rrdset_create_localhost( + "netdata", + "dbengine_metrics", + NULL, + "dbengine metrics", + NULL, + "Netdata Metrics in Metrics Registry", + "metrics", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_mrg_metrics = rrddim_add(st_mrg_metrics, "all", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + 
rd_mrg_acquired = rrddim_add(st_mrg_metrics, "acquired", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_mrg_collected = rrddim_add(st_mrg_metrics, "collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_mrg_multiple_writers = rrddim_add(st_mrg_metrics, "multi-collected", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + priority++; + + rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_metrics, (collected_number)mrg_stats.entries); + rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_acquired, (collected_number)mrg_stats.entries_referenced); + rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_collected, (collected_number)mrg_stats.writers); + rrddim_set_by_pointer(st_mrg_metrics, rd_mrg_multiple_writers, (collected_number)mrg_stats.writers_conflicts); + + rrdset_done(st_mrg_metrics); + } + + { + static RRDSET *st_mrg_ops = NULL; + static RRDDIM *rd_mrg_add = NULL; + static RRDDIM *rd_mrg_del = NULL; + static RRDDIM *rd_mrg_search = NULL; + + if (unlikely(!st_mrg_ops)) { + st_mrg_ops = rrdset_create_localhost( + "netdata", + "dbengine_metrics_registry_operations", + NULL, + "dbengine metrics", + NULL, + "Netdata Metrics Registry Operations", + "metrics", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_mrg_add = rrddim_add(st_mrg_ops, "add", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_mrg_del = rrddim_add(st_mrg_ops, "delete", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_mrg_search = rrddim_add(st_mrg_ops, "search", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_mrg_ops, rd_mrg_add, (collected_number)mrg_stats.additions); + rrddim_set_by_pointer(st_mrg_ops, rd_mrg_del, (collected_number)mrg_stats.deletions); + rrddim_set_by_pointer(st_mrg_ops, rd_mrg_search, (collected_number)mrg_stats.search_hits + (collected_number)mrg_stats.search_misses); + + rrdset_done(st_mrg_ops); + } + + { + static RRDSET *st_mrg_references = NULL; + static RRDDIM *rd_mrg_references = NULL; + + if (unlikely(!st_mrg_references)) { + 
st_mrg_references = rrdset_create_localhost( + "netdata", + "dbengine_metrics_registry_references", + NULL, + "dbengine metrics", + NULL, + "Netdata Metrics Registry References", + "references", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_mrg_references = rrddim_add(st_mrg_references, "references", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + priority++; + + rrddim_set_by_pointer(st_mrg_references, rd_mrg_references, (collected_number)mrg_stats.current_references); + + rrdset_done(st_mrg_references); + } + + { + static RRDSET *st_cache_hit_ratio = NULL; + static RRDDIM *rd_hit_ratio = NULL; + static RRDDIM *rd_main_cache_hit_ratio = NULL; + static RRDDIM *rd_extent_cache_hit_ratio = NULL; + static RRDDIM *rd_extent_merge_hit_ratio = NULL; + + if (unlikely(!st_cache_hit_ratio)) { + st_cache_hit_ratio = rrdset_create_localhost( + "netdata", + "dbengine_cache_hit_ratio", + NULL, + "dbengine query router", + NULL, + "Netdata Queries Cache Hit Ratio", + "%", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_hit_ratio = rrddim_add(st_cache_hit_ratio, "overall", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + rd_main_cache_hit_ratio = rrddim_add(st_cache_hit_ratio, "main cache", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + rd_extent_cache_hit_ratio = rrddim_add(st_cache_hit_ratio, "extent cache", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + rd_extent_merge_hit_ratio = rrddim_add(st_cache_hit_ratio, "extent merge", NULL, 1, 10000, RRD_ALGORITHM_ABSOLUTE); + } + priority++; + + size_t delta_pages_total = cache_efficiency_stats.pages_total - cache_efficiency_stats_old.pages_total; + size_t delta_pages_to_load_from_disk = cache_efficiency_stats.pages_to_load_from_disk - cache_efficiency_stats_old.pages_to_load_from_disk; + size_t delta_extents_loaded_from_disk = cache_efficiency_stats.extents_loaded_from_disk - cache_efficiency_stats_old.extents_loaded_from_disk; + + size_t 
delta_pages_data_source_main_cache = cache_efficiency_stats.pages_data_source_main_cache - cache_efficiency_stats_old.pages_data_source_main_cache; + size_t delta_pages_pending_found_in_cache_at_pass4 = cache_efficiency_stats.pages_data_source_main_cache_at_pass4 - cache_efficiency_stats_old.pages_data_source_main_cache_at_pass4; + + size_t delta_pages_data_source_extent_cache = cache_efficiency_stats.pages_data_source_extent_cache - cache_efficiency_stats_old.pages_data_source_extent_cache; + size_t delta_pages_load_extent_merged = cache_efficiency_stats.pages_load_extent_merged - cache_efficiency_stats_old.pages_load_extent_merged; + + size_t pages_total_hit = delta_pages_total - delta_extents_loaded_from_disk; + + static size_t overall_hit_ratio = 100; + size_t main_cache_hit_ratio = 0, extent_cache_hit_ratio = 0, extent_merge_hit_ratio = 0; + if(delta_pages_total) { + if(pages_total_hit > delta_pages_total) + pages_total_hit = delta_pages_total; + + overall_hit_ratio = pages_total_hit * 100 * 10000 / delta_pages_total; + + size_t delta_pages_main_cache = delta_pages_data_source_main_cache + delta_pages_pending_found_in_cache_at_pass4; + if(delta_pages_main_cache > delta_pages_total) + delta_pages_main_cache = delta_pages_total; + + main_cache_hit_ratio = delta_pages_main_cache * 100 * 10000 / delta_pages_total; + } + + if(delta_pages_to_load_from_disk) { + if(delta_pages_data_source_extent_cache > delta_pages_to_load_from_disk) + delta_pages_data_source_extent_cache = delta_pages_to_load_from_disk; + + extent_cache_hit_ratio = delta_pages_data_source_extent_cache * 100 * 10000 / delta_pages_to_load_from_disk; + + if(delta_pages_load_extent_merged > delta_pages_to_load_from_disk) + delta_pages_load_extent_merged = delta_pages_to_load_from_disk; + + extent_merge_hit_ratio = delta_pages_load_extent_merged * 100 * 10000 / delta_pages_to_load_from_disk; + } + + rrddim_set_by_pointer(st_cache_hit_ratio, rd_hit_ratio, (collected_number)overall_hit_ratio); + 
rrddim_set_by_pointer(st_cache_hit_ratio, rd_main_cache_hit_ratio, (collected_number)main_cache_hit_ratio); + rrddim_set_by_pointer(st_cache_hit_ratio, rd_extent_cache_hit_ratio, (collected_number)extent_cache_hit_ratio); + rrddim_set_by_pointer(st_cache_hit_ratio, rd_extent_merge_hit_ratio, (collected_number)extent_merge_hit_ratio); + + rrdset_done(st_cache_hit_ratio); + } + + { + static RRDSET *st_queries = NULL; + static RRDDIM *rd_total = NULL; + static RRDDIM *rd_open = NULL; + static RRDDIM *rd_jv2 = NULL; + static RRDDIM *rd_planned_with_gaps = NULL; + static RRDDIM *rd_executed_with_gaps = NULL; + + if (unlikely(!st_queries)) { + st_queries = rrdset_create_localhost( + "netdata", + "dbengine_queries", + NULL, + "dbengine query router", + NULL, + "Netdata Queries", + "queries/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_total = rrddim_add(st_queries, "total", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_open = rrddim_add(st_queries, "open cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_jv2 = rrddim_add(st_queries, "journal v2", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_planned_with_gaps = rrddim_add(st_queries, "planned with gaps", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_executed_with_gaps = rrddim_add(st_queries, "executed with gaps", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_queries, rd_total, (collected_number)cache_efficiency_stats.queries); + rrddim_set_by_pointer(st_queries, rd_open, (collected_number)cache_efficiency_stats.queries_open); + rrddim_set_by_pointer(st_queries, rd_jv2, (collected_number)cache_efficiency_stats.queries_journal_v2); + rrddim_set_by_pointer(st_queries, rd_planned_with_gaps, (collected_number)cache_efficiency_stats.queries_planned_with_gaps); + rrddim_set_by_pointer(st_queries, rd_executed_with_gaps, (collected_number)cache_efficiency_stats.queries_executed_with_gaps); + + rrdset_done(st_queries); + } + + { + static 
RRDSET *st_queries_running = NULL; + static RRDDIM *rd_queries = NULL; + + if (unlikely(!st_queries_running)) { + st_queries_running = rrdset_create_localhost( + "netdata", + "dbengine_queries_running", + NULL, + "dbengine query router", + NULL, + "Netdata Queries Running", + "queries", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_queries = rrddim_add(st_queries_running, "queries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + priority++; + + rrddim_set_by_pointer(st_queries_running, rd_queries, (collected_number)cache_efficiency_stats.currently_running_queries); + + rrdset_done(st_queries_running); + } + + { + static RRDSET *st_query_pages_metadata_source = NULL; + static RRDDIM *rd_cache = NULL; + static RRDDIM *rd_open = NULL; + static RRDDIM *rd_jv2 = NULL; + + if (unlikely(!st_query_pages_metadata_source)) { + st_query_pages_metadata_source = rrdset_create_localhost( + "netdata", + "dbengine_query_pages_metadata_source", + NULL, + "dbengine query router", + NULL, + "Netdata Query Pages Metadata Source", + "pages/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_cache = rrddim_add(st_query_pages_metadata_source, "cache hit", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_jv2 = rrddim_add(st_query_pages_metadata_source, "journal v2 scan", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_open = rrddim_add(st_query_pages_metadata_source, "open journal", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_query_pages_metadata_source, rd_cache, (collected_number)cache_efficiency_stats.pages_meta_source_main_cache); + rrddim_set_by_pointer(st_query_pages_metadata_source, rd_jv2, (collected_number)cache_efficiency_stats.pages_meta_source_journal_v2); + rrddim_set_by_pointer(st_query_pages_metadata_source, rd_open, (collected_number)cache_efficiency_stats.pages_meta_source_open_cache); + + rrdset_done(st_query_pages_metadata_source); + } + + { 
+ static RRDSET *st_query_pages_data_source = NULL; + static RRDDIM *rd_pages_main_cache = NULL; + static RRDDIM *rd_pages_disk = NULL; + static RRDDIM *rd_pages_extent_cache = NULL; + + if (unlikely(!st_query_pages_data_source)) { + st_query_pages_data_source = rrdset_create_localhost( + "netdata", + "dbengine_query_pages_data_source", + NULL, + "dbengine query router", + NULL, + "Netdata Query Pages to Data Source", + "pages/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_pages_main_cache = rrddim_add(st_query_pages_data_source, "main cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_disk = rrddim_add(st_query_pages_data_source, "disk", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_extent_cache = rrddim_add(st_query_pages_data_source, "extent cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_query_pages_data_source, rd_pages_main_cache, (collected_number)cache_efficiency_stats.pages_data_source_main_cache + (collected_number)cache_efficiency_stats.pages_data_source_main_cache_at_pass4); + rrddim_set_by_pointer(st_query_pages_data_source, rd_pages_disk, (collected_number)cache_efficiency_stats.pages_to_load_from_disk); + rrddim_set_by_pointer(st_query_pages_data_source, rd_pages_extent_cache, (collected_number)cache_efficiency_stats.pages_data_source_extent_cache); + + rrdset_done(st_query_pages_data_source); + } + + { + static RRDSET *st_query_next_page = NULL; + static RRDDIM *rd_pass4 = NULL; + static RRDDIM *rd_nowait_failed = NULL; + static RRDDIM *rd_wait_failed = NULL; + static RRDDIM *rd_wait_loaded = NULL; + static RRDDIM *rd_nowait_loaded = NULL; + + if (unlikely(!st_query_next_page)) { + st_query_next_page = rrdset_create_localhost( + "netdata", + "dbengine_query_next_page", + NULL, + "dbengine query router", + NULL, + "Netdata Query Next Page", + "pages/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + 
RRDSET_TYPE_STACKED); + + rd_pass4 = rrddim_add(st_query_next_page, "pass4", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_wait_failed = rrddim_add(st_query_next_page, "failed slow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_nowait_failed = rrddim_add(st_query_next_page, "failed fast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_wait_loaded = rrddim_add(st_query_next_page, "loaded slow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_nowait_loaded = rrddim_add(st_query_next_page, "loaded fast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_query_next_page, rd_pass4, (collected_number)cache_efficiency_stats.pages_data_source_main_cache_at_pass4); + rrddim_set_by_pointer(st_query_next_page, rd_wait_failed, (collected_number)cache_efficiency_stats.page_next_wait_failed); + rrddim_set_by_pointer(st_query_next_page, rd_nowait_failed, (collected_number)cache_efficiency_stats.page_next_nowait_failed); + rrddim_set_by_pointer(st_query_next_page, rd_wait_loaded, (collected_number)cache_efficiency_stats.page_next_wait_loaded); + rrddim_set_by_pointer(st_query_next_page, rd_nowait_loaded, (collected_number)cache_efficiency_stats.page_next_nowait_loaded); + + rrdset_done(st_query_next_page); + } + + { + static RRDSET *st_query_page_issues = NULL; + static RRDDIM *rd_pages_zero_time = NULL; + static RRDDIM *rd_pages_past_time = NULL; + static RRDDIM *rd_pages_invalid_size = NULL; + static RRDDIM *rd_pages_fixed_update_every = NULL; + static RRDDIM *rd_pages_fixed_entries = NULL; + static RRDDIM *rd_pages_overlapping = NULL; + + if (unlikely(!st_query_page_issues)) { + st_query_page_issues = rrdset_create_localhost( + "netdata", + "dbengine_query_next_page_issues", + NULL, + "dbengine query router", + NULL, + "Netdata Query Next Page Issues", + "pages/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_pages_zero_time = rrddim_add(st_query_page_issues, "zero timestamp", NULL, 1, 1, 
RRD_ALGORITHM_INCREMENTAL); + rd_pages_invalid_size = rrddim_add(st_query_page_issues, "invalid size", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_past_time = rrddim_add(st_query_page_issues, "past time", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_overlapping = rrddim_add(st_query_page_issues, "overlapping", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_fixed_update_every = rrddim_add(st_query_page_issues, "update every fixed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pages_fixed_entries = rrddim_add(st_query_page_issues, "entries fixed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_query_page_issues, rd_pages_zero_time, (collected_number)cache_efficiency_stats.pages_zero_time_skipped); + rrddim_set_by_pointer(st_query_page_issues, rd_pages_invalid_size, (collected_number)cache_efficiency_stats.pages_invalid_size_skipped); + rrddim_set_by_pointer(st_query_page_issues, rd_pages_past_time, (collected_number)cache_efficiency_stats.pages_past_time_skipped); + rrddim_set_by_pointer(st_query_page_issues, rd_pages_overlapping, (collected_number)cache_efficiency_stats.pages_overlapping_skipped); + rrddim_set_by_pointer(st_query_page_issues, rd_pages_fixed_update_every, (collected_number)cache_efficiency_stats.pages_invalid_update_every_fixed); + rrddim_set_by_pointer(st_query_page_issues, rd_pages_fixed_entries, (collected_number)cache_efficiency_stats.pages_invalid_entries_fixed); + + rrdset_done(st_query_page_issues); + } + + { + static RRDSET *st_query_pages_from_disk = NULL; + static RRDDIM *rd_compressed = NULL; + static RRDDIM *rd_invalid = NULL; + static RRDDIM *rd_uncompressed = NULL; + static RRDDIM *rd_mmap_failed = NULL; + static RRDDIM *rd_unavailable = NULL; + static RRDDIM *rd_unroutable = NULL; + static RRDDIM *rd_not_found = NULL; + static RRDDIM *rd_cancelled = NULL; + static RRDDIM *rd_invalid_extent = NULL; + static RRDDIM *rd_extent_merged = NULL; + + if 
(unlikely(!st_query_pages_from_disk)) { + st_query_pages_from_disk = rrdset_create_localhost( + "netdata", + "dbengine_query_pages_disk_load", + NULL, + "dbengine query router", + NULL, + "Netdata Query Pages Loaded from Disk", + "pages/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_compressed = rrddim_add(st_query_pages_from_disk, "ok compressed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_invalid = rrddim_add(st_query_pages_from_disk, "fail invalid page", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_uncompressed = rrddim_add(st_query_pages_from_disk, "ok uncompressed", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_mmap_failed = rrddim_add(st_query_pages_from_disk, "fail cant mmap", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_unavailable = rrddim_add(st_query_pages_from_disk, "fail unavailable", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_unroutable = rrddim_add(st_query_pages_from_disk, "fail unroutable", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_not_found = rrddim_add(st_query_pages_from_disk, "fail not found", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_invalid_extent = rrddim_add(st_query_pages_from_disk, "fail invalid extent", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_extent_merged = rrddim_add(st_query_pages_from_disk, "extent merged", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cancelled = rrddim_add(st_query_pages_from_disk, "cancelled", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_query_pages_from_disk, rd_compressed, (collected_number)cache_efficiency_stats.pages_load_ok_compressed); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_invalid, (collected_number)cache_efficiency_stats.pages_load_fail_invalid_page_in_extent); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_uncompressed, (collected_number)cache_efficiency_stats.pages_load_ok_uncompressed); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_mmap_failed, 
(collected_number)cache_efficiency_stats.pages_load_fail_cant_mmap_extent); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_unavailable, (collected_number)cache_efficiency_stats.pages_load_fail_datafile_not_available); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_unroutable, (collected_number)cache_efficiency_stats.pages_load_fail_unroutable); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_not_found, (collected_number)cache_efficiency_stats.pages_load_fail_not_found); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_cancelled, (collected_number)cache_efficiency_stats.pages_load_fail_cancelled); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_invalid_extent, (collected_number)cache_efficiency_stats.pages_load_fail_invalid_extent); + rrddim_set_by_pointer(st_query_pages_from_disk, rd_extent_merged, (collected_number)cache_efficiency_stats.pages_load_extent_merged); + + rrdset_done(st_query_pages_from_disk); + } + + { + static RRDSET *st_events = NULL; + static RRDDIM *rd_journal_v2_mapped = NULL; + static RRDDIM *rd_journal_v2_unmapped = NULL; + static RRDDIM *rd_datafile_creation = NULL; + static RRDDIM *rd_datafile_deletion = NULL; + static RRDDIM *rd_datafile_deletion_spin = NULL; + static RRDDIM *rd_jv2_indexing = NULL; + static RRDDIM *rd_retention = NULL; + + if (unlikely(!st_events)) { + st_events = rrdset_create_localhost( + "netdata", + "dbengine_events", + NULL, + "dbengine query router", + NULL, + "Netdata Database Events", + "events/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_journal_v2_mapped = rrddim_add(st_events, "journal v2 mapped", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_journal_v2_unmapped = rrddim_add(st_events, "journal v2 unmapped", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_datafile_creation = rrddim_add(st_events, "datafile creation", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_datafile_deletion = rrddim_add(st_events, "datafile deletion", NULL, 1, 1, 
RRD_ALGORITHM_INCREMENTAL); + rd_datafile_deletion_spin = rrddim_add(st_events, "datafile deletion spin", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_jv2_indexing = rrddim_add(st_events, "journal v2 indexing", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_retention = rrddim_add(st_events, "retention", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_events, rd_journal_v2_mapped, (collected_number)cache_efficiency_stats.journal_v2_mapped); + rrddim_set_by_pointer(st_events, rd_journal_v2_unmapped, (collected_number)cache_efficiency_stats.journal_v2_unmapped); + rrddim_set_by_pointer(st_events, rd_datafile_creation, (collected_number)cache_efficiency_stats.datafile_creation_started); + rrddim_set_by_pointer(st_events, rd_datafile_deletion, (collected_number)cache_efficiency_stats.datafile_deletion_started); + rrddim_set_by_pointer(st_events, rd_datafile_deletion_spin, (collected_number)cache_efficiency_stats.datafile_deletion_spin); + rrddim_set_by_pointer(st_events, rd_jv2_indexing, (collected_number)cache_efficiency_stats.journal_v2_indexing_started); + rrddim_set_by_pointer(st_events, rd_retention, (collected_number)cache_efficiency_stats.metrics_retention_started); + + rrdset_done(st_events); + } + + { + static RRDSET *st_prep_timings = NULL; + static RRDDIM *rd_routing = NULL; + static RRDDIM *rd_main_cache = NULL; + static RRDDIM *rd_open_cache = NULL; + static RRDDIM *rd_journal_v2 = NULL; + static RRDDIM *rd_pass4 = NULL; + + if (unlikely(!st_prep_timings)) { + st_prep_timings = rrdset_create_localhost( + "netdata", + "dbengine_prep_timings", + NULL, + "dbengine query router", + NULL, + "Netdata Query Preparation Timings", + "usec/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_routing = rrddim_add(st_prep_timings, "routing", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_main_cache = rrddim_add(st_prep_timings, "main cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + 
rd_open_cache = rrddim_add(st_prep_timings, "open cache", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_journal_v2 = rrddim_add(st_prep_timings, "journal v2", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_pass4 = rrddim_add(st_prep_timings, "pass4", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_prep_timings, rd_routing, (collected_number)cache_efficiency_stats.prep_time_to_route); + rrddim_set_by_pointer(st_prep_timings, rd_main_cache, (collected_number)cache_efficiency_stats.prep_time_in_main_cache_lookup); + rrddim_set_by_pointer(st_prep_timings, rd_open_cache, (collected_number)cache_efficiency_stats.prep_time_in_open_cache_lookup); + rrddim_set_by_pointer(st_prep_timings, rd_journal_v2, (collected_number)cache_efficiency_stats.prep_time_in_journal_v2_lookup); + rrddim_set_by_pointer(st_prep_timings, rd_pass4, (collected_number)cache_efficiency_stats.prep_time_in_pass4_lookup); + + rrdset_done(st_prep_timings); + } + + { + static RRDSET *st_query_timings = NULL; + static RRDDIM *rd_init = NULL; + static RRDDIM *rd_prep_wait = NULL; + static RRDDIM *rd_next_page_disk_fast = NULL; + static RRDDIM *rd_next_page_disk_slow = NULL; + static RRDDIM *rd_next_page_preload_fast = NULL; + static RRDDIM *rd_next_page_preload_slow = NULL; + + if (unlikely(!st_query_timings)) { + st_query_timings = rrdset_create_localhost( + "netdata", + "dbengine_query_timings", + NULL, + "dbengine query router", + NULL, + "Netdata Query Timings", + "usec/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_init = rrddim_add(st_query_timings, "init", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_prep_wait = rrddim_add(st_query_timings, "prep wait", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_next_page_disk_fast = rrddim_add(st_query_timings, "next page disk fast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_next_page_disk_slow = rrddim_add(st_query_timings, "next page disk slow", NULL, 1, 1, 
RRD_ALGORITHM_INCREMENTAL); + rd_next_page_preload_fast = rrddim_add(st_query_timings, "next page preload fast", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_next_page_preload_slow = rrddim_add(st_query_timings, "next page preload slow", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_query_timings, rd_init, (collected_number)cache_efficiency_stats.query_time_init); + rrddim_set_by_pointer(st_query_timings, rd_prep_wait, (collected_number)cache_efficiency_stats.query_time_wait_for_prep); + rrddim_set_by_pointer(st_query_timings, rd_next_page_disk_fast, (collected_number)cache_efficiency_stats.query_time_to_fast_disk_next_page); + rrddim_set_by_pointer(st_query_timings, rd_next_page_disk_slow, (collected_number)cache_efficiency_stats.query_time_to_slow_disk_next_page); + rrddim_set_by_pointer(st_query_timings, rd_next_page_preload_fast, (collected_number)cache_efficiency_stats.query_time_to_fast_preload_next_page); + rrddim_set_by_pointer(st_query_timings, rd_next_page_preload_slow, (collected_number)cache_efficiency_stats.query_time_to_slow_preload_next_page); + + rrdset_done(st_query_timings); + } + + if(netdata_rwlock_tryrdlock(&rrd_rwlock) == 0) { + priority = 135400; + + RRDHOST *host; + unsigned long long stats_array[RRDENG_NR_STATS] = {0}; + unsigned long long local_stats_array[RRDENG_NR_STATS]; + unsigned dbengine_contexts = 0, counted_multihost_db[RRD_STORAGE_TIERS] = { 0 }, i; + + rrdhost_foreach_read(host) { + if (!rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) { + + /* get localhost's DB engine's statistics for each tier */ + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(host->db[tier].mode != RRD_MEMORY_MODE_DBENGINE) continue; + if(!host->db[tier].si) continue; + + if(counted_multihost_db[tier]) + continue; + else + counted_multihost_db[tier] = 1; + + ++dbengine_contexts; + rrdeng_get_37_statistics((struct rrdengine_instance *)host->db[tier].si, local_stats_array); + for (i = 0; i < RRDENG_NR_STATS; 
++i) { + /* aggregate statistics across hosts */ + stats_array[i] += local_stats_array[i]; + } + } + } + } + rrd_rdunlock(); + + if (dbengine_contexts) { + /* deduplicate global statistics by getting the ones from the last context */ + stats_array[30] = local_stats_array[30]; + stats_array[31] = local_stats_array[31]; + stats_array[32] = local_stats_array[32]; + stats_array[34] = local_stats_array[34]; + stats_array[36] = local_stats_array[36]; + + // ---------------------------------------------------------------- + + { + static RRDSET *st_compression = NULL; + static RRDDIM *rd_savings = NULL; + + if (unlikely(!st_compression)) { + st_compression = rrdset_create_localhost( + "netdata", + "dbengine_compression_ratio", + NULL, + "dbengine io", + NULL, + "Netdata DB engine data extents' compression savings ratio", + "percentage", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + priority++; + + unsigned long long ratio; + unsigned long long compressed_content_size = stats_array[12]; + unsigned long long content_size = stats_array[11]; + + if (content_size) { + // allow negative savings + ratio = ((content_size - compressed_content_size) * 100 * 1000) / content_size; + } else { + ratio = 0; + } + rrddim_set_by_pointer(st_compression, rd_savings, ratio); + + rrdset_done(st_compression); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_io_stats = NULL; + static RRDDIM *rd_reads = NULL; + static RRDDIM *rd_writes = NULL; + + if (unlikely(!st_io_stats)) { + st_io_stats = rrdset_create_localhost( + "netdata", + "dbengine_io_throughput", + NULL, + "dbengine io", + NULL, + "Netdata DB engine I/O throughput", + "MiB/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1024 * 1024, 
RRD_ALGORITHM_INCREMENTAL); + rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[17]); + rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[15]); + rrdset_done(st_io_stats); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_io_stats = NULL; + static RRDDIM *rd_reads = NULL; + static RRDDIM *rd_writes = NULL; + + if (unlikely(!st_io_stats)) { + st_io_stats = rrdset_create_localhost( + "netdata", + "dbengine_io_operations", + NULL, + "dbengine io", + NULL, + "Netdata DB engine I/O operations", + "operations/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[18]); + rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[16]); + rrdset_done(st_io_stats); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_errors = NULL; + static RRDDIM *rd_fs_errors = NULL; + static RRDDIM *rd_io_errors = NULL; + static RRDDIM *pg_cache_over_half_dirty_events = NULL; + + if (unlikely(!st_errors)) { + st_errors = rrdset_create_localhost( + "netdata", + "dbengine_global_errors", + NULL, + "dbengine io", + NULL, + "Netdata DB engine errors", + "errors/s", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_io_errors = rrddim_add(st_errors, "io_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_fs_errors = rrddim_add(st_errors, "fs_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + pg_cache_over_half_dirty_events = + rrddim_add(st_errors, 
"pg_cache_over_half_dirty_events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + priority++; + + rrddim_set_by_pointer(st_errors, rd_io_errors, (collected_number)stats_array[30]); + rrddim_set_by_pointer(st_errors, rd_fs_errors, (collected_number)stats_array[31]); + rrddim_set_by_pointer(st_errors, pg_cache_over_half_dirty_events, (collected_number)stats_array[34]); + rrdset_done(st_errors); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_fd = NULL; + static RRDDIM *rd_fd_current = NULL; + static RRDDIM *rd_fd_max = NULL; + + if (unlikely(!st_fd)) { + st_fd = rrdset_create_localhost( + "netdata", + "dbengine_global_file_descriptors", + NULL, + "dbengine io", + NULL, + "Netdata DB engine File Descriptors", + "descriptors", + "netdata", + "stats", + priority, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_fd_current = rrddim_add(st_fd, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_fd_max = rrddim_add(st_fd, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + priority++; + + rrddim_set_by_pointer(st_fd, rd_fd_current, (collected_number)stats_array[32]); + /* Careful here, modify this accordingly if the File-Descriptor budget ever changes */ + rrddim_set_by_pointer(st_fd, rd_fd_max, (collected_number)rlimit_nofile.rlim_cur / 4); + rrdset_done(st_fd); + } + } + } +} +#endif // ENABLE_DBENGINE + +static void update_strings_charts() { + static RRDSET *st_ops = NULL, *st_entries = NULL, *st_mem = NULL; + static RRDDIM *rd_ops_inserts = NULL, *rd_ops_deletes = NULL; + static RRDDIM *rd_entries_entries = NULL; + static RRDDIM *rd_mem = NULL; +#ifdef NETDATA_INTERNAL_CHECKS + static RRDDIM *rd_entries_refs = NULL, *rd_ops_releases = NULL, *rd_ops_duplications = NULL, *rd_ops_searches = NULL; +#endif + + size_t inserts, deletes, searches, entries, references, memory, duplications, releases; + + string_statistics(&inserts, &deletes, &searches, &entries, &references, &memory, &duplications, &releases); + + if 
(unlikely(!st_ops)) { + st_ops = rrdset_create_localhost( + "netdata" + , "strings_ops" + , NULL + , "strings" + , NULL + , "Strings operations" + , "ops/s" + , "netdata" + , "stats" + , 910000 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE); + + rd_ops_inserts = rrddim_add(st_ops, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ops_deletes = rrddim_add(st_ops, "deletes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); +#ifdef NETDATA_INTERNAL_CHECKS + rd_ops_searches = rrddim_add(st_ops, "searches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ops_duplications = rrddim_add(st_ops, "duplications", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ops_releases = rrddim_add(st_ops, "releases", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); +#endif + } + + rrddim_set_by_pointer(st_ops, rd_ops_inserts, (collected_number)inserts); + rrddim_set_by_pointer(st_ops, rd_ops_deletes, (collected_number)deletes); +#ifdef NETDATA_INTERNAL_CHECKS + rrddim_set_by_pointer(st_ops, rd_ops_searches, (collected_number)searches); + rrddim_set_by_pointer(st_ops, rd_ops_duplications, (collected_number)duplications); + rrddim_set_by_pointer(st_ops, rd_ops_releases, (collected_number)releases); +#endif + rrdset_done(st_ops); + + if (unlikely(!st_entries)) { + st_entries = rrdset_create_localhost( + "netdata" + , "strings_entries" + , NULL + , "strings" + , NULL + , "Strings entries" + , "entries" + , "netdata" + , "stats" + , 910001 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA); + + rd_entries_entries = rrddim_add(st_entries, "entries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); +#ifdef NETDATA_INTERNAL_CHECKS + rd_entries_refs = rrddim_add(st_entries, "references", NULL, 1, -1, RRD_ALGORITHM_ABSOLUTE); +#endif + } + + rrddim_set_by_pointer(st_entries, rd_entries_entries, (collected_number)entries); +#ifdef NETDATA_INTERNAL_CHECKS + rrddim_set_by_pointer(st_entries, rd_entries_refs, (collected_number)references); +#endif + rrdset_done(st_entries); + + if (unlikely(!st_mem)) { + st_mem = 
rrdset_create_localhost( + "netdata" + , "strings_memory" + , NULL + , "strings" + , NULL + , "Strings memory" + , "bytes" + , "netdata" + , "stats" + , 910001 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA); + + rd_mem = rrddim_add(st_mem, "memory", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_mem, rd_mem, (collected_number)memory); + rrdset_done(st_mem); +} + +static void update_heartbeat_charts() { + static RRDSET *st_heartbeat = NULL; + static RRDDIM *rd_heartbeat_min = NULL; + static RRDDIM *rd_heartbeat_max = NULL; + static RRDDIM *rd_heartbeat_avg = NULL; + + if (unlikely(!st_heartbeat)) { + st_heartbeat = rrdset_create_localhost( + "netdata" + , "heartbeat" + , NULL + , "heartbeat" + , NULL + , "System clock jitter" + , "microseconds" + , "netdata" + , "stats" + , 900000 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA); + + rd_heartbeat_min = rrddim_add(st_heartbeat, "min", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_heartbeat_max = rrddim_add(st_heartbeat, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_heartbeat_avg = rrddim_add(st_heartbeat, "average", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + usec_t min, max, average; + size_t count; + + heartbeat_statistics(&min, &max, &average, &count); + + rrddim_set_by_pointer(st_heartbeat, rd_heartbeat_min, (collected_number)min); + rrddim_set_by_pointer(st_heartbeat, rd_heartbeat_max, (collected_number)max); + rrddim_set_by_pointer(st_heartbeat, rd_heartbeat_avg, (collected_number)average); + + rrdset_done(st_heartbeat); +} + +// --------------------------------------------------------------------------------------------------------------------- +// dictionary statistics + +struct dictionary_stats dictionary_stats_category_collectors = { .name = "collectors" }; +struct dictionary_stats dictionary_stats_category_rrdhost = { .name = "rrdhost" }; +struct dictionary_stats dictionary_stats_category_rrdset_rrddim = { .name = "rrdset_rrddim" }; +struct dictionary_stats 
dictionary_stats_category_rrdcontext = { .name = "context" }; +struct dictionary_stats dictionary_stats_category_rrdlabels = { .name = "labels" }; +struct dictionary_stats dictionary_stats_category_rrdhealth = { .name = "health" }; +struct dictionary_stats dictionary_stats_category_functions = { .name = "functions" }; +struct dictionary_stats dictionary_stats_category_replication = { .name = "replication" }; + +#ifdef DICT_WITH_STATS +struct dictionary_categories {/* Chart state for one dictionary category: the source of the counters (stats), the chart family, the context prefix and the base priority, followed by the chart and dimension handles of the six charts (dictionaries, items, ops, callbacks, memory, spins). The handles stay NULL until update_dictionary_category_charts() creates the corresponding chart. */ + struct dictionary_stats *stats; + const char *family; + const char *context_prefix; + int priority; + + RRDSET *st_dicts; + RRDDIM *rd_dicts_active; + RRDDIM *rd_dicts_deleted; + + RRDSET *st_items; + RRDDIM *rd_items_entries; + RRDDIM *rd_items_referenced; + RRDDIM *rd_items_pending_deletion; + + RRDSET *st_ops; + RRDDIM *rd_ops_creations; + RRDDIM *rd_ops_destructions; + RRDDIM *rd_ops_flushes; + RRDDIM *rd_ops_traversals; + RRDDIM *rd_ops_walkthroughs; + RRDDIM *rd_ops_garbage_collections; + RRDDIM *rd_ops_searches; + RRDDIM *rd_ops_inserts; + RRDDIM *rd_ops_resets; + RRDDIM *rd_ops_deletes; + + RRDSET *st_callbacks; + RRDDIM *rd_callbacks_inserts; + RRDDIM *rd_callbacks_conflicts; + RRDDIM *rd_callbacks_reacts; + RRDDIM *rd_callbacks_deletes; + + RRDSET *st_memory; + RRDDIM *rd_memory_indexed; + RRDDIM *rd_memory_values; + RRDDIM *rd_memory_dict; + + RRDSET *st_spins; + RRDDIM *rd_spins_use; + RRDDIM *rd_spins_search; + RRDDIM *rd_spins_insert; + RRDDIM *rd_spins_delete; +/* In the table below the values after .stats are positional: they fill family, context_prefix and priority, in that order. */ +} dictionary_categories[] = { + { .stats = &dictionary_stats_category_collectors, "dictionaries collectors", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_rrdhost, "dictionaries hosts", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_rrdset_rrddim, "dictionaries rrd", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_rrdcontext, "dictionaries contexts", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_rrdlabels, "dictionaries labels", "dictionaries", 900000
}, + { .stats = &dictionary_stats_category_rrdhealth, "dictionaries health", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_functions, "dictionaries functions", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_replication, "dictionaries replication", "dictionaries", 900000 }, + { .stats = &dictionary_stats_category_other, "dictionaries other", "dictionaries", 900000 }, + + // terminator + { .stats = NULL, NULL, NULL, 0 }, +}; +/* Copies one counter (member path x) from c->stats into the local snapshot "stats" with a relaxed atomic load and adds its value to the running "total". It expects c, stats and total to be in scope at the point of use. */ +#define load_dictionary_stats_entry(x) total += (size_t)(stats.x = __atomic_load_n(&c->stats->x, __ATOMIC_RELAXED)) +/* Updates the charts of one dictionary category (c). For each of the six charts the counters are first snapshotted from c->stats; the chart is created the first time the sum of its counters is non-zero, and once it exists it is updated on every call. The charts use priorities c->priority + 0 up to c->priority + 5 and each gets a "category" label holding the category name. */ +static void update_dictionary_category_charts(struct dictionary_categories *c) { + struct dictionary_stats stats; + stats.name = c->stats->name; + + // ------------------------------------------------------------------------ + + size_t total = 0; + load_dictionary_stats_entry(dictionaries.active); + load_dictionary_stats_entry(dictionaries.deleted); + + if(c->st_dicts || total != 0) { + if (unlikely(!c->st_dicts)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.dictionaries", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.dictionaries", c->context_prefix); + + c->st_dicts = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionaries" + , "dictionaries" + , "netdata" + , "stats" + , c->priority + 0 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_dicts_active = rrddim_add(c->st_dicts, "active", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_dicts_deleted = rrddim_add(c->st_dicts, "deleted", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrdlabels_add(c->st_dicts->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_dicts, c->rd_dicts_active, (collected_number)stats.dictionaries.active); + rrddim_set_by_pointer(c->st_dicts, c->rd_dicts_deleted, (collected_number)stats.dictionaries.deleted);
+ rrdset_done(c->st_dicts); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(items.entries); + load_dictionary_stats_entry(items.referenced); + load_dictionary_stats_entry(items.pending_deletion); + + if(c->st_items || total != 0) { + if (unlikely(!c->st_items)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.items", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.items", c->context_prefix); + + c->st_items = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Items" + , "items" + , "netdata" + , "stats" + , c->priority + 1 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_items_entries = rrddim_add(c->st_items, "active", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_items_pending_deletion = rrddim_add(c->st_items, "deleted", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_items_referenced = rrddim_add(c->st_items, "referenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrdlabels_add(c->st_items->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_items, c->rd_items_entries, stats.items.entries); + rrddim_set_by_pointer(c->st_items, c->rd_items_pending_deletion, stats.items.pending_deletion); + rrddim_set_by_pointer(c->st_items, c->rd_items_referenced, stats.items.referenced); + rrdset_done(c->st_items); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(ops.creations); + load_dictionary_stats_entry(ops.destructions); + load_dictionary_stats_entry(ops.flushes); + load_dictionary_stats_entry(ops.traversals); + load_dictionary_stats_entry(ops.walkthroughs); + load_dictionary_stats_entry(ops.garbage_collections); + load_dictionary_stats_entry(ops.searches); + load_dictionary_stats_entry(ops.inserts); + 
load_dictionary_stats_entry(ops.resets); + load_dictionary_stats_entry(ops.deletes); + + if(c->st_ops || total != 0) { + if (unlikely(!c->st_ops)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.ops", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.ops", c->context_prefix); + + c->st_ops = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Operations" + , "ops/s" + , "netdata" + , "stats" + , c->priority + 2 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_ops_creations = rrddim_add(c->st_ops, "creations", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_destructions = rrddim_add(c->st_ops, "destructions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_flushes = rrddim_add(c->st_ops, "flushes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_traversals = rrddim_add(c->st_ops, "traversals", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_walkthroughs = rrddim_add(c->st_ops, "walkthroughs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_garbage_collections = rrddim_add(c->st_ops, "garbage_collections", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_searches = rrddim_add(c->st_ops, "searches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_inserts = rrddim_add(c->st_ops, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_resets = rrddim_add(c->st_ops, "resets", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_deletes = rrddim_add(c->st_ops, "deletes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrdlabels_add(c->st_ops->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_ops, c->rd_ops_creations, (collected_number)stats.ops.creations); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_destructions, (collected_number)stats.ops.destructions); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_flushes, 
(collected_number)stats.ops.flushes); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_traversals, (collected_number)stats.ops.traversals); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_walkthroughs, (collected_number)stats.ops.walkthroughs); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_garbage_collections, (collected_number)stats.ops.garbage_collections); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_searches, (collected_number)stats.ops.searches); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_inserts, (collected_number)stats.ops.inserts); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_resets, (collected_number)stats.ops.resets); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_deletes, (collected_number)stats.ops.deletes); + + rrdset_done(c->st_ops); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(callbacks.inserts); + load_dictionary_stats_entry(callbacks.conflicts); + load_dictionary_stats_entry(callbacks.reacts); + load_dictionary_stats_entry(callbacks.deletes); + + if(c->st_callbacks || total != 0) { + if (unlikely(!c->st_callbacks)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.callbacks", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.callbacks", c->context_prefix); + + c->st_callbacks = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Callbacks" + , "callbacks/s" + , "netdata" + , "stats" + , c->priority + 3 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_callbacks_inserts = rrddim_add(c->st_callbacks, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_callbacks_deletes = rrddim_add(c->st_callbacks, "deletes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_callbacks_conflicts = rrddim_add(c->st_callbacks, "conflicts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_callbacks_reacts = 
rrddim_add(c->st_callbacks, "reacts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrdlabels_add(c->st_callbacks->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_inserts, (collected_number)stats.callbacks.inserts); + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_conflicts, (collected_number)stats.callbacks.conflicts); + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_reacts, (collected_number)stats.callbacks.reacts); + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_deletes, (collected_number)stats.callbacks.deletes); + + rrdset_done(c->st_callbacks); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(memory.index); + load_dictionary_stats_entry(memory.values); + load_dictionary_stats_entry(memory.dict); + + if(c->st_memory || total != 0) { + if (unlikely(!c->st_memory)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.memory", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.memory", c->context_prefix); + + c->st_memory = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Memory" + , "bytes" + , "netdata" + , "stats" + , c->priority + 4 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + c->rd_memory_indexed = rrddim_add(c->st_memory, "index", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_memory_values = rrddim_add(c->st_memory, "data", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_memory_dict = rrddim_add(c->st_memory, "structures", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrdlabels_add(c->st_memory->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_memory, c->rd_memory_indexed, (collected_number)stats.memory.index); + rrddim_set_by_pointer(c->st_memory, c->rd_memory_values, 
(collected_number)stats.memory.values); + rrddim_set_by_pointer(c->st_memory, c->rd_memory_dict, (collected_number)stats.memory.dict); + + rrdset_done(c->st_memory); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(spin_locks.use_spins); + load_dictionary_stats_entry(spin_locks.search_spins); + load_dictionary_stats_entry(spin_locks.insert_spins); + load_dictionary_stats_entry(spin_locks.delete_spins); + + if(c->st_spins || total != 0) { + if (unlikely(!c->st_spins)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.spins", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.spins", c->context_prefix); + + c->st_spins = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Spins" + , "count" + , "netdata" + , "stats" + , c->priority + 5 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_spins_use = rrddim_add(c->st_spins, "use", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_spins_search = rrddim_add(c->st_spins, "search", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_spins_insert = rrddim_add(c->st_spins, "insert", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_spins_delete = rrddim_add(c->st_spins, "delete", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrdlabels_add(c->st_spins->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_spins, c->rd_spins_use, (collected_number)stats.spin_locks.use_spins); + rrddim_set_by_pointer(c->st_spins, c->rd_spins_search, (collected_number)stats.spin_locks.search_spins); + rrddim_set_by_pointer(c->st_spins, c->rd_spins_insert, (collected_number)stats.spin_locks.insert_spins); + rrddim_set_by_pointer(c->st_spins, c->rd_spins_delete, (collected_number)stats.spin_locks.delete_spins); + + rrdset_done(c->st_spins); + } +} + +static void 
dictionary_statistics(void) {/* Updates the charts of every entry of dictionary_categories[], stopping at the terminator entry whose stats pointer is NULL. */ + for(int i = 0; dictionary_categories[i].stats ;i++) { + update_dictionary_category_charts(&dictionary_categories[i]); + } +} +#endif // DICT_WITH_STATS + +#ifdef NETDATA_TRACE_ALLOCATIONS +/* The four charts to which do_memory_trace_item() adds one dimension per traced function. */ +struct memory_trace_data { + RRDSET *st_memory; + RRDSET *st_allocations; + RRDSET *st_avg_alloc; + RRDSET *st_ops; +}; +/* Callback handed to malloc_trace_walkthrough(). item is used as a struct malloc_trace and data as the struct memory_trace_data holding the charts. For the entry it adds, once, a dimension named p->function to each chart and then sets: the bytes, the allocations, the average allocation size, and the sum of the malloc, calloc, realloc, strdup and free call counters. Always returns 1. */ +static int do_memory_trace_item(void *item, void *data) { + struct memory_trace_data *tmp = data; + struct malloc_trace *p = item; + + // ------------------------------------------------------------------------ + + if(!p->rd_bytes) + p->rd_bytes = rrddim_add(tmp->st_memory, p->function, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + collected_number bytes = (collected_number)__atomic_load_n(&p->bytes, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_memory, p->rd_bytes, bytes); + + // ------------------------------------------------------------------------ + + if(!p->rd_allocations) + p->rd_allocations = rrddim_add(tmp->st_allocations, p->function, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + collected_number allocs = (collected_number)__atomic_load_n(&p->allocations, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_allocations, p->rd_allocations, allocs); + + // ------------------------------------------------------------------------ + + if(!p->rd_avg_alloc) + p->rd_avg_alloc = rrddim_add(tmp->st_avg_alloc, p->function, NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); +/* The average is stored multiplied by 100 and is 0 when there are no allocations. NOTE(review): presumably this pairs with the 100 passed to rrddim_add() above acting as a divisor - confirm against rrddim_add(). */ + collected_number avg_alloc = (allocs)?(bytes * 100 / allocs):0; + rrddim_set_by_pointer(tmp->st_avg_alloc, p->rd_avg_alloc, avg_alloc); + + // ------------------------------------------------------------------------ + + if(!p->rd_ops) + p->rd_ops = rrddim_add(tmp->st_ops, p->function, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + collected_number ops = 0; + ops += (collected_number)__atomic_load_n(&p->malloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->calloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->realloc_calls,
__ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->strdup_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->free_calls, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_ops, p->rd_ops, ops); + + // ------------------------------------------------------------------------ + + return 1; +} +static void malloc_trace_statistics(void) {/* Creates, on first use, the four per-function memory charts (size, operations, allocations, average allocation size), keeps them in a function-local static, lets malloc_trace_walkthrough() pass every traced entry to do_memory_trace_item(), and finally completes all four charts with rrdset_done(). */ + static struct memory_trace_data tmp = { + .st_memory = NULL, + .st_allocations = NULL, + .st_avg_alloc = NULL, + .st_ops = NULL, + }; + + if(!tmp.st_memory) { + tmp.st_memory = rrdset_create_localhost( + "netdata" + , "memory_size" + , NULL + , "memory" + , "netdata.memory.size" + , "Netdata Memory Used by Function" + , "bytes" + , "netdata" + , "stats" + , 900000 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + if(!tmp.st_ops) { + tmp.st_ops = rrdset_create_localhost( + "netdata" + , "memory_operations" + , NULL + , "memory" + , "netdata.memory.operations" + , "Netdata Memory Operations by Function" + , "ops/s" + , "netdata" + , "stats" + , 900001 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + } + + if(!tmp.st_allocations) { + tmp.st_allocations = rrdset_create_localhost( + "netdata" + , "memory_allocations" + , NULL + , "memory" + , "netdata.memory.allocations" + , "Netdata Memory Allocations by Function" + , "allocations" + , "netdata" + , "stats" + , 900002 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + if(!tmp.st_avg_alloc) { + tmp.st_avg_alloc = rrdset_create_localhost( + "netdata" + , "memory_avg_alloc" + , NULL + , "memory" + , "netdata.memory.avg_alloc" + , "Netdata Average Allocation Size by Function" + , "bytes" + , "netdata" + , "stats" + , 900003 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + } + + malloc_trace_walkthrough(do_memory_trace_item, &tmp); + + rrdset_done(tmp.st_memory); + rrdset_done(tmp.st_ops); + rrdset_done(tmp.st_allocations); + rrdset_done(tmp.st_avg_alloc); +} +#endif + +//
--------------------------------------------------------------------------------------------------------------------- +// worker utilization +/* Start value of the running minimums workers_min_busy_time and workers_cpu_min. The chart code replaces a minimum that is still equal to it with 0.0 before charting. */ +#define WORKERS_MIN_PERCENT_DEFAULT 10000.0 +/* State of one job type within a worker group: its name and units, the jobs started and busy time with their dimensions on the per-group charts, and, for custom metrics, the min, max, sum and count of the values together with the dedicated chart and its dimensions. */ +struct worker_job_type_gs { + STRING *name; + STRING *units; + + size_t jobs_started; + usec_t busy_time; + + RRDDIM *rd_jobs_started; + RRDDIM *rd_busy_time; + + WORKER_METRIC_TYPE type; + NETDATA_DOUBLE min_value; + NETDATA_DOUBLE max_value; + NETDATA_DOUBLE sum_value; + size_t count_value; + + RRDSET *st; + RRDDIM *rd_min; + RRDDIM *rd_max; + RRDDIM *rd_avg; +}; +/* Record of one thread of a worker group, linked to its siblings through next and prev. NOTE(review): the *_old members presumably hold the readings of the previous iteration - confirm against the code that fills them. */ +struct worker_thread { + pid_t pid; + bool enabled; + + bool cpu_enabled; + double cpu; + + kernel_uint_t utime; + kernel_uint_t stime; + + kernel_uint_t utime_old; + kernel_uint_t stime_old; + + usec_t collected_time; + usec_t collected_time_old; + + size_t jobs_started; + usec_t busy_time; + + struct worker_thread *next; + struct worker_thread *prev; +}; +/* One worker group, an entry of all_workers_utilization[]: its name, chart family and base priority, the lowercased name used in chart ids and contexts, the per job type state, the totals that workers_utilization_reset_statistics() clears, the list of threads, and the handles of the charts of the group. */ +struct worker_utilization { + const char *name; + const char *family; + size_t priority; + uint32_t flags; + + char *name_lowercase; + + struct worker_job_type_gs per_job_type[WORKER_UTILIZATION_MAX_JOB_TYPES]; + + size_t workers_max_job_id; + size_t workers_registered; + size_t workers_busy; + usec_t workers_total_busy_time; + usec_t workers_total_duration; + size_t workers_total_jobs_started; + double workers_min_busy_time; + double workers_max_busy_time; + + size_t workers_cpu_registered; + double workers_cpu_min; + double workers_cpu_max; + double workers_cpu_total; + + struct worker_thread *threads; + + RRDSET *st_workers_time; + RRDDIM *rd_workers_time_avg; + RRDDIM *rd_workers_time_min; + RRDDIM *rd_workers_time_max; + + RRDSET *st_workers_cpu; + RRDDIM *rd_workers_cpu_avg; + RRDDIM *rd_workers_cpu_min; + RRDDIM *rd_workers_cpu_max; + + RRDSET *st_workers_threads; + RRDDIM *rd_workers_threads_free; + RRDDIM *rd_workers_threads_busy; + + RRDSET *st_workers_jobs_per_job_type; + RRDSET *st_workers_busy_per_job_type; +/* dimension of this group on the shared "workers_cpu" chart; the spelling of the identifier is kept as it is used elsewhere in the file */ + RRDDIM *rd_total_cpu_utilizaton;
+}; +/* The worker groups that get charts. name identifies the group, family is the chart family of its charts and priority is their base chart priority (1000000 for every entry visible here). All other members start zeroed. Loops over this table stop at the entry whose name is NULL. */ +static struct worker_utilization all_workers_utilization[] = { + { .name = "STATS", .family = "workers global statistics", .priority = 1000000 }, + { .name = "HEALTH", .family = "workers health alarms", .priority = 1000000 }, + { .name = "MLTRAIN", .family = "workers ML training", .priority = 1000000 }, + { .name = "MLDETECT", .family = "workers ML detection", .priority = 1000000 }, + { .name = "STREAMRCV", .family = "workers streaming receive", .priority = 1000000 }, + { .name = "STREAMSND", .family = "workers streaming send", .priority = 1000000 }, + { .name = "DBENGINE", .family = "workers dbengine instances", .priority = 1000000 }, + { .name = "LIBUV", .family = "workers libuv threadpool", .priority = 1000000 }, + { .name = "WEB", .family = "workers web server", .priority = 1000000 }, + { .name = "ACLKQUERY", .family = "workers aclk query", .priority = 1000000 }, + { .name = "ACLKSYNC", .family = "workers aclk host sync", .priority = 1000000 }, + { .name = "METASYNC", .family = "workers metadata sync", .priority = 1000000 }, + { .name = "PLUGINSD", .family = "workers plugins.d", .priority = 1000000 }, + { .name = "STATSD", .family = "workers plugin statsd", .priority = 1000000 }, + { .name = "STATSDFLUSH", .family = "workers plugin statsd flush", .priority = 1000000 }, + { .name = "PROC", .family = "workers plugin proc", .priority = 1000000 }, + { .name = "WIN", .family = "workers plugin windows", .priority = 1000000 }, + { .name = "NETDEV", .family = "workers plugin proc netdev", .priority = 1000000 }, + { .name = "FREEBSD", .family = "workers plugin freebsd", .priority = 1000000 }, + { .name = "MACOS", .family = "workers plugin macos", .priority = 1000000 }, + { .name = "CGROUPS", .family = "workers plugin cgroups", .priority = 1000000 }, + { .name = "CGROUPSDISC", .family = "workers plugin cgroups find", .priority = 1000000 }, + { .name = "DISKSPACE", .family = "workers plugin diskspace", .priority = 1000000 }, + { .name = "TC", .family = "workers
plugin tc", .priority = 1000000 }, + { .name = "TIMEX", .family = "workers plugin timex", .priority = 1000000 }, + { .name = "IDLEJITTER", .family = "workers plugin idlejitter", .priority = 1000000 }, + { .name = "LOGSMANAGPLG",.family = "workers plugin logs management", .priority = 1000000 }, + { .name = "RRDCONTEXT", .family = "workers contexts", .priority = 1000000 }, + { .name = "REPLICATION", .family = "workers replication sender", .priority = 1000000 }, + { .name = "SERVICE", .family = "workers service", .priority = 1000000 }, + { .name = "PROFILER", .family = "workers profile", .priority = 1000000 }, + + // has to be terminated with a NULL + { .name = NULL, .family = NULL } +}; + +static void workers_total_cpu_utilization_chart(void) { + size_t i, cpu_enabled = 0; + for(i = 0; all_workers_utilization[i].name ;i++) + if(all_workers_utilization[i].workers_cpu_registered) cpu_enabled++; + + if(!cpu_enabled) return; + + static RRDSET *st = NULL; + + if(!st) { + st = rrdset_create_localhost( + "netdata", + "workers_cpu", + NULL, + "workers", + "netdata.workers.cpu_total", + "Netdata Workers CPU Utilization (100% = 1 core)", + "%", + "netdata", + "stats", + 999000, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + } + + for(i = 0; all_workers_utilization[i].name ;i++) { + struct worker_utilization *wu = &all_workers_utilization[i]; + if(!wu->workers_cpu_registered) continue; + + if(!wu->rd_total_cpu_utilizaton) + wu->rd_total_cpu_utilizaton = rrddim_add(st, wu->name_lowercase, NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st, wu->rd_total_cpu_utilizaton, (collected_number)((double)wu->workers_cpu_total * 100.0)); + } + + rrdset_done(st); +} + +#define WORKER_CHART_DECIMAL_PRECISION 100 + +static void workers_utilization_update_chart(struct worker_utilization *wu) { + if(!wu->workers_registered) return; + + //fprintf(stderr, "%-12s WORKER UTILIZATION: %-3.2f%%, %zu jobs done, %zu running, on %zu workers, min %-3.02f%%, max %-3.02f%%.\n", + 
// wu->name, + // (double)wu->workers_total_busy_time * 100.0 / (double)wu->workers_total_duration, + // wu->workers_total_jobs_started, wu->workers_busy, wu->workers_registered, + // wu->workers_min_busy_time, wu->workers_max_busy_time); +/* Overview of workers_utilization_update_chart(): it returns early when the group has no registered workers and otherwise updates, in this order, the busy time chart, the CPU chart (compiled on Linux only), the jobs started by type chart, the busy time by type chart, the threads chart (created once more than one worker is registered) and one chart per custom metric job type. Charts and dimensions are created on first need. The first section below is the busy time chart, in percent: the min and max dimensions are added only when more than one worker is registered, the average is total busy time over total duration, and values are multiplied by WORKER_CHART_DECIMAL_PRECISION, the same constant that is passed to rrddim_add() for these dimensions. NOTE(review): the chart id is built with snprintfz() but the context with snprintf(); confirm whether snprintfz() was intended for both. */ + // ---------------------------------------------------------------------- + + if(unlikely(!wu->st_workers_time)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_time_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.time", wu->name_lowercase); + + wu->st_workers_time = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Busy Time (100% = all workers busy)" + , "%" + , "netdata" + , "stats" + , wu->priority + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + } + + // we add the min and max dimensions only when we have multiple workers + + if(unlikely(!wu->rd_workers_time_min && wu->workers_registered > 1)) + wu->rd_workers_time_min = rrddim_add(wu->st_workers_time, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(!wu->rd_workers_time_max && wu->workers_registered > 1)) + wu->rd_workers_time_max = rrddim_add(wu->st_workers_time, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(!wu->rd_workers_time_avg)) + wu->rd_workers_time_avg = rrddim_add(wu->st_workers_time, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(wu->workers_min_busy_time == WORKERS_MIN_PERCENT_DEFAULT)) wu->workers_min_busy_time = 0.0; + + if(wu->rd_workers_time_min) + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_min, (collected_number)((double)wu->workers_min_busy_time * WORKER_CHART_DECIMAL_PRECISION)); + + if(wu->rd_workers_time_max) + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_max, (collected_number)((double)wu->workers_max_busy_time
* WORKER_CHART_DECIMAL_PRECISION)); + + if(wu->workers_total_duration == 0) + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_avg, 0); + else + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_avg, (collected_number)((double)wu->workers_total_busy_time * 100.0 * WORKER_CHART_DECIMAL_PRECISION / (double)wu->workers_total_duration)); + + rrdset_done(wu->st_workers_time); + + // ---------------------------------------------------------------------- + +#ifdef __linux__ + if(wu->workers_cpu_registered || wu->st_workers_cpu) { + if(unlikely(!wu->st_workers_cpu)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_cpu_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.cpu", wu->name_lowercase); + + wu->st_workers_cpu = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers CPU Utilization (100% = all workers busy)" + , "%" + , "netdata" + , "stats" + , wu->priority + 1 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + } + + if (unlikely(!wu->rd_workers_cpu_min && wu->workers_registered > 1)) + wu->rd_workers_cpu_min = rrddim_add(wu->st_workers_cpu, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if (unlikely(!wu->rd_workers_cpu_max && wu->workers_registered > 1)) + wu->rd_workers_cpu_max = rrddim_add(wu->st_workers_cpu, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(!wu->rd_workers_cpu_avg)) + wu->rd_workers_cpu_avg = rrddim_add(wu->st_workers_cpu, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(wu->workers_cpu_min == WORKERS_MIN_PERCENT_DEFAULT)) wu->workers_cpu_min = 0.0; + + if(wu->rd_workers_cpu_min) + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_min, (collected_number)(wu->workers_cpu_min * WORKER_CHART_DECIMAL_PRECISION)); + + 
if(wu->rd_workers_cpu_max) + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_max, (collected_number)(wu->workers_cpu_max * WORKER_CHART_DECIMAL_PRECISION)); + + if(wu->workers_cpu_registered == 0) + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_avg, 0); + else + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_avg, (collected_number)( wu->workers_cpu_total * WORKER_CHART_DECIMAL_PRECISION / (NETDATA_DOUBLE)wu->workers_cpu_registered )); + + rrdset_done(wu->st_workers_cpu); + } +#endif + + // ---------------------------------------------------------------------- + + if(unlikely(!wu->st_workers_jobs_per_job_type)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_jobs_by_type_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.jobs_started_by_type", wu->name_lowercase); + + wu->st_workers_jobs_per_job_type = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Jobs Started by Type" + , "jobs" + , "netdata" + , "stats" + , wu->priority + 2 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + { + size_t i; + for(i = 0; i <= wu->workers_max_job_id ;i++) { + if(unlikely(wu->per_job_type[i].type != WORKER_METRIC_IDLE_BUSY)) + continue; + + if (wu->per_job_type[i].name) { + + if(unlikely(!wu->per_job_type[i].rd_jobs_started)) + wu->per_job_type[i].rd_jobs_started = rrddim_add(wu->st_workers_jobs_per_job_type, string2str(wu->per_job_type[i].name), NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(wu->st_workers_jobs_per_job_type, wu->per_job_type[i].rd_jobs_started, (collected_number)(wu->per_job_type[i].jobs_started)); + } + } + } + + rrdset_done(wu->st_workers_jobs_per_job_type); + + // ---------------------------------------------------------------------- + + if(unlikely(!wu->st_workers_busy_per_job_type)) { + char name[RRD_ID_LENGTH_MAX + 1]; + 
snprintfz(name, RRD_ID_LENGTH_MAX, "workers_busy_time_by_type_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.time_by_type", wu->name_lowercase); + + wu->st_workers_busy_per_job_type = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Busy Time by Type" + , "ms" + , "netdata" + , "stats" + , wu->priority + 3 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + { + size_t i; + for(i = 0; i <= wu->workers_max_job_id ;i++) { + if(unlikely(wu->per_job_type[i].type != WORKER_METRIC_IDLE_BUSY)) + continue; + + if (wu->per_job_type[i].name) { + + if(unlikely(!wu->per_job_type[i].rd_busy_time)) + wu->per_job_type[i].rd_busy_time = rrddim_add(wu->st_workers_busy_per_job_type, string2str(wu->per_job_type[i].name), NULL, 1, USEC_PER_MS, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(wu->st_workers_busy_per_job_type, wu->per_job_type[i].rd_busy_time, (collected_number)(wu->per_job_type[i].busy_time)); + } + } + } + + rrdset_done(wu->st_workers_busy_per_job_type); + + // ---------------------------------------------------------------------- + + if(wu->st_workers_threads || wu->workers_registered > 1) { + if(unlikely(!wu->st_workers_threads)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_threads_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.threads", wu->name_lowercase); + + wu->st_workers_threads = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Threads" + , "threads" + , "netdata" + , "stats" + , wu->priority + 4 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + wu->rd_workers_threads_free = rrddim_add(wu->st_workers_threads, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + wu->rd_workers_threads_busy = rrddim_add(wu->st_workers_threads, "busy", 
NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(wu->st_workers_threads, wu->rd_workers_threads_free, (collected_number)(wu->workers_registered - wu->workers_busy)); + rrddim_set_by_pointer(wu->st_workers_threads, wu->rd_workers_threads_busy, (collected_number)(wu->workers_busy)); + rrdset_done(wu->st_workers_threads); + } + + // ---------------------------------------------------------------------- + // custom metric types WORKER_METRIC_ABSOLUTE + + { + size_t i; + for (i = 0; i <= wu->workers_max_job_id; i++) { + if(wu->per_job_type[i].type != WORKER_METRIC_ABSOLUTE) + continue; + + if(!wu->per_job_type[i].count_value) + continue; + + if(!wu->per_job_type[i].st) { + size_t job_name_len = string_strlen(wu->per_job_type[i].name); + if(job_name_len > RRD_ID_LENGTH_MAX) job_name_len = RRD_ID_LENGTH_MAX; + + char job_name_sanitized[job_name_len + 1]; + rrdset_strncpyz_name(job_name_sanitized, string2str(wu->per_job_type[i].name), job_name_len); + + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_%s_value_%s", wu->name_lowercase, job_name_sanitized); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.value.%s", wu->name_lowercase, job_name_sanitized); + + char title[1000 + 1]; + snprintf(title, 1000, "Netdata Workers %s value of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name)); + + wu->per_job_type[i].st = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , title + , (wu->per_job_type[i].units)?string2str(wu->per_job_type[i].units):"value" + , "netdata" + , "stats" + , wu->priority + 5 + i + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + wu->per_job_type[i].rd_min = rrddim_add(wu->per_job_type[i].st, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_max = rrddim_add(wu->per_job_type[i].st, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + 
wu->per_job_type[i].rd_avg = rrddim_add(wu->per_job_type[i].st, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_min, (collected_number)(wu->per_job_type[i].min_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_max, (collected_number)(wu->per_job_type[i].max_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_avg, (collected_number)(wu->per_job_type[i].sum_value / wu->per_job_type[i].count_value * WORKER_CHART_DECIMAL_PRECISION)); + + rrdset_done(wu->per_job_type[i].st); + } + } + + // ---------------------------------------------------------------------- + // custom metric types WORKER_METRIC_INCREMENTAL + + { + size_t i; + for (i = 0; i <= wu->workers_max_job_id ; i++) { + if(wu->per_job_type[i].type != WORKER_METRIC_INCREMENT && wu->per_job_type[i].type != WORKER_METRIC_INCREMENTAL_TOTAL) + continue; + + if(!wu->per_job_type[i].count_value) + continue; + + if(!wu->per_job_type[i].st) { + size_t job_name_len = string_strlen(wu->per_job_type[i].name); + if(job_name_len > RRD_ID_LENGTH_MAX) job_name_len = RRD_ID_LENGTH_MAX; + + char job_name_sanitized[job_name_len + 1]; + rrdset_strncpyz_name(job_name_sanitized, string2str(wu->per_job_type[i].name), job_name_len); + + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_%s_rate_%s", wu->name_lowercase, job_name_sanitized); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.rate.%s", wu->name_lowercase, job_name_sanitized); + + char title[1000 + 1]; + snprintf(title, 1000, "Netdata Workers %s rate of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name)); + + wu->per_job_type[i].st = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , title + , 
(wu->per_job_type[i].units)?string2str(wu->per_job_type[i].units):"rate"
+                        , "netdata"
+                        , "stats"
+                        , wu->priority + 5 + i
+                        , localhost->rrd_update_every
+                        , RRDSET_TYPE_LINE
+                );
+
+                wu->per_job_type[i].rd_min = rrddim_add(wu->per_job_type[i].st, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE);
+                wu->per_job_type[i].rd_max = rrddim_add(wu->per_job_type[i].st, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE);
+                wu->per_job_type[i].rd_avg = rrddim_add(wu->per_job_type[i].st, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE);
+            }
+
+            rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_min, (collected_number)(wu->per_job_type[i].min_value * WORKER_CHART_DECIMAL_PRECISION));
+            rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_max, (collected_number)(wu->per_job_type[i].max_value * WORKER_CHART_DECIMAL_PRECISION));
+            rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_avg, (collected_number)(wu->per_job_type[i].sum_value / wu->per_job_type[i].count_value * WORKER_CHART_DECIMAL_PRECISION));
+
+            rrdset_done(wu->per_job_type[i].st);
+        }
+    }
+}
+
+/*
+ * Reset the per-pass accumulators of one worker group.
+ *
+ * Zeroes the busy-time / duration / jobs / CPU totals, seeds the "min"
+ * trackers with WORKERS_MIN_PERCENT_DEFAULT, clears every per-job-type slot
+ * (custom metric aggregates become NAN with a count of 0) and marks every
+ * known thread as not enabled, so that threads which do not report again
+ * are the ones workers_threads_cleanup() removes.
+ *
+ * On the first call it also creates wu->name_lowercase, a lowercased copy
+ * of wu->name.
+ */
+static void workers_utilization_reset_statistics(struct worker_utilization *wu) {
+    wu->workers_registered = 0;
+    wu->workers_busy = 0;
+    wu->workers_total_busy_time = 0;
+    wu->workers_total_duration = 0;
+    wu->workers_total_jobs_started = 0;
+    wu->workers_min_busy_time = WORKERS_MIN_PERCENT_DEFAULT;
+    wu->workers_max_busy_time = 0;
+
+    wu->workers_cpu_registered = 0;
+    wu->workers_cpu_min = WORKERS_MIN_PERCENT_DEFAULT;
+    wu->workers_cpu_max = 0;
+    wu->workers_cpu_total = 0;
+
+    // the lowercase name does not depend on the job type, so build it once
+    // here instead of re-testing for it on every iteration of the loop below
+    if(unlikely(!wu->name_lowercase)) {
+        wu->name_lowercase = strdupz(wu->name);
+
+        // <ctype.h> functions need a value representable as unsigned char;
+        // passing a negative plain char is undefined behaviour
+        for(char *s = wu->name_lowercase; *s ; s++)
+            *s = (char)tolower((unsigned char)*s);
+    }
+
+    for(size_t i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) {
+        wu->per_job_type[i].jobs_started = 0;
+        wu->per_job_type[i].busy_time = 0;
+
+        wu->per_job_type[i].min_value = NAN;
+        wu->per_job_type[i].max_value = NAN;
+        wu->per_job_type[i].sum_value = NAN;
+        wu->per_job_type[i].count_value = 0;
+    }
+
+    struct worker_thread *wt;
+    for(wt = wu->threads; wt ; wt = wt->next) {
+        wt->enabled = false;
+        wt->cpu_enabled = false;
+    }
+}
+
+#define TASK_STAT_PREFIX "/proc/self/task/"
+#define TASK_STAT_SUFFIX "/stat"
+
+/*
+ * Read the user and system CPU counters of one thread of this process.
+ *
+ * On Linux it parses TASK_STAT_PREFIX <pid> TASK_STAT_SUFFIX and stores
+ * words 13 and 14 (zero based) of its first line in *utime and *stime.
+ * The filename buffer and the procfile handle are function-level statics
+ * that are reused across calls.
+ *
+ * Returns 0 on success and -1 when the file cannot be opened or read.
+ * On other platforms both outputs are set to 0 and 1 is returned.
+ */
+static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_uint_t *utime __maybe_unused, kernel_uint_t *stime __maybe_unused) {
+#ifdef __linux__
+    // room for the prefix, the suffix and up to 20 characters of thread id
+    static char filename[sizeof(TASK_STAT_PREFIX) + sizeof(TASK_STAT_SUFFIX) + 20] = TASK_STAT_PREFIX;
+    static size_t start_pos = sizeof(TASK_STAT_PREFIX) - 1;
+    static procfile *ff = NULL;
+
+    // construct the filename
+    size_t end_pos = snprintfz(&filename[start_pos], 20, "%d", pid);
+    strcpy(&filename[start_pos + end_pos], TASK_STAT_SUFFIX);
+
+    // (re)open the procfile to the new filename
+    bool set_quotes = (ff == NULL) ? true : false;
+    ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_ERROR_ON_ERROR_LOG);
+    if(unlikely(!ff)) return -1;
+
+    // only needed on a fresh handle
+    // NOTE(review): presumably makes the parenthesised thread name one word - confirm against procfile
+    if(set_quotes)
+        procfile_set_open_close(ff, "(", ")");
+
+    // read the entire file and split it to lines and words
+    ff = procfile_readall(ff);
+    if(unlikely(!ff)) return -1;
+
+    // parse the numbers we are interested
+    *utime = str2kernel_uint_t(procfile_lineword(ff, 0, 13));
+    *stime = str2kernel_uint_t(procfile_lineword(ff, 0, 14));
+
+    // leave the file open for the next iteration
+
+    return 0;
+#else
+    // TODO: add here cpu time detection per thread, for FreeBSD and MacOS
+    *utime = 0;
+    *stime = 0;
+    return 1;
+#endif
+}
+
+// index of all known worker threads, keyed by thread id
+static Pvoid_t workers_by_pid_JudyL_array = NULL;
+
+// unlink and free every thread of this group that is not marked enabled
+static void workers_threads_cleanup(struct worker_utilization *wu) {
+    struct worker_thread *t = wu->threads;
+    while(t) {
+        struct worker_thread *next = t->next;
+
+        if(!t->enabled) {
+            JudyLDel(&workers_by_pid_JudyL_array, t->pid, PJE0);
+            DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(wu->threads, t, prev, next);
+
freez(t); + } + t = next; + } + } + +static struct worker_thread *worker_thread_find(struct worker_utilization *wu __maybe_unused, pid_t pid) { + struct worker_thread *wt = NULL; + + Pvoid_t *PValue = JudyLGet(workers_by_pid_JudyL_array, pid, PJE0); + if(PValue) + wt = *PValue; + + return wt; +} + +static struct worker_thread *worker_thread_create(struct worker_utilization *wu, pid_t pid) { + struct worker_thread *wt; + + wt = (struct worker_thread *)callocz(1, sizeof(struct worker_thread)); + wt->pid = pid; + + Pvoid_t *PValue = JudyLIns(&workers_by_pid_JudyL_array, pid, PJE0); + *PValue = wt; + + // link it + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(wu->threads, wt, prev, next); + + return wt; +} + +static struct worker_thread *worker_thread_find_or_create(struct worker_utilization *wu, pid_t pid) { + struct worker_thread *wt; + wt = worker_thread_find(wu, pid); + if(!wt) wt = worker_thread_create(wu, pid); + + return wt; +} + +static void worker_utilization_charts_callback(void *ptr + , pid_t pid __maybe_unused + , const char *thread_tag __maybe_unused + , size_t max_job_id __maybe_unused + , size_t utilization_usec __maybe_unused + , size_t duration_usec __maybe_unused + , size_t jobs_started __maybe_unused + , size_t is_running __maybe_unused + , STRING **job_types_names __maybe_unused + , STRING **job_types_units __maybe_unused + , WORKER_METRIC_TYPE *job_types_metric_types __maybe_unused + , size_t *job_types_jobs_started __maybe_unused + , usec_t *job_types_busy_time __maybe_unused + , NETDATA_DOUBLE *job_types_custom_metrics __maybe_unused + ) { + struct worker_utilization *wu = (struct worker_utilization *)ptr; + + // find the worker_thread in the list + struct worker_thread *wt = worker_thread_find_or_create(wu, pid); + + if(utilization_usec > duration_usec) + utilization_usec = duration_usec; + + wt->enabled = true; + wt->busy_time = utilization_usec; + wt->jobs_started = jobs_started; + + wt->utime_old = wt->utime; + wt->stime_old = wt->stime; + 
wt->collected_time_old = wt->collected_time; + + if(max_job_id > wu->workers_max_job_id) + wu->workers_max_job_id = max_job_id; + + wu->workers_total_busy_time += utilization_usec; + wu->workers_total_duration += duration_usec; + wu->workers_total_jobs_started += jobs_started; + wu->workers_busy += is_running; + wu->workers_registered++; + + double util = (double)utilization_usec * 100.0 / (double)duration_usec; + if(util > wu->workers_max_busy_time) + wu->workers_max_busy_time = util; + + if(util < wu->workers_min_busy_time) + wu->workers_min_busy_time = util; + + // accumulate per job type statistics + size_t i; + for(i = 0; i <= max_job_id ;i++) { + if(!wu->per_job_type[i].name && job_types_names[i]) + wu->per_job_type[i].name = string_dup(job_types_names[i]); + + if(!wu->per_job_type[i].units && job_types_units[i]) + wu->per_job_type[i].units = string_dup(job_types_units[i]); + + wu->per_job_type[i].type = job_types_metric_types[i]; + + wu->per_job_type[i].jobs_started += job_types_jobs_started[i]; + wu->per_job_type[i].busy_time += job_types_busy_time[i]; + + NETDATA_DOUBLE value = job_types_custom_metrics[i]; + if(netdata_double_isnumber(value)) { + if(!wu->per_job_type[i].count_value) { + wu->per_job_type[i].count_value = 1; + wu->per_job_type[i].min_value = value; + wu->per_job_type[i].max_value = value; + wu->per_job_type[i].sum_value = value; + } + else { + wu->per_job_type[i].count_value++; + wu->per_job_type[i].sum_value += value; + if(value < wu->per_job_type[i].min_value) wu->per_job_type[i].min_value = value; + if(value > wu->per_job_type[i].max_value) wu->per_job_type[i].max_value = value; + } + } + } + + // find its CPU utilization + if((!read_thread_cpu_time_from_proc_stat(pid, &wt->utime, &wt->stime))) { + wt->collected_time = now_realtime_usec(); + usec_t delta = wt->collected_time - wt->collected_time_old; + + double utime = (double)(wt->utime - wt->utime_old) / (double)system_hz * 100.0 * (double)USEC_PER_SEC / (double)delta; + double stime = 
(double)(wt->stime - wt->stime_old) / (double)system_hz * 100.0 * (double)USEC_PER_SEC / (double)delta;
+        double cpu = utime + stime;
+        wt->cpu = cpu;
+        wt->cpu_enabled = true;
+
+        wu->workers_cpu_total += cpu;
+        if(cpu < wu->workers_cpu_min) wu->workers_cpu_min = cpu;
+        if(cpu > wu->workers_cpu_max) wu->workers_cpu_max = cpu;
+    }
+    wu->workers_cpu_registered += (wt->cpu_enabled) ? 1 : 0;
+}
+
+/*
+ * Run one collection pass over every entry of all_workers_utilization[]:
+ * reset its accumulators, gather fresh numbers through workers_foreach(),
+ * update its charts (not on the very first pass) and drop the threads that
+ * did not report. Finishes with the total CPU utilization chart.
+ */
+static void worker_utilization_charts(void) {
+    static size_t iterations = 0;
+    iterations++;
+
+    for(int i = 0; all_workers_utilization[i].name ;i++) {
+        workers_utilization_reset_statistics(&all_workers_utilization[i]);
+
+        workers_foreach(all_workers_utilization[i].name, worker_utilization_charts_callback, &all_workers_utilization[i]);
+
+        // skip the first iteration, so that we don't accumulate startup utilization to our charts
+        if(likely(iterations > 1))
+            workers_utilization_update_chart(&all_workers_utilization[i]);
+
+        workers_threads_cleanup(&all_workers_utilization[i]);
+    }
+
+    workers_total_cpu_utilization_chart();
+}
+
+/*
+ * Release what the collection passes allocated for every worker group:
+ * the lowercase name, the per-job-type name and units strings, and all
+ * tracked threads.
+ */
+static void worker_utilization_finish(void) {
+    int i, j;
+    for(i = 0; all_workers_utilization[i].name ;i++) {
+        struct worker_utilization *wu = &all_workers_utilization[i];
+
+        if(wu->name_lowercase) {
+            freez(wu->name_lowercase);
+            wu->name_lowercase = NULL;
+        }
+
+        for(j = 0; j < WORKER_UTILIZATION_MAX_JOB_TYPES ;j++) {
+            string_freez(wu->per_job_type[j].name);
+            wu->per_job_type[j].name = NULL;
+
+            string_freez(wu->per_job_type[j].units);
+            wu->per_job_type[j].units = NULL;
+        }
+
+        // mark all threads as not enabled
+        struct worker_thread *t;
+        for(t = wu->threads; t ; t = t->next)
+            t->enabled = false;
+
+        // let the cleanup job free them
+        workers_threads_cleanup(wu);
+    }
+}
+
+// ---------------------------------------------------------------------------------------------------------------------
+// global statistics thread
+
+
+/*
+ * Register the calling thread as a "STATS" worker and name every job id
+ * the statistics threads report through worker_is_busy().
+ *
+ * Called at the start of each statistics thread main function.
+ */
+static void global_statistics_register_workers(void) {
+    worker_register("STATS");
+    worker_register_job_name(WORKER_JOB_GLOBAL, "global");
+    worker_register_job_name(WORKER_JOB_REGISTRY, "registry");
+    worker_register_job_name(WORKER_JOB_DBENGINE, "dbengine");
+    // global_statistics_main() reports this job too; it was the only job id
+    // used without a name, and the busy-time-by-type chart only adds
+    // dimensions for job types that have one
+    worker_register_job_name(WORKER_JOB_HEARTBEAT, "heartbeat");
+    worker_register_job_name(WORKER_JOB_STRINGS, "strings");
+    worker_register_job_name(WORKER_JOB_DICTIONARIES, "dictionaries");
+    worker_register_job_name(WORKER_JOB_MALLOC_TRACE, "malloc_trace");
+    worker_register_job_name(WORKER_JOB_WORKERS, "workers");
+    worker_register_job_name(WORKER_JOB_SQLITE3, "sqlite3");
+}
+
+// cleanup handler of global_statistics_main(): flags the static thread as
+// exiting, unregisters the worker and finally flags the thread as exited
+static void global_statistics_cleanup(void *pptr)
+{
+    struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr);
+    if(!static_thread) return;
+
+    static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;
+
+    worker_unregister();
+    netdata_log_info("cleaning up...");
+
+    static_thread->enabled = NETDATA_MAIN_THREAD_EXITED;
+}
+
+/*
+ * Thread entry point of the global statistics collector.
+ *
+ * Runs once per "update every" interval (never shorter than the localhost
+ * update interval) while SERVICE_COLLECTORS is running, marking each group
+ * of charts as a separate worker job.
+ */
+void *global_statistics_main(void *ptr)
+{
+    CLEANUP_FUNCTION_REGISTER(global_statistics_cleanup) cleanup_ptr = ptr;
+
+    global_statistics_register_workers();
+
+    int update_every =
+        (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every);
+    if (update_every < localhost->rrd_update_every)
+        update_every = localhost->rrd_update_every;
+
+    usec_t step = update_every * USEC_PER_SEC;
+    heartbeat_t hb;
+    heartbeat_init(&hb);
+
+    // keep the randomness at zero
+    // to make sure we are not close to any other thread
+    hb.randomness = 0;
+
+    while (service_running(SERVICE_COLLECTORS)) {
+        worker_is_idle();
+        heartbeat_next(&hb, step);
+
+        worker_is_busy(WORKER_JOB_GLOBAL);
+        global_statistics_charts();
+
+        worker_is_busy(WORKER_JOB_REGISTRY);
+        registry_statistics();
+
+#ifdef ENABLE_DBENGINE
+        if(dbengine_enabled) {
+            worker_is_busy(WORKER_JOB_DBENGINE);
+            dbengine2_statistics_charts();
+        }
+#endif
+
+        worker_is_busy(WORKER_JOB_HEARTBEAT);
+        update_heartbeat_charts();
+
+        worker_is_busy(WORKER_JOB_STRINGS);
+        update_strings_charts();
+
+#ifdef DICT_WITH_STATS
+
worker_is_busy(WORKER_JOB_DICTIONARIES); + dictionary_statistics(); +#endif + +#ifdef NETDATA_TRACE_ALLOCATIONS + worker_is_busy(WORKER_JOB_MALLOC_TRACE); + malloc_trace_statistics(); +#endif + } + + return NULL; +} + + +// --------------------------------------------------------------------------------------------------------------------- +// workers thread + +static void global_statistics_workers_cleanup(void *pptr) +{ + struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr); + if(!static_thread) return; + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + netdata_log_info("cleaning up..."); + + worker_unregister(); + worker_utilization_finish(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *global_statistics_workers_main(void *ptr) +{ + CLEANUP_FUNCTION_REGISTER(global_statistics_workers_cleanup) cleanup_ptr = ptr; + + global_statistics_register_workers(); + + int update_every = + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; + + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + while (service_running(SERVICE_COLLECTORS)) { + worker_is_idle(); + heartbeat_next(&hb, step); + + worker_is_busy(WORKER_JOB_WORKERS); + worker_utilization_charts(); + } + + return NULL; +} + +// --------------------------------------------------------------------------------------------------------------------- +// sqlite3 thread + +static void global_statistics_sqlite3_cleanup(void *pptr) +{ + struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr); + if(!static_thread) return; + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + worker_unregister(); + netdata_log_info("cleaning up..."); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *global_statistics_sqlite3_main(void *ptr) +{ + 
CLEANUP_FUNCTION_REGISTER(global_statistics_sqlite3_cleanup) cleanup_ptr = ptr; + + global_statistics_register_workers(); + + int update_every = + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; + + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + while (service_running(SERVICE_COLLECTORS)) { + worker_is_idle(); + heartbeat_next(&hb, step); + + worker_is_busy(WORKER_JOB_SQLITE3); + sqlite3_statistics_charts(); + } + + return NULL; +} + diff --git a/src/daemon/global_statistics.h b/src/daemon/global_statistics.h new file mode 100644 index 000000000..44717c6cf --- /dev/null +++ b/src/daemon/global_statistics.h @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_GLOBAL_STATISTICS_H +#define NETDATA_GLOBAL_STATISTICS_H 1 + +#include "database/rrd.h" + +extern struct netdata_buffers_statistics { + size_t rrdhost_allocations_size; + size_t rrdhost_senders; + size_t rrdhost_receivers; + size_t query_targets_size; + size_t rrdset_done_rda_size; + size_t buffers_aclk; + size_t buffers_api; + size_t buffers_functions; + size_t buffers_sqlite; + size_t buffers_exporters; + size_t buffers_health; + size_t buffers_streaming; + size_t cbuffers_streaming; + size_t buffers_web; +} netdata_buffers_statistics; + +extern struct dictionary_stats dictionary_stats_category_collectors; +extern struct dictionary_stats dictionary_stats_category_rrdhost; +extern struct dictionary_stats dictionary_stats_category_rrdset_rrddim; +extern struct dictionary_stats dictionary_stats_category_rrdcontext; +extern struct dictionary_stats dictionary_stats_category_rrdlabels; +extern struct dictionary_stats dictionary_stats_category_rrdhealth; +extern struct dictionary_stats dictionary_stats_category_functions; +extern struct dictionary_stats dictionary_stats_category_replication; + 
+extern size_t rrddim_db_memory_size;
+
+// ----------------------------------------------------------------------------
+// global statistics
+
+// accounting hooks called from other subsystems; their definitions are not
+// part of this header
+void global_statistics_ml_query_completed(size_t points_read);
+void global_statistics_ml_models_consulted(size_t models_consulted);
+void global_statistics_exporters_query_completed(size_t points_read);
+void global_statistics_backfill_query_completed(size_t points_read);
+void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source);
+void global_statistics_sqlite3_query_completed(bool success, bool busy, bool locked);
+void global_statistics_sqlite3_row_completed(void);
+void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array);
+
+// (void), not (): before C23 an empty parameter list declares a function
+// with unspecified arguments, so calls are not checked by the compiler
+void global_statistics_gorilla_buffer_add_hot(void);
+
+void global_statistics_tier0_disk_compressed_bytes(uint32_t size);
+void global_statistics_tier0_disk_uncompressed_bytes(uint32_t size);
+
+void global_statistics_web_request_completed(uint64_t dt,
+                                             uint64_t bytes_received,
+                                             uint64_t bytes_sent,
+                                             uint64_t content_size,
+                                             uint64_t compressed_content_size);
+
+uint64_t global_statistics_web_client_connected(void);
+void global_statistics_web_client_disconnected(void);
+
+extern bool global_statistics_enabled;
+
+#endif /* NETDATA_GLOBAL_STATISTICS_H */
diff --git a/src/daemon/main.c b/src/daemon/main.c
new file mode 100644
index 000000000..e2db02097
--- /dev/null
+++ b/src/daemon/main.c
@@ -0,0 +1,2370 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common.h"
+#include "buildinfo.h"
+#include "daemon/watcher.h"
+#include "static_threads.h"
+
+#include "database/engine/page_test.h"
+
+#ifdef ENABLE_SENTRY
+#include "sentry-native/sentry-native.h"
+#endif
+
+#if defined(ENV32BIT)
+#warning COMPILING 32BIT NETDATA
+#endif
+
+bool unittest_running = false;
+int netdata_anonymous_statistics_enabled;
+
+int libuv_worker_threads = MIN_LIBUV_WORKER_THREADS;
+bool ieee754_doubles = false;
+time_t netdata_start_time = 0;
+struct netdata_static_thread *static_threads;
+bool i_am_the_spawn_server = false;
+/*
+ * The configuration object, statically initialised as empty: no sections,
+ * and an AVL index that orders sections with appconfig_section_compare.
+ */
+struct config netdata_config = {
+        .first_section = NULL,
+        .last_section = NULL,
+        .mutex = NETDATA_MUTEX_INITIALIZER,
+        .index = {
+                .avl_tree = {
+                        .root = NULL,
+                        .compar = appconfig_section_compare
+                },
+                .rwlock = AVL_LOCK_INITIALIZER
+        }
+};
+/*
+ * Bookkeeping record of one thread known to the service_* functions.
+ * service_register() allocates one per registering thread and indexes it
+ * by thread id in service_globals.pid_judy.
+ */
+typedef struct service_thread {
+    pid_t tid;                              // thread id, also the index key
+    SERVICE_THREAD_TYPE type;               // selects which member of the union below is set
+    SERVICE_TYPE services;                  // OR of every service this thread asked about in service_running()
+    char name[ND_THREAD_TAG_MAX + 1];       // copy of nd_thread_tag() taken at registration
+    bool stop_immediately;                  // set by service_signal_exit(); makes service_running() return false
+    bool cancelled;                         // set by service_wait_exit() once it has signalled this thread
+
+    union {
+        ND_THREAD *netdata_thread;          // set for SERVICE_THREAD_TYPE_NETDATA (and unknown types)
+        uv_thread_t uv_thread;              // set for the EVENT_LOOP and LIBUV types
+    };
+
+    force_quit_t force_quit_callback;       // called with `data` by service_wait_exit()
+    request_quit_t request_quit_callback;   // called with `data` by service_signal_exit()
+    void *data;
+} SERVICE_THREAD;
+/*
+ * Registry of all SERVICE_THREAD records: a JudyL array keyed by thread
+ * id, accessed with `lock` held.
+ */
+struct service_globals {
+    SPINLOCK lock;
+    Pvoid_t pid_judy;
+} service_globals = {
+    .pid_judy = NULL,
+};
+/*
+ * Return the SERVICE_THREAD record of the calling thread, creating and
+ * indexing it on first use.
+ *
+ * On creation the record stores the given type, callbacks and data, the
+ * thread handle matching the type, and the current thread tag as its name.
+ * When the thread is already registered the existing record is returned
+ * and the arguments are ignored. `update` is unused.
+ *
+ * NOTE(review): the pointer returned by JudyLIns() is dereferenced without
+ * checking it for failure.
+ */
+SERVICE_THREAD *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update __maybe_unused) {
+    SERVICE_THREAD *sth = NULL;
+    pid_t tid = gettid_cached();
+
+    spinlock_lock(&service_globals.lock);
+    Pvoid_t *PValue = JudyLIns(&service_globals.pid_judy, tid, PJE0);
+    if(!*PValue) { // empty slot: this thread was not registered before
+        sth = callocz(1, sizeof(SERVICE_THREAD));
+        sth->tid = tid;
+        sth->type = thread_type;
+        sth->request_quit_callback = request_quit_callback;
+        sth->force_quit_callback = force_quit_callback;
+        sth->data = data;
+        *PValue = sth;
+
+        switch(thread_type) {
+            default:
+            case SERVICE_THREAD_TYPE_NETDATA:
+                sth->netdata_thread = nd_thread_self();
+                break;
+
+            case SERVICE_THREAD_TYPE_EVENT_LOOP:
+            case SERVICE_THREAD_TYPE_LIBUV:
+                sth->uv_thread = uv_thread_self();
+                break;
+        }
+
+        const char *name = nd_thread_tag();
+        if(!name) name = "";
+        strncpyz(sth->name, name, sizeof(sth->name) - 1);
+    }
+    else {
+        sth = *PValue;
+    }
+    spinlock_unlock(&service_globals.lock);
+
+    return sth;
+}
+/*
+ * Free the record of the calling thread, if it has one, and remove it
+ * from the registry.
+ */
+void service_exits(void) {
+    pid_t tid = gettid_cached();
+
+    spinlock_lock(&service_globals.lock);
+    Pvoid_t *PValue = JudyLGet(service_globals.pid_judy, tid, PJE0);
+    if(PValue) {
+        freez(*PValue);
+        JudyLDel(&service_globals.pid_judy, tid, PJE0);
+    }
+    spinlock_unlock(&service_globals.lock);
+}
+/*
+ * Tell the calling thread whether it should keep running.
+ *
+ * The first call registers the thread as SERVICE_THREAD_TYPE_NETDATA and
+ * caches its record in a thread-local pointer. Every call adds `service`
+ * to the services recorded for the thread.
+ *
+ * Returns false once the thread was told to stop immediately, netdata_exit
+ * is set, or (NETDATA type threads only) the thread was signalled to
+ * cancel.
+ */
+bool service_running(SERVICE_TYPE service) {
+    static __thread SERVICE_THREAD *sth = NULL;
+
+    if(unlikely(!sth))
+        sth = service_register(SERVICE_THREAD_TYPE_NETDATA, NULL, NULL, NULL, false);
+
+    sth->services |= service;
+
+    bool cancelled = false;
+    if (sth->type == SERVICE_THREAD_TYPE_NETDATA)
+        cancelled = nd_thread_signaled_to_cancel();
+
+    return !sth->stop_immediately && !netdata_exit && !cancelled;
+}
+/*
+ * Ask every registered thread that recorded any of the given services to
+ * stop: set its stop_immediately flag, signal NETDATA type threads to
+ * cancel, and call its request_quit_callback when it has one.
+ */
+void service_signal_exit(SERVICE_TYPE service) {
+    spinlock_lock(&service_globals.lock);
+
+    Pvoid_t *PValue;
+    Word_t tid = 0;
+    bool first = true;
+    while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) {
+        SERVICE_THREAD *sth = *PValue;
+
+        if((sth->services & service)) {
+            sth->stop_immediately = true;
+
+            switch(sth->type) {
+                default:
+                case SERVICE_THREAD_TYPE_NETDATA:
+                    nd_thread_signal_cancel(sth->netdata_thread);
+                    break;
+
+                case SERVICE_THREAD_TYPE_EVENT_LOOP:
+                case SERVICE_THREAD_TYPE_LIBUV:
+                    break;
+            }
+
+            if(sth->request_quit_callback) {
+                // the callback runs with the registry lock released
+                spinlock_unlock(&service_globals.lock);
+                sth->request_quit_callback(sth->data);
+                spinlock_lock(&service_globals.lock);
+            }
+        }
+    }
+
+    spinlock_unlock(&service_globals.lock);
+}
+/*
+ * Append to wb the name of every service flag set in `service`, each one
+ * followed by a space.
+ */
+static void service_to_buffer(BUFFER *wb, SERVICE_TYPE service) {
+    if(service & SERVICE_MAINTENANCE)
+        buffer_strcat(wb, "MAINTENANCE ");
+    if(service & SERVICE_COLLECTORS)
+        buffer_strcat(wb, "COLLECTORS ");
+    if(service & SERVICE_REPLICATION)
+        buffer_strcat(wb, "REPLICATION ");
+    if(service & ABILITY_DATA_QUERIES)
+        buffer_strcat(wb, "DATA_QUERIES ");
+    if(service & ABILITY_WEB_REQUESTS)
+        buffer_strcat(wb, "WEB_REQUESTS ");
+    if(service & SERVICE_WEB_SERVER)
+        buffer_strcat(wb, "WEB_SERVER ");
+    if(service & SERVICE_ACLK)
+        buffer_strcat(wb, "ACLK ");
+
if(service & SERVICE_HEALTH) + buffer_strcat(wb, "HEALTH "); + if(service & SERVICE_STREAMING) + buffer_strcat(wb, "STREAMING "); + if(service & ABILITY_STREAMING_CONNECTIONS) + buffer_strcat(wb, "STREAMING_CONNECTIONS "); + if(service & SERVICE_CONTEXT) + buffer_strcat(wb, "CONTEXT "); + if(service & SERVICE_ANALYTICS) + buffer_strcat(wb, "ANALYTICS "); + if(service & SERVICE_EXPORTERS) + buffer_strcat(wb, "EXPORTERS "); + if(service & SERVICE_HTTPD) + buffer_strcat(wb, "HTTPD "); +} + +static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) { + BUFFER *service_list = buffer_create(1024, NULL); + BUFFER *thread_list = buffer_create(1024, NULL); + usec_t started_ut = now_monotonic_usec(), ended_ut; + size_t running; + SERVICE_TYPE running_services = 0; + + // cancel the threads + running = 0; + running_services = 0; + { + buffer_flush(thread_list); + + spinlock_lock(&service_globals.lock); + + Pvoid_t *PValue; + Word_t tid = 0; + bool first = true; + while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) { + SERVICE_THREAD *sth = *PValue; + if(sth->services & service && sth->tid != gettid_cached() && !sth->cancelled) { + sth->cancelled = true; + + switch(sth->type) { + default: + case SERVICE_THREAD_TYPE_NETDATA: + nd_thread_signal_cancel(sth->netdata_thread); + break; + + case SERVICE_THREAD_TYPE_EVENT_LOOP: + case SERVICE_THREAD_TYPE_LIBUV: + break; + } + + if(running) + buffer_strcat(thread_list, ", "); + + buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid); + + running++; + running_services |= sth->services & service; + + if(sth->force_quit_callback) { + spinlock_unlock(&service_globals.lock); + sth->force_quit_callback(sth->data); + spinlock_lock(&service_globals.lock); + continue; + } + } + } + + spinlock_unlock(&service_globals.lock); + } + + service_signal_exit(service); + + // signal them to stop + size_t last_running = 0; + size_t stale_time_ut = 0; + usec_t sleep_ut = 50 * USEC_PER_MS; + size_t 
log_countdown_ut = sleep_ut; + do { + if(running != last_running) + stale_time_ut = 0; + + last_running = running; + running = 0; + running_services = 0; + buffer_flush(thread_list); + + spinlock_lock(&service_globals.lock); + + Pvoid_t *PValue; + Word_t tid = 0; + bool first = true; + while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) { + SERVICE_THREAD *sth = *PValue; + if(sth->services & service && sth->tid != gettid_cached()) { + if(running) + buffer_strcat(thread_list, ", "); + + buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid); + + running_services |= sth->services & service; + running++; + } + } + + spinlock_unlock(&service_globals.lock); + + if(running) { + log_countdown_ut -= (log_countdown_ut >= sleep_ut) ? sleep_ut : log_countdown_ut; + if(log_countdown_ut == 0 || running != last_running) { + log_countdown_ut = 20 * sleep_ut; + + buffer_flush(service_list); + service_to_buffer(service_list, running_services); + netdata_log_info("SERVICE CONTROL: waiting for the following %zu services [ %s] to exit: %s", + running, buffer_tostring(service_list), + running <= 10 ? 
buffer_tostring(thread_list) : ""); + } + + sleep_usec(sleep_ut); + stale_time_ut += sleep_ut; + } + + ended_ut = now_monotonic_usec(); + } while(running && (ended_ut - started_ut < timeout_ut || stale_time_ut < timeout_ut)); + + if(running) { + buffer_flush(service_list); + service_to_buffer(service_list, running_services); + netdata_log_info("SERVICE CONTROL: " + "the following %zu service(s) [ %s] take too long to exit: %s; " + "giving up on them...", + running, buffer_tostring(service_list), + buffer_tostring(thread_list)); + } + + buffer_free(thread_list); + buffer_free(service_list); + + return (running == 0); +} + +void web_client_cache_destroy(void); + +void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data) { + if (i_am_the_spawn_server) + exit(ret); + + watcher_shutdown_begin(); + + nd_log_limits_unlimited(); + netdata_log_info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret); + + // send the stat from our caller + analytics_statistic_t statistic = { action, action_result, action_data }; + analytics_statistic_send(&statistic); + + // notify we are exiting + statistic = (analytics_statistic_t) {"EXIT", ret?"ERROR":"OK","-"}; + analytics_statistic_send(&statistic); + + char agent_crash_file[FILENAME_MAX + 1]; + char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; + snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); + snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir); + (void) rename(agent_crash_file, agent_incomplete_shutdown_file); + watcher_step_complete(WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE); + +#ifdef ENABLE_DBENGINE + if(dbengine_enabled) { + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_exit_mode(multidb_ctx[tier]); + } +#endif + watcher_step_complete(WATCHER_STEP_ID_DBENGINE_EXIT_MODE); + + webrtc_close_all_connections(); + 
watcher_step_complete(WATCHER_STEP_ID_CLOSE_WEBRTC_CONNECTIONS); + + service_signal_exit(SERVICE_MAINTENANCE | ABILITY_DATA_QUERIES | ABILITY_WEB_REQUESTS | + ABILITY_STREAMING_CONNECTIONS | SERVICE_ACLK | SERVICE_ACLKSYNC); + watcher_step_complete(WATCHER_STEP_ID_DISABLE_MAINTENANCE_NEW_QUERIES_NEW_WEB_REQUESTS_NEW_STREAMING_CONNECTIONS_AND_ACLK); + + service_wait_exit(SERVICE_MAINTENANCE, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_MAINTENANCE_THREAD); + + service_wait_exit(SERVICE_EXPORTERS | SERVICE_HEALTH | SERVICE_WEB_SERVER | SERVICE_HTTPD, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_EXPORTERS_HEALTH_AND_WEB_SERVERS_THREADS); + + service_wait_exit(SERVICE_COLLECTORS | SERVICE_STREAMING, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTORS_AND_STREAMING_THREADS); + + service_wait_exit(SERVICE_REPLICATION, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_REPLICATION_THREADS); + + metadata_sync_shutdown_prepare(); + watcher_step_complete(WATCHER_STEP_ID_PREPARE_METASYNC_SHUTDOWN); + + ml_stop_threads(); + ml_fini(); + watcher_step_complete(WATCHER_STEP_ID_DISABLE_ML_DETECTION_AND_TRAINING_THREADS); + + service_wait_exit(SERVICE_CONTEXT, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_CONTEXT_THREAD); + + web_client_cache_destroy(); + watcher_step_complete(WATCHER_STEP_ID_CLEAR_WEB_CLIENT_CACHE); + + service_wait_exit(SERVICE_ACLK, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_ACLK_THREADS); + + service_wait_exit(~0, 10 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_ALL_REMAINING_WORKER_THREADS); + + cancel_main_threads(); + watcher_step_complete(WATCHER_STEP_ID_CANCEL_MAIN_THREADS); + + if (ret) + { + watcher_step_complete(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); + watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); + watcher_step_complete(WATCHER_STEP_ID_STOP_METASYNC_THREADS); + + 
watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + } + else + { + // exit cleanly + +#ifdef ENABLE_DBENGINE + if(dbengine_enabled) { + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_prepare_exit(multidb_ctx[tier]); + + for (size_t tier = 0; tier < storage_tiers; tier++) { + if (!multidb_ctx[tier]) + continue; + completion_wait_for(&multidb_ctx[tier]->quiesce.completion); + completion_destroy(&multidb_ctx[tier]->quiesce.completion); + } + } +#endif + watcher_step_complete(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); + + rrd_finalize_collection_for_all_hosts(); + watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); + + metadata_sync_shutdown(); + watcher_step_complete(WATCHER_STEP_ID_STOP_METASYNC_THREADS); + +#ifdef ENABLE_DBENGINE + if(dbengine_enabled) { + size_t running = 1; + size_t count = 10; + while(running && count) { + running = 0; + for (size_t tier = 0; tier < storage_tiers; tier++) + running += rrdeng_collectors_running(multidb_ctx[tier]); + + if (running) { + nd_log_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS); + nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE, "waiting for %zu collectors to finish", running); + } + count--; + } + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + + while (pgc_hot_and_dirty_entries(main_cache)) { + pgc_flush_all_hot_and_dirty_pages(main_cache, PGC_SECTION_ALL); + sleep_usec(100 * USEC_PER_MS); + } + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_exit(multidb_ctx[tier]); + rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + } else { + // Skip these steps + 
watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + } +#else + // Skip these steps + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); +#endif + } + + sqlite_close_databases(); + watcher_step_complete(WATCHER_STEP_ID_CLOSE_SQL_DATABASES); + sqlite_library_shutdown(); + + + // unlink the pid + if(pidfile[0]) { + if(unlink(pidfile) != 0) + netdata_log_error("EXIT: cannot unlink pidfile '%s'.", pidfile); + } + watcher_step_complete(WATCHER_STEP_ID_REMOVE_PID_FILE); + +#ifdef ENABLE_HTTPS + netdata_ssl_cleanup(); +#endif + watcher_step_complete(WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES); + + (void) unlink(agent_incomplete_shutdown_file); + watcher_step_complete(WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE); + + watcher_shutdown_end(); + watcher_thread_stop(); + +#ifdef ENABLE_SENTRY + if (ret) + abort(); + else { + nd_sentry_fini(); + exit(ret); + } +#else + exit(ret); +#endif +} + +void web_server_threading_selection(void) { + web_server_mode = web_server_mode_id(config_get(CONFIG_SECTION_WEB, "mode", web_server_mode_name(web_server_mode))); + + int static_threaded = (web_server_mode == WEB_SERVER_MODE_STATIC_THREADED); + + int i; + for (i = 0; static_threads[i].name; i++) { + if (static_threads[i].start_routine == socket_listen_main_static_threaded) + static_threads[i].enabled = static_threaded; + } +} + +int make_dns_decision(const char *section_name, const char *config_name, const char *default_value, SIMPLE_PATTERN *p) +{ + char *value = config_get(section_name,config_name,default_value); + if(!strcmp("yes",value)) + return 1; + if(!strcmp("no",value)) + return 0; + if(strcmp("heuristic",value)) + 
netdata_log_error("Invalid configuration option '%s' for '%s'/'%s'. Valid options are 'yes', 'no' and 'heuristic'. Proceeding with 'heuristic'", + value, section_name, config_name); + + return simple_pattern_is_potential_name(p); +} + +void web_server_config_options(void) +{ + web_client_timeout = + (int)config_get_number(CONFIG_SECTION_WEB, "disconnect idle clients after seconds", web_client_timeout); + web_client_first_request_timeout = + (int)config_get_number(CONFIG_SECTION_WEB, "timeout for first request", web_client_first_request_timeout); + web_client_streaming_rate_t = + config_get_number(CONFIG_SECTION_WEB, "accept a streaming request every seconds", web_client_streaming_rate_t); + + respect_web_browser_do_not_track_policy = + config_get_boolean(CONFIG_SECTION_WEB, "respect do not track policy", respect_web_browser_do_not_track_policy); + web_x_frame_options = config_get(CONFIG_SECTION_WEB, "x-frame-options response header", ""); + if(!*web_x_frame_options) + web_x_frame_options = NULL; + + web_allow_connections_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow connections from", "localhost *"), + NULL, SIMPLE_PATTERN_EXACT, true); + web_allow_connections_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow connections by dns", "heuristic", web_allow_connections_from); + web_allow_dashboard_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow dashboard from", "localhost *"), + NULL, SIMPLE_PATTERN_EXACT, true); + web_allow_dashboard_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow dashboard by dns", "heuristic", web_allow_dashboard_from); + web_allow_badges_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow badges from", "*"), NULL, SIMPLE_PATTERN_EXACT, + true); + web_allow_badges_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow badges by dns", "heuristic", web_allow_badges_from); + web_allow_registry_from = + simple_pattern_create(config_get(CONFIG_SECTION_REGISTRY, "allow from", "*"), NULL, 
SIMPLE_PATTERN_EXACT, + true); + web_allow_registry_dns = make_dns_decision(CONFIG_SECTION_REGISTRY, "allow by dns", "heuristic", + web_allow_registry_from); + web_allow_streaming_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow streaming from", "*"), + NULL, SIMPLE_PATTERN_EXACT, true); + web_allow_streaming_dns = make_dns_decision(CONFIG_SECTION_WEB, "allow streaming by dns", "heuristic", + web_allow_streaming_from); + // Note the default is not heuristic, the wildcards could match DNS but the intent is ip-addresses. + web_allow_netdataconf_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow netdata.conf from", + "localhost fd* 10.* 192.168.* 172.16.* 172.17.* 172.18.*" + " 172.19.* 172.20.* 172.21.* 172.22.* 172.23.* 172.24.*" + " 172.25.* 172.26.* 172.27.* 172.28.* 172.29.* 172.30.*" + " 172.31.* UNKNOWN"), NULL, SIMPLE_PATTERN_EXACT, + true); + web_allow_netdataconf_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow netdata.conf by dns", "no", web_allow_netdataconf_from); + web_allow_mgmt_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow management from", "localhost"), + NULL, SIMPLE_PATTERN_EXACT, true); + web_allow_mgmt_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow management by dns","heuristic",web_allow_mgmt_from); + + web_enable_gzip = config_get_boolean(CONFIG_SECTION_WEB, "enable gzip compression", web_enable_gzip); + + char *s = config_get(CONFIG_SECTION_WEB, "gzip compression strategy", "default"); + if(!strcmp(s, "default")) + web_gzip_strategy = Z_DEFAULT_STRATEGY; + else if(!strcmp(s, "filtered")) + web_gzip_strategy = Z_FILTERED; + else if(!strcmp(s, "huffman only")) + web_gzip_strategy = Z_HUFFMAN_ONLY; + else if(!strcmp(s, "rle")) + web_gzip_strategy = Z_RLE; + else if(!strcmp(s, "fixed")) + web_gzip_strategy = Z_FIXED; + else { + netdata_log_error("Invalid compression strategy '%s'. Valid strategies are 'default', 'filtered', 'huffman only', 'rle' and 'fixed'. 
Proceeding with 'default'.", s); + web_gzip_strategy = Z_DEFAULT_STRATEGY; + } + + web_gzip_level = (int)config_get_number(CONFIG_SECTION_WEB, "gzip compression level", 3); + if(web_gzip_level < 1) { + netdata_log_error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 1 (fastest compression).", web_gzip_level); + web_gzip_level = 1; + } + else if(web_gzip_level > 9) { + netdata_log_error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 9 (best compression).", web_gzip_level); + web_gzip_level = 9; + } +} + + +// killpid kills pid with SIGTERM. +int killpid(pid_t pid) { + int ret; + netdata_log_debug(D_EXIT, "Request to kill pid %d", pid); + + int signal = SIGTERM; +//#ifdef NETDATA_INTERNAL_CHECKS +// if(service_running(SERVICE_COLLECTORS)) +// signal = SIGABRT; +//#endif + + errno = 0; + ret = kill(pid, signal); + if (ret == -1) { + switch(errno) { + case ESRCH: + // We wanted the process to exit so just let the caller handle. 
+ return ret; + + case EPERM: + netdata_log_error("Cannot kill pid %d, but I do not have enough permissions.", pid); + break; + + default: + netdata_log_error("Cannot kill pid %d, but I received an error.", pid); + break; + } + } + + return ret; +} + +static void set_nofile_limit(struct rlimit *rl) { + // get the num files allowed + if(getrlimit(RLIMIT_NOFILE, rl) != 0) { + netdata_log_error("getrlimit(RLIMIT_NOFILE) failed"); + return; + } + + netdata_log_info("resources control: allowed file descriptors: soft = %zu, max = %zu", + (size_t) rl->rlim_cur, (size_t) rl->rlim_max); + + // make the soft/hard limits equal + rl->rlim_cur = rl->rlim_max; + if (setrlimit(RLIMIT_NOFILE, rl) != 0) { + netdata_log_error("setrlimit(RLIMIT_NOFILE, { %zu, %zu }) failed", (size_t)rl->rlim_cur, (size_t)rl->rlim_max); + } + + // sanity check to make sure we have enough file descriptors available to open + if (getrlimit(RLIMIT_NOFILE, rl) != 0) { + netdata_log_error("getrlimit(RLIMIT_NOFILE) failed"); + return; + } + + if (rl->rlim_cur < 1024) + netdata_log_error("Number of open file descriptors allowed for this process is too low (RLIMIT_NOFILE=%zu)", (size_t)rl->rlim_cur); +} + +void cancel_main_threads() { + nd_log_limits_unlimited(); + + if (!static_threads) + return; + + int i, found = 0; + usec_t max = 5 * USEC_PER_SEC, step = 100000; + for (i = 0; static_threads[i].name != NULL ; i++) { + if (static_threads[i].enabled == NETDATA_MAIN_THREAD_RUNNING) { + if (static_threads[i].thread) { + netdata_log_info("EXIT: Stopping main thread: %s", static_threads[i].name); + nd_thread_signal_cancel(static_threads[i].thread); + } else { + netdata_log_info("EXIT: No thread running (marking as EXITED): %s", static_threads[i].name); + static_threads[i].enabled = NETDATA_MAIN_THREAD_EXITED; + } + found++; + } + } + + netdata_exit = 1; + + while(found && max > 0) { + max -= step; + netdata_log_info("Waiting %d threads to finish...", found); + sleep_usec(step); + found = 0; + for (i = 0; 
static_threads[i].name != NULL ; i++) { + if (static_threads[i].enabled == NETDATA_MAIN_THREAD_EXITED) + continue; + + // Don't wait ourselves. + if (nd_thread_is_me(static_threads[i].thread)) + continue; + + found++; + } + } + + if(found) { + for (i = 0; static_threads[i].name != NULL ; i++) { + if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED) + netdata_log_error("Main thread %s takes too long to exit. Giving up...", static_threads[i].name); + } + } + else + netdata_log_info("All threads finished."); + + freez(static_threads); + static_threads = NULL; +} + +static const struct option_def { + const char val; + const char *description; + const char *arg_name; + const char *default_value; +} option_definitions[] = { + {'c', "Configuration file to load.", "filename", CONFIG_DIR "/" CONFIG_FILENAME}, + {'D', "Do not fork. Run in the foreground.", NULL, "run in the background"}, + {'d', "Fork. Run in the background.", NULL, "run in the background"}, + {'h', "Display this help message.", NULL, NULL}, + {'P', "File to save a pid while running.", "filename", "do not save pid to a file"}, + {'i', "The IP address to listen to.", "IP", "all IP addresses IPv4 and IPv6"}, + {'p', "API/Web port to use.", "port", "19999"}, + {'s', "Prefix for /proc and /sys (for containers).", "path", "no prefix"}, + {'t', "The internal clock of netdata.", "seconds", "1"}, + {'u', "Run as user.", "username", "netdata"}, + {'v', "Print netdata version and exit.", NULL, NULL}, + {'V', "Print netdata version and exit.", NULL, NULL}, + {'W', "See Advanced options below.", "options", NULL}, +}; + +int help(int exitcode) { + FILE *stream; + if(exitcode == 0) + stream = stdout; + else + stream = stderr; + + int num_opts = sizeof(option_definitions) / sizeof(struct option_def); + int i; + int max_len_arg = 0; + + // Compute maximum argument length + for( i = 0; i < num_opts; i++ ) { + if(option_definitions[i].arg_name) { + int len_arg = (int)strlen(option_definitions[i].arg_name); + 
if(len_arg > max_len_arg) max_len_arg = len_arg; + } + } + + if(max_len_arg > 30) max_len_arg = 30; + if(max_len_arg < 20) max_len_arg = 20; + + fprintf(stream, "%s", "\n" + " ^\n" + " |.-. .-. .-. .-. . netdata \n" + " | '-' '-' '-' '-' real-time performance monitoring, done right! \n" + " +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->\n" + "\n" + " Copyright (C) 2016-2023, Netdata, Inc. \n" + " Released under GNU General Public License v3 or later.\n" + " All rights reserved.\n" + "\n" + " Home Page : https://netdata.cloud\n" + " Source Code: https://github.com/netdata/netdata\n" + " Docs : https://learn.netdata.cloud\n" + " Support : https://github.com/netdata/netdata/issues\n" + " License : https://github.com/netdata/netdata/blob/master/LICENSE.md\n" + "\n" + " Twitter : https://twitter.com/netdatahq\n" + " LinkedIn : https://linkedin.com/company/netdata-cloud/\n" + " Facebook : https://facebook.com/linuxnetdata/\n" + "\n" + "\n" + ); + + fprintf(stream, " SYNOPSIS: netdata [options]\n"); + fprintf(stream, "\n"); + fprintf(stream, " Options:\n\n"); + + // Output options description. + for( i = 0; i < num_opts; i++ ) { + fprintf(stream, " -%c %-*s %s", option_definitions[i].val, max_len_arg, option_definitions[i].arg_name ? 
option_definitions[i].arg_name : "", option_definitions[i].description); + if(option_definitions[i].default_value) { + fprintf(stream, "\n %c %-*s Default: %s\n", ' ', max_len_arg, "", option_definitions[i].default_value); + } else { + fprintf(stream, "\n"); + } + fprintf(stream, "\n"); + } + + fprintf(stream, "\n Advanced options:\n\n" + " -W stacksize=N Set the stacksize (in bytes).\n\n" + " -W debug_flags=N Set runtime tracing to debug.log.\n\n" + " -W unittest Run internal unittests and exit.\n\n" + " -W sqlite-meta-recover Run recovery on the metadata database and exit.\n\n" + " -W sqlite-compact Reclaim metadata database unused space and exit.\n\n" + " -W sqlite-analyze Run update statistics and exit.\n\n" + " -W sqlite-alert-cleanup Perform maintenance on the alerts table.\n\n" +#ifdef ENABLE_DBENGINE + " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n" + " -W stresstest=A,B,C,D,E,F,G\n" + " Run a DB engine stress test for A seconds,\n" + " with B writers and C readers, with a ramp up\n" + " time of D seconds for writers, a page cache\n" + " size of E MiB, an optional disk space limit\n" + " of F MiB, G libuv workers (default 16) and exit.\n\n" +#endif + " -W set section option value\n" + " set netdata.conf option from the command line.\n\n" + " -W buildinfo Print the version, the configure options,\n" + " a list of optional features, and whether they\n" + " are enabled or not.\n\n" + " -W buildinfojson Print the version, the configure options,\n" + " a list of optional features, and whether they\n" + " are enabled or not, in JSON format.\n\n" + " -W simple-pattern pattern string\n" + " Check if string matches pattern and exit.\n\n" + " -W \"claim -token=TOKEN -rooms=ROOM1,ROOM2\"\n" + " Claim the agent to the workspace rooms pointed to by TOKEN and ROOM*.\n\n" +#ifdef OS_WINDOWS + " -W perflibdump [key]\n" + " Dump the Windows Performance Counters Registry in JSON.\n\n" +#endif + ); + + fprintf(stream, "\n Signals netdata 
handles:\n\n" + " - HUP Close and reopen log files.\n" + " - USR2 Reload health configuration.\n" + "\n" + ); + + fflush(stream); + return exitcode; +} + +#ifdef ENABLE_HTTPS +static void security_init(){ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/ssl/key.pem",netdata_configured_user_config_dir); + netdata_ssl_security_key = config_get(CONFIG_SECTION_WEB, "ssl key", filename); + + snprintfz(filename, FILENAME_MAX, "%s/ssl/cert.pem",netdata_configured_user_config_dir); + netdata_ssl_security_cert = config_get(CONFIG_SECTION_WEB, "ssl certificate", filename); + + tls_version = config_get(CONFIG_SECTION_WEB, "tls version", "1.3"); + tls_ciphers = config_get(CONFIG_SECTION_WEB, "tls ciphers", "none"); + + netdata_ssl_initialize_openssl(); +} +#endif + +static void log_init(void) { + nd_log_set_facility(config_get(CONFIG_SECTION_LOGS, "facility", "daemon")); + + time_t period = ND_LOG_DEFAULT_THROTTLE_PERIOD; + size_t logs = ND_LOG_DEFAULT_THROTTLE_LOGS; + period = config_get_number(CONFIG_SECTION_LOGS, "logs flood protection period", period); + logs = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "logs to trigger flood protection", (long long int)logs); + nd_log_set_flood_protection(logs, period); + + const char *netdata_log_level = getenv("NETDATA_LOG_LEVEL"); + netdata_log_level = netdata_log_level ? 
nd_log_id2priority(nd_log_priority2id(netdata_log_level)) : NDLP_INFO_STR; + + nd_log_set_priority_level(config_get(CONFIG_SECTION_LOGS, "level", netdata_log_level)); + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/debug.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_DEBUG, config_get(CONFIG_SECTION_LOGS, "debug", filename)); + + bool with_journal = is_stderr_connected_to_journal() /* || nd_log_journal_socket_available() */; + if(with_journal) + snprintfz(filename, FILENAME_MAX, "journal"); + else + snprintfz(filename, FILENAME_MAX, "%s/daemon.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_DAEMON, config_get(CONFIG_SECTION_LOGS, "daemon", filename)); + + if(with_journal) + snprintfz(filename, FILENAME_MAX, "journal"); + else + snprintfz(filename, FILENAME_MAX, "%s/collector.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_COLLECTORS, config_get(CONFIG_SECTION_LOGS, "collector", filename)); + + snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_ACCESS, config_get(CONFIG_SECTION_LOGS, "access", filename)); + + if(with_journal) + snprintfz(filename, FILENAME_MAX, "journal"); + else + snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_HEALTH, config_get(CONFIG_SECTION_LOGS, "health", filename)); + +#ifdef ENABLE_ACLK + aclklog_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "conversation log", CONFIG_BOOLEAN_NO); + if (aclklog_enabled) { + snprintfz(filename, FILENAME_MAX, "%s/aclk.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_ACLK, config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename)); + } +#endif +} + +char *initialize_lock_directory_path(char *prefix) +{ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/lock", prefix); + + return config_get(CONFIG_SECTION_DIRECTORIES, "lock", filename); +} + 
+static void backwards_compatible_config() { + // move [global] options to the [web] section + config_move(CONFIG_SECTION_GLOBAL, "http port listen backlog", + CONFIG_SECTION_WEB, "listen backlog"); + + config_move(CONFIG_SECTION_GLOBAL, "bind socket to IP", + CONFIG_SECTION_WEB, "bind to"); + + config_move(CONFIG_SECTION_GLOBAL, "bind to", + CONFIG_SECTION_WEB, "bind to"); + + config_move(CONFIG_SECTION_GLOBAL, "port", + CONFIG_SECTION_WEB, "default port"); + + config_move(CONFIG_SECTION_GLOBAL, "default port", + CONFIG_SECTION_WEB, "default port"); + + config_move(CONFIG_SECTION_GLOBAL, "disconnect idle web clients after seconds", + CONFIG_SECTION_WEB, "disconnect idle clients after seconds"); + + config_move(CONFIG_SECTION_GLOBAL, "respect web browser do not track policy", + CONFIG_SECTION_WEB, "respect do not track policy"); + + config_move(CONFIG_SECTION_GLOBAL, "web x-frame-options header", + CONFIG_SECTION_WEB, "x-frame-options response header"); + + config_move(CONFIG_SECTION_GLOBAL, "enable web responses gzip compression", + CONFIG_SECTION_WEB, "enable gzip compression"); + + config_move(CONFIG_SECTION_GLOBAL, "web compression strategy", + CONFIG_SECTION_WEB, "gzip compression strategy"); + + config_move(CONFIG_SECTION_GLOBAL, "web compression level", + CONFIG_SECTION_WEB, "gzip compression level"); + + config_move(CONFIG_SECTION_GLOBAL, "config directory", + CONFIG_SECTION_DIRECTORIES, "config"); + + config_move(CONFIG_SECTION_GLOBAL, "stock config directory", + CONFIG_SECTION_DIRECTORIES, "stock config"); + + config_move(CONFIG_SECTION_GLOBAL, "log directory", + CONFIG_SECTION_DIRECTORIES, "log"); + + config_move(CONFIG_SECTION_GLOBAL, "web files directory", + CONFIG_SECTION_DIRECTORIES, "web"); + + config_move(CONFIG_SECTION_GLOBAL, "cache directory", + CONFIG_SECTION_DIRECTORIES, "cache"); + + config_move(CONFIG_SECTION_GLOBAL, "lib directory", + CONFIG_SECTION_DIRECTORIES, "lib"); + + config_move(CONFIG_SECTION_GLOBAL, "home directory", + 
CONFIG_SECTION_DIRECTORIES, "home"); + + config_move(CONFIG_SECTION_GLOBAL, "lock directory", + CONFIG_SECTION_DIRECTORIES, "lock"); + + config_move(CONFIG_SECTION_GLOBAL, "plugins directory", + CONFIG_SECTION_DIRECTORIES, "plugins"); + + config_move(CONFIG_SECTION_HEALTH, "health configuration directory", + CONFIG_SECTION_DIRECTORIES, "health config"); + + config_move(CONFIG_SECTION_HEALTH, "stock health configuration directory", + CONFIG_SECTION_DIRECTORIES, "stock health config"); + + config_move(CONFIG_SECTION_REGISTRY, "registry db directory", + CONFIG_SECTION_DIRECTORIES, "registry"); + + config_move(CONFIG_SECTION_GLOBAL, "debug log", + CONFIG_SECTION_LOGS, "debug"); + + config_move(CONFIG_SECTION_GLOBAL, "error log", + CONFIG_SECTION_LOGS, "error"); + + config_move(CONFIG_SECTION_GLOBAL, "access log", + CONFIG_SECTION_LOGS, "access"); + + config_move(CONFIG_SECTION_GLOBAL, "facility log", + CONFIG_SECTION_LOGS, "facility"); + + config_move(CONFIG_SECTION_GLOBAL, "errors flood protection period", + CONFIG_SECTION_LOGS, "errors flood protection period"); + + config_move(CONFIG_SECTION_GLOBAL, "errors to trigger flood protection", + CONFIG_SECTION_LOGS, "errors to trigger flood protection"); + + config_move(CONFIG_SECTION_GLOBAL, "debug flags", + CONFIG_SECTION_LOGS, "debug flags"); + + config_move(CONFIG_SECTION_GLOBAL, "TZ environment variable", + CONFIG_SECTION_ENV_VARS, "TZ"); + + config_move(CONFIG_SECTION_PLUGINS, "PATH environment variable", + CONFIG_SECTION_ENV_VARS, "PATH"); + + config_move(CONFIG_SECTION_PLUGINS, "PYTHONPATH environment variable", + CONFIG_SECTION_ENV_VARS, "PYTHONPATH"); + + config_move(CONFIG_SECTION_STATSD, "enabled", + CONFIG_SECTION_PLUGINS, "statsd"); + + config_move(CONFIG_SECTION_GLOBAL, "memory mode", + CONFIG_SECTION_DB, "mode"); + + config_move(CONFIG_SECTION_GLOBAL, "history", + CONFIG_SECTION_DB, "retention"); + + config_move(CONFIG_SECTION_GLOBAL, "update every", + CONFIG_SECTION_DB, "update every"); + + 
config_move(CONFIG_SECTION_GLOBAL, "page cache size", + CONFIG_SECTION_DB, "dbengine page cache size MB"); + + config_move(CONFIG_SECTION_DB, "page cache size", + CONFIG_SECTION_DB, "dbengine page cache size MB"); + + config_move(CONFIG_SECTION_GLOBAL, "page cache uses malloc", + CONFIG_SECTION_DB, "dbengine page cache with malloc"); + + config_move(CONFIG_SECTION_DB, "page cache with malloc", + CONFIG_SECTION_DB, "dbengine page cache with malloc"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine disk space", + CONFIG_SECTION_DB, "dbengine disk space MB"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine multihost disk space", + CONFIG_SECTION_DB, "dbengine multihost disk space MB"); + + config_move(CONFIG_SECTION_DB, "dbengine disk space MB", + CONFIG_SECTION_DB, "dbengine multihost disk space MB"); + + config_move(CONFIG_SECTION_DB, "dbengine multihost disk space MB", + CONFIG_SECTION_DB, "dbengine tier 0 disk space MB"); + + config_move(CONFIG_SECTION_DB, "dbengine tier 1 multihost disk space MB", + CONFIG_SECTION_DB, "dbengine tier 1 disk space MB"); + + config_move(CONFIG_SECTION_DB, "dbengine tier 2 multihost disk space MB", + CONFIG_SECTION_DB, "dbengine tier 2 disk space MB"); + + config_move(CONFIG_SECTION_DB, "dbengine tier 3 multihost disk space MB", + CONFIG_SECTION_DB, "dbengine tier 3 disk space MB"); + + config_move(CONFIG_SECTION_DB, "dbengine tier 4 multihost disk space MB", + CONFIG_SECTION_DB, "dbengine tier 4 disk space MB"); + + config_move(CONFIG_SECTION_GLOBAL, "memory deduplication (ksm)", + CONFIG_SECTION_DB, "memory deduplication (ksm)"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch timeout", + CONFIG_SECTION_DB, "dbengine page fetch timeout secs"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch retries", + CONFIG_SECTION_DB, "dbengine page fetch retries"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine extent pages", + CONFIG_SECTION_DB, "dbengine pages per extent"); + + config_move(CONFIG_SECTION_GLOBAL, 
"cleanup obsolete charts after seconds", + CONFIG_SECTION_DB, "cleanup obsolete charts after secs"); + + config_move(CONFIG_SECTION_GLOBAL, "gap when lost iterations above", + CONFIG_SECTION_DB, "gap when lost iterations above"); + + config_move(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds", + CONFIG_SECTION_DB, "cleanup orphan hosts after secs"); + + config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics", + CONFIG_SECTION_DB, "enable zero metrics"); + + config_move(CONFIG_SECTION_LOGS, "error", + CONFIG_SECTION_LOGS, "daemon"); + + config_move(CONFIG_SECTION_LOGS, "severity level", + CONFIG_SECTION_LOGS, "level"); + + config_move(CONFIG_SECTION_LOGS, "errors to trigger flood protection", + CONFIG_SECTION_LOGS, "logs to trigger flood protection"); + + config_move(CONFIG_SECTION_LOGS, "errors flood protection period", + CONFIG_SECTION_LOGS, "logs flood protection period"); + config_move(CONFIG_SECTION_HEALTH, "is ephemeral", + CONFIG_SECTION_GLOBAL, "is ephemeral node"); + + config_move(CONFIG_SECTION_HEALTH, "has unstable connection", + CONFIG_SECTION_GLOBAL, "has unstable connection"); +} + +static int get_hostname(char *buf, size_t buf_size) { + if (netdata_configured_host_prefix && *netdata_configured_host_prefix) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/etc/hostname", netdata_configured_host_prefix); + + if (!read_txt_file(filename, buf, buf_size)) { + trim(buf); + return 0; + } + } + + return gethostname(buf, buf_size); +} + +static void get_netdata_configured_variables() +{ +#ifdef ENABLE_DBENGINE + legacy_multihost_db_space = config_exists(CONFIG_SECTION_DB, "dbengine multihost disk space MB"); + if (!legacy_multihost_db_space) + legacy_multihost_db_space = config_exists(CONFIG_SECTION_GLOBAL, "dbengine multihost disk space"); + if (!legacy_multihost_db_space) + legacy_multihost_db_space = config_exists(CONFIG_SECTION_GLOBAL, "dbengine disk space"); +#endif + + backwards_compatible_config(); + + // 
------------------------------------------------------------------------ + // get the hostname + + netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", ""); + verify_netdata_host_prefix(true); + + char buf[HOSTNAME_MAX + 1]; + if (get_hostname(buf, HOSTNAME_MAX)) + netdata_log_error("Cannot get machine hostname."); + + netdata_configured_hostname = config_get(CONFIG_SECTION_GLOBAL, "hostname", buf); + netdata_log_debug(D_OPTIONS, "hostname set to '%s'", netdata_configured_hostname); + + // ------------------------------------------------------------------------ + // get default database update frequency + + default_rrd_update_every = (int) config_get_number(CONFIG_SECTION_DB, "update every", UPDATE_EVERY); + if(default_rrd_update_every < 1 || default_rrd_update_every > 600) { + netdata_log_error("Invalid data collection frequency (update every) %d given. Defaulting to %d.", default_rrd_update_every, UPDATE_EVERY); + default_rrd_update_every = UPDATE_EVERY; + config_set_number(CONFIG_SECTION_DB, "update every", default_rrd_update_every); + } + + // ------------------------------------------------------------------------ + // get default memory mode for the database + + { + const char *mode = config_get(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode)); + default_rrd_memory_mode = rrd_memory_mode_id(mode); + if(strcmp(mode, rrd_memory_mode_name(default_rrd_memory_mode)) != 0) { + netdata_log_error("Invalid memory mode '%s' given. 
Using '%s'", mode, rrd_memory_mode_name(default_rrd_memory_mode)); + config_set(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode)); + } + } + + // ------------------------------------------------------------------------ + // get default database size + + if(default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && default_rrd_memory_mode != RRD_MEMORY_MODE_NONE) { + default_rrd_history_entries = (int)config_get_number( + CONFIG_SECTION_DB, "retention", + align_entries_to_pagesize(default_rrd_memory_mode, RRD_DEFAULT_HISTORY_ENTRIES)); + + long h = align_entries_to_pagesize(default_rrd_memory_mode, default_rrd_history_entries); + if (h != default_rrd_history_entries) { + config_set_number(CONFIG_SECTION_DB, "retention", h); + default_rrd_history_entries = (int)h; + } + } + + // ------------------------------------------------------------------------ + // get system paths + + netdata_configured_user_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "config", netdata_configured_user_config_dir); + netdata_configured_stock_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "stock config", netdata_configured_stock_config_dir); + netdata_configured_log_dir = config_get(CONFIG_SECTION_DIRECTORIES, "log", netdata_configured_log_dir); + netdata_configured_web_dir = config_get(CONFIG_SECTION_DIRECTORIES, "web", netdata_configured_web_dir); + netdata_configured_cache_dir = config_get(CONFIG_SECTION_DIRECTORIES, "cache", netdata_configured_cache_dir); + netdata_configured_varlib_dir = config_get(CONFIG_SECTION_DIRECTORIES, "lib", netdata_configured_varlib_dir); + + netdata_configured_lock_dir = initialize_lock_directory_path(netdata_configured_varlib_dir); + + { + pluginsd_initialize_plugin_directories(); + netdata_configured_primary_plugins_dir = plugin_directories[PLUGINSD_STOCK_PLUGINS_DIRECTORY_PATH]; + } + +#ifdef ENABLE_DBENGINE + // ------------------------------------------------------------------------ + // get default Database Engine page type 
+ + const char *page_type = config_get(CONFIG_SECTION_DB, "dbengine page type", "gorilla"); + if (strcmp(page_type, "gorilla") == 0) + tier_page_type[0] = RRDENG_PAGE_TYPE_GORILLA_32BIT; + else if (strcmp(page_type, "raw") == 0) + tier_page_type[0] = RRDENG_PAGE_TYPE_ARRAY_32BIT; + else { + tier_page_type[0] = RRDENG_PAGE_TYPE_ARRAY_32BIT; + netdata_log_error("Invalid dbengine page type ''%s' given. Defaulting to 'raw'.", page_type); + } + + // ------------------------------------------------------------------------ + // get default Database Engine page cache size in MiB + + default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb); + default_rrdeng_extent_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine extent cache size MB", default_rrdeng_extent_cache_mb); + db_engine_journal_check = config_get_boolean(CONFIG_SECTION_DB, "dbengine enable journal integrity check", CONFIG_BOOLEAN_NO); + + if(default_rrdeng_extent_cache_mb < 0) + default_rrdeng_extent_cache_mb = 0; + + if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) { + netdata_log_error("Invalid page cache size %d given. 
Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB); + default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB; + config_set_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb); + } + + // ------------------------------------------------------------------------ + // get default Database Engine disk space quota in MiB +// +// // if (!config_exists(CONFIG_SECTION_DB, "dbengine disk space MB") && !config_exists(CONFIG_SECTION_DB, "dbengine multihost disk space MB")) +// +// default_rrdeng_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb); +// if(default_rrdeng_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) { +// netdata_log_error("Invalid dbengine disk space %d given. Defaulting to %d.", default_rrdeng_disk_quota_mb, RRDENG_MIN_DISK_SPACE_MB); +// default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB; +// config_set_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb); +// } +// +// default_multidb_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", compute_multidb_diskspace()); +// if(default_multidb_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) { +// netdata_log_error("Invalid multidb disk space %d given. Defaulting to %d.", default_multidb_disk_quota_mb, default_rrdeng_disk_quota_mb); +// default_multidb_disk_quota_mb = default_rrdeng_disk_quota_mb; +// config_set_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", default_multidb_disk_quota_mb); +// } + +#else + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + error_report("RRD_MEMORY_MODE_DBENGINE is not supported in this platform. 
The agent will use db mode 'save' instead."); + default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; + } +#endif + + // -------------------------------------------------------------------- + // get KSM settings + +#ifdef MADV_MERGEABLE + enable_ksm = config_get_boolean(CONFIG_SECTION_DB, "memory deduplication (ksm)", enable_ksm); +#endif + + // -------------------------------------------------------------------- + // metric correlations + + enable_metric_correlations = config_get_boolean(CONFIG_SECTION_GLOBAL, "enable metric correlations", enable_metric_correlations); + default_metric_correlations_method = weights_string_to_method(config_get( + CONFIG_SECTION_GLOBAL, "metric correlations method", + weights_method_to_string(default_metric_correlations_method))); + + // -------------------------------------------------------------------- + + rrdset_free_obsolete_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s); + rrdhost_free_ephemeral_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup ephemeral hosts after secs", rrdhost_free_ephemeral_time_s); + // Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short + // cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at + // https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information. 
+ if (rrdset_free_obsolete_time_s < 10) { + rrdset_free_obsolete_time_s = 10; + netdata_log_info("The \"cleanup obsolete charts after seconds\" option was set to 10 seconds."); + config_set_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s); + } + + gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above); + if (gap_when_lost_iterations_above < 1) { + gap_when_lost_iterations_above = 1; + config_set_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above); + } + gap_when_lost_iterations_above += 2; + + // -------------------------------------------------------------------- + // get various system parameters + + os_get_system_HZ(); + os_get_system_cpus_uncached(); + os_get_system_pid_max(); + + +} + /* post_conf_load: stores in *user the value of the CONFIG_SECTION_GLOBAL option named run as user. When the process runs as root (getuid() == 0) the default is NETDATA_USER; otherwise the default is the passwd name of the current uid, or an empty string when getpwuid() returns no usable entry. */ +static void post_conf_load(char **user) +{ + // -------------------------------------------------------------------- + // get the user we should run + + // IMPORTANT: this is required before web_files_uid() + if(getuid() == 0) { + *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", NETDATA_USER); + } + else { + struct passwd *passwd = getpwuid(getuid()); + *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", (passwd && passwd->pw_name)?passwd->pw_name:""); + } +} + /* load_netdata_conf: when filename is non-empty it is loaded as given and a failure is logged as an error. Otherwise netdata.conf is tried in the user config dir and then in the stock config dir, and startup continues with internal defaults if both fail; the path built here is released with freez(). post_conf_load(user) runs in every case. Returns the result of the last config_load() call, which the checks in this function treat as zero on failure. */ +static bool load_netdata_conf(char *filename, char overwrite_used, char **user) { + errno = 0; + + int ret = 0; + + if(filename && *filename) { + ret = config_load(filename, overwrite_used, NULL); + if(!ret) + netdata_log_error("CONFIG: cannot load config file '%s'.", filename); + } + else { + filename = strdupz_path_subpath(netdata_configured_user_config_dir, "netdata.conf"); + + ret = config_load(filename, overwrite_used, NULL); + if(!ret) { + netdata_log_info("CONFIG: cannot load user config '%s'. 
Will try the stock version.", filename); + freez(filename); + + filename = strdupz_path_subpath(netdata_configured_stock_config_dir, "netdata.conf"); + ret = config_load(filename, overwrite_used, NULL); + if(!ret) + netdata_log_info("CONFIG: cannot load stock config '%s'. Running with internal defaults.", filename); + } + + freez(filename); + } + + post_conf_load(user); + return ret; +} + /* coverity_remove_taint: does nothing with s at runtime; it only casts the argument to void. NOTE(review): presumably the annotation that follows makes the analyzer treat s as sanitized - confirm against the Coverity annotation docs. */ +// coverity[ +tainted_string_sanitize_content : arg-0 ] +static inline void coverity_remove_taint(char *s) +{ + (void)s; +} + /* get_system_info: builds the path of system-info.sh inside netdata_configured_primary_plugins_dir, runs it through netdata_popen() and reads KEY=VALUE lines from its output. Each pair is passed to rrdhost_set_system_info_variable(); when that returns zero the pair is also exported with setenv(), otherwise an error is logged. Input is read with fgets() in pieces of at most 199 characters, so a longer line is parsed as several lines. Returns 1 when the script is not readable and 0 otherwise, including the case where netdata_popen() gives no output stream. */ +int get_system_info(struct rrdhost_system_info *system_info) { + char *script; + script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("system-info.sh") + 2)); + sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "system-info.sh"); + if (unlikely(access(script, R_OK) != 0)) { + netdata_log_error("System info script %s not found.",script); + freez(script); + return 1; + } + + pid_t command_pid; + + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input); + if(fp_child_output) { + char line[200 + 1]; + // Removed the double strlens, if the Coverity tainted string warning reappears I'll revert. + // One time init code, but I'm curious about the warning... 
+ while (fgets(line, 200, fp_child_output) != NULL) { + char *value=line; + while (*value && *value != '=') value++; + if (*value=='=') { + *value='\0'; + value++; + char *end = value; + while (*end && *end != '\n') end++; + *end = '\0'; // Overwrite newline if present + coverity_remove_taint(line); // I/O is controlled result of system_info.sh - not tainted + coverity_remove_taint(value); + + if(unlikely(rrdhost_set_system_info_variable(system_info, line, value))) { + netdata_log_error("Unexpected environment variable %s=%s", line, value); + } else { + setenv(line, value, 1); + } + } + } + netdata_pclose(fp_child_input, fp_child_output, command_pid); + } + freez(script); + return 0; +} + +/* Any config setting that can be accessed without a default value i.e. configget(...,...,NULL) *MUST* + be set in this procedure to be called in all the relevant code paths. +*/ + /* delta_startup_time(msg): startup progress logging. It relies on two variables of the calling scope, last_ut and prev_msg. When prev_msg is set it logs the milliseconds elapsed since last_ut for prev_msg and announces msg as the next step; otherwise it only announces msg. It then stores the current monotonic time in last_ut and msg in prev_msg for the next use. */ +#define delta_startup_time(msg) \ + { \ + usec_t now_ut = now_monotonic_usec(); \ + if(prev_msg) \ + netdata_log_info("NETDATA STARTUP: in %7llu ms, %s - next: %s", (now_ut - last_ut) / USEC_PER_MS, prev_msg, msg); \ + else \ + netdata_log_info("NETDATA STARTUP: next: %s", msg); \ + last_ut = now_ut; \ + prev_msg = msg; \ + } + /* Forward declarations of unit tests and init routines that are defined outside this file. */ +int buffer_unittest(void); +int pgc_unittest(void); +int mrg_unittest(void); +int julytest(void); +int pluginsd_parser_unittest(void); +void replication_initialize(void); +void bearer_tokens_init(void); +int unittest_rrdpush_compressions(void); +int uuid_unittest(void); +int progress_unittest(void); +int dyncfg_unittest(void); + +#ifdef OS_WINDOWS +int windows_perflib_dump(const char *key); +#endif + /* unittest_prepare_rrd: minimal RRD setup for unit tests. It resolves the run-as user with post_conf_load(), reads the configured variables, forces default_rrd_update_every to 1 and the memory mode to RRD_MEMORY_MODE_RAM, disables the health plugin, uses a single storage tier, initializes the registry and calls rrd_init() with the name unittest. On success it also sets default_rrdpush_enabled to 0. Returns 1 if rrd_init() fails, else 0. */ +int unittest_prepare_rrd(char **user) { + post_conf_load(user); + get_netdata_configured_variables(); + default_rrd_update_every = 1; + default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; + health_plugin_disable(); + storage_tiers = 1; + registry_init(); + if(rrd_init("unittest", NULL, true)) { + fprintf(stderr, "rrd_init failed for unittest\n"); + return 1; + } + 
default_rrdpush_enabled = 0; + + return 0; +} + +int main(int argc, char **argv) { + // initialize the system clocks + clocks_init(); + netdata_start_time = now_realtime_sec(); + + usec_t started_ut = now_monotonic_usec(); + usec_t last_ut = started_ut; + const char *prev_msg = NULL; + + int i; + int config_loaded = 0; + int dont_fork = 0; + bool close_open_fds = true; + size_t default_stacksize; + char *user = NULL; + + static_threads = static_threads_get(); + + netdata_ready = false; + // set the name for logging + program_name = "netdata"; + + if (argc > 1 && strcmp(argv[1], SPAWN_SERVER_COMMAND_LINE_ARGUMENT) == 0) { + // don't run netdata, this is the spawn server + i_am_the_spawn_server = true; + spawn_server(); + exit(0); + } + + // parse options + { + int num_opts = sizeof(option_definitions) / sizeof(struct option_def); + char optstring[(num_opts * 2) + 1]; + + int string_i = 0; + for( i = 0; i < num_opts; i++ ) { + optstring[string_i] = option_definitions[i].val; + string_i++; + if(option_definitions[i].arg_name) { + optstring[string_i] = ':'; + string_i++; + } + } + // terminate optstring + optstring[string_i] ='\0'; + optstring[(num_opts *2)] ='\0'; + + int opt; + while( (opt = getopt(argc, argv, optstring)) != -1 ) { + switch(opt) { + case 'c': + if(!load_netdata_conf(optarg, 1, &user)) { + netdata_log_error("Cannot load configuration file %s.", optarg); + return 1; + } + else { + netdata_log_debug(D_OPTIONS, "Configuration loaded from %s.", optarg); + load_cloud_conf(1); + config_loaded = 1; + } + break; + case 'D': + dont_fork = 1; + break; + case 'd': + dont_fork = 0; + break; + case 'h': + return help(0); + case 'i': + config_set(CONFIG_SECTION_WEB, "bind to", optarg); + break; + case 'P': + strncpy(pidfile, optarg, FILENAME_MAX); + pidfile[FILENAME_MAX] = '\0'; + break; + case 'p': + config_set(CONFIG_SECTION_GLOBAL, "default port", optarg); + break; + case 's': + config_set(CONFIG_SECTION_GLOBAL, "host access prefix", optarg); + break; + case 
't': + config_set(CONFIG_SECTION_GLOBAL, "update every", optarg); + break; + case 'u': + config_set(CONFIG_SECTION_GLOBAL, "run as user", optarg); + break; + case 'v': + case 'V': + printf("%s %s\n", program_name, NETDATA_VERSION); + return 0; + case 'W': + { + char* stacksize_string = "stacksize="; + char* debug_flags_string = "debug_flags="; + char* claim_string = "claim"; +#ifdef ENABLE_DBENGINE + char* createdataset_string = "createdataset="; + char* stresstest_string = "stresstest="; + + if(strcmp(optarg, "pgd-tests") == 0) { + return pgd_test(argc, argv); + } +#endif + + if(strcmp(optarg, "sqlite-meta-recover") == 0) { + sql_init_meta_database(DB_CHECK_RECOVER, 0); + return 0; + } + + if(strcmp(optarg, "sqlite-compact") == 0) { + sql_init_meta_database(DB_CHECK_RECLAIM_SPACE, 0); + return 0; + } + + if(strcmp(optarg, "sqlite-analyze") == 0) { + sql_init_meta_database(DB_CHECK_ANALYZE, 0); + return 0; + } + + if(strcmp(optarg, "sqlite-alert-cleanup") == 0) { + sql_alert_cleanup(true); + return 0; + } + + if(strcmp(optarg, "unittest") == 0) { + unittest_running = true; + + // set defaults for dbegnine unittest + config_set(CONFIG_SECTION_DB, "dbengine page type", "gorilla"); +#ifdef ENABLE_DBENGINE + default_rrdeng_disk_quota_mb = default_multidb_disk_quota_mb = 256; +#endif + + if (sqlite_library_init()) + return 1; + + if (pluginsd_parser_unittest()) return 1; + if (unit_test_static_threads()) return 1; + if (unit_test_buffer()) return 1; + if (unit_test_str2ld()) return 1; + if (buffer_unittest()) return 1; + + // No call to load the config file on this code-path + if (unittest_prepare_rrd(&user)) return 1; + if (run_all_mockup_tests()) return 1; + if (unit_test_storage()) return 1; +#ifdef ENABLE_DBENGINE + if (test_dbengine()) return 1; +#endif + if (test_sqlite()) return 1; + if (string_unittest(10000)) return 1; + if (dictionary_unittest(10000)) return 1; + if (aral_unittest(10000)) return 1; + if (rrdlabels_unittest()) return 1; + if (ctx_unittest()) 
return 1; + if (uuid_unittest()) return 1; + if (dyncfg_unittest()) return 1; + sqlite_library_shutdown(); + fprintf(stderr, "\n\nALL TESTS PASSED\n\n"); + return 0; + } + else if(strcmp(optarg, "escapetest") == 0) { + return command_argument_sanitization_tests(); + } + else if(strcmp(optarg, "dicttest") == 0) { + unittest_running = true; + return dictionary_unittest(10000); + } + else if(strcmp(optarg, "araltest") == 0) { + unittest_running = true; + return aral_unittest(10000); + } + else if(strcmp(optarg, "stringtest") == 0) { + unittest_running = true; + return string_unittest(10000); + } + else if(strcmp(optarg, "rrdlabelstest") == 0) { + unittest_running = true; + return rrdlabels_unittest(); + } + else if(strcmp(optarg, "buffertest") == 0) { + unittest_running = true; + return buffer_unittest(); + } + else if(strcmp(optarg, "uuidtest") == 0) { + unittest_running = true; + return uuid_unittest(); + } +#ifdef OS_WINDOWS + else if(strcmp(optarg, "perflibdump") == 0) { + return windows_perflib_dump(optind + 1 > argc ? 
NULL : argv[optind]); + } +#endif +#ifdef ENABLE_DBENGINE + else if(strcmp(optarg, "mctest") == 0) { + unittest_running = true; + return mc_unittest(); + } + else if(strcmp(optarg, "ctxtest") == 0) { + unittest_running = true; + return ctx_unittest(); + } + else if(strcmp(optarg, "metatest") == 0) { + unittest_running = true; + return metadata_unittest(); + } + else if(strcmp(optarg, "pgctest") == 0) { + unittest_running = true; + return pgc_unittest(); + } + else if(strcmp(optarg, "mrgtest") == 0) { + unittest_running = true; + return mrg_unittest(); + } + else if(strcmp(optarg, "julytest") == 0) { + unittest_running = true; + return julytest(); + } + else if(strcmp(optarg, "parsertest") == 0) { + unittest_running = true; + return pluginsd_parser_unittest(); + } + else if(strcmp(optarg, "rrdpush_compressions_test") == 0) { + unittest_running = true; + return unittest_rrdpush_compressions(); + } + else if(strcmp(optarg, "progresstest") == 0) { + unittest_running = true; + return progress_unittest(); + } + else if(strcmp(optarg, "dyncfgtest") == 0) { + unittest_running = true; + if(unittest_prepare_rrd(&user)) + return 1; + return dyncfg_unittest(); + } + else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) { + optarg += strlen(createdataset_string); + unsigned history_seconds = strtoul(optarg, NULL, 0); + post_conf_load(&user); + get_netdata_configured_variables(); + default_rrd_update_every = 1; + registry_init(); + if(rrd_init("dbengine-dataset", NULL, true)) { + fprintf(stderr, "rrd_init failed for unittest\n"); + return 1; + } + generate_dbengine_dataset(history_seconds); + return 0; + } + else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) { + char *endptr; + unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0, + page_cache_mb = 0, disk_space_mb = 0, workers = 16; + + optarg += strlen(stresstest_string); + test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0); + if 
(',' == *endptr) + dset_charts = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + query_threads = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + ramp_up_seconds = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + disk_space_mb = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + workers = (unsigned)strtoul(endptr + 1, &endptr, 0); + + if (workers > 1024) + workers = 1024; + + char workers_str[16]; + snprintf(workers_str, 15, "%u", workers); + setenv("UV_THREADPOOL_SIZE", workers_str, 1); + dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds, + page_cache_mb, disk_space_mb); + return 0; + } +#endif + else if(strcmp(optarg, "simple-pattern") == 0) { + if(optind + 2 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W simple-pattern 'pattern' 'string'\n\n" + " Checks if 'pattern' matches the given 'string'.\n" + " - 'pattern' can be one or more space separated words.\n" + " - each 'word' can contain one or more asterisks.\n" + " - words starting with '!' 
give negative matches.\n" + " - words are processed left to right\n" + "\n" + "Examples:\n" + "\n" + " > match all veth interfaces, except veth0:\n" + "\n" + " -W simple-pattern '!veth0 veth*' 'veth12'\n" + "\n" + "\n" + " > match all *.ext files directly in /path/:\n" + " (this will not match *.ext files in a subdir of /path/)\n" + "\n" + " -W simple-pattern '!/path/*/*.ext /path/*.ext' '/path/test.ext'\n" + "\n" + ); + return 1; + } + + const char *haystack = argv[optind]; + const char *needle = argv[optind + 1]; + size_t len = strlen(needle) + 1; + char wildcarded[len]; + + SIMPLE_PATTERN *p = simple_pattern_create(haystack, NULL, SIMPLE_PATTERN_EXACT, true); + SIMPLE_PATTERN_RESULT ret = simple_pattern_matches_extract(p, needle, wildcarded, len); + simple_pattern_free(p); + + if(ret == SP_MATCHED_POSITIVE) { + fprintf(stdout, "RESULT: POSITIVE MATCHED - pattern '%s' matches '%s', wildcarded '%s'\n", haystack, needle, wildcarded); + return 0; + } + else if(ret == SP_MATCHED_NEGATIVE) { + fprintf(stdout, "RESULT: NEGATIVE MATCHED - pattern '%s' matches '%s', wildcarded '%s'\n", haystack, needle, wildcarded); + return 0; + } + else { + fprintf(stdout, "RESULT: NOT MATCHED - pattern '%s' does not match '%s', wildcarded '%s'\n", haystack, needle, wildcarded); + return 1; + } + } + else if(strncmp(optarg, stacksize_string, strlen(stacksize_string)) == 0) { + optarg += strlen(stacksize_string); + config_set(CONFIG_SECTION_GLOBAL, "pthread stack size", optarg); + } + else if(strncmp(optarg, debug_flags_string, strlen(debug_flags_string)) == 0) { + optarg += strlen(debug_flags_string); + config_set(CONFIG_SECTION_LOGS, "debug flags", optarg); + debug_flags = strtoull(optarg, NULL, 0); + } + else if(strcmp(optarg, "set") == 0) { + if(optind + 3 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W set 'section' 'key' 'value'\n\n" + " Overwrites settings of netdata.conf.\n" + "\n" + " These options interact with: -c netdata.conf\n" + " If -c netdata.conf is given on the command 
line,\n" + " before -W set... the user may overwrite command\n" + " line parameters at netdata.conf\n" + " If -c netdata.conf is given after (or missing)\n" + " -W set... the user cannot overwrite the command line\n" + " parameters." + "\n" + ); + return 1; + } + const char *section = argv[optind]; + const char *key = argv[optind + 1]; + const char *value = argv[optind + 2]; + optind += 3; + + // set this one as the default + // only if it is not already set in the config file + // so the caller can use -c netdata.conf before or + // after this parameter to prevent or allow overwriting + // variables at netdata.conf + config_set_default(section, key, value); + + // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value); + } + else if(strcmp(optarg, "set2") == 0) { + if(optind + 4 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W set 'conf_file' 'section' 'key' 'value'\n\n" + " Overwrites settings of netdata.conf or cloud.conf\n" + "\n" + " These options interact with: -c netdata.conf\n" + " If -c netdata.conf is given on the command line,\n" + " before -W set... the user may overwrite command\n" + " line parameters at netdata.conf\n" + " If -c netdata.conf is given after (or missing)\n" + " -W set... the user cannot overwrite the command line\n" + " parameters." + " conf_file can be \"cloud\" or \"netdata\".\n" + "\n" + ); + return 1; + } + const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? 
&netdata_config : &cloud_config; + const char *section = argv[optind + 1]; + const char *key = argv[optind + 2]; + const char *value = argv[optind + 3]; + optind += 4; + + // set this one as the default + // only if it is not already set in the config file + // so the caller can use -c netdata.conf before or + // after this parameter to prevent or allow overwriting + // variables at netdata.conf + appconfig_set_default(tmp_config, section, key, value); + + // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value); + } + else if(strcmp(optarg, "get") == 0) { + if(optind + 3 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W get 'section' 'key' 'value'\n\n" + " Prints settings of netdata.conf.\n" + "\n" + " These options interact with: -c netdata.conf\n" + " -c netdata.conf has to be given before -W get.\n" + "\n" + ); + return 1; + } + + if(!config_loaded) { + fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n"); + load_netdata_conf(NULL, 0, &user); + } + + get_netdata_configured_variables(); + + const char *section = argv[optind]; + const char *key = argv[optind + 1]; + const char *def = argv[optind + 2]; + const char *value = config_get(section, key, def); + printf("%s\n", value); + return 0; + } + else if(strcmp(optarg, "get2") == 0) { + if(optind + 4 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W get2 'conf_file' 'section' 'key' 'value'\n\n" + " Prints settings of netdata.conf or cloud.conf\n" + "\n" + " These options interact with: -c netdata.conf\n" + " -c netdata.conf has to be given before -W get2.\n" + " conf_file can be \"cloud\" or \"netdata\".\n" + "\n" + ); + return 1; + } + + if(!config_loaded) { + fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. 
Using default config.\n"); + load_netdata_conf(NULL, 0, &user); + load_cloud_conf(1); + } + + get_netdata_configured_variables(); + + const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config; + const char *section = argv[optind + 1]; + const char *key = argv[optind + 2]; + const char *def = argv[optind + 3]; + const char *value = appconfig_get(tmp_config, section, key, def); + printf("%s\n", value); + return 0; + } + else if(strncmp(optarg, claim_string, strlen(claim_string)) == 0) { + /* will trigger a claiming attempt when the agent is initialized */ + claiming_pending_arguments = optarg + strlen(claim_string); + } + else if(strcmp(optarg, "buildinfo") == 0) { + print_build_info(); + return 0; + } + else if(strcmp(optarg, "buildinfojson") == 0) { + print_build_info_json(); + return 0; + } + else if(strcmp(optarg, "keepopenfds") == 0) { + // Internal dev option to skip closing inherited + // open FDs. Useful, when we want to run the agent + // under profiling tools that open/maintain their + // own FDs. + close_open_fds = false; + } else { + fprintf(stderr, "Unknown -W parameter '%s'\n", optarg); + return help(1); + } + } + break; + + default: /* ? 
*/ + fprintf(stderr, "Unknown parameter '%c'\n", opt); + return help(1); + } + } + } + + if (close_open_fds == true) { + // close all open file descriptors, except the standard ones + // the caller may have left open files (lxc-attach has this issue) + for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR); + } + + if(!config_loaded) { + load_netdata_conf(NULL, 0, &user); + load_cloud_conf(0); + } + + // ------------------------------------------------------------------------ + // initialize netdata + { + char *pmax = config_get(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for plugins", "1"); + if(pmax && *pmax) + setenv("MALLOC_ARENA_MAX", pmax, 1); + +#if defined(HAVE_C_MALLOPT) + i = (int)config_get_number(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for netdata", 1); + if(i > 0) + mallopt(M_ARENA_MAX, 1); + + +#ifdef NETDATA_INTERNAL_CHECKS + mallopt(M_PERTURB, 0x5A); + // mallopt(M_MXFAST, 0); +#endif +#endif + + // set libuv worker threads + libuv_worker_threads = (int)get_netdata_cpus() * 6; + + if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS) + libuv_worker_threads = MIN_LIBUV_WORKER_THREADS; + + if(libuv_worker_threads > MAX_LIBUV_WORKER_THREADS) + libuv_worker_threads = MAX_LIBUV_WORKER_THREADS; + + + libuv_worker_threads = config_get_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads); + if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS) { + libuv_worker_threads = MIN_LIBUV_WORKER_THREADS; + config_set_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads); + } + + { + char buf[20 + 1]; + snprintfz(buf, sizeof(buf) - 1, "%d", libuv_worker_threads); + setenv("UV_THREADPOOL_SIZE", buf, 1); + } + + // prepare configuration environment variables for the plugins + get_netdata_configured_variables(); + set_global_environment(); + + // work while we are cd into config_dir + // to allow the plugins refer to their config + // files using relative 
filenames + if(chdir(netdata_configured_user_config_dir) == -1) + fatal("Cannot cd to '%s'", netdata_configured_user_config_dir); + + // Get execution path before switching user to avoid permission issues + get_netdata_execution_path(); + } + + { + // -------------------------------------------------------------------- + // get the debugging flags from the configuration file + + char *flags = config_get(CONFIG_SECTION_LOGS, "debug flags", "0x0000000000000000"); + setenv("NETDATA_DEBUG_FLAGS", flags, 1); + + debug_flags = strtoull(flags, NULL, 0); + netdata_log_debug(D_OPTIONS, "Debug flags set to '0x%" PRIX64 "'.", debug_flags); + + if(debug_flags != 0) { + struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; + if(setrlimit(RLIMIT_CORE, &rl) != 0) + netdata_log_error("Cannot request unlimited core dumps for debugging... Proceeding anyway..."); + +#ifdef HAVE_SYS_PRCTL_H + prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); +#endif + } + + + // -------------------------------------------------------------------- + // get log filenames and settings + + log_init(); + nd_log_limits_unlimited(); + + // initialize the log files + nd_log_initialize(); + netdata_log_info("Netdata agent version '%s' is starting", NETDATA_VERSION); + + ieee754_doubles = is_system_ieee754_double(); + if(!ieee754_doubles) + globally_disabled_capabilities |= STREAM_CAP_IEEE754; + + aral_judy_init(); + + get_system_timezone(); + + bearer_tokens_init(); + + replication_initialize(); + + rrd_functions_inflight_init(); + + // -------------------------------------------------------------------- + // get the certificate and start security + +#ifdef ENABLE_HTTPS + security_init(); +#endif + + // -------------------------------------------------------------------- + // This is the safest place to start the SILENCERS structure + + health_set_silencers_filename(); + health_initialize_global_silencers(); + +// // -------------------------------------------------------------------- +// // Initialize ML configuration +// 
+// delta_startup_time("initialize ML"); +// ml_init(); + + // -------------------------------------------------------------------- + // setup process signals + + // block signals while initializing threads. + // this causes the threads to block signals. + + delta_startup_time("initialize signals"); + signals_block(); + signals_init(); // setup the signals we want to use + + // -------------------------------------------------------------------- + // check which threads are enabled and initialize them + + delta_startup_time("initialize static threads"); + + // setup threads configs + default_stacksize = netdata_threads_init(); + // musl default thread stack size is 128k, let's set it to a higher value to avoid random crashes + if (default_stacksize < 1 * 1024 * 1024) + default_stacksize = 1 * 1024 * 1024; + +#ifdef NETDATA_INTERNAL_CHECKS + config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring", true); + config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring extended", true); +#endif + + if(config_get_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring extended", false)) + // this has to run before starting any other threads that use workers + workers_utilization_enable(); + + for (i = 0; static_threads[i].name != NULL ; i++) { + struct netdata_static_thread *st = &static_threads[i]; + + if(st->config_name) + st->enabled = config_get_boolean(st->config_section, st->config_name, st->enabled); + + if(st->enabled && st->init_routine) + st->init_routine(); + + if(st->env_name) + setenv(st->env_name, st->enabled?"YES":"NO", 1); + + if(st->global_variable) + *st->global_variable = (st->enabled) ? 
true : false; + } + + // -------------------------------------------------------------------- + // create the listening sockets + + delta_startup_time("initialize web server"); + + web_client_api_v1_init(); + web_server_threading_selection(); + + if(web_server_mode != WEB_SERVER_MODE_NONE) { + if (!api_listen_sockets_setup()) { + netdata_log_error("Cannot setup listen port(s). Is Netdata already running?"); + exit(1); + } + } + if (sqlite_library_init()) + fatal("Failed to initialize sqlite library"); + + // -------------------------------------------------------------------- + // Initialize ML configuration + + delta_startup_time("initialize ML"); + ml_init(); + +#ifdef ENABLE_H2O + delta_startup_time("initialize h2o server"); + for (int t = 0; static_threads[t].name; t++) { + if (static_threads[t].start_routine == h2o_main) + static_threads[t].enabled = httpd_is_enabled(); + } +#endif + } + + delta_startup_time("set resource limits"); + +#ifdef NETDATA_INTERNAL_CHECKS + if(debug_flags != 0) { + struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; + if(setrlimit(RLIMIT_CORE, &rl) != 0) + netdata_log_error("Cannot request unlimited core dumps for debugging... Proceeding anyway..."); +#ifdef HAVE_SYS_PRCTL_H + prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); +#endif + } +#endif /* NETDATA_INTERNAL_CHECKS */ + + set_nofile_limit(&rlimit_nofile); + + delta_startup_time("become daemon"); + +#if defined(OS_LINUX) || defined(OS_MACOS) || defined(OS_FREEBSD) + // fork, switch user, create pid file, set process priority + if(become_daemon(dont_fork, user) == -1) + fatal("Cannot daemonize myself."); +#else + (void)dont_fork; +#endif + + watcher_thread_start(); + + // init sentry +#ifdef ENABLE_SENTRY + nd_sentry_init(); +#endif + + // The "HOME" env var points to the root's home dir because Netdata starts as root. Can't use "HOME". 
+ struct passwd *pw = getpwuid(getuid()); + if (config_exists(CONFIG_SECTION_DIRECTORIES, "home") || !pw || !pw->pw_dir) { + netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", netdata_configured_home_dir); + } else { + netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", pw->pw_dir); + } + + setenv("HOME", netdata_configured_home_dir, 1); + + dyncfg_init(true); + + netdata_log_info("netdata started on pid %d.", getpid()); + + delta_startup_time("initialize threads after fork"); + + netdata_threads_init_after_fork((size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)default_stacksize)); + + // initialize internal registry + delta_startup_time("initialize registry"); + registry_init(); + + // fork the spawn server + delta_startup_time("fork the spawn server"); + spawn_init(); + + /* + * Libuv uv_spawn() uses SIGCHLD internally: + * https://github.com/libuv/libuv/blob/cc51217a317e96510fbb284721d5e6bc2af31e33/src/unix/process.c#L485 + * and inadvertently replaces the netdata signal handler which was setup during initialization. + * Thusly, we must explicitly restore the signal handler for SIGCHLD. + * Warning: extreme care is needed when mixing and matching POSIX and libuv. + */ + signals_restore_SIGCHLD(); + + // ------------------------------------------------------------------------ + // initialize rrd, registry, health, rrdpush, etc. 
+ + delta_startup_time("collecting system info"); + + netdata_anonymous_statistics_enabled=-1; + struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info)); + __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED); + get_system_info(system_info); + + const char *guid = registry_get_this_machine_guid(); +#ifdef ENABLE_SENTRY + nd_sentry_set_user(guid); +#else + UNUSED(guid); +#endif + + system_info->hops = 0; + get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist); + + delta_startup_time("initialize RRD structures"); + + if(rrd_init(netdata_configured_hostname, system_info, false)) { + set_late_global_environment(system_info); + fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname); + } + + delta_startup_time("check for incomplete shutdown"); + + char agent_crash_file[FILENAME_MAX + 1]; + char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; + snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir); + int incomplete_shutdown_detected = (unlink(agent_incomplete_shutdown_file) == 0); + snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); + int crash_detected = (unlink(agent_crash_file) == 0); + int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 444); + if (fd >= 0) + close(fd); + + + // ------------------------------------------------------------------------ + // Claim netdata agent to a cloud endpoint + + delta_startup_time("collect claiming info"); + + if (claiming_pending_arguments) + claim_agent(claiming_pending_arguments, false, NULL); + load_claiming_state(); + + // ------------------------------------------------------------------------ + // enable log flood protection + + nd_log_limits_reset(); + + // Load host labels + delta_startup_time("collect 
host labels"); + reload_host_labels(); + + // ------------------------------------------------------------------------ + // spawn the threads + + delta_startup_time("start the static threads"); + + web_server_config_options(); + + set_late_global_environment(system_info); + for (i = 0; static_threads[i].name != NULL ; i++) { + struct netdata_static_thread *st = &static_threads[i]; + + if(st->enabled) { + netdata_log_debug(D_SYSTEM, "Starting thread %s.", st->name); + st->thread = nd_thread_create(st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st); + } + else + netdata_log_debug(D_SYSTEM, "Not starting thread %s.", st->name); + } + ml_start_threads(); + + // ------------------------------------------------------------------------ + // Initialize netdata agent command serving from cli and signals + + delta_startup_time("initialize commands API"); + + commands_init(); + + delta_startup_time("ready"); + + usec_t ready_ut = now_monotonic_usec(); + netdata_log_info("NETDATA STARTUP: completed in %llu ms. 
Enjoy real-time performance monitoring!", (ready_ut - started_ut) / USEC_PER_MS); + netdata_ready = true; + + analytics_statistic_t start_statistic = { "START", "-", "-" }; + analytics_statistic_send(&start_statistic); + if (crash_detected) { + analytics_statistic_t crash_statistic = { "CRASH", "-", "-" }; + analytics_statistic_send(&crash_statistic); + } + if (incomplete_shutdown_detected) { + analytics_statistic_t incomplete_shutdown_statistic = { "INCOMPLETE_SHUTDOWN", "-", "-" }; + analytics_statistic_send(&incomplete_shutdown_statistic); + } + + //check if ANALYTICS needs to start + if (netdata_anonymous_statistics_enabled == 1) { + for (i = 0; static_threads[i].name != NULL; i++) { + if (!strncmp(static_threads[i].name, "ANALYTICS", 9)) { + struct netdata_static_thread *st = &static_threads[i]; + st->enabled = 1; + netdata_log_debug(D_SYSTEM, "Starting thread %s.", st->name); + st->thread = nd_thread_create(st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st); + } + } + } + + // ------------------------------------------------------------------------ + // Report ACLK build failure +#ifndef ENABLE_ACLK + netdata_log_error("This agent doesn't have ACLK."); + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/.aclk_report_sent", netdata_configured_varlib_dir); + if (netdata_anonymous_statistics_enabled > 0 && access(filename, F_OK)) { // -1 -> not initialized + analytics_statistic_t statistic = { "ACLK_DISABLED", "-", "-" }; + analytics_statistic_send(&statistic); + + int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 444); + if (fd == -1) + netdata_log_error("Cannot create file '%s'. 
Please fix this.", filename); + else + close(fd); + } +#endif + + // ------------------------------------------------------------------------ + // initialize WebRTC + + webrtc_initialize(); + + // ------------------------------------------------------------------------ + // unblock signals + + signals_unblock(); + + // ------------------------------------------------------------------------ + // Handle signals + + signals_handle(); + + // should never reach this point + // but we need it for rpmlint #2752 + return 1; +} diff --git a/src/daemon/main.h b/src/daemon/main.h new file mode 100644 index 000000000..faf7d5b69 --- /dev/null +++ b/src/daemon/main.h @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_MAIN_H +#define NETDATA_MAIN_H 1 + +#include "common.h" + +extern struct config netdata_config; + +void cancel_main_threads(void); +int killpid(pid_t pid); + +typedef enum { + ABILITY_DATA_QUERIES = (1 << 0), + ABILITY_WEB_REQUESTS = (1 << 1), + ABILITY_STREAMING_CONNECTIONS = (1 << 2), + SERVICE_MAINTENANCE = (1 << 3), + SERVICE_COLLECTORS = (1 << 4), + SERVICE_REPLICATION = (1 << 5), + SERVICE_WEB_SERVER = (1 << 6), + SERVICE_ACLK = (1 << 7), + SERVICE_HEALTH = (1 << 8), + SERVICE_STREAMING = (1 << 9), + SERVICE_CONTEXT = (1 << 10), + SERVICE_ANALYTICS = (1 << 11), + SERVICE_EXPORTERS = (1 << 12), + SERVICE_ACLKSYNC = (1 << 13), + SERVICE_HTTPD = (1 << 14) +} SERVICE_TYPE; + +typedef enum { + SERVICE_THREAD_TYPE_NETDATA, + SERVICE_THREAD_TYPE_LIBUV, + SERVICE_THREAD_TYPE_EVENT_LOOP, +} SERVICE_THREAD_TYPE; + +typedef void (*force_quit_t)(void *data); +typedef void (*request_quit_t)(void *data); + +void service_exits(void); +bool service_running(SERVICE_TYPE service); +struct service_thread *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update __maybe_unused); + +#endif /* NETDATA_MAIN_H */ diff --git a/src/daemon/metrics.csv 
b/src/daemon/metrics.csv new file mode 100644 index 000000000..4aa71a364 --- /dev/null +++ b/src/daemon/metrics.csv @@ -0,0 +1,254 @@ +metric,scope,dimensions,unit,description,chart_type,labels,plugin,module +netdata.aclk_cloud_req,,"received, malformed",req/s,Requests received from cloud,stacked,,netdata,stats +netdata.aclk_cloud_req_http_type,,"other, info, data, alarms, alarm_log, chart, charts, function, functions",req/s,Requests received from cloud via HTTP by their type,stacked,,netdata,stats +netdata.aclk_mqtt_tx_perc,,used,%,Actively used percentage of MQTT Tx Buffer,line,,netdata,stats +netdata.aclk_mqtt_tx_queue,,"usable, reclaimable, used, free, size",B,State of transmit MQTT queue.,line,,netdata,stats +netdata.aclk_mqtt_wss_time,,"keep-alive, socket_read_ssl, socket_write_ssl, process_websocket, process_mqtt",us,"Time spent handling MQTT, WSS, SSL and network communication.",stacked,,netdata,stats +netdata.aclk_openssl_bytes,,"sent, received",B/s,Received and Sent bytes.,stacked,,netdata,stats +netdata.aclk_processed_query_type,,"unknown, http_api_request_v2, register_node, node_state_update, chart_and_dim_update, chart_config_updated, reset_chart_messages, update_retention_info, update_node_info, alarm_checkpoint, provide_alarm_config, alarm_snapshot, update_node_collectors, generic_binary_proto_message",cmd/s,Query thread commands processed by their type,stacked,,netdata,stats +netdata.aclk_protobuf_rx_types,,"cmd, CreateNodeInstanceResult, SendNodeInstances, StreamChartsAndDimensions, ChartsAndDimensionsAck, UpdateChartConfigs, StartAlarmStreaming, SendAlarmCheckpoint, SendAlarmConfiguration, SendAlarmSnapshot, DisconnectReq, ContextsCheckpoint, StopStreamingContexts, CancelPendingRequest",msg/s,Received new cloud architecture messages by their type.,stacked,,netdata,stats +netdata.aclk_query_per_second,,"added, dispatched",queries/s,ACLK Queries per second,area,,netdata,stats +netdata.aclk_query_threads,,"Query 0, Query 1, Query 2, Query 3, Query
4, Query 5",req/s,Queries Processed Per Thread,stacked,,netdata,stats +netdata.aclk_query_time,,"avg, max, total",us,Time it took to process cloud requested DB queries,line,,netdata,stats +netdata.aclk_status,,online,connected,ACLK/Cloud connection status,line,,netdata,stats +netdata.apps_children_fix,,"cutime, cstime, cgtime, cminflt, cmajflt",percentage,Apps Plugin Exited Children Normalization Ratios,line,,apps.plugin, +netdata.apps_cpu,,"user, system",milliseconds/s,Apps Plugin CPU,stacked,,apps.plugin, +netdata.apps_fix,,"utime, stime, gtime, minflt, majflt",percentage,Apps Plugin Normalization Ratios,line,,apps.plugin, +netdata.apps_sizes,,"calls, files, filenames, inode_changes, link_changes, pids, fds, targets, new_pids",files/s,Apps Plugin Files,line,,apps.plugin, +netdata.clients,,clients,connected clients,Netdata Web Clients,line,,netdata,stats +netdata.compression_ratio,,savings,percentage,Netdata API Responses Compression Savings Ratio,line,,netdata,stats +netdata.db_points_read,,"/api/v1/data, /api/v1/weights, /api/v1/badge, health, ml, exporters, backfill, replication",points/s,Netdata DB Points Query Read,stacked,,netdata,stats +netdata.db_points_results,,"/api/v1/data, /api/v1/weights, /api/v1/badge, health, ml, replication",points/s,Netdata Points in Query Results,stacked,,netdata,stats +netdata.db_points_stored,,"tier0, tier1, tier2",points/s,Netdata DB Points Stored,stacked,,netdata,stats +netdata.dbengine_buffers,,"pgc, mrg, opcodes, query handles, descriptors, wal, workers, pdc, pd, extent io, extent buffers, epdl, deol",bytes,Netdata DB Buffers,stacked,,netdata,stats +netdata.dbengine_cache_hit_ratio,,"overall, main cache, extent cache, extent merge",%,Netdata Queries Cache Hit Ratio,line,,netdata,stats +netdata.dbengine_compression_ratio,,savings,percentage,Netdata DB engine data extents' compression savings ratio,line,,netdata,stats +netdata.dbengine_events,,"journal v2 mapped, journal v2 unmapped, datafile creation, datafile deletion, 
datafile deletion spin, journal v2 indexing, retention",events/s,Netdata Database Events,line,,netdata,stats +netdata.dbengine_extent_cache_events,,"evictions aggressive, evictions critical, flushes critical",events/s,Netdata extent Cache Events,area,,netdata,stats +netdata.dbengine_extent_cache_hit_ratio,,"closest, exact",%,Netdata extent Cache Hit Ratio,line,,netdata,stats +netdata.dbengine_extent_cache_memory,,"free, hot, dirty, clean, index, evicting, flushing",bytes,Netdata extent Cache Memory,stacked,,netdata,stats +netdata.dbengine_extent_cache_memory_changes,,"new clean, evictions, new hot",bytes/s,Netdata extent Cache Memory Changes,area,,netdata,stats +netdata.dbengine_extent_cache_memory_migrations,,"dirty to clean, hot to dirty",bytes/s,Netdata extent Cache Memory Migrations,area,,netdata,stats +netdata.dbengine_extent_cache_operations,,"search closest, search exact, add hot, add clean, evictions, flushes, acquires, releases, del acquires",ops/s,Netdata extent Cache Operations,line,,netdata,stats +netdata.dbengine_extent_cache_pages,,"clean, hot, dirty, referenced",pages,Netdata extent Cache Pages,line,,netdata,stats +netdata.dbengine_extent_cache_workers,,"searchers, adders, evictors, flushers, hot2dirty, jv2 flushers",workers,Netdata extent Cache Workers,line,,netdata,stats +netdata.dbengine_extent_target_memory,,"current, wanted, referenced, hot max, dirty max, hot, dirty",bytes,Netdata extent Target Cache Memory,line,,netdata,stats +netdata.dbengine_extent_waste_events,,"evictions skipped, flushes cancelled, acquire spins, release spins, insert spins, delete spins, evict spins, flush spins",events/s,Netdata extent Waste Events,line,,netdata,stats +netdata.dbengine_global_errors,,"io_errors, fs_errors, pg_cache_over_half_dirty_events",errors/s,Netdata DB engine errors,line,,netdata,stats +netdata.dbengine_global_file_descriptors,,"current, max",descriptors,Netdata DB engine File Descriptors,line,,netdata,stats +netdata.dbengine_io_operations,,"reads, 
writes",operations/s,Netdata DB engine I/O operations,line,,netdata,stats +netdata.dbengine_io_throughput,,"reads, writes",MiB/s,Netdata DB engine I/O throughput,line,,netdata,stats +netdata.dbengine_main_cache_events,,"evictions aggressive, evictions critical, flushes critical",events/s,Netdata main Cache Events,area,,netdata,stats +netdata.dbengine_main_cache_hit_ratio,,"closest, exact",%,Netdata main Cache Hit Ratio,line,,netdata,stats +netdata.dbengine_main_cache_memory,,"free, hot, dirty, clean, index, evicting, flushing",bytes,Netdata main Cache Memory,stacked,,netdata,stats +netdata.dbengine_main_cache_memory_changes,,"new clean, evictions, new hot",bytes/s,Netdata main Cache Memory Changes,area,,netdata,stats +netdata.dbengine_main_cache_memory_migrations,,"dirty to clean, hot to dirty",bytes/s,Netdata main Cache Memory Migrations,area,,netdata,stats +netdata.dbengine_main_cache_operations,,"search closest, search exact, add hot, add clean, evictions, flushes, acquires, releases, del acquires",ops/s,Netdata main Cache Operations,line,,netdata,stats +netdata.dbengine_main_cache_pages,,"clean, hot, dirty, referenced",pages,Netdata main Cache Pages,line,,netdata,stats +netdata.dbengine_main_cache_workers,,"searchers, adders, evictors, flushers, hot2dirty, jv2 flushers",workers,Netdata main Cache Workers,line,,netdata,stats +netdata.dbengine_main_target_memory,,"current, wanted, referenced, hot max, dirty max, hot, dirty",bytes,Netdata main Target Cache Memory,line,,netdata,stats +netdata.dbengine_main_waste_events,,"evictions skipped, flushes cancelled, acquire spins, release spins, insert spins, delete spins, evict spins, flush spins",events/s,Netdata main Waste Events,line,,netdata,stats +netdata.dbengine_memory,,"main cache, open cache, extent cache, metrics registry, buffers",bytes,Netdata DB Memory,stacked,,netdata,stats +netdata.dbengine_metrics,,"all, acquired, collected, with retention, without retention, multi-collected",metrics,Netdata Metrics in 
Metrics Registry,line,,netdata,stats +netdata.dbengine_metrics_registry_operations,,"add, delete, search",metrics,Netdata Metrics Registry Operations,line,,netdata,stats +netdata.dbengine_metrics_registry_references,,references,references,Netdata Metrics Registry References,line,,netdata,stats +netdata.dbengine_open_cache_events,,"evictions aggressive, evictions critical, flushes critical",events/s,Netdata open Cache Events,area,,netdata,stats +netdata.dbengine_open_cache_hit_ratio,,"closest, exact",%,Netdata open Cache Hit Ratio,line,,netdata,stats +netdata.dbengine_open_cache_memory,,"free, hot, dirty, clean, index, evicting, flushing",bytes,Netdata open Cache Memory,stacked,,netdata,stats +netdata.dbengine_open_cache_memory_changes,,"new clean, evictions, new hot",bytes/s,Netdata open Cache Memory Changes,area,,netdata,stats +netdata.dbengine_open_cache_memory_migrations,,"dirty to clean, hot to dirty",bytes/s,Netdata open Cache Memory Migrations,area,,netdata,stats +netdata.dbengine_open_cache_operations,,"search closest, search exact, add hot, add clean, evictions, flushes, acquires, releases, del acquires",ops/s,Netdata open Cache Operations,line,,netdata,stats +netdata.dbengine_open_cache_pages,,"clean, hot, dirty, referenced",pages,Netdata open Cache Pages,line,,netdata,stats +netdata.dbengine_open_cache_workers,,"searchers, adders, evictors, flushers, hot2dirty, jv2 flushers",workers,Netdata open Cache Workers,line,,netdata,stats +netdata.dbengine_open_target_memory,,"current, wanted, referenced, hot max, dirty max, hot, dirty",bytes,Netdata open Target Cache Memory,line,,netdata,stats +netdata.dbengine_open_waste_events,,"evictions skipped, flushes cancelled, acquire spins, release spins, insert spins, delete spins, evict spins, flush spins",events/s,Netdata open Waste Events,line,,netdata,stats +netdata.dbengine_prep_timings,,"routing, main cache, open cache, journal v2, pass4",usec/s,Netdata Query Preparation Timings,stacked,,netdata,stats 
+netdata.dbengine_queries,,"total, open cache, journal v2, planned with gaps, executed with gaps",queries/s,Netdata Queries,line,,netdata,stats +netdata.dbengine_queries_running,,queries,queries,Netdata Queries Running,line,,netdata,stats +netdata.dbengine_query_next_page,,"pass4, failed slow, failed fast, loaded slow, loaded fast",pages/s,Netdata Query Next Page,stacked,,netdata,stats +netdata.dbengine_query_next_page_issues,,"zero timestamp, invalid size, past time, overlapping, update every fixed, entries fixed",pages/s,Netdata Query Next Page Issues,stacked,,netdata,stats +netdata.dbengine_query_pages_data_source,,"main cache, disk, extent cache",pages/s,Netdata Query Pages to Data Source,stacked,,netdata,stats +netdata.dbengine_query_pages_disk_load,,"ok compressed, fail invalid page, ok uncompressed, fail cant mmap, fail unavailable, fail unroutable, fail not found, fail invalid extent, extent merged, cancelled",pages/s,Netdata Query Pages Loaded from Disk,line,,netdata,stats +netdata.dbengine_query_pages_metadata_source,,"cache hit, journal v2 scan, open journal",pages/s,Netdata Query Pages Metadata Source,stacked,,netdata,stats +netdata.dbengine_query_timings,,"init, prep wait, next page disk fast, next page disk slow, next page preload fast, next page preload slow",usec/s,Netdata Query Timings,stacked,,netdata,stats +netdata.dictionaries.category.callbacks,category,"inserts, deletes, conflicts, reacts",callbacks/s,Dictionary Callbacks,line,category,netdata,stats +netdata.dictionaries.category.dictionaries,category,"active, deleted",dictionaries,Dictionaries,line,category,netdata,stats +netdata.dictionaries.category.items,category,"active, deleted, referenced",items,Dictionary Items,line,category,netdata,stats +netdata.dictionaries.category.memory,category,"index, data, structures",bytes,Dictionary Memory,stacked,category,netdata,stats +netdata.dictionaries.category.ops,category,"creations, destructions, flushes, traversals, walkthroughs, 
garbage_collections, searches, inserts, resets, deletes",ops/s,Dictionary Operations,line,category,netdata,stats +netdata.dictionaries.category.spins,category,"use, search, insert, delete",count,Dictionary Spins,line,category,netdata,stats +netdata.ebpf_hash_tables_count,,hash_table,hash tables,Number of hash tables loaded.,line,,ebpf.plugin,process +netdata.ebpf_hash_tables_per_core,,"per_core, unique",threads,How threads are loading hash/array tables.,line,,ebpf.plugin,process +netdata.ebpf_kernel_memory,,memory_locked,bytes,Memory allocated for hash tables.,line,,ebpf.plugin,process +netdata.ebpf_load_methods,,"legacy, co-re",methods,Load info.,line,,ebpf.plugin,process +netdata.ebpf_threads,,"total, running",threads,Threads info.,line,,ebpf.plugin,process +netdata.go_plugin_execution_time,,time,ms,Execution time,line,,go.d,logind +netdata.heartbeat,,"min, max, average",microseconds,System clock jitter,area,,netdata,stats +netdata.machine_learning_status,,"enabled, disabled-sp",dimensions,Machine learning status,line,,ml.plugin,training +netdata.memory,,"db, collectors, hosts, rrdset rrddim, contexts, health, functions, labels, strings, streaming, replication, buffers, workers, aral, judy, other",bytes,Netdata Memory,stacked,,netdata,stats +netdata.memory_buffers,,"queries, collection, aclk, api, functions, sqlite, exporters, health, streaming, streaming cbuf, replication, web, aral, judy",bytes,Netdata Memory Buffers,stacked,,netdata,stats +netdata.metric_types,,"constant, variable",dimensions,Dimensions by metric type,line,,ml.plugin,training +netdata.ml_models_consulted,,num_models_consulted,models,KMeans models used for prediction,area,,ml.plugin,detection +netdata.net,,"in, out",kilobits/s,Netdata Network Traffic,area,,netdata,stats +netdata.private_charts,,charts,charts,Private metric charts created by the netdata statsd server,area,,statsd.plugin,stats +netdata.queries,,"/api/v1/data, /api/v1/weights, /api/v1/badge, health, ml, exporters, backfill, 
replication",queries/s,Netdata DB Queries,stacked,,netdata,stats +netdata.queue_stats,,"queue_size, popped_items",items,Training queue stats,line,,ml.plugin,training +netdata.requests,,requests,requests/s,Netdata Web Requests,line,,netdata,stats +netdata.response_time,,"average, max",milliseconds/request,Netdata API Response Time,line,,netdata,stats +netdata.server_cpu,,"user, system",milliseconds/s,Netdata CPU usage,stacked,,netdata,stats +netdata.sqlite3_context_cache,,"cache_hit, cache_miss, cache_spill, cache_write",ops/s,Netdata SQLite3 context cache,line,,netdata,stats +netdata.sqlite3_metatada_cache,,"cache_hit, cache_miss, cache_spill, cache_write",ops/s,Netdata SQLite3 metadata cache,line,,netdata,stats +netdata.sqlite3_queries,,queries,queries/s,Netdata SQLite3 Queries,line,,netdata,stats +netdata.sqlite3_queries_by_status,,"ok, failed, busy, locked",queries/s,Netdata SQLite3 Queries by status,line,,netdata,stats +netdata.sqlite3_rows,,ok,rows/s,Netdata SQLite3 Rows,line,,netdata,stats +netdata.statsd_bytes,,"tcp, udp",kilobits/s,Bytes read by the netdata statsd server,stacked,,statsd.plugin,stats +netdata.statsd_events,,"gauges, counters, timers, meters, histograms, sets, dictionaries, unknown, errors",events/s,Events processed by the netdata statsd server,stacked,,statsd.plugin,stats +netdata.statsd_metrics,,"gauges, counters, timers, meters, histograms, sets, dictionaries",metrics,Metrics in the netdata statsd database,stacked,,statsd.plugin,stats +netdata.statsd_packets,,"tcp, udp",packets/s,Network packets processed by the netdata statsd server,stacked,,statsd.plugin,stats +netdata.statsd_reads,,"tcp, udp",reads/s,Read operations made by the netdata statsd server,stacked,,statsd.plugin,stats +netdata.statsd_useful_metrics,,"gauges, counters, timers, meters, histograms, sets, dictionaries",metrics,Useful metrics in the netdata statsd database,stacked,,statsd.plugin,stats +netdata.strings_entries,,"entries, references",entries,Strings 
entries,area,,netdata,stats +netdata.strings_memory,,memory,bytes,Strings memory,area,,netdata,stats +netdata.strings_ops,,"inserts, deletes, searches, duplications, releases",ops/s,Strings operations,line,,netdata,stats +netdata.tcp_connected,,connected,sockets,statsd server TCP connected sockets,line,,statsd.plugin,stats +netdata.tcp_connects,,"connects, disconnects",events,statsd server TCP connects and disconnects,line,,statsd.plugin,stats +netdata.training_results,,"ok, invalid-queries, not-enough-values, null-acquired-dimensions, chart-under-replication",events,Training results,line,,ml.plugin,training +netdata.training_status,,"untrained, pending-without-model, trained, pending-with-model, silenced",dimensions,Training status of dimensions,line,,ml.plugin,training +netdata.training_time_stats,,"allotted, consumed, remaining",milliseconds,Training time stats,line,,ml.plugin,training +netdata.uptime,,uptime,seconds,Netdata uptime,line,,netdata,stats +netdata.workers.aclkquery.cpu,,"min, max, average",%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.aclkquery.jobs_started_by_type,,"http_api_request_v2, register_node, node_state_update, chart_and_dim_update, chart_config_updated, reset_chart_messages, update_retention_info, update_node_info, alarm_checkpoint, provide_alarm_config, alarm_snapshot, update_node_collectors, generic_binary_proto_message",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.aclkquery.threads,,"free, busy",threads,Netdata Workers Threads,stacked,,netdata,stats +netdata.workers.aclkquery.time,,"min, max, average",%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.aclkquery.time_by_type,,"http_api_request_v2, register_node, node_state_update, chart_and_dim_update, chart_config_updated, reset_chart_messages, update_retention_info, update_node_info, alarm_checkpoint, provide_alarm_config, alarm_snapshot, update_node_collectors, generic_binary_proto_message",ms,Netdata Workers 
Busy Time by Type,stacked,,netdata,stats +netdata.workers.aclksync.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.aclksync.jobs_started_by_type,,"noop, cleanup, node delete, node state, alert push, alert conf push, alert snapshot, alert checkpoint, alerts check, timer",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.aclksync.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.aclksync.time_by_type,,"noop, cleanup, node delete, node state, alert push, alert conf push, alert snapshot, alert checkpoint, alerts check, timer",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.cgroups.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.cgroups.jobs_started_by_type,,"lock, read, chart",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.cgroups.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.cgroups.time_by_type,,"lock, read, chart",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.cgroupsdisc.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.cgroupsdisc.jobs_started_by_type,,"init, find, process, rename, network, new, update, cleanup, copy, share, lock",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.cgroupsdisc.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.cgroupsdisc.time_by_type,,"init, find, process, rename, network, new, update, cleanup, copy, share, lock",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.cpu_total,,"stats, health, mltrain, mldetect, streamsnd, dbengine, libuv, web, aclksync, metasync, pluginsd, statsd, statsdflush, proc, netdev, cgroups, cgroupsdisc, diskspace, tc, timex, idlejitter, rrdcontext, replication, service, aclkquery, streamrcv",%,Netdata Workers CPU 
Utilization,stacked,,netdata,stats +netdata.workers.dbengine.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.dbengine.jobs_started_by_type,,"noop, query, extent write, extent read, flushed to open, db rotate, journal index, flush init, evict init, ctx shutdown, ctx quiesce, get opcode, query cb, extent write cb, extent read cb, flushed to open cb, db rotate cb, journal index cb, flush init cb, evict init cb, ctx shutdown cb, ctx quiesce cb, timer, transaction buffer flush cb",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.dbengine.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.dbengine.time_by_type,,"noop, query, extent write, extent read, flushed to open, db rotate, journal index, flush init, evict init, ctx shutdown, ctx quiesce, get opcode, query cb, extent write cb, extent read cb, flushed to open cb, db rotate cb, journal index cb, flush init cb, evict init cb, ctx shutdown cb, ctx quiesce cb, timer, transaction buffer flush cb",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.dbengine.value.opcodes_waiting,,"min, max, average",opcodes,Netdata Workers dbengine value of opcodes waiting,line,,netdata,stats +netdata.workers.dbengine.value.works_dispatched,,"min, max, average",works,Netdata Workers dbengine value of works dispatched,line,,netdata,stats +netdata.workers.dbengine.value.works_executing,,"min, max, average",works,Netdata Workers dbengine value of works executing,line,,netdata,stats +netdata.workers.diskspace.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.diskspace.jobs_started_by_type,,"mountinfo, mountpoint, cleanup",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.diskspace.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.diskspace.time_by_type,,"mountinfo, mountpoint, cleanup",ms,Netdata Workers Busy Time by 
Type,stacked,,netdata,stats +netdata.workers.health.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.health.jobs_started_by_type,,"rrd lock, host lock, db lookup, calc eval, warning eval, critical eval, alarm log entry, alarm log process, rrdset init, rrddim init",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.health.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.health.time_by_type,,"rrd lock, host lock, db lookup, calc eval, warning eval, critical eval, alarm log entry, alarm log process, rrdset init, rrddim init",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.idlejitter.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.idlejitter.jobs_started_by_type,,measurements,jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.idlejitter.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.idlejitter.time_by_type,,measurements,ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.libuv.cpu,,"min, max, average",%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.libuv.jobs_started_by_type,,"worker init, query, extent cache, extent mmap, extent decompression, page lookup, page populate, page allocate, flush main, extent write, flushed to open, jv2 index wait, jv2 indexing, datafile delete wait, datafile deletion, find rotated metrics, find remaining retention, update retention, evict main, dbengine buffers cleanup, dbengine quiesce, dbengine shutdown, metadata load host context, metadata store host, metadata cleanup, schedule command",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.libuv.threads,,"free, busy",threads,Netdata Workers Threads,stacked,,netdata,stats +netdata.workers.libuv.time,,"min, max, average",%,Netdata Workers Busy Time,area,,netdata,stats 
+netdata.workers.libuv.time_by_type,,"worker init, query, extent cache, extent mmap, extent decompression, page lookup, page populate, page allocate, flush main, extent write, flushed to open, jv2 index wait, jv2 indexing, datafile delete wait, datafile deletion, find rotated metrics, find remaining retention, update retention, evict main, dbengine buffers cleanup, dbengine quiesce, dbengine shutdown, metadata load host context, metadata store host, metadata cleanup, schedule command",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.metasync.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.metasync.jobs_started_by_type,,"noop, timer, delete dimension, add claim id, add host info, maintenance, ml load models",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.metasync.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.metasync.time_by_type,,"noop, timer, delete dimension, add claim id, add host info, maintenance, ml load models",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.mldetect.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.mldetect.jobs_started_by_type,,"collect stats, dim chart, host chart, training stats",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.mldetect.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.mldetect.time_by_type,,"collect stats, dim chart, host chart, training stats",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.mltrain.cpu,,"min, max, average",%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.mltrain.jobs_started_by_type,,"pop queue, acquire, query, kmeans, update models, release, update host, flush models",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.mltrain.threads,,"free, busy",threads,Netdata 
Workers Threads,stacked,,netdata,stats +netdata.workers.mltrain.time,,"min, max, average",%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.mltrain.time_by_type,,"pop queue, acquire, query, kmeans, update models, release, update host, flush models",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.netdev.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.netdev.jobs_started_by_type,,netdev,jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.netdev.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.netdev.time_by_type,,netdev,ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.pluginsd.cpu,,"min, max, average",%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.pluginsd.jobs_started_by_type,,"FLUSH, DISABLE, HOST_DEFINE, HOST_DEFINE_END, HOST_LABEL, HOST, EXIT, CHART, DIMENSION, VARIABLE, LABEL, OVERWRITE, CLABEL_COMMIT, CLABEL, FUNCTION, FUNCTION_RESULT_BEGIN, BEGIN, SET, END",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.pluginsd.threads,,"free, busy",threads,Netdata Workers Threads,stacked,,netdata,stats +netdata.workers.pluginsd.time,,"min, max, average",%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.pluginsd.time_by_type,,"FLUSH, DISABLE, HOST_DEFINE, HOST_DEFINE_END, HOST_LABEL, HOST, EXIT, CHART, DIMENSION, VARIABLE, LABEL, OVERWRITE, CLABEL_COMMIT, CLABEL, FUNCTION, FUNCTION_RESULT_BEGIN, BEGIN, SET, END",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.proc.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.proc.jobs_started_by_type,,"stat, uptime, loadavg, entropy, pressure, interrupts, softirqs, vmstat, meminfo, ksm, zram, ecc, numa, pagetypeinfo, netwireless, sockstat, sockstat6, netstat, sctp, softnet, ipvs, infiniband, conntrack, synproxy, diskstats, mdstat, nfsd, 
nfs, zfs_arcstats, zfs_pool_state, btrfs, ipc, power_supply",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.proc.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.proc.time_by_type,,"stat, uptime, loadavg, entropy, pressure, interrupts, softirqs, vmstat, meminfo, ksm, zram, ecc, numa, pagetypeinfo, netwireless, sockstat, sockstat6, netstat, sctp, softnet, ipvs, infiniband, conntrack, synproxy, diskstats, mdstat, nfsd, nfs, zfs_arcstats, zfs_pool_state, btrfs, ipc, power_supply",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.replication.cpu,,"min, max, average",%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.replication.jobs_started_by_type,,"find next, querying, dict delete, find chart, prepare query, check consistency, commit, cleanup, wait, statistics",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.replication.rate.added_requests,,"min, max, average",requests/s,Netdata Workers replication rate of added requests,line,,netdata,stats +netdata.workers.replication.rate.finished_requests,,"min, max, average",requests/s,Netdata Workers replication rate of finished requests,line,,netdata,stats +netdata.workers.replication.rate.sender_resets,,"min, max, average",resets/s,Netdata Workers replication rate of sender resets,line,,netdata,stats +netdata.workers.replication.threads,,"free, busy",threads,Netdata Workers Threads,stacked,,netdata,stats +netdata.workers.replication.time,,"min, max, average",%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.replication.time_by_type,,"find next, querying, dict delete, find chart, prepare query, check consistency, commit, cleanup, wait, statistics",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.replication.value.completion,,"min, max, average",%,Netdata Workers replication value of completion,line,,netdata,stats 
+netdata.workers.replication.value.no_room_requests,,"min, max, average",requests,Netdata Workers replication value of no room requests,line,,netdata,stats +netdata.workers.replication.value.pending_requests,,"min, max, average",requests,Netdata Workers replication value of pending requests,line,,netdata,stats +netdata.workers.replication.value.senders_full,,"min, max, average",senders,Netdata Workers replication value of senders full,line,,netdata,stats +netdata.workers.rrdcontext.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.rrdcontext.jobs_started_by_type,,"hosts, dedup checks, sent contexts, deduplicated contexts, metrics retention, queued contexts, cleanups, deletes, check metrics, check instances, check contexts",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.rrdcontext.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.rrdcontext.time_by_type,,"hosts, dedup checks, sent contexts, deduplicated contexts, metrics retention, queued contexts, cleanups, deletes, check metrics, check instances, check contexts",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.rrdcontext.value.hub_queue_size,,"min, max, average",contexts,Netdata Workers rrdcontext value of hub queue size,line,,netdata,stats +netdata.workers.rrdcontext.value.post_processing_queue_size,,"min, max, average",contexts,Netdata Workers rrdcontext value of post processing queue size,line,,netdata,stats +netdata.workers.service.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.service.jobs_started_by_type,,"child chart obsoletion check, cleanup obsolete charts, archive chart, archive chart dimensions, archive dimension, cleanup orphan hosts, cleanup obsolete charts on all hosts, free host, save host charts, delete host charts, free chart, save chart, delete chart, free dimension, main cache evictions, main cache flushes, open cache evictions, open 
cache flushes",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.service.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.service.time_by_type,,"child chart obsoletion check, cleanup obsolete charts, archive chart, archive chart dimensions, archive dimension, cleanup orphan hosts, cleanup obsolete charts on all hosts, free host, save host charts, delete host charts, free chart, save chart, delete chart, free dimension, main cache evictions, main cache flushes, open cache evictions, open cache flushes",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.stats.cpu,,"min, max, average",%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.stats.jobs_started_by_type,,"global, registry, workers, dbengine, strings, dictionaries, malloc_trace, sqlite3",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.stats.threads,,"free, busy",threads,Netdata Workers Threads,stacked,,netdata,stats +netdata.workers.stats.time,,"min, max, average",%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.stats.time_by_type,,"global, registry, workers, dbengine, strings, dictionaries, malloc_trace, sqlite3",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.statsd.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.statsd.jobs_started_by_type,,"tcp connect, tcp disconnect, receive, send",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.statsd.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.statsd.time_by_type,,"tcp connect, tcp disconnect, receive, send",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.statsdflush.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.statsdflush.jobs_started_by_type,,"gauges, counters, meters, timers, histograms, sets, dictionaries, 
statistics",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.statsdflush.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.statsdflush.time_by_type,,"gauges, counters, meters, timers, histograms, sets, dictionaries, statistics",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.streamrcv.cpu,,"average, min, max",%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.streamrcv.jobs_started_by_type,,"CHART, DIMENSION, VARIABLE, LABEL, OVERWRITE, CLABEL_COMMIT, CLABEL, FUNCTION, FUNCTION_RESULT_BEGIN, BEGIN, SET, END, CHART_DEFINITION_END, RBEGIN, RSET, RDSTATE, RSSTATE, REND, BEGIN2, SET2, END2, CLAIMED_ID",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.streamrcv.rate.received_bytes,,"min, max, average",bytes/s,Netdata Workers streamrcv rate of received bytes,line,,netdata,stats +netdata.workers.streamrcv.rate.uncompressed_bytes,,"min, max, average",bytes/s,Netdata Workers streamrcv rate of uncompressed bytes,line,,netdata,stats +netdata.workers.streamrcv.threads,,"free, busy",threads,Netdata Workers Threads,stacked,,netdata,stats +netdata.workers.streamrcv.time,,"average, min, max",%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.streamrcv.time_by_type,,"CHART, DIMENSION, VARIABLE, LABEL, OVERWRITE, CLABEL_COMMIT, CLABEL, FUNCTION, FUNCTION_RESULT_BEGIN, BEGIN, SET, END, CHART_DEFINITION_END, RBEGIN, RSET, RDSTATE, RSSTATE, REND, BEGIN2, SET2, END2, CLAIMED_ID",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.streamrcv.value.replication_completion,,"min, max, average",%,Netdata Workers streamrcv value of replication completion,line,,netdata,stats +netdata.workers.streamsnd.cpu,,"average, min, max",%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.streamsnd.jobs_started_by_type,,"connect, pipe read, receive, execute, send, disconnect bad handshake, disconnect 
overflow, disconnect timeout, disconnect poll error, disconnect socket error, disconnect ssl error, disconnect parent closed, disconnect receive error, disconnect send error, disconnect no compression, replay request, function",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.streamsnd.rate.bytes_received,,"min, max, average",bytes/s,Netdata Workers streamsnd rate of bytes received,line,,netdata,stats +netdata.workers.streamsnd.rate.bytes_sent,,"min, max, average",bytes/s,Netdata Workers streamsnd rate of bytes sent,line,,netdata,stats +netdata.workers.streamsnd.threads,,"free, busy",threads,Netdata Workers Threads,stacked,,netdata,stats +netdata.workers.streamsnd.time,,"average, min, max",%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.streamsnd.time_by_type,,"connect, pipe read, receive, execute, send, disconnect bad handshake, disconnect overflow, disconnect timeout, disconnect poll error, disconnect socket error, disconnect ssl error, disconnect parent closed, disconnect receive error, disconnect send error, disconnect no compression, replay request, function",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.streamsnd.value.replication_dict_entries,,"min, max, average",entries,Netdata Workers streamsnd value of replication dict entries,line,,netdata,stats +netdata.workers.streamsnd.value.used_buffer_ratio,,"min, max, average",%,Netdata Workers streamsnd value of used buffer ratio,line,,netdata,stats +netdata.workers.tc.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.tc.jobs_started_by_type,,"class, begin, end, sent, lended, tokens, devicename, devicegroup, classname, worktime",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.tc.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.tc.time_by_type,,"class, begin, end, sent, lended, tokens, devicename, devicegroup, classname, 
worktime",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.tc.value.number_of_classes,,"min, max, average",classes,Netdata Workers tc value of number of classes,line,,netdata,stats +netdata.workers.tc.value.number_of_devices,,"min, max, average",devices,Netdata Workers tc value of number of devices,line,,netdata,stats +netdata.workers.tc.value.tc_script_execution_time,,"min, max, average",milliseconds/run,Netdata Workers tc value of tc script execution time,line,,netdata,stats +netdata.workers.timex.cpu,,average,%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.timex.jobs_started_by_type,,clock check,jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.timex.time,,average,%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.timex.time_by_type,,clock check,ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats +netdata.workers.web.cpu,,"min, max, average",%,Netdata Workers CPU Utilization,area,,netdata,stats +netdata.workers.web.jobs_started_by_type,,"connect, disconnect, file start, file end, file read, file write, receive, send, process",jobs,Netdata Workers Jobs Started by Type,stacked,,netdata,stats +netdata.workers.web.threads,,"free, busy",threads,Netdata Workers Threads,stacked,,netdata,stats +netdata.workers.web.time,,"min, max, average",%,Netdata Workers Busy Time,area,,netdata,stats +netdata.workers.web.time_by_type,,"connect, disconnect, file start, file end, file read, file write, receive, send, process",ms,Netdata Workers Busy Time by Type,stacked,,netdata,stats diff --git a/src/daemon/pipename.c b/src/daemon/pipename.c new file mode 100644 index 000000000..70b6a25b4 --- /dev/null +++ b/src/daemon/pipename.c @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "pipename.h" + +#include <stdlib.h> + +/* Returns the IPC pipe name: the value of the NETDATA_PIPENAME environment variable when it is set, otherwise a per-platform default. The returned pointer is either owned by the environment or a string literal; callers must not free it. */ const char *daemon_pipename(void) { + const char *pipename = getenv("NETDATA_PIPENAME"); + if (pipename) + return pipename; + +#ifdef _WIN32 + return
"\\\\?\\pipe\\netdata-cli"; +#else + return "/tmp/netdata-ipc"; +#endif +} diff --git a/src/daemon/pipename.h b/src/daemon/pipename.h new file mode 100644 index 000000000..6ca6e8d08 --- /dev/null +++ b/src/daemon/pipename.h @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef DAEMON_PIPENAME_H +#define DAEMON_PIPENAME_H + +const char *daemon_pipename(void); + +#endif /* DAEMON_PIPENAME_H */ diff --git a/src/daemon/sentry-native/sentry-native.c b/src/daemon/sentry-native/sentry-native.c new file mode 100644 index 000000000..9e6930e55 --- /dev/null +++ b/src/daemon/sentry-native/sentry-native.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sentry-native.h" +#include "daemon/common.h" + +#include "sentry.h" + +/* Returns true when telemetry must stay off: either the ".opt-out-from-anonymous-statistics" file exists in the user config directory, or DISABLE_TELEMETRY is set in the environment. */ static bool sentry_telemetry_disabled(void) +{ + char path[FILENAME_MAX + 1]; + /* bounded formatting: an over-long config directory must not overflow path[] */ snprintfz(path, sizeof(path) - 1, "%s/%s", netdata_configured_user_config_dir, ".opt-out-from-anonymous-statistics"); + + struct stat buffer; + bool opt_out_file_exists = (stat(path, &buffer) == 0); + + if (opt_out_file_exists) + return true; + + return getenv("DISABLE_TELEMETRY") != NULL; +} + +/* Initializes the sentry SDK unless telemetry is disabled: sets the DSN, the database path (".sentry-native" under the cache directory), the environment, the release and the dist, then calls sentry_init(). */ void nd_sentry_init(void) +{ + if (sentry_telemetry_disabled()) + return; + + // path where sentry should save stuff + char path[FILENAME_MAX]; + snprintfz(path, FILENAME_MAX - 1, "%s/%s", netdata_configured_cache_dir, ".sentry-native"); + + sentry_options_t *options = sentry_options_new(); + sentry_options_set_dsn(options, NETDATA_SENTRY_DSN); + sentry_options_set_database_path(options, path); + sentry_options_set_environment(options, NETDATA_SENTRY_ENVIRONMENT); + + /* the release is reported as MAJOR.MINOR.PATCH */ char release[64]; + snprintfz(release, 64 - 1, "%s.%s.%s", + NETDATA_VERSION_MAJOR, NETDATA_VERSION_MINOR, NETDATA_VERSION_PATCH); + sentry_options_set_release(options, release); + + sentry_options_set_dist(options, NETDATA_SENTRY_DIST); +#ifdef NETDATA_INTERNAL_CHECKS + sentry_options_set_debug(options, 1); +#endif + + sentry_init(options); +} + +/* Closes the sentry SDK unless telemetry is disabled. */ void nd_sentry_fini(void) +{ + if
(sentry_telemetry_disabled()) + return; + + sentry_close(); +} + +/* Attaches guid as the "id" of the sentry user object. */ void nd_sentry_set_user(const char *guid) +{ + sentry_value_t user = sentry_value_new_object(); + sentry_value_set_by_key(user, "id", sentry_value_new_string(guid)); + sentry_set_user(user); +} diff --git a/src/daemon/sentry-native/sentry-native.h b/src/daemon/sentry-native/sentry-native.h new file mode 100644 index 000000000..81f909d9f --- /dev/null +++ b/src/daemon/sentry-native/sentry-native.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ND_SENTRY_H +#define ND_SENTRY_H + +void nd_sentry_init(void); +void nd_sentry_fini(void); + +void nd_sentry_set_user(const char *guid); + +#endif /* ND_SENTRY_H */ diff --git a/src/daemon/service.c b/src/daemon/service.c new file mode 100644 index 000000000..ead633445 --- /dev/null +++ b/src/daemon/service.c @@ -0,0 +1,328 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +/* Run service jobs every X seconds */ +#define SERVICE_HEARTBEAT 10 + +#define TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT (3600 / 2) /* seconds added to a child's connect time before its charts are checked for obsoletion */ +#define ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT 60 /* extra grace period, multiplied by the chart's update_every */ + +#define WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK 1 /* worker job ids; their names are registered in service_main() */ +#define WORKER_JOB_CLEANUP_OBSOLETE_CHARTS 2 +#define WORKER_JOB_ARCHIVE_CHART 3 +#define WORKER_JOB_ARCHIVE_CHART_DIMENSIONS 4 +#define WORKER_JOB_ARCHIVE_DIMENSION 5 +#define WORKER_JOB_CLEANUP_ORPHAN_HOSTS 6 +#define WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS 7 +#define WORKER_JOB_FREE_HOST 9 +#define WORKER_JOB_FREE_CHART 12 +#define WORKER_JOB_FREE_DIMENSION 15 +#define WORKER_JOB_PGC_MAIN_EVICT 16 +#define WORKER_JOB_PGC_MAIN_FLUSH 17 +#define WORKER_JOB_PGC_OPEN_EVICT 18 +#define WORKER_JOB_PGC_OPEN_FLUSH 19 + +/* Turns an obsolete dimension into an archived one and frees it. Does nothing when the dimension is already archived or is not obsolete. In dbengine mode the dimension is freed only when rrddim_finalize_collection_and_check_retention() returns false (its uuid is then queued for deletion); otherwise it is kept. */ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) { + RRDSET *st = rd->rrdset; + + if(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED) || !rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) + return; + + worker_is_busy(WORKER_JOB_ARCHIVE_DIMENSION); + + rrddim_flag_set(rd,
RRDDIM_FLAG_ARCHIVED); + rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE); + + if (rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + /* only a collector can mark a chart as obsolete, so we must remove the reference */ + if (!rrddim_finalize_collection_and_check_retention(rd)) { + /* This metric has no data and no references */ + metaqueue_delete_dimension_uuid(&rd->metric_uuid); + } + else { + /* Do not delete this dimension */ + return; + } + } + + worker_is_busy(WORKER_JOB_FREE_DIMENSION); + rrddim_free(st, rd); +} + +/* Walks the dimensions of a chart and hands to svc_rrddim_obsolete_to_archive() every candidate (obsolete dimensions, or all of them when all_dimensions is true) that was last collected more than rrdset_free_obsolete_time_s seconds ago and has exactly one acquired reference. Returns true when there was nothing to do or every candidate was processed; otherwise sets RRDSET_FLAG_OBSOLETE_DIMENSIONS again and returns false. */ static inline bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) { + if(!all_dimensions && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) + return true; + + worker_is_busy(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS); + + rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); + + RRDDIM *rd; + time_t now = now_realtime_sec(); + + size_t dim_candidates = 0; + size_t dim_archives = 0; + + dfe_start_write(st->rrddim_root_index, rd) { + bool candidate = (all_dimensions || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)); + + if(candidate) { + dim_candidates++; + + if(rd->collector.last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now) { + size_t references = dictionary_acquired_item_references(rd_dfe.item); + /* presumably the single reference is the one held by this traversal - TODO confirm */ if(references == 1) { +// netdata_log_info("Removing obsolete dimension 'host:%s/chart:%s/dim:%s'", +// rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd)); + svc_rrddim_obsolete_to_archive(rd); + dim_archives++; + } +// else +// netdata_log_info("Cannot remove obsolete dimension 'host:%s/chart:%s/dim:%s'", +// rrdhost_hostname(st->rrdhost), rrdset_id(st), rrddim_id(rd)); + } + } + } + dfe_done(rd); + + /* something was left behind: keep the flag so the next pass retries */ if(dim_archives != dim_candidates) { + rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); + return false; + } + + return true; +} + +/* Frees an obsolete chart together with its alerts and variables, but only when all of its dimensions could be archived first. */ static void svc_rrdset_obsolete_to_free(RRDSET *st) { + if(!svc_rrdset_archive_obsolete_dimensions(st, true)) + return; + + worker_is_busy(WORKER_JOB_FREE_CHART); + +
rrdcalc_unlink_and_delete_all_rrdset_alerts(st); + + // has to be run after all dimensions are archived - or use-after-free will occur + rrdvar_delete_all(st->rrdvars); + + rrdset_free(st); +} + +/* Processes a host that has pending obsolete charts or dimensions: archives the obsolete dimensions of its charts and frees the obsolete charts that have not been accessed, updated or collected for more than rrdset_free_obsolete_time_s seconds. Charts that are replicating are skipped. The host's pending flags are set again for any work that could not be completed. */ static inline void svc_rrdhost_cleanup_charts_marked_obsolete(RRDHOST *host) { + if(!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS)) + return; + + worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS); + + rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); + + size_t full_candidates = 0; + size_t full_archives = 0; + size_t partial_candidates = 0; + size_t partial_archives = 0; + + time_t now = now_realtime_sec(); + RRDSET *st; + rrdset_foreach_reentrant(st, host) { + if(rrdset_is_replicating(st)) + continue; + + RRDSET_FLAGS flags = rrdset_flag_get(st); + bool obsolete_chart = flags & RRDSET_FLAG_OBSOLETE; + bool obsolete_dims = flags & RRDSET_FLAG_OBSOLETE_DIMENSIONS; + + if(obsolete_dims) { + partial_candidates++; + + if(svc_rrdset_archive_obsolete_dimensions(st, false)) + partial_archives++; + } + + if(obsolete_chart) { + full_candidates++; + + /* all three timestamps must be older than the retention window */ if(unlikely( st->last_accessed_time_s + rrdset_free_obsolete_time_s < now + && st->last_updated.tv_sec + rrdset_free_obsolete_time_s < now + && st->last_collected_time.tv_sec + rrdset_free_obsolete_time_s < now + )) { + svc_rrdset_obsolete_to_free(st); + full_archives++; + } + } + } + rrdset_foreach_done(st); + + if(partial_archives != partial_candidates) + rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); + + if(full_archives != full_candidates) + rrdhost_flag_set(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS); +} + +/* Marks as obsolete every non-replicating chart of the host whose last entry predates the child's connect time, once the grace period after that connect time has elapsed. */ static void svc_rrdhost_detect_obsolete_charts(RRDHOST *host) { + worker_is_busy(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK); + + time_t now = now_realtime_sec(); + time_t last_entry_t; + RRDSET *st; + + time_t child_connect_time = host->child_connect_time; + + rrdset_foreach_read(st, host) { +
if(rrdset_is_replicating(st)) + continue; + + last_entry_t = rrdset_last_entry_s(st); + + /* grace period: a fixed delay plus a number of collection iterations of this chart */ if (last_entry_t && last_entry_t < child_connect_time && + child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + + (ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every) < + now) + + rrdset_is_obsolete___safe_from_collector_thread(st); + } + rrdset_foreach_done(st); +} + +/* Under the rrd read lock, runs the obsolete chart cleanup on every host that is not replicating. For hosts other than localhost it also runs the obsoletion detection (under the host's receiver_lock) when trigger_chart_obsoletion_check is set and the required delay has elapsed. Stops early when SERVICE_MAINTENANCE is no longer running. */ static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() { + worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS); + + rrd_rdlock(); + + RRDHOST *host; + rrdhost_foreach_read(host) { + + if (!service_running(SERVICE_MAINTENANCE)) + break; + + if(rrdhost_receiver_replicating_charts(host) || rrdhost_sender_replicating_charts(host)) + continue; + + svc_rrdhost_cleanup_charts_marked_obsolete(host); + + if (host == localhost) + continue; + + netdata_mutex_lock(&host->receiver_lock); + + time_t now = now_realtime_sec(); + + if (host->trigger_chart_obsoletion_check && + ((host->child_last_chart_command && + host->child_last_chart_command + host->health.health_delay_up_to < now) || + (host->child_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now))) { + svc_rrdhost_detect_obsolete_charts(host); + host->trigger_chart_obsoletion_check = 0; + } + + netdata_mutex_unlock(&host->receiver_lock); + } + + rrd_rdunlock(); +} + +/* Under the rrd write lock, frees the hosts for which rrdhost_should_be_removed() returns true. Archived hosts are skipped unless they are ephemeral and have been disconnected for more than rrdhost_free_ephemeral_time_s seconds. The scan restarts from the beginning after every removal. */ static void svc_rrdhost_cleanup_orphan_hosts(RRDHOST *protected_host) { + worker_is_busy(WORKER_JOB_CLEANUP_ORPHAN_HOSTS); + rrd_wrlock(); + + time_t now = now_realtime_sec(); + + RRDHOST *host; + +restart_after_removal: + rrdhost_foreach_write(host) { + if(!rrdhost_should_be_removed(host, protected_host, now)) + continue; + + bool force = false; + if (rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST) && now - host->last_connected > rrdhost_free_ephemeral_time_s) + force = true; + + bool is_archived = rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED); + if (!force && is_archived) + continue; + + if (force) { + netdata_log_info("Host '%s' with machine
guid '%s' is archived, ephemeral clean up.", rrdhost_hostname(host), host->machine_guid); + } + + worker_is_busy(WORKER_JOB_FREE_HOST); +#ifdef ENABLE_ACLK + // in case we have cloud connection we inform cloud + // a child disconnected + if (netdata_cloud_enabled && force) { + aclk_host_state_update(host, 0, 0); + unregister_node(host->machine_guid); + } +#endif + rrdhost_free___while_having_rrd_wrlock(host, force); + /* the host list changed: start the traversal over */ goto restart_after_removal; + } + + rrd_wrunlock(); +} + +/* Cleanup handler of the service thread: marks the static thread as exiting, unregisters the worker, then marks the thread as exited. Does nothing when no static thread pointer is available. */ static void service_main_cleanup(void *pptr) +{ + struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr); + if(!static_thread) return; + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + netdata_log_debug(D_SYSTEM, "Cleaning up..."); + worker_unregister(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +/* + * The service thread: registers its worker job names, then runs the maintenance jobs every SERVICE_HEARTBEAT seconds while SERVICE_MAINTENANCE is running. + */ +void *service_main(void *ptr) +{ + worker_register("SERVICE"); + worker_register_job_name(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK, "child chart obsoletion check"); + worker_register_job_name(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS, "cleanup obsolete charts"); + worker_register_job_name(WORKER_JOB_ARCHIVE_CHART, "archive chart"); + worker_register_job_name(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS, "archive chart dimensions"); + worker_register_job_name(WORKER_JOB_ARCHIVE_DIMENSION, "archive dimension"); + worker_register_job_name(WORKER_JOB_CLEANUP_ORPHAN_HOSTS, "cleanup orphan hosts"); + worker_register_job_name(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS, "cleanup obsolete charts on all hosts"); + worker_register_job_name(WORKER_JOB_FREE_HOST, "free host"); + worker_register_job_name(WORKER_JOB_FREE_CHART, "free chart"); + worker_register_job_name(WORKER_JOB_FREE_DIMENSION, "free dimension"); + worker_register_job_name(WORKER_JOB_PGC_MAIN_EVICT, "main cache evictions"); + worker_register_job_name(WORKER_JOB_PGC_MAIN_FLUSH, "main cache flushes"); + worker_register_job_name(WORKER_JOB_PGC_OPEN_EVICT, "open cache evictions"); +
worker_register_job_name(WORKER_JOB_PGC_OPEN_FLUSH, "open cache flushes"); + + CLEANUP_FUNCTION_REGISTER(service_main_cleanup) cleanup_ptr = ptr; + + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = USEC_PER_SEC * SERVICE_HEARTBEAT; + usec_t real_step = USEC_PER_SEC; + + netdata_log_debug(D_SYSTEM, "Service thread starts"); + + while (service_running(SERVICE_MAINTENANCE)) { + worker_is_idle(); + heartbeat_next(&hb, USEC_PER_SEC); + /* wake up every second, but only run the jobs once SERVICE_HEARTBEAT seconds have been counted */ if (real_step < step) { + real_step += USEC_PER_SEC; + continue; + } + real_step = USEC_PER_SEC; + +#ifdef ENABLE_DBENGINE + dbengine_retention_statistics(); +#endif + + svc_rrd_cleanup_obsolete_charts_from_all_hosts(); + + /* the previous job may take a while: re-check before starting the next one */ if (service_running(SERVICE_MAINTENANCE)) + svc_rrdhost_cleanup_orphan_hosts(localhost); + } + + return NULL; +} diff --git a/src/daemon/signals.c b/src/daemon/signals.c new file mode 100644 index 000000000..c014452b7 --- /dev/null +++ b/src/daemon/signals.c @@ -0,0 +1,244 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +/* The action signals_handle() takes for a received signal. */ typedef enum signal_action { + NETDATA_SIGNAL_END_OF_LIST, + NETDATA_SIGNAL_IGNORE, + NETDATA_SIGNAL_EXIT_CLEANLY, + NETDATA_SIGNAL_REOPEN_LOGS, + NETDATA_SIGNAL_RELOAD_HEALTH, + NETDATA_SIGNAL_FATAL, + NETDATA_SIGNAL_CHILD, +} SIGNAL_ACTION; + +/* The signals that get a handler installed, with a counter of pending deliveries; the list ends at the NETDATA_SIGNAL_END_OF_LIST entry. */ static struct { + int signo; // the signal + const char *name; // the name of the signal + size_t count; // the number of signals received + SIGNAL_ACTION action; // the action to take +} signals_waiting[] = { + { SIGPIPE, "SIGPIPE", 0, NETDATA_SIGNAL_IGNORE }, + { SIGINT , "SIGINT", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, + { SIGQUIT, "SIGQUIT", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, + { SIGTERM, "SIGTERM", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, + { SIGHUP, "SIGHUP", 0, NETDATA_SIGNAL_REOPEN_LOGS }, + { SIGUSR2, "SIGUSR2", 0, NETDATA_SIGNAL_RELOAD_HEALTH }, + { SIGBUS, "SIGBUS", 0, NETDATA_SIGNAL_FATAL }, + { SIGCHLD, "SIGCHLD", 0, NETDATA_SIGNAL_CHILD }, + + // terminator + { 0, "NONE", 0, NETDATA_SIGNAL_END_OF_LIST } +}; + +/* Signal handler: increments the counter of the matching signals_waiting[] entry; for NETDATA_SIGNAL_FATAL entries it also writes a message to stderr. */ static void
signal_handler(int signo) { + // find the entry in the list + int i; + for(i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST ; i++) { + if(unlikely(signals_waiting[i].signo == signo)) { + signals_waiting[i].count++; + + if(signals_waiting[i].action == NETDATA_SIGNAL_FATAL) { + char buffer[200 + 1]; + snprintfz(buffer, sizeof(buffer) - 1, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name); + if(write(STDERR_FILENO, buffer, strlen(buffer)) == -1) { + // nothing to do - we cannot write but there is no way to complain about it + ; + } + } + + return; + } + } +} + +void signals_block(void) { + sigset_t sigset; + sigfillset(&sigset); + + if(pthread_sigmask(SIG_BLOCK, &sigset, NULL) == -1) + netdata_log_error("SIGNAL: Could not block signals for threads"); +} + +void signals_unblock(void) { + sigset_t sigset; + sigfillset(&sigset); + + if(pthread_sigmask(SIG_UNBLOCK, &sigset, NULL) == -1) { + netdata_log_error("SIGNAL: Could not unblock signals for threads"); + } +} + +void signals_init(void) { + // Catch signals which we want to use + struct sigaction sa; + sa.sa_flags = 0; + + // ignore all signals while we run in a signal handler + sigfillset(&sa.sa_mask); + + int i; + for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) { + switch (signals_waiting[i].action) { + case NETDATA_SIGNAL_IGNORE: + sa.sa_handler = SIG_IGN; + break; + default: + sa.sa_handler = signal_handler; + break; + } + + if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1) + netdata_log_error("SIGNAL: Failed to change signal handler for: %s", signals_waiting[i].name); + } +} + +void signals_restore_SIGCHLD(void) +{ + struct sigaction sa; + + sa.sa_flags = 0; + sigfillset(&sa.sa_mask); + sa.sa_handler = signal_handler; + + if(sigaction(SIGCHLD, &sa, NULL) == -1) + netdata_log_error("SIGNAL: Failed to change signal handler for: SIGCHLD"); +} + +void signals_reset(void) { + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_handler = 
SIG_DFL; + sa.sa_flags = 0; + + int i; + for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) { + if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1) + netdata_log_error("SIGNAL: Failed to reset signal handler for: %s", signals_waiting[i].name); + } +} + +// reap_child reaps the child identified by pid without blocking, and logs how the child ended. +static void reap_child(pid_t pid) { + siginfo_t i = { 0 }; /* zeroed first: with WNOHANG a successful waitid() may leave the structure untouched, and si_pid is tested below */ + + errno = 0; + netdata_log_debug(D_CHILDS, "SIGNAL: reap_child(%d)...", pid); + if (netdata_waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) { + if (errno != ECHILD) + netdata_log_error("SIGNAL: waitid(%d): failed to wait for child", pid); + else + netdata_log_info("SIGNAL: waitid(%d): failed - it seems the child is already reaped", pid); + return; + } + else if (i.si_pid == 0) { + // Process didn't exit, this shouldn't happen. + netdata_log_error("SIGNAL: waitid(%d): reports pid 0 - child has not exited", pid); + return; + } + + /* si_code tells how the child changed state; si_status carries the exit code or the signal number */ switch (i.si_code) { + case CLD_EXITED: + netdata_log_info("SIGNAL: reap_child(%d) exited with code: %d", pid, i.si_status); + break; + case CLD_KILLED: + netdata_log_info("SIGNAL: reap_child(%d) killed by signal: %d", pid, i.si_status); + break; + case CLD_DUMPED: + netdata_log_info("SIGNAL: reap_child(%d) dumped core by signal: %d", pid, i.si_status); + break; + case CLD_STOPPED: + netdata_log_info("SIGNAL: reap_child(%d) stopped by signal: %d", pid, i.si_status); + break; + case CLD_TRAPPED: + netdata_log_info("SIGNAL: reap_child(%d) trapped by signal: %d", pid, i.si_status); + break; + case CLD_CONTINUED: + netdata_log_info("SIGNAL: reap_child(%d) continued by signal: %d", pid, i.si_status); + break; + default: + netdata_log_info("SIGNAL: reap_child(%d) gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status); + break; + } +} + +// reap_children reaps all pending children which are not managed by myp.
+/* Repeatedly looks for an exited child without blocking and passes each one found to reap_child(); returns when waitid() fails or reports no child. WNOWAIT is used so that the child stays waitable for reap_child(). */ static void reap_children() { + siginfo_t i; + + while(1) { + i.si_pid = 0; + if (netdata_waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1 || i.si_pid == 0) + // nothing to do + return; + + reap_child(i.si_pid); + } +} + +/* The signal processing loop: sleeps in pause() and, each time it is interrupted, runs the action of every signals_waiting[] entry with a non-zero counter, repeating until no counter is set. The loop itself never ends. */ void signals_handle(void) { + while(1) { + + // pause() causes the calling process (or thread) to sleep until a signal + // is delivered that either terminates the process or causes the invocation + // of a signal-catching function. + if(pause() == -1 && errno == EINTR) { + + // loop once, but keep looping while signals are coming in + // this is needed because a few operations may take some time + // so we need to check for new signals before pausing again + int found = 1; + while(found) { + found = 0; + + // execute the actions of the signals + int i; + for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) { + if (signals_waiting[i].count) { + found = 1; + signals_waiting[i].count = 0; + const char *name = signals_waiting[i].name; + + switch (signals_waiting[i].action) { + case NETDATA_SIGNAL_RELOAD_HEALTH: + nd_log_limits_unlimited(); + netdata_log_info("SIGNAL: Received %s. Reloading HEALTH configuration...", name); + nd_log_limits_reset(); + execute_command(CMD_RELOAD_HEALTH, NULL, NULL); + break; + + case NETDATA_SIGNAL_REOPEN_LOGS: + nd_log_limits_unlimited(); + netdata_log_info("SIGNAL: Received %s. Reopening all log files...", name); + nd_log_limits_reset(); + execute_command(CMD_REOPEN_LOGS, NULL, NULL); + break; + + case NETDATA_SIGNAL_EXIT_CLEANLY: + nd_log_limits_unlimited(); + netdata_log_info("SIGNAL: Received %s. Cleaning up to exit...", name); + commands_exit(); + netdata_cleanup_and_exit(0, NULL, NULL, NULL); + exit(0); + break; + + case NETDATA_SIGNAL_FATAL: + fatal("SIGNAL: Received %s. netdata now exits.", name); + break; + + case NETDATA_SIGNAL_CHILD: + reap_children(); + break; + + default: + netdata_log_info("SIGNAL: Received %s. No signal handler configured.
Ignoring it.", name); + break; + } + } + } + } + } + else + netdata_log_error("SIGNAL: pause() returned but it was not interrupted by a signal."); + } +} diff --git a/src/daemon/signals.h b/src/daemon/signals.h new file mode 100644 index 000000000..12b1ed198 --- /dev/null +++ b/src/daemon/signals.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SIGNALS_H +#define NETDATA_SIGNALS_H 1 + +void signals_init(void); +void signals_block(void); +void signals_unblock(void); +void signals_restore_SIGCHLD(void); +void signals_reset(void); +void signals_handle(void) NORETURN; + +#endif //NETDATA_SIGNALS_H diff --git a/src/daemon/static_threads.c b/src/daemon/static_threads.c new file mode 100644 index 000000000..4199e9306 --- /dev/null +++ b/src/daemon/static_threads.c @@ -0,0 +1,226 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +void *aclk_main(void *ptr); +void *analytics_main(void *ptr); +void *cpuidlejitter_main(void *ptr); +void *global_statistics_main(void *ptr); +void *global_statistics_workers_main(void *ptr); +void *global_statistics_sqlite3_main(void *ptr); +void *health_main(void *ptr); +void *pluginsd_main(void *ptr); +void *service_main(void *ptr); +void *statsd_main(void *ptr); +void *profile_main(void *ptr); +void *replication_thread_main(void *ptr); + +extern bool global_statistics_enabled; + +const struct netdata_static_thread static_threads_common[] = { + { + .name = "P[idlejitter]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "idlejitter", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = cpuidlejitter_main + }, + { + .name = "HEALTH", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = health_main + }, + { + .name = "ANALYTICS", + .config_section = NULL, + .config_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = analytics_main + }, + { + 
.name = "STATS_GLOBAL", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "netdata monitoring", + .env_name = "NETDATA_INTERNALS_MONITORING", + .global_variable = &global_statistics_enabled, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = global_statistics_main + }, + { + .name = "STATS_WORKERS", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "netdata monitoring extended", + .env_name = "NETDATA_INTERNALS_EXTENDED_MONITORING", + .global_variable = &global_statistics_enabled, + .enabled = 0, // this is ignored - check main() for "netdata monitoring extended" + .thread = NULL, + .init_routine = NULL, + .start_routine = global_statistics_workers_main + }, + { + .name = "STATS_SQLITE3", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "netdata monitoring extended", + .env_name = "NETDATA_INTERNALS_EXTENDED_MONITORING", + .global_variable = &global_statistics_enabled, + .enabled = 0, // this is ignored - check main() for "netdata monitoring extended" + .thread = NULL, + .init_routine = NULL, + .start_routine = global_statistics_sqlite3_main + }, + { + .name = "PLUGINSD", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = pluginsd_main + }, + { + .name = "SERVICE", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = service_main + }, + { + .name = "STATSD_FLUSH", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = statsd_main + }, +#ifndef OS_WINDOWS + // this crashes the debugger under windows + { + .name = "EXPORTING", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = exporting_main + }, +#endif + { + .name = "SNDR[localhost]", + .config_section = NULL, + .config_name = NULL, + .enabled = 0, + .thread = NULL, + 
.init_routine = NULL, + .start_routine = rrdpush_sender_thread + }, + { + .name = "WEB[1]", + .config_section = NULL, + .config_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = socket_listen_main_static_threaded + }, + +#ifdef ENABLE_H2O + { + .name = "h2o", + .config_section = NULL, + .config_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = h2o_main + }, +#endif + +#ifdef ENABLE_ACLK + { + .name = "ACLK_MAIN", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = aclk_main + }, +#endif + + { + .name = "RRDCONTEXT", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = rrdcontext_main + }, + + { + .name = "REPLAY[1]", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = replication_thread_main + }, + { + .name = "P[PROFILE]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "profile", + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = profile_main + }, + + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +struct netdata_static_thread * +static_threads_concat(const struct netdata_static_thread *lhs, + const struct netdata_static_thread *rhs) +{ + struct netdata_static_thread *res; + + int lhs_size = 0; + for (; lhs[lhs_size].name; lhs_size++) {} + + int rhs_size = 0; + for (; rhs[rhs_size].name; rhs_size++) {} + + res = callocz(lhs_size + rhs_size + 1, sizeof(struct netdata_static_thread)); + + for (int i = 0; i != lhs_size; i++) + memcpy(&res[i], &lhs[i], sizeof(struct netdata_static_thread)); + + for (int i = 0; i != rhs_size; i++) + memcpy(&res[lhs_size + i], &rhs[i], sizeof(struct 
netdata_static_thread)); + + return res; +} diff --git a/src/daemon/static_threads.h b/src/daemon/static_threads.h new file mode 100644 index 000000000..9c9be7620 --- /dev/null +++ b/src/daemon/static_threads.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STATIC_THREADS_H +#define NETDATA_STATIC_THREADS_H + +#include "common.h" + +extern const struct netdata_static_thread static_threads_common[]; + +struct netdata_static_thread * +static_threads_concat(const struct netdata_static_thread *lhs, + const struct netdata_static_thread *rhs); + +struct netdata_static_thread *static_threads_get(); + +#endif /* NETDATA_STATIC_THREADS_H */ diff --git a/src/daemon/static_threads_freebsd.c b/src/daemon/static_threads_freebsd.c new file mode 100644 index 000000000..1bb671a68 --- /dev/null +++ b/src/daemon/static_threads_freebsd.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +void *freebsd_main(void *ptr); +void *timex_main(void *ptr); + +static const struct netdata_static_thread static_threads_freebsd[] = { + { + .name = "P[freebsd]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "freebsd", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = freebsd_main + }, + { + .name = "P[timex]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "timex", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = timex_main + }, + + {NULL, NULL, NULL, 0, NULL, NULL, NULL} +}; + +struct netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_freebsd); +} diff --git a/src/daemon/static_threads_linux.c b/src/daemon/static_threads_linux.c new file mode 100644 index 000000000..1efd63755 --- /dev/null +++ b/src/daemon/static_threads_linux.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +void *cgroups_main(void *ptr); +void *proc_main(void *ptr); +void 
*diskspace_main(void *ptr); +void *tc_main(void *ptr); +void *timex_main(void *ptr); + +static const struct netdata_static_thread static_threads_linux[] = { + { + .name = "P[tc]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "tc", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = tc_main + }, + { + .name = "P[diskspace]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "diskspace", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = diskspace_main + }, + { + .name = "P[proc]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "proc", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = proc_main + }, + { + .name = "P[cgroups]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "cgroups", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = cgroups_main + }, + { + .name = "P[timex]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "timex", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = timex_main + }, + + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +struct netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_linux); +} diff --git a/src/daemon/static_threads_macos.c b/src/daemon/static_threads_macos.c new file mode 100644 index 000000000..3b417c0b2 --- /dev/null +++ b/src/daemon/static_threads_macos.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +void *macos_main(void *ptr); +void *timex_main(void *ptr); + +static const struct netdata_static_thread static_threads_macos[] = { + { + .name = "P[timex]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "timex", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, 
+ .start_routine = timex_main + }, + { + .name = "P[macos]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "macos", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = macos_main, + .env_name = NULL, + .global_variable = NULL, + }, + + {NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL} +}; + +struct netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_macos); +} diff --git a/src/daemon/static_threads_windows.c b/src/daemon/static_threads_windows.c new file mode 100644 index 000000000..57c47be0c --- /dev/null +++ b/src/daemon/static_threads_windows.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +void *win_plugin_main(void *ptr); + +static const struct netdata_static_thread static_threads_windows[] = { + { + .name = "P[windows]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "windows", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = win_plugin_main + }, + + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +struct netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_windows); +} diff --git a/src/daemon/system-info.sh b/src/daemon/system-info.sh new file mode 100755 index 000000000..aaca7fd4b --- /dev/null +++ b/src/daemon/system-info.sh @@ -0,0 +1,522 @@ +#!/usr/bin/env sh + +# ------------------------------------------------------------------------------------------------- +# detect the kernel + +KERNEL_NAME="$(uname -s)" +KERNEL_VERSION="$(uname -r)" +ARCHITECTURE="$(uname -m)" + +# ------------------------------------------------------------------------------------------------- +# detect the virtualization and possibly the container technology + +# systemd-detect-virt: 
# systemd-detect-virt: https://github.com/systemd/systemd/blob/df423851fcc05cf02281d11aab6aee7b476c1c3b/src/basic/virt.c#L999
# lscpu: https://github.com/util-linux/util-linux/blob/581b77da7aa4a5205902857184d555bed367e3e0/sys-utils/lscpu.c#L52
#
# Print the normalized form of a virtualization name ($1): two long names are
# replaced by short aliases, then the result is lowercased and every space is
# turned into a dash.
virtualization_normalize_name() {
  vname="$1"

  if [ "$vname" = "User-mode Linux" ]; then
    vname="uml"
  elif [ "$vname" = "Windows Subsystem for Linux" ]; then
    vname="wsl"
  fi

  echo "$vname" | tr '[:upper:]' '[:lower:]' | tr ' ' '-'
}
# -------------------------------------------------------------------------------------------------
# detect containers with heuristics
#
# Runs only while CONTAINER is still "unknown". The checks are evaluated in
# order and each match overwrites the previous one, so the last matching
# heuristic wins.

if [ "${CONTAINER}" = "unknown" ]; then
  # the first word of /proc/1/sched is compared against "netdata":
  # a match means netdata itself is PID 1
  if [ -f /proc/1/sched ]; then
    IFS='(, ' read -r process _ < /proc/1/sched
    if [ "${process}" = "netdata" ]; then
      CONTAINER="container"
      CONT_DETECTION="process"
    fi
  fi

  # ubuntu and debian supply /bin/running-in-container
  # https://www.apt-browse.org/browse/ubuntu/trusty/main/i386/upstart/1.12.1-0ubuntu4/file/bin/running-in-container
  if /bin/running-in-container > /dev/null 2>&1; then
    CONTAINER="container"
    CONT_DETECTION="/bin/running-in-container"
  fi

  # lxc sets environment variable 'container'
  #shellcheck disable=SC2154
  if [ -n "${container}" ]; then
    CONTAINER="lxc"
    CONT_DETECTION="containerenv"
  fi

  # docker creates /.dockerenv
  # http://stackoverflow.com/a/25518345
  if [ -f "/.dockerenv" ]; then
    CONTAINER="docker"
    CONT_DETECTION="dockerenv"
  fi

  if [ -n "${KUBERNETES_SERVICE_HOST}" ]; then
    CONTAINER="container"
    CONT_DETECTION="kubernetes"
  fi

  # 'command -v' prints the resolved path on stdout; it must be silenced,
  # because everything this script writes to stdout is its KEY=VALUE result.
  if [ "${KERNEL_NAME}" = FreeBSD ] && command -v sysctl > /dev/null 2>&1 && sysctl security.jail.jailed 2>/dev/null | grep -q "1$"; then
    CONTAINER="jail"
    CONT_DETECTION="sysctl"
  fi
fi
+elif [ "${KERNEL_NAME}" = "FreeBSD" ]; then + CONTAINER_ID="FreeBSD" + CONTAINER_ID_LIKE="FreeBSD" + CONTAINER_NAME="FreeBSD" + CONTAINER_OS_DETECTION="uname" + CONTAINER_VERSION=$(uname -r) + KERNEL_VERSION=$(uname -K) +else + if [ -f "/etc/os-release" ]; then + eval "$(grep -E "^(NAME|ID|ID_LIKE|VERSION|VERSION_ID)=" < /etc/os-release | sed 's/^/CONTAINER_/')" + CONTAINER_OS_DETECTION="/etc/os-release" + fi + + # shellcheck disable=SC2153 + if [ "${CONTAINER_NAME}" = "unknown" ] || [ "${CONTAINER_VERSION}" = "unknown" ] || [ "${CONTAINER_ID}" = "unknown" ]; then + if [ -f "/etc/lsb-release" ]; then + if [ "${CONTAINER_OS_DETECTION}" = "unknown" ]; then + CONTAINER_OS_DETECTION="/etc/lsb-release" + else + CONTAINER_OS_DETECTION="Mixed" + fi + DISTRIB_ID="unknown" + DISTRIB_RELEASE="unknown" + DISTRIB_CODENAME="unknown" + eval "$(grep -E "^(DISTRIB_ID|DISTRIB_RELEASE|DISTRIB_CODENAME)=" < /etc/lsb-release)" + if [ "${CONTAINER_NAME}" = "unknown" ]; then CONTAINER_NAME="${DISTRIB_ID}"; fi + if [ "${CONTAINER_VERSION}" = "unknown" ]; then CONTAINER_VERSION="${DISTRIB_RELEASE}"; fi + if [ "${CONTAINER_ID}" = "unknown" ]; then CONTAINER_ID="${DISTRIB_CODENAME}"; fi + fi + if [ -n "$(command -v lsb_release 2> /dev/null)" ]; then + if [ "${CONTAINER_OS_DETECTION}" = "unknown" ]; then + CONTAINER_OS_DETECTION="lsb_release" + else + CONTAINER_OS_DETECTION="Mixed" + fi + if [ "${CONTAINER_NAME}" = "unknown" ]; then CONTAINER_NAME="$(lsb_release -is 2> /dev/null)"; fi + if [ "${CONTAINER_VERSION}" = "unknown" ]; then CONTAINER_VERSION="$(lsb_release -rs 2> /dev/null)"; fi + if [ "${CONTAINER_ID}" = "unknown" ]; then CONTAINER_ID="$(lsb_release -cs 2> /dev/null)"; fi + fi + fi +fi + +# If Netdata is not running in a container then use the local detection as the host +HOST_OS_DETECTION="unknown" +HOST_NAME="unknown" +HOST_VERSION="unknown" +HOST_VERSION_ID="unknown" +HOST_ID="unknown" +HOST_ID_LIKE="unknown" + +# 'systemd-detect-virt' returns 'none' if there is no 
# Decide which OS details describe the host.
# CONTAINER is "unknown" or "none" when no container was detected
# ('systemd-detect-virt' reports 'none' in that case): the values collected
# locally into CONTAINER_* are then moved to HOST_* and CONTAINER_* becomes "none".
if [ "${CONTAINER}" = "unknown" ] || [ "${CONTAINER}" = "none" ]; then
  for v in NAME ID ID_LIKE VERSION VERSION_ID OS_DETECTION; do
    eval "HOST_$v=\$CONTAINER_$v; CONTAINER_$v=none"
  done
else
  # Otherwise try and use a user-supplied bind-mount into the container to resolve the host details
  if [ -e "/host/etc/os-release" ]; then
    eval "$(grep -E "^(NAME|ID|ID_LIKE|VERSION|VERSION_ID)=" < /host/etc/os-release | sed 's/^/HOST_/')"
    HOST_OS_DETECTION="/host/etc/os-release"
  fi

  # fall back to the host's lsb-release for whatever os-release did not provide
  if [ "${HOST_NAME}" = "unknown" ] || [ "${HOST_VERSION}" = "unknown" ] || [ "${HOST_ID}" = "unknown" ]; then
    if [ -f "/host/etc/lsb-release" ]; then
      # NOTE(review): the label below says "/etc/lsb-release" although the
      # file used is under /host; it is left untouched so that reported
      # values do not change - confirm whether consumers depend on it.
      if [ "${HOST_OS_DETECTION}" = "unknown" ]; then
        HOST_OS_DETECTION="/etc/lsb-release"
      else
        HOST_OS_DETECTION="Mixed"
      fi
      DISTRIB_ID="unknown"
      DISTRIB_RELEASE="unknown"
      DISTRIB_CODENAME="unknown"
      # read the bind-mounted host file that was just tested for, not the
      # container's own /etc/lsb-release
      eval "$(grep -E "^(DISTRIB_ID|DISTRIB_RELEASE|DISTRIB_CODENAME)=" < /host/etc/lsb-release)"
      if [ "${HOST_NAME}" = "unknown" ]; then HOST_NAME="${DISTRIB_ID}"; fi
      if [ "${HOST_VERSION}" = "unknown" ]; then HOST_VERSION="${DISTRIB_RELEASE}"; fi
      if [ "${HOST_ID}" = "unknown" ]; then HOST_ID="${DISTRIB_CODENAME}"; fi
    fi
  fi
fi
-e 's/[[:space:]]*$//')" + CPU_MODEL="$(echo "${lscpu_output}" | grep "^Model name:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + if grep -q "^lxcfs /proc" /proc/self/mounts 2>/dev/null && count=$(grep -c ^processor /proc/cpuinfo 2>/dev/null); then + LCPU_COUNT="$count" + fi + possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU max MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')" + if [ -z "$possible_cpu_freq" ]; then + possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')" + fi + if [ -z "$possible_cpu_freq" ]; then + possible_cpu_freq="$(echo "${lscpu_output}" | grep "^Model name:" | grep -Eo "[0-9\.]+GHz" | grep -o "^[0-9\.]*" | awk '{print int($0*1000)}')" + fi + [ -n "$possible_cpu_freq" ] && possible_cpu_freq="${possible_cpu_freq} MHz" +elif [ -n "${dmidecode}" ] && dmidecode -t processor > /dev/null 2>&1; then + dmidecode_output="$(${dmidecode} -t processor 2> /dev/null)" + CPU_INFO_SOURCE="dmidecode" + LCPU_COUNT="$(echo "${dmidecode_output}" | grep -F "Thread Count:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_VENDOR="$(echo "${dmidecode_output}" | grep -F "Manufacturer:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_MODEL="$(echo "${dmidecode_output}" | grep -F "Version:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + possible_cpu_freq="$(echo "${dmidecode_output}" | grep -F "Current Speed:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" +else + if [ -n "${nproc}" ]; then + CPU_INFO_SOURCE="nproc" + LCPU_COUNT="$(${nproc})" + elif [ "${KERNEL_NAME}" = FreeBSD ]; then + CPU_INFO_SOURCE="sysctl" + LCPU_COUNT="$(sysctl -n kern.smp.cpus)" + if ! 
possible_cpu_freq=$(sysctl -n machdep.tsc_freq 2> /dev/null); then + possible_cpu_freq=$(sysctl -n hw.model 2> /dev/null | grep -Eo "[0-9\.]+GHz" | grep -o "^[0-9\.]*" | awk '{print int($0*1000)}') + [ -n "$possible_cpu_freq" ] && possible_cpu_freq="${possible_cpu_freq} MHz" + fi + elif [ "${KERNEL_NAME}" = Darwin ]; then + CPU_INFO_SOURCE="sysctl" + LCPU_COUNT="$(sysctl -n hw.logicalcpu)" + elif [ -d /sys/devices/system/cpu ]; then + CPU_INFO_SOURCE="sysfs" + # This is potentially more accurate than checking `/proc/cpuinfo`. + LCPU_COUNT="$(find /sys/devices/system/cpu -mindepth 1 -maxdepth 1 -type d -name 'cpu*' | grep -cEv 'idle|freq')" + elif [ -r /proc/cpuinfo ]; then + CPU_INFO_SOURCE="procfs" + LCPU_COUNT="$(grep -c ^processor /proc/cpuinfo)" + fi + + if [ "${KERNEL_NAME}" = Darwin ]; then + CPU_MODEL="$(sysctl -n machdep.cpu.brand_string)" + if [ "${ARCHITECTURE}" = "x86_64" ]; then + CPU_VENDOR="$(sysctl -n machdep.cpu.vendor)" + else + CPU_VENDOR="Apple" + fi + echo "${CPU_INFO_SOURCE}" | grep -qv sysctl && CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysctl" + elif uname --version 2> /dev/null | grep -qF 'GNU coreutils'; then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} uname" + CPU_MODEL="$(uname -p)" + CPU_VENDOR="$(uname -i)" + elif [ "${KERNEL_NAME}" = FreeBSD ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv sysctl); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysctl" + fi + + CPU_MODEL="$(sysctl -n hw.model)" + elif [ -r /proc/cpuinfo ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv procfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} procfs" + fi + + CPU_MODEL="$(grep -F "model name" /proc/cpuinfo | head -n 1 | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_VENDOR="$(grep -F "vendor_id" /proc/cpuinfo | head -n 1 | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + fi +fi + +if [ "${KERNEL_NAME}" = Darwin ] && [ "${ARCHITECTURE}" = "x86_64" ]; then + CPU_FREQ="$(sysctl -n hw.cpufrequency)" +elif [ -r 
/sys/devices/system/cpu/cpu0/cpufreq/base_frequency ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv sysfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysfs" + fi + + value="$(cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency)" + CPU_FREQ="$((value * 1000))" +elif [ -n "${possible_cpu_freq}" ]; then + CPU_FREQ="${possible_cpu_freq}" +elif [ -r /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv sysfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysfs" + fi + + value="$(cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq)" + CPU_FREQ="$((value * 1000))" +elif [ -r /proc/cpuinfo ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv procfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} procfs" + fi + value=$(grep "cpu MHz" /proc/cpuinfo 2>/dev/null | grep -o "[0-9]*" | head -n 1 | awk '{printf "%0.f",int($0*1000000)}') + [ -n "$value" ] && CPU_FREQ="$value" +fi + +freq_units="$(echo "${CPU_FREQ}" | cut -f 2 -d ' ')" + +case "${freq_units}" in + GHz) + value="$(echo "${CPU_FREQ}" | cut -f 1 -d ' ')" + CPU_FREQ="$((value * 1000 * 1000 * 1000))" + ;; + MHz) + value="$(echo "${CPU_FREQ}" | cut -f 1 -d ' ')" + CPU_FREQ="$((value * 1000 * 1000))" + ;; + KHz) + value="$(echo "${CPU_FREQ}" | cut -f 1 -d ' ')" + CPU_FREQ="$((value * 1000))" + ;; + *) ;; + +esac + +# ------------------------------------------------------------------------------------------------- +# Detect the total system RAM + +TOTAL_RAM="unknown" +RAM_DETECTION="none" + +if [ "${KERNEL_NAME}" = FreeBSD ]; then + RAM_DETECTION="sysctl" + TOTAL_RAM="$(sysctl -n hw.physmem)" +elif [ "${KERNEL_NAME}" = Darwin ]; then + RAM_DETECTION="sysctl" + TOTAL_RAM="$(sysctl -n hw.memsize)" +elif [ -r /proc/meminfo ]; then + RAM_DETECTION="procfs" + TOTAL_RAM="$(grep -F MemTotal /proc/meminfo | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | cut -f 1 -d ' ')" + TOTAL_RAM="$((TOTAL_RAM * 1024))" +fi + +# 
------------------------------------------------------------------------------------------------- +# Detect the total system disk space + +DISK_SIZE="unknown" +DISK_DETECTION="none" + +if [ "${KERNEL_NAME}" = "Darwin" ]; then + if DISK_SIZE=$(diskutil info / 2>/dev/null | awk '/Disk Size/ {total += substr($5,2,length($5))} END { print total }') && + [ -n "$DISK_SIZE" ] && [ "$DISK_SIZE" != "0" ]; then + DISK_DETECTION="diskutil" + else + types='hfs' + + if (lsvfs | grep -q apfs); then + types="${types},apfs" + fi + + if (lsvfs | grep -q ufs); then + types="${types},ufs" + fi + + DISK_DETECTION="df" + DISK_SIZE=$(($(/bin/df -k -t ${types} | tail -n +2 | sed -E 's/\/dev\/disk([[:digit:]]*)s[[:digit:]]*/\/dev\/disk\1/g' | sort -k 1 | awk -F ' ' '{s=$NF;for(i=NF-1;i>=1;i--)s=s FS $i;print s}' | uniq -f 9 | awk '{print $8}' | tr '\n' '+' | rev | cut -f 2- -d '+' | rev) * 1024)) + fi +elif [ "${KERNEL_NAME}" = FreeBSD ]; then + types='ufs' + + if (lsvfs | grep -q zfs); then + types="${types},zfs" + fi + + DISK_DETECTION="df" + total="$(df -t ${types} -c -k | tail -n 1 | awk '{print $2}')" + DISK_SIZE="$((total * 1024))" +else + if [ -d /sys/block ] && [ -r /proc/devices ]; then + dev_major_whitelist='' + + # This is a list of device names used for block storage devices. + # These translate to the prefixs of files in `/dev` indicating the device type. + # They are sorted by lowest used device major number, with dynamically assigned ones at the end. 
# -------------------------------------------------------------------------------------------------
# Detect whether the node is kubernetes node

HOST_IS_K8S_NODE="false"

if [ -n "${KUBERNETES_SERVICE_HOST}" ] && [ -n "${KUBERNETES_SERVICE_PORT}" ]; then
  # These env vars are set for every container managed by k8s.
  HOST_IS_K8S_NODE="true"
elif pgrep "kubelet" > /dev/null 2>&1; then
  # The kubelet is the primary "node agent" that runs on each node.
  # Only the exit status of pgrep is needed: the PIDs it prints must not reach
  # stdout, which carries this script's KEY=VALUE result.
  HOST_IS_K8S_NODE="true"
fi
"http://metadata.google.internal/computeMetadata/v1/instance/zone")" + [ -n "$CLOUD_INSTANCE_REGION" ] && CLOUD_INSTANCE_REGION=$(basename "$CLOUD_INSTANCE_REGION") && CLOUD_INSTANCE_REGION=${CLOUD_INSTANCE_REGION%-*} + fi + fi + + # Try Azure IMDS + if [ "${CLOUD_TYPE}" = "unknown" ]; then + AZURE_IMDS_DATA="$(curl --fail -s --connect-timeout 1 -m 3 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance?api-version=2021-10-01")" + if [ -n "${AZURE_IMDS_DATA}" ] && echo "${AZURE_IMDS_DATA}" | grep -sq azEnvironment; then + CLOUD_TYPE="Azure" + CLOUD_INSTANCE_TYPE="$(curl --fail -s --connect-timeout 1 -m 3 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance/compute/vmSize?api-version=2021-10-01&format=text")" + CLOUD_INSTANCE_REGION="$(curl --fail -s --connect-timeout 1 -m 3 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance/compute/location?api-version=2021-10-01&format=text")" + fi + fi + fi +fi + +echo "NETDATA_CONTAINER_OS_NAME=${CONTAINER_NAME}" +echo "NETDATA_CONTAINER_OS_ID=${CONTAINER_ID}" +echo "NETDATA_CONTAINER_OS_ID_LIKE=${CONTAINER_ID_LIKE}" +echo "NETDATA_CONTAINER_OS_VERSION=${CONTAINER_VERSION}" +echo "NETDATA_CONTAINER_OS_VERSION_ID=${CONTAINER_VERSION_ID}" +echo "NETDATA_CONTAINER_OS_DETECTION=${CONTAINER_OS_DETECTION}" +echo "NETDATA_CONTAINER_IS_OFFICIAL_IMAGE=${CONTAINER_IS_OFFICIAL_IMAGE}" +echo "NETDATA_HOST_OS_NAME=${HOST_NAME}" +echo "NETDATA_HOST_OS_ID=${HOST_ID}" +echo "NETDATA_HOST_OS_ID_LIKE=${HOST_ID_LIKE}" +echo "NETDATA_HOST_OS_VERSION=${HOST_VERSION}" +echo "NETDATA_HOST_OS_VERSION_ID=${HOST_VERSION_ID}" +echo "NETDATA_HOST_OS_DETECTION=${HOST_OS_DETECTION}" +echo "NETDATA_HOST_IS_K8S_NODE=${HOST_IS_K8S_NODE}" +echo "NETDATA_SYSTEM_KERNEL_NAME=${KERNEL_NAME}" +echo "NETDATA_SYSTEM_KERNEL_VERSION=${KERNEL_VERSION}" +echo "NETDATA_SYSTEM_ARCHITECTURE=${ARCHITECTURE}" +echo "NETDATA_SYSTEM_VIRTUALIZATION=${VIRTUALIZATION}" +echo 
// Run sanitize_command_argument_string(dst, src, dst_size) and check the outcome.
// expected == NULL : the call has to report failure (return false).
// expected != NULL : dst has to be equal to expected.
static bool cmd_arg_sanitization_test(const char *expected, const char *src, char *dst, size_t dst_size) {
    bool ok = sanitize_command_argument_string(dst, src, dst_size);

    if (!expected)
        return ok == false;

    return strcmp(expected, dst) == 0;
}

// Sanitize src with every destination size from 0 up to min_size.
// All sizes below min_size must be rejected; min_size itself must produce
// `sanitized`. On the first mismatch a diagnostic is printed to stderr and
// false is returned. report_length appends the length of dst to the diagnostic.
static bool cmd_arg_sanitization_case(const char *src, const char *sanitized, size_t min_size,
                                      bool report_length, char *dst) {
    for (size_t size = 0; size <= min_size; size++) {
        const char *expected = (size == min_size) ? sanitized : NULL;

        if (cmd_arg_sanitization_test(expected, src, dst, size))
            continue;

        // passing NULL for %s is undefined behaviour, so print a placeholder
        fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected ? expected : "(null)", dst);
        if (report_length)
            fprintf(stderr, " length: %zu\n", strlen(dst));

        return false;
    }

    return true;
}

// Unit test for the command argument sanitizer.
// Returns 0 (false) when every case passes and 1 (true) on the first failure.
bool command_argument_sanitization_tests() {
    static const struct {
        const char *src;        // raw argument
        const char *sanitized;  // expected output at min_size
        size_t min_size;        // smallest dst_size expected to produce the output
        bool report_length;     // also print strlen(dst) on failure
    } cases[] = {
        { "'",           "'\\''",                    4,  false },
        { "''",          "'\\'''\\''",               8,  false },
        { "'a",          "'\\''a",                   6,  false },
        { "a'",          "a'\\''",                   6,  false },
        { "--foo'a'''b", "foo'\\''a'\\'''\\'''\\''b", 21, true  },
    };

    // zero-filled so that the diagnostics never read indeterminate bytes, even
    // when the sanitizer returns without writing to dst
    char dst[1024] = "";

    for (size_t c = 0; c < sizeof(cases) / sizeof(cases[0]); c++) {
        if (!cmd_arg_sanitization_case(cases[c].src, cases[c].sanitized, cases[c].min_size,
                                       cases[c].report_length, dst))
            return 1;
    }

    return 0;
}
18446744073709551616.0, .correct = "1.844674407370955136e+19" }, + { .n = 12318446744073710600192.0, .correct = "1.231844674407371008e+22" }, + { .n = 1677721499999999885312.0, .correct = "1.677721499999999872e+21" }, + { .n = -1677721499999999885312.0, .correct = "-1.677721499999999872e+21" }, + { .n = -1.677721499999999885312e40, .correct = "-1.677721499999999872e+40" }, + { .n = -16777214999999997337621690403742592008192.0, .correct = "-1.677721499999999616e+40" }, + { .n = 9999.9999999, .correct = "9999.9999999" }, + { .n = -9999.9999999, .correct = "-9999.9999999" }, + { .n = 0, .correct = NULL }, + }; + + char netdata[512 + 2], system[512 + 2]; + int i, failed = 0; + for(i = 0; values[i].correct ; i++) { + print_netdata_double(netdata, values[i].n); + snprintfz(system, sizeof(system) - 1, "%0.12" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)values[i].n); + + int ok = 1; + if(strcmp(netdata, values[i].correct) != 0) { + ok = 0; + failed++; + } + + NETDATA_DOUBLE parsed_netdata = str2ndd(netdata, NULL); + NETDATA_DOUBLE parsed_system = strtondd(netdata, NULL); + + if(parsed_system != parsed_netdata) + failed++; + + fprintf(stderr, "[%d]. '%s' (system) printed as '%s' (netdata): PRINT %s, " + "PARSED %0.12" NETDATA_DOUBLE_MODIFIER " (system), %0.12" NETDATA_DOUBLE_MODIFIER " (netdata): %s\n", + i, + system, netdata, ok?"OK":"FAILED", + parsed_system, parsed_netdata, + parsed_netdata == parsed_system ? 
"OK" : "FAILED"); + } + + if(failed) return 1; + return 0; +} + +static int check_rrdcalc_comparisons(void) { + RRDCALC_STATUS a, b; + + // make sure calloc() sets the status to UNINITIALIZED + memset(&a, 0, sizeof(RRDCALC_STATUS)); + if(a != RRDCALC_STATUS_UNINITIALIZED) { + fprintf(stderr, "%s is not zero.\n", rrdcalc_status2string(RRDCALC_STATUS_UNINITIALIZED)); + return 1; + } + + a = RRDCALC_STATUS_REMOVED; + b = RRDCALC_STATUS_UNDEFINED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_UNDEFINED; + b = RRDCALC_STATUS_UNINITIALIZED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_UNINITIALIZED; + b = RRDCALC_STATUS_CLEAR; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_CLEAR; + b = RRDCALC_STATUS_RAISED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_RAISED; + b = RRDCALC_STATUS_WARNING; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_WARNING; + b = RRDCALC_STATUS_CRITICAL; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + fprintf(stderr, "RRDCALC_STATUSes are sortable.\n"); + + return 0; +} + +int check_storage_number(NETDATA_DOUBLE n, int debug) { + char buffer[100]; + uint32_t flags = SN_DEFAULT_FLAGS; + + storage_number s = pack_storage_number(n, flags); + NETDATA_DOUBLE d = unpack_storage_number(s); + + if(!does_storage_number_exist(s)) { + fprintf(stderr, "Exists flags missing for number " NETDATA_DOUBLE_FORMAT "!\n", n); + return 5; + } + + 
NETDATA_DOUBLE ddiff = d - n;
+    NETDATA_DOUBLE dcdiff = ddiff * 100.0 / n;
+
+    if(dcdiff < 0) dcdiff = -dcdiff;
+
+    size_t len = (size_t)print_netdata_double(buffer, d);
+    NETDATA_DOUBLE p = str2ndd(buffer, NULL);
+    NETDATA_DOUBLE pdiff = n - p;
+    NETDATA_DOUBLE pcdiff = pdiff * 100.0 / n;
+    if(pcdiff < 0) pcdiff = -pcdiff;
+
+    if(debug) {
+        fprintf(stderr,
+            NETDATA_DOUBLE_FORMAT
+            " original\n" NETDATA_DOUBLE_FORMAT " packed and unpacked, (stored as 0x%08X, diff " NETDATA_DOUBLE_FORMAT
+            ", " NETDATA_DOUBLE_FORMAT "%%)\n"
+            "%s printed after unpacked (%zu bytes)\n" NETDATA_DOUBLE_FORMAT
+            " re-parsed from printed (diff " NETDATA_DOUBLE_FORMAT ", " NETDATA_DOUBLE_FORMAT "%%)\n\n",
+            n,
+            d, s, ddiff, dcdiff,
+            buffer, len,
+            p, pdiff, pcdiff
+        );
+        if(len != strlen(buffer)) fprintf(stderr, "ERROR: printed number %s is reported to have length %zu but it has %zu\n", buffer, len, strlen(buffer));
+
+        if(dcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT)
+            fprintf(stderr, "WARNING: packing number " NETDATA_DOUBLE_FORMAT " has accuracy loss " NETDATA_DOUBLE_FORMAT " %%\n", n, dcdiff);
+
+        if(pcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT)
+            fprintf(stderr, "WARNING: re-parsing the packed, unpacked and printed number " NETDATA_DOUBLE_FORMAT
+                    " has accuracy loss " NETDATA_DOUBLE_FORMAT " %%\n", n, pcdiff);
+    }
+
+    if(len != strlen(buffer)) return 1;
+    if(dcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) return 3;
+    if(pcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) return 4;
+    return 0;
+}
+
+/*
+ * Keep halving n, packing and unpacking each halved value, until the
+ * unpacked result is zero or equals the value n had before that halving.
+ * Returns the value n had before the final halving.
+ */
+NETDATA_DOUBLE storage_number_min(NETDATA_DOUBLE n) {
+    NETDATA_DOUBLE r = 1, last;
+
+    do {
+        last = n;
+        n /= 2.0;
+        storage_number t = pack_storage_number(n, SN_DEFAULT_FLAGS);
+        r = unpack_storage_number(t);
+    } while(r != 0.0 && r != last);
+
+    return last;
+}
+
+/*
+ * Convert a struct timeval to a single microseconds count.
+ */
+static unsigned long long benchmark_timeval_usec(const struct timeval *tv) {
+    return (unsigned long long)tv->tv_sec * 1000000ULL + (unsigned long long)tv->tv_usec;
+}
+
+/*
+ * Print the user, system and total CPU time spent between two getrusage()
+ * samples (begin taken first, end taken last) and return the total in
+ * microseconds.
+ *
+ * Each sample is converted to microseconds as a whole before subtracting,
+ * so both the seconds and the microseconds of 'begin' are deducted.
+ */
+static unsigned long long benchmark_report_cpu_time(const struct rusage *begin, const struct rusage *end) {
+    unsigned long long user = benchmark_timeval_usec(&end->ru_utime) - benchmark_timeval_usec(&begin->ru_utime);
+    unsigned long long sys = benchmark_timeval_usec(&end->ru_stime) - benchmark_timeval_usec(&begin->ru_stime);
+    unsigned long long total = user + sys;
+
+    fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER
+            ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(sys / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0));
+
+    return total;
+}
+
+/*
+ * Report the size and value range of storage_number and compare the CPU time
+ * of three ways of printing numbers to a buffer:
+ *   1. print_netdata_double()
+ *   2. snprintfz() with NETDATA_DOUBLE_FORMAT
+ *   3. pack_storage_number() + unpack_storage_number() + print_netdata_double()
+ *
+ * loop       - iterations of the inner loop; every section runs it 10 times
+ * multiplier - factor applied to the test value on every iteration; the value
+ *              restarts from the minimum positive storage_number when it
+ *              exceeds the maximum positive one
+ *
+ * All output goes to stderr. Nothing is returned.
+ */
+void benchmark_storage_number(int loop, int multiplier) {
+    int i, j;
+    NETDATA_DOUBLE n, d;
+    storage_number s;
+    unsigned long long mine, their;
+
+    NETDATA_DOUBLE storage_number_positive_min = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW);
+    NETDATA_DOUBLE storage_number_positive_max = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MAX_RAW);
+
+    char buffer[100];
+
+    struct rusage now, last;
+
+    fprintf(stderr, "\n\nBenchmarking %d numbers, please wait...\n\n", loop);
+
+    // ------------------------------------------------------------------------
+
+    fprintf(stderr, "SYSTEM LONG DOUBLE SIZE: %zu bytes\n", sizeof(NETDATA_DOUBLE));
+    fprintf(stderr, "NETDATA FLOATING POINT SIZE: %zu bytes\n", sizeof(storage_number));
+
+    // memory needed for 'loop' values of each type; kept in floating point so
+    // the ratio below is neither truncated nor an integer division by zero
+    NETDATA_DOUBLE mine_bytes = (NETDATA_DOUBLE)sizeof(storage_number) * (NETDATA_DOUBLE)loop;
+    NETDATA_DOUBLE their_bytes = (NETDATA_DOUBLE)sizeof(NETDATA_DOUBLE) * (NETDATA_DOUBLE)loop;
+
+    if(mine_bytes > their_bytes) {
+        fprintf(stderr, "\nNETDATA NEEDS %0.2" NETDATA_DOUBLE_MODIFIER " TIMES MORE MEMORY. Sorry!\n", (NETDATA_DOUBLE)(mine_bytes / their_bytes));
+    }
+    else {
+        fprintf(stderr, "\nNETDATA INTERNAL FLOATING POINT ARITHMETICS NEEDS %0.2" NETDATA_DOUBLE_MODIFIER " TIMES LESS MEMORY.\n", (NETDATA_DOUBLE)(their_bytes / mine_bytes));
+    }
+
+    fprintf(stderr, "\nNETDATA FLOATING POINT\n");
+    fprintf(stderr, "MIN POSITIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW));
+    fprintf(stderr, "MAX POSITIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_POSITIVE_MAX_RAW));
+    fprintf(stderr, "MIN NEGATIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MIN_RAW));
+    fprintf(stderr, "MAX NEGATIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MAX_RAW));
+    fprintf(stderr, "Maximum accuracy loss accepted: " NETDATA_DOUBLE_FORMAT "%%\n\n\n", (NETDATA_DOUBLE)ACCURACY_LOSS_ACCEPTED_PERCENT);
+
+    // ------------------------------------------------------------------------
+
+    fprintf(stderr, "INTERNAL LONG DOUBLE PRINTING: ");
+    getrusage(RUSAGE_SELF, &last);
+
+    // do the job
+    for(j = 1; j < 11 ;j++) {
+        n = storage_number_positive_min * j;
+
+        for(i = 0; i < loop ;i++) {
+            n *= multiplier;
+            if(n > storage_number_positive_max) n = storage_number_positive_min;
+
+            print_netdata_double(buffer, n);
+        }
+    }
+
+    getrusage(RUSAGE_SELF, &now);
+    mine = benchmark_report_cpu_time(&last, &now);
+
+    // ------------------------------------------------------------------------
+
+    fprintf(stderr, "SYSTEM LONG DOUBLE PRINTING: ");
+    getrusage(RUSAGE_SELF, &last);
+
+    // do the job
+    for(j = 1; j < 11 ;j++) {
+        n = storage_number_positive_min * j;
+
+        for(i = 0; i < loop ;i++) {
+            n *= multiplier;
+            if(n > storage_number_positive_max) n = storage_number_positive_min;
+            snprintfz(buffer, sizeof(buffer) - 1, NETDATA_DOUBLE_FORMAT, n);
+        }
+    }
+
+    getrusage(RUSAGE_SELF, &now);
+    their = benchmark_report_cpu_time(&last, &now);
+
+    if(mine > their) {
+        fprintf(stderr, "NETDATA CODE IS SLOWER %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(mine * 100.0 / their - 100.0));
+    }
+    else {
+        fprintf(stderr, "NETDATA CODE IS F A S T E R %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(their * 100.0 / mine - 100.0));
+    }
+
+    // ------------------------------------------------------------------------
+
+    fprintf(stderr, "\nINTERNAL LONG DOUBLE PRINTING WITH PACK / UNPACK: ");
+    getrusage(RUSAGE_SELF, &last);
+
+    // do the job
+    for(j = 1; j < 11 ;j++) {
+        n = storage_number_positive_min * j;
+
+        for(i = 0; i < loop ;i++) {
+            n *= multiplier;
+            if(n > storage_number_positive_max) n = storage_number_positive_min;
+
+            s = pack_storage_number(n, SN_DEFAULT_FLAGS);
+            d = unpack_storage_number(s);
+            print_netdata_double(buffer, d);
+        }
+    }
+
+    getrusage(RUSAGE_SELF, &now);
+    mine = benchmark_report_cpu_time(&last, &now);
+
+    // 'their' still holds the snprintfz() timing measured above
+    if(mine > their) {
+        fprintf(stderr, "WITH PACKING UNPACKING NETDATA CODE IS SLOWER %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(mine * 100.0 / their - 100.0));
+    }
+    else {
+        fprintf(stderr, "EVEN WITH PACKING AND UNPACKING, NETDATA CODE IS F A S T E R %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(their * 100.0 / mine - 100.0));
+    }
+
+    // ------------------------------------------------------------------------
+
+}
+
+static int check_storage_number_exists() {
+    uint32_t flags = SN_DEFAULT_FLAGS;
+    NETDATA_DOUBLE n = 0.0;
+
+    storage_number s = pack_storage_number(n, flags);
+    NETDATA_DOUBLE d = unpack_storage_number(s);
+
+    if(n != d) {
+        fprintf(stderr, "Wrong number returned.
Expected " NETDATA_DOUBLE_FORMAT ", returned " NETDATA_DOUBLE_FORMAT "!\n", n, d);
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Unit test for storage_number packing and printing.
+ *
+ * First runs check_storage_number_exists(). Then, for both signs, it feeds
+ * check_storage_number() with the values 0.0000001 .. 0.0000009, each one
+ * multiplied by 10 up to 20 times, skipping the values that fall between the
+ * unpacked STORAGE_NUMBER_NEGATIVE_MAX_RAW and STORAGE_NUMBER_POSITIVE_MIN_RAW
+ * limits. Finally it runs benchmark_storage_number().
+ *
+ * Returns 0 when every check passes, 1 on the first failure.
+ */
+int unit_test_storage() {
+    // a failed sanity check is a test failure and must be reported as one
+    if(check_storage_number_exists()) return 1;
+
+    NETDATA_DOUBLE storage_number_positive_min = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW);
+    NETDATA_DOUBLE storage_number_negative_max = unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MAX_RAW);
+
+    // g is the sign of the values tested: -1 first, then +1 (0 is skipped)
+    for(int g = -1; g <= 1 ; g++) {
+        if(!g) continue;
+
+        NETDATA_DOUBLE a = 0;
+        for(int j = 0; j < 9 ;j++) {
+            a += 0.0000001;
+            NETDATA_DOUBLE c = a * g;
+            for(int i = 0; i < 21 ;i++, c *= 10) {
+                if(c > 0 && c < storage_number_positive_min) continue;
+                if(c < 0 && c > storage_number_negative_max) continue;
+
+                if(check_storage_number(c, 1)) return 1;
+            }
+        }
+    }
+
+    benchmark_storage_number(1000000, 2);
+    return 0;
+}
+
+int unit_test_str2ld() {
+    is_system_ieee754_double();
+
+    char *values[] = {
+        "1.2345678",
+        "-35.6",
+        "0.00123",
+        "23842384234234.2",
+        ".1",
+        "1.2e-10",
+        "18446744073709551616.0",
+        "18446744073709551616123456789123456789123456789123456789123456789123456789123456789.0",
+        "1.8446744073709551616123456789123456789123456789123456789123456789123456789123456789e+300",
+        "9.",
+        "9.e2",
+        "1.2e",
+        "1.2e+",
+        "1.2e-",
+        "1.2e0",
+        "1.2e-0",
+        "1.2e+0",
+        "-1.2e+1",
+        "-1.2e-1",
+        "1.2e1",
+        "1.2e400",
+        "hello",
+        "1wrong",
+        "nan",
+        "inf",
+        NULL
+    };
+
+    int i;
+    for(i = 0; values[i] ; i++) {
+        char *e_mine = "hello", *e_sys = "world";
+        NETDATA_DOUBLE mine = str2ndd(values[i], &e_mine);
+        NETDATA_DOUBLE sys = strtondd(values[i], &e_sys);
+
+        if(isnan(mine)) {
+            if(!isnan(sys)) {
+                fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER
+                        ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys);
+                return -1;
+            }
+        }
+        else if(isinf(mine)) {
+            if(!isinf(sys)) {
+                fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER
+                        ",
but system believes it is %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys);
+                return -1;
+            }
+        }
+        else if(mine != sys && ABS(mine-sys) > 0.000001) {
+            fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER
+                    ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ", delta %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys, sys-mine);
+            return -1;
+        }
+
+        if(e_mine != e_sys) {
+            fprintf(stderr, "Value '%s' is parsed correctly, but endptr is not right (netdata returned %d, but system returned %d)\n",
+                    values[i], (int)(e_mine - values[i]), (int)(e_sys - values[i]));
+            return -1;
+        }
+
+        fprintf(stderr, "str2ndd() parsed value '%s' exactly the same way with strtold(), returned %" NETDATA_DOUBLE_MODIFIER
+                " vs %" NETDATA_DOUBLE_MODIFIER "\n", values[i], mine, sys);
+    }
+
+    return 0;
+}
+
+int unit_test_buffer() {
+    BUFFER *wb = buffer_create(1, NULL);
+    char string[2048 + 1];
+    char final[9000 + 1];
+    int i;
+
+    for(i = 0; i < 2048; i++)
+        string[i] = (char)((i % 24) + 'a');
+    string[2048] = '\0';
+
+    const char *fmt = "string1: %s\nstring2: %s\nstring3: %s\nstring4: %s";
+    buffer_sprintf(wb, fmt, string, string, string, string);
+    snprintfz(final, sizeof(final) - 1, fmt, string, string, string, string);
+
+    const char *s = buffer_tostring(wb);
+
+    if(buffer_strlen(wb) != strlen(final) || strcmp(s, final) != 0) {
+        fprintf(stderr, "\nbuffer_sprintf() is faulty.\n");
+        fprintf(stderr, "\nstring : %s (length %zu)\n", string, strlen(string));
+        fprintf(stderr, "\nbuffer : %s (length %zu)\n", s, buffer_strlen(wb));
+        fprintf(stderr, "\nexpected: %s (length %zu)\n", final, strlen(final));
+        buffer_free(wb);
+        return -1;
+    }
+
+    fprintf(stderr, "buffer_sprintf() works as expected.\n");
+    buffer_free(wb);
+    return 0;
+}
+
+/*
+ * Sanity-check the table returned by static_threads_get(): it must not be
+ * NULL, it must hold at least two entries, and no two entries may share the
+ * same start routine. A non-NULL table is released with freez() on every path.
+ *
+ * Returns 0 on success, 1 on failure (a diagnostic is written to stderr).
+ */
+int unit_test_static_threads() {
+    struct netdata_static_thread *static_threads = static_threads_get();
+
+    /*
+     * make sure enough static threads have been registered
+     */
+    if (!static_threads) {
+        fprintf(stderr, "empty static_threads array\n");
+        return 1;
+    }
+
+    // the table ends at the first entry without a start routine
+    int n;
+    for (n = 0; static_threads[n].start_routine != NULL; n++) {}
+
+    if (n < 2) {
+        fprintf(stderr, "only %d static threads registered\n", n);
+        freez(static_threads);
+        return 1;
+    }
+
+    /*
+     * verify that each thread's start routine is unique.
+     */
+    for (int i = 0; i != n - 1; i++) {
+        for (int j = i + 1; j != n; j++) {
+            if (static_threads[i].start_routine != static_threads[j].start_routine)
+                continue;
+
+            fprintf(stderr, "Found duplicate threads with name: %s\n", static_threads[i].name);
+            freez(static_threads);
+            return 1;
+        }
+    }
+
+    freez(static_threads);
+    return 0;
+}
+
+// --------------------------------------------------------------------------------------------------------------------
+
+struct feed_values {
+    unsigned long long microseconds;
+    collected_number value;
+};
+
+struct test {
+    char name[100];
+    char description[1024];
+
+    int update_every;
+    unsigned long long multiplier;
+    unsigned long long divisor;
+    RRD_ALGORITHM algorithm;
+
+    unsigned long feed_entries;
+    unsigned long result_entries;
+    struct feed_values *feed;
+    NETDATA_DOUBLE *results;
+
+    collected_number *feed2;
+    NETDATA_DOUBLE *results2;
+};
+
+// --------------------------------------------------------------------------------------------------------------------
+// test1
+// test absolute values stored
+
+struct feed_values test1_feed[] = {
+    { 0, 10 },
+    { 1000000, 20 },
+    { 1000000, 30 },
+    { 1000000, 40 },
+    { 1000000, 50 },
+    { 1000000, 60 },
+    { 1000000, 70 },
+    { 1000000, 80 },
+    { 1000000, 90 },
+    { 1000000, 100 },
+};
+
+NETDATA_DOUBLE test1_results[] = {
+    20, 30, 40, 50, 60, 70, 80, 90, 100
+};
+
+struct test test1 = {
+    "test1", // name
+    "test absolute values stored at exactly second boundaries",
+    1, // update_every
+    1, // multiplier
+    1, // divisor
+    RRD_ALGORITHM_ABSOLUTE, // algorithm
+    10, // feed entries
+    9, // result entries
+    test1_feed, // feed
+    test1_results, // results
+    NULL, // feed2
+    NULL //
results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test2 +// test absolute values stored in the middle of second boundaries + +struct feed_values test2_feed[] = { + { 500000, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test2_results[] = { + 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +struct test test2 = { + "test2", // name + "test absolute values stored in the middle of second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 10, // feed entries + 9, // result entries + test2_feed, // feed + test2_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test3 + +struct feed_values test3_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test3_results[] = { + 10, 10, 10, 10, 10, 10, 10, 10, 10 +}; + +struct test test3 = { + "test3", // name + "test incremental values stored at exactly second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test3_feed, // feed + test3_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test4 + +struct feed_values test4_feed[] = { + { 500000, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 
1000000, 100 }, +}; + +NETDATA_DOUBLE test4_results[] = { + 10, 10, 10, 10, 10, 10, 10, 10, 10 +}; + +struct test test4 = { + "test4", // name + "test incremental values stored in the middle of second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test4_feed, // feed + test4_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test5 - 32 bit overflows + +struct feed_values test5_feed[] = { + { 0, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, +}; + +NETDATA_DOUBLE test5_results[] = { + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, +}; + +struct test test5 = { + "test5", // name + "test 32-bit incremental values overflow", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test5_feed, // feed + test5_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test5b - 64 bit overflows + +struct feed_values test5b_feed[] = { + { 0, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 
}, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, +}; + +NETDATA_DOUBLE test5b_results[] = { + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, +}; + +struct test test5b = { + "test5b", // name + "test 64-bit incremental values overflow", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test5b_feed, // feed + test5b_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test6 + +struct feed_values test6_feed[] = { + { 250000, 1000 }, + { 250000, 2000 }, + { 250000, 3000 }, + { 250000, 4000 }, + { 250000, 5000 }, + { 250000, 6000 }, + { 250000, 7000 }, + { 250000, 8000 }, + { 250000, 9000 }, + { 250000, 10000 }, + { 250000, 11000 }, + { 250000, 12000 }, + { 250000, 13000 }, + { 250000, 14000 }, + { 250000, 15000 }, + { 250000, 16000 }, +}; + +NETDATA_DOUBLE test6_results[] = { + 4000, 4000, 4000, 4000 +}; + +struct test test6 = { + "test6", // name + "test incremental values updated within the same second", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 16, // feed entries + 4, // result entries + test6_feed, // feed + test6_results, // results + NULL, // feed2 + NULL // results2 
+}; + +// -------------------------------------------------------------------------------------------------------------------- +// test7 + +struct feed_values test7_feed[] = { + { 500000, 1000 }, + { 2000000, 2000 }, + { 2000000, 3000 }, + { 2000000, 4000 }, + { 2000000, 5000 }, + { 2000000, 6000 }, + { 2000000, 7000 }, + { 2000000, 8000 }, + { 2000000, 9000 }, + { 2000000, 10000 }, +}; + +NETDATA_DOUBLE test7_results[] = { + 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500 +}; + +struct test test7 = { + "test7", // name + "test incremental values updated in long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 18, // result entries + test7_feed, // feed + test7_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test8 + +struct feed_values test8_feed[] = { + { 500000, 1000 }, + { 2000000, 2000 }, + { 2000000, 3000 }, + { 2000000, 4000 }, + { 2000000, 5000 }, + { 2000000, 6000 }, +}; + +NETDATA_DOUBLE test8_results[] = { + 1250, 2000, 2250, 3000, 3250, 4000, 4250, 5000, 5250, 6000 +}; + +struct test test8 = { + "test8", // name + "test absolute values updated in long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 6, // feed entries + 10, // result entries + test8_feed, // feed + test8_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test9 + +struct feed_values test9_feed[] = { + { 250000, 1000 }, + { 250000, 2000 }, + { 250000, 3000 }, + { 250000, 4000 }, + { 250000, 5000 }, + { 250000, 6000 }, + { 250000, 7000 }, + { 250000, 8000 }, + { 250000, 9000 }, + { 250000, 10000 }, + { 250000, 11000 }, + { 250000, 
12000 }, + { 250000, 13000 }, + { 250000, 14000 }, + { 250000, 15000 }, + { 250000, 16000 }, +}; + +NETDATA_DOUBLE test9_results[] = { + 4000, 8000, 12000, 16000 +}; + +struct test test9 = { + "test9", // name + "test absolute values updated within the same second", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 16, // feed entries + 4, // result entries + test9_feed, // feed + test9_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test10 + +struct feed_values test10_feed[] = { + { 500000, 1000 }, + { 600000, 1000 + 600 }, + { 200000, 1600 + 200 }, + { 1000000, 1800 + 1000 }, + { 200000, 2800 + 200 }, + { 2000000, 3000 + 2000 }, + { 600000, 5000 + 600 }, + { 400000, 5600 + 400 }, + { 900000, 6000 + 900 }, + { 1000000, 6900 + 1000 }, +}; + +NETDATA_DOUBLE test10_results[] = { + 1000, 1000, 1000, 1000, 1000, 1000, 1000 +}; + +struct test test10 = { + "test10", // name + "test incremental values updated in short and long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 7, // result entries + test10_feed, // feed + test10_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test11 + +struct feed_values test11_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +collected_number test11_feed2[] = { + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +NETDATA_DOUBLE test11_results[] = { + 50, 50, 50, 50, 50, 50, 50, 50, 50 +}; + +NETDATA_DOUBLE test11_results2[] = { + 50, 50, 50, 50, 50, 50, 50, 50, 50 +}; + +struct test test11 = 
{
+    "test11", // name
+    "test percentage-of-incremental-row with equal values",
+    1, // update_every
+    1, // multiplier
+    1, // divisor
+    RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm
+    10, // feed entries
+    9, // result entries
+    test11_feed, // feed
+    test11_results, // results
+    test11_feed2, // feed2
+    test11_results2 // results2
+};
+
+// --------------------------------------------------------------------------------------------------------------------
+// test12
+//
+// Two dimensions sampled once per second. dim1 rises by 10 per sample and dim2 is
+// always 3x dim1, so with RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL the expected results
+// are a constant 25 (dim1) and 75 (dim2).
+
+// { microseconds since the previous sample, collected value of dim1 }
+struct feed_values test12_feed[] = {
+    { 0, 10 },
+    { 1000000, 20 },
+    { 1000000, 30 },
+    { 1000000, 40 },
+    { 1000000, 50 },
+    { 1000000, 60 },
+    { 1000000, 70 },
+    { 1000000, 80 },
+    { 1000000, 90 },
+    { 1000000, 100 },
+};
+
+// collected values of dim2, one per entry of test12_feed
+collected_number test12_feed2[] = {
+    10*3, 20*3, 30*3, 40*3, 50*3, 60*3, 70*3, 80*3, 90*3, 100*3
+};
+
+NETDATA_DOUBLE test12_results[] = {
+    25, 25, 25, 25, 25, 25, 25, 25, 25
+};
+
+NETDATA_DOUBLE test12_results2[] = {
+    75, 75, 75, 75, 75, 75, 75, 75, 75
+};
+
+struct test test12 = {
+    "test12", // name
+    // the two dimensions are NOT equal here (dim2 = 3 x dim1); say so in the log
+    "test percentage-of-incremental-row with a 1:3 ratio between the dimensions",
+    1, // update_every
+    1, // multiplier
+    1, // divisor
+    RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm
+    10, // feed entries
+    9, // result entries
+    test12_feed, // feed
+    test12_results, // results
+    test12_feed2, // feed2
+    test12_results2 // results2
+};
+
+// --------------------------------------------------------------------------------------------------------------------
+// test13
+//
+// A single dimension collected at irregular intervals (0.2s up to 2s). The value
+// always grows by microseconds / 1000, i.e. at a steady 1000 per second.
+
+struct feed_values test13_feed[] = {
+    { 500000, 1000 },
+    { 600000, 1000 + 600 },
+    { 200000, 1600 + 200 },
+    { 1000000, 1800 + 1000 },
+    { 200000, 2800 + 200 },
+    { 2000000, 3000 + 2000 },
+    { 600000, 5000 + 600 },
+    { 400000, 5600 + 400 },
+    { 900000, 6000 + 900 },
+    { 1000000, 6900 + 1000 },
+};
+
+NETDATA_DOUBLE test13_results[] = {
+    83.3333300, 100, 100, 100, 100, 100, 100
+};
+
+struct test test13 = {
+    "test13", // name
+    "test incremental values updated in short and long durations",
+    1, // update_every
+    1, // multiplier
+    1, // divisor
+    RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm
+    10, // feed entries
+    7, // result entries
+    test13_feed, // feed
+    test13_results, // results
+    NULL, // feed2
+    NULL // results2
+};
+
+// --------------------------------------------------------------------------------------------------------------------
+// test14
+//
+// Regression data for issue #981: a 64-bit incremental counter sampled roughly
+// every 30 seconds, stored with update_every 30, multiplier 8, divisor 10^9.
+
+struct feed_values test14_feed[] = {
+    { 0, 0x015397dc42151c41ULL },
+    { 13573000, 0x015397e612e3ff5dULL },
+    { 29969000, 0x015397f905ecdaa8ULL },
+    { 29958000, 0x0153980c2a6cb5e4ULL },
+    { 30054000, 0x0153981f4032fb83ULL },
+    { 34952000, 0x015398355efadaccULL },
+    { 25046000, 0x01539845ba4b09f8ULL },
+    { 29947000, 0x0153985948bf381dULL },
+    { 30054000, 0x0153986c5b9c27e2ULL },
+    { 29942000, 0x0153987f888982d0ULL },
+};
+
+NETDATA_DOUBLE test14_results[] = {
+    23.1383300, 21.8515600, 21.8804600, 21.7788000, 22.0112200, 22.4386100, 22.0906100, 21.9150800
+};
+
+struct test test14 = {
+    "test14", // name
+    "issue #981 with real data",
+    30, // update_every
+    8, // multiplier
+    1000000000, // divisor
+    RRD_ALGORITHM_INCREMENTAL, // algorithm
+    10, // feed entries
+    8, // result entries
+    test14_feed, // feed
+    test14_results, // results
+    NULL, // feed2
+    NULL // results2
+};
+
+// test14b: same collection intervals as test14, but the counter is the running sum
+// of the elapsed microseconds, so the expected rate is exactly 1000000 per second.
+struct feed_values test14b_feed[] = {
+    { 0, 0 },
+    { 13573000, 13573000 },
+    { 29969000, 13573000 + 29969000 },
+    { 29958000, 13573000 + 29969000 + 29958000 },
+    { 30054000, 13573000 + 29969000 + 29958000 + 30054000 },
+    { 34952000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 },
+    { 25046000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 },
+    { 29947000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 },
+    { 30054000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 + 30054000 },
+    { 29942000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 + 30054000 + 29942000 },
+};
+
+NETDATA_DOUBLE test14b_results[] = {
+    1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000
+};
+
+struct test test14b = {
+    "test14b", // name
+    "issue #981 with dummy data",
+    30, // update_every
+    1, // multiplier
+    1, // divisor
+    RRD_ALGORITHM_INCREMENTAL, // algorithm
+    10, // feed entries
+    8, // result entries
+    test14b_feed, // feed
+    test14b_results, // results
+    NULL, // feed2
+    NULL // results2
+};
+
+// test14c: as test14b, but the first sample is placed 29 seconds past the second
+// boundary and the second sample follows 1 second later ("late start").
+struct feed_values test14c_feed[] = {
+    { 29000000, 29000000 },
+    { 1000000, 29000000 + 1000000 },
+    { 30000000, 29000000 + 1000000 + 30000000 },
+    { 30000000, 29000000 + 1000000 + 30000000 + 30000000 },
+    { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 },
+    { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 },
+    { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 },
+    { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 },
+    { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 },
+    { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 },
+};
+
+NETDATA_DOUBLE test14c_results[] = {
+    1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000
+};
+
+struct test test14c = {
+    "test14c", // name
+    "issue #981 with dummy data, checking for late start",
+    30, // update_every
+    1, // multiplier
+    1, // divisor
+    RRD_ALGORITHM_INCREMENTAL, // algorithm
+    10, // feed entries
+    9, // result entries
+    test14c_feed, // feed
+    test14c_results, // results
+    NULL, // feed2
+    NULL // results2
+};
+
+// --------------------------------------------------------------------------------------------------------------------
+// test15
+
+struct feed_values test15_feed[] = {
+    { 0, 1068066388 },
+    { 1008752, 1068822698 },
+    { 993809, 1069573072 },
+    { 995911, 1070324135 },
+    { 1014562, 1071078166 },
+    { 994684, 1071831349 },
+    { 993128, 1072235739 },
+    { 1010332,
1072958871 },
+    { 1003394, 1073707019 },
+    { 995201, 1074460255 },
+};
+
+// collected values of the second dimension, one per entry of test15_feed
+collected_number test15_feed2[] = {
+    178825286, 178825286, 178825286, 178825286, 178825498, 178825498, 179165652, 179202964, 179203282, 179204130
+};
+
+NETDATA_DOUBLE test15_results[] = {
+    5857.4080000, 5898.4540000, 5891.6590000, 5806.3160000, 5914.2640000, 3202.2630000, 5589.6560000, 5822.5260000, 5911.7520000
+};
+
+NETDATA_DOUBLE test15_results2[] = {
+    0.0000000, 0.0000000, 0.0024944, 1.6324779, 0.0212777, 2655.1890000, 290.5387000, 5.6733610, 6.5960220
+};
+
+struct test test15 = {
+    "test15", // name
+    "test incremental with 2 dimensions",
+    1, // update_every
+    8, // multiplier
+    1024, // divisor
+    RRD_ALGORITHM_INCREMENTAL, // algorithm
+    10, // feed entries
+    9, // result entries
+    test15_feed, // feed
+    test15_results, // results
+    test15_feed2, // feed2
+    test15_results2 // results2
+};
+
+// --------------------------------------------------------------------------------------------------------------------
+
+// Run one table-driven storage scenario.
+//
+// Creates the chart "unittest-<test->name>" with dimension "dim1" (plus "dim2" when
+// test->feed2 is set), feeds every entry of test->feed / test->feed2 into it and then
+// compares what was stored in the dimensions with test->results / test->results2.
+//
+// Returns the number of mismatches found (0 means the scenario passed). A stored
+// entry count different from test->result_entries counts as one mismatch.
+int run_test(struct test *test)
+{
+    fprintf(stderr, "\nRunning test '%s':\n%s\n", test->name, test->description);
+
+    default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC;
+    default_rrd_update_every = test->update_every;
+
+    char name[101];
+    snprintfz(name, sizeof(name) - 1, "unittest-%s", test->name);
+
+    // create the chart
+    RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1
+                                         , test->update_every, RRDSET_TYPE_LINE);
+    RRDDIM *rd = rrddim_add(st, "dim1", NULL, test->multiplier, test->divisor, test->algorithm);
+
+    RRDDIM *rd2 = NULL;
+    if(test->feed2)
+        rd2 = rrddim_add(st, "dim2", NULL, test->multiplier, test->divisor, test->algorithm);
+
+    rrdset_flag_set(st, RRDSET_FLAG_DEBUG);
+
+    // feed it with the test data
+    // time_now accumulates the fed microseconds and is only used for the log lines below
+    time_t time_now = 0, time_start = now_realtime_sec();
+    unsigned long c;
+    collected_number last = 0;
+    for(c = 0; c < test->feed_entries; c++) {
+        if(debug_flags) fprintf(stderr, "\n\n");
+
+        if(c) {
+            time_now += test->feed[c].microseconds;
+            fprintf(stderr, " > %s: feeding position %lu, after %0.3f seconds (%0.3f seconds from start), delta " NETDATA_DOUBLE_FORMAT
+                            ", rate " NETDATA_DOUBLE_FORMAT "\n",
+                    test->name, c+1,
+                    (float)test->feed[c].microseconds / 1000000.0,
+                    (float)time_now / 1000000.0,
+                    ((NETDATA_DOUBLE)test->feed[c].value - (NETDATA_DOUBLE)last) * (NETDATA_DOUBLE)test->multiplier / (NETDATA_DOUBLE)test->divisor,
+                    (((NETDATA_DOUBLE)test->feed[c].value - (NETDATA_DOUBLE)last) * (NETDATA_DOUBLE)test->multiplier / (NETDATA_DOUBLE)test->divisor) / (NETDATA_DOUBLE)test->feed[c].microseconds * (NETDATA_DOUBLE)1000000);
+
+            // the elapsed time is injected directly into the chart instead of being measured
+            // rrdset_next_usec_unfiltered(st, test->feed[c].microseconds);
+            st->usec_since_last_update = test->feed[c].microseconds;
+        }
+        else {
+            fprintf(stderr, " > %s: feeding position %lu\n", test->name, c+1);
+        }
+
+        fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rrddim_name(rd), test->feed[c].value);
+        rrddim_set(st, "dim1", test->feed[c].value);
+        last = test->feed[c].value;
+
+        if(rd2) {
+            fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rrddim_name(rd2), test->feed2[c]);
+            rrddim_set(st, "dim2", test->feed2[c]);
+        }
+
+        struct timeval now;
+        now_realtime_timeval(&now);
+        rrdset_timed_done(st, now, false);
+
+        // align the first entry to second boundary
+        // (for the first entry, feed[0].microseconds is used as the sub-second offset)
+        if(!c) {
+            fprintf(stderr, " > %s: fixing first collection time to be %llu microseconds to second boundary\n", test->name, test->feed[c].microseconds);
+            rd->collector.last_collected_time.tv_usec = st->last_collected_time.tv_usec = st->last_updated.tv_usec = test->feed[c].microseconds;
+            // time_start = st->last_collected_time.tv_sec;
+        }
+    }
+
+    // check the result
+    int errors = 0;
+
+    if(st->counter != test->result_entries) {
+        fprintf(stderr, " %s stored %u entries, but we were expecting %lu, ### E R R O R ###\n",
+                test->name, st->counter, test->result_entries);
+        errors++;
+    }
+
+    // compare only the positions that exist on both sides
+    unsigned long max = (st->counter < test->result_entries)?st->counter:test->result_entries;
+    for(c = 0 ; c < max ; c++) {
+        NETDATA_DOUBLE v = unpack_storage_number(rd->db.data[c]);
+        // the expected value goes through the same pack/unpack round trip as the stored one
+        NETDATA_DOUBLE n = unpack_storage_number(pack_storage_number(test->results[c], SN_DEFAULT_FLAGS));
+        // values are considered equal when they agree after scaling by 10^7 and rounding
+        int same = (roundndd(v * 10000000.0) == roundndd(n * 10000000.0))?1:0;
+        fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT
+                        ", found " NETDATA_DOUBLE_FORMAT ", %s\n",
+                test->name, rrddim_name(rd), c+1,
+                (int64_t)((rrdset_first_entry_s(st) + c * st->update_every) - time_start),
+                n, v, (same)?"OK":"### E R R O R ###");
+
+        if(!same) errors++;
+
+        if(rd2) {
+            v = unpack_storage_number(rd2->db.data[c]);
+            // NOTE(review): unlike dim1, the expected value of dim2 is compared as-is,
+            // without the pack/unpack round trip - confirm this asymmetry is intended
+            n = test->results2[c];
+            same = (roundndd(v * 10000000.0) == roundndd(n * 10000000.0))?1:0;
+            fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT
+                            ", found " NETDATA_DOUBLE_FORMAT ", %s\n",
+                    test->name, rrddim_name(rd2), c+1,
+                    (int64_t)((rrdset_first_entry_s(st) + c * st->update_every) - time_start),
+                    n, v, (same)?"OK":"### E R R O R ###");
+            if(!same) errors++;
+        }
+    }
+
+    return errors;
+}
+
+// Smoke test for renaming: creates a chart with two incremental dimensions, renames
+// the chart and each dimension twice, then dumps the chart variables as JSON to stderr.
+// Nothing is asserted; the function always returns 1.
+static int test_variable_renames(void) {
+    fprintf(stderr, "%s() running...\n", __FUNCTION__ );
+
+    fprintf(stderr, "Creating chart\n");
+    RRDSET *st = rrdset_create_localhost("chart", "ID", NULL, "family", "context", "Unit Testing", "a value", "unittest", NULL, 1, 1, RRDSET_TYPE_LINE);
+    fprintf(stderr, "Created chart with id '%s', name '%s'\n", rrdset_id(st), rrdset_name(st));
+
+    fprintf(stderr, "Creating dimension DIM1\n");
+    RRDDIM *rd1 = rrddim_add(st, "DIM1", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+    fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rrddim_id(rd1), rrddim_name(rd1));
+
+    fprintf(stderr, "Creating dimension DIM2\n");
+    RRDDIM *rd2 = rrddim_add(st, "DIM2", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+    fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rrddim_id(rd2),
rrddim_name(rd2));
+
+    fprintf(stderr, "Renaming chart to CHARTNAME1\n");
+    rrdset_reset_name(st, "CHARTNAME1");
+    fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", rrdset_id(st), rrdset_name(st));
+
+    fprintf(stderr, "Renaming chart to CHARTNAME2\n");
+    rrdset_reset_name(st, "CHARTNAME2");
+    fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", rrdset_id(st), rrdset_name(st));
+
+    fprintf(stderr, "Renaming dimension DIM1 to DIM1NAME1\n");
+    rrddim_reset_name(st, rd1, "DIM1NAME1");
+    fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd1), rrddim_name(rd1));
+
+    fprintf(stderr, "Renaming dimension DIM1 to DIM1NAME2\n");
+    rrddim_reset_name(st, rd1, "DIM1NAME2");
+    fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd1), rrddim_name(rd1));
+
+    fprintf(stderr, "Renaming dimension DIM2 to DIM2NAME1\n");
+    rrddim_reset_name(st, rd2, "DIM2NAME1");
+    fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2));
+
+    fprintf(stderr, "Renaming dimension DIM2 to DIM2NAME2\n");
+    rrddim_reset_name(st, rd2, "DIM2NAME2");
+    fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2));
+
+    // dump the resulting chart variables so the renames can be inspected in the output
+    BUFFER *buf = buffer_create(1, NULL);
+    health_api_v1_chart_variables2json(st, buf);
+    fprintf(stderr, "%s", buffer_tostring(buf));
+    buffer_free(buf);
+    return 1;
+}
+
+// Checks strdupz_path_subpath() against a table of path/subpath pairs covering empty
+// components and redundant slashes. Returns 0 when every case matches, 1 on the
+// first mismatch.
+int check_strdupz_path_subpath(void) {
+
+    struct strdupz_path_subpath_checks {
+        const char *path;
+        const char *subpath;
+        const char *result;
+    } checks[] = {
+        { "", "", "." },
+        { "/", "", "/" },
+        { "/etc/netdata", "", "/etc/netdata" },
+        { "/etc/netdata///", "", "/etc/netdata" },
+        { "/etc/netdata///", "health.d", "/etc/netdata/health.d" },
+        { "/etc/netdata///", "///health.d", "/etc/netdata/health.d" },
+        { "/etc/netdata", "///health.d", "/etc/netdata/health.d" },
+        { "", "///health.d", "./health.d" },
+        { "/", "///health.d", "/health.d" },
+
+        // terminator
+        { NULL, NULL, NULL }
+    };
+
+    size_t i;
+    for(i = 0; checks[i].result ; i++) {
+        char *s = strdupz_path_subpath(checks[i].path, checks[i].subpath);
+        // s is NULL-checked below, so it must not reach "%s" unguarded here
+        fprintf(stderr, "strdupz_path_subpath(\"%s\", \"%s\") = \"%s\": ", checks[i].path, checks[i].subpath, s ? s : "(null)");
+        if(!s || strcmp(s, checks[i].result) != 0) {
+            freez(s);
+            fprintf(stderr, "FAILED\n");
+            return 1;
+        }
+        else {
+            freez(s);
+            fprintf(stderr, "OK\n");
+        }
+    }
+
+    return 0;
+}
+
+// Runs the helper checks and then every table-driven RRD scenario, stopping at the
+// first failure. Returns 0 when everything passed, 1 otherwise.
+int run_all_mockup_tests(void)
+{
+    // executed in this order
+    static struct test *const rrd_tests[] = {
+        &test1, &test2, &test3, &test4, &test5, &test5b, &test6, &test7, &test8, &test9,
+        &test10, &test11, &test12, &test13, &test14, &test14b, &test14c, &test15
+    };
+
+    fprintf(stderr, "%s() running...\n", __FUNCTION__ );
+    if(check_strdupz_path_subpath())
+        return 1;
+
+    if(check_number_printing())
+        return 1;
+
+    if(check_rrdcalc_comparisons())
+        return 1;
+
+    // test_variable_renames() reports success with a non-zero return value
+    if(!test_variable_renames())
+        return 1;
+
+    for(size_t t = 0; t < sizeof(rrd_tests) / sizeof(rrd_tests[0]); t++) {
+        if(run_test(rrd_tests[t]))
+            return 1;
+    }
+
+    return 0;
+}
+
+// Feeds 20 samples, `delay` microseconds apart, into a fresh chart whose first
+// collection is placed `shift` microseconds past the second boundary, then checks
+// the values stored for the "absolute" and "incremental" dimensions.
+//
+// Returns 0 when all stored values are as expected, 1 otherwise (including when
+// delay is not positive).
+int unit_test(long delay, long shift)
+{
+    fprintf(stderr, "%s() running...\n", __FUNCTION__ );
+
+    // delay is the divisor of the expected per-second increment computed below
+    if(delay <= 0) {
+        fprintf(stderr, "%s(): invalid delay %ld, it must be positive\n", __FUNCTION__, delay);
+        return 1;
+    }
+
+    // gives every invocation its own chart name
+    static int repeat = 0;
+    repeat++;
+
+    char name[101];
+    snprintfz(name, sizeof(name) - 1, "unittest-%d-%ld-%ld", repeat, delay, shift);
+
+    //debug_flags = 0xffffffff;
+    default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC;
+    default_rrd_update_every = 1;
+
+    // which dimensions take part in the test; the two percentage ones are disabled
+    int do_abs = 1;
+    int do_inc = 1;
+    int do_abst = 0;
+    int do_absi = 0;
+
+    RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1, 1
+                                         , RRDSET_TYPE_LINE);
+    rrdset_flag_set(st, RRDSET_FLAG_DEBUG);
+
+    RRDDIM *rdabs = NULL;
+    RRDDIM *rdinc = NULL;
+    RRDDIM *rdabst = NULL;
+    RRDDIM *rdabsi = NULL;
+
+    if(do_abs) rdabs = rrddim_add(st, "absolute", "absolute", 1, 1, RRD_ALGORITHM_ABSOLUTE);
+    if(do_inc) rdinc = rrddim_add(st, "incremental", "incremental", 1, 1, RRD_ALGORITHM_INCREMENTAL);
+    if(do_abst) rdabst = rrddim_add(st, "percentage-of-absolute-row", "percentage-of-absolute-row", 1, 1, RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL);
+    if(do_absi) rdabsi = rrddim_add(st, "percentage-of-incremental-row", "percentage-of-incremental-row", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL);
+
+    long increment = 1000;
+    collected_number i = 0;
+
+    unsigned long c, dimensions = rrdset_number_of_dimensions(st);
+    RRDDIM *rd;
+
+    for(c = 0; c < 20 ;c++) {
+        i += increment;
+
+        fprintf(stderr, "\n\nLOOP = %lu, DELAY = %ld, VALUE = " COLLECTED_NUMBER_FORMAT "\n", c, delay, i);
+        if(c) {
+            // rrdset_next_usec_unfiltered(st, delay);
+            st->usec_since_last_update = delay;
+        }
+        if(do_abs) rrddim_set(st, "absolute", i);
+        if(do_inc) rrddim_set(st, "incremental", i);
+        if(do_abst) rrddim_set(st, "percentage-of-absolute-row", i);
+        if(do_absi) rrddim_set(st, "percentage-of-incremental-row", i);
+
+        // the first collection happens "now", `shift` microseconds into the second
+        if(!c) {
+            now_realtime_timeval(&st->last_collected_time);
+            st->last_collected_time.tv_usec = shift;
+        }
+
+        // prevent it from deleting the dimensions
+        rrddim_foreach_read(rd, st) {
+            rd->collector.last_collected_time.tv_sec = st->last_collected_time.tv_sec;
+        }
+        rrddim_foreach_done(rd);
+
+        rrdset_done(st);
+    }
+
+    // from here on, increment is the expected growth per stored point
+    unsigned long oincrement = increment;
+    increment = increment * st->update_every * 1000000 / delay;
+    fprintf(stderr, "\n\nORIGINAL INCREMENT: %lu, INCREMENT %ld, DELAY %ld, SHIFT %ld\n", oincrement * 10, increment * 10, delay, shift);
+
+    int ret = 0;
+    storage_number sn;
+    NETDATA_DOUBLE cn, v;
+    for(c = 0 ; c < st->counter ; c++) {
+        fprintf(stderr, "\nPOSITION: c = %lu, EXPECTED VALUE %lu\n", c, (oincrement + c * increment + increment * (1000000 - shift) / 1000000 )* 10);
+
+        rrddim_foreach_read(rd, st) {
+            sn = rd->db.data[c];
+            cn = unpack_storage_number(sn);
+            fprintf(stderr, "\t %s " NETDATA_DOUBLE_FORMAT " (PACKED AS " STORAGE_NUMBER_FORMAT ") -> ", rrddim_id(rd), cn, sn);
+
+            if(rd == rdabs) v =
+                ( oincrement
+                    // + (increment * (1000000 - shift) / 1000000)
+                    + (c + 1) * increment
+                );
+
+            // the first incremental point only covers the part of the second after `shift`
+            else if(rd == rdinc) v = (c?(increment):(increment * (1000000 - shift) / 1000000));
+            else if(rd == rdabst) v = oincrement / dimensions / 10;
+            else if(rd == rdabsi) v = oincrement / dimensions / 10;
+            else v = 0;
+
+            if(v == cn) fprintf(stderr, "passed.\n");
+            else {
+                fprintf(stderr, "ERROR!
(expected " NETDATA_DOUBLE_FORMAT ")\n", v);
+                ret = 1;
+            }
+        }
+        rrddim_foreach_done(rd);
+    }
+
+    if(ret)
+        fprintf(stderr, "\n\nUNIT TEST(%ld, %ld) FAILED\n\n", delay, shift);
+
+    return ret;
+}
+
+// Sanity-checks the SQLite build the agent is linked against, using an in-memory
+// database: DELETE/UPDATE with LIMIT, registration and use of the now_usec() SQL
+// function, and the TABLE_ACLK_ALERT statement.
+//
+// Returns 0 when every statement succeeds, 1 otherwise. The database handle and the
+// SQL buffer are released on every path.
+int test_sqlite(void) {
+    fprintf(stderr, "%s() running...\n", __FUNCTION__ );
+    sqlite3 *db_mt = NULL;
+    BUFFER *sql = NULL;
+    const char *uuid_str = "0000_000";
+    int ret = 1;
+    fprintf(stderr, "Testing SQLIte\n");
+
+    int rc = sqlite3_open(":memory:", &db_mt);
+    if (rc != SQLITE_OK) {
+        fprintf(stderr,"Failed to test SQLite: DB init failed\n");
+        goto cleanup;
+    }
+
+    rc = sqlite3_exec_monitored(db_mt, "CREATE TABLE IF NOT EXISTS mine (id1, id2);", 0, 0, NULL);
+    if (rc != SQLITE_OK) {
+        fprintf(stderr,"Failed to test SQLite: Create table failed\n");
+        goto cleanup;
+    }
+
+    rc = sqlite3_exec_monitored(db_mt, "DELETE FROM MINE LIMIT 1;", 0, 0, NULL);
+    if (rc != SQLITE_OK) {
+        fprintf(stderr,"Failed to test SQLite: Delete with LIMIT failed\n");
+        goto cleanup;
+    }
+
+    rc = sqlite3_exec_monitored(db_mt, "UPDATE MINE SET id1=1 LIMIT 1;", 0, 0, NULL);
+    if (rc != SQLITE_OK) {
+        fprintf(stderr,"Failed to test SQLite: Update with LIMIT failed\n");
+        goto cleanup;
+    }
+
+    rc = sqlite3_create_function(db_mt, "now_usec", 1, SQLITE_ANY, 0, sqlite_now_usec, 0, 0);
+    if (unlikely(rc != SQLITE_OK)) {
+        fprintf(stderr, "Failed to register internal now_usec function\n");
+        goto cleanup;
+    }
+
+    rc = sqlite3_exec_monitored(db_mt, "UPDATE MINE SET id1=now_usec(0);", 0, 0, NULL);
+    if (rc != SQLITE_OK) {
+        fprintf(stderr,"Failed to test SQLite: Update with now_usec() failed\n");
+        goto cleanup;
+    }
+
+    sql = buffer_create(ACLK_SYNC_QUERY_SIZE, NULL);
+
+    buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str);
+    rc = sqlite3_exec_monitored(db_mt, buffer_tostring(sql), 0, 0, NULL);
+    if (rc != SQLITE_OK) {
+        fprintf(stderr,"SQLite statement failed: %s\n", buffer_tostring(sql));
+        fprintf(stderr,"SQLite tests failed\n");
+        goto cleanup;
+    }
+
+    fprintf(stderr,"SQLite is OK\n");
+    ret = 0;
+
+cleanup:
+    if (sql)
+        buffer_free(sql);
+
+    // the handle has to be closed even when sqlite3_open() failed; closing NULL is a no-op
+    sqlite3_close_v2(db_mt);
+    return ret;
+}
diff --git a/src/daemon/unit_test.h b/src/daemon/unit_test.h
new file mode 100644
index 000000000..c7cd104e1
--- /dev/null
+++ b/src/daemon/unit_test.h
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_UNIT_TEST_H
+#define NETDATA_UNIT_TEST_H 1
+
+#include "stdbool.h"
+
+int unit_test_storage(void);
+int unit_test(long delay, long shift);
+int run_all_mockup_tests(void);
+int unit_test_str2ld(void);
+int unit_test_buffer(void);
+int unit_test_static_threads(void);
+int test_sqlite(void);
+int unit_test_bitmaps(void);
+#ifdef ENABLE_DBENGINE
+int test_dbengine(void);
+void generate_dbengine_dataset(unsigned history_seconds);
+void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS,
+                          unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB, unsigned DISK_SPACE_MB);
+
+#endif
+
+bool command_argument_sanitization_tests(void);
+
+#endif /* NETDATA_UNIT_TEST_H */
diff --git a/src/daemon/watcher.c b/src/daemon/watcher.c
new file mode 100644
index 000000000..1e0090e24
--- /dev/null
+++ b/src/daemon/watcher.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "watcher.h"
+
+// one entry per watcher_step_id_t; allocated by watcher_thread_start()
+watcher_step_t *watcher_steps;
+
+static struct completion shutdown_begin_completion;
+static struct completion shutdown_end_completion;
+static ND_THREAD *watcher_thread;
+
+// Signals the watcher thread that the shutdown sequence has started.
+void watcher_shutdown_begin(void) {
+    completion_mark_complete(&shutdown_begin_completion);
+}
+
+// Signals the watcher thread that the shutdown sequence has finished.
+void watcher_shutdown_end(void) {
+    completion_mark_complete(&shutdown_end_completion);
+}
+
+// Marks one shutdown step as done; step_id indexes watcher_steps without a range check.
+void watcher_step_complete(watcher_step_id_t step_id) {
+    completion_mark_complete(&watcher_steps[step_id].p);
+}
+
+// Blocks until the given step is marked complete and logs how long that took.
+// With ENABLE_SENTRY the wait is bounded by `timeout` and the process is aborted
+// when the bound is exceeded; otherwise the wait is unbounded.
+static void watcher_wait_for_step(const watcher_step_id_t step_id)
+{
+    unsigned timeout = 90;
+
+    usec_t step_start_time = now_monotonic_usec();
+
+#ifdef ENABLE_SENTRY
+    // Wait with a timeout
+    bool ok = completion_timedwait_for(&watcher_steps[step_id].p, timeout);
+#else
+    // Wait
indefinitely
+    bool ok = true;
+    completion_wait_for(&watcher_steps[step_id].p);
+#endif
+
+    usec_t step_duration = now_monotonic_usec() - step_start_time;
+
+    if (ok) {
+        netdata_log_info("shutdown step: [%d/%d] - '%s' finished in %llu milliseconds",
+                         (int)step_id + 1, (int)WATCHER_STEP_ID_MAX,
+                         watcher_steps[step_id].msg, step_duration / USEC_PER_MS);
+    } else {
+        // Do not call fatal() because it will try to execute the exit
+        // sequence twice.
+        netdata_log_error("shutdown step: [%d/%d] - '%s' took more than %u seconds (ie. %llu milliseconds)",
+                          (int)step_id + 1, (int)WATCHER_STEP_ID_MAX, watcher_steps[step_id].msg,
+                          timeout, step_duration / USEC_PER_MS);
+
+        abort();
+    }
+}
+
+// Thread body of the shutdown watcher: sleeps until the shutdown begins, then waits
+// for each shutdown step in turn and finally logs the total shutdown duration.
+void *watcher_main(void *arg)
+{
+    UNUSED(arg);
+
+    netdata_log_debug(D_SYSTEM, "Watcher thread started");
+
+    // wait until the agent starts the shutdown process
+    completion_wait_for(&shutdown_begin_completion);
+    netdata_log_error("Shutdown process started");
+
+    usec_t shutdown_start_time = now_monotonic_usec();
+
+    // the steps are waited for in the order they are declared in watcher_step_id_t
+    for (int step = WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE; step < WATCHER_STEP_ID_MAX; step++)
+        watcher_wait_for_step((watcher_step_id_t)step);
+
+    completion_wait_for(&shutdown_end_completion);
+    usec_t shutdown_end_time = now_monotonic_usec();
+
+    usec_t shutdown_duration = shutdown_end_time - shutdown_start_time;
+    netdata_log_error("Shutdown process ended in %llu milliseconds",
+                      shutdown_duration / USEC_PER_MS);
+
+    return NULL;
+}
+
+// Allocates the step table, gives every step its log message, initializes all the
+// completions and starts the joinable watcher thread.
+void watcher_thread_start(void) {
+    watcher_steps = callocz(WATCHER_STEP_ID_MAX, sizeof(watcher_step_t));
+
+    watcher_steps[WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE].msg =
+        "create shutdown file";
+    watcher_steps[WATCHER_STEP_ID_DBENGINE_EXIT_MODE].msg =
+        "dbengine exit mode";
+    watcher_steps[WATCHER_STEP_ID_CLOSE_WEBRTC_CONNECTIONS].msg =
+        "close webrtc connections";
+    watcher_steps[WATCHER_STEP_ID_DISABLE_MAINTENANCE_NEW_QUERIES_NEW_WEB_REQUESTS_NEW_STREAMING_CONNECTIONS_AND_ACLK].msg =
+        "disable maintenance, new queries, new web requests, new streaming connections and aclk";
+    watcher_steps[WATCHER_STEP_ID_STOP_MAINTENANCE_THREAD].msg =
+        "stop maintenance thread";
+    watcher_steps[WATCHER_STEP_ID_STOP_EXPORTERS_HEALTH_AND_WEB_SERVERS_THREADS].msg =
+        "stop exporters, health and web servers threads";
+    watcher_steps[WATCHER_STEP_ID_STOP_COLLECTORS_AND_STREAMING_THREADS].msg =
+        "stop collectors and streaming threads";
+    watcher_steps[WATCHER_STEP_ID_STOP_REPLICATION_THREADS].msg =
+        "stop replication threads";
+    watcher_steps[WATCHER_STEP_ID_PREPARE_METASYNC_SHUTDOWN].msg =
+        "prepare metasync shutdown";
+    watcher_steps[WATCHER_STEP_ID_DISABLE_ML_DETECTION_AND_TRAINING_THREADS].msg =
+        "disable ML detection and training threads";
+    watcher_steps[WATCHER_STEP_ID_STOP_CONTEXT_THREAD].msg =
+        "stop context thread";
+    watcher_steps[WATCHER_STEP_ID_CLEAR_WEB_CLIENT_CACHE].msg =
+        "clear web client cache";
+    watcher_steps[WATCHER_STEP_ID_STOP_ACLK_THREADS].msg =
+        "stop aclk threads";
+    watcher_steps[WATCHER_STEP_ID_STOP_ALL_REMAINING_WORKER_THREADS].msg =
+        "stop all remaining worker threads";
+    watcher_steps[WATCHER_STEP_ID_CANCEL_MAIN_THREADS].msg =
+        "cancel main threads";
+    watcher_steps[WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS].msg =
+        "flush dbengine tiers";
+    watcher_steps[WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS].msg =
+        "stop collection for all hosts";
+    watcher_steps[WATCHER_STEP_ID_STOP_METASYNC_THREADS].msg =
+        "stop metasync threads";
+    watcher_steps[WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH].msg =
+        "wait for dbengine collectors to finish";
+    watcher_steps[WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING].msg =
+        "wait for dbengine main cache to finish flushing";
+    watcher_steps[WATCHER_STEP_ID_STOP_DBENGINE_TIERS].msg =
+        "stop dbengine tiers";
+    watcher_steps[WATCHER_STEP_ID_CLOSE_SQL_DATABASES].msg =
+        "close SQL databases";
+    watcher_steps[WATCHER_STEP_ID_REMOVE_PID_FILE].msg =
+        "remove pid file";
+    watcher_steps[WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES].msg =
+        "free openssl structures";
+    watcher_steps[WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE].msg =
+        "remove incomplete shutdown file";
+
+    for (size_t i = 0; i != WATCHER_STEP_ID_MAX; i++) {
+        completion_init(&watcher_steps[i].p);
+    }
+
+    completion_init(&shutdown_begin_completion);
+    completion_init(&shutdown_end_completion);
+
+    watcher_thread = nd_thread_create("P[WATCHER]", NETDATA_THREAD_OPTION_JOINABLE, watcher_main, NULL);
+}
+
+// Joins the watcher thread, then destroys all the completions and frees the step table.
+void watcher_thread_stop(void) {
+    nd_thread_join(watcher_thread);
+
+    for (size_t i = 0; i != WATCHER_STEP_ID_MAX; i++) {
+        completion_destroy(&watcher_steps[i].p);
+    }
+
+    completion_destroy(&shutdown_begin_completion);
+    completion_destroy(&shutdown_end_completion);
+
+    freez(watcher_steps);
+    // the pointer is global; do not leave it dangling
+    watcher_steps = NULL;
+}
diff --git a/src/daemon/watcher.h b/src/daemon/watcher.h
new file mode 100644
index 000000000..b785ca436
--- /dev/null
+++ b/src/daemon/watcher.h
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef DAEMON_WATCHER_H
+#define DAEMON_WATCHER_H
+
+#include "libnetdata/libnetdata.h"
+
+// Shutdown steps, declared in the order the watcher thread waits for them.
+typedef enum {
+    WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE = 0,
+    WATCHER_STEP_ID_DBENGINE_EXIT_MODE,
+    WATCHER_STEP_ID_CLOSE_WEBRTC_CONNECTIONS,
+    WATCHER_STEP_ID_DISABLE_MAINTENANCE_NEW_QUERIES_NEW_WEB_REQUESTS_NEW_STREAMING_CONNECTIONS_AND_ACLK,
+    WATCHER_STEP_ID_STOP_MAINTENANCE_THREAD,
+    WATCHER_STEP_ID_STOP_EXPORTERS_HEALTH_AND_WEB_SERVERS_THREADS,
+    WATCHER_STEP_ID_STOP_COLLECTORS_AND_STREAMING_THREADS,
+    WATCHER_STEP_ID_STOP_REPLICATION_THREADS,
+    WATCHER_STEP_ID_PREPARE_METASYNC_SHUTDOWN,
+    WATCHER_STEP_ID_DISABLE_ML_DETECTION_AND_TRAINING_THREADS,
+    WATCHER_STEP_ID_STOP_CONTEXT_THREAD,
+    WATCHER_STEP_ID_CLEAR_WEB_CLIENT_CACHE,
+    WATCHER_STEP_ID_STOP_ACLK_THREADS,
+    WATCHER_STEP_ID_STOP_ALL_REMAINING_WORKER_THREADS,
+    WATCHER_STEP_ID_CANCEL_MAIN_THREADS,
+    WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS,
+    WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS,
+    WATCHER_STEP_ID_STOP_METASYNC_THREADS,
+    WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH,
+    WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING,
+    WATCHER_STEP_ID_STOP_DBENGINE_TIERS,
+    WATCHER_STEP_ID_CLOSE_SQL_DATABASES,
+    WATCHER_STEP_ID_REMOVE_PID_FILE,
+    WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES,
+    WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE,
+
+    // Always keep this as the last enum value
+    WATCHER_STEP_ID_MAX
+} watcher_step_id_t;
+
+typedef struct {
+    const char *msg;        // text used in the shutdown log lines
+    struct completion p;    // marked complete by watcher_step_complete()
+} watcher_step_t;
+
+extern watcher_step_t *watcher_steps;
+
+void watcher_thread_start(void);
+void watcher_thread_stop(void);
+
+void watcher_shutdown_begin(void);
+void watcher_shutdown_end(void);
+
+void watcher_step_complete(watcher_step_id_t step_id);
+
+#endif /* DAEMON_WATCHER_H */
--
cgit v1.2.3