author:    Daniel Baumann <daniel.baumann@progress-linux.org> (2024-05-04 14:31:17 +0000)
committer: Daniel Baumann <daniel.baumann@progress-linux.org> (2024-05-04 14:31:17 +0000)
commit:    8020f71afd34d7696d7933659df2d763ab05542f
tree:      2fdf1b5447ffd8bdd61e702ca183e814afdcb4fc /daemon
parent:    Initial commit.
Adding upstream version 1.37.1.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'daemon')
30 files changed, 12782 insertions, 0 deletions
diff --git a/daemon/Makefile.am b/daemon/Makefile.am
new file mode 100644
index 0000000..d3102f6
--- /dev/null
+++ b/daemon/Makefile.am
@@ -0,0 +1,23 @@
# SPDX-License-Identifier: GPL-3.0-or-later

AUTOMAKE_OPTIONS = subdir-objects
MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
CLEANFILES = \
    anonymous-statistics.sh \
    $(NULL)

include $(top_srcdir)/build/subst.inc
SUFFIXES = .in

dist_noinst_DATA = \
    README.md \
    config/README.md \
    anonymous-statistics.sh.in \
    get-kubernetes-labels.sh.in \
    $(NULL)

dist_plugins_SCRIPTS = \
    anonymous-statistics.sh \
    system-info.sh \
    get-kubernetes-labels.sh \
    $(NULL)

diff --git a/daemon/README.md b/daemon/README.md
new file mode 100644
index 0000000..c5951c6
--- /dev/null
+++ b/daemon/README.md
@@ -0,0 +1,520 @@
<!--
title: "Netdata daemon"
date: 2020-04-29
custom_edit_url: https://github.com/netdata/netdata/edit/master/daemon/README.md
-->

# Netdata daemon

## Starting netdata

- You can start Netdata by executing it with `/usr/sbin/netdata` (the installer will also start it).

- You can stop Netdata by killing it with `killall netdata`. You can stop and start Netdata at any point. When
  exiting, the [database engine](/database/engine/README.md) saves metrics to `/var/cache/netdata/dbengine/` so that
  it can continue when started again.

Access to the web site, for all graphs, is by default on port `19999`, so go to:

```sh
http://127.0.0.1:19999/
```

You can get the running config file at any time, by accessing `http://127.0.0.1:19999/netdata.conf`.

### Starting Netdata at boot

In the `system` directory you can find scripts and configurations for the various distros.

#### systemd

The installer already installs `netdata.service` if it detects a systemd system.

To install `netdata.service` by hand, run:

```sh
# stop Netdata
killall netdata

# copy netdata.service to systemd
cp system/netdata.service /etc/systemd/system/

# let systemd know there is a new service
systemctl daemon-reload

# enable Netdata at boot
systemctl enable netdata

# start Netdata
systemctl start netdata
```

#### init.d

In the `system` directory you can find `netdata-lsb`. Copy it to the proper place according to your distribution's
documentation. For Ubuntu, this can be done by running the following commands as root:

```sh
# copy the Netdata startup file to /etc/init.d
cp system/netdata-lsb /etc/init.d/netdata

# make sure it is executable
chmod +x /etc/init.d/netdata

# enable it
update-rc.d netdata defaults
```

#### openrc (gentoo)

In the `system` directory you can find `netdata-openrc`. Copy it to the proper place according to your distribution's
documentation.

#### CentOS / Red Hat Enterprise Linux

For older versions of RHEL/CentOS that don't have systemd, an init script is included in the `system` directory. It
can be installed by running the following commands as root:

```sh
# copy the Netdata startup file to /etc/init.d
cp system/netdata-init-d /etc/init.d/netdata

# make sure it is executable
chmod +x /etc/init.d/netdata

# enable it
chkconfig --add netdata
```

_There has been some recent work on the init script; see PR
<https://github.com/netdata/netdata/pull/403>._

#### other systems

You can start Netdata by running it from `/etc/rc.local` or equivalent.

## Command line options

Normally you don't need to supply any command line arguments to netdata.

If you do, though, they override the equivalent configuration options.
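For example (a sketch; adjust the values to your setup), you can run Netdata in the foreground on a different port
and override a single `netdata.conf` option without editing the file:

```sh
# run in the foreground (-D), listen on port 20000 (-p),
# and override one configuration option from the command line (-W set)
netdata -D -p 20000 -W set global "process scheduling policy" "idle"
```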
To get a list of all supported command line parameters, run:

```sh
netdata -h
```

The program will print the supported command line parameters.

The command line options of Netdata version 1.10.0 are the following:

```sh
 ^
 |.-.   .-.   .-.   .-.   .  netdata
 |   '-'   '-'   '-'   '-'   real-time performance monitoring, done right!
 +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->

 Copyright (C) 2016-2022, Netdata, Inc. <info@netdata.cloud>
 Released under GNU General Public License v3 or later.
 All rights reserved.

 Home Page  : https://netdata.cloud
 Source Code: https://github.com/netdata/netdata
 Docs       : https://learn.netdata.cloud
 Support    : https://github.com/netdata/netdata/issues
 License    : https://github.com/netdata/netdata/blob/master/LICENSE.md

 Twitter    : https://twitter.com/linuxnetdata
 LinkedIn   : https://linkedin.com/company/netdata-cloud/
 Facebook   : https://facebook.com/linuxnetdata/


 SYNOPSIS: netdata [options]

 Options:

  -c filename              Configuration file to load.
                           Default: /etc/netdata/netdata.conf

  -D                       Do not fork. Run in the foreground.
                           Default: run in the background

  -h                       Display this help message.

  -P filename              File to save a pid while running.
                           Default: do not save pid to a file

  -i IP                    The IP address to listen to.
                           Default: all IP addresses IPv4 and IPv6

  -p port                  API/Web port to use.
                           Default: 19999

  -s path                  Prefix for /proc and /sys (for containers).
                           Default: no prefix

  -t seconds               The internal clock of netdata.
                           Default: 1

  -u username              Run as user.
                           Default: netdata

  -v                       Print netdata version and exit.

  -V                       Print netdata version and exit.

  -W options               See Advanced options below.


 Advanced options:

  -W stacksize=N           Set the stacksize (in bytes).

  -W debug_flags=N         Set runtime tracing to debug.log.

  -W unittest              Run internal unittests and exit.

  -W createdataset=N       Create a DB engine dataset of N seconds and exit.

  -W set section option value
                           set netdata.conf option from the command line.

  -W buildinfo             Print the version, the configure options,
                           a list of optional features, and whether they
                           are enabled or not.

  -W buildinfojson         Print the version, the configure options,
                           a list of optional features, and whether they
                           are enabled or not, in JSON format.

  -W simple-pattern pattern string
                           Check if string matches pattern and exit.

  -W "claim -token=TOKEN -rooms=ROOM1,ROOM2 url=https://api.netdata.cloud"
                           Connect the agent to the workspace rooms pointed to by TOKEN and ROOM*.

 Signals netdata handles:

  - HUP                    Close and reopen log files.
  - USR1                   Save internal DB to disk.
  - USR2                   Reload health configuration.
```

You can send commands during runtime via [netdatacli](/cli/README.md).

## Log files

Netdata uses 3 log files:

1. `error.log`
2. `access.log`
3. `debug.log`

Any of them can be disabled by setting it to `/dev/null` or `none` in `netdata.conf`. By default `error.log` and
`access.log` are enabled. `debug.log` is only enabled if debugging/tracing is also enabled (Netdata needs to be
compiled with debugging enabled).

Log files are stored in `/var/log/netdata/` by default.

### error.log

The `error.log` is the `stderr` of the `netdata` daemon and all external plugins run by `netdata`.

So if any process in the Netdata process tree writes anything to its standard error, it will appear in `error.log`.
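To watch for problems as they happen, you can follow this log and filter for the tags described below (a minimal
sketch, assuming the default log location):

```sh
# follow the daemon and plugin stderr, keeping only ERROR and FATAL lines
tail -f /var/log/netdata/error.log | grep -E 'ERROR|FATAL'
```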
For most Netdata programs (including standard external plugins shipped by netdata), the following lines may appear:

| tag     | description |
|:-------:|:------------|
| `INFO`  | Something important the user should know. |
| `ERROR` | Something that might disable a part of netdata.<br/>The log line includes `errno` (if it is not zero). |
| `FATAL` | Something prevented a program from running.<br/>The log line includes `errno` (if it is not zero) and the program exited. |

So, when auto-detection of data collection fails, `ERROR` lines are logged and the relevant modules are disabled, but
the program continues to run.

When a Netdata program cannot run at all, a `FATAL` line is logged.

### access.log

The `access.log` logs web requests. The format is:

```txt
DATE: ID: (sent/all = SENT_BYTES/ALL_BYTES bytes PERCENT_COMPRESSION%, prep/sent/total PREP_TIME/SENT_TIME/TOTAL_TIME ms): ACTION CODE URL
```

where:

- `ID` is the client ID. Client IDs are auto-incremented every time a client connects to netdata.
- `SENT_BYTES` is the number of bytes sent to the client, without the HTTP response header.
- `ALL_BYTES` is the number of bytes of the response, before compression.
- `PERCENT_COMPRESSION` is the percentage of traffic saved due to compression.
- `PREP_TIME` is the time in milliseconds needed to prepare the response.
- `SENT_TIME` is the time in milliseconds needed to send the response to the client.
- `TOTAL_TIME` is the total time the request was inside Netdata (from the first byte of the request to the last byte
  of the response).
- `ACTION` can be `filecopy`, `options` (used in CORS), or `data` (API call).

### debug.log

See [debugging](#debugging).

## Netdata process scheduling policy

By default, Netdata versions prior to 1.34.0 run with the `idle` process scheduling policy, so that they use CPU
resources only when there is idle CPU to spare. On very busy servers (or weak servers), this can lead to gaps on the
charts.

Starting with version 1.34.0, Netdata instead uses the `batch` scheduling policy by default. This largely eliminates
issues with gaps in charts on busy systems while still keeping the impact on the rest of the system low.

You can set Netdata's scheduling policy in `netdata.conf`, like this:

```conf
[global]
  process scheduling policy = idle
```

You can use the following:

| policy | description |
| :-----------------------: | :---------- |
| `idle` | use CPU only when there is spare - this is lower than nice 19 - it was Netdata's default before 1.34.0 and it is so low that Netdata will run in "slow motion" under extreme system load, resulting in short (1-2 seconds) gaps at the charts. |
| `other`<br/>or<br/>`nice` | this is the default policy for all processes under Linux. It provides dynamic priorities based on the `nice` level of each process. Check below for setting this `nice` level for netdata. |
| `batch` | This policy is similar to `other` in that it schedules the thread according to its dynamic priority (based on the `nice` value). The difference is that this policy will cause the scheduler to always assume that the thread is CPU-intensive. Consequently, the scheduler will apply a small scheduling penalty with respect to wake-up behavior, so that this thread is mildly disfavored in scheduling decisions. |
| `fifo` | `fifo` can be used only with static priorities higher than 0, which means that when a `fifo` thread becomes runnable, it will always immediately preempt any currently running `other`, `batch`, or `idle` thread. `fifo` is a simple scheduling algorithm without time slicing. |
| `rr` | a simple enhancement of `fifo`. Everything described above for `fifo` also applies to `rr`, except that each thread is allowed to run only for a maximum time quantum. |
| `keep`<br/>or<br/>`none` | do not set scheduling policy, priority or nice level - i.e. keep running with whatever is set already (e.g. by systemd). |

For more information see `man sched`.

### scheduling priority for `rr` and `fifo`

Once the policy is set to one of `rr` or `fifo`, the following will appear:

```conf
[global]
  process scheduling priority = 0
```

These priorities are usually from 0 to 99. Higher numbers make the process more important.

### nice level for policies `other` or `batch`

When the policy is set to `other`, `nice`, or `batch`, the following will appear:

```conf
[global]
  process nice level = 19
```

## scheduling settings and systemd

Netdata will not be able to set its scheduling policy and priority to more important values when it is started as the
`netdata` user (the systemd case).

You can set these settings at `/etc/systemd/system/netdata.service`:

```sh
[Service]
# By default Netdata switches to scheduling policy idle, which makes it use CPU only
# when there is spare CPU available.
# Valid policies: other (the system default) | batch | idle | fifo | rr
#CPUSchedulingPolicy=other

# This sets the maximum scheduling priority Netdata can set (for policies: rr and fifo).
# Netdata (via [global].process scheduling priority in netdata.conf) can only lower this value.
# Priority gets values 1 (lowest) to 99 (highest).
#CPUSchedulingPriority=1

# For scheduling policy 'other' and 'batch', this sets the lowest niceness of netdata.
# Netdata (via [global].process nice level in netdata.conf) can only increase the value set here.
#Nice=0
```

Run `systemctl daemon-reload` to reload these changes.

Now, tell Netdata to keep these settings, as set by systemd, by editing `netdata.conf` and setting:

```conf
[global]
  process scheduling policy = keep
```

Using the above, whatever scheduling settings you have set at `netdata.service` will be maintained by netdata.
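To verify which policy and priority the running daemon actually got (whether from `netdata.conf` or from systemd),
you can ask the kernel directly (a sketch, assuming the `chrt` tool from `util-linux` is installed):

```sh
# show the scheduling policy and priority of the running netdata daemon
chrt -p $(pidof -s netdata)
```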
### Example 1: Netdata with nice -1 on non-systemd systems

On a system that is not based on systemd, to make Netdata run with nice level -1 (a little bit higher than the
default for all programs), edit `netdata.conf` and set:

```conf
[global]
  process scheduling policy = other
  process nice level = -1
```

then execute this to [restart Netdata](/docs/configure/start-stop-restart.md):

```sh
sudo service netdata restart
```

### Example 2: Netdata with nice -1 on systemd systems

On a system that is based on systemd, to make Netdata run with nice level -1 (a little bit higher than the default
for all programs), edit `netdata.conf` and set:

```conf
[global]
  process scheduling policy = keep
```

edit `/etc/systemd/system/netdata.service` and set:

```sh
[Service]
CPUSchedulingPolicy=other
Nice=-1
```

then execute:

```sh
sudo systemctl daemon-reload
sudo systemctl restart netdata
```

## Virtual memory

You may notice that netdata's virtual memory size, as reported by `ps` or `/proc/pid/status` (or even netdata's
applications virtual memory chart), is unrealistically high.

For example, it may be reported to be 150+MB, even if the resident memory size is just 25MB. Similar values may be
reported for Netdata plugins too.

Check this for example: a Netdata installation with default settings on Ubuntu 16.04 LTS. The top chart is **real
memory used**, while the bottom one is **virtual memory**:

![image](https://cloud.githubusercontent.com/assets/2662304/19013772/5eb7173e-87e3-11e6-8f2b-a2ccfeb06faf.png)

### Why does this happen?

The system memory allocator allocates virtual memory arenas, one per running thread. On Linux systems this defaults
to 16MB per thread on 64-bit machines. So, if you take the difference between real and virtual memory and divide it
by 16MB, you will roughly get the number of threads running.

The system does this for speed. Having a separate memory arena for each thread allows the threads to run in parallel
on multi-core systems, without any locks between them.

This behaviour is system specific. For example, the chart above when running Netdata on Alpine Linux (which uses
**musl** instead of **glibc**) is this:

![image](https://cloud.githubusercontent.com/assets/2662304/19013807/7cf5878e-87e4-11e6-9651-082e68701eab.png)

### Can we do anything to lower it?

Since Netdata already uses minimal memory allocations while it runs (i.e. it adapts its memory on start, so that
while it repeatedly collects data it does not do memory allocations), it already instructs the system memory
allocator to minimize the memory arenas for each thread. We have also added [2 configuration
options](https://github.com/netdata/netdata/blob/5645b1ee35248d94e6931b64a8688f7f0d865ec6/src/main.c#L410-L418) to
let you tweak these settings: `glibc malloc arena max for plugins` and `glibc malloc arena max for netdata`.

However, even if we instructed the memory allocator to use just one arena, it seems it allocates an arena per thread.

Netdata also supports `jemalloc` and `tcmalloc`, however both behave exactly the same as the glibc memory allocator
in this respect.

### Is this a problem?

No, it is not.

Linux reserves real memory (physical RAM) in pages (on x86 machines pages are 4KB each). So even if the system memory
allocator is allocating huge amounts of virtual memory, only the 4KB pages that are actually used are reserving
physical RAM.
The **real memory** chart in the Netdata applications section shows the amount of physical memory these pages occupy
(it accounts for whole pages, even if only parts of them are actually used).

## Debugging

When you compile Netdata with debugging:

1. compiler optimizations for your CPU are disabled (Netdata will run somewhat slower)

2. a lot of code is added all over netdata, to log debug messages to `/var/log/netdata/debug.log`. However, nothing
   is printed by default. Netdata allows you to select which sections of Netdata you want to trace. Tracing is
   activated via the config option `debug flags`. It accepts a hex number, to enable or disable specific sections.
   You can find the options supported at
   [log.h](https://raw.githubusercontent.com/netdata/netdata/master/libnetdata/log/log.h). They are the `D_*`
   defines. The value `0xffffffffffffffff` will enable all possible debug flags.

Once Netdata is compiled with debugging and tracing is enabled for a few sections, the file
`/var/log/netdata/debug.log` will contain the messages.

> Do not forget to disable tracing (`debug flags = 0`) when you are done tracing. The file `debug.log` can grow too
> fast.

### compiling Netdata with debugging

To compile Netdata with debugging, use this:

```sh
# step into the Netdata source directory
cd /usr/src/netdata.git

# run the installer with debugging enabled
CFLAGS="-O1 -ggdb -DNETDATA_INTERNAL_CHECKS=1" ./netdata-installer.sh
```

The above will compile and install Netdata with debugging info embedded. You can now use `debug flags` to set the
section(s) you need to trace.

### debugging crashes

We have done our best to make Netdata crash-free. If, however, Netdata crashes on your system, it would be very
helpful to provide stack traces of the crash. Without them, it will be almost impossible to find the issue (the code
base is too large to find such an issue just by observing it).

To provide stack traces, **you need to have Netdata compiled with debugging**. There is no need to enable any tracing
(`debug flags`).

Then you need to be in one of the following two cases:

1. Netdata crashes and you have a core dump

2. you can reproduce the crash

If you are not in one of these cases, you need to find a way to get there (i.e. if your system does not produce core
dumps, check your distro documentation to enable them).

### Netdata crashes and you have a core dump

> you need to have Netdata compiled with debugging info for this to work (check above)

Run the following command and post the output in a GitHub issue:

```sh
gdb $(which netdata) /path/to/core/dump
```

### you can reproduce a Netdata crash on your system

> you need to have Netdata compiled with debugging info for this to work (check above)

Install the package `valgrind` and run:

```sh
valgrind $(which netdata) -D
```

Netdata will start and it will be a lot slower. Now reproduce the crash and `valgrind` will dump the stack trace on
your console. Open a new GitHub issue and post the output.
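If your system is not producing core dumps, the following is a common starting point (a sketch; making the settings
persistent varies by distribution):

```sh
# allow core dumps of unlimited size in the current shell
ulimit -c unlimited

# check where the kernel writes core files
cat /proc/sys/kernel/core_pattern
```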
diff --git a/daemon/analytics.c b/daemon/analytics.c
new file mode 100644
index 0000000..3d0e514
--- /dev/null
+++ b/daemon/analytics.c
@@ -0,0 +1,1034 @@
// SPDX-License-Identifier: GPL-3.0-or-later

#include "common.h"
#include "buildinfo.h"

struct analytics_data analytics_data;
extern void analytics_exporting_connectors (BUFFER *b);
extern void analytics_exporting_connectors_ssl (BUFFER *b);
extern void analytics_build_info (BUFFER *b);
extern int aclk_connected;

struct collector {
    const char *plugin;
    const char *module;
};

struct array_printer {
    int c;
    BUFFER *both;
};

/*
 * Debug logging
 */
void analytics_log_data(void)
{
    debug(D_ANALYTICS, "NETDATA_CONFIG_STREAM_ENABLED      : [%s]", analytics_data.netdata_config_stream_enabled);
    debug(D_ANALYTICS, "NETDATA_CONFIG_MEMORY_MODE         : [%s]", analytics_data.netdata_config_memory_mode);
    debug(D_ANALYTICS, "NETDATA_CONFIG_EXPORTING_ENABLED   : [%s]", analytics_data.netdata_config_exporting_enabled);
    debug(D_ANALYTICS, "NETDATA_EXPORTING_CONNECTORS       : [%s]", analytics_data.netdata_exporting_connectors);
    debug(D_ANALYTICS, "NETDATA_ALLMETRICS_PROMETHEUS_USED : [%s]", analytics_data.netdata_allmetrics_prometheus_used);
    debug(D_ANALYTICS, "NETDATA_ALLMETRICS_SHELL_USED      : [%s]", analytics_data.netdata_allmetrics_shell_used);
    debug(D_ANALYTICS, "NETDATA_ALLMETRICS_JSON_USED       : [%s]", analytics_data.netdata_allmetrics_json_used);
    debug(D_ANALYTICS, "NETDATA_DASHBOARD_USED             : [%s]", analytics_data.netdata_dashboard_used);
    debug(D_ANALYTICS, "NETDATA_COLLECTORS                 : [%s]", analytics_data.netdata_collectors);
    debug(D_ANALYTICS, "NETDATA_COLLECTORS_COUNT           : [%s]", analytics_data.netdata_collectors_count);
    debug(D_ANALYTICS, "NETDATA_BUILDINFO                  : [%s]", analytics_data.netdata_buildinfo);
    debug(D_ANALYTICS, "NETDATA_CONFIG_PAGE_CACHE_SIZE     : [%s]", analytics_data.netdata_config_page_cache_size);
    debug(D_ANALYTICS, "NETDATA_CONFIG_MULTIDB_DISK_QUOTA  : [%s]", analytics_data.netdata_config_multidb_disk_quota);
    debug(D_ANALYTICS, "NETDATA_CONFIG_HTTPS_ENABLED       : [%s]", analytics_data.netdata_config_https_enabled);
    debug(D_ANALYTICS, "NETDATA_CONFIG_WEB_ENABLED         : [%s]", analytics_data.netdata_config_web_enabled);
    debug(D_ANALYTICS, "NETDATA_CONFIG_RELEASE_CHANNEL     : [%s]", analytics_data.netdata_config_release_channel);
    debug(D_ANALYTICS, "NETDATA_MIRRORED_HOST_COUNT        : [%s]", analytics_data.netdata_mirrored_host_count);
    debug(D_ANALYTICS, "NETDATA_MIRRORED_HOSTS_REACHABLE   : [%s]", analytics_data.netdata_mirrored_hosts_reachable);
    debug(D_ANALYTICS, "NETDATA_MIRRORED_HOSTS_UNREACHABLE : [%s]", analytics_data.netdata_mirrored_hosts_unreachable);
    debug(D_ANALYTICS, "NETDATA_NOTIFICATION_METHODS       : [%s]", analytics_data.netdata_notification_methods);
    debug(D_ANALYTICS, "NETDATA_ALARMS_NORMAL              : [%s]", analytics_data.netdata_alarms_normal);
    debug(D_ANALYTICS, "NETDATA_ALARMS_WARNING             : [%s]", analytics_data.netdata_alarms_warning);
    debug(D_ANALYTICS, "NETDATA_ALARMS_CRITICAL            : [%s]", analytics_data.netdata_alarms_critical);
    debug(D_ANALYTICS, "NETDATA_CHARTS_COUNT               : [%s]", analytics_data.netdata_charts_count);
    debug(D_ANALYTICS, "NETDATA_METRICS_COUNT              : [%s]", analytics_data.netdata_metrics_count);
    debug(D_ANALYTICS, "NETDATA_CONFIG_IS_PARENT           : [%s]", analytics_data.netdata_config_is_parent);
    debug(D_ANALYTICS, "NETDATA_CONFIG_HOSTS_AVAILABLE     : [%s]", analytics_data.netdata_config_hosts_available);
    debug(D_ANALYTICS, "NETDATA_HOST_CLOUD_AVAILABLE       : [%s]", analytics_data.netdata_host_cloud_available);
    debug(D_ANALYTICS, "NETDATA_HOST_ACLK_AVAILABLE        : [%s]", analytics_data.netdata_host_aclk_available);
    debug(D_ANALYTICS, "NETDATA_HOST_ACLK_PROTOCOL         : [%s]", analytics_data.netdata_host_aclk_protocol);
    debug(D_ANALYTICS, "NETDATA_HOST_ACLK_IMPLEMENTATION   : [%s]", analytics_data.netdata_host_aclk_implementation);
    debug(D_ANALYTICS, "NETDATA_HOST_AGENT_CLAIMED         : [%s]", analytics_data.netdata_host_agent_claimed);
    debug(D_ANALYTICS, "NETDATA_HOST_CLOUD_ENABLED         : [%s]", analytics_data.netdata_host_cloud_enabled);
    debug(D_ANALYTICS, "NETDATA_CONFIG_HTTPS_AVAILABLE     : [%s]", analytics_data.netdata_config_https_available);
    debug(D_ANALYTICS, "NETDATA_INSTALL_TYPE               : [%s]", analytics_data.netdata_install_type);
    debug(D_ANALYTICS, "NETDATA_PREBUILT_DISTRO            : [%s]", analytics_data.netdata_prebuilt_distro);
    debug(D_ANALYTICS, "NETDATA_CONFIG_IS_PRIVATE_REGISTRY : [%s]", analytics_data.netdata_config_is_private_registry);
    debug(D_ANALYTICS, "NETDATA_CONFIG_USE_PRIVATE_REGISTRY: [%s]", analytics_data.netdata_config_use_private_registry);
    debug(D_ANALYTICS, "NETDATA_CONFIG_OOM_SCORE           : [%s]", analytics_data.netdata_config_oom_score);
}

/*
 * Free data
 */
void analytics_free_data(void)
{
    freez(analytics_data.netdata_config_stream_enabled);
    freez(analytics_data.netdata_config_memory_mode);
    freez(analytics_data.netdata_config_exporting_enabled);
    freez(analytics_data.netdata_exporting_connectors);
    freez(analytics_data.netdata_allmetrics_prometheus_used);
    freez(analytics_data.netdata_allmetrics_shell_used);
    freez(analytics_data.netdata_allmetrics_json_used);
    freez(analytics_data.netdata_dashboard_used);
    freez(analytics_data.netdata_collectors);
    freez(analytics_data.netdata_collectors_count);
    freez(analytics_data.netdata_buildinfo);
    freez(analytics_data.netdata_config_page_cache_size);
    freez(analytics_data.netdata_config_multidb_disk_quota);
    freez(analytics_data.netdata_config_https_enabled);
    freez(analytics_data.netdata_config_web_enabled);
    freez(analytics_data.netdata_config_release_channel);
    freez(analytics_data.netdata_mirrored_host_count);
    freez(analytics_data.netdata_mirrored_hosts_reachable);
    freez(analytics_data.netdata_mirrored_hosts_unreachable);
    freez(analytics_data.netdata_notification_methods);
    freez(analytics_data.netdata_alarms_normal);
    freez(analytics_data.netdata_alarms_warning);
    freez(analytics_data.netdata_alarms_critical);
    freez(analytics_data.netdata_charts_count);
    freez(analytics_data.netdata_metrics_count);
    freez(analytics_data.netdata_config_is_parent);
    freez(analytics_data.netdata_config_hosts_available);
    freez(analytics_data.netdata_host_cloud_available);
    freez(analytics_data.netdata_host_aclk_available);
    freez(analytics_data.netdata_host_aclk_protocol);
    freez(analytics_data.netdata_host_aclk_implementation);
    freez(analytics_data.netdata_host_agent_claimed);
    freez(analytics_data.netdata_host_cloud_enabled);
    freez(analytics_data.netdata_config_https_available);
    freez(analytics_data.netdata_install_type);
    freez(analytics_data.netdata_config_is_private_registry);
    freez(analytics_data.netdata_config_use_private_registry);
    freez(analytics_data.netdata_config_oom_score);
    freez(analytics_data.netdata_prebuilt_distro);
}

/*
 * Set a numeric/boolean data with a value
 */
void analytics_set_data(char **name, char *value)
{
    if (*name) {
        analytics_data.data_length -= strlen(*name);
        freez(*name);
    }
    *name = strdupz(value);
    analytics_data.data_length += strlen(*name);
}

/*
 * Set a string data with a value
 */
void analytics_set_data_str(char **name, char *value)
{
    size_t value_string_len;
    if (*name) {
        analytics_data.data_length -= strlen(*name);
        freez(*name);
    }
    value_string_len = strlen(value) + 4;
    *name = mallocz(sizeof(char) * value_string_len);
    snprintfz(*name, value_string_len - 1, "\"%s\"", value);
    analytics_data.data_length += strlen(*name);
}

/*
 * Get data, used by web api v1
 */
void analytics_get_data(char *name, BUFFER *wb)
{
    buffer_strcat(wb, name);
}

/*
 * Log hits on the allmetrics page, with prometheus parameter
 */
void analytics_log_prometheus(void)
{
    if (netdata_anonymous_statistics_enabled == 1 &&
        likely(analytics_data.prometheus_hits < ANALYTICS_MAX_PROMETHEUS_HITS)) {
        analytics_data.prometheus_hits++;
        char b[7];
        snprintfz(b, 6, "%d", analytics_data.prometheus_hits);
        analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, b);
    }
}

/*
 * Log hits on the allmetrics page, with shell parameter (or default)
 */
void analytics_log_shell(void)
{
    if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.shell_hits < ANALYTICS_MAX_SHELL_HITS)) {
        analytics_data.shell_hits++;
        char b[7];
        snprintfz(b, 6, "%d", analytics_data.shell_hits);
        analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, b);
    }
}

/*
 * Log hits on the allmetrics page, with json parameter
 */
void analytics_log_json(void)
{
    if (netdata_anonymous_statistics_enabled == 1 && likely(analytics_data.json_hits < ANALYTICS_MAX_JSON_HITS)) {
        analytics_data.json_hits++;
        char b[7];
        snprintfz(b, 6, "%d", analytics_data.json_hits);
        analytics_set_data(&analytics_data.netdata_allmetrics_json_used, b);
    }
}

/*
 * Log hits on the dashboard (when calling HELLO).
 */
void analytics_log_dashboard(void)
{
    if (netdata_anonymous_statistics_enabled == 1 &&
        likely(analytics_data.dashboard_hits < ANALYTICS_MAX_DASHBOARD_HITS)) {
        analytics_data.dashboard_hits++;
        char b[7];
        snprintfz(b, 6, "%d", analytics_data.dashboard_hits);
        analytics_set_data(&analytics_data.netdata_dashboard_used, b);
    }
}

/*
 * Called when setting the oom score
 */
void analytics_report_oom_score(long long int score)
{
    char b[7];
    snprintfz(b, 6, "%d", (int)score);
    analytics_set_data(&analytics_data.netdata_config_oom_score, b);
}

void analytics_mirrored_hosts(void)
{
    RRDHOST *host;
    int count = 0;
    int reachable = 0;
    int unreachable = 0;
    char b[11];

    rrd_rdlock();
    rrdhost_foreach_read(host)
    {
        if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))
            continue;

        netdata_mutex_lock(&host->receiver_lock);
        ((host->receiver || host == localhost) ? reachable++ : unreachable++);
        netdata_mutex_unlock(&host->receiver_lock);

        count++;
    }
    rrd_unlock();

    snprintfz(b, 10, "%d", count);
    analytics_set_data(&analytics_data.netdata_mirrored_host_count, b);
    snprintfz(b, 10, "%d", reachable);
    analytics_set_data(&analytics_data.netdata_mirrored_hosts_reachable, b);
    snprintfz(b, 10, "%d", unreachable);
    analytics_set_data(&analytics_data.netdata_mirrored_hosts_unreachable, b);
}

void analytics_exporters(void)
{
    //when no exporters are available, an empty string will be sent
    //decide if something else is more suitable (but probably not null)
    BUFFER *bi = buffer_create(1000);
    analytics_exporting_connectors(bi);
    analytics_set_data_str(&analytics_data.netdata_exporting_connectors, (char *)buffer_tostring(bi));
    buffer_free(bi);
}

int collector_counter_callb(const DICTIONARY_ITEM *item __maybe_unused, void *entry, void *data) {

    struct array_printer *ap = (struct array_printer *)data;
    struct collector *col = (struct collector *)entry;

    BUFFER *bt = ap->both;

    if (likely(ap->c)) {
        buffer_strcat(bt, ",");
    }

    buffer_strcat(bt, "{");
    buffer_strcat(bt, " \"plugin\": \"");
    buffer_strcat(bt, col->plugin);
    buffer_strcat(bt, "\", \"module\":\"");
    buffer_strcat(bt, col->module);
    buffer_strcat(bt, "\" }");

    (ap->c)++;

    return 0;
}

/*
 * Create a JSON array of available collectors, same as in api/v1/info
 */
void analytics_collectors(void)
{
    RRDSET *st;
    DICTIONARY *dict = dictionary_create(DICT_OPTION_SINGLE_THREADED);
    char name[500];
    BUFFER *bt = buffer_create(1000);

    rrdset_foreach_read(st, localhost) {
        if (!rrdset_is_available_for_viewers(st))
            continue;

        struct collector col = {
            .plugin = rrdset_plugin_name(st),
            .module = rrdset_module_name(st)
        };
        snprintfz(name, 499, "%s:%s", col.plugin, col.module);
        dictionary_set(dict, name, &col, sizeof(struct collector));
    }
    rrdset_foreach_done(st);

    struct array_printer ap;
    ap.c = 0;
    ap.both = bt;

    dictionary_walkthrough_read(dict, collector_counter_callb, &ap);
    dictionary_destroy(dict);

    analytics_set_data(&analytics_data.netdata_collectors, (char *)buffer_tostring(ap.both));

    {
        char b[7];
        snprintfz(b, 6, "%d", ap.c);
        analytics_set_data(&analytics_data.netdata_collectors_count, b);
    }

    buffer_free(bt);
}

/*
 * Run the alarm-notify.sh script with the dump_methods parameter
 * SEND_CUSTOM is always available
 */
void analytics_alarms_notifications(void)
{
    char *script;
    script = mallocz(
        sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("alarm-notify.sh dump_methods") + 2));
    sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "alarm-notify.sh");
    if (unlikely(access(script, R_OK) != 0)) {
        info("Alarm notify script %s not found.", script);
        freez(script);
        return;
    }

    strcat(script, " dump_methods");

    pid_t command_pid;

    debug(D_ANALYTICS, "Executing %s", script);

    BUFFER *b = buffer_create(1000);
    int cnt = 0;
    FILE *fp_child_input;
    FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input);
    if (fp_child_output) {
        char line[200 + 1];

        while (fgets(line, 200, fp_child_output) != NULL) {
            char *end = line;
            while (*end && *end != '\n')
                end++;
            *end = '\0';

            if (likely(cnt))
                buffer_strcat(b, "|");

            buffer_strcat(b, line);

            cnt++;
        }
        netdata_pclose(fp_child_input, fp_child_output, command_pid);
    }
    freez(script);

    analytics_set_data_str(&analytics_data.netdata_notification_methods, (char *)buffer_tostring(b));

    buffer_free(b);
}

void analytics_get_install_type(void)
{
    if (localhost->system_info->install_type == NULL) {
        analytics_set_data_str(&analytics_data.netdata_install_type, "unknown");
    } else {
        analytics_set_data_str(&analytics_data.netdata_install_type, localhost->system_info->install_type);
    }

    if (localhost->system_info->prebuilt_dist != NULL) {
        analytics_set_data_str(&analytics_data.netdata_prebuilt_distro, localhost->system_info->prebuilt_dist);
    }
}

/*
 * Pick up if https is actually used
 */
void analytics_https(void)
{
    BUFFER *b = buffer_create(30);
#ifdef ENABLE_HTTPS
    analytics_exporting_connectors_ssl(b);
    buffer_strcat(
        b,
        netdata_ssl_client_ctx && rrdhost_flag_check(localhost, RRDHOST_FLAG_RRDPUSH_SENDER_CONNECTED) &&
                localhost->sender->ssl.flags == NETDATA_SSL_HANDSHAKE_COMPLETE ?
            "streaming|" :
            "|");
    buffer_strcat(b, netdata_ssl_srv_ctx ? "web" : "");
#else
    buffer_strcat(b, "||");
#endif

    analytics_set_data_str(&analytics_data.netdata_config_https_available, (char *)buffer_tostring(b));
    buffer_free(b);
}

void analytics_charts(void)
{
    RRDSET *st;
    int c = 0;

    rrdset_foreach_read(st, localhost)
        if (rrdset_is_available_for_viewers(st)) c++;
    rrdset_foreach_done(st);

    {
        char b[7];
        snprintfz(b, 6, "%d", c);
        analytics_set_data(&analytics_data.netdata_charts_count, b);
    }
}

void analytics_metrics(void)
{
    RRDSET *st;
    long int dimensions = 0;
    rrdset_foreach_read(st, localhost) {
        if (rrdset_is_available_for_viewers(st)) {
            RRDDIM *rd;
            rrddim_foreach_read(rd, st) {
                if (rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN) || rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE))
                    continue;
                dimensions++;
            }
            rrddim_foreach_done(rd);
        }
    }
    rrdset_foreach_done(st);

    {
        char b[7];
        snprintfz(b, 6, "%ld", dimensions);
        analytics_set_data(&analytics_data.netdata_metrics_count, b);
    }
}

void analytics_alarms(void)
{
    int alarm_warn = 0, alarm_crit = 0, alarm_normal = 0;
    char b[10];
    RRDCALC *rc;
    foreach_rrdcalc_in_rrdhost_read(localhost, rc) {
        if (unlikely(!rc->rrdset || !rc->rrdset->last_collected_time.tv_sec))
            continue;

        switch (rc->status) {
            case RRDCALC_STATUS_WARNING:
                alarm_warn++;
                break;
            case RRDCALC_STATUS_CRITICAL:
                alarm_crit++;
                break;
            default:
                alarm_normal++;
        }
    }
    foreach_rrdcalc_in_rrdhost_done(rc);

    snprintfz(b, 9, "%d", alarm_normal);
    analytics_set_data(&analytics_data.netdata_alarms_normal, b);
    snprintfz(b, 9, "%d", alarm_warn);
    analytics_set_data(&analytics_data.netdata_alarms_warning, b);
    snprintfz(b, 9, "%d", alarm_crit);
    analytics_set_data(&analytics_data.netdata_alarms_critical, b);
}

/*
 * Misc attributes to get (run from start)
 */
void analytics_misc(void)
{
#ifdef ENABLE_ACLK
    analytics_set_data(&analytics_data.netdata_host_cloud_available, "true");
    analytics_set_data_str(&analytics_data.netdata_host_aclk_implementation, "Next Generation");
#else
    analytics_set_data(&analytics_data.netdata_host_cloud_available, "false");
    analytics_set_data_str(&analytics_data.netdata_host_aclk_implementation, "");
#endif

"true" : "false"); + + analytics_set_data(&analytics_data.netdata_config_is_private_registry, "false"); + analytics_set_data(&analytics_data.netdata_config_use_private_registry, "false"); + + if (strcmp( + config_get(CONFIG_SECTION_REGISTRY, "registry to announce", "https://registry.my-netdata.io"), + "https://registry.my-netdata.io")) + analytics_set_data(&analytics_data.netdata_config_use_private_registry, "true"); + + //do we need both registry to announce and enabled to indicate that this is a private registry ? + if (config_get_boolean(CONFIG_SECTION_REGISTRY, "enabled", CONFIG_BOOLEAN_NO) && + web_server_mode != WEB_SERVER_MODE_NONE) + analytics_set_data(&analytics_data.netdata_config_is_private_registry, "true"); +} + +void analytics_aclk(void) +{ +#ifdef ENABLE_ACLK + if (aclk_connected) { + analytics_set_data(&analytics_data.netdata_host_aclk_available, "true"); + analytics_set_data_str(&analytics_data.netdata_host_aclk_protocol, "New"); + } + else +#endif + analytics_set_data(&analytics_data.netdata_host_aclk_available, "false"); +} + +/* + * Get the meta data, called from the thread once after the original delay + * These are values that won't change during agent runtime, and therefore + * don't try to read them on each META event send + */ +void analytics_gather_immutable_meta_data(void) +{ + analytics_misc(); + analytics_exporters(); + analytics_https(); +} + +/* + * Get the meta data, called from the thread on every heartbeat, and right before the EXIT event + * These are values that can change between agent restarts, and therefore + * try to read them on each META event send + */ +void analytics_gather_mutable_meta_data(void) +{ + analytics_collectors(); + analytics_alarms(); + analytics_charts(); + analytics_metrics(); + analytics_aclk(); + analytics_mirrored_hosts(); + analytics_alarms_notifications(); + + analytics_set_data( + &analytics_data.netdata_config_is_parent, (rrdhost_hosts_available() > 1 || configured_as_parent()) ? "true" : "false"); + + char *claim_id = get_agent_claimid(); + analytics_set_data(&analytics_data.netdata_host_agent_claimed, claim_id ? "true" : "false"); + freez(claim_id); + + { + char b[7]; + snprintfz(b, 6, "%d", analytics_data.prometheus_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, b); + + snprintfz(b, 6, "%d", analytics_data.shell_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, b); + + snprintfz(b, 6, "%d", analytics_data.json_hits); + analytics_set_data(&analytics_data.netdata_allmetrics_json_used, b); + + snprintfz(b, 6, "%d", analytics_data.dashboard_hits); + analytics_set_data(&analytics_data.netdata_dashboard_used, b); + + snprintfz(b, 6, "%zu", rrd_hosts_available); + analytics_set_data(&analytics_data.netdata_config_hosts_available, b); + } +} + +void analytics_main_cleanup(void *ptr) +{ + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + debug(D_ANALYTICS, "Cleaning up..."); + analytics_free_data(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +/* + * The analytics thread. 
/*
 * The analytics thread. Sleep for ANALYTICS_INIT_SLEEP_SEC,
 * gather the data, and then go to a loop where every ANALYTICS_HEARTBEAT
 * it will send a new META event after gathering data that could be changed
 * while the agent is running
 */
void *analytics_main(void *ptr)
{
    netdata_thread_cleanup_push(analytics_main_cleanup, ptr);
    unsigned int sec = 0;
    heartbeat_t hb;
    heartbeat_init(&hb);
    usec_t step_ut = USEC_PER_SEC;

    debug(D_ANALYTICS, "Analytics thread starts");

    //first delay after agent start
    while (!netdata_exit && likely(sec <= ANALYTICS_INIT_SLEEP_SEC)) {
        heartbeat_next(&hb, step_ut);
        sec++;
    }

    if (unlikely(netdata_exit))
        goto cleanup;

    analytics_gather_immutable_meta_data();
    analytics_gather_mutable_meta_data();
    send_statistics("META_START", "-", "-");
    analytics_log_data();

    sec = 0;
    while (1) {
        heartbeat_next(&hb, step_ut * 2);
        sec += 2;

        if (unlikely(netdata_exit))
            break;

        if (likely(sec < ANALYTICS_HEARTBEAT))
            continue;

        analytics_gather_mutable_meta_data();
        send_statistics("META", "-", "-");
        analytics_log_data();
        sec = 0;
    }

cleanup:
    netdata_thread_cleanup_pop(1);
    return NULL;
}

static const char *verify_required_directory(const char *dir)
{
    if (chdir(dir) == -1)
        fatal("Cannot change directory to '%s'", dir);

    DIR *d = opendir(dir);
    if (!d)
        fatal("Cannot examine the contents of directory '%s'", dir);
    closedir(d);

    return dir;
}

/*
 * This is called after the rrdinit
 * These values will be sent on the START event
 */
void set_late_global_environment()
{
    analytics_set_data(&analytics_data.netdata_config_stream_enabled, default_rrdpush_enabled ? "true" : "false");
    analytics_set_data_str(
        &analytics_data.netdata_config_memory_mode, (char *)rrd_memory_mode_name(default_rrd_memory_mode));

#ifdef DISABLE_CLOUD
    analytics_set_data(&analytics_data.netdata_host_cloud_enabled, "false");
#else
"true" : "false"); +#endif + +#ifdef ENABLE_DBENGINE + { + char b[16]; + snprintfz(b, 15, "%d", default_rrdeng_page_cache_mb); + analytics_set_data(&analytics_data.netdata_config_page_cache_size, b); + + snprintfz(b, 15, "%d", default_multidb_disk_quota_mb); + analytics_set_data(&analytics_data.netdata_config_multidb_disk_quota, b); + } +#endif + +#ifdef ENABLE_HTTPS + analytics_set_data(&analytics_data.netdata_config_https_enabled, "true"); +#else + analytics_set_data(&analytics_data.netdata_config_https_enabled, "false"); +#endif + + if (web_server_mode == WEB_SERVER_MODE_NONE) + analytics_set_data(&analytics_data.netdata_config_web_enabled, "false"); + else + analytics_set_data(&analytics_data.netdata_config_web_enabled, "true"); + + analytics_set_data_str(&analytics_data.netdata_config_release_channel, (char *)get_release_channel()); + + { + BUFFER *bi = buffer_create(1000); + analytics_build_info(bi); + analytics_set_data_str(&analytics_data.netdata_buildinfo, (char *)buffer_tostring(bi)); + buffer_free(bi); + } + + analytics_get_install_type(); +} + +void get_system_timezone(void) +{ + // avoid flood calls to stat(/etc/localtime) + // http://stackoverflow.com/questions/4554271/how-to-avoid-excessive-stat-etc-localtime-calls-in-strftime-on-linux + const char *tz = getenv("TZ"); + if (!tz || !*tz) + setenv("TZ", config_get(CONFIG_SECTION_ENV_VARS, "TZ", ":/etc/localtime"), 0); + + char buffer[FILENAME_MAX + 1] = ""; + const char *timezone = NULL; + ssize_t ret; + + // use the TZ variable + if (tz && *tz && *tz != ':') { + timezone = tz; + info("TIMEZONE: using TZ variable '%s'", timezone); + } + + // use the contents of /etc/timezone + if (!timezone && !read_file("/etc/timezone", buffer, FILENAME_MAX)) { + timezone = buffer; + info("TIMEZONE: using the contents of /etc/timezone"); + } + + // read the link /etc/localtime + if (!timezone) { + ret = readlink("/etc/localtime", buffer, FILENAME_MAX); + + if (ret > 0) { + buffer[ret] = '\0'; + + char *cmp = "/usr/share/zoneinfo/"; + size_t cmp_len = strlen(cmp); + + char *s = strstr(buffer, cmp); + if (s && s[cmp_len]) { + timezone = &s[cmp_len]; + info("TIMEZONE: using the link of /etc/localtime: '%s'", timezone); + } + } else + buffer[0] = '\0'; + } + + // find the timezone from strftime() + if (!timezone) { + time_t t; + struct tm *tmp, tmbuf; + + t = now_realtime_sec(); + tmp = localtime_r(&t, &tmbuf); + + if (tmp != NULL) { + if (strftime(buffer, FILENAME_MAX, "%Z", tmp) == 0) + buffer[0] = '\0'; + else { + buffer[FILENAME_MAX] = '\0'; + timezone = buffer; + info("TIMEZONE: using strftime(): '%s'", timezone); + } + } + } + + if (timezone && *timezone) { + // make sure it does not have illegal characters + // info("TIMEZONE: fixing '%s'", timezone); + + size_t len = strlen(timezone); + char tmp[len + 1]; + char *d = tmp; + *d = '\0'; + + while (*timezone) { + if (isalnum(*timezone) || *timezone == '_' || *timezone == '/') + *d++ = *timezone++; + else + timezone++; + } + *d = '\0'; + strncpyz(buffer, tmp, len); + timezone = buffer; + info("TIMEZONE: fixed as '%s'", timezone); + } + + if (!timezone || !*timezone) + timezone = "unknown"; + + netdata_configured_timezone = config_get(CONFIG_SECTION_GLOBAL, "timezone", timezone); + + //get the utc offset, and the timezone as returned by strftime + //will be sent to the cloud + //Note: This will need an agent restart to get new offset on time change (dst, etc). 
    {
        time_t t;
        struct tm *tmp, tmbuf;
        char zone[FILENAME_MAX + 1];
        char sign[2], hh[3], mm[3];

        t = now_realtime_sec();
        tmp = localtime_r(&t, &tmbuf);

        if (tmp != NULL) {
            if (strftime(zone, FILENAME_MAX, "%Z", tmp) == 0) {
                netdata_configured_abbrev_timezone = strdupz("UTC");
            } else
                netdata_configured_abbrev_timezone = strdupz(zone);

            if (strftime(zone, FILENAME_MAX, "%z", tmp) == 0) {
                netdata_configured_utc_offset = 0;
            } else {
                sign[0] = zone[0] == '-' || zone[0] == '+' ? zone[0] : '0';
                sign[1] = '\0';
                hh[0] = isdigit(zone[1]) ? zone[1] : '0';
                hh[1] = isdigit(zone[2]) ? zone[2] : '0';
                hh[2] = '\0';
                mm[0] = isdigit(zone[3]) ? zone[3] : '0';
                mm[1] = isdigit(zone[4]) ? zone[4] : '0';
                mm[2] = '\0';

                netdata_configured_utc_offset = (str2i(hh) * 3600) + (str2i(mm) * 60);
                netdata_configured_utc_offset =
                    sign[0] == '-' ? -netdata_configured_utc_offset : netdata_configured_utc_offset;
            }
        } else {
            netdata_configured_abbrev_timezone = strdupz("UTC");
            netdata_configured_utc_offset = 0;
        }
    }
}

void set_global_environment()
{
    {
        char b[16];
        snprintfz(b, 15, "%d", default_rrd_update_every);
        setenv("NETDATA_UPDATE_EVERY", b, 1);
    }

    setenv("NETDATA_VERSION", program_version, 1);
    setenv("NETDATA_HOSTNAME", netdata_configured_hostname, 1);
    setenv("NETDATA_CONFIG_DIR", verify_required_directory(netdata_configured_user_config_dir), 1);
    setenv("NETDATA_USER_CONFIG_DIR", verify_required_directory(netdata_configured_user_config_dir), 1);
    setenv("NETDATA_STOCK_CONFIG_DIR", verify_required_directory(netdata_configured_stock_config_dir), 1);
    setenv("NETDATA_PLUGINS_DIR", verify_required_directory(netdata_configured_primary_plugins_dir), 1);
    setenv("NETDATA_WEB_DIR", verify_required_directory(netdata_configured_web_dir), 1);
    setenv("NETDATA_CACHE_DIR", verify_required_directory(netdata_configured_cache_dir), 1);
    setenv("NETDATA_LIB_DIR", verify_required_directory(netdata_configured_varlib_dir), 1);
    setenv("NETDATA_LOCK_DIR", netdata_configured_lock_dir, 1);
    setenv("NETDATA_LOG_DIR", verify_required_directory(netdata_configured_log_dir), 1);
    setenv("HOME", verify_required_directory(netdata_configured_home_dir), 1);
    setenv("NETDATA_HOST_PREFIX", netdata_configured_host_prefix, 1);

    {
        BUFFER *user_plugins_dirs = buffer_create(FILENAME_MAX);

        for (size_t i = 1; i < PLUGINSD_MAX_DIRECTORIES && plugin_directories[i]; i++) {
            if (i > 1)
                buffer_strcat(user_plugins_dirs, " ");
            buffer_strcat(user_plugins_dirs, plugin_directories[i]);
        }

        setenv("NETDATA_USER_PLUGINS_DIRS", buffer_tostring(user_plugins_dirs), 1);

        buffer_free(user_plugins_dirs);
    }

    analytics_data.data_length = 0;
    analytics_set_data(&analytics_data.netdata_config_stream_enabled, "null");
    analytics_set_data(&analytics_data.netdata_config_memory_mode, "null");
    analytics_set_data(&analytics_data.netdata_config_exporting_enabled, "null");
    analytics_set_data(&analytics_data.netdata_exporting_connectors, "null");
    analytics_set_data(&analytics_data.netdata_allmetrics_prometheus_used, "null");
    analytics_set_data(&analytics_data.netdata_allmetrics_shell_used, "null");
    analytics_set_data(&analytics_data.netdata_allmetrics_json_used, "null");
    analytics_set_data(&analytics_data.netdata_dashboard_used, "null");
    analytics_set_data(&analytics_data.netdata_collectors, "null");
    analytics_set_data(&analytics_data.netdata_collectors_count, "null");
    analytics_set_data(&analytics_data.netdata_buildinfo, "null");
    analytics_set_data(&analytics_data.netdata_config_page_cache_size, "null");
    analytics_set_data(&analytics_data.netdata_config_multidb_disk_quota, "null");
    analytics_set_data(&analytics_data.netdata_config_https_enabled, "null");
    analytics_set_data(&analytics_data.netdata_config_web_enabled, "null");
    analytics_set_data(&analytics_data.netdata_config_release_channel, "null");
    analytics_set_data(&analytics_data.netdata_mirrored_host_count, "null");
    analytics_set_data(&analytics_data.netdata_mirrored_hosts_reachable, "null");
    analytics_set_data(&analytics_data.netdata_mirrored_hosts_unreachable, "null");
    analytics_set_data(&analytics_data.netdata_notification_methods, "null");
    analytics_set_data(&analytics_data.netdata_alarms_normal, "null");
    analytics_set_data(&analytics_data.netdata_alarms_warning, "null");
    analytics_set_data(&analytics_data.netdata_alarms_critical, "null");
    analytics_set_data(&analytics_data.netdata_charts_count, "null");
    analytics_set_data(&analytics_data.netdata_metrics_count, "null");
    analytics_set_data(&analytics_data.netdata_config_is_parent, "null");
    analytics_set_data(&analytics_data.netdata_config_hosts_available, "null");
    analytics_set_data(&analytics_data.netdata_host_cloud_available, "null");
    analytics_set_data(&analytics_data.netdata_host_aclk_implementation, "null");
    analytics_set_data(&analytics_data.netdata_host_aclk_available, "null");
    analytics_set_data(&analytics_data.netdata_host_aclk_protocol, "null");
    analytics_set_data(&analytics_data.netdata_host_agent_claimed, "null");
    analytics_set_data(&analytics_data.netdata_host_cloud_enabled, "null");
    analytics_set_data(&analytics_data.netdata_config_https_available, "null");
    analytics_set_data(&analytics_data.netdata_install_type, "null");
    analytics_set_data(&analytics_data.netdata_config_is_private_registry, "null");
    analytics_set_data(&analytics_data.netdata_config_use_private_registry, "null");
    analytics_set_data(&analytics_data.netdata_config_oom_score, "null");
    analytics_set_data(&analytics_data.netdata_prebuilt_distro, "null");

    analytics_data.prometheus_hits = 0;
    analytics_data.shell_hits = 0;
    analytics_data.json_hits = 0;
    analytics_data.dashboard_hits = 0;

    char *default_port = appconfig_get(&netdata_config, CONFIG_SECTION_WEB, "default port", NULL);
    int clean = 0;
    if (!default_port) {
        default_port = strdupz("19999");
        clean = 1;
    }

    setenv("NETDATA_LISTEN_PORT", default_port, 1);
    if (clean)
        freez(default_port);

    // set the path we need
    char path[1024 + 1], *p = getenv("PATH");
    if (!p)
        p = "/bin:/usr/bin";
    snprintfz(path, 1024, "%s:%s", p, "/sbin:/usr/sbin:/usr/local/bin:/usr/local/sbin");
    setenv("PATH", config_get(CONFIG_SECTION_ENV_VARS, "PATH", path), 1);

    // python options
    p = getenv("PYTHONPATH");
    if (!p)
        p = "";
    setenv("PYTHONPATH", config_get(CONFIG_SECTION_ENV_VARS, "PYTHONPATH", p), 1);

    // disable buffering for python plugins
    setenv("PYTHONUNBUFFERED", "1", 1);

    // switch to standard locale for plugins
    setenv("LC_ALL", "C", 1);
}

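/*
 * Build and run the anonymous-statistics.sh script, passing the action and the
 * current analytics snapshot as quoted positional arguments. The first call
 * probes for the opt-out file and caches the result in
 * netdata_anonymous_statistics_enabled, so later calls return early when
 * telemetry is disabled.
 */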
void send_statistics(const char *action, const char *action_result, const char *action_data)
{
    static char *as_script;

    if (netdata_anonymous_statistics_enabled == -1) {
        char *optout_file = mallocz(
            sizeof(char) *
            (strlen(netdata_configured_user_config_dir) + strlen(".opt-out-from-anonymous-statistics") + 2));
        sprintf(optout_file, "%s/%s", netdata_configured_user_config_dir, ".opt-out-from-anonymous-statistics");
        if (likely(access(optout_file, R_OK) != 0)) {
            as_script = mallocz(
                sizeof(char) *
                (strlen(netdata_configured_primary_plugins_dir) + strlen("anonymous-statistics.sh") + 2));
            sprintf(as_script, "%s/%s", netdata_configured_primary_plugins_dir, "anonymous-statistics.sh");
            if (unlikely(access(as_script, R_OK) != 0)) {
                netdata_anonymous_statistics_enabled = 0;
                info("Anonymous statistics script %s not found.", as_script);
                freez(as_script);
            } else {
                netdata_anonymous_statistics_enabled = 1;
            }
        } else {
            netdata_anonymous_statistics_enabled = 0;
            as_script = NULL;
        }
        freez(optout_file);
    }
    if (!netdata_anonymous_statistics_enabled)
        return;
    if (!action)
        return;
    if (!action_result)
        action_result = "";
    if (!action_data)
        action_data = "";
    char *command_to_run = mallocz(
        sizeof(char) * (strlen(action) + strlen(action_result) + strlen(action_data) + strlen(as_script) +
                        analytics_data.data_length + (ANALYTICS_NO_OF_ITEMS * 3) + 15));
    pid_t command_pid;

    sprintf(
        command_to_run,
        "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ",
        as_script,
        action,
        action_result,
        action_data,
        analytics_data.netdata_config_stream_enabled,
        analytics_data.netdata_config_memory_mode,
        analytics_data.netdata_config_exporting_enabled,
        analytics_data.netdata_exporting_connectors,
        analytics_data.netdata_allmetrics_prometheus_used,
        analytics_data.netdata_allmetrics_shell_used,
        analytics_data.netdata_allmetrics_json_used,
        analytics_data.netdata_dashboard_used,
        analytics_data.netdata_collectors,
        analytics_data.netdata_collectors_count,
        analytics_data.netdata_buildinfo,
        analytics_data.netdata_config_page_cache_size,
        analytics_data.netdata_config_multidb_disk_quota,
        analytics_data.netdata_config_https_enabled,
        analytics_data.netdata_config_web_enabled,
        analytics_data.netdata_config_release_channel,
        analytics_data.netdata_mirrored_host_count,
        analytics_data.netdata_mirrored_hosts_reachable,
        analytics_data.netdata_mirrored_hosts_unreachable,
        analytics_data.netdata_notification_methods,
        analytics_data.netdata_alarms_normal,
        analytics_data.netdata_alarms_warning,
        analytics_data.netdata_alarms_critical,
        analytics_data.netdata_charts_count,
        analytics_data.netdata_metrics_count,
        analytics_data.netdata_config_is_parent,
        analytics_data.netdata_config_hosts_available,
        analytics_data.netdata_host_cloud_available,
        analytics_data.netdata_host_aclk_available,
        analytics_data.netdata_host_aclk_protocol,
        analytics_data.netdata_host_aclk_implementation,
        analytics_data.netdata_host_agent_claimed,
        analytics_data.netdata_host_cloud_enabled,
        analytics_data.netdata_config_https_available,
        analytics_data.netdata_install_type,
        analytics_data.netdata_config_is_private_registry,
        analytics_data.netdata_config_use_private_registry,
        analytics_data.netdata_config_oom_score,
        analytics_data.netdata_prebuilt_distro);

    info("%s '%s' '%s' '%s'", as_script, action, action_result, action_data);

    FILE *fp_child_input;
    FILE *fp_child_output = netdata_popen(command_to_run, &command_pid, &fp_child_input);
    if (fp_child_output) {
        char buffer[4 + 1];
        char *s = fgets(buffer, 4, fp_child_output);
        int exit_code = netdata_pclose(fp_child_input, fp_child_output, command_pid);
        if (exit_code)
            error("Execution of anonymous statistics script returned %d.", exit_code);
        if (s && strncmp(buffer, "200", 3))
            error("Execution of anonymous statistics script returned http code %s.", buffer);
    } else {
        error("Failed to run anonymous statistics script %s.", as_script);
    }
    freez(command_to_run);
}

diff --git a/daemon/analytics.h b/daemon/analytics.h
new file mode 100644
index 0000000..d1ffcec
--- /dev/null
+++ b/daemon/analytics.h
@@ -0,0 +1,87 @@
// SPDX-License-Identifier: GPL-3.0-or-later

#ifndef NETDATA_ANALYTICS_H
#define NETDATA_ANALYTICS_H 1

#include "daemon/common.h"

/* Max number of seconds before the first META analytics is sent */
#define ANALYTICS_INIT_SLEEP_SEC 120

/* Send a META event every X seconds */
#define ANALYTICS_HEARTBEAT 7200

/* Maximum number of hits to log */
#define ANALYTICS_MAX_PROMETHEUS_HITS 255
#define ANALYTICS_MAX_SHELL_HITS 255
#define ANALYTICS_MAX_JSON_HITS 255
#define ANALYTICS_MAX_DASHBOARD_HITS 255

/* Needed to calculate the space needed for parameters */
#define ANALYTICS_NO_OF_ITEMS 39

struct analytics_data {
    char *netdata_config_stream_enabled;
    char *netdata_config_memory_mode;
    char *netdata_exporting_connectors;
    char *netdata_config_exporting_enabled;
    char *netdata_allmetrics_prometheus_used;
    char *netdata_allmetrics_shell_used;
    char *netdata_allmetrics_json_used;
    char *netdata_dashboard_used;
    char *netdata_collectors;
    char *netdata_collectors_count;
    char *netdata_buildinfo;
    char *netdata_config_page_cache_size;
    char *netdata_config_multidb_disk_quota;
    char *netdata_config_https_enabled;
    char *netdata_config_web_enabled;
    char *netdata_config_release_channel;
    char *netdata_mirrored_host_count;
    char *netdata_mirrored_hosts_reachable;
    char *netdata_mirrored_hosts_unreachable;
    char *netdata_notification_methods;
    char *netdata_alarms_normal;
    char *netdata_alarms_warning;
    char *netdata_alarms_critical;
    char *netdata_charts_count;
    char *netdata_metrics_count;
    char *netdata_config_is_parent;
    char *netdata_config_hosts_available;
    char *netdata_host_cloud_available;
    char *netdata_host_aclk_available;
    char *netdata_host_aclk_protocol;
    char *netdata_host_aclk_implementation;
    char *netdata_host_agent_claimed;
    char *netdata_host_cloud_enabled;
    char *netdata_config_https_available;
    char *netdata_install_type;
    char *netdata_config_is_private_registry;
    char *netdata_config_use_private_registry;
    char *netdata_config_oom_score;
    char *netdata_prebuilt_distro;

    size_t data_length;

    uint8_t prometheus_hits;
    uint8_t shell_hits;
    uint8_t json_hits;
    uint8_t dashboard_hits;
};

void analytics_get_data(char *name, BUFFER *wb);
void set_late_global_environment(void);
void analytics_free_data(void);
void set_global_environment(void);
void send_statistics(const char *action, const char *action_result, const char *action_data);
void analytics_log_shell(void);
void analytics_log_json(void);
void analytics_log_prometheus(void);
void analytics_log_dashboard(void);
void analytics_gather_mutable_meta_data(void);
void analytics_report_oom_score(long long int score);
void get_system_timezone(void);

extern struct analytics_data analytics_data;

#endif //NETDATA_ANALYTICS_H

diff --git a/daemon/anonymous-statistics.sh.in b/daemon/anonymous-statistics.sh.in
new file mode 100755
index 0000000..9f8df18
--- /dev/null
+++ b/daemon/anonymous-statistics.sh.in
@@ -0,0 +1,177 @@
#!/usr/bin/env sh

# Valid actions:

# - FATAL - netdata exited due to a fatal condition
#    ACTION_RESULT -- program name and thread tag
ACTION_DATA -- fmt, args passed to fatal +# - START - netdata started +# ACTION_DATA -- nan +# - EXIT - installation action +# ACTION_DATA -- ret value of + +ACTION="${1}" +ACTION_RESULT="${2}" +ACTION_DATA="${3}" +ACTION_DATA=$(echo "${ACTION_DATA}" | tr '"' "'") + +# ------------------------------------------------------------------------------------------------- +# check opt-out + +if [ -f "@configdir_POST@/.opt-out-from-anonymous-statistics" ] || + [ ! "${DISABLE_TELEMETRY:-0}" -eq 0 ] || + [ -n "$DISABLE_TELEMETRY" ] || + [ ! "${DO_NOT_TRACK:-0}" -eq 0 ] || + [ -n "$DO_NOT_TRACK" ]; then + exit 0 +fi + +# ------------------------------------------------------------------------------------------------- +# Get the extra variables + +NETDATA_CONFIG_STREAM_ENABLED="${4}" +NETDATA_CONFIG_MEMORY_MODE="${5}" +NETDATA_CONFIG_EXPORTING_ENABLED="${6}" +NETDATA_EXPORTING_CONNECTORS="${7}" +NETDATA_ALLMETRICS_PROMETHEUS_USED="${8}" +NETDATA_ALLMETRICS_SHELL_USED="${9}" +NETDATA_ALLMETRICS_JSON_USED="${10}" +NETDATA_DASHBOARD_USED="${11}" +NETDATA_COLLECTORS="${12}" +NETDATA_COLLECTORS_COUNT="${13}" +NETDATA_BUILDINFO="${14}" +NETDATA_CONFIG_PAGE_CACHE_SIZE="${15}" +NETDATA_CONFIG_MULTIDB_DISK_QUOTA="${16}" +NETDATA_CONFIG_HTTPS_ENABLED="${17}" +NETDATA_CONFIG_WEB_ENABLED="${18}" +NETDATA_CONFIG_RELEASE_CHANNEL="${19}" +NETDATA_MIRRORED_HOST_COUNT="${20}" +NETDATA_MIRRORED_HOSTS_REACHABLE="${21}" +NETDATA_MIRRORED_HOSTS_UNREACHABLE="${22}" +NETDATA_NOTIFICATION_METHODS="${23}" +NETDATA_ALARMS_NORMAL="${24}" +NETDATA_ALARMS_WARNING="${25}" +NETDATA_ALARMS_CRITICAL="${26}" +NETDATA_CHARTS_COUNT="${27}" +NETDATA_METRICS_COUNT="${28}" +NETDATA_CONFIG_IS_PARENT="${29}" +NETDATA_CONFIG_HOSTS_AVAILABLE="${30}" +NETDATA_HOST_CLOUD_AVAILABLE="${31}" +NETDATA_HOST_ACLK_AVAILABLE="${32}" +NETDATA_HOST_ACLK_PROTOCOL="${33}" +NETDATA_HOST_ACLK_IMPLEMENTATION="${34}" +NETDATA_HOST_AGENT_CLAIMED="${35}" +NETDATA_HOST_CLOUD_ENABLED="${36}" +NETDATA_CONFIG_HTTPS_AVAILABLE="${37}" +NETDATA_INSTALL_TYPE="${38}" +NETDATA_IS_PRIVATE_REGISTRY="${39}" +NETDATA_USE_PRIVATE_REGISTRY="${40}" +NETDATA_CONFIG_OOM_SCORE="${41}" +NETDATA_PREBUILT_DISTRO="${42}" + + +# define body of request to be sent +REQ_BODY="$(cat << EOF +{ + "api_key": "mqkwGT0JNFqO-zX2t0mW6Tec9yooaVu7xCBlXtHnt5Y", + "event": "${ACTION} ${ACTION_RESULT}", + "properties": { + "distinct_id": "${NETDATA_REGISTRY_UNIQUE_ID}", + "\$current_url": "agent backend", + "\$pathname": "netdata-backend", + "\$host": "backend.netdata.io", + "\$ip": "127.0.0.1", + "event_source": "agent backend", + "action": "${ACTION}", + "action_result": "${ACTION_RESULT}", + "action_data": "${ACTION_DATA}", + "netdata_machine_guid": "${NETDATA_REGISTRY_UNIQUE_ID}", + "netdata_version": "${NETDATA_VERSION}", + "netdata_buildinfo": ${NETDATA_BUILDINFO}, + "netdata_release_channel": ${NETDATA_CONFIG_RELEASE_CHANNEL}, + "netdata_install_type": ${NETDATA_INSTALL_TYPE}, + "netdata_prebuilt_distro": ${NETDATA_PREBUILT_DISTRO}, + "host_os_name": "${NETDATA_HOST_OS_NAME}", + "host_os_id": "${NETDATA_HOST_OS_ID}", + "host_os_id_like": "${NETDATA_HOST_OS_ID_LIKE}", + "host_os_version": "${NETDATA_HOST_OS_VERSION}", + "host_os_version_id": "${NETDATA_HOST_OS_VERSION_ID}", + "host_os_detection": "${NETDATA_HOST_OS_DETECTION}", + "host_is_k8s_node": "${NETDATA_HOST_IS_K8S_NODE}", + "system_kernel_name": "${NETDATA_SYSTEM_KERNEL_NAME}", + "system_kernel_version": "${NETDATA_SYSTEM_KERNEL_VERSION}", + "system_architecture": "${NETDATA_SYSTEM_ARCHITECTURE}", + "system_virtualization": 
"${NETDATA_SYSTEM_VIRTUALIZATION}", + "system_virt_detection": "${NETDATA_SYSTEM_VIRT_DETECTION}", + "system_container": "${NETDATA_SYSTEM_CONTAINER}", + "system_container_detection": "${NETDATA_SYSTEM_CONTAINER_DETECTION}", + "container_os_name": "${NETDATA_CONTAINER_OS_NAME}", + "container_os_id": "${NETDATA_CONTAINER_OS_ID}", + "container_os_id_like": "${NETDATA_CONTAINER_OS_ID_LIKE}", + "container_os_version": "${NETDATA_CONTAINER_OS_VERSION}", + "container_os_version_id": "${NETDATA_CONTAINER_OS_VERSION_ID}", + "container_os_detection": "${NETDATA_CONTAINER_OS_DETECTION}", + "container_is_official_image": ${NETDATA_CONTAINER_IS_OFFICIAL_IMAGE}, + "system_cpu_detection": "${NETDATA_SYSTEM_CPU_DETECTION}", + "system_cpu_freq": "${NETDATA_SYSTEM_CPU_FREQ}", + "system_cpu_logical_cpu_count": "${NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT}", + "system_cpu_model": "${NETDATA_SYSTEM_CPU_MODEL}", + "system_cpu_vendor": "${NETDATA_SYSTEM_CPU_VENDOR}", + "system_disk_detection": "${NETDATA_SYSTEM_DISK_DETECTION}", + "system_ram_detection": "${NETDATA_SYSTEM_RAM_DETECTION}", + "system_total_disk_size": "${NETDATA_SYSTEM_TOTAL_DISK_SIZE}", + "system_total_ram": "${NETDATA_SYSTEM_TOTAL_RAM}", + "config_stream_enabled": ${NETDATA_CONFIG_STREAM_ENABLED}, + "config_memory_mode": ${NETDATA_CONFIG_MEMORY_MODE}, + "config_page_cache_size": ${NETDATA_CONFIG_PAGE_CACHE_SIZE}, + "config_multidb_disk_quota": ${NETDATA_CONFIG_MULTIDB_DISK_QUOTA}, + "config_https_enabled": ${NETDATA_CONFIG_HTTPS_ENABLED}, + "config_https_available": ${NETDATA_CONFIG_HTTPS_AVAILABLE}, + "config_web_enabled": ${NETDATA_CONFIG_WEB_ENABLED}, + "config_exporting_enabled": ${NETDATA_CONFIG_EXPORTING_ENABLED}, + "config_is_parent": ${NETDATA_CONFIG_IS_PARENT}, + "config_is_private_registry": ${NETDATA_IS_PRIVATE_REGISTRY}, + "config_private_registry_used": ${NETDATA_USE_PRIVATE_REGISTRY}, + "config_hosts_available": ${NETDATA_CONFIG_HOSTS_AVAILABLE}, + "config_oom_score": ${NETDATA_CONFIG_OOM_SCORE}, + "alarms_normal": ${NETDATA_ALARMS_NORMAL}, + "alarms_warning": ${NETDATA_ALARMS_WARNING}, + "alarms_critical": ${NETDATA_ALARMS_CRITICAL}, + "host_charts_count": ${NETDATA_CHARTS_COUNT}, + "host_metrics_count": ${NETDATA_METRICS_COUNT}, + "host_collectors":[ + ${NETDATA_COLLECTORS} + ], + "host_collectors_count": ${NETDATA_COLLECTORS_COUNT}, + "host_notification_methods": ${NETDATA_NOTIFICATION_METHODS}, + "host_allmetrics_prometheus_used": ${NETDATA_ALLMETRICS_PROMETHEUS_USED}, + "host_allmetrics_shell_used": ${NETDATA_ALLMETRICS_SHELL_USED}, + "host_allmetrics_json_used": ${NETDATA_ALLMETRICS_JSON_USED}, + "host_dashboard_used": ${NETDATA_DASHBOARD_USED}, + "host_cloud_available": ${NETDATA_HOST_CLOUD_AVAILABLE}, + "host_cloud_enabled": ${NETDATA_HOST_CLOUD_ENABLED}, + "host_agent_claimed": ${NETDATA_HOST_AGENT_CLAIMED}, + "host_aclk_available": ${NETDATA_HOST_ACLK_AVAILABLE}, + "host_aclk_protocol": ${NETDATA_HOST_ACLK_PROTOCOL}, + "host_aclk_implementation": ${NETDATA_HOST_ACLK_IMPLEMENTATION}, + "mirrored_host_count": ${NETDATA_MIRRORED_HOST_COUNT}, + "mirrored_hosts_reachable": ${NETDATA_MIRRORED_HOSTS_REACHABLE}, + "mirrored_hosts_unreachable": ${NETDATA_MIRRORED_HOSTS_UNREACHABLE}, + "exporting_connectors": ${NETDATA_EXPORTING_CONNECTORS} + } +} +EOF +)" + +# send the anonymous statistics to the Netdata PostHog +if [ -n "$(command -v curl 2> /dev/null)" ]; then + curl --silent -o /dev/null --write-out '%{http_code}' -X POST --max-time 2 --header "Content-Type: application/json" -d "${REQ_BODY}" https://posthog.netdata.cloud/capture/ 
+else + wget -q -O - --no-check-certificate \ + --server-response \ + --method POST \ + --timeout=1 \ + --header 'Content-Type: application/json' \ + --body-data "${REQ_BODY}" \ + 'https://posthog.netdata.cloud/capture/' 2>&1 | awk '/^ HTTP/{print $2}' +fi diff --git a/daemon/buildinfo.c b/daemon/buildinfo.c new file mode 100644 index 0000000..ef813a9 --- /dev/null +++ b/daemon/buildinfo.c @@ -0,0 +1,470 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include <stdio.h> +#include "./config.h" +#include "common.h" +#include "buildinfo.h" + +// Optional features + +#ifdef ENABLE_ACLK +#define FEAT_CLOUD 1 +#define FEAT_CLOUD_MSG "" +#else +#ifdef DISABLE_CLOUD +#define FEAT_CLOUD 0 +#define FEAT_CLOUD_MSG "(by user request)" +#else +#define FEAT_CLOUD 0 +#define FEAT_CLOUD_MSG "" +#endif +#endif + +#ifdef ENABLE_DBENGINE +#define FEAT_DBENGINE 1 +#else +#define FEAT_DBENGINE 0 +#endif + +#if defined(HAVE_X509_VERIFY_PARAM_set1_host) && HAVE_X509_VERIFY_PARAM_set1_host == 1 +#define FEAT_TLS_HOST_VERIFY 1 +#else +#define FEAT_TLS_HOST_VERIFY 0 +#endif + +#ifdef ENABLE_HTTPS +#define FEAT_NATIVE_HTTPS 1 +#else +#define FEAT_NATIVE_HTTPS 0 +#endif + +#ifdef ENABLE_ML +#define FEAT_ML 1 +#else +#define FEAT_ML 0 +#endif + +#ifdef ENABLE_COMPRESSION +#define FEAT_STREAM_COMPRESSION 1 +#else +#define FEAT_STREAM_COMPRESSION 0 +#endif //ENABLE_COMPRESSION + + +// Optional libraries + +#ifdef HAVE_PROTOBUF +#define FEAT_PROTOBUF 1 +#ifdef BUNDLED_PROTOBUF +#define FEAT_PROTOBUF_BUNDLED " (bundled)" +#else +#define FEAT_PROTOBUF_BUNDLED " (system)" +#endif +#else +#define FEAT_PROTOBUF 0 +#define FEAT_PROTOBUF_BUNDLED "" +#endif + +#ifdef ENABLE_JSONC +#define FEAT_JSONC 1 +#else +#define FEAT_JSONC 0 +#endif + +#ifdef ENABLE_JEMALLOC +#define FEAT_JEMALLOC 1 +#else +#define FEAT_JEMALLOC 0 +#endif + +#ifdef ENABLE_TCMALLOC +#define FEAT_TCMALLOC 1 +#else +#define FEAT_TCMALLOC 0 +#endif + +#ifdef HAVE_CAPABILITY +#define FEAT_LIBCAP 1 +#else +#define FEAT_LIBCAP 0 +#endif + +#ifdef NETDATA_WITH_ZLIB +#define FEAT_ZLIB 1 +#else +#define FEAT_ZLIB 0 +#endif + +#ifdef STORAGE_WITH_MATH +#define FEAT_LIBM 1 +#else +#define FEAT_LIBM 0 +#endif + +#ifdef HAVE_CRYPTO +#define FEAT_CRYPTO 1 +#else +#define FEAT_CRYPTO 0 +#endif + +// Optional plugins + +#ifdef ENABLE_APPS_PLUGIN +#define FEAT_APPS_PLUGIN 1 +#else +#define FEAT_APPS_PLUGIN 0 +#endif + +#ifdef HAVE_FREEIPMI +#define FEAT_IPMI 1 +#else +#define FEAT_IPMI 0 +#endif + +#ifdef HAVE_CUPS +#define FEAT_CUPS 1 +#else +#define FEAT_CUPS 0 +#endif + +#ifdef HAVE_NFACCT +#define FEAT_NFACCT 1 +#else +#define FEAT_NFACCT 0 +#endif + +#ifdef HAVE_LIBXENSTAT +#define FEAT_XEN 1 +#else +#define FEAT_XEN 0 +#endif + +#ifdef HAVE_XENSTAT_VBD_ERROR +#define FEAT_XEN_VBD_ERROR 1 +#else +#define FEAT_XEN_VBD_ERROR 0 +#endif + +#ifdef HAVE_LIBBPF +#define FEAT_EBPF 1 +#else +#define FEAT_EBPF 0 +#endif + +#ifdef HAVE_SETNS +#define FEAT_CGROUP_NET 1 +#else +#define FEAT_CGROUP_NET 0 +#endif + +#ifdef ENABLE_PERF_PLUGIN +#define FEAT_PERF 1 +#else +#define FEAT_PERF 0 +#endif + +#ifdef ENABLE_SLABINFO +#define FEAT_SLABINFO 1 +#else +#define FEAT_SLABINFO 0 +#endif + +// Optional Exporters + +#ifdef HAVE_KINESIS +#define FEAT_KINESIS 1 +#else +#define FEAT_KINESIS 0 +#endif + +#ifdef ENABLE_EXPORTING_PUBSUB +#define FEAT_PUBSUB 1 +#else +#define FEAT_PUBSUB 0 +#endif + +#ifdef HAVE_MONGOC +#define FEAT_MONGO 1 +#else +#define FEAT_MONGO 0 +#endif + +#ifdef ENABLE_PROMETHEUS_REMOTE_WRITE +#define FEAT_REMOTE_WRITE 1 +#else +#define FEAT_REMOTE_WRITE 0 +#endif 
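+
+// Every FEAT_* macro above resolves to 0 or 1 at compile time; the helpers
+// below turn those flags into "YES"/"NO" strings for the human-readable
+// report and into true/false literals for the JSON variant.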
+#define FEAT_YES_NO(x) ((x) ? "YES" : "NO")
+
+#ifdef NETDATA_TRACE_ALLOCATIONS
+#define FEAT_TRACE_ALLOC 1
+#else
+#define FEAT_TRACE_ALLOC 0
+#endif
+
+char *get_value_from_key(char *buffer, char *key) {
+    char *s = NULL, *t = NULL;
+    s = t = buffer + strlen(key) + 2;
+    if (s) {
+        while (*s == '\'')
+            s++;
+        while (*++t != '\0');
+        while (--t > s && *t == '\'')
+            *t = '\0';
+    }
+    return s;
+}
+
+void get_install_type(char **install_type, char **prebuilt_arch, char **prebuilt_dist) {
+    char *install_type_filename;
+
+    int install_type_filename_len = (strlen(netdata_configured_user_config_dir) + strlen(".install-type") + 3);
+    install_type_filename = mallocz(sizeof(char) * install_type_filename_len);
+    snprintfz(install_type_filename, install_type_filename_len - 1, "%s/%s", netdata_configured_user_config_dir, ".install-type");
+
+    FILE *fp = fopen(install_type_filename, "r");
+    if (fp) {
+        char *s, buf[256 + 1];
+        size_t len = 0;
+
+        while ((s = fgets_trim_len(buf, 256, fp, &len))) {
+            if (!strncmp(buf, "INSTALL_TYPE='", 14))
+                *install_type = strdupz((char *)get_value_from_key(buf, "INSTALL_TYPE"));
+            else if (!strncmp(buf, "PREBUILT_ARCH='", 15))
+                *prebuilt_arch = strdupz((char *)get_value_from_key(buf, "PREBUILT_ARCH"));
+            else if (!strncmp(buf, "PREBUILT_DISTRO='", 17))
+                *prebuilt_dist = strdupz((char *)get_value_from_key(buf, "PREBUILT_DISTRO"));
+        }
+        fclose(fp);
+    }
+    freez(install_type_filename);
+}
+
+void print_build_info(void) {
+    char *install_type = NULL;
+    char *prebuilt_arch = NULL;
+    char *prebuilt_distro = NULL;
+    get_install_type(&install_type, &prebuilt_arch, &prebuilt_distro);
+
+    printf("Configure options: %s\n", CONFIGURE_COMMAND);
+
+    if (install_type == NULL) {
+        printf("Install type: unknown\n");
+    } else {
+        printf("Install type: %s\n", install_type);
+    }
+
+    if (prebuilt_arch != NULL) {
+        printf("    Binary architecture: %s\n", prebuilt_arch);
+    }
+
+    if (prebuilt_distro != NULL) {
+        printf("    Packaging distro: %s\n", prebuilt_distro);
+    }
+
+    freez(install_type);
+    freez(prebuilt_arch);
+    freez(prebuilt_distro);
+
+    printf("Features:\n");
+    printf("    dbengine: %s\n", FEAT_YES_NO(FEAT_DBENGINE));
+    printf("    Native HTTPS: %s\n", FEAT_YES_NO(FEAT_NATIVE_HTTPS));
+    printf("    Netdata Cloud: %s %s\n", FEAT_YES_NO(FEAT_CLOUD), FEAT_CLOUD_MSG);
+    printf("    ACLK: %s\n", FEAT_YES_NO(FEAT_CLOUD));
+    printf("    TLS Host Verification: %s\n", FEAT_YES_NO(FEAT_TLS_HOST_VERIFY));
+    printf("    Machine Learning: %s\n", FEAT_YES_NO(FEAT_ML));
+    printf("    Stream Compression: %s\n", FEAT_YES_NO(FEAT_STREAM_COMPRESSION));
+
+    printf("Libraries:\n");
+    printf("    protobuf: %s%s\n", FEAT_YES_NO(FEAT_PROTOBUF), FEAT_PROTOBUF_BUNDLED);
+    printf("    jemalloc: %s\n", FEAT_YES_NO(FEAT_JEMALLOC));
+    printf("    JSON-C: %s\n", FEAT_YES_NO(FEAT_JSONC));
+    printf("    libcap: %s\n", FEAT_YES_NO(FEAT_LIBCAP));
+    printf("    libcrypto: %s\n", FEAT_YES_NO(FEAT_CRYPTO));
+    printf("    libm: %s\n", FEAT_YES_NO(FEAT_LIBM));
+    printf("    tcmalloc: %s\n", FEAT_YES_NO(FEAT_TCMALLOC));
+    printf("    zlib: %s\n", FEAT_YES_NO(FEAT_ZLIB));
+
+    printf("Plugins:\n");
+    printf("    apps: %s\n", FEAT_YES_NO(FEAT_APPS_PLUGIN));
+    printf("    cgroup Network Tracking: %s\n", FEAT_YES_NO(FEAT_CGROUP_NET));
+    printf("    CUPS: %s\n", FEAT_YES_NO(FEAT_CUPS));
+    printf("    EBPF: %s\n", FEAT_YES_NO(FEAT_EBPF));
+    printf("    IPMI: %s\n", FEAT_YES_NO(FEAT_IPMI));
+    printf("    NFACCT: %s\n", FEAT_YES_NO(FEAT_NFACCT));
+    printf("    perf: %s\n", FEAT_YES_NO(FEAT_PERF));
+    printf("    slabinfo: %s\n", FEAT_YES_NO(FEAT_SLABINFO));
+    printf("    Xen: %s\n", FEAT_YES_NO(FEAT_XEN));
+    printf("    Xen VBD Error Tracking: %s\n", FEAT_YES_NO(FEAT_XEN_VBD_ERROR));
+
+    printf("Exporters:\n");
+    printf("    AWS Kinesis: %s\n", FEAT_YES_NO(FEAT_KINESIS));
+    printf("    GCP PubSub: %s\n", FEAT_YES_NO(FEAT_PUBSUB));
+    printf("    MongoDB: %s\n", FEAT_YES_NO(FEAT_MONGO));
+    printf("    Prometheus Remote Write: %s\n", FEAT_YES_NO(FEAT_REMOTE_WRITE));
+
+    printf("Debug/Developer Features:\n");
+    printf("    Trace Allocations: %s\n", FEAT_YES_NO(FEAT_TRACE_ALLOC));
+}
+
+#define FEAT_JSON_BOOL(x) ((x) ? "true" : "false")
+// This intentionally does not use JSON-C so it works even if JSON-C is not present
+// This is used for anonymous statistics reporting, so it intentionally
+// does not include the configure options, which would be very easy to use
+// for tracking custom builds (and complicate outputting valid JSON).
+void print_build_info_json(void) {
+    printf("{\n");
+    printf("  \"features\": {\n");
+    printf("    \"dbengine\": %s,\n", FEAT_JSON_BOOL(FEAT_DBENGINE));
+    printf("    \"native-https\": %s,\n", FEAT_JSON_BOOL(FEAT_NATIVE_HTTPS));
+    printf("    \"cloud\": %s,\n", FEAT_JSON_BOOL(FEAT_CLOUD));
+#ifdef DISABLE_CLOUD
+    printf("    \"cloud-disabled\": true,\n");
+#else
+    printf("    \"cloud-disabled\": false,\n");
+#endif
+    printf("    \"aclk\": %s,\n", FEAT_JSON_BOOL(FEAT_CLOUD));
+
+    printf("    \"tls-host-verify\": %s,\n", FEAT_JSON_BOOL(FEAT_TLS_HOST_VERIFY));
+    printf("    \"machine-learning\": %s,\n", FEAT_JSON_BOOL(FEAT_ML));
+    printf("    \"stream-compression\": %s\n", FEAT_JSON_BOOL(FEAT_STREAM_COMPRESSION));
+    printf("  },\n");
+
+    printf("  \"libs\": {\n");
+    printf("    \"protobuf\": %s,\n", FEAT_JSON_BOOL(FEAT_PROTOBUF));
+    printf("    \"protobuf-source\": \"%s\",\n", FEAT_PROTOBUF_BUNDLED);
+    printf("    \"jemalloc\": %s,\n", FEAT_JSON_BOOL(FEAT_JEMALLOC));
+    printf("    \"jsonc\": %s,\n", FEAT_JSON_BOOL(FEAT_JSONC));
+    printf("    \"libcap\": %s,\n", FEAT_JSON_BOOL(FEAT_LIBCAP));
+    printf("    \"libcrypto\": %s,\n", FEAT_JSON_BOOL(FEAT_CRYPTO));
+    printf("    \"libm\": %s,\n", FEAT_JSON_BOOL(FEAT_LIBM));
+    printf("    \"tcmalloc\": %s,\n", FEAT_JSON_BOOL(FEAT_TCMALLOC));
+    printf("    \"zlib\": %s\n", FEAT_JSON_BOOL(FEAT_ZLIB));
+    printf("  },\n");
+
+    printf("  \"plugins\": {\n");
+    printf("    \"apps\": %s,\n", FEAT_JSON_BOOL(FEAT_APPS_PLUGIN));
+    printf("    \"cgroup-net\": %s,\n", FEAT_JSON_BOOL(FEAT_CGROUP_NET));
+    printf("    \"cups\": %s,\n", FEAT_JSON_BOOL(FEAT_CUPS));
+    printf("    \"ebpf\": %s,\n", FEAT_JSON_BOOL(FEAT_EBPF));
+    printf("    \"ipmi\": %s,\n", FEAT_JSON_BOOL(FEAT_IPMI));
+    printf("    \"nfacct\": %s,\n", FEAT_JSON_BOOL(FEAT_NFACCT));
+    printf("    \"perf\": %s,\n", FEAT_JSON_BOOL(FEAT_PERF));
+    printf("    \"slabinfo\": %s,\n", FEAT_JSON_BOOL(FEAT_SLABINFO));
+    printf("    \"xen\": %s,\n", FEAT_JSON_BOOL(FEAT_XEN));
+    printf("    \"xen-vbd-error\": %s\n", FEAT_JSON_BOOL(FEAT_XEN_VBD_ERROR));
+    printf("  },\n");
+
+    printf("  \"exporters\": {\n");
+    printf("    \"kinesis\": %s,\n", FEAT_JSON_BOOL(FEAT_KINESIS));
+    printf("    \"pubsub\": %s,\n", FEAT_JSON_BOOL(FEAT_PUBSUB));
+    printf("    \"mongodb\": %s,\n", FEAT_JSON_BOOL(FEAT_MONGO));
+    printf("    \"prom-remote-write\": %s\n", FEAT_JSON_BOOL(FEAT_REMOTE_WRITE));
+    printf("  },\n");
+    printf("  \"debug-n-devel\": {\n");
+    printf("    \"trace-allocations\": %s\n  }\n", FEAT_JSON_BOOL(FEAT_TRACE_ALLOC));
+    printf("}\n");
+}
+
+#define add_to_bi(buffer, str) \
+    { if(first) { \
+        buffer_strcat (buffer, str); \
+        first = 0; \
+    } else \
+        buffer_strcat (buffer, "|" str); }
+
+void analytics_build_info(BUFFER *b) {
+    int first = 1;
+#ifdef ENABLE_DBENGINE
+    add_to_bi(b, "dbengine");
+#endif
+#ifdef ENABLE_HTTPS
+    add_to_bi(b, "Native HTTPS");
+#endif
+#ifdef ENABLE_ACLK
+    add_to_bi(b, "Netdata Cloud");
+#endif
+#if (FEAT_TLS_HOST_VERIFY!=0)
+    add_to_bi(b, "TLS Host Verification");
+#endif
+#ifdef ENABLE_ML
+    add_to_bi(b, "Machine Learning");
+#endif
+#ifdef ENABLE_COMPRESSION
+    add_to_bi(b, "Stream Compression");
+#endif
+
+#ifdef HAVE_PROTOBUF
+    add_to_bi(b, "protobuf");
+#endif
+#ifdef ENABLE_JEMALLOC
+    add_to_bi(b, "jemalloc");
+#endif
+#ifdef ENABLE_JSONC
+    add_to_bi(b, "JSON-C");
+#endif
+#ifdef HAVE_CAPABILITY
+    add_to_bi(b, "libcap");
+#endif
+#ifdef HAVE_CRYPTO
+    add_to_bi(b, "libcrypto");
+#endif
+#ifdef STORAGE_WITH_MATH
+    add_to_bi(b, "libm");
+#endif
+
+#ifdef ENABLE_TCMALLOC
+    add_to_bi(b, "tcmalloc");
+#endif
+#ifdef NETDATA_WITH_ZLIB
+    add_to_bi(b, "zlib");
+#endif
+
+#ifdef ENABLE_APPS_PLUGIN
+    add_to_bi(b, "apps");
+#endif
+#ifdef HAVE_SETNS
+    add_to_bi(b, "cgroup Network Tracking");
+#endif
+#ifdef HAVE_CUPS
+    add_to_bi(b, "CUPS");
+#endif
+#ifdef HAVE_LIBBPF
+    add_to_bi(b, "EBPF");
+#endif
+#ifdef HAVE_FREEIPMI
+    add_to_bi(b, "IPMI");
+#endif
+#ifdef HAVE_NFACCT
+    add_to_bi(b, "NFACCT");
+#endif
+#ifdef ENABLE_PERF_PLUGIN
+    add_to_bi(b, "perf");
+#endif
+#ifdef ENABLE_SLABINFO
+    add_to_bi(b, "slabinfo");
+#endif
+#ifdef HAVE_LIBXENSTAT
+    add_to_bi(b, "Xen");
+#endif
+#ifdef HAVE_XENSTAT_VBD_ERROR
+    add_to_bi(b, "Xen VBD Error Tracking");
+#endif
+
+#ifdef HAVE_KINESIS
+    add_to_bi(b, "AWS Kinesis");
+#endif
+#ifdef ENABLE_EXPORTING_PUBSUB
+    add_to_bi(b, "GCP PubSub");
+#endif
+#ifdef HAVE_MONGOC
+    add_to_bi(b, "MongoDB");
+#endif
+#ifdef ENABLE_PROMETHEUS_REMOTE_WRITE
+    add_to_bi(b, "Prometheus Remote Write");
+#endif
+#ifdef NETDATA_TRACE_ALLOCATIONS
+    add_to_bi(b, "DebugTraceAlloc");
+#endif
+}
diff --git a/daemon/buildinfo.h b/daemon/buildinfo.h
new file mode 100644
index 0000000..d3b439f
--- /dev/null
+++ b/daemon/buildinfo.h
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_BUILDINFO_H
+#define NETDATA_BUILDINFO_H 1
+
+void print_build_info(void);
+
+void print_build_info_json(void);
+
+char *get_value_from_key(char *buffer, char *key);
+
+void get_install_type(char **install_type, char **prebuilt_arch, char **prebuilt_dist);
+
+#endif // NETDATA_BUILDINFO_H
diff --git a/daemon/commands.c b/daemon/commands.c
new file mode 100644
index 0000000..6288ee5
--- /dev/null
+++ b/daemon/commands.c
@@ -0,0 +1,736 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common.h"
+
+static uv_thread_t thread;
+static uv_loop_t* loop;
+static uv_async_t async;
+static struct completion completion;
+static uv_pipe_t server_pipe;
+
+char cmd_prefix_by_status[] = {
+    CMD_PREFIX_INFO,
+    CMD_PREFIX_ERROR,
+    CMD_PREFIX_ERROR
+};
+
+static int command_server_initialized = 0;
+static int command_thread_error;
+static int command_thread_shutdown;
+static unsigned clients = 0;
+
+struct command_context {
+    /* embedded client pipe structure at address 0 */
+    uv_pipe_t client;
+
+    uv_work_t work;
+    uv_write_t write_req;
+    cmd_t idx;
+    char *args;
+    char *message;
+    cmd_status_t status;
+    char command_string[MAX_COMMAND_LENGTH];
+    unsigned command_string_size;
+};
+
+/* Forward declarations */
+static cmd_status_t cmd_help_execute(char *args, char **message);
+static cmd_status_t cmd_reload_health_execute(char *args, char **message);
+static cmd_status_t cmd_save_database_execute(char *args, char **message);
+static cmd_status_t cmd_reopen_logs_execute(char *args, char **message);
+static cmd_status_t cmd_exit_execute(char *args, char
**message); +static cmd_status_t cmd_fatal_execute(char *args, char **message); +static cmd_status_t cmd_reload_claiming_state_execute(char *args, char **message); +static cmd_status_t cmd_reload_labels_execute(char *args, char **message); +static cmd_status_t cmd_read_config_execute(char *args, char **message); +static cmd_status_t cmd_write_config_execute(char *args, char **message); +static cmd_status_t cmd_ping_execute(char *args, char **message); +static cmd_status_t cmd_aclk_state(char *args, char **message); + +static command_info_t command_info_array[] = { + {"help", cmd_help_execute, CMD_TYPE_HIGH_PRIORITY}, // show help menu + {"reload-health", cmd_reload_health_execute, CMD_TYPE_ORTHOGONAL}, // reload health configuration + {"save-database", cmd_save_database_execute, CMD_TYPE_ORTHOGONAL}, // save database for memory mode save + {"reopen-logs", cmd_reopen_logs_execute, CMD_TYPE_ORTHOGONAL}, // Close and reopen log files + {"shutdown-agent", cmd_exit_execute, CMD_TYPE_EXCLUSIVE}, // exit cleanly + {"fatal-agent", cmd_fatal_execute, CMD_TYPE_HIGH_PRIORITY}, // exit with fatal error + {"reload-claiming-state", cmd_reload_claiming_state_execute, CMD_TYPE_ORTHOGONAL}, // reload claiming state + {"reload-labels", cmd_reload_labels_execute, CMD_TYPE_ORTHOGONAL}, // reload the labels + {"read-config", cmd_read_config_execute, CMD_TYPE_CONCURRENT}, + {"write-config", cmd_write_config_execute, CMD_TYPE_ORTHOGONAL}, + {"ping", cmd_ping_execute, CMD_TYPE_ORTHOGONAL}, + {"aclk-state", cmd_aclk_state, CMD_TYPE_ORTHOGONAL} +}; + +/* Mutexes for commands of type CMD_TYPE_ORTHOGONAL */ +static uv_mutex_t command_lock_array[CMD_TOTAL_COMMANDS]; +/* Commands of type CMD_TYPE_EXCLUSIVE are writers */ +static uv_rwlock_t exclusive_rwlock; +/* + * Locking order: + * 1. exclusive_rwlock + * 2. command_lock_array[] + */ + +/* Forward declarations */ +static void cmd_lock_exclusive(unsigned index); +static void cmd_lock_orthogonal(unsigned index); +static void cmd_lock_idempotent(unsigned index); +static void cmd_lock_high_priority(unsigned index); + +static command_lock_t *cmd_lock_by_type[] = { + cmd_lock_exclusive, + cmd_lock_orthogonal, + cmd_lock_idempotent, + cmd_lock_high_priority +}; + +/* Forward declarations */ +static void cmd_unlock_exclusive(unsigned index); +static void cmd_unlock_orthogonal(unsigned index); +static void cmd_unlock_idempotent(unsigned index); +static void cmd_unlock_high_priority(unsigned index); + +static command_lock_t *cmd_unlock_by_type[] = { + cmd_unlock_exclusive, + cmd_unlock_orthogonal, + cmd_unlock_idempotent, + cmd_unlock_high_priority +}; + +static cmd_status_t cmd_help_execute(char *args, char **message) +{ + (void)args; + + *message = mallocz(MAX_COMMAND_LENGTH); + strncpyz(*message, + "\nThe commands are (arguments are in brackets):\n" + "help\n" + " Show this help menu.\n" + "reload-health\n" + " Reload health configuration.\n" + "reload-labels\n" + " Reload all labels.\n" + "save-database\n" + " Save internal DB to disk for memory mode save.\n" + "reopen-logs\n" + " Close and reopen log files.\n" + "shutdown-agent\n" + " Cleanup and exit the netdata agent.\n" + "fatal-agent\n" + " Log the state and halt the netdata agent.\n" + "reload-claiming-state\n" + " Reload agent claiming state from disk.\n" + "ping\n" + " Return with 'pong' if agent is alive.\n" + "aclk-state [json]\n" + " Returns current state of ACLK and Cloud connection. 
(optionally in json)\n", + MAX_COMMAND_LENGTH - 1); + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_reload_health_execute(char *args, char **message) +{ + (void)args; + (void)message; + + error_log_limit_unlimited(); + info("COMMAND: Reloading HEALTH configuration."); + health_reload(); + error_log_limit_reset(); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_save_database_execute(char *args, char **message) +{ + (void)args; + (void)message; + + error_log_limit_unlimited(); + info("COMMAND: Saving databases."); + rrdhost_save_all(); + info("COMMAND: Databases saved."); + error_log_limit_reset(); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_reopen_logs_execute(char *args, char **message) +{ + (void)args; + (void)message; + + error_log_limit_unlimited(); + info("COMMAND: Reopening all log files."); + reopen_all_log_files(); + error_log_limit_reset(); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_exit_execute(char *args, char **message) +{ + (void)args; + (void)message; + + error_log_limit_unlimited(); + info("COMMAND: Cleaning up to exit."); + netdata_cleanup_and_exit(0); + exit(0); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_fatal_execute(char *args, char **message) +{ + (void)args; + (void)message; + + fatal("COMMAND: netdata now exits."); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_reload_claiming_state_execute(char *args, char **message) +{ + (void)args; + (void)message; +#if defined(DISABLE_CLOUD) || !defined(ENABLE_ACLK) + info("The claiming feature has been explicitly disabled"); + *message = strdupz("This agent cannot be claimed, it was built without support for Cloud"); + return CMD_STATUS_FAILURE; +#endif + error_log_limit_unlimited(); + info("COMMAND: Reloading Agent Claiming configuration."); + load_claiming_state(); + registry_update_cloud_base_url(); + rrdpush_claimed_id(localhost); + error_log_limit_reset(); + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_reload_labels_execute(char *args, char **message) +{ + (void)args; + info("COMMAND: reloading host labels."); + reload_host_labels(); + + BUFFER *wb = buffer_create(10); + rrdlabels_log_to_buffer(localhost->rrdlabels, wb); + (*message)=strdupz(buffer_tostring(wb)); + buffer_free(wb); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_read_config_execute(char *args, char **message) +{ + size_t n = strlen(args); + char *separator = strchr(args,'|'); + if (separator == NULL) + return CMD_STATUS_FAILURE; + char *separator2 = strchr(separator + 1,'|'); + if (separator2 == NULL) + return CMD_STATUS_FAILURE; + + char *temp = callocz(n + 1, 1); + strcpy(temp, args); + size_t offset = separator - args; + temp[offset] = 0; + size_t offset2 = separator2 - args; + temp[offset2] = 0; + + const char *conf_file = temp; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? 
&netdata_config : &cloud_config; + + char *value = appconfig_get(tmp_config, temp + offset + 1, temp + offset2 + 1, NULL); + if (value == NULL) + { + error("Cannot execute read-config conf_file=%s section=%s / key=%s because no value set", conf_file, + temp + offset + 1, temp + offset2 + 1); + freez(temp); + return CMD_STATUS_FAILURE; + } + else + { + (*message) = strdupz(value); + freez(temp); + return CMD_STATUS_SUCCESS; + } + +} + +static cmd_status_t cmd_write_config_execute(char *args, char **message) +{ + UNUSED(message); + info("write-config %s", args); + size_t n = strlen(args); + char *separator = strchr(args,'|'); + if (separator == NULL) + return CMD_STATUS_FAILURE; + char *separator2 = strchr(separator + 1,'|'); + if (separator2 == NULL) + return CMD_STATUS_FAILURE; + char *separator3 = strchr(separator2 + 1,'|'); + if (separator3 == NULL) + return CMD_STATUS_FAILURE; + char *temp = callocz(n + 1, 1); + strcpy(temp, args); + size_t offset = separator - args; + temp[offset] = 0; + size_t offset2 = separator2 - args; + temp[offset2] = 0; + size_t offset3 = separator3 - args; + temp[offset3] = 0; + + const char *conf_file = temp; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config; + + appconfig_set(tmp_config, temp + offset + 1, temp + offset2 + 1, temp + offset3 + 1); + info("write-config conf_file=%s section=%s key=%s value=%s",conf_file, temp + offset + 1, temp + offset2 + 1, + temp + offset3 + 1); + freez(temp); + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_ping_execute(char *args, char **message) +{ + (void)args; + + *message = strdupz("pong"); + + return CMD_STATUS_SUCCESS; +} + +static cmd_status_t cmd_aclk_state(char *args, char **message) +{ + info("COMMAND: Reopening aclk/cloud state."); + if (strstr(args, "json")) + *message = aclk_state_json(); + else + *message = aclk_state(); + + return CMD_STATUS_SUCCESS; +} + +static void cmd_lock_exclusive(unsigned index) +{ + (void)index; + + uv_rwlock_wrlock(&exclusive_rwlock); +} + +static void cmd_lock_orthogonal(unsigned index) +{ + uv_rwlock_rdlock(&exclusive_rwlock); + uv_mutex_lock(&command_lock_array[index]); +} + +static void cmd_lock_idempotent(unsigned index) +{ + (void)index; + + uv_rwlock_rdlock(&exclusive_rwlock); +} + +static void cmd_lock_high_priority(unsigned index) +{ + (void)index; +} + +static void cmd_unlock_exclusive(unsigned index) +{ + (void)index; + + uv_rwlock_wrunlock(&exclusive_rwlock); +} + +static void cmd_unlock_orthogonal(unsigned index) +{ + uv_rwlock_rdunlock(&exclusive_rwlock); + uv_mutex_unlock(&command_lock_array[index]); +} + +static void cmd_unlock_idempotent(unsigned index) +{ + (void)index; + + uv_rwlock_rdunlock(&exclusive_rwlock); +} + +static void cmd_unlock_high_priority(unsigned index) +{ + (void)index; +} + +static void pipe_close_cb(uv_handle_t* handle) +{ + /* Also frees command context */ + freez(handle); +} + +static void pipe_write_cb(uv_write_t* req, int status) +{ + (void)status; + uv_pipe_t *client = req->data; + + uv_close((uv_handle_t *)client, pipe_close_cb); + --clients; + freez(client->data); + info("Command Clients = %u\n", clients); +} + +static inline void add_char_to_command_reply(char *reply_string, unsigned *reply_string_size, char character) +{ + reply_string[(*reply_string_size)++] = character; +} + +static inline void add_string_to_command_reply(char *reply_string, unsigned *reply_string_size, char *str) +{ + unsigned len; + + len = strlen(str); + + if 
(MAX_COMMAND_LENGTH - 1 < len + *reply_string_size) + len = MAX_COMMAND_LENGTH - *reply_string_size - 1; + + strncpyz(reply_string + *reply_string_size, str, len); + *reply_string_size += len; +} + +static void send_command_reply(struct command_context *cmd_ctx, cmd_status_t status, char *message) +{ + int ret; + char *reply_string = mallocz(MAX_COMMAND_LENGTH); + char exit_status_string[MAX_EXIT_STATUS_LENGTH + 1] = {'\0', }; + unsigned reply_string_size = 0; + uv_buf_t write_buf; + uv_stream_t *client = (uv_stream_t *)(uv_pipe_t *)cmd_ctx; + + snprintfz(exit_status_string, MAX_EXIT_STATUS_LENGTH, "%u", status); + add_char_to_command_reply(reply_string, &reply_string_size, CMD_PREFIX_EXIT_CODE); + add_string_to_command_reply(reply_string, &reply_string_size, exit_status_string); + add_char_to_command_reply(reply_string, &reply_string_size, '\0'); + + if (message) { + add_char_to_command_reply(reply_string, &reply_string_size, cmd_prefix_by_status[status]); + add_string_to_command_reply(reply_string, &reply_string_size, message); + } + + cmd_ctx->write_req.data = client; + client->data = reply_string; + write_buf.base = reply_string; + write_buf.len = reply_string_size; + ret = uv_write(&cmd_ctx->write_req, (uv_stream_t *)client, &write_buf, 1, pipe_write_cb); + if (ret) { + error("uv_write(): %s", uv_strerror(ret)); + } + info("COMMAND: Sending reply: \"%s\"", reply_string); +} + +cmd_status_t execute_command(cmd_t idx, char *args, char **message) +{ + cmd_status_t status; + cmd_type_t type = command_info_array[idx].type; + + cmd_lock_by_type[type](idx); + status = command_info_array[idx].func(args, message); + cmd_unlock_by_type[type](idx); + + return status; +} + +static void after_schedule_command(uv_work_t *req, int status) +{ + struct command_context *cmd_ctx = req->data; + + (void)status; + + send_command_reply(cmd_ctx, cmd_ctx->status, cmd_ctx->message); + if (cmd_ctx->message) + freez(cmd_ctx->message); +} + +static void schedule_command(uv_work_t *req) +{ + struct command_context *cmd_ctx = req->data; + + cmd_ctx->status = execute_command(cmd_ctx->idx, cmd_ctx->args, &cmd_ctx->message); +} + +/* This will alter the state of the command_info_array.cmd_str +*/ +static void parse_commands(struct command_context *cmd_ctx) +{ + char *message = NULL, *pos, *lstrip, *rstrip; + cmd_t i; + cmd_status_t status; + + status = CMD_STATUS_FAILURE; + + /* Skip white-space characters */ + for (pos = cmd_ctx->command_string ; isspace(*pos) && ('\0' != *pos) ; ++pos) {;} + for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) { + if (!strncmp(pos, command_info_array[i].cmd_str, strlen(command_info_array[i].cmd_str))) { + if (CMD_EXIT == i) { + /* musl C does not like libuv workqueues calling exit() */ + execute_command(CMD_EXIT, NULL, NULL); + } + for (lstrip=pos + strlen(command_info_array[i].cmd_str); isspace(*lstrip) && ('\0' != *lstrip); ++lstrip) {;} + for (rstrip=lstrip+strlen(lstrip)-1; rstrip>lstrip && isspace(*rstrip); *(rstrip--) = 0 ); + + cmd_ctx->work.data = cmd_ctx; + cmd_ctx->idx = i; + cmd_ctx->args = lstrip; + cmd_ctx->message = NULL; + + fatal_assert(0 == uv_queue_work(loop, &cmd_ctx->work, schedule_command, after_schedule_command)); + break; + } + } + if (CMD_TOTAL_COMMANDS == i) { + /* no command found */ + message = strdupz("Illegal command. 
Please type \"help\" for instructions."); + send_command_reply(cmd_ctx, status, message); + freez(message); + } +} + +static void pipe_read_cb(uv_stream_t *client, ssize_t nread, const uv_buf_t *buf) +{ + struct command_context *cmd_ctx = (struct command_context *)client; + + if (0 == nread) { + info("%s: Zero bytes read by command pipe.", __func__); + } else if (UV_EOF == nread) { + info("EOF found in command pipe."); + parse_commands(cmd_ctx); + } else if (nread < 0) { + error("%s: %s", __func__, uv_strerror(nread)); + } + + if (nread < 0) { /* stop stream due to EOF or error */ + (void)uv_read_stop((uv_stream_t *)client); + } else if (nread) { + size_t to_copy; + + to_copy = MIN((size_t) nread, MAX_COMMAND_LENGTH - 1 - cmd_ctx->command_string_size); + memcpy(cmd_ctx->command_string + cmd_ctx->command_string_size, buf->base, to_copy); + cmd_ctx->command_string_size += to_copy; + cmd_ctx->command_string[cmd_ctx->command_string_size] = '\0'; + } + if (buf && buf->len) { + freez(buf->base); + } + + if (nread < 0 && UV_EOF != nread) { + uv_close((uv_handle_t *)client, pipe_close_cb); + --clients; + info("Command Clients = %u\n", clients); + } +} + +static void alloc_cb(uv_handle_t *handle, size_t suggested_size, uv_buf_t *buf) +{ + (void)handle; + + buf->base = mallocz(suggested_size); + buf->len = suggested_size; +} + +static void connection_cb(uv_stream_t *server, int status) +{ + int ret; + uv_pipe_t *client; + struct command_context *cmd_ctx; + fatal_assert(status == 0); + + /* combined allocation of client pipe and command context */ + cmd_ctx = mallocz(sizeof(*cmd_ctx)); + client = (uv_pipe_t *)cmd_ctx; + ret = uv_pipe_init(server->loop, client, 1); + if (ret) { + error("uv_pipe_init(): %s", uv_strerror(ret)); + freez(cmd_ctx); + return; + } + ret = uv_accept(server, (uv_stream_t *)client); + if (ret) { + error("uv_accept(): %s", uv_strerror(ret)); + uv_close((uv_handle_t *)client, pipe_close_cb); + return; + } + + ++clients; + info("Command Clients = %u\n", clients); + /* Start parsing a new command */ + cmd_ctx->command_string_size = 0; + cmd_ctx->command_string[0] = '\0'; + + ret = uv_read_start((uv_stream_t*)client, alloc_cb, pipe_read_cb); + if (ret) { + error("uv_read_start(): %s", uv_strerror(ret)); + uv_close((uv_handle_t *)client, pipe_close_cb); + --clients; + info("Command Clients = %u\n", clients); + return; + } +} + +static void async_cb(uv_async_t *handle) +{ + uv_stop(handle->loop); +} + +static void command_thread(void *arg) +{ + int ret; + uv_fs_t req; + + (void) arg; + loop = mallocz(sizeof(uv_loop_t)); + ret = uv_loop_init(loop); + if (ret) { + error("uv_loop_init(): %s", uv_strerror(ret)); + command_thread_error = ret; + goto error_after_loop_init; + } + loop->data = NULL; + + ret = uv_async_init(loop, &async, async_cb); + if (ret) { + error("uv_async_init(): %s", uv_strerror(ret)); + command_thread_error = ret; + goto error_after_async_init; + } + async.data = NULL; + + ret = uv_pipe_init(loop, &server_pipe, 0); + if (ret) { + error("uv_pipe_init(): %s", uv_strerror(ret)); + command_thread_error = ret; + goto error_after_pipe_init; + } + (void)uv_fs_unlink(loop, &req, PIPENAME, NULL); + uv_fs_req_cleanup(&req); + ret = uv_pipe_bind(&server_pipe, PIPENAME); + if (ret) { + error("uv_pipe_bind(): %s", uv_strerror(ret)); + command_thread_error = ret; + goto error_after_pipe_bind; + } + ret = uv_listen((uv_stream_t *)&server_pipe, SOMAXCONN, connection_cb); + if (ret) { + /* Fallback to backlog of 1 */ + info("uv_listen() failed with backlog = %d, falling back to 
backlog = 1.", SOMAXCONN); + ret = uv_listen((uv_stream_t *)&server_pipe, 1, connection_cb); + } + if (ret) { + error("uv_listen(): %s", uv_strerror(ret)); + command_thread_error = ret; + goto error_after_uv_listen; + } + + command_thread_error = 0; + command_thread_shutdown = 0; + /* wake up initialization thread */ + completion_mark_complete(&completion); + + while (command_thread_shutdown == 0) { + uv_run(loop, UV_RUN_DEFAULT); + } + /* cleanup operations of the event loop */ + info("Shutting down command event loop."); + uv_close((uv_handle_t *)&async, NULL); + uv_close((uv_handle_t*)&server_pipe, NULL); + uv_run(loop, UV_RUN_DEFAULT); /* flush all libuv handles */ + + info("Shutting down command loop complete."); + fatal_assert(0 == uv_loop_close(loop)); + freez(loop); + + return; + +error_after_uv_listen: +error_after_pipe_bind: + uv_close((uv_handle_t*)&server_pipe, NULL); +error_after_pipe_init: + uv_close((uv_handle_t *)&async, NULL); +error_after_async_init: + uv_run(loop, UV_RUN_DEFAULT); /* flush all libuv handles */ + fatal_assert(0 == uv_loop_close(loop)); +error_after_loop_init: + freez(loop); + + /* wake up initialization thread */ + completion_mark_complete(&completion); +} + +static void sanity_check(void) +{ + /* The size of command_info_array must be CMD_TOTAL_COMMANDS elements */ + BUILD_BUG_ON(CMD_TOTAL_COMMANDS != sizeof(command_info_array) / sizeof(command_info_array[0])); +} + +void commands_init(void) +{ + cmd_t i; + int error; + + sanity_check(); + if (command_server_initialized) + return; + + info("Initializing command server."); + for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) { + fatal_assert(0 == uv_mutex_init(&command_lock_array[i])); + } + fatal_assert(0 == uv_rwlock_init(&exclusive_rwlock)); + + completion_init(&completion); + error = uv_thread_create(&thread, command_thread, NULL); + if (error) { + error("uv_thread_create(): %s", uv_strerror(error)); + goto after_error; + } + /* wait for worker thread to initialize */ + completion_wait_for(&completion); + completion_destroy(&completion); + uv_thread_set_name_np(thread, "DAEMON_COMMAND"); + + if (command_thread_error) { + error = uv_thread_join(&thread); + if (error) { + error("uv_thread_create(): %s", uv_strerror(error)); + } + goto after_error; + } + + command_server_initialized = 1; + return; + +after_error: + error("Failed to initialize command server. 
The netdata cli tool will be unable to send commands."); +} + +void commands_exit(void) +{ + cmd_t i; + + if (!command_server_initialized) + return; + + command_thread_shutdown = 1; + info("Shutting down command server."); + /* wake up event loop */ + fatal_assert(0 == uv_async_send(&async)); + fatal_assert(0 == uv_thread_join(&thread)); + + for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) { + uv_mutex_destroy(&command_lock_array[i]); + } + uv_rwlock_destroy(&exclusive_rwlock); + info("Command server has stopped."); + command_server_initialized = 0; +} diff --git a/daemon/commands.h b/daemon/commands.h new file mode 100644 index 0000000..f0e38ce --- /dev/null +++ b/daemon/commands.h @@ -0,0 +1,82 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_COMMANDS_H +#define NETDATA_COMMANDS_H 1 + +#ifdef _WIN32 +# define PIPENAME "\\\\?\\pipe\\netdata-cli" +#else +# define PIPENAME "/tmp/netdata-ipc" +#endif + +#define MAX_COMMAND_LENGTH 4096 +#define MAX_EXIT_STATUS_LENGTH 23 /* Can't ever be bigger than "X-18446744073709551616" */ + +typedef enum cmd { + CMD_HELP = 0, + CMD_RELOAD_HEALTH, + CMD_SAVE_DATABASE, + CMD_REOPEN_LOGS, + CMD_EXIT, + CMD_FATAL, + CMD_RELOAD_CLAIMING_STATE, + CMD_RELOAD_LABELS, + CMD_READ_CONFIG, + CMD_WRITE_CONFIG, + CMD_PING, + CMD_ACLK_STATE, + CMD_TOTAL_COMMANDS +} cmd_t; + +typedef enum cmd_status { + CMD_STATUS_SUCCESS = 0, + CMD_STATUS_FAILURE, + CMD_STATUS_BUSY +} cmd_status_t; + +#define CMD_PREFIX_INFO 'O' /* Following string should go to cli stdout */ +#define CMD_PREFIX_ERROR 'E' /* Following string should go to cli stderr */ +#define CMD_PREFIX_EXIT_CODE 'X' /* Following string is cli integer exit code */ + +typedef enum cmd_type { + /* + * No other command is allowed to run at the same time (except for CMD_TYPE_HIGH_PRIORITY). + */ + CMD_TYPE_EXCLUSIVE = 0, + /* + * Other commands are allowed to run concurrently (except for CMD_TYPE_EXCLUSIVE) but calls to this command are + * serialized. + */ + CMD_TYPE_ORTHOGONAL, + /* + * Other commands are allowed to run concurrently (except for CMD_TYPE_EXCLUSIVE) as are calls to this command. + */ + CMD_TYPE_CONCURRENT, + /* + * Those commands are always allowed to run. + */ + CMD_TYPE_HIGH_PRIORITY +} cmd_type_t; + +/** + * Executes a command and returns the status. 
+ *
+ * @param args a string that may contain additional parameters to be parsed
+ * @param message allocate and return a message if need be (up to MAX_COMMAND_LENGTH bytes)
+ * @return CMD_STATUS_FAILURE or CMD_STATUS_SUCCESS
+ */
+typedef cmd_status_t (command_action_t) (char *args, char **message);
+
+typedef struct command_info {
+    char *cmd_str;            // the command string
+    command_action_t *func;   // the function that executes the command
+    cmd_type_t type;          // Concurrency control information for the command
+} command_info_t;
+
+typedef void (command_lock_t) (unsigned index);
+
+cmd_status_t execute_command(cmd_t idx, char *args, char **message);
+void commands_init(void);
+void commands_exit(void);
+
+#endif //NETDATA_COMMANDS_H
diff --git a/daemon/common.c b/daemon/common.c
new file mode 100644
index 0000000..85d6386
--- /dev/null
+++ b/daemon/common.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common.h"
+
+char *netdata_configured_hostname = NULL;
+char *netdata_configured_user_config_dir = CONFIG_DIR;
+char *netdata_configured_stock_config_dir = LIBCONFIG_DIR;
+char *netdata_configured_log_dir = LOG_DIR;
+char *netdata_configured_primary_plugins_dir = NULL;
+char *netdata_configured_web_dir = WEB_DIR;
+char *netdata_configured_cache_dir = CACHE_DIR;
+char *netdata_configured_varlib_dir = VARLIB_DIR;
+char *netdata_configured_lock_dir = NULL;
+char *netdata_configured_home_dir = VARLIB_DIR;
+char *netdata_configured_host_prefix = NULL;
+char *netdata_configured_timezone = NULL;
+char *netdata_configured_abbrev_timezone = NULL;
+int32_t netdata_configured_utc_offset = 0;
+int netdata_ready;
+int netdata_cloud_setting;
+
diff --git a/daemon/common.h b/daemon/common.h
new file mode 100644
index 0000000..f3d8686
--- /dev/null
+++ b/daemon/common.h
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_COMMON_H
+#define NETDATA_COMMON_H 1
+
+#include "libnetdata/libnetdata.h"
+
+// ----------------------------------------------------------------------------
+// shortcuts for the default netdata configuration
+
+#define config_load(filename, overwrite_used, section) appconfig_load(&netdata_config, filename, overwrite_used, section)
+#define config_get(section, name, default_value) appconfig_get(&netdata_config, section, name, default_value)
+#define config_get_number(section, name, value) appconfig_get_number(&netdata_config, section, name, value)
+#define config_get_float(section, name, value) appconfig_get_float(&netdata_config, section, name, value)
+#define config_get_boolean(section, name, value) appconfig_get_boolean(&netdata_config, section, name, value)
+#define config_get_boolean_ondemand(section, name, value) appconfig_get_boolean_ondemand(&netdata_config, section, name, value)
+#define config_get_duration(section, name, value) appconfig_get_duration(&netdata_config, section, name, value)
+
+#define config_set(section, name, default_value) appconfig_set(&netdata_config, section, name, default_value)
+#define config_set_default(section, name, value) appconfig_set_default(&netdata_config, section, name, value)
+#define config_set_number(section, name, value) appconfig_set_number(&netdata_config, section, name, value)
+#define config_set_float(section, name, value) appconfig_set_float(&netdata_config, section, name, value)
+#define config_set_boolean(section, name, value) appconfig_set_boolean(&netdata_config, section, name, value)
+
+#define config_exists(section, name) appconfig_exists(&netdata_config, section, name)
+#define
config_move(section_old, name_old, section_new, name_new) appconfig_move(&netdata_config, section_old, name_old, section_new, name_new) + +#define config_generate(buffer, only_changed) appconfig_generate(&netdata_config, buffer, only_changed) + +#define config_section_option_destroy(section, name) appconfig_section_option_destroy_non_loaded(&netdata_config, section, name) + +// ---------------------------------------------------------------------------- +// netdata include files + +#include "global_statistics.h" + +// the netdata database +#include "database/rrd.h" + +// the netdata webserver(s) +#include "web/server/web_server.h" + +// streaming metrics between netdata servers +#include "streaming/rrdpush.h" + +// health monitoring and alarm notifications +#include "health/health.h" + +// anomaly detection +#include "ml/ml.h" + +// the netdata registry +// the registry is actually an API feature +#include "registry/registry.h" + +// exporting engine for archiving the metrics +#include "exporting/exporting_engine.h" + +// the netdata API +#include "web/api/web_api_v1.h" + +// all data collection plugins +#include "collectors/all.h" + +// netdata unit tests +#include "unit_test.h" + +// netdata agent claiming +#include "claim/claim.h" + +// netdata agent cloud link +#include "aclk/aclk.h" + +// global GUID map functions + +// netdata agent spawn server +#include "spawn/spawn.h" + +// the netdata daemon +#include "daemon.h" +#include "main.h" +#include "static_threads.h" +#include "signals.h" +#include "commands.h" +#include "analytics.h" + +// global netdata daemon variables +extern char *netdata_configured_hostname; +extern char *netdata_configured_user_config_dir; +extern char *netdata_configured_stock_config_dir; +extern char *netdata_configured_log_dir; +extern char *netdata_configured_primary_plugins_dir; +extern char *netdata_configured_web_dir; +extern char *netdata_configured_cache_dir; +extern char *netdata_configured_varlib_dir; +extern char *netdata_configured_lock_dir; +extern char *netdata_configured_home_dir; +extern char *netdata_configured_host_prefix; +extern char *netdata_configured_timezone; +extern char *netdata_configured_abbrev_timezone; +extern int32_t netdata_configured_utc_offset; +extern int netdata_zero_metrics_enabled; +extern int netdata_anonymous_statistics_enabled; + +extern int netdata_ready; +extern int netdata_cloud_setting; + +#endif /* NETDATA_COMMON_H */ diff --git a/daemon/config/README.md b/daemon/config/README.md new file mode 100644 index 0000000..7b4d27e --- /dev/null +++ b/daemon/config/README.md @@ -0,0 +1,230 @@ +<!-- +title: "Daemon configuration" +description: "The Netdata Agent's daemon is installed preconfigured to collect thousands of metrics every second, but is highly configurable for real-world workloads." +custom_edit_url: https://github.com/netdata/netdata/edit/master/daemon/config/README.md +--> + +# Daemon configuration + +<details> +<summary>The daemon configuration file is read from /etc/netdata/netdata.conf.</summary> + +Depending on your installation method, Netdata will have been installed either directly under `/`, or +under `/opt/netdata`. The paths mentioned here and in the documentation in general assume that your installation is +under `/`. If it is not, you will find the exact same paths under `/opt/netdata` as well. (i.e. `/etc/netdata` will +be `/opt/netdata/etc/netdata`). + +</details> + +This config file **is not needed by default**. Netdata works fine out of the box without it. 
But it does allow you to
+adapt the general behavior of Netdata, in great detail. You can find all these settings, with their default values, by
+accessing the URL `https://netdata.server.hostname:19999/netdata.conf`. For example, check the configuration file
+of [netdata.firehol.org](http://netdata.firehol.org/netdata.conf). HTTP access to this file is limited by default to
+[private IPs](https://en.wikipedia.org/wiki/Private_network), via
+the [web server access lists](/web/server/README.md#access-lists).
+
+`netdata.conf` has sections, each stated with `[section]`. You will see the following sections:
+
+1. `[global]` to [configure](#global-section-options) the [Netdata daemon](/daemon/README.md).
+2. `[db]` to [configure](#db-section-options) the database of Netdata.
+3. `[directories]` to [configure](#directories-section-options) the directories used by Netdata.
+4. `[logs]` to [configure](#logs-section-options) the Netdata logging.
+5. `[environment variables]` to [configure](#environment-variables-section-options) the environment variables used by
+   Netdata.
+6. `[sqlite]` to [configure](#sqlite-section-options) the [Netdata daemon](/daemon/README.md) SQLite settings.
+7. `[ml]` to configure settings for [machine learning](/ml/README.md).
+8. `[health]` to [configure](#health-section-options) general settings for [health monitoring](/health/README.md).
+9. `[web]` to [configure the web server](/web/server/README.md).
+10. `[registry]` for the [Netdata registry](/registry/README.md).
+11. `[global statistics]` for the charts Netdata maintains about its own operation.
+12. `[statsd]` for the general settings of the [statsd.plugin](/collectors/statsd.plugin/README.md).
+13. `[plugins]` to [configure](#plugins-section-options) which [collectors](/collectors/README.md) to use and PATH
+    settings.
+14. `[plugin:NAME]` sections for each collector plugin, under the
+    comment [Per plugin configuration](#per-plugin-configuration).
+
+The configuration file is a `name = value` dictionary. Netdata will not complain if you set options unknown to it. When
+you check the running configuration by accessing the URL `/netdata.conf` on your Netdata server, Netdata will add a
+comment on settings it does not currently use.
+
+## Applying changes
+
+After `netdata.conf` has been modified, Netdata needs to be [restarted](/docs/configure/start-stop-restart.md) for
+changes to apply:
+
+```bash
+sudo systemctl restart netdata
+```
+
+If the above does not work, try the following:
+
+```bash
+sudo killall netdata; sleep 10; sudo netdata
+```
+
+Please note that your data history will be lost if you have modified the `history` parameter in section `[global]`.
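+
+As a minimal sketch (assuming the standard install layout, with your user configuration under `/etc/netdata`), the
+bundled `edit-config` helper is a safe way to make such changes, since it copies the stock file into place before
+opening it in your editor:
+
+```bash
+cd /etc/netdata    # or /opt/netdata/etc/netdata
+sudo ./edit-config netdata.conf
+```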
+
+## Sections
+
+### [global] section options
+
+| setting | default | info |
+|:---:|:---:|:---|
+| process scheduling policy | `keep` | See [Netdata process scheduling policy](/daemon/README.md#netdata-process-scheduling-policy) |
+| OOM score | `0` | |
+| glibc malloc arena max for plugins | `1` | See [Virtual memory](/daemon/README.md#virtual-memory). |
+| glibc malloc arena max for Netdata | `1` | See [Virtual memory](/daemon/README.md#virtual-memory). |
+| hostname | auto-detected | The hostname of the computer running Netdata. |
+| host access prefix | empty | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set the SYS_PTRACE capability on the docker for this to work. Check [issue 43](https://github.com/netdata/netdata/issues/43). |
+| timezone | auto-detected | The timezone retrieved from the environment variable |
+| run as user | `netdata` | The user Netdata will run as. |
+| pthread stack size | auto-detected | |
+
+### [db] section options
+
+| setting | default | info |
+|:---:|:---:|:---|
+| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`. <br />`save`: Netdata will save its round robin database on exit and load it on startup. <br />`map`: Cache files will be updated in real-time. Not ideal for systems with high load or slow disks (check `man mmap`). <br />`ram`: The round-robin database will be temporary and it will be lost when Netdata exits. <br />`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. |
+| retention | `3600` | Used with `mode = save/map/ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](/database/README.md) for more information. |
+| storage tiers | `1` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](/database/engine/README.md#tiering). You can have up to 5 tiers of data (including _Tier 0_), so this number ranges between 1 and 5. |
+| dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching for _Tier 0_ Netdata metric values. |
+| dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated for caching Netdata metric values of the **`N`** tier. <br /> `N belongs to [1..4]` |
+| dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). |
+| dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. |
+| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well. <br /> `N belongs to [1..4]` |
+| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](/docs/guides/configure/performance.md). These metrics are stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](/database/engine/README.md#tiering). |
+| dbengine tier **`N`** update every iterations | `60` | The downsampling ratio between tier **`N-1`** and tier **`N`**: each point of tier **`N`** summarizes this many (60 by default) points of the tier below it. This setting can take values from `2` up to `255`. <br /> `N belongs to [1..4]` |
+| dbengine tier **`N`** back fill | `New` | Specifies the strategy of recreating missing data on each Tier from the exact lower Tier. <br /> `New`: Checks the latest point on each Tier and saves new points to it only if the exact lower Tier has available points for its observation window (the `dbengine tier N update every iterations` window). <br /> `none`: No back filling is applied. <br /> `N belongs to [1..4]` |
+| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/database/README.md#ksm) |
+| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions |
+| gap when lost iterations above | `1` | |
+| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. |
+| delete obsolete charts files | `yes` | See [monitoring ephemeral containers](/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also affects the deletion of files for obsolete dimensions |
+| delete orphan hosts files | `yes` | Set to `no` to disable non-responsive host removal. |
+| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. |
+
+:::info
+
+The multiplication of all the **enabled** tiers `dbengine tier N update every iterations` values must be less than `65535`.
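+
+For example, with tiers 1 and 2 enabled at the default `60`, the product is 60 × 60 = 3,600, which is well within the
+limit; enabling all four higher tiers at `60` would multiply to 60 × 60 × 60 × 60 = 12,960,000, so the values would
+have to be lowered.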
+
+### [directories] section options
+
+| setting | default | info |
+|:-------------------:|:------------------------------------------------------------------:|:-----|
+| config | `/etc/netdata` | The directory in which configuration files are kept. |
+| stock config | `/usr/lib/netdata/conf.d` | |
+| log | `/var/log/netdata` | The directory in which the [log files](/daemon/README.md#log-files) are kept. |
+| web | `/usr/share/netdata/web` | The directory in which the static web files are kept. |
+| cache | `/var/cache/netdata` | The directory in which the memory database is stored when Netdata exits. Netdata re-reads it on startup, to continue from the same point. |
+| lib | `/var/lib/netdata` | Contains the alarm log and the Netdata instance GUID. |
+| home | `/var/cache/netdata` | Contains the db files for the collected metrics. |
+| lock | `/var/lib/netdata/lock` | Contains the data collectors' lock files. |
+| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory in which plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. |
+| health config | `/etc/netdata/health.d` | The directory containing the user alarm configuration files that override the stock configurations. |
+| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alarm configuration files for each collector. |
+| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](/registry/README.md) database and the GUID that uniquely identifies each Netdata Agent. |
+
+### [logs] section options
+
+| setting | default | info |
+|:----------------------------------:|:-----------------------------:|:-----|
+| debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](/daemon/README.md#debugging). |
+| debug | `/var/log/netdata/debug.log` | The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](/daemon/README.md#debugging). |
+| error | `/var/log/netdata/error.log` | The filename to save error messages for the Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. |
+| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. |
+| facility | `daemon` | The facility keyword used to specify the type of system that is logging the message. |
+| errors flood protection period | `1200` | The length of the period (in seconds) during which the number of errors must not exceed `errors to trigger flood protection`. |
+| errors to trigger flood protection | `200` | The number of errors written to the log within `errors flood protection period` seconds before flood protection is activated. |
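+
+As an illustration, the following `netdata.conf` excerpt ships the daemon's error log to syslog and tightens the flood protection window; the settings exist in the table above, while the specific values are assumptions for the example:
+
+```conf
+[logs]
+    # send daemon and plugin errors to syslog instead of a file
+    error = syslog
+
+    # mute the log if more than 100 errors occur within 600 seconds
+    errors flood protection period = 600
+    errors to trigger flood protection = 100
+```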
+
+### [environment variables] section options
+
+| setting | default | info |
+|:----------:|:-----------------:|:-----|
+| TZ | `:/etc/localtime` | Where to find the timezone. |
+| PATH | `auto-detected` | Specifies the directories to be searched to find a command. |
+| PYTHONPATH | | Used to set a custom Python path. |
+
+### [sqlite] section options
+
+| setting | default | info |
+|:------------------:|:-------------:|:-----|
+| auto vacuum | `INCREMENTAL` | The [auto-vacuum status](https://www.sqlite.org/pragma.html#pragma_auto_vacuum) in the database. |
+| synchronous | `NORMAL` | The setting of the ["synchronous"](https://www.sqlite.org/pragma.html#pragma_synchronous) flag. |
+| journal mode | `WAL` | The [journal mode](https://www.sqlite.org/pragma.html#pragma_journal_mode) for databases. |
+| temp store | `MEMORY` | Used to determine where [temporary tables and indices are stored](https://www.sqlite.org/pragma.html#pragma_temp_store). |
+| journal size limit | `16777216` | Used to set a new [limit in bytes for the database](https://www.sqlite.org/pragma.html#pragma_journal_size_limit). |
+| cache size | `-2000` | Used to [suggest the maximum number of database disk pages](https://www.sqlite.org/pragma.html#pragma_cache_size) that SQLite will hold in memory at once per open database file. |
+
+### [health] section options
+
+This section controls the general behavior of the health monitoring capabilities of Netdata.
+
+Specific alarms are configured in per-collector config files under the `health.d` directory. For more info, see [health
+monitoring](/health/README.md).
+
+[Alarm notifications](/health/notifications/README.md) are configured in `health_alarm_notify.conf`.
+
+| setting | default | info |
+|:----------------------------------------------:|:------------------------------------------------:|:-----|
+| enabled | `yes` | Set to `no` to disable all alarms and notifications. |
+| in memory max health log entries | `1000` | Size of the alarm history held in RAM. |
+| script to execute on alarm | `/usr/libexec/netdata/plugins.d/alarm-notify.sh` | The script that sends alarm notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location on certain OSes (e.g. under `/usr/lib/netdata`). |
+| run at least every seconds | `10` | Controls how often all alarm conditions should be evaluated. |
+| postpone alarms during hibernation for seconds | `60` | Prevents false alarms. May need to be increased if you get alarms during hibernation. |
+| rotate log every lines | `2000` | Controls the number of alarm log entries stored in `<lib directory>/health-log.db`, where `<lib directory>` is the one configured in the [\[directories\] section](#directories-section-options). |
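+
+For example, a hedged `[health]` configuration for a host that resumes from hibernation often might look like the following; every option name appears in the table above, and the values are illustrative only:
+
+```conf
+[health]
+    # evaluate alarm conditions less aggressively
+    run at least every seconds = 30
+
+    # keep a longer alarm history in RAM
+    in memory max health log entries = 5000
+
+    # avoid false alarms right after resuming from hibernation
+    postpone alarms during hibernation for seconds = 300
+```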
+
+### [web] section options
+
+Refer to the [web server documentation](/web/server/README.md).
+
+### [plugins] section options
+
+In this section you will see a boolean (`yes`/`no`) option for each plugin (e.g. tc, cgroups, apps, proc, etc.). Note
+that the configuration options in this section for the orchestrator plugins `python.d` and `charts.d` control **all the
+modules** written for that orchestrator. For instance, setting `python.d = no` means that all Python modules
+under `collectors/python.d.plugin` will be disabled.
+
+Additionally, there will be the following options:
+
+| setting | default | info |
+|:---------------------------:|:-------:|:-----|
+| enable running new plugins | `yes` | When set to `yes`, Netdata will enable detected plugins, even if they are not configured explicitly. Setting this to `no` will only enable plugins explicitly configured in this file with a `yes`. |
+| check for new plugins every | `60` | The time in seconds to check for new plugins in the plugins directory. This allows other applications to dynamically create plugins for Netdata. |
+| checks | `no` | A debugging plugin for measuring Netdata's internal latency. |
+
+### [registry] section options
+
+To understand what this section is and how it should be configured, please refer to
+the [registry documentation](/registry/README.md).
+
+## Per-plugin configuration
+
+The configuration options for plugins appear in sections following the pattern `[plugin:NAME]`.
+
+### Internal plugins
+
+Most internal plugins will provide additional options. Check [Internal Plugins](/collectors/README.md) for more
+information.
+
+Please note that, by default, Netdata enables monitoring of metrics for disks, memory, and network only when they are
+not zero; if they are constantly zero, they are ignored. Metrics that start having values after Netdata has started are
+detected automatically, and their charts are added to the dashboard (although a refresh of the dashboard is needed for
+them to appear). Use `yes` instead of `auto` in plugin configuration sections to enable these charts permanently. You
+can also set the `enable zero metrics` option to `yes` in the `[db]` section, which enables charts with zero metrics
+for all internal Netdata plugins.
+
+### External plugins
+
+External plugins will have only 2 options in `netdata.conf`:
+
+| setting | default | info |
+|:---------------:|:--------------------------------------------:|:-----|
+| update every | the value of `[global].update every` setting | The frequency, in seconds, at which the plugin should collect values. For more information check the [performance guide](/docs/guides/configure/performance.md). |
+| command options | - | Additional command line options to pass to the plugin. |
+
+External plugins that need additional configuration may support a dedicated file in `/etc/netdata`. Check their
+documentation.
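+
+Putting this together, a hypothetical external plugin named `myplugin` (the name is a placeholder, not a shipped plugin) could be tuned from `netdata.conf` like so:
+
+```conf
+[plugins]
+    # disable all python.d modules in one shot
+    python.d = no
+
+[plugin:myplugin]
+    # collect every 10 seconds instead of the [global] default
+    update every = 10
+
+    # extra command line options passed verbatim to the plugin
+    command options = --debug
+```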
+ diff --git a/daemon/daemon.c b/daemon/daemon.c new file mode 100644 index 0000000..2b8a655 --- /dev/null +++ b/daemon/daemon.c @@ -0,0 +1,502 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" +#include <sched.h> + +char pidfile[FILENAME_MAX + 1] = ""; +char claimingdirectory[FILENAME_MAX + 1]; +char exepath[FILENAME_MAX + 1]; + +void get_netdata_execution_path(void) +{ + int ret; + size_t exepath_size = 0; + struct passwd *passwd = NULL; + char *user = NULL; + + passwd = getpwuid(getuid()); + user = (passwd && passwd->pw_name) ? passwd->pw_name : ""; + + exepath_size = sizeof(exepath) - 1; + ret = uv_exepath(exepath, &exepath_size); + if (0 != ret) { + error("uv_exepath(\"%s\", %u) (user: %s) failed (%s).", exepath, (unsigned)exepath_size, user, + uv_strerror(ret)); + fatal("Cannot start netdata without getting execution path."); + } + exepath[exepath_size] = '\0'; +} + +static void chown_open_file(int fd, uid_t uid, gid_t gid) { + if(fd == -1) return; + + struct stat buf; + + if(fstat(fd, &buf) == -1) { + error("Cannot fstat() fd %d", fd); + return; + } + + if((buf.st_uid != uid || buf.st_gid != gid) && S_ISREG(buf.st_mode)) { + if(fchown(fd, uid, gid) == -1) + error("Cannot fchown() fd %d.", fd); + } +} + +void create_needed_dir(const char *dir, uid_t uid, gid_t gid) +{ + // attempt to create the directory + if(mkdir(dir, 0755) == 0) { + // we created it + + // chown it to match the required user + if(chown(dir, uid, gid) == -1) + error("Cannot chown directory '%s' to %u:%u", dir, (unsigned int)uid, (unsigned int)gid); + } + else if(errno != EEXIST) + // log an error only if the directory does not exist + error("Cannot create directory '%s'", dir); +} + +void clean_directory(char *dirname) +{ + DIR *dir = opendir(dirname); + if(!dir) return; + + int dir_fd = dirfd(dir); + struct dirent *de = NULL; + + while((de = readdir(dir))) + if(de->d_type == DT_REG) + if (unlinkat(dir_fd, de->d_name, 0)) + error("Cannot delete %s/%s", dirname, de->d_name); + + closedir(dir); +} + +int become_user(const char *username, int pid_fd) { + int am_i_root = (getuid() == 0)?1:0; + + struct passwd *pw = getpwnam(username); + if(!pw) { + error("User %s is not present.", username); + return -1; + } + + uid_t uid = pw->pw_uid; + gid_t gid = pw->pw_gid; + + create_needed_dir(netdata_configured_cache_dir, uid, gid); + create_needed_dir(netdata_configured_varlib_dir, uid, gid); + create_needed_dir(netdata_configured_lock_dir, uid, gid); + create_needed_dir(claimingdirectory, uid, gid); + + clean_directory(netdata_configured_lock_dir); + + if(pidfile[0]) { + if(chown(pidfile, uid, gid) == -1) + error("Cannot chown '%s' to %u:%u", pidfile, (unsigned int)uid, (unsigned int)gid); + } + + int ngroups = (int)sysconf(_SC_NGROUPS_MAX); + gid_t *supplementary_groups = NULL; + if(ngroups > 0) { + supplementary_groups = mallocz(sizeof(gid_t) * ngroups); +#ifdef __APPLE__ + if(getgrouplist(username, gid, (int *)supplementary_groups, &ngroups) == -1) { +#else + if(getgrouplist(username, gid, supplementary_groups, &ngroups) == -1) { +#endif /* __APPLE__ */ + if(am_i_root) + error("Cannot get supplementary groups of user '%s'.", username); + + ngroups = 0; + } + } + + chown_open_file(STDOUT_FILENO, uid, gid); + chown_open_file(STDERR_FILENO, uid, gid); + chown_open_file(stdaccess_fd, uid, gid); + chown_open_file(pid_fd, uid, gid); + + if(supplementary_groups && ngroups > 0) { + if(setgroups((size_t)ngroups, supplementary_groups) == -1) { + if(am_i_root) + error("Cannot set supplementary groups for user 
'%s'", username); + } + ngroups = 0; + } + + if(supplementary_groups) + freez(supplementary_groups); + +#ifdef __APPLE__ + if(setregid(gid, gid) != 0) { +#else + if(setresgid(gid, gid, gid) != 0) { +#endif /* __APPLE__ */ + error("Cannot switch to user's %s group (gid: %u).", username, gid); + return -1; + } + +#ifdef __APPLE__ + if(setreuid(uid, uid) != 0) { +#else + if(setresuid(uid, uid, uid) != 0) { +#endif /* __APPLE__ */ + error("Cannot switch to user %s (uid: %u).", username, uid); + return -1; + } + + if(setgid(gid) != 0) { + error("Cannot switch to user's %s group (gid: %u).", username, gid); + return -1; + } + if(setegid(gid) != 0) { + error("Cannot effectively switch to user's %s group (gid: %u).", username, gid); + return -1; + } + if(setuid(uid) != 0) { + error("Cannot switch to user %s (uid: %u).", username, uid); + return -1; + } + if(seteuid(uid) != 0) { + error("Cannot effectively switch to user %s (uid: %u).", username, uid); + return -1; + } + + return(0); +} + +#ifndef OOM_SCORE_ADJ_MAX +#define OOM_SCORE_ADJ_MAX (1000) +#endif +#ifndef OOM_SCORE_ADJ_MIN +#define OOM_SCORE_ADJ_MIN (-1000) +#endif + +static void oom_score_adj(void) { + char buf[30 + 1]; + long long int old_score, wanted_score = 0, final_score = 0; + + // read the existing score + if(read_single_signed_number_file("/proc/self/oom_score_adj", &old_score)) { + error("Out-Of-Memory (OOM) score setting is not supported on this system."); + return; + } + + if (old_score != 0) { + wanted_score = old_score; + analytics_report_oom_score(old_score); + } + + // check the environment + char *s = getenv("OOMScoreAdjust"); + if(!s || !*s) { + snprintfz(buf, 30, "%d", (int)wanted_score); + s = buf; + } + + // check netdata.conf configuration + s = config_get(CONFIG_SECTION_GLOBAL, "OOM score", s); + if(s && *s && (isdigit(*s) || *s == '-' || *s == '+')) + wanted_score = atoll(s); + else if(s && !strcmp(s, "keep")) { + info("Out-Of-Memory (OOM) kept as-is (running with %d)", (int) old_score); + return; + } + else { + info("Out-Of-Memory (OOM) score not changed due to non-numeric setting: '%s' (running with %d)", s, (int)old_score); + return; + } + + if(wanted_score < OOM_SCORE_ADJ_MIN) { + error("Wanted Out-Of-Memory (OOM) score %d is too small. Using %d", (int)wanted_score, (int)OOM_SCORE_ADJ_MIN); + wanted_score = OOM_SCORE_ADJ_MIN; + } + + if(wanted_score > OOM_SCORE_ADJ_MAX) { + error("Wanted Out-Of-Memory (OOM) score %d is too big. Using %d", (int)wanted_score, (int)OOM_SCORE_ADJ_MAX); + wanted_score = OOM_SCORE_ADJ_MAX; + } + + if(old_score == wanted_score) { + info("Out-Of-Memory (OOM) score is already set to the wanted value %d", (int)old_score); + return; + } + + int written = 0; + int fd = open("/proc/self/oom_score_adj", O_WRONLY); + if(fd != -1) { + snprintfz(buf, 30, "%d", (int)wanted_score); + ssize_t len = strlen(buf); + if(len > 0 && write(fd, buf, (size_t)len) == len) written = 1; + close(fd); + + if(written) { + if(read_single_signed_number_file("/proc/self/oom_score_adj", &final_score)) + error("Adjusted my Out-Of-Memory (OOM) score to %d, but cannot verify it.", (int)wanted_score); + else if(final_score == wanted_score) + info("Adjusted my Out-Of-Memory (OOM) score from %d to %d.", (int)old_score, (int)final_score); + else + error("Adjusted my Out-Of-Memory (OOM) score from %d to %d, but it has been set to %d.", (int)old_score, (int)wanted_score, (int)final_score); + analytics_report_oom_score(final_score); + } + else + error("Failed to adjust my Out-Of-Memory (OOM) score to %d. Running with %d. 
(systemd systems may change it via netdata.service)", (int)wanted_score, (int)old_score); + } + else + error("Failed to adjust my Out-Of-Memory (OOM) score. Cannot open /proc/self/oom_score_adj for writing."); +} + +static void process_nice_level(void) { +#ifdef HAVE_NICE + int nice_level = (int)config_get_number(CONFIG_SECTION_GLOBAL, "process nice level", 19); + if(nice(nice_level) == -1) error("Cannot set netdata CPU nice level to %d.", nice_level); + else debug(D_SYSTEM, "Set netdata nice level to %d.", nice_level); +#endif // HAVE_NICE +}; + +#define SCHED_FLAG_NONE 0x00 +#define SCHED_FLAG_PRIORITY_CONFIGURABLE 0x01 // the priority is user configurable +#define SCHED_FLAG_KEEP_AS_IS 0x04 // do not attempt to set policy, priority or nice() +#define SCHED_FLAG_USE_NICE 0x08 // use nice() after setting this policy + +struct sched_def { + char *name; + int policy; + int priority; + uint8_t flags; +} scheduler_defaults[] = { + + // the order of array members is important! + // the first defined is the default used by netdata + + // the available members are important too! + // these are all the possible scheduling policies supported by netdata + +#ifdef SCHED_BATCH + { "batch", SCHED_BATCH, 0, SCHED_FLAG_USE_NICE }, +#endif + +#ifdef SCHED_OTHER + { "other", SCHED_OTHER, 0, SCHED_FLAG_USE_NICE }, + { "nice", SCHED_OTHER, 0, SCHED_FLAG_USE_NICE }, +#endif + +#ifdef SCHED_IDLE + { "idle", SCHED_IDLE, 0, SCHED_FLAG_NONE }, +#endif + +#ifdef SCHED_RR + { "rr", SCHED_RR, 0, SCHED_FLAG_PRIORITY_CONFIGURABLE }, +#endif + +#ifdef SCHED_FIFO + { "fifo", SCHED_FIFO, 0, SCHED_FLAG_PRIORITY_CONFIGURABLE }, +#endif + + // do not change the scheduling priority + { "keep", 0, 0, SCHED_FLAG_KEEP_AS_IS }, + { "none", 0, 0, SCHED_FLAG_KEEP_AS_IS }, + + // array termination + { NULL, 0, 0, 0 } +}; + + +#ifdef HAVE_SCHED_GETSCHEDULER +static void sched_getscheduler_report(void) { + int sched = sched_getscheduler(0); + if(sched == -1) { + error("Cannot get my current process scheduling policy."); + return; + } + else { + int i; + for(i = 0 ; scheduler_defaults[i].name ; i++) { + if(scheduler_defaults[i].policy == sched) { + if(scheduler_defaults[i].flags & SCHED_FLAG_PRIORITY_CONFIGURABLE) { + struct sched_param param; + if(sched_getparam(0, ¶m) == -1) { + error("Cannot get the process scheduling priority for my policy '%s'", scheduler_defaults[i].name); + return; + } + else { + info("Running with process scheduling policy '%s', priority %d", scheduler_defaults[i].name, param.sched_priority); + } + } + else if(scheduler_defaults[i].flags & SCHED_FLAG_USE_NICE) { + #ifdef HAVE_GETPRIORITY + int n = getpriority(PRIO_PROCESS, 0); + info("Running with process scheduling policy '%s', nice level %d", scheduler_defaults[i].name, n); + #else // !HAVE_GETPRIORITY + info("Running with process scheduling policy '%s'", scheduler_defaults[i].name); + #endif // !HAVE_GETPRIORITY + } + else { + info("Running with process scheduling policy '%s'", scheduler_defaults[i].name); + } + + return; + } + } + } +} +#endif /* HAVE_SCHED_GETSCHEDULER */ + +#ifdef HAVE_SCHED_SETSCHEDULER + +static void sched_setscheduler_set(void) { + + if(scheduler_defaults[0].name) { + const char *name = scheduler_defaults[0].name; + int policy = scheduler_defaults[0].policy, priority = scheduler_defaults[0].priority; + uint8_t flags = scheduler_defaults[0].flags; + int found = 0; + + // read the configuration + name = config_get(CONFIG_SECTION_GLOBAL, "process scheduling policy", name); + int i; + for(i = 0 ; scheduler_defaults[i].name ; i++) { + 
if(!strcmp(name, scheduler_defaults[i].name)) { + found = 1; + policy = scheduler_defaults[i].policy; + priority = scheduler_defaults[i].priority; + flags = scheduler_defaults[i].flags; + + if(flags & SCHED_FLAG_KEEP_AS_IS) + goto report; + + if(flags & SCHED_FLAG_PRIORITY_CONFIGURABLE) + priority = (int)config_get_number(CONFIG_SECTION_GLOBAL, "process scheduling priority", priority); + +#ifdef HAVE_SCHED_GET_PRIORITY_MIN + errno = 0; + if(priority < sched_get_priority_min(policy)) { + error("scheduler %s (%d) priority %d is below the minimum %d. Using the minimum.", name, policy, priority, sched_get_priority_min(policy)); + priority = sched_get_priority_min(policy); + } +#endif +#ifdef HAVE_SCHED_GET_PRIORITY_MAX + errno = 0; + if(priority > sched_get_priority_max(policy)) { + error("scheduler %s (%d) priority %d is above the maximum %d. Using the maximum.", name, policy, priority, sched_get_priority_max(policy)); + priority = sched_get_priority_max(policy); + } +#endif + break; + } + } + + if(!found) { + error("Unknown scheduling policy '%s' - falling back to nice", name); + goto fallback; + } + + const struct sched_param param = { + .sched_priority = priority + }; + + errno = 0; + i = sched_setscheduler(0, policy, ¶m); + if(i != 0) { + error("Cannot adjust netdata scheduling policy to %s (%d), with priority %d. Falling back to nice.", name, policy, priority); + } + else { + info("Adjusted netdata scheduling policy to %s (%d), with priority %d.", name, policy, priority); + if(!(flags & SCHED_FLAG_USE_NICE)) + goto report; + } + } + +fallback: + process_nice_level(); + +report: + sched_getscheduler_report(); +} +#else /* HAVE_SCHED_SETSCHEDULER */ +static void sched_setscheduler_set(void) { + process_nice_level(); +} +#endif /* HAVE_SCHED_SETSCHEDULER */ + +int become_daemon(int dont_fork, const char *user) +{ + if(!dont_fork) { + int i = fork(); + if(i == -1) { + perror("cannot fork"); + exit(1); + } + if(i != 0) { + exit(0); // the parent + } + + // become session leader + if (setsid() < 0) { + perror("Cannot become session leader."); + exit(2); + } + + // fork() again + i = fork(); + if(i == -1) { + perror("cannot fork"); + exit(1); + } + if(i != 0) { + exit(0); // the parent + } + } + + // generate our pid file + int pidfd = -1; + if(pidfile[0]) { + pidfd = open(pidfile, O_WRONLY | O_CREAT, 0644); + if(pidfd >= 0) { + if(ftruncate(pidfd, 0) != 0) + error("Cannot truncate pidfile '%s'.", pidfile); + + char b[100]; + sprintf(b, "%d\n", getpid()); + ssize_t i = write(pidfd, b, strlen(b)); + if(i <= 0) + error("Cannot write pidfile '%s'.", pidfile); + } + else error("Failed to open pidfile '%s'.", pidfile); + } + + // Set new file permissions + umask(0007); + + // adjust my Out-Of-Memory score + oom_score_adj(); + + // never become a problem + sched_setscheduler_set(); + + // Set claiming directory based on user config directory with correct ownership + snprintfz(claimingdirectory, FILENAME_MAX, "%s/cloud.d", netdata_configured_varlib_dir); + + if(user && *user) { + if(become_user(user, pidfd) != 0) { + error("Cannot become user '%s'. 
Continuing as we are.", user); + } + else debug(D_SYSTEM, "Successfully became user '%s'.", user); + } + else { + create_needed_dir(netdata_configured_cache_dir, getuid(), getgid()); + create_needed_dir(netdata_configured_varlib_dir, getuid(), getgid()); + create_needed_dir(netdata_configured_lock_dir, getuid(), getgid()); + create_needed_dir(claimingdirectory, getuid(), getgid()); + + clean_directory(netdata_configured_lock_dir); + } + + if(pidfd != -1) + close(pidfd); + + return(0); +} diff --git a/daemon/daemon.h b/daemon/daemon.h new file mode 100644 index 0000000..2a8a58e --- /dev/null +++ b/daemon/daemon.h @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DAEMON_H +#define NETDATA_DAEMON_H 1 + +int become_user(const char *username, int pid_fd); + +int become_daemon(int dont_fork, const char *user); + +void netdata_cleanup_and_exit(int i); +void send_statistics(const char *action, const char *action_result, const char *action_data); + +void get_netdata_execution_path(void); + +extern char pidfile[]; +extern char exepath[]; + +#endif /* NETDATA_DAEMON_H */ diff --git a/daemon/get-kubernetes-labels.sh.in b/daemon/get-kubernetes-labels.sh.in new file mode 100644 index 0000000..bc82c2a --- /dev/null +++ b/daemon/get-kubernetes-labels.sh.in @@ -0,0 +1,59 @@ +#!/usr/bin/env bash +me="$(basename "${0}")" + +# Checks if netdata is running in a kubernetes pod and fetches: +# - pod's labels +# - kubernetes cluster name (GKE only) + +if [ -z "${KUBERNETES_SERVICE_HOST}" ] || [ -z "${KUBERNETES_PORT_443_TCP_PORT}" ] || [ -z "${MY_POD_NAMESPACE}" ] || [ -z "${MY_POD_NAME}" ]; then + exit 0 +fi + +if ! command -v jq >/dev/null 2>&1; then + echo >&2 "${me}: jq command not available. Please install jq to get host labels for kubernetes pods." + exit 1 +fi + +TOKEN="$(< /var/run/secrets/kubernetes.io/serviceaccount/token)" +HEADER="Authorization: Bearer $TOKEN" +HOST="$KUBERNETES_SERVICE_HOST:$KUBERNETES_PORT_443_TCP_PORT" + +URL="https://$HOST/api/v1/namespaces/$MY_POD_NAMESPACE/pods/$MY_POD_NAME" +if ! POD_DATA=$(curl --fail -sSk -H "$HEADER" "$URL" 2>&1); then + echo >&2 "${me}: error on curl '${URL}': ${POD_DATA}." + exit 1 +fi + +URL="https://$HOST/api/v1/namespaces/kube-system" +if ! KUBE_SYSTEM_NS_DATA=$(curl --fail -sSk -H "$HEADER" "$URL" 2>&1); then + echo >&2 "${me}: error on curl '${URL}': ${KUBE_SYSTEM_NS_DATA}." + exit 1 +fi + +if ! POD_LABELS=$(jq -r '.metadata.labels' <<< "$POD_DATA" | grep ':' | tr -d '," ' 2>&1); then + echo >&2 "${me}: error on 'jq' parse pod data: ${POD_LABELS}." + exit 1 +fi + +if ! KUBE_SYSTEM_NS_UID=$(jq -r '.metadata.uid' <<< "$KUBE_SYSTEM_NS_DATA" 2>&1); then + echo >&2 "${me}: error on 'jq' parse kube_system_ns: ${KUBE_SYSTEM_NS_UID}." 
+ exit 1 +fi + +LABELS="$POD_LABELS\nk8s_cluster_id:$KUBE_SYSTEM_NS_UID" + +GCP_META_HEADER="Metadata-Flavor: Google" +GCP_META_URL="http://metadata/computeMetadata/v1" +GKE_CLUSTER_NAME="" + +if id=$(curl --fail -s -m 5 --noproxy "*" -H "$GCP_META_HEADER" "$GCP_META_URL/project/project-id"); then + loc=$(curl --fail -s -m 5 --noproxy "*" -H "$GCP_META_HEADER" "$GCP_META_URL/instance/attributes/cluster-location") + name=$(curl --fail -s -m 5 --noproxy "*" -H "$GCP_META_HEADER" "$GCP_META_URL/instance/attributes/cluster-name") + [ -n "$id" ] && [ -n "$loc" ] && [ -n "$name" ] && GKE_CLUSTER_NAME="gke_${id}_${loc}_${name}" +fi + +[ -n "$GKE_CLUSTER_NAME" ] && LABELS+="\nk8s_cluster_name:$GKE_CLUSTER_NAME" + +echo -e "$LABELS" + +exit 0 diff --git a/daemon/global_statistics.c b/daemon/global_statistics.c new file mode 100644 index 0000000..a4e9d32 --- /dev/null +++ b/daemon/global_statistics.c @@ -0,0 +1,2894 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +#define GLOBAL_STATS_RESET_WEB_USEC_MAX 0x01 + +#define WORKER_JOB_GLOBAL 0 +#define WORKER_JOB_REGISTRY 1 +#define WORKER_JOB_WORKERS 2 +#define WORKER_JOB_DBENGINE 3 +#define WORKER_JOB_HEARTBEAT 4 +#define WORKER_JOB_STRINGS 5 +#define WORKER_JOB_DICTIONARIES 6 +#define WORKER_JOB_MALLOC_TRACE 7 +#define WORKER_JOB_SQLITE3 8 + +#if WORKER_UTILIZATION_MAX_JOB_TYPES < 9 +#error WORKER_UTILIZATION_MAX_JOB_TYPES has to be at least 9 +#endif + +bool global_statistics_enabled = true; + +static struct global_statistics { + uint16_t connected_clients; + + uint64_t web_requests; + uint64_t web_usec; + uint64_t web_usec_max; + uint64_t bytes_received; + uint64_t bytes_sent; + uint64_t content_size; + uint64_t compressed_content_size; + + uint64_t web_client_count; + + uint64_t api_data_queries_made; + uint64_t api_data_db_points_read; + uint64_t api_data_result_points_generated; + + uint64_t api_weights_queries_made; + uint64_t api_weights_db_points_read; + uint64_t api_weights_result_points_generated; + + uint64_t api_badges_queries_made; + uint64_t api_badges_db_points_read; + uint64_t api_badges_result_points_generated; + + uint64_t health_queries_made; + uint64_t health_db_points_read; + uint64_t health_result_points_generated; + + uint64_t ml_queries_made; + uint64_t ml_db_points_read; + uint64_t ml_result_points_generated; + + uint64_t exporters_queries_made; + uint64_t exporters_db_points_read; + + uint64_t backfill_queries_made; + uint64_t backfill_db_points_read; + + uint64_t db_points_stored_per_tier[RRD_STORAGE_TIERS]; + +} global_statistics = { + .connected_clients = 0, + .web_requests = 0, + .web_usec = 0, + .bytes_received = 0, + .bytes_sent = 0, + .content_size = 0, + .compressed_content_size = 0, + .web_client_count = 1, + + .api_data_queries_made = 0, + .api_data_db_points_read = 0, + .api_data_result_points_generated = 0, +}; + +void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array) { + for(size_t tier = 0; tier < storage_tiers ;tier++) { + __atomic_fetch_add(&global_statistics.db_points_stored_per_tier[tier], points_read_per_tier_array[tier], __ATOMIC_RELAXED); + points_read_per_tier_array[tier] = 0; + } +} + +void global_statistics_ml_query_completed(size_t points_read) { + __atomic_fetch_add(&global_statistics.ml_queries_made, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.ml_db_points_read, points_read, __ATOMIC_RELAXED); +} + +void global_statistics_exporters_query_completed(size_t points_read) { + 
__atomic_fetch_add(&global_statistics.exporters_queries_made, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.exporters_db_points_read, points_read, __ATOMIC_RELAXED); +} + +void global_statistics_backfill_query_completed(size_t points_read) { + __atomic_fetch_add(&global_statistics.backfill_queries_made, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.backfill_db_points_read, points_read, __ATOMIC_RELAXED); +} + +void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source) { + switch(query_source) { + case QUERY_SOURCE_API_DATA: + __atomic_fetch_add(&global_statistics.api_data_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_data_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_data_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_ML: + __atomic_fetch_add(&global_statistics.ml_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.ml_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.ml_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_API_WEIGHTS: + __atomic_fetch_add(&global_statistics.api_weights_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_weights_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_weights_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_API_BADGE: + __atomic_fetch_add(&global_statistics.api_badges_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_badges_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.api_badges_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + case QUERY_SOURCE_HEALTH: + __atomic_fetch_add(&global_statistics.health_queries_made, queries, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.health_db_points_read, db_points_read, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.health_result_points_generated, result_points_generated, __ATOMIC_RELAXED); + break; + + default: + case QUERY_SOURCE_UNITTEST: + case QUERY_SOURCE_UNKNOWN: + break; + } +} + +void global_statistics_web_request_completed(uint64_t dt, + uint64_t bytes_received, + uint64_t bytes_sent, + uint64_t content_size, + uint64_t compressed_content_size) { + uint64_t old_web_usec_max = global_statistics.web_usec_max; + while(dt > old_web_usec_max) + __atomic_compare_exchange(&global_statistics.web_usec_max, &old_web_usec_max, &dt, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + + __atomic_fetch_add(&global_statistics.web_requests, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.web_usec, dt, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.bytes_received, bytes_received, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.bytes_sent, bytes_sent, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.content_size, content_size, __ATOMIC_RELAXED); + __atomic_fetch_add(&global_statistics.compressed_content_size, compressed_content_size, __ATOMIC_RELAXED); +} + +uint64_t global_statistics_web_client_connected(void) { + __atomic_fetch_add(&global_statistics.connected_clients, 1, __ATOMIC_RELAXED); + return 
__atomic_fetch_add(&global_statistics.web_client_count, 1, __ATOMIC_RELAXED); +} + +void global_statistics_web_client_disconnected(void) { + __atomic_fetch_sub(&global_statistics.connected_clients, 1, __ATOMIC_RELAXED); +} + +static inline void global_statistics_copy(struct global_statistics *gs, uint8_t options) { + gs->connected_clients = __atomic_load_n(&global_statistics.connected_clients, __ATOMIC_RELAXED); + gs->web_requests = __atomic_load_n(&global_statistics.web_requests, __ATOMIC_RELAXED); + gs->web_usec = __atomic_load_n(&global_statistics.web_usec, __ATOMIC_RELAXED); + gs->web_usec_max = __atomic_load_n(&global_statistics.web_usec_max, __ATOMIC_RELAXED); + gs->bytes_received = __atomic_load_n(&global_statistics.bytes_received, __ATOMIC_RELAXED); + gs->bytes_sent = __atomic_load_n(&global_statistics.bytes_sent, __ATOMIC_RELAXED); + gs->content_size = __atomic_load_n(&global_statistics.content_size, __ATOMIC_RELAXED); + gs->compressed_content_size = __atomic_load_n(&global_statistics.compressed_content_size, __ATOMIC_RELAXED); + gs->web_client_count = __atomic_load_n(&global_statistics.web_client_count, __ATOMIC_RELAXED); + + gs->api_data_queries_made = __atomic_load_n(&global_statistics.api_data_queries_made, __ATOMIC_RELAXED); + gs->api_data_db_points_read = __atomic_load_n(&global_statistics.api_data_db_points_read, __ATOMIC_RELAXED); + gs->api_data_result_points_generated = __atomic_load_n(&global_statistics.api_data_result_points_generated, __ATOMIC_RELAXED); + + gs->api_weights_queries_made = __atomic_load_n(&global_statistics.api_weights_queries_made, __ATOMIC_RELAXED); + gs->api_weights_db_points_read = __atomic_load_n(&global_statistics.api_weights_db_points_read, __ATOMIC_RELAXED); + gs->api_weights_result_points_generated = __atomic_load_n(&global_statistics.api_weights_result_points_generated, __ATOMIC_RELAXED); + + gs->api_badges_queries_made = __atomic_load_n(&global_statistics.api_badges_queries_made, __ATOMIC_RELAXED); + gs->api_badges_db_points_read = __atomic_load_n(&global_statistics.api_badges_db_points_read, __ATOMIC_RELAXED); + gs->api_badges_result_points_generated = __atomic_load_n(&global_statistics.api_badges_result_points_generated, __ATOMIC_RELAXED); + + gs->health_queries_made = __atomic_load_n(&global_statistics.health_queries_made, __ATOMIC_RELAXED); + gs->health_db_points_read = __atomic_load_n(&global_statistics.health_db_points_read, __ATOMIC_RELAXED); + gs->health_result_points_generated = __atomic_load_n(&global_statistics.health_result_points_generated, __ATOMIC_RELAXED); + + gs->ml_queries_made = __atomic_load_n(&global_statistics.ml_queries_made, __ATOMIC_RELAXED); + gs->ml_db_points_read = __atomic_load_n(&global_statistics.ml_db_points_read, __ATOMIC_RELAXED); + gs->ml_result_points_generated = __atomic_load_n(&global_statistics.ml_result_points_generated, __ATOMIC_RELAXED); + + gs->exporters_queries_made = __atomic_load_n(&global_statistics.exporters_queries_made, __ATOMIC_RELAXED); + gs->exporters_db_points_read = __atomic_load_n(&global_statistics.exporters_db_points_read, __ATOMIC_RELAXED); + gs->backfill_queries_made = __atomic_load_n(&global_statistics.backfill_queries_made, __ATOMIC_RELAXED); + gs->backfill_db_points_read = __atomic_load_n(&global_statistics.backfill_db_points_read, __ATOMIC_RELAXED); + + for(size_t tier = 0; tier < storage_tiers ;tier++) + gs->db_points_stored_per_tier[tier] = __atomic_load_n(&global_statistics.db_points_stored_per_tier[tier], __ATOMIC_RELAXED); + + if(options & GLOBAL_STATS_RESET_WEB_USEC_MAX) { + 
uint64_t n = 0; + __atomic_compare_exchange(&global_statistics.web_usec_max, (uint64_t *) &gs->web_usec_max, &n, 1, __ATOMIC_RELAXED, __ATOMIC_RELAXED); + } +} + +static void global_statistics_charts(void) { + static unsigned long long old_web_requests = 0, + old_web_usec = 0, + old_content_size = 0, + old_compressed_content_size = 0; + + static collected_number compression_ratio = -1, + average_response_time = -1; + + static time_t netdata_start_time = 0; + if (!netdata_start_time) + netdata_start_time = now_boottime_sec(); + time_t netdata_uptime = now_boottime_sec() - netdata_start_time; + + struct global_statistics gs; + struct rusage me; + + struct replication_query_statistics replication = replication_get_query_statistics(); + global_statistics_copy(&gs, GLOBAL_STATS_RESET_WEB_USEC_MAX); + getrusage(RUSAGE_SELF, &me); + + // ---------------------------------------------------------------- + + { + static RRDSET *st_cpu = NULL; + static RRDDIM *rd_cpu_user = NULL, + *rd_cpu_system = NULL; + + if (unlikely(!st_cpu)) { + st_cpu = rrdset_create_localhost( + "netdata" + , "server_cpu" + , NULL + , "netdata" + , NULL + , "Netdata CPU usage" + , "milliseconds/s" + , "netdata" + , "stats" + , 130000 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_cpu_user = rrddim_add(st_cpu, "user", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + rd_cpu_system = rrddim_add(st_cpu, "system", NULL, 1, 1000, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_cpu, rd_cpu_user, me.ru_utime.tv_sec * 1000000ULL + me.ru_utime.tv_usec); + rrddim_set_by_pointer(st_cpu, rd_cpu_system, me.ru_stime.tv_sec * 1000000ULL + me.ru_stime.tv_usec); + rrdset_done(st_cpu); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_uptime = NULL; + static RRDDIM *rd_uptime = NULL; + + if (unlikely(!st_uptime)) { + st_uptime = rrdset_create_localhost( + "netdata", + "uptime", + NULL, + "netdata", + NULL, + "Netdata uptime", + "seconds", + "netdata", + "stats", + 130100, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_uptime = rrddim_add(st_uptime, "uptime", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_uptime, rd_uptime, netdata_uptime); + rrdset_done(st_uptime); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_clients = NULL; + static RRDDIM *rd_clients = NULL; + + if (unlikely(!st_clients)) { + st_clients = rrdset_create_localhost( + "netdata" + , "clients" + , NULL + , "api" + , NULL + , "Netdata Web Clients" + , "connected clients" + , "netdata" + , "stats" + , 130200 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_clients = rrddim_add(st_clients, "clients", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_clients, rd_clients, gs.connected_clients); + rrdset_done(st_clients); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_reqs = NULL; + static RRDDIM *rd_requests = NULL; + + if (unlikely(!st_reqs)) { + st_reqs = rrdset_create_localhost( + "netdata" + , "requests" + , NULL + , "api" + , NULL + , "Netdata Web Requests" + , "requests/s" + , "netdata" + , "stats" + , 130300 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_requests = rrddim_add(st_reqs, "requests", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_reqs, rd_requests, (collected_number) gs.web_requests); + rrdset_done(st_reqs); + } + + // 
---------------------------------------------------------------- + + { + static RRDSET *st_bytes = NULL; + static RRDDIM *rd_in = NULL, + *rd_out = NULL; + + if (unlikely(!st_bytes)) { + st_bytes = rrdset_create_localhost( + "netdata" + , "net" + , NULL + , "api" + , NULL + , "Netdata Network Traffic" + , "kilobits/s" + , "netdata" + , "stats" + , 130400 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + + rd_in = rrddim_add(st_bytes, "in", NULL, 8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + rd_out = rrddim_add(st_bytes, "out", NULL, -8, BITS_IN_A_KILOBIT, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_bytes, rd_in, (collected_number) gs.bytes_received); + rrddim_set_by_pointer(st_bytes, rd_out, (collected_number) gs.bytes_sent); + rrdset_done(st_bytes); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_duration = NULL; + static RRDDIM *rd_average = NULL, + *rd_max = NULL; + + if (unlikely(!st_duration)) { + st_duration = rrdset_create_localhost( + "netdata" + , "response_time" + , NULL + , "api" + , NULL + , "Netdata API Response Time" + , "milliseconds/request" + , "netdata" + , "stats" + , 130500 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_average = rrddim_add(st_duration, "average", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + rd_max = rrddim_add(st_duration, "max", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + uint64_t gweb_usec = gs.web_usec; + uint64_t gweb_requests = gs.web_requests; + + uint64_t web_usec = (gweb_usec >= old_web_usec) ? gweb_usec - old_web_usec : 0; + uint64_t web_requests = (gweb_requests >= old_web_requests) ? gweb_requests - old_web_requests : 0; + + old_web_usec = gweb_usec; + old_web_requests = gweb_requests; + + if (web_requests) + average_response_time = (collected_number) (web_usec / web_requests); + + if (unlikely(average_response_time != -1)) + rrddim_set_by_pointer(st_duration, rd_average, average_response_time); + else + rrddim_set_by_pointer(st_duration, rd_average, 0); + + rrddim_set_by_pointer(st_duration, rd_max, ((gs.web_usec_max)?(collected_number)gs.web_usec_max:average_response_time)); + rrdset_done(st_duration); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_compression = NULL; + static RRDDIM *rd_savings = NULL; + + if (unlikely(!st_compression)) { + st_compression = rrdset_create_localhost( + "netdata" + , "compression_ratio" + , NULL + , "api" + , NULL + , "Netdata API Responses Compression Savings Ratio" + , "percentage" + , "netdata" + , "stats" + , 130600 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + // since we don't lock here to read the global statistics + // read the smaller value first + unsigned long long gcompressed_content_size = gs.compressed_content_size; + unsigned long long gcontent_size = gs.content_size; + + unsigned long long compressed_content_size = gcompressed_content_size - old_compressed_content_size; + unsigned long long content_size = gcontent_size - old_content_size; + + old_compressed_content_size = gcompressed_content_size; + old_content_size = gcontent_size; + + if (content_size && content_size >= compressed_content_size) + compression_ratio = ((content_size - compressed_content_size) * 100 * 1000) / content_size; + + if (compression_ratio != -1) + rrddim_set_by_pointer(st_compression, rd_savings, compression_ratio); + + 
rrdset_done(st_compression); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_queries = NULL; + static RRDDIM *rd_api_data_queries = NULL; + static RRDDIM *rd_api_weights_queries = NULL; + static RRDDIM *rd_api_badges_queries = NULL; + static RRDDIM *rd_health_queries = NULL; + static RRDDIM *rd_ml_queries = NULL; + static RRDDIM *rd_exporters_queries = NULL; + static RRDDIM *rd_backfill_queries = NULL; + static RRDDIM *rd_replication_queries = NULL; + + if (unlikely(!st_queries)) { + st_queries = rrdset_create_localhost( + "netdata" + , "queries" + , NULL + , "queries" + , NULL + , "Netdata DB Queries" + , "queries/s" + , "netdata" + , "stats" + , 131000 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_api_data_queries = rrddim_add(st_queries, "/api/v1/data", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_weights_queries = rrddim_add(st_queries, "/api/v1/weights", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_badges_queries = rrddim_add(st_queries, "/api/v1/badge", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_health_queries = rrddim_add(st_queries, "health", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ml_queries = rrddim_add(st_queries, "ml", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_exporters_queries = rrddim_add(st_queries, "exporters", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_backfill_queries = rrddim_add(st_queries, "backfill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_replication_queries = rrddim_add(st_queries, "replication", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_queries, rd_api_data_queries, (collected_number)gs.api_data_queries_made); + rrddim_set_by_pointer(st_queries, rd_api_weights_queries, (collected_number)gs.api_weights_queries_made); + rrddim_set_by_pointer(st_queries, rd_api_badges_queries, (collected_number)gs.api_badges_queries_made); + rrddim_set_by_pointer(st_queries, rd_health_queries, (collected_number)gs.health_queries_made); + rrddim_set_by_pointer(st_queries, rd_ml_queries, (collected_number)gs.ml_queries_made); + rrddim_set_by_pointer(st_queries, rd_exporters_queries, (collected_number)gs.exporters_queries_made); + rrddim_set_by_pointer(st_queries, rd_backfill_queries, (collected_number)gs.backfill_queries_made); + rrddim_set_by_pointer(st_queries, rd_replication_queries, (collected_number)replication.queries_finished); + + rrdset_done(st_queries); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_points_read = NULL; + static RRDDIM *rd_api_data_points_read = NULL; + static RRDDIM *rd_api_weights_points_read = NULL; + static RRDDIM *rd_api_badges_points_read = NULL; + static RRDDIM *rd_health_points_read = NULL; + static RRDDIM *rd_ml_points_read = NULL; + static RRDDIM *rd_exporters_points_read = NULL; + static RRDDIM *rd_backfill_points_read = NULL; + static RRDDIM *rd_replication_points_read = NULL; + + if (unlikely(!st_points_read)) { + st_points_read = rrdset_create_localhost( + "netdata" + , "db_points_read" + , NULL + , "queries" + , NULL + , "Netdata DB Points Query Read" + , "points/s" + , "netdata" + , "stats" + , 131001 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_api_data_points_read = rrddim_add(st_points_read, "/api/v1/data", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_weights_points_read = rrddim_add(st_points_read, "/api/v1/weights", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_badges_points_read = rrddim_add(st_points_read, "/api/v1/badge", 
NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_health_points_read = rrddim_add(st_points_read, "health", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ml_points_read = rrddim_add(st_points_read, "ml", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_exporters_points_read = rrddim_add(st_points_read, "exporters", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_backfill_points_read = rrddim_add(st_points_read, "backfill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_replication_points_read = rrddim_add(st_points_read, "replication", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_points_read, rd_api_data_points_read, (collected_number)gs.api_data_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_api_weights_points_read, (collected_number)gs.api_weights_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_api_badges_points_read, (collected_number)gs.api_badges_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_health_points_read, (collected_number)gs.health_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_ml_points_read, (collected_number)gs.ml_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_exporters_points_read, (collected_number)gs.exporters_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_backfill_points_read, (collected_number)gs.backfill_db_points_read); + rrddim_set_by_pointer(st_points_read, rd_replication_points_read, (collected_number)replication.points_read); + + rrdset_done(st_points_read); + } + + // ---------------------------------------------------------------- + + if(gs.api_data_result_points_generated || replication.points_generated) { + static RRDSET *st_points_generated = NULL; + static RRDDIM *rd_api_data_points_generated = NULL; + static RRDDIM *rd_api_weights_points_generated = NULL; + static RRDDIM *rd_api_badges_points_generated = NULL; + static RRDDIM *rd_health_points_generated = NULL; + static RRDDIM *rd_ml_points_generated = NULL; + static RRDDIM *rd_replication_points_generated = NULL; + + if (unlikely(!st_points_generated)) { + st_points_generated = rrdset_create_localhost( + "netdata" + , "db_points_results" + , NULL + , "queries" + , NULL + , "Netdata Points in Query Results" + , "points/s" + , "netdata" + , "stats" + , 131002 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + rd_api_data_points_generated = rrddim_add(st_points_generated, "/api/v1/data", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_weights_points_generated = rrddim_add(st_points_generated, "/api/v1/weights", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_api_badges_points_generated = rrddim_add(st_points_generated, "/api/v1/badge", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_health_points_generated = rrddim_add(st_points_generated, "health", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_ml_points_generated = rrddim_add(st_points_generated, "ml", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_replication_points_generated = rrddim_add(st_points_generated, "replication", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_points_generated, rd_api_data_points_generated, (collected_number)gs.api_data_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_api_weights_points_generated, (collected_number)gs.api_weights_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_api_badges_points_generated, (collected_number)gs.api_badges_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_health_points_generated, 
(collected_number)gs.health_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_ml_points_generated, (collected_number)gs.ml_result_points_generated); + rrddim_set_by_pointer(st_points_generated, rd_replication_points_generated, (collected_number)replication.points_generated); + + rrdset_done(st_points_generated); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_points_stored = NULL; + static RRDDIM *rds[RRD_STORAGE_TIERS] = {}; + + if (unlikely(!st_points_stored)) { + st_points_stored = rrdset_create_localhost( + "netdata" + , "db_points_stored" + , NULL + , "queries" + , NULL + , "Netdata DB Points Stored" + , "points/s" + , "netdata" + , "stats" + , 131003 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + char buf[30 + 1]; + snprintfz(buf, 30, "tier%zu", tier); + rds[tier] = rrddim_add(st_points_stored, buf, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + } + + for(size_t tier = 0; tier < storage_tiers ;tier++) + rrddim_set_by_pointer(st_points_stored, rds[tier], (collected_number)gs.db_points_stored_per_tier[tier]); + + rrdset_done(st_points_stored); + } +} + +// ---------------------------------------------------------------------------- +// sqlite3 statistics + +struct sqlite3_statistics { + uint64_t sqlite3_queries_made; + uint64_t sqlite3_queries_ok; + uint64_t sqlite3_queries_failed; + uint64_t sqlite3_queries_failed_busy; + uint64_t sqlite3_queries_failed_locked; + uint64_t sqlite3_rows; + uint64_t sqlite3_metadata_cache_hit; + uint64_t sqlite3_context_cache_hit; + uint64_t sqlite3_metadata_cache_miss; + uint64_t sqlite3_context_cache_miss; + uint64_t sqlite3_metadata_cache_spill; + uint64_t sqlite3_context_cache_spill; + uint64_t sqlite3_metadata_cache_write; + uint64_t sqlite3_context_cache_write; + +} sqlite3_statistics = { }; + +void global_statistics_sqlite3_query_completed(bool success, bool busy, bool locked) { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_made, 1, __ATOMIC_RELAXED); + + if(success) { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_ok, 1, __ATOMIC_RELAXED); + } + else { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed, 1, __ATOMIC_RELAXED); + + if(busy) + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed_busy, 1, __ATOMIC_RELAXED); + + if(locked) + __atomic_fetch_add(&sqlite3_statistics.sqlite3_queries_failed_locked, 1, __ATOMIC_RELAXED); + } +} + +void global_statistics_sqlite3_row_completed(void) { + __atomic_fetch_add(&sqlite3_statistics.sqlite3_rows, 1, __ATOMIC_RELAXED); +} + +static inline void sqlite3_statistics_copy(struct sqlite3_statistics *gs) { + static usec_t last_run = 0; + + gs->sqlite3_queries_made = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_made, __ATOMIC_RELAXED); + gs->sqlite3_queries_ok = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_ok, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed_busy = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed_busy, __ATOMIC_RELAXED); + gs->sqlite3_queries_failed_locked = __atomic_load_n(&sqlite3_statistics.sqlite3_queries_failed_locked, __ATOMIC_RELAXED); + gs->sqlite3_rows = __atomic_load_n(&sqlite3_statistics.sqlite3_rows, __ATOMIC_RELAXED); + + usec_t timeout = default_rrd_update_every * USEC_PER_SEC + default_rrd_update_every * USEC_PER_SEC / 3; + usec_t now = now_monotonic_usec(); 
+ if(!last_run) + last_run = now; + usec_t delta = now - last_run; + bool query_sqlite3 = delta < timeout; + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_hit = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_HIT); + else { + gs->sqlite3_metadata_cache_hit = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_hit = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_HIT); + else { + gs->sqlite3_context_cache_hit = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_miss = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_MISS); + else { + gs->sqlite3_metadata_cache_miss = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_miss = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_MISS); + else { + gs->sqlite3_context_cache_miss = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_spill = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL); + else { + gs->sqlite3_metadata_cache_spill = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_spill = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_SPILL); + else { + gs->sqlite3_context_cache_spill = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_metadata_cache_write = (uint64_t) sql_metadata_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE); + else { + gs->sqlite3_metadata_cache_write = UINT64_MAX; + query_sqlite3 = false; + } + + if(query_sqlite3 && now_monotonic_usec() - last_run < timeout) + gs->sqlite3_context_cache_write = (uint64_t) sql_context_cache_stats(SQLITE_DBSTATUS_CACHE_WRITE); + else { + gs->sqlite3_context_cache_write = UINT64_MAX; + query_sqlite3 = false; + } + + last_run = now_monotonic_usec(); +} + +static void sqlite3_statistics_charts(void) { + struct sqlite3_statistics gs; + sqlite3_statistics_copy(&gs); + + if(gs.sqlite3_queries_made) { + static RRDSET *st_sqlite3_queries = NULL; + static RRDDIM *rd_queries = NULL; + + if (unlikely(!st_sqlite3_queries)) { + st_sqlite3_queries = rrdset_create_localhost( + "netdata" + , "sqlite3_queries" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 Queries" + , "queries/s" + , "netdata" + , "stats" + , 131100 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_queries = rrddim_add(st_sqlite3_queries, "queries", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_sqlite3_queries, rd_queries, (collected_number)gs.sqlite3_queries_made); + + rrdset_done(st_sqlite3_queries); + } + + // ---------------------------------------------------------------- + + if(gs.sqlite3_queries_ok || gs.sqlite3_queries_failed) { + static RRDSET *st_sqlite3_queries_by_status = NULL; + static RRDDIM *rd_ok = NULL, *rd_failed = NULL, *rd_busy = NULL, *rd_locked = NULL; + + if (unlikely(!st_sqlite3_queries_by_status)) { + st_sqlite3_queries_by_status = rrdset_create_localhost( + "netdata" + , "sqlite3_queries_by_status" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 Queries by status" + , "queries/s" + , "netdata" + , "stats" + , 131101 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + 
rd_ok = rrddim_add(st_sqlite3_queries_by_status, "ok", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+            rd_failed = rrddim_add(st_sqlite3_queries_by_status, "failed", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+            rd_busy = rrddim_add(st_sqlite3_queries_by_status, "busy", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+            rd_locked = rrddim_add(st_sqlite3_queries_by_status, "locked", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+        }
+
+        rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_ok, (collected_number)gs.sqlite3_queries_ok);
+        rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_failed, (collected_number)gs.sqlite3_queries_failed);
+        rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_busy, (collected_number)gs.sqlite3_queries_failed_busy);
+        rrddim_set_by_pointer(st_sqlite3_queries_by_status, rd_locked, (collected_number)gs.sqlite3_queries_failed_locked);
+
+        rrdset_done(st_sqlite3_queries_by_status);
+    }
+
+    // ----------------------------------------------------------------
+
+    if(gs.sqlite3_rows) {
+        static RRDSET *st_sqlite3_rows = NULL;
+        static RRDDIM *rd_rows = NULL;
+
+        if (unlikely(!st_sqlite3_rows)) {
+            st_sqlite3_rows = rrdset_create_localhost(
+                "netdata"
+                , "sqlite3_rows"
+                , NULL
+                , "sqlite3"
+                , NULL
+                , "Netdata SQLite3 Rows"
+                , "rows/s"
+                , "netdata"
+                , "stats"
+                , 131102
+                , localhost->rrd_update_every
+                , RRDSET_TYPE_LINE
+            );
+
+            rd_rows = rrddim_add(st_sqlite3_rows, "ok", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+        }
+
+        rrddim_set_by_pointer(st_sqlite3_rows, rd_rows, (collected_number)gs.sqlite3_rows);
+
+        rrdset_done(st_sqlite3_rows);
+    }
+
+    if(gs.sqlite3_metadata_cache_hit) {
+        static RRDSET *st_sqlite3_cache = NULL;
+        static RRDDIM *rd_cache_hit = NULL;
+        static RRDDIM *rd_cache_miss = NULL;
+        static RRDDIM *rd_cache_spill = NULL;
+        static RRDDIM *rd_cache_write = NULL;
+
+        if (unlikely(!st_sqlite3_cache)) {
+            st_sqlite3_cache = rrdset_create_localhost(
+                "netdata"
+                , "sqlite3_metadata_cache"
+                , NULL
+                , "sqlite3"
+                , NULL
+                , "Netdata SQLite3 metadata cache"
+                , "ops/s"
+                , "netdata"
+                , "stats"
+                , 131103
+                , localhost->rrd_update_every
+                , RRDSET_TYPE_LINE
+            );
+
+            rd_cache_hit = rrddim_add(st_sqlite3_cache, "cache_hit", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+            rd_cache_miss = rrddim_add(st_sqlite3_cache, "cache_miss", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+            rd_cache_spill = rrddim_add(st_sqlite3_cache, "cache_spill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+            rd_cache_write = rrddim_add(st_sqlite3_cache, "cache_write", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+        }
+
+        if(gs.sqlite3_metadata_cache_hit != UINT64_MAX)
+            rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_metadata_cache_hit);
+
+        if(gs.sqlite3_metadata_cache_miss != UINT64_MAX)
+            rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_metadata_cache_miss);
+
+        if(gs.sqlite3_metadata_cache_spill != UINT64_MAX)
+            rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_metadata_cache_spill);
+
+        if(gs.sqlite3_metadata_cache_write != UINT64_MAX)
+            rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_metadata_cache_write);
+
+        rrdset_done(st_sqlite3_cache);
+    }
+
+    if(gs.sqlite3_context_cache_hit) {
+        static RRDSET *st_sqlite3_cache = NULL;
+        static RRDDIM *rd_cache_hit = NULL;
+        static RRDDIM *rd_cache_miss = NULL;
+        static RRDDIM *rd_cache_spill = NULL;
+        static RRDDIM *rd_cache_write = NULL;
+
+        if (unlikely(!st_sqlite3_cache)) {
+            st_sqlite3_cache = rrdset_create_localhost(
+                "netdata"
+
, "sqlite3_context_cache" + , NULL + , "sqlite3" + , NULL + , "Netdata SQLite3 context cache" + , "ops/s" + , "netdata" + , "stats" + , 131104 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + rd_cache_hit = rrddim_add(st_sqlite3_cache, "cache_hit", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_miss = rrddim_add(st_sqlite3_cache, "cache_miss", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_spill = rrddim_add(st_sqlite3_cache, "cache_spill", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_cache_write = rrddim_add(st_sqlite3_cache, "cache_write", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + if(gs.sqlite3_context_cache_hit != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_hit, (collected_number)gs.sqlite3_context_cache_hit); + + if(gs.sqlite3_context_cache_miss != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_miss, (collected_number)gs.sqlite3_context_cache_miss); + + if(gs.sqlite3_context_cache_spill != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_spill, (collected_number)gs.sqlite3_context_cache_spill); + + if(gs.sqlite3_context_cache_write != UINT64_MAX) + rrddim_set_by_pointer(st_sqlite3_cache, rd_cache_write, (collected_number)gs.sqlite3_context_cache_write); + + rrdset_done(st_sqlite3_cache); + } + + // ---------------------------------------------------------------- +} + +static void dbengine_statistics_charts(void) { +#ifdef ENABLE_DBENGINE + if(netdata_rwlock_tryrdlock(&rrd_rwlock) == 0) { + RRDHOST *host; + unsigned long long stats_array[RRDENG_NR_STATS] = {0}; + unsigned long long local_stats_array[RRDENG_NR_STATS]; + unsigned dbengine_contexts = 0, counted_multihost_db[RRD_STORAGE_TIERS] = { 0 }, i; + + rrdhost_foreach_read(host) { + if (!rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) { + + /* get localhost's DB engine's statistics for each tier */ + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if(host->db[tier].mode != RRD_MEMORY_MODE_DBENGINE) continue; + if(!host->db[tier].instance) continue; + + if(is_storage_engine_shared(host->db[tier].instance)) { + if(counted_multihost_db[tier]) + continue; + else + counted_multihost_db[tier] = 1; + } + + ++dbengine_contexts; + rrdeng_get_37_statistics((struct rrdengine_instance *)host->db[tier].instance, local_stats_array); + for (i = 0; i < RRDENG_NR_STATS; ++i) { + /* aggregate statistics across hosts */ + stats_array[i] += local_stats_array[i]; + } + } + } + } + rrd_unlock(); + + if (dbengine_contexts) { + /* deduplicate global statistics by getting the ones from the last context */ + stats_array[30] = local_stats_array[30]; + stats_array[31] = local_stats_array[31]; + stats_array[32] = local_stats_array[32]; + stats_array[34] = local_stats_array[34]; + stats_array[36] = local_stats_array[36]; + + // ---------------------------------------------------------------- + + { + static RRDSET *st_compression = NULL; + static RRDDIM *rd_savings = NULL; + + if (unlikely(!st_compression)) { + st_compression = rrdset_create_localhost( + "netdata", + "dbengine_compression_ratio", + NULL, + "dbengine", + NULL, + "Netdata DB engine data extents' compression savings ratio", + "percentage", + "netdata", + "stats", + 132000, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_savings = rrddim_add(st_compression, "savings", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + unsigned long long ratio; + unsigned long long compressed_content_size = stats_array[12]; + unsigned long long content_size = stats_array[11]; + + if (content_size) { + // allow negative savings + 
ratio = ((content_size - compressed_content_size) * 100 * 1000) / content_size; + } else { + ratio = 0; + } + rrddim_set_by_pointer(st_compression, rd_savings, ratio); + + rrdset_done(st_compression); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_pg_cache_hit_ratio = NULL; + static RRDDIM *rd_hit_ratio = NULL; + + if (unlikely(!st_pg_cache_hit_ratio)) { + st_pg_cache_hit_ratio = rrdset_create_localhost( + "netdata", + "page_cache_hit_ratio", + NULL, + "dbengine", + NULL, + "Netdata DB engine page cache hit ratio", + "percentage", + "netdata", + "stats", + 132003, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_hit_ratio = rrddim_add(st_pg_cache_hit_ratio, "ratio", NULL, 1, 1000, RRD_ALGORITHM_ABSOLUTE); + } + + static unsigned long long old_hits = 0; + static unsigned long long old_misses = 0; + unsigned long long hits = stats_array[7]; + unsigned long long misses = stats_array[8]; + unsigned long long hits_delta; + unsigned long long misses_delta; + unsigned long long ratio; + + hits_delta = hits - old_hits; + misses_delta = misses - old_misses; + old_hits = hits; + old_misses = misses; + + if (hits_delta + misses_delta) { + ratio = (hits_delta * 100 * 1000) / (hits_delta + misses_delta); + } else { + ratio = 0; + } + rrddim_set_by_pointer(st_pg_cache_hit_ratio, rd_hit_ratio, ratio); + + rrdset_done(st_pg_cache_hit_ratio); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_pg_cache_pages = NULL; + static RRDDIM *rd_descriptors = NULL; + static RRDDIM *rd_populated = NULL; + static RRDDIM *rd_dirty = NULL; + static RRDDIM *rd_backfills = NULL; + static RRDDIM *rd_evictions = NULL; + static RRDDIM *rd_used_by_collectors = NULL; + + if (unlikely(!st_pg_cache_pages)) { + st_pg_cache_pages = rrdset_create_localhost( + "netdata", + "page_cache_stats", + NULL, + "dbengine", + NULL, + "Netdata dbengine page cache statistics", + "pages", + "netdata", + "stats", + 132004, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_descriptors = rrddim_add(st_pg_cache_pages, "descriptors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_populated = rrddim_add(st_pg_cache_pages, "populated", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_dirty = rrddim_add(st_pg_cache_pages, "dirty", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_backfills = rrddim_add(st_pg_cache_pages, "backfills", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_evictions = rrddim_add(st_pg_cache_pages, "evictions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_used_by_collectors = + rrddim_add(st_pg_cache_pages, "used_by_collectors", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_pg_cache_pages, rd_descriptors, (collected_number)stats_array[27]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_populated, (collected_number)stats_array[3]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_dirty, (collected_number)stats_array[0] + stats_array[4]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_backfills, (collected_number)stats_array[9]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_evictions, (collected_number)stats_array[10]); + rrddim_set_by_pointer(st_pg_cache_pages, rd_used_by_collectors, (collected_number)stats_array[0]); + rrdset_done(st_pg_cache_pages); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_long_term_pages = NULL; + static RRDDIM *rd_total = NULL; + static RRDDIM *rd_insertions = NULL; + static RRDDIM *rd_deletions = NULL; + static RRDDIM 
*rd_flushing_pressure_deletions = NULL; + + if (unlikely(!st_long_term_pages)) { + st_long_term_pages = rrdset_create_localhost( + "netdata", + "dbengine_long_term_page_stats", + NULL, + "dbengine", + NULL, + "Netdata dbengine long-term page statistics", + "pages", + "netdata", + "stats", + 132005, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_total = rrddim_add(st_long_term_pages, "total", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_insertions = rrddim_add(st_long_term_pages, "insertions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_deletions = rrddim_add(st_long_term_pages, "deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_flushing_pressure_deletions = rrddim_add( + st_long_term_pages, "flushing_pressure_deletions", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_long_term_pages, rd_total, (collected_number)stats_array[2]); + rrddim_set_by_pointer(st_long_term_pages, rd_insertions, (collected_number)stats_array[5]); + rrddim_set_by_pointer(st_long_term_pages, rd_deletions, (collected_number)stats_array[6]); + rrddim_set_by_pointer( + st_long_term_pages, rd_flushing_pressure_deletions, (collected_number)stats_array[36]); + rrdset_done(st_long_term_pages); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_io_stats = NULL; + static RRDDIM *rd_reads = NULL; + static RRDDIM *rd_writes = NULL; + + if (unlikely(!st_io_stats)) { + st_io_stats = rrdset_create_localhost( + "netdata", + "dbengine_io_throughput", + NULL, + "dbengine", + NULL, + "Netdata DB engine I/O throughput", + "MiB/s", + "netdata", + "stats", + 132006, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1024 * 1024, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[17]); + rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[15]); + rrdset_done(st_io_stats); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_io_stats = NULL; + static RRDDIM *rd_reads = NULL; + static RRDDIM *rd_writes = NULL; + + if (unlikely(!st_io_stats)) { + st_io_stats = rrdset_create_localhost( + "netdata", + "dbengine_io_operations", + NULL, + "dbengine", + NULL, + "Netdata DB engine I/O operations", + "operations/s", + "netdata", + "stats", + 132007, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_reads = rrddim_add(st_io_stats, "reads", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + rd_writes = rrddim_add(st_io_stats, "writes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_io_stats, rd_reads, (collected_number)stats_array[18]); + rrddim_set_by_pointer(st_io_stats, rd_writes, (collected_number)stats_array[16]); + rrdset_done(st_io_stats); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_errors = NULL; + static RRDDIM *rd_fs_errors = NULL; + static RRDDIM *rd_io_errors = NULL; + static RRDDIM *pg_cache_over_half_dirty_events = NULL; + + if (unlikely(!st_errors)) { + st_errors = rrdset_create_localhost( + "netdata", + "dbengine_global_errors", + NULL, + "dbengine", + NULL, + "Netdata DB engine errors", + "errors/s", + "netdata", + "stats", + 132008, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_io_errors = rrddim_add(st_errors, "io_errors", NULL, 1, 1, 
RRD_ALGORITHM_INCREMENTAL); + rd_fs_errors = rrddim_add(st_errors, "fs_errors", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + pg_cache_over_half_dirty_events = + rrddim_add(st_errors, "pg_cache_over_half_dirty_events", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + } + + rrddim_set_by_pointer(st_errors, rd_io_errors, (collected_number)stats_array[30]); + rrddim_set_by_pointer(st_errors, rd_fs_errors, (collected_number)stats_array[31]); + rrddim_set_by_pointer(st_errors, pg_cache_over_half_dirty_events, (collected_number)stats_array[34]); + rrdset_done(st_errors); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_fd = NULL; + static RRDDIM *rd_fd_current = NULL; + static RRDDIM *rd_fd_max = NULL; + + if (unlikely(!st_fd)) { + st_fd = rrdset_create_localhost( + "netdata", + "dbengine_global_file_descriptors", + NULL, + "dbengine", + NULL, + "Netdata DB engine File Descriptors", + "descriptors", + "netdata", + "stats", + 132009, + localhost->rrd_update_every, + RRDSET_TYPE_LINE); + + rd_fd_current = rrddim_add(st_fd, "current", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_fd_max = rrddim_add(st_fd, "max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(st_fd, rd_fd_current, (collected_number)stats_array[32]); + /* Careful here, modify this accordingly if the File-Descriptor budget ever changes */ + rrddim_set_by_pointer(st_fd, rd_fd_max, (collected_number)rlimit_nofile.rlim_cur / 4); + rrdset_done(st_fd); + } + + // ---------------------------------------------------------------- + + { + static RRDSET *st_ram_usage = NULL; + static RRDDIM *rd_cached = NULL; + static RRDDIM *rd_pinned = NULL; + static RRDDIM *rd_cache_metadata = NULL; + static RRDDIM *rd_index_metadata = NULL; + static RRDDIM *rd_pages_metadata = NULL; + + collected_number API_producers, populated_pages, cache_metadata, pages_on_disk, + page_cache_descriptors, index_metadata, pages_metadata; + + if (unlikely(!st_ram_usage)) { + st_ram_usage = rrdset_create_localhost( + "netdata", + "dbengine_ram", + NULL, + "dbengine", + NULL, + "Netdata DB engine RAM usage", + "MiB", + "netdata", + "stats", + 132010, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + + rd_cached = rrddim_add(st_ram_usage, "cache", NULL, RRDENG_BLOCK_SIZE, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + rd_pinned = rrddim_add(st_ram_usage, "collectors", NULL, RRDENG_BLOCK_SIZE, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + rd_cache_metadata = rrddim_add(st_ram_usage, "cache metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + rd_pages_metadata = rrddim_add(st_ram_usage, "pages metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + rd_index_metadata = rrddim_add(st_ram_usage, "index metadata", NULL, 1, 1024*1024, RRD_ALGORITHM_ABSOLUTE); + } + + API_producers = (collected_number)stats_array[0]; + pages_on_disk = (collected_number)stats_array[2]; + populated_pages = (collected_number)stats_array[3]; + page_cache_descriptors = (collected_number)stats_array[27]; + + cache_metadata = page_cache_descriptors * sizeof(struct page_cache_descr); + + pages_metadata = pages_on_disk * sizeof(struct rrdeng_page_descr); + + /* This is an empirical estimation for Judy array indexing and extent structures */ + index_metadata = pages_on_disk * 58; + + rrddim_set_by_pointer(st_ram_usage, rd_cached, populated_pages - API_producers); + rrddim_set_by_pointer(st_ram_usage, rd_pinned, API_producers); + rrddim_set_by_pointer(st_ram_usage, rd_cache_metadata, cache_metadata); + rrddim_set_by_pointer(st_ram_usage, rd_pages_metadata, 
pages_metadata);
+            rrddim_set_by_pointer(st_ram_usage, rd_index_metadata, index_metadata);
+            rrdset_done(st_ram_usage);
+            }
+        }
+    }
+#endif
+}
+
+static void update_strings_charts(void) {
+    static RRDSET *st_ops = NULL, *st_entries = NULL, *st_mem = NULL;
+    static RRDDIM *rd_ops_inserts = NULL, *rd_ops_deletes = NULL, *rd_ops_searches = NULL, *rd_ops_duplications = NULL, *rd_ops_releases = NULL;
+    static RRDDIM *rd_entries_entries = NULL, *rd_entries_refs = NULL;
+    static RRDDIM *rd_mem = NULL;
+
+    size_t inserts, deletes, searches, entries, references, memory, duplications, releases;
+
+    string_statistics(&inserts, &deletes, &searches, &entries, &references, &memory, &duplications, &releases);
+
+    if (unlikely(!st_ops)) {
+        st_ops = rrdset_create_localhost(
+            "netdata"
+            , "strings_ops"
+            , NULL
+            , "strings"
+            , NULL
+            , "Strings operations"
+            , "ops/s"
+            , "netdata"
+            , "stats"
+            , 910000
+            , localhost->rrd_update_every
+            , RRDSET_TYPE_LINE);
+
+        rd_ops_inserts = rrddim_add(st_ops, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+        rd_ops_deletes = rrddim_add(st_ops, "deletes", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+        rd_ops_searches = rrddim_add(st_ops, "searches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+        rd_ops_duplications = rrddim_add(st_ops, "duplications", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL);
+        rd_ops_releases = rrddim_add(st_ops, "releases", NULL, -1, 1, RRD_ALGORITHM_INCREMENTAL);
+    }
+
+    rrddim_set_by_pointer(st_ops, rd_ops_inserts, (collected_number)inserts);
+    rrddim_set_by_pointer(st_ops, rd_ops_deletes, (collected_number)deletes);
+    rrddim_set_by_pointer(st_ops, rd_ops_searches, (collected_number)searches);
+    rrddim_set_by_pointer(st_ops, rd_ops_duplications, (collected_number)duplications);
+    rrddim_set_by_pointer(st_ops, rd_ops_releases, (collected_number)releases);
+    rrdset_done(st_ops);
+
+    if (unlikely(!st_entries)) {
+        st_entries = rrdset_create_localhost(
+            "netdata"
+            , "strings_entries"
+            , NULL
+            , "strings"
+            , NULL
+            , "Strings entries"
+            , "entries"
+            , "netdata"
+            , "stats"
+            , 910001
+            , localhost->rrd_update_every
+            , RRDSET_TYPE_AREA);
+
+        rd_entries_entries = rrddim_add(st_entries, "entries", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+        rd_entries_refs = rrddim_add(st_entries, "references", NULL, 1, -1, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    rrddim_set_by_pointer(st_entries, rd_entries_entries, (collected_number)entries);
+    rrddim_set_by_pointer(st_entries, rd_entries_refs, (collected_number)references);
+    rrdset_done(st_entries);
+
+    if (unlikely(!st_mem)) {
+        st_mem = rrdset_create_localhost(
+            "netdata"
+            , "strings_memory"
+            , NULL
+            , "strings"
+            , NULL
+            , "Strings memory"
+            , "bytes"
+            , "netdata"
+            , "stats"
+            , 910002
+            , localhost->rrd_update_every
+            , RRDSET_TYPE_AREA);
+
+        rd_mem = rrddim_add(st_mem, "memory", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+    }
+
+    rrddim_set_by_pointer(st_mem, rd_mem, (collected_number)memory);
+    rrdset_done(st_mem);
+}
+
+static void update_heartbeat_charts(void) {
+    static RRDSET *st_heartbeat = NULL;
+    static RRDDIM *rd_heartbeat_min = NULL;
+    static RRDDIM *rd_heartbeat_max = NULL;
+    static RRDDIM *rd_heartbeat_avg = NULL;
+
+    if (unlikely(!st_heartbeat)) {
+        st_heartbeat = rrdset_create_localhost(
+            "netdata"
+            , "heartbeat"
+            , NULL
+            , "heartbeat"
+            , NULL
+            , "System clock jitter"
+            , "microseconds"
+            , "netdata"
+            , "stats"
+            , 900000
+            , localhost->rrd_update_every
+            , RRDSET_TYPE_AREA);
+
+        rd_heartbeat_min = rrddim_add(st_heartbeat, "min", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE);
+        rd_heartbeat_max = rrddim_add(st_heartbeat,
"max", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + rd_heartbeat_avg = rrddim_add(st_heartbeat, "average", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + usec_t min, max, average; + size_t count; + + heartbeat_statistics(&min, &max, &average, &count); + + rrddim_set_by_pointer(st_heartbeat, rd_heartbeat_min, (collected_number)min); + rrddim_set_by_pointer(st_heartbeat, rd_heartbeat_max, (collected_number)max); + rrddim_set_by_pointer(st_heartbeat, rd_heartbeat_avg, (collected_number)average); + + rrdset_done(st_heartbeat); +} + +// --------------------------------------------------------------------------------------------------------------------- +// dictionary statistics + +struct dictionary_categories { + struct dictionary_stats *stats; + const char *family; + const char *context_prefix; + int priority; + + RRDSET *st_dicts; + RRDDIM *rd_dicts_active; + RRDDIM *rd_dicts_deleted; + + RRDSET *st_items; + RRDDIM *rd_items_entries; + RRDDIM *rd_items_referenced; + RRDDIM *rd_items_pending_deletion; + + RRDSET *st_ops; + RRDDIM *rd_ops_creations; + RRDDIM *rd_ops_destructions; + RRDDIM *rd_ops_flushes; + RRDDIM *rd_ops_traversals; + RRDDIM *rd_ops_walkthroughs; + RRDDIM *rd_ops_garbage_collections; + RRDDIM *rd_ops_searches; + RRDDIM *rd_ops_inserts; + RRDDIM *rd_ops_resets; + RRDDIM *rd_ops_deletes; + + RRDSET *st_callbacks; + RRDDIM *rd_callbacks_inserts; + RRDDIM *rd_callbacks_conflicts; + RRDDIM *rd_callbacks_reacts; + RRDDIM *rd_callbacks_deletes; + + RRDSET *st_memory; + RRDDIM *rd_memory_indexed; + RRDDIM *rd_memory_values; + RRDDIM *rd_memory_dict; + + RRDSET *st_spins; + RRDDIM *rd_spins_use; + RRDDIM *rd_spins_search; + RRDDIM *rd_spins_insert; + RRDDIM *rd_spins_delete; + +} dictionary_categories[] = { + { .stats = &dictionary_stats_category_other, "dictionaries", "dictionaries", 900000 }, + + // terminator + { .stats = NULL, NULL, NULL, 0 }, +}; + +#define load_dictionary_stats_entry(x) total += (size_t)(stats.x = __atomic_load_n(&c->stats->x, __ATOMIC_RELAXED)) + +static void update_dictionary_category_charts(struct dictionary_categories *c) { + struct dictionary_stats stats; + stats.name = c->stats->name; + + // ------------------------------------------------------------------------ + + size_t total = 0; + load_dictionary_stats_entry(dictionaries.active); + load_dictionary_stats_entry(dictionaries.deleted); + + if(c->st_dicts || total != 0) { + if (unlikely(!c->st_dicts)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.dictionaries", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.dictionaries", c->context_prefix); + + c->st_dicts = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionaries" + , "dictionaries" + , "netdata" + , "stats" + , c->priority + 0 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_dicts_active = rrddim_add(c->st_dicts, "active", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_dicts_deleted = rrddim_add(c->st_dicts, "deleted", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrdlabels_add(c->st_dicts->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_dicts, c->rd_dicts_active, (collected_number)stats.dictionaries.active); + rrddim_set_by_pointer(c->st_dicts, c->rd_dicts_deleted, (collected_number)stats.dictionaries.deleted); + rrdset_done(c->st_dicts); + } + + // ------------------------------------------------------------------------ + + total = 0; + 
load_dictionary_stats_entry(items.entries); + load_dictionary_stats_entry(items.referenced); + load_dictionary_stats_entry(items.pending_deletion); + + if(c->st_items || total != 0) { + if (unlikely(!c->st_items)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.items", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.items", c->context_prefix); + + c->st_items = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Items" + , "items" + , "netdata" + , "stats" + , c->priority + 1 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_items_entries = rrddim_add(c->st_items, "active", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_items_pending_deletion = rrddim_add(c->st_items, "deleted", NULL, -1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_items_referenced = rrddim_add(c->st_items, "referenced", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrdlabels_add(c->st_items->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_items, c->rd_items_entries, stats.items.entries); + rrddim_set_by_pointer(c->st_items, c->rd_items_pending_deletion, stats.items.pending_deletion); + rrddim_set_by_pointer(c->st_items, c->rd_items_referenced, stats.items.referenced); + rrdset_done(c->st_items); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(ops.creations); + load_dictionary_stats_entry(ops.destructions); + load_dictionary_stats_entry(ops.flushes); + load_dictionary_stats_entry(ops.traversals); + load_dictionary_stats_entry(ops.walkthroughs); + load_dictionary_stats_entry(ops.garbage_collections); + load_dictionary_stats_entry(ops.searches); + load_dictionary_stats_entry(ops.inserts); + load_dictionary_stats_entry(ops.resets); + load_dictionary_stats_entry(ops.deletes); + + if(c->st_ops || total != 0) { + if (unlikely(!c->st_ops)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.ops", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.ops", c->context_prefix); + + c->st_ops = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Operations" + , "ops/s" + , "netdata" + , "stats" + , c->priority + 2 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_ops_creations = rrddim_add(c->st_ops, "creations", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_destructions = rrddim_add(c->st_ops, "destructions", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_flushes = rrddim_add(c->st_ops, "flushes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_traversals = rrddim_add(c->st_ops, "traversals", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_walkthroughs = rrddim_add(c->st_ops, "walkthroughs", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_garbage_collections = rrddim_add(c->st_ops, "garbage_collections", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_searches = rrddim_add(c->st_ops, "searches", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_inserts = rrddim_add(c->st_ops, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_resets = rrddim_add(c->st_ops, "resets", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_ops_deletes = rrddim_add(c->st_ops, "deletes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrdlabels_add(c->st_ops->rrdlabels, 
"category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_ops, c->rd_ops_creations, (collected_number)stats.ops.creations); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_destructions, (collected_number)stats.ops.destructions); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_flushes, (collected_number)stats.ops.flushes); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_traversals, (collected_number)stats.ops.traversals); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_walkthroughs, (collected_number)stats.ops.walkthroughs); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_garbage_collections, (collected_number)stats.ops.garbage_collections); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_searches, (collected_number)stats.ops.searches); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_inserts, (collected_number)stats.ops.inserts); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_resets, (collected_number)stats.ops.resets); + rrddim_set_by_pointer(c->st_ops, c->rd_ops_deletes, (collected_number)stats.ops.deletes); + + rrdset_done(c->st_ops); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(callbacks.inserts); + load_dictionary_stats_entry(callbacks.conflicts); + load_dictionary_stats_entry(callbacks.reacts); + load_dictionary_stats_entry(callbacks.deletes); + + if(c->st_callbacks || total != 0) { + if (unlikely(!c->st_callbacks)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.callbacks", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.callbacks", c->context_prefix); + + c->st_callbacks = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Callbacks" + , "callbacks/s" + , "netdata" + , "stats" + , c->priority + 3 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_callbacks_inserts = rrddim_add(c->st_callbacks, "inserts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_callbacks_deletes = rrddim_add(c->st_callbacks, "deletes", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_callbacks_conflicts = rrddim_add(c->st_callbacks, "conflicts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_callbacks_reacts = rrddim_add(c->st_callbacks, "reacts", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrdlabels_add(c->st_callbacks->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_inserts, (collected_number)stats.callbacks.inserts); + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_conflicts, (collected_number)stats.callbacks.conflicts); + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_reacts, (collected_number)stats.callbacks.reacts); + rrddim_set_by_pointer(c->st_callbacks, c->rd_callbacks_deletes, (collected_number)stats.callbacks.deletes); + + rrdset_done(c->st_callbacks); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(memory.indexed); + load_dictionary_stats_entry(memory.values); + load_dictionary_stats_entry(memory.dict); + + if(c->st_memory || total != 0) { + if (unlikely(!c->st_memory)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.memory", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.memory", c->context_prefix); + + c->st_memory = rrdset_create_localhost( + "netdata" + , id + 
, NULL + , c->family + , context + , "Dictionary Memory" + , "bytes" + , "netdata" + , "stats" + , c->priority + 4 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + c->rd_memory_indexed = rrddim_add(c->st_memory, "index", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_memory_values = rrddim_add(c->st_memory, "data", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + c->rd_memory_dict = rrddim_add(c->st_memory, "structures", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrdlabels_add(c->st_memory->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_memory, c->rd_memory_indexed, (collected_number)stats.memory.indexed); + rrddim_set_by_pointer(c->st_memory, c->rd_memory_values, (collected_number)stats.memory.values); + rrddim_set_by_pointer(c->st_memory, c->rd_memory_dict, (collected_number)stats.memory.dict); + + rrdset_done(c->st_memory); + } + + // ------------------------------------------------------------------------ + + total = 0; + load_dictionary_stats_entry(spin_locks.use_spins); + load_dictionary_stats_entry(spin_locks.search_spins); + load_dictionary_stats_entry(spin_locks.insert_spins); + load_dictionary_stats_entry(spin_locks.delete_spins); + + if(c->st_spins || total != 0) { + if (unlikely(!c->st_spins)) { + char id[RRD_ID_LENGTH_MAX + 1]; + snprintfz(id, RRD_ID_LENGTH_MAX, "%s.%s.spins", c->context_prefix, stats.name); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintfz(context, RRD_ID_LENGTH_MAX, "netdata.%s.category.spins", c->context_prefix); + + c->st_spins = rrdset_create_localhost( + "netdata" + , id + , NULL + , c->family + , context + , "Dictionary Spins" + , "count" + , "netdata" + , "stats" + , c->priority + 5 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + c->rd_spins_use = rrddim_add(c->st_spins, "use", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_spins_search = rrddim_add(c->st_spins, "search", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_spins_insert = rrddim_add(c->st_spins, "insert", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + c->rd_spins_delete = rrddim_add(c->st_spins, "delete", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + rrdlabels_add(c->st_spins->rrdlabels, "category", stats.name, RRDLABEL_SRC_AUTO); + } + + rrddim_set_by_pointer(c->st_spins, c->rd_spins_use, (collected_number)stats.spin_locks.use_spins); + rrddim_set_by_pointer(c->st_spins, c->rd_spins_search, (collected_number)stats.spin_locks.search_spins); + rrddim_set_by_pointer(c->st_spins, c->rd_spins_insert, (collected_number)stats.spin_locks.insert_spins); + rrddim_set_by_pointer(c->st_spins, c->rd_spins_delete, (collected_number)stats.spin_locks.delete_spins); + + rrdset_done(c->st_spins); + } +} + +#ifdef NETDATA_TRACE_ALLOCATIONS + +struct memory_trace_data { + RRDSET *st_memory; + RRDSET *st_allocations; + RRDSET *st_avg_alloc; + RRDSET *st_ops; +}; + +static int do_memory_trace_item(void *item, void *data) { + struct memory_trace_data *tmp = data; + struct malloc_trace *p = item; + + // ------------------------------------------------------------------------ + + if(!p->rd_bytes) + p->rd_bytes = rrddim_add(tmp->st_memory, p->function, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + collected_number bytes = (collected_number)__atomic_load_n(&p->bytes, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_memory, p->rd_bytes, bytes); + + // ------------------------------------------------------------------------ + + if(!p->rd_allocations) + p->rd_allocations = rrddim_add(tmp->st_allocations, p->function, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + 
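+    // dimensions are created lazily, one per traced function, the first time the
+    // function shows up in the allocation trace walkthrough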
collected_number allocs = (collected_number)__atomic_load_n(&p->allocations, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_allocations, p->rd_allocations, allocs); + + // ------------------------------------------------------------------------ + + if(!p->rd_avg_alloc) + p->rd_avg_alloc = rrddim_add(tmp->st_avg_alloc, p->function, NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + + collected_number avg_alloc = (allocs)?(bytes * 100 / allocs):0; + rrddim_set_by_pointer(tmp->st_avg_alloc, p->rd_avg_alloc, avg_alloc); + + // ------------------------------------------------------------------------ + + if(!p->rd_ops) + p->rd_ops = rrddim_add(tmp->st_ops, p->function, NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + + collected_number ops = 0; + ops += (collected_number)__atomic_load_n(&p->malloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->calloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->realloc_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->strdup_calls, __ATOMIC_RELAXED); + ops += (collected_number)__atomic_load_n(&p->free_calls, __ATOMIC_RELAXED); + rrddim_set_by_pointer(tmp->st_ops, p->rd_ops, ops); + + // ------------------------------------------------------------------------ + + return 1; +} +static void malloc_trace_statistics(void) { + static struct memory_trace_data tmp = { + .st_memory = NULL, + .st_allocations = NULL, + .st_avg_alloc = NULL, + .st_ops = NULL, + }; + + if(!tmp.st_memory) { + tmp.st_memory = rrdset_create_localhost( + "netdata" + , "memory_size" + , NULL + , "memory" + , "netdata.memory.size" + , "Netdata Memory Used by Function" + , "bytes" + , "netdata" + , "stats" + , 900000 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + if(!tmp.st_ops) { + tmp.st_ops = rrdset_create_localhost( + "netdata" + , "memory_operations" + , NULL + , "memory" + , "netdata.memory.operations" + , "Netdata Memory Operations by Function" + , "ops/s" + , "netdata" + , "stats" + , 900001 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + } + + if(!tmp.st_allocations) { + tmp.st_allocations = rrdset_create_localhost( + "netdata" + , "memory_allocations" + , NULL + , "memory" + , "netdata.memory.allocations" + , "Netdata Memory Allocations by Function" + , "allocations" + , "netdata" + , "stats" + , 900002 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + if(!tmp.st_avg_alloc) { + tmp.st_avg_alloc = rrdset_create_localhost( + "netdata" + , "memory_avg_alloc" + , NULL + , "memory" + , "netdata.memory.avg_alloc" + , "Netdata Average Allocation Size by Function" + , "bytes" + , "netdata" + , "stats" + , 900003 + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + } + + malloc_trace_walkthrough(do_memory_trace_item, &tmp); + + rrdset_done(tmp.st_memory); + rrdset_done(tmp.st_ops); + rrdset_done(tmp.st_allocations); + rrdset_done(tmp.st_avg_alloc); +} +#endif + +static void dictionary_statistics(void) { + for(int i = 0; dictionary_categories[i].stats ;i++) { + update_dictionary_category_charts(&dictionary_categories[i]); + } +} + +// --------------------------------------------------------------------------------------------------------------------- +// worker utilization + +#define WORKERS_MIN_PERCENT_DEFAULT 10000.0 + +struct worker_job_type_gs { + STRING *name; + STRING *units; + + size_t jobs_started; + usec_t busy_time; + + RRDDIM *rd_jobs_started; + RRDDIM *rd_busy_time; + + WORKER_METRIC_TYPE type; + NETDATA_DOUBLE min_value; + NETDATA_DOUBLE max_value; + 
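+    // running sum and sample count for the custom WORKER_METRIC_* metrics;
+    // the chart code divides sum by count to plot the "average" dimension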
NETDATA_DOUBLE sum_value; + size_t count_value; + + RRDSET *st; + RRDDIM *rd_min; + RRDDIM *rd_max; + RRDDIM *rd_avg; +}; + +struct worker_thread { + pid_t pid; + bool enabled; + + bool cpu_enabled; + double cpu; + + kernel_uint_t utime; + kernel_uint_t stime; + + kernel_uint_t utime_old; + kernel_uint_t stime_old; + + usec_t collected_time; + usec_t collected_time_old; + + size_t jobs_started; + usec_t busy_time; + + struct worker_thread *next; + struct worker_thread *prev; +}; + +struct worker_utilization { + const char *name; + const char *family; + size_t priority; + uint32_t flags; + + char *name_lowercase; + + struct worker_job_type_gs per_job_type[WORKER_UTILIZATION_MAX_JOB_TYPES]; + + size_t workers_max_job_id; + size_t workers_registered; + size_t workers_busy; + usec_t workers_total_busy_time; + usec_t workers_total_duration; + size_t workers_total_jobs_started; + double workers_min_busy_time; + double workers_max_busy_time; + + size_t workers_cpu_registered; + double workers_cpu_min; + double workers_cpu_max; + double workers_cpu_total; + + struct worker_thread *threads; + + RRDSET *st_workers_time; + RRDDIM *rd_workers_time_avg; + RRDDIM *rd_workers_time_min; + RRDDIM *rd_workers_time_max; + + RRDSET *st_workers_cpu; + RRDDIM *rd_workers_cpu_avg; + RRDDIM *rd_workers_cpu_min; + RRDDIM *rd_workers_cpu_max; + + RRDSET *st_workers_threads; + RRDDIM *rd_workers_threads_free; + RRDDIM *rd_workers_threads_busy; + + RRDSET *st_workers_jobs_per_job_type; + RRDSET *st_workers_busy_per_job_type; + + RRDDIM *rd_total_cpu_utilizaton; +}; + +static struct worker_utilization all_workers_utilization[] = { + { .name = "STATS", .family = "workers global statistics", .priority = 1000000 }, + { .name = "HEALTH", .family = "workers health alarms", .priority = 1000000 }, + { .name = "MLTRAIN", .family = "workers ML training", .priority = 1000000 }, + { .name = "MLDETECT", .family = "workers ML detection", .priority = 1000000 }, + { .name = "STREAMRCV", .family = "workers streaming receive", .priority = 1000000 }, + { .name = "STREAMSND", .family = "workers streaming send", .priority = 1000000 }, + { .name = "DBENGINE", .family = "workers dbengine instances", .priority = 1000000 }, + { .name = "WEB", .family = "workers web server", .priority = 1000000 }, + { .name = "ACLKQUERY", .family = "workers aclk query", .priority = 1000000 }, + { .name = "ACLKSYNC", .family = "workers aclk host sync", .priority = 1000000 }, + { .name = "METASYNC", .family = "workers metadata sync", .priority = 1000000 }, + { .name = "PLUGINSD", .family = "workers plugins.d", .priority = 1000000 }, + { .name = "STATSD", .family = "workers plugin statsd", .priority = 1000000 }, + { .name = "STATSDFLUSH", .family = "workers plugin statsd flush", .priority = 1000000 }, + { .name = "PROC", .family = "workers plugin proc", .priority = 1000000 }, + { .name = "NETDEV", .family = "workers plugin proc netdev", .priority = 1000000 }, + { .name = "FREEBSD", .family = "workers plugin freebsd", .priority = 1000000 }, + { .name = "MACOS", .family = "workers plugin macos", .priority = 1000000 }, + { .name = "CGROUPS", .family = "workers plugin cgroups", .priority = 1000000 }, + { .name = "CGROUPSDISC", .family = "workers plugin cgroups find", .priority = 1000000 }, + { .name = "DISKSPACE", .family = "workers plugin diskspace", .priority = 1000000 }, + { .name = "TC", .family = "workers plugin tc", .priority = 1000000 }, + { .name = "TIMEX", .family = "workers plugin timex", .priority = 1000000 }, + { .name = "IDLEJITTER", .family = "workers 
plugin idlejitter", .priority = 1000000 }, + { .name = "RRDCONTEXT", .family = "workers contexts", .priority = 1000000 }, + { .name = "REPLICATION", .family = "workers replication sender", .priority = 1000000 }, + { .name = "SERVICE", .family = "workers service", .priority = 1000000 }, + + // has to be terminated with a NULL + { .name = NULL, .family = NULL } +}; + +static void workers_total_cpu_utilization_chart(void) { + size_t i, cpu_enabled = 0; + for(i = 0; all_workers_utilization[i].name ;i++) + if(all_workers_utilization[i].workers_cpu_registered) cpu_enabled++; + + if(!cpu_enabled) return; + + static RRDSET *st = NULL; + + if(!st) { + st = rrdset_create_localhost( + "netdata", + "workers_cpu", + NULL, + "workers", + "netdata.workers.cpu_total", + "Netdata Workers CPU Utilization (100% = 1 core)", + "%", + "netdata", + "stats", + 999000, + localhost->rrd_update_every, + RRDSET_TYPE_STACKED); + } + + for(i = 0; all_workers_utilization[i].name ;i++) { + struct worker_utilization *wu = &all_workers_utilization[i]; + if(!wu->workers_cpu_registered) continue; + + if(!wu->rd_total_cpu_utilizaton) + wu->rd_total_cpu_utilizaton = rrddim_add(st, wu->name_lowercase, NULL, 1, 100, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(st, wu->rd_total_cpu_utilizaton, (collected_number)((double)wu->workers_cpu_total * 100.0)); + } + + rrdset_done(st); +} + +#define WORKER_CHART_DECIMAL_PRECISION 100 + +static void workers_utilization_update_chart(struct worker_utilization *wu) { + if(!wu->workers_registered) return; + + //fprintf(stderr, "%-12s WORKER UTILIZATION: %-3.2f%%, %zu jobs done, %zu running, on %zu workers, min %-3.02f%%, max %-3.02f%%.\n", + // wu->name, + // (double)wu->workers_total_busy_time * 100.0 / (double)wu->workers_total_duration, + // wu->workers_total_jobs_started, wu->workers_busy, wu->workers_registered, + // wu->workers_min_busy_time, wu->workers_max_busy_time); + + // ---------------------------------------------------------------------- + + if(unlikely(!wu->st_workers_time)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_time_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.time", wu->name_lowercase); + + wu->st_workers_time = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Busy Time (100% = all workers busy)" + , "%" + , "netdata" + , "stats" + , wu->priority + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + } + + // we add the min and max dimensions only when we have multiple workers + + if(unlikely(!wu->rd_workers_time_min && wu->workers_registered > 1)) + wu->rd_workers_time_min = rrddim_add(wu->st_workers_time, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(!wu->rd_workers_time_max && wu->workers_registered > 1)) + wu->rd_workers_time_max = rrddim_add(wu->st_workers_time, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(!wu->rd_workers_time_avg)) + wu->rd_workers_time_avg = rrddim_add(wu->st_workers_time, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(wu->workers_min_busy_time == WORKERS_MIN_PERCENT_DEFAULT)) wu->workers_min_busy_time = 0.0; + + if(wu->rd_workers_time_min) + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_min, (collected_number)((double)wu->workers_min_busy_time * WORKER_CHART_DECIMAL_PRECISION)); + + if(wu->rd_workers_time_max) + 
rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_max, (collected_number)((double)wu->workers_max_busy_time * WORKER_CHART_DECIMAL_PRECISION)); + + if(wu->workers_total_duration == 0) + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_avg, 0); + else + rrddim_set_by_pointer(wu->st_workers_time, wu->rd_workers_time_avg, (collected_number)((double)wu->workers_total_busy_time * 100.0 * WORKER_CHART_DECIMAL_PRECISION / (double)wu->workers_total_duration)); + + rrdset_done(wu->st_workers_time); + + // ---------------------------------------------------------------------- + +#ifdef __linux__ + if(wu->workers_cpu_registered || wu->st_workers_cpu) { + if(unlikely(!wu->st_workers_cpu)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_cpu_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.cpu", wu->name_lowercase); + + wu->st_workers_cpu = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers CPU Utilization (100% = all workers busy)" + , "%" + , "netdata" + , "stats" + , wu->priority + 1 + , localhost->rrd_update_every + , RRDSET_TYPE_AREA + ); + } + + if (unlikely(!wu->rd_workers_cpu_min && wu->workers_registered > 1)) + wu->rd_workers_cpu_min = rrddim_add(wu->st_workers_cpu, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if (unlikely(!wu->rd_workers_cpu_max && wu->workers_registered > 1)) + wu->rd_workers_cpu_max = rrddim_add(wu->st_workers_cpu, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(!wu->rd_workers_cpu_avg)) + wu->rd_workers_cpu_avg = rrddim_add(wu->st_workers_cpu, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + + if(unlikely(wu->workers_cpu_min == WORKERS_MIN_PERCENT_DEFAULT)) wu->workers_cpu_min = 0.0; + + if(wu->rd_workers_cpu_min) + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_min, (collected_number)(wu->workers_cpu_min * WORKER_CHART_DECIMAL_PRECISION)); + + if(wu->rd_workers_cpu_max) + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_max, (collected_number)(wu->workers_cpu_max * WORKER_CHART_DECIMAL_PRECISION)); + + if(wu->workers_cpu_registered == 0) + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_avg, 0); + else + rrddim_set_by_pointer(wu->st_workers_cpu, wu->rd_workers_cpu_avg, (collected_number)( wu->workers_cpu_total * WORKER_CHART_DECIMAL_PRECISION / (NETDATA_DOUBLE)wu->workers_cpu_registered )); + + rrdset_done(wu->st_workers_cpu); + } +#endif + + // ---------------------------------------------------------------------- + + if(unlikely(!wu->st_workers_jobs_per_job_type)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_jobs_by_type_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.jobs_started_by_type", wu->name_lowercase); + + wu->st_workers_jobs_per_job_type = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Jobs Started by Type" + , "jobs" + , "netdata" + , "stats" + , wu->priority + 2 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + { + size_t i; + for(i = 0; i <= wu->workers_max_job_id ;i++) { + if(unlikely(wu->per_job_type[i].type != WORKER_METRIC_IDLE_BUSY)) + continue; + + if (wu->per_job_type[i].name) { + + if(unlikely(!wu->per_job_type[i].rd_jobs_started)) + 
wu->per_job_type[i].rd_jobs_started = rrddim_add(wu->st_workers_jobs_per_job_type, string2str(wu->per_job_type[i].name), NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(wu->st_workers_jobs_per_job_type, wu->per_job_type[i].rd_jobs_started, (collected_number)(wu->per_job_type[i].jobs_started)); + } + } + } + + rrdset_done(wu->st_workers_jobs_per_job_type); + + // ---------------------------------------------------------------------- + + if(unlikely(!wu->st_workers_busy_per_job_type)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_busy_time_by_type_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.time_by_type", wu->name_lowercase); + + wu->st_workers_busy_per_job_type = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Busy Time by Type" + , "ms" + , "netdata" + , "stats" + , wu->priority + 3 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + } + + { + size_t i; + for(i = 0; i <= wu->workers_max_job_id ;i++) { + if(unlikely(wu->per_job_type[i].type != WORKER_METRIC_IDLE_BUSY)) + continue; + + if (wu->per_job_type[i].name) { + + if(unlikely(!wu->per_job_type[i].rd_busy_time)) + wu->per_job_type[i].rd_busy_time = rrddim_add(wu->st_workers_busy_per_job_type, string2str(wu->per_job_type[i].name), NULL, 1, USEC_PER_MS, RRD_ALGORITHM_ABSOLUTE); + + rrddim_set_by_pointer(wu->st_workers_busy_per_job_type, wu->per_job_type[i].rd_busy_time, (collected_number)(wu->per_job_type[i].busy_time)); + } + } + } + + rrdset_done(wu->st_workers_busy_per_job_type); + + // ---------------------------------------------------------------------- + + if(wu->st_workers_threads || wu->workers_registered > 1) { + if(unlikely(!wu->st_workers_threads)) { + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_threads_%s", wu->name_lowercase); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.threads", wu->name_lowercase); + + wu->st_workers_threads = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , "Netdata Workers Threads" + , "threads" + , "netdata" + , "stats" + , wu->priority + 4 + , localhost->rrd_update_every + , RRDSET_TYPE_STACKED + ); + + wu->rd_workers_threads_free = rrddim_add(wu->st_workers_threads, "free", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + wu->rd_workers_threads_busy = rrddim_add(wu->st_workers_threads, "busy", NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(wu->st_workers_threads, wu->rd_workers_threads_free, (collected_number)(wu->workers_registered - wu->workers_busy)); + rrddim_set_by_pointer(wu->st_workers_threads, wu->rd_workers_threads_busy, (collected_number)(wu->workers_busy)); + rrdset_done(wu->st_workers_threads); + } + + // ---------------------------------------------------------------------- + // custom metric types WORKER_METRIC_ABSOLUTE + + { + size_t i; + for (i = 0; i <= wu->workers_max_job_id; i++) { + if(wu->per_job_type[i].type != WORKER_METRIC_ABSOLUTE) + continue; + + if(!wu->per_job_type[i].count_value) + continue; + + if(!wu->per_job_type[i].st) { + size_t job_name_len = string_strlen(wu->per_job_type[i].name); + if(job_name_len > RRD_ID_LENGTH_MAX) job_name_len = RRD_ID_LENGTH_MAX; + + char job_name_sanitized[job_name_len + 1]; + rrdset_strncpyz_name(job_name_sanitized, string2str(wu->per_job_type[i].name), job_name_len); + + char name[RRD_ID_LENGTH_MAX + 1]; 
+ snprintfz(name, RRD_ID_LENGTH_MAX, "workers_%s_value_%s", wu->name_lowercase, job_name_sanitized); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.value.%s", wu->name_lowercase, job_name_sanitized); + + char title[1000 + 1]; + snprintf(title, 1000, "Netdata Workers %s value of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name)); + + wu->per_job_type[i].st = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , title + , (wu->per_job_type[i].units)?string2str(wu->per_job_type[i].units):"value" + , "netdata" + , "stats" + , wu->priority + 5 + i + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + wu->per_job_type[i].rd_min = rrddim_add(wu->per_job_type[i].st, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_max = rrddim_add(wu->per_job_type[i].st, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_avg = rrddim_add(wu->per_job_type[i].st, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_min, (collected_number)(wu->per_job_type[i].min_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_max, (collected_number)(wu->per_job_type[i].max_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_avg, (collected_number)(wu->per_job_type[i].sum_value / wu->per_job_type[i].count_value * WORKER_CHART_DECIMAL_PRECISION)); + + rrdset_done(wu->per_job_type[i].st); + } + } + + // ---------------------------------------------------------------------- + // custom metric types WORKER_METRIC_INCREMENTAL + + { + size_t i; + for (i = 0; i <= wu->workers_max_job_id ; i++) { + if(wu->per_job_type[i].type != WORKER_METRIC_INCREMENT && wu->per_job_type[i].type != WORKER_METRIC_INCREMENTAL_TOTAL) + continue; + + if(!wu->per_job_type[i].count_value) + continue; + + if(!wu->per_job_type[i].st) { + size_t job_name_len = string_strlen(wu->per_job_type[i].name); + if(job_name_len > RRD_ID_LENGTH_MAX) job_name_len = RRD_ID_LENGTH_MAX; + + char job_name_sanitized[job_name_len + 1]; + rrdset_strncpyz_name(job_name_sanitized, string2str(wu->per_job_type[i].name), job_name_len); + + char name[RRD_ID_LENGTH_MAX + 1]; + snprintfz(name, RRD_ID_LENGTH_MAX, "workers_%s_rate_%s", wu->name_lowercase, job_name_sanitized); + + char context[RRD_ID_LENGTH_MAX + 1]; + snprintf(context, RRD_ID_LENGTH_MAX, "netdata.workers.%s.rate.%s", wu->name_lowercase, job_name_sanitized); + + char title[1000 + 1]; + snprintf(title, 1000, "Netdata Workers %s rate of %s", wu->name_lowercase, string2str(wu->per_job_type[i].name)); + + wu->per_job_type[i].st = rrdset_create_localhost( + "netdata" + , name + , NULL + , wu->family + , context + , title + , (wu->per_job_type[i].units)?string2str(wu->per_job_type[i].units):"rate" + , "netdata" + , "stats" + , wu->priority + 5 + i + , localhost->rrd_update_every + , RRDSET_TYPE_LINE + ); + + wu->per_job_type[i].rd_min = rrddim_add(wu->per_job_type[i].st, "min", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_max = rrddim_add(wu->per_job_type[i].st, "max", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, RRD_ALGORITHM_ABSOLUTE); + wu->per_job_type[i].rd_avg = rrddim_add(wu->per_job_type[i].st, "average", NULL, 1, WORKER_CHART_DECIMAL_PRECISION, 
RRD_ALGORITHM_ABSOLUTE); + } + + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_min, (collected_number)(wu->per_job_type[i].min_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_max, (collected_number)(wu->per_job_type[i].max_value * WORKER_CHART_DECIMAL_PRECISION)); + rrddim_set_by_pointer(wu->per_job_type[i].st, wu->per_job_type[i].rd_avg, (collected_number)(wu->per_job_type[i].sum_value / wu->per_job_type[i].count_value * WORKER_CHART_DECIMAL_PRECISION)); + + rrdset_done(wu->per_job_type[i].st); + } + } +} + +static void workers_utilization_reset_statistics(struct worker_utilization *wu) { + wu->workers_registered = 0; + wu->workers_busy = 0; + wu->workers_total_busy_time = 0; + wu->workers_total_duration = 0; + wu->workers_total_jobs_started = 0; + wu->workers_min_busy_time = WORKERS_MIN_PERCENT_DEFAULT; + wu->workers_max_busy_time = 0; + + wu->workers_cpu_registered = 0; + wu->workers_cpu_min = WORKERS_MIN_PERCENT_DEFAULT; + wu->workers_cpu_max = 0; + wu->workers_cpu_total = 0; + + size_t i; + for(i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { + if(unlikely(!wu->name_lowercase)) { + wu->name_lowercase = strdupz(wu->name); + char *s = wu->name_lowercase; + for( ; *s ; s++) *s = tolower(*s); + } + + wu->per_job_type[i].jobs_started = 0; + wu->per_job_type[i].busy_time = 0; + + wu->per_job_type[i].min_value = NAN; + wu->per_job_type[i].max_value = NAN; + wu->per_job_type[i].sum_value = NAN; + wu->per_job_type[i].count_value = 0; + } + + struct worker_thread *wt; + for(wt = wu->threads; wt ; wt = wt->next) { + wt->enabled = false; + wt->cpu_enabled = false; + } +} + +#define TASK_STAT_PREFIX "/proc/self/task/" +#define TASK_STAT_SUFFIX "/stat" + +static int read_thread_cpu_time_from_proc_stat(pid_t pid __maybe_unused, kernel_uint_t *utime __maybe_unused, kernel_uint_t *stime __maybe_unused) { +#ifdef __linux__ + static char filename[sizeof(TASK_STAT_PREFIX) + sizeof(TASK_STAT_SUFFIX) + 20] = TASK_STAT_PREFIX; + static size_t start_pos = sizeof(TASK_STAT_PREFIX) - 1; + static procfile *ff = NULL; + + // construct the filename + size_t end_pos = snprintfz(&filename[start_pos], 20, "%d", pid); + strcpy(&filename[start_pos + end_pos], TASK_STAT_SUFFIX); + + // (re)open the procfile to the new filename + bool set_quotes = (ff == NULL) ? 
true : false;
+    ff = procfile_reopen(ff, filename, NULL, PROCFILE_FLAG_DEFAULT);
+    if(unlikely(!ff)) return -1;
+
+    if(set_quotes)
+        procfile_set_open_close(ff, "(", ")");
+
+    // read the entire file and split it into lines and words
+    ff = procfile_readall(ff);
+    if(unlikely(!ff)) return -1;
+
+    // parse the fields we are interested in: words 13 and 14 of
+    // /proc/self/task/<tid>/stat are utime and stime, in clock ticks
+    *utime = str2kernel_uint_t(procfile_lineword(ff, 0, 13));
+    *stime = str2kernel_uint_t(procfile_lineword(ff, 0, 14));
+
+    // leave the file open for the next iteration
+
+    return 0;
+#else
+    // TODO: add per-thread CPU time detection for FreeBSD and macOS
+    *utime = 0;
+    *stime = 0;
+    return 1;
+#endif
+}
+
+static Pvoid_t workers_by_pid_JudyL_array = NULL;
+
+static void workers_threads_cleanup(struct worker_utilization *wu) {
+    struct worker_thread *t = wu->threads;
+    while(t) {
+        struct worker_thread *next = t->next;
+
+        if(!t->enabled) {
+            JudyLDel(&workers_by_pid_JudyL_array, t->pid, PJE0);
+            DOUBLE_LINKED_LIST_REMOVE_UNSAFE(wu->threads, t, prev, next);
+            freez(t);
+        }
+        t = next;
+    }
+}
+
+static struct worker_thread *worker_thread_find(struct worker_utilization *wu __maybe_unused, pid_t pid) {
+    struct worker_thread *wt = NULL;
+
+    Pvoid_t *PValue = JudyLGet(workers_by_pid_JudyL_array, pid, PJE0);
+    if(PValue)
+        wt = *PValue;
+
+    return wt;
+}
+
+static struct worker_thread *worker_thread_create(struct worker_utilization *wu, pid_t pid) {
+    struct worker_thread *wt;
+
+    wt = (struct worker_thread *)callocz(1, sizeof(struct worker_thread));
+    wt->pid = pid;
+
+    Pvoid_t *PValue = JudyLIns(&workers_by_pid_JudyL_array, pid, PJE0);
+    *PValue = wt;
+
+    // link it
+    DOUBLE_LINKED_LIST_APPEND_UNSAFE(wu->threads, wt, prev, next);
+
+    return wt;
+}
+
+static struct worker_thread *worker_thread_find_or_create(struct worker_utilization *wu, pid_t pid) {
+    struct worker_thread *wt;
+    wt = worker_thread_find(wu, pid);
+    if(!wt) wt = worker_thread_create(wu, pid);
+
+    return wt;
+}
+
+static void worker_utilization_charts_callback(void *ptr
+                                               , pid_t pid __maybe_unused
+                                               , const char *thread_tag __maybe_unused
+                                               , size_t max_job_id __maybe_unused
+                                               , size_t utilization_usec __maybe_unused
+                                               , size_t duration_usec __maybe_unused
+                                               , size_t jobs_started __maybe_unused
+                                               , size_t is_running __maybe_unused
+                                               , STRING **job_types_names __maybe_unused
+                                               , STRING **job_types_units __maybe_unused
+                                               , WORKER_METRIC_TYPE *job_types_metric_types __maybe_unused
+                                               , size_t *job_types_jobs_started __maybe_unused
+                                               , usec_t *job_types_busy_time __maybe_unused
+                                               , NETDATA_DOUBLE *job_types_custom_metrics __maybe_unused
+                                               ) {
+    struct worker_utilization *wu = (struct worker_utilization *)ptr;
+
+    // find the worker_thread in the list
+    struct worker_thread *wt = worker_thread_find_or_create(wu, pid);
+
+    wt->enabled = true;
+    wt->busy_time = utilization_usec;
+    wt->jobs_started = jobs_started;
+
+    wt->utime_old = wt->utime;
+    wt->stime_old = wt->stime;
+    wt->collected_time_old = wt->collected_time;
+
+    if(max_job_id > wu->workers_max_job_id)
+        wu->workers_max_job_id = max_job_id;
+
+    wu->workers_total_busy_time += utilization_usec;
+    wu->workers_total_duration += duration_usec;
+    wu->workers_total_jobs_started += jobs_started;
+    wu->workers_busy += is_running;
+    wu->workers_registered++;
+
+    double util = (double)utilization_usec * 100.0 / (double)duration_usec;
+    if(util > wu->workers_max_busy_time)
+        wu->workers_max_busy_time = util;
+
+    if(util < wu->workers_min_busy_time)
+        wu->workers_min_busy_time = util;
+
+    // accumulate per job type statistics
+    size_t i;
+    for(i = 0; 
i <= max_job_id ;i++) { + if(!wu->per_job_type[i].name && job_types_names[i]) + wu->per_job_type[i].name = string_dup(job_types_names[i]); + + if(!wu->per_job_type[i].units && job_types_units[i]) + wu->per_job_type[i].units = string_dup(job_types_units[i]); + + wu->per_job_type[i].type = job_types_metric_types[i]; + + wu->per_job_type[i].jobs_started += job_types_jobs_started[i]; + wu->per_job_type[i].busy_time += job_types_busy_time[i]; + + NETDATA_DOUBLE value = job_types_custom_metrics[i]; + if(netdata_double_isnumber(value)) { + if(!wu->per_job_type[i].count_value) { + wu->per_job_type[i].count_value = 1; + wu->per_job_type[i].min_value = value; + wu->per_job_type[i].max_value = value; + wu->per_job_type[i].sum_value = value; + } + else { + wu->per_job_type[i].count_value++; + wu->per_job_type[i].sum_value += value; + if(value < wu->per_job_type[i].min_value) wu->per_job_type[i].min_value = value; + if(value > wu->per_job_type[i].max_value) wu->per_job_type[i].max_value = value; + } + } + } + + // find its CPU utilization + if((!read_thread_cpu_time_from_proc_stat(pid, &wt->utime, &wt->stime))) { + wt->collected_time = now_realtime_usec(); + usec_t delta = wt->collected_time - wt->collected_time_old; + + double utime = (double)(wt->utime - wt->utime_old) / (double)system_hz * 100.0 * (double)USEC_PER_SEC / (double)delta; + double stime = (double)(wt->stime - wt->stime_old) / (double)system_hz * 100.0 * (double)USEC_PER_SEC / (double)delta; + double cpu = utime + stime; + wt->cpu = cpu; + wt->cpu_enabled = true; + + wu->workers_cpu_total += cpu; + if(cpu < wu->workers_cpu_min) wu->workers_cpu_min = cpu; + if(cpu > wu->workers_cpu_max) wu->workers_cpu_max = cpu; + } + wu->workers_cpu_registered += (wt->cpu_enabled) ? 1 : 0; +} + +static void worker_utilization_charts(void) { + static size_t iterations = 0; + iterations++; + + for(int i = 0; all_workers_utilization[i].name ;i++) { + workers_utilization_reset_statistics(&all_workers_utilization[i]); + + netdata_thread_disable_cancelability(); + workers_foreach(all_workers_utilization[i].name, worker_utilization_charts_callback, &all_workers_utilization[i]); + netdata_thread_enable_cancelability(); + + // skip the first iteration, so that we don't accumulate startup utilization to our charts + if(likely(iterations > 1)) + workers_utilization_update_chart(&all_workers_utilization[i]); + + netdata_thread_disable_cancelability(); + workers_threads_cleanup(&all_workers_utilization[i]); + netdata_thread_enable_cancelability(); + } + + workers_total_cpu_utilization_chart(); +} + +static void worker_utilization_finish(void) { + int i, j; + for(i = 0; all_workers_utilization[i].name ;i++) { + struct worker_utilization *wu = &all_workers_utilization[i]; + + if(wu->name_lowercase) { + freez(wu->name_lowercase); + wu->name_lowercase = NULL; + } + + for(j = 0; j < WORKER_UTILIZATION_MAX_JOB_TYPES ;j++) { + string_freez(wu->per_job_type[j].name); + wu->per_job_type[j].name = NULL; + + string_freez(wu->per_job_type[j].units); + wu->per_job_type[j].units = NULL; + } + + // mark all threads as not enabled + struct worker_thread *t; + for(t = wu->threads; t ; t = t->next) + t->enabled = false; + + // let the cleanup job free them + workers_threads_cleanup(wu); + } +} + +// --------------------------------------------------------------------------------------------------------------------- +// global statistics thread + + +static void global_statistics_register_workers(void) { + worker_register("STATS"); + worker_register_job_name(WORKER_JOB_GLOBAL, 
"global"); + worker_register_job_name(WORKER_JOB_REGISTRY, "registry"); + worker_register_job_name(WORKER_JOB_DBENGINE, "dbengine"); + worker_register_job_name(WORKER_JOB_STRINGS, "strings"); + worker_register_job_name(WORKER_JOB_DICTIONARIES, "dictionaries"); + worker_register_job_name(WORKER_JOB_MALLOC_TRACE, "malloc_trace"); + worker_register_job_name(WORKER_JOB_WORKERS, "workers"); + worker_register_job_name(WORKER_JOB_SQLITE3, "sqlite3"); +} + +static void global_statistics_cleanup(void *ptr) +{ + worker_unregister(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + worker_utilization_finish(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *global_statistics_main(void *ptr) +{ + global_statistics_register_workers(); + + netdata_thread_cleanup_push(global_statistics_cleanup, ptr); + + int update_every = + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; + + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + // keep the randomness at zero + // to make sure we are not close to any other thread + hb.randomness = 0; + + while (!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, step); + + worker_is_busy(WORKER_JOB_GLOBAL); + global_statistics_charts(); + + worker_is_busy(WORKER_JOB_SQLITE3); + sqlite3_statistics_charts(); + + worker_is_busy(WORKER_JOB_REGISTRY); + registry_statistics(); + + if(dbengine_enabled) { + worker_is_busy(WORKER_JOB_DBENGINE); + dbengine_statistics_charts(); + } + + worker_is_busy(WORKER_JOB_HEARTBEAT); + update_heartbeat_charts(); + + worker_is_busy(WORKER_JOB_STRINGS); + update_strings_charts(); + + worker_is_busy(WORKER_JOB_DICTIONARIES); + dictionary_statistics(); + +#ifdef NETDATA_TRACE_ALLOCATIONS + worker_is_busy(WORKER_JOB_MALLOC_TRACE); + malloc_trace_statistics(); +#endif + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + + +// --------------------------------------------------------------------------------------------------------------------- +// workers thread + +static void global_statistics_workers_cleanup(void *ptr) +{ + worker_unregister(); + + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + info("cleaning up..."); + + worker_utilization_finish(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +void *global_statistics_workers_main(void *ptr) +{ + global_statistics_register_workers(); + + netdata_thread_cleanup_push(global_statistics_workers_cleanup, ptr); + + int update_every = + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; + + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); + + while (!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, step); + + worker_is_busy(WORKER_JOB_WORKERS); + worker_utilization_charts(); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} + diff --git a/daemon/global_statistics.h b/daemon/global_statistics.h new file mode 100644 index 0000000..f7d6775 --- /dev/null +++ b/daemon/global_statistics.h @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef 
NETDATA_GLOBAL_STATISTICS_H +#define NETDATA_GLOBAL_STATISTICS_H 1 + +#include "database/rrd.h" + +// ---------------------------------------------------------------------------- +// global statistics + +void global_statistics_ml_query_completed(size_t points_read); +void global_statistics_exporters_query_completed(size_t points_read); +void global_statistics_backfill_query_completed(size_t points_read); +void global_statistics_rrdr_query_completed(size_t queries, uint64_t db_points_read, uint64_t result_points_generated, QUERY_SOURCE query_source); +void global_statistics_sqlite3_query_completed(bool success, bool busy, bool locked); +void global_statistics_sqlite3_row_completed(void); +void global_statistics_rrdset_done_chart_collection_completed(size_t *points_read_per_tier_array); + +void global_statistics_web_request_completed(uint64_t dt, + uint64_t bytes_received, + uint64_t bytes_sent, + uint64_t content_size, + uint64_t compressed_content_size); + +uint64_t global_statistics_web_client_connected(void); +void global_statistics_web_client_disconnected(void); + +extern bool global_statistics_enabled; + +#endif /* NETDATA_GLOBAL_STATISTICS_H */ diff --git a/daemon/main.c b/daemon/main.c new file mode 100644 index 0000000..6b59138 --- /dev/null +++ b/daemon/main.c @@ -0,0 +1,1617 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" +#include "buildinfo.h" +#include "static_threads.h" + +bool unittest_running = false; +int netdata_zero_metrics_enabled; +int netdata_anonymous_statistics_enabled; + +struct netdata_static_thread *static_threads; + +struct config netdata_config = { + .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { + .avl_tree = { + .root = NULL, + .compar = appconfig_section_compare + }, + .rwlock = AVL_LOCK_INITIALIZER + } +}; + +void netdata_cleanup_and_exit(int ret) { + // enabling this, is wrong + // because the threads will be cancelled while cleaning up + // netdata_exit = 1; + + error_log_limit_unlimited(); + info("EXIT: netdata prepares to exit with code %d...", ret); + + send_statistics("EXIT", ret?"ERROR":"OK","-"); + + char agent_crash_file[FILENAME_MAX + 1]; + char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; + snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); + snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir); + (void) rename(agent_crash_file, agent_incomplete_shutdown_file); + + // cleanup/save the database and exit + info("EXIT: cleaning up the database..."); + rrdhost_cleanup_all(); + + if(!ret) { + // exit cleanly + + // stop everything + info("EXIT: stopping static threads..."); +#ifdef ENABLE_ACLK + aclk_sync_exit_all(); +#endif + cancel_main_threads(); + + // free the database + info("EXIT: freeing database memory..."); +#ifdef ENABLE_DBENGINE + if(dbengine_enabled) { + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_prepare_exit(multidb_ctx[tier]); + } +#endif + metadata_sync_shutdown_prepare(); + rrdhost_free_all(); + metadata_sync_shutdown(); +#ifdef ENABLE_DBENGINE + if(dbengine_enabled) { + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_exit(multidb_ctx[tier]); + } +#endif + } + sql_close_context_database(); + sql_close_database(); + + // unlink the pid + if(pidfile[0]) { + info("EXIT: removing netdata PID file '%s'...", pidfile); + if(unlink(pidfile) != 0) + error("EXIT: cannot unlink pidfile '%s'.", pidfile); + } + +#ifdef ENABLE_HTTPS 
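+    // release the TLS state that security_init() (defined later in this
+    // file) allocated at startup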
+ security_clean_openssl(); +#endif + info("EXIT: all done - netdata is now exiting - bye bye..."); + (void) unlink(agent_incomplete_shutdown_file); + exit(ret); +} + +void web_server_threading_selection(void) { + web_server_mode = web_server_mode_id(config_get(CONFIG_SECTION_WEB, "mode", web_server_mode_name(web_server_mode))); + + int static_threaded = (web_server_mode == WEB_SERVER_MODE_STATIC_THREADED); + + int i; + for (i = 0; static_threads[i].name; i++) { + if (static_threads[i].start_routine == socket_listen_main_static_threaded) + static_threads[i].enabled = static_threaded; + } +} + +int make_dns_decision(const char *section_name, const char *config_name, const char *default_value, SIMPLE_PATTERN *p) +{ + char *value = config_get(section_name,config_name,default_value); + if(!strcmp("yes",value)) + return 1; + if(!strcmp("no",value)) + return 0; + if(strcmp("heuristic",value)) + error("Invalid configuration option '%s' for '%s'/'%s'. Valid options are 'yes', 'no' and 'heuristic'. Proceeding with 'heuristic'", + value, section_name, config_name); + + return simple_pattern_is_potential_name(p); +} + +void web_server_config_options(void) +{ + web_client_timeout = + (int)config_get_number(CONFIG_SECTION_WEB, "disconnect idle clients after seconds", web_client_timeout); + web_client_first_request_timeout = + (int)config_get_number(CONFIG_SECTION_WEB, "timeout for first request", web_client_first_request_timeout); + web_client_streaming_rate_t = + config_get_number(CONFIG_SECTION_WEB, "accept a streaming request every seconds", web_client_streaming_rate_t); + + respect_web_browser_do_not_track_policy = + config_get_boolean(CONFIG_SECTION_WEB, "respect do not track policy", respect_web_browser_do_not_track_policy); + web_x_frame_options = config_get(CONFIG_SECTION_WEB, "x-frame-options response header", ""); + if(!*web_x_frame_options) + web_x_frame_options = NULL; + + web_allow_connections_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow connections from", "localhost *"), + NULL, SIMPLE_PATTERN_EXACT); + web_allow_connections_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow connections by dns", "heuristic", web_allow_connections_from); + web_allow_dashboard_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow dashboard from", "localhost *"), + NULL, SIMPLE_PATTERN_EXACT); + web_allow_dashboard_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow dashboard by dns", "heuristic", web_allow_dashboard_from); + web_allow_badges_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow badges from", "*"), NULL, SIMPLE_PATTERN_EXACT); + web_allow_badges_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow badges by dns", "heuristic", web_allow_badges_from); + web_allow_registry_from = + simple_pattern_create(config_get(CONFIG_SECTION_REGISTRY, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT); + web_allow_registry_dns = make_dns_decision(CONFIG_SECTION_REGISTRY, "allow by dns", "heuristic", + web_allow_registry_from); + web_allow_streaming_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow streaming from", "*"), + NULL, SIMPLE_PATTERN_EXACT); + web_allow_streaming_dns = make_dns_decision(CONFIG_SECTION_WEB, "allow streaming by dns", "heuristic", + web_allow_streaming_from); + // Note the default is not heuristic, the wildcards could match DNS but the intent is ip-addresses. 
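+    // For illustration, enforcing one of these ACLs later is a single pattern
+    // match against the client IP or resolved hostname. A minimal sketch (the
+    // caller shown is hypothetical, not part of this file):
+    //
+    //     if(!simple_pattern_matches(web_allow_netdataconf_from, w->client_ip))
+    //         return web_client_permission_denied(w);
+    //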
+ web_allow_netdataconf_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow netdata.conf from", + "localhost fd* 10.* 192.168.* 172.16.* 172.17.* 172.18.*" + " 172.19.* 172.20.* 172.21.* 172.22.* 172.23.* 172.24.*" + " 172.25.* 172.26.* 172.27.* 172.28.* 172.29.* 172.30.*" + " 172.31.* UNKNOWN"), NULL, SIMPLE_PATTERN_EXACT); + web_allow_netdataconf_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow netdata.conf by dns", "no", web_allow_netdataconf_from); + web_allow_mgmt_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow management from", "localhost"), + NULL, SIMPLE_PATTERN_EXACT); + web_allow_mgmt_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow management by dns","heuristic",web_allow_mgmt_from); + + +#ifdef NETDATA_WITH_ZLIB + web_enable_gzip = config_get_boolean(CONFIG_SECTION_WEB, "enable gzip compression", web_enable_gzip); + + char *s = config_get(CONFIG_SECTION_WEB, "gzip compression strategy", "default"); + if(!strcmp(s, "default")) + web_gzip_strategy = Z_DEFAULT_STRATEGY; + else if(!strcmp(s, "filtered")) + web_gzip_strategy = Z_FILTERED; + else if(!strcmp(s, "huffman only")) + web_gzip_strategy = Z_HUFFMAN_ONLY; + else if(!strcmp(s, "rle")) + web_gzip_strategy = Z_RLE; + else if(!strcmp(s, "fixed")) + web_gzip_strategy = Z_FIXED; + else { + error("Invalid compression strategy '%s'. Valid strategies are 'default', 'filtered', 'huffman only', 'rle' and 'fixed'. Proceeding with 'default'.", s); + web_gzip_strategy = Z_DEFAULT_STRATEGY; + } + + web_gzip_level = (int)config_get_number(CONFIG_SECTION_WEB, "gzip compression level", 3); + if(web_gzip_level < 1) { + error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 1 (fastest compression).", web_gzip_level); + web_gzip_level = 1; + } + else if(web_gzip_level > 9) { + error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 9 (best compression).", web_gzip_level); + web_gzip_level = 9; + } +#endif /* NETDATA_WITH_ZLIB */ +} + + +// killpid kills pid with SIGTERM. +int killpid(pid_t pid) { + int ret; + debug(D_EXIT, "Request to kill pid %d", pid); + + errno = 0; + ret = kill(pid, SIGTERM); + if (ret == -1) { + switch(errno) { + case ESRCH: + // We wanted the process to exit so just let the caller handle. + return ret; + + case EPERM: + error("Cannot kill pid %d, but I do not have enough permissions.", pid); + break; + + default: + error("Cannot kill pid %d, but I received an error.", pid); + break; + } + } + + return ret; +} + +void cancel_main_threads() { + error_log_limit_unlimited(); + + int i, found = 0; + usec_t max = 5 * USEC_PER_SEC, step = 100000; + for (i = 0; static_threads[i].name != NULL ; i++) { + if(static_threads[i].enabled == NETDATA_MAIN_THREAD_RUNNING) { + info("EXIT: Stopping main thread: %s", static_threads[i].name); + netdata_thread_cancel(*static_threads[i].thread); + found++; + } + } + + netdata_exit = 1; + + while(found && max > 0) { + max -= step; + info("Waiting %d threads to finish...", found); + sleep_usec(step); + found = 0; + for (i = 0; static_threads[i].name != NULL ; i++) { + if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED) + found++; + } + } + + if(found) { + for (i = 0; static_threads[i].name != NULL ; i++) { + if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED) + error("Main thread %s takes too long to exit. 
Giving up...", static_threads[i].name); + } + } + else + info("All threads finished."); + + for (i = 0; static_threads[i].name != NULL ; i++) + freez(static_threads[i].thread); + + freez(static_threads); +} + +struct option_def option_definitions[] = { + // opt description arg name default value + { 'c', "Configuration file to load.", "filename", CONFIG_DIR "/" CONFIG_FILENAME}, + { 'D', "Do not fork. Run in the foreground.", NULL, "run in the background"}, + { 'd', "Fork. Run in the background.", NULL, "run in the background"}, + { 'h', "Display this help message.", NULL, NULL}, + { 'P', "File to save a pid while running.", "filename", "do not save pid to a file"}, + { 'i', "The IP address to listen to.", "IP", "all IP addresses IPv4 and IPv6"}, + { 'p', "API/Web port to use.", "port", "19999"}, + { 's', "Prefix for /proc and /sys (for containers).", "path", "no prefix"}, + { 't', "The internal clock of netdata.", "seconds", "1"}, + { 'u', "Run as user.", "username", "netdata"}, + { 'v', "Print netdata version and exit.", NULL, NULL}, + { 'V', "Print netdata version and exit.", NULL, NULL}, + { 'W', "See Advanced options below.", "options", NULL}, +}; + +int help(int exitcode) { + FILE *stream; + if(exitcode == 0) + stream = stdout; + else + stream = stderr; + + int num_opts = sizeof(option_definitions) / sizeof(struct option_def); + int i; + int max_len_arg = 0; + + // Compute maximum argument length + for( i = 0; i < num_opts; i++ ) { + if(option_definitions[i].arg_name) { + int len_arg = (int)strlen(option_definitions[i].arg_name); + if(len_arg > max_len_arg) max_len_arg = len_arg; + } + } + + if(max_len_arg > 30) max_len_arg = 30; + if(max_len_arg < 20) max_len_arg = 20; + + fprintf(stream, "%s", "\n" + " ^\n" + " |.-. .-. .-. .-. . netdata \n" + " | '-' '-' '-' '-' real-time performance monitoring, done right! \n" + " +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->\n" + "\n" + " Copyright (C) 2016-2022, Netdata, Inc. <info@netdata.cloud>\n" + " Released under GNU General Public License v3 or later.\n" + " All rights reserved.\n" + "\n" + " Home Page : https://netdata.cloud\n" + " Source Code: https://github.com/netdata/netdata\n" + " Docs : https://learn.netdata.cloud\n" + " Support : https://github.com/netdata/netdata/issues\n" + " License : https://github.com/netdata/netdata/blob/master/LICENSE.md\n" + "\n" + " Twitter : https://twitter.com/linuxnetdata\n" + " LinkedIn : https://linkedin.com/company/netdata-cloud/\n" + " Facebook : https://facebook.com/linuxnetdata/\n" + "\n" + "\n" + ); + + fprintf(stream, " SYNOPSIS: netdata [options]\n"); + fprintf(stream, "\n"); + fprintf(stream, " Options:\n\n"); + + // Output options description. + for( i = 0; i < num_opts; i++ ) { + fprintf(stream, " -%c %-*s %s", option_definitions[i].val, max_len_arg, option_definitions[i].arg_name ? 
option_definitions[i].arg_name : "", option_definitions[i].description); + if(option_definitions[i].default_value) { + fprintf(stream, "\n %c %-*s Default: %s\n", ' ', max_len_arg, "", option_definitions[i].default_value); + } else { + fprintf(stream, "\n"); + } + fprintf(stream, "\n"); + } + + fprintf(stream, "\n Advanced options:\n\n" + " -W stacksize=N Set the stacksize (in bytes).\n\n" + " -W debug_flags=N Set runtime tracing to debug.log.\n\n" + " -W unittest Run internal unittests and exit.\n\n" + " -W sqlite-check Check metadata database integrity and exit.\n\n" + " -W sqlite-fix Check metadata database integrity, fix if needed and exit.\n\n" + " -W sqlite-compact Reclaim metadata database unused space and exit.\n\n" +#ifdef ENABLE_DBENGINE + " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n" + " -W stresstest=A,B,C,D,E,F,G\n" + " Run a DB engine stress test for A seconds,\n" + " with B writers and C readers, with a ramp up\n" + " time of D seconds for writers, a page cache\n" + " size of E MiB, an optional disk space limit\n" + " of F MiB, G libuv workers (default 16) and exit.\n\n" +#endif + " -W set section option value\n" + " set netdata.conf option from the command line.\n\n" + " -W buildinfo Print the version, the configure options,\n" + " a list of optional features, and whether they\n" + " are enabled or not.\n\n" + " -W buildinfojson Print the version, the configure options,\n" + " a list of optional features, and whether they\n" + " are enabled or not, in JSON format.\n\n" + " -W simple-pattern pattern string\n" + " Check if string matches pattern and exit.\n\n" + " -W \"claim -token=TOKEN -rooms=ROOM1,ROOM2\"\n" + " Claim the agent to the workspace rooms pointed to by TOKEN and ROOM*.\n\n" + ); + + fprintf(stream, "\n Signals netdata handles:\n\n" + " - HUP Close and reopen log files.\n" + " - USR1 Save internal DB to disk.\n" + " - USR2 Reload health configuration.\n" + "\n" + ); + + fflush(stream); + return exitcode; +} + +#ifdef ENABLE_HTTPS +static void security_init(){ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/ssl/key.pem",netdata_configured_user_config_dir); + netdata_ssl_security_key = config_get(CONFIG_SECTION_WEB, "ssl key", filename); + + snprintfz(filename, FILENAME_MAX, "%s/ssl/cert.pem",netdata_configured_user_config_dir); + netdata_ssl_security_cert = config_get(CONFIG_SECTION_WEB, "ssl certificate", filename); + + tls_version = config_get(CONFIG_SECTION_WEB, "tls version", "1.3"); + tls_ciphers = config_get(CONFIG_SECTION_WEB, "tls ciphers", "none"); + + security_openssl_library(); +} +#endif + +static void log_init(void) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/debug.log", netdata_configured_log_dir); + stdout_filename = config_get(CONFIG_SECTION_LOGS, "debug", filename); + + snprintfz(filename, FILENAME_MAX, "%s/error.log", netdata_configured_log_dir); + stderr_filename = config_get(CONFIG_SECTION_LOGS, "error", filename); + + snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir); + stdaccess_filename = config_get(CONFIG_SECTION_LOGS, "access", filename); + + snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir); + stdhealth_filename = config_get(CONFIG_SECTION_LOGS, "health", filename); + +#ifdef ENABLE_ACLK + aclklog_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "conversation log", CONFIG_BOOLEAN_NO); + if (aclklog_enabled) { + snprintfz(filename, FILENAME_MAX, "%s/aclk.log", netdata_configured_log_dir); + 
aclklog_filename = config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename); + } +#endif + + char deffacility[8]; + snprintfz(deffacility,7,"%s","daemon"); + facility_log = config_get(CONFIG_SECTION_LOGS, "facility", deffacility); + + error_log_throttle_period = config_get_number(CONFIG_SECTION_LOGS, "errors flood protection period", error_log_throttle_period); + error_log_errors_per_period = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "errors to trigger flood protection", (long long int)error_log_errors_per_period); + error_log_errors_per_period_backup = error_log_errors_per_period; + + setenv("NETDATA_ERRORS_THROTTLE_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors flood protection period" , ""), 1); + setenv("NETDATA_ERRORS_PER_PERIOD", config_get(CONFIG_SECTION_LOGS, "errors to trigger flood protection", ""), 1); +} + +char *initialize_lock_directory_path(char *prefix) +{ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/lock", prefix); + + return config_get(CONFIG_SECTION_DIRECTORIES, "lock", filename); +} + +static void backwards_compatible_config() { + // move [global] options to the [web] section + config_move(CONFIG_SECTION_GLOBAL, "http port listen backlog", + CONFIG_SECTION_WEB, "listen backlog"); + + config_move(CONFIG_SECTION_GLOBAL, "bind socket to IP", + CONFIG_SECTION_WEB, "bind to"); + + config_move(CONFIG_SECTION_GLOBAL, "bind to", + CONFIG_SECTION_WEB, "bind to"); + + config_move(CONFIG_SECTION_GLOBAL, "port", + CONFIG_SECTION_WEB, "default port"); + + config_move(CONFIG_SECTION_GLOBAL, "default port", + CONFIG_SECTION_WEB, "default port"); + + config_move(CONFIG_SECTION_GLOBAL, "disconnect idle web clients after seconds", + CONFIG_SECTION_WEB, "disconnect idle clients after seconds"); + + config_move(CONFIG_SECTION_GLOBAL, "respect web browser do not track policy", + CONFIG_SECTION_WEB, "respect do not track policy"); + + config_move(CONFIG_SECTION_GLOBAL, "web x-frame-options header", + CONFIG_SECTION_WEB, "x-frame-options response header"); + + config_move(CONFIG_SECTION_GLOBAL, "enable web responses gzip compression", + CONFIG_SECTION_WEB, "enable gzip compression"); + + config_move(CONFIG_SECTION_GLOBAL, "web compression strategy", + CONFIG_SECTION_WEB, "gzip compression strategy"); + + config_move(CONFIG_SECTION_GLOBAL, "web compression level", + CONFIG_SECTION_WEB, "gzip compression level"); + + config_move(CONFIG_SECTION_GLOBAL, "config directory", + CONFIG_SECTION_DIRECTORIES, "config"); + + config_move(CONFIG_SECTION_GLOBAL, "stock config directory", + CONFIG_SECTION_DIRECTORIES, "stock config"); + + config_move(CONFIG_SECTION_GLOBAL, "log directory", + CONFIG_SECTION_DIRECTORIES, "log"); + + config_move(CONFIG_SECTION_GLOBAL, "web files directory", + CONFIG_SECTION_DIRECTORIES, "web"); + + config_move(CONFIG_SECTION_GLOBAL, "cache directory", + CONFIG_SECTION_DIRECTORIES, "cache"); + + config_move(CONFIG_SECTION_GLOBAL, "lib directory", + CONFIG_SECTION_DIRECTORIES, "lib"); + + config_move(CONFIG_SECTION_GLOBAL, "home directory", + CONFIG_SECTION_DIRECTORIES, "home"); + + config_move(CONFIG_SECTION_GLOBAL, "lock directory", + CONFIG_SECTION_DIRECTORIES, "lock"); + + config_move(CONFIG_SECTION_GLOBAL, "plugins directory", + CONFIG_SECTION_DIRECTORIES, "plugins"); + + config_move(CONFIG_SECTION_HEALTH, "health configuration directory", + CONFIG_SECTION_DIRECTORIES, "health config"); + + config_move(CONFIG_SECTION_HEALTH, "stock health configuration directory", + CONFIG_SECTION_DIRECTORIES, "stock health config"); + + 
config_move(CONFIG_SECTION_REGISTRY, "registry db directory", + CONFIG_SECTION_DIRECTORIES, "registry"); + + config_move(CONFIG_SECTION_GLOBAL, "debug log", + CONFIG_SECTION_LOGS, "debug"); + + config_move(CONFIG_SECTION_GLOBAL, "error log", + CONFIG_SECTION_LOGS, "error"); + + config_move(CONFIG_SECTION_GLOBAL, "access log", + CONFIG_SECTION_LOGS, "access"); + + config_move(CONFIG_SECTION_GLOBAL, "facility log", + CONFIG_SECTION_LOGS, "facility"); + + config_move(CONFIG_SECTION_GLOBAL, "errors flood protection period", + CONFIG_SECTION_LOGS, "errors flood protection period"); + + config_move(CONFIG_SECTION_GLOBAL, "errors to trigger flood protection", + CONFIG_SECTION_LOGS, "errors to trigger flood protection"); + + config_move(CONFIG_SECTION_GLOBAL, "debug flags", + CONFIG_SECTION_LOGS, "debug flags"); + + config_move(CONFIG_SECTION_GLOBAL, "TZ environment variable", + CONFIG_SECTION_ENV_VARS, "TZ"); + + config_move(CONFIG_SECTION_PLUGINS, "PATH environment variable", + CONFIG_SECTION_ENV_VARS, "PATH"); + + config_move(CONFIG_SECTION_PLUGINS, "PYTHONPATH environment variable", + CONFIG_SECTION_ENV_VARS, "PYTHONPATH"); + + config_move(CONFIG_SECTION_STATSD, "enabled", + CONFIG_SECTION_PLUGINS, "statsd"); + + config_move(CONFIG_SECTION_GLOBAL, "memory mode", + CONFIG_SECTION_DB, "mode"); + + config_move(CONFIG_SECTION_GLOBAL, "history", + CONFIG_SECTION_DB, "retention"); + + config_move(CONFIG_SECTION_GLOBAL, "update every", + CONFIG_SECTION_DB, "update every"); + + config_move(CONFIG_SECTION_GLOBAL, "page cache size", + CONFIG_SECTION_DB, "dbengine page cache size MB"); + + config_move(CONFIG_SECTION_DB, "page cache size", + CONFIG_SECTION_DB, "dbengine page cache size MB"); + + config_move(CONFIG_SECTION_GLOBAL, "page cache uses malloc", + CONFIG_SECTION_DB, "dbengine page cache with malloc"); + + config_move(CONFIG_SECTION_DB, "page cache with malloc", + CONFIG_SECTION_DB, "dbengine page cache with malloc"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine disk space", + CONFIG_SECTION_DB, "dbengine disk space MB"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine multihost disk space", + CONFIG_SECTION_DB, "dbengine multihost disk space MB"); + + config_move(CONFIG_SECTION_GLOBAL, "memory deduplication (ksm)", + CONFIG_SECTION_DB, "memory deduplication (ksm)"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch timeout", + CONFIG_SECTION_DB, "dbengine page fetch timeout secs"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch retries", + CONFIG_SECTION_DB, "dbengine page fetch retries"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine extent pages", + CONFIG_SECTION_DB, "dbengine pages per extent"); + + config_move(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", + CONFIG_SECTION_DB, "cleanup obsolete charts after secs"); + + config_move(CONFIG_SECTION_GLOBAL, "gap when lost iterations above", + CONFIG_SECTION_DB, "gap when lost iterations above"); + + config_move(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds", + CONFIG_SECTION_DB, "cleanup orphan hosts after secs"); + + config_move(CONFIG_SECTION_GLOBAL, "delete obsolete charts files", + CONFIG_SECTION_DB, "delete obsolete charts files"); + + config_move(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", + CONFIG_SECTION_DB, "delete orphan hosts files"); + + config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics", + CONFIG_SECTION_DB, "enable zero metrics"); + +} + +static void get_netdata_configured_variables() { + backwards_compatible_config(); + + // 
------------------------------------------------------------------------ + // get the hostname + + char buf[HOSTNAME_MAX + 1]; + if(gethostname(buf, HOSTNAME_MAX) == -1){ + error("Cannot get machine hostname."); + } + + netdata_configured_hostname = config_get(CONFIG_SECTION_GLOBAL, "hostname", buf); + debug(D_OPTIONS, "hostname set to '%s'", netdata_configured_hostname); + + // ------------------------------------------------------------------------ + // get default database update frequency + + default_rrd_update_every = (int) config_get_number(CONFIG_SECTION_DB, "update every", UPDATE_EVERY); + if(default_rrd_update_every < 1 || default_rrd_update_every > 600) { + error("Invalid data collection frequency (update every) %d given. Defaulting to %d.", default_rrd_update_every, UPDATE_EVERY); + default_rrd_update_every = UPDATE_EVERY; + config_set_number(CONFIG_SECTION_DB, "update every", default_rrd_update_every); + } + + // ------------------------------------------------------------------------ + // get default memory mode for the database + + { + const char *mode = config_get(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode)); + default_rrd_memory_mode = rrd_memory_mode_id(mode); + if(strcmp(mode, rrd_memory_mode_name(default_rrd_memory_mode)) != 0) { + error("Invalid memory mode '%s' given. Using '%s'", mode, rrd_memory_mode_name(default_rrd_memory_mode)); + config_set(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode)); + } + } + + // ------------------------------------------------------------------------ + // get default database size + + if(default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && default_rrd_memory_mode != RRD_MEMORY_MODE_NONE) { + default_rrd_history_entries = (int)config_get_number( + CONFIG_SECTION_DB, "retention", + align_entries_to_pagesize(default_rrd_memory_mode, RRD_DEFAULT_HISTORY_ENTRIES)); + + long h = align_entries_to_pagesize(default_rrd_memory_mode, default_rrd_history_entries); + if (h != default_rrd_history_entries) { + config_set_number(CONFIG_SECTION_DB, "retention", h); + default_rrd_history_entries = (int)h; + } + } + + // ------------------------------------------------------------------------ + // get system paths + + netdata_configured_user_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "config", netdata_configured_user_config_dir); + netdata_configured_stock_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "stock config", netdata_configured_stock_config_dir); + netdata_configured_log_dir = config_get(CONFIG_SECTION_DIRECTORIES, "log", netdata_configured_log_dir); + netdata_configured_web_dir = config_get(CONFIG_SECTION_DIRECTORIES, "web", netdata_configured_web_dir); + netdata_configured_cache_dir = config_get(CONFIG_SECTION_DIRECTORIES, "cache", netdata_configured_cache_dir); + netdata_configured_varlib_dir = config_get(CONFIG_SECTION_DIRECTORIES, "lib", netdata_configured_varlib_dir); + char *env_home=getenv("HOME"); + netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", env_home?env_home:netdata_configured_home_dir); + + netdata_configured_lock_dir = initialize_lock_directory_path(netdata_configured_varlib_dir); + + { + pluginsd_initialize_plugin_directories(); + netdata_configured_primary_plugins_dir = plugin_directories[PLUGINSD_STOCK_PLUGINS_DIRECTORY_PATH]; + } + +#ifdef ENABLE_DBENGINE + // ------------------------------------------------------------------------ + // get default Database Engine page cache size in MiB + + db_engine_use_malloc = 
config_get_boolean(CONFIG_SECTION_DB, "dbengine page cache with malloc", CONFIG_BOOLEAN_YES); + default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb); + if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) { + error("Invalid page cache size %d given. Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB); + default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB; + config_set_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb); + } + + // ------------------------------------------------------------------------ + // get default Database Engine disk space quota in MiB + + default_rrdeng_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb); + if(default_rrdeng_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) { + error("Invalid dbengine disk space %d given. Defaulting to %d.", default_rrdeng_disk_quota_mb, RRDENG_MIN_DISK_SPACE_MB); + default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB; + config_set_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb); + } + + default_multidb_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", compute_multidb_diskspace()); + if(default_multidb_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) { + error("Invalid multidb disk space %d given. Defaulting to %d.", default_multidb_disk_quota_mb, default_rrdeng_disk_quota_mb); + default_multidb_disk_quota_mb = default_rrdeng_disk_quota_mb; + config_set_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", default_multidb_disk_quota_mb); + } +#else + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + error_report("RRD_MEMORY_MODE_DBENGINE is not supported in this platform. The agent will use db mode 'save' instead."); + default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE; + } +#endif + // ------------------------------------------------------------------------ + + netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", ""); + verify_netdata_host_prefix(); + + // -------------------------------------------------------------------- + // get KSM settings + +#ifdef MADV_MERGEABLE + enable_ksm = config_get_boolean(CONFIG_SECTION_DB, "memory deduplication (ksm)", enable_ksm); +#endif + + // -------------------------------------------------------------------- + // metric correlations + + enable_metric_correlations = config_get_boolean(CONFIG_SECTION_GLOBAL, "enable metric correlations", enable_metric_correlations); + default_metric_correlations_method = weights_string_to_method(config_get( + CONFIG_SECTION_GLOBAL, "metric correlations method", + weights_method_to_string(default_metric_correlations_method))); + + // -------------------------------------------------------------------- + + rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time); + // Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short + // cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at + // https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information. 
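+    // In netdata.conf terms, the clamp below means e.g.:
+    //
+    //     [db]
+    //         cleanup obsolete charts after secs = 30
+    //
+    // is honored as-is, while any value below 10 is raised to 10.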
+    if (rrdset_free_obsolete_time < 10) {
+        rrdset_free_obsolete_time = 10;
+        info("The \"cleanup obsolete charts after secs\" option was set to 10 seconds.");
+        config_set_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time);
+    }
+
+    gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above);
+    if (gap_when_lost_iterations_above < 1) {
+        gap_when_lost_iterations_above = 1;
+        config_set_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above);
+    }
+
+    // --------------------------------------------------------------------
+    // get various system parameters
+
+    get_system_HZ();
+    get_system_cpus();
+    get_system_pid_max();
+}
+
+int load_netdata_conf(char *filename, char overwrite_used) {
+    errno = 0;
+
+    int ret = 0;
+
+    if(filename && *filename) {
+        ret = config_load(filename, overwrite_used, NULL);
+        if(!ret)
+            error("CONFIG: cannot load config file '%s'.", filename);
+    }
+    else {
+        filename = strdupz_path_subpath(netdata_configured_user_config_dir, "netdata.conf");
+
+        ret = config_load(filename, overwrite_used, NULL);
+        if(!ret) {
+            info("CONFIG: cannot load user config '%s'. Will try the stock version.", filename);
+            freez(filename);
+
+            filename = strdupz_path_subpath(netdata_configured_stock_config_dir, "netdata.conf");
+            ret = config_load(filename, overwrite_used, NULL);
+            if(!ret)
+                info("CONFIG: cannot load stock config '%s'. Running with internal defaults.", filename);
+        }
+
+        freez(filename);
+    }
+
+    return ret;
+}
+
+// coverity[ +tainted_string_sanitize_content : arg-0 ]
+static inline void coverity_remove_taint(char *s)
+{
+    (void)s;
+}
+
+int get_system_info(struct rrdhost_system_info *system_info) {
+    char *script;
+    script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("system-info.sh") + 2));
+    sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "system-info.sh");
+    if (unlikely(access(script, R_OK) != 0)) {
+        info("System info script %s not found.", script);
+        freez(script);
+        return 1;
+    }
+
+    pid_t command_pid;
+
+    info("Executing %s", script);
+
+    FILE *fp_child_input;
+    FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input);
+    if(fp_child_output) {
+        char line[200 + 1];
+        // Removed the double strlens; if the Coverity tainted-string warning reappears I'll revert.
+        // This is one-time init code, but I'm curious about the warning... 
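+        // The loop below parses one NAME=value pair per line of the script's
+        // output (e.g. "NETDATA_SYSTEM_OS_NAME=Ubuntu"): it splits on the
+        // first '=', trims the trailing newline, records the pair in
+        // system_info and exports it to the environment for the plugins.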
+        while (fgets(line, 200, fp_child_output) != NULL) {
+            char *value=line;
+            while (*value && *value != '=') value++;
+            if (*value=='=') {
+                *value='\0';
+                value++;
+                char *end = value;
+                while (*end && *end != '\n') end++;
+                *end = '\0'; // overwrite the newline, if present
+                coverity_remove_taint(line); // the I/O is the controlled output of system-info.sh - not tainted
+                coverity_remove_taint(value);
+
+                if(unlikely(rrdhost_set_system_info_variable(system_info, line, value))) {
+                    info("Unexpected environment variable %s=%s", line, value);
+                }
+                else {
+                    info("%s=%s", line, value);
+                    setenv(line, value, 1);
+                }
+            }
+        }
+        netdata_pclose(fp_child_input, fp_child_output, command_pid);
+    }
+    freez(script);
+    return 0;
+}
+
+void set_silencers_filename() {
+    char filename[FILENAME_MAX + 1];
+    snprintfz(filename, FILENAME_MAX, "%s/health.silencers.json", netdata_configured_varlib_dir);
+    silencers_filename = config_get(CONFIG_SECTION_HEALTH, "silencers file", filename);
+}
+
+/* Any config setting that can be accessed without a default value, i.e. config_get(..., ..., NULL), *MUST*
+   be set in this procedure, which is called in all the relevant code paths.
+*/
+void post_conf_load(char **user)
+{
+    // --------------------------------------------------------------------
+    // get the user we should run as
+
+    // IMPORTANT: this is required before web_files_uid()
+    if(getuid() == 0) {
+        *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", NETDATA_USER);
+    }
+    else {
+        struct passwd *passwd = getpwuid(getuid());
+        *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", (passwd && passwd->pw_name)?passwd->pw_name:"");
+    }
+
+    // --------------------------------------------------------------------
+    // Check if the cloud is enabled
+#if defined( DISABLE_CLOUD ) || !defined( ENABLE_ACLK )
+    netdata_cloud_setting = 0;
+#else
+    netdata_cloud_setting = appconfig_get_boolean(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", 1);
+#endif
+    // This must be set before any point in the code that accesses it. Do not move it from this function. 
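+    // As a sketch of the pattern (same object, same key): any later reader
+    // can safely call
+    //
+    //     const char *url = appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL,
+    //                                     "cloud base url", DEFAULT_CLOUD_BASE_URL);
+    //
+    // and will see the value registered by the call below, because
+    // appconfig_get() also records the option with its default on first use.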
+ appconfig_get(&cloud_config, CONFIG_SECTION_GLOBAL, "cloud base url", DEFAULT_CLOUD_BASE_URL); +} + +int main(int argc, char **argv) { + int i; + int config_loaded = 0; + int dont_fork = 0; + bool close_open_fds = true; + size_t default_stacksize; + char *user = NULL; + + static_threads = static_threads_get(); + + netdata_ready=0; + // set the name for logging + program_name = "netdata"; + + if (argc > 1 && strcmp(argv[1], SPAWN_SERVER_COMMAND_LINE_ARGUMENT) == 0) { + // don't run netdata, this is the spawn server + spawn_server(); + exit(0); + } + + // parse options + { + int num_opts = sizeof(option_definitions) / sizeof(struct option_def); + char optstring[(num_opts * 2) + 1]; + + int string_i = 0; + for( i = 0; i < num_opts; i++ ) { + optstring[string_i] = option_definitions[i].val; + string_i++; + if(option_definitions[i].arg_name) { + optstring[string_i] = ':'; + string_i++; + } + } + // terminate optstring + optstring[string_i] ='\0'; + optstring[(num_opts *2)] ='\0'; + + int opt; + while( (opt = getopt(argc, argv, optstring)) != -1 ) { + switch(opt) { + case 'c': + if(load_netdata_conf(optarg, 1) != 1) { + error("Cannot load configuration file %s.", optarg); + return 1; + } + else { + debug(D_OPTIONS, "Configuration loaded from %s.", optarg); + post_conf_load(&user); + load_cloud_conf(1); + config_loaded = 1; + } + break; + case 'D': + dont_fork = 1; + break; + case 'd': + dont_fork = 0; + break; + case 'h': + return help(0); + case 'i': + config_set(CONFIG_SECTION_WEB, "bind to", optarg); + break; + case 'P': + strncpy(pidfile, optarg, FILENAME_MAX); + pidfile[FILENAME_MAX] = '\0'; + break; + case 'p': + config_set(CONFIG_SECTION_GLOBAL, "default port", optarg); + break; + case 's': + config_set(CONFIG_SECTION_GLOBAL, "host access prefix", optarg); + break; + case 't': + config_set(CONFIG_SECTION_GLOBAL, "update every", optarg); + break; + case 'u': + config_set(CONFIG_SECTION_GLOBAL, "run as user", optarg); + break; + case 'v': + case 'V': + printf("%s %s\n", program_name, program_version); + return 0; + case 'W': + { + char* stacksize_string = "stacksize="; + char* debug_flags_string = "debug_flags="; + char* claim_string = "claim"; +#ifdef ENABLE_DBENGINE + char* createdataset_string = "createdataset="; + char* stresstest_string = "stresstest="; +#endif + if(strcmp(optarg, "sqlite-check") == 0) { + sql_init_database(DB_CHECK_INTEGRITY, 0); + return 0; + } + + if(strcmp(optarg, "sqlite-fix") == 0) { + sql_init_database(DB_CHECK_FIX_DB, 0); + return 0; + } + + if(strcmp(optarg, "sqlite-compact") == 0) { + sql_init_database(DB_CHECK_RECLAIM_SPACE, 0); + return 0; + } + + if(strcmp(optarg, "unittest") == 0) { + unittest_running = true; + + if (unit_test_static_threads()) + return 1; + if (unit_test_buffer()) + return 1; + if (unit_test_str2ld()) + return 1; + if (unit_test_bitmap256()) + return 1; + // No call to load the config file on this code-path + post_conf_load(&user); + get_netdata_configured_variables(); + default_rrd_update_every = 1; + default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; + default_health_enabled = 0; + storage_tiers = 1; + registry_init(); + if(rrd_init("unittest", NULL)) { + fprintf(stderr, "rrd_init failed for unittest\n"); + return 1; + } + default_rrdpush_enabled = 0; + if(run_all_mockup_tests()) return 1; + if(unit_test_storage()) return 1; +#ifdef ENABLE_DBENGINE + if(test_dbengine()) return 1; +#endif + if(test_sqlite()) return 1; + if(string_unittest(10000)) return 1; + if (dictionary_unittest(10000)) + return 1; + if(aral_unittest(10000)) + return 
1; + if (rrdlabels_unittest()) + return 1; + if (ctx_unittest()) + return 1; + fprintf(stderr, "\n\nALL TESTS PASSED\n\n"); + return 0; + } + else if(strcmp(optarg, "escapetest") == 0) { + return command_argument_sanitization_tests(); + } +#ifdef ENABLE_ML_TESTS + else if(strcmp(optarg, "mltest") == 0) { + return test_ml(argc, argv); + } +#endif +#ifdef ENABLE_DBENGINE + else if(strcmp(optarg, "mctest") == 0) { + unittest_running = true; + return mc_unittest(); + } + else if(strcmp(optarg, "ctxtest") == 0) { + unittest_running = true; + return ctx_unittest(); + } + else if(strcmp(optarg, "dicttest") == 0) { + unittest_running = true; + return dictionary_unittest(10000); + } + else if(strcmp(optarg, "araltest") == 0) { + unittest_running = true; + return aral_unittest(10000); + } + else if(strcmp(optarg, "stringtest") == 0) { + unittest_running = true; + return string_unittest(10000); + } + else if(strcmp(optarg, "rrdlabelstest") == 0) { + unittest_running = true; + return rrdlabels_unittest(); + } + else if(strcmp(optarg, "metatest") == 0) { + unittest_running = true; + return metadata_unittest(); + } + else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) { + optarg += strlen(createdataset_string); + unsigned history_seconds = strtoul(optarg, NULL, 0); + generate_dbengine_dataset(history_seconds); + return 0; + } + else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) { + char *endptr; + unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0, + page_cache_mb = 0, disk_space_mb = 0, workers = 16; + + optarg += strlen(stresstest_string); + test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0); + if (',' == *endptr) + dset_charts = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + query_threads = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + ramp_up_seconds = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + disk_space_mb = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + workers = (unsigned)strtoul(endptr + 1, &endptr, 0); + + if (workers > 1024) + workers = 1024; + + char workers_str[16]; + snprintf(workers_str, 15, "%u", workers); + setenv("UV_THREADPOOL_SIZE", workers_str, 1); + dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds, + page_cache_mb, disk_space_mb); + return 0; + } +#endif + else if(strcmp(optarg, "simple-pattern") == 0) { + if(optind + 2 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W simple-pattern 'pattern' 'string'\n\n" + " Checks if 'pattern' matches the given 'string'.\n" + " - 'pattern' can be one or more space separated words.\n" + " - each 'word' can contain one or more asterisks.\n" + " - words starting with '!' 
give negative matches.\n" + " - words are processed left to right\n" + "\n" + "Examples:\n" + "\n" + " > match all veth interfaces, except veth0:\n" + "\n" + " -W simple-pattern '!veth0 veth*' 'veth12'\n" + "\n" + "\n" + " > match all *.ext files directly in /path/:\n" + " (this will not match *.ext files in a subdir of /path/)\n" + "\n" + " -W simple-pattern '!/path/*/*.ext /path/*.ext' '/path/test.ext'\n" + "\n" + ); + return 1; + } + + const char *haystack = argv[optind]; + const char *needle = argv[optind + 1]; + size_t len = strlen(needle) + 1; + char wildcarded[len]; + + SIMPLE_PATTERN *p = simple_pattern_create(haystack, NULL, SIMPLE_PATTERN_EXACT); + int ret = simple_pattern_matches_extract(p, needle, wildcarded, len); + simple_pattern_free(p); + + if(ret) { + fprintf(stdout, "RESULT: MATCHED - pattern '%s' matches '%s', wildcarded '%s'\n", haystack, needle, wildcarded); + return 0; + } + else { + fprintf(stdout, "RESULT: NOT MATCHED - pattern '%s' does not match '%s', wildcarded '%s'\n", haystack, needle, wildcarded); + return 1; + } + } + else if(strncmp(optarg, stacksize_string, strlen(stacksize_string)) == 0) { + optarg += strlen(stacksize_string); + config_set(CONFIG_SECTION_GLOBAL, "pthread stack size", optarg); + } + else if(strncmp(optarg, debug_flags_string, strlen(debug_flags_string)) == 0) { + optarg += strlen(debug_flags_string); + config_set(CONFIG_SECTION_LOGS, "debug flags", optarg); + debug_flags = strtoull(optarg, NULL, 0); + } + else if(strcmp(optarg, "set") == 0) { + if(optind + 3 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W set 'section' 'key' 'value'\n\n" + " Overwrites settings of netdata.conf.\n" + "\n" + " These options interact with: -c netdata.conf\n" + " If -c netdata.conf is given on the command line,\n" + " before -W set... the user may overwrite command\n" + " line parameters at netdata.conf\n" + " If -c netdata.conf is given after (or missing)\n" + " -W set... the user cannot overwrite the command line\n" + " parameters." + "\n" + ); + return 1; + } + const char *section = argv[optind]; + const char *key = argv[optind + 1]; + const char *value = argv[optind + 2]; + optind += 3; + + // set this one as the default + // only if it is not already set in the config file + // so the caller can use -c netdata.conf before or + // after this parameter to prevent or allow overwriting + // variables at netdata.conf + config_set_default(section, key, value); + + // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value); + } + else if(strcmp(optarg, "set2") == 0) { + if(optind + 4 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W set 'conf_file' 'section' 'key' 'value'\n\n" + " Overwrites settings of netdata.conf or cloud.conf\n" + "\n" + " These options interact with: -c netdata.conf\n" + " If -c netdata.conf is given on the command line,\n" + " before -W set... the user may overwrite command\n" + " line parameters at netdata.conf\n" + " If -c netdata.conf is given after (or missing)\n" + " -W set... the user cannot overwrite the command line\n" + " parameters." + " conf_file can be \"cloud\" or \"netdata\".\n" + "\n" + ); + return 1; + } + const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? 
&netdata_config : &cloud_config; + const char *section = argv[optind + 1]; + const char *key = argv[optind + 2]; + const char *value = argv[optind + 3]; + optind += 4; + + // set this one as the default + // only if it is not already set in the config file + // so the caller can use -c netdata.conf before or + // after this parameter to prevent or allow overwriting + // variables at netdata.conf + appconfig_set_default(tmp_config, section, key, value); + + // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value); + } + else if(strcmp(optarg, "get") == 0) { + if(optind + 3 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W get 'section' 'key' 'value'\n\n" + " Prints settings of netdata.conf.\n" + "\n" + " These options interact with: -c netdata.conf\n" + " -c netdata.conf has to be given before -W get.\n" + "\n" + ); + return 1; + } + + if(!config_loaded) { + fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n"); + load_netdata_conf(NULL, 0); + post_conf_load(&user); + } + + get_netdata_configured_variables(); + + const char *section = argv[optind]; + const char *key = argv[optind + 1]; + const char *def = argv[optind + 2]; + const char *value = config_get(section, key, def); + printf("%s\n", value); + return 0; + } + else if(strcmp(optarg, "get2") == 0) { + if(optind + 4 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W get2 'conf_file' 'section' 'key' 'value'\n\n" + " Prints settings of netdata.conf or cloud.conf\n" + "\n" + " These options interact with: -c netdata.conf\n" + " -c netdata.conf has to be given before -W get2.\n" + " conf_file can be \"cloud\" or \"netdata\".\n" + "\n" + ); + return 1; + } + + if(!config_loaded) { + fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE, before -W get. Using default config.\n"); + load_netdata_conf(NULL, 0); + post_conf_load(&user); + load_cloud_conf(1); + } + + get_netdata_configured_variables(); + + const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config; + const char *section = argv[optind + 1]; + const char *key = argv[optind + 2]; + const char *def = argv[optind + 3]; + const char *value = appconfig_get(tmp_config, section, key, def); + printf("%s\n", value); + return 0; + } + else if(strncmp(optarg, claim_string, strlen(claim_string)) == 0) { + /* will trigger a claiming attempt when the agent is initialized */ + claiming_pending_arguments = optarg + strlen(claim_string); + } + else if(strcmp(optarg, "buildinfo") == 0) { + printf("Version: %s %s\n", program_name, program_version); + print_build_info(); + return 0; + } + else if(strcmp(optarg, "buildinfojson") == 0) { + print_build_info_json(); + return 0; + } + else if(strcmp(optarg, "keepopenfds") == 0) { + // Internal dev option to skip closing inherited + // open FDs. Useful, when we want to run the agent + // under profiling tools that open/maintain their + // own FDs. + close_open_fds = false; + } else { + fprintf(stderr, "Unknown -W parameter '%s'\n", optarg); + return help(1); + } + } + break; + + default: /* ? 
+            fprintf(stderr, "Unknown parameter '%c'\n", opt);
+            return help(1);
+        }
+    }
+}
+
+#ifdef _SC_OPEN_MAX
+    if (close_open_fds == true) {
+        // close all open file descriptors, except the standard ones
+        // the caller may have left open files (lxc-attach has this issue)
+        for(int fd = (int) (sysconf(_SC_OPEN_MAX) - 1); fd > 2; fd--)
+            if(fd_is_valid(fd))
+                close(fd);
+    }
+#endif
+
+    if(!config_loaded) {
+        load_netdata_conf(NULL, 0);
+        post_conf_load(&user);
+        load_cloud_conf(0);
+    }
+
+    char *nd_disable_cloud = getenv("NETDATA_DISABLE_CLOUD");
+    if (nd_disable_cloud && !strncmp(nd_disable_cloud, "1", 1)) {
+        appconfig_set(&cloud_config, CONFIG_SECTION_GLOBAL, "enabled", "false");
+    }
+
+    // ------------------------------------------------------------------------
+    // initialize netdata
+    {
+        char *pmax = config_get(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for plugins", "1");
+        if(pmax && *pmax)
+            setenv("MALLOC_ARENA_MAX", pmax, 1);
+
+#if defined(HAVE_C_MALLOPT)
+        i = (int)config_get_number(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for netdata", 1);
+        if(i > 0)
+            mallopt(M_ARENA_MAX, i);
+
+#ifdef NETDATA_INTERNAL_CHECKS
+        mallopt(M_PERTURB, 0x5A);
+        // mallopt(M_MXFAST, 0);
+#endif
+#endif
+
+        // initialize the system clocks
+        clocks_init();
+
+        // prepare configuration environment variables for the plugins
+        setenv("UV_THREADPOOL_SIZE", config_get(CONFIG_SECTION_GLOBAL, "libuv worker threads", "16"), 1);
+        get_netdata_configured_variables();
+        set_global_environment();
+
+        // run from inside the user config directory, so that the plugins
+        // can refer to their config files using relative filenames
+        if(chdir(netdata_configured_user_config_dir) == -1)
+            fatal("Cannot cd to '%s'", netdata_configured_user_config_dir);
+
+        // Get execution path before switching user to avoid permission issues
+        get_netdata_execution_path();
+    }
+
+    {
+        // --------------------------------------------------------------------
+        // get the debugging flags from the configuration file
+
+        char *flags = config_get(CONFIG_SECTION_LOGS, "debug flags", "0x0000000000000000");
+        setenv("NETDATA_DEBUG_FLAGS", flags, 1);
+
+        debug_flags = strtoull(flags, NULL, 0);
+        debug(D_OPTIONS, "Debug flags set to '0x%" PRIX64 "'.", debug_flags);
+
+        if(debug_flags != 0) {
+            struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY };
+            if(setrlimit(RLIMIT_CORE, &rl) != 0)
+                error("Cannot request unlimited core dumps for debugging... Proceeding anyway...");
+
+#ifdef HAVE_SYS_PRCTL_H
+            prctl(PR_SET_DUMPABLE, 1, 0, 0, 0);
+#endif
+        }
+
+        // --------------------------------------------------------------------
+        // get log filenames and settings
+
+        log_init();
+        error_log_limit_unlimited();
+
+        // initialize the log files
+        open_all_log_files();
+
+        get_system_timezone();
+
+        // --------------------------------------------------------------------
+        // get the certificate and start security
+
+#ifdef ENABLE_HTTPS
+        security_init();
+#endif
+
+        // --------------------------------------------------------------------
+        // This is the safest place to start the SILENCERS structure
+
+        set_silencers_filename();
+        health_initialize_global_silencers();
+
+        // --------------------------------------------------------------------
+        // Initialize ML configuration
+
+        ml_init();
+
+        // --------------------------------------------------------------------
+        // setup process signals
+
+        // block signals while initializing threads.
+        // this causes the threads to block signals.
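+        // (POSIX guarantees that a thread created with pthread_create()
+        // inherits the signal mask of its creator, so blocking here means
+        // every worker thread spawned below starts with signals blocked;
+        // the main thread unblocks them again further down, just before
+        // signals_handle() takes over.)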
+ + signals_block(); + + // setup the signals we want to use + + signals_init(); + + // setup threads configs + default_stacksize = netdata_threads_init(); + + + // -------------------------------------------------------------------- + // check which threads are enabled and initialize them + + for (i = 0; static_threads[i].name != NULL ; i++) { + struct netdata_static_thread *st = &static_threads[i]; + + if(st->config_name) + st->enabled = config_get_boolean(st->config_section, st->config_name, st->enabled); + + if(st->enabled && st->init_routine) + st->init_routine(); + + if(st->env_name) + setenv(st->env_name, st->enabled?"YES":"NO", 1); + + if(st->global_variable) + *st->global_variable = (st->enabled) ? true : false; + } + + // -------------------------------------------------------------------- + // create the listening sockets + + web_client_api_v1_init(); + web_server_threading_selection(); + + if(web_server_mode != WEB_SERVER_MODE_NONE) + api_listen_sockets_setup(); + + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(debug_flags != 0) { + struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; + if(setrlimit(RLIMIT_CORE, &rl) != 0) + error("Cannot request unlimited core dumps for debugging... Proceeding anyway..."); +#ifdef HAVE_SYS_PRCTL_H + prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); +#endif + } +#endif /* NETDATA_INTERNAL_CHECKS */ + + // get the max file limit + if(getrlimit(RLIMIT_NOFILE, &rlimit_nofile) != 0) + error("getrlimit(RLIMIT_NOFILE) failed"); + else + info("resources control: allowed file descriptors: soft = %zu, max = %zu", (size_t)rlimit_nofile.rlim_cur, (size_t)rlimit_nofile.rlim_max); + + // fork, switch user, create pid file, set process priority + if(become_daemon(dont_fork, user) == -1) + fatal("Cannot daemonize myself."); + + info("netdata started on pid %d.", getpid()); + + netdata_threads_init_after_fork((size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)default_stacksize)); + + // initialize internal registry + registry_init(); + // fork the spawn server + spawn_init(); + /* + * Libuv uv_spawn() uses SIGCHLD internally: + * https://github.com/libuv/libuv/blob/cc51217a317e96510fbb284721d5e6bc2af31e33/src/unix/process.c#L485 + * and inadvertently replaces the netdata signal handler which was setup during initialization. + * Thusly, we must explicitly restore the signal handler for SIGCHLD. + * Warning: extreme care is needed when mixing and matching POSIX and libuv. + */ + signals_restore_SIGCHLD(); + + // ------------------------------------------------------------------------ + // initialize rrd, registry, health, rrdpush, etc. 
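+    // netdata_anonymous_statistics_enabled starts at -1, meaning "not
+    // decided yet"; the checks further down treat -1 as "not initialized"
+    // and only act once the flag has been resolved to 0 or 1.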
+
+    netdata_anonymous_statistics_enabled = -1;
+    struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info));
+    get_system_info(system_info);
+    system_info->hops = 0;
+    get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist);
+
+    if(rrd_init(netdata_configured_hostname, system_info))
+        fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname);
+
+    char agent_crash_file[FILENAME_MAX + 1];
+    char agent_incomplete_shutdown_file[FILENAME_MAX + 1];
+    snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir);
+    int incomplete_shutdown_detected = (unlink(agent_incomplete_shutdown_file) == 0);
+    snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir);
+    int crash_detected = (unlink(agent_crash_file) == 0);
+    int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC, 0444);
+    if (fd >= 0)
+        close(fd);
+
+    // ------------------------------------------------------------------------
+    // Claim netdata agent to a cloud endpoint
+
+    if (claiming_pending_arguments)
+        claim_agent(claiming_pending_arguments);
+    load_claiming_state();
+
+    // ------------------------------------------------------------------------
+    // enable log flood protection
+
+    error_log_limit_reset();
+
+    // Load host labels
+    reload_host_labels();
+
+    // ------------------------------------------------------------------------
+    // spawn the threads
+
+    web_server_config_options();
+
+    netdata_zero_metrics_enabled = config_get_boolean_ondemand(CONFIG_SECTION_DB, "enable zero metrics", CONFIG_BOOLEAN_NO);
+
+    set_late_global_environment();
+
+    for (i = 0; static_threads[i].name != NULL ; i++) {
+        struct netdata_static_thread *st = &static_threads[i];
+
+        if(st->enabled) {
+            st->thread = mallocz(sizeof(netdata_thread_t));
+            debug(D_SYSTEM, "Starting thread %s.", st->name);
+            netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st);
+        }
+        else debug(D_SYSTEM, "Not starting thread %s.", st->name);
+    }
+
+    // ------------------------------------------------------------------------
+    // Initialize netdata agent command serving from cli and signals
+
+    commands_init();
+
+    info("netdata initialization completed. Enjoy real-time performance monitoring!");
+    netdata_ready = 1;
+
+    send_statistics("START", "-", "-");
+    if (crash_detected)
+        send_statistics("CRASH", "-", "-");
+    if (incomplete_shutdown_detected)
+        send_statistics("INCOMPLETE_SHUTDOWN", "-", "-");
+
+    // check if the ANALYTICS thread needs to start
+    if (netdata_anonymous_statistics_enabled == 1) {
+        for (i = 0; static_threads[i].name != NULL; i++) {
+            if (!strncmp(static_threads[i].name, "ANALYTICS", 9)) {
+                struct netdata_static_thread *st = &static_threads[i];
+                st->thread = mallocz(sizeof(netdata_thread_t));
+                st->enabled = 1;
+                debug(D_SYSTEM, "Starting thread %s.", st->name);
+                netdata_thread_create(st->thread, st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st);
+            }
+        }
+    }
+
+    // ------------------------------------------------------------------------
+    // Report ACLK build failure
+#ifndef ENABLE_ACLK
+    error("This agent doesn't have ACLK.");
+    char filename[FILENAME_MAX + 1];
+    snprintfz(filename, FILENAME_MAX, "%s/.aclk_report_sent", netdata_configured_varlib_dir);
+    if (netdata_anonymous_statistics_enabled > 0 && access(filename, F_OK)) { // -1 -> not initialized
+        send_statistics("ACLK_DISABLED", "-", "-");
+        int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0444);
+        if (fd == -1)
+            error("Cannot create file '%s'. Please fix this.", filename);
+        else
+            close(fd);
+    }
+#endif
+
+    // ------------------------------------------------------------------------
+    // unblock signals
+
+    signals_unblock();
+
+    // ------------------------------------------------------------------------
+    // Handle signals
+
+    signals_handle();
+
+    // should never reach this point
+    // but we need it for rpmlint #2752
+    return 1;
+}
diff --git a/daemon/main.h b/daemon/main.h
new file mode 100644
index 0000000..a4e2b3a
--- /dev/null
+++ b/daemon/main.h
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_MAIN_H
+#define NETDATA_MAIN_H 1
+
+#include "common.h"
+
+extern struct config netdata_config;
+
+/**
+ * This struct contains information about command line options.
+ */
+struct option_def {
+    /** The option character */
+    const char val;
+    /** Short description of what the option does */
+    const char *description;
+    /** Name of the argument displayed in SYNOPSIS */
+    const char *arg_name;
+    /** Default value if not set */
+    const char *default_value;
+};
+
+void cancel_main_threads(void);
+int killpid(pid_t pid);
+void netdata_cleanup_and_exit(int ret) NORETURN;
+void send_statistics(const char *action, const char *action_result, const char *action_data);
+
+#endif /* NETDATA_MAIN_H */
diff --git a/daemon/service.c b/daemon/service.c
new file mode 100644
index 0000000..6db2ef6
--- /dev/null
+++ b/daemon/service.c
@@ -0,0 +1,297 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common.h"
+
+/* Run service jobs every X seconds */
+#define SERVICE_HEARTBEAT 10
+
+#define TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT (3600 / 2)
+#define ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT 60
+
+#define WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK 1
+#define WORKER_JOB_CLEANUP_OBSOLETE_CHARTS 2
+#define WORKER_JOB_ARCHIVE_CHART 3
+#define WORKER_JOB_ARCHIVE_CHART_DIMENSIONS 4
+#define WORKER_JOB_ARCHIVE_DIMENSION 5
+#define WORKER_JOB_CLEANUP_ORPHAN_HOSTS 6
+#define WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS 7
+#define WORKER_JOB_FREE_HOST 9
+#define WORKER_JOB_SAVE_HOST_CHARTS 10
+#define WORKER_JOB_DELETE_HOST_CHARTS 11
+#define WORKER_JOB_FREE_CHART 12
+#define WORKER_JOB_SAVE_CHART 13
+#define WORKER_JOB_DELETE_CHART 14
+#define WORKER_JOB_FREE_DIMENSION 15
+
+static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) {
+    RRDSET *st = rd->rrdset;
+
+    if(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED) || !rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE))
+        return;
+
+    worker_is_busy(WORKER_JOB_ARCHIVE_DIMENSION);
+
+    rrddim_flag_set(rd, RRDDIM_FLAG_ARCHIVED);
+    rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE);
+
+    const char *cache_filename = rrddim_cache_filename(rd);
+    if(cache_filename) {
+        info("Deleting dimension file '%s'.", cache_filename);
+        if (unlikely(unlink(cache_filename) == -1))
+            error("Cannot delete dimension file '%s'", cache_filename);
+    }
+
+    if (rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) {
+        rrddimvar_delete_all(rd);
+
+        /* only a collector can mark a chart as obsolete, so we must remove the reference */
+
+        size_t tiers_available = 0, tiers_said_no_retention = 0;
+        for(size_t tier = 0; tier < storage_tiers ;tier++) {
+            if(rd->tiers[tier]) {
+                tiers_available++;
+
+                if(rd->tiers[tier]->collect_ops->finalize(rd->tiers[tier]->db_collection_handle))
+                    tiers_said_no_retention++;
+
+                rd->tiers[tier]->db_collection_handle = NULL;
+            }
+        }
+
+        if (tiers_available == tiers_said_no_retention && tiers_said_no_retention) {
+            /* This metric has no data and no references */
+            metaqueue_delete_dimension_uuid(&rd->metric_uuid);
+        }
+        else {
+            /* Do not delete this dimension */
+            return;
+        }
+    }
+
+    worker_is_busy(WORKER_JOB_FREE_DIMENSION);
+    rrddim_free(st, rd);
+}
+
+static bool svc_rrdset_archive_obsolete_dimensions(RRDSET *st, bool all_dimensions) {
+    worker_is_busy(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS);
+
+    RRDDIM *rd;
+    time_t now = now_realtime_sec();
+
+    bool done_all_dimensions = true;
+
+    dfe_start_write(st->rrddim_root_index, rd) {
+        if(unlikely(
+               all_dimensions ||
+               (rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE) && (rd->last_collected_time.tv_sec + rrdset_free_obsolete_time < now))
+           )) {
+
+            if(dictionary_acquired_item_references(rd_dfe.item) == 1) {
+                info("Removing obsolete dimension '%s' (%s) of '%s' (%s).", rrddim_name(rd), rrddim_id(rd), rrdset_name(st), rrdset_id(st));
+                svc_rrddim_obsolete_to_archive(rd);
+            }
+            else
done_all_dimensions = false; + } + else + done_all_dimensions = false; + } + dfe_done(rd); + + return done_all_dimensions; +} + +static void svc_rrdset_obsolete_to_archive(RRDSET *st) { + worker_is_busy(WORKER_JOB_ARCHIVE_CHART); + + if(!svc_rrdset_archive_obsolete_dimensions(st, true)) + return; + + rrdset_flag_set(st, RRDSET_FLAG_ARCHIVED); + rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE); + + rrdcalc_unlink_all_rrdset_alerts(st); + + rrdsetvar_release_and_delete_all(st); + + // has to be run after all dimensions are archived - or use-after-free will occur + rrdvar_delete_all(st->rrdvars); + + if(st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) { + if(rrdhost_option_check(st->rrdhost, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS)) { + worker_is_busy(WORKER_JOB_DELETE_CHART); + rrdset_delete_files(st); + } + else { + worker_is_busy(WORKER_JOB_SAVE_CHART); + rrdset_save(st); + } + + worker_is_busy(WORKER_JOB_FREE_CHART); + rrdset_free(st); + } +} + +static void svc_rrdhost_cleanup_obsolete_charts(RRDHOST *host) { + worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS); + + time_t now = now_realtime_sec(); + RRDSET *st; + rrdset_foreach_reentrant(st, host) { + if(rrdset_is_replicating(st)) + continue; + + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) + && st->last_accessed_time + rrdset_free_obsolete_time < now + && st->last_updated.tv_sec + rrdset_free_obsolete_time < now + && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now + )) { + svc_rrdset_obsolete_to_archive(st); + } + else if(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) { + rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); + svc_rrdset_archive_obsolete_dimensions(st, false); + } + } + rrdset_foreach_done(st); +} + +static void svc_rrdset_check_obsoletion(RRDHOST *host) { + worker_is_busy(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK); + + time_t now = now_realtime_sec(); + time_t last_entry_t; + RRDSET *st; + rrdset_foreach_read(st, host) { + if(rrdset_is_replicating(st)) + continue; + + last_entry_t = rrdset_last_entry_t(st); + + if(last_entry_t && last_entry_t < host->senders_connect_time && + host->senders_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT + ITERATIONS_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT * st->update_every + < now) + + rrdset_is_obsolete(st); + } + rrdset_foreach_done(st); +} + +static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() { + worker_is_busy(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS); + + rrd_rdlock(); + + RRDHOST *host; + rrdhost_foreach_read(host) { + if(rrdhost_receiver_replicating_charts(host) || rrdhost_sender_replicating_charts(host)) + continue; + + if(rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS)) { + rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_OBSOLETE_CHARTS|RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); + svc_rrdhost_cleanup_obsolete_charts(host); + } + + if(host != localhost + && host->trigger_chart_obsoletion_check + && ( + ( + host->senders_last_chart_command + && host->senders_last_chart_command + host->health_delay_up_to < now_realtime_sec() + ) + || (host->senders_connect_time + TIME_TO_RUN_OBSOLETIONS_ON_CHILD_CONNECT < now_realtime_sec()) + ) + ) { + svc_rrdset_check_obsoletion(host); + host->trigger_chart_obsoletion_check = 0; + } + } + + rrd_unlock(); +} + +static void svc_rrdhost_cleanup_orphan_hosts(RRDHOST *protected_host) { + worker_is_busy(WORKER_JOB_CLEANUP_ORPHAN_HOSTS); + rrd_wrlock(); + + time_t now = now_realtime_sec(); + + RRDHOST *host; + +restart_after_removal: + 
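+    // rrdhost_free() modifies the list being walked, so after every removal
+    // we jump back to this label and restart the iteration from the top.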
rrdhost_foreach_write(host) { + if(!rrdhost_should_be_removed(host, protected_host, now)) + continue; + + info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid); + + if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) + /* don't delete multi-host DB host files */ + && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance)) + ) { + worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS); + rrdhost_delete_charts(host); + } + else { + worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS); + rrdhost_save_charts(host); + } + + worker_is_busy(WORKER_JOB_FREE_HOST); + rrdhost_free(host, 0); + goto restart_after_removal; + } + + rrd_unlock(); +} + +static void service_main_cleanup(void *ptr) +{ + struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + + debug(D_SYSTEM, "Cleaning up..."); + worker_unregister(); + + static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; +} + +/* + * The service thread. + */ +void *service_main(void *ptr) +{ + worker_register("SERVICE"); + worker_register_job_name(WORKER_JOB_CHILD_CHART_OBSOLETION_CHECK, "child chart obsoletion check"); + worker_register_job_name(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS, "cleanup obsolete charts"); + worker_register_job_name(WORKER_JOB_ARCHIVE_CHART, "archive chart"); + worker_register_job_name(WORKER_JOB_ARCHIVE_CHART_DIMENSIONS, "archive chart dimensions"); + worker_register_job_name(WORKER_JOB_ARCHIVE_DIMENSION, "archive dimension"); + worker_register_job_name(WORKER_JOB_CLEANUP_ORPHAN_HOSTS, "cleanup orphan hosts"); + worker_register_job_name(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS, "cleanup obsolete charts on all hosts"); + worker_register_job_name(WORKER_JOB_FREE_HOST, "free host"); + worker_register_job_name(WORKER_JOB_SAVE_HOST_CHARTS, "save host charts"); + worker_register_job_name(WORKER_JOB_DELETE_HOST_CHARTS, "delete host charts"); + worker_register_job_name(WORKER_JOB_FREE_CHART, "free chart"); + worker_register_job_name(WORKER_JOB_SAVE_CHART, "save chart"); + worker_register_job_name(WORKER_JOB_DELETE_CHART, "delete chart"); + worker_register_job_name(WORKER_JOB_FREE_DIMENSION, "free dimension"); + + netdata_thread_cleanup_push(service_main_cleanup, ptr); + heartbeat_t hb; + heartbeat_init(&hb); + usec_t step = USEC_PER_SEC * SERVICE_HEARTBEAT; + + debug(D_SYSTEM, "Service thread starts"); + + while (!netdata_exit) { + worker_is_idle(); + heartbeat_next(&hb, step); + + svc_rrd_cleanup_obsolete_charts_from_all_hosts(); + svc_rrdhost_cleanup_orphan_hosts(localhost); + } + + netdata_thread_cleanup_pop(1); + return NULL; +} diff --git a/daemon/signals.c b/daemon/signals.c new file mode 100644 index 0000000..c857a9b --- /dev/null +++ b/daemon/signals.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +static int reaper_enabled = 0; + +typedef enum signal_action { + NETDATA_SIGNAL_END_OF_LIST, + NETDATA_SIGNAL_IGNORE, + NETDATA_SIGNAL_EXIT_CLEANLY, + NETDATA_SIGNAL_SAVE_DATABASE, + NETDATA_SIGNAL_REOPEN_LOGS, + NETDATA_SIGNAL_RELOAD_HEALTH, + NETDATA_SIGNAL_FATAL, + NETDATA_SIGNAL_CHILD, +} SIGNAL_ACTION; + +static struct { + int signo; // the signal + const char *name; // the name of the signal + size_t count; // the number of signals received + SIGNAL_ACTION action; // the action to take +} signals_waiting[] = { + { SIGPIPE, "SIGPIPE", 0, NETDATA_SIGNAL_IGNORE }, + { SIGINT , "SIGINT", 0, 
NETDATA_SIGNAL_EXIT_CLEANLY }, + { SIGQUIT, "SIGQUIT", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, + { SIGTERM, "SIGTERM", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, + { SIGHUP, "SIGHUP", 0, NETDATA_SIGNAL_REOPEN_LOGS }, + { SIGUSR1, "SIGUSR1", 0, NETDATA_SIGNAL_SAVE_DATABASE }, + { SIGUSR2, "SIGUSR2", 0, NETDATA_SIGNAL_RELOAD_HEALTH }, + { SIGBUS, "SIGBUS", 0, NETDATA_SIGNAL_FATAL }, + { SIGCHLD, "SIGCHLD", 0, NETDATA_SIGNAL_CHILD }, + + // terminator + { 0, "NONE", 0, NETDATA_SIGNAL_END_OF_LIST } +}; + +static void signal_handler(int signo) { + // find the entry in the list + int i; + for(i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST ; i++) { + if(unlikely(signals_waiting[i].signo == signo)) { + signals_waiting[i].count++; + + if(signals_waiting[i].action == NETDATA_SIGNAL_FATAL) { + char buffer[200 + 1]; + snprintfz(buffer, 200, "\nSIGNAL HANDLER: received: %s. Oops! This is bad!\n", signals_waiting[i].name); + if(write(STDERR_FILENO, buffer, strlen(buffer)) == -1) { + // nothing to do - we cannot write but there is no way to complain about it + ; + } + } + + return; + } + } +} + +void signals_block(void) { + sigset_t sigset; + sigfillset(&sigset); + + if(pthread_sigmask(SIG_BLOCK, &sigset, NULL) == -1) + error("SIGNAL: Could not block signals for threads"); +} + +void signals_unblock(void) { + sigset_t sigset; + sigfillset(&sigset); + + if(pthread_sigmask(SIG_UNBLOCK, &sigset, NULL) == -1) { + error("SIGNAL: Could not unblock signals for threads"); + } +} + +void signals_init(void) { + // Catch signals which we want to use + struct sigaction sa; + sa.sa_flags = 0; + + // Enable process tracking / reaper if running as init (pid == 1). + // This prevents zombie processes when running in a container. + if (getpid() == 1) { + info("SIGNAL: Enabling reaper"); + netdata_popen_tracking_init(); + reaper_enabled = 1; + } else { + info("SIGNAL: Not enabling reaper"); + } + + // ignore all signals while we run in a signal handler + sigfillset(&sa.sa_mask); + + int i; + for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) { + switch (signals_waiting[i].action) { + case NETDATA_SIGNAL_IGNORE: + sa.sa_handler = SIG_IGN; + break; + case NETDATA_SIGNAL_CHILD: + if (reaper_enabled == 0) + continue; + // FALLTHROUGH + default: + sa.sa_handler = signal_handler; + break; + } + + if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1) + error("SIGNAL: Failed to change signal handler for: %s", signals_waiting[i].name); + } +} + +void signals_restore_SIGCHLD(void) +{ + struct sigaction sa; + + if (reaper_enabled == 0) + return; + + sa.sa_flags = 0; + sigfillset(&sa.sa_mask); + sa.sa_handler = signal_handler; + + if(sigaction(SIGCHLD, &sa, NULL) == -1) + error("SIGNAL: Failed to change signal handler for: SIGCHLD"); +} + +void signals_reset(void) { + struct sigaction sa; + sigemptyset(&sa.sa_mask); + sa.sa_handler = SIG_DFL; + sa.sa_flags = 0; + + int i; + for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) { + if(sigaction(signals_waiting[i].signo, &sa, NULL) == -1) + error("SIGNAL: Failed to reset signal handler for: %s", signals_waiting[i].name); + } + + if (reaper_enabled == 1) + netdata_popen_tracking_cleanup(); +} + +// reap_child reaps the child identified by pid. 
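+// It uses waitid() with WNOHANG, so it never blocks; ECHILD here only means
+// the child has already been collected by another code path.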
+static void reap_child(pid_t pid) { + siginfo_t i; + + errno = 0; + debug(D_CHILDS, "SIGNAL: Reaping pid: %d...", pid); + if (waitid(P_PID, (id_t)pid, &i, WEXITED|WNOHANG) == -1) { + if (errno != ECHILD) + error("SIGNAL: Failed to wait for: %d", pid); + else + debug(D_CHILDS, "SIGNAL: Already reaped: %d", pid); + return; + } else if (i.si_pid == 0) { + // Process didn't exit, this shouldn't happen. + return; + } + + switch (i.si_code) { + case CLD_EXITED: + debug(D_CHILDS, "SIGNAL: Child %d exited: %d", pid, i.si_status); + break; + case CLD_KILLED: + debug(D_CHILDS, "SIGNAL: Child %d killed by signal: %d", pid, i.si_status); + break; + case CLD_DUMPED: + debug(D_CHILDS, "SIGNAL: Child %d dumped core by signal: %d", pid, i.si_status); + break; + case CLD_STOPPED: + debug(D_CHILDS, "SIGNAL: Child %d stopped by signal: %d", pid, i.si_status); + break; + case CLD_TRAPPED: + debug(D_CHILDS, "SIGNAL: Child %d trapped by signal: %d", pid, i.si_status); + break; + case CLD_CONTINUED: + debug(D_CHILDS, "SIGNAL: Child %d continued by signal: %d", pid, i.si_status); + break; + default: + debug(D_CHILDS, "SIGNAL: Child %d gave us a SIGCHLD with code %d and status %d.", pid, i.si_code, i.si_status); + } +} + +// reap_children reaps all pending children which are not managed by myp. +static void reap_children() { + siginfo_t i; + + while (1 == 1) { + // Identify which process caused the signal so we can determine + // if we need to reap a re-parented process. + i.si_pid = 0; + if (waitid(P_ALL, (id_t)0, &i, WEXITED|WNOHANG|WNOWAIT) == -1) { + if (errno != ECHILD) // This shouldn't happen with WNOHANG but does. + error("SIGNAL: Failed to wait"); + return; + } else if (i.si_pid == 0) { + // No child exited. + return; + } else if (netdata_popen_tracking_pid_shoud_be_reaped(i.si_pid) == 0) { + // myp managed, sleep for a short time to avoid busy wait while + // this is handled by myp. + usleep(10000); + } else { + // Unknown process, likely a re-parented child, reap it. + reap_child(i.si_pid); + } + } +} + +void signals_handle(void) { + while(1) { + + // pause() causes the calling process (or thread) to sleep until a signal + // is delivered that either terminates the process or causes the invocation + // of a signal-catching function. + if(pause() == -1 && errno == EINTR) { + + // loop once, but keep looping while signals are coming in + // this is needed because a few operations may take some time + // so we need to check for new signals before pausing again + int found = 1; + while(found) { + found = 0; + + // execute the actions of the signals + int i; + for (i = 0; signals_waiting[i].action != NETDATA_SIGNAL_END_OF_LIST; i++) { + if (signals_waiting[i].count) { + found = 1; + signals_waiting[i].count = 0; + const char *name = signals_waiting[i].name; + + switch (signals_waiting[i].action) { + case NETDATA_SIGNAL_RELOAD_HEALTH: + error_log_limit_unlimited(); + info("SIGNAL: Received %s. Reloading HEALTH configuration...", name); + error_log_limit_reset(); + execute_command(CMD_RELOAD_HEALTH, NULL, NULL); + break; + + case NETDATA_SIGNAL_SAVE_DATABASE: + error_log_limit_unlimited(); + info("SIGNAL: Received %s. Saving databases...", name); + error_log_limit_reset(); + execute_command(CMD_SAVE_DATABASE, NULL, NULL); + break; + + case NETDATA_SIGNAL_REOPEN_LOGS: + error_log_limit_unlimited(); + info("SIGNAL: Received %s. 
Reopening all log files...", name); + error_log_limit_reset(); + execute_command(CMD_REOPEN_LOGS, NULL, NULL); + break; + + case NETDATA_SIGNAL_EXIT_CLEANLY: + error_log_limit_unlimited(); + info("SIGNAL: Received %s. Cleaning up to exit...", name); + commands_exit(); + netdata_cleanup_and_exit(0); + exit(0); + break; + + case NETDATA_SIGNAL_FATAL: + fatal("SIGNAL: Received %s. netdata now exits.", name); + break; + + case NETDATA_SIGNAL_CHILD: + debug(D_CHILDS, "SIGNAL: Received %s. Reaping...", name); + reap_children(); + break; + + default: + info("SIGNAL: Received %s. No signal handler configured. Ignoring it.", name); + break; + } + } + } + } + } + else + error("SIGNAL: pause() returned but it was not interrupted by a signal."); + } +} diff --git a/daemon/signals.h b/daemon/signals.h new file mode 100644 index 0000000..12b1ed1 --- /dev/null +++ b/daemon/signals.h @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SIGNALS_H +#define NETDATA_SIGNALS_H 1 + +void signals_init(void); +void signals_block(void); +void signals_unblock(void); +void signals_restore_SIGCHLD(void); +void signals_reset(void); +void signals_handle(void) NORETURN; + +#endif //NETDATA_SIGNALS_H diff --git a/daemon/static_threads.c b/daemon/static_threads.c new file mode 100644 index 0000000..b7730bc --- /dev/null +++ b/daemon/static_threads.c @@ -0,0 +1,190 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +void *aclk_main(void *ptr); +void *analytics_main(void *ptr); +void *cpuidlejitter_main(void *ptr); +void *global_statistics_main(void *ptr); +void *global_statistics_workers_main(void *ptr); +void *health_main(void *ptr); +void *pluginsd_main(void *ptr); +void *service_main(void *ptr); +void *statsd_main(void *ptr); +void *timex_main(void *ptr); +void *replication_thread_main(void *ptr __maybe_unused); + +extern bool global_statistics_enabled; + +const struct netdata_static_thread static_threads_common[] = { + { + .name = "PLUGIN[timex]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "timex", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = timex_main + }, + { + .name = "PLUGIN[idlejitter]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "idlejitter", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = cpuidlejitter_main + }, + { + .name = "ANALYTICS", + .config_section = NULL, + .config_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = analytics_main + }, + { + .name = "GLOBAL_STATS", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "netdata monitoring", + .env_name = "NETDATA_INTERNALS_MONITORING", + .global_variable = &global_statistics_enabled, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = global_statistics_main + }, + { + .name = "WORKERS_STATS", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "netdata monitoring", + .env_name = "NETDATA_INTERNALS_MONITORING", + .global_variable = &global_statistics_enabled, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = global_statistics_workers_main + }, + { + .name = "PLUGINSD", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = pluginsd_main + }, + { + .name = "SERVICE", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = service_main + }, + { + .name = "STATSD", + 
.config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = statsd_main + }, + { + .name = "EXPORTING", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = exporting_main + }, + { + .name = "STREAM", + .config_section = NULL, + .config_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = rrdpush_sender_thread + }, + { + .name = "WEB_SERVER[static1]", + .config_section = NULL, + .config_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = socket_listen_main_static_threaded + }, + +#ifdef ENABLE_ACLK + { + .name = "ACLK_Main", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = aclk_main + }, +#endif + + { + .name = "RRDCONTEXT", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = rrdcontext_main + }, + + { + .name = "REPLICATION", + .config_section = NULL, + .config_name = NULL, + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = replication_thread_main + }, + + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +struct netdata_static_thread * +static_threads_concat(const struct netdata_static_thread *lhs, + const struct netdata_static_thread *rhs) +{ + struct netdata_static_thread *res; + + int lhs_size = 0; + for (; lhs[lhs_size].name; lhs_size++) {} + + int rhs_size = 0; + for (; rhs[rhs_size].name; rhs_size++) {} + + res = callocz(lhs_size + rhs_size + 1, sizeof(struct netdata_static_thread)); + + for (int i = 0; i != lhs_size; i++) + memcpy(&res[i], &lhs[i], sizeof(struct netdata_static_thread)); + + for (int i = 0; i != rhs_size; i++) + memcpy(&res[lhs_size + i], &rhs[i], sizeof(struct netdata_static_thread)); + + return res; +} diff --git a/daemon/static_threads.h b/daemon/static_threads.h new file mode 100644 index 0000000..9597da7 --- /dev/null +++ b/daemon/static_threads.h @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STATIC_THREADS_H +#define NETDATA_STATIC_THREADS_H + +#include "common.h" + +struct netdata_static_thread { + // the name of the thread as it should appear in the logs + char *name; + + // the section of netdata.conf to check if this is enabled or not + char *config_section; + + // the name of the config option to check if it is true or false + char *config_name; + + // the current status of the thread + volatile sig_atomic_t enabled; + + // internal use, to maintain a pointer to the created thread + netdata_thread_t *thread; + + // an initialization function to run before spawning the thread + void (*init_routine) (void); + + // the threaded worker + void *(*start_routine) (void *); + + // the environment variable to create + char *env_name; + + // global variable + bool *global_variable; +}; + +#define NETDATA_MAIN_THREAD_RUNNING CONFIG_BOOLEAN_YES +#define NETDATA_MAIN_THREAD_EXITING (CONFIG_BOOLEAN_YES + 1) +#define NETDATA_MAIN_THREAD_EXITED CONFIG_BOOLEAN_NO + +extern const struct netdata_static_thread static_threads_common[]; +extern const struct netdata_static_thread static_threads_linux[]; +extern const struct netdata_static_thread static_threads_freebsd[]; +extern const struct netdata_static_thread 
static_threads_macos[]; + +struct netdata_static_thread * +static_threads_concat(const struct netdata_static_thread *lhs, + const struct netdata_static_thread *rhs); + +struct netdata_static_thread *static_threads_get(); + +#endif /* NETDATA_STATIC_THREADS_H */ diff --git a/daemon/static_threads_freebsd.c b/daemon/static_threads_freebsd.c new file mode 100644 index 0000000..48066bf --- /dev/null +++ b/daemon/static_threads_freebsd.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +extern void *freebsd_main(void *ptr); + +const struct netdata_static_thread static_threads_freebsd[] = { + { + .name = "PLUGIN[freebsd]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "freebsd", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = freebsd_main + }, + + {NULL, NULL, NULL, 0, NULL, NULL, NULL} +}; + +const struct netdata_static_thread static_threads_linux[] = { + {NULL, NULL, NULL, 0, NULL, NULL, NULL} +}; + +const struct netdata_static_thread static_threads_macos[] = { + {NULL, NULL, NULL, 0, NULL, NULL, NULL} +}; + +struct netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_freebsd); +} diff --git a/daemon/static_threads_linux.c b/daemon/static_threads_linux.c new file mode 100644 index 0000000..260b2c1 --- /dev/null +++ b/daemon/static_threads_linux.c @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +extern void *cgroups_main(void *ptr); +extern void *proc_main(void *ptr); +extern void *diskspace_main(void *ptr); +extern void *tc_main(void *ptr); +extern void *timex_main(void *ptr); + +const struct netdata_static_thread static_threads_linux[] = { + { + .name = "PLUGIN[tc]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "tc", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = tc_main + }, + { + .name = "PLUGIN[diskspace]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "diskspace", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = diskspace_main + }, + { + .name = "PLUGIN[proc]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "proc", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = proc_main + }, + { + .name = "PLUGIN[cgroups]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "cgroups", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = cgroups_main + }, + + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +const struct netdata_static_thread static_threads_freebsd[] = { + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +const struct netdata_static_thread static_threads_macos[] = { + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +struct netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_linux); +} diff --git a/daemon/static_threads_macos.c b/daemon/static_threads_macos.c new file mode 100644 index 0000000..72c0324 --- /dev/null +++ b/daemon/static_threads_macos.c 
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "common.h"
+
+extern void *macos_main(void *ptr);
+
+const struct netdata_static_thread static_threads_macos[] = {
+    {
+        .name = "PLUGIN[macos]",
+        .config_section = CONFIG_SECTION_PLUGINS,
+        .config_name = "macos",
+        .enabled = 1,
+        .thread = NULL,
+        .init_routine = NULL,
+        .start_routine = macos_main,
+        .env_name = NULL,
+        .global_variable = NULL,
+    },
+
+    {NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL}
+};
+
+const struct netdata_static_thread static_threads_freebsd[] = {
+    {NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL}
+};
+
+const struct netdata_static_thread static_threads_linux[] = {
+    {NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL}
+};
+
+struct netdata_static_thread *static_threads_get() {
+    return static_threads_concat(static_threads_common, static_threads_macos);
+}
diff --git a/daemon/system-info.sh b/daemon/system-info.sh
new file mode 100755
index 0000000..68cdc48
--- /dev/null
+++ b/daemon/system-info.sh
@@ -0,0 +1,511 @@
+#!/usr/bin/env sh
+
+# -------------------------------------------------------------------------------------------------
+# detect the kernel
+
+KERNEL_NAME="$(uname -s)"
+KERNEL_VERSION="$(uname -r)"
+ARCHITECTURE="$(uname -m)"
+
+# -------------------------------------------------------------------------------------------------
+# detect the virtualization and possibly the container technology
+
+# systemd-detect-virt: https://github.com/systemd/systemd/blob/df423851fcc05cf02281d11aab6aee7b476c1c3b/src/basic/virt.c#L999
+# lscpu: https://github.com/util-linux/util-linux/blob/581b77da7aa4a5205902857184d555bed367e3e0/sys-utils/lscpu.c#L52
+virtualization_normalize_name() {
+  vname="$1"
+  case "$vname" in
+    "User-mode Linux") vname="uml" ;;
+    "Windows Subsystem for Linux") vname="wsl" ;;
+  esac
+
+  echo "$vname" | tr '[:upper:]' '[:lower:]' | sed 's/ /-/g'
+}
+
+CONTAINER="unknown"
+CONT_DETECTION="none"
+CONTAINER_IS_OFFICIAL_IMAGE="${NETDATA_OFFICIAL_IMAGE:-false}"
+
+if [ -z "${VIRTUALIZATION}" ]; then
+  VIRTUALIZATION="unknown"
+  VIRT_DETECTION="none"
+
+  if command -v systemd-detect-virt >/dev/null 2>&1; then
+    VIRTUALIZATION="$(systemd-detect-virt -v)"
+    VIRT_DETECTION="systemd-detect-virt"
+    CONTAINER_DETECT_TMP="$(systemd-detect-virt -c)"
+    [ -n "$CONTAINER_DETECT_TMP" ] && CONTAINER="$CONTAINER_DETECT_TMP"
+    CONT_DETECTION="systemd-detect-virt"
+  elif command -v lscpu >/dev/null 2>&1; then
+    VIRTUALIZATION=$(lscpu | grep "Hypervisor vendor:" | cut -d: -f 2 | awk '{$1=$1};1')
+    [ -n "$VIRTUALIZATION" ] && VIRT_DETECTION="lscpu"
+    [ -z "$VIRTUALIZATION" ] && lscpu | grep -q "Virtualization:" && VIRTUALIZATION="none"
+  elif command -v dmidecode >/dev/null 2>&1; then
+    VIRTUALIZATION=$(dmidecode -s system-product-name 2>/dev/null | grep "VMware\|Virtual\|KVM\|Bochs")
+    [ -n "$VIRTUALIZATION" ] && VIRT_DETECTION="dmidecode"
+  fi
+
+  if [ -z "${VIRTUALIZATION}" ] || [ "$VIRTUALIZATION" = "unknown" ]; then
+    if [ "${KERNEL_NAME}" = "FreeBSD" ]; then
+      VIRTUALIZATION=$(sysctl kern.vm_guest 2>/dev/null | cut -d: -f 2 | awk '{$1=$1};1')
+      [ -n "$VIRTUALIZATION" ] && VIRT_DETECTION="sysctl"
+    fi
+  fi
+
+  if [ -z "${VIRTUALIZATION}" ]; then
+    # Output from the command is outside of spec
+    VIRTUALIZATION="unknown"
+    VIRT_DETECTION="none"
+  elif [ "$VIRTUALIZATION" != "none" ] && [ "$VIRTUALIZATION" != "unknown" ]; then
+    VIRTUALIZATION=$(virtualization_normalize_name "$VIRTUALIZATION")
+  fi
+else
+  # Passed from outside - probably in docker run
+  VIRT_DETECTION="provided"
+fi
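+# At this point VIRTUALIZATION is either a normalized lower-case identifier
+# (e.g. "kvm" or "wsl"), "none" on bare metal, or "unknown" when detection failed.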
+ +# ------------------------------------------------------------------------------------------------- +# detect containers with heuristics + +if [ "${CONTAINER}" = "unknown" ]; then + if [ -f /proc/1/sched ]; then + IFS='(, ' read -r process _ < /proc/1/sched + if [ "${process}" = "netdata" ]; then + CONTAINER="container" + CONT_DETECTION="process" + fi + fi + # ubuntu and debian supply /bin/running-in-container + # https://www.apt-browse.org/browse/ubuntu/trusty/main/i386/upstart/1.12.1-0ubuntu4/file/bin/running-in-container + if /bin/running-in-container > /dev/null 2>&1; then + CONTAINER="container" + CONT_DETECTION="/bin/running-in-container" + fi + + # lxc sets environment variable 'container' + #shellcheck disable=SC2154 + if [ -n "${container}" ]; then + CONTAINER="lxc" + CONT_DETECTION="containerenv" + fi + + # docker creates /.dockerenv + # http://stackoverflow.com/a/25518345 + if [ -f "/.dockerenv" ]; then + CONTAINER="docker" + CONT_DETECTION="dockerenv" + fi + +fi + +# ------------------------------------------------------------------------------------------------- +# detect the operating system + +# Initially assume all OS detection values are for a container, these are moved later if we are bare-metal + +CONTAINER_OS_DETECTION="unknown" +CONTAINER_NAME="unknown" +CONTAINER_VERSION="unknown" +CONTAINER_VERSION_ID="unknown" +CONTAINER_ID="unknown" +CONTAINER_ID_LIKE="unknown" + +if [ "${KERNEL_NAME}" = "Darwin" ]; then + CONTAINER_ID=$(sw_vers -productName) + CONTAINER_ID_LIKE="mac" + CONTAINER_NAME="mac" + CONTAINER_VERSION=$(sw_vers -productVersion) + CONTAINER_OS_DETECTION="sw_vers" +elif [ "${KERNEL_NAME}" = "FreeBSD" ]; then + CONTAINER_ID="FreeBSD" + CONTAINER_ID_LIKE="FreeBSD" + CONTAINER_NAME="FreeBSD" + CONTAINER_OS_DETECTION="uname" + CONTAINER_VERSION=$(uname -r) + KERNEL_VERSION=$(uname -K) +else + if [ -f "/etc/os-release" ]; then + eval "$(grep -E "^(NAME|ID|ID_LIKE|VERSION|VERSION_ID)=" < /etc/os-release | sed 's/^/CONTAINER_/')" + CONTAINER_OS_DETECTION="/etc/os-release" + fi + + # shellcheck disable=SC2153 + if [ "${CONTAINER_NAME}" = "unknown" ] || [ "${CONTAINER_VERSION}" = "unknown" ] || [ "${CONTAINER_ID}" = "unknown" ]; then + if [ -f "/etc/lsb-release" ]; then + if [ "${CONTAINER_OS_DETECTION}" = "unknown" ]; then + CONTAINER_OS_DETECTION="/etc/lsb-release" + else + CONTAINER_OS_DETECTION="Mixed" + fi + DISTRIB_ID="unknown" + DISTRIB_RELEASE="unknown" + DISTRIB_CODENAME="unknown" + eval "$(grep -E "^(DISTRIB_ID|DISTRIB_RELEASE|DISTRIB_CODENAME)=" < /etc/lsb-release)" + if [ "${CONTAINER_NAME}" = "unknown" ]; then CONTAINER_NAME="${DISTRIB_ID}"; fi + if [ "${CONTAINER_VERSION}" = "unknown" ]; then CONTAINER_VERSION="${DISTRIB_RELEASE}"; fi + if [ "${CONTAINER_ID}" = "unknown" ]; then CONTAINER_ID="${DISTRIB_CODENAME}"; fi + fi + if [ -n "$(command -v lsb_release 2> /dev/null)" ]; then + if [ "${CONTAINER_OS_DETECTION}" = "unknown" ]; then + CONTAINER_OS_DETECTION="lsb_release" + else + CONTAINER_OS_DETECTION="Mixed" + fi + if [ "${CONTAINER_NAME}" = "unknown" ]; then CONTAINER_NAME="$(lsb_release -is 2> /dev/null)"; fi + if [ "${CONTAINER_VERSION}" = "unknown" ]; then CONTAINER_VERSION="$(lsb_release -rs 2> /dev/null)"; fi + if [ "${CONTAINER_ID}" = "unknown" ]; then CONTAINER_ID="$(lsb_release -cs 2> /dev/null)"; fi + fi + fi +fi + +# If Netdata is not running in a container then use the local detection as the host +HOST_OS_DETECTION="unknown" +HOST_NAME="unknown" +HOST_VERSION="unknown" +HOST_VERSION_ID="unknown" +HOST_ID="unknown" 
+HOST_ID_LIKE="unknown" + +# 'systemd-detect-virt' returns 'none' if there is no hardware/container virtualization. +if [ "${CONTAINER}" = "unknown" ] || [ "${CONTAINER}" = "none" ]; then + for v in NAME ID ID_LIKE VERSION VERSION_ID OS_DETECTION; do + eval "HOST_$v=\$CONTAINER_$v; CONTAINER_$v=none" + done +else + # Otherwise try and use a user-supplied bind-mount into the container to resolve the host details + if [ -e "/host/etc/os-release" ]; then + eval "$(grep -E "^(NAME|ID|ID_LIKE|VERSION|VERSION_ID)=" < /host/etc/os-release | sed 's/^/HOST_/')" + HOST_OS_DETECTION="/host/etc/os-release" + fi + if [ "${HOST_NAME}" = "unknown" ] || [ "${HOST_VERSION}" = "unknown" ] || [ "${HOST_ID}" = "unknown" ]; then + if [ -f "/host/etc/lsb-release" ]; then + if [ "${HOST_OS_DETECTION}" = "unknown" ]; then + HOST_OS_DETECTION="/etc/lsb-release" + else + HOST_OS_DETECTION="Mixed" + fi + DISTRIB_ID="unknown" + DISTRIB_RELEASE="unknown" + DISTRIB_CODENAME="unknown" + eval "$(grep -E "^(DISTRIB_ID|DISTRIB_RELEASE|DISTRIB_CODENAME)=" < /etc/lsb-release)" + if [ "${HOST_NAME}" = "unknown" ]; then HOST_NAME="${DISTRIB_ID}"; fi + if [ "${HOST_VERSION}" = "unknown" ]; then HOST_VERSION="${DISTRIB_RELEASE}"; fi + if [ "${HOST_ID}" = "unknown" ]; then HOST_ID="${DISTRIB_CODENAME}"; fi + fi + fi +fi + +# ------------------------------------------------------------------------------------------------- +# Detect information about the CPU + +LCPU_COUNT="unknown" +CPU_MODEL="unknown" +CPU_VENDOR="unknown" +CPU_FREQ="unknown" +CPU_INFO_SOURCE="none" + +possible_cpu_freq="" +nproc="$(command -v nproc)" +lscpu="$(command -v lscpu)" +lscpu_output="" +dmidecode="$(command -v dmidecode)" +dmidecode_output="" + +if [ -n "${lscpu}" ] && lscpu > /dev/null 2>&1; then + lscpu_output="$(LC_NUMERIC=C ${lscpu} 2> /dev/null)" + CPU_INFO_SOURCE="lscpu" + LCPU_COUNT="$(echo "${lscpu_output}" | grep "^CPU(s):" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_VENDOR="$(echo "${lscpu_output}" | grep "^Vendor ID:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_MODEL="$(echo "${lscpu_output}" | grep "^Model name:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU max MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')" + if [ -z "$possible_cpu_freq" ]; then + possible_cpu_freq="$(echo "${lscpu_output}" | grep -F "CPU MHz:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | grep -o '^[0-9]*')" + fi + if [ -z "$possible_cpu_freq" ]; then + possible_cpu_freq="$(echo "${lscpu_output}" | grep "^Model name:" | grep -Eo "[0-9\.]+GHz" | grep -o "^[0-9\.]*" | awk '{print int($0*1000)}')" + fi + [ -n "$possible_cpu_freq" ] && possible_cpu_freq="${possible_cpu_freq} MHz" +elif [ -n "${dmidecode}" ] && dmidecode -t processor > /dev/null 2>&1; then + dmidecode_output="$(${dmidecode} -t processor 2> /dev/null)" + CPU_INFO_SOURCE="dmidecode" + LCPU_COUNT="$(echo "${dmidecode_output}" | grep -F "Thread Count:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_VENDOR="$(echo "${dmidecode_output}" | grep -F "Manufacturer:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_MODEL="$(echo "${dmidecode_output}" | grep -F "Version:" | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + possible_cpu_freq="$(echo "${dmidecode_output}" | grep -F "Current Speed:" | cut -f 2 -d ':' 
| sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" +else + if [ -n "${nproc}" ]; then + CPU_INFO_SOURCE="nproc" + LCPU_COUNT="$(${nproc})" + elif [ "${KERNEL_NAME}" = FreeBSD ]; then + CPU_INFO_SOURCE="sysctl" + LCPU_COUNT="$(sysctl -n kern.smp.cpus)" + if ! possible_cpu_freq=$(sysctl -n machdep.tsc_freq 2> /dev/null); then + possible_cpu_freq=$(sysctl -n hw.model 2> /dev/null | grep -Eo "[0-9\.]+GHz" | grep -o "^[0-9\.]*" | awk '{print int($0*1000)}') + [ -n "$possible_cpu_freq" ] && possible_cpu_freq="${possible_cpu_freq} MHz" + fi + elif [ "${KERNEL_NAME}" = Darwin ]; then + CPU_INFO_SOURCE="sysctl" + LCPU_COUNT="$(sysctl -n hw.logicalcpu)" + elif [ -d /sys/devices/system/cpu ]; then + CPU_INFO_SOURCE="sysfs" + # This is potentially more accurate than checking `/proc/cpuinfo`. + LCPU_COUNT="$(find /sys/devices/system/cpu -mindepth 1 -maxdepth 1 -type d -name 'cpu*' | grep -cEv 'idle|freq')" + elif [ -r /proc/cpuinfo ]; then + CPU_INFO_SOURCE="procfs" + LCPU_COUNT="$(grep -c ^processor /proc/cpuinfo)" + fi + + if [ "${KERNEL_NAME}" = Darwin ]; then + CPU_MODEL="$(sysctl -n machdep.cpu.brand_string)" + if [ "${ARCHITECTURE}" = "x86_64" ]; then + CPU_VENDOR="$(sysctl -n machdep.cpu.vendor)" + else + CPU_VENDOR="Apple" + fi + echo "${CPU_INFO_SOURCE}" | grep -qv sysctl && CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysctl" + elif uname --version 2> /dev/null | grep -qF 'GNU coreutils'; then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} uname" + CPU_MODEL="$(uname -p)" + CPU_VENDOR="$(uname -i)" + elif [ "${KERNEL_NAME}" = FreeBSD ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv sysctl); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysctl" + fi + + CPU_MODEL="$(sysctl -n hw.model)" + elif [ -r /proc/cpuinfo ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv procfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} procfs" + fi + + CPU_MODEL="$(grep -F "model name" /proc/cpuinfo | head -n 1 | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + CPU_VENDOR="$(grep -F "vendor_id" /proc/cpuinfo | head -n 1 | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//')" + fi +fi + +if [ "${KERNEL_NAME}" = Darwin ] && [ "${ARCHITECTURE}" = "x86_64" ]; then + CPU_FREQ="$(sysctl -n hw.cpufrequency)" +elif [ -r /sys/devices/system/cpu/cpu0/cpufreq/base_frequency ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv sysfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysfs" + fi + + value="$(cat /sys/devices/system/cpu/cpu0/cpufreq/base_frequency)" + CPU_FREQ="$((value * 1000))" +elif [ -n "${possible_cpu_freq}" ]; then + CPU_FREQ="${possible_cpu_freq}" +elif [ -r /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv sysfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} sysfs" + fi + + value="$(cat /sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq)" + CPU_FREQ="$((value * 1000))" +elif [ -r /proc/cpuinfo ]; then + if (echo "${CPU_INFO_SOURCE}" | grep -qv procfs); then + CPU_INFO_SOURCE="${CPU_INFO_SOURCE} procfs" + fi + value=$(grep "cpu MHz" /proc/cpuinfo 2>/dev/null | grep -o "[0-9]*" | head -n 1 | awk '{print int($0*1000000)}') + [ -n "$value" ] && CPU_FREQ="$value" +fi + +freq_units="$(echo "${CPU_FREQ}" | cut -f 2 -d ' ')" + +case "${freq_units}" in + GHz) + value="$(echo "${CPU_FREQ}" | cut -f 1 -d ' ')" + CPU_FREQ="$((value * 1000 * 1000 * 1000))" + ;; + MHz) + value="$(echo "${CPU_FREQ}" | cut -f 1 -d ' ')" + CPU_FREQ="$((value * 1000 * 1000))" + ;; + KHz) + value="$(echo "${CPU_FREQ}" | cut -f 1 -d ' ')" + CPU_FREQ="$((value * 1000))" + 
;;
+  *) ;;
+
+esac
+
+# -------------------------------------------------------------------------------------------------
+# Detect the total system RAM
+
+TOTAL_RAM="unknown"
+RAM_DETECTION="none"
+
+if [ "${KERNEL_NAME}" = FreeBSD ]; then
+  RAM_DETECTION="sysctl"
+  TOTAL_RAM="$(sysctl -n hw.physmem)"
+elif [ "${KERNEL_NAME}" = Darwin ]; then
+  RAM_DETECTION="sysctl"
+  TOTAL_RAM="$(sysctl -n hw.memsize)"
+elif [ -r /proc/meminfo ]; then
+  RAM_DETECTION="procfs"
+  TOTAL_RAM="$(grep -F MemTotal /proc/meminfo | cut -f 2 -d ':' | sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//' | cut -f 1 -d ' ')"
+  TOTAL_RAM="$((TOTAL_RAM * 1024))"
+fi
+
+# -------------------------------------------------------------------------------------------------
+# Detect the total system disk space
+
+DISK_SIZE="unknown"
+DISK_DETECTION="none"
+
+if [ "${KERNEL_NAME}" = "Darwin" ]; then
+  if DISK_SIZE=$(diskutil info / 2>/dev/null | awk '/Disk Size/ {total += substr($5,2,length($5))} END { print total }') &&
+    [ -n "$DISK_SIZE" ] && [ "$DISK_SIZE" != "0" ]; then
+    DISK_DETECTION="diskutil"
+  else
+    types='hfs'
+
+    if (lsvfs | grep -q apfs); then
+      types="${types},apfs"
+    fi
+
+    if (lsvfs | grep -q ufs); then
+      types="${types},ufs"
+    fi
+
+    DISK_DETECTION="df"
+    DISK_SIZE=$(($(/bin/df -k -t ${types} | tail -n +2 | sed -E 's/\/dev\/disk([[:digit:]]*)s[[:digit:]]*/\/dev\/disk\1/g' | sort -k 1 | awk -F ' ' '{s=$NF;for(i=NF-1;i>=1;i--)s=s FS $i;print s}' | uniq -f 9 | awk '{print $8}' | tr '\n' '+' | rev | cut -f 2- -d '+' | rev) * 1024))
+  fi
+elif [ "${KERNEL_NAME}" = FreeBSD ]; then
+  types='ufs'
+
+  if (lsvfs | grep -q zfs); then
+    types="${types},zfs"
+  fi
+
+  DISK_DETECTION="df"
+  total="$(df -t ${types} -c -k | tail -n 1 | awk '{print $2}')"
+  DISK_SIZE="$((total * 1024))"
+else
+  if [ -d /sys/block ] && [ -r /proc/devices ]; then
+    dev_major_whitelist=''
+
+    # This is a list of device names used for block storage devices.
+    # These translate to the prefixes of files in `/dev` indicating the device type.
+    # They are sorted by lowest used device major number, with dynamically assigned ones at the end.
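+    # (For example, "sd" matches /dev/sda, /dev/sdb, ..., and "mmcblk" matches /dev/mmcblk0, ...)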
+ # We use this to look up device major numbers in `/proc/devices` + device_names='hd sd mfm ad ftl pd nftl dasd intfl mmcblk ub xvd rfd vbd nvme virtblk blkext' + + for name in ${device_names}; do + if grep -qE " ${name}\$" /proc/devices; then + dev_major_whitelist="${dev_major_whitelist}:$(grep -E "${name}\$" /proc/devices | sed -e 's/^[[:space:]]*//' | cut -f 1 -d ' ' | tr '\n' ':'):" + fi + done + + DISK_DETECTION="sysfs" + DISK_SIZE="0" + for disk in /sys/block/*; do + if [ -r "${disk}/size" ] \ + && (echo "${dev_major_whitelist}" | grep -q ":$(cut -f 1 -d ':' "${disk}/dev"):") \ + && grep -qv 1 "${disk}/removable"; then + size="$(($(cat "${disk}/size") * 512))" + DISK_SIZE="$((DISK_SIZE + size))" + fi + done + elif df --version 2> /dev/null | grep -qF "GNU coreutils"; then + DISK_DETECTION="df" + DISK_SIZE=$(($(df -x tmpfs -x devtmpfs -x squashfs -l -B1 --output=source,size | tail -n +2 | sort -u -k 1 | awk '{print $2}' | tr '\n' '+' | head -c -1))) + else + DISK_DETECTION="df" + include_fs_types="ext*|btrfs|xfs|jfs|reiser*|zfs" + DISK_SIZE=$(($(df -T -P | tail -n +2 | sort -u -k 1 | grep "${include_fs_types}" | awk '{print $3}' | tr '\n' '+' | head -c -1) * 1024)) + fi +fi + +# ------------------------------------------------------------------------------------------------- +# Detect whether the node is kubernetes node + +HOST_IS_K8S_NODE="false" + +if [ -n "${KUBERNETES_SERVICE_HOST}" ] && [ -n "${KUBERNETES_SERVICE_PORT}" ]; then + # These env vars are set for every container managed by k8s. + HOST_IS_K8S_NODE="true" +elif pgrep "kubelet"; then + # The kubelet is the primary "node agent" that runs on each node. + HOST_IS_K8S_NODE="true" +fi + +# ------------------------------------------------------------------------------------------------ +# Detect instance metadata for VMs running on cloud providers + +CLOUD_TYPE="unknown" +CLOUD_INSTANCE_TYPE="unknown" +CLOUD_INSTANCE_REGION="unknown" + +if [ "${VIRTUALIZATION}" != "none" ] && command -v curl > /dev/null 2>&1; then + # Returned HTTP status codes: GCP is 200, AWS is 200, DO is 404. + curl --fail -s -m 1 --noproxy "*" http://169.254.169.254 >/dev/null 2>&1 + ret=$? + # anything but operation timeout. 
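+  # curl's exit code 28 means "operation timed out"; if the link-local metadata
+  # address did not answer at all, skip the provider-specific probes below.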
+ if [ "$ret" != 28 ]; then + # Try AWS IMDSv2 + if [ "${CLOUD_TYPE}" = "unknown" ]; then + AWS_IMDS_TOKEN="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -X PUT "http://169.254.169.254/latest/api/token" -H "X-aws-ec2-metadata-token-ttl-seconds: 21600")" + if [ -n "${AWS_IMDS_TOKEN}" ]; then + CLOUD_TYPE="AWS" + CLOUD_INSTANCE_TYPE="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "X-aws-ec2-metadata-token: $AWS_IMDS_TOKEN" -v "http://169.254.169.254/latest/meta-data/instance-type" 2>/dev/null)" + CLOUD_INSTANCE_REGION="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "X-aws-ec2-metadata-token: $AWS_IMDS_TOKEN" -v "http://169.254.169.254/latest/meta-data/placement/region" 2>/dev/null)" + fi + fi + + # Try GCE computeMetadata v1 + if [ "${CLOUD_TYPE}" = "unknown" ]; then + if [ -n "$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1")" ]; then + CLOUD_TYPE="GCP" + CLOUD_INSTANCE_TYPE="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/machine-type")" + [ -n "$CLOUD_INSTANCE_TYPE" ] && CLOUD_INSTANCE_TYPE=$(basename "$CLOUD_INSTANCE_TYPE") + CLOUD_INSTANCE_REGION="$(curl --fail -s --connect-timeout 1 -m 3 --noproxy "*" -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/zone")" + [ -n "$CLOUD_INSTANCE_REGION" ] && CLOUD_INSTANCE_REGION=$(basename "$CLOUD_INSTANCE_REGION") && CLOUD_INSTANCE_REGION=${CLOUD_INSTANCE_REGION%-*} + fi + fi + + # TODO: needs to be tested in Microsoft Azure + # Try Azure IMDS + # if [ "${CLOUD_TYPE}" = "unknown" ]; then + # AZURE_IMDS_DATA="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance?version=2021-10-01")" + # if [ -n "${AZURE_IMDS_DATA}" ]; then + # CLOUD_TYPE="Azure" + # CLOUD_INSTANCE_TYPE="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance/compute/vmSize?version=2021-10-01&format=text")" + # CLOUD_INSTANCE_REGION="$(curl --fail -s -m 5 -H "Metadata: true" --noproxy "*" "http://169.254.169.254/metadata/instance/compute/location?version=2021-10-01&format=text")" + # fi + # fi + fi +fi + +echo "NETDATA_CONTAINER_OS_NAME=${CONTAINER_NAME}" +echo "NETDATA_CONTAINER_OS_ID=${CONTAINER_ID}" +echo "NETDATA_CONTAINER_OS_ID_LIKE=${CONTAINER_ID_LIKE}" +echo "NETDATA_CONTAINER_OS_VERSION=${CONTAINER_VERSION}" +echo "NETDATA_CONTAINER_OS_VERSION_ID=${CONTAINER_VERSION_ID}" +echo "NETDATA_CONTAINER_OS_DETECTION=${CONTAINER_OS_DETECTION}" +echo "NETDATA_CONTAINER_IS_OFFICIAL_IMAGE=${CONTAINER_IS_OFFICIAL_IMAGE}" +echo "NETDATA_HOST_OS_NAME=${HOST_NAME}" +echo "NETDATA_HOST_OS_ID=${HOST_ID}" +echo "NETDATA_HOST_OS_ID_LIKE=${HOST_ID_LIKE}" +echo "NETDATA_HOST_OS_VERSION=${HOST_VERSION}" +echo "NETDATA_HOST_OS_VERSION_ID=${HOST_VERSION_ID}" +echo "NETDATA_HOST_OS_DETECTION=${HOST_OS_DETECTION}" +echo "NETDATA_HOST_IS_K8S_NODE=${HOST_IS_K8S_NODE}" +echo "NETDATA_SYSTEM_KERNEL_NAME=${KERNEL_NAME}" +echo "NETDATA_SYSTEM_KERNEL_VERSION=${KERNEL_VERSION}" +echo "NETDATA_SYSTEM_ARCHITECTURE=${ARCHITECTURE}" +echo "NETDATA_SYSTEM_VIRTUALIZATION=${VIRTUALIZATION}" +echo "NETDATA_SYSTEM_VIRT_DETECTION=${VIRT_DETECTION}" +echo "NETDATA_SYSTEM_CONTAINER=${CONTAINER}" +echo "NETDATA_SYSTEM_CONTAINER_DETECTION=${CONT_DETECTION}" +echo "NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT=${LCPU_COUNT}" +echo "NETDATA_SYSTEM_CPU_VENDOR=${CPU_VENDOR}" +echo 
"NETDATA_SYSTEM_CPU_MODEL=${CPU_MODEL}" +echo "NETDATA_SYSTEM_CPU_FREQ=${CPU_FREQ}" +echo "NETDATA_SYSTEM_CPU_DETECTION=${CPU_INFO_SOURCE}" +echo "NETDATA_SYSTEM_TOTAL_RAM=${TOTAL_RAM}" +echo "NETDATA_SYSTEM_RAM_DETECTION=${RAM_DETECTION}" +echo "NETDATA_SYSTEM_TOTAL_DISK_SIZE=${DISK_SIZE}" +echo "NETDATA_SYSTEM_DISK_DETECTION=${DISK_DETECTION}" +echo "NETDATA_INSTANCE_CLOUD_TYPE=${CLOUD_TYPE}" +echo "NETDATA_INSTANCE_CLOUD_INSTANCE_TYPE=${CLOUD_INSTANCE_TYPE}" +echo "NETDATA_INSTANCE_CLOUD_INSTANCE_REGION=${CLOUD_INSTANCE_REGION}" diff --git a/daemon/unit_test.c b/daemon/unit_test.c new file mode 100644 index 0000000..f698618 --- /dev/null +++ b/daemon/unit_test.c @@ -0,0 +1,2600 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +static bool cmd_arg_sanitization_test(const char *expected, const char *src, char *dst, size_t dst_size) { + bool ok = sanitize_command_argument_string(dst, src, dst_size); + + if (!expected) + return ok == false; + + return strcmp(expected, dst) == 0; +} + +bool command_argument_sanitization_tests() { + char dst[1024]; + + for (size_t i = 0; i != 5; i++) { + const char *expected = i == 4 ? "'\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "'", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 9; i++) { + const char *expected = i == 8 ? "'\\'''\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "''", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 7; i++) { + const char *expected = i == 6 ? "'\\''a" : NULL; + if (cmd_arg_sanitization_test(expected, "'a", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 7; i++) { + const char *expected = i == 6 ? "a'\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "a'", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 22; i++) { + const char *expected = i == 21 ? 
"foo'\\''a'\\'''\\'''\\''b" : NULL; + if (cmd_arg_sanitization_test(expected, "--foo'a'''b", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n length: %zu\n", expected, dst, strlen(dst)); + return 1; + } + } + + return 0; +} + +static int check_number_printing(void) { + struct { + NETDATA_DOUBLE n; + const char *correct; + } values[] = { + { .n = 0, .correct = "0" }, + { .n = 0.0000001, .correct = "0.0000001" }, + { .n = 0.00000009, .correct = "0.0000001" }, + { .n = 0.000000001, .correct = "0" }, + { .n = 99.99999999999999999, .correct = "100" }, + { .n = -99.99999999999999999, .correct = "-100" }, + { .n = 123.4567890123456789, .correct = "123.456789" }, + { .n = 9999.9999999, .correct = "9999.9999999" }, + { .n = -9999.9999999, .correct = "-9999.9999999" }, + { .n = 0, .correct = NULL }, + }; + + char netdata[50], system[50]; + int i, failed = 0; + for(i = 0; values[i].correct ; i++) { + print_netdata_double(netdata, values[i].n); + snprintfz(system, 49, "%0.12" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)values[i].n); + + int ok = 1; + if(strcmp(netdata, values[i].correct) != 0) { + ok = 0; + failed++; + } + + fprintf(stderr, "'%s' (system) printed as '%s' (netdata): %s\n", system, netdata, ok?"OK":"FAILED"); + } + + if(failed) return 1; + return 0; +} + +static int check_rrdcalc_comparisons(void) { + RRDCALC_STATUS a, b; + + // make sure calloc() sets the status to UNINITIALIZED + memset(&a, 0, sizeof(RRDCALC_STATUS)); + if(a != RRDCALC_STATUS_UNINITIALIZED) { + fprintf(stderr, "%s is not zero.\n", rrdcalc_status2string(RRDCALC_STATUS_UNINITIALIZED)); + return 1; + } + + a = RRDCALC_STATUS_REMOVED; + b = RRDCALC_STATUS_UNDEFINED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_UNDEFINED; + b = RRDCALC_STATUS_UNINITIALIZED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_UNINITIALIZED; + b = RRDCALC_STATUS_CLEAR; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_CLEAR; + b = RRDCALC_STATUS_RAISED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_RAISED; + b = RRDCALC_STATUS_WARNING; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_WARNING; + b = RRDCALC_STATUS_CRITICAL; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + fprintf(stderr, "RRDCALC_STATUSes are sortable.\n"); + + return 0; +} + +int check_storage_number(NETDATA_DOUBLE n, int debug) { + char buffer[100]; + uint32_t flags = SN_DEFAULT_FLAGS; + + storage_number s = pack_storage_number(n, flags); + NETDATA_DOUBLE d = unpack_storage_number(s); + + if(!does_storage_number_exist(s)) { + fprintf(stderr, "Exists flags missing for number " NETDATA_DOUBLE_FORMAT "!\n", n); + return 5; + } + + NETDATA_DOUBLE ddiff = d - n; + NETDATA_DOUBLE dcdiff = ddiff * 100.0 / n; + + if(dcdiff < 0) dcdiff = -dcdiff; + + size_t len = (size_t)print_netdata_double(buffer, d); + NETDATA_DOUBLE p = str2ndd(buffer, NULL); + NETDATA_DOUBLE pdiff = n - p; + NETDATA_DOUBLE pcdiff = pdiff * 100.0 / n; + if(pcdiff < 0) pcdiff = 
-pcdiff; + + if(debug) { + fprintf(stderr, + NETDATA_DOUBLE_FORMAT + " original\n" NETDATA_DOUBLE_FORMAT " packed and unpacked, (stored as 0x%08X, diff " NETDATA_DOUBLE_FORMAT + ", " NETDATA_DOUBLE_FORMAT "%%)\n" + "%s printed after unpacked (%zu bytes)\n" NETDATA_DOUBLE_FORMAT + " re-parsed from printed (diff " NETDATA_DOUBLE_FORMAT ", " NETDATA_DOUBLE_FORMAT "%%)\n\n", + n, + d, s, ddiff, dcdiff, + buffer, len, + p, pdiff, pcdiff + ); + if(len != strlen(buffer)) fprintf(stderr, "ERROR: printed number %s is reported to have length %zu but it has %zu\n", buffer, len, strlen(buffer)); + + if(dcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) + fprintf(stderr, "WARNING: packing number " NETDATA_DOUBLE_FORMAT " has accuracy loss " NETDATA_DOUBLE_FORMAT " %%\n", n, dcdiff); + + if(pcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) + fprintf(stderr, "WARNING: re-parsing the packed, unpacked and printed number " NETDATA_DOUBLE_FORMAT + " has accuracy loss " NETDATA_DOUBLE_FORMAT " %%\n", n, pcdiff); + } + + if(len != strlen(buffer)) return 1; + if(dcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) return 3; + if(pcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) return 4; + return 0; +} + +NETDATA_DOUBLE storage_number_min(NETDATA_DOUBLE n) { + NETDATA_DOUBLE r = 1, last; + + do { + last = n; + n /= 2.0; + storage_number t = pack_storage_number(n, SN_DEFAULT_FLAGS); + r = unpack_storage_number(t); + } while(r != 0.0 && r != last); + + return last; +} + +void benchmark_storage_number(int loop, int multiplier) { + int i, j; + NETDATA_DOUBLE n, d; + storage_number s; + unsigned long long user, system, total, mine, their; + + NETDATA_DOUBLE storage_number_positive_min = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW); + NETDATA_DOUBLE storage_number_positive_max = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MAX_RAW); + + char buffer[100]; + + struct rusage now, last; + + fprintf(stderr, "\n\nBenchmarking %d numbers, please wait...\n\n", loop); + + // ------------------------------------------------------------------------ + + fprintf(stderr, "SYSTEM LONG DOUBLE SIZE: %zu bytes\n", sizeof(NETDATA_DOUBLE)); + fprintf(stderr, "NETDATA FLOATING POINT SIZE: %zu bytes\n", sizeof(storage_number)); + + mine = (NETDATA_DOUBLE)sizeof(storage_number) * (NETDATA_DOUBLE)loop; + their = (NETDATA_DOUBLE)sizeof(NETDATA_DOUBLE) * (NETDATA_DOUBLE)loop; + + if(mine > their) { + fprintf(stderr, "\nNETDATA NEEDS %0.2" NETDATA_DOUBLE_MODIFIER " TIMES MORE MEMORY. 
Sorry!\n", (NETDATA_DOUBLE)(mine / their)); + } + else { + fprintf(stderr, "\nNETDATA INTERNAL FLOATING POINT ARITHMETICS NEEDS %0.2" NETDATA_DOUBLE_MODIFIER " TIMES LESS MEMORY.\n", (NETDATA_DOUBLE)(their / mine)); + } + + fprintf(stderr, "\nNETDATA FLOATING POINT\n"); + fprintf(stderr, "MIN POSITIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW)); + fprintf(stderr, "MAX POSITIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_POSITIVE_MAX_RAW)); + fprintf(stderr, "MIN NEGATIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MIN_RAW)); + fprintf(stderr, "MAX NEGATIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MAX_RAW)); + fprintf(stderr, "Maximum accuracy loss accepted: " NETDATA_DOUBLE_FORMAT "%%\n\n\n", (NETDATA_DOUBLE)ACCURACY_LOSS_ACCEPTED_PERCENT); + + // ------------------------------------------------------------------------ + + fprintf(stderr, "INTERNAL LONG DOUBLE PRINTING: "); + getrusage(RUSAGE_SELF, &last); + + // do the job + for(j = 1; j < 11 ;j++) { + n = storage_number_positive_min * j; + + for(i = 0; i < loop ;i++) { + n *= multiplier; + if(n > storage_number_positive_max) n = storage_number_positive_min; + + print_netdata_double(buffer, n); + } + } + + getrusage(RUSAGE_SELF, &now); + user = now.ru_utime.tv_sec * 1000000ULL + now.ru_utime.tv_usec - last.ru_utime.tv_sec * 1000000ULL + last.ru_utime.tv_usec; + system = now.ru_stime.tv_sec * 1000000ULL + now.ru_stime.tv_usec - last.ru_stime.tv_sec * 1000000ULL + last.ru_stime.tv_usec; + total = user + system; + mine = total; + + fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER + ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(system / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0)); + + // ------------------------------------------------------------------------ + + fprintf(stderr, "SYSTEM LONG DOUBLE PRINTING: "); + getrusage(RUSAGE_SELF, &last); + + // do the job + for(j = 1; j < 11 ;j++) { + n = storage_number_positive_min * j; + + for(i = 0; i < loop ;i++) { + n *= multiplier; + if(n > storage_number_positive_max) n = storage_number_positive_min; + snprintfz(buffer, 100, NETDATA_DOUBLE_FORMAT, n); + } + } + + getrusage(RUSAGE_SELF, &now); + user = now.ru_utime.tv_sec * 1000000ULL + now.ru_utime.tv_usec - last.ru_utime.tv_sec * 1000000ULL + last.ru_utime.tv_usec; + system = now.ru_stime.tv_sec * 1000000ULL + now.ru_stime.tv_usec - last.ru_stime.tv_sec * 1000000ULL + last.ru_stime.tv_usec; + total = user + system; + their = total; + + fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER + ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(system / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0)); + + if(mine > total) { + fprintf(stderr, "NETDATA CODE IS SLOWER %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(mine * 100.0 / their - 100.0)); + } + else { + fprintf(stderr, "NETDATA CODE IS F A S T E R %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(their * 100.0 / mine - 100.0)); + } + + // ------------------------------------------------------------------------ + + fprintf(stderr, "\nINTERNAL LONG DOUBLE PRINTING WITH PACK / UNPACK: "); + getrusage(RUSAGE_SELF, &last); + + // do the job + for(j = 1; j < 11 ;j++) { + n = storage_number_positive_min * j; + + for(i = 0; i < loop ;i++) { + n *= 
multiplier; + if(n > storage_number_positive_max) n = storage_number_positive_min; + + s = pack_storage_number(n, SN_DEFAULT_FLAGS); + d = unpack_storage_number(s); + print_netdata_double(buffer, d); + } + } + + getrusage(RUSAGE_SELF, &now); + user = now.ru_utime.tv_sec * 1000000ULL + now.ru_utime.tv_usec - last.ru_utime.tv_sec * 1000000ULL + last.ru_utime.tv_usec; + system = now.ru_stime.tv_sec * 1000000ULL + now.ru_stime.tv_usec - last.ru_stime.tv_sec * 1000000ULL + last.ru_stime.tv_usec; + total = user + system; + mine = total; + + fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER + ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(system / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0)); + + if(mine > their) { + fprintf(stderr, "WITH PACKING UNPACKING NETDATA CODE IS SLOWER %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(mine * 100.0 / their - 100.0)); + } + else { + fprintf(stderr, "EVEN WITH PACKING AND UNPACKING, NETDATA CODE IS F A S T E R %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(their * 100.0 / mine - 100.0)); + } + + // ------------------------------------------------------------------------ + +} + +static int check_storage_number_exists() { + uint32_t flags = SN_DEFAULT_FLAGS; + NETDATA_DOUBLE n = 0.0; + + storage_number s = pack_storage_number(n, flags); + NETDATA_DOUBLE d = unpack_storage_number(s); + + if(n != d) { + fprintf(stderr, "Wrong number returned. Expected " NETDATA_DOUBLE_FORMAT ", returned " NETDATA_DOUBLE_FORMAT "!\n", n, d); + return 1; + } + + return 0; +} + +int unit_test_storage() { + if(check_storage_number_exists()) return 0; + + NETDATA_DOUBLE storage_number_positive_min = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW); + NETDATA_DOUBLE storage_number_negative_max = unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MAX_RAW); + + NETDATA_DOUBLE c, a = 0; + int i, j, g, r = 0; + + for(g = -1; g <= 1 ; g++) { + a = 0; + + if(!g) continue; + + for(j = 0; j < 9 ;j++) { + a += 0.0000001; + c = a * g; + for(i = 0; i < 21 ;i++, c *= 10) { + if(c > 0 && c < storage_number_positive_min) continue; + if(c < 0 && c > storage_number_negative_max) continue; + + if(check_storage_number(c, 1)) return 1; + } + } + } + + // if(check_storage_number(858993459.1234567, 1)) return 1; + benchmark_storage_number(1000000, 2); + return r; +} + +int unit_test_str2ld() { + char *values[] = { + "1.2345678", "-35.6", "0.00123", "23842384234234.2", ".1", "1.2e-10", + "hello", "1wrong", "nan", "inf", NULL + }; + + int i; + for(i = 0; values[i] ; i++) { + char *e_mine = "hello", *e_sys = "world"; + NETDATA_DOUBLE mine = str2ndd(values[i], &e_mine); + NETDATA_DOUBLE sys = strtondd(values[i], &e_sys); + + if(isnan(mine)) { + if(!isnan(sys)) { + fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER + ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys); + return -1; + } + } + else if(isinf(mine)) { + if(!isinf(sys)) { + fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER + ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys); + return -1; + } + } + else if(mine != sys && ABS(mine-sys) > 0.000001) { + fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER + ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ", delta %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys, sys-mine); + return -1; + } + + if(e_mine != e_sys) { + fprintf(stderr, "Value '%s' is parsed 
correctly, but endptr is not right\n", values[i]); + return -1; + } + + fprintf(stderr, "str2ndd() parsed value '%s' exactly the same way with strtold(), returned %" NETDATA_DOUBLE_MODIFIER + " vs %" NETDATA_DOUBLE_MODIFIER "\n", values[i], mine, sys); + } + + return 0; +} + +int unit_test_buffer() { + BUFFER *wb = buffer_create(1); + char string[2048 + 1]; + char final[9000 + 1]; + int i; + + for(i = 0; i < 2048; i++) + string[i] = (char)((i % 24) + 'a'); + string[2048] = '\0'; + + const char *fmt = "string1: %s\nstring2: %s\nstring3: %s\nstring4: %s"; + buffer_sprintf(wb, fmt, string, string, string, string); + snprintfz(final, 9000, fmt, string, string, string, string); + + const char *s = buffer_tostring(wb); + + if(buffer_strlen(wb) != strlen(final) || strcmp(s, final) != 0) { + fprintf(stderr, "\nbuffer_sprintf() is faulty.\n"); + fprintf(stderr, "\nstring : %s (length %zu)\n", string, strlen(string)); + fprintf(stderr, "\nbuffer : %s (length %zu)\n", s, buffer_strlen(wb)); + fprintf(stderr, "\nexpected: %s (length %zu)\n", final, strlen(final)); + buffer_free(wb); + return -1; + } + + fprintf(stderr, "buffer_sprintf() works as expected.\n"); + buffer_free(wb); + return 0; +} + +int unit_test_static_threads() { + struct netdata_static_thread *static_threads = static_threads_get(); + + /* + * make sure enough static threads have been registered + */ + if (!static_threads) { + fprintf(stderr, "empty static_threads array\n"); + return 1; + } + + int n; + for (n = 0; static_threads[n].start_routine != NULL; n++) {} + + if (n < 2) { + fprintf(stderr, "only %d static threads registered", n); + freez(static_threads); + return 1; + } + + /* + * verify that each thread's start routine is unique. + */ + for (int i = 0; i != n - 1; i++) { + for (int j = i + 1; j != n; j++) { + if (static_threads[i].start_routine != static_threads[j].start_routine) + continue; + + fprintf(stderr, "Found duplicate threads with name: %s\n", static_threads[i].name); + freez(static_threads); + return 1; + } + } + + freez(static_threads); + return 0; +} + +// -------------------------------------------------------------------------------------------------------------------- + +struct feed_values { + unsigned long long microseconds; + collected_number value; +}; + +struct test { + char name[100]; + char description[1024]; + + int update_every; + unsigned long long multiplier; + unsigned long long divisor; + RRD_ALGORITHM algorithm; + + unsigned long feed_entries; + unsigned long result_entries; + struct feed_values *feed; + NETDATA_DOUBLE *results; + + collected_number *feed2; + NETDATA_DOUBLE *results2; +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test1 +// test absolute values stored + +struct feed_values test1_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test1_results[] = { + 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +struct test test1 = { + "test1", // name + "test absolute values stored at exactly second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 10, // feed entries + 9, // result entries + test1_feed, // feed + test1_results, // results + NULL, // feed2 + NULL // results2 +}; + +// 
-------------------------------------------------------------------------------------------------------------------- +// test2 +// test absolute values stored in the middle of second boundaries + +struct feed_values test2_feed[] = { + { 500000, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test2_results[] = { + 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +struct test test2 = { + "test2", // name + "test absolute values stored in the middle of second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 10, // feed entries + 9, // result entries + test2_feed, // feed + test2_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test3 + +struct feed_values test3_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test3_results[] = { + 10, 10, 10, 10, 10, 10, 10, 10, 10 +}; + +struct test test3 = { + "test3", // name + "test incremental values stored at exactly second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test3_feed, // feed + test3_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test4 + +struct feed_values test4_feed[] = { + { 500000, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test4_results[] = { + 10, 10, 10, 10, 10, 10, 10, 10, 10 +}; + +struct test test4 = { + "test4", // name + "test incremental values stored in the middle of second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test4_feed, // feed + test4_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test5 - 32 bit overflows + +struct feed_values test5_feed[] = { + { 0, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, +}; + +NETDATA_DOUBLE test5_results[] = { + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, +}; + +struct test test5 = { + "test5", // name + "test 32-bit incremental values overflow", + 1, // update_every + 1, // multiplier + 
1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test5_feed, // feed + test5_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test5b - 64 bit overflows + +struct feed_values test5b_feed[] = { + { 0, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, +}; + +NETDATA_DOUBLE test5b_results[] = { + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, +}; + +struct test test5b = { + "test5b", // name + "test 64-bit incremental values overflow", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test5b_feed, // feed + test5b_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test6 + +struct feed_values test6_feed[] = { + { 250000, 1000 }, + { 250000, 2000 }, + { 250000, 3000 }, + { 250000, 4000 }, + { 250000, 5000 }, + { 250000, 6000 }, + { 250000, 7000 }, + { 250000, 8000 }, + { 250000, 9000 }, + { 250000, 10000 }, + { 250000, 11000 }, + { 250000, 12000 }, + { 250000, 13000 }, + { 250000, 14000 }, + { 250000, 15000 }, + { 250000, 16000 }, +}; + +NETDATA_DOUBLE test6_results[] = { + 4000, 4000, 4000, 4000 +}; + +struct test test6 = { + "test6", // name + "test incremental values updated within the same second", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 16, // feed entries + 4, // result entries + test6_feed, // feed + test6_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test7 + +struct feed_values test7_feed[] = { + { 500000, 1000 }, + { 2000000, 2000 }, + { 2000000, 3000 }, + { 2000000, 4000 }, + { 2000000, 5000 }, + { 2000000, 6000 }, + { 2000000, 7000 }, + { 2000000, 8000 }, + { 2000000, 9000 }, + { 2000000, 10000 }, +}; + +NETDATA_DOUBLE test7_results[] = { + 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500 +}; + +struct test test7 = { + "test7", // name + "test incremental values updated in long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 18, // result entries + test7_feed, // feed + test7_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test8 + +struct feed_values test8_feed[] = { + { 500000, 1000 }, + { 2000000, 2000 }, + { 2000000, 3000 }, + { 2000000, 4000 }, + { 2000000, 5000 
}, + { 2000000, 6000 }, +}; + +NETDATA_DOUBLE test8_results[] = { + 1250, 2000, 2250, 3000, 3250, 4000, 4250, 5000, 5250, 6000 +}; + +struct test test8 = { + "test8", // name + "test absolute values updated in long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 6, // feed entries + 10, // result entries + test8_feed, // feed + test8_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test9 + +struct feed_values test9_feed[] = { + { 250000, 1000 }, + { 250000, 2000 }, + { 250000, 3000 }, + { 250000, 4000 }, + { 250000, 5000 }, + { 250000, 6000 }, + { 250000, 7000 }, + { 250000, 8000 }, + { 250000, 9000 }, + { 250000, 10000 }, + { 250000, 11000 }, + { 250000, 12000 }, + { 250000, 13000 }, + { 250000, 14000 }, + { 250000, 15000 }, + { 250000, 16000 }, +}; + +NETDATA_DOUBLE test9_results[] = { + 4000, 8000, 12000, 16000 +}; + +struct test test9 = { + "test9", // name + "test absolute values updated within the same second", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 16, // feed entries + 4, // result entries + test9_feed, // feed + test9_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test10 + +struct feed_values test10_feed[] = { + { 500000, 1000 }, + { 600000, 1000 + 600 }, + { 200000, 1600 + 200 }, + { 1000000, 1800 + 1000 }, + { 200000, 2800 + 200 }, + { 2000000, 3000 + 2000 }, + { 600000, 5000 + 600 }, + { 400000, 5600 + 400 }, + { 900000, 6000 + 900 }, + { 1000000, 6900 + 1000 }, +}; + +NETDATA_DOUBLE test10_results[] = { + 1000, 1000, 1000, 1000, 1000, 1000, 1000 +}; + +struct test test10 = { + "test10", // name + "test incremental values updated in short and long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 7, // result entries + test10_feed, // feed + test10_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test11 + +struct feed_values test11_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +collected_number test11_feed2[] = { + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +NETDATA_DOUBLE test11_results[] = { + 50, 50, 50, 50, 50, 50, 50, 50, 50 +}; + +NETDATA_DOUBLE test11_results2[] = { + 50, 50, 50, 50, 50, 50, 50, 50, 50 +}; + +struct test test11 = { + "test11", // name + "test percentage-of-incremental-row with equal values", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 9, // result entries + test11_feed, // feed + test11_results, // results + test11_feed2, // feed2 + test11_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test12 + +struct feed_values test12_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 
1000000, 100 }, +}; + +collected_number test12_feed2[] = { + 10*3, 20*3, 30*3, 40*3, 50*3, 60*3, 70*3, 80*3, 90*3, 100*3 +}; + +NETDATA_DOUBLE test12_results[] = { + 25, 25, 25, 25, 25, 25, 25, 25, 25 +}; + +NETDATA_DOUBLE test12_results2[] = { + 75, 75, 75, 75, 75, 75, 75, 75, 75 +}; + +struct test test12 = { + "test12", // name + "test percentage-of-incremental-row with equal values", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 9, // result entries + test12_feed, // feed + test12_results, // results + test12_feed2, // feed2 + test12_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test13 + +struct feed_values test13_feed[] = { + { 500000, 1000 }, + { 600000, 1000 + 600 }, + { 200000, 1600 + 200 }, + { 1000000, 1800 + 1000 }, + { 200000, 2800 + 200 }, + { 2000000, 3000 + 2000 }, + { 600000, 5000 + 600 }, + { 400000, 5600 + 400 }, + { 900000, 6000 + 900 }, + { 1000000, 6900 + 1000 }, +}; + +NETDATA_DOUBLE test13_results[] = { + 83.3333300, 100, 100, 100, 100, 100, 100 +}; + +struct test test13 = { + "test13", // name + "test incremental values updated in short and long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 7, // result entries + test13_feed, // feed + test13_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test14 + +struct feed_values test14_feed[] = { + { 0, 0x015397dc42151c41ULL }, + { 13573000, 0x015397e612e3ff5dULL }, + { 29969000, 0x015397f905ecdaa8ULL }, + { 29958000, 0x0153980c2a6cb5e4ULL }, + { 30054000, 0x0153981f4032fb83ULL }, + { 34952000, 0x015398355efadaccULL }, + { 25046000, 0x01539845ba4b09f8ULL }, + { 29947000, 0x0153985948bf381dULL }, + { 30054000, 0x0153986c5b9c27e2ULL }, + { 29942000, 0x0153987f888982d0ULL }, +}; + +NETDATA_DOUBLE test14_results[] = { + 23.1383300, 21.8515600, 21.8804600, 21.7788000, 22.0112200, 22.4386100, 22.0906100, 21.9150800 +}; + +struct test test14 = { + "test14", // name + "issue #981 with real data", + 30, // update_every + 8, // multiplier + 1000000000, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 8, // result entries + test14_feed, // feed + test14_results, // results + NULL, // feed2 + NULL // results2 +}; + +struct feed_values test14b_feed[] = { + { 0, 0 }, + { 13573000, 13573000 }, + { 29969000, 13573000 + 29969000 }, + { 29958000, 13573000 + 29969000 + 29958000 }, + { 30054000, 13573000 + 29969000 + 29958000 + 30054000 }, + { 34952000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 }, + { 25046000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 }, + { 29947000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 }, + { 30054000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 + 30054000 }, + { 29942000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 + 30054000 + 29942000 }, +}; + +NETDATA_DOUBLE test14b_results[] = { + 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000 +}; + +struct test test14b = { + "test14b", // name + "issue #981 with dummy data", + 30, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed 
entries + 8, // result entries + test14b_feed, // feed + test14b_results, // results + NULL, // feed2 + NULL // results2 +}; + +struct feed_values test14c_feed[] = { + { 29000000, 29000000 }, + { 1000000, 29000000 + 1000000 }, + { 30000000, 29000000 + 1000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, +}; + +NETDATA_DOUBLE test14c_results[] = { + 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000 +}; + +struct test test14c = { + "test14c", // name + "issue #981 with dummy data, checking for late start", + 30, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test14c_feed, // feed + test14c_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test15 + +struct feed_values test15_feed[] = { + { 0, 1068066388 }, + { 1008752, 1068822698 }, + { 993809, 1069573072 }, + { 995911, 1070324135 }, + { 1014562, 1071078166 }, + { 994684, 1071831349 }, + { 993128, 1072235739 }, + { 1010332, 1072958871 }, + { 1003394, 1073707019 }, + { 995201, 1074460255 }, +}; + +collected_number test15_feed2[] = { + 178825286, 178825286, 178825286, 178825286, 178825498, 178825498, 179165652, 179202964, 179203282, 179204130 +}; + +NETDATA_DOUBLE test15_results[] = { + 5857.4080000, 5898.4540000, 5891.6590000, 5806.3160000, 5914.2640000, 3202.2630000, 5589.6560000, 5822.5260000, 5911.7520000 +}; + +NETDATA_DOUBLE test15_results2[] = { + 0.0000000, 0.0000000, 0.0024944, 1.6324779, 0.0212777, 2655.1890000, 290.5387000, 5.6733610, 6.5960220 +}; + +struct test test15 = { + "test15", // name + "test incremental with 2 dimensions", + 1, // update_every + 8, // multiplier + 1024, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test15_feed, // feed + test15_results, // results + test15_feed2, // feed2 + test15_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- + +int run_test(struct test *test) +{ + fprintf(stderr, "\nRunning test '%s':\n%s\n", test->name, test->description); + + default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC; + default_rrd_update_every = test->update_every; + + char name[101]; + snprintfz(name, 100, "unittest-%s", test->name); + + // create the chart + RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1 + , test->update_every, RRDSET_TYPE_LINE); + RRDDIM *rd = rrddim_add(st, "dim1", NULL, test->multiplier, test->divisor, test->algorithm); + + RRDDIM *rd2 = NULL; + if(test->feed2) + rd2 = rrddim_add(st, "dim2", NULL, test->multiplier, test->divisor, test->algorithm); + + rrdset_flag_set(st, RRDSET_FLAG_DEBUG); + + // feed it with the test data + 
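+ // For reference: with RRD_ALGORITHM_INCREMENTAL each stored point is the collected + // delta normalized to a per-second rate, i.e. (value - last) * multiplier / divisor + // scaled by 1000000 / usec_since_last_update, the same formula printed by the + // fprintf() in this loop; e.g. test3 feeds +10 every 1000000 us, so every stored + // point is expected to be 10, matching test3_results. +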
time_t time_now = 0, time_start = now_realtime_sec(); + unsigned long c; + collected_number last = 0; + for(c = 0; c < test->feed_entries; c++) { + if(debug_flags) fprintf(stderr, "\n\n"); + + if(c) { + time_now += test->feed[c].microseconds; + fprintf(stderr, " > %s: feeding position %lu, after %0.3f seconds (%0.3f seconds from start), delta " NETDATA_DOUBLE_FORMAT + ", rate " NETDATA_DOUBLE_FORMAT "\n", + test->name, c+1, + (float)test->feed[c].microseconds / 1000000.0, + (float)time_now / 1000000.0, + ((NETDATA_DOUBLE)test->feed[c].value - (NETDATA_DOUBLE)last) * (NETDATA_DOUBLE)test->multiplier / (NETDATA_DOUBLE)test->divisor, + (((NETDATA_DOUBLE)test->feed[c].value - (NETDATA_DOUBLE)last) * (NETDATA_DOUBLE)test->multiplier / (NETDATA_DOUBLE)test->divisor) / (NETDATA_DOUBLE)test->feed[c].microseconds * (NETDATA_DOUBLE)1000000); + + // rrdset_next_usec_unfiltered(st, test->feed[c].microseconds); + st->usec_since_last_update = test->feed[c].microseconds; + } + else { + fprintf(stderr, " > %s: feeding position %lu\n", test->name, c+1); + } + + fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rrddim_name(rd), test->feed[c].value); + rrddim_set(st, "dim1", test->feed[c].value); + last = test->feed[c].value; + + if(rd2) { + fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rrddim_name(rd2), test->feed2[c]); + rrddim_set(st, "dim2", test->feed2[c]); + } + + struct timeval now; + now_realtime_timeval(&now); + rrdset_timed_done(st, now, false); + + // align the first entry to second boundary + if(!c) { + fprintf(stderr, " > %s: fixing first collection time to be %llu microseconds to second boundary\n", test->name, test->feed[c].microseconds); + rd->last_collected_time.tv_usec = st->last_collected_time.tv_usec = st->last_updated.tv_usec = test->feed[c].microseconds; + // time_start = st->last_collected_time.tv_sec; + } + } + + // check the result + int errors = 0; + + if(st->counter != test->result_entries) { + fprintf(stderr, " %s stored %zu entries, but we were expecting %lu, ### E R R O R ###\n", test->name, st->counter, test->result_entries); + errors++; + } + + unsigned long max = (st->counter < test->result_entries)?st->counter:test->result_entries; + for(c = 0 ; c < max ; c++) { + NETDATA_DOUBLE v = unpack_storage_number(rd->db[c]); + NETDATA_DOUBLE n = unpack_storage_number(pack_storage_number(test->results[c], SN_DEFAULT_FLAGS)); + int same = (roundndd(v * 10000000.0) == roundndd(n * 10000000.0))?1:0; + fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT + ", found " NETDATA_DOUBLE_FORMAT ", %s\n", + test->name, rrddim_name(rd), c+1, + (int64_t)((rrdset_first_entry_t(st) + c * st->update_every) - time_start), + n, v, (same)?"OK":"### E R R O R ###"); + + if(!same) errors++; + + if(rd2) { + v = unpack_storage_number(rd2->db[c]); + n = test->results2[c]; + same = (roundndd(v * 10000000.0) == roundndd(n * 10000000.0))?1:0; + fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT + ", found " NETDATA_DOUBLE_FORMAT ", %s\n", + test->name, rrddim_name(rd2), c+1, + (int64_t)((rrdset_first_entry_t(st) + c * st->update_every) - time_start), + n, v, (same)?"OK":"### E R R O R ###"); + if(!same) errors++; + } + } + + return errors; +} + +static int test_variable_renames(void) { + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + + fprintf(stderr, "Creating chart\n"); + RRDSET *st = rrdset_create_localhost("chart", "ID", NULL, "family", "context", 
"Unit Testing", "a value", "unittest", NULL, 1, 1, RRDSET_TYPE_LINE); + fprintf(stderr, "Created chart with id '%s', name '%s'\n", rrdset_id(st), rrdset_name(st)); + + fprintf(stderr, "Creating dimension DIM1\n"); + RRDDIM *rd1 = rrddim_add(st, "DIM1", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); + + fprintf(stderr, "Creating dimension DIM2\n"); + RRDDIM *rd2 = rrddim_add(st, "DIM2", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); + + fprintf(stderr, "Renaming chart to CHARTNAME1\n"); + rrdset_reset_name(st, "CHARTNAME1"); + fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", rrdset_id(st), rrdset_name(st)); + + fprintf(stderr, "Renaming chart to CHARTNAME2\n"); + rrdset_reset_name(st, "CHARTNAME2"); + fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", rrdset_id(st), rrdset_name(st)); + + fprintf(stderr, "Renaming dimension DIM1 to DIM1NAME1\n"); + rrddim_reset_name(st, rd1, "DIM1NAME1"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); + + fprintf(stderr, "Renaming dimension DIM1 to DIM1NAME2\n"); + rrddim_reset_name(st, rd1, "DIM1NAME2"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); + + fprintf(stderr, "Renaming dimension DIM2 to DIM2NAME1\n"); + rrddim_reset_name(st, rd2, "DIM2NAME1"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); + + fprintf(stderr, "Renaming dimension DIM2 to DIM2NAME2\n"); + rrddim_reset_name(st, rd2, "DIM2NAME2"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); + + BUFFER *buf = buffer_create(1); + health_api_v1_chart_variables2json(st, buf); + fprintf(stderr, "%s", buffer_tostring(buf)); + buffer_free(buf); + return 1; +} + +int check_strdupz_path_subpath() { + + struct strdupz_path_subpath_checks { + const char *path; + const char *subpath; + const char *result; + } checks[] = { + { "", "", "." 
}, + { "/", "", "/" }, + { "/etc/netdata", "", "/etc/netdata" }, + { "/etc/netdata///", "", "/etc/netdata" }, + { "/etc/netdata///", "health.d", "/etc/netdata/health.d" }, + { "/etc/netdata///", "///health.d", "/etc/netdata/health.d" }, + { "/etc/netdata", "///health.d", "/etc/netdata/health.d" }, + { "", "///health.d", "./health.d" }, + { "/", "///health.d", "/health.d" }, + + // terminator + { NULL, NULL, NULL } + }; + + size_t i; + for(i = 0; checks[i].result ; i++) { + char *s = strdupz_path_subpath(checks[i].path, checks[i].subpath); + fprintf(stderr, "strdupz_path_subpath(\"%s\", \"%s\") = \"%s\": ", checks[i].path, checks[i].subpath, s); + if(!s || strcmp(s, checks[i].result) != 0) { + freez(s); + fprintf(stderr, "FAILED\n"); + return 1; + } + else { + freez(s); + fprintf(stderr, "OK\n"); + } + } + + return 0; +} + +int run_all_mockup_tests(void) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + if(check_strdupz_path_subpath()) + return 1; + + if(check_number_printing()) + return 1; + + if(check_rrdcalc_comparisons()) + return 1; + + if(!test_variable_renames()) + return 1; + + if(run_test(&test1)) + return 1; + + if(run_test(&test2)) + return 1; + + if(run_test(&test3)) + return 1; + + if(run_test(&test4)) + return 1; + + if(run_test(&test5)) + return 1; + + if(run_test(&test5b)) + return 1; + + if(run_test(&test6)) + return 1; + + if(run_test(&test7)) + return 1; + + if(run_test(&test8)) + return 1; + + if(run_test(&test9)) + return 1; + + if(run_test(&test10)) + return 1; + + if(run_test(&test11)) + return 1; + + if(run_test(&test12)) + return 1; + + if(run_test(&test13)) + return 1; + + if(run_test(&test14)) + return 1; + + if(run_test(&test14b)) + return 1; + + if(run_test(&test14c)) + return 1; + + if(run_test(&test15)) + return 1; + + + + return 0; +} + +int unit_test(long delay, long shift) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + static int repeat = 0; + repeat++; + + char name[101]; + snprintfz(name, 100, "unittest-%d-%ld-%ld", repeat, delay, shift); + + //debug_flags = 0xffffffff; + default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC; + default_rrd_update_every = 1; + + int do_abs = 1; + int do_inc = 1; + int do_abst = 0; + int do_absi = 0; + + RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1, 1 + , RRDSET_TYPE_LINE); + rrdset_flag_set(st, RRDSET_FLAG_DEBUG); + + RRDDIM *rdabs = NULL; + RRDDIM *rdinc = NULL; + RRDDIM *rdabst = NULL; + RRDDIM *rdabsi = NULL; + + if(do_abs) rdabs = rrddim_add(st, "absolute", "absolute", 1, 1, RRD_ALGORITHM_ABSOLUTE); + if(do_inc) rdinc = rrddim_add(st, "incremental", "incremental", 1, 1, RRD_ALGORITHM_INCREMENTAL); + if(do_abst) rdabst = rrddim_add(st, "percentage-of-absolute-row", "percentage-of-absolute-row", 1, 1, RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL); + if(do_absi) rdabsi = rrddim_add(st, "percentage-of-incremental-row", "percentage-of-incremental-row", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + + long increment = 1000; + collected_number i = 0; + + unsigned long c, dimensions = rrdset_number_of_dimensions(st); + RRDDIM *rd; + + for(c = 0; c < 20 ;c++) { + i += increment; + + fprintf(stderr, "\n\nLOOP = %lu, DELAY = %ld, VALUE = " COLLECTED_NUMBER_FORMAT "\n", c, delay, i); + if(c) { + // rrdset_next_usec_unfiltered(st, delay); + st->usec_since_last_update = delay; + } + if(do_abs) rrddim_set(st, "absolute", i); + if(do_inc) rrddim_set(st, "incremental", i); + if(do_abst) rrddim_set(st, "percentage-of-absolute-row", i); + if(do_absi) 
rrddim_set(st, "percentage-of-incremental-row", i); + + if(!c) { + now_realtime_timeval(&st->last_collected_time); + st->last_collected_time.tv_usec = shift; + } + + // prevent it from deleting the dimensions + rrddim_foreach_read(rd, st) { + rd->last_collected_time.tv_sec = st->last_collected_time.tv_sec; + } + rrddim_foreach_done(rd); + + rrdset_done(st); + } + + unsigned long oincrement = increment; + increment = increment * st->update_every * 1000000 / delay; + fprintf(stderr, "\n\nORIGINAL INCREMENT: %lu, INCREMENT %ld, DELAY %ld, SHIFT %ld\n", oincrement * 10, increment * 10, delay, shift); + + int ret = 0; + storage_number sn; + NETDATA_DOUBLE cn, v; + for(c = 0 ; c < st->counter ; c++) { + fprintf(stderr, "\nPOSITION: c = %lu, EXPECTED VALUE %lu\n", c, (oincrement + c * increment + increment * (1000000 - shift) / 1000000 )* 10); + + rrddim_foreach_read(rd, st) { + sn = rd->db[c]; + cn = unpack_storage_number(sn); + fprintf(stderr, "\t %s " NETDATA_DOUBLE_FORMAT " (PACKED AS " STORAGE_NUMBER_FORMAT ") -> ", rrddim_id(rd), cn, sn); + + if(rd == rdabs) v = + ( oincrement + // + (increment * (1000000 - shift) / 1000000) + + (c + 1) * increment + ); + + else if(rd == rdinc) v = (c?(increment):(increment * (1000000 - shift) / 1000000)); + else if(rd == rdabst) v = oincrement / dimensions / 10; + else if(rd == rdabsi) v = oincrement / dimensions / 10; + else v = 0; + + if(v == cn) fprintf(stderr, "passed.\n"); + else { + fprintf(stderr, "ERROR! (expected " NETDATA_DOUBLE_FORMAT ")\n", v); + ret = 1; + } + } + rrddim_foreach_done(rd); + } + + if(ret) + fprintf(stderr, "\n\nUNIT TEST(%ld, %ld) FAILED\n\n", delay, shift); + + return ret; +} + +int test_sqlite(void) { + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + sqlite3 *db_meta; + fprintf(stderr, "Testing SQLIte\n"); + + int rc = sqlite3_open(":memory:", &db_meta); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: DB init failed\n"); + return 1; + } + + rc = sqlite3_exec_monitored(db_meta, "CREATE TABLE IF NOT EXISTS mine (id1, id2);", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Create table failed\n"); + return 1; + } + + rc = sqlite3_exec_monitored(db_meta, "DELETE FROM MINE LIMIT 1;", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Delete with LIMIT failed\n"); + return 1; + } + + rc = sqlite3_exec_monitored(db_meta, "UPDATE MINE SET id1=1 LIMIT 1;", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Update with LIMIT failed\n"); + return 1; + } + + BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE); + char *uuid_str = "0000_000"; + + buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str); + rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL); + if (rc != SQLITE_OK) + goto error; + buffer_flush(sql); + + buffer_sprintf(sql, INDEX_ACLK_ALERT, uuid_str, uuid_str); + rc = sqlite3_exec_monitored(db_meta, buffer_tostring(sql), 0, 0, NULL); + if (rc != SQLITE_OK) + goto error; + buffer_flush(sql); + + buffer_free(sql); + fprintf(stderr,"SQLite is OK\n"); + rc = sqlite3_close_v2(db_meta); + return 0; +error: + rc = sqlite3_close_v2(db_meta); + fprintf(stderr,"SQLite statement failed: %s\n", buffer_tostring(sql)); + buffer_free(sql); + fprintf(stderr,"SQLite tests failed\n"); + return 1; +} + +int unit_test_bitmap256(void) { + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + + BITMAP256 test_bitmap = {0}; + + bitmap256_set_bit(&test_bitmap, 0, 1); + bitmap256_set_bit(&test_bitmap, 64, 1); + 
bitmap256_set_bit(&test_bitmap, 128, 1); + bitmap256_set_bit(&test_bitmap, 192, 1); + if (test_bitmap.data[0] == 1) + fprintf(stderr, "%s() INDEX 1 is OK\n", __FUNCTION__ ); + if (test_bitmap.data[1] == 1) + fprintf(stderr, "%s() INDEX 65 is OK\n", __FUNCTION__ ); + if (test_bitmap.data[2] == 1) + fprintf(stderr, "%s() INDEX 129 is OK\n", __FUNCTION__ ); + if (test_bitmap.data[3] == 1) + fprintf(stderr, "%s() INDEX 192 is OK\n", __FUNCTION__ ); + + uint8_t i=0; + int j = 0; + do { + bitmap256_set_bit(&test_bitmap, i++, 1); + j++; + } while (j < 256); + + if (test_bitmap.data[0] == 0xffffffffffffffff) + fprintf(stderr, "%s() INDEX 0 is fully set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 0 is %"PRIu64" expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); + return 1; + } + + if (test_bitmap.data[1] == 0xffffffffffffffff) + fprintf(stderr, "%s() INDEX 1 is fully set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 1 is %"PRIu64" expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); + return 1; + } + + if (test_bitmap.data[2] == 0xffffffffffffffff) + fprintf(stderr, "%s() INDEX 2 is fully set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 2 is %"PRIu64" expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); + return 1; + } + + if (test_bitmap.data[3] == 0xffffffffffffffff) + fprintf(stderr, "%s() INDEX 3 is fully set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 3 is %"PRIu64" expected 0xffffffffffffffff\n", __FUNCTION__, test_bitmap.data[0]); + return 1; + } + + i = 0; + j = 0; + do { + bitmap256_set_bit(&test_bitmap, i++, 0); + j++; + } while (j < 256); + + if (test_bitmap.data[0] == 0) + fprintf(stderr, "%s() INDEX 0 is reset OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 0 is not reset FAILED\n", __FUNCTION__); + return 1; + } + if (test_bitmap.data[1] == 0) + fprintf(stderr, "%s() INDEX 1 is reset OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 1 is not reset FAILED\n", __FUNCTION__); + return 1; + } + + if (test_bitmap.data[2] == 0) + fprintf(stderr, "%s() INDEX 2 is reset OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 2 is not reset FAILED\n", __FUNCTION__); + return 1; + } + + if (test_bitmap.data[3] == 0) + fprintf(stderr, "%s() INDEX 3 is reset OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 3 is not reset FAILED\n", __FUNCTION__); + return 1; + } + + i=0; + j = 0; + do { + bitmap256_set_bit(&test_bitmap, i, 1); + i += 4; + j += 4; + } while (j < 256); + + if (test_bitmap.data[0] == 0x1111111111111111) + fprintf(stderr, "%s() INDEX 0 is 0x1111111111111111 set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 0 is %"PRIu64" expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[0]); + return 1; + } + + if (test_bitmap.data[1] == 0x1111111111111111) + fprintf(stderr, "%s() INDEX 1 is 0x1111111111111111 set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 1 is %"PRIu64" expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[1]); + return 1; + } + + if (test_bitmap.data[2] == 0x1111111111111111) + fprintf(stderr, "%s() INDEX 2 is 0x1111111111111111 set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 2 is %"PRIu64" expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[2]); + return 1; + } + + if (test_bitmap.data[3] == 0x1111111111111111) + fprintf(stderr, "%s() INDEX 3 is 0x1111111111111111 set OK\n", __FUNCTION__); + else { + fprintf(stderr, "%s() INDEX 3 is %"PRIu64" 
expected 0x1111111111111111\n", __FUNCTION__, test_bitmap.data[3]); + return 1; + } + + fprintf(stderr, "%s() tests passed\n", __FUNCTION__); + return 0; +} + +#ifdef ENABLE_DBENGINE +static inline void rrddim_set_by_pointer_fake_time(RRDDIM *rd, collected_number value, time_t now) +{ + rd->last_collected_time.tv_sec = now; + rd->last_collected_time.tv_usec = 0; + rd->collected_value = value; + rd->updated = 1; + + rd->collections_counter++; + + collected_number v = (value >= 0) ? value : -value; + if(unlikely(v > rd->collected_value_max)) rd->collected_value_max = v; +} + +static RRDHOST *dbengine_rrdhost_find_or_create(char *name) +{ + /* We don't want to drop metrics when generating load, we prefer to block data generation itself */ + rrdeng_drop_metrics_under_page_cache_pressure = 0; + + return rrdhost_find_or_create( + name + , name + , name + , os_type + , netdata_configured_timezone + , netdata_configured_abbrev_timezone + , netdata_configured_utc_offset + , "" + , program_name + , program_version + , default_rrd_update_every + , default_rrd_history_entries + , RRD_MEMORY_MODE_DBENGINE + , default_health_enabled + , default_rrdpush_enabled + , default_rrdpush_destination + , default_rrdpush_api_key + , default_rrdpush_send_charts_matching + , default_rrdpush_enable_replication + , default_rrdpush_seconds_to_replicate + , default_rrdpush_replication_step + , NULL + , 0 + ); +} + +// constants for test_dbengine +static const int CHARTS = 64; +static const int DIMS = 16; // That gives us 64 * 16 = 1024 metrics +#define REGIONS (3) // 3 regions of update_every +// first region update_every is 2, second is 3, third is 1 +static const int REGION_UPDATE_EVERY[REGIONS] = {2, 3, 1}; +static const int REGION_POINTS[REGIONS] = { + 16384, // This produces 64MiB of metric data for the first region: update_every = 2 + 16384, // This produces 64MiB of metric data for the second region: update_every = 3 + 16384, // This produces 64MiB of metric data for the third region: update_every = 1 +}; +static const int QUERY_BATCH = 4096; + +static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS], + int update_every) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + int i, j; + char name[101]; + + for (i = 0 ; i < CHARTS ; ++i) { + snprintfz(name, 100, "dbengine-chart-%d", i); + + // create the chart + st[i] = rrdset_create(host, "netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", + NULL, 1, update_every, RRDSET_TYPE_LINE); + rrdset_flag_set(st[i], RRDSET_FLAG_DEBUG); + rrdset_flag_set(st[i], RRDSET_FLAG_STORE_FIRST); + for (j = 0 ; j < DIMS ; ++j) { + snprintfz(name, 100, "dim-%d", j); + + rd[i][j] = rrddim_add(st[i], name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + } + + // Initialize DB with the very first entries + for (i = 0 ; i < CHARTS ; ++i) { + for (j = 0 ; j < DIMS ; ++j) { + rd[i][j]->last_collected_time.tv_sec = + st[i]->last_collected_time.tv_sec = st[i]->last_updated.tv_sec = 2 * API_RELATIVE_TIME_MAX - 1; + rd[i][j]->last_collected_time.tv_usec = + st[i]->last_collected_time.tv_usec = st[i]->last_updated.tv_usec = 0; + } + } + for (i = 0 ; i < CHARTS ; ++i) { + st[i]->usec_since_last_update = USEC_PER_SEC; + + for (j = 0; j < DIMS; ++j) { + rrddim_set_by_pointer_fake_time(rd[i][j], 69, 2 * API_RELATIVE_TIME_MAX); // set first value to 69 + } + + struct timeval now; + now_realtime_timeval(&now); + rrdset_timed_done(st[i], now, false); + } + // Flush pages for subsequent real values + for (i = 0 ; i < CHARTS ; ++i) {
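+ // (rrdeng_store_metric_flush_current_page() pushes the page holding these dummy first + // values out of the collection handle, so the real region data written later by + // test_dbengine_create_metrics() starts on fresh dbengine pages; rationale inferred + // from the comment above.)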
+ for (j = 0; j < DIMS; ++j) { + rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle); + } + } +} + +// Feeds the database region with test data, returns last timestamp of region +static time_t test_dbengine_create_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS], + int current_region, time_t time_start) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + time_t time_now; + int i, j, c, update_every; + collected_number next; + + update_every = REGION_UPDATE_EVERY[current_region]; + time_now = time_start; + // feed it with the test data + for (i = 0 ; i < CHARTS ; ++i) { + for (j = 0 ; j < DIMS ; ++j) { + rd[i][j]->tiers[0]->collect_ops->change_collection_frequency(rd[i][j]->tiers[0]->db_collection_handle, update_every); + + rd[i][j]->last_collected_time.tv_sec = + st[i]->last_collected_time.tv_sec = st[i]->last_updated.tv_sec = time_now; + rd[i][j]->last_collected_time.tv_usec = + st[i]->last_collected_time.tv_usec = st[i]->last_updated.tv_usec = 0; + } + } + for (c = 0; c < REGION_POINTS[current_region] ; ++c) { + time_now += update_every; // time_now = start + (c + 1) * update_every + + for (i = 0 ; i < CHARTS ; ++i) { + st[i]->usec_since_last_update = USEC_PER_SEC * update_every; + + for (j = 0; j < DIMS; ++j) { + next = ((collected_number)i * DIMS) * REGION_POINTS[current_region] + + j * REGION_POINTS[current_region] + c; + rrddim_set_by_pointer_fake_time(rd[i][j], next, time_now); + } + + struct timeval now; + now.tv_sec = time_now; + now.tv_usec = 0; + + rrdset_timed_done(st[i], now, false); + } + } + return time_now; //time_end +} + +// Checks the metric data for the given region, returns number of errors +static int test_dbengine_check_metrics(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS], + int current_region, time_t time_start) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + uint8_t same; + time_t time_now, time_retrieved, end_time; + int i, j, k, c, errors, update_every; + collected_number last; + NETDATA_DOUBLE value, expected; + struct storage_engine_query_handle handle; + size_t value_errors = 0, time_errors = 0; + + update_every = REGION_UPDATE_EVERY[current_region]; + errors = 0; + + // check the result + for (c = 0; c < REGION_POINTS[current_region] ; c += QUERY_BATCH) { + time_now = time_start + (c + 1) * update_every; + for (i = 0 ; i < CHARTS ; ++i) { + for (j = 0; j < DIMS; ++j) { + rd[i][j]->tiers[0]->query_ops->init(rd[i][j]->tiers[0]->db_metric_handle, &handle, time_now, time_now + QUERY_BATCH * update_every); + for (k = 0; k < QUERY_BATCH; ++k) { + last = ((collected_number)i * DIMS) * REGION_POINTS[current_region] + + j * REGION_POINTS[current_region] + c + k; + expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE)last, SN_DEFAULT_FLAGS)); + + STORAGE_POINT sp = rd[i][j]->tiers[0]->query_ops->next_metric(&handle); + value = sp.sum; + time_retrieved = sp.start_time; + end_time = sp.end_time; + + same = (roundndd(value) == roundndd(expected)) ? 
1 : 0; + if(!same) { + if(!value_errors) + fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT + ", found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n", + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now + k * update_every, expected, value); + value_errors++; + errors++; + } + if(end_time != time_now + k * update_every) { + if(!time_errors) + fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, found timestamp %lu ### E R R O R ###\n", + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now + k * update_every, (unsigned long)time_retrieved); + time_errors++; + errors++; + } + } + rd[i][j]->tiers[0]->query_ops->finalize(&handle); + } + } + } + + if(value_errors) + fprintf(stderr, "%zu value errors encountered\n", value_errors); + + if(time_errors) + fprintf(stderr, "%zu time errors encountered\n", time_errors); + + return errors; +} + +// Check rrdr transformations +static int test_dbengine_check_rrdr(RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS], + int current_region, time_t time_start, time_t time_end) +{ + int update_every = REGION_UPDATE_EVERY[current_region]; + fprintf(stderr, "%s() running on region %d, start time %lld, end time %lld, update every %d...\n", __FUNCTION__, current_region, (long long)time_start, (long long)time_end, update_every); + uint8_t same; + time_t time_now, time_retrieved; + int i, j, errors, value_errors = 0, time_errors = 0; + long c; + collected_number last; + NETDATA_DOUBLE value, expected; + + errors = 0; + long points = (time_end - time_start) / update_every; + for (i = 0 ; i < CHARTS ; ++i) { + ONEWAYALLOC *owa = onewayalloc_create(0); + RRDR *r = rrd2rrdr_legacy(owa, st[i], points, time_start, time_end, + RRDR_GROUPING_AVERAGE, 0, RRDR_OPTION_NATURAL_POINTS, + NULL, NULL, 0, 0, QUERY_SOURCE_UNITTEST); + if (!r) { + fprintf(stderr, " DB-engine unittest %s: empty RRDR on region %d ### E R R O R ###\n", rrdset_name(st[i]), current_region); + return ++errors; + } else { + assert(r->internal.qt->request.st == st[i]); + for (c = 0; c != (long)rrdr_rows(r) ; ++c) { + RRDDIM *d; + time_now = time_start + (c + 1) * update_every; + time_retrieved = r->t[c]; + + // for each dimension + rrddim_foreach_read(d, r->internal.qt->request.st) { + if(unlikely(d_dfe.counter >= r->d)) break; // d_counter is provided by the dictionary dfe + + j = (int)d_dfe.counter; + + NETDATA_DOUBLE *cn = &r->v[ c * r->d ]; + value = cn[j]; + assert(rd[i][j] == d); + + last = i * DIMS * REGION_POINTS[current_region] + j * REGION_POINTS[current_region] + c; + expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE)last, SN_DEFAULT_FLAGS)); + + same = (roundndd(value) == roundndd(expected)) ? 
1 : 0;
+                if(!same) {
+                    if(value_errors < 20)
+                        fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT
+                                ", RRDR found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n",
+                                rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, expected, value);
+                    value_errors++;
+                }
+                if(time_retrieved != time_now) {
+                    if(time_errors < 20)
+                        fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, found RRDR timestamp %lu ### E R R O R ###\n",
+                                rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, (unsigned long)time_retrieved);
+                    time_errors++;
+                }
+            }
+            rrddim_foreach_done(d);
+            }
+            rrdr_free(owa, r);
+        }
+        onewayalloc_destroy(owa);
+    }
+
+    if(value_errors)
+        fprintf(stderr, "%d value errors encountered\n", value_errors);
+
+    if(time_errors)
+        fprintf(stderr, "%d time errors encountered\n", time_errors);
+
+    return errors + value_errors + time_errors;
+}
+
+int test_dbengine(void)
+{
+    fprintf(stderr, "%s() running...\n", __FUNCTION__ );
+    int i, j, errors, value_errors = 0, time_errors = 0, update_every, current_region;
+    RRDHOST *host = NULL;
+    RRDSET *st[CHARTS];
+    RRDDIM *rd[CHARTS][DIMS];
+    time_t time_start[REGIONS], time_end[REGIONS];
+
+    error_log_limit_unlimited();
+    fprintf(stderr, "\nRunning DB-engine test\n");
+
+    default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
+
+    fprintf(stderr, "Initializing localhost with hostname 'unittest-dbengine'");
+    host = dbengine_rrdhost_find_or_create("unittest-dbengine");
+    if (NULL == host)
+        return 1;
+
+    current_region = 0; // this is the first region of data
+    update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 2 seconds
+    test_dbengine_create_charts(host, st, rd, update_every);
+
+    time_start[current_region] = 2 * API_RELATIVE_TIME_MAX;
+    time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]);
+
+    errors = test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
+    if (errors)
+        goto error_out;
+
+    current_region = 1; //this is the second region of data
+    update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 3 seconds
+    // Align pages for frequency change
+    for (i = 0 ; i < CHARTS ; ++i) {
+        st[i]->update_every = update_every;
+        for (j = 0; j < DIMS; ++j) {
+            rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle);
+        }
+    }
+
+    time_start[current_region] = time_end[current_region - 1] + update_every;
+    if (0 != time_start[current_region] % update_every) // align to update_every
+        time_start[current_region] += update_every - time_start[current_region] % update_every;
+    time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]);
+
+    errors = test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]);
+    if (errors)
+        goto error_out;
+
+    current_region = 2; //this is the third region of data
+    update_every = REGION_UPDATE_EVERY[current_region]; // set data collection frequency to 1 second
+    // Align pages for frequency change
+    for (i = 0 ; i < CHARTS ; ++i) {
+        st[i]->update_every = update_every;
+        for (j = 0; j < DIMS; ++j) {
+            rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0]->db_collection_handle);
+        }
+    }
+
+    time_start[current_region] = time_end[current_region - 1] + update_every;
+    if (0 != time_start[current_region] % update_every) // align to update_every
+        time_start[current_region] += update_every - time_start[current_region] %
update_every; + time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]); + + errors = test_dbengine_check_metrics(st, rd, current_region, time_start[current_region]); + if (errors) + goto error_out; + + for (current_region = 0 ; current_region < REGIONS ; ++current_region) { + errors = test_dbengine_check_rrdr(st, rd, current_region, time_start[current_region], time_end[current_region]); + if (errors) + goto error_out; + } + + current_region = 1; + update_every = REGION_UPDATE_EVERY[current_region]; // use the maximum update_every = 3 + errors = 0; + long points = (time_end[REGIONS - 1] - time_start[0]) / update_every; // cover all time regions with RRDR + long point_offset = (time_start[current_region] - time_start[0]) / update_every; + for (i = 0 ; i < CHARTS ; ++i) { + ONEWAYALLOC *owa = onewayalloc_create(0); + RRDR *r = rrd2rrdr_legacy(owa, st[i], points, time_start[0] + update_every, + time_end[REGIONS - 1], RRDR_GROUPING_AVERAGE, 0, + RRDR_OPTION_NATURAL_POINTS, NULL, NULL, 0, 0, QUERY_SOURCE_UNITTEST); + + if (!r) { + fprintf(stderr, " DB-engine unittest %s: empty RRDR ### E R R O R ###\n", rrdset_name(st[i])); + ++errors; + } else { + long c; + + assert(r->internal.qt->request.st == st[i]); + // test current region values only, since they must be left unchanged + for (c = point_offset ; c < (long)(point_offset + rrdr_rows(r) / REGIONS / 2) ; ++c) { + RRDDIM *d; + time_t time_now = time_start[current_region] + (c - point_offset + 2) * update_every; + time_t time_retrieved = r->t[c]; + + // for each dimension + rrddim_foreach_read(d, r->internal.qt->request.st) { + if(unlikely(d_dfe.counter >= r->d)) break; // d_counter is provided by the dictionary dfe + + j = (int)d_dfe.counter; + + NETDATA_DOUBLE *cn = &r->v[ c * r->d ]; + NETDATA_DOUBLE value = cn[j]; + assert(rd[i][j] == d); + + collected_number last = i * DIMS * REGION_POINTS[current_region] + j * REGION_POINTS[current_region] + c - point_offset + 1; + NETDATA_DOUBLE expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE)last, SN_DEFAULT_FLAGS)); + + uint8_t same = (roundndd(value) == roundndd(expected)) ? 1 : 0; + if(!same) { + if(!value_errors) + fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT + ", RRDR found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n", + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, expected, value); + value_errors++; + } + if(time_retrieved != time_now) { + if(!time_errors) + fprintf(stderr, " DB-engine unittest %s/%s: at %lu secs, found RRDR timestamp %lu ### E R R O R ###\n", + rrdset_name(st[i]), rrddim_name(rd[i][j]), (unsigned long)time_now, (unsigned long)time_retrieved); + time_errors++; + } + } + rrddim_foreach_done(d); + } + rrdr_free(owa, r); + } + onewayalloc_destroy(owa); + } +error_out: + rrd_wrlock(); + rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].instance); + rrdhost_delete_charts(host); + rrdeng_exit((struct rrdengine_instance *)host->db[0].instance); + rrd_unlock(); + + return errors + value_errors + time_errors; +} + +struct dbengine_chart_thread { + uv_thread_t thread; + RRDHOST *host; + char *chartname; /* Will be prefixed by type, e.g. "example_local1.", "example_local2." 
etc */ + unsigned dset_charts; /* number of charts */ + unsigned dset_dims; /* dimensions per chart */ + unsigned chart_i; /* current chart offset */ + time_t time_present; /* current virtual time of the benchmark */ + volatile time_t time_max; /* latest timestamp of stored values */ + unsigned history_seconds; /* how far back in the past to go */ + + volatile long done; /* initialize to 0, set to 1 to stop thread */ + struct completion charts_initialized; + unsigned long errors, stored_metrics_nr; /* statistics */ + + RRDSET *st; + RRDDIM *rd[]; /* dset_dims elements */ +}; + +collected_number generate_dbengine_chart_value(int chart_i, int dim_i, time_t time_current) +{ + collected_number value; + + value = ((collected_number)time_current) * (chart_i + 1); + value += ((collected_number)time_current) * (dim_i + 1); + value %= 1024LLU; + + return value; +} + +static void generate_dbengine_chart(void *arg) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + struct dbengine_chart_thread *thread_info = (struct dbengine_chart_thread *)arg; + RRDHOST *host = thread_info->host; + char *chartname = thread_info->chartname; + const unsigned DSET_DIMS = thread_info->dset_dims; + unsigned history_seconds = thread_info->history_seconds; + time_t time_present = thread_info->time_present; + + unsigned j, update_every = 1; + RRDSET *st; + RRDDIM *rd[DSET_DIMS]; + char name[RRD_ID_LENGTH_MAX + 1]; + time_t time_current; + + // create the chart + snprintfz(name, RRD_ID_LENGTH_MAX, "example_local%u", thread_info->chart_i + 1); + thread_info->st = st = rrdset_create(host, name, chartname, chartname, "example", NULL, chartname, chartname, + chartname, NULL, 1, update_every, RRDSET_TYPE_LINE); + for (j = 0 ; j < DSET_DIMS ; ++j) { + snprintfz(name, RRD_ID_LENGTH_MAX, "%s%u", chartname, j + 1); + + thread_info->rd[j] = rd[j] = rrddim_add(st, name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + completion_mark_complete(&thread_info->charts_initialized); + + // feed it with the test data + time_current = time_present - history_seconds; + for (j = 0 ; j < DSET_DIMS ; ++j) { + rd[j]->last_collected_time.tv_sec = + st->last_collected_time.tv_sec = st->last_updated.tv_sec = time_current - update_every; + rd[j]->last_collected_time.tv_usec = + st->last_collected_time.tv_usec = st->last_updated.tv_usec = 0; + } + for( ; !thread_info->done && time_current < time_present ; time_current += update_every) { + st->usec_since_last_update = USEC_PER_SEC * update_every; + + for (j = 0; j < DSET_DIMS; ++j) { + collected_number value; + + value = generate_dbengine_chart_value(thread_info->chart_i, j, time_current); + rrddim_set_by_pointer_fake_time(rd[j], value, time_current); + ++thread_info->stored_metrics_nr; + } + rrdset_done(st); + thread_info->time_max = time_current; + } + for (j = 0; j < DSET_DIMS; ++j) { + rrdeng_store_metric_finalize((rd[j])->tiers[0]->db_collection_handle); + } +} + +void generate_dbengine_dataset(unsigned history_seconds) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + const int DSET_CHARTS = 16; + const int DSET_DIMS = 128; + const uint64_t EXPECTED_COMPRESSION_RATIO = 20; + RRDHOST *host = NULL; + struct dbengine_chart_thread **thread_info; + int i; + time_t time_present; + + default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; + default_rrdeng_page_cache_mb = 128; + // Worst case for uncompressible data + default_rrdeng_disk_quota_mb = (((uint64_t)DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * history_seconds) / + (1024 * 1024); + default_rrdeng_disk_quota_mb -= 
default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100; + + error_log_limit_unlimited(); + fprintf(stderr, "Initializing localhost with hostname 'dbengine-dataset'"); + + host = dbengine_rrdhost_find_or_create("dbengine-dataset"); + if (NULL == host) + return; + + thread_info = mallocz(sizeof(*thread_info) * DSET_CHARTS); + for (i = 0 ; i < DSET_CHARTS ; ++i) { + thread_info[i] = mallocz(sizeof(*thread_info[i]) + sizeof(RRDDIM *) * DSET_DIMS); + } + fprintf(stderr, "\nRunning DB-engine workload generator\n"); + + time_present = now_realtime_sec(); + for (i = 0 ; i < DSET_CHARTS ; ++i) { + thread_info[i]->host = host; + thread_info[i]->chartname = "random"; + thread_info[i]->dset_charts = DSET_CHARTS; + thread_info[i]->chart_i = i; + thread_info[i]->dset_dims = DSET_DIMS; + thread_info[i]->history_seconds = history_seconds; + thread_info[i]->time_present = time_present; + thread_info[i]->time_max = 0; + thread_info[i]->done = 0; + completion_init(&thread_info[i]->charts_initialized); + assert(0 == uv_thread_create(&thread_info[i]->thread, generate_dbengine_chart, thread_info[i])); + completion_wait_for(&thread_info[i]->charts_initialized); + completion_destroy(&thread_info[i]->charts_initialized); + } + for (i = 0 ; i < DSET_CHARTS ; ++i) { + assert(0 == uv_thread_join(&thread_info[i]->thread)); + } + + for (i = 0 ; i < DSET_CHARTS ; ++i) { + freez(thread_info[i]); + } + freez(thread_info); + rrd_wrlock(); + rrdhost_free(host, 1); + rrd_unlock(); +} + +struct dbengine_query_thread { + uv_thread_t thread; + RRDHOST *host; + char *chartname; /* Will be prefixed by type, e.g. "example_local1.", "example_local2." etc */ + unsigned dset_charts; /* number of charts */ + unsigned dset_dims; /* dimensions per chart */ + time_t time_present; /* current virtual time of the benchmark */ + unsigned history_seconds; /* how far back in the past to go */ + volatile long done; /* initialize to 0, set to 1 to stop thread */ + unsigned long errors, queries_nr, queried_metrics_nr; /* statistics */ + uint8_t delete_old_data; /* if non zero then data are deleted when disk space is exhausted */ + + struct dbengine_chart_thread *chart_threads[]; /* dset_charts elements */ +}; + +static void query_dbengine_chart(void *arg) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + struct dbengine_query_thread *thread_info = (struct dbengine_query_thread *)arg; + const int DSET_CHARTS = thread_info->dset_charts; + const int DSET_DIMS = thread_info->dset_dims; + time_t time_after, time_before, time_min, time_approx_min, time_max, duration; + int i, j, update_every = 1; + RRDSET *st; + RRDDIM *rd; + uint8_t same; + time_t time_now, time_retrieved, end_time; + collected_number generatedv; + NETDATA_DOUBLE value, expected; + struct storage_engine_query_handle handle; + size_t value_errors = 0, time_errors = 0; + + do { + // pick a chart and dimension + i = random() % DSET_CHARTS; + st = thread_info->chart_threads[i]->st; + j = random() % DSET_DIMS; + rd = thread_info->chart_threads[i]->rd[j]; + + time_min = thread_info->time_present - thread_info->history_seconds + 1; + time_max = thread_info->chart_threads[i]->time_max; + + if (thread_info->delete_old_data) { + /* A time window of twice the disk space is sufficient for compression space savings of up to 50% */ + time_approx_min = time_max - (default_rrdeng_disk_quota_mb * 2 * 1024 * 1024) / + (((uint64_t) DSET_DIMS * DSET_CHARTS) * sizeof(storage_number)); + time_min = MAX(time_min, time_approx_min); + } + if (!time_max) { + time_before = time_after = 
time_min;
+        } else {
+            time_after = time_min + random() % (MAX(time_max - time_min, 1));
+            duration = random() % 3600;
+            time_before = MIN(time_after + duration, time_max); /* up to 1 hour queries */
+        }
+
+        rd->tiers[0]->query_ops->init(rd->tiers[0]->db_metric_handle, &handle, time_after, time_before);
+        ++thread_info->queries_nr;
+        for (time_now = time_after ; time_now <= time_before ; time_now += update_every) {
+            generatedv = generate_dbengine_chart_value(i, j, time_now);
+            expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE) generatedv, SN_DEFAULT_FLAGS));
+
+            if (unlikely(rd->tiers[0]->query_ops->is_finished(&handle))) {
+                if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
+                    fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT
+                            ", found data gap, ### E R R O R ###\n",
+                            rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected);
+                    ++thread_info->errors;
+                }
+                break;
+            }
+
+            STORAGE_POINT sp = rd->tiers[0]->query_ops->next_metric(&handle);
+            value = sp.sum;
+            time_retrieved = sp.start_time;
+            end_time = sp.end_time;
+
+            if (!netdata_double_isnumber(value)) {
+                if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
+                    fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT
+                            ", found data gap, ### E R R O R ###\n",
+                            rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected);
+                    ++thread_info->errors;
+                }
+                break;
+            }
+            ++thread_info->queried_metrics_nr;
+
+            same = (roundndd(value) == roundndd(expected)) ? 1 : 0;
+            if (!same) {
+                if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
+                    if(!value_errors)
+                        fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT
+                                ", found " NETDATA_DOUBLE_FORMAT ", ### E R R O R ###\n",
+                                rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected, value);
+                    value_errors++;
+                    thread_info->errors++;
+                }
+            }
+            if (end_time != time_now) {
+                if (!thread_info->delete_old_data) { /* data validation only when we don't delete */
+                    if(!time_errors)
+                        fprintf(stderr,
+                                " DB-engine stresstest %s/%s: at %lu secs, found timestamp %lu ### E R R O R ###\n",
+                                rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, (unsigned long) time_retrieved);
+                    time_errors++;
+                    thread_info->errors++;
+                }
+            }
+        }
+        rd->tiers[0]->query_ops->finalize(&handle);
+    } while(!thread_info->done);
+
+    if(value_errors)
+        fprintf(stderr, "%zu value errors encountered\n", value_errors);
+
+    if(time_errors)
+        fprintf(stderr, "%zu time errors encountered\n", time_errors);
+}
+
+void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS,
+                          unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB, unsigned DISK_SPACE_MB)
+{
+    fprintf(stderr, "%s() running...\n", __FUNCTION__ );
+    const unsigned DSET_DIMS = 128;
+    const uint64_t EXPECTED_COMPRESSION_RATIO = 20;
+    const unsigned HISTORY_SECONDS = 3600 * 24 * 365 * 50; /* 50 years of history */
+    RRDHOST *host = NULL;
+    struct dbengine_chart_thread **chart_threads;
+    struct dbengine_query_thread **query_threads;
+    unsigned i, j;
+    time_t time_start, test_duration;
+
+    error_log_limit_unlimited();
+
+    if (!TEST_DURATION_SEC)
+        TEST_DURATION_SEC = 10;
+    if (!DSET_CHARTS)
+        DSET_CHARTS = 1;
+    if (!QUERY_THREADS)
+        QUERY_THREADS = 1;
+    if (PAGE_CACHE_MB < RRDENG_MIN_PAGE_CACHE_SIZE_MB)
+        PAGE_CACHE_MB = RRDENG_MIN_PAGE_CACHE_SIZE_MB;
+
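+    /*
+     * [Editorial sketch, not part of the upstream patch] The disk quota
+     * computed below sizes the volume for fully incompressible data and
+     * then assumes the engine recovers EXPECTED_COMPRESSION_RATIO (20%)
+     * of that. With the defaults (1 chart x 128 dims, 50 years of
+     * 1-second points, 4-byte storage_number) the worst case is roughly:
+     *
+     *     128 * 4 bytes * (3600 * 24 * 365 * 50) / 2^20  ~  770000 MiB
+     *
+     * which the 20% haircut brings down to about 616000 MiB.
+     */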
+    default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE;
+    default_rrdeng_page_cache_mb = PAGE_CACHE_MB;
+    if (DISK_SPACE_MB) {
+        fprintf(stderr, "By setting a disk space limit, data are allowed to be deleted. "
+                "Data validation is turned off for this run.\n");
+        default_rrdeng_disk_quota_mb = DISK_SPACE_MB;
+    } else {
+        // Worst case for uncompressible data
+        default_rrdeng_disk_quota_mb =
+            (((uint64_t) DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * HISTORY_SECONDS) / (1024 * 1024);
+        default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100;
+    }
+
+    fprintf(stderr, "Initializing localhost with hostname 'dbengine-stress-test'\n");
+
+    (void) sql_init_database(DB_CHECK_NONE, 1);
+    host = dbengine_rrdhost_find_or_create("dbengine-stress-test");
+    if (NULL == host)
+        return;
+
+    chart_threads = mallocz(sizeof(*chart_threads) * DSET_CHARTS);
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        chart_threads[i] = mallocz(sizeof(*chart_threads[i]) + sizeof(RRDDIM *) * DSET_DIMS);
+    }
+    query_threads = mallocz(sizeof(*query_threads) * QUERY_THREADS);
+    for (i = 0 ; i < QUERY_THREADS ; ++i) {
+        query_threads[i] = mallocz(sizeof(*query_threads[i]) + sizeof(struct dbengine_chart_thread *) * DSET_CHARTS);
+    }
+    fprintf(stderr, "\nRunning DB-engine stress test, %u seconds writers ramp-up time,\n"
+            "%u seconds of concurrent readers and writers, %u writer threads, %u reader threads,\n"
+            "%u MiB of page cache.\n",
+            RAMP_UP_SECONDS, TEST_DURATION_SEC, DSET_CHARTS, QUERY_THREADS, PAGE_CACHE_MB);
+
+    time_start = now_realtime_sec() + HISTORY_SECONDS; /* move history to the future */
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        chart_threads[i]->host = host;
+        chart_threads[i]->chartname = "random";
+        chart_threads[i]->dset_charts = DSET_CHARTS;
+        chart_threads[i]->chart_i = i;
+        chart_threads[i]->dset_dims = DSET_DIMS;
+        chart_threads[i]->history_seconds = HISTORY_SECONDS;
+        chart_threads[i]->time_present = time_start;
+        chart_threads[i]->time_max = 0;
+        chart_threads[i]->done = 0;
+        chart_threads[i]->errors = chart_threads[i]->stored_metrics_nr = 0;
+        completion_init(&chart_threads[i]->charts_initialized);
+        assert(0 == uv_thread_create(&chart_threads[i]->thread, generate_dbengine_chart, chart_threads[i]));
+    }
+    /* barrier so that subsequent queries can access valid chart data */
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        completion_wait_for(&chart_threads[i]->charts_initialized);
+        completion_destroy(&chart_threads[i]->charts_initialized);
+    }
+    sleep(RAMP_UP_SECONDS);
+    /* at this point data have already begun being written to the database */
+    for (i = 0 ; i < QUERY_THREADS ; ++i) {
+        query_threads[i]->host = host;
+        query_threads[i]->chartname = "random";
+        query_threads[i]->dset_charts = DSET_CHARTS;
+        query_threads[i]->dset_dims = DSET_DIMS;
+        query_threads[i]->history_seconds = HISTORY_SECONDS;
+        query_threads[i]->time_present = time_start;
+        query_threads[i]->done = 0;
+        query_threads[i]->errors = query_threads[i]->queries_nr = query_threads[i]->queried_metrics_nr = 0;
+        for (j = 0 ; j < DSET_CHARTS ; ++j) {
+            query_threads[i]->chart_threads[j] = chart_threads[j];
+        }
+        query_threads[i]->delete_old_data = DISK_SPACE_MB ? 1 : 0;
+        assert(0 == uv_thread_create(&query_threads[i]->thread, query_dbengine_chart, query_threads[i]));
+    }
+    sleep(TEST_DURATION_SEC);
+    /* stop workload */
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        chart_threads[i]->done = 1;
+    }
+    for (i = 0 ; i < QUERY_THREADS ; ++i) {
+        query_threads[i]->done = 1;
+    }
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        assert(0 == uv_thread_join(&chart_threads[i]->thread));
+    }
+    for (i = 0 ; i < QUERY_THREADS ; ++i) {
+        assert(0 == uv_thread_join(&query_threads[i]->thread));
+    }
+    test_duration = now_realtime_sec() - (time_start - HISTORY_SECONDS);
+    if (!test_duration)
+        test_duration = 1;
+    fprintf(stderr, "\nDB-engine stress test finished in %lld seconds.\n", (long long)test_duration);
+    unsigned long stored_metrics_nr = 0;
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        stored_metrics_nr += chart_threads[i]->stored_metrics_nr;
+    }
+    unsigned long queried_metrics_nr = 0;
+    for (i = 0 ; i < QUERY_THREADS ; ++i) {
+        queried_metrics_nr += query_threads[i]->queried_metrics_nr;
+    }
+    fprintf(stderr, "%u metrics were stored (dataset size of %lu MiB) in %u charts by 1 writer thread per chart.\n",
+            DSET_CHARTS * DSET_DIMS, stored_metrics_nr * sizeof(storage_number) / (1024 * 1024), DSET_CHARTS);
+    fprintf(stderr, "Metrics were generated at one data point per second of emulated time, and time was accelerated.\n");
+    fprintf(stderr, "%lu metric data points were queried by %u reader threads.\n", queried_metrics_nr, QUERY_THREADS);
+    fprintf(stderr, "Query starting time is randomly chosen from the beginning of the time-series up to the time of\n"
+            "the latest data point, and ending time from 1 second up to 1 hour after the starting time.\n");
+    fprintf(stderr, "Performance is %lld written data points/sec and %lld read data points/sec.\n",
+            (long long)(stored_metrics_nr / test_duration), (long long)(queried_metrics_nr / test_duration));
+
+    for (i = 0 ; i < DSET_CHARTS ; ++i) {
+        freez(chart_threads[i]);
+    }
+    freez(chart_threads);
+    for (i = 0 ; i < QUERY_THREADS ; ++i) {
+        freez(query_threads[i]);
+    }
+    freez(query_threads);
+    rrd_wrlock();
+    rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].instance);
+    rrdhost_delete_charts(host);
+    rrdeng_exit((struct rrdengine_instance *)host->db[0].instance);
+    rrd_unlock();
+}
+
+#endif
diff --git a/daemon/unit_test.h b/daemon/unit_test.h
new file mode 100644
index 0000000..f79bd5c
--- /dev/null
+++ b/daemon/unit_test.h
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_UNIT_TEST_H
+#define NETDATA_UNIT_TEST_H 1
+
+#include <stdbool.h>
+
+int unit_test_storage(void);
+int unit_test(long delay, long shift);
+int run_all_mockup_tests(void);
+int unit_test_str2ld(void);
+int unit_test_buffer(void);
+int unit_test_static_threads(void);
+int test_sqlite(void);
+int unit_test_bitmap256(void);
+#ifdef ENABLE_DBENGINE
+int test_dbengine(void);
+void generate_dbengine_dataset(unsigned history_seconds);
+void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS,
+                          unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB, unsigned DISK_SPACE_MB);
+
+#endif
+
+bool command_argument_sanitization_tests(void);
+
+#endif /* NETDATA_UNIT_TEST_H */
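The header above only declares the test entry points; wiring them up is left to the daemon. As a hedged illustration (editorial, not part of this patch), a caller linked into the netdata source tree could drive the declared tests as sketched below; in the shipped daemon they are reached through command-line switches handled in `daemon/main.c`, such as `-W unittest`. The function name `run_declared_tests` is hypothetical.

```c
/* Illustrative sketch only: assumes it is compiled inside the netdata
 * build tree, so that unit_test.h and the implementations above link in. */
#include "unit_test.h"
#include <stdio.h>

int run_declared_tests(void) {
    int failed = 0;

    failed += unit_test_str2ld();      /* string-to-double round trips   */
    failed += unit_test_buffer();      /* BUFFER formatting helpers      */
    failed += unit_test_bitmap256();   /* the bitmap get/set checks      */

#ifdef ENABLE_DBENGINE
    failed += test_dbengine();         /* the three-region dbengine test */
#endif

    fprintf(stderr, "%d test group(s) reported errors\n", failed);
    return failed ? 1 : 0;
}
```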