| author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 12:08:03 +0000 |
|---|---|---|
| committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-05 12:08:18 +0000 |
| commit | 5da14042f70711ea5cf66e034699730335462f66 (patch) | |
| tree | 0f6354ccac934ed87a2d555f45be4c831cf92f4a /src/daemon | |
| parent | Releasing debian version 1.44.3-2. (diff) | |
| download | netdata-5da14042f70711ea5cf66e034699730335462f66.tar.xz, netdata-5da14042f70711ea5cf66e034699730335462f66.zip | |
Merging upstream version 1.45.3+dfsg.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/daemon/README.md | 438
-rw-r--r-- | src/daemon/analytics.c (renamed from daemon/analytics.c) | 36
-rw-r--r-- | src/daemon/analytics.h (renamed from daemon/analytics.h) | 9
-rwxr-xr-x | src/daemon/anonymous-statistics.sh.in (renamed from daemon/anonymous-statistics.sh.in) | 0
-rw-r--r-- | src/daemon/buildinfo.c (renamed from daemon/buildinfo.c) | 121
-rw-r--r-- | src/daemon/buildinfo.h (renamed from daemon/buildinfo.h) | 0
-rw-r--r-- | src/daemon/commands.c (renamed from daemon/commands.c) | 26
-rw-r--r-- | src/daemon/commands.h (renamed from daemon/commands.h) | 1
-rw-r--r-- | src/daemon/common.c (renamed from daemon/common.c) | 0
-rw-r--r-- | src/daemon/common.h (renamed from daemon/common.h) | 7
-rw-r--r-- | src/daemon/config/README.md | 231
-rw-r--r-- | src/daemon/config/dyncfg-echo.c | 175
-rw-r--r-- | src/daemon/config/dyncfg-files.c | 241
-rw-r--r-- | src/daemon/config/dyncfg-inline.c | 66
-rw-r--r-- | src/daemon/config/dyncfg-intercept.c | 419
-rw-r--r-- | src/daemon/config/dyncfg-internals.h | 145
-rw-r--r-- | src/daemon/config/dyncfg-tree.c | 284
-rw-r--r-- | src/daemon/config/dyncfg-unittest.c | 801
-rw-r--r-- | src/daemon/config/dyncfg.c | 454
-rw-r--r-- | src/daemon/config/dyncfg.h | 34
-rw-r--r-- | src/daemon/daemon.c (renamed from daemon/daemon.c) | 14
-rw-r--r-- | src/daemon/daemon.h | 16
-rw-r--r-- | src/daemon/event_loop.c (renamed from daemon/event_loop.c) | 0
-rw-r--r-- | src/daemon/event_loop.h (renamed from daemon/event_loop.h) | 0
-rwxr-xr-x[-rw-r--r--] | src/daemon/get-kubernetes-labels.sh.in (renamed from daemon/get-kubernetes-labels.sh.in) | 0
-rw-r--r-- | src/daemon/global_statistics.c (renamed from daemon/global_statistics.c) | 73
-rw-r--r-- | src/daemon/global_statistics.h (renamed from daemon/global_statistics.h) | 0
-rw-r--r-- | src/daemon/main.c (renamed from daemon/main.c) | 421
-rw-r--r-- | src/daemon/main.h | 44
-rw-r--r-- | src/daemon/metrics.csv (renamed from daemon/metrics.csv) | 0
-rw-r--r-- | src/daemon/pipename.c (renamed from daemon/pipename.c) | 0
-rw-r--r-- | src/daemon/pipename.h (renamed from daemon/pipename.h) | 0
-rw-r--r-- | src/daemon/sentry-native/sentry-native.c | 50
-rw-r--r-- | src/daemon/sentry-native/sentry-native.h | 9
-rw-r--r-- | src/daemon/service.c (renamed from daemon/service.c) | 82
-rw-r--r-- | src/daemon/signals.c (renamed from daemon/signals.c) | 11
-rw-r--r-- | src/daemon/signals.h (renamed from daemon/signals.h) | 0
-rw-r--r-- | src/daemon/static_threads.c (renamed from daemon/static_threads.c) | 11
-rw-r--r-- | src/daemon/static_threads.h (renamed from daemon/static_threads.h) | 0
-rw-r--r-- | src/daemon/static_threads_freebsd.c (renamed from daemon/static_threads_freebsd.c) | 0
-rw-r--r-- | src/daemon/static_threads_linux.c (renamed from daemon/static_threads_linux.c) | 0
-rw-r--r-- | src/daemon/static_threads_macos.c (renamed from daemon/static_threads_macos.c) | 0
-rwxr-xr-x | src/daemon/system-info.sh (renamed from daemon/system-info.sh) | 0
-rw-r--r-- | src/daemon/unit_test.c | 1806
-rw-r--r-- | src/daemon/unit_test.h (renamed from daemon/unit_test.h) | 0
-rw-r--r-- | src/daemon/watcher.c | 178
-rw-r--r-- | src/daemon/watcher.h | 54 |
47 files changed, 5799 insertions, 458 deletions
diff --git a/src/daemon/README.md b/src/daemon/README.md new file mode 100644 index 000000000..d805b7743 --- /dev/null +++ b/src/daemon/README.md @@ -0,0 +1,438 @@ +# Netdata daemon + +The Netdata daemon is practically a synonym for the Netdata Agent, as it controls its +entire operation. We support various methods to +[start, stop, or restart the daemon](https://github.com/netdata/netdata/blob/master/packaging/installer/README.md#maintaining-a-netdata-agent-installation). + +This document provides some basic information on the command line options, log files, and how to debug and troubleshoot + +## Command line options + +Normally you don't need to supply any command line arguments to netdata. + +If you do though, they override the configuration equivalent options. + +To get a list of all command line parameters supported, run: + +```sh +netdata -h +``` + +The program will print the supported command line parameters. + +The command line options of the Netdata 1.10.0 version are the following: + +```sh + ^ + |.-. .-. .-. .-. . netdata + | '-' '-' '-' '-' real-time performance monitoring, done right! + +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+---> + + Copyright (C) 2016-2022, Netdata, Inc. <info@netdata.cloud> + Released under GNU General Public License v3 or later. + All rights reserved. + + Home Page : https://netdata.cloud + Source Code: https://github.com/netdata/netdata + Docs : https://learn.netdata.cloud + Support : https://github.com/netdata/netdata/issues + License : https://github.com/netdata/netdata/blob/master/LICENSE.md + + Twitter : https://twitter.com/netdatahq + LinkedIn : https://linkedin.com/company/netdata-cloud/ + Facebook : https://facebook.com/linuxnetdata/ + + + SYNOPSIS: netdata [options] + + Options: + + -c filename Configuration file to load. + Default: /etc/netdata/netdata.conf + + -D Do not fork. Run in the foreground. + Default: run in the background + + -h Display this help message. + + -P filename File to save a pid while running. + Default: do not save pid to a file + + -i IP The IP address to listen to. + Default: all IP addresses IPv4 and IPv6 + + -p port API/Web port to use. + Default: 19999 + + -s path Prefix for /proc and /sys (for containers). + Default: no prefix + + -t seconds The internal clock of netdata. + Default: 1 + + -u username Run as user. + Default: netdata + + -v Print netdata version and exit. + + -V Print netdata version and exit. + + -W options See Advanced options below. + + + Advanced options: + + -W stacksize=N Set the stacksize (in bytes). + + -W debug_flags=N Set runtime tracing to debug.log. + + -W unittest Run internal unittests and exit. + + -W createdataset=N Create a DB engine dataset of N seconds and exit. + + -W set section option value + set netdata.conf option from the command line. + + -W buildinfo Print the version, the configure options, + a list of optional features, and whether they + are enabled or not. + + -W buildinfojson Print the version, the configure options, + a list of optional features, and whether they + are enabled or not, in JSON format. + + -W simple-pattern pattern string + Check if string matches pattern and exit. + + -W "claim -token=TOKEN -rooms=ROOM1,ROOM2 url=https://app.netdata.cloud" + Connect the agent to the workspace rooms pointed to by TOKEN and ROOM*. + + Signals netdata handles: + + - HUP Close and reopen log files. + - USR2 Reload health configuration. 
+``` + +You can send commands during runtime via [netdatacli](https://github.com/netdata/netdata/blob/master/src/cli/README.md). + +## Log files + +Netdata uses 4 log files: + +1. `error.log` +2. `collector.log` +3. `access.log` +4. `debug.log` + +Any of them can be disabled by setting it to `/dev/null` or `none` in `netdata.conf`. By default `error.log`, +`collector.log`, and `access.log` are enabled. `debug.log` is only enabled if debugging/tracing is also enabled +(Netdata needs to be compiled with debugging enabled). + +Log files are stored in `/var/log/netdata/` by default. + +### error.log + +The `error.log` is the `stderr` of the `netdata` daemon . + +For most Netdata programs (including standard external plugins shipped by netdata), the following lines may appear: + +| tag | description | +|:-:|:----------| +| `INFO` | Something important the user should know. | +| `ERROR` | Something that might disable a part of netdata.<br/>The log line includes `errno` (if it is not zero). | +| `FATAL` | Something prevented a program from running.<br/>The log line includes `errno` (if it is not zero) and the program exited. | + +The `FATAL` and `ERROR` messages will always appear in the logs, and `INFO`can be filtered using [severity level](https://github.com/netdata/netdata/blob/master/src/daemon/config/README.md#logs-section-options) option. + +So, when auto-detection of data collection fail, `ERROR` lines are logged and the relevant modules are disabled, but the +program continues to run. + +When a Netdata program cannot run at all, a `FATAL` line is logged. + +### collector.log + +The `collector.log` is the `stderr` of all [collectors](https://github.com/netdata/netdata/blob/master/src/collectors/COLLECTORS.md) + run by `netdata`. + +So if any process, in the Netdata process tree, writes anything to its standard error, +it will appear in `collector.log`. + +Data stored inside this file follows pattern already described for `error.log`. + +### access.log + +The `access.log` logs web requests. The format is: + +```txt +DATE: ID: (sent/all = SENT_BYTES/ALL_BYTES bytes PERCENT_COMPRESSION%, prep/sent/total PREP_TIME/SENT_TIME/TOTAL_TIME ms): ACTION CODE URL +``` + +where: + +- `ID` is the client ID. Client IDs are auto-incremented every time a client connects to netdata. +- `SENT_BYTES` is the number of bytes sent to the client, without the HTTP response header. +- `ALL_BYTES` is the number of bytes of the response, before compression. +- `PERCENT_COMPRESSION` is the percentage of traffic saved due to compression. +- `PREP_TIME` is the time in milliseconds needed to prepared the response. +- `SENT_TIME` is the time in milliseconds needed to sent the response to the client. +- `TOTAL_TIME` is the total time the request was inside Netdata (from the first byte of the request to the last byte + of the response). +- `ACTION` can be `filecopy`, `options` (used in CORS), `data` (API call). + +### debug.log + +See [debugging](#debugging). + +## Netdata process scheduling policy + +By default Netdata versions prior to 1.34.0 run with the `idle` process scheduling policy, so that it uses CPU +resources, only when there is idle CPU to spare. On very busy servers (or weak servers), this can lead to gaps on +the charts. + +Starting with version 1.34.0, Netdata instead uses the `batch` scheduling policy by default. This largely eliminates +issues with gaps in charts on busy systems while still keeping the impact on the rest of the system low. 
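
If you want to confirm which policy the running daemon actually got, a quick check with `chrt` from util-linux (assuming it is installed on your system) looks like this:

```sh
# show the scheduling policy and priority of the running netdata daemon
# (if pidof returns more than one PID, pass them to chrt one at a time)
chrt -p $(pidof netdata)
```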
+ +You can set Netdata scheduling policy in `netdata.conf`, like this: + +```conf +[global] + process scheduling policy = idle +``` + +You can use the following: + +| policy | description | +| :-----------------------: | :---------- | +| `idle` | use CPU only when there is spare - this is lower than nice 19 - it is the default for Netdata and it is so low that Netdata will run in "slow motion" under extreme system load, resulting in short (1-2 seconds) gaps at the charts. | +| `other`<br/>or<br/>`nice` | this is the default policy for all processes under Linux. It provides dynamic priorities based on the `nice` level of each process. Check below for setting this `nice` level for netdata. | +| `batch` | This policy is similar to `other` in that it schedules the thread according to its dynamic priority (based on the `nice` value). The difference is that this policy will cause the scheduler to always assume that the thread is CPU-intensive. Consequently, the scheduler will apply a small scheduling penalty with respect to wake-up behavior, so that this thread is mildly disfavored in scheduling decisions. | +| `fifo` | `fifo` can be used only with static priorities higher than 0, which means that when a `fifo` threads becomes runnable, it will always immediately preempt any currently running `other`, `batch`, or `idle` thread. `fifo` is a simple scheduling algorithm without time slicing. | +| `rr` | a simple enhancement of `fifo`. Everything described above for `fifo` also applies to `rr`, except that each thread is allowed to run only for a maximum time quantum. | +| `keep`<br/>or<br/>`none` | do not set scheduling policy, priority or nice level - i.e. keep running with whatever it is set already (e.g. by systemd). | + +For more information see `man sched`. + +### Scheduling priority for `rr` and `fifo` + +Once the policy is set to one of `rr` or `fifo`, the following will appear: + +```conf +[global] + process scheduling priority = 0 +``` + +These priorities are usually from 0 to 99. Higher numbers make the process more +important. + +### nice level for policies `other` or `batch` + +When the policy is set to `other`, `nice`, or `batch`, the following will appear: + +```conf +[global] + process nice level = 19 +``` + +## Scheduling settings and systemd + +Netdata will not be able to set its scheduling policy and priority to more important values when it is started as the +`netdata` user (systemd case). + +You can set these settings at `/etc/systemd/system/netdata.service`: + +```sh +[Service] +# By default Netdata switches to scheduling policy idle, which makes it use CPU, only +# when there is spare available. +# Valid policies: other (the system default) | batch | idle | fifo | rr +#CPUSchedulingPolicy=other + +# This sets the maximum scheduling priority Netdata can set (for policies: rr and fifo). +# Netdata (via [global].process scheduling priority in netdata.conf) can only lower this value. +# Priority gets values 1 (lowest) to 99 (highest). +#CPUSchedulingPriority=1 + +# For scheduling policy 'other' and 'batch', this sets the lowest niceness of netdata. +# Netdata (via [global].process nice level in netdata.conf) can only increase the value set here. +#Nice=0 +``` + +Run `systemctl daemon-reload` to reload these changes. 
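
To verify that the unit actually picked up the new values after the reload, you can ask systemd directly; this is only a sanity check, using the property names that `systemctl show` exposes for service units:

```sh
# print the CPU scheduling policy, priority and nice level systemd applies to the service
systemctl show netdata --property=CPUSchedulingPolicy,CPUSchedulingPriority,Nice
```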
+ +Now, tell Netdata to keep these settings, as set by systemd, by editing +`netdata.conf` and setting: + +```conf +[global] + process scheduling policy = keep +``` + +Using the above, whatever scheduling settings you have set at `netdata.service` +will be maintained by netdata. + +### Example 1: Netdata with nice -1 on non-systemd systems + +On a system that is not based on systemd, to make Netdata run with nice level -1 (a little bit higher to the default for +all programs), edit `netdata.conf` and set: + +```conf +[global] + process scheduling policy = other + process nice level = -1 +``` + +then execute this to [restart Netdata](https://github.com/netdata/netdata/blob/master/packaging/installer/README.md#maintaining-a-netdata-agent-installation): + +```sh +sudo systemctl restart netdata +``` + +#### Example 2: Netdata with nice -1 on systemd systems + +On a system that is based on systemd, to make Netdata run with nice level -1 (a little bit higher to the default for all +programs), edit `netdata.conf` and set: + +```conf +[global] + process scheduling policy = keep +``` + +edit /etc/systemd/system/netdata.service and set: + +```sh +[Service] +CPUSchedulingPolicy=other +Nice=-1 +``` + +then execute: + +```sh +sudo systemctl daemon-reload +sudo systemctl restart netdata +``` + +## Virtual memory + +You may notice that netdata's virtual memory size, as reported by `ps` or `/proc/pid/status` (or even netdata's +applications virtual memory chart) is unrealistically high. + +For example, it may be reported to be 150+MB, even if the resident memory size is just 25MB. Similar values may be +reported for Netdata plugins too. + +Check this for example: A Netdata installation with default settings on Ubuntu +16.04LTS. The top chart is **real memory used**, while the bottom one is +**virtual memory**: + +![image](https://cloud.githubusercontent.com/assets/2662304/19013772/5eb7173e-87e3-11e6-8f2b-a2ccfeb06faf.png) + +### Why does this happen? + +The system memory allocator allocates virtual memory arenas, per thread running. On Linux systems this defaults to 16MB +per thread on 64 bit machines. So, if you get the difference between real and virtual memory and divide it by 16MB you +will roughly get the number of threads running. + +The system does this for speed. Having a separate memory arena for each thread, allows the threads to run in parallel in +multi-core systems, without any locks between them. + +This behaviour is system specific. For example, the chart above when running +Netdata on Alpine Linux (that uses **musl** instead of **glibc**) is this: + +![image](https://cloud.githubusercontent.com/assets/2662304/19013807/7cf5878e-87e4-11e6-9651-082e68701eab.png) + +### Can we do anything to lower it? + +Since Netdata already uses minimal memory allocations while it runs (i.e. it adapts its memory on start, so that while +repeatedly collects data it does not do memory allocations), it already instructs the system memory allocator to +minimize the memory arenas for each thread. We have also added [2 configuration +options](https://github.com/netdata/netdata/blob/5645b1ee35248d94e6931b64a8688f7f0d865ec6/src/main.c#L410-L418) to allow +you tweak these settings: `glibc malloc arena max for plugins` and `glibc malloc arena max for netdata`. + +However, even if we instructed the memory allocator to use just one arena, it +seems it allocates an arena per thread. + +Netdata also supports `jemalloc` and `tcmalloc`, however both behave exactly the +same to the glibc memory allocator in this aspect. 
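
As a rough sanity check of the explanation above, you can do the "divide by 16MB" arithmetic yourself; the sketch below assumes a 64-bit glibc system with the default 16MiB arena size mentioned earlier:

```sh
# estimate the number of malloc arenas (roughly, threads) from the virtual-vs-resident gap
# ps reports VSZ and RSS in KiB; 16 MiB = 16384 KiB per arena
ps -o vsz=,rss= -p $(pidof netdata) | awk '{printf "~%d arenas/threads\n", ($1 - $2) / 16384}'
```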
+ +### Is this a problem? + +No, it is not. + +Linux reserves real memory (physical RAM) in pages (on x86 machines pages are 4KB each). So even if the system memory +allocator is allocating huge amounts of virtual memory, only the 4KB pages that are actually used are reserving physical +RAM. The **real memory** chart on Netdata application section, shows the amount of physical memory these pages occupy(it +accounts the whole pages, even if parts of them are actually used). + +## Debugging + +When you compile Netdata with debugging: + +1. compiler optimizations for your CPU are disabled (Netdata will run somewhat slower) + +2. a lot of code is added all over netdata, to log debug messages to `/var/log/netdata/debug.log`. However, nothing is + printed by default. Netdata allows you to select which sections of Netdata you want to trace. Tracing is activated + via the config option `debug flags`. It accepts a hex number, to enable or disable specific sections. You can find + the options supported at [log.h](https://raw.githubusercontent.com/netdata/netdata/master/src/libnetdata/log/log.h). + They are the `D_*` defines. The value `0xffffffffffffffff` will enable all possible debug flags. + +Once Netdata is compiled with debugging and tracing is enabled for a few sections, the file `/var/log/netdata/debug.log` +will contain the messages. + +> Do not forget to disable tracing (`debug flags = 0`) when you are done tracing. The file `debug.log` can grow too +> fast. + +### Compiling Netdata with debugging + +To compile Netdata with debugging, use this: + +```sh +# step into the Netdata source directory +cd /usr/src/netdata.git + +# run the installer with debugging enabled +CFLAGS="-O1 -ggdb -DNETDATA_INTERNAL_CHECKS=1" ./netdata-installer.sh +``` + +The above will compile and install Netdata with debugging info embedded. You can now use `debug flags` to set the +section(s) you need to trace. + +### Debugging crashes + +We have made the most to make Netdata crash free. If however, Netdata crashes on your system, it would be very helpful +to provide stack traces of the crash. Without them, is will be almost impossible to find the issue (the code base is +quite large to find such an issue by just observing it). + +To provide stack traces, **you need to have Netdata compiled with debugging**. There is no need to enable any tracing +(`debug flags`). + +Then you need to be in one of the following 2 cases: + +1. Netdata crashes and you have a core dump + +2. you can reproduce the crash + +If you are not on these cases, you need to find a way to be (i.e. if your system does not produce core dumps, check your +distro documentation to enable them). + +### Netdata crashes and you have a core dump + +> you need to have Netdata compiled with debugging info for this to work (check above) + +Run the following command and post the output on a github issue. + +```sh +gdb $(which netdata) /path/to/core/dump +``` + +### You can reproduce a Netdata crash on your system + +> you need to have Netdata compiled with debugging info for this to work (check above) + +Install the package `valgrind` and run: + +```sh +valgrind $(which netdata) -D +``` + +Netdata will start and it will be a lot slower. Now reproduce the crash and `valgrind` will dump on your console the +stack trace. Open a new github issue and post the output. 
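
If the agent runs for a while before it crashes, it can be more convenient to send valgrind's report to a file instead of the console; `--log-file` is a standard valgrind option and the path below is just an example:

```sh
# run netdata in the foreground under valgrind, writing the report to a file (%p expands to the PID)
valgrind --log-file=/tmp/netdata-valgrind-%p.log $(which netdata) -D
```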
diff --git a/daemon/analytics.c b/src/daemon/analytics.c index 353ebd136..015b8b2e8 100644 --- a/daemon/analytics.c +++ b/src/daemon/analytics.c @@ -7,7 +7,6 @@ struct analytics_data analytics_data; extern void analytics_exporting_connectors (BUFFER *b); extern void analytics_exporting_connectors_ssl (BUFFER *b); extern void analytics_build_info (BUFFER *b); -extern int aclk_connected; struct collector { const char *plugin; @@ -471,7 +470,7 @@ void analytics_alarms(void) */ void analytics_misc(void) { - analytics_data.spinlock.locked = false; + spinlock_init(&analytics_data.spinlock); #ifdef ENABLE_ACLK analytics_set_data(&analytics_data.netdata_host_cloud_available, "true"); @@ -489,7 +488,7 @@ void analytics_misc(void) if (strcmp( config_get(CONFIG_SECTION_REGISTRY, "registry to announce", "https://registry.my-netdata.io"), - "https://registry.my-netdata.io")) + "https://registry.my-netdata.io") != 0) analytics_set_data(&analytics_data.netdata_config_use_private_registry, "true"); //do we need both registry to announce and enabled to indicate that this is a private registry ? @@ -601,7 +600,9 @@ void *analytics_main(void *ptr) analytics_gather_immutable_meta_data(); analytics_gather_mutable_meta_data(); - send_statistics("META_START", "-", "-"); + + analytics_statistic_t statistic = { "META_START", "-", "-" }; + analytics_statistic_send(&statistic); analytics_log_data(); sec = 0; @@ -616,8 +617,11 @@ void *analytics_main(void *ptr) continue; analytics_gather_mutable_meta_data(); - send_statistics("META", "-", "-"); + + analytics_statistic_t stt = { "META", "-", "-" }; + analytics_statistic_send(&stt); analytics_log_data(); + sec = 0; } @@ -720,7 +724,7 @@ void get_system_timezone(void) } // use the contents of /etc/timezone - if (!timezone && !read_file("/etc/timezone", buffer, FILENAME_MAX)) { + if (!timezone && !read_txt_file("/etc/timezone", buffer, sizeof(buffer))) { timezone = buffer; netdata_log_info("TIMEZONE: using the contents of /etc/timezone"); } @@ -943,7 +947,10 @@ void set_global_environment() { setenv("LC_ALL", "C", 1); } -void send_statistics(const char *action, const char *action_result, const char *action_data) { +void analytics_statistic_send(const analytics_statistic_t *statistic) { + if (!statistic) + return; + static char *as_script; if (netdata_anonymous_statistics_enabled == -1) { @@ -980,16 +987,19 @@ void send_statistics(const char *action, const char *action_result, const char * freez(optout_file); } - if (!netdata_anonymous_statistics_enabled || !action) + if (!netdata_anonymous_statistics_enabled || !statistic->action) return; - if (!action_result) + const char *action_result = statistic->result; + const char *action_data = statistic->data; + + if (!statistic->result) action_result = ""; - if (!action_data) + if (!statistic->data) action_data = ""; char *command_to_run = mallocz( - sizeof(char) * (strlen(action) + strlen(action_result) + strlen(action_data) + strlen(as_script) + + sizeof(char) * (strlen(statistic->action) + strlen(action_result) + strlen(action_data) + strlen(as_script) + analytics_data.data_length + (ANALYTICS_NO_OF_ITEMS * 3) + 15)); pid_t command_pid; @@ -997,7 +1007,7 @@ void send_statistics(const char *action, const char *action_result, const char * command_to_run, "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ", as_script, - action, + statistic->action, 
action_result, action_data, analytics_data.netdata_config_stream_enabled, @@ -1043,7 +1053,7 @@ void send_statistics(const char *action, const char *action_result, const char * nd_log(NDLS_DAEMON, NDLP_DEBUG, "%s '%s' '%s' '%s'", - as_script, action, action_result, action_data); + as_script, statistic->action, action_result, action_data); FILE *fp_child_input; FILE *fp_child_output = netdata_popen(command_to_run, &command_pid, &fp_child_input); diff --git a/daemon/analytics.h b/src/daemon/analytics.h index de8d569f9..04991036c 100644 --- a/daemon/analytics.h +++ b/src/daemon/analytics.h @@ -79,7 +79,6 @@ struct analytics_data { void set_late_global_environment(struct rrdhost_system_info *system_info); void analytics_free_data(void); void set_global_environment(void); -void send_statistics(const char *action, const char *action_result, const char *action_data); void analytics_log_shell(void); void analytics_log_json(void); void analytics_log_prometheus(void); @@ -88,6 +87,14 @@ void analytics_gather_mutable_meta_data(void); void analytics_report_oom_score(long long int score); void get_system_timezone(void); +typedef struct { + const char *action; + const char *result; + const char *data; +} analytics_statistic_t; + +void analytics_statistic_send(const analytics_statistic_t *statistic); + extern struct analytics_data analytics_data; #endif //NETDATA_ANALYTICS_H diff --git a/daemon/anonymous-statistics.sh.in b/src/daemon/anonymous-statistics.sh.in index d12e7e32a..d12e7e32a 100755 --- a/daemon/anonymous-statistics.sh.in +++ b/src/daemon/anonymous-statistics.sh.in diff --git a/daemon/buildinfo.c b/src/daemon/buildinfo.c index 41af56af8..2c894a3d2 100644 --- a/daemon/buildinfo.c +++ b/src/daemon/buildinfo.c @@ -57,8 +57,6 @@ typedef enum __attribute__((packed)) { BIB_DB_DBENGINE, BIB_DB_ALLOC, BIB_DB_RAM, - BIB_DB_MAP, - BIB_DB_SAVE, BIB_DB_NONE, BIB_CONNECTIVITY_ACLK, BIB_CONNECTIVITY_HTTPD_STATIC, @@ -69,17 +67,14 @@ typedef enum __attribute__((packed)) { BIB_LIB_LZ4, BIB_LIB_ZSTD, BIB_LIB_ZLIB, - BIB_LIB_JUDY, - BIB_LIB_DLIB, + BIB_LIB_BROTLI, BIB_LIB_PROTOBUF, BIB_LIB_OPENSSL, BIB_LIB_LIBDATACHANNEL, BIB_LIB_JSONC, BIB_LIB_LIBCAP, BIB_LIB_LIBCRYPTO, - BIB_LIB_LIBM, - BIB_LIB_JEMALLOC, - BIB_LIB_TCMALLOC, + BIB_LIB_LIBYAML, BIB_PLUGIN_APPS, BIB_PLUGIN_LINUX_CGROUPS, BIB_PLUGIN_LINUX_CGROUP_NETWORK, @@ -559,22 +554,6 @@ static struct { .json = "ram", .value = NULL, }, - [BIB_DB_MAP] = { - .category = BIC_DATABASE, - .type = BIT_BOOLEAN, - .analytics = NULL, - .print = "map", - .json = "map", - .value = NULL, - }, - [BIB_DB_SAVE] = { - .category = BIC_DATABASE, - .type = BIT_BOOLEAN, - .analytics = NULL, - .print = "save", - .json = "save", - .value = NULL, - }, [BIB_DB_NONE] = { .category = BIC_DATABASE, .type = BIT_BOOLEAN, @@ -655,22 +634,13 @@ static struct { .json = "zlib", .value = NULL, }, - [BIB_LIB_JUDY] = { - .category = BIC_LIBS, - .type = BIT_BOOLEAN, - .analytics = NULL, - .print = "Judy (high-performance dynamic arrays and hashtables)", - .json = "judy", - .status = true, - .value = "bundled", - }, - [BIB_LIB_DLIB] = { - .category = BIC_LIBS, - .type = BIT_BOOLEAN, - .analytics = NULL, - .print = "dlib (robust machine learning toolkit)", - .json = "dlib", - .value = NULL, + [BIB_LIB_BROTLI] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = NULL, + .print = "Brotli (generic-purpose lossless compression algorithm)", + .json = "brotli", + .value = NULL, }, [BIB_LIB_PROTOBUF] = { .category = BIC_LIBS, @@ -720,29 +690,13 @@ static struct { .json = "libcrypto", .value 
= NULL, }, - [BIB_LIB_LIBM] = { - .category = BIC_LIBS, - .type = BIT_BOOLEAN, - .analytics = "libm", - .print = "libm (mathematical functions)", - .json = "libm", - .value = NULL, - }, - [BIB_LIB_JEMALLOC] = { - .category = BIC_LIBS, - .type = BIT_BOOLEAN, - .analytics = "jemalloc", - .print = "jemalloc", - .json = "jemalloc", - .value = NULL, - }, - [BIB_LIB_TCMALLOC] = { - .category = BIC_LIBS, - .type = BIT_BOOLEAN, - .analytics = "tcmalloc", - .print = "TCMalloc", - .json = "tcmalloc", - .value = NULL, + [BIB_LIB_LIBYAML] = { + .category = BIC_LIBS, + .type = BIT_BOOLEAN, + .analytics = "libyaml", + .print = "libyaml (library for parsing and emitting YAML)", + .json = "libyaml", + .value = NULL, }, [BIB_PLUGIN_APPS] = { .category = BIC_PLUGINS, @@ -1124,9 +1078,6 @@ __attribute__((constructor)) void initialize_build_info(void) { build_info_set_status(BIB_FEATURE_STREAMING_COMPRESSION, true); -#ifdef ENABLE_BROTLI - build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "brotli"); -#endif #ifdef ENABLE_ZSTD build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "zstd"); #endif @@ -1134,6 +1085,9 @@ __attribute__((constructor)) void initialize_build_info(void) { build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "lz4"); #endif build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "gzip"); +#ifdef ENABLE_BROTLI + build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "brotli"); +#endif build_info_set_status(BIB_FEATURE_CONTEXTS, true); build_info_set_status(BIB_FEATURE_TIERING, true); @@ -1147,8 +1101,6 @@ __attribute__((constructor)) void initialize_build_info(void) { #endif build_info_set_status(BIB_DB_ALLOC, true); build_info_set_status(BIB_DB_RAM, true); - build_info_set_status(BIB_DB_MAP, true); - build_info_set_status(BIB_DB_SAVE, true); build_info_set_status(BIB_DB_NONE, true); build_info_set_status(BIB_CONNECTIVITY_HTTPD_STATIC, true); @@ -1171,6 +1123,9 @@ __attribute__((constructor)) void initialize_build_info(void) { #ifdef ENABLE_ZSTD build_info_set_status(BIB_LIB_ZSTD, true); #endif +#ifdef ENABLE_BROTLI + build_info_set_status(BIB_LIB_BROTLI, true); +#endif build_info_set_status(BIB_LIB_ZLIB, true); @@ -1203,17 +1158,11 @@ __attribute__((constructor)) void initialize_build_info(void) { #ifdef HAVE_CRYPTO build_info_set_status(BIB_LIB_LIBCRYPTO, true); #endif -#ifdef STORAGE_WITH_MATH - build_info_set_status(BIB_LIB_LIBM, true); -#endif -#ifdef ENABLE_JEMALLOC - build_info_set_status(BIB_LIB_JEMALLOC, true); -#endif -#ifdef ENABLE_TCMALLOC - build_info_set_status(BIB_LIB_TCMALLOC, true); +#ifdef HAVE_LIBYAML + build_info_set_status(BIB_LIB_LIBYAML, true); #endif -#ifdef ENABLE_APPS_PLUGIN +#ifdef ENABLE_PLUGIN_APPS build_info_set_status(BIB_PLUGIN_APPS, true); #endif #ifdef HAVE_SETNS @@ -1225,28 +1174,28 @@ __attribute__((constructor)) void initialize_build_info(void) { build_info_set_status(BIB_PLUGIN_IDLEJITTER, true); build_info_set_status(BIB_PLUGIN_BASH, true); -#ifdef ENABLE_DEBUGFS_PLUGIN +#ifdef ENABLE_PLUGIN_DEBUGFS build_info_set_status(BIB_PLUGIN_DEBUGFS, true); #endif -#ifdef HAVE_CUPS +#ifdef ENABLE_PLUGIN_CUPS build_info_set_status(BIB_PLUGIN_CUPS, true); #endif -#ifdef HAVE_LIBBPF +#ifdef ENABLE_PLUGIN_EBPF build_info_set_status(BIB_PLUGIN_EBPF, true); #endif -#ifdef HAVE_FREEIPMI +#ifdef ENABLE_PLUGIN_FREEIPMI build_info_set_status(BIB_PLUGIN_FREEIPMI, true); #endif -#ifdef HAVE_NFACCT +#ifdef ENABLE_PLUGIN_NFACCT build_info_set_status(BIB_PLUGIN_NFACCT, true); #endif -#ifdef ENABLE_PERF_PLUGIN +#ifdef ENABLE_PLUGIN_PERF 
build_info_set_status(BIB_PLUGIN_PERF, true); #endif -#ifdef ENABLE_SLABINFO +#ifdef ENABLE_PLUGIN_SLABINFO build_info_set_status(BIB_PLUGIN_SLABINFO, true); #endif -#ifdef HAVE_LIBXENSTAT +#ifdef ENABLE_PLUGIN_XENSTAT build_info_set_status(BIB_PLUGIN_XEN, true); #endif #ifdef HAVE_XENSTAT_VBD_ERROR @@ -1499,7 +1448,7 @@ void print_build_info(void) { print_build_info_category_to_console(BIC_PLUGINS, "Plugins"); print_build_info_category_to_console(BIC_EXPORTERS, "Exporters"); print_build_info_category_to_console(BIC_DEBUG_DEVEL, "Debug/Developer Features"); -}; +} void build_info_to_json_object(BUFFER *b) { populate_packaging_info(); @@ -1533,7 +1482,7 @@ void print_build_info_json(void) { buffer_json_finalize(b); printf("%s\n", buffer_tostring(b)); buffer_free(b); -}; +} void analytics_build_info(BUFFER *b) { populate_packaging_info(); diff --git a/daemon/buildinfo.h b/src/daemon/buildinfo.h index 1bb1c9760..1bb1c9760 100644 --- a/daemon/buildinfo.h +++ b/src/daemon/buildinfo.h diff --git a/daemon/commands.c b/src/daemon/commands.c index ed544224e..8c9767a3f 100644 --- a/daemon/commands.c +++ b/src/daemon/commands.c @@ -36,7 +36,6 @@ struct command_context { /* Forward declarations */ static cmd_status_t cmd_help_execute(char *args, char **message); static cmd_status_t cmd_reload_health_execute(char *args, char **message); -static cmd_status_t cmd_save_database_execute(char *args, char **message); static cmd_status_t cmd_reopen_logs_execute(char *args, char **message); static cmd_status_t cmd_exit_execute(char *args, char **message); static cmd_status_t cmd_fatal_execute(char *args, char **message); @@ -52,7 +51,6 @@ static cmd_status_t cmd_dumpconfig(char *args, char **message); static command_info_t command_info_array[] = { {"help", cmd_help_execute, CMD_TYPE_HIGH_PRIORITY}, // show help menu {"reload-health", cmd_reload_health_execute, CMD_TYPE_ORTHOGONAL}, // reload health configuration - {"save-database", cmd_save_database_execute, CMD_TYPE_ORTHOGONAL}, // save database for memory mode save {"reopen-logs", cmd_reopen_logs_execute, CMD_TYPE_ORTHOGONAL}, // Close and reopen log files {"shutdown-agent", cmd_exit_execute, CMD_TYPE_EXCLUSIVE}, // exit cleanly {"fatal-agent", cmd_fatal_execute, CMD_TYPE_HIGH_PRIORITY}, // exit with fatal error @@ -144,21 +142,7 @@ static cmd_status_t cmd_reload_health_execute(char *args, char **message) nd_log_limits_unlimited(); netdata_log_info("COMMAND: Reloading HEALTH configuration."); - health_reload(); - nd_log_limits_reset(); - - return CMD_STATUS_SUCCESS; -} - -static cmd_status_t cmd_save_database_execute(char *args, char **message) -{ - (void)args; - (void)message; - - nd_log_limits_unlimited(); - netdata_log_info("COMMAND: Saving databases."); - rrdhost_save_all(); - netdata_log_info("COMMAND: Databases saved."); + health_plugin_reload(); nd_log_limits_reset(); return CMD_STATUS_SUCCESS; @@ -183,7 +167,7 @@ static cmd_status_t cmd_exit_execute(char *args, char **message) nd_log_limits_unlimited(); netdata_log_info("COMMAND: Cleaning up to exit."); - netdata_cleanup_and_exit(0); + netdata_cleanup_and_exit(0, NULL, NULL, NULL); exit(0); return CMD_STATUS_SUCCESS; @@ -499,15 +483,15 @@ static void parse_commands(struct command_context *cmd_ctx) status = CMD_STATUS_FAILURE; /* Skip white-space characters */ - for (pos = cmd_ctx->command_string ; isspace(*pos) && ('\0' != *pos) ; ++pos) {;} + for (pos = cmd_ctx->command_string ; isspace(*pos) && ('\0' != *pos) ; ++pos) ; for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) { if (!strncmp(pos, 
command_info_array[i].cmd_str, strlen(command_info_array[i].cmd_str))) { if (CMD_EXIT == i) { /* musl C does not like libuv workqueues calling exit() */ execute_command(CMD_EXIT, NULL, NULL); } - for (lstrip=pos + strlen(command_info_array[i].cmd_str); isspace(*lstrip) && ('\0' != *lstrip); ++lstrip) {;} - for (rstrip=lstrip+strlen(lstrip)-1; rstrip>lstrip && isspace(*rstrip); *(rstrip--) = 0 ); + for (lstrip=pos + strlen(command_info_array[i].cmd_str); isspace(*lstrip) && ('\0' != *lstrip); ++lstrip) ; + for (rstrip=lstrip+strlen(lstrip)-1; rstrip>lstrip && isspace(*rstrip); *(rstrip--) = 0 ) ; cmd_ctx->work.data = cmd_ctx; cmd_ctx->idx = i; diff --git a/daemon/commands.h b/src/daemon/commands.h index 368a70a0f..45a76110c 100644 --- a/daemon/commands.h +++ b/src/daemon/commands.h @@ -9,7 +9,6 @@ typedef enum cmd { CMD_HELP = 0, CMD_RELOAD_HEALTH, - CMD_SAVE_DATABASE, CMD_REOPEN_LOGS, CMD_EXIT, CMD_FATAL, diff --git a/daemon/common.c b/src/daemon/common.c index d441c73b6..d441c73b6 100644 --- a/daemon/common.c +++ b/src/daemon/common.c diff --git a/daemon/common.h b/src/daemon/common.h index b1739879f..3dcaedb12 100644 --- a/daemon/common.h +++ b/src/daemon/common.h @@ -34,8 +34,13 @@ // ---------------------------------------------------------------------------- // netdata include files +#include "daemon/config/dyncfg.h" + #include "global_statistics.h" +// health monitoring and alarm notifications +#include "health/health.h" + // the netdata database #include "database/rrd.h" @@ -50,8 +55,6 @@ // streaming metrics between netdata servers #include "streaming/rrdpush.h" -// health monitoring and alarm notifications -#include "health/health.h" // anomaly detection #include "ml/ml.h" diff --git a/src/daemon/config/README.md b/src/daemon/config/README.md new file mode 100644 index 000000000..3a2cee05b --- /dev/null +++ b/src/daemon/config/README.md @@ -0,0 +1,231 @@ +<!-- +title: "Daemon configuration" +description: "The Netdata Agent's daemon is installed preconfigured to collect thousands of metrics every second, but is highly configurable for real-world workloads." +custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/daemon/config/README.md" +sidebar_label: "Daemon" +learn_status: "Published" +learn_rel_path: "Configuration" +learn_doc_purpose: "Explain the daemon options, the log files, the process scheduling, virtual memory, explain how the netdata.conf is used and backlink to the netdata.conf file reference" +--> + +# Daemon configuration + +<details> +<summary>The daemon configuration file is read from /etc/netdata/netdata.conf.</summary> + +Depending on your installation method, Netdata will have been installed either directly under `/`, or +under `/opt/netdata`. The paths mentioned here and in the documentation in general assume that your installation is +under `/`. If it is not, you will find the exact same paths under `/opt/netdata` as well. (i.e. `/etc/netdata` will +be `/opt/netdata/etc/netdata`). + +</details> + +This config file **is not needed by default**. Netdata works fine out of the box without it. But it does allow you to +adapt the general behavior of Netdata, in great detail. You can find all these settings, with their default values, by +accessing the URL `https://netdata.server.hostname:19999/netdata.conf`. For example check the configuration file +of [netdata.firehol.org](http://netdata.firehol.org/netdata.conf). 
HTTP access to this file is limited by default to +[private IPs](https://en.wikipedia.org/wiki/Private_network), via +the [web server access lists](https://github.com/netdata/netdata/blob/master/src/web/server/README.md#access-lists). + +`netdata.conf` has sections stated with `[section]`. You will see the following sections: + +1. `[global]` to [configure](#global-section-options) the [Netdata daemon](https://github.com/netdata/netdata/blob/master/src/daemon/README.md). +2. `[db]` to [configure](#db-section-options) the database of Netdata. +3. `[directories]` to [configure](#directories-section-options) the directories used by Netdata. +4. `[logs]` to [configure](#logs-section-options) the Netdata logging. +5. `[environment variables]` to [configure](#environment-variables-section-options) the environment variables used + Netdata. +6. `[sqlite]` to [configure](#sqlite-section-options) the [Netdata daemon](https://github.com/netdata/netdata/blob/master/src/daemon/README.md) SQLite settings. +7. `[ml]` to configure settings for [machine learning](https://github.com/netdata/netdata/blob/master/src/ml/README.md). +8. `[health]` to [configure](#health-section-options) general settings for [health monitoring](https://github.com/netdata/netdata/blob/master/src/health/README.md). +9. `[web]` to [configure the web server](https://github.com/netdata/netdata/blob/master/src/web/server/README.md). +10. `[registry]` for the [Netdata registry](https://github.com/netdata/netdata/blob/master/src/registry/README.md). +11. `[global statistics]` for the [Netdata registry](https://github.com/netdata/netdata/blob/master/src/registry/README.md). +12. `[statsd]` for the general settings of the [stats.d.plugin](https://github.com/netdata/netdata/blob/master/src/collectors/statsd.plugin/README.md). +13. `[plugins]` to [configure](#plugins-section-options) which [collectors](https://github.com/netdata/netdata/blob/master/src/collectors/README.md) to use and PATH + settings. +14. `[plugin:NAME]` sections for each collector plugin, under the + comment [Per plugin configuration](#per-plugin-configuration). + +The configuration file is a `name = value` dictionary. Netdata will not complain if you set options unknown to it. When +you check the running configuration by accessing the URL `/netdata.conf` on your Netdata server, Netdata will add a +comment on settings it does not currently use. + +## Applying changes + +After `netdata.conf` has been modified, Netdata needs to be [restarted](https://github.com/netdata/netdata/blob/master/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for +changes to apply: + +```bash +sudo systemctl restart netdata +``` + +If the above does not work, try the following: + +```bash +sudo killall netdata; sleep 10; sudo netdata +``` + +Please note that your data history will be lost if you have modified `history` parameter in section `[global]`. 
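
Before you start editing, it can help to keep a copy of the configuration the running agent actually uses, since (as noted above) the agent annotates options it does not currently use. A minimal sketch, assuming the agent listens on the default `localhost:19999`:

```sh
# save the effective configuration of the running agent
curl -s http://localhost:19999/netdata.conf -o /tmp/netdata.conf.running

# compare it against your local file (skip this if you have not created /etc/netdata/netdata.conf yet)
diff -u /etc/netdata/netdata.conf /tmp/netdata.conf.running | less
```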
+ +## Sections + +### [global] section options + +| setting | default | info | +|:----------------------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| process scheduling policy | `keep` | See [Netdata process scheduling policy](https://github.com/netdata/netdata/blob/master/src/daemon/README.md#netdata-process-scheduling-policy) | +| OOM score | `0` | | +| glibc malloc arena max for plugins | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/src/daemon/README.md#virtual-memory). | +| glibc malloc arena max for Netdata | `1` | See [Virtual memory](https://github.com/netdata/netdata/blob/master/src/daemon/README.md#virtual-memory). | +| hostname | auto-detected | The hostname of the computer running Netdata. | +| host access prefix | empty | This is used in docker environments where /proc, /sys, etc have to be accessed via another path. You may also have to set SYS_PTRACE capability on the docker for this work. Check [issue 43](https://github.com/netdata/netdata/issues/43). | +| timezone | auto-detected | The timezone retrieved from the environment variable | +| run as user | `netdata` | The user Netdata will run as. | +| pthread stack size | auto-detected | | + +### [db] section options + +| setting | default | info | +|:---------------------------------------------:|:----------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`. <br />`ram`: The round-robin database will be temporary and it will be lost when Netdata exits. <br />`alloc`: Similar to `ram`, but can significantly reduce memory usage, when combined with a low retention and does not support KSM. <br />`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. Not to be used together with streaming. | +| retention | `3600` | Used with `mode = ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](https://github.com/netdata/netdata/blob/master/src/database/README.md) for more information. | +| storage tiers | `3` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/src/database/engine/README.md#tiering). 
You can have up to 5 tiers of data (including the _Tier 0_). This number ranges between 1 and 5. | +| dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching for _Tier 0_ Netdata metric values. | +| dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated for caching Netdata metric values of the **`N`** tier. <br /> `N belongs to [1..4]` | +| dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). | +| dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. | +| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well. <br /> `N belongs to [1..4]` | +| update every | `1` | The frequency in seconds, for data collection. For more information see the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). These metrics stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](https://github.com/netdata/netdata/blob/master/src/database/engine/README.md#tiering). | +| dbengine tier **`N`** update every iterations | `60` | The down sampling value of each tier from the previous one. For each Tier, the greater by one Tier has N (equal to 60 by default) less data points of any metric it collects. This setting can take values from `2` up to `255`. <br /> `N belongs to [1..4]` | +| dbengine tier **`N`** back fill | `New` | Specifies the strategy of recreating missing data on each Tier from the exact lower Tier. <br /> `New`: Sees the latest point on each Tier and save new points to it only if the exact lower Tier has available points for it's observation window (`dbengine tier N update every iterations` window). <br /> `none`: No back filling is applied. <br /> `N belongs to [1..4]` | +| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](https://github.com/netdata/netdata/blob/master/src/database/README.md#ksm) | +| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](https://github.com/netdata/netdata/blob/master/src/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers), also sets the timeout for cleaning up obsolete dimensions | +| gap when lost iterations above | `1` | | +| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. | +| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. | + +> ### Info +> +>The multiplication of all the **enabled** tiers `dbengine tier N update every iterations` values must be less than `65535`. 
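
The note above is easier to see with concrete numbers. A minimal `[db]` sketch using the documented defaults (the values are illustrative only, adjust them to your retention needs):

```conf
[db]
    mode = dbengine
    storage tiers = 3
    update every = 1
    # tier 1 keeps 1 point per 60 tier-0 points, tier 2 keeps 1 point per 60 tier-1 points
    dbengine tier 1 update every iterations = 60
    dbengine tier 2 update every iterations = 60
    # check: 60 * 60 = 3600, well below the 65535 limit, so this combination is valid
```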
+ +### [directories] section options + +| setting | default | info | +|:-------------------:|:------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| config | `/etc/netdata` | The directory configuration files are kept. | +| stock config | `/usr/lib/netdata/conf.d` | | +| log | `/var/log/netdata` | The directory in which the [log files](https://github.com/netdata/netdata/blob/master/src/daemon/README.md#log-files) are kept. | +| web | `/usr/share/netdata/web` | The directory the web static files are kept. | +| cache | `/var/cache/netdata` | The directory the memory database will be stored if and when Netdata exits. Netdata will re-read the database when it will start again, to continue from the same point. | +| lib | `/var/lib/netdata` | Contains the alert log and the Netdata instance GUID. | +| home | `/var/cache/netdata` | Contains the db files for the collected metrics. | +| lock | `/var/lib/netdata/lock` | Contains the data collectors lock files. | +| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. | +| health config | `/etc/netdata/health.d` | The directory containing the user alert configuration files, to override the stock configurations | +| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alert configuration files for each collector | +| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](https://github.com/netdata/netdata/blob/master/src/registry/README.md) database and GUID that uniquely identifies each Netdata Agent | + +### [logs] section options + +| setting | default | info | +|:----------------------------------:|:-----------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](https://github.com/netdata/netdata/blob/master/src/daemon/README.md#debugging). | +| debug | `/var/log/netdata/debug.log` | The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](https://github.com/netdata/netdata/blob/master/src/daemon/README.md#debugging). | +| error | `/var/log/netdata/error.log` | The filename to save error messages for Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. | +| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. | +| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. 
| +| errors flood protection period | `1200` | Length of period (in sec) during which the number of errors should not exceed the `errors to trigger flood protection`. | +| errors to trigger flood protection | `200` | Number of errors written to the log in `errors flood protection period` sec before flood protection is activated. | +| severity level | `info` | Controls which log messages are logged, with error being the most important. Supported values: `info` and `error`. | + +### [environment variables] section options + +| setting | default | info | +|:----------:|:-----------------:|:-----------------------------------------------------------| +| TZ | `:/etc/localtime` | Where to find the timezone | +| PATH | `auto-detected` | Specifies the directories to be searched to find a command | +| PYTHONPATH | | Used to set a custom python path | + +### [sqlite] section options + +| setting | default | info | +|:------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| auto vacuum | `INCREMENTAL` | The [auto-vacuum status](https://www.sqlite.org/pragma.html#pragma_auto_vacuum) in the database | +| synchronous | `NORMAL` | The setting of the ["synchronous"](https://www.sqlite.org/pragma.html#pragma_synchronous) flag | +| journal mode | `WAL` | The [journal mode](https://www.sqlite.org/pragma.html#pragma_journal_mode) for databases | +| temp store | `MEMORY` | Used to determine where [temporary tables and indices are stored](https://www.sqlite.org/pragma.html#pragma_temp_store) | +| journal size limit | `16777216` | Used to set a new [limit in bytes for the database](https://www.sqlite.org/pragma.html#pragma_journal_size_limit) | +| cache size | `-2000` | Used to [suggest the maximum number of database disk pages](https://www.sqlite.org/pragma.html#pragma_cache_size) that SQLite will hold in memory at once per open database file | + +### [health] section options + +This section controls the general behavior of the health monitoring capabilities of Netdata. + +Specific alerts are configured in per-collector config files under the `health.d` directory. For more info, see [health +monitoring](https://github.com/netdata/netdata/blob/master/src/health/README.md). + +[Alert notifications](https://github.com/netdata/netdata/blob/master/src/health/notifications/README.md) are configured in `health_alarm_notify.conf`. + +| setting | default | info | +|:----------------------------------------------:|:------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| enabled | `yes` | Set to `no` to disable all alerts and notifications | +| in memory max health log entries | 1000 | Size of the alert history held in RAM | +| script to execute on alarm | `/usr/libexec/netdata/plugins.d/alarm-notify.sh` | The script that sends alert notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). | +| run at least every seconds | `10` | Controls how often all alert conditions should be evaluated. 
| +| postpone alarms during hibernation for seconds | `60` | Prevents false alerts. May need to be increased if you get alerts during hibernation. | +| health log history | `432000` | Specifies the history of alert events (in seconds) kept in the agent's sqlite database. | +| enabled alarms | * | Defines which alerts to load from both user and stock directories. This is a [simple pattern](https://github.com/netdata/netdata/blob/master/src/libnetdata/simple_pattern/README.md) list of alert or template names. Can be used to disable specific alerts. For example, `enabled alarms = !oom_kill *` will load all alerts except `oom_kill`. | + +### [web] section options + +Refer to the [web server documentation](https://github.com/netdata/netdata/blob/master/src/web/server/README.md) + +### [plugins] section options + +In this section you will see be a boolean (`yes`/`no`) option for each plugin (e.g. tc, cgroups, apps, proc etc.). Note +that the configuration options in this section for the orchestrator plugins `python.d` and `charts.d` control **all the +modules** written for that orchestrator. For instance, setting `python.d = no` means that all Python modules +under `collectors/python.d.plugin` will be disabled. + +Additionally, there will be the following options: + +| setting | default | info | +|:-------------------------------:|:---------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| enable running new plugins | `yes` | When set to `yes`, Netdata will enable detected plugins, even if they are not configured explicitly. Setting this to `no` will only enable plugins explicitly configured in this file with a `yes` | +| check for new plugins every | 60 | The time in seconds to check for new plugins in the plugins directory. This allows having other applications dynamically creating plugins for Netdata. | +| checks | `no` | This is a debugging plugin for the internal latency | + +### [registry] section options + +To understand what this section is and how it should be configured, please refer to +the [registry documentation](https://github.com/netdata/netdata/blob/master/src/registry/README.md). + +## Per-plugin configuration + +The configuration options for plugins appear in sections following the pattern `[plugin:NAME]`. + +### Internal plugins + +Most internal plugins will provide additional options. Check [Internal Plugins](https://github.com/netdata/netdata/blob/master/src/collectors/README.md) for more +information. + +Please note, that by default Netdata will enable monitoring metrics for disks, memory, and network only when they are +not zero. If they are constantly zero they are ignored. Metrics that will start having values, after Netdata is started, +will be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them +to appear though). Use `yes` instead of `auto` in plugin configuration sections to enable these charts permanently. You +can also set the `enable zero metrics` option to `yes` in the `[global]` section which enables charts with zero metrics +for all internal Netdata plugins. 
+ +### External plugins + +External plugins will have only 2 options at `netdata.conf`: + +| setting | default | info | +|:---------------:|:--------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| update every | the value of `[global].update every` setting | The frequency in seconds the plugin should collect values. For more information check the [performance guide](https://github.com/netdata/netdata/blob/master/docs/guides/configure/performance.md). | +| command options | - | Additional command line options to pass to the plugin. | + +External plugins that need additional configuration may support a dedicated file in `/etc/netdata`. Check their +documentation. + diff --git a/src/daemon/config/dyncfg-echo.c b/src/daemon/config/dyncfg-echo.c new file mode 100644 index 000000000..95d40a025 --- /dev/null +++ b/src/daemon/config/dyncfg-echo.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +// ---------------------------------------------------------------------------- +// echo is when we send requests to plugins without any caller +// it is used for: +// 1. the first enable/disable requests we send, and also +// 2. updates to stock or user configurations +// 3. saved dynamic jobs we need to add to templates + +struct dyncfg_echo { + const DICTIONARY_ITEM *item; + DYNCFG *df; // for additions this is the job, not the template + BUFFER *wb; + DYNCFG_CMDS cmd; + const char *cmd_str; +}; + +void dyncfg_echo_cb(BUFFER *wb __maybe_unused, int code __maybe_unused, void *result_cb_data) { + struct dyncfg_echo *e = result_cb_data; + DYNCFG *df = e->df; + + if(DYNCFG_RESP_SUCCESS(code)) { + // successful response + + if(e->cmd == DYNCFG_CMD_ADD) { + df->dyncfg.status = dyncfg_status_from_successful_response(code); + dyncfg_update_status_on_successful_add_or_update(df, code); + } + else if(e->cmd == DYNCFG_CMD_UPDATE) { + df->dyncfg.status = dyncfg_status_from_successful_response(code); + dyncfg_update_status_on_successful_add_or_update(df, code); + } + else if(e->cmd == DYNCFG_CMD_DISABLE) + df->dyncfg.status = df->current.status = DYNCFG_STATUS_DISABLED; + else if(e->cmd == DYNCFG_CMD_ENABLE) + df->dyncfg.status = df->current.status = dyncfg_status_from_successful_response(code); + } + else { + // failed response + + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: received response code %d on request to id '%s', cmd: %s", + code, dictionary_acquired_item_name(e->item), e->cmd_str); + + if(e->cmd == DYNCFG_CMD_UPDATE || e->cmd == DYNCFG_CMD_ADD) + e->df->dyncfg.plugin_rejected = true; + } + + buffer_free(e->wb); + dictionary_acquired_item_release(dyncfg_globals.nodes, e->item); + + e->wb = NULL; + e->df = NULL; + e->item = NULL; + freez((void *)e->cmd_str); + e->cmd_str = NULL; + freez(e); +} + +// ---------------------------------------------------------------------------- + +void dyncfg_echo(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id __maybe_unused, DYNCFG_CMDS cmd) { + RRDHOST *host = dyncfg_rrdhost(df); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host of configuration id '%s'", id); + return; + } + + if(!(df->cmds & cmd)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: attempted to echo a cmd that is not supported"); + return; + } + + const char *cmd_str = dyncfg_id2cmd_one(cmd); + if(!cmd_str) { + 
nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: command given does not resolve to a known command"); + return; + } + + struct dyncfg_echo *e = callocz(1, sizeof(struct dyncfg_echo)); + e->item = dictionary_acquired_item_dup(dyncfg_globals.nodes, item); + e->wb = buffer_create(0, NULL); + e->df = df; + e->cmd = cmd; + e->cmd_str = strdupz(cmd_str); + + char buf[string_strlen(df->function) + strlen(e->cmd_str) + 20]; + snprintfz(buf, sizeof(buf), "%s %s", string2str(df->function), e->cmd_str); + + rrd_function_run( + host, e->wb, 10, + HTTP_ACCESS_ALL, buf, false, NULL, + dyncfg_echo_cb, e, + NULL, NULL, + NULL, NULL, + NULL, string2str(df->dyncfg.source)); +} + +// ---------------------------------------------------------------------------- + +void dyncfg_echo_update(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id) { + RRDHOST *host = dyncfg_rrdhost(df); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host of configuration id '%s'", id); + return; + } + + if(!df->dyncfg.payload) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: requested to send an update to '%s', but there is no payload", id); + return; + } + + struct dyncfg_echo *e = callocz(1, sizeof(struct dyncfg_echo)); + e->item = dictionary_acquired_item_dup(dyncfg_globals.nodes, item); + e->wb = buffer_create(0, NULL); + e->df = df; + e->cmd = DYNCFG_CMD_UPDATE; + e->cmd_str = strdupz("update"); + + char buf[string_strlen(df->function) + strlen(e->cmd_str) + 20]; + snprintfz(buf, sizeof(buf), "%s %s", string2str(df->function), e->cmd_str); + + rrd_function_run( + host, e->wb, 10, + HTTP_ACCESS_ALL, buf, false, NULL, + dyncfg_echo_cb, e, + NULL, NULL, + NULL, NULL, + df->dyncfg.payload, string2str(df->dyncfg.source)); +} + +// ---------------------------------------------------------------------------- + +static void dyncfg_echo_payload_add(const DICTIONARY_ITEM *item_template __maybe_unused, const DICTIONARY_ITEM *item_job, DYNCFG *df_template, DYNCFG *df_job, const char *id_template, const char *cmd) { + RRDHOST *host = dyncfg_rrdhost(df_template); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host of configuration id '%s'", id_template); + return; + } + + if(!df_job->dyncfg.payload) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: requested to send a '%s' to '%s', but there is no payload", + cmd, id_template); + return; + } + + struct dyncfg_echo *e = callocz(1, sizeof(struct dyncfg_echo)); + e->item = dictionary_acquired_item_dup(dyncfg_globals.nodes, item_job); + e->wb = buffer_create(0, NULL); + e->df = df_job; + e->cmd = DYNCFG_CMD_ADD; + e->cmd_str = strdupz(cmd); + + char buf[string_strlen(df_template->function) + strlen(cmd) + 20]; + snprintfz(buf, sizeof(buf), "%s %s", string2str(df_template->function), cmd); + + rrd_function_run( + host, e->wb, 10, + HTTP_ACCESS_ALL, buf, false, NULL, + dyncfg_echo_cb, e, + NULL, NULL, + NULL, NULL, + df_job->dyncfg.payload, string2str(df_job->dyncfg.source)); +} + +void dyncfg_echo_add(const DICTIONARY_ITEM *item_template, const DICTIONARY_ITEM *item_job, DYNCFG *df_template, DYNCFG *df_job, const char *template_id, const char *job_name) { + char buf[strlen(job_name) + 20]; + snprintfz(buf, sizeof(buf), "add %s", job_name); + dyncfg_echo_payload_add(item_template, item_job, df_template, df_job, template_id, buf); +} + diff --git a/src/daemon/config/dyncfg-files.c b/src/daemon/config/dyncfg-files.c new file mode 100644 index 000000000..aa91c109d --- /dev/null +++ b/src/daemon/config/dyncfg-files.c @@ -0,0 +1,241 @@ +// SPDX-License-Identifier: GPL-3.0-or-later 
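+
+// This file persists dynamic configuration nodes to disk as "<escaped id>.dyncfg" files under
+// dyncfg_globals.dir and loads them back through dyncfg_load_all(). A rough sketch of the on-disk
+// layout produced by dyncfg_file_save() below (one key=value pair per line; keys are grouped here
+// only to keep the sketch short), followed by an optional payload:
+//
+//    version=... id=... template=... host=... path=... type=...
+//    source_type=... source=... created=... modified=...
+//    sync=... user_disabled=... saves=... cmds=...
+//    content_type=... content_length=...    (present only when a payload exists)
+//    ---
+//    <raw payload bytes, up to the end of the file>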
+ +#include "dyncfg-internals.h" +#include "dyncfg.h" + +void dyncfg_file_delete(const char *id) { + CLEAN_CHAR_P *escaped_id = dyncfg_escape_id_for_filename(id); + char filename[FILENAME_MAX]; + snprintfz(filename, sizeof(filename), "%s/%s.dyncfg", dyncfg_globals.dir, escaped_id); + unlink(filename); +} + +void dyncfg_file_save(const char *id, DYNCFG *df) { + CLEAN_CHAR_P *escaped_id = dyncfg_escape_id_for_filename(id); + char filename[FILENAME_MAX]; + snprintfz(filename, sizeof(filename), "%s/%s.dyncfg", dyncfg_globals.dir, escaped_id); + + FILE *fp = fopen(filename, "w"); + if(!fp) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot create file '%s'", filename); + return; + } + + df->dyncfg.modified_ut = now_realtime_usec(); + if(!df->dyncfg.created_ut) + df->dyncfg.created_ut = df->dyncfg.modified_ut; + + fprintf(fp, "version=%zu\n", DYNCFG_VERSION); + fprintf(fp, "id=%s\n", id); + + if(df->template) + fprintf(fp, "template=%s\n", string2str(df->template)); + + char uuid_str[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(df->host_uuid.uuid, uuid_str); + fprintf(fp, "host=%s\n", uuid_str); + + fprintf(fp, "path=%s\n", string2str(df->path)); + fprintf(fp, "type=%s\n", dyncfg_id2type(df->type)); + + fprintf(fp, "source_type=%s\n", dyncfg_id2source_type(df->dyncfg.source_type)); + fprintf(fp, "source=%s\n", string2str(df->dyncfg.source)); + + fprintf(fp, "created=%"PRIu64"\n", df->dyncfg.created_ut); + fprintf(fp, "modified=%"PRIu64"\n", df->dyncfg.modified_ut); + fprintf(fp, "sync=%s\n", df->sync ? "true" : "false"); + fprintf(fp, "user_disabled=%s\n", df->dyncfg.user_disabled ? "true" : "false"); + fprintf(fp, "saves=%"PRIu32"\n", ++df->dyncfg.saves); + + fprintf(fp, "cmds="); + dyncfg_cmds2fp(df->cmds, fp); + fprintf(fp, "\n"); + + if(df->dyncfg.payload && buffer_strlen(df->dyncfg.payload) > 0) { + fprintf(fp, "content_type=%s\n", content_type_id2string(df->dyncfg.payload->content_type)); + fprintf(fp, "content_length=%zu\n", buffer_strlen(df->dyncfg.payload)); + fprintf(fp, "---\n"); + fwrite(buffer_tostring(df->dyncfg.payload), 1, buffer_strlen(df->dyncfg.payload), fp); + } + + fclose(fp); +} + +void dyncfg_file_load(const char *filename) { + FILE *fp = fopen(filename, "r"); + if (!fp) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot open file '%s'", filename); + return; + } + + DYNCFG tmp = { 0 }; + + char line[PLUGINSD_LINE_MAX]; + CLEAN_CHAR_P *id = NULL; + + HTTP_CONTENT_TYPE content_type = CT_NONE; + size_t content_length = 0; + bool read_payload = false; + + while (fgets(line, sizeof(line), fp)) { + if(strcmp(line, "---\n") == 0) { + read_payload = true; + break; + } + + char *value = strchr(line, '='); + if(!value) continue; + + *value++ = '\0'; + + value = trim(value); + if(!value) continue; + + char *key = trim(line); + if(!key) continue; + + // Parse key-value pairs + if (strcmp(key, "version") == 0) { + size_t version = strtoull(value, NULL, 10); + + if(version > DYNCFG_VERSION) + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "DYNCFG: configuration file '%s' has version %zu, which is newer than our version %zu", + filename, version, DYNCFG_VERSION); + + } else if (strcmp(key, "id") == 0) { + freez(id); + id = strdupz(value); + } else if (strcmp(key, "template") == 0) { + tmp.template = string_strdupz(value); + } else if (strcmp(key, "host") == 0) { + uuid_parse_flexi(value, tmp.host_uuid.uuid); + } else if (strcmp(key, "path") == 0) { + tmp.path = string_strdupz(value); + } else if (strcmp(key, "type") == 0) { + tmp.type = dyncfg_type2id(value); + } else if (strcmp(key, 
"source_type") == 0) { + tmp.dyncfg.source_type = dyncfg_source_type2id(value); + } else if (strcmp(key, "source") == 0) { + tmp.dyncfg.source = string_strdupz(value); + } else if (strcmp(key, "created") == 0) { + tmp.dyncfg.created_ut = strtoull(value, NULL, 10); + } else if (strcmp(key, "modified") == 0) { + tmp.dyncfg.modified_ut = strtoull(value, NULL, 10); + } else if (strcmp(key, "sync") == 0) { + tmp.sync = (strcmp(value, "true") == 0); + } else if (strcmp(key, "user_disabled") == 0) { + tmp.dyncfg.user_disabled = (strcmp(value, "true") == 0); + } else if (strcmp(key, "saves") == 0) { + tmp.dyncfg.saves = strtoull(value, NULL, 10); + } else if (strcmp(key, "content_type") == 0) { + content_type = content_type_string2id(value); + } else if (strcmp(key, "content_length") == 0) { + content_length = strtoull(value, NULL, 10); + } else if (strcmp(key, "cmds") == 0) { + tmp.cmds = dyncfg_cmds2id(value); + } + } + + if (read_payload) { + // Determine the actual size of the remaining file content + long saved_position = ftell(fp); // Save current position + fseek(fp, 0, SEEK_END); + long total_size = ftell(fp); // Total size of the file + size_t actual_size = total_size - saved_position; // Calculate remaining content size + fseek(fp, saved_position, SEEK_SET); // Reset file pointer to the beginning of the payload + + // Use actual_size instead of content_length to handle the whole remaining file + tmp.dyncfg.payload = buffer_create(actual_size, NULL); + tmp.dyncfg.payload->content_type = content_type; + + buffer_need_bytes(tmp.dyncfg.payload, actual_size); + tmp.dyncfg.payload->len = fread(tmp.dyncfg.payload->buffer, 1, actual_size, fp); + + if (content_length != tmp.dyncfg.payload->len) { + nd_log(NDLS_DAEMON, NDLP_WARNING, + "DYNCFG: content_length %zu does not match actual payload size %zu for file '%s'", + content_length, actual_size, filename); + } + } + + fclose(fp); + + if(!id) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: configuration file '%s' does not include a unique id. 
Ignoring it.", + filename); + + dyncfg_cleanup(&tmp); + return; + } + + tmp.dyncfg.status = DYNCFG_STATUS_ORPHAN; + tmp.dyncfg.restart_required = false; + + dyncfg_set_current_from_dyncfg(&tmp); + + dictionary_set(dyncfg_globals.nodes, id, &tmp, sizeof(tmp)); +} + +void dyncfg_load_all(void) { + DIR *dir = opendir(dyncfg_globals.dir); + if (!dir) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot open directory '%s'", dyncfg_globals.dir); + return; + } + + struct dirent *entry; + char filepath[PATH_MAX]; + while ((entry = readdir(dir)) != NULL) { + if ((entry->d_type == DT_REG || entry->d_type == DT_LNK) && strendswith(entry->d_name, ".dyncfg")) { + snprintf(filepath, sizeof(filepath), "%s/%s", dyncfg_globals.dir, entry->d_name); + dyncfg_file_load(filepath); + } + } + + closedir(dir); +} + +// ---------------------------------------------------------------------------- +// schemas loading + +static bool dyncfg_read_file_to_buffer(const char *filename, BUFFER *dst) { + int fd = open(filename, O_RDONLY | O_CLOEXEC, 0666); + if(unlikely(fd == -1)) + return false; + + struct stat st = { 0 }; + if(fstat(fd, &st) != 0) { + close(fd); + return false; + } + + buffer_flush(dst); + buffer_need_bytes(dst, st.st_size + 1); // +1 for the terminating zero + + ssize_t r = read(fd, (char*)dst->buffer, st.st_size); + if(unlikely(r == -1)) { + close(fd); + return false; + } + dst->len = r; + dst->buffer[dst->len] = '\0'; + + close(fd); + return true; +} + +bool dyncfg_get_schema(const char *id, BUFFER *dst) { + char filename[FILENAME_MAX + 1]; + + snprintfz(filename, sizeof(filename), "%s/schema.d/%s.json", netdata_configured_user_config_dir, id); + if(dyncfg_read_file_to_buffer(filename, dst)) + return true; + + snprintfz(filename, sizeof(filename), "%s/schema.d/%s.json", netdata_configured_stock_config_dir, id); + if(dyncfg_read_file_to_buffer(filename, dst)) + return true; + + return false; +} diff --git a/src/daemon/config/dyncfg-inline.c b/src/daemon/config/dyncfg-inline.c new file mode 100644 index 000000000..bed912e57 --- /dev/null +++ b/src/daemon/config/dyncfg-inline.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg.h" + +static DICTIONARY *dyncfg_nodes = NULL; + +static int dyncfg_inline_callback(struct rrd_function_execute *rfe, void *data __maybe_unused) { + char tr[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(*rfe->transaction, tr); + + bool cancelled = rfe->is_cancelled.cb ? 
rfe->is_cancelled.cb(rfe->is_cancelled.data) : false; + + int code; + if(cancelled) + code = HTTP_RESP_CLIENT_CLOSED_REQUEST; + else + code = dyncfg_node_find_and_call(dyncfg_nodes, tr, rfe->function, rfe->stop_monotonic_ut, &cancelled, + rfe->payload, rfe->user_access, rfe->source, rfe->result.wb); + + if(code == HTTP_RESP_CLIENT_CLOSED_REQUEST || (rfe->is_cancelled.cb && rfe->is_cancelled.cb(rfe->is_cancelled.data))) { + buffer_flush(rfe->result.wb); + code = HTTP_RESP_CLIENT_CLOSED_REQUEST; + } + + if(rfe->result.cb) + rfe->result.cb(rfe->result.wb, code, rfe->result.data); + + return code; +} + +bool dyncfg_add(RRDHOST *host, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, + DYNCFG_CMDS cmds, HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + dyncfg_cb_t cb, void *data) { + + struct dyncfg_node tmp = { + .cmds = cmds, + .type = type, + .cb = cb, + .data = data, + }; + dictionary_set(dyncfg_nodes, id, &tmp, sizeof(tmp)); + + if(!dyncfg_add_low_level(host, id, path, status, type, source_type, source, cmds, + 0, 0, true, view_access, edit_access, + dyncfg_inline_callback, NULL)) { + dictionary_del(dyncfg_nodes, id); + return false; + } + + return true; +} + +void dyncfg_del(RRDHOST *host, const char *id) { + dictionary_del(dyncfg_nodes, id); + dyncfg_del_low_level(host, id); +} + +void dyncfg_status(RRDHOST *host, const char *id, DYNCFG_STATUS status) { + dyncfg_status_low_level(host, id, status); +} + +void dyncfg_init(bool load_saved) { + dyncfg_nodes = dyncfg_nodes_dictionary_create(); + dyncfg_init_low_level(load_saved); +} diff --git a/src/daemon/config/dyncfg-intercept.c b/src/daemon/config/dyncfg-intercept.c new file mode 100644 index 000000000..812059f6f --- /dev/null +++ b/src/daemon/config/dyncfg-intercept.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +// ---------------------------------------------------------------------------- +// we intercept the config function calls of the plugin + +struct dyncfg_call { + BUFFER *payload; + char *function; + char *id; + char *add_name; + char *source; + DYNCFG_CMDS cmd; + rrd_function_result_callback_t result_cb; + void *result_cb_data; + bool from_dyncfg_echo; +}; + +static void dyncfg_function_intercept_job_successfully_added(DYNCFG *df_template, int code, struct dyncfg_call *dc) { + char id[strlen(dc->id) + 1 + strlen(dc->add_name) + 1]; + snprintfz(id, sizeof(id), "%s:%s", dc->id, dc->add_name); + + RRDHOST *host = dyncfg_rrdhost(df_template); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: cannot add job '%s' because host is missing", id); + } + else { + const DICTIONARY_ITEM *item = dyncfg_add_internal( + host, + id, + string2str(df_template->path), + dyncfg_status_from_successful_response(code), + DYNCFG_TYPE_JOB, + DYNCFG_SOURCE_TYPE_DYNCFG, + dc->source, + (df_template->cmds & ~DYNCFG_CMD_ADD) | DYNCFG_CMD_GET | DYNCFG_CMD_UPDATE | DYNCFG_CMD_TEST | + DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE | DYNCFG_CMD_REMOVE, + 0, + 0, + df_template->sync, + df_template->view_access, + df_template->edit_access, + df_template->execute_cb, + df_template->execute_cb_data, + false); + + // adding does not create df->dyncfg + // we have to do it here + + DYNCFG *df = dictionary_acquired_item_value(item); + SWAP(df->dyncfg.payload, dc->payload); + dyncfg_set_dyncfg_source_from_txt(df, dc->source); + df->dyncfg.user_disabled = false; + df->dyncfg.source_type = DYNCFG_SOURCE_TYPE_DYNCFG; + 
df->dyncfg.status = dyncfg_status_from_successful_response(code); + + dyncfg_file_save(id, df); // updates also the df->dyncfg timestamps + dyncfg_update_status_on_successful_add_or_update(df, code); + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + } +} + +static void dyncfg_function_intercept_job_successfully_updated(DYNCFG *df, int code, struct dyncfg_call *dc) { + df->dyncfg.status = dyncfg_status_from_successful_response(code); + df->dyncfg.source_type = DYNCFG_SOURCE_TYPE_DYNCFG; + SWAP(df->dyncfg.payload, dc->payload); + dyncfg_set_dyncfg_source_from_txt(df, dc->source); + + dyncfg_update_status_on_successful_add_or_update(df, code); +} + +void dyncfg_function_intercept_result_cb(BUFFER *wb, int code, void *result_cb_data) { + struct dyncfg_call *dc = result_cb_data; + + bool called_from_dyncfg_echo = dc->from_dyncfg_echo; + + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item_advanced(dyncfg_globals.nodes, dc->id, -1); + if(item) { + DYNCFG *df = dictionary_acquired_item_value(item); + bool old_user_disabled = df->dyncfg.user_disabled; + bool save_required = false; + + if (!called_from_dyncfg_echo) { + // the command was sent by a user + + if (DYNCFG_RESP_SUCCESS(code)) { + if (dc->cmd == DYNCFG_CMD_ADD) { + dyncfg_function_intercept_job_successfully_added(df, code, dc); + } else if (dc->cmd == DYNCFG_CMD_UPDATE) { + dyncfg_function_intercept_job_successfully_updated(df, code, dc); + save_required = true; + } + else if (dc->cmd == DYNCFG_CMD_ENABLE) { + df->dyncfg.user_disabled = false; + } + else if (dc->cmd == DYNCFG_CMD_DISABLE) { + df->dyncfg.user_disabled = true; + } + else if (dc->cmd == DYNCFG_CMD_REMOVE) { + dyncfg_file_delete(dc->id); + dictionary_del(dyncfg_globals.nodes, dc->id); + } + + if (save_required || old_user_disabled != df->dyncfg.user_disabled) + dyncfg_file_save(dc->id, df); + } + else + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: plugin returned code %d to user initiated call: %s", code, dc->function); + } + else { + // the command was sent by dyncfg + // these are handled by the echo callback, we don't need to do anything here + ; + } + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + } + + if(dc->result_cb) + dc->result_cb(wb, code, dc->result_cb_data); + + buffer_free(dc->payload); + freez(dc->function); + freez(dc->id); + freez(dc->source); + freez(dc->add_name); + freez(dc); +} + +// ---------------------------------------------------------------------------- + +static void dyncfg_apply_action_on_all_template_jobs(struct rrd_function_execute *rfe, const char *template_id, DYNCFG_CMDS c) { + STRING *template = string_strdupz(template_id); + DYNCFG *df; + + size_t all = 0, done = 0; + dfe_start_read(dyncfg_globals.nodes, df) { + if(df->template == template && df->type == DYNCFG_TYPE_JOB) + all++; + } + dfe_done(df); + + if(rfe->progress.cb) + rfe->progress.cb(rfe->progress.data, done, all); + + dfe_start_reentrant(dyncfg_globals.nodes, df) { + if(df->template == template && df->type == DYNCFG_TYPE_JOB) { + DYNCFG_CMDS cmd_to_send_to_plugin = c; + + if(c == DYNCFG_CMD_ENABLE) + cmd_to_send_to_plugin = df->dyncfg.user_disabled ? 
DYNCFG_CMD_DISABLE : DYNCFG_CMD_ENABLE; + else if(c == DYNCFG_CMD_DISABLE) + cmd_to_send_to_plugin = DYNCFG_CMD_DISABLE; + + dyncfg_echo(df_dfe.item, df, df_dfe.name, cmd_to_send_to_plugin); + + if(rfe->progress.cb) + rfe->progress.cb(rfe->progress.data, ++done, all); + } + } + dfe_done(df); + + string_freez(template); +} + +// ---------------------------------------------------------------------------- +// the callback for all config functions + +static int dyncfg_intercept_early_error(struct rrd_function_execute *rfe, int rc, const char *msg) { + rc = dyncfg_default_response(rfe->result.wb, rc, msg); + + if(rfe->result.cb) + rfe->result.cb(rfe->result.wb, rc, rfe->result.data); + + return rc; +} + +const DICTIONARY_ITEM *dyncfg_get_template_of_new_job(const char *job_id) { + char id_copy[strlen(job_id) + 1]; + memcpy(id_copy, job_id, sizeof(id_copy)); + + char *colon = strrchr(id_copy, ':'); + if(!colon) return NULL; + + *colon = '\0'; + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id_copy); + if(!item) return NULL; + + DYNCFG *df = dictionary_acquired_item_value(item); + if(df->type != DYNCFG_TYPE_TEMPLATE) { + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + return NULL; + } + + return item; +} + +int dyncfg_function_intercept_cb(struct rrd_function_execute *rfe, void *data __maybe_unused) { + + // IMPORTANT: this function MUST call the result_cb even on failures + + bool called_from_dyncfg_echo = rrd_function_has_this_original_result_callback(rfe->transaction, dyncfg_echo_cb); + bool has_payload = rfe->payload && buffer_strlen(rfe->payload) ? true : false; + bool make_the_call_to_plugin = true; + + int rc = HTTP_RESP_INTERNAL_SERVER_ERROR; + DYNCFG_CMDS cmd; + const DICTIONARY_ITEM *item = NULL; + const char *add_name = NULL; + + char buf[strlen(rfe->function) + 1]; + memcpy(buf, rfe->function, sizeof(buf)); + + char *words[20]; + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, 20); + + size_t i = 0; + char *config = get_word(words, num_words, i++); + char *id = get_word(words, num_words, i++); + char *cmd_str = get_word(words, num_words, i++); + + if(!config || !*config || strcmp(config, PLUGINSD_FUNCTION_CONFIG) != 0) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this is not a dyncfg request"); + + cmd = dyncfg_cmds2id(cmd_str); + if(cmd == DYNCFG_CMD_NONE) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: invalid command received"); + + if(cmd == DYNCFG_CMD_ADD) { + add_name = get_word(words, num_words, i++); + + if(!add_name || !*add_name) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this action requires a name"); + + if(!called_from_dyncfg_echo) { + char nid[strlen(id) + strlen(add_name) + 2]; + snprintfz(nid, sizeof(nid), "%s:%s", id, add_name); + + if (dictionary_get(dyncfg_globals.nodes, nid)) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: a configuration with this name already exists"); + } + } + + if((cmd == DYNCFG_CMD_ADD || cmd == DYNCFG_CMD_UPDATE || cmd == DYNCFG_CMD_TEST) && !has_payload) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this action requires a payload"); + + if((cmd != DYNCFG_CMD_ADD && cmd != DYNCFG_CMD_UPDATE && cmd != DYNCFG_CMD_TEST) && has_payload) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + 
"dyncfg functions intercept: this action does not require a payload"); + + item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(!item) { + if(cmd == DYNCFG_CMD_TEST) { + // this may be a test on a new job + item = dyncfg_get_template_of_new_job(id); + } + + if(!item) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_NOT_FOUND, + "dyncfg functions intercept: id is not found"); + } + + DYNCFG *df = dictionary_acquired_item_value(item); + + // 1. check the permissions of the request + + switch(cmd) { + case DYNCFG_CMD_GET: + case DYNCFG_CMD_SCHEMA: + if(!http_access_user_has_enough_access_level_for_endpoint(rfe->user_access, df->view_access)) { + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_FORBIDDEN, + "dyncfg: you don't have enough view permissions to execute this command"); + } + break; + + case DYNCFG_CMD_ENABLE: + case DYNCFG_CMD_DISABLE: + case DYNCFG_CMD_ADD: + case DYNCFG_CMD_TEST: + case DYNCFG_CMD_UPDATE: + case DYNCFG_CMD_REMOVE: + case DYNCFG_CMD_RESTART: + if(!http_access_user_has_enough_access_level_for_endpoint(rfe->user_access, df->edit_access)) { + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_FORBIDDEN, + "dyncfg: you don't have enough edit permissions to execute this command"); + } + break; + + default: { + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_INTERNAL_SERVER_ERROR, + "dyncfg: permissions for this command are not set"); + } + break; + } + + // 2. validate the request parameters + + if(make_the_call_to_plugin) { + if (!(df->cmds & cmd)) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: this command is not supported by the configuration node: %s", rfe->function); + + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this command is not supported by this configuration node"); + } + else if (cmd == DYNCFG_CMD_ADD) { + if (df->type != DYNCFG_TYPE_TEMPLATE) { + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: add command is only allowed in templates"); + + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: add command can only be applied on templates, not %s: %s", + dyncfg_id2type(df->type), rfe->function); + } + } + else if ( + cmd == DYNCFG_CMD_ENABLE && df->type == DYNCFG_TYPE_JOB && + dyncfg_is_user_disabled(string2str(df->template))) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: cannot enable a job of a disabled template: %s", + rfe->function); + + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this job belongs to disabled template"); + } + } + + // 3. 
check if it is one of the commands we should execute + + if(make_the_call_to_plugin) { + if (cmd & (DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE | DYNCFG_CMD_RESTART) && df->type == DYNCFG_TYPE_TEMPLATE) { + if (!called_from_dyncfg_echo) { + bool old_user_disabled = df->dyncfg.user_disabled; + if (cmd == DYNCFG_CMD_ENABLE) + df->dyncfg.user_disabled = false; + else if (cmd == DYNCFG_CMD_DISABLE) + df->dyncfg.user_disabled = true; + + if (df->dyncfg.user_disabled != old_user_disabled) + dyncfg_file_save(id, df); + } + + dyncfg_apply_action_on_all_template_jobs(rfe, id, cmd); + + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_OK, "applied to all template job"); + make_the_call_to_plugin = false; + } + else if (cmd == DYNCFG_CMD_SCHEMA) { + bool loaded = false; + if (df->type == DYNCFG_TYPE_JOB) { + if (df->template) + loaded = dyncfg_get_schema(string2str(df->template), rfe->result.wb); + } else + loaded = dyncfg_get_schema(id, rfe->result.wb); + + if (loaded) { + rfe->result.wb->content_type = CT_APPLICATION_JSON; + rfe->result.wb->expires = now_realtime_sec(); + rc = HTTP_RESP_OK; + make_the_call_to_plugin = false; + } + } + } + + // 4. execute the command + + if(make_the_call_to_plugin) { + struct dyncfg_call *dc = callocz(1, sizeof(*dc)); + dc->function = strdupz(rfe->function); + dc->id = strdupz(id); + dc->source = rfe->source ? strdupz(rfe->source) : NULL; + dc->add_name = (add_name) ? strdupz(add_name) : NULL; + dc->cmd = cmd; + dc->result_cb = rfe->result.cb; + dc->result_cb_data = rfe->result.data; + dc->payload = buffer_dup(rfe->payload); + dc->from_dyncfg_echo = called_from_dyncfg_echo; + + rfe->result.cb = dyncfg_function_intercept_result_cb; + rfe->result.data = dc; + + rc = df->execute_cb(rfe, df->execute_cb_data); + } + else if(rfe->result.cb) + rfe->result.cb(rfe->result.wb, rc, rfe->result.data); + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + return rc; +} + diff --git a/src/daemon/config/dyncfg-internals.h b/src/daemon/config/dyncfg-internals.h new file mode 100644 index 000000000..181d2328f --- /dev/null +++ b/src/daemon/config/dyncfg-internals.h @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DYNCFG_INTERNALS_H +#define NETDATA_DYNCFG_INTERNALS_H + +#include "../common.h" +#include "database/rrd.h" +#include "database/rrdfunctions.h" +#include "database/rrdfunctions-internals.h" +#include "database/rrdcollector-internals.h" + +typedef struct dyncfg { + UUID host_uuid; + STRING *function; + STRING *template; + STRING *path; + DYNCFG_CMDS cmds; + DYNCFG_TYPE type; + + HTTP_ACCESS view_access; + HTTP_ACCESS edit_access; + + struct { + DYNCFG_STATUS status; + DYNCFG_SOURCE_TYPE source_type; + STRING *source; + usec_t created_ut; + usec_t modified_ut; + } current; + + struct { + uint32_t saves; + bool restart_required; + bool plugin_rejected; + bool user_disabled; + DYNCFG_STATUS status; + DYNCFG_SOURCE_TYPE source_type; + STRING *source; + BUFFER *payload; + usec_t created_ut; + usec_t modified_ut; + } dyncfg; + + bool sync; + rrd_function_execute_cb_t execute_cb; + void *execute_cb_data; +} DYNCFG; + +struct dyncfg_globals { + const char *dir; + DICTIONARY *nodes; +}; + +extern struct dyncfg_globals dyncfg_globals; + +void dyncfg_load_all(void); +void dyncfg_file_load(const char *filename); +void dyncfg_file_save(const char *id, DYNCFG *df); +void dyncfg_file_delete(const char *id); + +bool dyncfg_get_schema(const char *id, BUFFER *dst); + +void dyncfg_echo_cb(BUFFER *wb, int code, void *result_cb_data); +void 
dyncfg_echo(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id, DYNCFG_CMDS cmd); +void dyncfg_echo_update(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id); +void dyncfg_echo_add(const DICTIONARY_ITEM *item_template, const DICTIONARY_ITEM *item_job, DYNCFG *df_template, DYNCFG *df_job, const char *template_id, const char *job_name); + +const DICTIONARY_ITEM *dyncfg_add_internal(RRDHOST *host, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, + const char *source, DYNCFG_CMDS cmds, + usec_t created_ut, usec_t modified_ut, + bool sync, HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data, + bool overwrite_cb); + +int dyncfg_function_intercept_cb(struct rrd_function_execute *rfe, void *data); +void dyncfg_cleanup(DYNCFG *v); + +const DICTIONARY_ITEM *dyncfg_get_template_of_new_job(const char *job_id); + +bool dyncfg_is_user_disabled(const char *id); + +RRDHOST *dyncfg_rrdhost_by_uuid(UUID *uuid); +RRDHOST *dyncfg_rrdhost(DYNCFG *df); + +static inline void dyncfg_copy_dyncfg_source_to_current(DYNCFG *df) { + STRING *old = df->current.source; + df->current.source = string_dup(df->dyncfg.source); + string_freez(old); +} + +static inline void dyncfg_set_dyncfg_source_from_txt(DYNCFG *df, const char *source) { + STRING *old = df->dyncfg.source; + df->dyncfg.source = string_strdupz(source); + string_freez(old); +} + +static inline void dyncfg_set_current_from_dyncfg(DYNCFG *df) { + df->current.status = df->dyncfg.status; + df->current.source_type = df->dyncfg.source_type; + + dyncfg_copy_dyncfg_source_to_current(df); + + if(df->dyncfg.created_ut < df->current.created_ut) + df->current.created_ut = df->dyncfg.created_ut; + + if(df->dyncfg.modified_ut > df->current.modified_ut) + df->current.modified_ut = df->dyncfg.modified_ut; +} + +static inline void dyncfg_update_status_on_successful_add_or_update(DYNCFG *df, int code) { + df->dyncfg.plugin_rejected = false; + + if (code == DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED) + df->dyncfg.restart_required = true; + else + df->dyncfg.restart_required = false; + + dyncfg_set_current_from_dyncfg(df); +} + +static inline DYNCFG_STATUS dyncfg_status_from_successful_response(int code) { + DYNCFG_STATUS status = DYNCFG_STATUS_ACCEPTED; + + switch(code) { + default: + case DYNCFG_RESP_ACCEPTED: + case DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED: + status = DYNCFG_STATUS_ACCEPTED; + break; + + case DYNCFG_RESP_ACCEPTED_DISABLED: + status = DYNCFG_STATUS_DISABLED; + break; + + case DYNCFG_RESP_RUNNING: + status = DYNCFG_STATUS_RUNNING; + break; + + } + + return status; +} + +#endif //NETDATA_DYNCFG_INTERNALS_H diff --git a/src/daemon/config/dyncfg-tree.c b/src/daemon/config/dyncfg-tree.c new file mode 100644 index 000000000..6af384daa --- /dev/null +++ b/src/daemon/config/dyncfg-tree.c @@ -0,0 +1,284 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +static int dyncfg_tree_compar(const void *a, const void *b) { + const DICTIONARY_ITEM *item1 = *(const DICTIONARY_ITEM **)a; + const DICTIONARY_ITEM *item2 = *(const DICTIONARY_ITEM **)b; + + DYNCFG *df1 = dictionary_acquired_item_value(item1); + DYNCFG *df2 = dictionary_acquired_item_value(item2); + + int rc = string_cmp(df1->path, df2->path); + if(rc == 0) + rc = strcmp(dictionary_acquired_item_name(item1), dictionary_acquired_item_name(item2)); + + return rc; +} + +static void dyncfg_to_json(DYNCFG *df, const char *id, BUFFER *wb) { + 
buffer_json_member_add_object(wb, id); + { + buffer_json_member_add_string(wb, "type", dyncfg_id2type(df->type)); + + if(df->type == DYNCFG_TYPE_JOB) + buffer_json_member_add_string(wb, "template", string2str(df->template)); + + buffer_json_member_add_string(wb, "status", dyncfg_id2status(df->current.status)); + dyncfg_cmds2json_array(df->current.status == DYNCFG_STATUS_ORPHAN ? DYNCFG_CMD_REMOVE : df->cmds, "cmds", wb); + buffer_json_member_add_object(wb, "access"); + { + http_access2buffer_json_array(wb, "view", df->view_access); + http_access2buffer_json_array(wb, "edit", df->edit_access); + } + buffer_json_object_close(wb); + buffer_json_member_add_string(wb, "source_type", dyncfg_id2source_type(df->current.source_type)); + buffer_json_member_add_string(wb, "source", string2str(df->current.source)); + buffer_json_member_add_boolean(wb, "sync", df->sync); + buffer_json_member_add_boolean(wb, "user_disabled", df->dyncfg.user_disabled); + buffer_json_member_add_boolean(wb, "restart_required", df->dyncfg.restart_required); + buffer_json_member_add_boolean(wb, "plugin_rejected", df->dyncfg.plugin_rejected); + buffer_json_member_add_object(wb, "payload"); + { + if (df->dyncfg.payload && buffer_strlen(df->dyncfg.payload)) { + buffer_json_member_add_boolean(wb, "available", true); + buffer_json_member_add_string(wb, "status", dyncfg_id2status(df->dyncfg.status)); + buffer_json_member_add_string(wb, "source_type", dyncfg_id2source_type(df->dyncfg.source_type)); + buffer_json_member_add_string(wb, "source", string2str(df->dyncfg.source)); + buffer_json_member_add_uint64(wb, "created_ut", df->dyncfg.created_ut); + buffer_json_member_add_uint64(wb, "modified_ut", df->dyncfg.modified_ut); + buffer_json_member_add_string(wb, "content_type", content_type_id2string(df->dyncfg.payload->content_type)); + buffer_json_member_add_uint64(wb, "content_length", df->dyncfg.payload->len); + } else + buffer_json_member_add_boolean(wb, "available", false); + } + buffer_json_object_close(wb); // payload + buffer_json_member_add_uint64(wb, "saves", df->dyncfg.saves); + buffer_json_member_add_uint64(wb, "created_ut", df->current.created_ut); + buffer_json_member_add_uint64(wb, "modified_ut", df->current.modified_ut); + } + buffer_json_object_close(wb); +} + +static void dyncfg_tree_for_host(RRDHOST *host, BUFFER *wb, const char *path, const char *id) { + size_t entries = dictionary_entries(dyncfg_globals.nodes); + size_t used = 0; + const DICTIONARY_ITEM *items[entries]; + size_t restart_required = 0, plugin_rejected = 0, status_incomplete = 0, status_failed = 0; + + STRING *template = NULL; + if(id && *id) + template = string_strdupz(id); + + UUID host_uuid = uuid2UUID(host->host_uuid); + + size_t path_len = strlen(path); + DYNCFG *df; + dfe_start_read(dyncfg_globals.nodes, df) { + if(!UUIDeq(df->host_uuid, host_uuid)) + continue; + + if(strncmp(string2str(df->path), path, path_len) != 0) + continue; + + if(!rrd_function_available(host, string2str(df->function))) + df->current.status = DYNCFG_STATUS_ORPHAN; + + if((id && strcmp(id, df_dfe.name) != 0) && (template && df->template != template)) + continue; + + items[used++] = dictionary_acquired_item_dup(dyncfg_globals.nodes, df_dfe.item); + } + dfe_done(df); + + if(used > 1) + qsort(items, used, sizeof(const DICTIONARY_ITEM *), dyncfg_tree_compar); + + buffer_flush(wb); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); + + buffer_json_member_add_uint64(wb, "version", 1); + + buffer_json_member_add_object(wb, "tree"); + { + STRING 
*last_path = NULL; + for (size_t i = 0; i < used; i++) { + df = dictionary_acquired_item_value(items[i]); + if (df->path != last_path) { + last_path = df->path; + + if (i) + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, string2str(last_path)); + } + + dyncfg_to_json(df, dictionary_acquired_item_name(items[i]), wb); + + if (df->dyncfg.plugin_rejected) + plugin_rejected++; + + if(df->current.status != DYNCFG_STATUS_ORPHAN) { + if (df->dyncfg.restart_required) + restart_required++; + + if (df->current.status == DYNCFG_STATUS_FAILED) + status_failed++; + + if (df->current.status == DYNCFG_STATUS_INCOMPLETE) + status_incomplete++; + } + } + + if (used) + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // tree + + buffer_json_member_add_object(wb, "attention"); + { + buffer_json_member_add_boolean(wb, "degraded", restart_required + plugin_rejected + status_failed + status_incomplete > 0); + buffer_json_member_add_uint64(wb, "restart_required", restart_required); + buffer_json_member_add_uint64(wb, "plugin_rejected", plugin_rejected); + buffer_json_member_add_uint64(wb, "status_failed", status_failed); + buffer_json_member_add_uint64(wb, "status_incomplete", status_incomplete); + } + buffer_json_object_close(wb); // attention + + buffer_json_agents_v2(wb, NULL, 0, false, false); + + buffer_json_finalize(wb); + + for(size_t i = 0; i < used ;i++) + dictionary_acquired_item_release(dyncfg_globals.nodes, items[i]); +} + +static int dyncfg_config_execute_cb(struct rrd_function_execute *rfe, void *data) { + RRDHOST *host = data; + int code; + + char buf[strlen(rfe->function) + 1]; + memcpy(buf, rfe->function, sizeof(buf)); + + char *words[MAX_FUNCTION_PARAMETERS]; // an array of pointers for the words in this line + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, MAX_FUNCTION_PARAMETERS); + + const char *config = get_word(words, num_words, 0); + const char *action = get_word(words, num_words, 1); + const char *path = get_word(words, num_words, 2); + const char *id = get_word(words, num_words, 3); + + if(!config || !*config || strcmp(config, PLUGINSD_FUNCTION_CONFIG) != 0) { + char *msg = "invalid function call, expected: config"; + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG TREE: function call '%s': %s", rfe->function, msg); + code = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(!action || !*action) { + char *msg = "invalid function call, expected: config tree"; + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG TREE: function call '%s': %s", rfe->function, msg); + code = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(strcmp(action, "tree") == 0) { + if(!path || !*path) + path = "/"; + + if(!id || !*id) + id = NULL; + else if(!dyncfg_is_valid_id(id)) { + char *msg = "invalid id given"; + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG TREE: function call '%s': %s", rfe->function, msg); + code = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + code = HTTP_RESP_OK; + dyncfg_tree_for_host(host, rfe->result.wb, path, id); + } + else { + id = action; + action = path; + path = NULL; + + DYNCFG_CMDS cmd = dyncfg_cmds2id(action); + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(!item) + item = dyncfg_get_template_of_new_job(id); + + if(item) { + DYNCFG *df = dictionary_acquired_item_value(item); + + if(!rrd_function_available(host, string2str(df->function))) + df->current.status = 
DYNCFG_STATUS_ORPHAN; + + if(cmd == DYNCFG_CMD_REMOVE) { + bool delete = (df->current.status == DYNCFG_STATUS_ORPHAN); + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + item = NULL; + + if(delete) { + if(!http_access_user_has_enough_access_level_for_endpoint(rfe->user_access, df->edit_access)) { + code = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_FORBIDDEN, + "dyncfg: you don't have enough edit permissions to execute this command"); + goto cleanup; + } + + dictionary_del(dyncfg_globals.nodes, id); + dyncfg_file_delete(id); + code = dyncfg_default_response(rfe->result.wb, 200, ""); + goto cleanup; + } + } + else if(cmd == DYNCFG_CMD_TEST && df->type == DYNCFG_TYPE_TEMPLATE && df->current.status != DYNCFG_STATUS_ORPHAN) { + const char *old_rfe_function = rfe->function; + char buf2[2048]; + snprintfz(buf2, sizeof(buf2), "config %s %s", dictionary_acquired_item_name(item), action); + rfe->function = buf2; + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + item = NULL; + code = dyncfg_function_intercept_cb(rfe, data); + rfe->function = old_rfe_function; + return code; + } + + if(item) + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + } + + code = HTTP_RESP_NOT_FOUND; + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: unknown config id '%s' in call: '%s'. " + "This can happen if the plugin that registered the dynamic configuration is not running now.", + id, rfe->function); + + rrd_call_function_error( + rfe->result.wb, + "unknown config id given", code); + } + +cleanup: + if(rfe->result.cb) + rfe->result.cb(rfe->result.wb, code, rfe->result.data); + + return code; +} + +// ---------------------------------------------------------------------------- +// this adds a 'config' function to all leaf nodes (localhost and virtual nodes) +// which is used to serve the tree and act as a catch-all for all config calls +// for which there is no id overloaded. + +void dyncfg_host_init(RRDHOST *host) { + // IMPORTANT: + // This function needs to be async, although it is internal. + // The reason is that it can call by itself another function that may or may not be internal (sync). 
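+    //
+    // Rough sketch of the call forms routed to this catch-all (see dyncfg_config_execute_cb() above):
+    //    config tree [path] [id]   - serve the JSON tree of configuration nodes under [path]
+    //    config <id> <action> ...  - fallback for ids that have no function registration of their own
+    //                                (e.g. removing orphaned entries, or testing a payload against a template)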
+ + rrd_function_add(host, NULL, PLUGINSD_FUNCTION_CONFIG, 120, + 1000, "Dynamic configuration", "config", HTTP_ACCESS_ANONYMOUS_DATA, + false, dyncfg_config_execute_cb, host); +} diff --git a/src/daemon/config/dyncfg-unittest.c b/src/daemon/config/dyncfg-unittest.c new file mode 100644 index 000000000..14a52f697 --- /dev/null +++ b/src/daemon/config/dyncfg-unittest.c @@ -0,0 +1,801 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +// ---------------------------------------------------------------------------- +// unit test + +#define LINE_FILE_STR TOSTRING(__LINE__) "@" __FILE__ + +struct dyncfg_unittest { + bool enabled; + size_t errors; + + DICTIONARY *nodes; + + SPINLOCK spinlock; + struct dyncfg_unittest_action *queue; +} dyncfg_unittest_data = { 0 }; + +typedef struct { + bool enabled; + bool removed; + struct { + double dbl; + bool bln; + } value; +} TEST_CFG; + +typedef struct { + const char *id; + const char *source; + bool sync; + DYNCFG_TYPE type; + DYNCFG_CMDS cmds; + DYNCFG_SOURCE_TYPE source_type; + + TEST_CFG current; + TEST_CFG expected; + + bool received; + bool finished; + + size_t last_saves; + bool needs_save; +} TEST; + +struct dyncfg_unittest_action { + TEST *t; + BUFFER *result; + BUFFER *payload; + DYNCFG_CMDS cmd; + const char *add_name; + const char *source; + + rrd_function_result_callback_t result_cb; + void *result_cb_data; + + struct dyncfg_unittest_action *prev, *next; +}; + +static void dyncfg_unittest_register_error(const char *id, const char *msg) { + if(msg) + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: error on id '%s': %s", id ? id : "", msg); + + __atomic_add_fetch(&dyncfg_unittest_data.errors, 1, __ATOMIC_RELAXED); +} + +static int dyncfg_unittest_execute_cb(struct rrd_function_execute *rfe, void *data); + +bool dyncfg_unittest_parse_payload(BUFFER *payload, TEST *t, DYNCFG_CMDS cmd, const char *add_name, const char *source) { + CLEAN_JSON_OBJECT *jobj = json_tokener_parse(buffer_tostring(payload)); + if(!jobj) { + dyncfg_unittest_register_error(t->id, "cannot parse json payload"); + return false; + } + + struct json_object *json_double; + struct json_object *json_boolean; + + json_object_object_get_ex(jobj, "double", &json_double); + double value_double = json_object_get_double(json_double); + + json_object_object_get_ex(jobj, "boolean", &json_boolean); + int value_boolean = json_object_get_boolean(json_boolean); + + if(cmd == DYNCFG_CMD_UPDATE) { + t->current.value.dbl = value_double; + t->current.value.bln = value_boolean; + } + else if(cmd == DYNCFG_CMD_ADD) { + char buf[strlen(t->id) + strlen(add_name) + 20]; + snprintfz(buf, sizeof(buf), "%s:%s", t->id, add_name); + TEST tmp = { + .id = strdupz(buf), + .source = strdupz(source), + .cmds = (t->cmds & ~DYNCFG_CMD_ADD) | DYNCFG_CMD_GET | DYNCFG_CMD_REMOVE | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE | DYNCFG_CMD_TEST, + .sync = t->sync, + .type = DYNCFG_TYPE_JOB, + .source_type = DYNCFG_SOURCE_TYPE_DYNCFG, + .received = true, + .finished = true, + .current = + {.enabled = true, + .removed = false, + .value = + { + .dbl = value_double, + .bln = value_boolean, + }}, + .expected = { + .enabled = true, + .removed = false, + .value = { + .dbl = 3.14, + .bln = true, + } + }, + .needs_save = true, + }; + const DICTIONARY_ITEM *item = dictionary_set_and_acquire_item(dyncfg_unittest_data.nodes, buf, &tmp, sizeof(tmp)); + TEST *t2 = dictionary_acquired_item_value(item); + dictionary_acquired_item_release(dyncfg_unittest_data.nodes, 
item); + + dyncfg_add_low_level(localhost, t2->id, "/unittests", + DYNCFG_STATUS_RUNNING, t2->type, t2->source_type, t2->source, + t2->cmds, 0, 0, t2->sync, + HTTP_ACCESS_NONE, HTTP_ACCESS_NONE, + dyncfg_unittest_execute_cb, t2); + } + else { + dyncfg_unittest_register_error(t->id, "invalid command received to parse payload"); + return false; + } + + return true; +} + +static int dyncfg_unittest_action(struct dyncfg_unittest_action *a) { + TEST *t = a->t; + + int rc = HTTP_RESP_OK; + + if(a->cmd == DYNCFG_CMD_ENABLE) + t->current.enabled = true; + else if(a->cmd == DYNCFG_CMD_DISABLE) + t->current.enabled = false; + else if(a->cmd == DYNCFG_CMD_ADD || a->cmd == DYNCFG_CMD_UPDATE) + rc = dyncfg_unittest_parse_payload(a->payload, a->t, a->cmd, a->add_name, a->source) ? HTTP_RESP_OK : HTTP_RESP_BAD_REQUEST; + else if(a->cmd == DYNCFG_CMD_REMOVE) + t->current.removed = true; + else + rc = HTTP_RESP_BAD_REQUEST; + + dyncfg_default_response(a->result, rc, NULL); + + a->result_cb(a->result, rc, a->result_cb_data); + + buffer_free(a->payload); + freez((void *)a->add_name); + freez(a); + + __atomic_store_n(&t->finished, true, __ATOMIC_RELAXED); + + return rc; +} + +static void *dyncfg_unittest_thread_action(void *ptr __maybe_unused) { + while(1) { + struct dyncfg_unittest_action *a = NULL; + spinlock_lock(&dyncfg_unittest_data.spinlock); + a = dyncfg_unittest_data.queue; + if(a) + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(dyncfg_unittest_data.queue, a, prev, next); + spinlock_unlock(&dyncfg_unittest_data.spinlock); + + if(a) + dyncfg_unittest_action(a); + else + sleep_usec(10 * USEC_PER_MS); + } +} + +static int dyncfg_unittest_execute_cb(struct rrd_function_execute *rfe, void *data) { + + int rc; + bool run_the_callback = true; + TEST *t = data; + + t->received = true; + + char buf[strlen(rfe->function) + 1]; + memcpy(buf, rfe->function, sizeof(buf)); + + char *words[MAX_FUNCTION_PARAMETERS]; // an array of pointers for the words in this line + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, MAX_FUNCTION_PARAMETERS); + + const char *config = get_word(words, num_words, 0); + const char *id = get_word(words, num_words, 1); + const char *action = get_word(words, num_words, 2); + const char *add_name = get_word(words, num_words, 3); + + if(!config || !*config || strcmp(config, PLUGINSD_FUNCTION_CONFIG) != 0) { + char *msg = "did not receive a config call"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(!id || !*id) { + char *msg = "did not receive an id"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(t->type != DYNCFG_TYPE_TEMPLATE && strcmp(t->id, id) != 0) { + char *msg = "id received is not the expected"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(!action || !*action) { + char *msg = "did not receive an action"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + DYNCFG_CMDS cmd = dyncfg_cmds2id(action); + if(cmd == DYNCFG_CMD_NONE) { + char *msg = "action received is not known"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(!(t->cmds & cmd)) { + char *msg = "received a command that is not supported"; + 
dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(t->current.removed && cmd != DYNCFG_CMD_ADD) { + char *msg = "received a command for a removed entry"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + struct dyncfg_unittest_action *a = callocz(1, sizeof(*a)); + a->t = t; + a->add_name = add_name ? strdupz(add_name) : NULL; + a->source = rfe->source, + a->result = rfe->result.wb; + a->payload = buffer_dup(rfe->payload); + a->cmd = cmd; + a->result_cb = rfe->result.cb; + a->result_cb_data = rfe->result.data; + + run_the_callback = false; + + if(t->sync) + rc = dyncfg_unittest_action(a); + else { + spinlock_lock(&dyncfg_unittest_data.spinlock); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(dyncfg_unittest_data.queue, a, prev, next); + spinlock_unlock(&dyncfg_unittest_data.spinlock); + rc = HTTP_RESP_OK; + } + +cleanup: + if(run_the_callback) { + __atomic_store_n(&t->finished, true, __ATOMIC_RELAXED); + + if (rfe->result.cb) + rfe->result.cb(rfe->result.wb, rc, rfe->result.data); + } + + return rc; +} + +static bool dyncfg_unittest_check(TEST *t, DYNCFG_CMDS c, const char *cmd, bool received) { + size_t errors = 0; + + fprintf(stderr, "CHECK '%s' after cmd '%s'...", t->id, cmd); + + if(t->received != received) { + fprintf(stderr, "\n - received flag found '%s', expected '%s'", + t->received?"true":"false", + received?"true":"false"); + errors++; + goto cleanup; + } + + if(!received) + goto cleanup; + + usec_t give_up_ut = now_monotonic_usec() + 2 * USEC_PER_SEC; + while(!__atomic_load_n(&t->finished, __ATOMIC_RELAXED)) { + static const struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 }; + nanosleep(&ns, NULL); + + if(now_monotonic_usec() > give_up_ut) { + fprintf(stderr, "\n - gave up waiting for the plugin to process this!"); + errors++; + goto cleanup; + } + } + + if(t->type != DYNCFG_TYPE_TEMPLATE && t->current.enabled != t->expected.enabled) { + fprintf(stderr, "\n - enabled flag found '%s', expected '%s'", + t->current.enabled?"true":"false", + t->expected.enabled?"true":"false"); + errors++; + } + if(t->current.removed != t->expected.removed) { + fprintf(stderr, "\n - removed flag found '%s', expected '%s'", + t->current.removed?"true":"false", + t->expected.removed?"true":"false"); + errors++; + } + if(t->current.value.bln != t->expected.value.bln) { + fprintf(stderr, "\n - boolean value found '%s', expected '%s'", + t->current.value.bln?"true":"false", + t->expected.value.bln?"true":"false"); + errors++; + } + if(t->current.value.dbl != t->expected.value.dbl) { + fprintf(stderr, "\n - double value found '%f', expected '%f'", + t->current.value.dbl, t->expected.value.dbl); + errors++; + } + + DYNCFG *df = dictionary_get(dyncfg_globals.nodes, t->id); + if(!df) { + fprintf(stderr, "\n - not found in DYNCFG nodes dictionary!"); + errors++; + } + else if(df->cmds != t->cmds) { + fprintf(stderr, "\n - has different cmds in DYNCFG nodes dictionary; found: "); + dyncfg_cmds2fp(df->cmds, stderr); + fprintf(stderr, ", expected: "); + dyncfg_cmds2fp(t->cmds, stderr); + fprintf(stderr, "\n"); + errors++; + } + else if(df->type == DYNCFG_TYPE_JOB && df->current.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && !df->dyncfg.saves) { + fprintf(stderr, "\n - DYNCFG job has no saves!"); + errors++; + } + else if(df->type == DYNCFG_TYPE_JOB && df->current.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && (!df->dyncfg.payload || 
!buffer_strlen(df->dyncfg.payload))) { + fprintf(stderr, "\n - DYNCFG job has no payload!"); + errors++; + } + else if(df->dyncfg.user_disabled && !df->dyncfg.saves) { + fprintf(stderr, "\n - DYNCFG disabled config has no saves!"); + errors++; + } + else if((c & (DYNCFG_CMD_ADD | DYNCFG_CMD_UPDATE)) && t->source && string_strcmp(df->current.source, t->source) != 0) { + fprintf(stderr, "\n - source does not match!"); + errors++; + } + else if((c & (DYNCFG_CMD_ADD | DYNCFG_CMD_UPDATE)) && df->current.source && !t->source) { + fprintf(stderr, "\n - there is a source but it shouldn't be any!"); + errors++; + } + else if(t->needs_save && df->dyncfg.saves <= t->last_saves) { + fprintf(stderr, "\n - should be saved, but it is not saved!"); + errors++; + } + else if(!t->needs_save && df->dyncfg.saves > t->last_saves) { + fprintf(stderr, "\n - should be not be saved, but it saved!"); + errors++; + } + +cleanup: + if(errors) { + fprintf(stderr, "\n >>> FAILED\n\n"); + dyncfg_unittest_register_error(NULL, NULL); + return false; + } + + fprintf(stderr, " OK\n"); + return true; +} + +static void dyncfg_unittest_reset(void) { + TEST *t; + dfe_start_read(dyncfg_unittest_data.nodes, t) { + t->received = t->finished = false; + t->needs_save = false; + + DYNCFG *df = dictionary_get(dyncfg_globals.nodes, t->id); + if(!df) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: cannot find id '%s'", t->id); + dyncfg_unittest_register_error(NULL, NULL); + } + else + t->last_saves = df->dyncfg.saves; + } + dfe_done(t); +} + +void should_be_saved(TEST *t, DYNCFG_CMDS c) { + DYNCFG *df; + + if(t->type == DYNCFG_TYPE_TEMPLATE) { + df = dictionary_get(dyncfg_globals.nodes, t->id); + t->current.enabled = !df->dyncfg.user_disabled; + } + + t->needs_save = + c == DYNCFG_CMD_UPDATE || + (t->current.enabled && c == DYNCFG_CMD_DISABLE) || + (!t->current.enabled && c == DYNCFG_CMD_ENABLE); +} + +static int dyncfg_unittest_run(const char *cmd, BUFFER *wb, const char *payload, const char *source) { + dyncfg_unittest_reset(); + + char buf[strlen(cmd) + 1]; + memcpy(buf, cmd, sizeof(buf)); + + char *words[MAX_FUNCTION_PARAMETERS]; // an array of pointers for the words in this line + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, MAX_FUNCTION_PARAMETERS); + + // const char *config = get_word(words, num_words, 0); + const char *id = get_word(words, num_words, 1); + char *action = get_word(words, num_words, 2); + const char *add_name = get_word(words, num_words, 3); + + DYNCFG_CMDS c = dyncfg_cmds2id(action); + + TEST *t = dictionary_get(dyncfg_unittest_data.nodes, id); + if(!t) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: cannot find id '%s' from cmd: %s", id, cmd); + dyncfg_unittest_register_error(NULL, NULL); + return HTTP_RESP_NOT_FOUND; + } + + if(t->type == DYNCFG_TYPE_TEMPLATE) + t->received = t->finished = true; + + if(c == DYNCFG_CMD_DISABLE) + t->expected.enabled = false; + if(c == DYNCFG_CMD_ENABLE) + t->expected.enabled = true; + if(c == DYNCFG_CMD_UPDATE) + memset(&t->current.value, 0, sizeof(t->current.value)); + + if(c & (DYNCFG_CMD_UPDATE) || (c & (DYNCFG_CMD_DISABLE|DYNCFG_CMD_ENABLE) && t->type != DYNCFG_TYPE_TEMPLATE)) { + freez((void *)t->source); + t->source = strdupz(source); + } + + buffer_flush(wb); + + CLEAN_BUFFER *pld = NULL; + + if(payload) { + pld = buffer_create(1024, NULL); + buffer_strcat(pld, payload); + } + + should_be_saved(t, c); + + int rc = rrd_function_run(localhost, wb, 10, HTTP_ACCESS_ALL, cmd, + true, NULL, + NULL, NULL, + NULL, NULL, + NULL, NULL, + pld, source); + 
if(!DYNCFG_RESP_SUCCESS(rc)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: failed to run: %s; returned code %d", cmd, rc); + dyncfg_unittest_register_error(NULL, NULL); + } + + dyncfg_unittest_check(t, c, cmd, true); + + if(rc == HTTP_RESP_OK && t->type == DYNCFG_TYPE_TEMPLATE) { + if(c == DYNCFG_CMD_ADD) { + char buf2[strlen(id) + strlen(add_name) + 2]; + snprintfz(buf2, sizeof(buf2), "%s:%s", id, add_name); + TEST *tt = dictionary_get(dyncfg_unittest_data.nodes, buf2); + if (!tt) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG UNITTEST: failed to find newly added id '%s' of command: %s", + id, cmd); + dyncfg_unittest_register_error(NULL, NULL); + } + dyncfg_unittest_check(tt, c, cmd, true); + } + else { + STRING *template = string_strdupz(t->id); + DYNCFG *df; + dfe_start_read(dyncfg_globals.nodes, df) { + if(df->type == DYNCFG_TYPE_JOB && df->template == template) { + TEST *tt = dictionary_get(dyncfg_unittest_data.nodes, df_dfe.name); + if (!tt) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG UNITTEST: failed to find id '%s' while running command: %s", df_dfe.name, cmd); + dyncfg_unittest_register_error(NULL, NULL); + } + else { + if(c == DYNCFG_CMD_DISABLE) + tt->expected.enabled = false; + if(c == DYNCFG_CMD_ENABLE) + tt->expected.enabled = true; + dyncfg_unittest_check(tt, c, cmd, true); + } + } + } + dfe_done(df); + string_freez(template); + } + } + + return rc; +} + +static void dyncfg_unittest_cleanup_files(void) { + char path[FILENAME_MAX]; + snprintfz(path, sizeof(path) - 1, "%s/%s", netdata_configured_varlib_dir, "config"); + + DIR *dir = opendir(path); + if (!dir) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: cannot open directory '%s'", path); + return; + } + + struct dirent *entry; + char filename[FILENAME_MAX + sizeof(entry->d_name)]; + while ((entry = readdir(dir)) != NULL) { + if ((entry->d_type == DT_REG || entry->d_type == DT_LNK) && strstartswith(entry->d_name, "unittest:") && strendswith(entry->d_name, ".dyncfg")) { + snprintf(filename, sizeof(filename), "%s/%s", path, entry->d_name); + nd_log(NDLS_DAEMON, NDLP_INFO, "DYNCFG UNITTEST: deleting file '%s'", filename); + unlink(filename); + } + } + + closedir(dir); +} + +static TEST *dyncfg_unittest_add(TEST t) { + dyncfg_unittest_reset(); + + TEST *ret = dictionary_set(dyncfg_unittest_data.nodes, t.id, &t, sizeof(t)); + + if(!dyncfg_add_low_level(localhost, t.id, "/unittests", DYNCFG_STATUS_RUNNING, t.type, + t.source_type, t.source, + t.cmds, 0, 0, t.sync, + HTTP_ACCESS_NONE, HTTP_ACCESS_NONE, + dyncfg_unittest_execute_cb, ret)) { + dyncfg_unittest_register_error(t.id, "addition of job failed"); + } + + dyncfg_unittest_check(ret, DYNCFG_CMD_NONE, "plugin create", t.type != DYNCFG_TYPE_TEMPLATE); + + return ret; +} + +void dyncfg_unittest_delete_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + TEST *v = value; + freez((void *)v->id); + freez((void *)v->source); +} + +int dyncfg_unittest(void) { + dyncfg_unittest_data.nodes = dictionary_create(DICT_OPTION_NONE); + dictionary_register_delete_callback(dyncfg_unittest_data.nodes, dyncfg_unittest_delete_cb, NULL); + + dyncfg_unittest_cleanup_files(); + rrd_functions_inflight_init(); + dyncfg_init(false); + + // ------------------------------------------------------------------------ + // create the thread for testing async communication + + netdata_thread_t thread; + netdata_thread_create(&thread, "unittest", NETDATA_THREAD_OPTION_JOINABLE, + dyncfg_unittest_thread_action, NULL); + + // 
------------------------------------------------------------------------ + // single + + TEST *single1 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:single1"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_SINGLE, + .cmds = DYNCFG_CMD_GET | DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, + .sync = true, + .current = { + .enabled = true, + }, + .expected = { + .enabled = true, + } + }); (void)single1; + + TEST *single2 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:single2"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_SINGLE, + .cmds = DYNCFG_CMD_GET | DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, + .sync = false, + .current = { + .enabled = true, + }, + .expected = { + .enabled = true, + } + }); (void)single2; + + // ------------------------------------------------------------------------ + // template + + TEST *template1 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:template1"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_TEMPLATE, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_ADD | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, + .sync = true, + }); (void)template1; + + TEST *template2 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:template2"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_TEMPLATE, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_ADD | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, + .sync = false, + }); (void)template2; + + // ------------------------------------------------------------------------ + // job + + TEST *user1 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:template1:user1"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = true, + .current = { + .enabled = true, + }, + .expected = { + .enabled = true, + } + }); (void)user1; + + TEST *user2 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:template2:user2"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = false, + .expected = { + .enabled = true, + } + }); (void)user2; + + // ------------------------------------------------------------------------ + + int rc; (void)rc; + BUFFER *wb = buffer_create(0, NULL); + + // ------------------------------------------------------------------------ + // dynamic job + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 add dyn1", wb, "{\"double\":3.14,\"boolean\":true}", LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 add dyn2", wb, "{\"double\":3.14,\"boolean\":true}", LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 add dyn3", wb, "{\"double\":3.14,\"boolean\":true}", LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 add dyn4", wb, "{\"double\":3.14,\"boolean\":true}", LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // saving of user_disabled + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:single1 
disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:single2 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:user1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:user2 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:dyn1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:dyn2 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:dyn3 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:dyn4 disable", wb, NULL, LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // enabling + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:single1 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:single2 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:user1 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:user2 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:dyn1 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:dyn2 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:dyn3 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:dyn4 enable", wb, NULL, LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // disabling template + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 disable", wb, NULL, LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // enabling template + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 enable", wb, NULL, LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // adding job on disabled template + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 disable", wb, NULL, LINE_FILE_STR); + + TEST *user3 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:template1:user3"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = true, + .expected = { + .enabled = false, + } + }); (void)user3; + + TEST *user4 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:template2:user4"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = false, + .expected = { + .enabled = false, + } + }); (void)user4; + + TEST 
*user5 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:template1:user5"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = true, + .expected = { + .enabled = false, + } + }); (void)user5; + + TEST *user6 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:template2:user6"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = false, + .expected = { + .enabled = false, + } + }); (void)user6; + +// dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:user5 disable", wb, NULL, LINE_FILE_STR); +// dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:user6 disable", wb, NULL, LINE_FILE_STR); + +// // ------------------------------------------------------------------------ +// // enable template with disabled jobs +// +// user3->expected.enabled = true; +// user5->expected.enabled = false; +// dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 enable", wb, NULL, LINE_FILE_STR); +// +// user4->expected.enabled = true; +// user6->expected.enabled = false; +// dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 enable", wb, NULL, LINE_FILE_STR); + + +// // ------------------------------------------------------------------------ +// +// rc = dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " tree", wb, NULL); +// if(rc == HTTP_RESP_OK) +// fprintf(stderr, "%s\n", buffer_tostring(wb)); + + void *ptr; + netdata_thread_cancel(thread); + netdata_thread_join(thread, &ptr); + dyncfg_unittest_cleanup_files(); + dictionary_destroy(dyncfg_unittest_data.nodes); + buffer_free(wb); + return __atomic_load_n(&dyncfg_unittest_data.errors, __ATOMIC_RELAXED) > 0 ? 
1 : 0; +} diff --git a/src/daemon/config/dyncfg.c b/src/daemon/config/dyncfg.c new file mode 100644 index 000000000..2a5696b2a --- /dev/null +++ b/src/daemon/config/dyncfg.c @@ -0,0 +1,454 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +struct dyncfg_globals dyncfg_globals = { 0 }; + +RRDHOST *dyncfg_rrdhost_by_uuid(UUID *uuid) { + char uuid_str[UUID_STR_LEN]; + uuid_unparse_lower(uuid->uuid, uuid_str); + + RRDHOST *host = rrdhost_find_by_guid(uuid_str); + if(!host) + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host with UUID '%s'", uuid_str); + + return host; +} + +RRDHOST *dyncfg_rrdhost(DYNCFG *df) { + return dyncfg_rrdhost_by_uuid(&df->host_uuid); +} + +void dyncfg_cleanup(DYNCFG *v) { + string_freez(v->dyncfg.source); + v->dyncfg.source = NULL; + + buffer_free(v->dyncfg.payload); + v->dyncfg.payload = NULL; + + string_freez(v->path); + v->path = NULL; + + string_freez(v->current.source); + v->current.source = NULL; + + string_freez(v->function); + v->function = NULL; + + string_freez(v->template); + v->template = NULL; +} + +static void dyncfg_normalize(DYNCFG *df) { + usec_t now_ut = now_realtime_usec(); + + if(!df->current.created_ut) + df->current.created_ut = now_ut; + + if(!df->current.modified_ut) + df->current.modified_ut = now_ut; +} + +static void dyncfg_delete_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + DYNCFG *df = value; + dyncfg_cleanup(df); +} + +static void dyncfg_insert_cb(const DICTIONARY_ITEM *item, void *value, void *data __maybe_unused) { + DYNCFG *df = value; + dyncfg_normalize(df); + + const char *id = dictionary_acquired_item_name(item); + char buf[strlen(id) + 20]; + snprintfz(buf, sizeof(buf), PLUGINSD_FUNCTION_CONFIG " %s", id); + df->function = string_strdupz(buf); + + if(df->type == DYNCFG_TYPE_JOB && !df->template) { + const char *last_colon = strrchr(id, ':'); + if(last_colon) + df->template = string_strndupz(id, last_colon - id); + else + nd_log(NDLS_DAEMON, NDLP_WARNING, + "DYNCFG: id '%s' is a job, but does not contain a colon to find the template", id); + } +} + +static void dyncfg_react_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + DYNCFG *df = value; (void)df; + ; +} + +static bool dyncfg_conflict_cb(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data) { + bool *overwrite_cb_ptr = data; + bool overwrite_cb = (overwrite_cb_ptr && *overwrite_cb_ptr); + + DYNCFG *v = old_value; + DYNCFG *nv = new_value; + + size_t changes = 0; + + dyncfg_normalize(nv); + + if(!UUIDeq(v->host_uuid, nv->host_uuid)) { + SWAP(v->host_uuid, nv->host_uuid); + changes++; + } + + if(v->path != nv->path) { + SWAP(v->path, nv->path); + changes++; + } + + if(v->cmds != nv->cmds) { + SWAP(v->cmds, nv->cmds); + changes++; + } + + if(v->type != nv->type) { + SWAP(v->type, nv->type); + changes++; + } + + if(v->view_access != nv->view_access) { + SWAP(v->view_access, nv->view_access); + changes++; + } + + if(v->edit_access != nv->edit_access) { + SWAP(v->edit_access, nv->edit_access); + changes++; + } + + if(v->current.status != nv->current.status) { + SWAP(v->current.status, nv->current.status); + changes++; + } + + if (v->current.source_type != nv->current.source_type) { + SWAP(v->current.source_type, nv->current.source_type); + changes++; + } + + if (v->current.source != nv->current.source) { + SWAP(v->current.source, nv->current.source); + changes++; + } + + if(nv->current.created_ut < 
v->current.created_ut) { + SWAP(v->current.created_ut, nv->current.created_ut); + changes++; + } + + if(nv->current.modified_ut > v->current.modified_ut) { + SWAP(v->current.modified_ut, nv->current.modified_ut); + changes++; + } + + if(!v->execute_cb || (overwrite_cb && nv->execute_cb && (v->execute_cb != nv->execute_cb || v->execute_cb_data != nv->execute_cb_data))) { + v->sync = nv->sync, + v->execute_cb = nv->execute_cb; + v->execute_cb_data = nv->execute_cb_data; + changes++; + } + + dyncfg_cleanup(nv); + + return changes > 0; +} + +// ---------------------------------------------------------------------------- + +void dyncfg_init_low_level(bool load_saved) { + if(!dyncfg_globals.nodes) { + dyncfg_globals.nodes = dictionary_create_advanced(DICT_OPTION_FIXED_SIZE | DICT_OPTION_DONT_OVERWRITE_VALUE, NULL, sizeof(DYNCFG)); + dictionary_register_insert_callback(dyncfg_globals.nodes, dyncfg_insert_cb, NULL); + dictionary_register_react_callback(dyncfg_globals.nodes, dyncfg_react_cb, NULL); + dictionary_register_conflict_callback(dyncfg_globals.nodes, dyncfg_conflict_cb, NULL); + dictionary_register_delete_callback(dyncfg_globals.nodes, dyncfg_delete_cb, NULL); + + char path[PATH_MAX]; + snprintfz(path, sizeof(path), "%s/%s", netdata_configured_varlib_dir, "config"); + + if(mkdir(path, 0755) == -1) { + if(errno != EEXIST) + nd_log(NDLS_DAEMON, NDLP_CRIT, "DYNCFG: failed to create dynamic configuration directory '%s'", path); + } + + dyncfg_globals.dir = strdupz(path); + + if(load_saved) + dyncfg_load_all(); + } +} + +// ---------------------------------------------------------------------------- + +const DICTIONARY_ITEM *dyncfg_add_internal(RRDHOST *host, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, + const char *source, DYNCFG_CMDS cmds, + usec_t created_ut, usec_t modified_ut, + bool sync, HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data, + bool overwrite_cb) { + DYNCFG tmp = { + .host_uuid = uuid2UUID(host->host_uuid), + .path = string_strdupz(path), + .cmds = cmds, + .type = type, + .view_access = view_access, + .edit_access = edit_access, + .current = { + .status = status, + .source_type = source_type, + .source = string_strdupz(source), + .created_ut = created_ut, + .modified_ut = modified_ut, + }, + .sync = sync, + .dyncfg = { 0 }, + .execute_cb = execute_cb, + .execute_cb_data = execute_cb_data, + }; + + return dictionary_set_and_acquire_item_advanced(dyncfg_globals.nodes, id, -1, &tmp, sizeof(tmp), &overwrite_cb); +} + +static void dyncfg_send_updates(const char *id) { + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item_advanced(dyncfg_globals.nodes, id, -1); + if(!item) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: asked to update plugin for configuration '%s', but it is not found.", id); + return; + } + + DYNCFG *df = dictionary_acquired_item_value(item); + + if(df->type == DYNCFG_TYPE_SINGLE || df->type == DYNCFG_TYPE_JOB) { + if (df->cmds & DYNCFG_CMD_UPDATE && df->dyncfg.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && df->dyncfg.payload && buffer_strlen(df->dyncfg.payload)) + dyncfg_echo_update(item, df, id); + } + else if(df->type == DYNCFG_TYPE_TEMPLATE && (df->cmds & DYNCFG_CMD_ADD)) { + STRING *template = string_strdupz(id); + + size_t len = strlen(id); + DYNCFG *df_job; + dfe_start_reentrant(dyncfg_globals.nodes, df_job) { + const char *id_template = df_job_dfe.name; + if(df_job->type == DYNCFG_TYPE_JOB && // it is a job + 
df_job->current.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && // it is dynamically configured + df_job->template == template && // it has the same template name + strncmp(id_template, id, len) == 0 && // the template name matches (redundant) + id_template[len] == ':' && // immediately after the template there is ':' + id_template[len + 1]) { // and there is something else after the ':' + dyncfg_echo_add(item, df_job_dfe.item, df, df_job, id, &id_template[len + 1]); + } + } + dfe_done(df_job); + + string_freez(template); + } + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); +} + +bool dyncfg_is_user_disabled(const char *id) { + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(!item) + return false; + + DYNCFG *df = dictionary_acquired_item_value(item); + bool ret = df->dyncfg.user_disabled; + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + return ret; +} + +bool dyncfg_job_has_registered_template(const char *id) { + char buf[strlen(id) + 1]; + memcpy(buf, id, sizeof(buf)); + char *colon = strrchr(buf, ':'); + if(!colon) + return false; + + *colon = '\0'; + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, buf); + if(!item) + return false; + + DYNCFG *df = dictionary_acquired_item_value(item); + bool ret = df->type == DYNCFG_TYPE_TEMPLATE; + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + return ret; +} + +bool dyncfg_add_low_level(RRDHOST *host, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, + DYNCFG_CMDS cmds, usec_t created_ut, usec_t modified_ut, bool sync, + HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data) { + + if(view_access == HTTP_ACCESS_NONE) + view_access = HTTP_ACCESS_SIGNED_ID | HTTP_ACCESS_SAME_SPACE | HTTP_ACCESS_VIEW_AGENT_CONFIG; + + if(edit_access == HTTP_ACCESS_NONE) + edit_access = HTTP_ACCESS_SIGNED_ID | HTTP_ACCESS_SAME_SPACE | HTTP_ACCESS_EDIT_AGENT_CONFIG | HTTP_ACCESS_COMMERCIAL_SPACE; + + if(!dyncfg_is_valid_id(id)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id); + return false; + } + + if(type == DYNCFG_TYPE_JOB && !dyncfg_job_has_registered_template(id)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: job id '%s' does not have a registered template. 
Ignoring dynamic configuration for it.", id); + return false; + } + + DYNCFG_CMDS old_cmds = cmds; + + // all configurations support schema + cmds |= DYNCFG_CMD_SCHEMA; + + // if there is either enable or disable, both are supported + if(cmds & (DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE)) + cmds |= DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE; + + // add + if(type == DYNCFG_TYPE_TEMPLATE) { + // templates must always support "add" + cmds |= DYNCFG_CMD_ADD; + } + else { + // only templates can have "add" + cmds &= ~DYNCFG_CMD_ADD; + } + + // remove + if(source_type != DYNCFG_SOURCE_TYPE_DYNCFG || type != DYNCFG_TYPE_JOB) { + // remove is only available for dyncfg jobs + cmds &= ~DYNCFG_CMD_REMOVE; + } + + // data + if(type == DYNCFG_TYPE_TEMPLATE) { + // templates do not have data + cmds &= ~(DYNCFG_CMD_GET | DYNCFG_CMD_UPDATE); + } + + if(cmds != old_cmds) { + CLEAN_BUFFER *t = buffer_create(1024, NULL); + buffer_sprintf(t, "DYNCFG: id '%s' was declared with cmds: ", id); + dyncfg_cmds2buffer(old_cmds, t); + buffer_strcat(t, ", but they have sanitized to: "); + dyncfg_cmds2buffer(cmds, t); + nd_log(NDLS_DAEMON, NDLP_NOTICE, "%s", buffer_tostring(t)); + } + + const DICTIONARY_ITEM *item = dyncfg_add_internal(host, id, path, status, type, source_type, source, cmds, + created_ut, modified_ut, sync, view_access, edit_access, + execute_cb, execute_cb_data, true); + DYNCFG *df = dictionary_acquired_item_value(item); + +// if(df->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && !df->saves) +// nd_log(NDLS_DAEMON, NDLP_WARNING, "DYNCFG: configuration '%s' is created with source type dyncfg, but we don't have a saved configuration for it", id); + + rrd_collector_started(); + rrd_function_add( + host, + NULL, + string2str(df->function), + 120, + 1000, + "Dynamic configuration", + "config", + (view_access & edit_access), + sync, + dyncfg_function_intercept_cb, + NULL); + + if(df->type != DYNCFG_TYPE_TEMPLATE && (df->cmds & (DYNCFG_CMD_ENABLE|DYNCFG_CMD_DISABLE))) { + DYNCFG_CMDS status_to_send_to_plugin = + (df->dyncfg.user_disabled || df->current.status == DYNCFG_STATUS_DISABLED) ? DYNCFG_CMD_DISABLE : DYNCFG_CMD_ENABLE; + + if (status_to_send_to_plugin == DYNCFG_CMD_ENABLE && dyncfg_is_user_disabled(string2str(df->template))) + status_to_send_to_plugin = DYNCFG_CMD_DISABLE; + + dyncfg_echo(item, df, id, status_to_send_to_plugin); + } + + if(!(df->current.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && df->type == DYNCFG_TYPE_JOB)) + dyncfg_send_updates(id); + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + + return true; +} + +void dyncfg_del_low_level(RRDHOST *host, const char *id) { + if(!dyncfg_is_valid_id(id)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id); + return; + } + + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(item) { + DYNCFG *df = dictionary_acquired_item_value(item); + rrd_function_del(host, NULL, string2str(df->function)); + + bool garbage_collect = false; + if(df->dyncfg.saves == 0) { + dictionary_del(dyncfg_globals.nodes, id); + garbage_collect = true; + } + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + + if(garbage_collect) + dictionary_garbage_collect(dyncfg_globals.nodes); + } +} + +void dyncfg_status_low_level(RRDHOST *host __maybe_unused, const char *id, DYNCFG_STATUS status) { + if(!dyncfg_is_valid_id(id)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. 
Ignoring dynamic configuration for it.", id); + return; + } + + if(status == DYNCFG_STATUS_NONE) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: status provided to id '%s' is invalid. Ignoring it.", id); + return; + } + + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(item) { + DYNCFG *df = dictionary_acquired_item_value(item); + df->current.status = status; + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + } +} + +// ---------------------------------------------------------------------------- + +void dyncfg_add_streaming(BUFFER *wb) { + // when sending config functions to parents, we send only 1 function called 'config'; + // the parent will send the command to the child, and the child will validate it; + // this way the parent does not need to receive removals of config functions; + + buffer_sprintf(wb + , PLUGINSD_KEYWORD_FUNCTION " GLOBAL " PLUGINSD_FUNCTION_CONFIG " %d \"%s\" \"%s\" "HTTP_ACCESS_FORMAT" %d\n" + , 120 + , "Dynamic configuration" + , "config" + , (HTTP_ACCESS_FORMAT_CAST)(HTTP_ACCESS_SIGNED_ID | HTTP_ACCESS_SAME_SPACE | HTTP_ACCESS_SENSITIVE_DATA) + , 1000 + ); +} + +bool dyncfg_available_for_rrdhost(RRDHOST *host) { + if(host == localhost || rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST)) + return true; + + return rrd_function_available(host, PLUGINSD_FUNCTION_CONFIG); +} + +// ---------------------------------------------------------------------------- + diff --git a/src/daemon/config/dyncfg.h b/src/daemon/config/dyncfg.h new file mode 100644 index 000000000..539eddbfb --- /dev/null +++ b/src/daemon/config/dyncfg.h @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DYNCFG_H +#define NETDATA_DYNCFG_H + +#include "../common.h" +#include "database/rrd.h" +#include "database/rrdfunctions.h" + +void dyncfg_add_streaming(BUFFER *wb); +bool dyncfg_available_for_rrdhost(RRDHOST *host); +void dyncfg_host_init(RRDHOST *host); + +// low-level API used by plugins.d and high-level API +bool dyncfg_add_low_level(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, + DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, + usec_t created_ut, usec_t modified_ut, bool sync, + HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data); +void dyncfg_del_low_level(RRDHOST *host, const char *id); +void dyncfg_status_low_level(RRDHOST *host, const char *id, DYNCFG_STATUS status); +void dyncfg_init_low_level(bool load_saved); + +// high-level API for internal modules +bool dyncfg_add(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, + DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, + HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + dyncfg_cb_t cb, void *data); +void dyncfg_del(RRDHOST *host, const char *id); +void dyncfg_status(RRDHOST *host, const char *id, DYNCFG_STATUS status); + +void dyncfg_init(bool load_saved); + +#endif //NETDATA_DYNCFG_H diff --git a/daemon/daemon.c b/src/daemon/daemon.c index 433fa0373..d9a4b81de 100644 --- a/daemon/daemon.c +++ b/src/daemon/daemon.c @@ -60,7 +60,7 @@ static void fix_directory_file_permissions(const char *dirname, uid_t uid, gid_t closedir(dir); } -void change_dir_ownership(const char *dir, uid_t uid, gid_t gid, bool recursive) +static void change_dir_ownership(const char *dir, uid_t uid, gid_t gid, bool recursive) { if (chown(dir, uid, gid) == -1) netdata_log_error("Cannot chown 
directory '%s' to %u:%u", dir, (unsigned int)uid, (unsigned int)gid); @@ -68,7 +68,7 @@ void change_dir_ownership(const char *dir, uid_t uid, gid_t gid, bool recursive) fix_directory_file_permissions(dir, uid, gid, recursive); } -void clean_directory(char *dirname) +static void clean_directory(char *dirname) { DIR *dir = opendir(dirname); if(!dir) return; @@ -84,7 +84,7 @@ void clean_directory(char *dirname) closedir(dir); } -void prepare_required_directories(uid_t uid, gid_t gid) { +static void prepare_required_directories(uid_t uid, gid_t gid) { change_dir_ownership(netdata_configured_cache_dir, uid, gid, true); change_dir_ownership(netdata_configured_varlib_dir, uid, gid, false); change_dir_ownership(netdata_configured_lock_dir, uid, gid, false); @@ -98,7 +98,7 @@ void prepare_required_directories(uid_t uid, gid_t gid) { clean_directory(netdata_configured_lock_dir); } -int become_user(const char *username, int pid_fd) { +static int become_user(const char *username, int pid_fd) { int am_i_root = (getuid() == 0)?1:0; struct passwd *pw = getpwnam(username); @@ -245,7 +245,7 @@ static void oom_score_adj(void) { } int written = 0; - int fd = open("/proc/self/oom_score_adj", O_WRONLY); + int fd = open("/proc/self/oom_score_adj", O_WRONLY | O_CLOEXEC); if(fd != -1) { snprintfz(buf, sizeof(buf) - 1, "%d", (int)wanted_score); ssize_t len = strlen(buf); @@ -276,7 +276,7 @@ static void process_nice_level(void) { else netdata_log_debug(D_SYSTEM, "Set netdata nice level to %d.", nice_level); #endif // HAVE_NICE -}; +} #define SCHED_FLAG_NONE 0x00 #define SCHED_FLAG_PRIORITY_CONFIGURABLE 0x01 // the priority is user configurable @@ -478,7 +478,7 @@ int become_daemon(int dont_fork, const char *user) // generate our pid file int pidfd = -1; if(pidfile[0]) { - pidfd = open(pidfile, O_WRONLY | O_CREAT, 0644); + pidfd = open(pidfile, O_WRONLY | O_CREAT | O_CLOEXEC, 0644); if(pidfd >= 0) { if(ftruncate(pidfd, 0) != 0) netdata_log_error("Cannot truncate pidfile '%s'.", pidfile); diff --git a/src/daemon/daemon.h b/src/daemon/daemon.h new file mode 100644 index 000000000..1f8837fd6 --- /dev/null +++ b/src/daemon/daemon.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DAEMON_H +#define NETDATA_DAEMON_H 1 + +int become_daemon(int dont_fork, const char *user); + +void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data); + +void get_netdata_execution_path(void); + +extern char pidfile[]; +extern char netdata_exe_file[]; +extern char netdata_exe_path[]; + +#endif /* NETDATA_DAEMON_H */ diff --git a/daemon/event_loop.c b/src/daemon/event_loop.c index 93bac97d0..93bac97d0 100644 --- a/daemon/event_loop.c +++ b/src/daemon/event_loop.c diff --git a/daemon/event_loop.h b/src/daemon/event_loop.h index c1821c646..c1821c646 100644 --- a/daemon/event_loop.h +++ b/src/daemon/event_loop.h diff --git a/daemon/get-kubernetes-labels.sh.in b/src/daemon/get-kubernetes-labels.sh.in index bc82c2aee..bc82c2aee 100644..100755 --- a/daemon/get-kubernetes-labels.sh.in +++ b/src/daemon/get-kubernetes-labels.sh.in diff --git a/daemon/global_statistics.c b/src/daemon/global_statistics.c index 9fb1df5f8..1798bfddb 100644 --- a/daemon/global_statistics.c +++ b/src/daemon/global_statistics.c @@ -858,7 +858,7 @@ static void global_statistics_charts(void) { // ---------------------------------------------------------------- #ifdef ENABLE_DBENGINE - if (tier_page_type[0] == PAGE_GORILLA_METRICS) + if (tier_page_type[0] == RRDENG_PAGE_TYPE_GORILLA_32BIT) { 
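The open() calls changed above gain O_CLOEXEC so the descriptors are not inherited by children the daemon exec()s, such as external collector plugins. A minimal sketch of the idea follows; the path is a placeholder, not one used by netdata:

/* illustrative sketch only -- placeholder path */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void) {
    /* without O_CLOEXEC this fd would leak into exec()ed children */
    int fd = open("/tmp/example.pid", O_WRONLY | O_CREAT | O_CLOEXEC, 0644);
    if (fd == -1) {
        perror("open");
        return 1;
    }
    /* equivalent after the fact: fcntl(fd, F_SETFD, FD_CLOEXEC); */
    close(fd);
    return 0;
}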
static RRDSET *st_tier0_gorilla_pages = NULL; static RRDDIM *rd_num_gorilla_pages = NULL; @@ -887,7 +887,7 @@ static void global_statistics_charts(void) { rrdset_done(st_tier0_gorilla_pages); } - if (tier_page_type[0] == PAGE_GORILLA_METRICS) + if (tier_page_type[0] == RRDENG_PAGE_TYPE_GORILLA_32BIT) { static RRDSET *st_tier0_compression_info = NULL; @@ -2560,9 +2560,9 @@ static void dbengine2_statistics_charts(void) { /* get localhost's DB engine's statistics for each tier */ for(size_t tier = 0; tier < storage_tiers ;tier++) { if(host->db[tier].mode != RRD_MEMORY_MODE_DBENGINE) continue; - if(!host->db[tier].instance) continue; + if(!host->db[tier].si) continue; - if(is_storage_engine_shared(host->db[tier].instance)) { + if(is_storage_engine_shared(host->db[tier].si)) { if(counted_multihost_db[tier]) continue; else @@ -2570,7 +2570,7 @@ static void dbengine2_statistics_charts(void) { } ++dbengine_contexts; - rrdeng_get_37_statistics((struct rrdengine_instance *)host->db[tier].instance, local_stats_array); + rrdeng_get_37_statistics((struct rrdengine_instance *)host->db[tier].si, local_stats_array); for (i = 0; i < RRDENG_NR_STATS; ++i) { /* aggregate statistics across hosts */ stats_array[i] += local_stats_array[i]; @@ -4308,25 +4308,25 @@ void *global_statistics_workers_main(void *ptr) { global_statistics_register_workers(); - netdata_thread_cleanup_push(global_statistics_workers_cleanup, ptr); - - int update_every = - (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); - if (update_every < localhost->rrd_update_every) - update_every = localhost->rrd_update_every; - - usec_t step = update_every * USEC_PER_SEC; - heartbeat_t hb; - heartbeat_init(&hb); + netdata_thread_cleanup_push(global_statistics_workers_cleanup, ptr) + { + int update_every = + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; - while (service_running(SERVICE_COLLECTORS)) { - worker_is_idle(); - heartbeat_next(&hb, step); + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); - worker_is_busy(WORKER_JOB_WORKERS); - worker_utilization_charts(); - } + while (service_running(SERVICE_COLLECTORS)) { + worker_is_idle(); + heartbeat_next(&hb, step); + worker_is_busy(WORKER_JOB_WORKERS); + worker_utilization_charts(); + } + } netdata_thread_cleanup_pop(1); return NULL; } @@ -4350,25 +4350,26 @@ void *global_statistics_sqlite3_main(void *ptr) { global_statistics_register_workers(); - netdata_thread_cleanup_push(global_statistics_sqlite3_cleanup, ptr); - - int update_every = - (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); - if (update_every < localhost->rrd_update_every) - update_every = localhost->rrd_update_every; + netdata_thread_cleanup_push(global_statistics_sqlite3_cleanup, ptr) + { - usec_t step = update_every * USEC_PER_SEC; - heartbeat_t hb; - heartbeat_init(&hb); + int update_every = + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; - while (service_running(SERVICE_COLLECTORS)) { - worker_is_idle(); - heartbeat_next(&hb, step); + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); - worker_is_busy(WORKER_JOB_SQLITE3); - sqlite3_statistics_charts(); - } + 
while (service_running(SERVICE_COLLECTORS)) { + worker_is_idle(); + heartbeat_next(&hb, step); + worker_is_busy(WORKER_JOB_SQLITE3); + sqlite3_statistics_charts(); + } + } netdata_thread_cleanup_pop(1); return NULL; } diff --git a/daemon/global_statistics.h b/src/daemon/global_statistics.h index 44717c6cf..44717c6cf 100644 --- a/daemon/global_statistics.h +++ b/src/daemon/global_statistics.h diff --git a/daemon/main.c b/src/daemon/main.c index 3e1fda963..c9602432a 100644 --- a/daemon/main.c +++ b/src/daemon/main.c @@ -2,10 +2,15 @@ #include "common.h" #include "buildinfo.h" +#include "daemon/watcher.h" #include "static_threads.h" #include "database/engine/page_test.h" +#ifdef ENABLE_SENTRY +#include "sentry-native/sentry-native.h" +#endif + #if defined(ENV32BIT) #warning COMPILING 32BIT NETDATA #endif @@ -295,133 +300,94 @@ static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) { return (running == 0); } -#define delta_shutdown_time(msg) \ - { \ - usec_t now_ut = now_monotonic_usec(); \ - if(prev_msg) \ - netdata_log_info("NETDATA SHUTDOWN: in %7llu ms, %s%s - next: %s", (now_ut - last_ut) / USEC_PER_MS, (timeout)?"(TIMEOUT) ":"", prev_msg, msg); \ - else \ - netdata_log_info("NETDATA SHUTDOWN: next: %s", msg); \ - last_ut = now_ut; \ - prev_msg = msg; \ - timeout = false; \ - } - void web_client_cache_destroy(void); -void netdata_cleanup_and_exit(int ret) { - usec_t started_ut = now_monotonic_usec(); - usec_t last_ut = started_ut; - const char *prev_msg = NULL; - bool timeout = false; +void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data) { + watcher_shutdown_begin(); nd_log_limits_unlimited(); netdata_log_info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret); - send_statistics("EXIT", ret?"ERROR":"OK","-"); + // send the stat from our caller + analytics_statistic_t statistic = { action, action_result, action_data }; + analytics_statistic_send(&statistic); - delta_shutdown_time("create shutdown file"); + // notify we are exiting + statistic = (analytics_statistic_t) {"EXIT", ret?"ERROR":"OK","-"}; + analytics_statistic_send(&statistic); char agent_crash_file[FILENAME_MAX + 1]; char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir); (void) rename(agent_crash_file, agent_incomplete_shutdown_file); + watcher_step_complete(WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE); #ifdef ENABLE_DBENGINE if(dbengine_enabled) { - delta_shutdown_time("dbengine exit mode"); for (size_t tier = 0; tier < storage_tiers; tier++) rrdeng_exit_mode(multidb_ctx[tier]); } #endif - - delta_shutdown_time("close webrtc connections"); + watcher_step_complete(WATCHER_STEP_ID_DBENGINE_EXIT_MODE); webrtc_close_all_connections(); + watcher_step_complete(WATCHER_STEP_ID_CLOSE_WEBRTC_CONNECTIONS); - delta_shutdown_time("disable maintenance, new queries, new web requests, new streaming connections and aclk"); - - service_signal_exit( - SERVICE_MAINTENANCE - | ABILITY_DATA_QUERIES - | ABILITY_WEB_REQUESTS - | ABILITY_STREAMING_CONNECTIONS - | SERVICE_ACLK - | SERVICE_ACLKSYNC - ); - - delta_shutdown_time("stop replication, exporters, health and web servers threads"); - - timeout = !service_wait_exit( - SERVICE_EXPORTERS - | SERVICE_HEALTH - | SERVICE_WEB_SERVER - | SERVICE_HTTPD - , 3 * USEC_PER_SEC); + 
service_signal_exit(SERVICE_MAINTENANCE | ABILITY_DATA_QUERIES | ABILITY_WEB_REQUESTS | + ABILITY_STREAMING_CONNECTIONS | SERVICE_ACLK | SERVICE_ACLKSYNC); + watcher_step_complete(WATCHER_STEP_ID_DISABLE_MAINTENANCE_NEW_QUERIES_NEW_WEB_REQUESTS_NEW_STREAMING_CONNECTIONS_AND_ACLK); - delta_shutdown_time("stop collectors and streaming threads"); + service_wait_exit(SERVICE_MAINTENANCE, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_MAINTENANCE_THREAD); - timeout = !service_wait_exit( - SERVICE_COLLECTORS - | SERVICE_STREAMING - , 3 * USEC_PER_SEC); + service_wait_exit(SERVICE_EXPORTERS | SERVICE_HEALTH | SERVICE_WEB_SERVER | SERVICE_HTTPD, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_EXPORTERS_HEALTH_AND_WEB_SERVERS_THREADS); - delta_shutdown_time("stop replication threads"); + service_wait_exit(SERVICE_COLLECTORS | SERVICE_STREAMING, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTORS_AND_STREAMING_THREADS); - timeout = !service_wait_exit( - SERVICE_REPLICATION // replication has to be stopped after STREAMING, because it cleans up ARAL - , 3 * USEC_PER_SEC); - - delta_shutdown_time("prepare metasync shutdown"); + service_wait_exit(SERVICE_REPLICATION, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_REPLICATION_THREADS); metadata_sync_shutdown_prepare(); - - delta_shutdown_time("disable ML detection and training threads"); + watcher_step_complete(WATCHER_STEP_ID_PREPARE_METASYNC_SHUTDOWN); ml_stop_threads(); ml_fini(); + watcher_step_complete(WATCHER_STEP_ID_DISABLE_ML_DETECTION_AND_TRAINING_THREADS); - delta_shutdown_time("stop context thread"); - - timeout = !service_wait_exit( - SERVICE_CONTEXT - , 3 * USEC_PER_SEC); - - delta_shutdown_time("stop maintenance thread"); - - timeout = !service_wait_exit( - SERVICE_MAINTENANCE - , 3 * USEC_PER_SEC); - - delta_shutdown_time("clear web client cache"); + service_wait_exit(SERVICE_CONTEXT, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_CONTEXT_THREAD); web_client_cache_destroy(); + watcher_step_complete(WATCHER_STEP_ID_CLEAR_WEB_CLIENT_CACHE); - delta_shutdown_time("clean rrdhost database"); - - rrdhost_cleanup_all(); - - delta_shutdown_time("stop aclk threads"); - - timeout = !service_wait_exit( - SERVICE_ACLK - , 3 * USEC_PER_SEC); + service_wait_exit(SERVICE_ACLK, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_ACLK_THREADS); - delta_shutdown_time("stop all remaining worker threads"); - - timeout = !service_wait_exit(~0, 10 * USEC_PER_SEC); - - delta_shutdown_time("cancel main threads"); + service_wait_exit(~0, 10 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_ALL_REMAINING_WORKER_THREADS); cancel_main_threads(); + watcher_step_complete(WATCHER_STEP_ID_CANCEL_MAIN_THREADS); + + if (ret) + { + watcher_step_complete(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); + watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); + watcher_step_complete(WATCHER_STEP_ID_STOP_METASYNC_THREADS); - if(!ret) { + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + } + else + { // exit cleanly #ifdef ENABLE_DBENGINE if(dbengine_enabled) { - delta_shutdown_time("flush dbengine tiers"); for (size_t tier = 0; tier < storage_tiers; tier++) rrdeng_prepare_exit(multidb_ctx[tier]); @@ -433,21 +399,16 @@ void netdata_cleanup_and_exit(int ret) { } } #endif + 
watcher_step_complete(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); - // free the database - delta_shutdown_time("stop collection for all hosts"); - - // rrdhost_free_all(); rrd_finalize_collection_for_all_hosts(); - - delta_shutdown_time("stop metasync threads"); + watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); metadata_sync_shutdown(); + watcher_step_complete(WATCHER_STEP_ID_STOP_METASYNC_THREADS); #ifdef ENABLE_DBENGINE if(dbengine_enabled) { - delta_shutdown_time("wait for dbengine collectors to finish"); - size_t running = 1; size_t count = 10; while(running && count) { @@ -455,62 +416,71 @@ void netdata_cleanup_and_exit(int ret) { for (size_t tier = 0; tier < storage_tiers; tier++) running += rrdeng_collectors_running(multidb_ctx[tier]); - if(running) { + if (running) { nd_log_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS); - nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE, - "waiting for %zu collectors to finish", running); - // sleep_usec(100 * USEC_PER_MS); - cleanup_destroyed_dictionaries(); + nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE, "waiting for %zu collectors to finish", running); } count--; } - - delta_shutdown_time("wait for dbengine main cache to finish flushing"); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); while (pgc_hot_and_dirty_entries(main_cache)) { pgc_flush_all_hot_and_dirty_pages(main_cache, PGC_SECTION_ALL); sleep_usec(100 * USEC_PER_MS); } + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); - delta_shutdown_time("stop dbengine tiers"); for (size_t tier = 0; tier < storage_tiers; tier++) rrdeng_exit(multidb_ctx[tier]); - rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + } else { + // Skip these steps + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); } +#else + // Skip these steps + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); #endif } - delta_shutdown_time("close SQL context db"); - - sql_close_context_database(); - - delta_shutdown_time("closed SQL main db"); + sqlite_close_databases(); + watcher_step_complete(WATCHER_STEP_ID_CLOSE_SQL_DATABASES); + sqlite_library_shutdown(); - sql_close_database(); // unlink the pid if(pidfile[0]) { - delta_shutdown_time("remove pid file"); - if(unlink(pidfile) != 0) netdata_log_error("EXIT: cannot unlink pidfile '%s'.", pidfile); } + watcher_step_complete(WATCHER_STEP_ID_REMOVE_PID_FILE); #ifdef ENABLE_HTTPS - delta_shutdown_time("free openssl structures"); netdata_ssl_cleanup(); #endif - - delta_shutdown_time("remove incomplete shutdown file"); + watcher_step_complete(WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES); (void) unlink(agent_incomplete_shutdown_file); - - delta_shutdown_time("exit"); - - usec_t ended_ut = now_monotonic_usec(); - netdata_log_info("NETDATA SHUTDOWN: completed in %llu ms - netdata is now exiting - bye bye...", (ended_ut - started_ut) / USEC_PER_MS); + watcher_step_complete(WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE); + + watcher_shutdown_end(); + watcher_thread_stop(); + +#ifdef ENABLE_SENTRY + if (ret) + abort(); + else { + 
sentry_native_fini(); + exit(ret); + } +#else exit(ret); +#endif } void web_server_threading_selection(void) { @@ -684,6 +654,9 @@ static void set_nofile_limit(struct rlimit *rl) { void cancel_main_threads() { nd_log_limits_unlimited(); + if (!static_threads) + return; + int i, found = 0; usec_t max = 5 * USEC_PER_SEC, step = 100000; for (i = 0; static_threads[i].name != NULL ; i++) { @@ -707,8 +680,14 @@ void cancel_main_threads() { sleep_usec(step); found = 0; for (i = 0; static_threads[i].name != NULL ; i++) { - if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED) - found++; + if (static_threads[i].enabled == NETDATA_MAIN_THREAD_EXITED) + continue; + + // Don't wait ourselves. + if (static_threads[i].thread && (*static_threads[i].thread == pthread_self())) + continue; + + found++; } } @@ -725,23 +704,28 @@ void cancel_main_threads() { freez(static_threads[i].thread); freez(static_threads); + static_threads = NULL; } -struct option_def option_definitions[] = { - // opt description arg name default value - { 'c', "Configuration file to load.", "filename", CONFIG_DIR "/" CONFIG_FILENAME}, - { 'D', "Do not fork. Run in the foreground.", NULL, "run in the background"}, - { 'd', "Fork. Run in the background.", NULL, "run in the background"}, - { 'h', "Display this help message.", NULL, NULL}, - { 'P', "File to save a pid while running.", "filename", "do not save pid to a file"}, - { 'i', "The IP address to listen to.", "IP", "all IP addresses IPv4 and IPv6"}, - { 'p', "API/Web port to use.", "port", "19999"}, - { 's', "Prefix for /proc and /sys (for containers).", "path", "no prefix"}, - { 't', "The internal clock of netdata.", "seconds", "1"}, - { 'u', "Run as user.", "username", "netdata"}, - { 'v', "Print netdata version and exit.", NULL, NULL}, - { 'V', "Print netdata version and exit.", NULL, NULL}, - { 'W', "See Advanced options below.", "options", NULL}, +static const struct option_def { + const char val; + const char *description; + const char *arg_name; + const char *default_value; +} option_definitions[] = { + {'c', "Configuration file to load.", "filename", CONFIG_DIR "/" CONFIG_FILENAME}, + {'D', "Do not fork. Run in the foreground.", NULL, "run in the background"}, + {'d', "Fork. 
Run in the background.", NULL, "run in the background"}, + {'h', "Display this help message.", NULL, NULL}, + {'P', "File to save a pid while running.", "filename", "do not save pid to a file"}, + {'i', "The IP address to listen to.", "IP", "all IP addresses IPv4 and IPv6"}, + {'p', "API/Web port to use.", "port", "19999"}, + {'s', "Prefix for /proc and /sys (for containers).", "path", "no prefix"}, + {'t', "The internal clock of netdata.", "seconds", "1"}, + {'u', "Run as user.", "username", "netdata"}, + {'v', "Print netdata version and exit.", NULL, NULL}, + {'V', "Print netdata version and exit.", NULL, NULL}, + {'W', "See Advanced options below.", "options", NULL}, }; int help(int exitcode) { @@ -836,7 +820,6 @@ int help(int exitcode) { fprintf(stream, "\n Signals netdata handles:\n\n" " - HUP Close and reopen log files.\n" - " - USR1 Save internal DB to disk.\n" " - USR2 Reload health configuration.\n" "\n" ); @@ -1070,12 +1053,6 @@ static void backwards_compatible_config() { config_move(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds", CONFIG_SECTION_DB, "cleanup orphan hosts after secs"); - config_move(CONFIG_SECTION_GLOBAL, "delete obsolete charts files", - CONFIG_SECTION_DB, "delete obsolete charts files"); - - config_move(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", - CONFIG_SECTION_DB, "delete orphan hosts files"); - config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics", CONFIG_SECTION_DB, "enable zero metrics"); @@ -1102,7 +1079,7 @@ static int get_hostname(char *buf, size_t buf_size) { char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s/etc/hostname", netdata_configured_host_prefix); - if (!read_file(filename, buf, buf_size)) { + if (!read_txt_file(filename, buf, buf_size)) { trim(buf); return 0; } @@ -1185,10 +1162,13 @@ static void get_netdata_configured_variables() { // ------------------------------------------------------------------------ // get default Database Engine page type - const char *page_type = config_get(CONFIG_SECTION_DB, "dbengine page type", "raw"); - if (strcmp(page_type, "gorilla") == 0) { - tier_page_type[0] = PAGE_GORILLA_METRICS; - } else if (strcmp(page_type, "raw") != 0) { + const char *page_type = config_get(CONFIG_SECTION_DB, "dbengine page type", "gorilla"); + if (strcmp(page_type, "gorilla") == 0) + tier_page_type[0] = RRDENG_PAGE_TYPE_GORILLA_32BIT; + else if (strcmp(page_type, "raw") == 0) + tier_page_type[0] = RRDENG_PAGE_TYPE_ARRAY_32BIT; + else { + tier_page_type[0] = RRDENG_PAGE_TYPE_ARRAY_32BIT; netdata_log_error("Invalid dbengine page type ''%s' given. Defaulting to 'raw'.", page_type); } @@ -1227,7 +1207,7 @@ static void get_netdata_configured_variables() { #else if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { error_report("RRD_MEMORY_MODE_DBENGINE is not supported in this platform. The agent will use db mode 'save' instead."); - default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE; + default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; } #endif @@ -1371,12 +1351,6 @@ int get_system_info(struct rrdhost_system_info *system_info) { return 0; } -void set_silencers_filename() { - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/health.silencers.json", netdata_configured_varlib_dir); - silencers_filename = config_get(CONFIG_SECTION_HEALTH, "silencers file", filename); -} - /* Any config setting that can be accessed without a default value i.e. configget(...,...,NULL) *MUST* be set in this procedure to be called in all the relevant code paths. 
*/ @@ -1401,6 +1375,25 @@ void replication_initialize(void); void bearer_tokens_init(void); int unittest_rrdpush_compressions(void); int uuid_unittest(void); +int progress_unittest(void); +int dyncfg_unittest(void); + +int unittest_prepare_rrd(char **user) { + post_conf_load(user); + get_netdata_configured_variables(); + default_rrd_update_every = 1; + default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; + health_plugin_disable(); + storage_tiers = 1; + registry_init(); + if(rrd_init("unittest", NULL, true)) { + fprintf(stderr, "rrd_init failed for unittest\n"); + return 1; + } + default_rrdpush_enabled = 0; + + return 0; +} int main(int argc, char **argv) { // initialize the system clocks @@ -1508,66 +1501,53 @@ int main(int argc, char **argv) { #endif if(strcmp(optarg, "sqlite-meta-recover") == 0) { - sql_init_database(DB_CHECK_RECOVER, 0); + sql_init_meta_database(DB_CHECK_RECOVER, 0); return 0; } if(strcmp(optarg, "sqlite-compact") == 0) { - sql_init_database(DB_CHECK_RECLAIM_SPACE, 0); + sql_init_meta_database(DB_CHECK_RECLAIM_SPACE, 0); return 0; } if(strcmp(optarg, "sqlite-analyze") == 0) { - sql_init_database(DB_CHECK_ANALYZE, 0); + sql_init_meta_database(DB_CHECK_ANALYZE, 0); return 0; } if(strcmp(optarg, "unittest") == 0) { unittest_running = true; - if (pluginsd_parser_unittest()) - return 1; + // set defaults for dbegnine unittest + config_set(CONFIG_SECTION_DB, "dbengine page type", "gorilla"); + default_rrdeng_disk_quota_mb = default_multidb_disk_quota_mb = 256; - if (unit_test_static_threads()) - return 1; - if (unit_test_buffer()) - return 1; - if (unit_test_str2ld()) - return 1; - if (buffer_unittest()) - return 1; - if (unit_test_bitmaps()) + if (sqlite_library_init()) return 1; + + if (pluginsd_parser_unittest()) return 1; + if (unit_test_static_threads()) return 1; + if (unit_test_buffer()) return 1; + if (unit_test_str2ld()) return 1; + if (buffer_unittest()) return 1; + if (unit_test_bitmaps()) return 1; + // No call to load the config file on this code-path - post_conf_load(&user); - get_netdata_configured_variables(); - default_rrd_update_every = 1; - default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; - default_health_enabled = 0; - storage_tiers = 1; - registry_init(); - if(rrd_init("unittest", NULL, true)) { - fprintf(stderr, "rrd_init failed for unittest\n"); - return 1; - } - default_rrdpush_enabled = 0; - if(run_all_mockup_tests()) return 1; - if(unit_test_storage()) return 1; + if (unittest_prepare_rrd(&user)) return 1; + if (run_all_mockup_tests()) return 1; + if (unit_test_storage()) return 1; #ifdef ENABLE_DBENGINE - if(test_dbengine()) return 1; + if (test_dbengine()) return 1; #endif - if(test_sqlite()) return 1; - if(string_unittest(10000)) return 1; - if (dictionary_unittest(10000)) - return 1; - if(aral_unittest(10000)) - return 1; - if (rrdlabels_unittest()) - return 1; - if (ctx_unittest()) - return 1; - if (uuid_unittest()) - return 1; + if (test_sqlite()) return 1; + if (string_unittest(10000)) return 1; + if (dictionary_unittest(10000)) return 1; + if (aral_unittest(10000)) return 1; + if (rrdlabels_unittest()) return 1; + if (ctx_unittest()) return 1; + if (uuid_unittest()) return 1; + if (dyncfg_unittest()) return 1; + sqlite_library_shutdown(); fprintf(stderr, "\n\nALL TESTS PASSED\n\n"); return 0; } @@ -1631,6 +1611,16 @@ int main(int argc, char **argv) { unittest_running = true; return unittest_rrdpush_compressions(); } + else if(strcmp(optarg, "progresstest") == 0) { + unittest_running = true; + return progress_unittest(); + } + else if(strcmp(optarg, 
"dyncfgtest") == 0) { + unittest_running = true; + if(unittest_prepare_rrd(&user)) + return 1; + return dyncfg_unittest(); + } else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) { optarg += strlen(createdataset_string); unsigned history_seconds = strtoul(optarg, NULL, 0); @@ -1889,12 +1879,14 @@ int main(int argc, char **argv) { for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR); } - if(!config_loaded) { load_netdata_conf(NULL, 0, &user); load_cloud_conf(0); } + // @stelfrag: Where is the right place to call this? + watcher_thread_start(); + // ------------------------------------------------------------------------ // initialize netdata { @@ -2005,7 +1997,7 @@ int main(int argc, char **argv) { // -------------------------------------------------------------------- // This is the safest place to start the SILENCERS structure - set_silencers_filename(); + health_set_silencers_filename(); health_initialize_global_silencers(); // // -------------------------------------------------------------------- @@ -2031,6 +2023,9 @@ int main(int argc, char **argv) { // setup threads configs default_stacksize = netdata_threads_init(); + // musl default thread stack size is 128k, let's set it to a higher value to avoid random crashes + if (default_stacksize < 1 * 1024 * 1024) + default_stacksize = 1 * 1024 * 1024; #ifdef NETDATA_INTERNAL_CHECKS config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring", true); @@ -2071,6 +2066,8 @@ int main(int argc, char **argv) { exit(1); } } + if (sqlite_library_init()) + fatal("Failed to initialize sqlite library"); // -------------------------------------------------------------------- // Initialize ML configuration @@ -2080,9 +2077,9 @@ int main(int argc, char **argv) { #ifdef ENABLE_H2O delta_startup_time("initialize h2o server"); - for (int i = 0; static_threads[i].name; i++) { - if (static_threads[i].start_routine == h2o_main) - static_threads[i].enabled = httpd_is_enabled(); + for (int t = 0; static_threads[t].name; t++) { + if (static_threads[t].start_routine == h2o_main) + static_threads[t].enabled = httpd_is_enabled(); } #endif } @@ -2108,6 +2105,11 @@ int main(int argc, char **argv) { if(become_daemon(dont_fork, user) == -1) fatal("Cannot daemonize myself."); + // init sentry +#ifdef ENABLE_SENTRY + sentry_native_init(); +#endif + // The "HOME" env var points to the root's home dir because Netdata starts as root. Can't use "HOME". struct passwd *pw = getpwuid(getuid()); if (config_exists(CONFIG_SECTION_DIRECTORIES, "home") || !pw || !pw->pw_dir) { @@ -2118,7 +2120,7 @@ int main(int argc, char **argv) { setenv("HOME", netdata_configured_home_dir, 1); - dyn_conf_init(); + dyncfg_init(true); netdata_log_info("netdata started on pid %d.", getpid()); @@ -2171,7 +2173,7 @@ int main(int argc, char **argv) { int incomplete_shutdown_detected = (unlink(agent_incomplete_shutdown_file) == 0); snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); int crash_detected = (unlink(agent_crash_file) == 0); - int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC, 444); + int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 444); if (fd >= 0) close(fd); @@ -2230,11 +2232,16 @@ int main(int argc, char **argv) { netdata_log_info("NETDATA STARTUP: completed in %llu ms. 
Enjoy real-time performance monitoring!", (ready_ut - started_ut) / USEC_PER_MS); netdata_ready = true; - send_statistics("START", "-", "-"); - if (crash_detected) - send_statistics("CRASH", "-", "-"); - if (incomplete_shutdown_detected) - send_statistics("INCOMPLETE_SHUTDOWN", "-", "-"); + analytics_statistic_t start_statistic = { "START", "-", "-" }; + analytics_statistic_send(&start_statistic); + if (crash_detected) { + analytics_statistic_t crash_statistic = { "CRASH", "-", "-" }; + analytics_statistic_send(&crash_statistic); + } + if (incomplete_shutdown_detected) { + analytics_statistic_t incomplete_shutdown_statistic = { "INCOMPLETE_SHUTDOWN", "-", "-" }; + analytics_statistic_send(&incomplete_shutdown_statistic); + } //check if ANALYTICS needs to start if (netdata_anonymous_statistics_enabled == 1) { @@ -2256,7 +2263,9 @@ int main(int argc, char **argv) { char filename[FILENAME_MAX + 1]; snprintfz(filename, FILENAME_MAX, "%s/.aclk_report_sent", netdata_configured_varlib_dir); if (netdata_anonymous_statistics_enabled > 0 && access(filename, F_OK)) { // -1 -> not initialized - send_statistics("ACLK_DISABLED", "-", "-"); + analytics_statistic_t statistic = { "ACLK_DISABLED", "-", "-" }; + analytics_statistic_send(&statistic); + int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 444); if (fd == -1) netdata_log_error("Cannot create file '%s'. Please fix this.", filename); diff --git a/src/daemon/main.h b/src/daemon/main.h new file mode 100644 index 000000000..faf7d5b69 --- /dev/null +++ b/src/daemon/main.h @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_MAIN_H +#define NETDATA_MAIN_H 1 + +#include "common.h" + +extern struct config netdata_config; + +void cancel_main_threads(void); +int killpid(pid_t pid); + +typedef enum { + ABILITY_DATA_QUERIES = (1 << 0), + ABILITY_WEB_REQUESTS = (1 << 1), + ABILITY_STREAMING_CONNECTIONS = (1 << 2), + SERVICE_MAINTENANCE = (1 << 3), + SERVICE_COLLECTORS = (1 << 4), + SERVICE_REPLICATION = (1 << 5), + SERVICE_WEB_SERVER = (1 << 6), + SERVICE_ACLK = (1 << 7), + SERVICE_HEALTH = (1 << 8), + SERVICE_STREAMING = (1 << 9), + SERVICE_CONTEXT = (1 << 10), + SERVICE_ANALYTICS = (1 << 11), + SERVICE_EXPORTERS = (1 << 12), + SERVICE_ACLKSYNC = (1 << 13), + SERVICE_HTTPD = (1 << 14) +} SERVICE_TYPE; + +typedef enum { + SERVICE_THREAD_TYPE_NETDATA, + SERVICE_THREAD_TYPE_LIBUV, + SERVICE_THREAD_TYPE_EVENT_LOOP, +} SERVICE_THREAD_TYPE; + +typedef void (*force_quit_t)(void *data); +typedef void (*request_quit_t)(void *data); + +void service_exits(void); +bool service_running(SERVICE_TYPE service); +struct service_thread *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update __maybe_unused); + +#endif /* NETDATA_MAIN_H */ diff --git a/daemon/metrics.csv b/src/daemon/metrics.csv index 4aa71a364..4aa71a364 100644 --- a/daemon/metrics.csv +++ b/src/daemon/metrics.csv diff --git a/daemon/pipename.c b/src/daemon/pipename.c index 70b6a25b4..70b6a25b4 100644 --- a/daemon/pipename.c +++ b/src/daemon/pipename.c diff --git a/daemon/pipename.h b/src/daemon/pipename.h index 6ca6e8d08..6ca6e8d08 100644 --- a/daemon/pipename.h +++ b/src/daemon/pipename.h diff --git a/src/daemon/sentry-native/sentry-native.c b/src/daemon/sentry-native/sentry-native.c new file mode 100644 index 000000000..3594c1fff --- /dev/null +++ b/src/daemon/sentry-native/sentry-native.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sentry-native.h" 
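Aside, not part of the upstream patch: the sentry_telemetry_disabled() helper that follows gates the Sentry integration on the same two opt-out signals the anonymous-statistics script honors, the ".opt-out-from-anonymous-statistics" marker file in the user config directory and the DISABLE_TELEMETRY environment variable. A minimal standalone sketch of that check, assuming only libc; the function name and the explicit config-dir parameter are illustrative, not from this patch:

    #include <stdbool.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/stat.h>

    /* Illustrative: report whether the user has opted out of telemetry. */
    bool telemetry_opted_out(const char *user_config_dir) {
        char path[4096];

        /* marker file created when the user opts out of anonymous statistics */
        snprintf(path, sizeof(path), "%s/.opt-out-from-anonymous-statistics", user_config_dir);

        struct stat st;
        if (stat(path, &st) == 0)
            return true;                                /* opt-out file exists */

        return getenv("DISABLE_TELEMETRY") != NULL;     /* environment opt-out */
    }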
+#include "daemon/common.h" + +#include "sentry.h" + +static bool sentry_telemetry_disabled(void) +{ + char path[FILENAME_MAX + 1]; + sprintf(path, "%s/%s", netdata_configured_user_config_dir, ".opt-out-from-anonymous-statistics"); + + struct stat buffer; + bool opt_out_file_exists = (stat(path, &buffer) == 0); + + if (opt_out_file_exists) + return true; + + return getenv("DISABLE_TELEMETRY") != NULL; +} + +void sentry_native_init(void) +{ + if (sentry_telemetry_disabled()) + return; + + // path where sentry should save stuff + char path[FILENAME_MAX]; + snprintfz(path, FILENAME_MAX - 1, "%s/%s", netdata_configured_cache_dir, ".sentry-native"); + + sentry_options_t *options = sentry_options_new(); + sentry_options_set_dsn(options, NETDATA_SENTRY_DSN); + sentry_options_set_database_path(options, path); + sentry_options_set_environment(options, NETDATA_SENTRY_ENVIRONMENT); + sentry_options_set_release(options, NETDATA_SENTRY_RELEASE); + sentry_options_set_dist(options, NETDATA_SENTRY_DIST); +#ifdef NETDATA_INTERNAL_CHECKS + sentry_options_set_debug(options, 1); +#endif + + sentry_init(options); +} + +void sentry_native_fini(void) +{ + if (sentry_telemetry_disabled()) + return; + + sentry_close(); +} diff --git a/src/daemon/sentry-native/sentry-native.h b/src/daemon/sentry-native/sentry-native.h new file mode 100644 index 000000000..861c5b959 --- /dev/null +++ b/src/daemon/sentry-native/sentry-native.h @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef SENTRY_NATIVE_H +#define SENTRY_NATIVE_H + +void sentry_native_init(void); +void sentry_native_fini(void); + +#endif /* SENTRY_NATIVE_H */ diff --git a/daemon/service.c b/src/daemon/service.c index 8a65de66c..ff966c57d 100644 --- a/daemon/service.c +++ b/src/daemon/service.c @@ -16,11 +16,7 @@ #define WORKER_JOB_CLEANUP_ORPHAN_HOSTS 6 #define WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS 7 #define WORKER_JOB_FREE_HOST 9 -#define WORKER_JOB_SAVE_HOST_CHARTS 10 -#define WORKER_JOB_DELETE_HOST_CHARTS 11 #define WORKER_JOB_FREE_CHART 12 -#define WORKER_JOB_SAVE_CHART 13 -#define WORKER_JOB_DELETE_CHART 14 #define WORKER_JOB_FREE_DIMENSION 15 #define WORKER_JOB_PGC_MAIN_EVICT 16 #define WORKER_JOB_PGC_MAIN_FLUSH 17 @@ -38,31 +34,9 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) { rrddim_flag_set(rd, RRDDIM_FLAG_ARCHIVED); rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE); - const char *cache_filename = rrddim_cache_filename(rd); - if(cache_filename) { - netdata_log_info("Deleting dimension file '%s'.", cache_filename); - if (unlikely(unlink(cache_filename) == -1)) - netdata_log_error("Cannot delete dimension file '%s'", cache_filename); - } - if (rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { - rrddimvar_delete_all(rd); - /* only a collector can mark a chart as obsolete, so we must remove the reference */ - - size_t tiers_available = 0, tiers_said_no_retention = 0; - for(size_t tier = 0; tier < storage_tiers ;tier++) { - if(rd->tiers[tier].db_collection_handle) { - tiers_available++; - - if(storage_engine_store_finalize(rd->tiers[tier].db_collection_handle)) - tiers_said_no_retention++; - - rd->tiers[tier].db_collection_handle = NULL; - } - } - - if (tiers_available == tiers_said_no_retention && tiers_said_no_retention) { + if (!rrddim_finalize_collection_and_check_retention(rd)) { /* This metric has no data and no references */ metaqueue_delete_dimension_uuid(&rd->metric_uuid); } @@ -126,24 +100,11 @@ static void svc_rrdset_obsolete_to_free(RRDSET *st) { worker_is_busy(WORKER_JOB_FREE_CHART); - 
rrdcalc_unlink_all_rrdset_alerts(st); - - rrdsetvar_release_and_delete_all(st); + rrdcalc_unlink_and_delete_all_rrdset_alerts(st); // has to be run after all dimensions are archived - or use-after-free will occur rrdvar_delete_all(st->rrdvars); - if(st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) { - if(rrdhost_option_check(st->rrdhost, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS)) { - worker_is_busy(WORKER_JOB_DELETE_CHART); - rrdset_delete_files(st); - } - else { - worker_is_busy(WORKER_JOB_SAVE_CHART); - rrdset_save(st); - } - } - rrdset_free(st); } @@ -230,6 +191,10 @@ static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() { RRDHOST *host; rrdhost_foreach_read(host) { + + if (!service_running(SERVICE_MAINTENANCE)) + break; + if(rrdhost_receiver_replicating_charts(host) || rrdhost_sender_replicating_charts(host)) continue; @@ -269,28 +234,11 @@ restart_after_removal: if(!rrdhost_should_be_removed(host, protected_host, now)) continue; - bool is_archived = rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED); - if (!is_archived) { - netdata_log_info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid); - - if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) - /* don't delete multi-host DB host files */ - && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance)) - ) { - worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS); - rrdhost_delete_charts(host); - } - else { - worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS); - rrdhost_save_charts(host); - } - } - bool force = false; - if (rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST) && now - host->last_connected > rrdhost_free_ephemeral_time_s) force = true; + bool is_archived = rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED); if (!force && is_archived) continue; @@ -339,11 +287,7 @@ void *service_main(void *ptr) worker_register_job_name(WORKER_JOB_CLEANUP_ORPHAN_HOSTS, "cleanup orphan hosts"); worker_register_job_name(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS, "cleanup obsolete charts on all hosts"); worker_register_job_name(WORKER_JOB_FREE_HOST, "free host"); - worker_register_job_name(WORKER_JOB_SAVE_HOST_CHARTS, "save host charts"); - worker_register_job_name(WORKER_JOB_DELETE_HOST_CHARTS, "delete host charts"); worker_register_job_name(WORKER_JOB_FREE_CHART, "free chart"); - worker_register_job_name(WORKER_JOB_SAVE_CHART, "save chart"); - worker_register_job_name(WORKER_JOB_DELETE_CHART, "delete chart"); worker_register_job_name(WORKER_JOB_FREE_DIMENSION, "free dimension"); worker_register_job_name(WORKER_JOB_PGC_MAIN_EVICT, "main cache evictions"); worker_register_job_name(WORKER_JOB_PGC_MAIN_FLUSH, "main cache flushes"); @@ -354,15 +298,23 @@ void *service_main(void *ptr) heartbeat_t hb; heartbeat_init(&hb); usec_t step = USEC_PER_SEC * SERVICE_HEARTBEAT; + usec_t real_step = USEC_PER_SEC; netdata_log_debug(D_SYSTEM, "Service thread starts"); while (service_running(SERVICE_MAINTENANCE)) { worker_is_idle(); - heartbeat_next(&hb, step); + heartbeat_next(&hb, USEC_PER_SEC); + if (real_step < step) { + real_step += USEC_PER_SEC; + continue; + } + real_step = USEC_PER_SEC; svc_rrd_cleanup_obsolete_charts_from_all_hosts(); - svc_rrdhost_cleanup_orphan_hosts(localhost); + + if (service_running(SERVICE_MAINTENANCE)) + svc_rrdhost_cleanup_orphan_hosts(localhost); } netdata_thread_cleanup_pop(1); diff --git a/daemon/signals.c b/src/daemon/signals.c index 4f2254334..c014452b7 100644 --- a/daemon/signals.c +++ b/src/daemon/signals.c @@ 
-6,7 +6,6 @@ typedef enum signal_action { NETDATA_SIGNAL_END_OF_LIST, NETDATA_SIGNAL_IGNORE, NETDATA_SIGNAL_EXIT_CLEANLY, - NETDATA_SIGNAL_SAVE_DATABASE, NETDATA_SIGNAL_REOPEN_LOGS, NETDATA_SIGNAL_RELOAD_HEALTH, NETDATA_SIGNAL_FATAL, @@ -24,7 +23,6 @@ static struct { { SIGQUIT, "SIGQUIT", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, { SIGTERM, "SIGTERM", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, { SIGHUP, "SIGHUP", 0, NETDATA_SIGNAL_REOPEN_LOGS }, - { SIGUSR1, "SIGUSR1", 0, NETDATA_SIGNAL_SAVE_DATABASE }, { SIGUSR2, "SIGUSR2", 0, NETDATA_SIGNAL_RELOAD_HEALTH }, { SIGBUS, "SIGBUS", 0, NETDATA_SIGNAL_FATAL }, { SIGCHLD, "SIGCHLD", 0, NETDATA_SIGNAL_CHILD }, @@ -209,13 +207,6 @@ void signals_handle(void) { execute_command(CMD_RELOAD_HEALTH, NULL, NULL); break; - case NETDATA_SIGNAL_SAVE_DATABASE: - nd_log_limits_unlimited(); - netdata_log_info("SIGNAL: Received %s. Saving databases...", name); - nd_log_limits_reset(); - execute_command(CMD_SAVE_DATABASE, NULL, NULL); - break; - case NETDATA_SIGNAL_REOPEN_LOGS: nd_log_limits_unlimited(); netdata_log_info("SIGNAL: Received %s. Reopening all log files...", name); @@ -227,7 +218,7 @@ void signals_handle(void) { nd_log_limits_unlimited(); netdata_log_info("SIGNAL: Received %s. Cleaning up to exit...", name); commands_exit(); - netdata_cleanup_and_exit(0); + netdata_cleanup_and_exit(0, NULL, NULL, NULL); exit(0); break; diff --git a/daemon/signals.h b/src/daemon/signals.h index 12b1ed198..12b1ed198 100644 --- a/daemon/signals.h +++ b/src/daemon/signals.h diff --git a/daemon/static_threads.c b/src/daemon/static_threads.c index b70373d74..e03819761 100644 --- a/daemon/static_threads.c +++ b/src/daemon/static_threads.c @@ -14,7 +14,7 @@ void *service_main(void *ptr); void *statsd_main(void *ptr); void *timex_main(void *ptr); void *profile_main(void *ptr); -void *replication_thread_main(void *ptr __maybe_unused); +void *replication_thread_main(void *ptr); extern bool global_statistics_enabled; @@ -195,15 +195,6 @@ const struct netdata_static_thread static_threads_common[] = { .init_routine = NULL, .start_routine = profile_main }, - { - .name = "DYNCFG", - .config_section = NULL, - .config_name = NULL, - .enabled = 1, - .thread = NULL, - .init_routine = NULL, - .start_routine = dyncfg_main - }, // terminator { diff --git a/daemon/static_threads.h b/src/daemon/static_threads.h index 46195cf46..46195cf46 100644 --- a/daemon/static_threads.h +++ b/src/daemon/static_threads.h diff --git a/daemon/static_threads_freebsd.c b/src/daemon/static_threads_freebsd.c index cc150faf9..cc150faf9 100644 --- a/daemon/static_threads_freebsd.c +++ b/src/daemon/static_threads_freebsd.c diff --git a/daemon/static_threads_linux.c b/src/daemon/static_threads_linux.c index 54307eccf..54307eccf 100644 --- a/daemon/static_threads_linux.c +++ b/src/daemon/static_threads_linux.c diff --git a/daemon/static_threads_macos.c b/src/daemon/static_threads_macos.c index aaf7df6f6..aaf7df6f6 100644 --- a/daemon/static_threads_macos.c +++ b/src/daemon/static_threads_macos.c diff --git a/daemon/system-info.sh b/src/daemon/system-info.sh index aaca7fd4b..aaca7fd4b 100755 --- a/daemon/system-info.sh +++ b/src/daemon/system-info.sh diff --git a/src/daemon/unit_test.c b/src/daemon/unit_test.c new file mode 100644 index 000000000..54586eab5 --- /dev/null +++ b/src/daemon/unit_test.c @@ -0,0 +1,1806 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +static bool cmd_arg_sanitization_test(const char *expected, const char *src, char *dst, size_t dst_size) { + bool ok = 
sanitize_command_argument_string(dst, src, dst_size); + + if (!expected) + return ok == false; + + return strcmp(expected, dst) == 0; +} + +bool command_argument_sanitization_tests() { + char dst[1024]; + + for (size_t i = 0; i != 5; i++) { + const char *expected = i == 4 ? "'\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "'", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 9; i++) { + const char *expected = i == 8 ? "'\\'''\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "''", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 7; i++) { + const char *expected = i == 6 ? "'\\''a" : NULL; + if (cmd_arg_sanitization_test(expected, "'a", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 7; i++) { + const char *expected = i == 6 ? "a'\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "a'", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 22; i++) { + const char *expected = i == 21 ? "foo'\\''a'\\'''\\'''\\''b" : NULL; + if (cmd_arg_sanitization_test(expected, "--foo'a'''b", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n length: %zu\n", expected, dst, strlen(dst)); + return 1; + } + } + + return 0; +} + +static int check_number_printing(void) { + struct { + NETDATA_DOUBLE n; + const char *correct; + } values[] = { + { .n = 0, .correct = "0" }, + { .n = 0.0000001, .correct = "0.0000001" }, + { .n = 0.00000009, .correct = "0.0000001" }, + { .n = 0.000000001, .correct = "0" }, + { .n = 99.99999999999999999, .correct = "100" }, + { .n = -99.99999999999999999, .correct = "-100" }, + { .n = 123.4567899123456789, .correct = "123.4567899" }, + { .n = 123.4567890123456789, .correct = "123.456789" }, + { .n = 123.4567800123456789, .correct = "123.45678" }, + { .n = 123.4567000123456789, .correct = "123.4567" }, + { .n = 123.4560000123456789, .correct = "123.456" }, + { .n = 123.4500000123456789, .correct = "123.45" }, + { .n = 123.4000000123456789, .correct = "123.4" }, + { .n = 123.0000000123456789, .correct = "123" }, + { .n = 123.0000000923456789, .correct = "123.0000001" }, + { .n = 4294967295.123456789, .correct = "4294967295.123457" }, + { .n = 8294967295.123456789, .correct = "8294967295.123457" }, + { .n = 1.000000000000002e+19, .correct = "1.000000000000001998e+19" }, + { .n = 9.2233720368547676e+18, .correct = "9.223372036854767584e+18" }, + { .n = 18446744073709541376.0, .correct = "1.84467440737095424e+19" }, + { .n = 18446744073709551616.0, .correct = "1.844674407370955136e+19" }, + { .n = 12318446744073710600192.0, .correct = "1.231844674407371008e+22" }, + { .n = 1677721499999999885312.0, .correct = "1.677721499999999872e+21" }, + { .n = -1677721499999999885312.0, .correct = "-1.677721499999999872e+21" }, + { .n = -1.677721499999999885312e40, .correct = "-1.677721499999999872e+40" }, + { .n = -16777214999999997337621690403742592008192.0, .correct = "-1.677721499999999616e+40" }, + { .n = 9999.9999999, .correct = "9999.9999999" }, + { .n = -9999.9999999, .correct = "-9999.9999999" }, + { .n = 0, .correct = NULL }, + }; + + char netdata[512 + 2], system[512 + 2]; + int i, failed = 0; + for(i = 0; values[i].correct ; i++) { + print_netdata_double(netdata, values[i].n); + 
snprintfz(system, sizeof(system) - 1, "%0.12" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)values[i].n); + + int ok = 1; + if(strcmp(netdata, values[i].correct) != 0) { + ok = 0; + failed++; + } + + NETDATA_DOUBLE parsed_netdata = str2ndd(netdata, NULL); + NETDATA_DOUBLE parsed_system = strtondd(netdata, NULL); + + if(parsed_system != parsed_netdata) + failed++; + + fprintf(stderr, "[%d]. '%s' (system) printed as '%s' (netdata): PRINT %s, " + "PARSED %0.12" NETDATA_DOUBLE_MODIFIER " (system), %0.12" NETDATA_DOUBLE_MODIFIER " (netdata): %s\n", + i, + system, netdata, ok?"OK":"FAILED", + parsed_system, parsed_netdata, + parsed_netdata == parsed_system ? "OK" : "FAILED"); + } + + if(failed) return 1; + return 0; +} + +static int check_rrdcalc_comparisons(void) { + RRDCALC_STATUS a, b; + + // make sure calloc() sets the status to UNINITIALIZED + memset(&a, 0, sizeof(RRDCALC_STATUS)); + if(a != RRDCALC_STATUS_UNINITIALIZED) { + fprintf(stderr, "%s is not zero.\n", rrdcalc_status2string(RRDCALC_STATUS_UNINITIALIZED)); + return 1; + } + + a = RRDCALC_STATUS_REMOVED; + b = RRDCALC_STATUS_UNDEFINED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_UNDEFINED; + b = RRDCALC_STATUS_UNINITIALIZED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_UNINITIALIZED; + b = RRDCALC_STATUS_CLEAR; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_CLEAR; + b = RRDCALC_STATUS_RAISED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_RAISED; + b = RRDCALC_STATUS_WARNING; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_WARNING; + b = RRDCALC_STATUS_CRITICAL; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + fprintf(stderr, "RRDCALC_STATUSes are sortable.\n"); + + return 0; +} + +int check_storage_number(NETDATA_DOUBLE n, int debug) { + char buffer[100]; + uint32_t flags = SN_DEFAULT_FLAGS; + + storage_number s = pack_storage_number(n, flags); + NETDATA_DOUBLE d = unpack_storage_number(s); + + if(!does_storage_number_exist(s)) { + fprintf(stderr, "Exists flags missing for number " NETDATA_DOUBLE_FORMAT "!\n", n); + return 5; + } + + NETDATA_DOUBLE ddiff = d - n; + NETDATA_DOUBLE dcdiff = ddiff * 100.0 / n; + + if(dcdiff < 0) dcdiff = -dcdiff; + + size_t len = (size_t)print_netdata_double(buffer, d); + NETDATA_DOUBLE p = str2ndd(buffer, NULL); + NETDATA_DOUBLE pdiff = n - p; + NETDATA_DOUBLE pcdiff = pdiff * 100.0 / n; + if(pcdiff < 0) pcdiff = -pcdiff; + + if(debug) { + fprintf(stderr, + NETDATA_DOUBLE_FORMAT + " original\n" NETDATA_DOUBLE_FORMAT " packed and unpacked, (stored as 0x%08X, diff " NETDATA_DOUBLE_FORMAT + ", " NETDATA_DOUBLE_FORMAT "%%)\n" + "%s printed after unpacked (%zu bytes)\n" NETDATA_DOUBLE_FORMAT + " re-parsed from printed (diff " NETDATA_DOUBLE_FORMAT ", " NETDATA_DOUBLE_FORMAT "%%)\n\n", + n, + d, s, ddiff, dcdiff, + buffer, len, + p, pdiff, pcdiff + ); + if(len != strlen(buffer)) fprintf(stderr, "ERROR: printed number %s is reported to have length %zu but it has %zu\n", buffer, len, 
strlen(buffer)); + + if(dcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) + fprintf(stderr, "WARNING: packing number " NETDATA_DOUBLE_FORMAT " has accuracy loss " NETDATA_DOUBLE_FORMAT " %%\n", n, dcdiff); + + if(pcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) + fprintf(stderr, "WARNING: re-parsing the packed, unpacked and printed number " NETDATA_DOUBLE_FORMAT + " has accuracy loss " NETDATA_DOUBLE_FORMAT " %%\n", n, pcdiff); + } + + if(len != strlen(buffer)) return 1; + if(dcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) return 3; + if(pcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) return 4; + return 0; +} + +NETDATA_DOUBLE storage_number_min(NETDATA_DOUBLE n) { + NETDATA_DOUBLE r = 1, last; + + do { + last = n; + n /= 2.0; + storage_number t = pack_storage_number(n, SN_DEFAULT_FLAGS); + r = unpack_storage_number(t); + } while(r != 0.0 && r != last); + + return last; +} + +void benchmark_storage_number(int loop, int multiplier) { + int i, j; + NETDATA_DOUBLE n, d; + storage_number s; + unsigned long long user, system, total, mine, their; + + NETDATA_DOUBLE storage_number_positive_min = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW); + NETDATA_DOUBLE storage_number_positive_max = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MAX_RAW); + + char buffer[100]; + + struct rusage now, last; + + fprintf(stderr, "\n\nBenchmarking %d numbers, please wait...\n\n", loop); + + // ------------------------------------------------------------------------ + + fprintf(stderr, "SYSTEM LONG DOUBLE SIZE: %zu bytes\n", sizeof(NETDATA_DOUBLE)); + fprintf(stderr, "NETDATA FLOATING POINT SIZE: %zu bytes\n", sizeof(storage_number)); + + mine = (NETDATA_DOUBLE)sizeof(storage_number) * (NETDATA_DOUBLE)loop; + their = (NETDATA_DOUBLE)sizeof(NETDATA_DOUBLE) * (NETDATA_DOUBLE)loop; + + if(mine > their) { + fprintf(stderr, "\nNETDATA NEEDS %0.2" NETDATA_DOUBLE_MODIFIER " TIMES MORE MEMORY. 
Sorry!\n", (NETDATA_DOUBLE)(mine / their)); + } + else { + fprintf(stderr, "\nNETDATA INTERNAL FLOATING POINT ARITHMETICS NEEDS %0.2" NETDATA_DOUBLE_MODIFIER " TIMES LESS MEMORY.\n", (NETDATA_DOUBLE)(their / mine)); + } + + fprintf(stderr, "\nNETDATA FLOATING POINT\n"); + fprintf(stderr, "MIN POSITIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW)); + fprintf(stderr, "MAX POSITIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_POSITIVE_MAX_RAW)); + fprintf(stderr, "MIN NEGATIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MIN_RAW)); + fprintf(stderr, "MAX NEGATIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MAX_RAW)); + fprintf(stderr, "Maximum accuracy loss accepted: " NETDATA_DOUBLE_FORMAT "%%\n\n\n", (NETDATA_DOUBLE)ACCURACY_LOSS_ACCEPTED_PERCENT); + + // ------------------------------------------------------------------------ + + fprintf(stderr, "INTERNAL LONG DOUBLE PRINTING: "); + getrusage(RUSAGE_SELF, &last); + + // do the job + for(j = 1; j < 11 ;j++) { + n = storage_number_positive_min * j; + + for(i = 0; i < loop ;i++) { + n *= multiplier; + if(n > storage_number_positive_max) n = storage_number_positive_min; + + print_netdata_double(buffer, n); + } + } + + getrusage(RUSAGE_SELF, &now); + user = now.ru_utime.tv_sec * 1000000ULL + now.ru_utime.tv_usec - last.ru_utime.tv_sec * 1000000ULL + last.ru_utime.tv_usec; + system = now.ru_stime.tv_sec * 1000000ULL + now.ru_stime.tv_usec - last.ru_stime.tv_sec * 1000000ULL + last.ru_stime.tv_usec; + total = user + system; + mine = total; + + fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER + ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(system / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0)); + + // ------------------------------------------------------------------------ + + fprintf(stderr, "SYSTEM LONG DOUBLE PRINTING: "); + getrusage(RUSAGE_SELF, &last); + + // do the job + for(j = 1; j < 11 ;j++) { + n = storage_number_positive_min * j; + + for(i = 0; i < loop ;i++) { + n *= multiplier; + if(n > storage_number_positive_max) n = storage_number_positive_min; + snprintfz(buffer, sizeof(buffer) - 1, NETDATA_DOUBLE_FORMAT, n); + } + } + + getrusage(RUSAGE_SELF, &now); + user = now.ru_utime.tv_sec * 1000000ULL + now.ru_utime.tv_usec - last.ru_utime.tv_sec * 1000000ULL + last.ru_utime.tv_usec; + system = now.ru_stime.tv_sec * 1000000ULL + now.ru_stime.tv_usec - last.ru_stime.tv_sec * 1000000ULL + last.ru_stime.tv_usec; + total = user + system; + their = total; + + fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER + ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(system / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0)); + + if(mine > total) { + fprintf(stderr, "NETDATA CODE IS SLOWER %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(mine * 100.0 / their - 100.0)); + } + else { + fprintf(stderr, "NETDATA CODE IS F A S T E R %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(their * 100.0 / mine - 100.0)); + } + + // ------------------------------------------------------------------------ + + fprintf(stderr, "\nINTERNAL LONG DOUBLE PRINTING WITH PACK / UNPACK: "); + getrusage(RUSAGE_SELF, &last); + + // do the job + for(j = 1; j < 11 ;j++) { + n = storage_number_positive_min * j; + + for(i = 0; i < loop 
;i++) { + n *= multiplier; + if(n > storage_number_positive_max) n = storage_number_positive_min; + + s = pack_storage_number(n, SN_DEFAULT_FLAGS); + d = unpack_storage_number(s); + print_netdata_double(buffer, d); + } + } + + getrusage(RUSAGE_SELF, &now); + user = now.ru_utime.tv_sec * 1000000ULL + now.ru_utime.tv_usec - last.ru_utime.tv_sec * 1000000ULL + last.ru_utime.tv_usec; + system = now.ru_stime.tv_sec * 1000000ULL + now.ru_stime.tv_usec - last.ru_stime.tv_sec * 1000000ULL + last.ru_stime.tv_usec; + total = user + system; + mine = total; + + fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER + ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(system / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0)); + + if(mine > their) { + fprintf(stderr, "WITH PACKING UNPACKING NETDATA CODE IS SLOWER %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(mine * 100.0 / their - 100.0)); + } + else { + fprintf(stderr, "EVEN WITH PACKING AND UNPACKING, NETDATA CODE IS F A S T E R %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(their * 100.0 / mine - 100.0)); + } + + // ------------------------------------------------------------------------ + +} + +static int check_storage_number_exists() { + uint32_t flags = SN_DEFAULT_FLAGS; + NETDATA_DOUBLE n = 0.0; + + storage_number s = pack_storage_number(n, flags); + NETDATA_DOUBLE d = unpack_storage_number(s); + + if(n != d) { + fprintf(stderr, "Wrong number returned. Expected " NETDATA_DOUBLE_FORMAT ", returned " NETDATA_DOUBLE_FORMAT "!\n", n, d); + return 1; + } + + return 0; +} + +int unit_test_storage() { + if(check_storage_number_exists()) return 0; + + NETDATA_DOUBLE storage_number_positive_min = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW); + NETDATA_DOUBLE storage_number_negative_max = unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MAX_RAW); + + NETDATA_DOUBLE c, a = 0; + int i, j, g, r = 0; + + for(g = -1; g <= 1 ; g++) { + a = 0; + + if(!g) continue; + + for(j = 0; j < 9 ;j++) { + a += 0.0000001; + c = a * g; + for(i = 0; i < 21 ;i++, c *= 10) { + if(c > 0 && c < storage_number_positive_min) continue; + if(c < 0 && c > storage_number_negative_max) continue; + + if(check_storage_number(c, 1)) return 1; + } + } + } + + // if(check_storage_number(858993459.1234567, 1)) return 1; + benchmark_storage_number(1000000, 2); + return r; +} + +int unit_test_str2ld() { + is_system_ieee754_double(); + + char *values[] = { + "1.2345678", + "-35.6", + "0.00123", + "23842384234234.2", + ".1", + "1.2e-10", + "18446744073709551616.0", + "18446744073709551616123456789123456789123456789123456789123456789123456789123456789.0", + "1.8446744073709551616123456789123456789123456789123456789123456789123456789123456789e+300", + "9.", + "9.e2", + "1.2e", + "1.2e+", + "1.2e-", + "1.2e0", + "1.2e-0", + "1.2e+0", + "-1.2e+1", + "-1.2e-1", + "1.2e1", + "1.2e400", + "hello", + "1wrong", + "nan", + "inf", + NULL + }; + + int i; + for(i = 0; values[i] ; i++) { + char *e_mine = "hello", *e_sys = "world"; + NETDATA_DOUBLE mine = str2ndd(values[i], &e_mine); + NETDATA_DOUBLE sys = strtondd(values[i], &e_sys); + + if(isnan(mine)) { + if(!isnan(sys)) { + fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER + ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys); + return -1; + } + } + else if(isinf(mine)) { + if(!isinf(sys)) { + fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER + ", but system believes it is %" 
NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys); + return -1; + } + } + else if(mine != sys && ABS(mine-sys) > 0.000001) { + fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER + ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ", delta %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys, sys-mine); + return -1; + } + + if(e_mine != e_sys) { + fprintf(stderr, "Value '%s' is parsed correctly, but endptr is not right (netdata returned %d, but system returned %d)\n", + values[i], (int)(e_mine - values[i]), (int)(e_sys - values[i])); + return -1; + } + + fprintf(stderr, "str2ndd() parsed value '%s' exactly the same way with strtold(), returned %" NETDATA_DOUBLE_MODIFIER + " vs %" NETDATA_DOUBLE_MODIFIER "\n", values[i], mine, sys); + } + + return 0; +} + +int unit_test_buffer() { + BUFFER *wb = buffer_create(1, NULL); + char string[2048 + 1]; + char final[9000 + 1]; + int i; + + for(i = 0; i < 2048; i++) + string[i] = (char)((i % 24) + 'a'); + string[2048] = '\0'; + + const char *fmt = "string1: %s\nstring2: %s\nstring3: %s\nstring4: %s"; + buffer_sprintf(wb, fmt, string, string, string, string); + snprintfz(final, sizeof(final) - 1, fmt, string, string, string, string); + + const char *s = buffer_tostring(wb); + + if(buffer_strlen(wb) != strlen(final) || strcmp(s, final) != 0) { + fprintf(stderr, "\nbuffer_sprintf() is faulty.\n"); + fprintf(stderr, "\nstring : %s (length %zu)\n", string, strlen(string)); + fprintf(stderr, "\nbuffer : %s (length %zu)\n", s, buffer_strlen(wb)); + fprintf(stderr, "\nexpected: %s (length %zu)\n", final, strlen(final)); + buffer_free(wb); + return -1; + } + + fprintf(stderr, "buffer_sprintf() works as expected.\n"); + buffer_free(wb); + return 0; +} + +int unit_test_static_threads() { + struct netdata_static_thread *static_threads = static_threads_get(); + + /* + * make sure enough static threads have been registered + */ + if (!static_threads) { + fprintf(stderr, "empty static_threads array\n"); + return 1; + } + + int n; + for (n = 0; static_threads[n].start_routine != NULL; n++) {} + + if (n < 2) { + fprintf(stderr, "only %d static threads registered", n); + freez(static_threads); + return 1; + } + + /* + * verify that each thread's start routine is unique. 
+ */ + for (int i = 0; i != n - 1; i++) { + for (int j = i + 1; j != n; j++) { + if (static_threads[i].start_routine != static_threads[j].start_routine) + continue; + + fprintf(stderr, "Found duplicate threads with name: %s\n", static_threads[i].name); + freez(static_threads); + return 1; + } + } + + freez(static_threads); + return 0; +} + +// -------------------------------------------------------------------------------------------------------------------- + +struct feed_values { + unsigned long long microseconds; + collected_number value; +}; + +struct test { + char name[100]; + char description[1024]; + + int update_every; + unsigned long long multiplier; + unsigned long long divisor; + RRD_ALGORITHM algorithm; + + unsigned long feed_entries; + unsigned long result_entries; + struct feed_values *feed; + NETDATA_DOUBLE *results; + + collected_number *feed2; + NETDATA_DOUBLE *results2; +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test1 +// test absolute values stored + +struct feed_values test1_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test1_results[] = { + 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +struct test test1 = { + "test1", // name + "test absolute values stored at exactly second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 10, // feed entries + 9, // result entries + test1_feed, // feed + test1_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test2 +// test absolute values stored in the middle of second boundaries + +struct feed_values test2_feed[] = { + { 500000, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test2_results[] = { + 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +struct test test2 = { + "test2", // name + "test absolute values stored in the middle of second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 10, // feed entries + 9, // result entries + test2_feed, // feed + test2_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test3 + +struct feed_values test3_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test3_results[] = { + 10, 10, 10, 10, 10, 10, 10, 10, 10 +}; + +struct test test3 = { + "test3", // name + "test incremental values stored at exactly second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test3_feed, // feed + test3_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test4 + +struct feed_values test4_feed[] = { + { 500000, 10 }, + { 1000000, 
20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test4_results[] = { + 10, 10, 10, 10, 10, 10, 10, 10, 10 +}; + +struct test test4 = { + "test4", // name + "test incremental values stored in the middle of second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test4_feed, // feed + test4_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test5 - 32 bit overflows + +struct feed_values test5_feed[] = { + { 0, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, +}; + +NETDATA_DOUBLE test5_results[] = { + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, +}; + +struct test test5 = { + "test5", // name + "test 32-bit incremental values overflow", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test5_feed, // feed + test5_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test5b - 64 bit overflows + +struct feed_values test5b_feed[] = { + { 0, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, +}; + +NETDATA_DOUBLE test5b_results[] = { + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, +}; + +struct test test5b = { + "test5b", // name + "test 64-bit incremental values overflow", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test5b_feed, // feed + test5b_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test6 + +struct feed_values test6_feed[] = { + { 250000, 1000 }, + { 250000, 2000 }, + { 250000, 3000 }, + { 250000, 4000 }, + { 250000, 5000 }, + { 250000, 6000 }, + { 250000, 7000 }, + { 
250000, 8000 }, + { 250000, 9000 }, + { 250000, 10000 }, + { 250000, 11000 }, + { 250000, 12000 }, + { 250000, 13000 }, + { 250000, 14000 }, + { 250000, 15000 }, + { 250000, 16000 }, +}; + +NETDATA_DOUBLE test6_results[] = { + 4000, 4000, 4000, 4000 +}; + +struct test test6 = { + "test6", // name + "test incremental values updated within the same second", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 16, // feed entries + 4, // result entries + test6_feed, // feed + test6_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test7 + +struct feed_values test7_feed[] = { + { 500000, 1000 }, + { 2000000, 2000 }, + { 2000000, 3000 }, + { 2000000, 4000 }, + { 2000000, 5000 }, + { 2000000, 6000 }, + { 2000000, 7000 }, + { 2000000, 8000 }, + { 2000000, 9000 }, + { 2000000, 10000 }, +}; + +NETDATA_DOUBLE test7_results[] = { + 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500 +}; + +struct test test7 = { + "test7", // name + "test incremental values updated in long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 18, // result entries + test7_feed, // feed + test7_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test8 + +struct feed_values test8_feed[] = { + { 500000, 1000 }, + { 2000000, 2000 }, + { 2000000, 3000 }, + { 2000000, 4000 }, + { 2000000, 5000 }, + { 2000000, 6000 }, +}; + +NETDATA_DOUBLE test8_results[] = { + 1250, 2000, 2250, 3000, 3250, 4000, 4250, 5000, 5250, 6000 +}; + +struct test test8 = { + "test8", // name + "test absolute values updated in long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 6, // feed entries + 10, // result entries + test8_feed, // feed + test8_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test9 + +struct feed_values test9_feed[] = { + { 250000, 1000 }, + { 250000, 2000 }, + { 250000, 3000 }, + { 250000, 4000 }, + { 250000, 5000 }, + { 250000, 6000 }, + { 250000, 7000 }, + { 250000, 8000 }, + { 250000, 9000 }, + { 250000, 10000 }, + { 250000, 11000 }, + { 250000, 12000 }, + { 250000, 13000 }, + { 250000, 14000 }, + { 250000, 15000 }, + { 250000, 16000 }, +}; + +NETDATA_DOUBLE test9_results[] = { + 4000, 8000, 12000, 16000 +}; + +struct test test9 = { + "test9", // name + "test absolute values updated within the same second", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 16, // feed entries + 4, // result entries + test9_feed, // feed + test9_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test10 + +struct feed_values test10_feed[] = { + { 500000, 1000 }, + { 600000, 1000 + 600 }, + { 200000, 1600 + 200 }, + { 1000000, 1800 + 1000 }, + { 200000, 2800 + 200 }, + { 2000000, 3000 + 2000 }, + { 600000, 5000 + 600 }, + { 400000, 5600 + 400 }, + { 900000, 6000 + 900 }, + { 1000000, 6900 + 1000 }, +}; + +NETDATA_DOUBLE test10_results[] 
= { + 1000, 1000, 1000, 1000, 1000, 1000, 1000 +}; + +struct test test10 = { + "test10", // name + "test incremental values updated in short and long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 7, // result entries + test10_feed, // feed + test10_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test11 + +struct feed_values test11_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +collected_number test11_feed2[] = { + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +NETDATA_DOUBLE test11_results[] = { + 50, 50, 50, 50, 50, 50, 50, 50, 50 +}; + +NETDATA_DOUBLE test11_results2[] = { + 50, 50, 50, 50, 50, 50, 50, 50, 50 +}; + +struct test test11 = { + "test11", // name + "test percentage-of-incremental-row with equal values", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 9, // result entries + test11_feed, // feed + test11_results, // results + test11_feed2, // feed2 + test11_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test12 + +struct feed_values test12_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +collected_number test12_feed2[] = { + 10*3, 20*3, 30*3, 40*3, 50*3, 60*3, 70*3, 80*3, 90*3, 100*3 +}; + +NETDATA_DOUBLE test12_results[] = { + 25, 25, 25, 25, 25, 25, 25, 25, 25 +}; + +NETDATA_DOUBLE test12_results2[] = { + 75, 75, 75, 75, 75, 75, 75, 75, 75 +}; + +struct test test12 = { + "test12", // name + "test percentage-of-incremental-row with equal values", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 9, // result entries + test12_feed, // feed + test12_results, // results + test12_feed2, // feed2 + test12_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test13 + +struct feed_values test13_feed[] = { + { 500000, 1000 }, + { 600000, 1000 + 600 }, + { 200000, 1600 + 200 }, + { 1000000, 1800 + 1000 }, + { 200000, 2800 + 200 }, + { 2000000, 3000 + 2000 }, + { 600000, 5000 + 600 }, + { 400000, 5600 + 400 }, + { 900000, 6000 + 900 }, + { 1000000, 6900 + 1000 }, +}; + +NETDATA_DOUBLE test13_results[] = { + 83.3333300, 100, 100, 100, 100, 100, 100 +}; + +struct test test13 = { + "test13", // name + "test incremental values updated in short and long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 7, // result entries + test13_feed, // feed + test13_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test14 + +struct feed_values test14_feed[] = { + { 0, 0x015397dc42151c41ULL }, + { 13573000, 0x015397e612e3ff5dULL }, + { 29969000, 0x015397f905ecdaa8ULL }, + { 29958000, 
0x0153980c2a6cb5e4ULL }, + { 30054000, 0x0153981f4032fb83ULL }, + { 34952000, 0x015398355efadaccULL }, + { 25046000, 0x01539845ba4b09f8ULL }, + { 29947000, 0x0153985948bf381dULL }, + { 30054000, 0x0153986c5b9c27e2ULL }, + { 29942000, 0x0153987f888982d0ULL }, +}; + +NETDATA_DOUBLE test14_results[] = { + 23.1383300, 21.8515600, 21.8804600, 21.7788000, 22.0112200, 22.4386100, 22.0906100, 21.9150800 +}; + +struct test test14 = { + "test14", // name + "issue #981 with real data", + 30, // update_every + 8, // multiplier + 1000000000, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 8, // result entries + test14_feed, // feed + test14_results, // results + NULL, // feed2 + NULL // results2 +}; + +struct feed_values test14b_feed[] = { + { 0, 0 }, + { 13573000, 13573000 }, + { 29969000, 13573000 + 29969000 }, + { 29958000, 13573000 + 29969000 + 29958000 }, + { 30054000, 13573000 + 29969000 + 29958000 + 30054000 }, + { 34952000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 }, + { 25046000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 }, + { 29947000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 }, + { 30054000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 + 30054000 }, + { 29942000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 + 30054000 + 29942000 }, +}; + +NETDATA_DOUBLE test14b_results[] = { + 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000 +}; + +struct test test14b = { + "test14b", // name + "issue #981 with dummy data", + 30, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 8, // result entries + test14b_feed, // feed + test14b_results, // results + NULL, // feed2 + NULL // results2 +}; + +struct feed_values test14c_feed[] = { + { 29000000, 29000000 }, + { 1000000, 29000000 + 1000000 }, + { 30000000, 29000000 + 1000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, +}; + +NETDATA_DOUBLE test14c_results[] = { + 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000 +}; + +struct test test14c = { + "test14c", // name + "issue #981 with dummy data, checking for late start", + 30, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test14c_feed, // feed + test14c_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test15 + +struct feed_values test15_feed[] = { + { 0, 1068066388 }, + { 1008752, 1068822698 }, + { 993809, 1069573072 }, + { 995911, 1070324135 }, + { 1014562, 1071078166 }, + { 994684, 1071831349 }, + { 993128, 1072235739 }, + { 1010332, 1072958871 }, + { 1003394, 1073707019 }, + { 995201, 1074460255 }, +}; + 
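The dummy-data variants above (test14b and test14c) make the incremental arithmetic easy to verify by hand: each collected value is the running sum of the elapsed microseconds, so every step's delta equals the step's duration and the per-second rate comes out to exactly 1000000, which is what the expected-results arrays hold. A small sketch of that arithmetic, using the same rate formula that run_test() below prints in its per-sample debug output; the helper name is illustrative only:

    #include <stdio.h>

    /* Per-second rate for an incremental dimension, as printed by run_test():
     * rate = (value - last) * multiplier / divisor / elapsed_usec * 1000000 */
    double incremental_rate(long long value, long long last,
                            long long multiplier, long long divisor,
                            long long elapsed_usec) {
        return ((double)(value - last) * (double)multiplier / (double)divisor)
               / (double)elapsed_usec * 1000000.0;
    }

    int main(void) {
        /* second sample of test14b: the counter grew by 13573000 over 13573000 us */
        printf("%.0f\n", incremental_rate(13573000, 0, 1, 1, 13573000)); /* prints 1000000 */
        return 0;
    }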
+collected_number test15_feed2[] = { + 178825286, 178825286, 178825286, 178825286, 178825498, 178825498, 179165652, 179202964, 179203282, 179204130 +}; + +NETDATA_DOUBLE test15_results[] = { + 5857.4080000, 5898.4540000, 5891.6590000, 5806.3160000, 5914.2640000, 3202.2630000, 5589.6560000, 5822.5260000, 5911.7520000 +}; + +NETDATA_DOUBLE test15_results2[] = { + 0.0000000, 0.0000000, 0.0024944, 1.6324779, 0.0212777, 2655.1890000, 290.5387000, 5.6733610, 6.5960220 +}; + +struct test test15 = { + "test15", // name + "test incremental with 2 dimensions", + 1, // update_every + 8, // multiplier + 1024, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test15_feed, // feed + test15_results, // results + test15_feed2, // feed2 + test15_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- + +int run_test(struct test *test) +{ + fprintf(stderr, "\nRunning test '%s':\n%s\n", test->name, test->description); + + default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC; + default_rrd_update_every = test->update_every; + + char name[101]; + snprintfz(name, sizeof(name) - 1, "unittest-%s", test->name); + + // create the chart + RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1 + , test->update_every, RRDSET_TYPE_LINE); + RRDDIM *rd = rrddim_add(st, "dim1", NULL, test->multiplier, test->divisor, test->algorithm); + + RRDDIM *rd2 = NULL; + if(test->feed2) + rd2 = rrddim_add(st, "dim2", NULL, test->multiplier, test->divisor, test->algorithm); + + rrdset_flag_set(st, RRDSET_FLAG_DEBUG); + + // feed it with the test data + time_t time_now = 0, time_start = now_realtime_sec(); + unsigned long c; + collected_number last = 0; + for(c = 0; c < test->feed_entries; c++) { + if(debug_flags) fprintf(stderr, "\n\n"); + + if(c) { + time_now += test->feed[c].microseconds; + fprintf(stderr, " > %s: feeding position %lu, after %0.3f seconds (%0.3f seconds from start), delta " NETDATA_DOUBLE_FORMAT + ", rate " NETDATA_DOUBLE_FORMAT "\n", + test->name, c+1, + (float)test->feed[c].microseconds / 1000000.0, + (float)time_now / 1000000.0, + ((NETDATA_DOUBLE)test->feed[c].value - (NETDATA_DOUBLE)last) * (NETDATA_DOUBLE)test->multiplier / (NETDATA_DOUBLE)test->divisor, + (((NETDATA_DOUBLE)test->feed[c].value - (NETDATA_DOUBLE)last) * (NETDATA_DOUBLE)test->multiplier / (NETDATA_DOUBLE)test->divisor) / (NETDATA_DOUBLE)test->feed[c].microseconds * (NETDATA_DOUBLE)1000000); + + // rrdset_next_usec_unfiltered(st, test->feed[c].microseconds); + st->usec_since_last_update = test->feed[c].microseconds; + } + else { + fprintf(stderr, " > %s: feeding position %lu\n", test->name, c+1); + } + + fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rrddim_name(rd), test->feed[c].value); + rrddim_set(st, "dim1", test->feed[c].value); + last = test->feed[c].value; + + if(rd2) { + fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rrddim_name(rd2), test->feed2[c]); + rrddim_set(st, "dim2", test->feed2[c]); + } + + struct timeval now; + now_realtime_timeval(&now); + rrdset_timed_done(st, now, false); + + // align the first entry to second boundary + if(!c) { + fprintf(stderr, " > %s: fixing first collection time to be %llu microseconds to second boundary\n", test->name, test->feed[c].microseconds); + rd->collector.last_collected_time.tv_usec = st->last_collected_time.tv_usec = st->last_updated.tv_usec = 
test->feed[c].microseconds; + // time_start = st->last_collected_time.tv_sec; + } + } + + // check the result + int errors = 0; + + if(st->counter != test->result_entries) { + fprintf(stderr, " %s stored %u entries, but we were expecting %lu, ### E R R O R ###\n", + test->name, st->counter, test->result_entries); + errors++; + } + + unsigned long max = (st->counter < test->result_entries)?st->counter:test->result_entries; + for(c = 0 ; c < max ; c++) { + NETDATA_DOUBLE v = unpack_storage_number(rd->db.data[c]); + NETDATA_DOUBLE n = unpack_storage_number(pack_storage_number(test->results[c], SN_DEFAULT_FLAGS)); + int same = (roundndd(v * 10000000.0) == roundndd(n * 10000000.0))?1:0; + fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT + ", found " NETDATA_DOUBLE_FORMAT ", %s\n", + test->name, rrddim_name(rd), c+1, + (int64_t)((rrdset_first_entry_s(st) + c * st->update_every) - time_start), + n, v, (same)?"OK":"### E R R O R ###"); + + if(!same) errors++; + + if(rd2) { + v = unpack_storage_number(rd2->db.data[c]); + n = test->results2[c]; + same = (roundndd(v * 10000000.0) == roundndd(n * 10000000.0))?1:0; + fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT + ", found " NETDATA_DOUBLE_FORMAT ", %s\n", + test->name, rrddim_name(rd2), c+1, + (int64_t)((rrdset_first_entry_s(st) + c * st->update_every) - time_start), + n, v, (same)?"OK":"### E R R O R ###"); + if(!same) errors++; + } + } + + return errors; +} + +static int test_variable_renames(void) { + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + + fprintf(stderr, "Creating chart\n"); + RRDSET *st = rrdset_create_localhost("chart", "ID", NULL, "family", "context", "Unit Testing", "a value", "unittest", NULL, 1, 1, RRDSET_TYPE_LINE); + fprintf(stderr, "Created chart with id '%s', name '%s'\n", rrdset_id(st), rrdset_name(st)); + + fprintf(stderr, "Creating dimension DIM1\n"); + RRDDIM *rd1 = rrddim_add(st, "DIM1", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); + + fprintf(stderr, "Creating dimension DIM2\n"); + RRDDIM *rd2 = rrddim_add(st, "DIM2", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); + + fprintf(stderr, "Renaming chart to CHARTNAME1\n"); + rrdset_reset_name(st, "CHARTNAME1"); + fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", rrdset_id(st), rrdset_name(st)); + + fprintf(stderr, "Renaming chart to CHARTNAME2\n"); + rrdset_reset_name(st, "CHARTNAME2"); + fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", rrdset_id(st), rrdset_name(st)); + + fprintf(stderr, "Renaming dimension DIM1 to DIM1NAME1\n"); + rrddim_reset_name(st, rd1, "DIM1NAME1"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); + + fprintf(stderr, "Renaming dimension DIM1 to DIM1NAME2\n"); + rrddim_reset_name(st, rd1, "DIM1NAME2"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); + + fprintf(stderr, "Renaming dimension DIM2 to DIM2NAME1\n"); + rrddim_reset_name(st, rd2, "DIM2NAME1"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); + + fprintf(stderr, "Renaming dimension DIM2 to DIM2NAME2\n"); + rrddim_reset_name(st, rd2, "DIM2NAME2"); + fprintf(stderr, "Renamed dimension with id '%s' to name 
'%s'\n", rrddim_id(rd2), rrddim_name(rd2)); + + BUFFER *buf = buffer_create(1, NULL); + health_api_v1_chart_variables2json(st, buf); + fprintf(stderr, "%s", buffer_tostring(buf)); + buffer_free(buf); + return 1; +} + +int check_strdupz_path_subpath() { + + struct strdupz_path_subpath_checks { + const char *path; + const char *subpath; + const char *result; + } checks[] = { + { "", "", "." }, + { "/", "", "/" }, + { "/etc/netdata", "", "/etc/netdata" }, + { "/etc/netdata///", "", "/etc/netdata" }, + { "/etc/netdata///", "health.d", "/etc/netdata/health.d" }, + { "/etc/netdata///", "///health.d", "/etc/netdata/health.d" }, + { "/etc/netdata", "///health.d", "/etc/netdata/health.d" }, + { "", "///health.d", "./health.d" }, + { "/", "///health.d", "/health.d" }, + + // terminator + { NULL, NULL, NULL } + }; + + size_t i; + for(i = 0; checks[i].result ; i++) { + char *s = strdupz_path_subpath(checks[i].path, checks[i].subpath); + fprintf(stderr, "strdupz_path_subpath(\"%s\", \"%s\") = \"%s\": ", checks[i].path, checks[i].subpath, s); + if(!s || strcmp(s, checks[i].result) != 0) { + freez(s); + fprintf(stderr, "FAILED\n"); + return 1; + } + else { + freez(s); + fprintf(stderr, "OK\n"); + } + } + + return 0; +} + +int run_all_mockup_tests(void) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + if(check_strdupz_path_subpath()) + return 1; + + if(check_number_printing()) + return 1; + + if(check_rrdcalc_comparisons()) + return 1; + + if(!test_variable_renames()) + return 1; + + if(run_test(&test1)) + return 1; + + if(run_test(&test2)) + return 1; + + if(run_test(&test3)) + return 1; + + if(run_test(&test4)) + return 1; + + if(run_test(&test5)) + return 1; + + if(run_test(&test5b)) + return 1; + + if(run_test(&test6)) + return 1; + + if(run_test(&test7)) + return 1; + + if(run_test(&test8)) + return 1; + + if(run_test(&test9)) + return 1; + + if(run_test(&test10)) + return 1; + + if(run_test(&test11)) + return 1; + + if(run_test(&test12)) + return 1; + + if(run_test(&test13)) + return 1; + + if(run_test(&test14)) + return 1; + + if(run_test(&test14b)) + return 1; + + if(run_test(&test14c)) + return 1; + + if(run_test(&test15)) + return 1; + + + + return 0; +} + +int unit_test(long delay, long shift) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + static int repeat = 0; + repeat++; + + char name[101]; + snprintfz(name, sizeof(name) - 1, "unittest-%d-%ld-%ld", repeat, delay, shift); + + //debug_flags = 0xffffffff; + default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC; + default_rrd_update_every = 1; + + int do_abs = 1; + int do_inc = 1; + int do_abst = 0; + int do_absi = 0; + + RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1, 1 + , RRDSET_TYPE_LINE); + rrdset_flag_set(st, RRDSET_FLAG_DEBUG); + + RRDDIM *rdabs = NULL; + RRDDIM *rdinc = NULL; + RRDDIM *rdabst = NULL; + RRDDIM *rdabsi = NULL; + + if(do_abs) rdabs = rrddim_add(st, "absolute", "absolute", 1, 1, RRD_ALGORITHM_ABSOLUTE); + if(do_inc) rdinc = rrddim_add(st, "incremental", "incremental", 1, 1, RRD_ALGORITHM_INCREMENTAL); + if(do_abst) rdabst = rrddim_add(st, "percentage-of-absolute-row", "percentage-of-absolute-row", 1, 1, RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL); + if(do_absi) rdabsi = rrddim_add(st, "percentage-of-incremental-row", "percentage-of-incremental-row", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + + long increment = 1000; + collected_number i = 0; + + unsigned long c, dimensions = rrdset_number_of_dimensions(st); + RRDDIM *rd; + + for(c = 0; 
c < 20 ;c++) { + i += increment; + + fprintf(stderr, "\n\nLOOP = %lu, DELAY = %ld, VALUE = " COLLECTED_NUMBER_FORMAT "\n", c, delay, i); + if(c) { + // rrdset_next_usec_unfiltered(st, delay); + st->usec_since_last_update = delay; + } + if(do_abs) rrddim_set(st, "absolute", i); + if(do_inc) rrddim_set(st, "incremental", i); + if(do_abst) rrddim_set(st, "percentage-of-absolute-row", i); + if(do_absi) rrddim_set(st, "percentage-of-incremental-row", i); + + if(!c) { + now_realtime_timeval(&st->last_collected_time); + st->last_collected_time.tv_usec = shift; + } + + // prevent it from deleting the dimensions + rrddim_foreach_read(rd, st) { + rd->collector.last_collected_time.tv_sec = st->last_collected_time.tv_sec; + } + rrddim_foreach_done(rd); + + rrdset_done(st); + } + + unsigned long oincrement = increment; + increment = increment * st->update_every * 1000000 / delay; + fprintf(stderr, "\n\nORIGINAL INCREMENT: %lu, INCREMENT %ld, DELAY %ld, SHIFT %ld\n", oincrement * 10, increment * 10, delay, shift); + + int ret = 0; + storage_number sn; + NETDATA_DOUBLE cn, v; + for(c = 0 ; c < st->counter ; c++) { + fprintf(stderr, "\nPOSITION: c = %lu, EXPECTED VALUE %lu\n", c, (oincrement + c * increment + increment * (1000000 - shift) / 1000000 )* 10); + + rrddim_foreach_read(rd, st) { + sn = rd->db.data[c]; + cn = unpack_storage_number(sn); + fprintf(stderr, "\t %s " NETDATA_DOUBLE_FORMAT " (PACKED AS " STORAGE_NUMBER_FORMAT ") -> ", rrddim_id(rd), cn, sn); + + if(rd == rdabs) v = + ( oincrement + // + (increment * (1000000 - shift) / 1000000) + + (c + 1) * increment + ); + + else if(rd == rdinc) v = (c?(increment):(increment * (1000000 - shift) / 1000000)); + else if(rd == rdabst) v = oincrement / dimensions / 10; + else if(rd == rdabsi) v = oincrement / dimensions / 10; + else v = 0; + + if(v == cn) fprintf(stderr, "passed.\n"); + else { + fprintf(stderr, "ERROR! 
(expected " NETDATA_DOUBLE_FORMAT ")\n", v); + ret = 1; + } + } + rrddim_foreach_done(rd); + } + + if(ret) + fprintf(stderr, "\n\nUNIT TEST(%ld, %ld) FAILED\n\n", delay, shift); + + return ret; +} + +int test_sqlite(void) { + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + sqlite3 *db_mt; + fprintf(stderr, "Testing SQLIte\n"); + + int rc = sqlite3_open(":memory:", &db_mt); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: DB init failed\n"); + return 1; + } + + rc = sqlite3_exec_monitored(db_mt, "CREATE TABLE IF NOT EXISTS mine (id1, id2);", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Create table failed\n"); + return 1; + } + + rc = sqlite3_exec_monitored(db_mt, "DELETE FROM MINE LIMIT 1;", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Delete with LIMIT failed\n"); + return 1; + } + + rc = sqlite3_exec_monitored(db_mt, "UPDATE MINE SET id1=1 LIMIT 1;", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Update with LIMIT failed\n"); + return 1; + } + + rc = sqlite3_create_function(db_mt, "now_usec", 1, SQLITE_ANY, 0, sqlite_now_usec, 0, 0); + if (unlikely(rc != SQLITE_OK)) { + fprintf(stderr, "Failed to register internal now_usec function"); + return 1; + } + + rc = sqlite3_exec_monitored(db_mt, "UPDATE MINE SET id1=now_usec(0);", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Update with now_usec() failed\n"); + return 1; + } + + BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE, NULL); + char *uuid_str = "0000_000"; + + buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str); + rc = sqlite3_exec_monitored(db_mt, buffer_tostring(sql), 0, 0, NULL); + if (rc != SQLITE_OK) + goto error; + + buffer_free(sql); + fprintf(stderr,"SQLite is OK\n"); + rc = sqlite3_close_v2(db_mt); + return 0; +error: + rc = sqlite3_close_v2(db_mt); + fprintf(stderr,"SQLite statement failed: %s\n", buffer_tostring(sql)); + buffer_free(sql); + fprintf(stderr,"SQLite tests failed\n"); + return 1; +} + +static int bitmapX_test(BITMAPX *ptr, char *expected, const char *msg) { + int errors = 0; + + for(uint32_t idx = 0; idx < ptr->bits ; idx++) { + bool found_set = bitmapX_get_bit(ptr, idx); + bool expected_set = expected[idx]; + + if(found_set != expected_set) { + fprintf(stderr, " >>> %s(): %s, bit %u is expected %s but found %s\n", + __FUNCTION__, msg, idx, expected_set?"SET":"UNSET", found_set?"SET":"UNSET"); + errors++; + } + } + + if(errors) + fprintf(stderr,"%s(): %s, found %d errors\n", + __FUNCTION__, msg, errors); + + return errors; +} + +#define bitmapX_set_bit_and_track(ptr, bit, value, expected) do { \ + bitmapX_set_bit(ptr, bit, value); \ + (expected)[bit] = value; \ +} while(0) + +int unit_test_bitmaps(void) { + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + + int errors = 0; + + char expected[8192]; + + BITMAP256 bmp256 = BITMAP256_INITIALIZER; + BITMAP1024 bmp1024 = BITMAP1024_INITIALIZER; + BITMAPX *bmp = NULL; + + for(int x = 0; x < 3 ; x++) { + char msg[100 + 1]; + + switch (x) { + default: + case 0: + bmp = (BITMAPX *) &bmp256; + break; + + case 1: + bmp = (BITMAPX *) &bmp1024; + break; + + case 2: + bmp = bitmapX_create(8192); + break; + } + + // reset + memset(expected, 0, bmp->bits); + memset(bmp->data, 0, bmp->bits / 8); + + snprintf(msg, 100, "TEST 1 BITMAP %u", bmp->bits); + bitmapX_set_bit_and_track(bmp, 0, true, expected); + errors += bitmapX_test(bmp, expected, msg); + + snprintf(msg, 100, "TEST 2 BITMAP %u", bmp->bits); + 
bitmapX_set_bit_and_track(bmp, 64, true, expected); + errors += bitmapX_test(bmp, expected, msg); + + snprintf(msg, 100, "TEST 3 BITMAP %u", bmp->bits); + bitmapX_set_bit_and_track(bmp, 128, true, expected); + errors += bitmapX_test(bmp, expected, msg); + + snprintf(msg, 100, "TEST 4 BITMAP %u", bmp->bits); + bitmapX_set_bit_and_track(bmp, 192, true, expected); + errors += bitmapX_test(bmp, expected, msg); + + for (uint32_t step = 1; step < 256; step++) { + snprintf(msg, 100, "TEST 5 (setting) BITMAP %u STEP %u", bmp->bits, step); + + // reset + memset(expected, 0, bmp->bits); + memset(bmp->data, 0, bmp->bits / 8); + + for (uint32_t i = 0; i < bmp->bits ; i += step) + bitmapX_set_bit_and_track(bmp, i, true, expected); + + errors += bitmapX_test(bmp, expected, msg); + } + + for (uint32_t step = 1; step < 256; step++) { + snprintf(msg, 100, "TEST 6 (clearing) BITMAP %u STEP %u", bmp->bits, step); + + // reset + memset(expected, 0, bmp->bits); + memset(bmp->data, 0, bmp->bits / 8); + + for (uint32_t i = 0; i < bmp->bits ; i++) + bitmapX_set_bit_and_track(bmp, i, true, expected); + + for (uint32_t i = 0; i < bmp->bits ; i += step) + bitmapX_set_bit_and_track(bmp, i, false, expected); + + errors += bitmapX_test(bmp, expected, msg); + } + } + + freez(bmp); + + fprintf(stderr, "%s() %d errors\n", __FUNCTION__, errors); + return errors; +} diff --git a/daemon/unit_test.h b/src/daemon/unit_test.h index c7cd104e1..c7cd104e1 100644 --- a/daemon/unit_test.h +++ b/src/daemon/unit_test.h diff --git a/src/daemon/watcher.c b/src/daemon/watcher.c new file mode 100644 index 000000000..3eea22019 --- /dev/null +++ b/src/daemon/watcher.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "watcher.h" + +watcher_step_t *watcher_steps; + +static struct completion shutdown_begin_completion; +static struct completion shutdown_end_completion; +static netdata_thread_t watcher_thread; + +void watcher_shutdown_begin(void) { + completion_mark_complete(&shutdown_begin_completion); +} + +void watcher_shutdown_end(void) { + completion_mark_complete(&shutdown_end_completion); +} + +void watcher_step_complete(watcher_step_id_t step_id) { + completion_mark_complete(&watcher_steps[step_id].p); +} + +static void watcher_wait_for_step(const watcher_step_id_t step_id) +{ + unsigned timeout = 90; + + usec_t step_start_time = now_monotonic_usec(); + +#ifdef ENABLE_SENTRY + // Wait with a timeout + bool ok = completion_timedwait_for(&watcher_steps[step_id].p, timeout); +#else + // Wait indefinitely + bool ok = true; + completion_wait_for(&watcher_steps[step_id].p); +#endif + + usec_t step_duration = now_monotonic_usec() - step_start_time; + + if (ok) { + netdata_log_info("shutdown step: [%d/%d] - '%s' finished in %llu milliseconds", + step_id + 1, WATCHER_STEP_ID_MAX, + watcher_steps[step_id].msg, step_duration / USEC_PER_MS); + } else { + // Do not call fatal() because it will try to execute the exit + // sequence twice. + netdata_log_error("shutdown step: [%d/%d] - '%s' took more than %u seconds (ie. 
%llu milliseconds)", + step_id + 1, WATCHER_STEP_ID_MAX, watcher_steps[step_id].msg, + timeout, step_duration / USEC_PER_MS); + + abort(); + } +} + +void *watcher_main(void *arg) +{ + UNUSED(arg); + + netdata_log_debug(D_SYSTEM, "Watcher thread started"); + + // wait until the agent starts the shutdown process + completion_wait_for(&shutdown_begin_completion); + netdata_log_error("Shutdown process started"); + + usec_t shutdown_start_time = now_monotonic_usec(); + + watcher_wait_for_step(WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE); + watcher_wait_for_step(WATCHER_STEP_ID_DBENGINE_EXIT_MODE); + watcher_wait_for_step(WATCHER_STEP_ID_CLOSE_WEBRTC_CONNECTIONS); + watcher_wait_for_step(WATCHER_STEP_ID_DISABLE_MAINTENANCE_NEW_QUERIES_NEW_WEB_REQUESTS_NEW_STREAMING_CONNECTIONS_AND_ACLK); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_MAINTENANCE_THREAD); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_EXPORTERS_HEALTH_AND_WEB_SERVERS_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_COLLECTORS_AND_STREAMING_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_REPLICATION_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_PREPARE_METASYNC_SHUTDOWN); + watcher_wait_for_step(WATCHER_STEP_ID_DISABLE_ML_DETECTION_AND_TRAINING_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_CONTEXT_THREAD); + watcher_wait_for_step(WATCHER_STEP_ID_CLEAR_WEB_CLIENT_CACHE); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_ACLK_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_ALL_REMAINING_WORKER_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_CANCEL_MAIN_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_METASYNC_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_wait_for_step(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + watcher_wait_for_step(WATCHER_STEP_ID_CLOSE_SQL_DATABASES); + watcher_wait_for_step(WATCHER_STEP_ID_REMOVE_PID_FILE); + watcher_wait_for_step(WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES); + watcher_wait_for_step(WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE); + + completion_wait_for(&shutdown_end_completion); + usec_t shutdown_end_time = now_monotonic_usec(); + + usec_t shutdown_duration = shutdown_end_time - shutdown_start_time; + netdata_log_error("Shutdown process ended in %llu milliseconds", + shutdown_duration / USEC_PER_MS); + + return NULL; +} + +void watcher_thread_start() { + watcher_steps = callocz(WATCHER_STEP_ID_MAX, sizeof(watcher_step_t)); + + watcher_steps[WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE].msg = + "create shutdown file"; + watcher_steps[WATCHER_STEP_ID_DBENGINE_EXIT_MODE].msg = + "dbengine exit mode"; + watcher_steps[WATCHER_STEP_ID_CLOSE_WEBRTC_CONNECTIONS].msg = + "close webrtc connections"; + watcher_steps[WATCHER_STEP_ID_DISABLE_MAINTENANCE_NEW_QUERIES_NEW_WEB_REQUESTS_NEW_STREAMING_CONNECTIONS_AND_ACLK].msg = + "disable maintenance, new queries, new web requests, new streaming connections and aclk"; + watcher_steps[WATCHER_STEP_ID_STOP_MAINTENANCE_THREAD].msg = + "stop maintenance thread"; + watcher_steps[WATCHER_STEP_ID_STOP_EXPORTERS_HEALTH_AND_WEB_SERVERS_THREADS].msg = + "stop exporters, health and web servers threads"; + watcher_steps[WATCHER_STEP_ID_STOP_COLLECTORS_AND_STREAMING_THREADS].msg = + "stop collectors and streaming threads"; + watcher_steps[WATCHER_STEP_ID_STOP_REPLICATION_THREADS].msg = + 
"stop replication threads"; + watcher_steps[WATCHER_STEP_ID_PREPARE_METASYNC_SHUTDOWN].msg = + "prepare metasync shutdown"; + watcher_steps[WATCHER_STEP_ID_DISABLE_ML_DETECTION_AND_TRAINING_THREADS].msg = + "disable ML detection and training threads"; + watcher_steps[WATCHER_STEP_ID_STOP_CONTEXT_THREAD].msg = + "stop context thread"; + watcher_steps[WATCHER_STEP_ID_CLEAR_WEB_CLIENT_CACHE].msg = + "clear web client cache"; + watcher_steps[WATCHER_STEP_ID_STOP_ACLK_THREADS].msg = + "stop aclk threads"; + watcher_steps[WATCHER_STEP_ID_STOP_ALL_REMAINING_WORKER_THREADS].msg = + "stop all remaining worker threads"; + watcher_steps[WATCHER_STEP_ID_CANCEL_MAIN_THREADS].msg = + "cancel main threads"; + watcher_steps[WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS].msg = + "flush dbengine tiers"; + watcher_steps[WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS].msg = + "stop collection for all hosts"; + watcher_steps[WATCHER_STEP_ID_STOP_METASYNC_THREADS].msg = + "stop metasync threads"; + watcher_steps[WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH].msg = + "wait for dbengine collectors to finish"; + watcher_steps[WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING].msg = + "wait for dbengine main cache to finish flushing"; + watcher_steps[WATCHER_STEP_ID_STOP_DBENGINE_TIERS].msg = + "stop dbengine tiers"; + watcher_steps[WATCHER_STEP_ID_CLOSE_SQL_DATABASES].msg = + "close SQL databases"; + watcher_steps[WATCHER_STEP_ID_REMOVE_PID_FILE].msg = + "remove pid file"; + watcher_steps[WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES].msg = + "free openssl structures"; + watcher_steps[WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE].msg = + "remove incomplete shutdown file"; + + for (size_t i = 0; i != WATCHER_STEP_ID_MAX; i++) { + completion_init(&watcher_steps[i].p); + } + + completion_init(&shutdown_begin_completion); + completion_init(&shutdown_end_completion); + + netdata_thread_create(&watcher_thread, "P[WATCHER]", NETDATA_THREAD_OPTION_JOINABLE, watcher_main, NULL); +} + +void watcher_thread_stop() { + netdata_thread_join(watcher_thread, NULL); + + for (size_t i = 0; i != WATCHER_STEP_ID_MAX; i++) { + completion_destroy(&watcher_steps[i].p); + } + + completion_destroy(&shutdown_begin_completion); + completion_destroy(&shutdown_end_completion); + + freez(watcher_steps); +} diff --git a/src/daemon/watcher.h b/src/daemon/watcher.h new file mode 100644 index 000000000..b785ca436 --- /dev/null +++ b/src/daemon/watcher.h @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef DAEMON_WATCHER_H +#define DAEMON_WATCHER_H + +#include "libnetdata/libnetdata.h" + +typedef enum { + WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE = 0, + WATCHER_STEP_ID_DBENGINE_EXIT_MODE, + WATCHER_STEP_ID_CLOSE_WEBRTC_CONNECTIONS, + WATCHER_STEP_ID_DISABLE_MAINTENANCE_NEW_QUERIES_NEW_WEB_REQUESTS_NEW_STREAMING_CONNECTIONS_AND_ACLK, + WATCHER_STEP_ID_STOP_MAINTENANCE_THREAD, + WATCHER_STEP_ID_STOP_EXPORTERS_HEALTH_AND_WEB_SERVERS_THREADS, + WATCHER_STEP_ID_STOP_COLLECTORS_AND_STREAMING_THREADS, + WATCHER_STEP_ID_STOP_REPLICATION_THREADS, + WATCHER_STEP_ID_PREPARE_METASYNC_SHUTDOWN, + WATCHER_STEP_ID_DISABLE_ML_DETECTION_AND_TRAINING_THREADS, + WATCHER_STEP_ID_STOP_CONTEXT_THREAD, + WATCHER_STEP_ID_CLEAR_WEB_CLIENT_CACHE, + WATCHER_STEP_ID_STOP_ACLK_THREADS, + WATCHER_STEP_ID_STOP_ALL_REMAINING_WORKER_THREADS, + WATCHER_STEP_ID_CANCEL_MAIN_THREADS, + WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS, + WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS, + WATCHER_STEP_ID_STOP_METASYNC_THREADS, + 
WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH, + WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING, + WATCHER_STEP_ID_STOP_DBENGINE_TIERS, + WATCHER_STEP_ID_CLOSE_SQL_DATABASES, + WATCHER_STEP_ID_REMOVE_PID_FILE, + WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES, + WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE, + + // Always keep this as the last enum value + WATCHER_STEP_ID_MAX +} watcher_step_id_t; + +typedef struct { + const char *msg; + struct completion p; +} watcher_step_t; + +extern watcher_step_t *watcher_steps; + +void watcher_thread_start(void); +void watcher_thread_stop(void); + +void watcher_shutdown_begin(void); +void watcher_shutdown_end(void); + +void watcher_step_complete(watcher_step_id_t step_id); + +#endif /* DAEMON_WATCHER_H */