author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-07-24 09:54:23 +0000
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-07-24 09:54:44 +0000
commit | 836b47cb7e99a977c5a23b059ca1d0b5065d310e (patch)
tree | 1604da8f482d02effa033c94a84be42bc0c848c3 /src/daemon
parent | Releasing debian version 1.44.3-2. (diff)
download | netdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.tar.xz netdata-836b47cb7e99a977c5a23b059ca1d0b5065d310e.zip
Merging upstream version 1.46.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/daemon/README.md | 438
-rw-r--r-- | src/daemon/analytics.c (renamed from daemon/analytics.c) | 59
-rw-r--r-- | src/daemon/analytics.h (renamed from daemon/analytics.h) | 11
-rwxr-xr-x | src/daemon/anonymous-statistics.sh.in (renamed from daemon/anonymous-statistics.sh.in) | 0
-rw-r--r-- | src/daemon/buildinfo.c (renamed from daemon/buildinfo.c) | 147
-rw-r--r-- | src/daemon/buildinfo.h (renamed from daemon/buildinfo.h) | 0
-rw-r--r-- | src/daemon/commands.c (renamed from daemon/commands.c) | 97
-rw-r--r-- | src/daemon/commands.h (renamed from daemon/commands.h) | 6
-rw-r--r-- | src/daemon/common.c (renamed from daemon/common.c) | 6
-rw-r--r-- | src/daemon/common.h | 141
-rw-r--r-- | src/daemon/config/README.md | 231
-rw-r--r-- | src/daemon/config/dyncfg-echo.c | 175
-rw-r--r-- | src/daemon/config/dyncfg-files.c | 264
-rw-r--r-- | src/daemon/config/dyncfg-inline.c | 66
-rw-r--r-- | src/daemon/config/dyncfg-intercept.c | 429
-rw-r--r-- | src/daemon/config/dyncfg-internals.h | 145
-rw-r--r-- | src/daemon/config/dyncfg-tree.c | 292
-rw-r--r-- | src/daemon/config/dyncfg-unittest.c | 799
-rw-r--r-- | src/daemon/config/dyncfg.c | 454
-rw-r--r-- | src/daemon/config/dyncfg.h | 34
-rw-r--r-- | src/daemon/daemon.c (renamed from daemon/daemon.c) | 34
-rw-r--r-- | src/daemon/daemon.h | 16
-rw-r--r-- | src/daemon/event_loop.c (renamed from daemon/event_loop.c) | 2
-rw-r--r-- | src/daemon/event_loop.h (renamed from daemon/event_loop.h) | 0
-rwxr-xr-x[-rw-r--r--] | src/daemon/get-kubernetes-labels.sh.in (renamed from daemon/get-kubernetes-labels.sh.in) | 0
-rw-r--r-- | src/daemon/global_statistics.c (renamed from daemon/global_statistics.c) | 113
-rw-r--r-- | src/daemon/global_statistics.h (renamed from daemon/global_statistics.h) | 0
-rw-r--r-- | src/daemon/main.c | 2370
-rw-r--r-- | src/daemon/main.h | 44
-rw-r--r-- | src/daemon/metrics.csv (renamed from daemon/metrics.csv) | 0
-rw-r--r-- | src/daemon/pipename.c (renamed from daemon/pipename.c) | 0
-rw-r--r-- | src/daemon/pipename.h (renamed from daemon/pipename.h) | 0
-rw-r--r-- | src/daemon/sentry-native/sentry-native.c | 62
-rw-r--r-- | src/daemon/sentry-native/sentry-native.h | 11
-rw-r--r-- | src/daemon/service.c (renamed from daemon/service.c) | 100
-rw-r--r-- | src/daemon/signals.c (renamed from daemon/signals.c) | 11
-rw-r--r-- | src/daemon/signals.h (renamed from daemon/signals.h) | 0
-rw-r--r-- | src/daemon/static_threads.c (renamed from daemon/static_threads.c) | 28
-rw-r--r-- | src/daemon/static_threads.h | 16
-rw-r--r-- | src/daemon/static_threads_freebsd.c | 33
-rw-r--r-- | src/daemon/static_threads_linux.c | 73
-rw-r--r-- | src/daemon/static_threads_macos.c | 35
-rw-r--r-- | src/daemon/static_threads_windows.c | 33
-rwxr-xr-x | src/daemon/system-info.sh (renamed from daemon/system-info.sh) | 0
-rw-r--r-- | src/daemon/unit_test.c | 1695
-rw-r--r-- | src/daemon/unit_test.h (renamed from daemon/unit_test.h) | 0
-rw-r--r-- | src/daemon/watcher.c | 178
-rw-r--r-- | src/daemon/watcher.h | 54
48 files changed, 8368 insertions, 334 deletions
diff --git a/src/daemon/README.md b/src/daemon/README.md
new file mode 100644
index 000000000..bc2ec7757
--- /dev/null
+++ b/src/daemon/README.md
@@ -0,0 +1,438 @@

# Netdata daemon

The Netdata daemon is practically a synonym for the Netdata Agent, as it controls its
entire operation. We support various methods to
[start, stop, or restart the daemon](/packaging/installer/README.md#maintaining-a-netdata-agent-installation).

This document provides basic information on the command line options and the log files, and explains how to debug and
troubleshoot the daemon.

## Command line options

Normally you don't need to supply any command line arguments to netdata.

If you do, they override the equivalent options in the configuration file.

To get a list of all supported command line parameters, run:

```sh
netdata -h
```

The program will print the supported command line parameters.

The command line options of the Netdata 1.10.0 version are the following:

```sh
 ^
 |.-.   .-.   .-.   .-.   .  netdata
 |   '-'   '-'   '-'   '-'   real-time performance monitoring, done right!
 +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->

 Copyright (C) 2016-2022, Netdata, Inc. <info@netdata.cloud>
 Released under GNU General Public License v3 or later.
 All rights reserved.

 Home Page  : https://netdata.cloud
 Source Code: https://github.com/netdata/netdata
 Docs       : https://learn.netdata.cloud
 Support    : https://github.com/netdata/netdata/issues
 License    : https://github.com/netdata/netdata/blob/master/LICENSE.md

 Twitter    : https://twitter.com/netdatahq
 LinkedIn   : https://linkedin.com/company/netdata-cloud/
 Facebook   : https://facebook.com/linuxnetdata/


 SYNOPSIS: netdata [options]

 Options:

  -c filename              Configuration file to load.
                           Default: /etc/netdata/netdata.conf

  -D                       Do not fork. Run in the foreground.
                           Default: run in the background

  -h                       Display this help message.

  -P filename              File to save a pid while running.
                           Default: do not save pid to a file

  -i IP                    The IP address to listen to.
                           Default: all IP addresses IPv4 and IPv6

  -p port                  API/Web port to use.
                           Default: 19999

  -s path                  Prefix for /proc and /sys (for containers).
                           Default: no prefix

  -t seconds               The internal clock of netdata.
                           Default: 1

  -u username              Run as user.
                           Default: netdata

  -v                       Print netdata version and exit.

  -V                       Print netdata version and exit.

  -W options               See Advanced options below.


 Advanced options:

  -W stacksize=N           Set the stacksize (in bytes).

  -W debug_flags=N         Set runtime tracing to debug.log.

  -W unittest              Run internal unittests and exit.

  -W createdataset=N       Create a DB engine dataset of N seconds and exit.

  -W set section option value
                           set netdata.conf option from the command line.

  -W buildinfo             Print the version, the configure options,
                           a list of optional features, and whether they
                           are enabled or not.

  -W buildinfojson         Print the version, the configure options,
                           a list of optional features, and whether they
                           are enabled or not, in JSON format.

  -W simple-pattern pattern string
                           Check if string matches pattern and exit.

  -W "claim -token=TOKEN -rooms=ROOM1,ROOM2 url=https://app.netdata.cloud"
                           Connect the agent to the workspace Rooms pointed to by TOKEN and ROOM*.

 Signals netdata handles:

  - HUP                    Close and reopen log files.
  - USR2                   Reload health configuration.
```

You can send commands during runtime via [netdatacli](/src/cli/README.md).
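For example, to inspect the build and override a `netdata.conf` option for a single foreground run (a quick sketch using only the options listed above; the shell quoting of the multi-word option name is an assumption):

```sh
# print optional features and whether they are enabled
netdata -W buildinfo

# run in the foreground, setting a netdata.conf option for this run only
netdata -D -W set global "process scheduling policy" idle
```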
## Log files

Netdata uses 4 log files:

1. `error.log`
2. `collector.log`
3. `access.log`
4. `debug.log`

Any of them can be disabled by setting it to `/dev/null` or `none` in `netdata.conf`. By default `error.log`,
`collector.log`, and `access.log` are enabled. `debug.log` is only enabled if debugging/tracing is also enabled
(Netdata needs to be compiled with debugging enabled).

Log files are stored in `/var/log/netdata/` by default.

### error.log

The `error.log` is the `stderr` of the `netdata` daemon.

For most Netdata programs (including standard external plugins shipped by Netdata), the following tags may appear in
the log lines:

| tag | description |
|:-:|:----------|
| `INFO` | Something important the user should know. |
| `ERROR` | Something that might disable a part of Netdata.<br/>The log line includes `errno` (if it is not zero). |
| `FATAL` | Something prevented a program from running.<br/>The log line includes `errno` (if it is not zero) and the program exited. |

The `FATAL` and `ERROR` messages will always appear in the logs; `INFO` messages can be filtered using the
[severity level](/src/daemon/config/README.md#logs-section-options) option.

So, when auto-detection of data collection fails, `ERROR` lines are logged and the relevant modules are disabled, but the
program continues to run.

When a Netdata program cannot run at all, a `FATAL` line is logged.

### collector.log

The `collector.log` is the `stderr` of all [collectors](/src/collectors/COLLECTORS.md) run by `netdata`.

So, if any process in the Netdata process tree writes anything to its standard error,
it will appear in `collector.log`.

Data stored in this file follows the pattern already described for `error.log`.

### access.log

The `access.log` logs web requests. The format is:

```txt
DATE: ID: (sent/all = SENT_BYTES/ALL_BYTES bytes PERCENT_COMPRESSION%, prep/sent/total PREP_TIME/SENT_TIME/TOTAL_TIME ms): ACTION CODE URL
```

where:

- `ID` is the client ID. Client IDs are auto-incremented every time a client connects to netdata.
- `SENT_BYTES` is the number of bytes sent to the client, without the HTTP response header.
- `ALL_BYTES` is the number of bytes of the response, before compression.
- `PERCENT_COMPRESSION` is the percentage of traffic saved due to compression.
- `PREP_TIME` is the time in milliseconds needed to prepare the response.
- `SENT_TIME` is the time in milliseconds needed to send the response to the client.
- `TOTAL_TIME` is the total time the request was inside Netdata (from the first byte of the request to the last byte
  of the response).
- `ACTION` can be `filecopy`, `options` (used in CORS), or `data` (API call).

### debug.log

See [debugging](#debugging).
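The log locations, and the `none` value that disables a log, are set in the `[logs]` section of `netdata.conf`, documented in the [daemon configuration reference](/src/daemon/config/README.md#logs-section-options). A minimal sketch that disables the debug log and keeps the defaults for the rest:

```conf
[logs]
    debug = none
    error = /var/log/netdata/error.log
    access = /var/log/netdata/access.log
```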
## Netdata process scheduling policy

By default, Netdata versions prior to 1.34.0 run with the `idle` process scheduling policy, so that they use CPU
resources only when there is idle CPU to spare. On very busy servers (or weak servers), this can lead to gaps in
the charts.

Starting with version 1.34.0, Netdata instead uses the `batch` scheduling policy by default. This largely eliminates
issues with gaps in charts on busy systems while still keeping the impact on the rest of the system low.

You can set the Netdata scheduling policy in `netdata.conf`, like this:

```conf
[global]
    process scheduling policy = idle
```

You can use the following:

| policy | description |
| :-----------------------: | :---------- |
| `idle` | use CPU only when there is spare - this is lower than nice 19 - it was the default for Netdata prior to version 1.34.0, and it is so low that Netdata will run in "slow motion" under extreme system load, resulting in short (1-2 second) gaps in the charts. |
| `other`<br/>or<br/>`nice` | this is the default policy for all processes under Linux. It provides dynamic priorities based on the `nice` level of each process. Check below for setting this `nice` level for Netdata. |
| `batch` | This policy is similar to `other` in that it schedules the thread according to its dynamic priority (based on the `nice` value). The difference is that this policy will cause the scheduler to always assume that the thread is CPU-intensive. Consequently, the scheduler will apply a small scheduling penalty with respect to wake-up behavior, so that this thread is mildly disfavored in scheduling decisions. |
| `fifo` | `fifo` can be used only with static priorities higher than 0, which means that when a `fifo` thread becomes runnable, it will always immediately preempt any currently running `other`, `batch`, or `idle` thread. `fifo` is a simple scheduling algorithm without time slicing. |
| `rr` | a simple enhancement of `fifo`. Everything described above for `fifo` also applies to `rr`, except that each thread is allowed to run only for a maximum time quantum. |
| `keep`<br/>or<br/>`none` | do not set scheduling policy, priority or nice level - i.e. keep running with whatever is already set (e.g. by systemd). |

For more information see `man sched`.

### Scheduling priority for `rr` and `fifo`

Once the policy is set to one of `rr` or `fifo`, the following will appear:

```conf
[global]
    process scheduling priority = 0
```

These priorities are usually from 0 to 99. Higher numbers make the process more
important.

### nice level for policies `other` or `batch`

When the policy is set to `other`, `nice`, or `batch`, the following will appear:

```conf
[global]
    process nice level = 19
```
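To verify which policy and priority the running daemon actually got, you can use the `chrt` tool from util-linux (a quick check, assuming a single `netdata` process is running):

```sh
chrt -p $(pidof netdata)
```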
## Scheduling settings and systemd

Netdata will not be able to set its scheduling policy and priority to more important values when it is started as the
`netdata` user (the systemd case).

You can set these settings in `/etc/systemd/system/netdata.service`:

```sh
[Service]
# By default Netdata switches to scheduling policy idle, which makes it use CPU, only
# when there is spare available.
# Valid policies: other (the system default) | batch | idle | fifo | rr
#CPUSchedulingPolicy=other

# This sets the maximum scheduling priority Netdata can set (for policies: rr and fifo).
# Netdata (via [global].process scheduling priority in netdata.conf) can only lower this value.
# Priority gets values 1 (lowest) to 99 (highest).
#CPUSchedulingPriority=1

# For scheduling policy 'other' and 'batch', this sets the lowest niceness of netdata.
# Netdata (via [global].process nice level in netdata.conf) can only increase the value set here.
#Nice=0
```

Run `systemctl daemon-reload` to reload these changes.

Now, tell Netdata to keep these settings, as set by systemd, by editing
`netdata.conf` and setting:

```conf
[global]
    process scheduling policy = keep
```

Using the above, whatever scheduling settings you have set in `netdata.service`
will be maintained by Netdata.

### Example 1: Netdata with nice -1 on non-systemd systems

On a system that is not based on systemd, to make Netdata run with nice level -1 (a little higher than the default for
all programs), edit `netdata.conf` and set:

```conf
[global]
    process scheduling policy = other
    process nice level = -1
```

then [restart Netdata](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) using your init system.

### Example 2: Netdata with nice -1 on systemd systems

On a system that is based on systemd, to make Netdata run with nice level -1 (a little higher than the default for all
programs), edit `netdata.conf` and set:

```conf
[global]
    process scheduling policy = keep
```

edit `/etc/systemd/system/netdata.service` and set:

```sh
[Service]
CPUSchedulingPolicy=other
Nice=-1
```

then execute:

```sh
sudo systemctl daemon-reload
sudo systemctl restart netdata
```

## Virtual memory

You may notice that Netdata's virtual memory size, as reported by `ps` or `/proc/pid/status` (or even Netdata's
applications virtual memory chart), is unrealistically high.

For example, it may be reported to be 150+ MB, even if the resident memory size is just 25 MB. Similar values may be
reported for Netdata plugins too.

Check this for example: a Netdata installation with default settings on Ubuntu
16.04 LTS. The top chart is **real memory used**, while the bottom one is
**virtual memory**:

![image](https://cloud.githubusercontent.com/assets/2662304/19013772/5eb7173e-87e3-11e6-8f2b-a2ccfeb06faf.png)

### Why does this happen?

The system memory allocator allocates virtual memory arenas per running thread. On Linux systems this defaults to 16MB
per thread on 64 bit machines. So, if you take the difference between real and virtual memory and divide it by 16MB, you
will roughly get the number of threads running.

The system does this for speed. Having a separate memory arena for each thread allows the threads to run in parallel on
multi-core systems, without any locks between them.

This behaviour is system specific. For example, this is the same chart when running
Netdata on Alpine Linux (which uses **musl** instead of **glibc**):

![image](https://cloud.githubusercontent.com/assets/2662304/19013807/7cf5878e-87e4-11e6-9651-082e68701eab.png)

### Can we do anything to lower it?

Since Netdata already uses minimal memory allocations while it runs (i.e. it adapts its memory on start, so that while
it repeatedly collects data it does not do memory allocations), it already instructs the system memory allocator to
minimize the memory arenas for each thread. We have also added [2 configuration
options](https://github.com/netdata/netdata/blob/5645b1ee35248d94e6931b64a8688f7f0d865ec6/src/main.c#L410-L418) to let
you tweak these settings: `glibc malloc arena max for plugins` and `glibc malloc arena max for netdata`.

However, even if we instructed the memory allocator to use just one arena, it
seems it allocates an arena per thread.

Netdata also supports `jemalloc` and `tcmalloc`; however, both behave exactly the
same as the glibc memory allocator in this aspect.
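If you want to experiment with the two `glibc malloc arena max` options mentioned above, both live in the `[global]` section of `netdata.conf` (a sketch; `1` is already the default, so this merely makes the setting explicit):

```conf
[global]
    glibc malloc arena max for netdata = 1
    glibc malloc arena max for plugins = 1
```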
### Is this a problem?

No, it is not.

Linux reserves real memory (physical RAM) in pages (on x86 machines pages are 4KB each). So even if the system memory
allocator is allocating huge amounts of virtual memory, only the 4KB pages that are actually used are reserving physical
RAM. The **real memory** chart in the Netdata applications section shows the amount of physical memory these pages
occupy (it accounts for whole pages, even if only parts of them are actually used).

## Debugging

When you compile Netdata with debugging:

1. compiler optimizations for your CPU are disabled (Netdata will run somewhat slower)

2. a lot of code is added all over netdata, to log debug messages to `/var/log/netdata/debug.log`. However, nothing is
   printed by default. Netdata allows you to select which sections of Netdata you want to trace. Tracing is activated
   via the config option `debug flags`. It accepts a hex number, to enable or disable specific sections. You can find
   the options supported at [log.h](https://raw.githubusercontent.com/netdata/netdata/master/src/libnetdata/log/log.h).
   They are the `D_*` defines. The value `0xffffffffffffffff` will enable all possible debug flags.

Once Netdata is compiled with debugging and tracing is enabled for a few sections, the file `/var/log/netdata/debug.log`
will contain the messages.

> Do not forget to disable tracing (`debug flags = 0`) when you are done tracing. The file `debug.log` can grow too
> fast.

### Compiling Netdata with debugging

To compile Netdata with debugging, use this:

```sh
# step into the Netdata source directory
cd /usr/src/netdata.git

# run the installer with debugging enabled
CFLAGS="-O1 -ggdb -DNETDATA_INTERNAL_CHECKS=1" ./netdata-installer.sh
```

The above will compile and install Netdata with debugging info embedded. You can now use `debug flags` to set the
section(s) you need to trace.

### Debugging crashes

We have done our best to make Netdata crash-free. If, however, Netdata crashes on your system, it would be very helpful
to provide stack traces of the crash. Without them, it will be almost impossible to find the issue (the code base is
too large to find such an issue by just observing it).

To provide stack traces, **you need to have Netdata compiled with debugging**. There is no need to enable any tracing
(`debug flags`).

Then you need to be in one of the following two cases:

1. Netdata crashes and you have a core dump

2. you can reproduce the crash

If you are not in one of these cases, you need to find a way to be (i.e. if your system does not produce core dumps,
check your distro documentation to enable them).

### Netdata crashes and you have a core dump

> you need to have Netdata compiled with debugging info for this to work (check above)

Run the following command and post the output in a GitHub issue:

```sh
gdb $(which netdata) /path/to/core/dump
```

### You can reproduce a Netdata crash on your system

> you need to have Netdata compiled with debugging info for this to work (check above)

Install the package `valgrind` and run:

```sh
valgrind $(which netdata) -D
```

Netdata will start and it will be a lot slower. Now reproduce the crash and `valgrind` will dump the
stack trace on your console. Open a new GitHub issue and post the output.
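If the console output is hard to capture, `valgrind` can also write its report to a file (a generic valgrind option, not something Netdata-specific):

```sh
valgrind --log-file=/tmp/netdata-valgrind.log $(which netdata) -D
```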
diff --git a/daemon/analytics.c b/src/daemon/analytics.c
index 353ebd136..33f6f357f 100644
--- a/daemon/analytics.c
+++ b/src/daemon/analytics.c
@@ -7,7 +7,6 @@ struct analytics_data analytics_data;
 extern void analytics_exporting_connectors (BUFFER *b);
 extern void analytics_exporting_connectors_ssl (BUFFER *b);
 extern void analytics_build_info (BUFFER *b);
-extern int aclk_connected;

 struct collector {
     const char *plugin;
@@ -224,7 +223,7 @@ void analytics_mirrored_hosts(void)
         count++;
     }
-    rrd_unlock();
+    rrd_rdunlock();

     snprintfz(b, sizeof(b) - 1, "%zu", count);
     analytics_set_data(&analytics_data.netdata_mirrored_host_count, b);
@@ -471,7 +470,7 @@ void analytics_alarms(void)
  */
 void analytics_misc(void)
 {
-    analytics_data.spinlock.locked = false;
+    spinlock_init(&analytics_data.spinlock);

 #ifdef ENABLE_ACLK
     analytics_set_data(&analytics_data.netdata_host_cloud_available, "true");
@@ -489,7 +488,7 @@ void analytics_misc(void)
     if (strcmp(
             config_get(CONFIG_SECTION_REGISTRY, "registry to announce", "https://registry.my-netdata.io"),
-            "https://registry.my-netdata.io"))
+            "https://registry.my-netdata.io") != 0)
         analytics_set_data(&analytics_data.netdata_config_use_private_registry, "true");

     //do we need both registry to announce and enabled to indicate that this is a private registry ?
@@ -563,9 +562,11 @@ void analytics_gather_mutable_meta_data(void)
     }
 }

-void analytics_main_cleanup(void *ptr)
+void analytics_main_cleanup(void *pptr)
 {
-    struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr;
+    struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr);
+    if(!static_thread) return;
+
     static_thread->enabled = NETDATA_MAIN_THREAD_EXITING;

     netdata_log_debug(D_ANALYTICS, "Cleaning up...");
@@ -582,7 +583,7 @@ void analytics_main_cleanup(void *ptr)
  */
 void *analytics_main(void *ptr)
 {
-    netdata_thread_cleanup_push(analytics_main_cleanup, ptr);
+    CLEANUP_FUNCTION_REGISTER(analytics_main_cleanup) cleanup_ptr = ptr;
     unsigned int sec = 0;
     heartbeat_t hb;
     heartbeat_init(&hb);
@@ -601,7 +602,9 @@ void *analytics_main(void *ptr)
     analytics_gather_immutable_meta_data();
     analytics_gather_mutable_meta_data();
-    send_statistics("META_START", "-", "-");
+
+    analytics_statistic_t statistic = { "META_START", "-", "-" };
+    analytics_statistic_send(&statistic);

     analytics_log_data();

     sec = 0;
@@ -616,13 +619,15 @@ void *analytics_main(void *ptr)
             continue;

         analytics_gather_mutable_meta_data();
-        send_statistics("META", "-", "-");
+
+        analytics_statistic_t stt = { "META", "-", "-" };
+        analytics_statistic_send(&stt);

         analytics_log_data();
+
         sec = 0;
     }

 cleanup:
-    netdata_thread_cleanup_pop(1);
     return NULL;
 }
@@ -720,7 +725,7 @@ void get_system_timezone(void)
     }

     // use the contents of /etc/timezone
-    if (!timezone && !read_file("/etc/timezone", buffer, FILENAME_MAX)) {
+    if (!timezone && !read_txt_file("/etc/timezone", buffer, sizeof(buffer))) {
         timezone = buffer;
         netdata_log_info("TIMEZONE: using the contents of /etc/timezone");
     }
@@ -773,7 +778,7 @@ void get_system_timezone(void)
         *d = '\0';
         while (*timezone) {
-            if (isalnum(*timezone) || *timezone == '_' || *timezone == '/')
+            if (isalnum((uint8_t)*timezone) || *timezone == '_' || *timezone == '/')
                 *d++ = *timezone++;
             else
                 timezone++;
@@ -812,11 +817,11 @@ void get_system_timezone(void)
         } else {
             sign[0] = zone[0] == '-' || zone[0] == '+' ? zone[0] : '0';
             sign[1] = '\0';
-            hh[0] = isdigit(zone[1]) ? zone[1] : '0';
-            hh[1] = isdigit(zone[2]) ? zone[2] : '0';
+            hh[0] = isdigit((uint8_t)zone[1]) ? zone[1] : '0';
+            hh[1] = isdigit((uint8_t)zone[2]) ? zone[2] : '0';
             hh[2] = '\0';
-            mm[0] = isdigit(zone[3]) ? zone[3] : '0';
-            mm[1] = isdigit(zone[4]) ? zone[4] : '0';
+            mm[0] = isdigit((uint8_t)zone[3]) ? zone[3] : '0';
+            mm[1] = isdigit((uint8_t)zone[4]) ? zone[4] : '0';
             mm[2] = '\0';

             netdata_configured_utc_offset = (str2i(hh) * 3600) + (str2i(mm) * 60);
@@ -837,7 +842,7 @@ void set_global_environment() {
         setenv("NETDATA_UPDATE_EVERY", b, 1);
     }

-    setenv("NETDATA_VERSION", program_version, 1);
+    setenv("NETDATA_VERSION", NETDATA_VERSION, 1);
     setenv("NETDATA_HOSTNAME", netdata_configured_hostname, 1);
     setenv("NETDATA_CONFIG_DIR", verify_required_directory(netdata_configured_user_config_dir), 1);
     setenv("NETDATA_USER_CONFIG_DIR", verify_required_directory(netdata_configured_user_config_dir), 1);
@@ -943,7 +948,10 @@ void set_global_environment() {
     setenv("LC_ALL", "C", 1);
 }

-void send_statistics(const char *action, const char *action_result, const char *action_data) {
+void analytics_statistic_send(const analytics_statistic_t *statistic) {
+    if (!statistic)
+        return;
+
     static char *as_script;

     if (netdata_anonymous_statistics_enabled == -1) {
@@ -980,16 +988,19 @@
         freez(optout_file);
     }

-    if (!netdata_anonymous_statistics_enabled || !action)
+    if (!netdata_anonymous_statistics_enabled || !statistic->action)
         return;

-    if (!action_result)
+    const char *action_result = statistic->result;
+    const char *action_data = statistic->data;
+
+    if (!statistic->result)
         action_result = "";
-    if (!action_data)
+    if (!statistic->data)
         action_data = "";

     char *command_to_run = mallocz(
-        sizeof(char) * (strlen(action) + strlen(action_result) + strlen(action_data) + strlen(as_script) +
+        sizeof(char) * (strlen(statistic->action) + strlen(action_result) + strlen(action_data) + strlen(as_script) +
                         analytics_data.data_length + (ANALYTICS_NO_OF_ITEMS * 3) + 15));
     pid_t command_pid;
@@ -997,7 +1008,7 @@
         command_to_run,
         "%s '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' '%s' ",
         as_script,
-        action,
+        statistic->action,
         action_result,
         action_data,
         analytics_data.netdata_config_stream_enabled,
@@ -1043,7 +1054,7 @@
     nd_log(NDLS_DAEMON, NDLP_DEBUG,
            "%s '%s' '%s' '%s'",
-           as_script, action, action_result, action_data);
+           as_script, statistic->action, action_result, action_data);

     FILE *fp_child_input;
     FILE *fp_child_output = netdata_popen(command_to_run, &command_pid, &fp_child_input);
diff --git a/daemon/analytics.h b/src/daemon/analytics.h
index de8d569f9..501eb7b55 100644
--- a/daemon/analytics.h
+++ b/src/daemon/analytics.h
@@ -9,7 +9,7 @@
 #define ANALYTICS_INIT_SLEEP_SEC 120

 /* Send a META event every X seconds */
-#define ANALYTICS_HEARTBEAT 7200
+#define ANALYTICS_HEARTBEAT (6 * 3600)

 /* Maximum number of hits to log */
 #define ANALYTICS_MAX_PROMETHEUS_HITS 255
@@ -79,7 +79,6 @@ struct analytics_data {
 void set_late_global_environment(struct rrdhost_system_info *system_info);
 void analytics_free_data(void);
 void set_global_environment(void);
-void send_statistics(const char *action, const char *action_result, const char *action_data);
 void analytics_log_shell(void);
 void analytics_log_json(void);
 void analytics_log_prometheus(void);
@@ -88,6 +87,14 @@ void analytics_gather_mutable_meta_data(void);
 void analytics_report_oom_score(long long int score);
 void get_system_timezone(void);

+typedef struct {
+    const char *action;
+    const char *result;
+    const char *data;
+} analytics_statistic_t;
+
+void analytics_statistic_send(const analytics_statistic_t *statistic);
+
 extern struct analytics_data analytics_data;

 #endif //NETDATA_ANALYTICS_H
diff --git a/daemon/anonymous-statistics.sh.in b/src/daemon/anonymous-statistics.sh.in
index d12e7e32a..d12e7e32a 100755
--- a/daemon/anonymous-statistics.sh.in
+++ b/src/daemon/anonymous-statistics.sh.in
diff --git a/daemon/buildinfo.c b/src/daemon/buildinfo.c
index 41af56af8..4ee5b43de 100644
--- a/daemon/buildinfo.c
+++ b/src/daemon/buildinfo.c
@@ -57,8 +57,6 @@ typedef enum __attribute__((packed)) {
     BIB_DB_DBENGINE,
     BIB_DB_ALLOC,
     BIB_DB_RAM,
-    BIB_DB_MAP,
-    BIB_DB_SAVE,
     BIB_DB_NONE,
     BIB_CONNECTIVITY_ACLK,
     BIB_CONNECTIVITY_HTTPD_STATIC,
@@ -69,17 +67,14 @@ typedef enum __attribute__((packed)) {
     BIB_LIB_LZ4,
     BIB_LIB_ZSTD,
     BIB_LIB_ZLIB,
-    BIB_LIB_JUDY,
-    BIB_LIB_DLIB,
+    BIB_LIB_BROTLI,
     BIB_LIB_PROTOBUF,
     BIB_LIB_OPENSSL,
     BIB_LIB_LIBDATACHANNEL,
     BIB_LIB_JSONC,
     BIB_LIB_LIBCAP,
     BIB_LIB_LIBCRYPTO,
-    BIB_LIB_LIBM,
-    BIB_LIB_JEMALLOC,
-    BIB_LIB_TCMALLOC,
+    BIB_LIB_LIBYAML,
     BIB_PLUGIN_APPS,
     BIB_PLUGIN_LINUX_CGROUPS,
     BIB_PLUGIN_LINUX_CGROUP_NETWORK,
@@ -539,7 +534,7 @@ static struct {
         .category = BIC_DATABASE,
         .type = BIT_BOOLEAN,
         .analytics = "dbengine",
-        .print = "dbengine",
+        .print = "dbengine (compression)",
         .json = "dbengine",
         .value = NULL,
     },
@@ -559,22 +554,6 @@ static struct {
         .json = "ram",
         .value = NULL,
     },
-    [BIB_DB_MAP] = {
-        .category = BIC_DATABASE,
-        .type = BIT_BOOLEAN,
-        .analytics = NULL,
-        .print = "map",
-        .json = "map",
-        .value = NULL,
-    },
-    [BIB_DB_SAVE] = {
-        .category = BIC_DATABASE,
-        .type = BIT_BOOLEAN,
-        .analytics = NULL,
-        .print = "save",
-        .json = "save",
-        .value = NULL,
-    },
     [BIB_DB_NONE] = {
         .category = BIC_DATABASE,
         .type = BIT_BOOLEAN,
@@ -655,22 +634,13 @@ static struct {
         .json = "zlib",
         .value = NULL,
     },
-    [BIB_LIB_JUDY] = {
-        .category = BIC_LIBS,
-        .type = BIT_BOOLEAN,
-        .analytics = NULL,
-        .print = "Judy (high-performance dynamic arrays and hashtables)",
-        .json = "judy",
-        .status = true,
-        .value = "bundled",
-    },
-    [BIB_LIB_DLIB] = {
-        .category = BIC_LIBS,
-        .type = BIT_BOOLEAN,
-        .analytics = NULL,
-        .print = "dlib (robust machine learning toolkit)",
-        .json = "dlib",
-        .value = NULL,
+    [BIB_LIB_BROTLI] = {
+        .category = BIC_LIBS,
+        .type = BIT_BOOLEAN,
+        .analytics = NULL,
+        .print = "Brotli (generic-purpose lossless compression algorithm)",
+        .json = "brotli",
+        .value = NULL,
     },
     [BIB_LIB_PROTOBUF] = {
         .category = BIC_LIBS,
@@ -720,29 +690,13 @@ static struct {
         .json = "libcrypto",
         .value = NULL,
     },
-    [BIB_LIB_LIBM] = {
-        .category = BIC_LIBS,
-        .type = BIT_BOOLEAN,
-        .analytics = "libm",
-        .print = "libm (mathematical functions)",
-        .json = "libm",
-        .value = NULL,
-    },
-    [BIB_LIB_JEMALLOC] = {
-        .category = BIC_LIBS,
-        .type = BIT_BOOLEAN,
-        .analytics = "jemalloc",
-        .print = "jemalloc",
-        .json = "jemalloc",
-        .value = NULL,
-    },
-    [BIB_LIB_TCMALLOC] = {
-        .category = BIC_LIBS,
-        .type = BIT_BOOLEAN,
-        .analytics = "tcmalloc",
-        .print = "TCMalloc",
-        .json = "tcmalloc",
-        .value = NULL,
+    [BIB_LIB_LIBYAML] = {
+        .category = BIC_LIBS,
+        .type = BIT_BOOLEAN,
+        .analytics = "libyaml",
+        .print = "libyaml (library for parsing and emitting YAML)",
+        .json = "libyaml",
+        .value = NULL,
     },
     [BIB_PLUGIN_APPS] = {
         .category = BIC_PLUGINS,
@@ -1083,10 +1037,10 @@ static void build_info_set_status(BUILD_INFO_SLOT slot, bool status) {
 }

 __attribute__((constructor)) void initialize_build_info(void) {
-    build_info_set_value(BIB_PACKAGING_NETDATA_VERSION, program_version);
+    build_info_set_value(BIB_PACKAGING_NETDATA_VERSION, NETDATA_VERSION);
     build_info_set_value(BIB_PACKAGING_CONFIGURE_OPTIONS, CONFIGURE_COMMAND);

-#ifdef COMPILED_FOR_LINUX
+#ifdef OS_LINUX
     build_info_set_status(BIB_FEATURE_BUILT_FOR, true);
     build_info_set_value(BIB_FEATURE_BUILT_FOR, "Linux");
     build_info_set_status(BIB_PLUGIN_LINUX_CGROUPS, true);
@@ -1094,16 +1048,26 @@ __attribute__((constructor)) void initialize_build_info(void) {
     build_info_set_status(BIB_PLUGIN_LINUX_DISKSPACE, true);
     build_info_set_status(BIB_PLUGIN_LINUX_TC, true);
 #endif
-#ifdef COMPILED_FOR_FREEBSD
+#ifdef OS_FREEBSD
     build_info_set_status(BIB_FEATURE_BUILT_FOR, true);
     build_info_set_value(BIB_FEATURE_BUILT_FOR, "FreeBSD");
     build_info_set_status(BIB_PLUGIN_FREEBSD, true);
 #endif
-#ifdef COMPILED_FOR_MACOS
+#ifdef OS_MACOS
     build_info_set_status(BIB_FEATURE_BUILT_FOR, true);
     build_info_set_value(BIB_FEATURE_BUILT_FOR, "MacOS");
     build_info_set_status(BIB_PLUGIN_MACOS, true);
 #endif
+#ifdef OS_WINDOWS
+    build_info_set_status(BIB_FEATURE_BUILT_FOR, true);
+#if defined(__CYGWIN__) && defined(__MSYS__)
+    build_info_set_value(BIB_FEATURE_BUILT_FOR, "Windows (MSYS)");
+#elif defined(__CYGWIN__)
+    build_info_set_value(BIB_FEATURE_BUILT_FOR, "Windows (CYGWIN)");
+#else
+    build_info_set_value(BIB_FEATURE_BUILT_FOR, "Windows");
+#endif
+#endif

 #ifdef ENABLE_ACLK
     build_info_set_status(BIB_FEATURE_CLOUD, true);
@@ -1124,9 +1088,6 @@ __attribute__((constructor)) void initialize_build_info(void) {

     build_info_set_status(BIB_FEATURE_STREAMING_COMPRESSION, true);

-#ifdef ENABLE_BROTLI
-    build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "brotli");
-#endif
 #ifdef ENABLE_ZSTD
     build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "zstd");
 #endif
@@ -1134,6 +1095,9 @@ __attribute__((constructor)) void initialize_build_info(void) {
     build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "lz4");
 #endif
     build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "gzip");
+#ifdef ENABLE_BROTLI
+    build_info_append_value(BIB_FEATURE_STREAMING_COMPRESSION, "brotli");
+#endif

     build_info_set_status(BIB_FEATURE_CONTEXTS, true);
     build_info_set_status(BIB_FEATURE_TIERING, true);
@@ -1144,11 +1108,15 @@ __attribute__((constructor)) void initialize_build_info(void) {

 #ifdef ENABLE_DBENGINE
     build_info_set_status(BIB_DB_DBENGINE, true);
+#ifdef ENABLE_ZSTD
+    build_info_append_value(BIB_DB_DBENGINE, "zstd");
+#endif
+#ifdef ENABLE_LZ4
+    build_info_append_value(BIB_DB_DBENGINE, "lz4");
+#endif
 #endif
     build_info_set_status(BIB_DB_ALLOC, true);
     build_info_set_status(BIB_DB_RAM, true);
-    build_info_set_status(BIB_DB_MAP, true);
-    build_info_set_status(BIB_DB_SAVE, true);
     build_info_set_status(BIB_DB_NONE, true);

     build_info_set_status(BIB_CONNECTIVITY_HTTPD_STATIC, true);
@@ -1171,6 +1139,9 @@ __attribute__((constructor)) void initialize_build_info(void) {
 #ifdef ENABLE_ZSTD
     build_info_set_status(BIB_LIB_ZSTD, true);
 #endif
+#ifdef ENABLE_BROTLI
+    build_info_set_status(BIB_LIB_BROTLI, true);
+#endif

     build_info_set_status(BIB_LIB_ZLIB, true);

@@ -1203,17 +1174,11 @@ __attribute__((constructor)) void initialize_build_info(void) {
 #ifdef HAVE_CRYPTO
     build_info_set_status(BIB_LIB_LIBCRYPTO, true);
 #endif
-#ifdef STORAGE_WITH_MATH
-    build_info_set_status(BIB_LIB_LIBM, true);
-#endif
-#ifdef ENABLE_JEMALLOC
-    build_info_set_status(BIB_LIB_JEMALLOC, true);
-#endif
-#ifdef ENABLE_TCMALLOC
-    build_info_set_status(BIB_LIB_TCMALLOC, true);
+#ifdef HAVE_LIBYAML
+    build_info_set_status(BIB_LIB_LIBYAML, true);
 #endif

-#ifdef ENABLE_APPS_PLUGIN
+#ifdef ENABLE_PLUGIN_APPS
     build_info_set_status(BIB_PLUGIN_APPS, true);
 #endif
 #ifdef HAVE_SETNS
@@ -1225,28 +1190,28 @@ __attribute__((constructor)) void initialize_build_info(void) {
     build_info_set_status(BIB_PLUGIN_IDLEJITTER, true);
     build_info_set_status(BIB_PLUGIN_BASH, true);

-#ifdef ENABLE_DEBUGFS_PLUGIN
+#ifdef ENABLE_PLUGIN_DEBUGFS
     build_info_set_status(BIB_PLUGIN_DEBUGFS, true);
 #endif
-#ifdef HAVE_CUPS
+#ifdef ENABLE_PLUGIN_CUPS
     build_info_set_status(BIB_PLUGIN_CUPS, true);
 #endif
-#ifdef HAVE_LIBBPF
+#ifdef ENABLE_PLUGIN_EBPF
     build_info_set_status(BIB_PLUGIN_EBPF, true);
 #endif
-#ifdef HAVE_FREEIPMI
+#ifdef ENABLE_PLUGIN_FREEIPMI
     build_info_set_status(BIB_PLUGIN_FREEIPMI, true);
 #endif
-#ifdef HAVE_NFACCT
+#ifdef ENABLE_PLUGIN_NFACCT
     build_info_set_status(BIB_PLUGIN_NFACCT, true);
 #endif
-#ifdef ENABLE_PERF_PLUGIN
+#ifdef ENABLE_PLUGIN_PERF
     build_info_set_status(BIB_PLUGIN_PERF, true);
 #endif
-#ifdef ENABLE_SLABINFO
+#ifdef ENABLE_PLUGIN_SLABINFO
     build_info_set_status(BIB_PLUGIN_SLABINFO, true);
 #endif
-#ifdef HAVE_LIBXENSTAT
+#ifdef ENABLE_PLUGIN_XENSTAT
     build_info_set_status(BIB_PLUGIN_XEN, true);
 #endif
 #ifdef HAVE_XENSTAT_VBD_ERROR
@@ -1499,7 +1464,7 @@ void print_build_info(void) {
     print_build_info_category_to_console(BIC_PLUGINS, "Plugins");
     print_build_info_category_to_console(BIC_EXPORTERS, "Exporters");
     print_build_info_category_to_console(BIC_DEBUG_DEVEL, "Debug/Developer Features");
-};
+}

 void build_info_to_json_object(BUFFER *b) {
     populate_packaging_info();
@@ -1533,7 +1498,7 @@ void print_build_info_json(void) {
     buffer_json_finalize(b);
     printf("%s\n", buffer_tostring(b));
     buffer_free(b);
-};
+}

 void analytics_build_info(BUFFER *b) {
     populate_packaging_info();
diff --git a/daemon/buildinfo.h b/src/daemon/buildinfo.h
index 1bb1c9760..1bb1c9760 100644
--- a/daemon/buildinfo.h
+++ b/src/daemon/buildinfo.h
diff --git a/daemon/commands.c b/src/daemon/commands.c
index ed544224e..70ba11d42 100644
--- a/daemon/commands.c
+++ b/src/daemon/commands.c
@@ -36,7 +36,6 @@ struct command_context {
 /* Forward declarations */
 static cmd_status_t cmd_help_execute(char *args, char **message);
 static cmd_status_t cmd_reload_health_execute(char *args, char **message);
-static cmd_status_t cmd_save_database_execute(char *args, char **message);
 static cmd_status_t cmd_reopen_logs_execute(char *args, char **message);
 static cmd_status_t cmd_exit_execute(char *args, char **message);
 static cmd_status_t cmd_fatal_execute(char *args, char **message);
@@ -48,11 +47,13 @@ static cmd_status_t cmd_ping_execute(char *args, char **message);
 static cmd_status_t cmd_aclk_state(char *args, char **message);
 static cmd_status_t cmd_version(char *args, char **message);
 static cmd_status_t cmd_dumpconfig(char *args, char **message);
+#ifdef ENABLE_ACLK
+static cmd_status_t cmd_remove_node(char *args, char **message);
+#endif

 static command_info_t command_info_array[] = {
     {"help", cmd_help_execute, CMD_TYPE_HIGH_PRIORITY},                  // show help menu
     {"reload-health", cmd_reload_health_execute, CMD_TYPE_ORTHOGONAL},   // reload health configuration
-    {"save-database", cmd_save_database_execute, CMD_TYPE_ORTHOGONAL},   // save database for memory mode save
     {"reopen-logs", cmd_reopen_logs_execute, CMD_TYPE_ORTHOGONAL},       // Close and reopen log files
     {"shutdown-agent", cmd_exit_execute, CMD_TYPE_EXCLUSIVE},            // exit cleanly
     {"fatal-agent", cmd_fatal_execute, CMD_TYPE_HIGH_PRIORITY},          // exit with fatal error
@@ -63,7 +64,10 @@ static command_info_t command_info_array[] = {
     {"ping", cmd_ping_execute, CMD_TYPE_ORTHOGONAL},
     {"aclk-state", cmd_aclk_state, CMD_TYPE_ORTHOGONAL},
     {"version", cmd_version, CMD_TYPE_ORTHOGONAL},
-    {"dumpconfig", cmd_dumpconfig, CMD_TYPE_ORTHOGONAL}
+    {"dumpconfig", cmd_dumpconfig, CMD_TYPE_ORTHOGONAL},
+#ifdef ENABLE_ACLK
+    {"remove-stale-node", cmd_remove_node, CMD_TYPE_ORTHOGONAL}
+#endif
 };

 /* Mutexes for commands of type CMD_TYPE_ORTHOGONAL */
@@ -131,6 +135,10 @@ static cmd_status_t cmd_help_execute(char *args, char **message)
         "    Returns current state of ACLK and Cloud connection. (optionally in json).\n"
         "dumpconfig\n"
         "    Returns the current netdata.conf on stdout.\n"
+#ifdef ENABLE_ACLK
+        "remove-stale-node node_id|machine_guid\n"
+        "    Unregisters and removes a node from the cloud.\n"
+#endif
         "version\n"
         "    Returns the netdata version.\n",
         MAX_COMMAND_LENGTH - 1);
@@ -144,21 +152,7 @@ static cmd_status_t cmd_reload_health_execute(char *args, char **message)
     nd_log_limits_unlimited();
     netdata_log_info("COMMAND: Reloading HEALTH configuration.");
-    health_reload();
-    nd_log_limits_reset();
-
-    return CMD_STATUS_SUCCESS;
-}
-
-static cmd_status_t cmd_save_database_execute(char *args, char **message)
-{
-    (void)args;
-    (void)message;
-
-    nd_log_limits_unlimited();
-    netdata_log_info("COMMAND: Saving databases.");
-    rrdhost_save_all();
-    netdata_log_info("COMMAND: Databases saved.");
+    health_plugin_reload();
     nd_log_limits_reset();

     return CMD_STATUS_SUCCESS;
@@ -183,7 +177,7 @@ static cmd_status_t cmd_exit_execute(char *args, char **message)
     nd_log_limits_unlimited();
     netdata_log_info("COMMAND: Cleaning up to exit.");
-    netdata_cleanup_and_exit(0);
+    netdata_cleanup_and_exit(0, NULL, NULL, NULL);
     exit(0);

     return CMD_STATUS_SUCCESS;
@@ -218,6 +212,7 @@ static cmd_status_t cmd_reload_labels_execute(char *args, char **message)
     (void)args;
     netdata_log_info("COMMAND: reloading host labels.");
     reload_host_labels();
+    aclk_queue_node_info(localhost, 1);

     BUFFER *wb = buffer_create(10, NULL);
     rrdlabels_log_to_buffer(localhost->rrdlabels, wb);
@@ -324,7 +319,7 @@ static cmd_status_t cmd_version(char *args, char **message)
     (void)args;

     char version[MAX_COMMAND_LENGTH];
-    snprintfz(version, MAX_COMMAND_LENGTH -1, "%s %s", program_name, program_version);
+    snprintfz(version, MAX_COMMAND_LENGTH -1, "%s %s", program_name, NETDATA_VERSION);

     *message = strdupz(version);

@@ -342,6 +337,56 @@ static cmd_status_t cmd_dumpconfig(char *args, char **message)
     return CMD_STATUS_SUCCESS;
 }

+#ifdef ENABLE_ACLK
+static cmd_status_t cmd_remove_node(char *args, char **message)
+{
+    (void)args;
+
+    BUFFER *wb = buffer_create(1024, NULL);
+    if (strlen(args) == 0) {
+        buffer_sprintf(wb, "Please specify a machine or node UUID");
+        goto done;
+    }
+
+    RRDHOST *host = NULL;
+    host = rrdhost_find_by_guid(args);
+    if (!host)
+        host = find_host_by_node_id(args);
+
+    if (!host)
+        buffer_sprintf(wb, "Node with machine or node UUID \"%s\" not found", args);
+    else {
+
+        if (host == localhost) {
+            buffer_sprintf(wb, "You cannot unregister the parent node");
+            goto done;
+        }
+
+        if (rrdhost_is_online(host)) {
+            buffer_sprintf(wb, "Cannot unregister a live node");
+            goto done;
+        }
+
+        if (!rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST)) {
+            rrdhost_option_set(host, RRDHOST_OPTION_EPHEMERAL_HOST);
+            sql_set_host_label(&host->host_uuid, "_is_ephemeral", "true");
+            aclk_host_state_update(host, 0, 0);
+            unregister_node(host->machine_guid);
+            freez(host->node_id);
+            host->node_id = NULL;
+            buffer_sprintf(wb, "Unregistering node with machine guid %s, hostname = %s", host->machine_guid, rrdhost_hostname(host));
+        }
+        else
+            buffer_sprintf(wb, "Node with machine guid %s, hostname = %s is already unregistered", host->machine_guid, rrdhost_hostname(host));
+    }
+
+done:
+    *message = strdupz(buffer_tostring(wb));
+    buffer_free(wb);
+    return CMD_STATUS_SUCCESS;
+}
+#endif
+
 static void cmd_lock_exclusive(unsigned index)
 {
     (void)index;
@@ -499,15 +544,15 @@ static void parse_commands(struct command_context *cmd_ctx)
     status = CMD_STATUS_FAILURE;

     /* Skip white-space characters */
-    for (pos = cmd_ctx->command_string ; isspace(*pos) && ('\0' != *pos) ; ++pos) {;}
+    for (pos = cmd_ctx->command_string ; isspace((uint8_t)*pos) && ('\0' != *pos) ; ++pos) ;
     for (i = 0 ; i < CMD_TOTAL_COMMANDS ; ++i) {
         if (!strncmp(pos, command_info_array[i].cmd_str, strlen(command_info_array[i].cmd_str))) {
             if (CMD_EXIT == i) {
                 /* musl C does not like libuv workqueues calling exit() */
                 execute_command(CMD_EXIT, NULL, NULL);
             }
-            for (lstrip=pos + strlen(command_info_array[i].cmd_str); isspace(*lstrip) && ('\0' != *lstrip); ++lstrip) {;}
-            for (rstrip=lstrip+strlen(lstrip)-1; rstrip>lstrip && isspace(*rstrip); *(rstrip--) = 0 );
+            for (lstrip=pos + strlen(command_info_array[i].cmd_str); isspace((uint8_t)*lstrip) && ('\0' != *lstrip); ++lstrip) ;
+            for (rstrip=lstrip+strlen(lstrip)-1; rstrip>lstrip && isspace((uint8_t)*rstrip); *(rstrip--) = 0 ) ;

             cmd_ctx->work.data = cmd_ctx;
             cmd_ctx->idx = i;
@@ -612,8 +657,9 @@ static void async_cb(uv_async_t *handle)
     uv_stop(handle->loop);
 }

-static void command_thread(void *arg)
-{
+static void command_thread(void *arg) {
+    uv_thread_set_name_np("DAEMON_COMMAND");
+
     int ret;
     uv_fs_t req;

@@ -730,7 +776,6 @@ void commands_init(void)
     /* wait for worker thread to initialize */
     completion_wait_for(&completion);
     completion_destroy(&completion);
-    uv_thread_set_name_np(thread, "DAEMON_COMMAND");

     if (command_thread_error) {
         error = uv_thread_join(&thread);
diff --git a/daemon/commands.h b/src/daemon/commands.h
index 368a70a0f..14c2ec49e 100644
--- a/daemon/commands.h
+++ b/src/daemon/commands.h
@@ -3,13 +3,12 @@
 #ifndef NETDATA_COMMANDS_H
 #define NETDATA_COMMANDS_H 1

-#define MAX_COMMAND_LENGTH 4096
+#define MAX_COMMAND_LENGTH (8192)
 #define MAX_EXIT_STATUS_LENGTH 23 /* Can't ever be bigger than "X-18446744073709551616" */

 typedef enum cmd {
     CMD_HELP = 0,
     CMD_RELOAD_HEALTH,
-    CMD_SAVE_DATABASE,
     CMD_REOPEN_LOGS,
     CMD_EXIT,
     CMD_FATAL,
@@ -21,6 +20,9 @@ typedef enum cmd {
     CMD_ACLK_STATE,
     CMD_VERSION,
     CMD_DUMPCONFIG,
+#ifdef ENABLE_ACLK
+    CMD_REMOVE_NODE,
+#endif
     CMD_TOTAL_COMMANDS
 } cmd_t;
diff --git a/daemon/common.c b/src/daemon/common.c
index d441c73b6..a64d53585 100644
--- a/daemon/common.c
+++ b/src/daemon/common.c
@@ -31,9 +31,9 @@ long get_netdata_cpus(void) {
     if(processors)
         return processors;

-    long cores_proc_stat = get_system_cpus_with_cache(false, true);
-    long cores_cpuset_v1 = (long)read_cpuset_cpus("/sys/fs/cgroup/cpuset/cpuset.cpus", cores_proc_stat);
-    long cores_cpuset_v2 = (long)read_cpuset_cpus("/sys/fs/cgroup/cpuset.cpus", cores_proc_stat);
+    long cores_proc_stat = os_get_system_cpus_cached(false, true);
+    long cores_cpuset_v1 = (long)os_read_cpuset_cpus("/sys/fs/cgroup/cpuset/cpuset.cpus", cores_proc_stat);
+    long cores_cpuset_v2 = (long)os_read_cpuset_cpus("/sys/fs/cgroup/cpuset.cpus", cores_proc_stat);

     if(cores_cpuset_v2)
         processors = cores_cpuset_v2;
diff --git a/src/daemon/common.h b/src/daemon/common.h
new file mode 100644
index 000000000..102ec81e2
--- /dev/null
+++ b/src/daemon/common.h
@@ -0,0 +1,141 @@

// SPDX-License-Identifier: GPL-3.0-or-later

#ifndef NETDATA_COMMON_H
#define NETDATA_COMMON_H 1

#include "libnetdata/libnetdata.h"
#include "event_loop.h"

// ----------------------------------------------------------------------------
// shortcuts for the default netdata configuration

#define config_load(filename, overwrite_used, section) appconfig_load(&netdata_config, filename, overwrite_used, section)
#define config_get(section, name, default_value) appconfig_get(&netdata_config, section, name, default_value)
#define config_get_number(section, name, value) appconfig_get_number(&netdata_config, section, name, value)
#define config_get_float(section, name, value) appconfig_get_float(&netdata_config, section, name, value)
#define config_get_boolean(section, name, value) appconfig_get_boolean(&netdata_config, section, name, value)
#define config_get_boolean_ondemand(section, name, value) appconfig_get_boolean_ondemand(&netdata_config, section, name, value)
#define config_get_duration(section, name, value) appconfig_get_duration(&netdata_config, section, name, value)

#define config_set(section, name, default_value) appconfig_set(&netdata_config, section, name, default_value)
#define config_set_default(section, name, value) appconfig_set_default(&netdata_config, section, name, value)
#define config_set_number(section, name, value) appconfig_set_number(&netdata_config, section, name, value)
#define config_set_float(section, name, value) appconfig_set_float(&netdata_config, section, name, value)
#define config_set_boolean(section, name, value) appconfig_set_boolean(&netdata_config, section, name, value)

#define config_exists(section, name) appconfig_exists(&netdata_config, section, name)
#define config_move(section_old, name_old, section_new, name_new) appconfig_move(&netdata_config, section_old, name_old, section_new, name_new)

#define config_generate(buffer, only_changed) appconfig_generate(&netdata_config, buffer, only_changed)

#define config_section_destroy(section) appconfig_section_destroy_non_loaded(&netdata_config, section)
#define config_section_option_destroy(section, name) appconfig_section_option_destroy_non_loaded(&netdata_config, section, name)

// ----------------------------------------------------------------------------
// netdata include files

#include "daemon/config/dyncfg.h"

#include "global_statistics.h"

// health monitoring and alarm notifications
#include "health/health.h"

// the netdata database
#include "database/rrd.h"

// the netdata webserver(s)
#include "web/server/web_server.h"

// the new h2o based netdata webserver
#ifdef ENABLE_H2O
#include "web/server/h2o/http_server.h"
#endif

// streaming metrics between netdata servers
#include "streaming/rrdpush.h"

// anomaly detection
#include "ml/ml.h"

// the netdata registry
// the registry is actually an API feature
#include "registry/registry.h"

// exporting engine for archiving the metrics
#include "exporting/exporting_engine.h"

// the netdata API
#include "web/server/web_client.h"
#include "web/rtc/webrtc.h"

// all data collection plugins
#include "collectors/all.h"

// netdata unit tests
#include "unit_test.h"

// netdata agent claiming
#include "claim/claim.h"

// netdata agent cloud link
#include "aclk/aclk.h"

// global GUID map functions

// netdata agent spawn server
#include "spawn/spawn.h"

// the netdata daemon
#include "daemon.h"
#include "main.h"
#include "static_threads.h"
#include "signals.h"
#include "commands.h"
#include "pipename.h"
#include "analytics.h"

// global netdata daemon variables
extern char *netdata_configured_hostname;
extern char *netdata_configured_user_config_dir;
extern char *netdata_configured_stock_config_dir;
extern char *netdata_configured_log_dir;
extern char *netdata_configured_primary_plugins_dir;
extern char *netdata_configured_web_dir;
extern char *netdata_configured_cache_dir;
extern char *netdata_configured_varlib_dir;
extern char *netdata_configured_lock_dir;
extern char *netdata_configured_home_dir;
extern char *netdata_configured_host_prefix;
extern char *netdata_configured_timezone;
extern char *netdata_configured_abbrev_timezone;
extern int32_t netdata_configured_utc_offset;
extern int netdata_anonymous_statistics_enabled;

extern bool netdata_ready;
extern int netdata_cloud_enabled;

extern time_t netdata_start_time;

long get_netdata_cpus(void);

typedef enum __attribute__((packed)) {
    CLOUD_STATUS_UNAVAILABLE = 0,   // cloud and aclk functionality is not available on this agent
    CLOUD_STATUS_AVAILABLE,         // cloud and aclk functionality is available, but the agent is not claimed
    CLOUD_STATUS_DISABLED,          // cloud and aclk functionality is available, but it is disabled
    CLOUD_STATUS_BANNED,            // the agent has been banned from cloud
    CLOUD_STATUS_OFFLINE,           // the agent tries to connect to cloud, but cannot do it
    CLOUD_STATUS_ONLINE,            // the agent is connected to cloud
} CLOUD_STATUS;

const char *cloud_status_to_string(CLOUD_STATUS status);
CLOUD_STATUS cloud_status(void);
time_t cloud_last_change(void);
time_t cloud_next_connection_attempt(void);
size_t cloud_connection_id(void);
const char *cloud_offline_reason(void);
const char *cloud_base_url(void);
CLOUD_STATUS buffer_json_cloud_status(BUFFER *wb, time_t now_s);

#endif /* NETDATA_COMMON_H */
diff --git a/src/daemon/config/README.md b/src/daemon/config/README.md
new file mode 100644
index 000000000..c59f55620
--- /dev/null
+++ b/src/daemon/config/README.md
@@ -0,0 +1,231 @@

<!--
title: "Daemon configuration"
description: "The Netdata Agent's daemon is installed preconfigured to collect thousands of metrics every second, but is highly configurable for real-world workloads."
custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/daemon/config/README.md"
sidebar_label: "Daemon"
learn_status: "Published"
learn_rel_path: "Configuration"
learn_doc_purpose: "Explain the daemon options, the log files, the process scheduling, virtual memory, explain how the netdata.conf is used and backlink to the netdata.conf file reference"
-->

# Daemon configuration

<details>
<summary>The daemon configuration file is read from /etc/netdata/netdata.conf.</summary>

Depending on your installation method, Netdata will have been installed either directly under `/`, or
under `/opt/netdata`. The paths mentioned here, and in the documentation in general, assume that your installation is
under `/`. If it is not, you will find the exact same paths under `/opt/netdata` as well (i.e. `/etc/netdata` will
be `/opt/netdata/etc/netdata`).

</details>

This config file **is not needed by default**. Netdata works fine out of the box without it. But it does allow you to
adapt the general behavior of Netdata, in great detail.
You can find all these settings, with their default values, by
accessing the URL `https://netdata.server.hostname:19999/netdata.conf`. For example, check the configuration file
of [netdata.firehol.org](http://netdata.firehol.org/netdata.conf). HTTP access to this file is limited by default to
[private IPs](https://en.wikipedia.org/wiki/Private_network), via
the [web server access lists](/src/web/server/README.md#access-lists).

`netdata.conf` has sections denoted by `[section]`. You will see the following sections:

1. `[global]` to [configure](#global-section-options) the [Netdata daemon](/src/daemon/README.md).
2. `[db]` to [configure](#db-section-options) the database of Netdata.
3. `[directories]` to [configure](#directories-section-options) the directories used by Netdata.
4. `[logs]` to [configure](#logs-section-options) the Netdata logging.
5. `[environment variables]` to [configure](#environment-variables-section-options) the environment variables used by
   Netdata.
6. `[sqlite]` to [configure](#sqlite-section-options) the [Netdata daemon](/src/daemon/README.md) SQLite settings.
7. `[ml]` to configure settings for [machine learning](/src/ml/README.md).
8. `[health]` to [configure](#health-section-options) general settings for [health monitoring](/src/health/README.md).
9. `[web]` to [configure the web server](/src/web/server/README.md).
10. `[registry]` for the [Netdata registry](/src/registry/README.md).
11. `[global statistics]` for the charts Netdata maintains about its own operation.
12. `[statsd]` for the general settings of the [stats.d.plugin](/src/collectors/statsd.plugin/README.md).
13. `[plugins]` to [configure](#plugins-section-options) which [collectors](/src/collectors/README.md) to use and PATH
    settings.
14. `[plugin:NAME]` sections for each collector plugin, under the
    comment [Per plugin configuration](#per-plugin-configuration).

The configuration file is a `name = value` dictionary. Netdata will not complain if you set options unknown to it. When
you check the running configuration by accessing the URL `/netdata.conf` on your Netdata server, Netdata will add a
comment on settings it does not currently use.

## Applying changes

After `netdata.conf` has been modified, Netdata needs to be [restarted](/packaging/installer/README.md#maintaining-a-netdata-agent-installation) for
the changes to apply:

```bash
sudo systemctl restart netdata
```

If the above does not work, try the following:

```bash
sudo killall netdata; sleep 10; sudo netdata
```

Please note that your data history will be lost if you have modified the `history` parameter in section `[global]`.

## Sections

### [global] section options

| setting | default | info |
|:----------------------------------:|:-------------:|:----------|
| process scheduling policy | `keep` | See [Netdata process scheduling policy](/src/daemon/README.md#netdata-process-scheduling-policy). |
| OOM score | `0` | |
| glibc malloc arena max for plugins | `1` | See [Virtual memory](/src/daemon/README.md#virtual-memory). |
| glibc malloc arena max for Netdata | `1` | See [Virtual memory](/src/daemon/README.md#virtual-memory). |
| hostname | auto-detected | The hostname of the computer running Netdata. |
| host access prefix | empty | This is used in docker environments where /proc, /sys, etc. have to be accessed via another path. You may also have to set the SYS_PTRACE capability on the docker container for this to work. Check [issue 43](https://github.com/netdata/netdata/issues/43). |
| timezone | auto-detected | The timezone retrieved from the environment variable. |
| run as user | `netdata` | The user Netdata will run as. |
| pthread stack size | auto-detected | |
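A minimal `[global]` section that makes the most common of these explicit could look like this (a sketch; the values shown are the documented defaults):

```conf
[global]
    run as user = netdata
    process scheduling policy = keep
    OOM score = 0
```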
### [db] section options

| setting | default | info |
|:---------------------------------------------:|:----------:|:----------|
| mode | `dbengine` | `dbengine`: The default for long-term metrics storage with efficient RAM and disk usage. Can be extended with `dbengine page cache size MB` and `dbengine disk space MB`. <br />`ram`: The round-robin database will be temporary and it will be lost when Netdata exits. <br />`alloc`: Similar to `ram`, but can significantly reduce memory usage when combined with a low retention, and does not support KSM. <br />`none`: Disables the database at this host, and disables health monitoring entirely, as that requires a database of metrics. Not to be used together with streaming. |
| retention | `3600` | Used with `mode = ram/alloc`, not the default `mode = dbengine`. This number reflects the number of entries the `netdata` daemon will by default keep in memory for each chart dimension. Check [Memory Requirements](/src/database/README.md) for more information. |
| storage tiers | `3` | The number of storage tiers you want to have in your dbengine. Check the tiering mechanism in the [dbengine's reference](/src/database/engine/README.md#tiering). You can have up to 5 tiers of data (including _Tier 0_). This number ranges between 1 and 5. |
| dbengine page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching _Tier 0_ Netdata metric values. |
| dbengine tier **`N`** page cache size MB | `32` | Determines the amount of RAM in MiB that is dedicated to caching Netdata metric values of tier **`N`**. <br /> `N belongs to [1..4]` |
| dbengine disk space MB | `256` | Determines the amount of disk space in MiB that is dedicated to storing _Tier 0_ Netdata metric values and all related metadata describing them. This option is available **only for legacy configuration** (`Agent v1.23.2 and prior`). |
| dbengine multihost disk space MB | `256` | Same functionality as `dbengine disk space MB`, but includes support for storing metrics streamed to a parent node by its children. Can be used in single-node environments as well. This setting is only for _Tier 0_ metrics. |
| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of tier **`N`** (both the parent node and its children). Can be used in single-node environments as well. <br /> `N belongs to [1..4]` |
| update every | `1` | The frequency, in seconds, of data collection. For more information see the [performance guide](/docs/netdata-agent/configuration/optimize-the-netdata-agents-performance.md). These metrics are stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](/src/database/engine/README.md#tiering). |
| dbengine tier **`N`** update every iterations | `60` | The downsampling factor between each tier and the one below it. Each tier keeps one data point for every N (60 by default) points of the previous tier, for any metric it collects. This setting can take values from `2` up to `255`. <br /> `N belongs to [1..4]` |
| dbengine tier **`N`** back fill | `New` | Specifies the strategy for recreating missing data on each tier from the tier exactly below it. <br /> `New`: Checks the latest point on each tier and saves new points to it only if the tier below has available points for its observation window (the `dbengine tier N update every iterations` window). <br /> `none`: No back filling is applied. <br /> `N belongs to [1..4]` |
| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/src/database/README.md#ksm). |
| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](/src/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers); also sets the timeout for cleaning up obsolete dimensions. |
| gap when lost iterations above | `1` | |
| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. |
| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. |

> ### Info
>
> The product of all the **enabled** tiers' `dbengine tier N update every iterations` values must be less than `65535`.
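For a typical single node, the defaults translate to a section like this (a sketch of the documented defaults; tune the cache and disk sizes to your retention needs):

```conf
[db]
    mode = dbengine
    storage tiers = 3
    update every = 1
    dbengine page cache size MB = 32
    dbengine multihost disk space MB = 256
```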
| dbengine tier **`N`** multihost disk space MB | `256` | Same functionality as `dbengine multihost disk space MB`, but stores metrics of the **`N`** tier (both parent node and its children). Can be used in single-node environments as well. <br /> `N belongs to [1..4]` |
+| update every | `1` | The frequency, in seconds, of data collection. For more information see the [performance guide](/docs/netdata-agent/configuration/optimize-the-netdata-agents-performance.md). These metrics are stored as _Tier 0_ data. Explore the tiering mechanism in the [dbengine's reference](/src/database/engine/README.md#tiering). |
+| dbengine tier **`N`** update every iterations | `60` | The downsampling factor of each tier relative to the tier below it. Each tier keeps one data point for every N (60 by default) points of the previous tier, for every metric it collects. This setting can take values from `2` up to `255`. <br /> `N belongs to [1..4]` |
+| dbengine tier **`N`** back fill | `New` | Specifies the strategy for recreating missing data on each Tier from the immediately lower Tier. <br /> `New`: Checks the latest point on each Tier and saves new points to it only if the immediately lower Tier has available points for its observation window (`dbengine tier N update every iterations` window). <br /> `none`: No back filling is applied. <br /> `N belongs to [1..4]` |
+| memory deduplication (ksm) | `yes` | When set to `yes`, Netdata will offer its in-memory round robin database and the dbengine page cache to kernel same page merging (KSM) for deduplication. For more information check [Memory Deduplication - Kernel Same Page Merging - KSM](/src/database/README.md#ksm) |
+| cleanup obsolete charts after secs | `3600` | See [monitoring ephemeral containers](/src/collectors/cgroups.plugin/README.md#monitoring-ephemeral-containers); also sets the timeout for cleaning up obsolete dimensions |
+| gap when lost iterations above | `1` | |
+| cleanup orphan hosts after secs | `3600` | How long to wait until automatically removing from the DB a remote Netdata host (child) that is no longer sending data. |
+| enable zero metrics | `no` | Set to `yes` to show charts when all their metrics are zero. | +
+> ### Info
+>
+>The product of the `dbengine tier N update every iterations` values of all the **enabled** tiers must be less than `65535`. +
+### [directories] section options +
+| setting | default | info |
+|:-------------------:|:------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| config | `/etc/netdata` | The directory where configuration files are kept. |
+| stock config | `/usr/lib/netdata/conf.d` | |
+| log | `/var/log/netdata` | The directory in which the [log files](/src/daemon/README.md#log-files) are kept. |
+| web | `/usr/share/netdata/web` | The directory where the web static files are kept. |
+| cache | `/var/cache/netdata` | The directory where the memory database is stored if and when Netdata exits. Netdata will re-read the database when it starts again, to continue from the same point. |
+| lib | `/var/lib/netdata` | Contains the alert log and the Netdata instance GUID. |
+| home | `/var/cache/netdata` | Contains the db files for the collected metrics. |
+| lock | `/var/lib/netdata/lock` | Contains the data collectors lock files. |
| plugins | `"/usr/libexec/netdata/plugins.d" "/etc/netdata/custom-plugins.d"` | The directory where plugin programs are kept. This setting supports multiple directories, space separated. If any directory path contains spaces, enclose it in single or double quotes. |
+| health config | `/etc/netdata/health.d` | The directory containing the user alert configuration files, to override the stock configurations. |
+| stock health config | `/usr/lib/netdata/conf.d/health.d` | Contains the stock alert configuration files for each collector. |
+| registry | `/opt/netdata/var/lib/netdata/registry` | Contains the [registry](/src/registry/README.md) database and the GUID that uniquely identifies each Netdata Agent. | +
+### [logs] section options +
+| setting | default | info |
+|:----------------------------------:|:-----------------------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| debug flags | `0x0000000000000000` | Bitmap of debug options to enable. For more information check [Tracing Options](/src/daemon/README.md#debugging). |
+| debug | `/var/log/netdata/debug.log` | The filename to save debug information. This file will not be created if debugging is not enabled. You can also set it to `syslog` to send the debug messages to syslog, or `none` to disable this log. For more information check [Tracing Options](/src/daemon/README.md#debugging). |
+| error | `/var/log/netdata/error.log` | The filename to save error messages for the Netdata daemon and all plugins (`stderr` is sent here for all Netdata programs, including the plugins). You can also set it to `syslog` to send the errors to syslog, or `none` to disable this log. |
+| access | `/var/log/netdata/access.log` | The filename to save the log of web clients accessing Netdata charts. You can also set it to `syslog` to send the access log to syslog, or `none` to disable this log. |
+| facility | `daemon` | A facility keyword is used to specify the type of system that is logging the message. |
+| errors flood protection period | `1200` | Length of the period (in seconds) during which the number of errors should not exceed the `errors to trigger flood protection`. |
+| errors to trigger flood protection | `200` | Number of errors written to the log in `errors flood protection period` seconds before flood protection is activated. |
+| severity level | `info` | Controls which log messages are logged, with `error` being the most important. Supported values: `info` and `error`. |
+
+### [environment variables] section options +
+| setting | default | info |
+|:----------:|:-----------------:|:-----------------------------------------------------------|
+| TZ | `:/etc/localtime` | Where to find the timezone. |
+| PATH | `auto-detected` | Specifies the directories to be searched to find a command. |
+| PYTHONPATH | | Used to set a custom Python path. | +
+### [sqlite] section options +
+| setting | default | info |
+|:------------------:|:-------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| auto vacuum | `INCREMENTAL` | The [auto-vacuum status](https://www.sqlite.org/pragma.html#pragma_auto_vacuum) in the database. |
+| synchronous | `NORMAL` | The setting of the ["synchronous"](https://www.sqlite.org/pragma.html#pragma_synchronous) flag. |
+| journal mode | `WAL` | The [journal mode](https://www.sqlite.org/pragma.html#pragma_journal_mode) for databases. |
+| temp store | `MEMORY` | Used to determine where [temporary tables and indices are stored](https://www.sqlite.org/pragma.html#pragma_temp_store). |
+| journal size limit | `16777216` | Used to set a new [limit in bytes for the database](https://www.sqlite.org/pragma.html#pragma_journal_size_limit). |
+| cache size | `-2000` | Used to [suggest the maximum number of database disk pages](https://www.sqlite.org/pragma.html#pragma_cache_size) that SQLite will hold in memory at once per open database file. | +
+### [health] section options +
+This section controls the general behavior of the health monitoring capabilities of Netdata. +
+Specific alerts are configured in per-collector config files under the `health.d` directory. For more info, see [health +monitoring](/src/health/README.md). +
+[Alert notifications](/src/health/notifications/README.md) are configured in `health_alarm_notify.conf`. +
+| setting | default | info |
+|:----------------------------------------------:|:------------------------------------------------:|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| enabled | `yes` | Set to `no` to disable all alerts and notifications. |
+| in memory max health log entries | 1000 | Size of the alert history held in RAM. |
+| script to execute on alarm | `/usr/libexec/netdata/plugins.d/alarm-notify.sh` | The script that sends alert notifications. Note that in versions before 1.16, the plugins.d directory may be installed in a different location in certain OSs (e.g. under `/usr/lib/netdata`). |
+| run at least every seconds | `10` | Controls how often all alert conditions should be evaluated. |
+| postpone alarms during hibernation for seconds | `60` | Prevents false alerts. May need to be increased if you get alerts during hibernation. |
+| health log history | `432000` | Specifies the history of alert events (in seconds) kept in the agent's SQLite database. |
+| enabled alarms | * | Defines which alerts to load from both user and stock directories. This is a [simple pattern](/src/libnetdata/simple_pattern/README.md) list of alert or template names. Can be used to disable specific alerts. For example, `enabled alarms = !oom_kill *` will load all alerts except `oom_kill`. |
+
+### [web] section options +
+Refer to the [web server documentation](/src/web/server/README.md). +
+### [plugins] section options +
+In this section you will see a boolean (`yes`/`no`) option for each plugin (e.g. tc, cgroups, apps, proc, etc.). Note +that the configuration options in this section for the orchestrator plugins `python.d` and `charts.d` control **all the +modules** written for that orchestrator. For instance, setting `python.d = no` means that all Python modules +under `collectors/python.d.plugin` will be disabled. +
+Additionally, there will be the following options: +
+| setting | default | info |
+|:-------------------------------:|:---------------:|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| enable running new plugins | `yes` | When set to `yes`, Netdata will enable detected plugins, even if they are not configured explicitly. Setting this to `no` will only enable plugins explicitly configured in this file with a `yes`. |
+| check for new plugins every | 60 | The time in seconds to check for new plugins in the plugins directory. This allows having other applications dynamically creating plugins for Netdata. |
+| checks | `no` | A debugging plugin for measuring internal latency. | +
+### [registry] section options +
+To understand what this section is and how it should be configured, please refer to +the [registry documentation](/src/registry/README.md). +
+## Per-plugin configuration +
+The configuration options for plugins appear in sections following the pattern `[plugin:NAME]`. +
+### Internal plugins +
+Most internal plugins will provide additional options. Check [Internal Plugins](/src/collectors/README.md) for more +information. +
+Please note that, by default, Netdata will enable monitoring metrics for disks, memory, and network only when they are +not zero. If they are constantly zero they are ignored. Metrics that start having values after Netdata is started will +be detected and charts will be automatically added to the dashboard (a refresh of the dashboard is needed for them +to appear). Use `yes` instead of `auto` in plugin configuration sections to enable these charts permanently. You +can also set the `enable zero metrics` option to `yes` in the `[db]` section, which enables charts with zero metrics +for all internal Netdata plugins. +
+### External plugins +
+External plugins have only two options in `netdata.conf`: +
+| setting | default | info |
+|:---------------:|:---------------------------------------------:|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
+| update every | the value of the `[db].update every` setting | The frequency in seconds the plugin should collect values. For more information check the [performance guide](/docs/netdata-agent/configuration/optimize-the-netdata-agents-performance.md). |
+| command options | - | Additional command line options to pass to the plugin. | +
+External plugins that need additional configuration may support a dedicated file in `/etc/netdata`. Check their +documentation.
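+
+As a quick illustration of how the pieces above fit together, here is a minimal `netdata.conf` sketch. The section and
+option names are the ones documented in this file; the values are arbitrary examples, and `go.d` is used only as an
+example of an external plugin orchestrator, so adapt the `[plugin:NAME]` section to the plugin you actually want to tune:
+
+```text
+[db]
+    mode = dbengine
+    update every = 2      # collect metrics every 2 seconds
+
+[plugins]
+    charts.d = no         # disable this orchestrator and all its modules
+
+[plugin:go.d]
+    update every = 5      # override the global data collection frequency for this plugin only
+    command options = -d  # extra command line options passed to the plugin (hypothetical debug flag)
+```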
+ diff --git a/src/daemon/config/dyncfg-echo.c b/src/daemon/config/dyncfg-echo.c new file mode 100644 index 000000000..95d40a025 --- /dev/null +++ b/src/daemon/config/dyncfg-echo.c @@ -0,0 +1,175 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +// ---------------------------------------------------------------------------- +// echo is when we send requests to plugins without any caller +// it is used for: +// 1. the first enable/disable requests we send, and also +// 2. updates to stock or user configurations +// 3. saved dynamic jobs we need to add to templates + +struct dyncfg_echo { + const DICTIONARY_ITEM *item; + DYNCFG *df; // for additions this is the job, not the template + BUFFER *wb; + DYNCFG_CMDS cmd; + const char *cmd_str; +}; + +void dyncfg_echo_cb(BUFFER *wb __maybe_unused, int code __maybe_unused, void *result_cb_data) { + struct dyncfg_echo *e = result_cb_data; + DYNCFG *df = e->df; + + if(DYNCFG_RESP_SUCCESS(code)) { + // successful response + + if(e->cmd == DYNCFG_CMD_ADD) { + df->dyncfg.status = dyncfg_status_from_successful_response(code); + dyncfg_update_status_on_successful_add_or_update(df, code); + } + else if(e->cmd == DYNCFG_CMD_UPDATE) { + df->dyncfg.status = dyncfg_status_from_successful_response(code); + dyncfg_update_status_on_successful_add_or_update(df, code); + } + else if(e->cmd == DYNCFG_CMD_DISABLE) + df->dyncfg.status = df->current.status = DYNCFG_STATUS_DISABLED; + else if(e->cmd == DYNCFG_CMD_ENABLE) + df->dyncfg.status = df->current.status = dyncfg_status_from_successful_response(code); + } + else { + // failed response + + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: received response code %d on request to id '%s', cmd: %s", + code, dictionary_acquired_item_name(e->item), e->cmd_str); + + if(e->cmd == DYNCFG_CMD_UPDATE || e->cmd == DYNCFG_CMD_ADD) + e->df->dyncfg.plugin_rejected = true; + } + + buffer_free(e->wb); + dictionary_acquired_item_release(dyncfg_globals.nodes, e->item); + + e->wb = NULL; + e->df = NULL; + e->item = NULL; + freez((void *)e->cmd_str); + e->cmd_str = NULL; + freez(e); +} + +// ---------------------------------------------------------------------------- + +void dyncfg_echo(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id __maybe_unused, DYNCFG_CMDS cmd) { + RRDHOST *host = dyncfg_rrdhost(df); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host of configuration id '%s'", id); + return; + } + + if(!(df->cmds & cmd)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: attempted to echo a cmd that is not supported"); + return; + } + + const char *cmd_str = dyncfg_id2cmd_one(cmd); + if(!cmd_str) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: command given does not resolve to a known command"); + return; + } + + struct dyncfg_echo *e = callocz(1, sizeof(struct dyncfg_echo)); + e->item = dictionary_acquired_item_dup(dyncfg_globals.nodes, item); + e->wb = buffer_create(0, NULL); + e->df = df; + e->cmd = cmd; + e->cmd_str = strdupz(cmd_str); + + char buf[string_strlen(df->function) + strlen(e->cmd_str) + 20]; + snprintfz(buf, sizeof(buf), "%s %s", string2str(df->function), e->cmd_str); + + rrd_function_run( + host, e->wb, 10, + HTTP_ACCESS_ALL, buf, false, NULL, + dyncfg_echo_cb, e, + NULL, NULL, + NULL, NULL, + NULL, string2str(df->dyncfg.source)); +} + +// ---------------------------------------------------------------------------- + +void dyncfg_echo_update(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id) { + RRDHOST *host = dyncfg_rrdhost(df); + if(!host) { 
+ nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host of configuration id '%s'", id); + return; + } + + if(!df->dyncfg.payload) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: requested to send an update to '%s', but there is no payload", id); + return; + } + + struct dyncfg_echo *e = callocz(1, sizeof(struct dyncfg_echo)); + e->item = dictionary_acquired_item_dup(dyncfg_globals.nodes, item); + e->wb = buffer_create(0, NULL); + e->df = df; + e->cmd = DYNCFG_CMD_UPDATE; + e->cmd_str = strdupz("update"); + + char buf[string_strlen(df->function) + strlen(e->cmd_str) + 20]; + snprintfz(buf, sizeof(buf), "%s %s", string2str(df->function), e->cmd_str); + + rrd_function_run( + host, e->wb, 10, + HTTP_ACCESS_ALL, buf, false, NULL, + dyncfg_echo_cb, e, + NULL, NULL, + NULL, NULL, + df->dyncfg.payload, string2str(df->dyncfg.source)); +} + +// ---------------------------------------------------------------------------- + +static void dyncfg_echo_payload_add(const DICTIONARY_ITEM *item_template __maybe_unused, const DICTIONARY_ITEM *item_job, DYNCFG *df_template, DYNCFG *df_job, const char *id_template, const char *cmd) { + RRDHOST *host = dyncfg_rrdhost(df_template); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host of configuration id '%s'", id_template); + return; + } + + if(!df_job->dyncfg.payload) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: requested to send a '%s' to '%s', but there is no payload", + cmd, id_template); + return; + } + + struct dyncfg_echo *e = callocz(1, sizeof(struct dyncfg_echo)); + e->item = dictionary_acquired_item_dup(dyncfg_globals.nodes, item_job); + e->wb = buffer_create(0, NULL); + e->df = df_job; + e->cmd = DYNCFG_CMD_ADD; + e->cmd_str = strdupz(cmd); + + char buf[string_strlen(df_template->function) + strlen(cmd) + 20]; + snprintfz(buf, sizeof(buf), "%s %s", string2str(df_template->function), cmd); + + rrd_function_run( + host, e->wb, 10, + HTTP_ACCESS_ALL, buf, false, NULL, + dyncfg_echo_cb, e, + NULL, NULL, + NULL, NULL, + df_job->dyncfg.payload, string2str(df_job->dyncfg.source)); +} + +void dyncfg_echo_add(const DICTIONARY_ITEM *item_template, const DICTIONARY_ITEM *item_job, DYNCFG *df_template, DYNCFG *df_job, const char *template_id, const char *job_name) { + char buf[strlen(job_name) + 20]; + snprintfz(buf, sizeof(buf), "add %s", job_name); + dyncfg_echo_payload_add(item_template, item_job, df_template, df_job, template_id, buf); +} + diff --git a/src/daemon/config/dyncfg-files.c b/src/daemon/config/dyncfg-files.c new file mode 100644 index 000000000..81b56918f --- /dev/null +++ b/src/daemon/config/dyncfg-files.c @@ -0,0 +1,264 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +void dyncfg_file_delete(const char *id) { + CLEAN_CHAR_P *escaped_id = dyncfg_escape_id_for_filename(id); + char filename[FILENAME_MAX]; + snprintfz(filename, sizeof(filename), "%s/%s.dyncfg", dyncfg_globals.dir, escaped_id); + unlink(filename); +} + +void dyncfg_file_save(const char *id, DYNCFG *df) { + CLEAN_CHAR_P *escaped_id = dyncfg_escape_id_for_filename(id); + char filename[FILENAME_MAX]; + snprintfz(filename, sizeof(filename), "%s/%s.dyncfg", dyncfg_globals.dir, escaped_id); + + FILE *fp = fopen(filename, "w"); + if(!fp) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot create file '%s'", filename); + return; + } + + df->dyncfg.modified_ut = now_realtime_usec(); + if(!df->dyncfg.created_ut) + df->dyncfg.created_ut = df->dyncfg.modified_ut; + + fprintf(fp, "version=%zu\n", DYNCFG_VERSION); + fprintf(fp, 
"id=%s\n", id); + + if(df->template) + fprintf(fp, "template=%s\n", string2str(df->template)); + + char uuid_str[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(df->host_uuid.uuid, uuid_str); + fprintf(fp, "host=%s\n", uuid_str); + + fprintf(fp, "path=%s\n", string2str(df->path)); + fprintf(fp, "type=%s\n", dyncfg_id2type(df->type)); + + fprintf(fp, "source_type=%s\n", dyncfg_id2source_type(df->dyncfg.source_type)); + fprintf(fp, "source=%s\n", string2str(df->dyncfg.source)); + + fprintf(fp, "created=%"PRIu64"\n", df->dyncfg.created_ut); + fprintf(fp, "modified=%"PRIu64"\n", df->dyncfg.modified_ut); + fprintf(fp, "sync=%s\n", df->sync ? "true" : "false"); + fprintf(fp, "user_disabled=%s\n", df->dyncfg.user_disabled ? "true" : "false"); + fprintf(fp, "saves=%"PRIu32"\n", ++df->dyncfg.saves); + + fprintf(fp, "cmds="); + dyncfg_cmds2fp(df->cmds, fp); + fprintf(fp, "\n"); + + if(df->dyncfg.payload && buffer_strlen(df->dyncfg.payload) > 0) { + fprintf(fp, "content_type=%s\n", content_type_id2string(df->dyncfg.payload->content_type)); + fprintf(fp, "content_length=%zu\n", buffer_strlen(df->dyncfg.payload)); + fprintf(fp, "---\n"); + fwrite(buffer_tostring(df->dyncfg.payload), 1, buffer_strlen(df->dyncfg.payload), fp); + } + + fclose(fp); +} + +void dyncfg_file_load(const char *d_name) { + char filename[PATH_MAX]; + snprintf(filename, sizeof(filename), "%s/%s", dyncfg_globals.dir, d_name); + + FILE *fp = fopen(filename, "r"); + if (!fp) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot open file '%s'", filename); + return; + } + + DYNCFG tmp = { 0 }; + + char line[PLUGINSD_LINE_MAX]; + CLEAN_CHAR_P *id = NULL; + + HTTP_CONTENT_TYPE content_type = CT_NONE; + size_t content_length = 0; + bool read_payload = false; + + while (fgets(line, sizeof(line), fp)) { + if(strcmp(line, "---\n") == 0) { + read_payload = true; + break; + } + + char *value = strchr(line, '='); + if(!value) continue; + + *value++ = '\0'; + + value = trim(value); + if(!value) continue; + + char *key = trim(line); + if(!key) continue; + + // Parse key-value pairs + if (strcmp(key, "version") == 0) { + size_t version = strtoull(value, NULL, 10); + + if(version > DYNCFG_VERSION) + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "DYNCFG: configuration file '%s' has version %zu, which is newer than our version %zu", + filename, version, DYNCFG_VERSION); + + } else if (strcmp(key, "id") == 0) { + freez(id); + id = strdupz(value); + } else if (strcmp(key, "template") == 0) { + tmp.template = string_strdupz(value); + } else if (strcmp(key, "host") == 0) { + uuid_parse_flexi(value, tmp.host_uuid.uuid); + } else if (strcmp(key, "path") == 0) { + tmp.path = string_strdupz(value); + } else if (strcmp(key, "type") == 0) { + tmp.type = dyncfg_type2id(value); + } else if (strcmp(key, "source_type") == 0) { + tmp.dyncfg.source_type = dyncfg_source_type2id(value); + } else if (strcmp(key, "source") == 0) { + tmp.dyncfg.source = string_strdupz(value); + } else if (strcmp(key, "created") == 0) { + tmp.dyncfg.created_ut = strtoull(value, NULL, 10); + } else if (strcmp(key, "modified") == 0) { + tmp.dyncfg.modified_ut = strtoull(value, NULL, 10); + } else if (strcmp(key, "sync") == 0) { + tmp.sync = (strcmp(value, "true") == 0); + } else if (strcmp(key, "user_disabled") == 0) { + tmp.dyncfg.user_disabled = (strcmp(value, "true") == 0); + } else if (strcmp(key, "saves") == 0) { + tmp.dyncfg.saves = strtoull(value, NULL, 10); + } else if (strcmp(key, "content_type") == 0) { + content_type = content_type_string2id(value); + } else if (strcmp(key, 
"content_length") == 0) { + content_length = strtoull(value, NULL, 10); + } else if (strcmp(key, "cmds") == 0) { + tmp.cmds = dyncfg_cmds2id(value); + } + } + + if (read_payload) { + // Determine the actual size of the remaining file content + long saved_position = ftell(fp); // Save current position + fseek(fp, 0, SEEK_END); + long total_size = ftell(fp); // Total size of the file + size_t actual_size = total_size - saved_position; // Calculate remaining content size + fseek(fp, saved_position, SEEK_SET); // Reset file pointer to the beginning of the payload + + // Use actual_size instead of content_length to handle the whole remaining file + tmp.dyncfg.payload = buffer_create(actual_size, NULL); + tmp.dyncfg.payload->content_type = content_type; + + buffer_need_bytes(tmp.dyncfg.payload, actual_size); + tmp.dyncfg.payload->len = fread(tmp.dyncfg.payload->buffer, 1, actual_size, fp); + + if (content_length != tmp.dyncfg.payload->len) { + nd_log(NDLS_DAEMON, NDLP_WARNING, + "DYNCFG: content_length %zu does not match actual payload size %zu for file '%s'", + content_length, actual_size, filename); + } + } + + fclose(fp); + + if(!id) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: configuration file '%s' does not include a unique id. Ignoring it.", + filename); + + dyncfg_cleanup(&tmp); + return; + } + + tmp.dyncfg.status = DYNCFG_STATUS_ORPHAN; + tmp.dyncfg.restart_required = false; + + dyncfg_set_current_from_dyncfg(&tmp); + + dictionary_set(dyncfg_globals.nodes, id, &tmp, sizeof(tmp)); + + // check if we need to rename the file + CLEAN_CHAR_P *fixed_id = dyncfg_escape_id_for_filename(id); + char fixed_filename[PATH_MAX]; + snprintf(fixed_filename, sizeof(fixed_filename), "%s/%s.dyncfg", dyncfg_globals.dir, fixed_id); + + if(strcmp(filename, fixed_filename) != 0) { + if(rename(filename, fixed_filename) != 0) + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: cannot rename file '%s' into '%s'. 
Saving a new configuration may not overwrite the old one.", + filename, fixed_filename); + } +} +
+void dyncfg_load_all(void) { + DIR *dir = opendir(dyncfg_globals.dir); + if (!dir) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot open directory '%s'", dyncfg_globals.dir); + return; + } +
+ struct dirent *entry; + while ((entry = readdir(dir)) != NULL) { + if ((entry->d_type == DT_REG || entry->d_type == DT_LNK) && strendswith(entry->d_name, ".dyncfg")) + dyncfg_file_load(entry->d_name); + } +
+ closedir(dir); +} +
+// ---------------------------------------------------------------------------- +// schemas loading +
+static bool dyncfg_read_file_to_buffer(const char *filename, BUFFER *dst) { + int fd = open(filename, O_RDONLY | O_CLOEXEC, 0666); + if(unlikely(fd == -1)) + return false; +
+ struct stat st = { 0 }; + if(fstat(fd, &st) != 0) { + close(fd); + return false; + } +
+ buffer_flush(dst); + buffer_need_bytes(dst, st.st_size + 1); // +1 for the terminating zero +
+ ssize_t r = read(fd, (char*)dst->buffer, st.st_size); + if(unlikely(r == -1)) { + close(fd); + return false; + } + dst->len = r; + dst->buffer[dst->len] = '\0'; +
+ close(fd); + return true; +} +
+static bool dyncfg_get_schema_from(const char *dir, const char *id, BUFFER *dst) { + char filename[FILENAME_MAX + 1]; +
+ CLEAN_CHAR_P *escaped_id = dyncfg_escape_id_for_filename(id); + snprintfz(filename, sizeof(filename), "%s/schema.d/%s.json", dir, escaped_id); + if(dyncfg_read_file_to_buffer(filename, dst)) + return true; +
+ snprintfz(filename, sizeof(filename), "%s/schema.d/%s.json", dir, id); + if(dyncfg_read_file_to_buffer(filename, dst)) + return true; +
+ return false; +} +
+bool dyncfg_get_schema(const char *id, BUFFER *dst) { + if(dyncfg_get_schema_from(netdata_configured_user_config_dir, id, dst)) + return true; +
+ if(dyncfg_get_schema_from(netdata_configured_stock_config_dir, id, dst)) + return true; +
+ return false; +} diff --git a/src/daemon/config/dyncfg-inline.c b/src/daemon/config/dyncfg-inline.c new file mode 100644 index 000000000..bed912e57 --- /dev/null +++ b/src/daemon/config/dyncfg-inline.c @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +
+#include "dyncfg.h" +
+static DICTIONARY *dyncfg_nodes = NULL; +
+static int dyncfg_inline_callback(struct rrd_function_execute *rfe, void *data __maybe_unused) { + char tr[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(*rfe->transaction, tr); +
+ bool cancelled = rfe->is_cancelled.cb ?
rfe->is_cancelled.cb(rfe->is_cancelled.data) : false; + + int code; + if(cancelled) + code = HTTP_RESP_CLIENT_CLOSED_REQUEST; + else + code = dyncfg_node_find_and_call(dyncfg_nodes, tr, rfe->function, rfe->stop_monotonic_ut, &cancelled, + rfe->payload, rfe->user_access, rfe->source, rfe->result.wb); + + if(code == HTTP_RESP_CLIENT_CLOSED_REQUEST || (rfe->is_cancelled.cb && rfe->is_cancelled.cb(rfe->is_cancelled.data))) { + buffer_flush(rfe->result.wb); + code = HTTP_RESP_CLIENT_CLOSED_REQUEST; + } + + if(rfe->result.cb) + rfe->result.cb(rfe->result.wb, code, rfe->result.data); + + return code; +} + +bool dyncfg_add(RRDHOST *host, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, + DYNCFG_CMDS cmds, HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + dyncfg_cb_t cb, void *data) { + + struct dyncfg_node tmp = { + .cmds = cmds, + .type = type, + .cb = cb, + .data = data, + }; + dictionary_set(dyncfg_nodes, id, &tmp, sizeof(tmp)); + + if(!dyncfg_add_low_level(host, id, path, status, type, source_type, source, cmds, + 0, 0, true, view_access, edit_access, + dyncfg_inline_callback, NULL)) { + dictionary_del(dyncfg_nodes, id); + return false; + } + + return true; +} + +void dyncfg_del(RRDHOST *host, const char *id) { + dictionary_del(dyncfg_nodes, id); + dyncfg_del_low_level(host, id); +} + +void dyncfg_status(RRDHOST *host, const char *id, DYNCFG_STATUS status) { + dyncfg_status_low_level(host, id, status); +} + +void dyncfg_init(bool load_saved) { + dyncfg_nodes = dyncfg_nodes_dictionary_create(); + dyncfg_init_low_level(load_saved); +} diff --git a/src/daemon/config/dyncfg-intercept.c b/src/daemon/config/dyncfg-intercept.c new file mode 100644 index 000000000..65f8383ed --- /dev/null +++ b/src/daemon/config/dyncfg-intercept.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +// ---------------------------------------------------------------------------- +// we intercept the config function calls of the plugin + +struct dyncfg_call { + BUFFER *payload; + char *function; + char *id; + char *add_name; + char *source; + DYNCFG_CMDS cmd; + rrd_function_result_callback_t result_cb; + void *result_cb_data; + bool from_dyncfg_echo; +}; + +static void dyncfg_function_intercept_job_successfully_added(DYNCFG *df_template, int code, struct dyncfg_call *dc) { + char id[strlen(dc->id) + 1 + strlen(dc->add_name) + 1]; + snprintfz(id, sizeof(id), "%s:%s", dc->id, dc->add_name); + + RRDHOST *host = dyncfg_rrdhost(df_template); + if(!host) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: cannot add job '%s' because host is missing", id); + } + else { + const DICTIONARY_ITEM *item = dyncfg_add_internal( + host, + id, + string2str(df_template->path), + dyncfg_status_from_successful_response(code), + DYNCFG_TYPE_JOB, + DYNCFG_SOURCE_TYPE_DYNCFG, + dc->source, + (df_template->cmds & ~DYNCFG_CMD_ADD) | DYNCFG_CMD_GET | DYNCFG_CMD_UPDATE | DYNCFG_CMD_TEST | + DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE | DYNCFG_CMD_REMOVE, + 0, + 0, + df_template->sync, + df_template->view_access, + df_template->edit_access, + df_template->execute_cb, + df_template->execute_cb_data, + false); + + // adding does not create df->dyncfg + // we have to do it here + + DYNCFG *df = dictionary_acquired_item_value(item); + SWAP(df->dyncfg.payload, dc->payload); + dyncfg_set_dyncfg_source_from_txt(df, dc->source); + df->dyncfg.user_disabled = false; + df->dyncfg.source_type = DYNCFG_SOURCE_TYPE_DYNCFG; + 
df->dyncfg.status = dyncfg_status_from_successful_response(code); + + dyncfg_file_save(id, df); // updates also the df->dyncfg timestamps + dyncfg_update_status_on_successful_add_or_update(df, code); + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + } +} + +static void dyncfg_function_intercept_job_successfully_updated(DYNCFG *df, int code, struct dyncfg_call *dc) { + df->dyncfg.status = dyncfg_status_from_successful_response(code); + df->dyncfg.source_type = DYNCFG_SOURCE_TYPE_DYNCFG; + SWAP(df->dyncfg.payload, dc->payload); + dyncfg_set_dyncfg_source_from_txt(df, dc->source); + + dyncfg_update_status_on_successful_add_or_update(df, code); +} + +void dyncfg_function_intercept_result_cb(BUFFER *wb, int code, void *result_cb_data) { + struct dyncfg_call *dc = result_cb_data; + + bool called_from_dyncfg_echo = dc->from_dyncfg_echo; + + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item_advanced(dyncfg_globals.nodes, dc->id, -1); + if(item) { + DYNCFG *df = dictionary_acquired_item_value(item); + bool old_user_disabled = df->dyncfg.user_disabled; + bool save_required = false; + + if (!called_from_dyncfg_echo) { + // the command was sent by a user + + if (DYNCFG_RESP_SUCCESS(code)) { + if (dc->cmd == DYNCFG_CMD_ADD) { + dyncfg_function_intercept_job_successfully_added(df, code, dc); + } else if (dc->cmd == DYNCFG_CMD_UPDATE) { + dyncfg_function_intercept_job_successfully_updated(df, code, dc); + save_required = true; + } + else if (dc->cmd == DYNCFG_CMD_ENABLE) { + df->dyncfg.user_disabled = false; + } + else if (dc->cmd == DYNCFG_CMD_DISABLE) { + df->dyncfg.user_disabled = true; + } + else if (dc->cmd == DYNCFG_CMD_REMOVE) { + dyncfg_file_delete(dc->id); + dictionary_del(dyncfg_globals.nodes, dc->id); + } + + if (save_required || old_user_disabled != df->dyncfg.user_disabled) + dyncfg_file_save(dc->id, df); + } + else + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: plugin returned code %d to user initiated call: %s", code, dc->function); + } + else { + // the command was sent by dyncfg + // these are handled by the echo callback, we don't need to do anything here + ; + } + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + } + + if(dc->result_cb) + dc->result_cb(wb, code, dc->result_cb_data); + + buffer_free(dc->payload); + freez(dc->function); + freez(dc->id); + freez(dc->source); + freez(dc->add_name); + freez(dc); +} + +// ---------------------------------------------------------------------------- + +static void dyncfg_apply_action_on_all_template_jobs(struct rrd_function_execute *rfe, const char *template_id, DYNCFG_CMDS c) { + STRING *template = string_strdupz(template_id); + DYNCFG *df; + + size_t all = 0, done = 0; + dfe_start_read(dyncfg_globals.nodes, df) { + if(df->template == template && df->type == DYNCFG_TYPE_JOB) + all++; + } + dfe_done(df); + + if(rfe->progress.cb) + rfe->progress.cb(rfe->progress.data, done, all); + + dfe_start_reentrant(dyncfg_globals.nodes, df) { + if(df->template == template && df->type == DYNCFG_TYPE_JOB) { + DYNCFG_CMDS cmd_to_send_to_plugin = c; + + if(c == DYNCFG_CMD_ENABLE) + cmd_to_send_to_plugin = df->dyncfg.user_disabled ? 
DYNCFG_CMD_DISABLE : DYNCFG_CMD_ENABLE; + else if(c == DYNCFG_CMD_DISABLE) + cmd_to_send_to_plugin = DYNCFG_CMD_DISABLE; + + dyncfg_echo(df_dfe.item, df, df_dfe.name, cmd_to_send_to_plugin); + + if(rfe->progress.cb) + rfe->progress.cb(rfe->progress.data, ++done, all); + } + } + dfe_done(df); + + string_freez(template); +} + +// ---------------------------------------------------------------------------- +// the callback for all config functions + +static int dyncfg_intercept_early_error(struct rrd_function_execute *rfe, int rc, const char *msg) { + rc = dyncfg_default_response(rfe->result.wb, rc, msg); + + if(rfe->result.cb) + rfe->result.cb(rfe->result.wb, rc, rfe->result.data); + + return rc; +} + +const DICTIONARY_ITEM *dyncfg_get_template_of_new_job(const char *job_id) { + char id_copy[strlen(job_id) + 1]; + memcpy(id_copy, job_id, sizeof(id_copy)); + + char *colon = strrchr(id_copy, ':'); + if(!colon) return NULL; + + *colon = '\0'; + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id_copy); + if(!item) return NULL; + + DYNCFG *df = dictionary_acquired_item_value(item); + if(df->type != DYNCFG_TYPE_TEMPLATE) { + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + return NULL; + } + + return item; +} + +int dyncfg_function_intercept_cb(struct rrd_function_execute *rfe, void *data __maybe_unused) { + + // IMPORTANT: this function MUST call the result_cb even on failures + + bool called_from_dyncfg_echo = rrd_function_has_this_original_result_callback(rfe->transaction, dyncfg_echo_cb); + bool has_payload = rfe->payload && buffer_strlen(rfe->payload) ? true : false; + bool make_the_call_to_plugin = true; + + int rc = HTTP_RESP_INTERNAL_SERVER_ERROR; + DYNCFG_CMDS cmd; + const DICTIONARY_ITEM *item = NULL; + + char buf[strlen(rfe->function) + 1]; + memcpy(buf, rfe->function, sizeof(buf)); + + char *words[20]; + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, 20); + + size_t i = 0; + char *config = get_word(words, num_words, i++); + char *id = get_word(words, num_words, i++); + char *cmd_str = get_word(words, num_words, i++); + char *add_name = get_word(words, num_words, i++); + + if(!config || !*config || strcmp(config, PLUGINSD_FUNCTION_CONFIG) != 0) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this is not a dyncfg request"); + + cmd = dyncfg_cmds2id(cmd_str); + if(cmd == DYNCFG_CMD_NONE) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: invalid command received"); + + if(cmd == DYNCFG_CMD_ADD || cmd == DYNCFG_CMD_TEST || cmd == DYNCFG_CMD_USERCONFIG) { + if(cmd == DYNCFG_CMD_TEST && (!add_name || !*add_name)) { + // backwards compatibility for TEST without a name + char *colon = strrchr(id, ':'); + if(colon) { + *colon = '\0'; + add_name = ++colon; + } + else + add_name = "test"; + } + + if(!add_name || !*add_name) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this action requires a name"); + + if(!called_from_dyncfg_echo) { + char nid[strlen(id) + strlen(add_name) + 2]; + snprintfz(nid, sizeof(nid), "%s:%s", id, add_name); + + if (cmd == DYNCFG_CMD_ADD && dictionary_get(dyncfg_globals.nodes, nid)) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: a configuration with this name already exists"); + } + } + + if((cmd == DYNCFG_CMD_ADD || cmd == DYNCFG_CMD_UPDATE || cmd == DYNCFG_CMD_TEST || cmd == DYNCFG_CMD_USERCONFIG) 
&& !has_payload) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this action requires a payload"); + + if((cmd != DYNCFG_CMD_ADD && cmd != DYNCFG_CMD_UPDATE && cmd != DYNCFG_CMD_TEST && cmd != DYNCFG_CMD_USERCONFIG) && has_payload) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this action does not require a payload"); + + item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(!item) { + if(cmd == DYNCFG_CMD_TEST || cmd == DYNCFG_CMD_USERCONFIG) { + // this may be a test on a new job + item = dyncfg_get_template_of_new_job(id); + } + + if(!item) + return dyncfg_intercept_early_error( + rfe, HTTP_RESP_NOT_FOUND, + "dyncfg functions intercept: id is not found"); + } + + DYNCFG *df = dictionary_acquired_item_value(item); + + // 1. check the permissions of the request + + switch(cmd) { + case DYNCFG_CMD_GET: + case DYNCFG_CMD_SCHEMA: + case DYNCFG_CMD_USERCONFIG: + if(!http_access_user_has_enough_access_level_for_endpoint(rfe->user_access, df->view_access)) { + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_FORBIDDEN, + "dyncfg: you don't have enough view permissions to execute this command"); + } + break; + + case DYNCFG_CMD_ENABLE: + case DYNCFG_CMD_DISABLE: + case DYNCFG_CMD_ADD: + case DYNCFG_CMD_TEST: + case DYNCFG_CMD_UPDATE: + case DYNCFG_CMD_REMOVE: + case DYNCFG_CMD_RESTART: + if(!http_access_user_has_enough_access_level_for_endpoint(rfe->user_access, df->edit_access)) { + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_FORBIDDEN, + "dyncfg: you don't have enough edit permissions to execute this command"); + } + break; + + default: { + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_INTERNAL_SERVER_ERROR, + "dyncfg: permissions for this command are not set"); + } + break; + } + + // 2. validate the request parameters + + if(make_the_call_to_plugin) { + if (!(df->cmds & cmd)) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: this command is not supported by the configuration node: %s", rfe->function); + + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this command is not supported by this configuration node"); + } + else if (cmd == DYNCFG_CMD_ADD) { + if (df->type != DYNCFG_TYPE_TEMPLATE) { + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: add command is only allowed in templates"); + + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: add command can only be applied on templates, not %s: %s", + dyncfg_id2type(df->type), rfe->function); + } + } + else if ( + cmd == DYNCFG_CMD_ENABLE && df->type == DYNCFG_TYPE_JOB && + dyncfg_is_user_disabled(string2str(df->template))) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: cannot enable a job of a disabled template: %s", + rfe->function); + + make_the_call_to_plugin = false; + rc = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_BAD_REQUEST, + "dyncfg functions intercept: this job belongs to disabled template"); + } + } + + // 3. 
check if it is one of the commands we should execute + + if(make_the_call_to_plugin) { + if (cmd & (DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE | DYNCFG_CMD_RESTART) && df->type == DYNCFG_TYPE_TEMPLATE) { + if (!called_from_dyncfg_echo) { + bool old_user_disabled = df->dyncfg.user_disabled; + if (cmd == DYNCFG_CMD_ENABLE) + df->dyncfg.user_disabled = false; + else if (cmd == DYNCFG_CMD_DISABLE) + df->dyncfg.user_disabled = true; + + if (df->dyncfg.user_disabled != old_user_disabled) + dyncfg_file_save(id, df); + } + + dyncfg_apply_action_on_all_template_jobs(rfe, id, cmd); + + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_OK, "applied to all template job"); + make_the_call_to_plugin = false; + } + else if (cmd == DYNCFG_CMD_SCHEMA) { + bool loaded = false; + if (df->type == DYNCFG_TYPE_JOB) { + if (df->template) + loaded = dyncfg_get_schema(string2str(df->template), rfe->result.wb); + } else + loaded = dyncfg_get_schema(id, rfe->result.wb); + + if (loaded) { + rfe->result.wb->content_type = CT_APPLICATION_JSON; + rfe->result.wb->expires = now_realtime_sec(); + rc = HTTP_RESP_OK; + make_the_call_to_plugin = false; + } + } + } + + // 4. execute the command + + if(make_the_call_to_plugin) { + struct dyncfg_call *dc = callocz(1, sizeof(*dc)); + dc->function = strdupz(rfe->function); + dc->id = strdupz(id); + dc->source = rfe->source ? strdupz(rfe->source) : NULL; + dc->add_name = (add_name) ? strdupz(add_name) : NULL; + dc->cmd = cmd; + dc->result_cb = rfe->result.cb; + dc->result_cb_data = rfe->result.data; + dc->payload = buffer_dup(rfe->payload); + dc->from_dyncfg_echo = called_from_dyncfg_echo; + + rfe->result.cb = dyncfg_function_intercept_result_cb; + rfe->result.data = dc; + + rc = df->execute_cb(rfe, df->execute_cb_data); + } + else if(rfe->result.cb) + rfe->result.cb(rfe->result.wb, rc, rfe->result.data); + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + return rc; +} + diff --git a/src/daemon/config/dyncfg-internals.h b/src/daemon/config/dyncfg-internals.h new file mode 100644 index 000000000..1722ae792 --- /dev/null +++ b/src/daemon/config/dyncfg-internals.h @@ -0,0 +1,145 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DYNCFG_INTERNALS_H +#define NETDATA_DYNCFG_INTERNALS_H + +#include "../common.h" +#include "database/rrd.h" +#include "database/rrdfunctions.h" +#include "database/rrdfunctions-internals.h" +#include "database/rrdcollector-internals.h" + +typedef struct dyncfg { + ND_UUID host_uuid; + STRING *function; + STRING *template; + STRING *path; + DYNCFG_CMDS cmds; + DYNCFG_TYPE type; + + HTTP_ACCESS view_access; + HTTP_ACCESS edit_access; + + struct { + DYNCFG_STATUS status; + DYNCFG_SOURCE_TYPE source_type; + STRING *source; + usec_t created_ut; + usec_t modified_ut; + } current; + + struct { + uint32_t saves; + bool restart_required; + bool plugin_rejected; + bool user_disabled; + DYNCFG_STATUS status; + DYNCFG_SOURCE_TYPE source_type; + STRING *source; + BUFFER *payload; + usec_t created_ut; + usec_t modified_ut; + } dyncfg; + + bool sync; + rrd_function_execute_cb_t execute_cb; + void *execute_cb_data; +} DYNCFG; + +struct dyncfg_globals { + const char *dir; + DICTIONARY *nodes; +}; + +extern struct dyncfg_globals dyncfg_globals; + +void dyncfg_load_all(void); +void dyncfg_file_load(const char *filename); +void dyncfg_file_save(const char *id, DYNCFG *df); +void dyncfg_file_delete(const char *id); + +bool dyncfg_get_schema(const char *id, BUFFER *dst); + +void dyncfg_echo_cb(BUFFER *wb, int code, void *result_cb_data); +void 
dyncfg_echo(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id, DYNCFG_CMDS cmd); +void dyncfg_echo_update(const DICTIONARY_ITEM *item, DYNCFG *df, const char *id); +void dyncfg_echo_add(const DICTIONARY_ITEM *item_template, const DICTIONARY_ITEM *item_job, DYNCFG *df_template, DYNCFG *df_job, const char *template_id, const char *job_name); + +const DICTIONARY_ITEM *dyncfg_add_internal(RRDHOST *host, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, + const char *source, DYNCFG_CMDS cmds, + usec_t created_ut, usec_t modified_ut, + bool sync, HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data, + bool overwrite_cb); + +int dyncfg_function_intercept_cb(struct rrd_function_execute *rfe, void *data); +void dyncfg_cleanup(DYNCFG *v); + +const DICTIONARY_ITEM *dyncfg_get_template_of_new_job(const char *job_id); + +bool dyncfg_is_user_disabled(const char *id); + +RRDHOST *dyncfg_rrdhost_by_uuid(ND_UUID *uuid); +RRDHOST *dyncfg_rrdhost(DYNCFG *df); + +static inline void dyncfg_copy_dyncfg_source_to_current(DYNCFG *df) { + STRING *old = df->current.source; + df->current.source = string_dup(df->dyncfg.source); + string_freez(old); +} + +static inline void dyncfg_set_dyncfg_source_from_txt(DYNCFG *df, const char *source) { + STRING *old = df->dyncfg.source; + df->dyncfg.source = string_strdupz(source); + string_freez(old); +} + +static inline void dyncfg_set_current_from_dyncfg(DYNCFG *df) { + df->current.status = df->dyncfg.status; + df->current.source_type = df->dyncfg.source_type; + + dyncfg_copy_dyncfg_source_to_current(df); + + if(df->dyncfg.created_ut < df->current.created_ut) + df->current.created_ut = df->dyncfg.created_ut; + + if(df->dyncfg.modified_ut > df->current.modified_ut) + df->current.modified_ut = df->dyncfg.modified_ut; +} + +static inline void dyncfg_update_status_on_successful_add_or_update(DYNCFG *df, int code) { + df->dyncfg.plugin_rejected = false; + + if (code == DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED) + df->dyncfg.restart_required = true; + else + df->dyncfg.restart_required = false; + + dyncfg_set_current_from_dyncfg(df); +} + +static inline DYNCFG_STATUS dyncfg_status_from_successful_response(int code) { + DYNCFG_STATUS status = DYNCFG_STATUS_ACCEPTED; + + switch(code) { + default: + case DYNCFG_RESP_ACCEPTED: + case DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED: + status = DYNCFG_STATUS_ACCEPTED; + break; + + case DYNCFG_RESP_ACCEPTED_DISABLED: + status = DYNCFG_STATUS_DISABLED; + break; + + case DYNCFG_RESP_RUNNING: + status = DYNCFG_STATUS_RUNNING; + break; + + } + + return status; +} + +#endif //NETDATA_DYNCFG_INTERNALS_H diff --git a/src/daemon/config/dyncfg-tree.c b/src/daemon/config/dyncfg-tree.c new file mode 100644 index 000000000..77d031fa0 --- /dev/null +++ b/src/daemon/config/dyncfg-tree.c @@ -0,0 +1,292 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +static int dyncfg_tree_compar(const void *a, const void *b) { + const DICTIONARY_ITEM *item1 = *(const DICTIONARY_ITEM **)a; + const DICTIONARY_ITEM *item2 = *(const DICTIONARY_ITEM **)b; + + DYNCFG *df1 = dictionary_acquired_item_value(item1); + DYNCFG *df2 = dictionary_acquired_item_value(item2); + + int rc = string_cmp(df1->path, df2->path); + if(rc == 0) + rc = strcmp(dictionary_acquired_item_name(item1), dictionary_acquired_item_name(item2)); + + return rc; +} + +static void dyncfg_to_json(DYNCFG *df, const char *id, BUFFER *wb) { + 
buffer_json_member_add_object(wb, id); + { + buffer_json_member_add_string(wb, "type", dyncfg_id2type(df->type)); + + if(df->type == DYNCFG_TYPE_JOB) + buffer_json_member_add_string(wb, "template", string2str(df->template)); + + buffer_json_member_add_string(wb, "status", dyncfg_id2status(df->current.status)); + dyncfg_cmds2json_array(df->current.status == DYNCFG_STATUS_ORPHAN ? DYNCFG_CMD_REMOVE : df->cmds, "cmds", wb); + buffer_json_member_add_object(wb, "access"); + { + http_access2buffer_json_array(wb, "view", df->view_access); + http_access2buffer_json_array(wb, "edit", df->edit_access); + } + buffer_json_object_close(wb); + buffer_json_member_add_string(wb, "source_type", dyncfg_id2source_type(df->current.source_type)); + buffer_json_member_add_string(wb, "source", string2str(df->current.source)); + buffer_json_member_add_boolean(wb, "sync", df->sync); + buffer_json_member_add_boolean(wb, "user_disabled", df->dyncfg.user_disabled); + buffer_json_member_add_boolean(wb, "restart_required", df->dyncfg.restart_required); + buffer_json_member_add_boolean(wb, "plugin_rejected", df->dyncfg.plugin_rejected); + buffer_json_member_add_object(wb, "payload"); + { + if (df->dyncfg.payload && buffer_strlen(df->dyncfg.payload)) { + buffer_json_member_add_boolean(wb, "available", true); + buffer_json_member_add_string(wb, "status", dyncfg_id2status(df->dyncfg.status)); + buffer_json_member_add_string(wb, "source_type", dyncfg_id2source_type(df->dyncfg.source_type)); + buffer_json_member_add_string(wb, "source", string2str(df->dyncfg.source)); + buffer_json_member_add_uint64(wb, "created_ut", df->dyncfg.created_ut); + buffer_json_member_add_uint64(wb, "modified_ut", df->dyncfg.modified_ut); + buffer_json_member_add_string(wb, "content_type", content_type_id2string(df->dyncfg.payload->content_type)); + buffer_json_member_add_uint64(wb, "content_length", df->dyncfg.payload->len); + } else + buffer_json_member_add_boolean(wb, "available", false); + } + buffer_json_object_close(wb); // payload + buffer_json_member_add_uint64(wb, "saves", df->dyncfg.saves); + buffer_json_member_add_uint64(wb, "created_ut", df->current.created_ut); + buffer_json_member_add_uint64(wb, "modified_ut", df->current.modified_ut); + } + buffer_json_object_close(wb); +} + +static void dyncfg_tree_for_host(RRDHOST *host, BUFFER *wb, const char *path, const char *id) { + size_t entries = dictionary_entries(dyncfg_globals.nodes); + size_t used = 0; + const DICTIONARY_ITEM *items[entries]; + size_t restart_required = 0, plugin_rejected = 0, status_incomplete = 0, status_failed = 0; + + STRING *template = NULL; + if(id && *id) + template = string_strdupz(id); + + ND_UUID host_uuid = uuid2UUID(host->host_uuid); + + size_t path_len = strlen(path); + DYNCFG *df; + dfe_start_read(dyncfg_globals.nodes, df) { + if(!UUIDeq(df->host_uuid, host_uuid)) + continue; + + if(strncmp(string2str(df->path), path, path_len) != 0) + continue; + + if(!rrd_function_available(host, string2str(df->function))) + df->current.status = DYNCFG_STATUS_ORPHAN; + + if((id && strcmp(id, df_dfe.name) != 0) && (template && df->template != template)) + continue; + + items[used++] = dictionary_acquired_item_dup(dyncfg_globals.nodes, df_dfe.item); + } + dfe_done(df); + + if(used > 1) + qsort(items, used, sizeof(const DICTIONARY_ITEM *), dyncfg_tree_compar); + + buffer_flush(wb); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); + + buffer_json_member_add_uint64(wb, "version", 1); + + buffer_json_member_add_object(wb, "tree"); + { + STRING 
*last_path = NULL; + for (size_t i = 0; i < used; i++) { + df = dictionary_acquired_item_value(items[i]); + if (df->path != last_path) { + last_path = df->path; + + if (i) + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, string2str(last_path)); + } + + dyncfg_to_json(df, dictionary_acquired_item_name(items[i]), wb); + + if (df->dyncfg.plugin_rejected) + plugin_rejected++; + + if(df->current.status != DYNCFG_STATUS_ORPHAN) { + if (df->dyncfg.restart_required) + restart_required++; + + if (df->current.status == DYNCFG_STATUS_FAILED) + status_failed++; + + if (df->current.status == DYNCFG_STATUS_INCOMPLETE) + status_incomplete++; + } + } + + if (used) + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // tree + + buffer_json_member_add_object(wb, "attention"); + { + buffer_json_member_add_boolean(wb, "degraded", restart_required + plugin_rejected + status_failed + status_incomplete > 0); + buffer_json_member_add_uint64(wb, "restart_required", restart_required); + buffer_json_member_add_uint64(wb, "plugin_rejected", plugin_rejected); + buffer_json_member_add_uint64(wb, "status_failed", status_failed); + buffer_json_member_add_uint64(wb, "status_incomplete", status_incomplete); + } + buffer_json_object_close(wb); // attention + + buffer_json_agents_v2(wb, NULL, 0, false, false); + + buffer_json_finalize(wb); + + for(size_t i = 0; i < used ;i++) + dictionary_acquired_item_release(dyncfg_globals.nodes, items[i]); +} + +static int dyncfg_config_execute_cb(struct rrd_function_execute *rfe, void *data) { + RRDHOST *host = data; + int code; + + char buf[strlen(rfe->function) + 1]; + memcpy(buf, rfe->function, sizeof(buf)); + + char *words[MAX_FUNCTION_PARAMETERS]; // an array of pointers for the words in this line + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, MAX_FUNCTION_PARAMETERS); + + const char *config = get_word(words, num_words, 0); + const char *action = get_word(words, num_words, 1); + const char *path = get_word(words, num_words, 2); + const char *id = get_word(words, num_words, 3); + + if(!config || !*config || strcmp(config, PLUGINSD_FUNCTION_CONFIG) != 0) { + char *msg = "invalid function call, expected: config"; + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG TREE: function call '%s': %s", rfe->function, msg); + code = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(!action || !*action) { + char *msg = "invalid function call, expected: config tree"; + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG TREE: function call '%s': %s", rfe->function, msg); + code = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(strcmp(action, "tree") == 0) { + if(!path || !*path) + path = "/"; + + if(!id || !*id) + id = NULL; + else if(!dyncfg_is_valid_id(id)) { + char *msg = "invalid id given"; + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG TREE: function call '%s': %s", rfe->function, msg); + code = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + code = HTTP_RESP_OK; + dyncfg_tree_for_host(host, rfe->result.wb, path, id); + } + else { + const char *name = id; + id = action; + action = path; + path = NULL; + + DYNCFG_CMDS cmd = dyncfg_cmds2id(action); + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(!item) { + item = dyncfg_get_template_of_new_job(id); + + if(item && (!name || !*name)) { + const char *n = dictionary_acquired_item_name(item); + if(strncmp(id, n, strlen(n)) == 0 && 
id[strlen(n)] == ':') + name = &id[strlen(n) + 1]; + } + } + + if(item) { + DYNCFG *df = dictionary_acquired_item_value(item); + + if(!rrd_function_available(host, string2str(df->function))) + df->current.status = DYNCFG_STATUS_ORPHAN; + + if(cmd == DYNCFG_CMD_REMOVE) { + bool delete = (df->current.status == DYNCFG_STATUS_ORPHAN); + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + item = NULL; + + if(delete) { + if(!http_access_user_has_enough_access_level_for_endpoint(rfe->user_access, df->edit_access)) { + code = dyncfg_default_response( + rfe->result.wb, HTTP_RESP_FORBIDDEN, + "dyncfg: you don't have enough edit permissions to execute this command"); + goto cleanup; + } + + dictionary_del(dyncfg_globals.nodes, id); + dyncfg_file_delete(id); + code = dyncfg_default_response(rfe->result.wb, 200, ""); + goto cleanup; + } + } + else if((cmd == DYNCFG_CMD_USERCONFIG || cmd == DYNCFG_CMD_TEST) && df->current.status != DYNCFG_STATUS_ORPHAN) { + const char *old_rfe_function = rfe->function; + char buf2[2048]; + snprintfz(buf2, sizeof(buf2), "config %s %s %s", dictionary_acquired_item_name(item), action, name?name:""); + rfe->function = buf2; + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + item = NULL; + code = dyncfg_function_intercept_cb(rfe, data); + rfe->function = old_rfe_function; + return code; + } + + if(item) + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + } + + code = HTTP_RESP_NOT_FOUND; + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG: unknown config id '%s' in call: '%s'. " + "This can happen if the plugin that registered the dynamic configuration is not running now.", + id, rfe->function); + + rrd_call_function_error( + rfe->result.wb, + "unknown config id given", code); + } + +cleanup: + if(rfe->result.cb) + rfe->result.cb(rfe->result.wb, code, rfe->result.data); + + return code; +} + +// ---------------------------------------------------------------------------- +// this adds a 'config' function to all leaf nodes (localhost and virtual nodes) +// which is used to serve the tree and act as a catch-all for all config calls +// for which there is no id overloaded. + +void dyncfg_host_init(RRDHOST *host) { + // IMPORTANT: + // This function needs to be async, although it is internal. + // The reason is that it can call by itself another function that may or may not be internal (sync). 
+ + rrd_function_add(host, NULL, PLUGINSD_FUNCTION_CONFIG, 120, + 1000, "Dynamic configuration", "config", HTTP_ACCESS_ANONYMOUS_DATA, + false, dyncfg_config_execute_cb, host); +} diff --git a/src/daemon/config/dyncfg-unittest.c b/src/daemon/config/dyncfg-unittest.c new file mode 100644 index 000000000..775dc7cbd --- /dev/null +++ b/src/daemon/config/dyncfg-unittest.c @@ -0,0 +1,799 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +// ---------------------------------------------------------------------------- +// unit test + +#define LINE_FILE_STR TOSTRING(__LINE__) "@" __FILE__ + +struct dyncfg_unittest { + bool enabled; + size_t errors; + + DICTIONARY *nodes; + + SPINLOCK spinlock; + struct dyncfg_unittest_action *queue; +} dyncfg_unittest_data = { 0 }; + +typedef struct { + bool enabled; + bool removed; + struct { + double dbl; + bool bln; + } value; +} TEST_CFG; + +typedef struct { + const char *id; + const char *source; + bool sync; + DYNCFG_TYPE type; + DYNCFG_CMDS cmds; + DYNCFG_SOURCE_TYPE source_type; + + TEST_CFG current; + TEST_CFG expected; + + bool received; + bool finished; + + size_t last_saves; + bool needs_save; +} TEST; + +struct dyncfg_unittest_action { + TEST *t; + BUFFER *result; + BUFFER *payload; + DYNCFG_CMDS cmd; + const char *add_name; + const char *source; + + rrd_function_result_callback_t result_cb; + void *result_cb_data; + + struct dyncfg_unittest_action *prev, *next; +}; + +static void dyncfg_unittest_register_error(const char *id, const char *msg) { + if(msg) + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: error on id '%s': %s", id ? id : "", msg); + + __atomic_add_fetch(&dyncfg_unittest_data.errors, 1, __ATOMIC_RELAXED); +} + +static int dyncfg_unittest_execute_cb(struct rrd_function_execute *rfe, void *data); + +bool dyncfg_unittest_parse_payload(BUFFER *payload, TEST *t, DYNCFG_CMDS cmd, const char *add_name, const char *source) { + CLEAN_JSON_OBJECT *jobj = json_tokener_parse(buffer_tostring(payload)); + if(!jobj) { + dyncfg_unittest_register_error(t->id, "cannot parse json payload"); + return false; + } + + struct json_object *json_double; + struct json_object *json_boolean; + + json_object_object_get_ex(jobj, "double", &json_double); + double value_double = json_object_get_double(json_double); + + json_object_object_get_ex(jobj, "boolean", &json_boolean); + int value_boolean = json_object_get_boolean(json_boolean); + + if(cmd == DYNCFG_CMD_UPDATE) { + t->current.value.dbl = value_double; + t->current.value.bln = value_boolean; + } + else if(cmd == DYNCFG_CMD_ADD) { + char buf[strlen(t->id) + strlen(add_name) + 20]; + snprintfz(buf, sizeof(buf), "%s:%s", t->id, add_name); + TEST tmp = { + .id = strdupz(buf), + .source = strdupz(source), + .cmds = (t->cmds & ~DYNCFG_CMD_ADD) | DYNCFG_CMD_GET | DYNCFG_CMD_REMOVE | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE | DYNCFG_CMD_TEST, + .sync = t->sync, + .type = DYNCFG_TYPE_JOB, + .source_type = DYNCFG_SOURCE_TYPE_DYNCFG, + .received = true, + .finished = true, + .current = + {.enabled = true, + .removed = false, + .value = + { + .dbl = value_double, + .bln = value_boolean, + }}, + .expected = { + .enabled = true, + .removed = false, + .value = { + .dbl = 3.14, + .bln = true, + } + }, + .needs_save = true, + }; + const DICTIONARY_ITEM *item = dictionary_set_and_acquire_item(dyncfg_unittest_data.nodes, buf, &tmp, sizeof(tmp)); + TEST *t2 = dictionary_acquired_item_value(item); + dictionary_acquired_item_release(dyncfg_unittest_data.nodes, 
item); + + dyncfg_add_low_level(localhost, t2->id, "/unittests", + DYNCFG_STATUS_RUNNING, t2->type, t2->source_type, t2->source, + t2->cmds, 0, 0, t2->sync, + HTTP_ACCESS_NONE, HTTP_ACCESS_NONE, + dyncfg_unittest_execute_cb, t2); + } + else { + dyncfg_unittest_register_error(t->id, "invalid command received to parse payload"); + return false; + } + + return true; +} + +static int dyncfg_unittest_action(struct dyncfg_unittest_action *a) { + TEST *t = a->t; + + int rc = HTTP_RESP_OK; + + if(a->cmd == DYNCFG_CMD_ENABLE) + t->current.enabled = true; + else if(a->cmd == DYNCFG_CMD_DISABLE) + t->current.enabled = false; + else if(a->cmd == DYNCFG_CMD_ADD || a->cmd == DYNCFG_CMD_UPDATE) + rc = dyncfg_unittest_parse_payload(a->payload, a->t, a->cmd, a->add_name, a->source) ? HTTP_RESP_OK : HTTP_RESP_BAD_REQUEST; + else if(a->cmd == DYNCFG_CMD_REMOVE) + t->current.removed = true; + else + rc = HTTP_RESP_BAD_REQUEST; + + dyncfg_default_response(a->result, rc, NULL); + + a->result_cb(a->result, rc, a->result_cb_data); + + buffer_free(a->payload); + freez((void *)a->add_name); + freez(a); + + __atomic_store_n(&t->finished, true, __ATOMIC_RELAXED); + + return rc; +} + +static void *dyncfg_unittest_thread_action(void *ptr) { + while(!nd_thread_signaled_to_cancel()) { + struct dyncfg_unittest_action *a = NULL; + spinlock_lock(&dyncfg_unittest_data.spinlock); + a = dyncfg_unittest_data.queue; + if(a) + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(dyncfg_unittest_data.queue, a, prev, next); + spinlock_unlock(&dyncfg_unittest_data.spinlock); + + if(a) + dyncfg_unittest_action(a); + else + sleep_usec(10 * USEC_PER_MS); + } + + return ptr; +} + +static int dyncfg_unittest_execute_cb(struct rrd_function_execute *rfe, void *data) { + + int rc; + bool run_the_callback = true; + TEST *t = data; + + t->received = true; + + char buf[strlen(rfe->function) + 1]; + memcpy(buf, rfe->function, sizeof(buf)); + + char *words[MAX_FUNCTION_PARAMETERS]; // an array of pointers for the words in this line + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, MAX_FUNCTION_PARAMETERS); + + const char *config = get_word(words, num_words, 0); + const char *id = get_word(words, num_words, 1); + const char *action = get_word(words, num_words, 2); + const char *add_name = get_word(words, num_words, 3); + + if(!config || !*config || strcmp(config, PLUGINSD_FUNCTION_CONFIG) != 0) { + char *msg = "did not receive a config call"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(!id || !*id) { + char *msg = "did not receive an id"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(t->type != DYNCFG_TYPE_TEMPLATE && strcmp(t->id, id) != 0) { + char *msg = "the id received is not the expected one"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(!action || !*action) { + char *msg = "did not receive an action"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + DYNCFG_CMDS cmd = dyncfg_cmds2id(action); + if(cmd == DYNCFG_CMD_NONE) { + char *msg = "the action received is not known"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(!(t->cmds & cmd)) { + char *msg = "received a 
command that is not supported"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + if(t->current.removed && cmd != DYNCFG_CMD_ADD) { + char *msg = "received a command for a removed entry"; + dyncfg_unittest_register_error(id, msg); + rc = dyncfg_default_response(rfe->result.wb, HTTP_RESP_BAD_REQUEST, msg); + goto cleanup; + } + + struct dyncfg_unittest_action *a = callocz(1, sizeof(*a)); + a->t = t; + a->add_name = add_name ? strdupz(add_name) : NULL; + a->source = rfe->source, + a->result = rfe->result.wb; + a->payload = buffer_dup(rfe->payload); + a->cmd = cmd; + a->result_cb = rfe->result.cb; + a->result_cb_data = rfe->result.data; + + run_the_callback = false; + + if(t->sync) + rc = dyncfg_unittest_action(a); + else { + spinlock_lock(&dyncfg_unittest_data.spinlock); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(dyncfg_unittest_data.queue, a, prev, next); + spinlock_unlock(&dyncfg_unittest_data.spinlock); + rc = HTTP_RESP_OK; + } + +cleanup: + if(run_the_callback) { + __atomic_store_n(&t->finished, true, __ATOMIC_RELAXED); + + if (rfe->result.cb) + rfe->result.cb(rfe->result.wb, rc, rfe->result.data); + } + + return rc; +} + +static bool dyncfg_unittest_check(TEST *t, DYNCFG_CMDS c, const char *cmd, bool received) { + size_t errors = 0; + + fprintf(stderr, "CHECK '%s' after cmd '%s'...", t->id, cmd); + + if(t->received != received) { + fprintf(stderr, "\n - received flag found '%s', expected '%s'", + t->received?"true":"false", + received?"true":"false"); + errors++; + goto cleanup; + } + + if(!received) + goto cleanup; + + usec_t give_up_ut = now_monotonic_usec() + 2 * USEC_PER_SEC; + while(!__atomic_load_n(&t->finished, __ATOMIC_RELAXED)) { + tinysleep(); + + if(now_monotonic_usec() > give_up_ut) { + fprintf(stderr, "\n - gave up waiting for the plugin to process this!"); + errors++; + goto cleanup; + } + } + + if(t->type != DYNCFG_TYPE_TEMPLATE && t->current.enabled != t->expected.enabled) { + fprintf(stderr, "\n - enabled flag found '%s', expected '%s'", + t->current.enabled?"true":"false", + t->expected.enabled?"true":"false"); + errors++; + } + if(t->current.removed != t->expected.removed) { + fprintf(stderr, "\n - removed flag found '%s', expected '%s'", + t->current.removed?"true":"false", + t->expected.removed?"true":"false"); + errors++; + } + if(t->current.value.bln != t->expected.value.bln) { + fprintf(stderr, "\n - boolean value found '%s', expected '%s'", + t->current.value.bln?"true":"false", + t->expected.value.bln?"true":"false"); + errors++; + } + if(t->current.value.dbl != t->expected.value.dbl) { + fprintf(stderr, "\n - double value found '%f', expected '%f'", + t->current.value.dbl, t->expected.value.dbl); + errors++; + } + + DYNCFG *df = dictionary_get(dyncfg_globals.nodes, t->id); + if(!df) { + fprintf(stderr, "\n - not found in DYNCFG nodes dictionary!"); + errors++; + } + else if(df->cmds != t->cmds) { + fprintf(stderr, "\n - has different cmds in DYNCFG nodes dictionary; found: "); + dyncfg_cmds2fp(df->cmds, stderr); + fprintf(stderr, ", expected: "); + dyncfg_cmds2fp(t->cmds, stderr); + fprintf(stderr, "\n"); + errors++; + } + else if(df->type == DYNCFG_TYPE_JOB && df->current.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && !df->dyncfg.saves) { + fprintf(stderr, "\n - DYNCFG job has no saves!"); + errors++; + } + else if(df->type == DYNCFG_TYPE_JOB && df->current.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && (!df->dyncfg.payload || !buffer_strlen(df->dyncfg.payload))) { + 
fprintf(stderr, "\n - DYNCFG job has no payload!"); + errors++; + } + else if(df->dyncfg.user_disabled && !df->dyncfg.saves) { + fprintf(stderr, "\n - DYNCFG disabled config has no saves!"); + errors++; + } + else if((c & (DYNCFG_CMD_ADD | DYNCFG_CMD_UPDATE)) && t->source && string_strcmp(df->current.source, t->source) != 0) { + fprintf(stderr, "\n - source does not match!"); + errors++; + } + else if((c & (DYNCFG_CMD_ADD | DYNCFG_CMD_UPDATE)) && df->current.source && !t->source) { + fprintf(stderr, "\n - there is a source, but there should not be one!"); + errors++; + } + else if(t->needs_save && df->dyncfg.saves <= t->last_saves) { + fprintf(stderr, "\n - should have been saved, but it was not!"); + errors++; + } + else if(!t->needs_save && df->dyncfg.saves > t->last_saves) { + fprintf(stderr, "\n - should not have been saved, but it was!"); + errors++; + } + +cleanup: + if(errors) { + fprintf(stderr, "\n >>> FAILED\n\n"); + dyncfg_unittest_register_error(NULL, NULL); + return false; + } + + fprintf(stderr, " OK\n"); + return true; +} + +static void dyncfg_unittest_reset(void) { + TEST *t; + dfe_start_read(dyncfg_unittest_data.nodes, t) { + t->received = t->finished = false; + t->needs_save = false; + + DYNCFG *df = dictionary_get(dyncfg_globals.nodes, t->id); + if(!df) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: cannot find id '%s'", t->id); + dyncfg_unittest_register_error(NULL, NULL); + } + else + t->last_saves = df->dyncfg.saves; + } + dfe_done(t); +} + +void should_be_saved(TEST *t, DYNCFG_CMDS c) { + DYNCFG *df; + + if(t->type == DYNCFG_TYPE_TEMPLATE) { + df = dictionary_get(dyncfg_globals.nodes, t->id); + t->current.enabled = !df->dyncfg.user_disabled; + } + + t->needs_save = + c == DYNCFG_CMD_UPDATE || + (t->current.enabled && c == DYNCFG_CMD_DISABLE) || + (!t->current.enabled && c == DYNCFG_CMD_ENABLE); +} + +static int dyncfg_unittest_run(const char *cmd, BUFFER *wb, const char *payload, const char *source) { + dyncfg_unittest_reset(); + + char buf[strlen(cmd) + 1]; + memcpy(buf, cmd, sizeof(buf)); + + char *words[MAX_FUNCTION_PARAMETERS]; // an array of pointers for the words in this line + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, MAX_FUNCTION_PARAMETERS); + + // const char *config = get_word(words, num_words, 0); + const char *id = get_word(words, num_words, 1); + char *action = get_word(words, num_words, 2); + const char *add_name = get_word(words, num_words, 3); + + DYNCFG_CMDS c = dyncfg_cmds2id(action); + + TEST *t = dictionary_get(dyncfg_unittest_data.nodes, id); + if(!t) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: cannot find id '%s' from cmd: %s", id, cmd); + dyncfg_unittest_register_error(NULL, NULL); + return HTTP_RESP_NOT_FOUND; + } + + if(t->type == DYNCFG_TYPE_TEMPLATE) + t->received = t->finished = true; + + if(c == DYNCFG_CMD_DISABLE) + t->expected.enabled = false; + if(c == DYNCFG_CMD_ENABLE) + t->expected.enabled = true; + if(c == DYNCFG_CMD_UPDATE) + memset(&t->current.value, 0, sizeof(t->current.value)); + + if(c & (DYNCFG_CMD_UPDATE) || (c & (DYNCFG_CMD_DISABLE|DYNCFG_CMD_ENABLE) && t->type != DYNCFG_TYPE_TEMPLATE)) { + freez((void *)t->source); + t->source = strdupz(source); + } + + buffer_flush(wb); + + CLEAN_BUFFER *pld = NULL; + + if(payload) { + pld = buffer_create(1024, NULL); + buffer_strcat(pld, payload); + } + + should_be_saved(t, c); + + int rc = rrd_function_run(localhost, wb, 10, HTTP_ACCESS_ALL, cmd, + true, NULL, + NULL, NULL, + NULL, NULL, + NULL, NULL, + pld, source); + if(!DYNCFG_RESP_SUCCESS(rc)) { + 
nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: failed to run: %s; returned code %d", cmd, rc); + dyncfg_unittest_register_error(NULL, NULL); + } + + dyncfg_unittest_check(t, c, cmd, true); + + if(rc == HTTP_RESP_OK && t->type == DYNCFG_TYPE_TEMPLATE) { + if(c == DYNCFG_CMD_ADD) { + char buf2[strlen(id) + strlen(add_name) + 2]; + snprintfz(buf2, sizeof(buf2), "%s:%s", id, add_name); + TEST *tt = dictionary_get(dyncfg_unittest_data.nodes, buf2); + if (!tt) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG UNITTEST: failed to find newly added id '%s' of command: %s", + id, cmd); + dyncfg_unittest_register_error(NULL, NULL); + } + dyncfg_unittest_check(tt, c, cmd, true); + } + else { + STRING *template = string_strdupz(t->id); + DYNCFG *df; + dfe_start_read(dyncfg_globals.nodes, df) { + if(df->type == DYNCFG_TYPE_JOB && df->template == template) { + TEST *tt = dictionary_get(dyncfg_unittest_data.nodes, df_dfe.name); + if (!tt) { + nd_log(NDLS_DAEMON, NDLP_ERR, + "DYNCFG UNITTEST: failed to find id '%s' while running command: %s", df_dfe.name, cmd); + dyncfg_unittest_register_error(NULL, NULL); + } + else { + if(c == DYNCFG_CMD_DISABLE) + tt->expected.enabled = false; + if(c == DYNCFG_CMD_ENABLE) + tt->expected.enabled = true; + dyncfg_unittest_check(tt, c, cmd, true); + } + } + } + dfe_done(df); + string_freez(template); + } + } + + return rc; +} + +static void dyncfg_unittest_cleanup_files(void) { + char path[FILENAME_MAX]; + snprintfz(path, sizeof(path) - 1, "%s/%s", netdata_configured_varlib_dir, "config"); + + DIR *dir = opendir(path); + if (!dir) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG UNITTEST: cannot open directory '%s'", path); + return; + } + + struct dirent *entry; + char filename[FILENAME_MAX + sizeof(entry->d_name)]; + while ((entry = readdir(dir)) != NULL) { + if ((entry->d_type == DT_REG || entry->d_type == DT_LNK) && strstartswith(entry->d_name, "unittest:") && strendswith(entry->d_name, ".dyncfg")) { + snprintf(filename, sizeof(filename), "%s/%s", path, entry->d_name); + nd_log(NDLS_DAEMON, NDLP_INFO, "DYNCFG UNITTEST: deleting file '%s'", filename); + unlink(filename); + } + } + + closedir(dir); +} + +static TEST *dyncfg_unittest_add(TEST t) { + dyncfg_unittest_reset(); + + TEST *ret = dictionary_set(dyncfg_unittest_data.nodes, t.id, &t, sizeof(t)); + + if(!dyncfg_add_low_level(localhost, t.id, "/unittests", DYNCFG_STATUS_RUNNING, t.type, + t.source_type, t.source, + t.cmds, 0, 0, t.sync, + HTTP_ACCESS_NONE, HTTP_ACCESS_NONE, + dyncfg_unittest_execute_cb, ret)) { + dyncfg_unittest_register_error(t.id, "addition of job failed"); + } + + dyncfg_unittest_check(ret, DYNCFG_CMD_NONE, "plugin create", t.type != DYNCFG_TYPE_TEMPLATE); + + return ret; +} + +void dyncfg_unittest_delete_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + TEST *v = value; + freez((void *)v->id); + freez((void *)v->source); +} + +int dyncfg_unittest(void) { + dyncfg_unittest_data.nodes = dictionary_create(DICT_OPTION_NONE); + dictionary_register_delete_callback(dyncfg_unittest_data.nodes, dyncfg_unittest_delete_cb, NULL); + + dyncfg_unittest_cleanup_files(); + rrd_functions_inflight_init(); + dyncfg_init(false); + + // ------------------------------------------------------------------------ + // create the thread for testing async communication + + ND_THREAD *thread = nd_thread_create("unittest", NETDATA_THREAD_OPTION_JOINABLE, dyncfg_unittest_thread_action, NULL); + + // ------------------------------------------------------------------------ + // single + + TEST 
*single1 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:single1"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_SINGLE, + .cmds = DYNCFG_CMD_GET | DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, + .sync = true, + .current = { + .enabled = true, + }, + .expected = { + .enabled = true, + } + }); (void)single1; + + TEST *single2 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:single2"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_SINGLE, + .cmds = DYNCFG_CMD_GET | DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, + .sync = false, + .current = { + .enabled = true, + }, + .expected = { + .enabled = true, + } + }); (void)single2; + + // ------------------------------------------------------------------------ + // template + + TEST *template1 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:template1"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_TEMPLATE, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_ADD | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, + .sync = true, + }); (void)template1; + + TEST *template2 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:template2"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_TEMPLATE, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_ADD | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, + .sync = false, + }); (void)template2; + + // ------------------------------------------------------------------------ + // job + + TEST *user1 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:template1:user1"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = true, + .current = { + .enabled = true, + }, + .expected = { + .enabled = true, + } + }); (void)user1; + + TEST *user2 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:template2:user2"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = false, + .expected = { + .enabled = true, + } + }); (void)user2; + + // ------------------------------------------------------------------------ + + int rc; (void)rc; + BUFFER *wb = buffer_create(0, NULL); + + // ------------------------------------------------------------------------ + // dynamic job + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 add dyn1", wb, "{\"double\":3.14,\"boolean\":true}", LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 add dyn2", wb, "{\"double\":3.14,\"boolean\":true}", LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 add dyn3", wb, "{\"double\":3.14,\"boolean\":true}", LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 add dyn4", wb, "{\"double\":3.14,\"boolean\":true}", LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // saving of user_disabled + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:single1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " 
unittest:async:single2 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:user1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:user2 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:dyn1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:dyn2 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:dyn3 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:dyn4 disable", wb, NULL, LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // enabling + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:single1 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:single2 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:user1 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:user2 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:dyn1 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:dyn2 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:dyn3 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:dyn4 enable", wb, NULL, LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // disabling template + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 disable", wb, NULL, LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // enabling template + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 enable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 enable", wb, NULL, LINE_FILE_STR); + + // ------------------------------------------------------------------------ + // adding job on disabled template + + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 disable", wb, NULL, LINE_FILE_STR); + dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 disable", wb, NULL, LINE_FILE_STR); + + TEST *user3 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:template1:user3"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = true, + .expected = { + .enabled = false, + } + }); (void)user3; + + TEST *user4 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:template2:user4"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = false, + .expected = { + .enabled = false, + } + }); (void)user4; + + TEST *user5 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:sync:template1:user5"), 
+ .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = true, + .expected = { + .enabled = false, + } + }); (void)user5; + + TEST *user6 = dyncfg_unittest_add((TEST){ + .id = strdupz("unittest:async:template2:user6"), + .source = strdupz(LINE_FILE_STR), + .type = DYNCFG_TYPE_JOB, + .cmds = DYNCFG_CMD_SCHEMA | DYNCFG_CMD_UPDATE | DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE, + .source_type = DYNCFG_SOURCE_TYPE_USER, + .sync = false, + .expected = { + .enabled = false, + } + }); (void)user6; + +// dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1:user5 disable", wb, NULL, LINE_FILE_STR); +// dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2:user6 disable", wb, NULL, LINE_FILE_STR); + +// // ------------------------------------------------------------------------ +// // enable template with disabled jobs +// +// user3->expected.enabled = true; +// user5->expected.enabled = false; +// dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:sync:template1 enable", wb, NULL, LINE_FILE_STR); +// +// user4->expected.enabled = true; +// user6->expected.enabled = false; +// dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " unittest:async:template2 enable", wb, NULL, LINE_FILE_STR); + + +// // ------------------------------------------------------------------------ +// +// rc = dyncfg_unittest_run(PLUGINSD_FUNCTION_CONFIG " tree", wb, NULL); +// if(rc == HTTP_RESP_OK) +// fprintf(stderr, "%s\n", buffer_tostring(wb)); + + nd_thread_signal_cancel(thread); + nd_thread_join(thread); + dyncfg_unittest_cleanup_files(); + dictionary_destroy(dyncfg_unittest_data.nodes); + buffer_free(wb); + return __atomic_load_n(&dyncfg_unittest_data.errors, __ATOMIC_RELAXED) > 0 ? 
1 : 0; +} diff --git a/src/daemon/config/dyncfg.c b/src/daemon/config/dyncfg.c new file mode 100644 index 000000000..2f484d1ed --- /dev/null +++ b/src/daemon/config/dyncfg.c @@ -0,0 +1,454 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dyncfg-internals.h" +#include "dyncfg.h" + +struct dyncfg_globals dyncfg_globals = { 0 }; + +RRDHOST *dyncfg_rrdhost_by_uuid(ND_UUID *uuid) { + char uuid_str[UUID_STR_LEN]; + uuid_unparse_lower(uuid->uuid, uuid_str); + + RRDHOST *host = rrdhost_find_by_guid(uuid_str); + if(!host) + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: cannot find host with UUID '%s'", uuid_str); + + return host; +} + +RRDHOST *dyncfg_rrdhost(DYNCFG *df) { + return dyncfg_rrdhost_by_uuid(&df->host_uuid); +} + +void dyncfg_cleanup(DYNCFG *v) { + string_freez(v->dyncfg.source); + v->dyncfg.source = NULL; + + buffer_free(v->dyncfg.payload); + v->dyncfg.payload = NULL; + + string_freez(v->path); + v->path = NULL; + + string_freez(v->current.source); + v->current.source = NULL; + + string_freez(v->function); + v->function = NULL; + + string_freez(v->template); + v->template = NULL; +} + +static void dyncfg_normalize(DYNCFG *df) { + usec_t now_ut = now_realtime_usec(); + + if(!df->current.created_ut) + df->current.created_ut = now_ut; + + if(!df->current.modified_ut) + df->current.modified_ut = now_ut; +} + +static void dyncfg_delete_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + DYNCFG *df = value; + dyncfg_cleanup(df); +} + +static void dyncfg_insert_cb(const DICTIONARY_ITEM *item, void *value, void *data __maybe_unused) { + DYNCFG *df = value; + dyncfg_normalize(df); + + const char *id = dictionary_acquired_item_name(item); + char buf[strlen(id) + 20]; + snprintfz(buf, sizeof(buf), PLUGINSD_FUNCTION_CONFIG " %s", id); + df->function = string_strdupz(buf); + + if(df->type == DYNCFG_TYPE_JOB && !df->template) { + const char *last_colon = strrchr(id, ':'); + if(last_colon) + df->template = string_strndupz(id, last_colon - id); + else + nd_log(NDLS_DAEMON, NDLP_WARNING, + "DYNCFG: id '%s' is a job, but does not contain a colon to find the template", id); + } +} + +static void dyncfg_react_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + DYNCFG *df = value; (void)df; + ; +} + +static bool dyncfg_conflict_cb(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value, void *data) { + bool *overwrite_cb_ptr = data; + bool overwrite_cb = (overwrite_cb_ptr && *overwrite_cb_ptr); + + DYNCFG *v = old_value; + DYNCFG *nv = new_value; + + size_t changes = 0; + + dyncfg_normalize(nv); + + if(!UUIDeq(v->host_uuid, nv->host_uuid)) { + SWAP(v->host_uuid, nv->host_uuid); + changes++; + } + + if(v->path != nv->path) { + SWAP(v->path, nv->path); + changes++; + } + + if(v->cmds != nv->cmds) { + SWAP(v->cmds, nv->cmds); + changes++; + } + + if(v->type != nv->type) { + SWAP(v->type, nv->type); + changes++; + } + + if(v->view_access != nv->view_access) { + SWAP(v->view_access, nv->view_access); + changes++; + } + + if(v->edit_access != nv->edit_access) { + SWAP(v->edit_access, nv->edit_access); + changes++; + } + + if(v->current.status != nv->current.status) { + SWAP(v->current.status, nv->current.status); + changes++; + } + + if (v->current.source_type != nv->current.source_type) { + SWAP(v->current.source_type, nv->current.source_type); + changes++; + } + + if (v->current.source != nv->current.source) { + SWAP(v->current.source, nv->current.source); + changes++; + } + + if(nv->current.created_ut < 
v->current.created_ut) { + SWAP(v->current.created_ut, nv->current.created_ut); + changes++; + } + + if(nv->current.modified_ut > v->current.modified_ut) { + SWAP(v->current.modified_ut, nv->current.modified_ut); + changes++; + } + + if(!v->execute_cb || (overwrite_cb && nv->execute_cb && (v->execute_cb != nv->execute_cb || v->execute_cb_data != nv->execute_cb_data))) { + v->sync = nv->sync, + v->execute_cb = nv->execute_cb; + v->execute_cb_data = nv->execute_cb_data; + changes++; + } + + dyncfg_cleanup(nv); + + return changes > 0; +} + +// ---------------------------------------------------------------------------- + +void dyncfg_init_low_level(bool load_saved) { + if(!dyncfg_globals.nodes) { + dyncfg_globals.nodes = dictionary_create_advanced(DICT_OPTION_FIXED_SIZE | DICT_OPTION_DONT_OVERWRITE_VALUE, NULL, sizeof(DYNCFG)); + dictionary_register_insert_callback(dyncfg_globals.nodes, dyncfg_insert_cb, NULL); + dictionary_register_react_callback(dyncfg_globals.nodes, dyncfg_react_cb, NULL); + dictionary_register_conflict_callback(dyncfg_globals.nodes, dyncfg_conflict_cb, NULL); + dictionary_register_delete_callback(dyncfg_globals.nodes, dyncfg_delete_cb, NULL); + + char path[PATH_MAX]; + snprintfz(path, sizeof(path), "%s/%s", netdata_configured_varlib_dir, "config"); + + if(mkdir(path, 0755) == -1) { + if(errno != EEXIST) + nd_log(NDLS_DAEMON, NDLP_CRIT, "DYNCFG: failed to create dynamic configuration directory '%s'", path); + } + + dyncfg_globals.dir = strdupz(path); + + if(load_saved) + dyncfg_load_all(); + } +} + +// ---------------------------------------------------------------------------- + +const DICTIONARY_ITEM *dyncfg_add_internal(RRDHOST *host, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, + const char *source, DYNCFG_CMDS cmds, + usec_t created_ut, usec_t modified_ut, + bool sync, HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data, + bool overwrite_cb) { + DYNCFG tmp = { + .host_uuid = uuid2UUID(host->host_uuid), + .path = string_strdupz(path), + .cmds = cmds, + .type = type, + .view_access = view_access, + .edit_access = edit_access, + .current = { + .status = status, + .source_type = source_type, + .source = string_strdupz(source), + .created_ut = created_ut, + .modified_ut = modified_ut, + }, + .sync = sync, + .dyncfg = { 0 }, + .execute_cb = execute_cb, + .execute_cb_data = execute_cb_data, + }; + + return dictionary_set_and_acquire_item_advanced(dyncfg_globals.nodes, id, -1, &tmp, sizeof(tmp), &overwrite_cb); +} + +static void dyncfg_send_updates(const char *id) { + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item_advanced(dyncfg_globals.nodes, id, -1); + if(!item) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: asked to update plugin for configuration '%s', but it is not found.", id); + return; + } + + DYNCFG *df = dictionary_acquired_item_value(item); + + if(df->type == DYNCFG_TYPE_SINGLE || df->type == DYNCFG_TYPE_JOB) { + if (df->cmds & DYNCFG_CMD_UPDATE && df->dyncfg.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && df->dyncfg.payload && buffer_strlen(df->dyncfg.payload)) + dyncfg_echo_update(item, df, id); + } + else if(df->type == DYNCFG_TYPE_TEMPLATE && (df->cmds & DYNCFG_CMD_ADD)) { + STRING *template = string_strdupz(id); + + size_t len = strlen(id); + DYNCFG *df_job; + dfe_start_reentrant(dyncfg_globals.nodes, df_job) { + const char *id_template = df_job_dfe.name; + if(df_job->type == DYNCFG_TYPE_JOB && // it is a job + 
df_job->current.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && // it is dynamically configured + df_job->template == template && // it has the same template name + strncmp(id_template, id, len) == 0 && // the template name matches (redundant) + id_template[len] == ':' && // immediately after the template there is ':' + id_template[len + 1]) { // and there is something else after the ':' + dyncfg_echo_add(item, df_job_dfe.item, df, df_job, id, &id_template[len + 1]); + } + } + dfe_done(df_job); + + string_freez(template); + } + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); +} + +bool dyncfg_is_user_disabled(const char *id) { + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(!item) + return false; + + DYNCFG *df = dictionary_acquired_item_value(item); + bool ret = df->dyncfg.user_disabled; + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + return ret; +} + +bool dyncfg_job_has_registered_template(const char *id) { + char buf[strlen(id) + 1]; + memcpy(buf, id, sizeof(buf)); + char *colon = strrchr(buf, ':'); + if(!colon) + return false; + + *colon = '\0'; + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, buf); + if(!item) + return false; + + DYNCFG *df = dictionary_acquired_item_value(item); + bool ret = df->type == DYNCFG_TYPE_TEMPLATE; + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + return ret; +} + +bool dyncfg_add_low_level(RRDHOST *host, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, + DYNCFG_CMDS cmds, usec_t created_ut, usec_t modified_ut, bool sync, + HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data) { + + if(view_access == HTTP_ACCESS_NONE) + view_access = HTTP_ACCESS_SIGNED_ID | HTTP_ACCESS_SAME_SPACE | HTTP_ACCESS_VIEW_AGENT_CONFIG; + + if(edit_access == HTTP_ACCESS_NONE) + edit_access = HTTP_ACCESS_SIGNED_ID | HTTP_ACCESS_SAME_SPACE | HTTP_ACCESS_EDIT_AGENT_CONFIG | HTTP_ACCESS_COMMERCIAL_SPACE; + + if(!dyncfg_is_valid_id(id)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id); + return false; + } + + if(type == DYNCFG_TYPE_JOB && !dyncfg_job_has_registered_template(id)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: job id '%s' does not have a registered template. 
Ignoring dynamic configuration for it.", id); + return false; + } + + DYNCFG_CMDS old_cmds = cmds; + + // all configurations support schema + cmds |= DYNCFG_CMD_SCHEMA; + + // if there is either enable or disable, both are supported + if(cmds & (DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE)) + cmds |= DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE; + + // add + if(type == DYNCFG_TYPE_TEMPLATE) { + // templates must always support "add" + cmds |= DYNCFG_CMD_ADD; + } + else { + // only templates can have "add" + cmds &= ~DYNCFG_CMD_ADD; + } + + // remove + if(source_type != DYNCFG_SOURCE_TYPE_DYNCFG || type != DYNCFG_TYPE_JOB) { + // remove is only available for dyncfg jobs + cmds &= ~DYNCFG_CMD_REMOVE; + } + + // data + if(type == DYNCFG_TYPE_TEMPLATE) { + // templates do not have data + cmds &= ~(DYNCFG_CMD_GET | DYNCFG_CMD_UPDATE); + } + + if(cmds != old_cmds) { + CLEAN_BUFFER *t = buffer_create(1024, NULL); + buffer_sprintf(t, "DYNCFG: id '%s' was declared with cmds: ", id); + dyncfg_cmds2buffer(old_cmds, t); + buffer_strcat(t, ", but they have been sanitized to: "); + dyncfg_cmds2buffer(cmds, t); + nd_log(NDLS_DAEMON, NDLP_NOTICE, "%s", buffer_tostring(t)); + } + + const DICTIONARY_ITEM *item = dyncfg_add_internal(host, id, path, status, type, source_type, source, cmds, + created_ut, modified_ut, sync, view_access, edit_access, + execute_cb, execute_cb_data, true); + DYNCFG *df = dictionary_acquired_item_value(item); + +// if(df->source_type == DYNCFG_SOURCE_TYPE_DYNCFG && !df->saves) +// nd_log(NDLS_DAEMON, NDLP_WARNING, "DYNCFG: configuration '%s' is created with source type dyncfg, but we don't have a saved configuration for it", id); + + rrd_collector_started(); + rrd_function_add( + host, + NULL, + string2str(df->function), + 120, + 1000, + "Dynamic configuration", + "config", + (view_access & edit_access), + sync, + dyncfg_function_intercept_cb, + NULL); + + if(df->type != DYNCFG_TYPE_TEMPLATE && (df->cmds & (DYNCFG_CMD_ENABLE|DYNCFG_CMD_DISABLE))) { + DYNCFG_CMDS status_to_send_to_plugin = + (df->dyncfg.user_disabled || df->current.status == DYNCFG_STATUS_DISABLED) ? DYNCFG_CMD_DISABLE : DYNCFG_CMD_ENABLE; + + if (status_to_send_to_plugin == DYNCFG_CMD_ENABLE && dyncfg_is_user_disabled(string2str(df->template))) + status_to_send_to_plugin = DYNCFG_CMD_DISABLE; + + dyncfg_echo(item, df, id, status_to_send_to_plugin); + } + + if(!(df->current.source_type == DYNCFG_SOURCE_TYPE_DYNCFG && df->type == DYNCFG_TYPE_JOB)) + dyncfg_send_updates(id); + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + + return true; +} + +void dyncfg_del_low_level(RRDHOST *host, const char *id) { + if(!dyncfg_is_valid_id(id)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id); + return; + } + + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(item) { + DYNCFG *df = dictionary_acquired_item_value(item); + rrd_function_del(host, NULL, string2str(df->function)); + + bool garbage_collect = false; + if(df->dyncfg.saves == 0) { + dictionary_del(dyncfg_globals.nodes, id); + garbage_collect = true; + } + + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + + if(garbage_collect) + dictionary_garbage_collect(dyncfg_globals.nodes); + } +} + +void dyncfg_status_low_level(RRDHOST *host __maybe_unused, const char *id, DYNCFG_STATUS status) { + if(!dyncfg_is_valid_id(id)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: id '%s' is invalid. 
Ignoring dynamic configuration for it.", id); + return; + } + + if(status == DYNCFG_STATUS_NONE) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DYNCFG: status provided to id '%s' is invalid. Ignoring it.", id); + return; + } + + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_globals.nodes, id); + if(item) { + DYNCFG *df = dictionary_acquired_item_value(item); + df->current.status = status; + dictionary_acquired_item_release(dyncfg_globals.nodes, item); + } +} + +// ---------------------------------------------------------------------------- + +void dyncfg_add_streaming(BUFFER *wb) { + // when sending config functions to parents, we send only 1 function called 'config'; + // the parent will send the command to the child, and the child will validate it; + // this way the parent does not need to receive removals of config functions; + + buffer_sprintf(wb + , PLUGINSD_KEYWORD_FUNCTION " GLOBAL " PLUGINSD_FUNCTION_CONFIG " %d \"%s\" \"%s\" "HTTP_ACCESS_FORMAT" %d\n" + , 120 + , "Dynamic configuration" + , "config" + , (unsigned)HTTP_ACCESS_ANONYMOUS_DATA + , 1000 + ); +} + +bool dyncfg_available_for_rrdhost(RRDHOST *host) { + if(host == localhost || rrdhost_option_check(host, RRDHOST_OPTION_VIRTUAL_HOST)) + return true; + + return rrd_function_available(host, PLUGINSD_FUNCTION_CONFIG); +} + +// ---------------------------------------------------------------------------- + diff --git a/src/daemon/config/dyncfg.h b/src/daemon/config/dyncfg.h new file mode 100644 index 000000000..539eddbfb --- /dev/null +++ b/src/daemon/config/dyncfg.h @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DYNCFG_H +#define NETDATA_DYNCFG_H + +#include "../common.h" +#include "database/rrd.h" +#include "database/rrdfunctions.h" + +void dyncfg_add_streaming(BUFFER *wb); +bool dyncfg_available_for_rrdhost(RRDHOST *host); +void dyncfg_host_init(RRDHOST *host); + +// low-level API used by plugins.d and high-level API +bool dyncfg_add_low_level(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, + DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, + usec_t created_ut, usec_t modified_ut, bool sync, + HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data); +void dyncfg_del_low_level(RRDHOST *host, const char *id); +void dyncfg_status_low_level(RRDHOST *host, const char *id, DYNCFG_STATUS status); +void dyncfg_init_low_level(bool load_saved); + +// high-level API for internal modules +bool dyncfg_add(RRDHOST *host, const char *id, const char *path, DYNCFG_STATUS status, DYNCFG_TYPE type, + DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, + HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + dyncfg_cb_t cb, void *data); +void dyncfg_del(RRDHOST *host, const char *id); +void dyncfg_status(RRDHOST *host, const char *id, DYNCFG_STATUS status); + +void dyncfg_init(bool load_saved); + +#endif //NETDATA_DYNCFG_H diff --git a/daemon/daemon.c b/src/daemon/daemon.c index 433fa0373..f77b748a8 100644 --- a/daemon/daemon.c +++ b/src/daemon/daemon.c @@ -60,7 +60,7 @@ static void fix_directory_file_permissions(const char *dirname, uid_t uid, gid_t closedir(dir); } -void change_dir_ownership(const char *dir, uid_t uid, gid_t gid, bool recursive) +static void change_dir_ownership(const char *dir, uid_t uid, gid_t gid, bool recursive) { if (chown(dir, uid, gid) == -1) netdata_log_error("Cannot chown directory '%s' to %u:%u", dir, (unsigned int)uid, (unsigned int)gid); 
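For orientation, a sketch of how a module would register a configuration through the low-level API declared in dyncfg.h above; the id, path and callback here are hypothetical, and the call shape mirrors dyncfg_unittest_add() in dyncfg-unittest.c:

```c
// hypothetical handler for "config <id> <action> ..." calls for this id
static int example_execute_cb(struct rrd_function_execute *rfe, void *data __maybe_unused) {
    // a real handler would parse rfe->function and act on the command
    return dyncfg_default_response(rfe->result.wb, HTTP_RESP_OK, NULL);
}

static void example_register(void) {
    dyncfg_add_low_level(localhost, "example:single", "/examples",   // hypothetical id and path
                         DYNCFG_STATUS_RUNNING, DYNCFG_TYPE_SINGLE,
                         DYNCFG_SOURCE_TYPE_INTERNAL, "internal",
                         DYNCFG_CMD_SCHEMA | DYNCFG_CMD_GET | DYNCFG_CMD_UPDATE |
                         DYNCFG_CMD_ENABLE | DYNCFG_CMD_DISABLE,
                         0, 0, true,                                 // created_ut, modified_ut, sync
                         HTTP_ACCESS_NONE, HTTP_ACCESS_NONE,         // defaults will be applied
                         example_execute_cb, NULL);
}
```

Passing HTTP_ACCESS_NONE makes dyncfg_add_low_level() fill in the default view and edit access levels, as its implementation above shows.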
@@ -68,7 +68,7 @@ void change_dir_ownership(const char *dir, uid_t uid, gid_t gid, bool recursive) fix_directory_file_permissions(dir, uid, gid, recursive); } -void clean_directory(char *dirname) +static void clean_directory(char *dirname) { DIR *dir = opendir(dirname); if(!dir) return; @@ -84,7 +84,7 @@ void clean_directory(char *dirname) closedir(dir); } -void prepare_required_directories(uid_t uid, gid_t gid) { +static void prepare_required_directories(uid_t uid, gid_t gid) { change_dir_ownership(netdata_configured_cache_dir, uid, gid, true); change_dir_ownership(netdata_configured_varlib_dir, uid, gid, false); change_dir_ownership(netdata_configured_lock_dir, uid, gid, false); @@ -98,7 +98,7 @@ void prepare_required_directories(uid_t uid, gid_t gid) { clean_directory(netdata_configured_lock_dir); } -int become_user(const char *username, int pid_fd) { +static int become_user(const char *username, int pid_fd) { int am_i_root = (getuid() == 0)?1:0; struct passwd *pw = getpwnam(username); @@ -121,11 +121,7 @@ int become_user(const char *username, int pid_fd) { gid_t *supplementary_groups = NULL; if(ngroups > 0) { supplementary_groups = mallocz(sizeof(gid_t) * ngroups); -#ifdef __APPLE__ - if(getgrouplist(username, gid, (int *)supplementary_groups, &ngroups) == -1) { -#else - if(getgrouplist(username, gid, supplementary_groups, &ngroups) == -1) { -#endif /* __APPLE__ */ + if(os_getgrouplist(username, gid, supplementary_groups, &ngroups) == -1) { if(am_i_root) netdata_log_error("Cannot get supplementary groups of user '%s'.", username); @@ -149,20 +145,12 @@ int become_user(const char *username, int pid_fd) { if(supplementary_groups) freez(supplementary_groups); -#ifdef __APPLE__ - if(setregid(gid, gid) != 0) { -#else - if(setresgid(gid, gid, gid) != 0) { -#endif /* __APPLE__ */ + if(os_setresgid(gid, gid, gid) != 0) { netdata_log_error("Cannot switch to user's %s group (gid: %u).", username, gid); return -1; } -#ifdef __APPLE__ - if(setreuid(uid, uid) != 0) { -#else - if(setresuid(uid, uid, uid) != 0) { -#endif /* __APPLE__ */ + if(os_setresuid(uid, uid, uid) != 0) { netdata_log_error("Cannot switch to user %s (uid: %u).", username, uid); return -1; } @@ -218,7 +206,7 @@ static void oom_score_adj(void) { // check netdata.conf configuration s = config_get(CONFIG_SECTION_GLOBAL, "OOM score", s); - if(s && *s && (isdigit(*s) || *s == '-' || *s == '+')) + if(s && *s && (isdigit((uint8_t)*s) || *s == '-' || *s == '+')) wanted_score = atoll(s); else if(s && !strcmp(s, "keep")) { netdata_log_info("Out-Of-Memory (OOM) kept as-is (running with %d)", (int) old_score); @@ -245,7 +233,7 @@ static void oom_score_adj(void) { } int written = 0; - int fd = open("/proc/self/oom_score_adj", O_WRONLY); + int fd = open("/proc/self/oom_score_adj", O_WRONLY | O_CLOEXEC); if(fd != -1) { snprintfz(buf, sizeof(buf) - 1, "%d", (int)wanted_score); ssize_t len = strlen(buf); @@ -276,7 +264,7 @@ static void process_nice_level(void) { else netdata_log_debug(D_SYSTEM, "Set netdata nice level to %d.", nice_level); #endif // HAVE_NICE -}; +} #define SCHED_FLAG_NONE 0x00 #define SCHED_FLAG_PRIORITY_CONFIGURABLE 0x01 // the priority is user configurable @@ -478,7 +466,7 @@ int become_daemon(int dont_fork, const char *user) // generate our pid file int pidfd = -1; if(pidfile[0]) { - pidfd = open(pidfile, O_WRONLY | O_CREAT, 0644); + pidfd = open(pidfile, O_WRONLY | O_CREAT | O_CLOEXEC, 0644); if(pidfd >= 0) { if(ftruncate(pidfd, 0) != 0) netdata_log_error("Cannot truncate pidfile '%s'.", pidfile); diff --git 
a/src/daemon/daemon.h b/src/daemon/daemon.h new file mode 100644 index 000000000..1f8837fd6 --- /dev/null +++ b/src/daemon/daemon.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DAEMON_H +#define NETDATA_DAEMON_H 1 + +int become_daemon(int dont_fork, const char *user); + +void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data); + +void get_netdata_execution_path(void); + +extern char pidfile[]; +extern char netdata_exe_file[]; +extern char netdata_exe_path[]; + +#endif /* NETDATA_DAEMON_H */ diff --git a/daemon/event_loop.c b/src/daemon/event_loop.c index 93bac97d0..d1908ec15 100644 --- a/daemon/event_loop.c +++ b/src/daemon/event_loop.c @@ -62,5 +62,5 @@ void register_libuv_worker_jobs() { char buf[NETDATA_THREAD_TAG_MAX + 1]; snprintfz(buf, NETDATA_THREAD_TAG_MAX, "UV_WORKER[%d]", worker_id); - uv_thread_set_name_np(pthread_self(), buf); + uv_thread_set_name_np(buf); } diff --git a/daemon/event_loop.h b/src/daemon/event_loop.h index c1821c646..c1821c646 100644 --- a/daemon/event_loop.h +++ b/src/daemon/event_loop.h diff --git a/daemon/get-kubernetes-labels.sh.in b/src/daemon/get-kubernetes-labels.sh.in index bc82c2aee..bc82c2aee 100644..100755 --- a/daemon/get-kubernetes-labels.sh.in +++ b/src/daemon/get-kubernetes-labels.sh.in diff --git a/daemon/global_statistics.c b/src/daemon/global_statistics.c index 9fb1df5f8..429f68c0d 100644 --- a/daemon/global_statistics.c +++ b/src/daemon/global_statistics.c @@ -858,7 +858,7 @@ static void global_statistics_charts(void) { // ---------------------------------------------------------------- #ifdef ENABLE_DBENGINE - if (tier_page_type[0] == PAGE_GORILLA_METRICS) + if (tier_page_type[0] == RRDENG_PAGE_TYPE_GORILLA_32BIT) { static RRDSET *st_tier0_gorilla_pages = NULL; static RRDDIM *rd_num_gorilla_pages = NULL; @@ -887,7 +887,7 @@ static void global_statistics_charts(void) { rrdset_done(st_tier0_gorilla_pages); } - if (tier_page_type[0] == PAGE_GORILLA_METRICS) + if (tier_page_type[0] == RRDENG_PAGE_TYPE_GORILLA_32BIT) { static RRDSET *st_tier0_compression_info = NULL; @@ -2560,17 +2560,15 @@ static void dbengine2_statistics_charts(void) { /* get localhost's DB engine's statistics for each tier */ for(size_t tier = 0; tier < storage_tiers ;tier++) { if(host->db[tier].mode != RRD_MEMORY_MODE_DBENGINE) continue; - if(!host->db[tier].instance) continue; + if(!host->db[tier].si) continue; - if(is_storage_engine_shared(host->db[tier].instance)) { - if(counted_multihost_db[tier]) - continue; - else - counted_multihost_db[tier] = 1; - } + if(counted_multihost_db[tier]) + continue; + else + counted_multihost_db[tier] = 1; ++dbengine_contexts; - rrdeng_get_37_statistics((struct rrdengine_instance *)host->db[tier].instance, local_stats_array); + rrdeng_get_37_statistics((struct rrdengine_instance *)host->db[tier].si, local_stats_array); for (i = 0; i < RRDENG_NR_STATS; ++i) { /* aggregate statistics across hosts */ stats_array[i] += local_stats_array[i]; @@ -2578,7 +2576,7 @@ static void dbengine2_statistics_charts(void) { } } } - rrd_unlock(); + rrd_rdunlock(); if (dbengine_contexts) { /* deduplicate global statistics by getting the ones from the last context */ @@ -3522,6 +3520,7 @@ static struct worker_utilization all_workers_utilization[] = { { .name = "STATSD", .family = "workers plugin statsd", .priority = 1000000 }, { .name = "STATSDFLUSH", .family = "workers plugin statsd flush", .priority = 1000000 }, { .name = "PROC", .family = "workers plugin proc", 
.priority = 1000000 }, + { .name = "WIN", .family = "workers plugin windows", .priority = 1000000 }, { .name = "NETDEV", .family = "workers plugin proc netdev", .priority = 1000000 }, { .name = "FREEBSD", .family = "workers plugin freebsd", .priority = 1000000 }, { .name = "MACOS", .family = "workers plugin macos", .priority = 1000000 }, @@ -4157,17 +4156,13 @@ static void worker_utilization_charts(void) { for(int i = 0; all_workers_utilization[i].name ;i++) { workers_utilization_reset_statistics(&all_workers_utilization[i]); - netdata_thread_disable_cancelability(); workers_foreach(all_workers_utilization[i].name, worker_utilization_charts_callback, &all_workers_utilization[i]); - netdata_thread_enable_cancelability(); // skip the first iteration, so that we don't accumulate startup utilization to our charts if(likely(iterations > 1)) workers_utilization_update_chart(&all_workers_utilization[i]); - netdata_thread_disable_cancelability(); workers_threads_cleanup(&all_workers_utilization[i]); - netdata_thread_enable_cancelability(); } workers_total_cpu_utilization_chart(); @@ -4217,13 +4212,14 @@ static void global_statistics_register_workers(void) { worker_register_job_name(WORKER_JOB_SQLITE3, "sqlite3"); } -static void global_statistics_cleanup(void *ptr) +static void global_statistics_cleanup(void *pptr) { - worker_unregister(); + struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr); + if(!static_thread) return; - struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + worker_unregister(); netdata_log_info("cleaning up..."); static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; @@ -4231,9 +4227,9 @@ static void global_statistics_cleanup(void *ptr) void *global_statistics_main(void *ptr) { - global_statistics_register_workers(); + CLEANUP_FUNCTION_REGISTER(global_statistics_cleanup) cleanup_ptr = ptr; - netdata_thread_cleanup_push(global_statistics_cleanup, ptr); + global_statistics_register_workers(); int update_every = (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); @@ -4282,7 +4278,6 @@ void *global_statistics_main(void *ptr) #endif } - netdata_thread_cleanup_pop(1); return NULL; } @@ -4290,15 +4285,16 @@ void *global_statistics_main(void *ptr) // --------------------------------------------------------------------------------------------------------------------- // workers thread -static void global_statistics_workers_cleanup(void *ptr) +static void global_statistics_workers_cleanup(void *pptr) { - worker_unregister(); + struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr); + if(!static_thread) return; - struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; netdata_log_info("cleaning up..."); + worker_unregister(); worker_utilization_finish(); static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; @@ -4306,41 +4302,41 @@ static void global_statistics_workers_cleanup(void *ptr) void *global_statistics_workers_main(void *ptr) { - global_statistics_register_workers(); + CLEANUP_FUNCTION_REGISTER(global_statistics_workers_cleanup) cleanup_ptr = ptr; - netdata_thread_cleanup_push(global_statistics_workers_cleanup, ptr); + global_statistics_register_workers(); - int update_every = - (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); - if (update_every < 
localhost->rrd_update_every) - update_every = localhost->rrd_update_every; + int update_every = + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; - usec_t step = update_every * USEC_PER_SEC; - heartbeat_t hb; - heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); - while (service_running(SERVICE_COLLECTORS)) { - worker_is_idle(); - heartbeat_next(&hb, step); + while (service_running(SERVICE_COLLECTORS)) { + worker_is_idle(); + heartbeat_next(&hb, step); - worker_is_busy(WORKER_JOB_WORKERS); - worker_utilization_charts(); - } + worker_is_busy(WORKER_JOB_WORKERS); + worker_utilization_charts(); + } - netdata_thread_cleanup_pop(1); return NULL; } // --------------------------------------------------------------------------------------------------------------------- // sqlite3 thread -static void global_statistics_sqlite3_cleanup(void *ptr) +static void global_statistics_sqlite3_cleanup(void *pptr) { - worker_unregister(); + struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr); + if(!static_thread) return; - struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; + worker_unregister(); netdata_log_info("cleaning up..."); static_thread->enabled = NETDATA_MAIN_THREAD_EXITED; @@ -4348,28 +4344,27 @@ static void global_statistics_sqlite3_cleanup(void *ptr) void *global_statistics_sqlite3_main(void *ptr) { - global_statistics_register_workers(); + CLEANUP_FUNCTION_REGISTER(global_statistics_sqlite3_cleanup) cleanup_ptr = ptr; - netdata_thread_cleanup_push(global_statistics_sqlite3_cleanup, ptr); + global_statistics_register_workers(); - int update_every = - (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); - if (update_every < localhost->rrd_update_every) - update_every = localhost->rrd_update_every; + int update_every = + (int)config_get_number(CONFIG_SECTION_GLOBAL_STATISTICS, "update every", localhost->rrd_update_every); + if (update_every < localhost->rrd_update_every) + update_every = localhost->rrd_update_every; - usec_t step = update_every * USEC_PER_SEC; - heartbeat_t hb; - heartbeat_init(&hb); + usec_t step = update_every * USEC_PER_SEC; + heartbeat_t hb; + heartbeat_init(&hb); - while (service_running(SERVICE_COLLECTORS)) { - worker_is_idle(); - heartbeat_next(&hb, step); + while (service_running(SERVICE_COLLECTORS)) { + worker_is_idle(); + heartbeat_next(&hb, step); - worker_is_busy(WORKER_JOB_SQLITE3); - sqlite3_statistics_charts(); - } + worker_is_busy(WORKER_JOB_SQLITE3); + sqlite3_statistics_charts(); + } - netdata_thread_cleanup_pop(1); return NULL; } diff --git a/daemon/global_statistics.h b/src/daemon/global_statistics.h index 44717c6cf..44717c6cf 100644 --- a/daemon/global_statistics.h +++ b/src/daemon/global_statistics.h diff --git a/src/daemon/main.c b/src/daemon/main.c new file mode 100644 index 000000000..e2db02097 --- /dev/null +++ b/src/daemon/main.c @@ -0,0 +1,2370 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" +#include "buildinfo.h" +#include "daemon/watcher.h" +#include "static_threads.h" + +#include "database/engine/page_test.h" + +#ifdef ENABLE_SENTRY +#include "sentry-native/sentry-native.h" +#endif + +#if defined(ENV32BIT) +#warning COMPILING 32BIT NETDATA +#endif + +bool 
unittest_running = false; +int netdata_anonymous_statistics_enabled; + +int libuv_worker_threads = MIN_LIBUV_WORKER_THREADS; +bool ieee754_doubles = false; +time_t netdata_start_time = 0; +struct netdata_static_thread *static_threads; +bool i_am_the_spawn_server = false; + +struct config netdata_config = { + .first_section = NULL, + .last_section = NULL, + .mutex = NETDATA_MUTEX_INITIALIZER, + .index = { + .avl_tree = { + .root = NULL, + .compar = appconfig_section_compare + }, + .rwlock = AVL_LOCK_INITIALIZER + } +}; + +typedef struct service_thread { + pid_t tid; + SERVICE_THREAD_TYPE type; + SERVICE_TYPE services; + char name[ND_THREAD_TAG_MAX + 1]; + bool stop_immediately; + bool cancelled; + + union { + ND_THREAD *netdata_thread; + uv_thread_t uv_thread; + }; + + force_quit_t force_quit_callback; + request_quit_t request_quit_callback; + void *data; +} SERVICE_THREAD; + +struct service_globals { + SPINLOCK lock; + Pvoid_t pid_judy; +} service_globals = { + .pid_judy = NULL, +}; + +SERVICE_THREAD *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update __maybe_unused) { + SERVICE_THREAD *sth = NULL; + pid_t tid = gettid_cached(); + + spinlock_lock(&service_globals.lock); + Pvoid_t *PValue = JudyLIns(&service_globals.pid_judy, tid, PJE0); + if(!*PValue) { + sth = callocz(1, sizeof(SERVICE_THREAD)); + sth->tid = tid; + sth->type = thread_type; + sth->request_quit_callback = request_quit_callback; + sth->force_quit_callback = force_quit_callback; + sth->data = data; + *PValue = sth; + + switch(thread_type) { + default: + case SERVICE_THREAD_TYPE_NETDATA: + sth->netdata_thread = nd_thread_self(); + break; + + case SERVICE_THREAD_TYPE_EVENT_LOOP: + case SERVICE_THREAD_TYPE_LIBUV: + sth->uv_thread = uv_thread_self(); + break; + } + + const char *name = nd_thread_tag(); + if(!name) name = ""; + strncpyz(sth->name, name, sizeof(sth->name) - 1); + } + else { + sth = *PValue; + } + spinlock_unlock(&service_globals.lock); + + return sth; +} + +void service_exits(void) { + pid_t tid = gettid_cached(); + + spinlock_lock(&service_globals.lock); + Pvoid_t *PValue = JudyLGet(service_globals.pid_judy, tid, PJE0); + if(PValue) { + freez(*PValue); + JudyLDel(&service_globals.pid_judy, tid, PJE0); + } + spinlock_unlock(&service_globals.lock); +} + +bool service_running(SERVICE_TYPE service) { + static __thread SERVICE_THREAD *sth = NULL; + + if(unlikely(!sth)) + sth = service_register(SERVICE_THREAD_TYPE_NETDATA, NULL, NULL, NULL, false); + + sth->services |= service; + + bool cancelled = false; + if (sth->type == SERVICE_THREAD_TYPE_NETDATA) + cancelled = nd_thread_signaled_to_cancel(); + + return !sth->stop_immediately && !netdata_exit && !cancelled; +} + +void service_signal_exit(SERVICE_TYPE service) { + spinlock_lock(&service_globals.lock); + + Pvoid_t *PValue; + Word_t tid = 0; + bool first = true; + while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) { + SERVICE_THREAD *sth = *PValue; + + if((sth->services & service)) { + sth->stop_immediately = true; + + switch(sth->type) { + default: + case SERVICE_THREAD_TYPE_NETDATA: + nd_thread_signal_cancel(sth->netdata_thread); + break; + + case SERVICE_THREAD_TYPE_EVENT_LOOP: + case SERVICE_THREAD_TYPE_LIBUV: + break; + } + + if(sth->request_quit_callback) { + spinlock_unlock(&service_globals.lock); + sth->request_quit_callback(sth->data); + spinlock_lock(&service_globals.lock); + } + } + } + + spinlock_unlock(&service_globals.lock); +} + 
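
Editor's note: the `service_register()` / `service_running()` / `service_signal_exit()` trio introduced above implements cooperative cancellation: each thread registers itself under its tid in a spinlock-protected JudyL index, workers poll `service_running()` on every loop iteration, and shutdown flips per-thread stop flags. A minimal, self-contained sketch of the same pattern, stripped to the essentials (a fixed-size table and a pthread mutex stand in for the upstream JudyL index and spinlock; all names here are illustrative, not the upstream API):

```c
#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>

#define MAX_THREADS 64
#define SVC_COLLECTORS (1u << 0)

typedef struct {
    pthread_t thread;
    unsigned services;     /* bitmask of services this thread participates in */
    atomic_bool stop;      /* set by the controller to request a cooperative exit */
    bool used;
} svc_slot;

static svc_slot registry[MAX_THREADS];
static pthread_mutex_t registry_lock = PTHREAD_MUTEX_INITIALIZER;

/* register the calling thread once, like service_register() */
static svc_slot *svc_register(unsigned services) {
    svc_slot *s = NULL;
    pthread_mutex_lock(&registry_lock);
    for (int i = 0; i < MAX_THREADS; i++) {
        if (!registry[i].used) {
            s = &registry[i];
            s->used = true;
            s->thread = pthread_self();
            s->services = services;
            atomic_store(&s->stop, false);
            break;
        }
    }
    pthread_mutex_unlock(&registry_lock);
    return s;
}

/* flip the stop flag of every thread that declared the service */
static void svc_signal_exit(unsigned services) {
    pthread_mutex_lock(&registry_lock);
    for (int i = 0; i < MAX_THREADS; i++)
        if (registry[i].used && (registry[i].services & services))
            atomic_store(&registry[i].stop, true);
    pthread_mutex_unlock(&registry_lock);
}

static void *collector_main(void *arg) {
    (void)arg;
    svc_slot *me = svc_register(SVC_COLLECTORS);
    while (me && !atomic_load(&me->stop)) {  /* the service_running() analogue */
        /* ... collect metrics ... */
        usleep(100 * 1000);
    }
    puts("collector exiting cleanly");
    return NULL;
}

int main(void) {
    pthread_t t;
    pthread_create(&t, NULL, collector_main, NULL);
    sleep(1);
    svc_signal_exit(SVC_COLLECTORS);  /* request, don't force, the stop */
    pthread_join(t, NULL);
    return 0;
}
```

The point of the indirection is that shutdown never kills threads outright; it only asks them to stop, which is why the `service_wait_exit()` that follows still needs a timeout.
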
+static void service_to_buffer(BUFFER *wb, SERVICE_TYPE service) { + if(service & SERVICE_MAINTENANCE) + buffer_strcat(wb, "MAINTENANCE "); + if(service & SERVICE_COLLECTORS) + buffer_strcat(wb, "COLLECTORS "); + if(service & SERVICE_REPLICATION) + buffer_strcat(wb, "REPLICATION "); + if(service & ABILITY_DATA_QUERIES) + buffer_strcat(wb, "DATA_QUERIES "); + if(service & ABILITY_WEB_REQUESTS) + buffer_strcat(wb, "WEB_REQUESTS "); + if(service & SERVICE_WEB_SERVER) + buffer_strcat(wb, "WEB_SERVER "); + if(service & SERVICE_ACLK) + buffer_strcat(wb, "ACLK "); + if(service & SERVICE_HEALTH) + buffer_strcat(wb, "HEALTH "); + if(service & SERVICE_STREAMING) + buffer_strcat(wb, "STREAMING "); + if(service & ABILITY_STREAMING_CONNECTIONS) + buffer_strcat(wb, "STREAMING_CONNECTIONS "); + if(service & SERVICE_CONTEXT) + buffer_strcat(wb, "CONTEXT "); + if(service & SERVICE_ANALYTICS) + buffer_strcat(wb, "ANALYTICS "); + if(service & SERVICE_EXPORTERS) + buffer_strcat(wb, "EXPORTERS "); + if(service & SERVICE_HTTPD) + buffer_strcat(wb, "HTTPD "); +} + +static bool service_wait_exit(SERVICE_TYPE service, usec_t timeout_ut) { + BUFFER *service_list = buffer_create(1024, NULL); + BUFFER *thread_list = buffer_create(1024, NULL); + usec_t started_ut = now_monotonic_usec(), ended_ut; + size_t running; + SERVICE_TYPE running_services = 0; + + // cancel the threads + running = 0; + running_services = 0; + { + buffer_flush(thread_list); + + spinlock_lock(&service_globals.lock); + + Pvoid_t *PValue; + Word_t tid = 0; + bool first = true; + while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) { + SERVICE_THREAD *sth = *PValue; + if(sth->services & service && sth->tid != gettid_cached() && !sth->cancelled) { + sth->cancelled = true; + + switch(sth->type) { + default: + case SERVICE_THREAD_TYPE_NETDATA: + nd_thread_signal_cancel(sth->netdata_thread); + break; + + case SERVICE_THREAD_TYPE_EVENT_LOOP: + case SERVICE_THREAD_TYPE_LIBUV: + break; + } + + if(running) + buffer_strcat(thread_list, ", "); + + buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid); + + running++; + running_services |= sth->services & service; + + if(sth->force_quit_callback) { + spinlock_unlock(&service_globals.lock); + sth->force_quit_callback(sth->data); + spinlock_lock(&service_globals.lock); + continue; + } + } + } + + spinlock_unlock(&service_globals.lock); + } + + service_signal_exit(service); + + // signal them to stop + size_t last_running = 0; + size_t stale_time_ut = 0; + usec_t sleep_ut = 50 * USEC_PER_MS; + size_t log_countdown_ut = sleep_ut; + do { + if(running != last_running) + stale_time_ut = 0; + + last_running = running; + running = 0; + running_services = 0; + buffer_flush(thread_list); + + spinlock_lock(&service_globals.lock); + + Pvoid_t *PValue; + Word_t tid = 0; + bool first = true; + while((PValue = JudyLFirstThenNext(service_globals.pid_judy, &tid, &first))) { + SERVICE_THREAD *sth = *PValue; + if(sth->services & service && sth->tid != gettid_cached()) { + if(running) + buffer_strcat(thread_list, ", "); + + buffer_sprintf(thread_list, "'%s' (%d)", sth->name, sth->tid); + + running_services |= sth->services & service; + running++; + } + } + + spinlock_unlock(&service_globals.lock); + + if(running) { + log_countdown_ut -= (log_countdown_ut >= sleep_ut) ? 
sleep_ut : log_countdown_ut; + if(log_countdown_ut == 0 || running != last_running) { + log_countdown_ut = 20 * sleep_ut; + + buffer_flush(service_list); + service_to_buffer(service_list, running_services); + netdata_log_info("SERVICE CONTROL: waiting for the following %zu services [ %s] to exit: %s", + running, buffer_tostring(service_list), + running <= 10 ? buffer_tostring(thread_list) : ""); + } + + sleep_usec(sleep_ut); + stale_time_ut += sleep_ut; + } + + ended_ut = now_monotonic_usec(); + } while(running && (ended_ut - started_ut < timeout_ut || stale_time_ut < timeout_ut)); + + if(running) { + buffer_flush(service_list); + service_to_buffer(service_list, running_services); + netdata_log_info("SERVICE CONTROL: " + "the following %zu service(s) [ %s] take too long to exit: %s; " + "giving up on them...", + running, buffer_tostring(service_list), + buffer_tostring(thread_list)); + } + + buffer_free(thread_list); + buffer_free(service_list); + + return (running == 0); +} + +void web_client_cache_destroy(void); + +void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data) { + if (i_am_the_spawn_server) + exit(ret); + + watcher_shutdown_begin(); + + nd_log_limits_unlimited(); + netdata_log_info("NETDATA SHUTDOWN: initializing shutdown with code %d...", ret); + + // send the stat from our caller + analytics_statistic_t statistic = { action, action_result, action_data }; + analytics_statistic_send(&statistic); + + // notify we are exiting + statistic = (analytics_statistic_t) {"EXIT", ret?"ERROR":"OK","-"}; + analytics_statistic_send(&statistic); + + char agent_crash_file[FILENAME_MAX + 1]; + char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; + snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); + snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir); + (void) rename(agent_crash_file, agent_incomplete_shutdown_file); + watcher_step_complete(WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE); + +#ifdef ENABLE_DBENGINE + if(dbengine_enabled) { + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_exit_mode(multidb_ctx[tier]); + } +#endif + watcher_step_complete(WATCHER_STEP_ID_DBENGINE_EXIT_MODE); + + webrtc_close_all_connections(); + watcher_step_complete(WATCHER_STEP_ID_CLOSE_WEBRTC_CONNECTIONS); + + service_signal_exit(SERVICE_MAINTENANCE | ABILITY_DATA_QUERIES | ABILITY_WEB_REQUESTS | + ABILITY_STREAMING_CONNECTIONS | SERVICE_ACLK | SERVICE_ACLKSYNC); + watcher_step_complete(WATCHER_STEP_ID_DISABLE_MAINTENANCE_NEW_QUERIES_NEW_WEB_REQUESTS_NEW_STREAMING_CONNECTIONS_AND_ACLK); + + service_wait_exit(SERVICE_MAINTENANCE, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_MAINTENANCE_THREAD); + + service_wait_exit(SERVICE_EXPORTERS | SERVICE_HEALTH | SERVICE_WEB_SERVER | SERVICE_HTTPD, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_EXPORTERS_HEALTH_AND_WEB_SERVERS_THREADS); + + service_wait_exit(SERVICE_COLLECTORS | SERVICE_STREAMING, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTORS_AND_STREAMING_THREADS); + + service_wait_exit(SERVICE_REPLICATION, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_REPLICATION_THREADS); + + metadata_sync_shutdown_prepare(); + watcher_step_complete(WATCHER_STEP_ID_PREPARE_METASYNC_SHUTDOWN); + + ml_stop_threads(); + ml_fini(); + watcher_step_complete(WATCHER_STEP_ID_DISABLE_ML_DETECTION_AND_TRAINING_THREADS); + + 
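
Editor's note: `service_wait_exit()` above polls the registry in 50ms steps, logs progress only periodically or when the number of running threads changes, and gives up after a monotonic deadline. A standalone sketch of that wait-with-deadline idiom, simplified to a single deadline (the upstream loop also extends the budget while progress is being made; the `still_running` callback here is hypothetical):

```c
#include <stdbool.h>
#include <stdio.h>
#include <time.h>

typedef unsigned long long usec_t;

static usec_t now_monotonic_usec(void) {
    struct timespec ts;
    clock_gettime(CLOCK_MONOTONIC, &ts);
    return (usec_t)ts.tv_sec * 1000000ULL + (usec_t)ts.tv_nsec / 1000ULL;
}

static void sleep_usec(usec_t us) {
    struct timespec ts = { .tv_sec = (time_t)(us / 1000000ULL),
                           .tv_nsec = (long)((us % 1000000ULL) * 1000ULL) };
    nanosleep(&ts, NULL);
}

/* returns true if everything stopped before the timeout */
static bool wait_for_exit(size_t (*still_running)(void), usec_t timeout_ut) {
    const usec_t step = 50 * 1000ULL;  /* 50ms polls, as in the loop above */
    usec_t started = now_monotonic_usec();
    size_t last = (size_t)-1, running;

    while ((running = still_running()) != 0) {
        if (now_monotonic_usec() - started >= timeout_ut)
            return false;  /* give up; the caller logs the stragglers */

        if (running != last)  /* log only on change, to avoid flooding */
            fprintf(stderr, "waiting for %zu thread(s) to exit...\n", running);

        last = running;
        sleep_usec(step);
    }
    return true;
}

static size_t fake_threads = 3;
static size_t demo_still_running(void) {
    return fake_threads ? fake_threads-- : 0;  /* one "thread" exits per poll */
}

int main(void) {
    bool ok = wait_for_exit(demo_still_running, 2 * 1000 * 1000ULL);  /* 2s budget */
    fprintf(stderr, "all stopped: %s\n", ok ? "yes" : "no");
    return ok ? 0 : 1;
}
```
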
service_wait_exit(SERVICE_CONTEXT, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_CONTEXT_THREAD); + + web_client_cache_destroy(); + watcher_step_complete(WATCHER_STEP_ID_CLEAR_WEB_CLIENT_CACHE); + + service_wait_exit(SERVICE_ACLK, 3 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_ACLK_THREADS); + + service_wait_exit(~0, 10 * USEC_PER_SEC); + watcher_step_complete(WATCHER_STEP_ID_STOP_ALL_REMAINING_WORKER_THREADS); + + cancel_main_threads(); + watcher_step_complete(WATCHER_STEP_ID_CANCEL_MAIN_THREADS); + + if (ret) + { + watcher_step_complete(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); + watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); + watcher_step_complete(WATCHER_STEP_ID_STOP_METASYNC_THREADS); + + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + } + else + { + // exit cleanly + +#ifdef ENABLE_DBENGINE + if(dbengine_enabled) { + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_prepare_exit(multidb_ctx[tier]); + + for (size_t tier = 0; tier < storage_tiers; tier++) { + if (!multidb_ctx[tier]) + continue; + completion_wait_for(&multidb_ctx[tier]->quiesce.completion); + completion_destroy(&multidb_ctx[tier]->quiesce.completion); + } + } +#endif + watcher_step_complete(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); + + rrd_finalize_collection_for_all_hosts(); + watcher_step_complete(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); + + metadata_sync_shutdown(); + watcher_step_complete(WATCHER_STEP_ID_STOP_METASYNC_THREADS); + +#ifdef ENABLE_DBENGINE + if(dbengine_enabled) { + size_t running = 1; + size_t count = 10; + while(running && count) { + running = 0; + for (size_t tier = 0; tier < storage_tiers; tier++) + running += rrdeng_collectors_running(multidb_ctx[tier]); + + if (running) { + nd_log_limit_static_thread_var(erl, 1, 100 * USEC_PER_MS); + nd_log_limit(&erl, NDLS_DAEMON, NDLP_NOTICE, "waiting for %zu collectors to finish", running); + } + count--; + } + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + + while (pgc_hot_and_dirty_entries(main_cache)) { + pgc_flush_all_hot_and_dirty_pages(main_cache, PGC_SECTION_ALL); + sleep_usec(100 * USEC_PER_MS); + } + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + + for (size_t tier = 0; tier < storage_tiers; tier++) + rrdeng_exit(multidb_ctx[tier]); + rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + } else { + // Skip these steps + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + } +#else + // Skip these steps + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_step_complete(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_step_complete(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); +#endif + } + + sqlite_close_databases(); + watcher_step_complete(WATCHER_STEP_ID_CLOSE_SQL_DATABASES); + sqlite_library_shutdown(); + + + // unlink the pid + if(pidfile[0]) { + if(unlink(pidfile) != 0) + netdata_log_error("EXIT: cannot unlink pidfile '%s'.", pidfile); + } + 
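
Editor's note: every stage of `netdata_cleanup_and_exit()` above ends with `watcher_step_complete()`, including the branches that deliberately skip work ("// Skip these steps"), so the shutdown watchdog always sees a fully acknowledged checklist and can name the exact step a hung shutdown stopped at. A simplified model of that checklist idea (an illustration only, not the upstream watcher implementation):

```c
#include <stdbool.h>
#include <stdio.h>

enum shutdown_step {
    STEP_FLUSH_DBENGINE,
    STEP_STOP_COLLECTION,
    STEP_REMOVE_PID_FILE,
    STEP_MAX
};

static const char *step_names[STEP_MAX] = {
    [STEP_FLUSH_DBENGINE]  = "flush dbengine tiers",
    [STEP_STOP_COLLECTION] = "stop collection for all hosts",
    [STEP_REMOVE_PID_FILE] = "remove pid file",
};

static bool step_done[STEP_MAX];

static void step_complete(enum shutdown_step s) {
    step_done[s] = true;
    fprintf(stderr, "shutdown: completed '%s'\n", step_names[s]);
}

/* what a watchdog would print if shutdown exceeds its time budget */
static void report_stuck_steps(void) {
    for (int s = 0; s < STEP_MAX; s++)
        if (!step_done[s])
            fprintf(stderr, "shutdown appears stuck at '%s'\n", step_names[s]);
}

int main(void) {
    step_complete(STEP_FLUSH_DBENGINE);
    /* STEP_STOP_COLLECTION intentionally never completes in this demo */
    step_complete(STEP_REMOVE_PID_FILE);
    report_stuck_steps();  /* -> reports 'stop collection for all hosts' */
    return 0;
}
```

Acknowledging even the skipped steps is the design choice that matters: the watchdog only has to look for the first unacknowledged entry, never reason about which steps were applicable.
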
watcher_step_complete(WATCHER_STEP_ID_REMOVE_PID_FILE); + +#ifdef ENABLE_HTTPS + netdata_ssl_cleanup(); +#endif + watcher_step_complete(WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES); + + (void) unlink(agent_incomplete_shutdown_file); + watcher_step_complete(WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE); + + watcher_shutdown_end(); + watcher_thread_stop(); + +#ifdef ENABLE_SENTRY + if (ret) + abort(); + else { + nd_sentry_fini(); + exit(ret); + } +#else + exit(ret); +#endif +} + +void web_server_threading_selection(void) { + web_server_mode = web_server_mode_id(config_get(CONFIG_SECTION_WEB, "mode", web_server_mode_name(web_server_mode))); + + int static_threaded = (web_server_mode == WEB_SERVER_MODE_STATIC_THREADED); + + int i; + for (i = 0; static_threads[i].name; i++) { + if (static_threads[i].start_routine == socket_listen_main_static_threaded) + static_threads[i].enabled = static_threaded; + } +} + +int make_dns_decision(const char *section_name, const char *config_name, const char *default_value, SIMPLE_PATTERN *p) +{ + char *value = config_get(section_name,config_name,default_value); + if(!strcmp("yes",value)) + return 1; + if(!strcmp("no",value)) + return 0; + if(strcmp("heuristic",value)) + netdata_log_error("Invalid configuration option '%s' for '%s'/'%s'. Valid options are 'yes', 'no' and 'heuristic'. Proceeding with 'heuristic'", + value, section_name, config_name); + + return simple_pattern_is_potential_name(p); +} + +void web_server_config_options(void) +{ + web_client_timeout = + (int)config_get_number(CONFIG_SECTION_WEB, "disconnect idle clients after seconds", web_client_timeout); + web_client_first_request_timeout = + (int)config_get_number(CONFIG_SECTION_WEB, "timeout for first request", web_client_first_request_timeout); + web_client_streaming_rate_t = + config_get_number(CONFIG_SECTION_WEB, "accept a streaming request every seconds", web_client_streaming_rate_t); + + respect_web_browser_do_not_track_policy = + config_get_boolean(CONFIG_SECTION_WEB, "respect do not track policy", respect_web_browser_do_not_track_policy); + web_x_frame_options = config_get(CONFIG_SECTION_WEB, "x-frame-options response header", ""); + if(!*web_x_frame_options) + web_x_frame_options = NULL; + + web_allow_connections_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow connections from", "localhost *"), + NULL, SIMPLE_PATTERN_EXACT, true); + web_allow_connections_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow connections by dns", "heuristic", web_allow_connections_from); + web_allow_dashboard_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow dashboard from", "localhost *"), + NULL, SIMPLE_PATTERN_EXACT, true); + web_allow_dashboard_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow dashboard by dns", "heuristic", web_allow_dashboard_from); + web_allow_badges_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow badges from", "*"), NULL, SIMPLE_PATTERN_EXACT, + true); + web_allow_badges_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow badges by dns", "heuristic", web_allow_badges_from); + web_allow_registry_from = + simple_pattern_create(config_get(CONFIG_SECTION_REGISTRY, "allow from", "*"), NULL, SIMPLE_PATTERN_EXACT, + true); + web_allow_registry_dns = make_dns_decision(CONFIG_SECTION_REGISTRY, "allow by dns", "heuristic", + web_allow_registry_from); + web_allow_streaming_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow streaming from", "*"), + NULL, SIMPLE_PATTERN_EXACT, true); + web_allow_streaming_dns = 
make_dns_decision(CONFIG_SECTION_WEB, "allow streaming by dns", "heuristic", + web_allow_streaming_from); + // Note the default is not heuristic, the wildcards could match DNS but the intent is ip-addresses. + web_allow_netdataconf_from = simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow netdata.conf from", + "localhost fd* 10.* 192.168.* 172.16.* 172.17.* 172.18.*" + " 172.19.* 172.20.* 172.21.* 172.22.* 172.23.* 172.24.*" + " 172.25.* 172.26.* 172.27.* 172.28.* 172.29.* 172.30.*" + " 172.31.* UNKNOWN"), NULL, SIMPLE_PATTERN_EXACT, + true); + web_allow_netdataconf_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow netdata.conf by dns", "no", web_allow_netdataconf_from); + web_allow_mgmt_from = + simple_pattern_create(config_get(CONFIG_SECTION_WEB, "allow management from", "localhost"), + NULL, SIMPLE_PATTERN_EXACT, true); + web_allow_mgmt_dns = + make_dns_decision(CONFIG_SECTION_WEB, "allow management by dns","heuristic",web_allow_mgmt_from); + + web_enable_gzip = config_get_boolean(CONFIG_SECTION_WEB, "enable gzip compression", web_enable_gzip); + + char *s = config_get(CONFIG_SECTION_WEB, "gzip compression strategy", "default"); + if(!strcmp(s, "default")) + web_gzip_strategy = Z_DEFAULT_STRATEGY; + else if(!strcmp(s, "filtered")) + web_gzip_strategy = Z_FILTERED; + else if(!strcmp(s, "huffman only")) + web_gzip_strategy = Z_HUFFMAN_ONLY; + else if(!strcmp(s, "rle")) + web_gzip_strategy = Z_RLE; + else if(!strcmp(s, "fixed")) + web_gzip_strategy = Z_FIXED; + else { + netdata_log_error("Invalid compression strategy '%s'. Valid strategies are 'default', 'filtered', 'huffman only', 'rle' and 'fixed'. Proceeding with 'default'.", s); + web_gzip_strategy = Z_DEFAULT_STRATEGY; + } + + web_gzip_level = (int)config_get_number(CONFIG_SECTION_WEB, "gzip compression level", 3); + if(web_gzip_level < 1) { + netdata_log_error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 1 (fastest compression).", web_gzip_level); + web_gzip_level = 1; + } + else if(web_gzip_level > 9) { + netdata_log_error("Invalid compression level %d. Valid levels are 1 (fastest) to 9 (best ratio). Proceeding with level 9 (best compression).", web_gzip_level); + web_gzip_level = 9; + } +} + + +// killpid kills pid with SIGTERM. +int killpid(pid_t pid) { + int ret; + netdata_log_debug(D_EXIT, "Request to kill pid %d", pid); + + int signal = SIGTERM; +//#ifdef NETDATA_INTERNAL_CHECKS +// if(service_running(SERVICE_COLLECTORS)) +// signal = SIGABRT; +//#endif + + errno = 0; + ret = kill(pid, signal); + if (ret == -1) { + switch(errno) { + case ESRCH: + // We wanted the process to exit so just let the caller handle. 
+ return ret; + + case EPERM: + netdata_log_error("Cannot kill pid %d, but I do not have enough permissions.", pid); + break; + + default: + netdata_log_error("Cannot kill pid %d, but I received an error.", pid); + break; + } + } + + return ret; +} + +static void set_nofile_limit(struct rlimit *rl) { + // get the num files allowed + if(getrlimit(RLIMIT_NOFILE, rl) != 0) { + netdata_log_error("getrlimit(RLIMIT_NOFILE) failed"); + return; + } + + netdata_log_info("resources control: allowed file descriptors: soft = %zu, max = %zu", + (size_t) rl->rlim_cur, (size_t) rl->rlim_max); + + // make the soft/hard limits equal + rl->rlim_cur = rl->rlim_max; + if (setrlimit(RLIMIT_NOFILE, rl) != 0) { + netdata_log_error("setrlimit(RLIMIT_NOFILE, { %zu, %zu }) failed", (size_t)rl->rlim_cur, (size_t)rl->rlim_max); + } + + // sanity check to make sure we have enough file descriptors available to open + if (getrlimit(RLIMIT_NOFILE, rl) != 0) { + netdata_log_error("getrlimit(RLIMIT_NOFILE) failed"); + return; + } + + if (rl->rlim_cur < 1024) + netdata_log_error("Number of open file descriptors allowed for this process is too low (RLIMIT_NOFILE=%zu)", (size_t)rl->rlim_cur); +} + +void cancel_main_threads() { + nd_log_limits_unlimited(); + + if (!static_threads) + return; + + int i, found = 0; + usec_t max = 5 * USEC_PER_SEC, step = 100000; + for (i = 0; static_threads[i].name != NULL ; i++) { + if (static_threads[i].enabled == NETDATA_MAIN_THREAD_RUNNING) { + if (static_threads[i].thread) { + netdata_log_info("EXIT: Stopping main thread: %s", static_threads[i].name); + nd_thread_signal_cancel(static_threads[i].thread); + } else { + netdata_log_info("EXIT: No thread running (marking as EXITED): %s", static_threads[i].name); + static_threads[i].enabled = NETDATA_MAIN_THREAD_EXITED; + } + found++; + } + } + + netdata_exit = 1; + + while(found && max > 0) { + max -= step; + netdata_log_info("Waiting %d threads to finish...", found); + sleep_usec(step); + found = 0; + for (i = 0; static_threads[i].name != NULL ; i++) { + if (static_threads[i].enabled == NETDATA_MAIN_THREAD_EXITED) + continue; + + // Don't wait ourselves. + if (nd_thread_is_me(static_threads[i].thread)) + continue; + + found++; + } + } + + if(found) { + for (i = 0; static_threads[i].name != NULL ; i++) { + if (static_threads[i].enabled != NETDATA_MAIN_THREAD_EXITED) + netdata_log_error("Main thread %s takes too long to exit. Giving up...", static_threads[i].name); + } + } + else + netdata_log_info("All threads finished."); + + freez(static_threads); + static_threads = NULL; +} + +static const struct option_def { + const char val; + const char *description; + const char *arg_name; + const char *default_value; +} option_definitions[] = { + {'c', "Configuration file to load.", "filename", CONFIG_DIR "/" CONFIG_FILENAME}, + {'D', "Do not fork. Run in the foreground.", NULL, "run in the background"}, + {'d', "Fork. 
Run in the background.", NULL, "run in the background"}, + {'h', "Display this help message.", NULL, NULL}, + {'P', "File to save a pid while running.", "filename", "do not save pid to a file"}, + {'i', "The IP address to listen to.", "IP", "all IP addresses IPv4 and IPv6"}, + {'p', "API/Web port to use.", "port", "19999"}, + {'s', "Prefix for /proc and /sys (for containers).", "path", "no prefix"}, + {'t', "The internal clock of netdata.", "seconds", "1"}, + {'u', "Run as user.", "username", "netdata"}, + {'v', "Print netdata version and exit.", NULL, NULL}, + {'V', "Print netdata version and exit.", NULL, NULL}, + {'W', "See Advanced options below.", "options", NULL}, +}; + +int help(int exitcode) { + FILE *stream; + if(exitcode == 0) + stream = stdout; + else + stream = stderr; + + int num_opts = sizeof(option_definitions) / sizeof(struct option_def); + int i; + int max_len_arg = 0; + + // Compute maximum argument length + for( i = 0; i < num_opts; i++ ) { + if(option_definitions[i].arg_name) { + int len_arg = (int)strlen(option_definitions[i].arg_name); + if(len_arg > max_len_arg) max_len_arg = len_arg; + } + } + + if(max_len_arg > 30) max_len_arg = 30; + if(max_len_arg < 20) max_len_arg = 20; + + fprintf(stream, "%s", "\n" + " ^\n" + " |.-. .-. .-. .-. . netdata \n" + " | '-' '-' '-' '-' real-time performance monitoring, done right! \n" + " +----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+-----+--->\n" + "\n" + " Copyright (C) 2016-2023, Netdata, Inc. <info@netdata.cloud>\n" + " Released under GNU General Public License v3 or later.\n" + " All rights reserved.\n" + "\n" + " Home Page : https://netdata.cloud\n" + " Source Code: https://github.com/netdata/netdata\n" + " Docs : https://learn.netdata.cloud\n" + " Support : https://github.com/netdata/netdata/issues\n" + " License : https://github.com/netdata/netdata/blob/master/LICENSE.md\n" + "\n" + " Twitter : https://twitter.com/netdatahq\n" + " LinkedIn : https://linkedin.com/company/netdata-cloud/\n" + " Facebook : https://facebook.com/linuxnetdata/\n" + "\n" + "\n" + ); + + fprintf(stream, " SYNOPSIS: netdata [options]\n"); + fprintf(stream, "\n"); + fprintf(stream, " Options:\n\n"); + + // Output options description. + for( i = 0; i < num_opts; i++ ) { + fprintf(stream, " -%c %-*s %s", option_definitions[i].val, max_len_arg, option_definitions[i].arg_name ? 
option_definitions[i].arg_name : "", option_definitions[i].description); + if(option_definitions[i].default_value) { + fprintf(stream, "\n %c %-*s Default: %s\n", ' ', max_len_arg, "", option_definitions[i].default_value); + } else { + fprintf(stream, "\n"); + } + fprintf(stream, "\n"); + } + + fprintf(stream, "\n Advanced options:\n\n" + " -W stacksize=N Set the stacksize (in bytes).\n\n" + " -W debug_flags=N Set runtime tracing to debug.log.\n\n" + " -W unittest Run internal unittests and exit.\n\n" + " -W sqlite-meta-recover Run recovery on the metadata database and exit.\n\n" + " -W sqlite-compact Reclaim metadata database unused space and exit.\n\n" + " -W sqlite-analyze Run update statistics and exit.\n\n" + " -W sqlite-alert-cleanup Perform maintenance on the alerts table.\n\n" +#ifdef ENABLE_DBENGINE + " -W createdataset=N Create a DB engine dataset of N seconds and exit.\n\n" + " -W stresstest=A,B,C,D,E,F,G\n" + " Run a DB engine stress test for A seconds,\n" + " with B writers and C readers, with a ramp up\n" + " time of D seconds for writers, a page cache\n" + " size of E MiB, an optional disk space limit\n" + " of F MiB, G libuv workers (default 16) and exit.\n\n" +#endif + " -W set section option value\n" + " set netdata.conf option from the command line.\n\n" + " -W buildinfo Print the version, the configure options,\n" + " a list of optional features, and whether they\n" + " are enabled or not.\n\n" + " -W buildinfojson Print the version, the configure options,\n" + " a list of optional features, and whether they\n" + " are enabled or not, in JSON format.\n\n" + " -W simple-pattern pattern string\n" + " Check if string matches pattern and exit.\n\n" + " -W \"claim -token=TOKEN -rooms=ROOM1,ROOM2\"\n" + " Claim the agent to the workspace rooms pointed to by TOKEN and ROOM*.\n\n" +#ifdef OS_WINDOWS + " -W perflibdump [key]\n" + " Dump the Windows Performance Counters Registry in JSON.\n\n" +#endif + ); + + fprintf(stream, "\n Signals netdata handles:\n\n" + " - HUP Close and reopen log files.\n" + " - USR2 Reload health configuration.\n" + "\n" + ); + + fflush(stream); + return exitcode; +} + +#ifdef ENABLE_HTTPS +static void security_init(){ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/ssl/key.pem",netdata_configured_user_config_dir); + netdata_ssl_security_key = config_get(CONFIG_SECTION_WEB, "ssl key", filename); + + snprintfz(filename, FILENAME_MAX, "%s/ssl/cert.pem",netdata_configured_user_config_dir); + netdata_ssl_security_cert = config_get(CONFIG_SECTION_WEB, "ssl certificate", filename); + + tls_version = config_get(CONFIG_SECTION_WEB, "tls version", "1.3"); + tls_ciphers = config_get(CONFIG_SECTION_WEB, "tls ciphers", "none"); + + netdata_ssl_initialize_openssl(); +} +#endif + +static void log_init(void) { + nd_log_set_facility(config_get(CONFIG_SECTION_LOGS, "facility", "daemon")); + + time_t period = ND_LOG_DEFAULT_THROTTLE_PERIOD; + size_t logs = ND_LOG_DEFAULT_THROTTLE_LOGS; + period = config_get_number(CONFIG_SECTION_LOGS, "logs flood protection period", period); + logs = (unsigned long)config_get_number(CONFIG_SECTION_LOGS, "logs to trigger flood protection", (long long int)logs); + nd_log_set_flood_protection(logs, period); + + const char *netdata_log_level = getenv("NETDATA_LOG_LEVEL"); + netdata_log_level = netdata_log_level ? 
nd_log_id2priority(nd_log_priority2id(netdata_log_level)) : NDLP_INFO_STR; + + nd_log_set_priority_level(config_get(CONFIG_SECTION_LOGS, "level", netdata_log_level)); + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/debug.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_DEBUG, config_get(CONFIG_SECTION_LOGS, "debug", filename)); + + bool with_journal = is_stderr_connected_to_journal() /* || nd_log_journal_socket_available() */; + if(with_journal) + snprintfz(filename, FILENAME_MAX, "journal"); + else + snprintfz(filename, FILENAME_MAX, "%s/daemon.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_DAEMON, config_get(CONFIG_SECTION_LOGS, "daemon", filename)); + + if(with_journal) + snprintfz(filename, FILENAME_MAX, "journal"); + else + snprintfz(filename, FILENAME_MAX, "%s/collector.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_COLLECTORS, config_get(CONFIG_SECTION_LOGS, "collector", filename)); + + snprintfz(filename, FILENAME_MAX, "%s/access.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_ACCESS, config_get(CONFIG_SECTION_LOGS, "access", filename)); + + if(with_journal) + snprintfz(filename, FILENAME_MAX, "journal"); + else + snprintfz(filename, FILENAME_MAX, "%s/health.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_HEALTH, config_get(CONFIG_SECTION_LOGS, "health", filename)); + +#ifdef ENABLE_ACLK + aclklog_enabled = config_get_boolean(CONFIG_SECTION_CLOUD, "conversation log", CONFIG_BOOLEAN_NO); + if (aclklog_enabled) { + snprintfz(filename, FILENAME_MAX, "%s/aclk.log", netdata_configured_log_dir); + nd_log_set_user_settings(NDLS_ACLK, config_get(CONFIG_SECTION_CLOUD, "conversation log file", filename)); + } +#endif +} + +char *initialize_lock_directory_path(char *prefix) +{ + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/lock", prefix); + + return config_get(CONFIG_SECTION_DIRECTORIES, "lock", filename); +} + +static void backwards_compatible_config() { + // move [global] options to the [web] section + config_move(CONFIG_SECTION_GLOBAL, "http port listen backlog", + CONFIG_SECTION_WEB, "listen backlog"); + + config_move(CONFIG_SECTION_GLOBAL, "bind socket to IP", + CONFIG_SECTION_WEB, "bind to"); + + config_move(CONFIG_SECTION_GLOBAL, "bind to", + CONFIG_SECTION_WEB, "bind to"); + + config_move(CONFIG_SECTION_GLOBAL, "port", + CONFIG_SECTION_WEB, "default port"); + + config_move(CONFIG_SECTION_GLOBAL, "default port", + CONFIG_SECTION_WEB, "default port"); + + config_move(CONFIG_SECTION_GLOBAL, "disconnect idle web clients after seconds", + CONFIG_SECTION_WEB, "disconnect idle clients after seconds"); + + config_move(CONFIG_SECTION_GLOBAL, "respect web browser do not track policy", + CONFIG_SECTION_WEB, "respect do not track policy"); + + config_move(CONFIG_SECTION_GLOBAL, "web x-frame-options header", + CONFIG_SECTION_WEB, "x-frame-options response header"); + + config_move(CONFIG_SECTION_GLOBAL, "enable web responses gzip compression", + CONFIG_SECTION_WEB, "enable gzip compression"); + + config_move(CONFIG_SECTION_GLOBAL, "web compression strategy", + CONFIG_SECTION_WEB, "gzip compression strategy"); + + config_move(CONFIG_SECTION_GLOBAL, "web compression level", + CONFIG_SECTION_WEB, "gzip compression level"); + + config_move(CONFIG_SECTION_GLOBAL, "config directory", + CONFIG_SECTION_DIRECTORIES, "config"); + + config_move(CONFIG_SECTION_GLOBAL, "stock config directory", + CONFIG_SECTION_DIRECTORIES, "stock config"); + + 
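
Editor's note: `backwards_compatible_config()` is one long run of `config_move()` calls (the list continues below this point) that carry values from pre-reorganization netdata.conf keys to their new sections, so existing user configurations keep working across renames. A toy sketch of the idea over a flat key/value store; the `cfg_*` helpers are hypothetical, not the upstream appconfig API, and upstream moves section/name pairs rather than flat keys:

```c
#include <stdio.h>
#include <string.h>

#define MAX_KEYS 32

struct kv { char key[64]; char value[64]; };
static struct kv store[MAX_KEYS];
static int nkeys;

static const char *cfg_get(const char *key) {
    for (int i = 0; i < nkeys; i++)
        if (!strcmp(store[i].key, key))
            return store[i].value;
    return NULL;
}

static void cfg_set(const char *key, const char *value) {
    for (int i = 0; i < nkeys; i++)
        if (!strcmp(store[i].key, key)) {
            snprintf(store[i].value, sizeof store[i].value, "%s", value);
            return;
        }
    if (nkeys < MAX_KEYS) {
        snprintf(store[nkeys].key, sizeof store[nkeys].key, "%s", key);
        snprintf(store[nkeys].value, sizeof store[nkeys].value, "%s", value);
        nkeys++;
    }
}

static void cfg_remove(const char *key) {
    for (int i = 0; i < nkeys; i++)
        if (!strcmp(store[i].key, key)) {
            store[i] = store[--nkeys];  /* swap the last entry into the hole */
            return;
        }
}

/* migrate old_key's value to new_key, without clobbering an explicit new value */
static void cfg_move(const char *old_key, const char *new_key) {
    const char *v = cfg_get(old_key);
    if (!v)
        return;                 /* nothing to migrate */
    if (!cfg_get(new_key))
        cfg_set(new_key, v);    /* the old value keeps working under the new name */
    cfg_remove(old_key);
}

int main(void) {
    cfg_set("global/memory mode", "dbengine");     /* a pre-rename user setting */
    cfg_move("global/memory mode", "db/mode");
    printf("db/mode = %s\n", cfg_get("db/mode"));  /* -> dbengine */
    return 0;
}
```
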
config_move(CONFIG_SECTION_GLOBAL, "log directory", + CONFIG_SECTION_DIRECTORIES, "log"); + + config_move(CONFIG_SECTION_GLOBAL, "web files directory", + CONFIG_SECTION_DIRECTORIES, "web"); + + config_move(CONFIG_SECTION_GLOBAL, "cache directory", + CONFIG_SECTION_DIRECTORIES, "cache"); + + config_move(CONFIG_SECTION_GLOBAL, "lib directory", + CONFIG_SECTION_DIRECTORIES, "lib"); + + config_move(CONFIG_SECTION_GLOBAL, "home directory", + CONFIG_SECTION_DIRECTORIES, "home"); + + config_move(CONFIG_SECTION_GLOBAL, "lock directory", + CONFIG_SECTION_DIRECTORIES, "lock"); + + config_move(CONFIG_SECTION_GLOBAL, "plugins directory", + CONFIG_SECTION_DIRECTORIES, "plugins"); + + config_move(CONFIG_SECTION_HEALTH, "health configuration directory", + CONFIG_SECTION_DIRECTORIES, "health config"); + + config_move(CONFIG_SECTION_HEALTH, "stock health configuration directory", + CONFIG_SECTION_DIRECTORIES, "stock health config"); + + config_move(CONFIG_SECTION_REGISTRY, "registry db directory", + CONFIG_SECTION_DIRECTORIES, "registry"); + + config_move(CONFIG_SECTION_GLOBAL, "debug log", + CONFIG_SECTION_LOGS, "debug"); + + config_move(CONFIG_SECTION_GLOBAL, "error log", + CONFIG_SECTION_LOGS, "error"); + + config_move(CONFIG_SECTION_GLOBAL, "access log", + CONFIG_SECTION_LOGS, "access"); + + config_move(CONFIG_SECTION_GLOBAL, "facility log", + CONFIG_SECTION_LOGS, "facility"); + + config_move(CONFIG_SECTION_GLOBAL, "errors flood protection period", + CONFIG_SECTION_LOGS, "errors flood protection period"); + + config_move(CONFIG_SECTION_GLOBAL, "errors to trigger flood protection", + CONFIG_SECTION_LOGS, "errors to trigger flood protection"); + + config_move(CONFIG_SECTION_GLOBAL, "debug flags", + CONFIG_SECTION_LOGS, "debug flags"); + + config_move(CONFIG_SECTION_GLOBAL, "TZ environment variable", + CONFIG_SECTION_ENV_VARS, "TZ"); + + config_move(CONFIG_SECTION_PLUGINS, "PATH environment variable", + CONFIG_SECTION_ENV_VARS, "PATH"); + + config_move(CONFIG_SECTION_PLUGINS, "PYTHONPATH environment variable", + CONFIG_SECTION_ENV_VARS, "PYTHONPATH"); + + config_move(CONFIG_SECTION_STATSD, "enabled", + CONFIG_SECTION_PLUGINS, "statsd"); + + config_move(CONFIG_SECTION_GLOBAL, "memory mode", + CONFIG_SECTION_DB, "mode"); + + config_move(CONFIG_SECTION_GLOBAL, "history", + CONFIG_SECTION_DB, "retention"); + + config_move(CONFIG_SECTION_GLOBAL, "update every", + CONFIG_SECTION_DB, "update every"); + + config_move(CONFIG_SECTION_GLOBAL, "page cache size", + CONFIG_SECTION_DB, "dbengine page cache size MB"); + + config_move(CONFIG_SECTION_DB, "page cache size", + CONFIG_SECTION_DB, "dbengine page cache size MB"); + + config_move(CONFIG_SECTION_GLOBAL, "page cache uses malloc", + CONFIG_SECTION_DB, "dbengine page cache with malloc"); + + config_move(CONFIG_SECTION_DB, "page cache with malloc", + CONFIG_SECTION_DB, "dbengine page cache with malloc"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine disk space", + CONFIG_SECTION_DB, "dbengine disk space MB"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine multihost disk space", + CONFIG_SECTION_DB, "dbengine multihost disk space MB"); + + config_move(CONFIG_SECTION_DB, "dbengine disk space MB", + CONFIG_SECTION_DB, "dbengine multihost disk space MB"); + + config_move(CONFIG_SECTION_DB, "dbengine multihost disk space MB", + CONFIG_SECTION_DB, "dbengine tier 0 disk space MB"); + + config_move(CONFIG_SECTION_DB, "dbengine tier 1 multihost disk space MB", + CONFIG_SECTION_DB, "dbengine tier 1 disk space MB"); + + config_move(CONFIG_SECTION_DB, "dbengine tier 2 
multihost disk space MB", + CONFIG_SECTION_DB, "dbengine tier 2 disk space MB"); + + config_move(CONFIG_SECTION_DB, "dbengine tier 3 multihost disk space MB", + CONFIG_SECTION_DB, "dbengine tier 3 disk space MB"); + + config_move(CONFIG_SECTION_DB, "dbengine tier 4 multihost disk space MB", + CONFIG_SECTION_DB, "dbengine tier 4 disk space MB"); + + config_move(CONFIG_SECTION_GLOBAL, "memory deduplication (ksm)", + CONFIG_SECTION_DB, "memory deduplication (ksm)"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch timeout", + CONFIG_SECTION_DB, "dbengine page fetch timeout secs"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine page fetch retries", + CONFIG_SECTION_DB, "dbengine page fetch retries"); + + config_move(CONFIG_SECTION_GLOBAL, "dbengine extent pages", + CONFIG_SECTION_DB, "dbengine pages per extent"); + + config_move(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", + CONFIG_SECTION_DB, "cleanup obsolete charts after secs"); + + config_move(CONFIG_SECTION_GLOBAL, "gap when lost iterations above", + CONFIG_SECTION_DB, "gap when lost iterations above"); + + config_move(CONFIG_SECTION_GLOBAL, "cleanup orphan hosts after seconds", + CONFIG_SECTION_DB, "cleanup orphan hosts after secs"); + + config_move(CONFIG_SECTION_GLOBAL, "enable zero metrics", + CONFIG_SECTION_DB, "enable zero metrics"); + + config_move(CONFIG_SECTION_LOGS, "error", + CONFIG_SECTION_LOGS, "daemon"); + + config_move(CONFIG_SECTION_LOGS, "severity level", + CONFIG_SECTION_LOGS, "level"); + + config_move(CONFIG_SECTION_LOGS, "errors to trigger flood protection", + CONFIG_SECTION_LOGS, "logs to trigger flood protection"); + + config_move(CONFIG_SECTION_LOGS, "errors flood protection period", + CONFIG_SECTION_LOGS, "logs flood protection period"); + config_move(CONFIG_SECTION_HEALTH, "is ephemeral", + CONFIG_SECTION_GLOBAL, "is ephemeral node"); + + config_move(CONFIG_SECTION_HEALTH, "has unstable connection", + CONFIG_SECTION_GLOBAL, "has unstable connection"); +} + +static int get_hostname(char *buf, size_t buf_size) { + if (netdata_configured_host_prefix && *netdata_configured_host_prefix) { + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/etc/hostname", netdata_configured_host_prefix); + + if (!read_txt_file(filename, buf, buf_size)) { + trim(buf); + return 0; + } + } + + return gethostname(buf, buf_size); +} + +static void get_netdata_configured_variables() +{ +#ifdef ENABLE_DBENGINE + legacy_multihost_db_space = config_exists(CONFIG_SECTION_DB, "dbengine multihost disk space MB"); + if (!legacy_multihost_db_space) + legacy_multihost_db_space = config_exists(CONFIG_SECTION_GLOBAL, "dbengine multihost disk space"); + if (!legacy_multihost_db_space) + legacy_multihost_db_space = config_exists(CONFIG_SECTION_GLOBAL, "dbengine disk space"); +#endif + + backwards_compatible_config(); + + // ------------------------------------------------------------------------ + // get the hostname + + netdata_configured_host_prefix = config_get(CONFIG_SECTION_GLOBAL, "host access prefix", ""); + verify_netdata_host_prefix(true); + + char buf[HOSTNAME_MAX + 1]; + if (get_hostname(buf, HOSTNAME_MAX)) + netdata_log_error("Cannot get machine hostname."); + + netdata_configured_hostname = config_get(CONFIG_SECTION_GLOBAL, "hostname", buf); + netdata_log_debug(D_OPTIONS, "hostname set to '%s'", netdata_configured_hostname); + + // ------------------------------------------------------------------------ + // get default database update frequency + + default_rrd_update_every = (int) 
config_get_number(CONFIG_SECTION_DB, "update every", UPDATE_EVERY); + if(default_rrd_update_every < 1 || default_rrd_update_every > 600) { + netdata_log_error("Invalid data collection frequency (update every) %d given. Defaulting to %d.", default_rrd_update_every, UPDATE_EVERY); + default_rrd_update_every = UPDATE_EVERY; + config_set_number(CONFIG_SECTION_DB, "update every", default_rrd_update_every); + } + + // ------------------------------------------------------------------------ + // get default memory mode for the database + + { + const char *mode = config_get(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode)); + default_rrd_memory_mode = rrd_memory_mode_id(mode); + if(strcmp(mode, rrd_memory_mode_name(default_rrd_memory_mode)) != 0) { + netdata_log_error("Invalid memory mode '%s' given. Using '%s'", mode, rrd_memory_mode_name(default_rrd_memory_mode)); + config_set(CONFIG_SECTION_DB, "mode", rrd_memory_mode_name(default_rrd_memory_mode)); + } + } + + // ------------------------------------------------------------------------ + // get default database size + + if(default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE && default_rrd_memory_mode != RRD_MEMORY_MODE_NONE) { + default_rrd_history_entries = (int)config_get_number( + CONFIG_SECTION_DB, "retention", + align_entries_to_pagesize(default_rrd_memory_mode, RRD_DEFAULT_HISTORY_ENTRIES)); + + long h = align_entries_to_pagesize(default_rrd_memory_mode, default_rrd_history_entries); + if (h != default_rrd_history_entries) { + config_set_number(CONFIG_SECTION_DB, "retention", h); + default_rrd_history_entries = (int)h; + } + } + + // ------------------------------------------------------------------------ + // get system paths + + netdata_configured_user_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "config", netdata_configured_user_config_dir); + netdata_configured_stock_config_dir = config_get(CONFIG_SECTION_DIRECTORIES, "stock config", netdata_configured_stock_config_dir); + netdata_configured_log_dir = config_get(CONFIG_SECTION_DIRECTORIES, "log", netdata_configured_log_dir); + netdata_configured_web_dir = config_get(CONFIG_SECTION_DIRECTORIES, "web", netdata_configured_web_dir); + netdata_configured_cache_dir = config_get(CONFIG_SECTION_DIRECTORIES, "cache", netdata_configured_cache_dir); + netdata_configured_varlib_dir = config_get(CONFIG_SECTION_DIRECTORIES, "lib", netdata_configured_varlib_dir); + + netdata_configured_lock_dir = initialize_lock_directory_path(netdata_configured_varlib_dir); + + { + pluginsd_initialize_plugin_directories(); + netdata_configured_primary_plugins_dir = plugin_directories[PLUGINSD_STOCK_PLUGINS_DIRECTORY_PATH]; + } + +#ifdef ENABLE_DBENGINE + // ------------------------------------------------------------------------ + // get default Database Engine page type + + const char *page_type = config_get(CONFIG_SECTION_DB, "dbengine page type", "gorilla"); + if (strcmp(page_type, "gorilla") == 0) + tier_page_type[0] = RRDENG_PAGE_TYPE_GORILLA_32BIT; + else if (strcmp(page_type, "raw") == 0) + tier_page_type[0] = RRDENG_PAGE_TYPE_ARRAY_32BIT; + else { + tier_page_type[0] = RRDENG_PAGE_TYPE_ARRAY_32BIT; + netdata_log_error("Invalid dbengine page type ''%s' given. 
Defaulting to 'raw'.", page_type); + } + + // ------------------------------------------------------------------------ + // get default Database Engine page cache size in MiB + + default_rrdeng_page_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb); + default_rrdeng_extent_cache_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine extent cache size MB", default_rrdeng_extent_cache_mb); + db_engine_journal_check = config_get_boolean(CONFIG_SECTION_DB, "dbengine enable journal integrity check", CONFIG_BOOLEAN_NO); + + if(default_rrdeng_extent_cache_mb < 0) + default_rrdeng_extent_cache_mb = 0; + + if(default_rrdeng_page_cache_mb < RRDENG_MIN_PAGE_CACHE_SIZE_MB) { + netdata_log_error("Invalid page cache size %d given. Defaulting to %d.", default_rrdeng_page_cache_mb, RRDENG_MIN_PAGE_CACHE_SIZE_MB); + default_rrdeng_page_cache_mb = RRDENG_MIN_PAGE_CACHE_SIZE_MB; + config_set_number(CONFIG_SECTION_DB, "dbengine page cache size MB", default_rrdeng_page_cache_mb); + } + + // ------------------------------------------------------------------------ + // get default Database Engine disk space quota in MiB +// +// // if (!config_exists(CONFIG_SECTION_DB, "dbengine disk space MB") && !config_exists(CONFIG_SECTION_DB, "dbengine multihost disk space MB")) +// +// default_rrdeng_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb); +// if(default_rrdeng_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) { +// netdata_log_error("Invalid dbengine disk space %d given. Defaulting to %d.", default_rrdeng_disk_quota_mb, RRDENG_MIN_DISK_SPACE_MB); +// default_rrdeng_disk_quota_mb = RRDENG_MIN_DISK_SPACE_MB; +// config_set_number(CONFIG_SECTION_DB, "dbengine disk space MB", default_rrdeng_disk_quota_mb); +// } +// +// default_multidb_disk_quota_mb = (int) config_get_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", compute_multidb_diskspace()); +// if(default_multidb_disk_quota_mb < RRDENG_MIN_DISK_SPACE_MB) { +// netdata_log_error("Invalid multidb disk space %d given. Defaulting to %d.", default_multidb_disk_quota_mb, default_rrdeng_disk_quota_mb); +// default_multidb_disk_quota_mb = default_rrdeng_disk_quota_mb; +// config_set_number(CONFIG_SECTION_DB, "dbengine multihost disk space MB", default_multidb_disk_quota_mb); +// } + +#else + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + error_report("RRD_MEMORY_MODE_DBENGINE is not supported in this platform. 
The agent will use db mode 'save' instead."); + default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; + } +#endif + + // -------------------------------------------------------------------- + // get KSM settings + +#ifdef MADV_MERGEABLE + enable_ksm = config_get_boolean(CONFIG_SECTION_DB, "memory deduplication (ksm)", enable_ksm); +#endif + + // -------------------------------------------------------------------- + // metric correlations + + enable_metric_correlations = config_get_boolean(CONFIG_SECTION_GLOBAL, "enable metric correlations", enable_metric_correlations); + default_metric_correlations_method = weights_string_to_method(config_get( + CONFIG_SECTION_GLOBAL, "metric correlations method", + weights_method_to_string(default_metric_correlations_method))); + + // -------------------------------------------------------------------- + + rrdset_free_obsolete_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s); + rrdhost_free_ephemeral_time_s = config_get_number(CONFIG_SECTION_DB, "cleanup ephemeral hosts after secs", rrdhost_free_ephemeral_time_s); + // Current chart locking and invalidation scheme doesn't prevent Netdata from segmentation faults if a short + // cleanup delay is set. Extensive stress tests showed that 10 seconds is quite a safe delay. Look at + // https://github.com/netdata/netdata/pull/11222#issuecomment-868367920 for more information. + if (rrdset_free_obsolete_time_s < 10) { + rrdset_free_obsolete_time_s = 10; + netdata_log_info("The \"cleanup obsolete charts after seconds\" option was set to 10 seconds."); + config_set_number(CONFIG_SECTION_DB, "cleanup obsolete charts after secs", rrdset_free_obsolete_time_s); + } + + gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above); + if (gap_when_lost_iterations_above < 1) { + gap_when_lost_iterations_above = 1; + config_set_number(CONFIG_SECTION_DB, "gap when lost iterations above", gap_when_lost_iterations_above); + } + gap_when_lost_iterations_above += 2; + + // -------------------------------------------------------------------- + // get various system parameters + + os_get_system_HZ(); + os_get_system_cpus_uncached(); + os_get_system_pid_max(); + + +} + +static void post_conf_load(char **user) +{ + // -------------------------------------------------------------------- + // get the user we should run + + // IMPORTANT: this is required before web_files_uid() + if(getuid() == 0) { + *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", NETDATA_USER); + } + else { + struct passwd *passwd = getpwuid(getuid()); + *user = config_get(CONFIG_SECTION_GLOBAL, "run as user", (passwd && passwd->pw_name)?passwd->pw_name:""); + } +} + +static bool load_netdata_conf(char *filename, char overwrite_used, char **user) { + errno = 0; + + int ret = 0; + + if(filename && *filename) { + ret = config_load(filename, overwrite_used, NULL); + if(!ret) + netdata_log_error("CONFIG: cannot load config file '%s'.", filename); + } + else { + filename = strdupz_path_subpath(netdata_configured_user_config_dir, "netdata.conf"); + + ret = config_load(filename, overwrite_used, NULL); + if(!ret) { + netdata_log_info("CONFIG: cannot load user config '%s'. 
Will try the stock version.", filename); + freez(filename); + + filename = strdupz_path_subpath(netdata_configured_stock_config_dir, "netdata.conf"); + ret = config_load(filename, overwrite_used, NULL); + if(!ret) + netdata_log_info("CONFIG: cannot load stock config '%s'. Running with internal defaults.", filename); + } + + freez(filename); + } + + post_conf_load(user); + return ret; +} + +// coverity[ +tainted_string_sanitize_content : arg-0 ] +static inline void coverity_remove_taint(char *s) +{ + (void)s; +} + +int get_system_info(struct rrdhost_system_info *system_info) { + char *script; + script = mallocz(sizeof(char) * (strlen(netdata_configured_primary_plugins_dir) + strlen("system-info.sh") + 2)); + sprintf(script, "%s/%s", netdata_configured_primary_plugins_dir, "system-info.sh"); + if (unlikely(access(script, R_OK) != 0)) { + netdata_log_error("System info script %s not found.",script); + freez(script); + return 1; + } + + pid_t command_pid; + + FILE *fp_child_input; + FILE *fp_child_output = netdata_popen(script, &command_pid, &fp_child_input); + if(fp_child_output) { + char line[200 + 1]; + // Removed the double strlens, if the Coverity tainted string warning reappears I'll revert. + // One time init code, but I'm curious about the warning... + while (fgets(line, 200, fp_child_output) != NULL) { + char *value=line; + while (*value && *value != '=') value++; + if (*value=='=') { + *value='\0'; + value++; + char *end = value; + while (*end && *end != '\n') end++; + *end = '\0'; // Overwrite newline if present + coverity_remove_taint(line); // I/O is controlled result of system_info.sh - not tainted + coverity_remove_taint(value); + + if(unlikely(rrdhost_set_system_info_variable(system_info, line, value))) { + netdata_log_error("Unexpected environment variable %s=%s", line, value); + } else { + setenv(line, value, 1); + } + } + } + netdata_pclose(fp_child_input, fp_child_output, command_pid); + } + freez(script); + return 0; +} + +/* Any config setting that can be accessed without a default value i.e. configget(...,...,NULL) *MUST* + be set in this procedure to be called in all the relevant code paths. 
+*/ + +#define delta_startup_time(msg) \ + { \ + usec_t now_ut = now_monotonic_usec(); \ + if(prev_msg) \ + netdata_log_info("NETDATA STARTUP: in %7llu ms, %s - next: %s", (now_ut - last_ut) / USEC_PER_MS, prev_msg, msg); \ + else \ + netdata_log_info("NETDATA STARTUP: next: %s", msg); \ + last_ut = now_ut; \ + prev_msg = msg; \ + } + +int buffer_unittest(void); +int pgc_unittest(void); +int mrg_unittest(void); +int julytest(void); +int pluginsd_parser_unittest(void); +void replication_initialize(void); +void bearer_tokens_init(void); +int unittest_rrdpush_compressions(void); +int uuid_unittest(void); +int progress_unittest(void); +int dyncfg_unittest(void); + +#ifdef OS_WINDOWS +int windows_perflib_dump(const char *key); +#endif + +int unittest_prepare_rrd(char **user) { + post_conf_load(user); + get_netdata_configured_variables(); + default_rrd_update_every = 1; + default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; + health_plugin_disable(); + storage_tiers = 1; + registry_init(); + if(rrd_init("unittest", NULL, true)) { + fprintf(stderr, "rrd_init failed for unittest\n"); + return 1; + } + default_rrdpush_enabled = 0; + + return 0; +} + +int main(int argc, char **argv) { + // initialize the system clocks + clocks_init(); + netdata_start_time = now_realtime_sec(); + + usec_t started_ut = now_monotonic_usec(); + usec_t last_ut = started_ut; + const char *prev_msg = NULL; + + int i; + int config_loaded = 0; + int dont_fork = 0; + bool close_open_fds = true; + size_t default_stacksize; + char *user = NULL; + + static_threads = static_threads_get(); + + netdata_ready = false; + // set the name for logging + program_name = "netdata"; + + if (argc > 1 && strcmp(argv[1], SPAWN_SERVER_COMMAND_LINE_ARGUMENT) == 0) { + // don't run netdata, this is the spawn server + i_am_the_spawn_server = true; + spawn_server(); + exit(0); + } + + // parse options + { + int num_opts = sizeof(option_definitions) / sizeof(struct option_def); + char optstring[(num_opts * 2) + 1]; + + int string_i = 0; + for( i = 0; i < num_opts; i++ ) { + optstring[string_i] = option_definitions[i].val; + string_i++; + if(option_definitions[i].arg_name) { + optstring[string_i] = ':'; + string_i++; + } + } + // terminate optstring + optstring[string_i] ='\0'; + optstring[(num_opts *2)] ='\0'; + + int opt; + while( (opt = getopt(argc, argv, optstring)) != -1 ) { + switch(opt) { + case 'c': + if(!load_netdata_conf(optarg, 1, &user)) { + netdata_log_error("Cannot load configuration file %s.", optarg); + return 1; + } + else { + netdata_log_debug(D_OPTIONS, "Configuration loaded from %s.", optarg); + load_cloud_conf(1); + config_loaded = 1; + } + break; + case 'D': + dont_fork = 1; + break; + case 'd': + dont_fork = 0; + break; + case 'h': + return help(0); + case 'i': + config_set(CONFIG_SECTION_WEB, "bind to", optarg); + break; + case 'P': + strncpy(pidfile, optarg, FILENAME_MAX); + pidfile[FILENAME_MAX] = '\0'; + break; + case 'p': + config_set(CONFIG_SECTION_GLOBAL, "default port", optarg); + break; + case 's': + config_set(CONFIG_SECTION_GLOBAL, "host access prefix", optarg); + break; + case 't': + config_set(CONFIG_SECTION_GLOBAL, "update every", optarg); + break; + case 'u': + config_set(CONFIG_SECTION_GLOBAL, "run as user", optarg); + break; + case 'v': + case 'V': + printf("%s %s\n", program_name, NETDATA_VERSION); + return 0; + case 'W': + { + char* stacksize_string = "stacksize="; + char* debug_flags_string = "debug_flags="; + char* claim_string = "claim"; +#ifdef ENABLE_DBENGINE + char* createdataset_string = "createdataset="; + 
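
Editor's note: the prefix strings declared here drive the `-W` dispatch in the handlers below: each handler matches its `"key="` prefix with `strncmp()` and treats the remainder of `optarg` as the value, parsed with base 0 so decimal, octal and 0x-hex inputs all work. A standalone sketch of that prefix dispatch, with two illustrative keys rather than the full upstream option set:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int handle_W(const char *optarg) {
    static const char stacksize_prefix[] = "stacksize=";
    static const char debug_prefix[]     = "debug_flags=";

    if (strncmp(optarg, stacksize_prefix, strlen(stacksize_prefix)) == 0) {
        const char *value = optarg + strlen(stacksize_prefix);
        printf("stack size set to %lu bytes\n", strtoul(value, NULL, 0));
        return 0;
    }

    if (strncmp(optarg, debug_prefix, strlen(debug_prefix)) == 0) {
        const char *value = optarg + strlen(debug_prefix);
        printf("debug flags set to 0x%llx\n", strtoull(value, NULL, 0));
        return 0;
    }

    fprintf(stderr, "Unknown -W parameter '%s'\n", optarg);
    return 1;
}

int main(void) {
    int rc = 0;
    rc |= handle_W("stacksize=8388608");   /* 8 MiB */
    rc |= handle_W("debug_flags=0xffff");  /* base-0 parse accepts hex */
    return rc;
}
```
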
char* stresstest_string = "stresstest="; + + if(strcmp(optarg, "pgd-tests") == 0) { + return pgd_test(argc, argv); + } +#endif + + if(strcmp(optarg, "sqlite-meta-recover") == 0) { + sql_init_meta_database(DB_CHECK_RECOVER, 0); + return 0; + } + + if(strcmp(optarg, "sqlite-compact") == 0) { + sql_init_meta_database(DB_CHECK_RECLAIM_SPACE, 0); + return 0; + } + + if(strcmp(optarg, "sqlite-analyze") == 0) { + sql_init_meta_database(DB_CHECK_ANALYZE, 0); + return 0; + } + + if(strcmp(optarg, "sqlite-alert-cleanup") == 0) { + sql_alert_cleanup(true); + return 0; + } + + if(strcmp(optarg, "unittest") == 0) { + unittest_running = true; + + // set defaults for dbegnine unittest + config_set(CONFIG_SECTION_DB, "dbengine page type", "gorilla"); +#ifdef ENABLE_DBENGINE + default_rrdeng_disk_quota_mb = default_multidb_disk_quota_mb = 256; +#endif + + if (sqlite_library_init()) + return 1; + + if (pluginsd_parser_unittest()) return 1; + if (unit_test_static_threads()) return 1; + if (unit_test_buffer()) return 1; + if (unit_test_str2ld()) return 1; + if (buffer_unittest()) return 1; + + // No call to load the config file on this code-path + if (unittest_prepare_rrd(&user)) return 1; + if (run_all_mockup_tests()) return 1; + if (unit_test_storage()) return 1; +#ifdef ENABLE_DBENGINE + if (test_dbengine()) return 1; +#endif + if (test_sqlite()) return 1; + if (string_unittest(10000)) return 1; + if (dictionary_unittest(10000)) return 1; + if (aral_unittest(10000)) return 1; + if (rrdlabels_unittest()) return 1; + if (ctx_unittest()) return 1; + if (uuid_unittest()) return 1; + if (dyncfg_unittest()) return 1; + sqlite_library_shutdown(); + fprintf(stderr, "\n\nALL TESTS PASSED\n\n"); + return 0; + } + else if(strcmp(optarg, "escapetest") == 0) { + return command_argument_sanitization_tests(); + } + else if(strcmp(optarg, "dicttest") == 0) { + unittest_running = true; + return dictionary_unittest(10000); + } + else if(strcmp(optarg, "araltest") == 0) { + unittest_running = true; + return aral_unittest(10000); + } + else if(strcmp(optarg, "stringtest") == 0) { + unittest_running = true; + return string_unittest(10000); + } + else if(strcmp(optarg, "rrdlabelstest") == 0) { + unittest_running = true; + return rrdlabels_unittest(); + } + else if(strcmp(optarg, "buffertest") == 0) { + unittest_running = true; + return buffer_unittest(); + } + else if(strcmp(optarg, "uuidtest") == 0) { + unittest_running = true; + return uuid_unittest(); + } +#ifdef OS_WINDOWS + else if(strcmp(optarg, "perflibdump") == 0) { + return windows_perflib_dump(optind + 1 > argc ? 
NULL : argv[optind]); + } +#endif +#ifdef ENABLE_DBENGINE + else if(strcmp(optarg, "mctest") == 0) { + unittest_running = true; + return mc_unittest(); + } + else if(strcmp(optarg, "ctxtest") == 0) { + unittest_running = true; + return ctx_unittest(); + } + else if(strcmp(optarg, "metatest") == 0) { + unittest_running = true; + return metadata_unittest(); + } + else if(strcmp(optarg, "pgctest") == 0) { + unittest_running = true; + return pgc_unittest(); + } + else if(strcmp(optarg, "mrgtest") == 0) { + unittest_running = true; + return mrg_unittest(); + } + else if(strcmp(optarg, "julytest") == 0) { + unittest_running = true; + return julytest(); + } + else if(strcmp(optarg, "parsertest") == 0) { + unittest_running = true; + return pluginsd_parser_unittest(); + } + else if(strcmp(optarg, "rrdpush_compressions_test") == 0) { + unittest_running = true; + return unittest_rrdpush_compressions(); + } + else if(strcmp(optarg, "progresstest") == 0) { + unittest_running = true; + return progress_unittest(); + } + else if(strcmp(optarg, "dyncfgtest") == 0) { + unittest_running = true; + if(unittest_prepare_rrd(&user)) + return 1; + return dyncfg_unittest(); + } + else if(strncmp(optarg, createdataset_string, strlen(createdataset_string)) == 0) { + optarg += strlen(createdataset_string); + unsigned history_seconds = strtoul(optarg, NULL, 0); + post_conf_load(&user); + get_netdata_configured_variables(); + default_rrd_update_every = 1; + registry_init(); + if(rrd_init("dbengine-dataset", NULL, true)) { + fprintf(stderr, "rrd_init failed for unittest\n"); + return 1; + } + generate_dbengine_dataset(history_seconds); + return 0; + } + else if(strncmp(optarg, stresstest_string, strlen(stresstest_string)) == 0) { + char *endptr; + unsigned test_duration_sec = 0, dset_charts = 0, query_threads = 0, ramp_up_seconds = 0, + page_cache_mb = 0, disk_space_mb = 0, workers = 16; + + optarg += strlen(stresstest_string); + test_duration_sec = (unsigned)strtoul(optarg, &endptr, 0); + if (',' == *endptr) + dset_charts = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + query_threads = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + ramp_up_seconds = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + page_cache_mb = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + disk_space_mb = (unsigned)strtoul(endptr + 1, &endptr, 0); + if (',' == *endptr) + workers = (unsigned)strtoul(endptr + 1, &endptr, 0); + + if (workers > 1024) + workers = 1024; + + char workers_str[16]; + snprintf(workers_str, 15, "%u", workers); + setenv("UV_THREADPOOL_SIZE", workers_str, 1); + dbengine_stress_test(test_duration_sec, dset_charts, query_threads, ramp_up_seconds, + page_cache_mb, disk_space_mb); + return 0; + } +#endif + else if(strcmp(optarg, "simple-pattern") == 0) { + if(optind + 2 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W simple-pattern 'pattern' 'string'\n\n" + " Checks if 'pattern' matches the given 'string'.\n" + " - 'pattern' can be one or more space separated words.\n" + " - each 'word' can contain one or more asterisks.\n" + " - words starting with '!' 
give negative matches.\n" + " - words are processed left to right\n" + "\n" + "Examples:\n" + "\n" + " > match all veth interfaces, except veth0:\n" + "\n" + " -W simple-pattern '!veth0 veth*' 'veth12'\n" + "\n" + "\n" + " > match all *.ext files directly in /path/:\n" + " (this will not match *.ext files in a subdir of /path/)\n" + "\n" + " -W simple-pattern '!/path/*/*.ext /path/*.ext' '/path/test.ext'\n" + "\n" + ); + return 1; + } + + const char *haystack = argv[optind]; + const char *needle = argv[optind + 1]; + size_t len = strlen(needle) + 1; + char wildcarded[len]; + + SIMPLE_PATTERN *p = simple_pattern_create(haystack, NULL, SIMPLE_PATTERN_EXACT, true); + SIMPLE_PATTERN_RESULT ret = simple_pattern_matches_extract(p, needle, wildcarded, len); + simple_pattern_free(p); + + if(ret == SP_MATCHED_POSITIVE) { + fprintf(stdout, "RESULT: POSITIVE MATCHED - pattern '%s' matches '%s', wildcarded '%s'\n", haystack, needle, wildcarded); + return 0; + } + else if(ret == SP_MATCHED_NEGATIVE) { + fprintf(stdout, "RESULT: NEGATIVE MATCHED - pattern '%s' matches '%s', wildcarded '%s'\n", haystack, needle, wildcarded); + return 0; + } + else { + fprintf(stdout, "RESULT: NOT MATCHED - pattern '%s' does not match '%s', wildcarded '%s'\n", haystack, needle, wildcarded); + return 1; + } + } + else if(strncmp(optarg, stacksize_string, strlen(stacksize_string)) == 0) { + optarg += strlen(stacksize_string); + config_set(CONFIG_SECTION_GLOBAL, "pthread stack size", optarg); + } + else if(strncmp(optarg, debug_flags_string, strlen(debug_flags_string)) == 0) { + optarg += strlen(debug_flags_string); + config_set(CONFIG_SECTION_LOGS, "debug flags", optarg); + debug_flags = strtoull(optarg, NULL, 0); + } + else if(strcmp(optarg, "set") == 0) { + if(optind + 3 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W set 'section' 'key' 'value'\n\n" + " Overwrites settings of netdata.conf.\n" + "\n" + " These options interact with: -c netdata.conf\n" + " If -c netdata.conf is given on the command line,\n" + " before -W set... the user may overwrite command\n" + " line parameters at netdata.conf\n" + " If -c netdata.conf is given after (or missing)\n" + " -W set... the user cannot overwrite the command line\n" + " parameters." + "\n" + ); + return 1; + } + const char *section = argv[optind]; + const char *key = argv[optind + 1]; + const char *value = argv[optind + 2]; + optind += 3; + + // set this one as the default + // only if it is not already set in the config file + // so the caller can use -c netdata.conf before or + // after this parameter to prevent or allow overwriting + // variables at netdata.conf + config_set_default(section, key, value); + + // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value); + }
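+ // [Editor's note - illustrative only, not part of the upstream change] A
+ // hypothetical invocation showing the ordering rule documented in the usage
+ // text above (paths, sections and keys here are examples only):
+ //
+ //     netdata -c /etc/netdata/netdata.conf -W set db "update every" 2
+ //
+ // loads netdata.conf first, so a value already present in the file wins over
+ // the -W set default, whereas
+ //
+ //     netdata -W set db "update every" 2 -c /etc/netdata/netdata.conf
+ //
+ // (or omitting -c entirely) makes the -W set value final.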
+ else if(strcmp(optarg, "set2") == 0) { + if(optind + 4 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W set2 'conf_file' 'section' 'key' 'value'\n\n" + " Overwrites settings of netdata.conf or cloud.conf\n" + "\n" + " These options interact with: -c netdata.conf\n" + " If -c netdata.conf is given on the command line,\n" + " before -W set2... the user may overwrite command\n" + " line parameters at netdata.conf\n" + " If -c netdata.conf is given after (or missing)\n" + " -W set2... the user cannot overwrite the command line\n" + " parameters.\n" + " conf_file can be \"cloud\" or \"netdata\".\n" + "\n" + ); + return 1; + } + const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config; + const char *section = argv[optind + 1]; + const char *key = argv[optind + 2]; + const char *value = argv[optind + 3]; + optind += 4; + + // set this one as the default + // only if it is not already set in the config file + // so the caller can use -c netdata.conf before or + // after this parameter to prevent or allow overwriting + // variables at netdata.conf + appconfig_set_default(tmp_config, section, key, value); + + // fprintf(stderr, "SET section '%s', key '%s', value '%s'\n", section, key, value); + } + else if(strcmp(optarg, "get") == 0) { + if(optind + 3 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W get 'section' 'key' 'default'\n\n" + " Prints settings of netdata.conf.\n" + "\n" + " These options interact with: -c netdata.conf\n" + " -c netdata.conf has to be given before -W get.\n" + "\n" + ); + return 1; + } + + if(!config_loaded) { + fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE before -W get. Using default config.\n"); + load_netdata_conf(NULL, 0, &user); + } + + get_netdata_configured_variables(); + + const char *section = argv[optind]; + const char *key = argv[optind + 1]; + const char *def = argv[optind + 2]; + const char *value = config_get(section, key, def); + printf("%s\n", value); + return 0; + } + else if(strcmp(optarg, "get2") == 0) { + if(optind + 4 > argc) { + fprintf(stderr, "%s", "\nUSAGE: -W get2 'conf_file' 'section' 'key' 'default'\n\n" + " Prints settings of netdata.conf or cloud.conf\n" + "\n" + " These options interact with: -c netdata.conf\n" + " -c netdata.conf has to be given before -W get2.\n" + " conf_file can be \"cloud\" or \"netdata\".\n" + "\n" + ); + return 1; + } + + if(!config_loaded) { + fprintf(stderr, "warning: no configuration file has been loaded. Use -c CONFIG_FILE before -W get2. Using default config.\n"); + load_netdata_conf(NULL, 0, &user); + load_cloud_conf(1); + } + + get_netdata_configured_variables(); + + const char *conf_file = argv[optind]; /* "cloud" is cloud.conf, otherwise netdata.conf */ + struct config *tmp_config = strcmp(conf_file, "cloud") ? &netdata_config : &cloud_config; + const char *section = argv[optind + 1]; + const char *key = argv[optind + 2]; + const char *def = argv[optind + 3]; + const char *value = appconfig_get(tmp_config, section, key, def); + printf("%s\n", value); + return 0; + } + else if(strncmp(optarg, claim_string, strlen(claim_string)) == 0) { + /* will trigger a claiming attempt when the agent is initialized */ + claiming_pending_arguments = optarg + strlen(claim_string); + } + else if(strcmp(optarg, "buildinfo") == 0) { + print_build_info(); + return 0; + } + else if(strcmp(optarg, "buildinfojson") == 0) { + print_build_info_json(); + return 0; + } + else if(strcmp(optarg, "keepopenfds") == 0) { + // Internal dev option to skip closing inherited + // open FDs. Useful when we want to run the agent + // under profiling tools that open/maintain their + // own FDs. + close_open_fds = false; + } else { + fprintf(stderr, "Unknown -W parameter '%s'\n", optarg); + return help(1); + } + } + break; + + default: /* ? 
*/ + fprintf(stderr, "Unknown parameter '%c'\n", opt); + return help(1); + } + } + } + + if (close_open_fds == true) { + // close all open file descriptors, except the standard ones + // the caller may have left open files (lxc-attach has this issue) + for_each_open_fd(OPEN_FD_ACTION_CLOSE, OPEN_FD_EXCLUDE_STDIN | OPEN_FD_EXCLUDE_STDOUT | OPEN_FD_EXCLUDE_STDERR); + } + + if(!config_loaded) { + load_netdata_conf(NULL, 0, &user); + load_cloud_conf(0); + } + + // ------------------------------------------------------------------------ + // initialize netdata + { + char *pmax = config_get(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for plugins", "1"); + if(pmax && *pmax) + setenv("MALLOC_ARENA_MAX", pmax, 1); + +#if defined(HAVE_C_MALLOPT) + // limit the glibc malloc arenas for netdata itself to the configured value + i = (int)config_get_number(CONFIG_SECTION_GLOBAL, "glibc malloc arena max for netdata", 1); + if(i > 0) + mallopt(M_ARENA_MAX, i); + + +#ifdef NETDATA_INTERNAL_CHECKS + mallopt(M_PERTURB, 0x5A); + // mallopt(M_MXFAST, 0); +#endif +#endif + + // set libuv worker threads + libuv_worker_threads = (int)get_netdata_cpus() * 6; + + if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS) + libuv_worker_threads = MIN_LIBUV_WORKER_THREADS; + + if(libuv_worker_threads > MAX_LIBUV_WORKER_THREADS) + libuv_worker_threads = MAX_LIBUV_WORKER_THREADS; + + + libuv_worker_threads = config_get_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads); + if(libuv_worker_threads < MIN_LIBUV_WORKER_THREADS) { + libuv_worker_threads = MIN_LIBUV_WORKER_THREADS; + config_set_number(CONFIG_SECTION_GLOBAL, "libuv worker threads", libuv_worker_threads); + } + + { + char buf[20 + 1]; + snprintfz(buf, sizeof(buf) - 1, "%d", libuv_worker_threads); + setenv("UV_THREADPOOL_SIZE", buf, 1); + } + + // prepare configuration environment variables for the plugins + get_netdata_configured_variables(); + set_global_environment(); + + // work while we are cd'd into config_dir + // to allow the plugins to refer to their config + // files using relative filenames + if(chdir(netdata_configured_user_config_dir) == -1) + fatal("Cannot cd to '%s'", netdata_configured_user_config_dir); + + // Get execution path before switching user to avoid permission issues + get_netdata_execution_path(); + } + + { + // -------------------------------------------------------------------- + // get the debugging flags from the configuration file + + char *flags = config_get(CONFIG_SECTION_LOGS, "debug flags", "0x0000000000000000"); + setenv("NETDATA_DEBUG_FLAGS", flags, 1); + + debug_flags = strtoull(flags, NULL, 0); + netdata_log_debug(D_OPTIONS, "Debug flags set to '0x%" PRIX64 "'.", debug_flags); + + if(debug_flags != 0) { + struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; + if(setrlimit(RLIMIT_CORE, &rl) != 0) + netdata_log_error("Cannot request unlimited core dumps for debugging... 
Proceeding anyway..."); + +#ifdef HAVE_SYS_PRCTL_H + prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); +#endif + } + + + // -------------------------------------------------------------------- + // get log filenames and settings + + log_init(); + nd_log_limits_unlimited(); + + // initialize the log files + nd_log_initialize(); + netdata_log_info("Netdata agent version '%s' is starting", NETDATA_VERSION); + + ieee754_doubles = is_system_ieee754_double(); + if(!ieee754_doubles) + globally_disabled_capabilities |= STREAM_CAP_IEEE754; + + aral_judy_init(); + + get_system_timezone(); + + bearer_tokens_init(); + + replication_initialize(); + + rrd_functions_inflight_init(); + + // -------------------------------------------------------------------- + // get the certificate and start security + +#ifdef ENABLE_HTTPS + security_init(); +#endif + + // -------------------------------------------------------------------- + // This is the safest place to start the SILENCERS structure + + health_set_silencers_filename(); + health_initialize_global_silencers(); + +// // -------------------------------------------------------------------- +// // Initialize ML configuration +// +// delta_startup_time("initialize ML"); +// ml_init(); + + // -------------------------------------------------------------------- + // setup process signals + + // block signals while initializing threads. + // this causes the threads to block signals. + + delta_startup_time("initialize signals"); + signals_block(); + signals_init(); // setup the signals we want to use + + // -------------------------------------------------------------------- + // check which threads are enabled and initialize them + + delta_startup_time("initialize static threads"); + + // setup threads configs + default_stacksize = netdata_threads_init(); + // musl default thread stack size is 128k, let's set it to a higher value to avoid random crashes + if (default_stacksize < 1 * 1024 * 1024) + default_stacksize = 1 * 1024 * 1024; + +#ifdef NETDATA_INTERNAL_CHECKS + config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring", true); + config_set_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring extended", true); +#endif + + if(config_get_boolean(CONFIG_SECTION_PLUGINS, "netdata monitoring extended", false)) + // this has to run before starting any other threads that use workers + workers_utilization_enable(); + + for (i = 0; static_threads[i].name != NULL ; i++) { + struct netdata_static_thread *st = &static_threads[i]; + + if(st->config_name) + st->enabled = config_get_boolean(st->config_section, st->config_name, st->enabled); + + if(st->enabled && st->init_routine) + st->init_routine(); + + if(st->env_name) + setenv(st->env_name, st->enabled?"YES":"NO", 1); + + if(st->global_variable) + *st->global_variable = (st->enabled) ? true : false; + } + + // -------------------------------------------------------------------- + // create the listening sockets + + delta_startup_time("initialize web server"); + + web_client_api_v1_init(); + web_server_threading_selection(); + + if(web_server_mode != WEB_SERVER_MODE_NONE) { + if (!api_listen_sockets_setup()) { + netdata_log_error("Cannot setup listen port(s). 
Is Netdata already running?"); + exit(1); + } + } + if (sqlite_library_init()) + fatal("Failed to initialize sqlite library"); + + // -------------------------------------------------------------------- + // Initialize ML configuration + + delta_startup_time("initialize ML"); + ml_init(); + +#ifdef ENABLE_H2O + delta_startup_time("initialize h2o server"); + for (int t = 0; static_threads[t].name; t++) { + if (static_threads[t].start_routine == h2o_main) + static_threads[t].enabled = httpd_is_enabled(); + } +#endif + } + + delta_startup_time("set resource limits"); + +#ifdef NETDATA_INTERNAL_CHECKS + if(debug_flags != 0) { + struct rlimit rl = { RLIM_INFINITY, RLIM_INFINITY }; + if(setrlimit(RLIMIT_CORE, &rl) != 0) + netdata_log_error("Cannot request unlimited core dumps for debugging... Proceeding anyway..."); +#ifdef HAVE_SYS_PRCTL_H + prctl(PR_SET_DUMPABLE, 1, 0, 0, 0); +#endif + } +#endif /* NETDATA_INTERNAL_CHECKS */ + + set_nofile_limit(&rlimit_nofile); + + delta_startup_time("become daemon"); + +#if defined(OS_LINUX) || defined(OS_MACOS) || defined(OS_FREEBSD) + // fork, switch user, create pid file, set process priority + if(become_daemon(dont_fork, user) == -1) + fatal("Cannot daemonize myself."); +#else + (void)dont_fork; +#endif + + watcher_thread_start(); + + // init sentry +#ifdef ENABLE_SENTRY + nd_sentry_init(); +#endif + + // The "HOME" env var points to the root's home dir because Netdata starts as root. Can't use "HOME". + struct passwd *pw = getpwuid(getuid()); + if (config_exists(CONFIG_SECTION_DIRECTORIES, "home") || !pw || !pw->pw_dir) { + netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", netdata_configured_home_dir); + } else { + netdata_configured_home_dir = config_get(CONFIG_SECTION_DIRECTORIES, "home", pw->pw_dir); + } + + setenv("HOME", netdata_configured_home_dir, 1); + + dyncfg_init(true); + + netdata_log_info("netdata started on pid %d.", getpid()); + + delta_startup_time("initialize threads after fork"); + + netdata_threads_init_after_fork((size_t)config_get_number(CONFIG_SECTION_GLOBAL, "pthread stack size", (long)default_stacksize)); + + // initialize internal registry + delta_startup_time("initialize registry"); + registry_init(); + + // fork the spawn server + delta_startup_time("fork the spawn server"); + spawn_init(); + + /* + * Libuv uv_spawn() uses SIGCHLD internally: + * https://github.com/libuv/libuv/blob/cc51217a317e96510fbb284721d5e6bc2af31e33/src/unix/process.c#L485 + * and inadvertently replaces the netdata signal handler which was set up during initialization. + * Thus, we must explicitly restore the signal handler for SIGCHLD. + * Warning: extreme care is needed when mixing and matching POSIX and libuv. + */ + signals_restore_SIGCHLD();
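+ // [Editor's note - illustrative sketch, not upstream code] The warning above
+ // deserves a concrete picture: restoring a POSIX disposition that a library
+ // has silently replaced is a single sigaction() call. "sigchld_handler" is a
+ // hypothetical name for the handler that signals_init() installed earlier:
+ //
+ //     struct sigaction sa = { 0 };
+ //     sa.sa_handler = sigchld_handler;   // re-install the agent's handler
+ //     sigemptyset(&sa.sa_mask);          // block no extra signals while it runs
+ //     sa.sa_flags = SA_RESTART;          // keep interrupted syscalls restartable
+ //     sigaction(SIGCHLD, &sa, NULL);     // atomically replaces libuv's disposition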
+ + // ------------------------------------------------------------------------ + // initialize rrd, registry, health, rrdpush, etc. + + delta_startup_time("collecting system info"); + + netdata_anonymous_statistics_enabled=-1; + struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info)); + __atomic_sub_fetch(&netdata_buffers_statistics.rrdhost_allocations_size, sizeof(struct rrdhost_system_info), __ATOMIC_RELAXED); + get_system_info(system_info); + + const char *guid = registry_get_this_machine_guid(); +#ifdef ENABLE_SENTRY + nd_sentry_set_user(guid); +#else + UNUSED(guid); +#endif + + system_info->hops = 0; + get_install_type(&system_info->install_type, &system_info->prebuilt_arch, &system_info->prebuilt_dist); + + delta_startup_time("initialize RRD structures"); + + if(rrd_init(netdata_configured_hostname, system_info, false)) { + set_late_global_environment(system_info); + fatal("Cannot initialize localhost instance with name '%s'.", netdata_configured_hostname); + } + + delta_startup_time("check for incomplete shutdown"); + + char agent_crash_file[FILENAME_MAX + 1]; + char agent_incomplete_shutdown_file[FILENAME_MAX + 1]; + snprintfz(agent_incomplete_shutdown_file, FILENAME_MAX, "%s/.agent_incomplete_shutdown", netdata_configured_varlib_dir); + int incomplete_shutdown_detected = (unlink(agent_incomplete_shutdown_file) == 0); + snprintfz(agent_crash_file, FILENAME_MAX, "%s/.agent_crash", netdata_configured_varlib_dir); + int crash_detected = (unlink(agent_crash_file) == 0); + int fd = open(agent_crash_file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 0444); + if (fd >= 0) + close(fd);
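+ // [Editor's note - descriptive only] unlink() returns 0 only when the file
+ // existed, so a successful unlink here means the previous run left its marker
+ // behind; the agent reports the two cases as INCOMPLETE_SHUTDOWN and CRASH
+ // respectively (see the statistics sent a few lines below). .agent_crash is
+ // recreated immediately to re-arm detection for the next boot; the clean
+ // shutdown path that removes these markers is not shown in this hunk.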
+ + + // ------------------------------------------------------------------------ + // Claim netdata agent to a cloud endpoint + + delta_startup_time("collect claiming info"); + + if (claiming_pending_arguments) + claim_agent(claiming_pending_arguments, false, NULL); + load_claiming_state(); + + // ------------------------------------------------------------------------ + // enable log flood protection + + nd_log_limits_reset(); + + // Load host labels + delta_startup_time("collect host labels"); + reload_host_labels(); + + // ------------------------------------------------------------------------ + // spawn the threads + + delta_startup_time("start the static threads"); + + web_server_config_options(); + + set_late_global_environment(system_info); + for (i = 0; static_threads[i].name != NULL ; i++) { + struct netdata_static_thread *st = &static_threads[i]; + + if(st->enabled) { + netdata_log_debug(D_SYSTEM, "Starting thread %s.", st->name); + st->thread = nd_thread_create(st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st); + } + else + netdata_log_debug(D_SYSTEM, "Not starting thread %s.", st->name); + } + ml_start_threads(); + + // ------------------------------------------------------------------------ + // Initialize netdata agent command serving from cli and signals + + delta_startup_time("initialize commands API"); + + commands_init(); + + delta_startup_time("ready"); + + usec_t ready_ut = now_monotonic_usec(); + netdata_log_info("NETDATA STARTUP: completed in %llu ms. Enjoy real-time performance monitoring!", (ready_ut - started_ut) / USEC_PER_MS); + netdata_ready = true; + + analytics_statistic_t start_statistic = { "START", "-", "-" }; + analytics_statistic_send(&start_statistic); + if (crash_detected) { + analytics_statistic_t crash_statistic = { "CRASH", "-", "-" }; + analytics_statistic_send(&crash_statistic); + } + if (incomplete_shutdown_detected) { + analytics_statistic_t incomplete_shutdown_statistic = { "INCOMPLETE_SHUTDOWN", "-", "-" }; + analytics_statistic_send(&incomplete_shutdown_statistic); + } + + // check if ANALYTICS needs to start + if (netdata_anonymous_statistics_enabled == 1) { + for (i = 0; static_threads[i].name != NULL; i++) { + if (!strncmp(static_threads[i].name, "ANALYTICS", 9)) { + struct netdata_static_thread *st = &static_threads[i]; + st->enabled = 1; + netdata_log_debug(D_SYSTEM, "Starting thread %s.", st->name); + st->thread = nd_thread_create(st->name, NETDATA_THREAD_OPTION_DEFAULT, st->start_routine, st); + } + } + } + + // ------------------------------------------------------------------------ + // Report ACLK build failure +#ifndef ENABLE_ACLK + netdata_log_error("This agent doesn't have ACLK."); + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/.aclk_report_sent", netdata_configured_varlib_dir); + if (netdata_anonymous_statistics_enabled > 0 && access(filename, F_OK)) { // -1 -> not initialized + analytics_statistic_t statistic = { "ACLK_DISABLED", "-", "-" }; + analytics_statistic_send(&statistic); + + int fd = open(filename, O_WRONLY | O_CREAT | O_TRUNC, 0444); + if (fd == -1) + netdata_log_error("Cannot create file '%s'. Please fix this.", filename); + else + close(fd); + } +#endif + + // ------------------------------------------------------------------------ + // initialize WebRTC + + webrtc_initialize(); + + // ------------------------------------------------------------------------ + // unblock signals + + signals_unblock(); + + // ------------------------------------------------------------------------ + // Handle signals + + signals_handle(); + + // should never reach this point + // but we need it for rpmlint #2752 + return 1; +} diff --git a/src/daemon/main.h b/src/daemon/main.h new file mode 100644 index 000000000..faf7d5b69 --- /dev/null +++ b/src/daemon/main.h @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_MAIN_H +#define NETDATA_MAIN_H 1 + +#include "common.h" + +extern struct config netdata_config; + +void cancel_main_threads(void); +int killpid(pid_t pid); + +typedef enum { + ABILITY_DATA_QUERIES = (1 << 0), + ABILITY_WEB_REQUESTS = (1 << 1), + ABILITY_STREAMING_CONNECTIONS = (1 << 2), + SERVICE_MAINTENANCE = (1 << 3), + SERVICE_COLLECTORS = (1 << 4), + SERVICE_REPLICATION = (1 << 5), + SERVICE_WEB_SERVER = (1 << 6), + SERVICE_ACLK = (1 << 7), + SERVICE_HEALTH = (1 << 8), + SERVICE_STREAMING = (1 << 9), + SERVICE_CONTEXT = (1 << 10), + SERVICE_ANALYTICS = (1 << 11), + SERVICE_EXPORTERS = (1 << 12), + SERVICE_ACLKSYNC = (1 << 13), + SERVICE_HTTPD = (1 << 14) +} SERVICE_TYPE; + +typedef enum { + SERVICE_THREAD_TYPE_NETDATA, + SERVICE_THREAD_TYPE_LIBUV, + SERVICE_THREAD_TYPE_EVENT_LOOP, +} SERVICE_THREAD_TYPE; + +typedef void (*force_quit_t)(void *data); +typedef void (*request_quit_t)(void *data); + +void service_exits(void); +bool service_running(SERVICE_TYPE service); +struct service_thread *service_register(SERVICE_THREAD_TYPE thread_type, request_quit_t request_quit_callback, force_quit_t force_quit_callback, void *data, bool update 
__maybe_unused); + +#endif /* NETDATA_MAIN_H */ diff --git a/daemon/metrics.csv b/src/daemon/metrics.csv index 4aa71a364..4aa71a364 100644 --- a/daemon/metrics.csv +++ b/src/daemon/metrics.csv diff --git a/daemon/pipename.c b/src/daemon/pipename.c index 70b6a25b4..70b6a25b4 100644 --- a/daemon/pipename.c +++ b/src/daemon/pipename.c diff --git a/daemon/pipename.h b/src/daemon/pipename.h index 6ca6e8d08..6ca6e8d08 100644 --- a/daemon/pipename.h +++ b/src/daemon/pipename.h diff --git a/src/daemon/sentry-native/sentry-native.c b/src/daemon/sentry-native/sentry-native.c new file mode 100644 index 000000000..9e6930e55 --- /dev/null +++ b/src/daemon/sentry-native/sentry-native.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sentry-native.h" +#include "daemon/common.h" + +#include "sentry.h" + +static bool sentry_telemetry_disabled(void) +{ + char path[FILENAME_MAX + 1]; + sprintf(path, "%s/%s", netdata_configured_user_config_dir, ".opt-out-from-anonymous-statistics"); + + struct stat buffer; + bool opt_out_file_exists = (stat(path, &buffer) == 0); + + if (opt_out_file_exists) + return true; + + return getenv("DISABLE_TELEMETRY") != NULL; +} + +void nd_sentry_init(void) +{ + if (sentry_telemetry_disabled()) + return; + + // path where sentry should save stuff + char path[FILENAME_MAX]; + snprintfz(path, FILENAME_MAX - 1, "%s/%s", netdata_configured_cache_dir, ".sentry-native"); + + sentry_options_t *options = sentry_options_new(); + sentry_options_set_dsn(options, NETDATA_SENTRY_DSN); + sentry_options_set_database_path(options, path); + sentry_options_set_environment(options, NETDATA_SENTRY_ENVIRONMENT); + + char release[64]; + snprintfz(release, 64 - 1, "%s.%s.%s", + NETDATA_VERSION_MINOR, NETDATA_VERSION_PATCH, NETDATA_VERSION_TWEAK); + sentry_options_set_release(options, release); + + sentry_options_set_dist(options, NETDATA_SENTRY_DIST); +#ifdef NETDATA_INTERNAL_CHECKS + sentry_options_set_debug(options, 1); +#endif + + sentry_init(options); +} + +void nd_sentry_fini(void) +{ + if (sentry_telemetry_disabled()) + return; + + sentry_close(); +} + +void nd_sentry_set_user(const char *guid) +{ + sentry_value_t user = sentry_value_new_object(); + sentry_value_set_by_key(user, "id", sentry_value_new_string(guid)); + sentry_set_user(user); +} diff --git a/src/daemon/sentry-native/sentry-native.h b/src/daemon/sentry-native/sentry-native.h new file mode 100644 index 000000000..81f909d9f --- /dev/null +++ b/src/daemon/sentry-native/sentry-native.h @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef ND_SENTRY_H +#define ND_SENTRY_H + +void nd_sentry_init(void); +void nd_sentry_fini(void); + +void nd_sentry_set_user(const char *guid); + +#endif /* ND_SENTRY_H */ diff --git a/daemon/service.c b/src/daemon/service.c index 8a65de66c..ead633445 100644 --- a/daemon/service.c +++ b/src/daemon/service.c @@ -16,11 +16,7 @@ #define WORKER_JOB_CLEANUP_ORPHAN_HOSTS 6 #define WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS 7 #define WORKER_JOB_FREE_HOST 9 -#define WORKER_JOB_SAVE_HOST_CHARTS 10 -#define WORKER_JOB_DELETE_HOST_CHARTS 11 #define WORKER_JOB_FREE_CHART 12 -#define WORKER_JOB_SAVE_CHART 13 -#define WORKER_JOB_DELETE_CHART 14 #define WORKER_JOB_FREE_DIMENSION 15 #define WORKER_JOB_PGC_MAIN_EVICT 16 #define WORKER_JOB_PGC_MAIN_FLUSH 17 @@ -38,31 +34,9 @@ static void svc_rrddim_obsolete_to_archive(RRDDIM *rd) { rrddim_flag_set(rd, RRDDIM_FLAG_ARCHIVED); rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE); - const char *cache_filename = rrddim_cache_filename(rd); - 
if(cache_filename) { - netdata_log_info("Deleting dimension file '%s'.", cache_filename); - if (unlikely(unlink(cache_filename) == -1)) - netdata_log_error("Cannot delete dimension file '%s'", cache_filename); - } - if (rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { - rrddimvar_delete_all(rd); - /* only a collector can mark a chart as obsolete, so we must remove the reference */ - - size_t tiers_available = 0, tiers_said_no_retention = 0; - for(size_t tier = 0; tier < storage_tiers ;tier++) { - if(rd->tiers[tier].db_collection_handle) { - tiers_available++; - - if(storage_engine_store_finalize(rd->tiers[tier].db_collection_handle)) - tiers_said_no_retention++; - - rd->tiers[tier].db_collection_handle = NULL; - } - } - - if (tiers_available == tiers_said_no_retention && tiers_said_no_retention) { + if (!rrddim_finalize_collection_and_check_retention(rd)) { /* This metric has no data and no references */ metaqueue_delete_dimension_uuid(&rd->metric_uuid); } @@ -126,24 +100,11 @@ static void svc_rrdset_obsolete_to_free(RRDSET *st) { worker_is_busy(WORKER_JOB_FREE_CHART); - rrdcalc_unlink_all_rrdset_alerts(st); - - rrdsetvar_release_and_delete_all(st); + rrdcalc_unlink_and_delete_all_rrdset_alerts(st); // has to be run after all dimensions are archived - or use-after-free will occur rrdvar_delete_all(st->rrdvars); - if(st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) { - if(rrdhost_option_check(st->rrdhost, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS)) { - worker_is_busy(WORKER_JOB_DELETE_CHART); - rrdset_delete_files(st); - } - else { - worker_is_busy(WORKER_JOB_SAVE_CHART); - rrdset_save(st); - } - } - rrdset_free(st); } @@ -230,6 +191,10 @@ static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() { RRDHOST *host; rrdhost_foreach_read(host) { + + if (!service_running(SERVICE_MAINTENANCE)) + break; + if(rrdhost_receiver_replicating_charts(host) || rrdhost_sender_replicating_charts(host)) continue; @@ -253,7 +218,7 @@ static void svc_rrd_cleanup_obsolete_charts_from_all_hosts() { netdata_mutex_unlock(&host->receiver_lock); } - rrd_unlock(); + rrd_rdunlock(); } static void svc_rrdhost_cleanup_orphan_hosts(RRDHOST *protected_host) { @@ -269,28 +234,11 @@ restart_after_removal: if(!rrdhost_should_be_removed(host, protected_host, now)) continue; - bool is_archived = rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED); - if (!is_archived) { - netdata_log_info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", rrdhost_hostname(host), host->machine_guid); - - if (rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) - /* don't delete multi-host DB host files */ - && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance)) - ) { - worker_is_busy(WORKER_JOB_DELETE_HOST_CHARTS); - rrdhost_delete_charts(host); - } - else { - worker_is_busy(WORKER_JOB_SAVE_HOST_CHARTS); - rrdhost_save_charts(host); - } - } - bool force = false; - if (rrdhost_option_check(host, RRDHOST_OPTION_EPHEMERAL_HOST) && now - host->last_connected > rrdhost_free_ephemeral_time_s) force = true; + bool is_archived = rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED); if (!force && is_archived) continue; @@ -311,12 +259,14 @@ restart_after_removal: goto restart_after_removal; } - rrd_unlock(); + rrd_wrunlock(); } -static void service_main_cleanup(void *ptr) +static void service_main_cleanup(void *pptr) { - struct netdata_static_thread *static_thread = (struct netdata_static_thread *)ptr; + struct netdata_static_thread *static_thread = CLEANUP_FUNCTION_GET_PTR(pptr); + 
if(!static_thread) return; + static_thread->enabled = NETDATA_MAIN_THREAD_EXITING; netdata_log_debug(D_SYSTEM, "Cleaning up..."); @@ -339,32 +289,40 @@ void *service_main(void *ptr) worker_register_job_name(WORKER_JOB_CLEANUP_ORPHAN_HOSTS, "cleanup orphan hosts"); worker_register_job_name(WORKER_JOB_CLEANUP_OBSOLETE_CHARTS_ON_HOSTS, "cleanup obsolete charts on all hosts"); worker_register_job_name(WORKER_JOB_FREE_HOST, "free host"); - worker_register_job_name(WORKER_JOB_SAVE_HOST_CHARTS, "save host charts"); - worker_register_job_name(WORKER_JOB_DELETE_HOST_CHARTS, "delete host charts"); worker_register_job_name(WORKER_JOB_FREE_CHART, "free chart"); - worker_register_job_name(WORKER_JOB_SAVE_CHART, "save chart"); - worker_register_job_name(WORKER_JOB_DELETE_CHART, "delete chart"); worker_register_job_name(WORKER_JOB_FREE_DIMENSION, "free dimension"); worker_register_job_name(WORKER_JOB_PGC_MAIN_EVICT, "main cache evictions"); worker_register_job_name(WORKER_JOB_PGC_MAIN_FLUSH, "main cache flushes"); worker_register_job_name(WORKER_JOB_PGC_OPEN_EVICT, "open cache evictions"); worker_register_job_name(WORKER_JOB_PGC_OPEN_FLUSH, "open cache flushes"); - netdata_thread_cleanup_push(service_main_cleanup, ptr); + CLEANUP_FUNCTION_REGISTER(service_main_cleanup) cleanup_ptr = ptr; + heartbeat_t hb; heartbeat_init(&hb); usec_t step = USEC_PER_SEC * SERVICE_HEARTBEAT; + usec_t real_step = USEC_PER_SEC; netdata_log_debug(D_SYSTEM, "Service thread starts"); while (service_running(SERVICE_MAINTENANCE)) { worker_is_idle(); - heartbeat_next(&hb, step); + heartbeat_next(&hb, USEC_PER_SEC); + if (real_step < step) { + real_step += USEC_PER_SEC; + continue; + } + real_step = USEC_PER_SEC; + +#ifdef ENABLE_DBENGINE + dbengine_retention_statistics(); +#endif svc_rrd_cleanup_obsolete_charts_from_all_hosts(); - svc_rrdhost_cleanup_orphan_hosts(localhost); + + if (service_running(SERVICE_MAINTENANCE)) + svc_rrdhost_cleanup_orphan_hosts(localhost); } - netdata_thread_cleanup_pop(1); return NULL; } diff --git a/daemon/signals.c b/src/daemon/signals.c index 4f2254334..c014452b7 100644 --- a/daemon/signals.c +++ b/src/daemon/signals.c @@ -6,7 +6,6 @@ typedef enum signal_action { NETDATA_SIGNAL_END_OF_LIST, NETDATA_SIGNAL_IGNORE, NETDATA_SIGNAL_EXIT_CLEANLY, - NETDATA_SIGNAL_SAVE_DATABASE, NETDATA_SIGNAL_REOPEN_LOGS, NETDATA_SIGNAL_RELOAD_HEALTH, NETDATA_SIGNAL_FATAL, @@ -24,7 +23,6 @@ static struct { { SIGQUIT, "SIGQUIT", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, { SIGTERM, "SIGTERM", 0, NETDATA_SIGNAL_EXIT_CLEANLY }, { SIGHUP, "SIGHUP", 0, NETDATA_SIGNAL_REOPEN_LOGS }, - { SIGUSR1, "SIGUSR1", 0, NETDATA_SIGNAL_SAVE_DATABASE }, { SIGUSR2, "SIGUSR2", 0, NETDATA_SIGNAL_RELOAD_HEALTH }, { SIGBUS, "SIGBUS", 0, NETDATA_SIGNAL_FATAL }, { SIGCHLD, "SIGCHLD", 0, NETDATA_SIGNAL_CHILD }, @@ -209,13 +207,6 @@ void signals_handle(void) { execute_command(CMD_RELOAD_HEALTH, NULL, NULL); break; - case NETDATA_SIGNAL_SAVE_DATABASE: - nd_log_limits_unlimited(); - netdata_log_info("SIGNAL: Received %s. Saving databases...", name); - nd_log_limits_reset(); - execute_command(CMD_SAVE_DATABASE, NULL, NULL); - break; - case NETDATA_SIGNAL_REOPEN_LOGS: nd_log_limits_unlimited(); netdata_log_info("SIGNAL: Received %s. Reopening all log files...", name); @@ -227,7 +218,7 @@ void signals_handle(void) { nd_log_limits_unlimited(); netdata_log_info("SIGNAL: Received %s. 
Cleaning up to exit...", name); commands_exit(); - netdata_cleanup_and_exit(0); + netdata_cleanup_and_exit(0, NULL, NULL, NULL); exit(0); break; diff --git a/daemon/signals.h b/src/daemon/signals.h index 12b1ed198..12b1ed198 100644 --- a/daemon/signals.h +++ b/src/daemon/signals.h diff --git a/daemon/static_threads.c b/src/daemon/static_threads.c index b70373d74..4199e9306 100644 --- a/daemon/static_threads.c +++ b/src/daemon/static_threads.c @@ -12,23 +12,13 @@ void *health_main(void *ptr); void *pluginsd_main(void *ptr); void *service_main(void *ptr); void *statsd_main(void *ptr); -void *timex_main(void *ptr); void *profile_main(void *ptr); -void *replication_thread_main(void *ptr __maybe_unused); +void *replication_thread_main(void *ptr); extern bool global_statistics_enabled; const struct netdata_static_thread static_threads_common[] = { { - .name = "P[timex]", - .config_section = CONFIG_SECTION_PLUGINS, - .config_name = "timex", - .enabled = 1, - .thread = NULL, - .init_routine = NULL, - .start_routine = timex_main - }, - { .name = "P[idlejitter]", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "idlejitter", @@ -70,7 +60,7 @@ const struct netdata_static_thread static_threads_common[] = { .name = "STATS_WORKERS", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "netdata monitoring extended", - .env_name = "NETDATA_INTERNALS_MONITORING", + .env_name = "NETDATA_INTERNALS_EXTENDED_MONITORING", .global_variable = &global_statistics_enabled, .enabled = 0, // this is ignored - check main() for "netdata monitoring extended" .thread = NULL, @@ -81,7 +71,7 @@ const struct netdata_static_thread static_threads_common[] = { .name = "STATS_SQLITE3", .config_section = CONFIG_SECTION_PLUGINS, .config_name = "netdata monitoring extended", - .env_name = "NETDATA_INTERNALS_MONITORING", + .env_name = "NETDATA_INTERNALS_EXTENDED_MONITORING", .global_variable = &global_statistics_enabled, .enabled = 0, // this is ignored - check main() for "netdata monitoring extended" .thread = NULL, @@ -115,6 +105,8 @@ const struct netdata_static_thread static_threads_common[] = { .init_routine = NULL, .start_routine = statsd_main }, +#ifndef OS_WINDOWS + // this crashes the debugger under windows { .name = "EXPORTING", .config_section = NULL, @@ -124,6 +116,7 @@ const struct netdata_static_thread static_threads_common[] = { .init_routine = NULL, .start_routine = exporting_main }, +#endif { .name = "SNDR[localhost]", .config_section = NULL, @@ -195,15 +188,6 @@ const struct netdata_static_thread static_threads_common[] = { .init_routine = NULL, .start_routine = profile_main }, - { - .name = "DYNCFG", - .config_section = NULL, - .config_name = NULL, - .enabled = 1, - .thread = NULL, - .init_routine = NULL, - .start_routine = dyncfg_main - }, // terminator { diff --git a/src/daemon/static_threads.h b/src/daemon/static_threads.h new file mode 100644 index 000000000..9c9be7620 --- /dev/null +++ b/src/daemon/static_threads.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STATIC_THREADS_H +#define NETDATA_STATIC_THREADS_H + +#include "common.h" + +extern const struct netdata_static_thread static_threads_common[]; + +struct netdata_static_thread * +static_threads_concat(const struct netdata_static_thread *lhs, + const struct netdata_static_thread *rhs); + +struct netdata_static_thread *static_threads_get(); + +#endif /* NETDATA_STATIC_THREADS_H */ diff --git a/src/daemon/static_threads_freebsd.c b/src/daemon/static_threads_freebsd.c new file mode 100644 index 
000000000..1bb671a68 --- /dev/null +++ b/src/daemon/static_threads_freebsd.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +void *freebsd_main(void *ptr); +void *timex_main(void *ptr); + +static const struct netdata_static_thread static_threads_freebsd[] = { + { + .name = "P[freebsd]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "freebsd", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = freebsd_main + }, + { + .name = "P[timex]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "timex", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = timex_main + }, + + {NULL, NULL, NULL, 0, NULL, NULL, NULL} +}; + +struct netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_freebsd); +} diff --git a/src/daemon/static_threads_linux.c b/src/daemon/static_threads_linux.c new file mode 100644 index 000000000..1efd63755 --- /dev/null +++ b/src/daemon/static_threads_linux.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +void *cgroups_main(void *ptr); +void *proc_main(void *ptr); +void *diskspace_main(void *ptr); +void *tc_main(void *ptr); +void *timex_main(void *ptr); + +static const struct netdata_static_thread static_threads_linux[] = { + { + .name = "P[tc]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "tc", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = tc_main + }, + { + .name = "P[diskspace]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "diskspace", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = diskspace_main + }, + { + .name = "P[proc]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "proc", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = proc_main + }, + { + .name = "P[cgroups]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "cgroups", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = cgroups_main + }, + { + .name = "P[timex]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "timex", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = timex_main + }, + + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +struct netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_linux); +} diff --git a/src/daemon/static_threads_macos.c b/src/daemon/static_threads_macos.c new file mode 100644 index 000000000..3b417c0b2 --- /dev/null +++ b/src/daemon/static_threads_macos.c @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +void *macos_main(void *ptr); +void *timex_main(void *ptr); + +static const struct netdata_static_thread static_threads_macos[] = { + { + .name = "P[timex]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "timex", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = timex_main + }, + { + .name = "P[macos]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "macos", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = macos_main, + .env_name = NULL, + .global_variable = NULL, + }, + + {NULL, NULL, NULL, 0, NULL, NULL, NULL, NULL, NULL} +}; + +struct 
netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_macos); +} diff --git a/src/daemon/static_threads_windows.c b/src/daemon/static_threads_windows.c new file mode 100644 index 000000000..57c47be0c --- /dev/null +++ b/src/daemon/static_threads_windows.c @@ -0,0 +1,33 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +void *win_plugin_main(void *ptr); + +static const struct netdata_static_thread static_threads_windows[] = { + { + .name = "P[windows]", + .config_section = CONFIG_SECTION_PLUGINS, + .config_name = "windows", + .enabled = 1, + .thread = NULL, + .init_routine = NULL, + .start_routine = win_plugin_main + }, + + // terminator + { + .name = NULL, + .config_section = NULL, + .config_name = NULL, + .env_name = NULL, + .enabled = 0, + .thread = NULL, + .init_routine = NULL, + .start_routine = NULL + } +}; + +struct netdata_static_thread *static_threads_get() { + return static_threads_concat(static_threads_common, static_threads_windows); +} diff --git a/daemon/system-info.sh b/src/daemon/system-info.sh index aaca7fd4b..aaca7fd4b 100755 --- a/daemon/system-info.sh +++ b/src/daemon/system-info.sh diff --git a/src/daemon/unit_test.c b/src/daemon/unit_test.c new file mode 100644 index 000000000..e7a743603 --- /dev/null +++ b/src/daemon/unit_test.c @@ -0,0 +1,1695 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "common.h" + +static bool cmd_arg_sanitization_test(const char *expected, const char *src, char *dst, size_t dst_size) { + bool ok = sanitize_command_argument_string(dst, src, dst_size); + + if (!expected) + return ok == false; + + return strcmp(expected, dst) == 0; +} + +bool command_argument_sanitization_tests() { + char dst[1024]; + + for (size_t i = 0; i != 5; i++) { + const char *expected = i == 4 ? "'\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "'", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 9; i++) { + const char *expected = i == 8 ? "'\\'''\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "''", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 7; i++) { + const char *expected = i == 6 ? "'\\''a" : NULL; + if (cmd_arg_sanitization_test(expected, "'a", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 7; i++) { + const char *expected = i == 6 ? "a'\\''" : NULL; + if (cmd_arg_sanitization_test(expected, "a'", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n", expected, dst); + return 1; + } + } + + for (size_t i = 0; i != 22; i++) { + const char *expected = i == 21 ? 
"foo'\\''a'\\'''\\'''\\''b" : NULL; + if (cmd_arg_sanitization_test(expected, "--foo'a'''b", dst, i) == false) { + fprintf(stderr, "expected: >>>%s<<<, got: >>>%s<<<\n length: %zu\n", expected, dst, strlen(dst)); + return 1; + } + } + + return 0; +} + +static int check_number_printing(void) { + struct { + NETDATA_DOUBLE n; + const char *correct; + } values[] = { + { .n = 0, .correct = "0" }, + { .n = 0.0000001, .correct = "0.0000001" }, + { .n = 0.00000009, .correct = "0.0000001" }, + { .n = 0.000000001, .correct = "0" }, + { .n = 99.99999999999999999, .correct = "100" }, + { .n = -99.99999999999999999, .correct = "-100" }, + { .n = 123.4567899123456789, .correct = "123.4567899" }, + { .n = 123.4567890123456789, .correct = "123.456789" }, + { .n = 123.4567800123456789, .correct = "123.45678" }, + { .n = 123.4567000123456789, .correct = "123.4567" }, + { .n = 123.4560000123456789, .correct = "123.456" }, + { .n = 123.4500000123456789, .correct = "123.45" }, + { .n = 123.4000000123456789, .correct = "123.4" }, + { .n = 123.0000000123456789, .correct = "123" }, + { .n = 123.0000000923456789, .correct = "123.0000001" }, + { .n = 4294967295.123456789, .correct = "4294967295.123457" }, + { .n = 8294967295.123456789, .correct = "8294967295.123457" }, + { .n = 1.000000000000002e+19, .correct = "1.000000000000001998e+19" }, + { .n = 9.2233720368547676e+18, .correct = "9.223372036854767584e+18" }, + { .n = 18446744073709541376.0, .correct = "1.84467440737095424e+19" }, + { .n = 18446744073709551616.0, .correct = "1.844674407370955136e+19" }, + { .n = 12318446744073710600192.0, .correct = "1.231844674407371008e+22" }, + { .n = 1677721499999999885312.0, .correct = "1.677721499999999872e+21" }, + { .n = -1677721499999999885312.0, .correct = "-1.677721499999999872e+21" }, + { .n = -1.677721499999999885312e40, .correct = "-1.677721499999999872e+40" }, + { .n = -16777214999999997337621690403742592008192.0, .correct = "-1.677721499999999616e+40" }, + { .n = 9999.9999999, .correct = "9999.9999999" }, + { .n = -9999.9999999, .correct = "-9999.9999999" }, + { .n = 0, .correct = NULL }, + }; + + char netdata[512 + 2], system[512 + 2]; + int i, failed = 0; + for(i = 0; values[i].correct ; i++) { + print_netdata_double(netdata, values[i].n); + snprintfz(system, sizeof(system) - 1, "%0.12" NETDATA_DOUBLE_MODIFIER, (NETDATA_DOUBLE)values[i].n); + + int ok = 1; + if(strcmp(netdata, values[i].correct) != 0) { + ok = 0; + failed++; + } + + NETDATA_DOUBLE parsed_netdata = str2ndd(netdata, NULL); + NETDATA_DOUBLE parsed_system = strtondd(netdata, NULL); + + if(parsed_system != parsed_netdata) + failed++; + + fprintf(stderr, "[%d]. '%s' (system) printed as '%s' (netdata): PRINT %s, " + "PARSED %0.12" NETDATA_DOUBLE_MODIFIER " (system), %0.12" NETDATA_DOUBLE_MODIFIER " (netdata): %s\n", + i, + system, netdata, ok?"OK":"FAILED", + parsed_system, parsed_netdata, + parsed_netdata == parsed_system ? 
"OK" : "FAILED"); + } + + if(failed) return 1; + return 0; +} + +static int check_rrdcalc_comparisons(void) { + RRDCALC_STATUS a, b; + + // make sure calloc() sets the status to UNINITIALIZED + memset(&a, 0, sizeof(RRDCALC_STATUS)); + if(a != RRDCALC_STATUS_UNINITIALIZED) { + fprintf(stderr, "%s is not zero.\n", rrdcalc_status2string(RRDCALC_STATUS_UNINITIALIZED)); + return 1; + } + + a = RRDCALC_STATUS_REMOVED; + b = RRDCALC_STATUS_UNDEFINED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_UNDEFINED; + b = RRDCALC_STATUS_UNINITIALIZED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_UNINITIALIZED; + b = RRDCALC_STATUS_CLEAR; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_CLEAR; + b = RRDCALC_STATUS_RAISED; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_RAISED; + b = RRDCALC_STATUS_WARNING; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + a = RRDCALC_STATUS_WARNING; + b = RRDCALC_STATUS_CRITICAL; + if(!(a < b)) { + fprintf(stderr, "%s is not less than %s\n", rrdcalc_status2string(a), rrdcalc_status2string(b)); + return 1; + } + + fprintf(stderr, "RRDCALC_STATUSes are sortable.\n"); + + return 0; +} + +int check_storage_number(NETDATA_DOUBLE n, int debug) { + char buffer[100]; + uint32_t flags = SN_DEFAULT_FLAGS; + + storage_number s = pack_storage_number(n, flags); + NETDATA_DOUBLE d = unpack_storage_number(s); + + if(!does_storage_number_exist(s)) { + fprintf(stderr, "Exists flags missing for number " NETDATA_DOUBLE_FORMAT "!\n", n); + return 5; + } + + NETDATA_DOUBLE ddiff = d - n; + NETDATA_DOUBLE dcdiff = ddiff * 100.0 / n; + + if(dcdiff < 0) dcdiff = -dcdiff; + + size_t len = (size_t)print_netdata_double(buffer, d); + NETDATA_DOUBLE p = str2ndd(buffer, NULL); + NETDATA_DOUBLE pdiff = n - p; + NETDATA_DOUBLE pcdiff = pdiff * 100.0 / n; + if(pcdiff < 0) pcdiff = -pcdiff; + + if(debug) { + fprintf(stderr, + NETDATA_DOUBLE_FORMAT + " original\n" NETDATA_DOUBLE_FORMAT " packed and unpacked, (stored as 0x%08X, diff " NETDATA_DOUBLE_FORMAT + ", " NETDATA_DOUBLE_FORMAT "%%)\n" + "%s printed after unpacked (%zu bytes)\n" NETDATA_DOUBLE_FORMAT + " re-parsed from printed (diff " NETDATA_DOUBLE_FORMAT ", " NETDATA_DOUBLE_FORMAT "%%)\n\n", + n, + d, s, ddiff, dcdiff, + buffer, len, + p, pdiff, pcdiff + ); + if(len != strlen(buffer)) fprintf(stderr, "ERROR: printed number %s is reported to have length %zu but it has %zu\n", buffer, len, strlen(buffer)); + + if(dcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) + fprintf(stderr, "WARNING: packing number " NETDATA_DOUBLE_FORMAT " has accuracy loss " NETDATA_DOUBLE_FORMAT " %%\n", n, dcdiff); + + if(pcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) + fprintf(stderr, "WARNING: re-parsing the packed, unpacked and printed number " NETDATA_DOUBLE_FORMAT + " has accuracy loss " NETDATA_DOUBLE_FORMAT " %%\n", n, pcdiff); + } + + if(len != strlen(buffer)) return 1; + if(dcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) return 3; + if(pcdiff > ACCURACY_LOSS_ACCEPTED_PERCENT) return 4; + return 0; +} + +NETDATA_DOUBLE storage_number_min(NETDATA_DOUBLE n) { + NETDATA_DOUBLE r = 1, 
last; + + do { + last = n; + n /= 2.0; + storage_number t = pack_storage_number(n, SN_DEFAULT_FLAGS); + r = unpack_storage_number(t); + } while(r != 0.0 && r != last); + + return last; +} + +void benchmark_storage_number(int loop, int multiplier) { + int i, j; + NETDATA_DOUBLE n, d; + storage_number s; + unsigned long long user, system, total, mine, their; + + NETDATA_DOUBLE storage_number_positive_min = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW); + NETDATA_DOUBLE storage_number_positive_max = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MAX_RAW); + + char buffer[100]; + + struct rusage now, last; + + fprintf(stderr, "\n\nBenchmarking %d numbers, please wait...\n\n", loop); + + // ------------------------------------------------------------------------ + + fprintf(stderr, "SYSTEM LONG DOUBLE SIZE: %zu bytes\n", sizeof(NETDATA_DOUBLE)); + fprintf(stderr, "NETDATA FLOATING POINT SIZE: %zu bytes\n", sizeof(storage_number)); + + mine = (NETDATA_DOUBLE)sizeof(storage_number) * (NETDATA_DOUBLE)loop; + their = (NETDATA_DOUBLE)sizeof(NETDATA_DOUBLE) * (NETDATA_DOUBLE)loop; + + if(mine > their) { + fprintf(stderr, "\nNETDATA NEEDS %0.2" NETDATA_DOUBLE_MODIFIER " TIMES MORE MEMORY. Sorry!\n", (NETDATA_DOUBLE)(mine / their)); + } + else { + fprintf(stderr, "\nNETDATA INTERNAL FLOATING POINT ARITHMETICS NEEDS %0.2" NETDATA_DOUBLE_MODIFIER " TIMES LESS MEMORY.\n", (NETDATA_DOUBLE)(their / mine)); + } + + fprintf(stderr, "\nNETDATA FLOATING POINT\n"); + fprintf(stderr, "MIN POSITIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW)); + fprintf(stderr, "MAX POSITIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_POSITIVE_MAX_RAW)); + fprintf(stderr, "MIN NEGATIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MIN_RAW)); + fprintf(stderr, "MAX NEGATIVE VALUE " NETDATA_DOUBLE_FORMAT "\n", unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MAX_RAW)); + fprintf(stderr, "Maximum accuracy loss accepted: " NETDATA_DOUBLE_FORMAT "%%\n\n\n", (NETDATA_DOUBLE)ACCURACY_LOSS_ACCEPTED_PERCENT); + + // ------------------------------------------------------------------------ + + fprintf(stderr, "INTERNAL LONG DOUBLE PRINTING: "); + getrusage(RUSAGE_SELF, &last); + + // do the job + for(j = 1; j < 11 ;j++) { + n = storage_number_positive_min * j; + + for(i = 0; i < loop ;i++) { + n *= multiplier; + if(n > storage_number_positive_max) n = storage_number_positive_min; + + print_netdata_double(buffer, n); + } + } + + getrusage(RUSAGE_SELF, &now); + user = now.ru_utime.tv_sec * 1000000ULL + now.ru_utime.tv_usec - last.ru_utime.tv_sec * 1000000ULL + last.ru_utime.tv_usec; + system = now.ru_stime.tv_sec * 1000000ULL + now.ru_stime.tv_usec - last.ru_stime.tv_sec * 1000000ULL + last.ru_stime.tv_usec; + total = user + system; + mine = total; + + fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER + ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(system / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0)); + + // ------------------------------------------------------------------------ + + fprintf(stderr, "SYSTEM LONG DOUBLE PRINTING: "); + getrusage(RUSAGE_SELF, &last); + + // do the job + for(j = 1; j < 11 ;j++) { + n = storage_number_positive_min * j; + + for(i = 0; i < loop ;i++) { + n *= multiplier; + if(n > storage_number_positive_max) n = storage_number_positive_min; + snprintfz(buffer, sizeof(buffer) - 1, NETDATA_DOUBLE_FORMAT, 
n); + } + } + + getrusage(RUSAGE_SELF, &now); + user = now.ru_utime.tv_sec * 1000000ULL + now.ru_utime.tv_usec - last.ru_utime.tv_sec * 1000000ULL + last.ru_utime.tv_usec; + system = now.ru_stime.tv_sec * 1000000ULL + now.ru_stime.tv_usec - last.ru_stime.tv_sec * 1000000ULL + last.ru_stime.tv_usec; + total = user + system; + their = total; + + fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER + ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(system / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0)); + + if(mine > total) { + fprintf(stderr, "NETDATA CODE IS SLOWER %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(mine * 100.0 / their - 100.0)); + } + else { + fprintf(stderr, "NETDATA CODE IS F A S T E R %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(their * 100.0 / mine - 100.0)); + } + + // ------------------------------------------------------------------------ + + fprintf(stderr, "\nINTERNAL LONG DOUBLE PRINTING WITH PACK / UNPACK: "); + getrusage(RUSAGE_SELF, &last); + + // do the job + for(j = 1; j < 11 ;j++) { + n = storage_number_positive_min * j; + + for(i = 0; i < loop ;i++) { + n *= multiplier; + if(n > storage_number_positive_max) n = storage_number_positive_min; + + s = pack_storage_number(n, SN_DEFAULT_FLAGS); + d = unpack_storage_number(s); + print_netdata_double(buffer, d); + } + } + + getrusage(RUSAGE_SELF, &now); + user = now.ru_utime.tv_sec * 1000000ULL + now.ru_utime.tv_usec - last.ru_utime.tv_sec * 1000000ULL + last.ru_utime.tv_usec; + system = now.ru_stime.tv_sec * 1000000ULL + now.ru_stime.tv_usec - last.ru_stime.tv_sec * 1000000ULL + last.ru_stime.tv_usec; + total = user + system; + mine = total; + + fprintf(stderr, "user %0.5" NETDATA_DOUBLE_MODIFIER ", system %0.5" NETDATA_DOUBLE_MODIFIER + ", total %0.5" NETDATA_DOUBLE_MODIFIER "\n", (NETDATA_DOUBLE)(user / 1000000.0), (NETDATA_DOUBLE)(system / 1000000.0), (NETDATA_DOUBLE)(total / 1000000.0)); + + if(mine > their) { + fprintf(stderr, "WITH PACKING UNPACKING NETDATA CODE IS SLOWER %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(mine * 100.0 / their - 100.0)); + } + else { + fprintf(stderr, "EVEN WITH PACKING AND UNPACKING, NETDATA CODE IS F A S T E R %0.2" NETDATA_DOUBLE_MODIFIER " %%\n", (NETDATA_DOUBLE)(their * 100.0 / mine - 100.0)); + } + + // ------------------------------------------------------------------------ + +} + +static int check_storage_number_exists() { + uint32_t flags = SN_DEFAULT_FLAGS; + NETDATA_DOUBLE n = 0.0; + + storage_number s = pack_storage_number(n, flags); + NETDATA_DOUBLE d = unpack_storage_number(s); + + if(n != d) { + fprintf(stderr, "Wrong number returned. 
Expected " NETDATA_DOUBLE_FORMAT ", returned " NETDATA_DOUBLE_FORMAT "!\n", n, d); + return 1; + } + + return 0; +} + +int unit_test_storage() { + if(check_storage_number_exists()) return 0; + + NETDATA_DOUBLE storage_number_positive_min = unpack_storage_number(STORAGE_NUMBER_POSITIVE_MIN_RAW); + NETDATA_DOUBLE storage_number_negative_max = unpack_storage_number(STORAGE_NUMBER_NEGATIVE_MAX_RAW); + + NETDATA_DOUBLE c, a = 0; + int i, j, g, r = 0; + + for(g = -1; g <= 1 ; g++) { + a = 0; + + if(!g) continue; + + for(j = 0; j < 9 ;j++) { + a += 0.0000001; + c = a * g; + for(i = 0; i < 21 ;i++, c *= 10) { + if(c > 0 && c < storage_number_positive_min) continue; + if(c < 0 && c > storage_number_negative_max) continue; + + if(check_storage_number(c, 1)) return 1; + } + } + } + + // if(check_storage_number(858993459.1234567, 1)) return 1; + benchmark_storage_number(1000000, 2); + return r; +} + +int unit_test_str2ld() { + is_system_ieee754_double(); + + char *values[] = { + "1.2345678", + "-35.6", + "0.00123", + "23842384234234.2", + ".1", + "1.2e-10", + "18446744073709551616.0", + "18446744073709551616123456789123456789123456789123456789123456789123456789123456789.0", + "1.8446744073709551616123456789123456789123456789123456789123456789123456789123456789e+300", + "9.", + "9.e2", + "1.2e", + "1.2e+", + "1.2e-", + "1.2e0", + "1.2e-0", + "1.2e+0", + "-1.2e+1", + "-1.2e-1", + "1.2e1", + "1.2e400", + "hello", + "1wrong", + "nan", + "inf", + NULL + }; + + int i; + for(i = 0; values[i] ; i++) { + char *e_mine = "hello", *e_sys = "world"; + NETDATA_DOUBLE mine = str2ndd(values[i], &e_mine); + NETDATA_DOUBLE sys = strtondd(values[i], &e_sys); + + if(isnan(mine)) { + if(!isnan(sys)) { + fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER + ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys); + return -1; + } + } + else if(isinf(mine)) { + if(!isinf(sys)) { + fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER + ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys); + return -1; + } + } + else if(mine != sys && ABS(mine-sys) > 0.000001) { + fprintf(stderr, "Value '%s' is parsed as %" NETDATA_DOUBLE_MODIFIER + ", but system believes it is %" NETDATA_DOUBLE_MODIFIER ", delta %" NETDATA_DOUBLE_MODIFIER ".\n", values[i], mine, sys, sys-mine); + return -1; + } + + if(e_mine != e_sys) { + fprintf(stderr, "Value '%s' is parsed correctly, but endptr is not right (netdata returned %d, but system returned %d)\n", + values[i], (int)(e_mine - values[i]), (int)(e_sys - values[i])); + return -1; + } + + fprintf(stderr, "str2ndd() parsed value '%s' exactly the same way with strtold(), returned %" NETDATA_DOUBLE_MODIFIER + " vs %" NETDATA_DOUBLE_MODIFIER "\n", values[i], mine, sys); + } + + return 0; +} + +int unit_test_buffer() { + BUFFER *wb = buffer_create(1, NULL); + char string[2048 + 1]; + char final[9000 + 1]; + int i; + + for(i = 0; i < 2048; i++) + string[i] = (char)((i % 24) + 'a'); + string[2048] = '\0'; + + const char *fmt = "string1: %s\nstring2: %s\nstring3: %s\nstring4: %s"; + buffer_sprintf(wb, fmt, string, string, string, string); + snprintfz(final, sizeof(final) - 1, fmt, string, string, string, string); + + const char *s = buffer_tostring(wb); + + if(buffer_strlen(wb) != strlen(final) || strcmp(s, final) != 0) { + fprintf(stderr, "\nbuffer_sprintf() is faulty.\n"); + fprintf(stderr, "\nstring : %s (length %zu)\n", string, strlen(string)); + fprintf(stderr, "\nbuffer : %s (length %zu)\n", s, 
buffer_strlen(wb)); + fprintf(stderr, "\nexpected: %s (length %zu)\n", final, strlen(final)); + buffer_free(wb); + return -1; + } + + fprintf(stderr, "buffer_sprintf() works as expected.\n"); + buffer_free(wb); + return 0; +} + +int unit_test_static_threads() { + struct netdata_static_thread *static_threads = static_threads_get(); + + /* + * make sure enough static threads have been registered + */ + if (!static_threads) { + fprintf(stderr, "empty static_threads array\n"); + return 1; + } + + int n; + for (n = 0; static_threads[n].start_routine != NULL; n++) {} + + if (n < 2) { + fprintf(stderr, "only %d static threads registered\n", n); + freez(static_threads); + return 1; + } + + /* + * verify that each thread's start routine is unique. + */ + for (int i = 0; i != n - 1; i++) { + for (int j = i + 1; j != n; j++) { + if (static_threads[i].start_routine != static_threads[j].start_routine) + continue; + + fprintf(stderr, "Found duplicate threads with name: %s\n", static_threads[i].name); + freez(static_threads); + return 1; + } + } + + freez(static_threads); + return 0; +} + +// -------------------------------------------------------------------------------------------------------------------- + +struct feed_values { + unsigned long long microseconds; + collected_number value; +}; + +struct test { + char name[100]; + char description[1024]; + + int update_every; + unsigned long long multiplier; + unsigned long long divisor; + RRD_ALGORITHM algorithm; + + unsigned long feed_entries; + unsigned long result_entries; + struct feed_values *feed; + NETDATA_DOUBLE *results; + + collected_number *feed2; + NETDATA_DOUBLE *results2; +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test1 +// test absolute values stored + +struct feed_values test1_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test1_results[] = { + 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +struct test test1 = { + "test1", // name + "test absolute values stored at exactly second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 10, // feed entries + 9, // result entries + test1_feed, // feed + test1_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test2 +// test absolute values stored in the middle of second boundaries + +struct feed_values test2_feed[] = { + { 500000, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test2_results[] = { + 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +struct test test2 = { + "test2", // name + "test absolute values stored in the middle of second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 10, // feed entries + 9, // result entries + test2_feed, // feed + test2_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test3 + +struct feed_values test3_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 
}, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test3_results[] = { + 10, 10, 10, 10, 10, 10, 10, 10, 10 +}; + +struct test test3 = { + "test3", // name + "test incremental values stored at exactly second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test3_feed, // feed + test3_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test4 + +struct feed_values test4_feed[] = { + { 500000, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +NETDATA_DOUBLE test4_results[] = { + 10, 10, 10, 10, 10, 10, 10, 10, 10 +}; + +struct test test4 = { + "test4", // name + "test incremental values stored in the middle of second boundaries", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test4_feed, // feed + test4_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test5 - 32 bit overflows + +struct feed_values test5_feed[] = { + { 0, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 7 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 14 }, + { 1000000, 0x00000000FFFFFFFFULL / 15 * 0 }, +}; + +NETDATA_DOUBLE test5_results[] = { + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15 * 7, + 0x00000000FFFFFFFFULL / 15, +}; + +struct test test5 = { + "test5", // name + "test 32-bit incremental values overflow", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test5_feed, // feed + test5_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test5b - 64 bit overflows + +struct feed_values test5b_feed[] = { + { 0, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 7 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 14 }, + { 1000000, 0xFFFFFFFFFFFFFFFFULL / 15 * 0 }, +}; + +NETDATA_DOUBLE test5b_results[] = { + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, 
+ 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15 * 7, + 0xFFFFFFFFFFFFFFFFULL / 15, +}; + +struct test test5b = { + "test5b", // name + "test 64-bit incremental values overflow", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test5b_feed, // feed + test5b_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test6 + +struct feed_values test6_feed[] = { + { 250000, 1000 }, + { 250000, 2000 }, + { 250000, 3000 }, + { 250000, 4000 }, + { 250000, 5000 }, + { 250000, 6000 }, + { 250000, 7000 }, + { 250000, 8000 }, + { 250000, 9000 }, + { 250000, 10000 }, + { 250000, 11000 }, + { 250000, 12000 }, + { 250000, 13000 }, + { 250000, 14000 }, + { 250000, 15000 }, + { 250000, 16000 }, +}; + +NETDATA_DOUBLE test6_results[] = { + 4000, 4000, 4000, 4000 +}; + +struct test test6 = { + "test6", // name + "test incremental values updated within the same second", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 16, // feed entries + 4, // result entries + test6_feed, // feed + test6_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test7 + +struct feed_values test7_feed[] = { + { 500000, 1000 }, + { 2000000, 2000 }, + { 2000000, 3000 }, + { 2000000, 4000 }, + { 2000000, 5000 }, + { 2000000, 6000 }, + { 2000000, 7000 }, + { 2000000, 8000 }, + { 2000000, 9000 }, + { 2000000, 10000 }, +}; + +NETDATA_DOUBLE test7_results[] = { + 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500, 500 +}; + +struct test test7 = { + "test7", // name + "test incremental values updated in long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 18, // result entries + test7_feed, // feed + test7_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test8 + +struct feed_values test8_feed[] = { + { 500000, 1000 }, + { 2000000, 2000 }, + { 2000000, 3000 }, + { 2000000, 4000 }, + { 2000000, 5000 }, + { 2000000, 6000 }, +}; + +NETDATA_DOUBLE test8_results[] = { + 1250, 2000, 2250, 3000, 3250, 4000, 4250, 5000, 5250, 6000 +}; + +struct test test8 = { + "test8", // name + "test absolute values updated in long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 6, // feed entries + 10, // result entries + test8_feed, // feed + test8_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test9 + +struct feed_values test9_feed[] = { + { 250000, 1000 }, + { 250000, 2000 }, + { 250000, 3000 }, + { 250000, 4000 }, + { 250000, 5000 }, + { 250000, 6000 }, + { 250000, 7000 }, + { 250000, 8000 }, + { 250000, 9000 }, + { 250000, 10000 }, + { 250000, 11000 }, + { 250000, 12000 }, + { 250000, 13000 }, + { 250000, 14000 }, + { 250000, 15000 }, + { 250000, 16000 }, +}; + +NETDATA_DOUBLE test9_results[] = { + 4000, 8000, 12000, 16000 +}; + +struct test test9 = { + "test9", // name + "test absolute 
values updated within the same second", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_ABSOLUTE, // algorithm + 16, // feed entries + 4, // result entries + test9_feed, // feed + test9_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test10 + +struct feed_values test10_feed[] = { + { 500000, 1000 }, + { 600000, 1000 + 600 }, + { 200000, 1600 + 200 }, + { 1000000, 1800 + 1000 }, + { 200000, 2800 + 200 }, + { 2000000, 3000 + 2000 }, + { 600000, 5000 + 600 }, + { 400000, 5600 + 400 }, + { 900000, 6000 + 900 }, + { 1000000, 6900 + 1000 }, +}; + +NETDATA_DOUBLE test10_results[] = { + 1000, 1000, 1000, 1000, 1000, 1000, 1000 +}; + +struct test test10 = { + "test10", // name + "test incremental values updated in short and long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 7, // result entries + test10_feed, // feed + test10_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test11 + +struct feed_values test11_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +collected_number test11_feed2[] = { + 10, 20, 30, 40, 50, 60, 70, 80, 90, 100 +}; + +NETDATA_DOUBLE test11_results[] = { + 50, 50, 50, 50, 50, 50, 50, 50, 50 +}; + +NETDATA_DOUBLE test11_results2[] = { + 50, 50, 50, 50, 50, 50, 50, 50, 50 +}; + +struct test test11 = { + "test11", // name + "test percentage-of-incremental-row with equal values", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 9, // result entries + test11_feed, // feed + test11_results, // results + test11_feed2, // feed2 + test11_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test12 + +struct feed_values test12_feed[] = { + { 0, 10 }, + { 1000000, 20 }, + { 1000000, 30 }, + { 1000000, 40 }, + { 1000000, 50 }, + { 1000000, 60 }, + { 1000000, 70 }, + { 1000000, 80 }, + { 1000000, 90 }, + { 1000000, 100 }, +}; + +collected_number test12_feed2[] = { + 10*3, 20*3, 30*3, 40*3, 50*3, 60*3, 70*3, 80*3, 90*3, 100*3 +}; + +NETDATA_DOUBLE test12_results[] = { + 25, 25, 25, 25, 25, 25, 25, 25, 25 +}; + +NETDATA_DOUBLE test12_results2[] = { + 75, 75, 75, 75, 75, 75, 75, 75, 75 +}; + +struct test test12 = { + "test12", // name + "test percentage-of-incremental-row with equal values", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 9, // result entries + test12_feed, // feed + test12_results, // results + test12_feed2, // feed2 + test12_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test13 + +struct feed_values test13_feed[] = { + { 500000, 1000 }, + { 600000, 1000 + 600 }, + { 200000, 1600 + 200 }, + { 1000000, 1800 + 1000 }, + { 200000, 2800 + 200 }, + { 2000000, 3000 + 2000 }, + { 600000, 5000 + 600 }, + { 400000, 5600 + 400 }, + { 900000, 6000 + 900 }, + { 1000000, 6900 + 1000 }, +}; + 
+NETDATA_DOUBLE test13_results[] = { + 83.3333300, 100, 100, 100, 100, 100, 100 +}; + +struct test test13 = { + "test13", // name + "test percentage-of-incremental-row updated in short and long durations", + 1, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL, // algorithm + 10, // feed entries + 7, // result entries + test13_feed, // feed + test13_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test14 + +struct feed_values test14_feed[] = { + { 0, 0x015397dc42151c41ULL }, + { 13573000, 0x015397e612e3ff5dULL }, + { 29969000, 0x015397f905ecdaa8ULL }, + { 29958000, 0x0153980c2a6cb5e4ULL }, + { 30054000, 0x0153981f4032fb83ULL }, + { 34952000, 0x015398355efadaccULL }, + { 25046000, 0x01539845ba4b09f8ULL }, + { 29947000, 0x0153985948bf381dULL }, + { 30054000, 0x0153986c5b9c27e2ULL }, + { 29942000, 0x0153987f888982d0ULL }, +}; + +NETDATA_DOUBLE test14_results[] = { + 23.1383300, 21.8515600, 21.8804600, 21.7788000, 22.0112200, 22.4386100, 22.0906100, 21.9150800 +}; + +struct test test14 = { + "test14", // name + "issue #981 with real data", + 30, // update_every + 8, // multiplier + 1000000000, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 8, // result entries + test14_feed, // feed + test14_results, // results + NULL, // feed2 + NULL // results2 +}; + +struct feed_values test14b_feed[] = { + { 0, 0 }, + { 13573000, 13573000 }, + { 29969000, 13573000 + 29969000 }, + { 29958000, 13573000 + 29969000 + 29958000 }, + { 30054000, 13573000 + 29969000 + 29958000 + 30054000 }, + { 34952000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 }, + { 25046000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 }, + { 29947000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 }, + { 30054000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 + 30054000 }, + { 29942000, 13573000 + 29969000 + 29958000 + 30054000 + 34952000 + 25046000 + 29947000 + 30054000 + 29942000 }, +}; + +NETDATA_DOUBLE test14b_results[] = { + 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000 +}; + +struct test test14b = { + "test14b", // name + "issue #981 with dummy data", + 30, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 8, // result entries + test14b_feed, // feed + test14b_results, // results + NULL, // feed2 + NULL // results2 +}; + +struct feed_values test14c_feed[] = { + { 29000000, 29000000 }, + { 1000000, 29000000 + 1000000 }, + { 30000000, 29000000 + 1000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, + { 30000000, 29000000 + 1000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 + 30000000 }, +}; + +NETDATA_DOUBLE test14c_results[] = { + 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000, 1000000 +}; + +struct test test14c = { + "test14c", // name + 
"issue #981 with dummy data, checking for late start", + 30, // update_every + 1, // multiplier + 1, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test14c_feed, // feed + test14c_results, // results + NULL, // feed2 + NULL // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- +// test15 + +struct feed_values test15_feed[] = { + { 0, 1068066388 }, + { 1008752, 1068822698 }, + { 993809, 1069573072 }, + { 995911, 1070324135 }, + { 1014562, 1071078166 }, + { 994684, 1071831349 }, + { 993128, 1072235739 }, + { 1010332, 1072958871 }, + { 1003394, 1073707019 }, + { 995201, 1074460255 }, +}; + +collected_number test15_feed2[] = { + 178825286, 178825286, 178825286, 178825286, 178825498, 178825498, 179165652, 179202964, 179203282, 179204130 +}; + +NETDATA_DOUBLE test15_results[] = { + 5857.4080000, 5898.4540000, 5891.6590000, 5806.3160000, 5914.2640000, 3202.2630000, 5589.6560000, 5822.5260000, 5911.7520000 +}; + +NETDATA_DOUBLE test15_results2[] = { + 0.0000000, 0.0000000, 0.0024944, 1.6324779, 0.0212777, 2655.1890000, 290.5387000, 5.6733610, 6.5960220 +}; + +struct test test15 = { + "test15", // name + "test incremental with 2 dimensions", + 1, // update_every + 8, // multiplier + 1024, // divisor + RRD_ALGORITHM_INCREMENTAL, // algorithm + 10, // feed entries + 9, // result entries + test15_feed, // feed + test15_results, // results + test15_feed2, // feed2 + test15_results2 // results2 +}; + +// -------------------------------------------------------------------------------------------------------------------- + +int run_test(struct test *test) +{ + fprintf(stderr, "\nRunning test '%s':\n%s\n", test->name, test->description); + + default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC; + default_rrd_update_every = test->update_every; + + char name[101]; + snprintfz(name, sizeof(name) - 1, "unittest-%s", test->name); + + // create the chart + RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1 + , test->update_every, RRDSET_TYPE_LINE); + RRDDIM *rd = rrddim_add(st, "dim1", NULL, test->multiplier, test->divisor, test->algorithm); + + RRDDIM *rd2 = NULL; + if(test->feed2) + rd2 = rrddim_add(st, "dim2", NULL, test->multiplier, test->divisor, test->algorithm); + + rrdset_flag_set(st, RRDSET_FLAG_DEBUG); + + // feed it with the test data + time_t time_now = 0, time_start = now_realtime_sec(); + unsigned long c; + collected_number last = 0; + for(c = 0; c < test->feed_entries; c++) { + if(debug_flags) fprintf(stderr, "\n\n"); + + if(c) { + time_now += test->feed[c].microseconds; + fprintf(stderr, " > %s: feeding position %lu, after %0.3f seconds (%0.3f seconds from start), delta " NETDATA_DOUBLE_FORMAT + ", rate " NETDATA_DOUBLE_FORMAT "\n", + test->name, c+1, + (float)test->feed[c].microseconds / 1000000.0, + (float)time_now / 1000000.0, + ((NETDATA_DOUBLE)test->feed[c].value - (NETDATA_DOUBLE)last) * (NETDATA_DOUBLE)test->multiplier / (NETDATA_DOUBLE)test->divisor, + (((NETDATA_DOUBLE)test->feed[c].value - (NETDATA_DOUBLE)last) * (NETDATA_DOUBLE)test->multiplier / (NETDATA_DOUBLE)test->divisor) / (NETDATA_DOUBLE)test->feed[c].microseconds * (NETDATA_DOUBLE)1000000); + + // rrdset_next_usec_unfiltered(st, test->feed[c].microseconds); + st->usec_since_last_update = test->feed[c].microseconds; + } + else { + fprintf(stderr, " > %s: feeding position %lu\n", test->name, c+1); + } + + fprintf(stderr, " >> 
%s with value " COLLECTED_NUMBER_FORMAT "\n", rrddim_name(rd), test->feed[c].value); + rrddim_set(st, "dim1", test->feed[c].value); + last = test->feed[c].value; + + if(rd2) { + fprintf(stderr, " >> %s with value " COLLECTED_NUMBER_FORMAT "\n", rrddim_name(rd2), test->feed2[c]); + rrddim_set(st, "dim2", test->feed2[c]); + } + + struct timeval now; + now_realtime_timeval(&now); + rrdset_timed_done(st, now, false); + + // align the first entry to second boundary + if(!c) { + fprintf(stderr, " > %s: fixing first collection time to be %llu microseconds to second boundary\n", test->name, test->feed[c].microseconds); + rd->collector.last_collected_time.tv_usec = st->last_collected_time.tv_usec = st->last_updated.tv_usec = test->feed[c].microseconds; + // time_start = st->last_collected_time.tv_sec; + } + } + + // check the result + int errors = 0; + + if(st->counter != test->result_entries) { + fprintf(stderr, " %s stored %u entries, but we were expecting %lu, ### E R R O R ###\n", + test->name, st->counter, test->result_entries); + errors++; + } + + unsigned long max = (st->counter < test->result_entries)?st->counter:test->result_entries; + for(c = 0 ; c < max ; c++) { + NETDATA_DOUBLE v = unpack_storage_number(rd->db.data[c]); + NETDATA_DOUBLE n = unpack_storage_number(pack_storage_number(test->results[c], SN_DEFAULT_FLAGS)); + int same = (roundndd(v * 10000000.0) == roundndd(n * 10000000.0))?1:0; + fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT + ", found " NETDATA_DOUBLE_FORMAT ", %s\n", + test->name, rrddim_name(rd), c+1, + (int64_t)((rrdset_first_entry_s(st) + c * st->update_every) - time_start), + n, v, (same)?"OK":"### E R R O R ###"); + + if(!same) errors++; + + if(rd2) { + v = unpack_storage_number(rd2->db.data[c]); + n = test->results2[c]; + same = (roundndd(v * 10000000.0) == roundndd(n * 10000000.0))?1:0; + fprintf(stderr, " %s/%s: checking position %lu (at %"PRId64" secs), expecting value " NETDATA_DOUBLE_FORMAT + ", found " NETDATA_DOUBLE_FORMAT ", %s\n", + test->name, rrddim_name(rd2), c+1, + (int64_t)((rrdset_first_entry_s(st) + c * st->update_every) - time_start), + n, v, (same)?"OK":"### E R R O R ###"); + if(!same) errors++; + } + } + + return errors; +} + +static int test_variable_renames(void) { + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + + fprintf(stderr, "Creating chart\n"); + RRDSET *st = rrdset_create_localhost("chart", "ID", NULL, "family", "context", "Unit Testing", "a value", "unittest", NULL, 1, 1, RRDSET_TYPE_LINE); + fprintf(stderr, "Created chart with id '%s', name '%s'\n", rrdset_id(st), rrdset_name(st)); + + fprintf(stderr, "Creating dimension DIM1\n"); + RRDDIM *rd1 = rrddim_add(st, "DIM1", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); + + fprintf(stderr, "Creating dimension DIM2\n"); + RRDDIM *rd2 = rrddim_add(st, "DIM2", NULL, 1, 1, RRD_ALGORITHM_INCREMENTAL); + fprintf(stderr, "Created dimension with id '%s', name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); + + fprintf(stderr, "Renaming chart to CHARTNAME1\n"); + rrdset_reset_name(st, "CHARTNAME1"); + fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", rrdset_id(st), rrdset_name(st)); + + fprintf(stderr, "Renaming chart to CHARTNAME2\n"); + rrdset_reset_name(st, "CHARTNAME2"); + fprintf(stderr, "Renamed chart with id '%s' to name '%s'\n", rrdset_id(st), rrdset_name(st)); + + fprintf(stderr, "Renaming dimension DIM1 to DIM1NAME1\n"); 
+ rrddim_reset_name(st, rd1, "DIM1NAME1"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); + + fprintf(stderr, "Renaming dimension DIM1 to DIM1NAME2\n"); + rrddim_reset_name(st, rd1, "DIM1NAME2"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd1), rrddim_name(rd1)); + + fprintf(stderr, "Renaming dimension DIM2 to DIM2NAME1\n"); + rrddim_reset_name(st, rd2, "DIM2NAME1"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); + + fprintf(stderr, "Renaming dimension DIM2 to DIM2NAME2\n"); + rrddim_reset_name(st, rd2, "DIM2NAME2"); + fprintf(stderr, "Renamed dimension with id '%s' to name '%s'\n", rrddim_id(rd2), rrddim_name(rd2)); + + BUFFER *buf = buffer_create(1, NULL); + health_api_v1_chart_variables2json(st, buf); + fprintf(stderr, "%s", buffer_tostring(buf)); + buffer_free(buf); + return 1; +} + +int check_strdupz_path_subpath() { + + struct strdupz_path_subpath_checks { + const char *path; + const char *subpath; + const char *result; + } checks[] = { + { "", "", "." }, + { "/", "", "/" }, + { "/etc/netdata", "", "/etc/netdata" }, + { "/etc/netdata///", "", "/etc/netdata" }, + { "/etc/netdata///", "health.d", "/etc/netdata/health.d" }, + { "/etc/netdata///", "///health.d", "/etc/netdata/health.d" }, + { "/etc/netdata", "///health.d", "/etc/netdata/health.d" }, + { "", "///health.d", "./health.d" }, + { "/", "///health.d", "/health.d" }, + + // terminator + { NULL, NULL, NULL } + }; + + size_t i; + for(i = 0; checks[i].result ; i++) { + char *s = strdupz_path_subpath(checks[i].path, checks[i].subpath); + fprintf(stderr, "strdupz_path_subpath(\"%s\", \"%s\") = \"%s\": ", checks[i].path, checks[i].subpath, s); + if(!s || strcmp(s, checks[i].result) != 0) { + freez(s); + fprintf(stderr, "FAILED\n"); + return 1; + } + else { + freez(s); + fprintf(stderr, "OK\n"); + } + } + + return 0; +} + +int run_all_mockup_tests(void) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + if(check_strdupz_path_subpath()) + return 1; + + if(check_number_printing()) + return 1; + + if(check_rrdcalc_comparisons()) + return 1; + + if(!test_variable_renames()) + return 1; + + if(run_test(&test1)) + return 1; + + if(run_test(&test2)) + return 1; + + if(run_test(&test3)) + return 1; + + if(run_test(&test4)) + return 1; + + if(run_test(&test5)) + return 1; + + if(run_test(&test5b)) + return 1; + + if(run_test(&test6)) + return 1; + + if(run_test(&test7)) + return 1; + + if(run_test(&test8)) + return 1; + + if(run_test(&test9)) + return 1; + + if(run_test(&test10)) + return 1; + + if(run_test(&test11)) + return 1; + + if(run_test(&test12)) + return 1; + + if(run_test(&test13)) + return 1; + + if(run_test(&test14)) + return 1; + + if(run_test(&test14b)) + return 1; + + if(run_test(&test14c)) + return 1; + + if(run_test(&test15)) + return 1; + + + + return 0; +} + +int unit_test(long delay, long shift) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + static int repeat = 0; + repeat++; + + char name[101]; + snprintfz(name, sizeof(name) - 1, "unittest-%d-%ld-%ld", repeat, delay, shift); + + //debug_flags = 0xffffffff; + default_rrd_memory_mode = RRD_MEMORY_MODE_ALLOC; + default_rrd_update_every = 1; + + int do_abs = 1; + int do_inc = 1; + int do_abst = 0; + int do_absi = 0; + + RRDSET *st = rrdset_create_localhost("netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", NULL, 1, 1 + , RRDSET_TYPE_LINE); + rrdset_flag_set(st, RRDSET_FLAG_DEBUG); + + 
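// Editor's note (descriptive comment added for clarity): the loop below feeds
// a counter that grows by `increment` every `delay` microseconds into one
// dimension per algorithm (only "absolute" and "incremental" are enabled via
// do_abs/do_inc above), then checks the stored slots against the closed-form
// expectations computed after the loop, where the per-slot increment is
// rescaled by update_every * 1000000 / delay.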
RRDDIM *rdabs = NULL; + RRDDIM *rdinc = NULL; + RRDDIM *rdabst = NULL; + RRDDIM *rdabsi = NULL; + + if(do_abs) rdabs = rrddim_add(st, "absolute", "absolute", 1, 1, RRD_ALGORITHM_ABSOLUTE); + if(do_inc) rdinc = rrddim_add(st, "incremental", "incremental", 1, 1, RRD_ALGORITHM_INCREMENTAL); + if(do_abst) rdabst = rrddim_add(st, "percentage-of-absolute-row", "percentage-of-absolute-row", 1, 1, RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL); + if(do_absi) rdabsi = rrddim_add(st, "percentage-of-incremental-row", "percentage-of-incremental-row", 1, 1, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL); + + long increment = 1000; + collected_number i = 0; + + unsigned long c, dimensions = rrdset_number_of_dimensions(st); + RRDDIM *rd; + + for(c = 0; c < 20 ;c++) { + i += increment; + + fprintf(stderr, "\n\nLOOP = %lu, DELAY = %ld, VALUE = " COLLECTED_NUMBER_FORMAT "\n", c, delay, i); + if(c) { + // rrdset_next_usec_unfiltered(st, delay); + st->usec_since_last_update = delay; + } + if(do_abs) rrddim_set(st, "absolute", i); + if(do_inc) rrddim_set(st, "incremental", i); + if(do_abst) rrddim_set(st, "percentage-of-absolute-row", i); + if(do_absi) rrddim_set(st, "percentage-of-incremental-row", i); + + if(!c) { + now_realtime_timeval(&st->last_collected_time); + st->last_collected_time.tv_usec = shift; + } + + // prevent it from deleting the dimensions + rrddim_foreach_read(rd, st) { + rd->collector.last_collected_time.tv_sec = st->last_collected_time.tv_sec; + } + rrddim_foreach_done(rd); + + rrdset_done(st); + } + + unsigned long oincrement = increment; + increment = increment * st->update_every * 1000000 / delay; + fprintf(stderr, "\n\nORIGINAL INCREMENT: %lu, INCREMENT %ld, DELAY %ld, SHIFT %ld\n", oincrement * 10, increment * 10, delay, shift); + + int ret = 0; + storage_number sn; + NETDATA_DOUBLE cn, v; + for(c = 0 ; c < st->counter ; c++) { + fprintf(stderr, "\nPOSITION: c = %lu, EXPECTED VALUE %lu\n", c, (oincrement + c * increment + increment * (1000000 - shift) / 1000000 )* 10); + + rrddim_foreach_read(rd, st) { + sn = rd->db.data[c]; + cn = unpack_storage_number(sn); + fprintf(stderr, "\t %s " NETDATA_DOUBLE_FORMAT " (PACKED AS " STORAGE_NUMBER_FORMAT ") -> ", rrddim_id(rd), cn, sn); + + if(rd == rdabs) v = + ( oincrement + // + (increment * (1000000 - shift) / 1000000) + + (c + 1) * increment + ); + + else if(rd == rdinc) v = (c?(increment):(increment * (1000000 - shift) / 1000000)); + else if(rd == rdabst) v = oincrement / dimensions / 10; + else if(rd == rdabsi) v = oincrement / dimensions / 10; + else v = 0; + + if(v == cn) fprintf(stderr, "passed.\n"); + else { + fprintf(stderr, "ERROR! 
(expected " NETDATA_DOUBLE_FORMAT ")\n", v); + ret = 1; + } + } + rrddim_foreach_done(rd); + } + + if(ret) + fprintf(stderr, "\n\nUNIT TEST(%ld, %ld) FAILED\n\n", delay, shift); + + return ret; +} + +int test_sqlite(void) { + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + sqlite3 *db_mt; + fprintf(stderr, "Testing SQLIte\n"); + + int rc = sqlite3_open(":memory:", &db_mt); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: DB init failed\n"); + return 1; + } + + rc = sqlite3_exec_monitored(db_mt, "CREATE TABLE IF NOT EXISTS mine (id1, id2);", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Create table failed\n"); + return 1; + } + + rc = sqlite3_exec_monitored(db_mt, "DELETE FROM MINE LIMIT 1;", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Delete with LIMIT failed\n"); + return 1; + } + + rc = sqlite3_exec_monitored(db_mt, "UPDATE MINE SET id1=1 LIMIT 1;", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Update with LIMIT failed\n"); + return 1; + } + + rc = sqlite3_create_function(db_mt, "now_usec", 1, SQLITE_ANY, 0, sqlite_now_usec, 0, 0); + if (unlikely(rc != SQLITE_OK)) { + fprintf(stderr, "Failed to register internal now_usec function"); + return 1; + } + + rc = sqlite3_exec_monitored(db_mt, "UPDATE MINE SET id1=now_usec(0);", 0, 0, NULL); + if (rc != SQLITE_OK) { + fprintf(stderr,"Failed to test SQLite: Update with now_usec() failed\n"); + return 1; + } + + BUFFER *sql = buffer_create(ACLK_SYNC_QUERY_SIZE, NULL); + char *uuid_str = "0000_000"; + + buffer_sprintf(sql, TABLE_ACLK_ALERT, uuid_str); + rc = sqlite3_exec_monitored(db_mt, buffer_tostring(sql), 0, 0, NULL); + if (rc != SQLITE_OK) + goto error; + + buffer_free(sql); + fprintf(stderr,"SQLite is OK\n"); + rc = sqlite3_close_v2(db_mt); + return 0; +error: + rc = sqlite3_close_v2(db_mt); + fprintf(stderr,"SQLite statement failed: %s\n", buffer_tostring(sql)); + buffer_free(sql); + fprintf(stderr,"SQLite tests failed\n"); + return 1; +} diff --git a/daemon/unit_test.h b/src/daemon/unit_test.h index c7cd104e1..c7cd104e1 100644 --- a/daemon/unit_test.h +++ b/src/daemon/unit_test.h diff --git a/src/daemon/watcher.c b/src/daemon/watcher.c new file mode 100644 index 000000000..1e0090e24 --- /dev/null +++ b/src/daemon/watcher.c @@ -0,0 +1,178 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "watcher.h" + +watcher_step_t *watcher_steps; + +static struct completion shutdown_begin_completion; +static struct completion shutdown_end_completion; +static ND_THREAD *watcher_thread; + +void watcher_shutdown_begin(void) { + completion_mark_complete(&shutdown_begin_completion); +} + +void watcher_shutdown_end(void) { + completion_mark_complete(&shutdown_end_completion); +} + +void watcher_step_complete(watcher_step_id_t step_id) { + completion_mark_complete(&watcher_steps[step_id].p); +} + +static void watcher_wait_for_step(const watcher_step_id_t step_id) +{ + unsigned timeout = 90; + + usec_t step_start_time = now_monotonic_usec(); + +#ifdef ENABLE_SENTRY + // Wait with a timeout + bool ok = completion_timedwait_for(&watcher_steps[step_id].p, timeout); +#else + // Wait indefinitely + bool ok = true; + completion_wait_for(&watcher_steps[step_id].p); +#endif + + usec_t step_duration = now_monotonic_usec() - step_start_time; + + if (ok) { + netdata_log_info("shutdown step: [%d/%d] - '%s' finished in %llu milliseconds", + (int)step_id + 1, (int)WATCHER_STEP_ID_MAX, + watcher_steps[step_id].msg, step_duration / 
USEC_PER_MS); + } else { + // Do not call fatal() because it will try to execute the exit + // sequence twice. + netdata_log_error("shutdown step: [%d/%d] - '%s' took more than %u seconds (i.e. %llu milliseconds)", + (int)step_id + 1, (int)WATCHER_STEP_ID_MAX, watcher_steps[step_id].msg, + timeout, step_duration / USEC_PER_MS); + + abort(); + } +} + +void *watcher_main(void *arg) +{ + UNUSED(arg); + + netdata_log_debug(D_SYSTEM, "Watcher thread started"); + + // wait until the agent starts the shutdown process + completion_wait_for(&shutdown_begin_completion); + netdata_log_error("Shutdown process started"); + + usec_t shutdown_start_time = now_monotonic_usec(); + + watcher_wait_for_step(WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE); + watcher_wait_for_step(WATCHER_STEP_ID_DBENGINE_EXIT_MODE); + watcher_wait_for_step(WATCHER_STEP_ID_CLOSE_WEBRTC_CONNECTIONS); + watcher_wait_for_step(WATCHER_STEP_ID_DISABLE_MAINTENANCE_NEW_QUERIES_NEW_WEB_REQUESTS_NEW_STREAMING_CONNECTIONS_AND_ACLK); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_MAINTENANCE_THREAD); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_EXPORTERS_HEALTH_AND_WEB_SERVERS_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_COLLECTORS_AND_STREAMING_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_REPLICATION_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_PREPARE_METASYNC_SHUTDOWN); + watcher_wait_for_step(WATCHER_STEP_ID_DISABLE_ML_DETECTION_AND_TRAINING_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_CONTEXT_THREAD); + watcher_wait_for_step(WATCHER_STEP_ID_CLEAR_WEB_CLIENT_CACHE); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_ACLK_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_ALL_REMAINING_WORKER_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_CANCEL_MAIN_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_METASYNC_THREADS); + watcher_wait_for_step(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH); + watcher_wait_for_step(WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING); + watcher_wait_for_step(WATCHER_STEP_ID_STOP_DBENGINE_TIERS); + watcher_wait_for_step(WATCHER_STEP_ID_CLOSE_SQL_DATABASES); + watcher_wait_for_step(WATCHER_STEP_ID_REMOVE_PID_FILE); + watcher_wait_for_step(WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES); + watcher_wait_for_step(WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE); + + completion_wait_for(&shutdown_end_completion); + usec_t shutdown_end_time = now_monotonic_usec(); + + usec_t shutdown_duration = shutdown_end_time - shutdown_start_time; + netdata_log_error("Shutdown process ended in %llu milliseconds", + shutdown_duration / USEC_PER_MS); + + return NULL; +} + +void watcher_thread_start() { + watcher_steps = callocz(WATCHER_STEP_ID_MAX, sizeof(watcher_step_t)); + + watcher_steps[WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE].msg = + "create shutdown file"; + watcher_steps[WATCHER_STEP_ID_DBENGINE_EXIT_MODE].msg = + "dbengine exit mode"; + watcher_steps[WATCHER_STEP_ID_CLOSE_WEBRTC_CONNECTIONS].msg = + "close webrtc connections"; + watcher_steps[WATCHER_STEP_ID_DISABLE_MAINTENANCE_NEW_QUERIES_NEW_WEB_REQUESTS_NEW_STREAMING_CONNECTIONS_AND_ACLK].msg = + "disable maintenance, new queries, new web requests, new streaming connections and aclk"; + watcher_steps[WATCHER_STEP_ID_STOP_MAINTENANCE_THREAD].msg = + "stop maintenance thread"; + watcher_steps[WATCHER_STEP_ID_STOP_EXPORTERS_HEALTH_AND_WEB_SERVERS_THREADS].msg = + "stop exporters, health and web 
servers threads"; + watcher_steps[WATCHER_STEP_ID_STOP_COLLECTORS_AND_STREAMING_THREADS].msg = + "stop collectors and streaming threads"; + watcher_steps[WATCHER_STEP_ID_STOP_REPLICATION_THREADS].msg = + "stop replication threads"; + watcher_steps[WATCHER_STEP_ID_PREPARE_METASYNC_SHUTDOWN].msg = + "prepare metasync shutdown"; + watcher_steps[WATCHER_STEP_ID_DISABLE_ML_DETECTION_AND_TRAINING_THREADS].msg = + "disable ML detection and training threads"; + watcher_steps[WATCHER_STEP_ID_STOP_CONTEXT_THREAD].msg = + "stop context thread"; + watcher_steps[WATCHER_STEP_ID_CLEAR_WEB_CLIENT_CACHE].msg = + "clear web client cache"; + watcher_steps[WATCHER_STEP_ID_STOP_ACLK_THREADS].msg = + "stop aclk threads"; + watcher_steps[WATCHER_STEP_ID_STOP_ALL_REMAINING_WORKER_THREADS].msg = + "stop all remaining worker threads"; + watcher_steps[WATCHER_STEP_ID_CANCEL_MAIN_THREADS].msg = + "cancel main threads"; + watcher_steps[WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS].msg = + "flush dbengine tiers"; + watcher_steps[WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS].msg = + "stop collection for all hosts"; + watcher_steps[WATCHER_STEP_ID_STOP_METASYNC_THREADS].msg = + "stop metasync threads"; + watcher_steps[WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH].msg = + "wait for dbengine collectors to finish"; + watcher_steps[WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING].msg = + "wait for dbengine main cache to finish flushing"; + watcher_steps[WATCHER_STEP_ID_STOP_DBENGINE_TIERS].msg = + "stop dbengine tiers"; + watcher_steps[WATCHER_STEP_ID_CLOSE_SQL_DATABASES].msg = + "close SQL databases"; + watcher_steps[WATCHER_STEP_ID_REMOVE_PID_FILE].msg = + "remove pid file"; + watcher_steps[WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES].msg = + "free openssl structures"; + watcher_steps[WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE].msg = + "remove incomplete shutdown file"; + + for (size_t i = 0; i != WATCHER_STEP_ID_MAX; i++) { + completion_init(&watcher_steps[i].p); + } + + completion_init(&shutdown_begin_completion); + completion_init(&shutdown_end_completion); + + watcher_thread = nd_thread_create("P[WATCHER]", NETDATA_THREAD_OPTION_JOINABLE, watcher_main, NULL); +} + +void watcher_thread_stop() { + nd_thread_join(watcher_thread); + + for (size_t i = 0; i != WATCHER_STEP_ID_MAX; i++) { + completion_destroy(&watcher_steps[i].p); + } + + completion_destroy(&shutdown_begin_completion); + completion_destroy(&shutdown_end_completion); + + freez(watcher_steps); +} diff --git a/src/daemon/watcher.h b/src/daemon/watcher.h new file mode 100644 index 000000000..b785ca436 --- /dev/null +++ b/src/daemon/watcher.h @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef DAEMON_WATCHER_H +#define DAEMON_WATCHER_H + +#include "libnetdata/libnetdata.h" + +typedef enum { + WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE = 0, + WATCHER_STEP_ID_DBENGINE_EXIT_MODE, + WATCHER_STEP_ID_CLOSE_WEBRTC_CONNECTIONS, + WATCHER_STEP_ID_DISABLE_MAINTENANCE_NEW_QUERIES_NEW_WEB_REQUESTS_NEW_STREAMING_CONNECTIONS_AND_ACLK, + WATCHER_STEP_ID_STOP_MAINTENANCE_THREAD, + WATCHER_STEP_ID_STOP_EXPORTERS_HEALTH_AND_WEB_SERVERS_THREADS, + WATCHER_STEP_ID_STOP_COLLECTORS_AND_STREAMING_THREADS, + WATCHER_STEP_ID_STOP_REPLICATION_THREADS, + WATCHER_STEP_ID_PREPARE_METASYNC_SHUTDOWN, + WATCHER_STEP_ID_DISABLE_ML_DETECTION_AND_TRAINING_THREADS, + WATCHER_STEP_ID_STOP_CONTEXT_THREAD, + WATCHER_STEP_ID_CLEAR_WEB_CLIENT_CACHE, + WATCHER_STEP_ID_STOP_ACLK_THREADS, + WATCHER_STEP_ID_STOP_ALL_REMAINING_WORKER_THREADS, + 
WATCHER_STEP_ID_CANCEL_MAIN_THREADS, + WATCHER_STEP_ID_FLUSH_DBENGINE_TIERS, + WATCHER_STEP_ID_STOP_COLLECTION_FOR_ALL_HOSTS, + WATCHER_STEP_ID_STOP_METASYNC_THREADS, + WATCHER_STEP_ID_WAIT_FOR_DBENGINE_COLLECTORS_TO_FINISH, + WATCHER_STEP_ID_WAIT_FOR_DBENGINE_MAIN_CACHE_TO_FINISH_FLUSHING, + WATCHER_STEP_ID_STOP_DBENGINE_TIERS, + WATCHER_STEP_ID_CLOSE_SQL_DATABASES, + WATCHER_STEP_ID_REMOVE_PID_FILE, + WATCHER_STEP_ID_FREE_OPENSSL_STRUCTURES, + WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE, + + // Always keep this as the last enum value + WATCHER_STEP_ID_MAX +} watcher_step_id_t; + +typedef struct { + const char *msg; + struct completion p; +} watcher_step_t; + +extern watcher_step_t *watcher_steps; + +void watcher_thread_start(void); +void watcher_thread_stop(void); + +void watcher_shutdown_begin(void); +void watcher_shutdown_end(void); + +void watcher_step_complete(watcher_step_id_t step_id); + +#endif /* DAEMON_WATCHER_H */
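For orientation, a minimal sketch of how a caller is expected to drive the watcher API declared above during shutdown; the wrapping function below is a hypothetical illustration, and only the watcher_* calls and step ids come from this header (watcher_thread_start() must already have run at startup):

static void example_daemon_shutdown(void) {                          // hypothetical caller
    watcher_shutdown_begin();                                        // unblocks watcher_main()
    // perform each shutdown step, acknowledging it as it finishes,
    // in the order watcher_main() waits for them:
    watcher_step_complete(WATCHER_STEP_ID_CREATE_SHUTDOWN_FILE);
    watcher_step_complete(WATCHER_STEP_ID_DBENGINE_EXIT_MODE);
    // ... the remaining steps, through ...
    watcher_step_complete(WATCHER_STEP_ID_REMOVE_INCOMPLETE_SHUTDOWN_FILE);
    watcher_shutdown_end();                                          // watcher logs the total duration
    watcher_thread_stop();                                           // join the watcher and free its state
}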