diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 01:22:31 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-06 01:22:31 +0000 |
commit | 8d4f58e49b9dc7d3545651023a36729de773ad86 (patch) | |
tree | 7bc7be4a8e9e298daa1349348400aa2a653866f2 /database | |
parent | Initial commit. (diff) | |
download | netdata-8d4f58e49b9dc7d3545651023a36729de773ad86.tar.xz netdata-8d4f58e49b9dc7d3545651023a36729de773ad86.zip |
Adding upstream version 1.12.0.upstream/1.12.0upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | database/Makefile.am | 8 | ||||
-rw-r--r-- | database/README.md | 208 | ||||
-rw-r--r-- | database/rrd.c | 150 | ||||
-rw-r--r-- | database/rrd.h | 888 | ||||
-rw-r--r-- | database/rrdcalc.c | 429 | ||||
-rw-r--r-- | database/rrdcalc.h | 138 | ||||
-rw-r--r-- | database/rrdcalctemplate.c | 71 | ||||
-rw-r--r-- | database/rrdcalctemplate.h | 68 | ||||
-rw-r--r-- | database/rrddim.c | 401 | ||||
-rw-r--r-- | database/rrddimvar.c | 217 | ||||
-rw-r--r-- | database/rrddimvar.h | 56 | ||||
-rw-r--r-- | database/rrdfamily.c | 61 | ||||
-rw-r--r-- | database/rrdhost.c | 744 | ||||
-rw-r--r-- | database/rrdset.c | 1637 | ||||
-rw-r--r-- | database/rrdsetvar.c | 189 | ||||
-rw-r--r-- | database/rrdsetvar.h | 44 | ||||
-rw-r--r-- | database/rrdvar.c | 285 | ||||
-rw-r--r-- | database/rrdvar.h | 66 |
18 files changed, 5660 insertions, 0 deletions
diff --git a/database/Makefile.am b/database/Makefile.am new file mode 100644 index 0000000..19554be --- /dev/null +++ b/database/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/database/README.md b/database/README.md new file mode 100644 index 0000000..aedf4d5 --- /dev/null +++ b/database/README.md @@ -0,0 +1,208 @@ +# Database + +Although `netdata` does all its calculations using `long double`, it stores all values using +a [custom-made 32-bit number](../libnetdata/storage_number/). + +So, for each dimension of a chart, netdata will need: `4 bytes for the value * the entries +of its history`. It will not store any other data for each value in the time series database. +Since all its values are stored in a time series with fixed step, the time each value +corresponds can be calculated at run time, using the position of a value in the round robin database. + +The default history is 3.600 entries, thus it will need 14.4KB for each chart dimension. +If you need 1.000 dimensions, they will occupy just 14.4MB. + +Of course, 3.600 entries is a very short history, especially if data collection frequency is set +to 1 second. You will have just one hour of data. + +For a day of data and 1.000 dimensions, you will need: 86.400 seconds * 4 bytes * 1.000 +dimensions = 345MB of RAM. + +Currently the only option you have to lower this number is to use +**[Memory Deduplication - Kernel Same Page Merging - KSM](#ksm)**. + +## Memory modes + +Currently netdata supports 5 memory modes: + +1. `ram`, data are purely in memory. Data are never saved on disk. This mode uses `mmap()` and + supports [KSM](#ksm). + +2. `save`, (the default) data are only in RAM while netdata runs and are saved to / loaded from + disk on netdata restart. It also uses `mmap()` and supports [KSM](#ksm). + +3. `map`, data are in memory mapped files. This works like the swap. Keep in mind though, this + will have a constant write on your disk. When netdata writes data on its memory, the Linux kernel + marks the related memory pages as dirty and automatically starts updating them on disk. + Unfortunately we cannot control how frequently this works. The Linux kernel uses exactly the + same algorithm it uses for its swap memory. Check below for additional information on running a + dedicated central netdata server. This mode uses `mmap()` but does not support [KSM](#ksm). + +4. `none`, without a database (collected metrics can only be streamed to another netdata). + +5. `alloc`, like `ram` but it uses `calloc()` and does not support [KSM](#ksm). This mode is the + fallback for all others except `none`. + +You can select the memory mode by editing netdata.conf and setting: + +``` +[global] + # ram, save (the default, save on exit, load on start), map (swap like) + memory mode = save + + # the directory where data are saved + cache directory = /var/cache/netdata +``` + +## Running netdata in embedded devices + +Embedded devices usually have very limited RAM resources available. + +There are 2 settings for you to tweak: + +1. `update every`, which controls the data collection frequency +2. `history`, which controls the size of the database in RAM + +By default `update every = 1` and `history = 3600`. This gives you an hour of data with per +second updates. + +If you set `update every = 2` and `history = 1800`, you will still have an hour of data, but +collected once every 2 seconds. This will **cut in half** both CPU and RAM resources consumed +by netdata. Of course experiment a bit. On very weak devices you might have to use +`update every = 5` and `history = 720` (still 1 hour of data, but 1/5 of the CPU and RAM resources). + +You can also disable [data collection plugins](../collectors) you don't need. +Disabling such plugins will also free both CPU and RAM resources. + +## Running a dedicated central netdata server + +Netdata allows streaming data between netdata nodes. This allows us to have a central netdata +server that will maintain the entire database for all nodes, and will also run health checks/alarms +for all nodes. + +For this central netdata, memory size can be a problem. Fortunately, netdata supports several +memory modes. What is interesting for this setup is `memory mode = map`. + +In this mode, the database of netdata is stored in memory mapped files. netdata continues to read +and write the database in memory, but the kernel automatically loads and saves memory pages from/to +disk. + +**We suggest _not_ to use this mode on nodes that run other applications.** There will always be +dirty memory to be synced and this syncing process may influence the way other applications work. +This mode however is ideal when we need a central netdata server that would normally need huge +amounts of memory. Using memory mode `map` we can overcome all memory restrictions. + +There are a few kernel options that provide finer control on the way this syncing works. But before +explaining them, a brief introduction of how netdata database works is needed. + +For each chart, netdata maps the following files: + +1. `chart/main.db`, this is the file that maintains chart information. Every time data are collected + for a chart, this is updated. + +2. `chart/dimension_name.db`, this is the file for each dimension. At its beginning there is a + header, followed by the round robin database where metrics are stored. + +So, every time netdata collects data, the following pages will become dirty: + +1. the chart file +2. the header part of all dimension files +3. if the collected metrics are stored far enough in the dimension file, another page will + become dirty, for each dimension + +Each page in Linux is 4KB. So, with 200 charts and 1000 dimensions, there will be 1200 to 2200 4KB +pages dirty pages every second. Of course 1200 of them will always be dirty (the chart header and +the dimensions headers) and 1000 will be dirty for about 1000 seconds (4 bytes per metric, 4KB per +page, so 1000 seconds, or 16 minutes per page). + +Hopefully, the Linux kernel does not sync all these data every second. The frequency they are +synced is controlled by `/proc/sys/vm/dirty_expire_centisecs` or the +`sysctl` `vm.dirty_expire_centisecs`. The default on most systems is 3000 (30 seconds). + +On a busy server centralizing metrics from 20+ servers you will experience this: + +![image](https://cloud.githubusercontent.com/assets/2662304/23834750/429ab0dc-0764-11e7-821a-d7908bc881ac.png) + +As you can see, there is quite some stress (this is `iowait`) every 30 seconds. + +A simple solution is to increase this time to 10 minutes (60000). This is the same system +with this setting in 10 minutes: + +![image](https://cloud.githubusercontent.com/assets/2662304/23834784/d2304f72-0764-11e7-8389-fb830ffd973a.png) + +Of course, setting this to 10 minutes means that data on disk might be up to 10 minutes old if you +get an abnormal shutdown. + +There are 2 more options to tweak: + +1. `dirty_background_ratio`, by default `10`. +2. `dirty_ratio`, by default `20`. + +These control the amount of memory that should be dirty for disk syncing to be triggered. +On dedicated netdata servers, you can use: `80` and `90` respectively, so that all RAM is given +to netdata. + +With these settings, you can expect a little `iowait` spike once every 10 minutes and in case +of system crash, data on disk will be up to 10 minutes old. + +![image](https://cloud.githubusercontent.com/assets/2662304/23835030/ba4bf506-0768-11e7-9bc6-3b23e080c69f.png) + +To have these settings automatically applied on boot, create the file `/etc/sysctl.d/netdata-memory.conf` with these contents: + +``` +vm.dirty_expire_centisecs = 60000 +vm.dirty_background_ratio = 80 +vm.dirty_ratio = 90 +vm.dirty_writeback_centisecs = 0 +``` + +## KSM + +Netdata offers all its round robin database to kernel for deduplication. + +In the past KSM has been criticized for consuming a lot of CPU resources. +Although this is true when KSM is used for deduplicating certain applications, it is not true with +netdata, since the netdata memory is written very infrequently (if you have 24 hours of metrics in +netdata, each byte at the in-memory database will be updated just once per day). + +KSM is a solution that will provide 60+% memory savings to netdata. + +### Enable KSM in kernel + +You need to run a kernel compiled with: + +```sh +CONFIG_KSM=y +``` + +When KSM is enabled at the kernel is just available for the user to enable it. + +So, if you build a kernel with `CONFIG_KSM=y` you will just get a few files in `/sys/kernel/mm/ksm`. Nothing else happens. There is no performance penalty (apart I guess from the memory this code occupies into the kernel). + +The files that `CONFIG_KSM=y` offers include: + +- `/sys/kernel/mm/ksm/run` by default `0`. You have to set this to `1` for the kernel to spawn `ksmd`. +- `/sys/kernel/mm/ksm/sleep_millisecs`, by default `20`. The frequency ksmd should evaluate memory for deduplication. +- `/sys/kernel/mm/ksm/pages_to_scan`, by default `100`. The amount of pages ksmd will evaluate on each run. + +So, by default `ksmd` is just disabled. It will not harm performance and the user/admin can control the CPU resources he/she is willing `ksmd` to use. + +### Run `ksmd` kernel daemon + +To activate / run `ksmd` you need to run: + +```sh +echo 1 >/sys/kernel/mm/ksm/run +echo 1000 >/sys/kernel/mm/ksm/sleep_millisecs +``` + +With these settings ksmd does not even appear in the running process list (it will run once per second and evaluate 100 pages for de-duplication). + +Put the above lines in your boot sequence (`/etc/rc.local` or equivalent) to have `ksmd` run at boot. + +## Monitoring Kernel Memory de-duplication performance + +Netdata will create charts for kernel memory de-duplication performance, like this: + +![image](https://cloud.githubusercontent.com/assets/2662304/11998786/eb23ae54-aab6-11e5-94d4-e848e8a5c56a.png) + +[![analytics](https://www.google-analytics.com/collect?v=1&aip=1&t=pageview&_s=1&ds=github&dr=https%3A%2F%2Fgithub.com%2Fnetdata%2Fnetdata&dl=https%3A%2F%2Fmy-netdata.io%2Fgithub%2Fdatabase%2FREADME&_u=MAC~&cid=5792dfd7-8dc4-476b-af31-da2fdb9f93d2&tid=UA-64295674-3)]() diff --git a/database/rrd.c b/database/rrd.c new file mode 100644 index 0000000..119efa6 --- /dev/null +++ b/database/rrd.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#define NETDATA_RRD_INTERNALS 1 + +#include "rrd.h" + +// ---------------------------------------------------------------------------- +// globals + +/* +// if not zero it gives the time (in seconds) to remove un-updated dimensions +// DO NOT ENABLE +// if dimensions are removed, the chart generation will have to run again +int rrd_delete_unupdated_dimensions = 0; +*/ + +int default_rrd_update_every = UPDATE_EVERY; +int default_rrd_history_entries = RRD_DEFAULT_HISTORY_ENTRIES; +RRD_MEMORY_MODE default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE; +int gap_when_lost_iterations_above = 1; + + +// ---------------------------------------------------------------------------- +// RRD - memory modes + +inline const char *rrd_memory_mode_name(RRD_MEMORY_MODE id) { + switch(id) { + case RRD_MEMORY_MODE_RAM: + return RRD_MEMORY_MODE_RAM_NAME; + + case RRD_MEMORY_MODE_MAP: + return RRD_MEMORY_MODE_MAP_NAME; + + case RRD_MEMORY_MODE_NONE: + return RRD_MEMORY_MODE_NONE_NAME; + + case RRD_MEMORY_MODE_SAVE: + return RRD_MEMORY_MODE_SAVE_NAME; + + case RRD_MEMORY_MODE_ALLOC: + return RRD_MEMORY_MODE_ALLOC_NAME; + } + + return RRD_MEMORY_MODE_SAVE_NAME; +} + +RRD_MEMORY_MODE rrd_memory_mode_id(const char *name) { + if(unlikely(!strcmp(name, RRD_MEMORY_MODE_RAM_NAME))) + return RRD_MEMORY_MODE_RAM; + + else if(unlikely(!strcmp(name, RRD_MEMORY_MODE_MAP_NAME))) + return RRD_MEMORY_MODE_MAP; + + else if(unlikely(!strcmp(name, RRD_MEMORY_MODE_NONE_NAME))) + return RRD_MEMORY_MODE_NONE; + + else if(unlikely(!strcmp(name, RRD_MEMORY_MODE_ALLOC_NAME))) + return RRD_MEMORY_MODE_ALLOC; + + return RRD_MEMORY_MODE_SAVE; +} + + +// ---------------------------------------------------------------------------- +// RRD - algorithms types + +RRD_ALGORITHM rrd_algorithm_id(const char *name) { + if(strcmp(name, RRD_ALGORITHM_INCREMENTAL_NAME) == 0) + return RRD_ALGORITHM_INCREMENTAL; + + else if(strcmp(name, RRD_ALGORITHM_ABSOLUTE_NAME) == 0) + return RRD_ALGORITHM_ABSOLUTE; + + else if(strcmp(name, RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL_NAME) == 0) + return RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL; + + else if(strcmp(name, RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL_NAME) == 0) + return RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL; + + else + return RRD_ALGORITHM_ABSOLUTE; +} + +const char *rrd_algorithm_name(RRD_ALGORITHM algorithm) { + switch(algorithm) { + case RRD_ALGORITHM_ABSOLUTE: + default: + return RRD_ALGORITHM_ABSOLUTE_NAME; + + case RRD_ALGORITHM_INCREMENTAL: + return RRD_ALGORITHM_INCREMENTAL_NAME; + + case RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL: + return RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL_NAME; + + case RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL: + return RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL_NAME; + } +} + + +// ---------------------------------------------------------------------------- +// RRD - chart types + +inline RRDSET_TYPE rrdset_type_id(const char *name) { + if(unlikely(strcmp(name, RRDSET_TYPE_AREA_NAME) == 0)) + return RRDSET_TYPE_AREA; + + else if(unlikely(strcmp(name, RRDSET_TYPE_STACKED_NAME) == 0)) + return RRDSET_TYPE_STACKED; + + else // if(unlikely(strcmp(name, RRDSET_TYPE_LINE_NAME) == 0)) + return RRDSET_TYPE_LINE; +} + +const char *rrdset_type_name(RRDSET_TYPE chart_type) { + switch(chart_type) { + case RRDSET_TYPE_LINE: + default: + return RRDSET_TYPE_LINE_NAME; + + case RRDSET_TYPE_AREA: + return RRDSET_TYPE_AREA_NAME; + + case RRDSET_TYPE_STACKED: + return RRDSET_TYPE_STACKED_NAME; + } +} + + +// ---------------------------------------------------------------------------- +// RRD - cache directory + +char *rrdset_cache_dir(RRDHOST *host, const char *id, const char *config_section) { + char *ret = NULL; + + char b[FILENAME_MAX + 1]; + char n[FILENAME_MAX + 1]; + rrdset_strncpyz_name(b, id, FILENAME_MAX); + + snprintfz(n, FILENAME_MAX, "%s/%s", host->cache_dir, b); + ret = config_get(config_section, "cache directory", n); + + if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) { + int r = mkdir(ret, 0775); + if(r != 0 && errno != EEXIST) + error("Cannot create directory '%s'", ret); + } + + return ret; +} diff --git a/database/rrd.h b/database/rrd.h new file mode 100644 index 0000000..24705eb --- /dev/null +++ b/database/rrd.h @@ -0,0 +1,888 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRD_H +#define NETDATA_RRD_H 1 + +// forward typedefs +typedef struct rrdhost RRDHOST; +typedef struct rrddim RRDDIM; +typedef struct rrdset RRDSET; +typedef struct rrdvar RRDVAR; +typedef struct rrdsetvar RRDSETVAR; +typedef struct rrddimvar RRDDIMVAR; +typedef struct rrdcalc RRDCALC; +typedef struct rrdcalctemplate RRDCALCTEMPLATE; +typedef struct alarm_entry ALARM_ENTRY; + +#include "../daemon/common.h" +#include "web/api/queries/query.h" +#include "rrdvar.h" +#include "rrdsetvar.h" +#include "rrddimvar.h" +#include "rrdcalc.h" +#include "rrdcalctemplate.h" + +#define UPDATE_EVERY 1 +#define UPDATE_EVERY_MAX 3600 + +#define RRD_DEFAULT_HISTORY_ENTRIES 3600 +#define RRD_HISTORY_ENTRIES_MAX (86400*365) + +extern int default_rrd_update_every; +extern int default_rrd_history_entries; +extern int gap_when_lost_iterations_above; + +#define RRD_ID_LENGTH_MAX 200 + +#define RRDSET_MAGIC "NETDATA RRD SET FILE V019" +#define RRDDIMENSION_MAGIC "NETDATA RRD DIMENSION FILE V019" + +typedef long long total_number; +#define TOTAL_NUMBER_FORMAT "%lld" + +// ---------------------------------------------------------------------------- +// chart types + +typedef enum rrdset_type { + RRDSET_TYPE_LINE = 0, + RRDSET_TYPE_AREA = 1, + RRDSET_TYPE_STACKED = 2 +} RRDSET_TYPE; + +#define RRDSET_TYPE_LINE_NAME "line" +#define RRDSET_TYPE_AREA_NAME "area" +#define RRDSET_TYPE_STACKED_NAME "stacked" + +RRDSET_TYPE rrdset_type_id(const char *name); +const char *rrdset_type_name(RRDSET_TYPE chart_type); + + +// ---------------------------------------------------------------------------- +// memory mode + +typedef enum rrd_memory_mode { + RRD_MEMORY_MODE_NONE = 0, + RRD_MEMORY_MODE_RAM = 1, + RRD_MEMORY_MODE_MAP = 2, + RRD_MEMORY_MODE_SAVE = 3, + RRD_MEMORY_MODE_ALLOC = 4 +} RRD_MEMORY_MODE; + +#define RRD_MEMORY_MODE_NONE_NAME "none" +#define RRD_MEMORY_MODE_RAM_NAME "ram" +#define RRD_MEMORY_MODE_MAP_NAME "map" +#define RRD_MEMORY_MODE_SAVE_NAME "save" +#define RRD_MEMORY_MODE_ALLOC_NAME "alloc" + +extern RRD_MEMORY_MODE default_rrd_memory_mode; + +extern const char *rrd_memory_mode_name(RRD_MEMORY_MODE id); +extern RRD_MEMORY_MODE rrd_memory_mode_id(const char *name); + + +// ---------------------------------------------------------------------------- +// algorithms types + +typedef enum rrd_algorithm { + RRD_ALGORITHM_ABSOLUTE = 0, + RRD_ALGORITHM_INCREMENTAL = 1, + RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL = 2, + RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL = 3 +} RRD_ALGORITHM; + +#define RRD_ALGORITHM_ABSOLUTE_NAME "absolute" +#define RRD_ALGORITHM_INCREMENTAL_NAME "incremental" +#define RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL_NAME "percentage-of-incremental-row" +#define RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL_NAME "percentage-of-absolute-row" + +extern RRD_ALGORITHM rrd_algorithm_id(const char *name); +extern const char *rrd_algorithm_name(RRD_ALGORITHM algorithm); + +// ---------------------------------------------------------------------------- +// RRD FAMILY + +struct rrdfamily { + avl avl; + + const char *family; + uint32_t hash_family; + + size_t use_count; + + avl_tree_lock rrdvar_root_index; +}; +typedef struct rrdfamily RRDFAMILY; + + +// ---------------------------------------------------------------------------- +// flags +// use this for configuration flags, not for state control +// flags are set/unset in a manner that is not thread safe +// and may lead to missing information. + +typedef enum rrddim_flags { + RRDDIM_FLAG_NONE = 0, + RRDDIM_FLAG_HIDDEN = (1 << 0), // this dimension will not be offered to callers + RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS = (1 << 1) // do not offer RESET or OVERFLOW info to callers +} RRDDIM_FLAGS; + +#ifdef HAVE_C___ATOMIC +#define rrddim_flag_check(rd, flag) (__atomic_load_n(&((rd)->flags), __ATOMIC_SEQ_CST) & (flag)) +#define rrddim_flag_set(rd, flag) __atomic_or_fetch(&((rd)->flags), (flag), __ATOMIC_SEQ_CST) +#define rrddim_flag_clear(rd, flag) __atomic_and_fetch(&((rd)->flags), ~(flag), __ATOMIC_SEQ_CST) +#else +#define rrddim_flag_check(rd, flag) ((rd)->flags & (flag)) +#define rrddim_flag_set(rd, flag) (rd)->flags |= (flag) +#define rrddim_flag_clear(rd, flag) (rd)->flags &= ~(flag) +#endif + + +// ---------------------------------------------------------------------------- +// RRD DIMENSION - this is a metric + +struct rrddim { + // ------------------------------------------------------------------------ + // binary indexing structures + + avl avl; // the binary index - this has to be first member! + + // ------------------------------------------------------------------------ + // the dimension definition + + const char *id; // the id of this dimension (for internal identification) + const char *name; // the name of this dimension (as presented to user) + // this is a pointer to the config structure + // since the config always has a higher priority + // (the user overwrites the name of the charts) + // DO NOT FREE THIS - IT IS ALLOCATED IN CONFIG + + RRD_ALGORITHM algorithm; // the algorithm that is applied to add new collected values + RRD_MEMORY_MODE rrd_memory_mode; // the memory mode for this dimension + + collected_number multiplier; // the multiplier of the collected values + collected_number divisor; // the divider of the collected values + + uint32_t flags; // configuration flags for the dimension + + // ------------------------------------------------------------------------ + // members for temporary data we need for calculations + + uint32_t hash; // a simple hash of the id, to speed up searching / indexing + // instead of strcmp() every item in the binary index + // we first compare the hashes + + uint32_t hash_name; // a simple hash of the name + + char *cache_filename; // the filename we load/save from/to this set + + size_t collections_counter; // the number of times we added values to this rrdim + size_t unused[9]; + + collected_number collected_value_max; // the absolute maximum of the collected value + + unsigned int updated:1; // 1 when the dimension has been updated since the last processing + unsigned int exposed:1; // 1 when set what have sent this dimension to the central netdata + + struct timeval last_collected_time; // when was this dimension last updated + // this is actual date time we updated the last_collected_value + // THIS IS DIFFERENT FROM THE SAME MEMBER OF RRDSET + + calculated_number calculated_value; // the current calculated value, after applying the algorithm - resets to zero after being used + calculated_number last_calculated_value; // the last calculated value processed + + calculated_number last_stored_value; // the last value as stored in the database (after interpolation) + + collected_number collected_value; // the current value, as collected - resets to 0 after being used + collected_number last_collected_value; // the last value that was collected, after being processed + + // the *_volume members are used to calculate the accuracy of the rounding done by the + // storage number - they are printed to debug.log when debug is enabled for a set. + calculated_number collected_volume; // the sum of all collected values so far + calculated_number stored_volume; // the sum of all stored values so far + + struct rrddim *next; // linking of dimensions within the same data set + struct rrdset *rrdset; + + // ------------------------------------------------------------------------ + // members for checking the data when loading from disk + + long entries; // how many entries this dimension has in ram + // this is the same to the entries of the data set + // we set it here, to check the data when we load it from disk. + + int update_every; // every how many seconds is this updated + + size_t memsize; // the memory allocated for this dimension + + char magic[sizeof(RRDDIMENSION_MAGIC) + 1]; // a string to be saved, used to identify our data file + + struct rrddimvar *variables; + + // ------------------------------------------------------------------------ + // the values stored in this dimension, using our floating point numbers + + storage_number values[]; // the array of values - THIS HAS TO BE THE LAST MEMBER +}; + +// ---------------------------------------------------------------------------- +// these loop macros make sure the linked list is accessed with the right lock + +#define rrddim_foreach_read(rd, st) \ + for((rd) = (st)->dimensions, rrdset_check_rdlock(st); (rd) ; (rd) = (rd)->next) + +#define rrddim_foreach_write(rd, st) \ + for((rd) = (st)->dimensions, rrdset_check_wrlock(st); (rd) ; (rd) = (rd)->next) + + +// ---------------------------------------------------------------------------- +// RRDSET - this is a chart + +// use this for configuration flags, not for state control +// flags are set/unset in a manner that is not thread safe +// and may lead to missing information. + +typedef enum rrdset_flags { + RRDSET_FLAG_ENABLED = 1 << 0, // enables or disables a chart + RRDSET_FLAG_DETAIL = 1 << 1, // if set, the data set should be considered as a detail of another + // (the master data set should be the one that has the same family and is not detail) + RRDSET_FLAG_DEBUG = 1 << 2, // enables or disables debugging for a chart + RRDSET_FLAG_OBSOLETE = 1 << 3, // this is marked by the collector/module as obsolete + RRDSET_FLAG_BACKEND_SEND = 1 << 4, // if set, this chart should be sent to backends + RRDSET_FLAG_BACKEND_IGNORE = 1 << 5, // if set, this chart should not be sent to backends + RRDSET_FLAG_UPSTREAM_SEND = 1 << 6, // if set, this chart should be sent upstream (streaming) + RRDSET_FLAG_UPSTREAM_IGNORE = 1 << 7, // if set, this chart should not be sent upstream (streaming) + RRDSET_FLAG_UPSTREAM_EXPOSED = 1 << 8, // if set, we have sent this chart definition to netdata master (streaming) + RRDSET_FLAG_STORE_FIRST = 1 << 9, // if set, do not eliminate the first collection during interpolation + RRDSET_FLAG_HETEROGENEOUS = 1 << 10, // if set, the chart is not homogeneous (dimensions in it have multiple algorithms, multipliers or dividers) + RRDSET_FLAG_HOMEGENEOUS_CHECK = 1 << 11, // if set, the chart should be checked to determine if the dimensions as homogeneous + RRDSET_FLAG_HIDDEN = 1 << 12, // if set, do not show this chart on the dashboard, but use it for backends + RRDSET_FLAG_SYNC_CLOCK = 1 << 13, // if set, microseconds on next data collection will be ignored (the chart will be synced to now) +} RRDSET_FLAGS; + +#ifdef HAVE_C___ATOMIC +#define rrdset_flag_check(st, flag) (__atomic_load_n(&((st)->flags), __ATOMIC_SEQ_CST) & (flag)) +#define rrdset_flag_set(st, flag) __atomic_or_fetch(&((st)->flags), flag, __ATOMIC_SEQ_CST) +#define rrdset_flag_clear(st, flag) __atomic_and_fetch(&((st)->flags), ~flag, __ATOMIC_SEQ_CST) +#else +#define rrdset_flag_check(st, flag) ((st)->flags & (flag)) +#define rrdset_flag_set(st, flag) (st)->flags |= (flag) +#define rrdset_flag_clear(st, flag) (st)->flags &= ~(flag) +#endif +#define rrdset_flag_check_noatomic(st, flag) ((st)->flags & (flag)) + +struct rrdset { + // ------------------------------------------------------------------------ + // binary indexing structures + + avl avl; // the index, with key the id - this has to be first! + avl avlname; // the index, with key the name + + // ------------------------------------------------------------------------ + // the set configuration + + char id[RRD_ID_LENGTH_MAX + 1]; // id of the data set + + const char *name; // the name of this dimension (as presented to user) + // this is a pointer to the config structure + // since the config always has a higher priority + // (the user overwrites the name of the charts) + + char *config_section; // the config section for the chart + + char *type; // the type of graph RRD_TYPE_* (a category, for determining graphing options) + char *family; // grouping sets under the same family + char *title; // title shown to user + char *units; // units of measurement + + char *context; // the template of this data set + uint32_t hash_context; // the hash of the chart's context + + RRDSET_TYPE chart_type; // line, area, stacked + + int update_every; // every how many seconds is this updated? + + long entries; // total number of entries in the data set + + long current_entry; // the entry that is currently being updated + // it goes around in a round-robin fashion + + RRDSET_FLAGS flags; // configuration flags + + int gap_when_lost_iterations_above; // after how many lost iterations a gap should be stored + // netdata will interpolate values for gaps lower than this + + long priority; // the sorting priority of this chart + + + // ------------------------------------------------------------------------ + // members for temporary data we need for calculations + + RRD_MEMORY_MODE rrd_memory_mode; // if set to 1, this is memory mapped + + char *cache_dir; // the directory to store dimensions + char cache_filename[FILENAME_MAX+1]; // the filename to store this set + + netdata_rwlock_t rrdset_rwlock; // protects dimensions linked list + + size_t counter; // the number of times we added values to this database + size_t counter_done; // the number of times rrdset_done() has been called + + time_t last_accessed_time; // the last time this RRDSET has been accessed + time_t upstream_resync_time; // the timestamp up to which we should resync clock upstream + + char *plugin_name; // the name of the plugin that generated this + char *module_name; // the name of the plugin module that generated this + + size_t unused[6]; + + uint32_t hash; // a simple hash on the id, to speed up searching + // we first compare hashes, and only if the hashes are equal we do string comparisons + + uint32_t hash_name; // a simple hash on the name + + usec_t usec_since_last_update; // the time in microseconds since the last collection of data + + struct timeval last_updated; // when this data set was last updated (updated every time the rrd_stats_done() function) + struct timeval last_collected_time; // when did this data set last collected values + + total_number collected_total; // used internally to calculate percentages + total_number last_collected_total; // used internally to calculate percentages + + RRDFAMILY *rrdfamily; // pointer to RRDFAMILY this chart belongs to + RRDHOST *rrdhost; // pointer to RRDHOST this chart belongs to + + struct rrdset *next; // linking of rrdsets + + // ------------------------------------------------------------------------ + // local variables + + calculated_number green; // green threshold for this chart + calculated_number red; // red threshold for this chart + + avl_tree_lock rrdvar_root_index; // RRDVAR index for this chart + RRDSETVAR *variables; // RRDSETVAR linked list for this chart (one RRDSETVAR, many RRDVARs) + RRDCALC *alarms; // RRDCALC linked list for this chart + + // ------------------------------------------------------------------------ + // members for checking the data when loading from disk + + unsigned long memsize; // how much mem we have allocated for this (without dimensions) + + char magic[sizeof(RRDSET_MAGIC) + 1]; // our magic + + // ------------------------------------------------------------------------ + // the dimensions + + avl_tree_lock dimensions_index; // the root of the dimensions index + RRDDIM *dimensions; // the actual data for every dimension + +}; + +#define rrdset_rdlock(st) netdata_rwlock_rdlock(&((st)->rrdset_rwlock)) +#define rrdset_wrlock(st) netdata_rwlock_wrlock(&((st)->rrdset_rwlock)) +#define rrdset_unlock(st) netdata_rwlock_unlock(&((st)->rrdset_rwlock)) + + +// ---------------------------------------------------------------------------- +// these loop macros make sure the linked list is accessed with the right lock + +#define rrdset_foreach_read(st, host) \ + for((st) = (host)->rrdset_root, rrdhost_check_rdlock(host); st ; (st) = (st)->next) + +#define rrdset_foreach_write(st, host) \ + for((st) = (host)->rrdset_root, rrdhost_check_wrlock(host); st ; (st) = (st)->next) + + +// ---------------------------------------------------------------------------- +// RRDHOST flags +// use this for configuration flags, not for state control +// flags are set/unset in a manner that is not thread safe +// and may lead to missing information. + +typedef enum rrdhost_flags { + RRDHOST_FLAG_ORPHAN = 1 << 0, // this host is orphan (not receiving data) + RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS = 1 << 1, // delete files of obsolete charts + RRDHOST_FLAG_DELETE_ORPHAN_HOST = 1 << 2, // delete the entire host when orphan + RRDHOST_FLAG_BACKEND_SEND = 1 << 3, // send it to backends + RRDHOST_FLAG_BACKEND_DONT_SEND = 1 << 4, // don't send it to backends +} RRDHOST_FLAGS; + +#ifdef HAVE_C___ATOMIC +#define rrdhost_flag_check(host, flag) (__atomic_load_n(&((host)->flags), __ATOMIC_SEQ_CST) & (flag)) +#define rrdhost_flag_set(host, flag) __atomic_or_fetch(&((host)->flags), flag, __ATOMIC_SEQ_CST) +#define rrdhost_flag_clear(host, flag) __atomic_and_fetch(&((host)->flags), ~flag, __ATOMIC_SEQ_CST) +#else +#define rrdhost_flag_check(host, flag) ((host)->flags & (flag)) +#define rrdhost_flag_set(host, flag) (host)->flags |= (flag) +#define rrdhost_flag_clear(host, flag) (host)->flags &= ~(flag) +#endif + +#ifdef NETDATA_INTERNAL_CHECKS +#define rrdset_debug(st, fmt, args...) do { if(unlikely(debug_flags & D_RRD_STATS && rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) \ + debug_int(__FILE__, __FUNCTION__, __LINE__, "%s: " fmt, st->name, ##args); } while(0) +#else +#define rrdset_debug(st, fmt, args...) debug_dummy() +#endif + +// ---------------------------------------------------------------------------- +// Health data + +struct alarm_entry { + uint32_t unique_id; + uint32_t alarm_id; + uint32_t alarm_event_id; + + time_t when; + time_t duration; + time_t non_clear_duration; + + char *name; + uint32_t hash_name; + + char *chart; + uint32_t hash_chart; + + char *family; + + char *exec; + char *recipient; + time_t exec_run_timestamp; + int exec_code; + + char *source; + char *units; + char *info; + + calculated_number old_value; + calculated_number new_value; + + char *old_value_string; + char *new_value_string; + + RRDCALC_STATUS old_status; + RRDCALC_STATUS new_status; + + uint32_t flags; + + int delay; + time_t delay_up_to_timestamp; + + uint32_t updated_by_id; + uint32_t updates_id; + + struct alarm_entry *next; +}; + + +typedef struct alarm_log { + uint32_t next_log_id; + uint32_t next_alarm_id; + unsigned int count; + unsigned int max; + ALARM_ENTRY *alarms; + netdata_rwlock_t alarm_log_rwlock; +} ALARM_LOG; + + +// ---------------------------------------------------------------------------- +// RRD HOST + +struct rrdhost { + avl avl; // the index of hosts + + // ------------------------------------------------------------------------ + // host information + + char *hostname; // the hostname of this host + uint32_t hash_hostname; // the hostname hash + + char *registry_hostname; // the registry hostname for this host + + char machine_guid[GUID_LEN + 1]; // the unique ID of this host + uint32_t hash_machine_guid; // the hash of the unique ID + + const char *os; // the O/S type of the host + const char *tags; // tags for this host + const char *timezone; // the timezone of the host + + RRDHOST_FLAGS flags; // flags about this RRDHOST + + int rrd_update_every; // the update frequency of the host + long rrd_history_entries; // the number of history entries for the host's charts + RRD_MEMORY_MODE rrd_memory_mode; // the memory more for the charts of this host + + char *cache_dir; // the directory to save RRD cache files + char *varlib_dir; // the directory to save health log + + char *program_name; // the program name that collects metrics for this host + char *program_version; // the program version that collects metrics for this host + + // ------------------------------------------------------------------------ + // streaming of data to remote hosts - rrdpush + + unsigned int rrdpush_send_enabled:1; // 1 when this host sends metrics to another netdata + char *rrdpush_send_destination; // where to send metrics to + char *rrdpush_send_api_key; // the api key at the receiving netdata + + // the following are state information for the threading + // streaming metrics from this netdata to an upstream netdata + volatile unsigned int rrdpush_sender_spawn:1; // 1 when the sender thread has been spawn + netdata_thread_t rrdpush_sender_thread; // the sender thread + + volatile unsigned int rrdpush_sender_connected:1; // 1 when the sender is ready to push metrics + int rrdpush_sender_socket; // the fd of the socket to the remote host, or -1 + + volatile unsigned int rrdpush_sender_error_shown:1; // 1 when we have logged a communication error + volatile unsigned int rrdpush_sender_join:1; // 1 when we have to join the sending thread + + SIMPLE_PATTERN *rrdpush_send_charts_matching; // pattern to match the charts to be sent + + // metrics may be collected asynchronously + // these synchronize all the threads willing the write to our sending buffer + netdata_mutex_t rrdpush_sender_buffer_mutex; // exclusive access to rrdpush_sender_buffer + int rrdpush_sender_pipe[2]; // collector to sender thread signaling + BUFFER *rrdpush_sender_buffer; // collector fills it, sender sends it + + + // ------------------------------------------------------------------------ + // streaming of data from remote hosts - rrdpush + + volatile size_t connected_senders; // when remote hosts are streaming to this + // host, this is the counter of connected clients + + time_t senders_disconnected_time; // the time the last sender was disconnected + + // ------------------------------------------------------------------------ + // health monitoring options + + unsigned int health_enabled:1; // 1 when this host has health enabled + time_t health_delay_up_to; // a timestamp to delay alarms processing up to + char *health_default_exec; // the full path of the alarms notifications program + char *health_default_recipient; // the default recipient for all alarms + char *health_log_filename; // the alarms event log filename + size_t health_log_entries_written; // the number of alarm events writtern to the alarms event log + FILE *health_log_fp; // the FILE pointer to the open alarms event log file + + // all RRDCALCs are primarily allocated and linked here + // RRDCALCs may be linked to charts at any point + // (charts may or may not exist when these are loaded) + RRDCALC *alarms; + + ALARM_LOG health_log; // alarms historical events (event log) + uint32_t health_last_processed_id; // the last processed health id from the log + uint32_t health_max_unique_id; // the max alarm log unique id given for the host + uint32_t health_max_alarm_id; // the max alarm id given for the host + + // templates of alarms + // these are used to create alarms when charts + // are created or renamed, that match them + RRDCALCTEMPLATE *templates; + + + // ------------------------------------------------------------------------ + // the charts of the host + + RRDSET *rrdset_root; // the host charts + + + // ------------------------------------------------------------------------ + // locks + + netdata_rwlock_t rrdhost_rwlock; // lock for this RRDHOST (protects rrdset_root linked list) + + // ------------------------------------------------------------------------ + // indexes + + avl_tree_lock rrdset_root_index; // the host's charts index (by id) + avl_tree_lock rrdset_root_index_name; // the host's charts index (by name) + + avl_tree_lock rrdfamily_root_index; // the host's chart families index + avl_tree_lock rrdvar_root_index; // the host's chart variables index + + struct rrdhost *next; +}; +extern RRDHOST *localhost; + +#define rrdhost_rdlock(host) netdata_rwlock_rdlock(&((host)->rrdhost_rwlock)) +#define rrdhost_wrlock(host) netdata_rwlock_wrlock(&((host)->rrdhost_rwlock)) +#define rrdhost_unlock(host) netdata_rwlock_unlock(&((host)->rrdhost_rwlock)) + +// ---------------------------------------------------------------------------- +// these loop macros make sure the linked list is accessed with the right lock + +#define rrdhost_foreach_read(var) \ + for((var) = localhost, rrd_check_rdlock(); var ; (var) = (var)->next) + +#define rrdhost_foreach_write(var) \ + for((var) = localhost, rrd_check_wrlock(); var ; (var) = (var)->next) + + +// ---------------------------------------------------------------------------- +// global lock for all RRDHOSTs + +extern netdata_rwlock_t rrd_rwlock; + +#define rrd_rdlock() netdata_rwlock_rdlock(&rrd_rwlock) +#define rrd_wrlock() netdata_rwlock_wrlock(&rrd_rwlock) +#define rrd_unlock() netdata_rwlock_unlock(&rrd_rwlock) + +// ---------------------------------------------------------------------------- + +extern size_t rrd_hosts_available; +extern time_t rrdhost_free_orphan_time; + +extern void rrd_init(char *hostname); + +extern RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash); +extern RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash); + +extern RRDHOST *rrdhost_find_or_create( + const char *hostname + , const char *registry_hostname + , const char *guid + , const char *os + , const char *timezone + , const char *tags + , const char *program_name + , const char *program_version + , int update_every + , long history + , RRD_MEMORY_MODE mode + , unsigned int health_enabled + , unsigned int rrdpush_enabled + , char *rrdpush_destination + , char *rrdpush_api_key + , char *rrdpush_send_charts_matching +); + +#if defined(NETDATA_INTERNAL_CHECKS) && defined(NETDATA_VERIFY_LOCKS) +extern void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line); +extern void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line); +extern void __rrdset_check_rdlock(RRDSET *st, const char *file, const char *function, const unsigned long line); +extern void __rrdset_check_wrlock(RRDSET *st, const char *file, const char *function, const unsigned long line); +extern void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line); +extern void __rrd_check_wrlock(const char *file, const char *function, const unsigned long line); + +#define rrdhost_check_rdlock(host) __rrdhost_check_rdlock(host, __FILE__, __FUNCTION__, __LINE__) +#define rrdhost_check_wrlock(host) __rrdhost_check_wrlock(host, __FILE__, __FUNCTION__, __LINE__) +#define rrdset_check_rdlock(st) __rrdset_check_rdlock(st, __FILE__, __FUNCTION__, __LINE__) +#define rrdset_check_wrlock(st) __rrdset_check_wrlock(st, __FILE__, __FUNCTION__, __LINE__) +#define rrd_check_rdlock() __rrd_check_rdlock(__FILE__, __FUNCTION__, __LINE__) +#define rrd_check_wrlock() __rrd_check_wrlock(__FILE__, __FUNCTION__, __LINE__) + +#else +#define rrdhost_check_rdlock(host) (void)0 +#define rrdhost_check_wrlock(host) (void)0 +#define rrdset_check_rdlock(st) (void)0 +#define rrdset_check_wrlock(st) (void)0 +#define rrd_check_rdlock() (void)0 +#define rrd_check_wrlock() (void)0 +#endif + +// ---------------------------------------------------------------------------- +// RRDSET functions + +extern int rrdset_set_name(RRDSET *st, const char *name); + +extern RRDSET *rrdset_create_custom(RRDHOST *host + , const char *type + , const char *id + , const char *name + , const char *family + , const char *context + , const char *title + , const char *units + , const char *plugin + , const char *module + , long priority + , int update_every + , RRDSET_TYPE chart_type + , RRD_MEMORY_MODE memory_mode + , long history_entries); + +#define rrdset_create(host, type, id, name, family, context, title, units, plugin, module, priority, update_every, chart_type) \ + rrdset_create_custom(host, type, id, name, family, context, title, units, plugin, module, priority, update_every, chart_type, (host)->rrd_memory_mode, (host)->rrd_history_entries) + +#define rrdset_create_localhost(type, id, name, family, context, title, units, plugin, module, priority, update_every, chart_type) \ + rrdset_create(localhost, type, id, name, family, context, title, units, plugin, module, priority, update_every, chart_type) + +extern void rrdhost_free_all(void); +extern void rrdhost_save_all(void); +extern void rrdhost_cleanup_all(void); + +extern void rrdhost_cleanup_orphan_hosts_nolock(RRDHOST *protected); +extern void rrdhost_free(RRDHOST *host); +extern void rrdhost_save_charts(RRDHOST *host); +extern void rrdhost_delete_charts(RRDHOST *host); + +extern int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected, time_t now); + +extern void rrdset_update_heterogeneous_flag(RRDSET *st); + +extern RRDSET *rrdset_find(RRDHOST *host, const char *id); +#define rrdset_find_localhost(id) rrdset_find(localhost, id) + +extern RRDSET *rrdset_find_bytype(RRDHOST *host, const char *type, const char *id); +#define rrdset_find_bytype_localhost(type, id) rrdset_find_bytype(localhost, type, id) + +extern RRDSET *rrdset_find_byname(RRDHOST *host, const char *name); +#define rrdset_find_byname_localhost(name) rrdset_find_byname(localhost, name) + +extern void rrdset_next_usec_unfiltered(RRDSET *st, usec_t microseconds); +extern void rrdset_next_usec(RRDSET *st, usec_t microseconds); +#define rrdset_next(st) rrdset_next_usec(st, 0ULL) + +extern void rrdset_done(RRDSET *st); + +extern void rrdset_is_obsolete(RRDSET *st); +extern void rrdset_isnot_obsolete(RRDSET *st); + +// checks if the RRDSET should be offered to viewers +#define rrdset_is_available_for_viewers(st) (rrdset_flag_check(st, RRDSET_FLAG_ENABLED) && !rrdset_flag_check(st, RRDSET_FLAG_HIDDEN) && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && (st)->dimensions && (st)->rrd_memory_mode != RRD_MEMORY_MODE_NONE) +#define rrdset_is_available_for_backends(st) (rrdset_flag_check(st, RRDSET_FLAG_ENABLED) && !rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) && (st)->dimensions) + +// get the total duration in seconds of the round robin database +#define rrdset_duration(st) ((time_t)( (((st)->counter >= ((unsigned long)(st)->entries))?(unsigned long)(st)->entries:(st)->counter) * (st)->update_every )) + +// get the timestamp of the last entry in the round robin database +#define rrdset_last_entry_t(st) ((time_t)(((st)->last_updated.tv_sec))) + +// get the timestamp of first entry in the round robin database +#define rrdset_first_entry_t(st) ((time_t)(rrdset_last_entry_t(st) - rrdset_duration(st))) + +// get the last slot updated in the round robin database +#define rrdset_last_slot(st) ((size_t)(((st)->current_entry == 0) ? (st)->entries - 1 : (st)->current_entry - 1)) + +// get the first / oldest slot updated in the round robin database +// #define rrdset_first_slot(st) ((size_t)( (((st)->counter >= ((unsigned long)(st)->entries)) ? (unsigned long)( ((unsigned long)(st)->current_entry > 0) ? ((unsigned long)(st)->current_entry) : ((unsigned long)(st)->entries) ) - 1 : 0) )) + +// return the slot that has the oldest value + +static inline size_t rrdset_first_slot(RRDSET *st) { + if(st->counter >= (size_t)st->entries) { + // the database has been rotated at least once + // the oldest entry is the one that will be next + // overwritten by data collection + return (size_t)st->current_entry; + } + + // we do not have rotated the db yet + // so 0 is the first entry + return 0; +} + +// get the slot of the round robin database, for the given timestamp (t) +// it always returns a valid slot, although may not be for the time requested if the time is outside the round robin database +static inline size_t rrdset_time2slot(RRDSET *st, time_t t) { + size_t ret = 0; + + if(t >= rrdset_last_entry_t(st)) { + // the requested time is after the last entry we have + ret = rrdset_last_slot(st); + } + else { + if(t <= rrdset_first_entry_t(st)) { + // the requested time is before the first entry we have + ret = rrdset_first_slot(st); + } + else { + if(rrdset_last_slot(st) >= ((rrdset_last_entry_t(st) - t) / (size_t)(st->update_every))) + ret = rrdset_last_slot(st) - ((rrdset_last_entry_t(st) - t) / (size_t)(st->update_every)); + else + ret = rrdset_last_slot(st) - ((rrdset_last_entry_t(st) - t) / (size_t)(st->update_every)) + (unsigned long)st->entries; + } + } + + if(unlikely(ret >= (size_t)st->entries)) { + error("INTERNAL ERROR: rrdset_time2slot() on %s returns values outside entries", st->name); + ret = (size_t)(st->entries - 1); + } + + return ret; +} + +// get the timestamp of a specific slot in the round robin database +static inline time_t rrdset_slot2time(RRDSET *st, size_t slot) { + time_t ret; + + if(slot >= (size_t)st->entries) { + error("INTERNAL ERROR: caller of rrdset_slot2time() gives invalid slot %zu", slot); + slot = (size_t)st->entries - 1; + } + + if(slot > rrdset_last_slot(st)) { + ret = rrdset_last_entry_t(st) - (size_t)st->update_every * (rrdset_last_slot(st) - slot + (size_t)st->entries); + } + else { + ret = rrdset_last_entry_t(st) - (size_t)st->update_every; + } + + if(unlikely(ret < rrdset_first_entry_t(st))) { + error("INTERNAL ERROR: rrdset_slot2time() on %s returns time too far in the past", st->name); + ret = rrdset_first_entry_t(st); + } + + if(unlikely(ret > rrdset_last_entry_t(st))) { + error("INTERNAL ERROR: rrdset_slot2time() on %s returns time into the future", st->name); + ret = rrdset_last_entry_t(st); + } + + return ret; +} + +// ---------------------------------------------------------------------------- +// RRD DIMENSION functions + +extern RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divisor, RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode); +#define rrddim_add(st, id, name, multiplier, divisor, algorithm) rrddim_add_custom(st, id, name, multiplier, divisor, algorithm, (st)->rrd_memory_mode) + +extern int rrddim_set_name(RRDSET *st, RRDDIM *rd, const char *name); +extern int rrddim_set_algorithm(RRDSET *st, RRDDIM *rd, RRD_ALGORITHM algorithm); +extern int rrddim_set_multiplier(RRDSET *st, RRDDIM *rd, collected_number multiplier); +extern int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor); + +extern RRDDIM *rrddim_find(RRDSET *st, const char *id); + +extern int rrddim_hide(RRDSET *st, const char *id); +extern int rrddim_unhide(RRDSET *st, const char *id); + +extern collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number value); +extern collected_number rrddim_set(RRDSET *st, const char *id, collected_number value); + +extern long align_entries_to_pagesize(RRD_MEMORY_MODE mode, long entries); + +// ---------------------------------------------------------------------------- +// RRD internal functions + +#ifdef NETDATA_RRD_INTERNALS + +extern avl_tree_lock rrdhost_root_index; + +extern char *rrdset_strncpyz_name(char *to, const char *from, size_t length); +extern char *rrdset_cache_dir(RRDHOST *host, const char *id, const char *config_section); + +extern void rrddim_free(RRDSET *st, RRDDIM *rd); + +extern int rrddim_compare(void* a, void* b); +extern int rrdset_compare(void* a, void* b); +extern int rrdset_compare_name(void* a, void* b); +extern int rrdfamily_compare(void *a, void *b); + +extern RRDFAMILY *rrdfamily_create(RRDHOST *host, const char *id); +extern void rrdfamily_free(RRDHOST *host, RRDFAMILY *rc); + +#define rrdset_index_add(host, st) (RRDSET *)avl_insert_lock(&((host)->rrdset_root_index), (avl *)(st)) +#define rrdset_index_del(host, st) (RRDSET *)avl_remove_lock(&((host)->rrdset_root_index), (avl *)(st)) +extern RRDSET *rrdset_index_del_name(RRDHOST *host, RRDSET *st); + +extern void rrdset_free(RRDSET *st); +extern void rrdset_reset(RRDSET *st); +extern void rrdset_save(RRDSET *st); +extern void rrdset_delete(RRDSET *st); + +extern void rrdhost_cleanup_obsolete_charts(RRDHOST *host); + +#endif /* NETDATA_RRD_INTERNALS */ + + +#endif /* NETDATA_RRD_H */ diff --git a/database/rrdcalc.c b/database/rrdcalc.c new file mode 100644 index 0000000..7f6a896 --- /dev/null +++ b/database/rrdcalc.c @@ -0,0 +1,429 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_HEALTH_INTERNALS +#include "rrd.h" + +// ---------------------------------------------------------------------------- +// RRDCALC management + +inline const char *rrdcalc_status2string(RRDCALC_STATUS status) { + switch(status) { + case RRDCALC_STATUS_REMOVED: + return "REMOVED"; + + case RRDCALC_STATUS_UNDEFINED: + return "UNDEFINED"; + + case RRDCALC_STATUS_UNINITIALIZED: + return "UNINITIALIZED"; + + case RRDCALC_STATUS_CLEAR: + return "CLEAR"; + + case RRDCALC_STATUS_RAISED: + return "RAISED"; + + case RRDCALC_STATUS_WARNING: + return "WARNING"; + + case RRDCALC_STATUS_CRITICAL: + return "CRITICAL"; + + default: + error("Unknown alarm status %d", status); + return "UNKNOWN"; + } +} + +static void rrdsetcalc_link(RRDSET *st, RRDCALC *rc) { + RRDHOST *host = st->rrdhost; + + debug(D_HEALTH, "Health linking alarm '%s.%s' to chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname); + + rc->last_status_change = now_realtime_sec(); + rc->rrdset = st; + + rc->rrdset_next = st->alarms; + rc->rrdset_prev = NULL; + + if(rc->rrdset_next) + rc->rrdset_next->rrdset_prev = rc; + + st->alarms = rc; + + if(rc->update_every < rc->rrdset->update_every) { + error("Health alarm '%s.%s' has update every %d, less than chart update every %d. Setting alarm update frequency to %d.", rc->rrdset->id, rc->name, rc->update_every, rc->rrdset->update_every, rc->rrdset->update_every); + rc->update_every = rc->rrdset->update_every; + } + + if(!isnan(rc->green) && isnan(st->green)) { + debug(D_HEALTH, "Health alarm '%s.%s' green threshold set from " CALCULATED_NUMBER_FORMAT_AUTO " to " CALCULATED_NUMBER_FORMAT_AUTO ".", rc->rrdset->id, rc->name, rc->rrdset->green, rc->green); + st->green = rc->green; + } + + if(!isnan(rc->red) && isnan(st->red)) { + debug(D_HEALTH, "Health alarm '%s.%s' red threshold set from " CALCULATED_NUMBER_FORMAT_AUTO " to " CALCULATED_NUMBER_FORMAT_AUTO ".", rc->rrdset->id, rc->name, rc->rrdset->red, rc->red); + st->red = rc->red; + } + + rc->local = rrdvar_create_and_index("local", &st->rrdvar_root_index, rc->name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_LOCAL_VAR, &rc->value); + rc->family = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rc->name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_FAMILY_VAR, &rc->value); + + char fullname[RRDVAR_MAX_LENGTH + 1]; + snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->id, rc->name); + rc->hostid = rrdvar_create_and_index("host", &host->rrdvar_root_index, fullname, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_HOST_CHARTID_VAR, &rc->value); + + snprintfz(fullname, RRDVAR_MAX_LENGTH, "%s.%s", st->name, rc->name); + rc->hostname = rrdvar_create_and_index("host", &host->rrdvar_root_index, fullname, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_RRDCALC_HOST_CHARTNAME_VAR, &rc->value); + + if(rc->hostid && !rc->hostname) + rc->hostid->options |= RRDVAR_OPTION_RRDCALC_HOST_CHARTNAME_VAR; + + if(!rc->units) rc->units = strdupz(st->units); + + { + time_t now = now_realtime_sec(); + health_alarm_log( + host, + rc->id, + rc->next_event_id++, + now, + rc->name, + rc->rrdset->id, + rc->rrdset->family, + rc->exec, + rc->recipient, + now - rc->last_status_change, + rc->old_value, + rc->value, + rc->status, + RRDCALC_STATUS_UNINITIALIZED, + rc->source, + rc->units, + rc->info, + 0, + 0 + ); + } +} + +static inline int rrdcalc_is_matching_this_rrdset(RRDCALC *rc, RRDSET *st) { + if( (rc->hash_chart == st->hash && !strcmp(rc->chart, st->id)) || + (rc->hash_chart == st->hash_name && !strcmp(rc->chart, st->name))) + return 1; + + return 0; +} + +// this has to be called while the RRDHOST is locked +inline void rrdsetcalc_link_matching(RRDSET *st) { + RRDHOST *host = st->rrdhost; + // debug(D_HEALTH, "find matching alarms for chart '%s'", st->id); + + RRDCALC *rc; + for(rc = host->alarms; rc ; rc = rc->next) { + if(unlikely(rc->rrdset)) + continue; + + if(unlikely(rrdcalc_is_matching_this_rrdset(rc, st))) + rrdsetcalc_link(st, rc); + } +} + +// this has to be called while the RRDHOST is locked +inline void rrdsetcalc_unlink(RRDCALC *rc) { + RRDSET *st = rc->rrdset; + + if(!st) { + debug(D_HEALTH, "Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name); + error("Requested to unlink RRDCALC '%s.%s' which is not linked to any RRDSET", rc->chart?rc->chart:"NOCHART", rc->name); + return; + } + + RRDHOST *host = st->rrdhost; + + { + time_t now = now_realtime_sec(); + health_alarm_log( + host, + rc->id, + rc->next_event_id++, + now, + rc->name, + rc->rrdset->id, + rc->rrdset->family, + rc->exec, + rc->recipient, + now - rc->last_status_change, + rc->old_value, + rc->value, + rc->status, + RRDCALC_STATUS_REMOVED, + rc->source, + rc->units, + rc->info, + 0, + 0 + ); + } + + debug(D_HEALTH, "Health unlinking alarm '%s.%s' from chart '%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, st->id, host->hostname); + + // unlink it + if(rc->rrdset_prev) + rc->rrdset_prev->rrdset_next = rc->rrdset_next; + + if(rc->rrdset_next) + rc->rrdset_next->rrdset_prev = rc->rrdset_prev; + + if(st->alarms == rc) + st->alarms = rc->rrdset_next; + + rc->rrdset_prev = rc->rrdset_next = NULL; + + rrdvar_free(host, &st->rrdvar_root_index, rc->local); + rc->local = NULL; + + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rc->family); + rc->family = NULL; + + rrdvar_free(host, &host->rrdvar_root_index, rc->hostid); + rc->hostid = NULL; + + rrdvar_free(host, &host->rrdvar_root_index, rc->hostname); + rc->hostname = NULL; + + rc->rrdset = NULL; + + // RRDCALC will remain in RRDHOST + // so that if the matching chart is found in the future + // it will be applied automatically +} + +RRDCALC *rrdcalc_find(RRDSET *st, const char *name) { + RRDCALC *rc; + uint32_t hash = simple_hash(name); + + for( rc = st->alarms; rc ; rc = rc->rrdset_next ) { + if(unlikely(rc->hash == hash && !strcmp(rc->name, name))) + return rc; + } + + return NULL; +} + +inline int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name) { + RRDCALC *rc; + + if(unlikely(!chart)) { + error("attempt to find RRDCALC '%s' without giving a chart name", name); + return 1; + } + + if(unlikely(!hash_chart)) hash_chart = simple_hash(chart); + if(unlikely(!hash_name)) hash_name = simple_hash(name); + + // make sure it does not already exist + for(rc = host->alarms; rc ; rc = rc->next) { + if (unlikely(rc->chart && rc->hash == hash_name && rc->hash_chart == hash_chart && !strcmp(name, rc->name) && !strcmp(chart, rc->chart))) { + debug(D_HEALTH, "Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname); + info("Health alarm '%s.%s' already exists in host '%s'.", chart, name, host->hostname); + return 1; + } + } + + return 0; +} + +inline uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id) { + if(chart && name) { + uint32_t hash_chart = simple_hash(chart); + uint32_t hash_name = simple_hash(name); + + // re-use old IDs, by looking them up in the alarm log + ALARM_ENTRY *ae; + for(ae = host->health_log.alarms; ae ;ae = ae->next) { + if(unlikely(ae->hash_name == hash_name && ae->hash_chart == hash_chart && !strcmp(name, ae->name) && !strcmp(chart, ae->chart))) { + if(next_event_id) *next_event_id = ae->alarm_event_id + 1; + return ae->alarm_id; + } + } + } + + return host->health_log.next_alarm_id++; +} + +inline void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc) { + rrdhost_check_rdlock(host); + + if(rc->calculation) { + rc->calculation->status = &rc->status; + rc->calculation->this = &rc->value; + rc->calculation->after = &rc->db_after; + rc->calculation->before = &rc->db_before; + rc->calculation->rrdcalc = rc; + } + + if(rc->warning) { + rc->warning->status = &rc->status; + rc->warning->this = &rc->value; + rc->warning->after = &rc->db_after; + rc->warning->before = &rc->db_before; + rc->warning->rrdcalc = rc; + } + + if(rc->critical) { + rc->critical->status = &rc->status; + rc->critical->this = &rc->value; + rc->critical->after = &rc->db_after; + rc->critical->before = &rc->db_before; + rc->critical->rrdcalc = rc; + } + + // link it to the host + if(likely(host->alarms)) { + // append it + RRDCALC *t; + for(t = host->alarms; t && t->next ; t = t->next) ; + t->next = rc; + } + else { + host->alarms = rc; + } + + // link it to its chart + RRDSET *st; + rrdset_foreach_read(st, host) { + if(rrdcalc_is_matching_this_rrdset(rc, st)) { + rrdsetcalc_link(st, rc); + break; + } + } +} + +inline RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart) { + + debug(D_HEALTH, "Health creating dynamic alarm (from template) '%s.%s'", chart, rt->name); + + if(rrdcalc_exists(host, chart, rt->name, 0, 0)) + return NULL; + + RRDCALC *rc = callocz(1, sizeof(RRDCALC)); + rc->next_event_id = 1; + rc->id = rrdcalc_get_unique_id(host, chart, rt->name, &rc->next_event_id); + rc->name = strdupz(rt->name); + rc->hash = simple_hash(rc->name); + rc->chart = strdupz(chart); + rc->hash_chart = simple_hash(rc->chart); + + if(rt->dimensions) rc->dimensions = strdupz(rt->dimensions); + + rc->green = rt->green; + rc->red = rt->red; + rc->value = NAN; + rc->old_value = NAN; + + rc->delay_up_duration = rt->delay_up_duration; + rc->delay_down_duration = rt->delay_down_duration; + rc->delay_max_duration = rt->delay_max_duration; + rc->delay_multiplier = rt->delay_multiplier; + + rc->group = rt->group; + rc->after = rt->after; + rc->before = rt->before; + rc->update_every = rt->update_every; + rc->options = rt->options; + + if(rt->exec) rc->exec = strdupz(rt->exec); + if(rt->recipient) rc->recipient = strdupz(rt->recipient); + if(rt->source) rc->source = strdupz(rt->source); + if(rt->units) rc->units = strdupz(rt->units); + if(rt->info) rc->info = strdupz(rt->info); + + if(rt->calculation) { + rc->calculation = expression_parse(rt->calculation->source, NULL, NULL); + if(!rc->calculation) + error("Health alarm '%s.%s': failed to parse calculation expression '%s'", chart, rt->name, rt->calculation->source); + } + if(rt->warning) { + rc->warning = expression_parse(rt->warning->source, NULL, NULL); + if(!rc->warning) + error("Health alarm '%s.%s': failed to re-parse warning expression '%s'", chart, rt->name, rt->warning->source); + } + if(rt->critical) { + rc->critical = expression_parse(rt->critical->source, NULL, NULL); + if(!rc->critical) + error("Health alarm '%s.%s': failed to re-parse critical expression '%s'", chart, rt->name, rt->critical->source); + } + + debug(D_HEALTH, "Health runtime added alarm '%s.%s': exec '%s', recipient '%s', green " CALCULATED_NUMBER_FORMAT_AUTO ", red " CALCULATED_NUMBER_FORMAT_AUTO ", lookup: group %d, after %d, before %d, options %u, dimensions '%s', update every %d, calculation '%s', warning '%s', critical '%s', source '%s', delay up %d, delay down %d, delay max %d, delay_multiplier %f", + (rc->chart)?rc->chart:"NOCHART", + rc->name, + (rc->exec)?rc->exec:"DEFAULT", + (rc->recipient)?rc->recipient:"DEFAULT", + rc->green, + rc->red, + (int)rc->group, + rc->after, + rc->before, + rc->options, + (rc->dimensions)?rc->dimensions:"NONE", + rc->update_every, + (rc->calculation)?rc->calculation->parsed_as:"NONE", + (rc->warning)?rc->warning->parsed_as:"NONE", + (rc->critical)?rc->critical->parsed_as:"NONE", + rc->source, + rc->delay_up_duration, + rc->delay_down_duration, + rc->delay_max_duration, + rc->delay_multiplier + ); + + rrdcalc_create_part2(host, rc); + return rc; +} + +void rrdcalc_free(RRDCALC *rc) { + if(unlikely(!rc)) return; + + expression_free(rc->calculation); + expression_free(rc->warning); + expression_free(rc->critical); + + freez(rc->name); + freez(rc->chart); + freez(rc->family); + freez(rc->dimensions); + freez(rc->exec); + freez(rc->recipient); + freez(rc->source); + freez(rc->units); + freez(rc->info); + freez(rc); +} + +void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc) { + if(unlikely(!rc)) return; + + debug(D_HEALTH, "Health removing alarm '%s.%s' of host '%s'", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname); + + // unlink it from RRDSET + if(rc->rrdset) rrdsetcalc_unlink(rc); + + // unlink it from RRDHOST + if(unlikely(rc == host->alarms)) + host->alarms = rc->next; + + else { + RRDCALC *t; + for(t = host->alarms; t && t->next != rc; t = t->next) ; + if(t) { + t->next = rc->next; + rc->next = NULL; + } + else + error("Cannot unlink alarm '%s.%s' from host '%s': not found", rc->chart?rc->chart:"NOCHART", rc->name, host->hostname); + } + + rrdcalc_free(rc); +} diff --git a/database/rrdcalc.h b/database/rrdcalc.h new file mode 100644 index 0000000..4df4381 --- /dev/null +++ b/database/rrdcalc.h @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrd.h" + +#ifndef NETDATA_RRDCALC_H +#define NETDATA_RRDCALC_H 1 + +// calculated variables (defined in health configuration) +// These aggregate time-series data at fixed intervals +// (defined in their update_every member below) +// They increase the overhead of netdata. +// +// These calculations are allocated and linked (->next) +// under RRDHOST. +// Then are also linked to RRDSET (of course only when the +// chart is found, via ->rrdset_next and ->rrdset_prev). +// This double-linked list is maintained sorted at all times +// having as RRDSET.calculations the RRDCALC to be processed +// next. + +#define RRDCALC_FLAG_DB_ERROR 0x00000001 +#define RRDCALC_FLAG_DB_NAN 0x00000002 +/* #define RRDCALC_FLAG_DB_STALE 0x00000004 */ +#define RRDCALC_FLAG_CALC_ERROR 0x00000008 +#define RRDCALC_FLAG_WARN_ERROR 0x00000010 +#define RRDCALC_FLAG_CRIT_ERROR 0x00000020 +#define RRDCALC_FLAG_RUNNABLE 0x00000040 +#define RRDCALC_FLAG_DISABLED 0x00000080 +#define RRDCALC_FLAG_SILENCED 0x00000100 +#define RRDCALC_FLAG_NO_CLEAR_NOTIFICATION 0x80000000 + +struct rrdcalc { + uint32_t id; // the unique id of this alarm + uint32_t next_event_id; // the next event id that will be used for this alarm + + char *name; // the name of this alarm + uint32_t hash; + + char *exec; // the command to execute when this alarm switches state + char *recipient; // the recipient of the alarm (the first parameter to exec) + + char *chart; // the chart id this should be linked to + uint32_t hash_chart; + + char *source; // the source of this alarm + char *units; // the units of the alarm + char *info; // a short description of the alarm + + int update_every; // update frequency for the alarm + + // the red and green threshold of this alarm (to be set to the chart) + calculated_number green; + calculated_number red; + + // ------------------------------------------------------------------------ + // database lookup settings + + char *dimensions; // the chart dimensions + RRDR_GROUPING group; // grouping method: average, max, etc. + int before; // ending point in time-series + int after; // starting point in time-series + uint32_t options; // calculation options + + // ------------------------------------------------------------------------ + // expressions related to the alarm + + EVAL_EXPRESSION *calculation; // expression to calculate the value of the alarm + EVAL_EXPRESSION *warning; // expression to check the warning condition + EVAL_EXPRESSION *critical; // expression to check the critical condition + + // ------------------------------------------------------------------------ + // notification delay settings + + int delay_up_duration; // duration to delay notifications when alarm raises + int delay_down_duration; // duration to delay notifications when alarm lowers + int delay_max_duration; // the absolute max delay to apply to this alarm + float delay_multiplier; // multiplier for all delays when alarms switch status + // while now < delay_up_to + + // ------------------------------------------------------------------------ + // runtime information + + RRDCALC_STATUS status; // the current status of the alarm + + calculated_number value; // the current value of the alarm + calculated_number old_value; // the previous value of the alarm + + uint32_t rrdcalc_flags; // check RRDCALC_FLAG_* + + time_t last_updated; // the last update timestamp of the alarm + time_t next_update; // the next update timestamp of the alarm + time_t last_status_change; // the timestamp of the last time this alarm changed status + + time_t db_after; // the first timestamp evaluated by the db lookup + time_t db_before; // the last timestamp evaluated by the db lookup + + time_t delay_up_to_timestamp; // the timestamp up to which we should delay notifications + int delay_up_current; // the current up notification delay duration + int delay_down_current; // the current down notification delay duration + int delay_last; // the last delay we used + + // ------------------------------------------------------------------------ + // variables this alarm exposes to the rest of the alarms + + RRDVAR *local; + RRDVAR *family; + RRDVAR *hostid; + RRDVAR *hostname; + + // ------------------------------------------------------------------------ + // the chart this alarm it is linked to + + struct rrdset *rrdset; + + // linking of this alarm on its chart + struct rrdcalc *rrdset_next; + struct rrdcalc *rrdset_prev; + + struct rrdcalc *next; +}; + +#define RRDCALC_HAS_DB_LOOKUP(rc) ((rc)->after) + +extern void rrdsetcalc_link_matching(RRDSET *st); +extern void rrdsetcalc_unlink(RRDCALC *rc); +extern RRDCALC *rrdcalc_find(RRDSET *st, const char *name); + +extern const char *rrdcalc_status2string(RRDCALC_STATUS status); + +extern void rrdcalc_free(RRDCALC *rc); +extern void rrdcalc_unlink_and_free(RRDHOST *host, RRDCALC *rc); + +extern int rrdcalc_exists(RRDHOST *host, const char *chart, const char *name, uint32_t hash_chart, uint32_t hash_name); +extern uint32_t rrdcalc_get_unique_id(RRDHOST *host, const char *chart, const char *name, uint32_t *next_event_id); +extern RRDCALC *rrdcalc_create(RRDHOST *host, RRDCALCTEMPLATE *rt, const char *chart); +extern void rrdcalc_create_part2(RRDHOST *host, RRDCALC *rc); + +#endif //NETDATA_RRDCALC_H diff --git a/database/rrdcalctemplate.c b/database/rrdcalctemplate.c new file mode 100644 index 0000000..ba7e7ec --- /dev/null +++ b/database/rrdcalctemplate.c @@ -0,0 +1,71 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_HEALTH_INTERNALS +#include "rrd.h" + +// ---------------------------------------------------------------------------- +// RRDCALCTEMPLATE management + +void rrdcalctemplate_link_matching(RRDSET *st) { + RRDHOST *host = st->rrdhost; + RRDCALCTEMPLATE *rt; + + for(rt = host->templates; rt ; rt = rt->next) { + if(rt->hash_context == st->hash_context && !strcmp(rt->context, st->context) + && (!rt->family_pattern || simple_pattern_matches(rt->family_pattern, st->family))) { + RRDCALC *rc = rrdcalc_create(host, rt, st->id); + if(unlikely(!rc)) + info("Health tried to create alarm from template '%s' on chart '%s' of host '%s', but it failed", rt->name, st->id, host->hostname); + +#ifdef NETDATA_INTERNAL_CHECKS + else if(rc->rrdset != st) + error("Health alarm '%s.%s' should be linked to chart '%s', but it is not", rc->chart?rc->chart:"NOCHART", rc->name, st->id); +#endif + } + } +} + +inline void rrdcalctemplate_free(RRDCALCTEMPLATE *rt) { + if(unlikely(!rt)) return; + + expression_free(rt->calculation); + expression_free(rt->warning); + expression_free(rt->critical); + + freez(rt->family_match); + simple_pattern_free(rt->family_pattern); + + freez(rt->name); + freez(rt->exec); + freez(rt->recipient); + freez(rt->context); + freez(rt->source); + freez(rt->units); + freez(rt->info); + freez(rt->dimensions); + freez(rt); +} + +inline void rrdcalctemplate_unlink_and_free(RRDHOST *host, RRDCALCTEMPLATE *rt) { + if(unlikely(!rt)) return; + + debug(D_HEALTH, "Health removing template '%s' of host '%s'", rt->name, host->hostname); + + if(host->templates == rt) { + host->templates = rt->next; + } + else { + RRDCALCTEMPLATE *t; + for (t = host->templates; t && t->next != rt; t = t->next ) ; + if(t) { + t->next = rt->next; + rt->next = NULL; + } + else + error("Cannot find RRDCALCTEMPLATE '%s' linked in host '%s'", rt->name, host->hostname); + } + + rrdcalctemplate_free(rt); +} + + diff --git a/database/rrdcalctemplate.h b/database/rrdcalctemplate.h new file mode 100644 index 0000000..b8996bc --- /dev/null +++ b/database/rrdcalctemplate.h @@ -0,0 +1,68 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDCALCTEMPLATE_H +#define NETDATA_RRDCALCTEMPLATE_H 1 + +#include "rrd.h" + +// RRDCALCTEMPLATE +// these are to be applied to charts found dynamically +// based on their context. +struct rrdcalctemplate { + char *name; + uint32_t hash_name; + + char *exec; + char *recipient; + + char *context; + uint32_t hash_context; + + char *family_match; + SIMPLE_PATTERN *family_pattern; + + char *source; // the source of this alarm + char *units; // the units of the alarm + char *info; // a short description of the alarm + + int update_every; // update frequency for the alarm + + // the red and green threshold of this alarm (to be set to the chart) + calculated_number green; + calculated_number red; + + // ------------------------------------------------------------------------ + // database lookup settings + + char *dimensions; // the chart dimensions + RRDR_GROUPING group; // grouping method: average, max, etc. + int before; // ending point in time-series + int after; // starting point in time-series + uint32_t options; // calculation options + + // ------------------------------------------------------------------------ + // notification delay settings + + int delay_up_duration; // duration to delay notifications when alarm raises + int delay_down_duration; // duration to delay notifications when alarm lowers + int delay_max_duration; // the absolute max delay to apply to this alarm + float delay_multiplier; // multiplier for all delays when alarms switch status + + // ------------------------------------------------------------------------ + // expressions related to the alarm + + EVAL_EXPRESSION *calculation; + EVAL_EXPRESSION *warning; + EVAL_EXPRESSION *critical; + + struct rrdcalctemplate *next; +}; + +#define RRDCALCTEMPLATE_HAS_DB_LOOKUP(rt) ((rt)->after) + +extern void rrdcalctemplate_link_matching(RRDSET *st); + +extern void rrdcalctemplate_free(RRDCALCTEMPLATE *rt); +extern void rrdcalctemplate_unlink_and_free(RRDHOST *host, RRDCALCTEMPLATE *rt); + +#endif //NETDATA_RRDCALCTEMPLATE_H diff --git a/database/rrddim.c b/database/rrddim.c new file mode 100644 index 0000000..e98f702 --- /dev/null +++ b/database/rrddim.c @@ -0,0 +1,401 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS +#include "rrd.h" + +// ---------------------------------------------------------------------------- +// RRDDIM index + +int rrddim_compare(void* a, void* b) { + if(((RRDDIM *)a)->hash < ((RRDDIM *)b)->hash) return -1; + else if(((RRDDIM *)a)->hash > ((RRDDIM *)b)->hash) return 1; + else return strcmp(((RRDDIM *)a)->id, ((RRDDIM *)b)->id); +} + +#define rrddim_index_add(st, rd) (RRDDIM *)avl_insert_lock(&((st)->dimensions_index), (avl *)(rd)) +#define rrddim_index_del(st,rd ) (RRDDIM *)avl_remove_lock(&((st)->dimensions_index), (avl *)(rd)) + +static inline RRDDIM *rrddim_index_find(RRDSET *st, const char *id, uint32_t hash) { + RRDDIM tmp = { + .id = id, + .hash = (hash)?hash:simple_hash(id) + }; + return (RRDDIM *)avl_search_lock(&(st->dimensions_index), (avl *) &tmp); +} + + +// ---------------------------------------------------------------------------- +// RRDDIM - find a dimension + +inline RRDDIM *rrddim_find(RRDSET *st, const char *id) { + debug(D_RRD_CALLS, "rrddim_find() for chart %s, dimension %s", st->name, id); + + return rrddim_index_find(st, id, 0); +} + + +// ---------------------------------------------------------------------------- +// RRDDIM rename a dimension + +inline int rrddim_set_name(RRDSET *st, RRDDIM *rd, const char *name) { + if(unlikely(!name || !*name || !strcmp(rd->name, name))) + return 0; + + debug(D_RRD_CALLS, "rrddim_set_name() from %s.%s to %s.%s", st->name, rd->name, st->name, name); + + char varname[CONFIG_MAX_NAME + 1]; + snprintfz(varname, CONFIG_MAX_NAME, "dim %s name", rd->id); + rd->name = config_set_default(st->config_section, varname, name); + rd->hash_name = simple_hash(rd->name); + rrddimvar_rename_all(rd); + rd->exposed = 0; + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + return 1; +} + +inline int rrddim_set_algorithm(RRDSET *st, RRDDIM *rd, RRD_ALGORITHM algorithm) { + if(unlikely(rd->algorithm == algorithm)) + return 0; + + debug(D_RRD_CALLS, "Updating algorithm of dimension '%s/%s' from %s to %s", st->id, rd->name, rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(algorithm)); + rd->algorithm = algorithm; + rd->exposed = 0; + rrdset_flag_set(st, RRDSET_FLAG_HOMEGENEOUS_CHECK); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + return 1; +} + +inline int rrddim_set_multiplier(RRDSET *st, RRDDIM *rd, collected_number multiplier) { + if(unlikely(rd->multiplier == multiplier)) + return 0; + + debug(D_RRD_CALLS, "Updating multiplier of dimension '%s/%s' from " COLLECTED_NUMBER_FORMAT " to " COLLECTED_NUMBER_FORMAT, st->id, rd->name, rd->multiplier, multiplier); + rd->multiplier = multiplier; + rd->exposed = 0; + rrdset_flag_set(st, RRDSET_FLAG_HOMEGENEOUS_CHECK); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + return 1; +} + +inline int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, collected_number divisor) { + if(unlikely(rd->divisor == divisor)) + return 0; + + debug(D_RRD_CALLS, "Updating divisor of dimension '%s/%s' from " COLLECTED_NUMBER_FORMAT " to " COLLECTED_NUMBER_FORMAT, st->id, rd->name, rd->divisor, divisor); + rd->divisor = divisor; + rd->exposed = 0; + rrdset_flag_set(st, RRDSET_FLAG_HOMEGENEOUS_CHECK); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + return 1; +} + +// ---------------------------------------------------------------------------- +// RRDDIM create a dimension + +RRDDIM *rrddim_add_custom(RRDSET *st, const char *id, const char *name, collected_number multiplier, collected_number divisor, RRD_ALGORITHM algorithm, RRD_MEMORY_MODE memory_mode) { + rrdset_wrlock(st); + + rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + RRDDIM *rd = rrddim_find(st, id); + if(unlikely(rd)) { + debug(D_RRD_CALLS, "Cannot create rrd dimension '%s/%s', it already exists.", st->id, name?name:"<NONAME>"); + + rrddim_set_name(st, rd, name); + rrddim_set_algorithm(st, rd, algorithm); + rrddim_set_multiplier(st, rd, multiplier); + rrddim_set_divisor(st, rd, divisor); + + rrdset_unlock(st); + return rd; + } + + RRDHOST *host = st->rrdhost; + char filename[FILENAME_MAX + 1]; + char fullfilename[FILENAME_MAX + 1]; + + char varname[CONFIG_MAX_NAME + 1]; + unsigned long size = sizeof(RRDDIM) + (st->entries * sizeof(storage_number)); + + debug(D_RRD_CALLS, "Adding dimension '%s/%s'.", st->id, id); + + rrdset_strncpyz_name(filename, id, FILENAME_MAX); + snprintfz(fullfilename, FILENAME_MAX, "%s/%s.db", st->cache_dir, filename); + + if(memory_mode == RRD_MEMORY_MODE_SAVE || memory_mode == RRD_MEMORY_MODE_MAP || memory_mode == RRD_MEMORY_MODE_RAM) { + rd = (RRDDIM *)mymmap( + (memory_mode == RRD_MEMORY_MODE_RAM)?NULL:fullfilename + , size + , ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE) + , 1 + ); + + if(likely(rd)) { + // we have a file mapped for rd + + memset(&rd->avl, 0, sizeof(avl)); + rd->id = NULL; + rd->name = NULL; + rd->cache_filename = NULL; + rd->variables = NULL; + rd->next = NULL; + rd->rrdset = NULL; + rd->exposed = 0; + + struct timeval now; + now_realtime_timeval(&now); + + if(memory_mode == RRD_MEMORY_MODE_RAM) { + memset(rd, 0, size); + } + else { + int reset = 0; + + if(strcmp(rd->magic, RRDDIMENSION_MAGIC) != 0) { + info("Initializing file %s.", fullfilename); + memset(rd, 0, size); + reset = 1; + } + else if(rd->memsize != size) { + error("File %s does not have the desired size, expected %lu but found %lu. Clearing it.", fullfilename, size, rd->memsize); + memset(rd, 0, size); + reset = 1; + } + else if(rd->update_every != st->update_every) { + error("File %s does not have the same update frequency, expected %d but found %d. Clearing it.", fullfilename, st->update_every, rd->update_every); + memset(rd, 0, size); + reset = 1; + } + else if(dt_usec(&now, &rd->last_collected_time) > (rd->entries * rd->update_every * USEC_PER_SEC)) { + info("File %s is too old (last collected %llu seconds ago, but the database is %ld seconds). Clearing it.", fullfilename, dt_usec(&now, &rd->last_collected_time) / USEC_PER_SEC, rd->entries * rd->update_every); + memset(rd, 0, size); + reset = 1; + } + + if(!reset) { + if(rd->algorithm != algorithm) { + info("File %s does not have the expected algorithm (expected %u '%s', found %u '%s'). Previous values may be wrong.", + fullfilename, algorithm, rrd_algorithm_name(algorithm), rd->algorithm, rrd_algorithm_name(rd->algorithm)); + } + + if(rd->multiplier != multiplier) { + info("File %s does not have the expected multiplier (expected " COLLECTED_NUMBER_FORMAT ", found " COLLECTED_NUMBER_FORMAT "). Previous values may be wrong.", fullfilename, multiplier, rd->multiplier); + } + + if(rd->divisor != divisor) { + info("File %s does not have the expected divisor (expected " COLLECTED_NUMBER_FORMAT ", found " COLLECTED_NUMBER_FORMAT "). Previous values may be wrong.", fullfilename, divisor, rd->divisor); + } + } + } + + // make sure we have the right memory mode + // even if we cleared the memory + rd->rrd_memory_mode = memory_mode; + } + } + + if(unlikely(!rd)) { + // if we didn't manage to get a mmap'd dimension, just create one + rd = callocz(1, size); + rd->rrd_memory_mode = (memory_mode == RRD_MEMORY_MODE_NONE) ? RRD_MEMORY_MODE_NONE : RRD_MEMORY_MODE_ALLOC; + } + + rd->memsize = size; + + strcpy(rd->magic, RRDDIMENSION_MAGIC); + + rd->id = strdupz(id); + rd->hash = simple_hash(rd->id); + + rd->cache_filename = strdupz(fullfilename); + + snprintfz(varname, CONFIG_MAX_NAME, "dim %s name", rd->id); + rd->name = config_get(st->config_section, varname, (name && *name)?name:rd->id); + rd->hash_name = simple_hash(rd->name); + + snprintfz(varname, CONFIG_MAX_NAME, "dim %s algorithm", rd->id); + rd->algorithm = rrd_algorithm_id(config_get(st->config_section, varname, rrd_algorithm_name(algorithm))); + + snprintfz(varname, CONFIG_MAX_NAME, "dim %s multiplier", rd->id); + rd->multiplier = config_get_number(st->config_section, varname, multiplier); + + snprintfz(varname, CONFIG_MAX_NAME, "dim %s divisor", rd->id); + rd->divisor = config_get_number(st->config_section, varname, divisor); + if(!rd->divisor) rd->divisor = 1; + + rd->entries = st->entries; + rd->update_every = st->update_every; + + if(rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)) + rd->collections_counter = 1; + else + rd->collections_counter = 0; + + rd->updated = 0; + rd->flags = 0x00000000; + + rd->calculated_value = 0; + rd->last_calculated_value = 0; + rd->collected_value = 0; + rd->last_collected_value = 0; + rd->collected_value_max = 0; + rd->collected_volume = 0; + rd->stored_volume = 0; + rd->last_stored_value = 0; + rd->values[st->current_entry] = SN_EMPTY_SLOT; // pack_storage_number(0, SN_NOT_EXISTS); + rd->last_collected_time.tv_sec = 0; + rd->last_collected_time.tv_usec = 0; + rd->rrdset = st; + + // append this dimension + if(!st->dimensions) + st->dimensions = rd; + else { + RRDDIM *td = st->dimensions; + + if(td->algorithm != rd->algorithm || abs(td->multiplier) != abs(rd->multiplier) || abs(td->divisor) != abs(rd->divisor)) { + if(!rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS)) { + #ifdef NETDATA_INTERNAL_CHECKS + info("Dimension '%s' added on chart '%s' of host '%s' is not homogeneous to other dimensions already present (algorithm is '%s' vs '%s', multiplier is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ", divisor is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ").", + rd->name, + st->name, + host->hostname, + rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(td->algorithm), + rd->multiplier, td->multiplier, + rd->divisor, td->divisor + ); + #endif + rrdset_flag_set(st, RRDSET_FLAG_HETEROGENEOUS); + } + } + + for(; td->next; td = td->next) ; + td->next = rd; + } + + if(host->health_enabled) { + rrddimvar_create(rd, RRDVAR_TYPE_CALCULATED, NULL, NULL, &rd->last_stored_value, RRDVAR_OPTION_DEFAULT); + rrddimvar_create(rd, RRDVAR_TYPE_COLLECTED, NULL, "_raw", &rd->last_collected_value, RRDVAR_OPTION_DEFAULT); + rrddimvar_create(rd, RRDVAR_TYPE_TIME_T, NULL, "_last_collected_t", &rd->last_collected_time.tv_sec, RRDVAR_OPTION_DEFAULT); + } + + if(unlikely(rrddim_index_add(st, rd) != rd)) + error("RRDDIM: INTERNAL ERROR: attempt to index duplicate dimension '%s' on chart '%s'", rd->id, st->id); + + rrdset_unlock(st); + return(rd); +} + +// ---------------------------------------------------------------------------- +// RRDDIM remove / free a dimension + +void rrddim_free(RRDSET *st, RRDDIM *rd) +{ + debug(D_RRD_CALLS, "rrddim_free() %s.%s", st->name, rd->name); + + if(rd == st->dimensions) + st->dimensions = rd->next; + else { + RRDDIM *i; + for (i = st->dimensions; i && i->next != rd; i = i->next) ; + + if (i && i->next == rd) + i->next = rd->next; + else + error("Request to free dimension '%s.%s' but it is not linked.", st->id, rd->name); + } + rd->next = NULL; + + while(rd->variables) + rrddimvar_free(rd->variables); + + if(unlikely(rrddim_index_del(st, rd) != rd)) + error("RRDDIM: INTERNAL ERROR: attempt to remove from index dimension '%s' on chart '%s', removed a different dimension.", rd->id, st->id); + + // free(rd->annotations); + + switch(rd->rrd_memory_mode) { + case RRD_MEMORY_MODE_SAVE: + case RRD_MEMORY_MODE_MAP: + case RRD_MEMORY_MODE_RAM: + debug(D_RRD_CALLS, "Unmapping dimension '%s'.", rd->name); + freez((void *)rd->id); + freez(rd->cache_filename); + munmap(rd, rd->memsize); + break; + + case RRD_MEMORY_MODE_ALLOC: + case RRD_MEMORY_MODE_NONE: + debug(D_RRD_CALLS, "Removing dimension '%s'.", rd->name); + freez((void *)rd->id); + freez(rd->cache_filename); + freez(rd); + break; + } +} + + +// ---------------------------------------------------------------------------- +// RRDDIM - set dimension options + +int rrddim_hide(RRDSET *st, const char *id) { + debug(D_RRD_CALLS, "rrddim_hide() for chart %s, dimension %s", st->name, id); + + RRDHOST *host = st->rrdhost; + + RRDDIM *rd = rrddim_find(st, id); + if(unlikely(!rd)) { + error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, host->hostname); + return 1; + } + + rrddim_flag_set(rd, RRDDIM_FLAG_HIDDEN); + return 0; +} + +int rrddim_unhide(RRDSET *st, const char *id) { + debug(D_RRD_CALLS, "rrddim_unhide() for chart %s, dimension %s", st->name, id); + + RRDHOST *host = st->rrdhost; + RRDDIM *rd = rrddim_find(st, id); + if(unlikely(!rd)) { + error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, host->hostname); + return 1; + } + + rrddim_flag_clear(rd, RRDDIM_FLAG_HIDDEN); + return 0; +} + + +// ---------------------------------------------------------------------------- +// RRDDIM - collect values for a dimension + +inline collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number value) { + debug(D_RRD_CALLS, "rrddim_set_by_pointer() for chart %s, dimension %s, value " COLLECTED_NUMBER_FORMAT, st->name, rd->name, value); + + now_realtime_timeval(&rd->last_collected_time); + rd->collected_value = value; + rd->updated = 1; + + rd->collections_counter++; + + collected_number v = (value >= 0) ? value : -value; + if(unlikely(v > rd->collected_value_max)) rd->collected_value_max = v; + + // fprintf(stderr, "%s.%s %llu " COLLECTED_NUMBER_FORMAT " dt %0.6f" " rate " CALCULATED_NUMBER_FORMAT "\n", st->name, rd->name, st->usec_since_last_update, value, (float)((double)st->usec_since_last_update / (double)1000000), (calculated_number)((value - rd->last_collected_value) * (calculated_number)rd->multiplier / (calculated_number)rd->divisor * 1000000.0 / (calculated_number)st->usec_since_last_update)); + + return rd->last_collected_value; +} + +collected_number rrddim_set(RRDSET *st, const char *id, collected_number value) { + RRDHOST *host = st->rrdhost; + RRDDIM *rd = rrddim_find(st, id); + if(unlikely(!rd)) { + error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, st->name, st->id, host->hostname); + return 0; + } + + return rrddim_set_by_pointer(st, rd, value); +} diff --git a/database/rrddimvar.c b/database/rrddimvar.c new file mode 100644 index 0000000..3c2ed75 --- /dev/null +++ b/database/rrddimvar.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_HEALTH_INTERNALS +#include "rrd.h" + +// ---------------------------------------------------------------------------- +// RRDDIMVAR management +// DIMENSION VARIABLES + +#define RRDDIMVAR_ID_MAX 1024 + +static inline void rrddimvar_free_variables(RRDDIMVAR *rs) { + RRDDIM *rd = rs->rrddim; + RRDSET *st = rd->rrdset; + RRDHOST *host = st->rrdhost; + + // CHART VARIABLES FOR THIS DIMENSION + + rrdvar_free(host, &st->rrdvar_root_index, rs->var_local_id); + rs->var_local_id = NULL; + + rrdvar_free(host, &st->rrdvar_root_index, rs->var_local_name); + rs->var_local_name = NULL; + + // FAMILY VARIABLES FOR THIS DIMENSION + + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_id); + rs->var_family_id = NULL; + + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_name); + rs->var_family_name = NULL; + + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_contextid); + rs->var_family_contextid = NULL; + + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_contextname); + rs->var_family_contextname = NULL; + + // HOST VARIABLES FOR THIS DIMENSION + + rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_chartidid); + rs->var_host_chartidid = NULL; + + rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_chartidname); + rs->var_host_chartidname = NULL; + + rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_chartnameid); + rs->var_host_chartnameid = NULL; + + rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_chartnamename); + rs->var_host_chartnamename = NULL; + + // KEYS + + freez(rs->key_id); + rs->key_id = NULL; + + freez(rs->key_name); + rs->key_name = NULL; + + freez(rs->key_fullidid); + rs->key_fullidid = NULL; + + freez(rs->key_fullidname); + rs->key_fullidname = NULL; + + freez(rs->key_contextid); + rs->key_contextid = NULL; + + freez(rs->key_contextname); + rs->key_contextname = NULL; + + freez(rs->key_fullnameid); + rs->key_fullnameid = NULL; + + freez(rs->key_fullnamename); + rs->key_fullnamename = NULL; +} + +static inline void rrddimvar_create_variables(RRDDIMVAR *rs) { + rrddimvar_free_variables(rs); + + RRDDIM *rd = rs->rrddim; + RRDSET *st = rd->rrdset; + RRDHOST *host = st->rrdhost; + + char buffer[RRDDIMVAR_ID_MAX + 1]; + + // KEYS + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s%s%s", rs->prefix, rd->id, rs->suffix); + rs->key_id = strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s%s%s", rs->prefix, rd->name, rs->suffix); + rs->key_name = strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", st->id, rs->key_id); + rs->key_fullidid = strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", st->id, rs->key_name); + rs->key_fullidname = strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", st->context, rs->key_id); + rs->key_contextid = strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", st->context, rs->key_name); + rs->key_contextname = strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", st->name, rs->key_id); + rs->key_fullnameid = strdupz(buffer); + + snprintfz(buffer, RRDDIMVAR_ID_MAX, "%s.%s", st->name, rs->key_name); + rs->key_fullnamename = strdupz(buffer); + + // CHART VARIABLES FOR THIS DIMENSION + // ----------------------------------- + // + // dimensions are available as: + // - $id + // - $name + + rs->var_local_id = rrdvar_create_and_index("local", &st->rrdvar_root_index, rs->key_id, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); + rs->var_local_name = rrdvar_create_and_index("local", &st->rrdvar_root_index, rs->key_name, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); + + // FAMILY VARIABLES FOR THIS DIMENSION + // ----------------------------------- + // + // dimensions are available as: + // - $id (only the first, when multiple overlap) + // - $name (only the first, when multiple overlap) + // - $chart-context.id + // - $chart-context.name + + rs->var_family_id = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rs->key_id, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); + rs->var_family_name = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rs->key_name, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); + rs->var_family_contextid = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rs->key_contextid, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); + rs->var_family_contextname = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rs->key_contextname, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); + + // HOST VARIABLES FOR THIS DIMENSION + // ----------------------------------- + // + // dimensions are available as: + // - $chart-id.id + // - $chart-id.name + // - $chart-name.id + // - $chart-name.name + + rs->var_host_chartidid = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullidid, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); + rs->var_host_chartidname = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullidname, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); + rs->var_host_chartnameid = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullnameid, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); + rs->var_host_chartnamename = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullnamename, rs->type, RRDVAR_OPTION_DEFAULT, rs->value); +} + +RRDDIMVAR *rrddimvar_create(RRDDIM *rd, RRDVAR_TYPE type, const char *prefix, const char *suffix, void *value, RRDVAR_OPTIONS options) { + RRDSET *st = rd->rrdset; + (void)st; + + debug(D_VARIABLES, "RRDDIMSET create for chart id '%s' name '%s', dimension id '%s', name '%s%s%s'", st->id, st->name, rd->id, (prefix)?prefix:"", rd->name, (suffix)?suffix:""); + + if(!prefix) prefix = ""; + if(!suffix) suffix = ""; + + RRDDIMVAR *rs = (RRDDIMVAR *)callocz(1, sizeof(RRDDIMVAR)); + + rs->prefix = strdupz(prefix); + rs->suffix = strdupz(suffix); + + rs->type = type; + rs->value = value; + rs->options = options; + rs->rrddim = rd; + + rs->next = rd->variables; + rd->variables = rs; + + rrddimvar_create_variables(rs); + + return rs; +} + +void rrddimvar_rename_all(RRDDIM *rd) { + RRDSET *st = rd->rrdset; + (void)st; + + debug(D_VARIABLES, "RRDDIMSET rename for chart id '%s' name '%s', dimension id '%s', name '%s'", st->id, st->name, rd->id, rd->name); + + RRDDIMVAR *rs, *next = rd->variables; + while((rs = next)) { + next = rs->next; + rrddimvar_create_variables(rs); + } +} + +void rrddimvar_free(RRDDIMVAR *rs) { + RRDDIM *rd = rs->rrddim; + RRDSET *st = rd->rrdset; + debug(D_VARIABLES, "RRDDIMSET free for chart id '%s' name '%s', dimension id '%s', name '%s', prefix='%s', suffix='%s'", st->id, st->name, rd->id, rd->name, rs->prefix, rs->suffix); + + rrddimvar_free_variables(rs); + + if(rd->variables == rs) { + debug(D_VARIABLES, "RRDDIMSET removing first entry for chart id '%s' name '%s', dimension id '%s', name '%s'", st->id, st->name, rd->id, rd->name); + rd->variables = rs->next; + } + else { + debug(D_VARIABLES, "RRDDIMSET removing non-first entry for chart id '%s' name '%s', dimension id '%s', name '%s'", st->id, st->name, rd->id, rd->name); + RRDDIMVAR *t; + for (t = rd->variables; t && t->next != rs; t = t->next) ; + if(!t) error("RRDDIMVAR '%s' not found in dimension '%s/%s' variables linked list", rs->key_name, st->id, rd->id); + else t->next = rs->next; + } + + freez(rs->prefix); + freez(rs->suffix); + freez(rs); +} + diff --git a/database/rrddimvar.h b/database/rrddimvar.h new file mode 100644 index 0000000..3494824 --- /dev/null +++ b/database/rrddimvar.h @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDDIMVAR_H +#define NETDATA_RRDDIMVAR_H 1 + +#include "rrd.h" + +// variables linked to individual dimensions +// We link variables to point the values that are already +// calculated / processed by the normal data collection process +// This means, there will be no speed penalty for using +// these variables +struct rrddimvar { + char *prefix; + char *suffix; + + char *key_id; // dimension id + char *key_name; // dimension name + char *key_contextid; // context + dimension id + char *key_contextname; // context + dimension name + char *key_fullidid; // chart type.chart id + dimension id + char *key_fullidname; // chart type.chart id + dimension name + char *key_fullnameid; // chart type.chart name + dimension id + char *key_fullnamename; // chart type.chart name + dimension name + + RRDVAR_TYPE type; + void *value; + + RRDVAR_OPTIONS options; + + RRDVAR *var_local_id; + RRDVAR *var_local_name; + + RRDVAR *var_family_id; + RRDVAR *var_family_name; + RRDVAR *var_family_contextid; + RRDVAR *var_family_contextname; + + RRDVAR *var_host_chartidid; + RRDVAR *var_host_chartidname; + RRDVAR *var_host_chartnameid; + RRDVAR *var_host_chartnamename; + + struct rrddim *rrddim; + + struct rrddimvar *next; +}; + + +extern void rrddimvar_rename_all(RRDDIM *rd); +extern RRDDIMVAR *rrddimvar_create(RRDDIM *rd, RRDVAR_TYPE type, const char *prefix, const char *suffix, void *value, RRDVAR_OPTIONS options); +extern void rrddimvar_free(RRDDIMVAR *rs); + + + +#endif //NETDATA_RRDDIMVAR_H diff --git a/database/rrdfamily.c b/database/rrdfamily.c new file mode 100644 index 0000000..f75f0ad --- /dev/null +++ b/database/rrdfamily.c @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS +#include "rrd.h" + +// ---------------------------------------------------------------------------- +// RRDFAMILY index + +int rrdfamily_compare(void *a, void *b) { + if(((RRDFAMILY *)a)->hash_family < ((RRDFAMILY *)b)->hash_family) return -1; + else if(((RRDFAMILY *)a)->hash_family > ((RRDFAMILY *)b)->hash_family) return 1; + else return strcmp(((RRDFAMILY *)a)->family, ((RRDFAMILY *)b)->family); +} + +#define rrdfamily_index_add(host, rc) (RRDFAMILY *)avl_insert_lock(&((host)->rrdfamily_root_index), (avl *)(rc)) +#define rrdfamily_index_del(host, rc) (RRDFAMILY *)avl_remove_lock(&((host)->rrdfamily_root_index), (avl *)(rc)) + +static RRDFAMILY *rrdfamily_index_find(RRDHOST *host, const char *id, uint32_t hash) { + RRDFAMILY tmp; + tmp.family = id; + tmp.hash_family = (hash)?hash:simple_hash(tmp.family); + + return (RRDFAMILY *)avl_search_lock(&(host->rrdfamily_root_index), (avl *) &tmp); +} + +RRDFAMILY *rrdfamily_create(RRDHOST *host, const char *id) { + RRDFAMILY *rc = rrdfamily_index_find(host, id, 0); + if(!rc) { + rc = callocz(1, sizeof(RRDFAMILY)); + + rc->family = strdupz(id); + rc->hash_family = simple_hash(rc->family); + + // initialize the variables index + avl_init_lock(&rc->rrdvar_root_index, rrdvar_compare); + + RRDFAMILY *ret = rrdfamily_index_add(host, rc); + if(ret != rc) + error("RRDFAMILY: INTERNAL ERROR: Expected to INSERT RRDFAMILY '%s' into index, but inserted '%s'.", rc->family, (ret)?ret->family:"NONE"); + } + + rc->use_count++; + return rc; +} + +void rrdfamily_free(RRDHOST *host, RRDFAMILY *rc) { + rc->use_count--; + if(!rc->use_count) { + RRDFAMILY *ret = rrdfamily_index_del(host, rc); + if(ret != rc) + error("RRDFAMILY: INTERNAL ERROR: Expected to DELETE RRDFAMILY '%s' from index, but deleted '%s'.", rc->family, (ret)?ret->family:"NONE"); + else { + debug(D_RRD_CALLS, "RRDFAMILY: Cleaning up remaining family variables for host '%s', family '%s'", host->hostname, rc->family); + rrdvar_free_remaining_variables(host, &rc->rrdvar_root_index); + + freez((void *) rc->family); + freez(rc); + } + } +} + diff --git a/database/rrdhost.c b/database/rrdhost.c new file mode 100644 index 0000000..7234db9 --- /dev/null +++ b/database/rrdhost.c @@ -0,0 +1,744 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS +#include "rrd.h" + +RRDHOST *localhost = NULL; +size_t rrd_hosts_available = 0; +netdata_rwlock_t rrd_rwlock = NETDATA_RWLOCK_INITIALIZER; + +time_t rrdset_free_obsolete_time = 3600; +time_t rrdhost_free_orphan_time = 3600; + +// ---------------------------------------------------------------------------- +// RRDHOST index + +int rrdhost_compare(void* a, void* b) { + if(((RRDHOST *)a)->hash_machine_guid < ((RRDHOST *)b)->hash_machine_guid) return -1; + else if(((RRDHOST *)a)->hash_machine_guid > ((RRDHOST *)b)->hash_machine_guid) return 1; + else return strcmp(((RRDHOST *)a)->machine_guid, ((RRDHOST *)b)->machine_guid); +} + +avl_tree_lock rrdhost_root_index = { + .avl_tree = { NULL, rrdhost_compare }, + .rwlock = AVL_LOCK_INITIALIZER +}; + +RRDHOST *rrdhost_find_by_guid(const char *guid, uint32_t hash) { + debug(D_RRDHOST, "Searching in index for host with guid '%s'", guid); + + RRDHOST tmp; + strncpyz(tmp.machine_guid, guid, GUID_LEN); + tmp.hash_machine_guid = (hash)?hash:simple_hash(tmp.machine_guid); + + return (RRDHOST *)avl_search_lock(&(rrdhost_root_index), (avl *) &tmp); +} + +RRDHOST *rrdhost_find_by_hostname(const char *hostname, uint32_t hash) { + if(unlikely(!strcmp(hostname, "localhost"))) + return localhost; + + if(unlikely(!hash)) hash = simple_hash(hostname); + + rrd_rdlock(); + RRDHOST *host; + rrdhost_foreach_read(host) { + if(unlikely((hash == host->hash_hostname && !strcmp(hostname, host->hostname)))) { + rrd_unlock(); + return host; + } + } + rrd_unlock(); + + return NULL; +} + +#define rrdhost_index_add(rrdhost) (RRDHOST *)avl_insert_lock(&(rrdhost_root_index), (avl *)(rrdhost)) +#define rrdhost_index_del(rrdhost) (RRDHOST *)avl_remove_lock(&(rrdhost_root_index), (avl *)(rrdhost)) + + +// ---------------------------------------------------------------------------- +// RRDHOST - internal helpers + +static inline void rrdhost_init_tags(RRDHOST *host, const char *tags) { + if(host->tags && tags && !strcmp(host->tags, tags)) + return; + + void *old = (void *)host->tags; + host->tags = (tags && *tags)?strdupz(tags):NULL; + freez(old); +} + +static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname) { + if(host->hostname && hostname && !strcmp(host->hostname, hostname)) + return; + + void *old = host->hostname; + host->hostname = strdupz(hostname?hostname:"localhost"); + host->hash_hostname = simple_hash(host->hostname); + freez(old); +} + +static inline void rrdhost_init_os(RRDHOST *host, const char *os) { + if(host->os && os && !strcmp(host->os, os)) + return; + + void *old = (void *)host->os; + host->os = strdupz(os?os:"unknown"); + freez(old); +} + +static inline void rrdhost_init_timezone(RRDHOST *host, const char *timezone) { + if(host->timezone && timezone && !strcmp(host->timezone, timezone)) + return; + + void *old = (void *)host->timezone; + host->timezone = strdupz((timezone && *timezone)?timezone:"unknown"); + freez(old); +} + +static inline void rrdhost_init_machine_guid(RRDHOST *host, const char *machine_guid) { + strncpy(host->machine_guid, machine_guid, GUID_LEN); + host->machine_guid[GUID_LEN] = '\0'; + host->hash_machine_guid = simple_hash(host->machine_guid); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - add a host + +RRDHOST *rrdhost_create(const char *hostname, + const char *registry_hostname, + const char *guid, + const char *os, + const char *timezone, + const char *tags, + const char *program_name, + const char *program_version, + int update_every, + long entries, + RRD_MEMORY_MODE memory_mode, + unsigned int health_enabled, + unsigned int rrdpush_enabled, + char *rrdpush_destination, + char *rrdpush_api_key, + char *rrdpush_send_charts_matching, + int is_localhost +) { + debug(D_RRDHOST, "Host '%s': adding with guid '%s'", hostname, guid); + + rrd_check_wrlock(); + + RRDHOST *host = callocz(1, sizeof(RRDHOST)); + + host->rrd_update_every = (update_every > 0)?update_every:1; + host->rrd_history_entries = align_entries_to_pagesize(memory_mode, entries); + host->rrd_memory_mode = memory_mode; + host->health_enabled = (memory_mode == RRD_MEMORY_MODE_NONE)? 0 : health_enabled; + host->rrdpush_send_enabled = (rrdpush_enabled && rrdpush_destination && *rrdpush_destination && rrdpush_api_key && *rrdpush_api_key) ? 1 : 0; + host->rrdpush_send_destination = (host->rrdpush_send_enabled)?strdupz(rrdpush_destination):NULL; + host->rrdpush_send_api_key = (host->rrdpush_send_enabled)?strdupz(rrdpush_api_key):NULL; + host->rrdpush_send_charts_matching = simple_pattern_create(rrdpush_send_charts_matching, NULL, SIMPLE_PATTERN_EXACT); + + host->rrdpush_sender_pipe[0] = -1; + host->rrdpush_sender_pipe[1] = -1; + host->rrdpush_sender_socket = -1; + + netdata_mutex_init(&host->rrdpush_sender_buffer_mutex); + netdata_rwlock_init(&host->rrdhost_rwlock); + + rrdhost_init_hostname(host, hostname); + rrdhost_init_machine_guid(host, guid); + + rrdhost_init_os(host, os); + rrdhost_init_timezone(host, timezone); + rrdhost_init_tags(host, tags); + + host->program_name = strdupz((program_name && *program_name)?program_name:"unknown"); + host->program_version = strdupz((program_version && *program_version)?program_version:"unknown"); + host->registry_hostname = strdupz((registry_hostname && *registry_hostname)?registry_hostname:hostname); + + avl_init_lock(&(host->rrdset_root_index), rrdset_compare); + avl_init_lock(&(host->rrdset_root_index_name), rrdset_compare_name); + avl_init_lock(&(host->rrdfamily_root_index), rrdfamily_compare); + avl_init_lock(&(host->rrdvar_root_index), rrdvar_compare); + + if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete obsolete charts files", 1)) + rrdhost_flag_set(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS); + + if(config_get_boolean(CONFIG_SECTION_GLOBAL, "delete orphan hosts files", 1) && !is_localhost) + rrdhost_flag_set(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST); + + + // ------------------------------------------------------------------------ + // initialize health variables + + host->health_log.next_log_id = 1; + host->health_log.next_alarm_id = 1; + host->health_log.max = 1000; + host->health_log.next_log_id = + host->health_log.next_alarm_id = (uint32_t)now_realtime_sec(); + + long n = config_get_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", host->health_log.max); + if(n < 10) { + error("Host '%s': health configuration has invalid max log entries %ld. Using default %u", host->hostname, n, host->health_log.max); + config_set_number(CONFIG_SECTION_HEALTH, "in memory max health log entries", (long)host->health_log.max); + } + else + host->health_log.max = (unsigned int)n; + + netdata_rwlock_init(&host->health_log.alarm_log_rwlock); + + char filename[FILENAME_MAX + 1]; + + if(is_localhost) { + + host->cache_dir = strdupz(netdata_configured_cache_dir); + host->varlib_dir = strdupz(netdata_configured_varlib_dir); + + } + else { + // this is not localhost - append our GUID to localhost path + + snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_cache_dir, host->machine_guid); + host->cache_dir = strdupz(filename); + + if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) { + int r = mkdir(host->cache_dir, 0775); + if(r != 0 && errno != EEXIST) + error("Host '%s': cannot create directory '%s'", host->hostname, host->cache_dir); + } + + snprintfz(filename, FILENAME_MAX, "%s/%s", netdata_configured_varlib_dir, host->machine_guid); + host->varlib_dir = strdupz(filename); + + if(host->health_enabled) { + int r = mkdir(host->varlib_dir, 0775); + if(r != 0 && errno != EEXIST) + error("Host '%s': cannot create directory '%s'", host->hostname, host->varlib_dir); + } + + } + + if(host->health_enabled) { + snprintfz(filename, FILENAME_MAX, "%s/health", host->varlib_dir); + int r = mkdir(filename, 0775); + if(r != 0 && errno != EEXIST) + error("Host '%s': cannot create directory '%s'", host->hostname, filename); + } + + snprintfz(filename, FILENAME_MAX, "%s/health/health-log.db", host->varlib_dir); + host->health_log_filename = strdupz(filename); + + snprintfz(filename, FILENAME_MAX, "%s/alarm-notify.sh", netdata_configured_plugins_dir); + host->health_default_exec = strdupz(config_get(CONFIG_SECTION_HEALTH, "script to execute on alarm", filename)); + host->health_default_recipient = strdupz("root"); + + + // ------------------------------------------------------------------------ + // load health configuration + + if(host->health_enabled) { + health_alarm_log_load(host); + health_alarm_log_open(host); + + rrdhost_wrlock(host); + health_readdir(host, health_user_config_dir(), health_stock_config_dir(), NULL); + rrdhost_unlock(host); + } + + + // ------------------------------------------------------------------------ + // link it and add it to the index + + if(is_localhost) { + host->next = localhost; + localhost = host; + } + else { + if(localhost) { + host->next = localhost->next; + localhost->next = host; + } + else localhost = host; + } + + RRDHOST *t = rrdhost_index_add(host); + + if(t != host) { + error("Host '%s': cannot add host with machine guid '%s' to index. It already exists as host '%s' with machine guid '%s'.", host->hostname, host->machine_guid, t->hostname, t->machine_guid); + rrdhost_free(host); + host = NULL; + } + else { + info("Host '%s' (at registry as '%s') with guid '%s' initialized" + ", os '%s'" + ", timezone '%s'" + ", tags '%s'" + ", program_name '%s'" + ", program_version '%s'" + ", update every %d" + ", memory mode %s" + ", history entries %ld" + ", streaming %s" + " (to '%s' with api key '%s')" + ", health %s" + ", cache_dir '%s'" + ", varlib_dir '%s'" + ", health_log '%s'" + ", alarms default handler '%s'" + ", alarms default recipient '%s'" + , host->hostname + , host->registry_hostname + , host->machine_guid + , host->os + , host->timezone + , (host->tags)?host->tags:"" + , host->program_name + , host->program_version + , host->rrd_update_every + , rrd_memory_mode_name(host->rrd_memory_mode) + , host->rrd_history_entries + , host->rrdpush_send_enabled?"enabled":"disabled" + , host->rrdpush_send_destination?host->rrdpush_send_destination:"" + , host->rrdpush_send_api_key?host->rrdpush_send_api_key:"" + , host->health_enabled?"enabled":"disabled" + , host->cache_dir + , host->varlib_dir + , host->health_log_filename + , host->health_default_exec + , host->health_default_recipient + ); + } + + rrd_hosts_available++; + + return host; +} + +RRDHOST *rrdhost_find_or_create( + const char *hostname + , const char *registry_hostname + , const char *guid + , const char *os + , const char *timezone + , const char *tags + , const char *program_name + , const char *program_version + , int update_every + , long history + , RRD_MEMORY_MODE mode + , unsigned int health_enabled + , unsigned int rrdpush_enabled + , char *rrdpush_destination + , char *rrdpush_api_key + , char *rrdpush_send_charts_matching +) { + debug(D_RRDHOST, "Searching for host '%s' with guid '%s'", hostname, guid); + + rrd_wrlock(); + RRDHOST *host = rrdhost_find_by_guid(guid, 0); + if(!host) { + host = rrdhost_create( + hostname + , registry_hostname + , guid + , os + , timezone + , tags + , program_name + , program_version + , update_every + , history + , mode + , health_enabled + , rrdpush_enabled + , rrdpush_destination + , rrdpush_api_key + , rrdpush_send_charts_matching + , 0 + ); + } + else { + host->health_enabled = health_enabled; + + if(strcmp(host->hostname, hostname) != 0) { + info("Host '%s' has been renamed to '%s'. If this is not intentional it may mean multiple hosts are using the same machine_guid.", host->hostname, hostname); + char *t = host->hostname; + host->hostname = strdupz(hostname); + host->hash_hostname = simple_hash(host->hostname); + freez(t); + } + + if(strcmp(host->program_name, program_name) != 0) { + info("Host '%s' switched program name from '%s' to '%s'", host->hostname, host->program_name, program_name); + char *t = host->program_name; + host->program_name = strdupz(program_name); + freez(t); + } + + if(strcmp(host->program_version, program_version) != 0) { + info("Host '%s' switched program version from '%s' to '%s'", host->hostname, host->program_version, program_version); + char *t = host->program_version; + host->program_version = strdupz(program_version); + freez(t); + } + + if(host->rrd_update_every != update_every) + error("Host '%s' has an update frequency of %d seconds, but the wanted one is %d seconds. Restart netdata here to apply the new settings.", host->hostname, host->rrd_update_every, update_every); + + if(host->rrd_history_entries < history) + error("Host '%s' has history of %ld entries, but the wanted one is %ld entries. Restart netdata here to apply the new settings.", host->hostname, host->rrd_history_entries, history); + + if(host->rrd_memory_mode != mode) + error("Host '%s' has memory mode '%s', but the wanted one is '%s'. Restart netdata here to apply the new settings.", host->hostname, rrd_memory_mode_name(host->rrd_memory_mode), rrd_memory_mode_name(mode)); + + // update host tags + rrdhost_init_tags(host, tags); + } + + rrdhost_cleanup_orphan_hosts_nolock(host); + + rrd_unlock(); + + return host; +} + +inline int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected, time_t now) { + if(host != protected + && host != localhost + && rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN) + && !host->connected_senders + && host->senders_disconnected_time + && host->senders_disconnected_time + rrdhost_free_orphan_time < now) + return 1; + + return 0; +} + +void rrdhost_cleanup_orphan_hosts_nolock(RRDHOST *protected) { + time_t now = now_realtime_sec(); + + RRDHOST *host; + +restart_after_removal: + rrdhost_foreach_write(host) { + if(rrdhost_should_be_removed(host, protected, now)) { + info("Host '%s' with machine guid '%s' is obsolete - cleaning up.", host->hostname, host->machine_guid); + + if(rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_ORPHAN_HOST)) + rrdhost_delete_charts(host); + else + rrdhost_save_charts(host); + + rrdhost_free(host); + goto restart_after_removal; + } + } +} + +// ---------------------------------------------------------------------------- +// RRDHOST global / startup initialization + +void rrd_init(char *hostname) { + rrdset_free_obsolete_time = config_get_number(CONFIG_SECTION_GLOBAL, "cleanup obsolete charts after seconds", rrdset_free_obsolete_time); + gap_when_lost_iterations_above = (int)config_get_number(CONFIG_SECTION_GLOBAL, "gap when lost iterations above", gap_when_lost_iterations_above); + if (gap_when_lost_iterations_above < 1) + gap_when_lost_iterations_above = 1; + + health_init(); + registry_init(); + rrdpush_init(); + + debug(D_RRDHOST, "Initializing localhost with hostname '%s'", hostname); + rrd_wrlock(); + localhost = rrdhost_create( + hostname + , registry_get_this_machine_hostname() + , registry_get_this_machine_guid() + , os_type + , netdata_configured_timezone + , config_get(CONFIG_SECTION_BACKEND, "host tags", "") + , program_name + , program_version + , default_rrd_update_every + , default_rrd_history_entries + , default_rrd_memory_mode + , default_health_enabled + , default_rrdpush_enabled + , default_rrdpush_destination + , default_rrdpush_api_key + , default_rrdpush_send_charts_matching + , 1 + ); + rrd_unlock(); + web_client_api_v1_management_init(); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - lock validations +// there are only used when NETDATA_INTERNAL_CHECKS is set + +void __rrdhost_check_rdlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) { + debug(D_RRDHOST, "Checking read lock on host '%s'", host->hostname); + + int ret = netdata_rwlock_trywrlock(&host->rrdhost_rwlock); + if(ret == 0) + fatal("RRDHOST '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file); +} + +void __rrdhost_check_wrlock(RRDHOST *host, const char *file, const char *function, const unsigned long line) { + debug(D_RRDHOST, "Checking write lock on host '%s'", host->hostname); + + int ret = netdata_rwlock_tryrdlock(&host->rrdhost_rwlock); + if(ret == 0) + fatal("RRDHOST '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", host->hostname, function, line, file); +} + +void __rrd_check_rdlock(const char *file, const char *function, const unsigned long line) { + debug(D_RRDHOST, "Checking read lock on all RRDs"); + + int ret = netdata_rwlock_trywrlock(&rrd_rwlock); + if(ret == 0) + fatal("RRDs should be read-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file); +} + +void __rrd_check_wrlock(const char *file, const char *function, const unsigned long line) { + debug(D_RRDHOST, "Checking write lock on all RRDs"); + + int ret = netdata_rwlock_tryrdlock(&rrd_rwlock); + if(ret == 0) + fatal("RRDs should be write-locked, but it are not, at function %s() at line %lu of file '%s'", function, line, file); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - free + +void rrdhost_free(RRDHOST *host) { + if(!host) return; + + info("Freeing all memory for host '%s'...", host->hostname); + + rrd_check_wrlock(); // make sure the RRDs are write locked + + // stop a possibly running thread + rrdpush_sender_thread_stop(host); + + rrdhost_wrlock(host); // lock this RRDHOST + + // ------------------------------------------------------------------------ + // release its children resources + + while(host->rrdset_root) + rrdset_free(host->rrdset_root); + + while(host->alarms) + rrdcalc_unlink_and_free(host, host->alarms); + + while(host->templates) + rrdcalctemplate_unlink_and_free(host, host->templates); + + debug(D_RRD_CALLS, "RRDHOST: Cleaning up remaining host variables for host '%s'", host->hostname); + rrdvar_free_remaining_variables(host, &host->rrdvar_root_index); + + health_alarm_log_free(host); + + // ------------------------------------------------------------------------ + // remove it from the indexes + + if(rrdhost_index_del(host) != host) + error("RRDHOST '%s' removed from index, deleted the wrong entry.", host->hostname); + + + // ------------------------------------------------------------------------ + // unlink it from the host + + if(host == localhost) { + localhost = host->next; + } + else { + // find the previous one + RRDHOST *h; + for(h = localhost; h && h->next != host ; h = h->next) ; + + // bypass it + if(h) h->next = host->next; + else error("Request to free RRDHOST '%s': cannot find it", host->hostname); + } + + // ------------------------------------------------------------------------ + // free it + + freez((void *)host->tags); + freez((void *)host->os); + freez((void *)host->timezone); + freez(host->program_version); + freez(host->program_name); + freez(host->cache_dir); + freez(host->varlib_dir); + freez(host->rrdpush_send_api_key); + freez(host->rrdpush_send_destination); + freez(host->health_default_exec); + freez(host->health_default_recipient); + freez(host->health_log_filename); + freez(host->hostname); + freez(host->registry_hostname); + simple_pattern_free(host->rrdpush_send_charts_matching); + rrdhost_unlock(host); + netdata_rwlock_destroy(&host->health_log.alarm_log_rwlock); + netdata_rwlock_destroy(&host->rrdhost_rwlock); + freez(host); + + rrd_hosts_available--; +} + +void rrdhost_free_all(void) { + rrd_wrlock(); + while(localhost) rrdhost_free(localhost); + rrd_unlock(); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - save host files + +void rrdhost_save_charts(RRDHOST *host) { + if(!host) return; + + info("Saving/Closing database of host '%s'...", host->hostname); + + RRDSET *st; + + // we get a write lock + // to ensure only one thread is saving the database + rrdhost_wrlock(host); + + rrdset_foreach_write(st, host) { + rrdset_rdlock(st); + rrdset_save(st); + rrdset_unlock(st); + } + + rrdhost_unlock(host); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - delete host files + +void rrdhost_delete_charts(RRDHOST *host) { + if(!host) return; + + info("Deleting database of host '%s'...", host->hostname); + + RRDSET *st; + + // we get a write lock + // to ensure only one thread is saving the database + rrdhost_wrlock(host); + + rrdset_foreach_write(st, host) { + rrdset_rdlock(st); + rrdset_delete(st); + rrdset_unlock(st); + } + + recursively_delete_dir(host->cache_dir, "left over host"); + + rrdhost_unlock(host); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - cleanup host files + +void rrdhost_cleanup_charts(RRDHOST *host) { + if(!host) return; + + info("Cleaning up database of host '%s'...", host->hostname); + + RRDSET *st; + uint32_t rrdhost_delete_obsolete_charts = rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS); + + // we get a write lock + // to ensure only one thread is saving the database + rrdhost_wrlock(host); + + rrdset_foreach_write(st, host) { + rrdset_rdlock(st); + + if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)) + rrdset_delete(st); + else + rrdset_save(st); + + rrdset_unlock(st); + } + + rrdhost_unlock(host); +} + + +// ---------------------------------------------------------------------------- +// RRDHOST - save all hosts to disk + +void rrdhost_save_all(void) { + info("Saving database [%zu hosts(s)]...", rrd_hosts_available); + + rrd_rdlock(); + + RRDHOST *host; + rrdhost_foreach_read(host) + rrdhost_save_charts(host); + + rrd_unlock(); +} + +// ---------------------------------------------------------------------------- +// RRDHOST - save or delete all hosts from disk + +void rrdhost_cleanup_all(void) { + info("Cleaning up database [%zu hosts(s)]...", rrd_hosts_available); + + rrd_rdlock(); + + RRDHOST *host; + rrdhost_foreach_read(host) { + if(host != localhost && rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS) && !host->connected_senders) + rrdhost_delete_charts(host); + else + rrdhost_cleanup_charts(host); + } + + rrd_unlock(); +} + + +// ---------------------------------------------------------------------------- +// RRDHOST - save or delete all the host charts from disk + +void rrdhost_cleanup_obsolete_charts(RRDHOST *host) { + time_t now = now_realtime_sec(); + + RRDSET *st; + + uint32_t rrdhost_delete_obsolete_charts = rrdhost_flag_check(host, RRDHOST_FLAG_DELETE_OBSOLETE_CHARTS); + +restart_after_removal: + rrdset_foreach_write(st, host) { + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE) + && st->last_accessed_time + rrdset_free_obsolete_time < now + && st->last_updated.tv_sec + rrdset_free_obsolete_time < now + && st->last_collected_time.tv_sec + rrdset_free_obsolete_time < now + )) { + + rrdset_rdlock(st); + + if(rrdhost_delete_obsolete_charts) + rrdset_delete(st); + else + rrdset_save(st); + + rrdset_unlock(st); + + rrdset_free(st); + goto restart_after_removal; + } + } +} diff --git a/database/rrdset.c b/database/rrdset.c new file mode 100644 index 0000000..d74ac91 --- /dev/null +++ b/database/rrdset.c @@ -0,0 +1,1637 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS +#include "rrd.h" + +void __rrdset_check_rdlock(RRDSET *st, const char *file, const char *function, const unsigned long line) { + debug(D_RRD_CALLS, "Checking read lock on chart '%s'", st->id); + + int ret = netdata_rwlock_trywrlock(&st->rrdset_rwlock); + if(ret == 0) + fatal("RRDSET '%s' should be read-locked, but it is not, at function %s() at line %lu of file '%s'", st->id, function, line, file); +} + +void __rrdset_check_wrlock(RRDSET *st, const char *file, const char *function, const unsigned long line) { + debug(D_RRD_CALLS, "Checking write lock on chart '%s'", st->id); + + int ret = netdata_rwlock_tryrdlock(&st->rrdset_rwlock); + if(ret == 0) + fatal("RRDSET '%s' should be write-locked, but it is not, at function %s() at line %lu of file '%s'", st->id, function, line, file); +} + + +// ---------------------------------------------------------------------------- +// RRDSET index + +int rrdset_compare(void* a, void* b) { + if(((RRDSET *)a)->hash < ((RRDSET *)b)->hash) return -1; + else if(((RRDSET *)a)->hash > ((RRDSET *)b)->hash) return 1; + else return strcmp(((RRDSET *)a)->id, ((RRDSET *)b)->id); +} + +static RRDSET *rrdset_index_find(RRDHOST *host, const char *id, uint32_t hash) { + RRDSET tmp; + strncpyz(tmp.id, id, RRD_ID_LENGTH_MAX); + tmp.hash = (hash)?hash:simple_hash(tmp.id); + + return (RRDSET *)avl_search_lock(&(host->rrdset_root_index), (avl *) &tmp); +} + +// ---------------------------------------------------------------------------- +// RRDSET name index + +#define rrdset_from_avlname(avlname_ptr) ((RRDSET *)((avlname_ptr) - offsetof(RRDSET, avlname))) + +int rrdset_compare_name(void* a, void* b) { + RRDSET *A = rrdset_from_avlname(a); + RRDSET *B = rrdset_from_avlname(b); + + // fprintf(stderr, "COMPARING: %s with %s\n", A->name, B->name); + + if(A->hash_name < B->hash_name) return -1; + else if(A->hash_name > B->hash_name) return 1; + else return strcmp(A->name, B->name); +} + +RRDSET *rrdset_index_add_name(RRDHOST *host, RRDSET *st) { + void *result; + // fprintf(stderr, "ADDING: %s (name: %s)\n", st->id, st->name); + result = avl_insert_lock(&host->rrdset_root_index_name, (avl *) (&st->avlname)); + if(result) return rrdset_from_avlname(result); + return NULL; +} + +RRDSET *rrdset_index_del_name(RRDHOST *host, RRDSET *st) { + void *result; + // fprintf(stderr, "DELETING: %s (name: %s)\n", st->id, st->name); + result = (RRDSET *)avl_remove_lock(&((host)->rrdset_root_index_name), (avl *)(&st->avlname)); + if(result) return rrdset_from_avlname(result); + return NULL; +} + + +// ---------------------------------------------------------------------------- +// RRDSET - find charts + +static inline RRDSET *rrdset_index_find_name(RRDHOST *host, const char *name, uint32_t hash) { + void *result = NULL; + RRDSET tmp; + tmp.name = name; + tmp.hash_name = (hash)?hash:simple_hash(tmp.name); + + // fprintf(stderr, "SEARCHING: %s\n", name); + result = avl_search_lock(&host->rrdset_root_index_name, (avl *) (&(tmp.avlname))); + if(result) { + RRDSET *st = rrdset_from_avlname(result); + if(strcmp(st->magic, RRDSET_MAGIC) != 0) + error("Search for RRDSET %s returned an invalid RRDSET %s (name %s)", name, st->id, st->name); + + // fprintf(stderr, "FOUND: %s\n", name); + return rrdset_from_avlname(result); + } + // fprintf(stderr, "NOT FOUND: %s\n", name); + return NULL; +} + +inline RRDSET *rrdset_find(RRDHOST *host, const char *id) { + debug(D_RRD_CALLS, "rrdset_find() for chart '%s' in host '%s'", id, host->hostname); + RRDSET *st = rrdset_index_find(host, id, 0); + return(st); +} + +inline RRDSET *rrdset_find_bytype(RRDHOST *host, const char *type, const char *id) { + debug(D_RRD_CALLS, "rrdset_find_bytype() for chart '%s.%s' in host '%s'", type, id, host->hostname); + + char buf[RRD_ID_LENGTH_MAX + 1]; + strncpyz(buf, type, RRD_ID_LENGTH_MAX - 1); + strcat(buf, "."); + int len = (int) strlen(buf); + strncpyz(&buf[len], id, (size_t) (RRD_ID_LENGTH_MAX - len)); + + return(rrdset_find(host, buf)); +} + +inline RRDSET *rrdset_find_byname(RRDHOST *host, const char *name) { + debug(D_RRD_CALLS, "rrdset_find_byname() for chart '%s' in host '%s'", name, host->hostname); + RRDSET *st = rrdset_index_find_name(host, name, 0); + return(st); +} + +// ---------------------------------------------------------------------------- +// RRDSET - rename charts + +char *rrdset_strncpyz_name(char *to, const char *from, size_t length) { + char c, *p = to; + + while (length-- && (c = *from++)) { + if(c != '.' && !isalnum(c)) + c = '_'; + + *p++ = c; + } + + *p = '\0'; + + return to; +} + +int rrdset_set_name(RRDSET *st, const char *name) { + if(unlikely(st->name && !strcmp(st->name, name))) + return 1; + + RRDHOST *host = st->rrdhost; + + debug(D_RRD_CALLS, "rrdset_set_name() old: '%s', new: '%s'", st->name?st->name:"", name); + + char b[CONFIG_MAX_VALUE + 1]; + char n[RRD_ID_LENGTH_MAX + 1]; + + snprintfz(n, RRD_ID_LENGTH_MAX, "%s.%s", st->type, name); + rrdset_strncpyz_name(b, n, CONFIG_MAX_VALUE); + + if(rrdset_index_find_name(host, b, 0)) { + error("RRDSET: chart name '%s' on host '%s' already exists.", b, host->hostname); + return 0; + } + + if(st->name) { + rrdset_index_del_name(host, st); + st->name = config_set_default(st->config_section, "name", b); + st->hash_name = simple_hash(st->name); + rrdsetvar_rename_all(st); + } + else { + st->name = config_get(st->config_section, "name", b); + st->hash_name = simple_hash(st->name); + } + + rrdset_wrlock(st); + RRDDIM *rd; + rrddim_foreach_write(rd, st) + rrddimvar_rename_all(rd); + rrdset_unlock(st); + + if(unlikely(rrdset_index_add_name(host, st) != st)) + error("RRDSET: INTERNAL ERROR: attempted to index duplicate chart name '%s'", st->name); + + rrdset_flag_clear(st, RRDSET_FLAG_BACKEND_SEND); + rrdset_flag_clear(st, RRDSET_FLAG_BACKEND_IGNORE); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_IGNORE); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + return 1; +} + +inline void rrdset_is_obsolete(RRDSET *st) { + if(unlikely(!(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)))) { + rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + // the chart will not get more updates (data collection) + // so, we have to push its definition now + rrdset_push_chart_definition_now(st); + } +} + +inline void rrdset_isnot_obsolete(RRDSET *st) { + if(unlikely((rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)))) { + rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + + // the chart will be pushed upstream automatically + // due to data collection + } +} + +inline void rrdset_update_heterogeneous_flag(RRDSET *st) { + RRDHOST *host = st->rrdhost; + (void)host; + + RRDDIM *rd; + + rrdset_flag_clear(st, RRDSET_FLAG_HOMEGENEOUS_CHECK); + + RRD_ALGORITHM algorithm = st->dimensions->algorithm; + collected_number multiplier = abs(st->dimensions->multiplier); + collected_number divisor = abs(st->dimensions->divisor); + + rrddim_foreach_read(rd, st) { + if(algorithm != rd->algorithm || multiplier != abs(rd->multiplier) || divisor != abs(rd->divisor)) { + if(!rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS)) { + #ifdef NETDATA_INTERNAL_CHECKS + info("Dimension '%s' added on chart '%s' of host '%s' is not homogeneous to other dimensions already present (algorithm is '%s' vs '%s', multiplier is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ", divisor is " COLLECTED_NUMBER_FORMAT " vs " COLLECTED_NUMBER_FORMAT ").", + rd->name, + st->name, + host->hostname, + rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(algorithm), + rd->multiplier, multiplier, + rd->divisor, divisor + ); + #endif + rrdset_flag_set(st, RRDSET_FLAG_HETEROGENEOUS); + } + return; + } + } + + rrdset_flag_clear(st, RRDSET_FLAG_HETEROGENEOUS); +} + +// ---------------------------------------------------------------------------- +// RRDSET - reset a chart + +void rrdset_reset(RRDSET *st) { + debug(D_RRD_CALLS, "rrdset_reset() %s", st->name); + + st->last_collected_time.tv_sec = 0; + st->last_collected_time.tv_usec = 0; + st->last_updated.tv_sec = 0; + st->last_updated.tv_usec = 0; + st->current_entry = 0; + st->counter = 0; + st->counter_done = 0; + + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + rd->last_collected_time.tv_sec = 0; + rd->last_collected_time.tv_usec = 0; + rd->collections_counter = 0; + // memset(rd->values, 0, rd->entries * sizeof(storage_number)); + } +} + +// ---------------------------------------------------------------------------- +// RRDSET - helpers for rrdset_create() + +inline long align_entries_to_pagesize(RRD_MEMORY_MODE mode, long entries) { + if(unlikely(entries < 5)) entries = 5; + if(unlikely(entries > RRD_HISTORY_ENTRIES_MAX)) entries = RRD_HISTORY_ENTRIES_MAX; + + if(unlikely(mode == RRD_MEMORY_MODE_NONE || mode == RRD_MEMORY_MODE_ALLOC)) + return entries; + + long page = (size_t)sysconf(_SC_PAGESIZE); + long size = sizeof(RRDDIM) + entries * sizeof(storage_number); + if(unlikely(size % page)) { + size -= (size % page); + size += page; + + long n = (size - sizeof(RRDDIM)) / sizeof(storage_number); + return n; + } + + return entries; +} + +static inline void last_collected_time_align(RRDSET *st) { + st->last_collected_time.tv_sec -= st->last_collected_time.tv_sec % st->update_every; + + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST))) + st->last_collected_time.tv_usec = 0; + else + st->last_collected_time.tv_usec = 500000; +} + +static inline void last_updated_time_align(RRDSET *st) { + st->last_updated.tv_sec -= st->last_updated.tv_sec % st->update_every; + st->last_updated.tv_usec = 0; +} + +// ---------------------------------------------------------------------------- +// RRDSET - free a chart + +void rrdset_free(RRDSET *st) { + if(unlikely(!st)) return; + + RRDHOST *host = st->rrdhost; + + rrdhost_check_wrlock(host); // make sure we have a write lock on the host + rrdset_wrlock(st); // lock this RRDSET + + // info("Removing chart '%s' ('%s')", st->id, st->name); + + // ------------------------------------------------------------------------ + // remove it from the indexes + + if(unlikely(rrdset_index_del(host, st) != st)) + error("RRDSET: INTERNAL ERROR: attempt to remove from index chart '%s', removed a different chart.", st->id); + + rrdset_index_del_name(host, st); + + // ------------------------------------------------------------------------ + // free its children structures + + while(st->variables) rrdsetvar_free(st->variables); + while(st->alarms) rrdsetcalc_unlink(st->alarms); + while(st->dimensions) rrddim_free(st, st->dimensions); + + rrdfamily_free(host, st->rrdfamily); + + debug(D_RRD_CALLS, "RRDSET: Cleaning up remaining chart variables for host '%s', chart '%s'", host->hostname, st->id); + rrdvar_free_remaining_variables(host, &st->rrdvar_root_index); + + // ------------------------------------------------------------------------ + // unlink it from the host + + if(st == host->rrdset_root) { + host->rrdset_root = st->next; + } + else { + // find the previous one + RRDSET *s; + for(s = host->rrdset_root; s && s->next != st ; s = s->next) ; + + // bypass it + if(s) s->next = st->next; + else error("Request to free RRDSET '%s': cannot find it under host '%s'", st->id, host->hostname); + } + + rrdset_unlock(st); + + // ------------------------------------------------------------------------ + // free it + + netdata_rwlock_destroy(&st->rrdset_rwlock); + + // free directly allocated members + freez(st->config_section); + freez(st->plugin_name); + freez(st->module_name); + + switch(st->rrd_memory_mode) { + case RRD_MEMORY_MODE_SAVE: + case RRD_MEMORY_MODE_MAP: + case RRD_MEMORY_MODE_RAM: + debug(D_RRD_CALLS, "Unmapping stats '%s'.", st->name); + munmap(st, st->memsize); + break; + + case RRD_MEMORY_MODE_ALLOC: + case RRD_MEMORY_MODE_NONE: + freez(st); + break; + } +} + +void rrdset_save(RRDSET *st) { + rrdset_check_rdlock(st); + + // info("Saving chart '%s' ('%s')", st->id, st->name); + + if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) { + debug(D_RRD_STATS, "Saving stats '%s' to '%s'.", st->name, st->cache_filename); + memory_file_save(st->cache_filename, st, st->memsize); + } + + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + if(likely(rd->rrd_memory_mode == RRD_MEMORY_MODE_SAVE)) { + debug(D_RRD_STATS, "Saving dimension '%s' to '%s'.", rd->name, rd->cache_filename); + memory_file_save(rd->cache_filename, rd, rd->memsize); + } + } +} + +void rrdset_delete(RRDSET *st) { + RRDDIM *rd; + + rrdset_check_rdlock(st); + + info("Deleting chart '%s' ('%s') from disk...", st->id, st->name); + + if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || st->rrd_memory_mode == RRD_MEMORY_MODE_MAP) { + info("Deleting chart header file '%s'.", st->cache_filename); + if(unlikely(unlink(st->cache_filename) == -1)) + error("Cannot delete chart header file '%s'", st->cache_filename); + } + + rrddim_foreach_read(rd, st) { + if(likely(rd->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || rd->rrd_memory_mode == RRD_MEMORY_MODE_MAP)) { + info("Deleting dimension file '%s'.", rd->cache_filename); + if(unlikely(unlink(rd->cache_filename) == -1)) + error("Cannot delete dimension file '%s'", rd->cache_filename); + } + } + + recursively_delete_dir(st->cache_dir, "left-over chart"); +} + +// ---------------------------------------------------------------------------- +// RRDSET - create a chart + +static inline RRDSET *rrdset_find_on_create(RRDHOST *host, const char *fullid) { + RRDSET *st = rrdset_find(host, fullid); + if(unlikely(st)) { + rrdset_isnot_obsolete(st); + debug(D_RRD_CALLS, "RRDSET '%s', already exists.", fullid); + return st; + } + + return NULL; +} + +RRDSET *rrdset_create_custom( + RRDHOST *host + , const char *type + , const char *id + , const char *name + , const char *family + , const char *context + , const char *title + , const char *units + , const char *plugin + , const char *module + , long priority + , int update_every + , RRDSET_TYPE chart_type + , RRD_MEMORY_MODE memory_mode + , long history_entries +) { + if(!type || !type[0]) { + fatal("Cannot create rrd stats without a type: id '%s', name '%s', family '%s', context '%s', title '%s', units '%s', plugin '%s', module '%s'." + , (id && *id)?id:"<unset>" + , (name && *name)?name:"<unset>" + , (family && *family)?family:"<unset>" + , (context && *context)?context:"<unset>" + , (title && *title)?title:"<unset>" + , (units && *units)?units:"<unset>" + , (plugin && *plugin)?plugin:"<unset>" + , (module && *module)?module:"<unset>" + ); + return NULL; + } + + if(!id || !id[0]) { + fatal("Cannot create rrd stats without an id: type '%s', name '%s', family '%s', context '%s', title '%s', units '%s', plugin '%s', module '%s'." + , type + , (name && *name)?name:"<unset>" + , (family && *family)?family:"<unset>" + , (context && *context)?context:"<unset>" + , (title && *title)?title:"<unset>" + , (units && *units)?units:"<unset>" + , (plugin && *plugin)?plugin:"<unset>" + , (module && *module)?module:"<unset>" + ); + return NULL; + } + + // ------------------------------------------------------------------------ + // check if it already exists + + char fullid[RRD_ID_LENGTH_MAX + 1]; + snprintfz(fullid, RRD_ID_LENGTH_MAX, "%s.%s", type, id); + + RRDSET *st = rrdset_find_on_create(host, fullid); + if(st) { + rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + return st; + } + + rrdhost_wrlock(host); + + st = rrdset_find_on_create(host, fullid); + if(st) { + rrdhost_unlock(host); + rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + return st; + } + + char fullfilename[FILENAME_MAX + 1]; + + // ------------------------------------------------------------------------ + // compose the config_section for this chart + + char config_section[RRD_ID_LENGTH_MAX + 1]; + if(host == localhost) + strcpy(config_section, fullid); + else + snprintfz(config_section, RRD_ID_LENGTH_MAX, "%s/%s", host->machine_guid, fullid); + + // ------------------------------------------------------------------------ + // get the options from the config, we need to create it + + long rentries = config_get_number(config_section, "history", history_entries); + long entries = align_entries_to_pagesize(memory_mode, rentries); + if(entries != rentries) entries = config_set_number(config_section, "history", entries); + + if(memory_mode == RRD_MEMORY_MODE_NONE && entries != rentries) + entries = config_set_number(config_section, "history", 10); + + int enabled = config_get_boolean(config_section, "enabled", 1); + if(!enabled) entries = 5; + + unsigned long size = sizeof(RRDSET); + char *cache_dir = rrdset_cache_dir(host, fullid, config_section); + + time_t now = now_realtime_sec(); + + // ------------------------------------------------------------------------ + // load it or allocate it + + debug(D_RRD_CALLS, "Creating RRD_STATS for '%s.%s'.", type, id); + + snprintfz(fullfilename, FILENAME_MAX, "%s/main.db", cache_dir); + if(memory_mode == RRD_MEMORY_MODE_SAVE || memory_mode == RRD_MEMORY_MODE_MAP || memory_mode == RRD_MEMORY_MODE_RAM) { + st = (RRDSET *) mymmap( + (memory_mode == RRD_MEMORY_MODE_RAM)?NULL:fullfilename + , size + , ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE) + , 0 + ); + + if(st) { + memset(&st->avl, 0, sizeof(avl)); + memset(&st->avlname, 0, sizeof(avl)); + memset(&st->rrdvar_root_index, 0, sizeof(avl_tree_lock)); + memset(&st->dimensions_index, 0, sizeof(avl_tree_lock)); + memset(&st->rrdset_rwlock, 0, sizeof(netdata_rwlock_t)); + + st->name = NULL; + st->config_section = NULL; + st->type = NULL; + st->family = NULL; + st->title = NULL; + st->units = NULL; + st->context = NULL; + st->cache_dir = NULL; + st->plugin_name = NULL; + st->module_name = NULL; + st->dimensions = NULL; + st->rrdfamily = NULL; + st->rrdhost = NULL; + st->next = NULL; + st->variables = NULL; + st->alarms = NULL; + st->flags = 0x00000000; + + if(memory_mode == RRD_MEMORY_MODE_RAM) { + memset(st, 0, size); + } + else { + if(strcmp(st->magic, RRDSET_MAGIC) != 0) { + info("Initializing file %s.", fullfilename); + memset(st, 0, size); + } + else if(strcmp(st->id, fullid) != 0) { + error("File %s contents are not for chart %s. Clearing it.", fullfilename, fullid); + // munmap(st, size); + // st = NULL; + memset(st, 0, size); + } + else if(st->memsize != size || st->entries != entries) { + error("File %s does not have the desired size. Clearing it.", fullfilename); + memset(st, 0, size); + } + else if(st->update_every != update_every) { + error("File %s does not have the desired update frequency. Clearing it.", fullfilename); + memset(st, 0, size); + } + else if((now - st->last_updated.tv_sec) > update_every * entries) { + error("File %s is too old. Clearing it.", fullfilename); + memset(st, 0, size); + } + else if(st->last_updated.tv_sec > now + update_every) { + error("File %s refers to the future by %zd secs. Resetting it to now.", fullfilename, (ssize_t)(st->last_updated.tv_sec - now)); + st->last_updated.tv_sec = now; + } + + // make sure the database is aligned + if(st->last_updated.tv_sec) { + st->update_every = update_every; + last_updated_time_align(st); + } + } + + // make sure we have the right memory mode + // even if we cleared the memory + st->rrd_memory_mode = memory_mode; + } + } + + if(unlikely(!st)) { + st = callocz(1, size); + st->rrd_memory_mode = (memory_mode == RRD_MEMORY_MODE_NONE) ? RRD_MEMORY_MODE_NONE : RRD_MEMORY_MODE_ALLOC; + } + + st->plugin_name = plugin?strdupz(plugin):NULL; + st->module_name = module?strdupz(module):NULL; + + st->config_section = strdupz(config_section); + st->rrdhost = host; + st->memsize = size; + st->entries = entries; + st->update_every = update_every; + + if(st->current_entry >= st->entries) st->current_entry = 0; + + strcpy(st->cache_filename, fullfilename); + strcpy(st->magic, RRDSET_MAGIC); + + strcpy(st->id, fullid); + st->hash = simple_hash(st->id); + + st->cache_dir = cache_dir; + + st->chart_type = rrdset_type_id(config_get(st->config_section, "chart type", rrdset_type_name(chart_type))); + st->type = config_get(st->config_section, "type", type); + + st->family = config_get(st->config_section, "family", family?family:st->type); + json_fix_string(st->family); + + st->units = config_get(st->config_section, "units", units?units:""); + json_fix_string(st->units); + + st->context = config_get(st->config_section, "context", context?context:st->id); + json_fix_string(st->context); + st->hash_context = simple_hash(st->context); + + st->priority = config_get_number(st->config_section, "priority", priority); + if(enabled) + rrdset_flag_set(st, RRDSET_FLAG_ENABLED); + else + rrdset_flag_clear(st, RRDSET_FLAG_ENABLED); + + rrdset_flag_clear(st, RRDSET_FLAG_DETAIL); + rrdset_flag_clear(st, RRDSET_FLAG_DEBUG); + rrdset_flag_clear(st, RRDSET_FLAG_OBSOLETE); + rrdset_flag_clear(st, RRDSET_FLAG_BACKEND_SEND); + rrdset_flag_clear(st, RRDSET_FLAG_BACKEND_IGNORE); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_SEND); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_IGNORE); + rrdset_flag_clear(st, RRDSET_FLAG_UPSTREAM_EXPOSED); + rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); + + // if(!strcmp(st->id, "disk_util.dm-0")) { + // st->debug = 1; + // error("enabled debugging for '%s'", st->id); + // } + // else error("not enabled debugging for '%s'", st->id); + + st->green = NAN; + st->red = NAN; + + st->last_collected_time.tv_sec = 0; + st->last_collected_time.tv_usec = 0; + st->counter_done = 0; + + st->gap_when_lost_iterations_above = (int) (gap_when_lost_iterations_above + 2); + + st->last_accessed_time = 0; + st->upstream_resync_time = 0; + + avl_init_lock(&st->dimensions_index, rrddim_compare); + avl_init_lock(&st->rrdvar_root_index, rrdvar_compare); + + netdata_rwlock_init(&st->rrdset_rwlock); + + if(name && *name && rrdset_set_name(st, name)) + // we did set the name + ; + else + // could not use the name, use the id + rrdset_set_name(st, id); + + st->title = config_get(st->config_section, "title", title); + json_fix_string(st->title); + + st->rrdfamily = rrdfamily_create(host, st->family); + + st->next = host->rrdset_root; + host->rrdset_root = st; + + if(host->health_enabled) { + rrdsetvar_create(st, "last_collected_t", RRDVAR_TYPE_TIME_T, &st->last_collected_time.tv_sec, RRDVAR_OPTION_DEFAULT); + rrdsetvar_create(st, "collected_total_raw", RRDVAR_TYPE_TOTAL, &st->last_collected_total, RRDVAR_OPTION_DEFAULT); + rrdsetvar_create(st, "green", RRDVAR_TYPE_CALCULATED, &st->green, RRDVAR_OPTION_DEFAULT); + rrdsetvar_create(st, "red", RRDVAR_TYPE_CALCULATED, &st->red, RRDVAR_OPTION_DEFAULT); + rrdsetvar_create(st, "update_every", RRDVAR_TYPE_INT, &st->update_every, RRDVAR_OPTION_DEFAULT); + } + + if(unlikely(rrdset_index_add(host, st) != st)) + error("RRDSET: INTERNAL ERROR: attempt to index duplicate chart '%s'", st->id); + + rrdsetcalc_link_matching(st); + rrdcalctemplate_link_matching(st); + + rrdhost_cleanup_obsolete_charts(host); + + rrdhost_unlock(host); + + return(st); +} + + +// ---------------------------------------------------------------------------- +// RRDSET - data collection iteration control + +inline void rrdset_next_usec_unfiltered(RRDSET *st, usec_t microseconds) { + if(unlikely(!st->last_collected_time.tv_sec || !microseconds || (rrdset_flag_check_noatomic(st, RRDSET_FLAG_SYNC_CLOCK)))) { + // call the full next_usec() function + rrdset_next_usec(st, microseconds); + return; + } + + st->usec_since_last_update = microseconds; +} + +inline void rrdset_next_usec(RRDSET *st, usec_t microseconds) { + struct timeval now; + now_realtime_timeval(&now); + + #ifdef NETDATA_INTERNAL_CHECKS + char *discard_reason = NULL; + usec_t discarded = microseconds; + #endif + + if(unlikely(rrdset_flag_check_noatomic(st, RRDSET_FLAG_SYNC_CLOCK))) { + // the chart needs to be re-synced to current time + rrdset_flag_clear(st, RRDSET_FLAG_SYNC_CLOCK); + + // discard the microseconds supplied + microseconds = 0; + + #ifdef NETDATA_INTERNAL_CHECKS + if(!discard_reason) discard_reason = "SYNC CLOCK FLAG"; + #endif + } + + if(unlikely(!st->last_collected_time.tv_sec)) { + // the first entry + microseconds = st->update_every * USEC_PER_SEC; + #ifdef NETDATA_INTERNAL_CHECKS + if(!discard_reason) discard_reason = "FIRST DATA COLLECTION"; + #endif + } + else if(unlikely(!microseconds)) { + // no dt given by the plugin + microseconds = dt_usec(&now, &st->last_collected_time); + #ifdef NETDATA_INTERNAL_CHECKS + if(!discard_reason) discard_reason = "NO USEC GIVEN BY COLLECTOR"; + #endif + } + else { + // microseconds has the time since the last collection + susec_t since_last_usec = dt_usec_signed(&now, &st->last_collected_time); + + if(unlikely(since_last_usec < 0)) { + // oops! the database is in the future + info("RRD database for chart '%s' on host '%s' is %0.5" LONG_DOUBLE_MODIFIER " secs in the future (counter #%zu, update #%zu). Adjusting it to current time.", st->id, st->rrdhost->hostname, (LONG_DOUBLE)-since_last_usec / USEC_PER_SEC, st->counter, st->counter_done); + + st->last_collected_time.tv_sec = now.tv_sec - st->update_every; + st->last_collected_time.tv_usec = now.tv_usec; + last_collected_time_align(st); + + st->last_updated.tv_sec = now.tv_sec - st->update_every; + st->last_updated.tv_usec = now.tv_usec; + last_updated_time_align(st); + + microseconds = st->update_every * USEC_PER_SEC; + #ifdef NETDATA_INTERNAL_CHECKS + if(!discard_reason) discard_reason = "COLLECTION TIME IN FUTURE"; + #endif + } + else if(unlikely((usec_t)since_last_usec > (usec_t)(st->update_every * 5 * USEC_PER_SEC))) { + // oops! the database is too far behind + info("RRD database for chart '%s' on host '%s' is %0.5" LONG_DOUBLE_MODIFIER " secs in the past (counter #%zu, update #%zu). Adjusting it to current time.", st->id, st->rrdhost->hostname, (LONG_DOUBLE)since_last_usec / USEC_PER_SEC, st->counter, st->counter_done); + + microseconds = (usec_t)since_last_usec; + #ifdef NETDATA_INTERNAL_CHECKS + if(!discard_reason) discard_reason = "COLLECTION TIME TOO FAR IN THE PAST"; + #endif + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(since_last_usec > 0 && (susec_t)microseconds < since_last_usec) { + static __thread susec_t min_delta = USEC_PER_SEC * 3600, permanent_min_delta = 0; + static __thread time_t last_t = 0; + + // the first time initialize it so that it will make the check later + if(last_t == 0) last_t = now.tv_sec + 60; + + susec_t delta = since_last_usec - (susec_t)microseconds; + if(delta < min_delta) min_delta = delta; + + if(now.tv_sec >= last_t + 60) { + last_t = now.tv_sec; + + if(min_delta > permanent_min_delta) { + info("MINIMUM MICROSECONDS DELTA of thread %d increased from %lld to %lld (+%lld)", gettid(), permanent_min_delta, min_delta, min_delta - permanent_min_delta); + permanent_min_delta = min_delta; + } + + min_delta = USEC_PER_SEC * 3600; + } + } +#endif + } + + #ifdef NETDATA_INTERNAL_CHECKS + debug(D_RRD_CALLS, "rrdset_next_usec() for chart %s with microseconds %llu", st->name, microseconds); + rrdset_debug(st, "NEXT: %llu microseconds", microseconds); + + if(discarded && discarded != microseconds) + info("host '%s', chart '%s': discarded data collection time of %llu usec, replaced with %llu usec, reason: '%s'", st->rrdhost->hostname, st->id, discarded, microseconds, discard_reason?discard_reason:"UNDEFINED"); + + #endif + + st->usec_since_last_update = microseconds; +} + + +// ---------------------------------------------------------------------------- +// RRDSET - process the collected values for all dimensions of a chart + +static inline usec_t rrdset_init_last_collected_time(RRDSET *st) { + now_realtime_timeval(&st->last_collected_time); + last_collected_time_align(st); + + usec_t last_collect_ut = st->last_collected_time.tv_sec * USEC_PER_SEC + st->last_collected_time.tv_usec; + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "initialized last collected time to %0.3" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE)last_collect_ut / USEC_PER_SEC); + #endif + + return last_collect_ut; +} + +static inline usec_t rrdset_update_last_collected_time(RRDSET *st) { + usec_t last_collect_ut = st->last_collected_time.tv_sec * USEC_PER_SEC + st->last_collected_time.tv_usec; + usec_t ut = last_collect_ut + st->usec_since_last_update; + st->last_collected_time.tv_sec = (time_t) (ut / USEC_PER_SEC); + st->last_collected_time.tv_usec = (suseconds_t) (ut % USEC_PER_SEC); + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "updated last collected time to %0.3" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE)last_collect_ut / USEC_PER_SEC); + #endif + + return last_collect_ut; +} + +static inline usec_t rrdset_init_last_updated_time(RRDSET *st) { + // copy the last collected time to last updated time + st->last_updated.tv_sec = st->last_collected_time.tv_sec; + st->last_updated.tv_usec = st->last_collected_time.tv_usec; + + if(rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)) + st->last_updated.tv_sec -= st->update_every; + + last_updated_time_align(st); + + usec_t last_updated_ut = st->last_updated.tv_sec * USEC_PER_SEC + st->last_updated.tv_usec; + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "initialized last updated time to %0.3" LONG_DOUBLE_MODIFIER, (LONG_DOUBLE)last_updated_ut / USEC_PER_SEC); + #endif + + return last_updated_ut; +} + +static inline void rrdset_done_push_exclusive(RRDSET *st) { +// usec_t update_every_ut = st->update_every * USEC_PER_SEC; // st->update_every in microseconds +// +// if(unlikely(st->usec_since_last_update > update_every_ut * remote_clock_resync_iterations)) { +// error("Chart '%s' was last collected %llu usec before. Resetting it.", st->id, st->usec_since_last_update); +// rrdset_reset(st); +// st->usec_since_last_update = update_every_ut; +// } + + if(unlikely(!st->last_collected_time.tv_sec)) { + // it is the first entry + // set the last_collected_time to now + rrdset_init_last_collected_time(st); + } + else { + // it is not the first entry + // calculate the proper last_collected_time, using usec_since_last_update + rrdset_update_last_collected_time(st); + } + + st->counter_done++; + + rrdset_rdlock(st); + rrdset_done_push(st); + rrdset_unlock(st); +} + + +static inline size_t rrdset_done_interpolate( + RRDSET *st + , usec_t update_every_ut + , usec_t last_stored_ut + , usec_t next_store_ut + , usec_t last_collect_ut + , usec_t now_collect_ut + , char store_this_entry + , uint32_t storage_flags +) { + RRDDIM *rd; + + size_t stored_entries = 0; // the number of entries we have stored in the db, during this call to rrdset_done() + + usec_t first_ut = last_stored_ut, last_ut = 0; + (void)first_ut; + + ssize_t iterations = (ssize_t)((now_collect_ut - last_stored_ut) / (update_every_ut)); + if((now_collect_ut % (update_every_ut)) == 0) iterations++; + + size_t counter = st->counter; + long current_entry = st->current_entry; + + for( ; next_store_ut <= now_collect_ut ; last_collect_ut = next_store_ut, next_store_ut += update_every_ut, iterations-- ) { + + #ifdef NETDATA_INTERNAL_CHECKS + if(iterations < 0) { error("INTERNAL CHECK: %s: iterations calculation wrapped! first_ut = %llu, last_stored_ut = %llu, next_store_ut = %llu, now_collect_ut = %llu", st->name, first_ut, last_stored_ut, next_store_ut, now_collect_ut); } + rrdset_debug(st, "last_stored_ut = %0.3" LONG_DOUBLE_MODIFIER " (last updated time)", (LONG_DOUBLE)last_stored_ut/USEC_PER_SEC); + rrdset_debug(st, "next_store_ut = %0.3" LONG_DOUBLE_MODIFIER " (next interpolation point)", (LONG_DOUBLE)next_store_ut/USEC_PER_SEC); + #endif + + last_ut = next_store_ut; + + rrddim_foreach_read(rd, st) { + calculated_number new_value; + + switch(rd->algorithm) { + case RRD_ALGORITHM_INCREMENTAL: + new_value = (calculated_number) + ( rd->calculated_value + * (calculated_number)(next_store_ut - last_collect_ut) + / (calculated_number)(now_collect_ut - last_collect_ut) + ); + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: CALC2 INC " + CALCULATED_NUMBER_FORMAT " = " + CALCULATED_NUMBER_FORMAT + " * (%llu - %llu)" + " / (%llu - %llu)" + , rd->name + , new_value + , rd->calculated_value + , next_store_ut, last_collect_ut + , now_collect_ut, last_collect_ut + ); + #endif + + rd->calculated_value -= new_value; + new_value += rd->last_calculated_value; + rd->last_calculated_value = 0; + new_value /= (calculated_number)st->update_every; + + if(unlikely(next_store_ut - last_stored_ut < update_every_ut)) { + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: COLLECTION POINT IS SHORT " CALCULATED_NUMBER_FORMAT " - EXTRAPOLATING", + rd->name + , (calculated_number)(next_store_ut - last_stored_ut) + ); + #endif + + new_value = new_value * (calculated_number)(st->update_every * USEC_PER_SEC) / (calculated_number)(next_store_ut - last_stored_ut); + } + break; + + case RRD_ALGORITHM_ABSOLUTE: + case RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL: + case RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL: + default: + if(iterations == 1) { + // this is the last iteration + // do not interpolate + // just show the calculated value + + new_value = rd->calculated_value; + } + else { + // we have missed an update + // interpolate in the middle values + + new_value = (calculated_number) + ( ( (rd->calculated_value - rd->last_calculated_value) + * (calculated_number)(next_store_ut - last_collect_ut) + / (calculated_number)(now_collect_ut - last_collect_ut) + ) + + rd->last_calculated_value + ); + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: CALC2 DEF " + CALCULATED_NUMBER_FORMAT " = (((" + "(" CALCULATED_NUMBER_FORMAT " - " CALCULATED_NUMBER_FORMAT ")" + " * %llu" + " / %llu) + " CALCULATED_NUMBER_FORMAT + , rd->name + , new_value + , rd->calculated_value, rd->last_calculated_value + , (next_store_ut - first_ut) + , (now_collect_ut - first_ut), rd->last_calculated_value + ); + #endif + } + break; + } + + if(unlikely(!store_this_entry)) { + rd->values[current_entry] = SN_EMPTY_SLOT; //pack_storage_number(0, SN_NOT_EXISTS); + continue; + } + + if(likely(rd->updated && rd->collections_counter > 1 && iterations < st->gap_when_lost_iterations_above)) { + rd->values[current_entry] = pack_storage_number(new_value, storage_flags ); + rd->last_stored_value = new_value; + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: STORE[%ld] " + CALCULATED_NUMBER_FORMAT " = " CALCULATED_NUMBER_FORMAT + , rd->name + , current_entry + , unpack_storage_number(rd->values[current_entry]), new_value + ); + #endif + + } + else { + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: STORE[%ld] = NON EXISTING " + , rd->name + , current_entry + ); + #endif + + rd->values[current_entry] = SN_EMPTY_SLOT; // pack_storage_number(0, SN_NOT_EXISTS); + rd->last_stored_value = NAN; + } + + stored_entries++; + + #ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_DEBUG))) { + calculated_number t1 = new_value * (calculated_number)rd->multiplier / (calculated_number)rd->divisor; + calculated_number t2 = unpack_storage_number(rd->values[current_entry]); + + calculated_number accuracy = accuracy_loss(t1, t2); + debug(D_RRD_STATS, "%s/%s: UNPACK[%ld] = " CALCULATED_NUMBER_FORMAT " FLAGS=0x%08x (original = " CALCULATED_NUMBER_FORMAT ", accuracy loss = " CALCULATED_NUMBER_FORMAT "%%%s)" + , st->id, rd->name + , current_entry + , t2 + , get_storage_number_flags(rd->values[current_entry]) + , t1 + , accuracy + , (accuracy > ACCURACY_LOSS_ACCEPTED_PERCENT) ? " **TOO BIG** " : "" + ); + + rd->collected_volume += t1; + rd->stored_volume += t2; + + accuracy = accuracy_loss(rd->collected_volume, rd->stored_volume); + debug(D_RRD_STATS, "%s/%s: VOLUME[%ld] = " CALCULATED_NUMBER_FORMAT ", calculated = " CALCULATED_NUMBER_FORMAT ", accuracy loss = " CALCULATED_NUMBER_FORMAT "%%%s" + , st->id, rd->name + , current_entry + , rd->stored_volume + , rd->collected_volume + , accuracy + , (accuracy > ACCURACY_LOSS_ACCEPTED_PERCENT) ? " **TOO BIG** " : "" + ); + } + #endif + } + // reset the storage flags for the next point, if any; + storage_flags = SN_EXISTS; + + counter++; + current_entry = ((current_entry + 1) >= st->entries) ? 0 : current_entry + 1; + last_stored_ut = next_store_ut; + } + + st->counter = counter; + st->current_entry = current_entry; + + if(likely(last_ut)) { + st->last_updated.tv_sec = (time_t) (last_ut / USEC_PER_SEC); + st->last_updated.tv_usec = 0; + } + + return stored_entries; +} + +static inline void rrdset_done_fill_the_gap(RRDSET *st) { + usec_t update_every_ut = st->update_every * USEC_PER_SEC; + usec_t now_collect_ut = st->last_collected_time.tv_sec * USEC_PER_SEC + st->last_collected_time.tv_usec; + + long c = 0, entries = st->entries; + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + usec_t next_store_ut = (st->last_updated.tv_sec + st->update_every) * USEC_PER_SEC; + long current_entry = st->current_entry; + + for(c = 0; c < entries && next_store_ut <= now_collect_ut ; next_store_ut += update_every_ut, c++) { + rd->values[current_entry] = SN_EMPTY_SLOT; + current_entry = ((current_entry + 1) >= entries) ? 0 : current_entry + 1; + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: STORE[%ld] = NON EXISTING (FILLED THE GAP)", rd->name, current_entry); + #endif + } + } + + if(c > 0) { + c--; + st->last_updated.tv_sec += c * st->update_every; + + st->current_entry += c; + if(st->current_entry >= st->entries) + st->current_entry -= st->entries; + } +} + +void rrdset_done(RRDSET *st) { + if(unlikely(netdata_exit)) return; + + if(unlikely(st->rrd_memory_mode == RRD_MEMORY_MODE_NONE)) { + if(unlikely(st->rrdhost->rrdpush_send_enabled)) + rrdset_done_push_exclusive(st); + + return; + } + + debug(D_RRD_CALLS, "rrdset_done() for chart %s", st->name); + + RRDDIM *rd; + + char + store_this_entry = 1, // boolean: 1 = store this entry, 0 = don't store this entry + first_entry = 0; // boolean: 1 = this is the first entry seen for this chart, 0 = all other entries + + usec_t + last_collect_ut, // the timestamp in microseconds, of the last collected value + now_collect_ut, // the timestamp in microseconds, of this collected value (this is NOW) + last_stored_ut, // the timestamp in microseconds, of the last stored entry in the db + next_store_ut, // the timestamp in microseconds, of the next entry to store in the db + update_every_ut = st->update_every * USEC_PER_SEC; // st->update_every in microseconds + + netdata_thread_disable_cancelability(); + + // a read lock is OK here + rrdset_rdlock(st); + + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE))) { + error("Chart '%s' has the OBSOLETE flag set, but it is collected.", st->id); + rrdset_isnot_obsolete(st); + } + + // check if the chart has a long time to be updated + if(unlikely(st->usec_since_last_update > st->entries * update_every_ut)) { + info("host '%s', chart %s: took too long to be updated (counter #%zu, update #%zu, %0.3" LONG_DOUBLE_MODIFIER " secs). Resetting it.", st->rrdhost->hostname, st->name, st->counter, st->counter_done, (LONG_DOUBLE)st->usec_since_last_update / USEC_PER_SEC); + rrdset_reset(st); + st->usec_since_last_update = update_every_ut; + store_this_entry = 0; + first_entry = 1; + } + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "microseconds since last update: %llu", st->usec_since_last_update); + #endif + + // set last_collected_time + if(unlikely(!st->last_collected_time.tv_sec)) { + // it is the first entry + // set the last_collected_time to now + last_collect_ut = rrdset_init_last_collected_time(st) - update_every_ut; + + // the first entry should not be stored + store_this_entry = 0; + first_entry = 1; + } + else { + // it is not the first entry + // calculate the proper last_collected_time, using usec_since_last_update + last_collect_ut = rrdset_update_last_collected_time(st); + } + + // if this set has not been updated in the past + // we fake the last_update time to be = now - usec_since_last_update + if(unlikely(!st->last_updated.tv_sec)) { + // it has never been updated before + // set a fake last_updated, in the past using usec_since_last_update + rrdset_init_last_updated_time(st); + + // the first entry should not be stored + store_this_entry = 0; + first_entry = 1; + } + + // check if we will re-write the entire data set + if(unlikely(dt_usec(&st->last_collected_time, &st->last_updated) > st->entries * update_every_ut)) { + info("%s: too old data (last updated at %ld.%ld, last collected at %ld.%ld). Resetting it. Will not store the next entry.", st->name, st->last_updated.tv_sec, st->last_updated.tv_usec, st->last_collected_time.tv_sec, st->last_collected_time.tv_usec); + rrdset_reset(st); + rrdset_init_last_updated_time(st); + + st->usec_since_last_update = update_every_ut; + + // the first entry should not be stored + store_this_entry = 0; + first_entry = 1; + } + + // these are the 3 variables that will help us in interpolation + // last_stored_ut = the last time we added a value to the storage + // now_collect_ut = the time the current value has been collected + // next_store_ut = the time of the next interpolation point + now_collect_ut = st->last_collected_time.tv_sec * USEC_PER_SEC + st->last_collected_time.tv_usec; + last_stored_ut = st->last_updated.tv_sec * USEC_PER_SEC + st->last_updated.tv_usec; + next_store_ut = (st->last_updated.tv_sec + st->update_every) * USEC_PER_SEC; + + if(unlikely(!st->counter_done)) { + // if we have not collected metrics this session (st->counter_done == 0) + // and we have collected metrics for this chart in the past (st->counter != 0) + // fill the gap (the chart has been just loaded from disk) + if(unlikely(st->counter)) { + rrdset_done_fill_the_gap(st); + last_stored_ut = st->last_updated.tv_sec * USEC_PER_SEC + st->last_updated.tv_usec; + next_store_ut = (st->last_updated.tv_sec + st->update_every) * USEC_PER_SEC; + } + + if(unlikely(rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST))) { + store_this_entry = 1; + last_collect_ut = next_store_ut - update_every_ut; + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "Fixed first entry."); + #endif + } + else { + store_this_entry = 0; + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "Will not store the next entry."); + #endif + } + } + st->counter_done++; + + if(unlikely(st->rrdhost->rrdpush_send_enabled)) + rrdset_done_push(st); + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "last_collect_ut = %0.3" LONG_DOUBLE_MODIFIER " (last collection time)", (LONG_DOUBLE)last_collect_ut/USEC_PER_SEC); + rrdset_debug(st, "now_collect_ut = %0.3" LONG_DOUBLE_MODIFIER " (current collection time)", (LONG_DOUBLE)now_collect_ut/USEC_PER_SEC); + rrdset_debug(st, "last_stored_ut = %0.3" LONG_DOUBLE_MODIFIER " (last updated time)", (LONG_DOUBLE)last_stored_ut/USEC_PER_SEC); + rrdset_debug(st, "next_store_ut = %0.3" LONG_DOUBLE_MODIFIER " (next interpolation point)", (LONG_DOUBLE)next_store_ut/USEC_PER_SEC); + #endif + + // calculate totals and count the dimensions + int dimensions = 0; + st->collected_total = 0; + rrddim_foreach_read(rd, st) { + dimensions++; + if(likely(rd->updated)) + st->collected_total += rd->collected_value; + } + + uint32_t storage_flags = SN_EXISTS; + + // process all dimensions to calculate their values + // based on the collected figures only + // at this stage we do not interpolate anything + rrddim_foreach_read(rd, st) { + + if(unlikely(!rd->updated)) { + rd->calculated_value = 0; + continue; + } + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: START " + " last_collected_value = " COLLECTED_NUMBER_FORMAT + " collected_value = " COLLECTED_NUMBER_FORMAT + " last_calculated_value = " CALCULATED_NUMBER_FORMAT + " calculated_value = " CALCULATED_NUMBER_FORMAT + , rd->name + , rd->last_collected_value + , rd->collected_value + , rd->last_calculated_value + , rd->calculated_value + ); + #endif + + switch(rd->algorithm) { + case RRD_ALGORITHM_ABSOLUTE: + rd->calculated_value = (calculated_number)rd->collected_value + * (calculated_number)rd->multiplier + / (calculated_number)rd->divisor; + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: CALC ABS/ABS-NO-IN " + CALCULATED_NUMBER_FORMAT " = " + COLLECTED_NUMBER_FORMAT + " * " CALCULATED_NUMBER_FORMAT + " / " CALCULATED_NUMBER_FORMAT + , rd->name + , rd->calculated_value + , rd->collected_value + , (calculated_number)rd->multiplier + , (calculated_number)rd->divisor + ); + #endif + + break; + + case RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL: + if(unlikely(!st->collected_total)) + rd->calculated_value = 0; + else + // the percentage of the current value + // over the total of all dimensions + rd->calculated_value = + (calculated_number)100 + * (calculated_number)rd->collected_value + / (calculated_number)st->collected_total; + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: CALC PCENT-ROW " + CALCULATED_NUMBER_FORMAT " = 100" + " * " COLLECTED_NUMBER_FORMAT + " / " COLLECTED_NUMBER_FORMAT + , rd->name + , rd->calculated_value + , rd->collected_value + , st->collected_total + ); + #endif + + break; + + case RRD_ALGORITHM_INCREMENTAL: + if(unlikely(rd->collections_counter <= 1)) { + rd->calculated_value = 0; + continue; + } + + // if the new is smaller than the old (an overflow, or reset), set the old equal to the new + // to reset the calculation (it will give zero as the calculation for this second) + if(unlikely(rd->last_collected_value > rd->collected_value)) { + debug(D_RRD_STATS, "%s.%s: RESET or OVERFLOW. Last collected value = " COLLECTED_NUMBER_FORMAT ", current = " COLLECTED_NUMBER_FORMAT + , st->name, rd->name + , rd->last_collected_value + , rd->collected_value); + + if(!(rrddim_flag_check(rd, RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS))) + storage_flags = SN_EXISTS_RESET; + + uint64_t last = (uint64_t)rd->last_collected_value; + uint64_t new = (uint64_t)rd->collected_value; + uint64_t max = (uint64_t)rd->collected_value_max; + uint64_t cap = 0; + + if(max > 0x00000000FFFFFFFFULL) cap = 0xFFFFFFFFFFFFFFFFULL; + else if(max > 0x000000000000FFFFULL) cap = 0x00000000FFFFFFFFULL; + else if(max > 0x00000000000000FFULL) cap = 0x000000000000FFFFULL; + else cap = 0x00000000000000FFULL; + + uint64_t delta = cap - last + new; + + rd->calculated_value += + (calculated_number) delta + * (calculated_number) rd->multiplier + / (calculated_number) rd->divisor; + } + else { + rd->calculated_value += + (calculated_number) (rd->collected_value - rd->last_collected_value) + * (calculated_number) rd->multiplier + / (calculated_number) rd->divisor; + } + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: CALC INC PRE " + CALCULATED_NUMBER_FORMAT " = (" + COLLECTED_NUMBER_FORMAT " - " COLLECTED_NUMBER_FORMAT + ")" + " * " CALCULATED_NUMBER_FORMAT + " / " CALCULATED_NUMBER_FORMAT + , rd->name + , rd->calculated_value + , rd->collected_value, rd->last_collected_value + , (calculated_number)rd->multiplier + , (calculated_number)rd->divisor + ); + #endif + + break; + + case RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL: + if(unlikely(rd->collections_counter <= 1)) { + rd->calculated_value = 0; + continue; + } + + // if the new is smaller than the old (an overflow, or reset), set the old equal to the new + // to reset the calculation (it will give zero as the calculation for this second) + if(unlikely(rd->last_collected_value > rd->collected_value)) { + debug(D_RRD_STATS, "%s.%s: RESET or OVERFLOW. Last collected value = " COLLECTED_NUMBER_FORMAT ", current = " COLLECTED_NUMBER_FORMAT + , st->name, rd->name + , rd->last_collected_value + , rd->collected_value + ); + + if(!(rrddim_flag_check(rd, RRDDIM_FLAG_DONT_DETECT_RESETS_OR_OVERFLOWS))) + storage_flags = SN_EXISTS_RESET; + + rd->last_collected_value = rd->collected_value; + } + + // the percentage of the current increment + // over the increment of all dimensions together + if(unlikely(st->collected_total == st->last_collected_total)) + rd->calculated_value = 0; + else + rd->calculated_value = + (calculated_number)100 + * (calculated_number)(rd->collected_value - rd->last_collected_value) + / (calculated_number)(st->collected_total - st->last_collected_total); + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: CALC PCENT-DIFF " + CALCULATED_NUMBER_FORMAT " = 100" + " * (" COLLECTED_NUMBER_FORMAT " - " COLLECTED_NUMBER_FORMAT ")" + " / (" COLLECTED_NUMBER_FORMAT " - " COLLECTED_NUMBER_FORMAT ")" + , rd->name + , rd->calculated_value + , rd->collected_value, rd->last_collected_value + , st->collected_total, st->last_collected_total + ); + #endif + + break; + + default: + // make the default zero, to make sure + // it gets noticed when we add new types + rd->calculated_value = 0; + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: CALC " + CALCULATED_NUMBER_FORMAT " = 0" + , rd->name + , rd->calculated_value + ); + #endif + + break; + } + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: PHASE2 " + " last_collected_value = " COLLECTED_NUMBER_FORMAT + " collected_value = " COLLECTED_NUMBER_FORMAT + " last_calculated_value = " CALCULATED_NUMBER_FORMAT + " calculated_value = " CALCULATED_NUMBER_FORMAT + , rd->name + , rd->last_collected_value + , rd->collected_value + , rd->last_calculated_value + , rd->calculated_value + ); + #endif + + } + + // at this point we have all the calculated values ready + // it is now time to interpolate values on a second boundary + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(now_collect_ut < next_store_ut)) { + // this is collected in the same interpolation point + rrdset_debug(st, "THIS IS IN THE SAME INTERPOLATION POINT"); + info("INTERNAL CHECK: host '%s', chart '%s' is collected in the same interpolation point: short by %llu microseconds", st->rrdhost->hostname, st->name, next_store_ut - now_collect_ut); + } +#endif + + rrdset_done_interpolate(st + , update_every_ut + , last_stored_ut + , next_store_ut + , last_collect_ut + , now_collect_ut + , store_this_entry + , storage_flags + ); + + st->last_collected_total = st->collected_total; + + rrddim_foreach_read(rd, st) { + if(unlikely(!rd->updated)) + continue; + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: setting last_collected_value (old: " COLLECTED_NUMBER_FORMAT ") to last_collected_value (new: " COLLECTED_NUMBER_FORMAT ")", rd->name, rd->last_collected_value, rd->collected_value); + #endif + + rd->last_collected_value = rd->collected_value; + + switch(rd->algorithm) { + case RRD_ALGORITHM_INCREMENTAL: + if(unlikely(!first_entry)) { + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: setting last_calculated_value (old: " CALCULATED_NUMBER_FORMAT ") to last_calculated_value (new: " CALCULATED_NUMBER_FORMAT ")", rd->name, rd->last_calculated_value + rd->calculated_value, rd->calculated_value); + #endif + + rd->last_calculated_value += rd->calculated_value; + } + else { + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "THIS IS THE FIRST POINT"); + #endif + } + break; + + case RRD_ALGORITHM_ABSOLUTE: + case RRD_ALGORITHM_PCENT_OVER_ROW_TOTAL: + case RRD_ALGORITHM_PCENT_OVER_DIFF_TOTAL: + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: setting last_calculated_value (old: " CALCULATED_NUMBER_FORMAT ") to last_calculated_value (new: " CALCULATED_NUMBER_FORMAT ")", rd->name, rd->last_calculated_value, rd->calculated_value); + #endif + + rd->last_calculated_value = rd->calculated_value; + break; + } + + rd->calculated_value = 0; + rd->collected_value = 0; + rd->updated = 0; + + #ifdef NETDATA_INTERNAL_CHECKS + rrdset_debug(st, "%s: END " + " last_collected_value = " COLLECTED_NUMBER_FORMAT + " collected_value = " COLLECTED_NUMBER_FORMAT + " last_calculated_value = " CALCULATED_NUMBER_FORMAT + " calculated_value = " CALCULATED_NUMBER_FORMAT + , rd->name + , rd->last_collected_value + , rd->collected_value + , rd->last_calculated_value + , rd->calculated_value + ); + #endif + + } + + // ALL DONE ABOUT THE DATA UPDATE + // -------------------------------------------------------------------- + +/* + // find if there are any obsolete dimensions (not updated recently) + if(unlikely(rrd_delete_unupdated_dimensions)) { + + for( rd = st->dimensions; likely(rd) ; rd = rd->next ) + if((rd->last_collected_time.tv_sec + (rrd_delete_unupdated_dimensions * st->update_every)) < st->last_collected_time.tv_sec) + break; + + if(unlikely(rd)) { + RRDDIM *last; + // there is dimension to free + // upgrade our read lock to a write lock + rrdset_unlock(st); + rrdset_wrlock(st); + + for( rd = st->dimensions, last = NULL ; likely(rd) ; ) { + // remove it only it is not updated in rrd_delete_unupdated_dimensions seconds + + if(unlikely((rd->last_collected_time.tv_sec + (rrd_delete_unupdated_dimensions * st->update_every)) < st->last_collected_time.tv_sec)) { + info("Removing obsolete dimension '%s' (%s) of '%s' (%s).", rd->name, rd->id, st->name, st->id); + + if(unlikely(!last)) { + st->dimensions = rd->next; + rd->next = NULL; + rrddim_free(st, rd); + rd = st->dimensions; + continue; + } + else { + last->next = rd->next; + rd->next = NULL; + rrddim_free(st, rd); + rd = last->next; + continue; + } + } + + last = rd; + rd = rd->next; + } + + if(unlikely(!st->dimensions)) { + info("Disabling chart %s (%s) since it does not have any dimensions", st->name, st->id); + st->enabled = 0; + } + } + } +*/ + + rrdset_unlock(st); + + netdata_thread_enable_cancelability(); +} diff --git a/database/rrdsetvar.c b/database/rrdsetvar.c new file mode 100644 index 0000000..9da4193 --- /dev/null +++ b/database/rrdsetvar.c @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_HEALTH_INTERNALS +#include "rrd.h" + +// ---------------------------------------------------------------------------- +// RRDSETVAR management +// CHART VARIABLES + +static inline void rrdsetvar_free_variables(RRDSETVAR *rs) { + RRDSET *st = rs->rrdset; + RRDHOST *host = st->rrdhost; + + // ------------------------------------------------------------------------ + // CHART + rrdvar_free(host, &st->rrdvar_root_index, rs->var_local); + rs->var_local = NULL; + + // ------------------------------------------------------------------------ + // FAMILY + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family); + rs->var_family = NULL; + + rrdvar_free(host, &st->rrdfamily->rrdvar_root_index, rs->var_family_name); + rs->var_family_name = NULL; + + // ------------------------------------------------------------------------ + // HOST + rrdvar_free(host, &host->rrdvar_root_index, rs->var_host); + rs->var_host = NULL; + + rrdvar_free(host, &host->rrdvar_root_index, rs->var_host_name); + rs->var_host_name = NULL; + + // ------------------------------------------------------------------------ + // KEYS + freez(rs->key_fullid); + rs->key_fullid = NULL; + + freez(rs->key_fullname); + rs->key_fullname = NULL; +} + +static inline void rrdsetvar_create_variables(RRDSETVAR *rs) { + RRDSET *st = rs->rrdset; + RRDHOST *host = st->rrdhost; + + RRDVAR_OPTIONS options = rs->options; + if(rs->options & RRDVAR_OPTION_ALLOCATED) + options &= ~ RRDVAR_OPTION_ALLOCATED; + + // ------------------------------------------------------------------------ + // free the old ones (if any) + + rrdsetvar_free_variables(rs); + + // ------------------------------------------------------------------------ + // KEYS + + char buffer[RRDVAR_MAX_LENGTH + 1]; + snprintfz(buffer, RRDVAR_MAX_LENGTH, "%s.%s", st->id, rs->variable); + rs->key_fullid = strdupz(buffer); + + snprintfz(buffer, RRDVAR_MAX_LENGTH, "%s.%s", st->name, rs->variable); + rs->key_fullname = strdupz(buffer); + + // ------------------------------------------------------------------------ + // CHART + rs->var_local = rrdvar_create_and_index("local", &st->rrdvar_root_index, rs->variable, rs->type, options, rs->value); + + // ------------------------------------------------------------------------ + // FAMILY + rs->var_family = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rs->key_fullid, rs->type, options, rs->value); + rs->var_family_name = rrdvar_create_and_index("family", &st->rrdfamily->rrdvar_root_index, rs->key_fullname, rs->type, options, rs->value); + + // ------------------------------------------------------------------------ + // HOST + rs->var_host = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullid, rs->type, options, rs->value); + rs->var_host_name = rrdvar_create_and_index("host", &host->rrdvar_root_index, rs->key_fullname, rs->type, options, rs->value); +} + +RRDSETVAR *rrdsetvar_create(RRDSET *st, const char *variable, RRDVAR_TYPE type, void *value, RRDVAR_OPTIONS options) { + debug(D_VARIABLES, "RRDVARSET create for chart id '%s' name '%s' with variable name '%s'", st->id, st->name, variable); + RRDSETVAR *rs = (RRDSETVAR *)callocz(1, sizeof(RRDSETVAR)); + + rs->variable = strdupz(variable); + rs->hash = simple_hash(rs->variable); + rs->type = type; + rs->value = value; + rs->options = options; + rs->rrdset = st; + + rs->next = st->variables; + st->variables = rs; + + rrdsetvar_create_variables(rs); + + return rs; +} + +void rrdsetvar_rename_all(RRDSET *st) { + debug(D_VARIABLES, "RRDSETVAR rename for chart id '%s' name '%s'", st->id, st->name); + + RRDSETVAR *rs; + for(rs = st->variables; rs ; rs = rs->next) + rrdsetvar_create_variables(rs); + + rrdsetcalc_link_matching(st); +} + +void rrdsetvar_free(RRDSETVAR *rs) { + RRDSET *st = rs->rrdset; + debug(D_VARIABLES, "RRDSETVAR free for chart id '%s' name '%s', variable '%s'", st->id, st->name, rs->variable); + + if(st->variables == rs) { + st->variables = rs->next; + } + else { + RRDSETVAR *t; + for (t = st->variables; t && t->next != rs; t = t->next); + if(!t) error("RRDSETVAR '%s' not found in chart '%s' variables linked list", rs->key_fullname, st->id); + else t->next = rs->next; + } + + rrdsetvar_free_variables(rs); + + freez(rs->variable); + + if(rs->options & RRDVAR_OPTION_ALLOCATED) + freez(rs->value); + + freez(rs); +} + +// -------------------------------------------------------------------------------------------------------------------- +// custom chart variables + +RRDSETVAR *rrdsetvar_custom_chart_variable_create(RRDSET *st, const char *name) { + RRDHOST *host = st->rrdhost; + + char *n = strdupz(name); + rrdvar_fix_name(n); + uint32_t hash = simple_hash(n); + + rrdset_wrlock(st); + + // find it + RRDSETVAR *rs; + for(rs = st->variables; rs ; rs = rs->next) { + if(hash == rs->hash && strcmp(n, rs->variable) == 0) { + rrdset_unlock(st); + if(rs->options & RRDVAR_OPTION_CUSTOM_CHART_VAR) { + freez(n); + return rs; + } + else { + error("RRDSETVAR: custom variable '%s' on chart '%s' of host '%s', conflicts with an internal chart variable", n, st->id, host->hostname); + freez(n); + return NULL; + } + } + } + + // not found, allocate one + + calculated_number *v = mallocz(sizeof(calculated_number)); + *v = NAN; + + rs = rrdsetvar_create(st, n, RRDVAR_TYPE_CALCULATED, v, RRDVAR_OPTION_ALLOCATED|RRDVAR_OPTION_CUSTOM_CHART_VAR); + rrdset_unlock(st); + + freez(n); + return rs; +} + +void rrdsetvar_custom_chart_variable_set(RRDSETVAR *rs, calculated_number value) { + if(rs->type != RRDVAR_TYPE_CALCULATED || !(rs->options & RRDVAR_OPTION_CUSTOM_CHART_VAR) || !(rs->options & RRDVAR_OPTION_ALLOCATED)) { + error("RRDSETVAR: requested to set variable '%s' of chart '%s' on host '%s' to value " CALCULATED_NUMBER_FORMAT " but the variable is not a custom chart one.", rs->variable, rs->rrdset->id, rs->rrdset->rrdhost->hostname, value); + } + else { + calculated_number *v = rs->value; + if(*v != value) { + *v = value; + + // mark the chart to be sent upstream + rrdset_flag_clear(rs->rrdset, RRDSET_FLAG_UPSTREAM_EXPOSED); + } + } +} diff --git a/database/rrdsetvar.h b/database/rrdsetvar.h new file mode 100644 index 0000000..34a26d2 --- /dev/null +++ b/database/rrdsetvar.h @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDSETVAR_H +#define NETDATA_RRDSETVAR_H 1 + +#include "rrd.h" + +// variables linked to charts +// We link variables to point to the values that are already +// calculated / processed by the normal data collection process +// This means, there will be no speed penalty for using +// these variables + +struct rrdsetvar { + char *variable; // variable name + uint32_t hash; // variable name hash + + char *key_fullid; // chart type.chart id.variable + char *key_fullname; // chart type.chart name.variable + + RRDVAR_TYPE type; + void *value; + + RRDVAR_OPTIONS options; + + RRDVAR *var_local; + RRDVAR *var_family; + RRDVAR *var_host; + RRDVAR *var_family_name; + RRDVAR *var_host_name; + + struct rrdset *rrdset; + + struct rrdsetvar *next; +}; + +extern RRDSETVAR *rrdsetvar_custom_chart_variable_create(RRDSET *st, const char *name); +extern void rrdsetvar_custom_chart_variable_set(RRDSETVAR *rv, calculated_number value); + +extern void rrdsetvar_rename_all(RRDSET *st); +extern RRDSETVAR *rrdsetvar_create(RRDSET *st, const char *variable, RRDVAR_TYPE type, void *value, RRDVAR_OPTIONS options); +extern void rrdsetvar_free(RRDSETVAR *rs); + +#endif //NETDATA_RRDSETVAR_H diff --git a/database/rrdvar.c b/database/rrdvar.c new file mode 100644 index 0000000..600bd34 --- /dev/null +++ b/database/rrdvar.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_HEALTH_INTERNALS +#include "rrd.h" + +// ---------------------------------------------------------------------------- +// RRDVAR management + +inline int rrdvar_fix_name(char *variable) { + int fixed = 0; + while(*variable) { + if (!isalnum(*variable) && *variable != '.' && *variable != '_') { + *variable++ = '_'; + fixed++; + } + else + variable++; + } + + return fixed; +} + +int rrdvar_compare(void* a, void* b) { + if(((RRDVAR *)a)->hash < ((RRDVAR *)b)->hash) return -1; + else if(((RRDVAR *)a)->hash > ((RRDVAR *)b)->hash) return 1; + else return strcmp(((RRDVAR *)a)->name, ((RRDVAR *)b)->name); +} + +static inline RRDVAR *rrdvar_index_add(avl_tree_lock *tree, RRDVAR *rv) { + RRDVAR *ret = (RRDVAR *)avl_insert_lock(tree, (avl *)(rv)); + if(ret != rv) + debug(D_VARIABLES, "Request to insert RRDVAR '%s' into index failed. Already exists.", rv->name); + + return ret; +} + +static inline RRDVAR *rrdvar_index_del(avl_tree_lock *tree, RRDVAR *rv) { + RRDVAR *ret = (RRDVAR *)avl_remove_lock(tree, (avl *)(rv)); + if(!ret) + error("Request to remove RRDVAR '%s' from index failed. Not Found.", rv->name); + + return ret; +} + +static inline RRDVAR *rrdvar_index_find(avl_tree_lock *tree, const char *name, uint32_t hash) { + RRDVAR tmp; + tmp.name = (char *)name; + tmp.hash = (hash)?hash:simple_hash(tmp.name); + + return (RRDVAR *)avl_search_lock(tree, (avl *)&tmp); +} + +inline void rrdvar_free(RRDHOST *host, avl_tree_lock *tree, RRDVAR *rv) { + (void)host; + + if(!rv) return; + + if(tree) { + debug(D_VARIABLES, "Deleting variable '%s'", rv->name); + if(unlikely(!rrdvar_index_del(tree, rv))) + error("RRDVAR: Attempted to delete variable '%s' from host '%s', but it is not found.", rv->name, host->hostname); + } + + if(rv->options & RRDVAR_OPTION_ALLOCATED) + freez(rv->value); + + freez(rv->name); + freez(rv); +} + +inline RRDVAR *rrdvar_create_and_index(const char *scope, avl_tree_lock *tree, const char *name, RRDVAR_TYPE type, RRDVAR_OPTIONS options, void *value) { + char *variable = strdupz(name); + rrdvar_fix_name(variable); + uint32_t hash = simple_hash(variable); + + RRDVAR *rv = rrdvar_index_find(tree, variable, hash); + if(unlikely(!rv)) { + debug(D_VARIABLES, "Variable '%s' not found in scope '%s'. Creating a new one.", variable, scope); + + rv = callocz(1, sizeof(RRDVAR)); + rv->name = variable; + rv->hash = hash; + rv->type = type; + rv->options = options; + rv->value = value; + rv->last_updated = now_realtime_sec(); + + RRDVAR *ret = rrdvar_index_add(tree, rv); + if(unlikely(ret != rv)) { + debug(D_VARIABLES, "Variable '%s' in scope '%s' already exists", variable, scope); + freez(rv); + freez(variable); + rv = NULL; + } + else + debug(D_VARIABLES, "Variable '%s' created in scope '%s'", variable, scope); + } + else { + debug(D_VARIABLES, "Variable '%s' is already found in scope '%s'.", variable, scope); + + // already exists + freez(variable); + + // this is important + // it must return NULL - not the existing variable - or double-free will happen + rv = NULL; + } + + return rv; +} + +void rrdvar_free_remaining_variables(RRDHOST *host, avl_tree_lock *tree_lock) { + // This is not bullet proof - avl should support some means to destroy it + // with a callback for each item already in the index + + RRDVAR *rv, *last = NULL; + while((rv = (RRDVAR *)tree_lock->avl_tree.root)) { + if(unlikely(rv == last)) { + error("RRDVAR: INTERNAL ERROR: Cannot cleanup tree of RRDVARs"); + break; + } + last = rv; + rrdvar_free(host, tree_lock, rv); + } +} + +// ---------------------------------------------------------------------------- +// CUSTOM HOST VARIABLES + +inline int rrdvar_callback_for_all_host_variables(RRDHOST *host, int (*callback)(void * /*rrdvar*/, void * /*data*/), void *data) { + return avl_traverse_lock(&host->rrdvar_root_index, callback, data); +} + +static RRDVAR *rrdvar_custom_variable_create(const char *scope, avl_tree_lock *tree_lock, const char *name) { + calculated_number *v = callocz(1, sizeof(calculated_number)); + *v = NAN; + + RRDVAR *rv = rrdvar_create_and_index(scope, tree_lock, name, RRDVAR_TYPE_CALCULATED, RRDVAR_OPTION_CUSTOM_HOST_VAR|RRDVAR_OPTION_ALLOCATED, v); + if(unlikely(!rv)) { + freez(v); + debug(D_VARIABLES, "Requested variable '%s' already exists - possibly 2 plugins are updating it at the same time.", name); + + char *variable = strdupz(name); + rrdvar_fix_name(variable); + uint32_t hash = simple_hash(variable); + + // find the existing one to return it + rv = rrdvar_index_find(tree_lock, variable, hash); + + freez(variable); + } + + return rv; +} + +RRDVAR *rrdvar_custom_host_variable_create(RRDHOST *host, const char *name) { + return rrdvar_custom_variable_create("host", &host->rrdvar_root_index, name); +} + +void rrdvar_custom_host_variable_set(RRDHOST *host, RRDVAR *rv, calculated_number value) { + if(rv->type != RRDVAR_TYPE_CALCULATED || !(rv->options & RRDVAR_OPTION_CUSTOM_HOST_VAR) || !(rv->options & RRDVAR_OPTION_ALLOCATED)) + error("requested to set variable '%s' to value " CALCULATED_NUMBER_FORMAT " but the variable is not a custom one.", rv->name, value); + else { + calculated_number *v = rv->value; + if(*v != value) { + *v = value; + + rv->last_updated = now_realtime_sec(); + + // if the host is streaming, send this variable upstream immediately + rrdpush_sender_send_this_host_variable_now(host, rv); + } + } +} + +int foreach_host_variable_callback(RRDHOST *host, int (*callback)(RRDVAR * /*rv*/, void * /*data*/), void *data) { + return avl_traverse_lock(&host->rrdvar_root_index, (int (*)(void *, void *))callback, data); +} + +// ---------------------------------------------------------------------------- +// RRDVAR lookup + +calculated_number rrdvar2number(RRDVAR *rv) { + switch(rv->type) { + case RRDVAR_TYPE_CALCULATED: { + calculated_number *n = (calculated_number *)rv->value; + return *n; + } + + case RRDVAR_TYPE_TIME_T: { + time_t *n = (time_t *)rv->value; + return *n; + } + + case RRDVAR_TYPE_COLLECTED: { + collected_number *n = (collected_number *)rv->value; + return *n; + } + + case RRDVAR_TYPE_TOTAL: { + total_number *n = (total_number *)rv->value; + return *n; + } + + case RRDVAR_TYPE_INT: { + int *n = (int *)rv->value; + return *n; + } + + default: + error("I don't know how to convert RRDVAR type %u to calculated_number", rv->type); + return NAN; + } +} + +int health_variable_lookup(const char *variable, uint32_t hash, RRDCALC *rc, calculated_number *result) { + RRDSET *st = rc->rrdset; + if(!st) return 0; + + RRDHOST *host = st->rrdhost; + RRDVAR *rv; + + rv = rrdvar_index_find(&st->rrdvar_root_index, variable, hash); + if(rv) { + *result = rrdvar2number(rv); + return 1; + } + + rv = rrdvar_index_find(&st->rrdfamily->rrdvar_root_index, variable, hash); + if(rv) { + *result = rrdvar2number(rv); + return 1; + } + + rv = rrdvar_index_find(&host->rrdvar_root_index, variable, hash); + if(rv) { + *result = rrdvar2number(rv); + return 1; + } + + return 0; +} + +// ---------------------------------------------------------------------------- +// RRDVAR to JSON + +struct variable2json_helper { + BUFFER *buf; + size_t counter; +}; + +static int single_variable2json(void *entry, void *data) { + struct variable2json_helper *helper = (struct variable2json_helper *)data; + RRDVAR *rv = (RRDVAR *)entry; + calculated_number value = rrdvar2number(rv); + + if(unlikely(isnan(value) || isinf(value))) + buffer_sprintf(helper->buf, "%s\n\t\t\"%s\": null", helper->counter?",":"", rv->name); + else + buffer_sprintf(helper->buf, "%s\n\t\t\"%s\": %0.5" LONG_DOUBLE_MODIFIER, helper->counter?",":"", rv->name, (LONG_DOUBLE)value); + + helper->counter++; + + return 0; +} + +void health_api_v1_chart_variables2json(RRDSET *st, BUFFER *buf) { + RRDHOST *host = st->rrdhost; + + struct variable2json_helper helper = { + .buf = buf, + .counter = 0 + }; + + buffer_sprintf(buf, "{\n\t\"chart\": \"%s\",\n\t\"chart_name\": \"%s\",\n\t\"chart_context\": \"%s\",\n\t\"chart_variables\": {", st->id, st->name, st->context); + avl_traverse_lock(&st->rrdvar_root_index, single_variable2json, (void *)&helper); + buffer_sprintf(buf, "\n\t},\n\t\"family\": \"%s\",\n\t\"family_variables\": {", st->family); + helper.counter = 0; + avl_traverse_lock(&st->rrdfamily->rrdvar_root_index, single_variable2json, (void *)&helper); + buffer_sprintf(buf, "\n\t},\n\t\"host\": \"%s\",\n\t\"host_variables\": {", host->hostname); + helper.counter = 0; + avl_traverse_lock(&host->rrdvar_root_index, single_variable2json, (void *)&helper); + buffer_strcat(buf, "\n\t}\n}\n"); +} + diff --git a/database/rrdvar.h b/database/rrdvar.h new file mode 100644 index 0000000..6d1461b --- /dev/null +++ b/database/rrdvar.h @@ -0,0 +1,66 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDVAR_H +#define NETDATA_RRDVAR_H 1 + +#include "libnetdata/libnetdata.h" + +extern int rrdvar_compare(void *a, void *b); + +typedef enum rrdvar_type { + RRDVAR_TYPE_CALCULATED = 1, + RRDVAR_TYPE_TIME_T = 2, + RRDVAR_TYPE_COLLECTED = 3, + RRDVAR_TYPE_TOTAL = 4, + RRDVAR_TYPE_INT = 5 +} RRDVAR_TYPE; + +typedef enum rrdvar_options { + RRDVAR_OPTION_DEFAULT = 0, + RRDVAR_OPTION_ALLOCATED = (1 << 0), // the value ptr is allocated (not a reference) + RRDVAR_OPTION_CUSTOM_HOST_VAR = (1 << 1), // this is a custom host variable, not associated with a dimension + RRDVAR_OPTION_CUSTOM_CHART_VAR = (1 << 2), // this is a custom chart variable, not associated with a dimension + RRDVAR_OPTION_RRDCALC_LOCAL_VAR = (1 << 3), // this is a an alarm variable, attached to a chart + RRDVAR_OPTION_RRDCALC_FAMILY_VAR = (1 << 4), // this is a an alarm variable, attached to a family + RRDVAR_OPTION_RRDCALC_HOST_CHARTID_VAR = (1 << 5), // this is a an alarm variable, attached to the host, using the chart id + RRDVAR_OPTION_RRDCALC_HOST_CHARTNAME_VAR = (1 << 6), // this is a an alarm variable, attached to the host, using the chart name +} RRDVAR_OPTIONS; + +// the variables as stored in the variables indexes +// there are 3 indexes: +// 1. at each chart (RRDSET.rrdvar_root_index) +// 2. at each context (RRDFAMILY.rrdvar_root_index) +// 3. at each host (RRDHOST.rrdvar_root_index) +struct rrdvar { + avl avl; + + char *name; + uint32_t hash; + + RRDVAR_TYPE type; + RRDVAR_OPTIONS options; + + void *value; + + time_t last_updated; +}; + +#define RRDVAR_MAX_LENGTH 1024 + +extern int rrdvar_fix_name(char *variable); + +#include "rrd.h" + +extern RRDVAR *rrdvar_custom_host_variable_create(RRDHOST *host, const char *name); +extern void rrdvar_custom_host_variable_set(RRDHOST *host, RRDVAR *rv, calculated_number value); +extern int foreach_host_variable_callback(RRDHOST *host, int (*callback)(RRDVAR *rv, void *data), void *data); +extern void rrdvar_free_remaining_variables(RRDHOST *host, avl_tree_lock *tree_lock); + +extern int rrdvar_callback_for_all_host_variables(RRDHOST *host, int (*callback)(void *rrdvar, void *data), void *data); + +extern calculated_number rrdvar2number(RRDVAR *rv); + +extern RRDVAR *rrdvar_create_and_index(const char *scope, avl_tree_lock *tree, const char *name, RRDVAR_TYPE type, RRDVAR_OPTIONS options, void *value); +extern void rrdvar_free(RRDHOST *host, avl_tree_lock *tree, RRDVAR *rv); + +#endif //NETDATA_RRDVAR_H |