diff options
Diffstat (limited to '')
-rw-r--r-- | src/database/KolmogorovSmirnovDist.c (renamed from database/KolmogorovSmirnovDist.c) | 0 | ||||
-rw-r--r-- | src/database/KolmogorovSmirnovDist.h (renamed from database/KolmogorovSmirnovDist.h) | 0 | ||||
-rw-r--r-- | src/database/README.md | 153 | ||||
-rw-r--r-- | src/database/contexts/README.md (renamed from database/contexts/README.md) | 0 | ||||
-rw-r--r-- | src/database/contexts/api_v1.c (renamed from database/contexts/api_v1.c) | 8 | ||||
-rw-r--r-- | src/database/contexts/api_v2.c (renamed from database/contexts/api_v2.c) | 304 | ||||
-rw-r--r-- | src/database/contexts/context.c (renamed from database/contexts/context.c) | 0 | ||||
-rw-r--r-- | src/database/contexts/instance.c (renamed from database/contexts/instance.c) | 2 | ||||
-rw-r--r-- | src/database/contexts/internal.h (renamed from database/contexts/internal.h) | 0 | ||||
-rw-r--r-- | src/database/contexts/metric.c (renamed from database/contexts/metric.c) | 0 | ||||
-rw-r--r-- | src/database/contexts/query_scope.c (renamed from database/contexts/query_scope.c) | 0 | ||||
-rw-r--r-- | src/database/contexts/query_target.c (renamed from database/contexts/query_target.c) | 60 | ||||
-rw-r--r-- | src/database/contexts/rrdcontext.c (renamed from database/contexts/rrdcontext.c) | 0 | ||||
-rw-r--r-- | src/database/contexts/rrdcontext.h (renamed from database/contexts/rrdcontext.h) | 13 | ||||
-rw-r--r-- | src/database/contexts/worker.c (renamed from database/contexts/worker.c) | 2 | ||||
-rw-r--r-- | src/database/engine/README.md (renamed from database/engine/README.md) | 0 | ||||
-rw-r--r-- | src/database/engine/cache.c (renamed from database/engine/cache.c) | 93 | ||||
-rw-r--r-- | src/database/engine/cache.h (renamed from database/engine/cache.h) | 9 | ||||
-rw-r--r-- | src/database/engine/datafile.c (renamed from database/engine/datafile.c) | 4 | ||||
-rw-r--r-- | src/database/engine/datafile.h (renamed from database/engine/datafile.h) | 0 | ||||
-rw-r--r-- | src/database/engine/dbengine-compression.c | 159 | ||||
-rw-r--r-- | src/database/engine/dbengine-compression.h | 15 | ||||
-rw-r--r-- | src/database/engine/dbengine-diagram.xml (renamed from database/engine/dbengine-diagram.xml) | 0 | ||||
-rw-r--r-- | src/database/engine/dbengine-stresstest.c | 456 | ||||
-rw-r--r-- | src/database/engine/dbengine-unittest.c | 419 | ||||
-rw-r--r-- | src/database/engine/journalfile.c (renamed from database/engine/journalfile.c) | 29 | ||||
-rw-r--r-- | src/database/engine/journalfile.h (renamed from database/engine/journalfile.h) | 1 | ||||
-rw-r--r-- | src/database/engine/metric.c (renamed from database/engine/metric.c) | 331 | ||||
-rw-r--r-- | src/database/engine/metric.h (renamed from database/engine/metric.h) | 15 | ||||
-rw-r--r-- | src/database/engine/page.c (renamed from database/engine/page.c) | 106 | ||||
-rw-r--r-- | src/database/engine/page.h (renamed from database/engine/page.h) | 0 | ||||
-rw-r--r-- | src/database/engine/page_test.cc (renamed from database/engine/page_test.cc) | 0 | ||||
-rw-r--r-- | src/database/engine/page_test.h (renamed from database/engine/page_test.h) | 0 | ||||
-rw-r--r-- | src/database/engine/pagecache.c (renamed from database/engine/pagecache.c) | 24 | ||||
-rw-r--r-- | src/database/engine/pagecache.h (renamed from database/engine/pagecache.h) | 6 | ||||
-rw-r--r-- | src/database/engine/pdc.c (renamed from database/engine/pdc.c) | 89 | ||||
-rw-r--r-- | src/database/engine/pdc.h (renamed from database/engine/pdc.h) | 0 | ||||
-rw-r--r-- | src/database/engine/rrddiskprotocol.h (renamed from database/engine/rrddiskprotocol.h) | 16 | ||||
-rw-r--r-- | src/database/engine/rrdengine.c (renamed from database/engine/rrdengine.c) | 92 | ||||
-rw-r--r-- | src/database/engine/rrdengine.h (renamed from database/engine/rrdengine.h) | 16 | ||||
-rwxr-xr-x | src/database/engine/rrdengineapi.c (renamed from database/engine/rrdengineapi.c) | 269 | ||||
-rw-r--r-- | src/database/engine/rrdengineapi.h (renamed from database/engine/rrdengineapi.h) | 45 | ||||
-rw-r--r-- | src/database/engine/rrdenginelib.c (renamed from database/engine/rrdenginelib.c) | 0 | ||||
-rw-r--r-- | src/database/engine/rrdenginelib.h (renamed from database/engine/rrdenginelib.h) | 0 | ||||
-rw-r--r-- | src/database/ram/README.md | 11 | ||||
-rw-r--r-- | src/database/ram/rrddim_mem.c | 437 | ||||
-rw-r--r-- | src/database/ram/rrddim_mem.h | 54 | ||||
-rw-r--r-- | src/database/rrd.c (renamed from database/rrd.c) | 34 | ||||
-rw-r--r-- | src/database/rrd.h (renamed from database/rrd.h) | 392 | ||||
-rw-r--r-- | src/database/rrdcollector-internals.h | 17 | ||||
-rw-r--r-- | src/database/rrdcollector.c | 137 | ||||
-rw-r--r-- | src/database/rrdcollector.h | 14 | ||||
-rw-r--r-- | src/database/rrddim.c | 586 | ||||
-rw-r--r-- | src/database/rrdfunctions-exporters.c | 164 | ||||
-rw-r--r-- | src/database/rrdfunctions-exporters.h | 17 | ||||
-rw-r--r-- | src/database/rrdfunctions-inflight.c | 691 | ||||
-rw-r--r-- | src/database/rrdfunctions-inflight.h | 16 | ||||
-rw-r--r-- | src/database/rrdfunctions-inline.c | 44 | ||||
-rw-r--r-- | src/database/rrdfunctions-inline.h | 14 | ||||
-rw-r--r-- | src/database/rrdfunctions-internals.h | 36 | ||||
-rw-r--r-- | src/database/rrdfunctions-progress.c | 8 | ||||
-rw-r--r-- | src/database/rrdfunctions-progress.h | 10 | ||||
-rw-r--r-- | src/database/rrdfunctions-streaming.c | 627 | ||||
-rw-r--r-- | src/database/rrdfunctions-streaming.h | 12 | ||||
-rw-r--r-- | src/database/rrdfunctions.c | 374 | ||||
-rw-r--r-- | src/database/rrdfunctions.h | 96 | ||||
-rw-r--r-- | src/database/rrdhost.c (renamed from database/rrdhost.c) | 402 | ||||
-rw-r--r-- | src/database/rrdlabels.c (renamed from database/rrdlabels.c) | 372 | ||||
-rw-r--r-- | src/database/rrdlabels.h (renamed from database/rrdlabels.h) | 28 | ||||
-rw-r--r-- | src/database/rrdset.c (renamed from database/rrdset.c) | 367 | ||||
-rw-r--r-- | src/database/sqlite/dbdata.c (renamed from database/sqlite/dbdata.c) | 0 | ||||
-rw-r--r-- | src/database/sqlite/sqlite3.c (renamed from database/sqlite/sqlite3.c) | 0 | ||||
-rw-r--r-- | src/database/sqlite/sqlite3.h (renamed from database/sqlite/sqlite3.h) | 0 | ||||
-rw-r--r-- | src/database/sqlite/sqlite3recover.c (renamed from database/sqlite/sqlite3recover.c) | 0 | ||||
-rw-r--r-- | src/database/sqlite/sqlite3recover.h (renamed from database/sqlite/sqlite3recover.h) | 0 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_aclk.c (renamed from database/sqlite/sqlite_aclk.c) | 22 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_aclk.h (renamed from database/sqlite/sqlite_aclk.h) | 0 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_aclk_alert.c (renamed from database/sqlite/sqlite_aclk_alert.c) | 15 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_aclk_alert.h (renamed from database/sqlite/sqlite_aclk_alert.h) | 0 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_aclk_node.c (renamed from database/sqlite/sqlite_aclk_node.c) | 0 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_aclk_node.h (renamed from database/sqlite/sqlite_aclk_node.h) | 0 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_context.c (renamed from database/sqlite/sqlite_context.c) | 37 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_context.h (renamed from database/sqlite/sqlite_context.h) | 1 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_db_migration.c (renamed from database/sqlite/sqlite_db_migration.c) | 60 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_db_migration.h (renamed from database/sqlite/sqlite_db_migration.h) | 0 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_functions.c | 373 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_functions.h | 47 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_health.c (renamed from database/sqlite/sqlite_health.c) | 365 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_health.h (renamed from database/sqlite/sqlite_health.h) | 9 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_metadata.c | 2645 | ||||
-rw-r--r-- | src/database/sqlite/sqlite_metadata.h | 59 | ||||
-rw-r--r-- | src/database/storage_engine.c | 94 | ||||
-rw-r--r-- | src/database/storage_engine.h (renamed from database/storage_engine.h) | 0 |
93 files changed, 9653 insertions, 1770 deletions
diff --git a/database/KolmogorovSmirnovDist.c b/src/database/KolmogorovSmirnovDist.c index 1486abc7b..1486abc7b 100644 --- a/database/KolmogorovSmirnovDist.c +++ b/src/database/KolmogorovSmirnovDist.c diff --git a/database/KolmogorovSmirnovDist.h b/src/database/KolmogorovSmirnovDist.h index cf455042a..cf455042a 100644 --- a/database/KolmogorovSmirnovDist.h +++ b/src/database/KolmogorovSmirnovDist.h diff --git a/src/database/README.md b/src/database/README.md new file mode 100644 index 000000000..fb0209316 --- /dev/null +++ b/src/database/README.md @@ -0,0 +1,153 @@ +# Database + +Netdata is fully capable of long-term metrics storage, at per-second granularity, via its default database engine +(`dbengine`). But to remain as flexible as possible, Netdata supports several storage options: + +1. `dbengine`, (the default) data are in database files. The [Database Engine](https://github.com/netdata/netdata/blob/master/src/database/engine/README.md) works like a + traditional database. There is some amount of RAM dedicated to data caching and indexing and the rest of the data + reside compressed on disk. The number of history entries is not fixed in this case, but depends on the configured + disk space and the effective compression ratio of the data stored. This is the **only mode** that supports changing + the data collection update frequency (`update every`) **without losing** the previously stored metrics. For more + details see [here](https://github.com/netdata/netdata/blob/master/src/database/engine/README.md). + +2. `ram`, data are purely in memory. Data are never saved on disk. This mode uses `mmap()` and supports [KSM](#ksm). + +3. `alloc`, like `ram` but it uses `calloc()` and does not support [KSM](#ksm). This mode is the fallback for all others + except `none`. + +4. `none`, without a database (collected metrics can only be streamed to another Netdata). 
+ +## Which database mode to use + +The default mode `[db].mode = dbengine` has been designed to scale for longer retentions and is the only mode suitable +for parent Agents in the _Parent - Child_ setups + +The other available database modes are designed to minimize resource utilization and should only be considered on +[Parent - Child](https://github.com/netdata/netdata/blob/master/docs/metrics-storage-management/enable-streaming.md) setups at the children side and only when the +resource constraints are very strict. + +So, + +- On a single node setup, use `[db].mode = dbengine`. +- On a [Parent - Child](https://github.com/netdata/netdata/blob/master/docs/metrics-storage-management/enable-streaming.md) setup, use `[db].mode = dbengine` on the + parent to increase retention, and a more resource-efficient mode like, `dbengine` with light retention settings, `ram`, or `none` for the children to minimize resource utilization. + +## Choose your database mode + +You can select the database mode by editing `netdata.conf` and setting: + +```conf +[db] + # dbengine (default), ram (the default if dbengine not available), alloc, none + mode = dbengine +``` + +## Netdata Longer Metrics Retention + +Metrics retention is controlled only by the disk space allocated to storing metrics. But it also affects the memory and +CPU required by the agent to query longer timeframes. + +Since Netdata Agents usually run on the edge, on production systems, Netdata Agent **parents** should be considered. +When having a [**parent - child**](https://github.com/netdata/netdata/blob/master/docs/metrics-storage-management/enable-streaming.md) setup, the child (the +Netdata Agent running on a production system) delegates all of its functions, including longer metrics retention and +querying, to the parent node that can dedicate more resources to this task. 
A single Netdata Agent parent can centralize +multiple children Netdata Agents (dozens, hundreds, or even thousands depending on its available resources). + +## Running Netdata on embedded devices + +Embedded devices typically have very limited RAM resources available. + +There are two settings for you to configure: + +1. `[db].update every`, which controls the data collection frequency +2. `[db].retention`, which controls the size of the database in memory (except for `[db].mode = dbengine`) + +By default `[db].update every = 1` and `[db].retention = 3600`. This gives you an hour of data with per second updates. + +If you set `[db].update every = 2` and `[db].retention = 1800`, you will still have an hour of data, but collected once +every 2 seconds. This will **cut in half** both CPU and RAM resources consumed by Netdata. Of course experiment a bit to find the right setting. +On very weak devices you might have to use `[db].update every = 5` and `[db].retention = 720` (still 1 hour of data, but +1/5 of the CPU and RAM resources). + +You can also disable [data collection plugins](https://github.com/netdata/netdata/blob/master/src/collectors/README.md) that you don't need. Disabling such plugins will also +free both CPU and RAM resources. + +## Memory optimizations + +### KSM + +KSM performs memory deduplication by scanning through main memory for physical pages that have identical content, and +identifies the virtual pages that are mapped to those physical pages. It leaves one page unchanged, and re-maps each +duplicate page to point to the same physical page. Netdata offers all of its in-memory database to kernel for +deduplication. + +In the past, KSM has been criticized for consuming a lot of CPU resources. This is true when KSM is used for +deduplicating certain applications, but it is not true for Netdata. 
Agent's memory is written very infrequently +(if you have 24 hours of metrics in Netdata, each byte at the in-memory database will be updated just once per day). KSM +is a solution that will provide 60+% memory savings to Netdata. + +### Enable KSM in kernel + +To enable KSM in kernel, you need to run a kernel compiled with the following: + +```sh +CONFIG_KSM=y +``` + +When KSM is enabled at the kernel, it is just available for the user to enable it. + +If you build a kernel with `CONFIG_KSM=y`, you will just get a few files in `/sys/kernel/mm/ksm`. Nothing else +happens. There is no performance penalty (apart from the memory this code occupies into the kernel). + +The files that `CONFIG_KSM=y` offers include: + +- `/sys/kernel/mm/ksm/run` by default `0`. You have to set this to `1` for the kernel to spawn `ksmd`. +- `/sys/kernel/mm/ksm/sleep_millisecs`, by default `20`. The frequency ksmd should evaluate memory for deduplication. +- `/sys/kernel/mm/ksm/pages_to_scan`, by default `100`. The amount of pages ksmd will evaluate on each run. + +So, by default `ksmd` is just disabled. It will not harm performance and the user/admin can control the CPU resources +they are willing to have used by `ksmd`. + +### Run `ksmd` kernel daemon + +To activate / run `ksmd,` you need to run the following: + +```sh +echo 1 >/sys/kernel/mm/ksm/run +echo 1000 >/sys/kernel/mm/ksm/sleep_millisecs +``` + +With these settings, ksmd does not even appear in the running process list (it will run once per second and evaluate 100 +pages for de-duplication). + +Put the above lines in your boot sequence (`/etc/rc.local` or equivalent) to have `ksmd` run at boot. + +### Monitoring Kernel Memory de-duplication performance + +Netdata will create charts for kernel memory de-duplication performance, the **deduper (ksm)** charts can be seen under the **Memory** section in the Netdata UI. 
+ +#### KSM summary + +The summary gives you a quick idea of how much savings (in terms of bytes and in terms of percentage) KSM is able to achieve. + +![image](https://user-images.githubusercontent.com/24860547/199454880-123ae7c4-071a-4811-95b8-18cf4e4f60a2.png) + +#### KSM pages merge performance + +This chart indicates the performance of page merging. **Shared** indicates used shared pages, **Unshared** indicates memory no longer shared (pages are unique but repeatedly checked for merging), **Sharing** indicates memory currently shared(how many more sites are sharing the pages, i.e. how much saved) and **Volatile** indicates volatile pages (changing too fast to be placed in a tree). + +A high ratio of Sharing to Shared indicates good sharing, but a high ratio of Unshared to Sharing indicates wasted effort. + +![image](https://user-images.githubusercontent.com/24860547/199455374-d63fd2c2-e12b-4ddf-947b-35371215eb05.png) + +#### KSM savings + +This chart shows the amount of memory saved by KSM. **Savings** indicates saved memory. **Offered** indicates memory marked as mergeable. + +![image](https://user-images.githubusercontent.com/24860547/199455604-43cd9248-1f6e-4c31-be56-e0b9e432f48a.png) + +#### KSM effectiveness + +This chart tells you how well KSM is doing at what it is supposed to. It does this by charting the percentage of the mergeable pages that are currently merged. 
+ +![image](https://user-images.githubusercontent.com/24860547/199455770-4d7991ff-6b7e-4d96-9d23-33ffc572b370.png) diff --git a/database/contexts/README.md b/src/database/contexts/README.md index e69de29bb..e69de29bb 100644 --- a/database/contexts/README.md +++ b/src/database/contexts/README.md diff --git a/database/contexts/api_v1.c b/src/database/contexts/api_v1.c index f144e6f7b..355aaf91a 100644 --- a/database/contexts/api_v1.c +++ b/src/database/contexts/api_v1.c @@ -131,13 +131,13 @@ static inline int rrdinstance_to_json_callback(const DICTIONARY_ITEM *item, void if(before && (!ri->first_time_s || before < ri->first_time_s)) return 0; - if(t_parent->chart_label_key && !rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, t_parent->chart_label_key, - '\0', NULL)) + if(t_parent->chart_label_key && rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, t_parent->chart_label_key, + '\0', NULL) != SP_MATCHED_POSITIVE) return 0; - if(t_parent->chart_labels_filter && !rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, + if(t_parent->chart_labels_filter && rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, t_parent->chart_labels_filter, ':', - NULL)) + NULL) != SP_MATCHED_POSITIVE) return 0; time_t first_time_s = ri->first_time_s; diff --git a/database/contexts/api_v2.c b/src/database/contexts/api_v2.c index 3ca49a319..edfeab1d6 100644 --- a/database/contexts/api_v2.c +++ b/src/database/contexts/api_v2.c @@ -167,6 +167,9 @@ struct function_v2_entry { size_t used; size_t *node_ids; STRING *help; + STRING *tags; + HTTP_ACCESS access; + int priority; }; struct context_v2_entry { @@ -180,6 +183,13 @@ struct context_v2_entry { FTS_MATCH match; }; +struct alert_counts { + size_t critical; + size_t warning; + size_t clear; + size_t error; +}; + struct alert_v2_entry { RRDCALC *tmp; @@ -188,16 +198,25 @@ struct alert_v2_entry { size_t ati; - size_t critical; - size_t warning; - size_t clear; - size_t error; + struct alert_counts counts; size_t instances; DICTIONARY *nodes; 
DICTIONARY *configs; }; +struct alert_by_x_entry { + struct { + struct alert_counts counts; + size_t silent; + size_t total; + } running; + + struct { + size_t available; + } prototypes; +}; + typedef struct full_text_search_index { size_t searches; size_t string_searches; @@ -251,8 +270,14 @@ struct rrdcontext_to_json_v2_data { size_t ati; - DICTIONARY *alerts; + DICTIONARY *summary; DICTIONARY *alert_instances; + + DICTIONARY *by_type; + DICTIONARY *by_component; + DICTIONARY *by_classification; + DICTIONARY *by_recipient; + DICTIONARY *by_module; } alerts; struct { @@ -276,9 +301,7 @@ struct rrdcontext_to_json_v2_data { struct query_timings timings; }; -static void alerts_v2_add(struct alert_v2_entry *t, RRDCALC *rc) { - t->instances++; - +static void alert_counts_add(struct alert_counts *t, RRDCALC *rc) { switch(rc->status) { case RRDCALC_STATUS_CRITICAL: t->critical++; @@ -303,20 +326,51 @@ static void alerts_v2_add(struct alert_v2_entry *t, RRDCALC *rc) { break; } +} + +static void alerts_v2_add(struct alert_v2_entry *t, RRDCALC *rc) { + t->instances++; + + alert_counts_add(&t->counts, rc); dictionary_set(t->nodes, rc->rrdset->rrdhost->machine_guid, NULL, 0); char key[UUID_STR_LEN + 1]; - uuid_unparse_lower(rc->config_hash_id, key); + uuid_unparse_lower(rc->config.hash_id, key); dictionary_set(t->configs, key, NULL, 0); } +static void alerts_by_x_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data) { + static STRING *silent = NULL; + if(unlikely(!silent)) silent = string_strdupz("silent"); + + struct alert_by_x_entry *b = value; + RRDCALC *rc = data; + if(!rc) { + // prototype + b->prototypes.available++; + } + else { + alert_counts_add(&b->running.counts, rc); + + b->running.total++; + + if (rc->config.recipient == silent) + b->running.silent++; + } +} + +static bool alerts_by_x_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value, void *new_value __maybe_unused, void *data __maybe_unused) { + 
alerts_by_x_insert_callback(item, old_value, data); + return false; +} + static void alerts_v2_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data) { struct rrdcontext_to_json_v2_data *ctl = data; struct alert_v2_entry *t = value; RRDCALC *rc = t->tmp; - t->name = rc->name; - t->summary = rc->summary; + t->name = rc->config.name; + t->summary = rc->config.summary; // the original summary t->ati = ctl->alerts.ati++; t->nodes = dictionary_create(DICT_OPTION_SINGLE_THREADED|DICT_OPTION_VALUE_LINK_DONT_CLONE|DICT_OPTION_NAME_LINK_DONT_CLONE); @@ -347,16 +401,16 @@ static void alert_instances_v2_insert_callback(const DICTIONARY_ITEM *item __may t->chart_id = rc->rrdset->id; t->chart_name = rc->rrdset->name; t->family = rc->rrdset->family; - t->units = rc->units; - t->classification = rc->classification; - t->type = rc->type; - t->recipient = rc->recipient; - t->component = rc->component; - t->name = rc->name; - t->source = rc->source; + t->units = rc->config.units; + t->classification = rc->config.classification; + t->type = rc->config.type; + t->recipient = rc->config.recipient; + t->component = rc->config.component; + t->name = rc->config.name; + t->source = rc->config.source; t->status = rc->status; t->flags = rc->run_flags; - t->info = rc->info; + t->info = rc->config.info; t->summary = rc->summary; t->value = rc->value; t->last_updated = rc->last_updated; @@ -365,12 +419,9 @@ static void alert_instances_v2_insert_callback(const DICTIONARY_ITEM *item __may t->host = rc->rrdset->rrdhost; t->alarm_id = rc->id; t->ni = ctl->nodes.ni; - t->global_id = rc->ae ? 
rc->ae->global_id : 0; - t->name = rc->name; - uuid_copy(t->config_hash_id, rc->config_hash_id); - if(rc->ae) - uuid_copy(t->last_transition_id, rc->ae->transition_id); + uuid_copy(t->config_hash_id, rc->config.hash_id); + health_alarm_log_get_global_id_and_transition_id_for_rrdcalc(rc, &t->global_id, &t->last_transition_id); } static bool alert_instances_v2_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *old_value __maybe_unused, void *new_value __maybe_unused, void *data __maybe_unused) { @@ -422,7 +473,7 @@ static FTS_MATCH rrdcontext_to_json_v2_full_text_search(struct rrdcontext_to_jso size_t label_searches = 0; if(unlikely(ri->rrdlabels && rrdlabels_entries(ri->rrdlabels) && - rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, q, ':', &label_searches))) { + rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, q, ':', &label_searches) == SP_MATCHED_POSITIVE)) { ctl->q.fts.searches += label_searches; ctl->q.fts.char_searches += label_searches; matched = FTS_MATCHED_LABEL; @@ -435,12 +486,12 @@ static FTS_MATCH rrdcontext_to_json_v2_full_text_search(struct rrdcontext_to_jso RRDSET *st = ri->rrdset; rw_spinlock_read_lock(&st->alerts.spinlock); for (RRDCALC *rcl = st->alerts.base; rcl; rcl = rcl->next) { - if(unlikely(full_text_search_string(&ctl->q.fts, q, rcl->name))) { + if(unlikely(full_text_search_string(&ctl->q.fts, q, rcl->config.name))) { matched = FTS_MATCHED_ALERT; break; } - if(unlikely(full_text_search_string(&ctl->q.fts, q, rcl->info))) { + if(unlikely(full_text_search_string(&ctl->q.fts, q, rcl->config.info))) { matched = FTS_MATCHED_ALERT_INFO; break; } @@ -460,7 +511,7 @@ static bool rrdcontext_matches_alert(struct rrdcontext_to_json_v2_data *ctl, RRD RRDSET *st = ri->rrdset; rw_spinlock_read_lock(&st->alerts.spinlock); for (RRDCALC *rcl = st->alerts.base; rcl; rcl = rcl->next) { - if(ctl->alerts.alert_name_pattern && !simple_pattern_matches_string(ctl->alerts.alert_name_pattern, rcl->name)) + if(ctl->alerts.alert_name_pattern 
&& !simple_pattern_matches_string(ctl->alerts.alert_name_pattern, rcl->config.name)) continue; if(ctl->alerts.alarm_id_filter && ctl->alerts.alarm_id_filter != rcl->id) @@ -500,11 +551,51 @@ static bool rrdcontext_matches_alert(struct rrdcontext_to_json_v2_data *ctl, RRD struct alert_v2_entry t = { .tmp = rcl, }; - struct alert_v2_entry *a2e = dictionary_set(ctl->alerts.alerts, string2str(rcl->name), &t, - sizeof(struct alert_v2_entry)); + struct alert_v2_entry *a2e = + dictionary_set(ctl->alerts.summary, string2str(rcl->config.name), + &t, sizeof(struct alert_v2_entry)); size_t ati = a2e->ati; matches++; + dictionary_set_advanced(ctl->alerts.by_type, + string2str(rcl->config.type), + (ssize_t)string_strlen(rcl->config.type), + NULL, + sizeof(struct alert_by_x_entry), + rcl); + + dictionary_set_advanced(ctl->alerts.by_component, + string2str(rcl->config.component), + (ssize_t)string_strlen(rcl->config.component), + NULL, + sizeof(struct alert_by_x_entry), + rcl); + + dictionary_set_advanced(ctl->alerts.by_classification, + string2str(rcl->config.classification), + (ssize_t)string_strlen(rcl->config.classification), + NULL, + sizeof(struct alert_by_x_entry), + rcl); + + dictionary_set_advanced(ctl->alerts.by_recipient, + string2str(rcl->config.recipient), + (ssize_t)string_strlen(rcl->config.recipient), + NULL, + sizeof(struct alert_by_x_entry), + rcl); + + char *module = NULL; + rrdlabels_get_value_strdup_or_null(st->rrdlabels, &module, "_collect_module"); + if(!module || !*module) module = "[unset]"; + + dictionary_set_advanced(ctl->alerts.by_module, + module, + -1, + NULL, + sizeof(struct alert_by_x_entry), + rcl); + if (ctl->options & (CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES | CONTEXT_V2_OPTION_ALERTS_WITH_VALUES)) { char key[20 + 1]; snprintfz(key, sizeof(key) - 1, "%p", rcl); @@ -726,6 +817,15 @@ static void agent_capabilities_to_json(BUFFER *wb, RRDHOST *host, const char *ke freez(capas); } +static inline void host_dyncfg_to_json_v2(BUFFER *wb, const char 
*key, RRDHOST_STATUS *s) { + buffer_json_member_add_object(wb, key); + { + buffer_json_member_add_string(wb, "status", rrdhost_dyncfg_status_to_string(s->dyncfg.status)); + } + buffer_json_object_close(wb); // health + +} + static inline void rrdhost_health_to_json_v2(BUFFER *wb, const char *key, RRDHOST_STATUS *s) { buffer_json_member_add_object(wb, key); { @@ -838,6 +938,8 @@ static void rrdcontext_to_json_v2_rrdhost(BUFFER *wb, RRDHOST *host, struct rrdc host_functions2json(host, wb); // functions agent_capabilities_to_json(wb, host, "capabilities"); + + host_dyncfg_to_json_v2(wb, "dyncfg", &s); } buffer_json_object_close(wb); // this instance buffer_json_array_close(wb); // instances @@ -913,8 +1015,11 @@ static ssize_t rrdcontext_to_json_v2_add_host(void *data, RRDHOST *host, bool qu .size = 1, .node_ids = &ctl->nodes.ni, .help = NULL, + .tags = NULL, + .access = HTTP_ACCESS_ALL, + .priority = RRDFUNCTIONS_PRIORITY_DEFAULT, }; - host_functions_to_dict(host, ctl->functions.dict, &t, sizeof(t), &t.help); + host_functions_to_dict(host, ctl->functions.dict, &t, sizeof(t), &t.help, &t.tags, &t.access, &t.priority); } if(ctl->mode & CONTEXTS_V2_NODES) { @@ -1013,10 +1118,10 @@ void buffer_json_agents_v2(BUFFER *wb, struct query_timings *timings, time_t now STORAGE_ENGINE *eng = localhost->db[tier].eng; if (!eng) continue; - uint64_t max = storage_engine_disk_space_max(eng->backend, localhost->db[tier].instance); - uint64_t used = storage_engine_disk_space_used(eng->backend, localhost->db[tier].instance); - time_t first_time_s = storage_engine_global_first_time_s(eng->backend, localhost->db[tier].instance); - size_t currently_collected_metrics = storage_engine_collected_metrics(eng->backend, localhost->db[tier].instance); + uint64_t max = storage_engine_disk_space_max(eng->seb, localhost->db[tier].si); + uint64_t used = storage_engine_disk_space_used(eng->seb, localhost->db[tier].si); + time_t first_time_s = storage_engine_global_first_time_s(eng->seb, 
localhost->db[tier].si); + size_t currently_collected_metrics = storage_engine_collected_metrics(eng->seb, localhost->db[tier].si); NETDATA_DOUBLE percent; if (used && max) @@ -1026,6 +1131,8 @@ void buffer_json_agents_v2(BUFFER *wb, struct query_timings *timings, time_t now buffer_json_add_array_item_object(wb); buffer_json_member_add_uint64(wb, "tier", tier); + buffer_json_member_add_uint64(wb, "metrics", storage_engine_metrics(eng->seb, localhost->db[tier].si)); + buffer_json_member_add_uint64(wb, "samples", storage_engine_samples(eng->seb, localhost->db[tier].si)); if(used || max) { buffer_json_member_add_uint64(wb, "disk_used", used); @@ -1212,14 +1319,9 @@ static void contexts_v2_alert_config_to_json_from_sql_alert_config_data(struct s buffer_json_member_add_string(wb, "type", is_template ? "template" : "alarm"); buffer_json_member_add_string(wb, "on", is_template ? t->selectors.on_template : t->selectors.on_key); - buffer_json_member_add_string(wb, "os", t->selectors.os); - buffer_json_member_add_string(wb, "hosts", t->selectors.hosts); buffer_json_member_add_string(wb, "families", t->selectors.families); - buffer_json_member_add_string(wb, "plugin", t->selectors.plugin); - buffer_json_member_add_string(wb, "module", t->selectors.module); buffer_json_member_add_string(wb, "host_labels", t->selectors.host_labels); buffer_json_member_add_string(wb, "chart_labels", t->selectors.chart_labels); - buffer_json_member_add_string(wb, "charts", t->selectors.charts); } buffer_json_object_close(wb); // selectors @@ -1236,9 +1338,13 @@ static void contexts_v2_alert_config_to_json_from_sql_alert_config_data(struct s buffer_json_member_add_time_t(wb, "after", t->value.db.after); buffer_json_member_add_time_t(wb, "before", t->value.db.before); + buffer_json_member_add_string(wb, "time_group_condition", alerts_group_conditions_id2txt(t->value.db.time_group_condition)); + buffer_json_member_add_double(wb, "time_group_value", t->value.db.time_group_value); + 
buffer_json_member_add_string(wb, "dims_group", alerts_dims_grouping_id2group(t->value.db.dims_group)); + buffer_json_member_add_string(wb, "data_source", alerts_data_source_id2source(t->value.db.data_source)); buffer_json_member_add_string(wb, "method", t->value.db.method); buffer_json_member_add_string(wb, "dimensions", t->value.db.dimensions); - web_client_api_request_v1_data_options_to_buffer_json_array(wb, "options",(RRDR_OPTIONS) t->value.db.options); + rrdr_options_to_buffer_json_array(wb, "options", (RRDR_OPTIONS)t->value.db.options); } buffer_json_object_close(wb); // db } @@ -1378,6 +1484,41 @@ static int contexts_v2_alert_instance_to_json_callback(const DICTIONARY_ITEM *it return 1; } +static void contexts_v2_alerts_by_x_update_prototypes(void *data, STRING *type, STRING *component, STRING *classification, STRING *recipient) { + struct rrdcontext_to_json_v2_data *ctl = data; + + dictionary_set_advanced(ctl->alerts.by_type, string2str(type), (ssize_t)string_strlen(type), NULL, sizeof(struct alert_by_x_entry), NULL); + dictionary_set_advanced(ctl->alerts.by_component, string2str(component), (ssize_t)string_strlen(component), NULL, sizeof(struct alert_by_x_entry), NULL); + dictionary_set_advanced(ctl->alerts.by_classification, string2str(classification), (ssize_t)string_strlen(classification), NULL, sizeof(struct alert_by_x_entry), NULL); + dictionary_set_advanced(ctl->alerts.by_recipient, string2str(recipient), (ssize_t)string_strlen(recipient), NULL, sizeof(struct alert_by_x_entry), NULL); +} + +static void contexts_v2_alerts_by_x_to_json(BUFFER *wb, DICTIONARY *dict, const char *key) { + buffer_json_member_add_array(wb, key); + { + struct alert_by_x_entry *b; + dfe_start_read(dict, b) { + buffer_json_add_array_item_object(wb); + { + buffer_json_member_add_string(wb, "name", b_dfe.name); + buffer_json_member_add_uint64(wb, "cr", b->running.counts.critical); + buffer_json_member_add_uint64(wb, "wr", b->running.counts.warning); + 
buffer_json_member_add_uint64(wb, "cl", b->running.counts.clear); + buffer_json_member_add_uint64(wb, "er", b->running.counts.error); + buffer_json_member_add_uint64(wb, "running", b->running.total); + + buffer_json_member_add_uint64(wb, "running_silent", b->running.silent); + + if(b->prototypes.available) + buffer_json_member_add_uint64(wb, "available", b->prototypes.available); + } + buffer_json_object_close(wb); + } + dfe_done(b); + } + buffer_json_array_close(wb); +} + static void contexts_v2_alert_instances_to_json(BUFFER *wb, const char *key, struct rrdcontext_to_json_v2_data *ctl, bool debug) { buffer_json_member_add_array(wb, key); { @@ -1397,7 +1538,7 @@ static void contexts_v2_alerts_to_json(BUFFER *wb, struct rrdcontext_to_json_v2_ buffer_json_member_add_array(wb, "alerts"); { struct alert_v2_entry *t; - dfe_start_read(ctl->alerts.alerts, t) + dfe_start_read(ctl->alerts.summary, t) { buffer_json_add_array_item_object(wb); { @@ -1405,10 +1546,10 @@ static void contexts_v2_alerts_to_json(BUFFER *wb, struct rrdcontext_to_json_v2_ buffer_json_member_add_string(wb, "nm", string2str(t->name)); buffer_json_member_add_string(wb, "sum", string2str(t->summary)); - buffer_json_member_add_uint64(wb, "cr", t->critical); - buffer_json_member_add_uint64(wb, "wr", t->warning); - buffer_json_member_add_uint64(wb, "cl", t->clear); - buffer_json_member_add_uint64(wb, "er", t->error); + buffer_json_member_add_uint64(wb, "cr", t->counts.critical); + buffer_json_member_add_uint64(wb, "wr", t->counts.warning); + buffer_json_member_add_uint64(wb, "cl", t->counts.clear); + buffer_json_member_add_uint64(wb, "er", t->counts.error); buffer_json_member_add_uint64(wb, "in", t->instances); buffer_json_member_add_uint64(wb, "nd", dictionary_entries(t->nodes)); @@ -1419,6 +1560,13 @@ static void contexts_v2_alerts_to_json(BUFFER *wb, struct rrdcontext_to_json_v2_ dfe_done(t); } buffer_json_array_close(wb); // alerts + + health_prototype_metadata_foreach(ctl, 
contexts_v2_alerts_by_x_update_prototypes); + contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_type, "alerts_by_type"); + contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_component, "alerts_by_component"); + contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_classification, "alerts_by_classification"); + contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_recipient, "alerts_by_recipient"); + contexts_v2_alerts_by_x_to_json(wb, ctl->alerts.by_module, "alerts_by_module"); } if(ctl->request->options & (CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES|CONTEXT_V2_OPTION_ALERTS_WITH_VALUES)) { @@ -1926,12 +2074,42 @@ int rrdcontext_to_json_v2(BUFFER *wb, struct api_v2_contexts_request *req, CONTE } } - ctl.alerts.alerts = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + ctl.alerts.summary = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct alert_v2_entry)); - dictionary_register_insert_callback(ctl.alerts.alerts, alerts_v2_insert_callback, &ctl); - dictionary_register_conflict_callback(ctl.alerts.alerts, alerts_v2_conflict_callback, &ctl); - dictionary_register_delete_callback(ctl.alerts.alerts, alerts_v2_delete_callback, &ctl); + dictionary_register_insert_callback(ctl.alerts.summary, alerts_v2_insert_callback, &ctl); + dictionary_register_conflict_callback(ctl.alerts.summary, alerts_v2_conflict_callback, &ctl); + dictionary_register_delete_callback(ctl.alerts.summary, alerts_v2_delete_callback, &ctl); + + ctl.alerts.by_type = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct alert_by_x_entry)); + + dictionary_register_insert_callback(ctl.alerts.by_type, alerts_by_x_insert_callback, NULL); + dictionary_register_conflict_callback(ctl.alerts.by_type, alerts_by_x_conflict_callback, NULL); + + ctl.alerts.by_component = 
dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct alert_by_x_entry)); + + dictionary_register_insert_callback(ctl.alerts.by_component, alerts_by_x_insert_callback, NULL); + dictionary_register_conflict_callback(ctl.alerts.by_component, alerts_by_x_conflict_callback, NULL); + + ctl.alerts.by_classification = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct alert_by_x_entry)); + + dictionary_register_insert_callback(ctl.alerts.by_classification, alerts_by_x_insert_callback, NULL); + dictionary_register_conflict_callback(ctl.alerts.by_classification, alerts_by_x_conflict_callback, NULL); + + ctl.alerts.by_recipient = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct alert_by_x_entry)); + + dictionary_register_insert_callback(ctl.alerts.by_recipient, alerts_by_x_insert_callback, NULL); + dictionary_register_conflict_callback(ctl.alerts.by_recipient, alerts_by_x_conflict_callback, NULL); + + ctl.alerts.by_module = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + NULL, sizeof(struct alert_by_x_entry)); + + dictionary_register_insert_callback(ctl.alerts.by_module, alerts_by_x_insert_callback, NULL); + dictionary_register_conflict_callback(ctl.alerts.by_module, alerts_by_x_conflict_callback, NULL); if(ctl.options & (CONTEXT_V2_OPTION_ALERTS_WITH_INSTANCES | CONTEXT_V2_OPTION_ALERTS_WITH_VALUES)) { ctl.alerts.alert_instances = dictionary_create_advanced(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, @@ -2062,12 +2240,19 @@ int rrdcontext_to_json_v2(BUFFER *wb, struct api_v2_contexts_request *req, CONTE struct function_v2_entry *t; dfe_start_read(ctl.functions.dict, t) { 
buffer_json_add_array_item_object(wb); - buffer_json_member_add_string(wb, "name", t_dfe.name); - buffer_json_member_add_string(wb, "help", string2str(t->help)); - buffer_json_member_add_array(wb, "ni"); - for (size_t i = 0; i < t->used; i++) - buffer_json_add_array_item_uint64(wb, t->node_ids[i]); - buffer_json_array_close(wb); + { + buffer_json_member_add_string(wb, "name", t_dfe.name); + buffer_json_member_add_string(wb, "help", string2str(t->help)); + buffer_json_member_add_array(wb, "ni"); + { + for (size_t i = 0; i < t->used; i++) + buffer_json_add_array_item_uint64(wb, t->node_ids[i]); + } + buffer_json_array_close(wb); + buffer_json_member_add_string(wb, "tags", string2str(t->tags)); + http_access2buffer_json_array(wb, "access", t->access); + buffer_json_member_add_uint64(wb, "priority", t->priority); + } buffer_json_object_close(wb); } dfe_done(t); @@ -2127,8 +2312,13 @@ cleanup: dictionary_destroy(ctl.nodes.dict); dictionary_destroy(ctl.contexts.dict); dictionary_destroy(ctl.functions.dict); - dictionary_destroy(ctl.alerts.alerts); + dictionary_destroy(ctl.alerts.summary); dictionary_destroy(ctl.alerts.alert_instances); + dictionary_destroy(ctl.alerts.by_type); + dictionary_destroy(ctl.alerts.by_component); + dictionary_destroy(ctl.alerts.by_classification); + dictionary_destroy(ctl.alerts.by_recipient); + dictionary_destroy(ctl.alerts.by_module); simple_pattern_free(ctl.nodes.scope_pattern); simple_pattern_free(ctl.nodes.pattern); simple_pattern_free(ctl.contexts.pattern); diff --git a/database/contexts/context.c b/src/database/contexts/context.c index 5613c63cf..5613c63cf 100644 --- a/database/contexts/context.c +++ b/src/database/contexts/context.c diff --git a/database/contexts/instance.c b/src/database/contexts/instance.c index 39837dbf6..117953d38 100644 --- a/database/contexts/instance.c +++ b/src/database/contexts/instance.c @@ -404,7 +404,7 @@ inline void rrdinstance_from_rrdset(RRDSET *st) { fatal("RRDCONTEXT: cannot switch rrdcontext without 
switching rrdinstance too"); } -#define rrdset_get_rrdinstance(st) rrdset_get_rrdinstance_with_trace(st, __FUNCTION__); +#define rrdset_get_rrdinstance(st) rrdset_get_rrdinstance_with_trace(st, __FUNCTION__) static inline RRDINSTANCE *rrdset_get_rrdinstance_with_trace(RRDSET *st, const char *function) { if(unlikely(!st->rrdcontexts.rrdinstance)) { netdata_log_error("RRDINSTANCE: RRDSET '%s' is not linked to an RRDINSTANCE at %s()", rrdset_id(st), function); diff --git a/database/contexts/internal.h b/src/database/contexts/internal.h index 293659fdd..293659fdd 100644 --- a/database/contexts/internal.h +++ b/src/database/contexts/internal.h diff --git a/database/contexts/metric.c b/src/database/contexts/metric.c index 0f0785972..0f0785972 100644 --- a/database/contexts/metric.c +++ b/src/database/contexts/metric.c diff --git a/database/contexts/query_scope.c b/src/database/contexts/query_scope.c index f3bcd0b3f..f3bcd0b3f 100644 --- a/database/contexts/query_scope.c +++ b/src/database/contexts/query_scope.c diff --git a/database/contexts/query_target.c b/src/database/contexts/query_target.c index 95abc3e65..29a9c3e59 100644 --- a/database/contexts/query_target.c +++ b/src/database/contexts/query_target.c @@ -4,7 +4,7 @@ #define QUERY_TARGET_MAX_REALLOC_INCREASE 500 #define query_target_realloc_size(size, start) \ - (size) ? ((size) < QUERY_TARGET_MAX_REALLOC_INCREASE ? (size) * 2 : (size) + QUERY_TARGET_MAX_REALLOC_INCREASE) : (start); + (size) ? ((size) < QUERY_TARGET_MAX_REALLOC_INCREASE ? 
(size) * 2 : (size) + QUERY_TARGET_MAX_REALLOC_INCREASE) : (start) static void query_metric_release(QUERY_TARGET *qt, QUERY_METRIC *qm); static void query_dimension_release(QUERY_DIMENSION *qd); @@ -82,7 +82,6 @@ void query_target_release(QUERY_TARGET *qt) { simple_pattern_free(qt->instances.labels_pattern); qt->instances.labels_pattern = NULL; - simple_pattern_free(qt->query.pattern); qt->query.pattern = NULL; @@ -221,10 +220,10 @@ static inline void query_metric_release(QUERY_TARGET *qt, QUERY_METRIC *qm) { // reset the tiers for(size_t tier = 0; tier < storage_tiers ;tier++) { - if(qm->tiers[tier].db_metric_handle) { + if(qm->tiers[tier].smh) { STORAGE_ENGINE *eng = query_metric_storage_engine(qt, qm, tier); - eng->api.metric_release(qm->tiers[tier].db_metric_handle); - qm->tiers[tier].db_metric_handle = NULL; + eng->api.metric_release(qm->tiers[tier].smh); + qm->tiers[tier].smh = NULL; } } } @@ -241,7 +240,7 @@ static bool query_metric_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_CON struct { STORAGE_ENGINE *eng; - STORAGE_METRIC_HANDLE *db_metric_handle; + STORAGE_METRIC_HANDLE *smh; time_t db_first_time_s; time_t db_last_time_s; time_t db_update_every_s; @@ -252,14 +251,14 @@ static bool query_metric_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_CON tier_retention[tier].eng = eng; tier_retention[tier].db_update_every_s = (time_t) (qn->rrdhost->db[tier].tier_grouping * ri->update_every_s); - if(rm->rrddim && rm->rrddim->tiers[tier].db_metric_handle) - tier_retention[tier].db_metric_handle = eng->api.metric_dup(rm->rrddim->tiers[tier].db_metric_handle); + if(rm->rrddim && rm->rrddim->tiers[tier].smh) + tier_retention[tier].smh = eng->api.metric_dup(rm->rrddim->tiers[tier].smh); else - tier_retention[tier].db_metric_handle = eng->api.metric_get(qn->rrdhost->db[tier].instance, &rm->uuid); + tier_retention[tier].smh = eng->api.metric_get(qn->rrdhost->db[tier].si, &rm->uuid); - if(tier_retention[tier].db_metric_handle) { - 
tier_retention[tier].db_first_time_s = storage_engine_oldest_time_s(tier_retention[tier].eng->backend, tier_retention[tier].db_metric_handle); - tier_retention[tier].db_last_time_s = storage_engine_latest_time_s(tier_retention[tier].eng->backend, tier_retention[tier].db_metric_handle); + if(tier_retention[tier].smh) { + tier_retention[tier].db_first_time_s = storage_engine_oldest_time_s(tier_retention[tier].eng->seb, tier_retention[tier].smh); + tier_retention[tier].db_last_time_s = storage_engine_latest_time_s(tier_retention[tier].eng->seb, tier_retention[tier].smh); if(!common_first_time_s) common_first_time_s = tier_retention[tier].db_first_time_s; @@ -331,7 +330,7 @@ static bool query_metric_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_CON for (size_t tier = 0; tier < storage_tiers; tier++) { internal_fatal(tier_retention[tier].eng != query_metric_storage_engine(qt, qm, tier), "QUERY TARGET: storage engine mismatch"); - qm->tiers[tier].db_metric_handle = tier_retention[tier].db_metric_handle; + qm->tiers[tier].smh = tier_retention[tier].smh; qm->tiers[tier].db_first_time_s = tier_retention[tier].db_first_time_s; qm->tiers[tier].db_last_time_s = tier_retention[tier].db_last_time_s; qm->tiers[tier].db_update_every_s = tier_retention[tier].db_update_every_s; @@ -342,8 +341,10 @@ static bool query_metric_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_CON // cleanup anything we allocated to the retention we will not use for(size_t tier = 0; tier < storage_tiers ;tier++) { - if (tier_retention[tier].db_metric_handle) - tier_retention[tier].eng->api.metric_release(tier_retention[tier].db_metric_handle); + if (tier_retention[tier].smh) { + tier_retention[tier].eng->api.metric_release(tier_retention[tier].smh); + tier_retention[tier].smh = NULL; + } } return false; @@ -627,7 +628,7 @@ static bool query_target_match_alert_pattern(RRDINSTANCE_ACQUIRED *ria, SIMPLE_P rw_spinlock_read_lock(&st->alerts.spinlock); if (st->alerts.base) { for (RRDCALC *rc = 
st->alerts.base; rc; rc = rc->next) { - SIMPLE_PATTERN_RESULT ret = simple_pattern_matches_string_extract(pattern, rc->name, NULL, 0); + SIMPLE_PATTERN_RESULT ret = simple_pattern_matches_string_extract(pattern, rc->config.name, NULL, 0); if(ret == SP_MATCHED_POSITIVE) { matched = true; @@ -641,7 +642,7 @@ static bool query_target_match_alert_pattern(RRDINSTANCE_ACQUIRED *ria, SIMPLE_P else buffer_flush(wb); - buffer_fast_strcat(wb, string2str(rc->name), string_strlen(rc->name)); + buffer_fast_strcat(wb, string2str(rc->config.name), string_strlen(rc->config.name)); buffer_fast_strcat(wb, ":", 1); buffer_strcat(wb, rrdcalc_status2string(rc->status)); @@ -725,13 +726,22 @@ static inline SIMPLE_PATTERN_RESULT query_instance_matches(QUERY_INSTANCE *qi, return ret; } -static inline bool query_instance_matches_labels(RRDINSTANCE *ri, SIMPLE_PATTERN *chart_label_key_sp, SIMPLE_PATTERN *labels_sp) { - if ((chart_label_key_sp && !rrdlabels_match_simple_pattern_parsed( - ri->rrdlabels, chart_label_key_sp, '\0', NULL)) || - (labels_sp && !rrdlabels_match_simple_pattern_parsed( - ri->rrdlabels, labels_sp, ':', NULL))) +static inline bool query_instance_matches_labels( + RRDINSTANCE *ri, + SIMPLE_PATTERN *chart_label_key_sp, + SIMPLE_PATTERN *labels_sp) +{ + + if (chart_label_key_sp && rrdlabels_match_simple_pattern_parsed(ri->rrdlabels, chart_label_key_sp, '\0', NULL) != SP_MATCHED_POSITIVE) return false; + if (labels_sp) { + struct pattern_array *pa = pattern_array_add_simple_pattern(NULL, labels_sp, ':'); + bool found = pattern_array_label_match(pa, ri->rrdlabels, ':', NULL); + pattern_array_free(pa); + return found; + } + return true; } @@ -752,7 +762,10 @@ static bool query_instance_add(QUERY_TARGET_LOCALS *qtl, QUERY_NODE *qn, QUERY_C qi, ri, qt->instances.pattern, qtl->match_ids, qtl->match_names, qt->request.version, qtl->host_node_id_str)); if(queryable_instance) - queryable_instance = query_instance_matches_labels(ri, qt->instances.chart_label_key_pattern, 
qt->instances.labels_pattern); + queryable_instance = query_instance_matches_labels( + ri, + qt->instances.chart_label_key_pattern, + qt->instances.labels_pattern); if(queryable_instance) { if(qt->instances.alerts_pattern && !query_target_match_alert_pattern(ria, qt->instances.alerts_pattern)) @@ -1216,6 +1229,5 @@ ssize_t weights_foreach_rrdmetric_in_context(RRDCONTEXT_ACQUIRED *rca, break; } dfe_done(ri); - return count; } diff --git a/database/contexts/rrdcontext.c b/src/database/contexts/rrdcontext.c index 9dee39be2..9dee39be2 100644 --- a/database/contexts/rrdcontext.c +++ b/src/database/contexts/rrdcontext.c diff --git a/database/contexts/rrdcontext.h b/src/database/contexts/rrdcontext.h index 9c497a5a5..08a5760b5 100644 --- a/database/contexts/rrdcontext.h +++ b/src/database/contexts/rrdcontext.h @@ -210,7 +210,7 @@ typedef struct query_metric { RRDR_DIMENSION_FLAGS status; struct query_metric_tier { - STORAGE_METRIC_HANDLE *db_metric_handle; + STORAGE_METRIC_HANDLE *smh; time_t db_first_time_s; // the oldest timestamp available for this tier time_t db_last_time_s; // the latest timestamp available for this tier time_t db_update_every_s; // latest update every for this tier @@ -299,6 +299,8 @@ typedef struct query_target_request { qt_interrupt_callback_t interrupt_callback; void *interrupt_callback_data; + + uuid_t *transaction; } QUERY_TARGET_REQUEST; #define GROUP_BY_MAX_LABEL_KEYS 10 @@ -459,14 +461,9 @@ struct sql_alert_config_data { const char *on_template; const char *on_key; - const char *os; - const char *hosts; const char *families; - const char *plugin; - const char *module; const char *host_labels; const char *chart_labels; - const char *charts; } selectors; const char *info; @@ -479,6 +476,10 @@ struct sql_alert_config_data { struct { const char *dimensions; const char *method; + ALERT_LOOKUP_TIME_GROUP_CONDITION time_group_condition; + NETDATA_DOUBLE time_group_value; + ALERT_LOOKUP_DIMS_GROUPING dims_group; + ALERT_LOOKUP_DATA_SOURCE 
data_source; uint32_t options; int32_t after; diff --git a/database/contexts/worker.c b/src/database/contexts/worker.c index 9d7c18863..2aae8363d 100644 --- a/database/contexts/worker.c +++ b/src/database/contexts/worker.c @@ -239,7 +239,7 @@ bool rrdmetric_update_retention(RRDMETRIC *rm) { STORAGE_ENGINE *eng = rrdhost->db[tier].eng; time_t first_time_t, last_time_t; - if (eng->api.metric_retention_by_uuid(rrdhost->db[tier].instance, &rm->uuid, &first_time_t, &last_time_t)) { + if (eng->api.metric_retention_by_uuid(rrdhost->db[tier].si, &rm->uuid, &first_time_t, &last_time_t)) { if (first_time_t < min_first_time_t) min_first_time_t = first_time_t; diff --git a/database/engine/README.md b/src/database/engine/README.md index 890018642..890018642 100644 --- a/database/engine/README.md +++ b/src/database/engine/README.md diff --git a/database/engine/cache.c b/src/database/engine/cache.c index eb1c35298..49a9b6b96 100644 --- a/database/engine/cache.c +++ b/src/database/engine/cache.c @@ -1325,21 +1325,8 @@ static PGC_PAGE *page_add(PGC *cache, PGC_ENTRY *entry, bool *added) { return page; } -static PGC_PAGE *page_find_and_acquire(PGC *cache, Word_t section, Word_t metric_id, time_t start_time_s, PGC_SEARCH method) { - __atomic_add_fetch(&cache->stats.workers_search, 1, __ATOMIC_RELAXED); - - size_t *stats_hit_ptr, *stats_miss_ptr; - - if(method == PGC_SEARCH_CLOSEST) { - __atomic_add_fetch(&cache->stats.searches_closest, 1, __ATOMIC_RELAXED); - stats_hit_ptr = &cache->stats.searches_closest_hits; - stats_miss_ptr = &cache->stats.searches_closest_misses; - } - else { - __atomic_add_fetch(&cache->stats.searches_exact, 1, __ATOMIC_RELAXED); - stats_hit_ptr = &cache->stats.searches_exact_hits; - stats_miss_ptr = &cache->stats.searches_exact_misses; - } +static PGC_PAGE *page_find_and_acquire_once(PGC *cache, Word_t section, Word_t metric_id, time_t start_time_s, PGC_SEARCH method, bool *retry) { + *retry = false; PGC_PAGE *page = NULL; size_t partition = 
pgc_indexing_partition(cache, metric_id); @@ -1462,22 +1449,13 @@ static PGC_PAGE *page_find_and_acquire(PGC *cache, Word_t section, Word_t metric if(!page_acquire(cache, page)) { // this page is not good to use + *retry = true; page = NULL; } } cleanup: pgc_index_read_unlock(cache, partition); - - if(page) { - __atomic_add_fetch(stats_hit_ptr, 1, __ATOMIC_RELAXED); - page_has_been_accessed(cache, page); - } - else - __atomic_add_fetch(stats_miss_ptr, 1, __ATOMIC_RELAXED); - - __atomic_sub_fetch(&cache->stats.workers_search, 1, __ATOMIC_RELAXED); - return page; } @@ -1882,7 +1860,7 @@ void pgc_page_release(PGC *cache, PGC_PAGE *page) { page_release(cache, page, is_page_clean(page)); } -void pgc_page_hot_to_dirty_and_release(PGC *cache, PGC_PAGE *page) { +void pgc_page_hot_to_dirty_and_release(PGC *cache, PGC_PAGE *page, bool never_flush) { __atomic_add_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED); //#ifdef NETDATA_INTERNAL_CHECKS @@ -1901,10 +1879,8 @@ void pgc_page_hot_to_dirty_and_release(PGC *cache, PGC_PAGE *page) { __atomic_sub_fetch(&cache->stats.workers_hot2dirty, 1, __ATOMIC_RELAXED); // flush, if we have to - if((cache->config.options & PGC_OPTIONS_FLUSH_PAGES_INLINE) || flushing_critical(cache)) { - flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL, - false, false); - } + if(!never_flush && ((cache->config.options & PGC_OPTIONS_FLUSH_PAGES_INLINE) || flushing_critical(cache))) + flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL, false, false); } bool pgc_page_to_clean_evict_or_release(PGC *cache, PGC_PAGE *page) { @@ -1949,13 +1925,13 @@ time_t pgc_page_end_time_s(PGC_PAGE *page) { return page->end_time_s; } -time_t pgc_page_update_every_s(PGC_PAGE *page) { +uint32_t pgc_page_update_every_s(PGC_PAGE *page) { return page->update_every_s; } -time_t pgc_page_fix_update_every(PGC_PAGE *page, time_t update_every_s) { +uint32_t pgc_page_fix_update_every(PGC_PAGE *page, uint32_t update_every_s) { 
if(page->update_every_s == 0) - page->update_every_s = (uint32_t) update_every_s; + page->update_every_s = update_every_s; return page->update_every_s; } @@ -2050,7 +2026,46 @@ void pgc_page_hot_set_end_time_s(PGC *cache __maybe_unused, PGC_PAGE *page, time } PGC_PAGE *pgc_page_get_and_acquire(PGC *cache, Word_t section, Word_t metric_id, time_t start_time_s, PGC_SEARCH method) { - return page_find_and_acquire(cache, section, metric_id, start_time_s, method); + static const struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 }; + + PGC_PAGE *page = NULL; + + __atomic_add_fetch(&cache->stats.workers_search, 1, __ATOMIC_RELAXED); + + size_t *stats_hit_ptr, *stats_miss_ptr; + + if(method == PGC_SEARCH_CLOSEST) { + __atomic_add_fetch(&cache->stats.searches_closest, 1, __ATOMIC_RELAXED); + stats_hit_ptr = &cache->stats.searches_closest_hits; + stats_miss_ptr = &cache->stats.searches_closest_misses; + } + else { + __atomic_add_fetch(&cache->stats.searches_exact, 1, __ATOMIC_RELAXED); + stats_hit_ptr = &cache->stats.searches_exact_hits; + stats_miss_ptr = &cache->stats.searches_exact_misses; + } + + while(1) { + bool retry = false; + + page = page_find_and_acquire_once(cache, section, metric_id, start_time_s, method, &retry); + + if(page || !retry) + break; + + nanosleep(&ns, NULL); + } + + if(page) { + __atomic_add_fetch(stats_hit_ptr, 1, __ATOMIC_RELAXED); + page_has_been_accessed(cache, page); + } + else + __atomic_add_fetch(stats_miss_ptr, 1, __ATOMIC_RELAXED); + + __atomic_sub_fetch(&cache->stats.workers_search, 1, __ATOMIC_RELAXED); + + return page; } struct pgc_statistics pgc_get_statistics(PGC *cache) { @@ -2224,7 +2239,7 @@ void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_ while ((PValue2 = JudyLFirstThenNext(mi->JudyL_pages_by_start_time, &start_time, &start_time_first))) { struct jv2_page_info *pi = *PValue2; page_transition_unlock(cache, pi->page); - pgc_page_hot_to_dirty_and_release(cache, pi->page); + 
pgc_page_hot_to_dirty_and_release(cache, pi->page, true); // make_acquired_page_clean_and_evict_or_page_release(cache, pi->page); aral_freez(ar_pi, pi); } @@ -2251,6 +2266,8 @@ void pgc_open_cache_to_journal_v2(PGC *cache, Word_t section, unsigned datafile_ aral_by_size_release(ar_mi); __atomic_sub_fetch(&cache->stats.workers_jv2_flush, 1, __ATOMIC_RELAXED); + + flush_pages(cache, cache->config.max_flushes_inline, PGC_SECTION_ALL, false, false); } static bool match_page_data(PGC_PAGE *page, void *data) { @@ -2396,7 +2413,7 @@ void *unittest_stress_test_collector(void *ptr) { if(i % 10 == 0) pgc_page_to_clean_evict_or_release(pgc_uts.cache, pgc_uts.metrics[i]); else - pgc_page_hot_to_dirty_and_release(pgc_uts.cache, pgc_uts.metrics[i]); + pgc_page_hot_to_dirty_and_release(pgc_uts.cache, pgc_uts.metrics[i], false); } } @@ -2721,7 +2738,7 @@ int pgc_unittest(void) { }, NULL); pgc_page_hot_set_end_time_s(cache, page2, 2001); - pgc_page_hot_to_dirty_and_release(cache, page2); + pgc_page_hot_to_dirty_and_release(cache, page2, false); PGC_PAGE *page3 = pgc_page_add_and_acquire(cache, (PGC_ENTRY){ .section = 3, @@ -2734,7 +2751,7 @@ int pgc_unittest(void) { }, NULL); pgc_page_hot_set_end_time_s(cache, page3, 2001); - pgc_page_hot_to_dirty_and_release(cache, page3); + pgc_page_hot_to_dirty_and_release(cache, page3, false); pgc_destroy(cache); diff --git a/database/engine/cache.h b/src/database/engine/cache.h index 7cd7c0636..b6f81bcc2 100644 --- a/database/engine/cache.h +++ b/src/database/engine/cache.h @@ -2,6 +2,7 @@ #ifndef DBENGINE_CACHE_H #define DBENGINE_CACHE_H +#include "datafile.h" #include "../rrd.h" // CACHE COMPILE TIME CONFIGURATION @@ -27,7 +28,7 @@ typedef struct pgc_entry { time_t end_time_s; // the end time of the page size_t size; // the size in bytes of the allocation, outside the cache void *data; // a pointer to data outside the cache - uint32_t update_every_s; // the update every of the page + uint32_t update_every_s; // the update every of the page 
bool hot; // true if this entry is currently being collected uint8_t *custom_data; } PGC_ENTRY; @@ -191,7 +192,7 @@ PGC_PAGE *pgc_page_dup(PGC *cache, PGC_PAGE *page); void pgc_page_release(PGC *cache, PGC_PAGE *page); // mark a hot page dirty, and release it -void pgc_page_hot_to_dirty_and_release(PGC *cache, PGC_PAGE *page); +void pgc_page_hot_to_dirty_and_release(PGC *cache, PGC_PAGE *page, bool never_flush); // find a page from the cache typedef enum { @@ -210,8 +211,8 @@ Word_t pgc_page_section(PGC_PAGE *page); Word_t pgc_page_metric(PGC_PAGE *page); time_t pgc_page_start_time_s(PGC_PAGE *page); time_t pgc_page_end_time_s(PGC_PAGE *page); -time_t pgc_page_update_every_s(PGC_PAGE *page); -time_t pgc_page_fix_update_every(PGC_PAGE *page, time_t update_every_s); +uint32_t pgc_page_update_every_s(PGC_PAGE *page); +uint32_t pgc_page_fix_update_every(PGC_PAGE *page, uint32_t update_every_s); time_t pgc_page_fix_end_time_s(PGC_PAGE *page, time_t end_time_s); void *pgc_page_data(PGC_PAGE *page); void *pgc_page_custom_data(PGC *cache, PGC_PAGE *page); diff --git a/database/engine/datafile.c b/src/database/engine/datafile.c index 7322039cd..1ec2dea79 100644 --- a/database/engine/datafile.c +++ b/src/database/engine/datafile.c @@ -557,7 +557,9 @@ void finalize_data_files(struct rrdengine_instance *ctx) { bool logged = false; - logged = false; + if (!ctx->datafiles.first) + return; + while(__atomic_load_n(&ctx->atomic.extents_currently_being_flushed, __ATOMIC_RELAXED)) { if(!logged) { netdata_log_info("Waiting for inflight flush to finish on tier %d...", ctx->config.tier); diff --git a/database/engine/datafile.h b/src/database/engine/datafile.h index 569f1b0a2..569f1b0a2 100644 --- a/database/engine/datafile.h +++ b/src/database/engine/datafile.h diff --git a/src/database/engine/dbengine-compression.c b/src/database/engine/dbengine-compression.c new file mode 100644 index 000000000..46ef2b075 --- /dev/null +++ b/src/database/engine/dbengine-compression.c @@ -0,0 +1,159 @@ 
+// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrdengine.h" +#include "dbengine-compression.h" + +#ifdef ENABLE_LZ4 +#include <lz4.h> +#endif + +#ifdef ENABLE_ZSTD +#include <zstd.h> +#define DBENGINE_ZSTD_DEFAULT_COMPRESSION_LEVEL 3 +#endif + +uint8_t dbengine_default_compression(void) { + +#ifdef ENABLE_LZ4 + return RRDENG_COMPRESSION_LZ4; +#endif + + return RRDENG_COMPRESSION_NONE; +} + +bool dbengine_valid_compression_algorithm(uint8_t algorithm) { + switch(algorithm) { + case RRDENG_COMPRESSION_NONE: + +#ifdef ENABLE_LZ4 + case RRDENG_COMPRESSION_LZ4: +#endif + +#ifdef ENABLE_ZSTD + case RRDENG_COMPRESSION_ZSTD: +#endif + + return true; + + default: + return false; + } +} + +size_t dbengine_max_compressed_size(size_t uncompressed_size, uint8_t algorithm) { + switch(algorithm) { +#ifdef ENABLE_LZ4 + case RRDENG_COMPRESSION_LZ4: + fatal_assert(uncompressed_size < LZ4_MAX_INPUT_SIZE); + return LZ4_compressBound((int)uncompressed_size); +#endif + +#ifdef ENABLE_ZSTD + case RRDENG_COMPRESSION_ZSTD: + return ZSTD_compressBound(uncompressed_size); +#endif + + case RRDENG_COMPRESSION_NONE: + return uncompressed_size; + + default: + fatal("DBENGINE: unknown compression algorithm %u", algorithm); + } +} + +size_t dbengine_compress(void *payload, size_t uncompressed_size, uint8_t algorithm) { + // the result should be stored in the payload + // the caller must have called dbengine_max_compressed_size() to make sure the + // payload is big enough to fit the max size needed. 
+ + switch(algorithm) { +#ifdef ENABLE_LZ4 + case RRDENG_COMPRESSION_LZ4: { + size_t max_compressed_size = dbengine_max_compressed_size(uncompressed_size, algorithm); + struct extent_buffer *eb = extent_buffer_get(max_compressed_size); + void *compressed_buf = eb->data; + + size_t compressed_size = + LZ4_compress_default(payload, compressed_buf, (int)uncompressed_size, (int)max_compressed_size); + + if(compressed_size > 0 && compressed_size < uncompressed_size) + memcpy(payload, compressed_buf, compressed_size); + else + compressed_size = 0; + + extent_buffer_release(eb); + return compressed_size; + } +#endif + +#ifdef ENABLE_ZSTD + case RRDENG_COMPRESSION_ZSTD: { + size_t max_compressed_size = dbengine_max_compressed_size(uncompressed_size, algorithm); + struct extent_buffer *eb = extent_buffer_get(max_compressed_size); + void *compressed_buf = eb->data; + + size_t compressed_size = ZSTD_compress(compressed_buf, max_compressed_size, payload, uncompressed_size, + DBENGINE_ZSTD_DEFAULT_COMPRESSION_LEVEL); + + if (ZSTD_isError(compressed_size)) { + internal_fatal(true, "DBENGINE: ZSTD compression error %s", ZSTD_getErrorName(compressed_size)); + compressed_size = 0; + } + + if(compressed_size > 0 && compressed_size < uncompressed_size) + memcpy(payload, compressed_buf, compressed_size); + else + compressed_size = 0; + + extent_buffer_release(eb); + return compressed_size; + } +#endif + + case RRDENG_COMPRESSION_NONE: + return 0; + + default: + fatal("DBENGINE: unknown compression algorithm %u", algorithm); + } +} + +size_t dbengine_decompress(void *dst, void *src, size_t dst_size, size_t src_size, uint8_t algorithm) { + switch(algorithm) { + +#ifdef ENABLE_LZ4 + case RRDENG_COMPRESSION_LZ4: { + int rc = LZ4_decompress_safe(src, dst, (int)src_size, (int)dst_size); + if(rc < 0) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DBENGINE: ZSTD decompression error %d", rc); + rc = 0; + } + + return rc; + } +#endif + +#ifdef ENABLE_ZSTD + case RRDENG_COMPRESSION_ZSTD: { + size_t 
decompressed_size = ZSTD_decompress(dst, dst_size, src, src_size); + + if (ZSTD_isError(decompressed_size)) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DBENGINE: ZSTD decompression error %s", + ZSTD_getErrorName(decompressed_size)); + + decompressed_size = 0; + } + + return decompressed_size; + } +#endif + + case RRDENG_COMPRESSION_NONE: + internal_fatal(true, "DBENGINE: %s() should not be called for uncompressed pages", __FUNCTION__ ); + return 0; + + default: + internal_fatal(true, "DBENGINE: unknown compression algorithm %u", algorithm); + return 0; + } +} diff --git a/src/database/engine/dbengine-compression.h b/src/database/engine/dbengine-compression.h new file mode 100644 index 000000000..8dd97f5d7 --- /dev/null +++ b/src/database/engine/dbengine-compression.h @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DBENGINE_COMPRESSION_H +#define NETDATA_DBENGINE_COMPRESSION_H + +uint8_t dbengine_default_compression(void); + +bool dbengine_valid_compression_algorithm(uint8_t algorithm); + +size_t dbengine_max_compressed_size(size_t uncompressed_size, uint8_t algorithm); +size_t dbengine_compress(void *payload, size_t uncompressed_size, uint8_t algorithm); + +size_t dbengine_decompress(void *dst, void *src, size_t dst_size, size_t src_size, uint8_t algorithm); + +#endif //NETDATA_DBENGINE_COMPRESSION_H diff --git a/database/engine/dbengine-diagram.xml b/src/database/engine/dbengine-diagram.xml index 793e8a355..793e8a355 100644 --- a/database/engine/dbengine-diagram.xml +++ b/src/database/engine/dbengine-diagram.xml diff --git a/src/database/engine/dbengine-stresstest.c b/src/database/engine/dbengine-stresstest.c new file mode 100644 index 000000000..86d09c4ab --- /dev/null +++ b/src/database/engine/dbengine-stresstest.c @@ -0,0 +1,456 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../../daemon/common.h" + +#ifdef ENABLE_DBENGINE + +static RRDHOST *dbengine_rrdhost_find_or_create(char *name) { + /* We don't want to drop metrics 
when generating load, + * we prefer to block data generation itself */ + + return rrdhost_find_or_create( + name, + name, + name, + os_type, + netdata_configured_timezone, + netdata_configured_abbrev_timezone, + netdata_configured_utc_offset, + program_name, + program_version, + default_rrd_update_every, + default_rrd_history_entries, + RRD_MEMORY_MODE_DBENGINE, + health_plugin_enabled(), + default_rrdpush_enabled, + default_rrdpush_destination, + default_rrdpush_api_key, + default_rrdpush_send_charts_matching, + default_rrdpush_enable_replication, + default_rrdpush_seconds_to_replicate, + default_rrdpush_replication_step, + NULL, + 0 + ); +} + +static inline void rrddim_set_by_pointer_fake_time(RRDDIM *rd, collected_number value, time_t now) { + rd->collector.last_collected_time.tv_sec = now; + rd->collector.last_collected_time.tv_usec = 0; + rd->collector.collected_value = value; + rrddim_set_updated(rd); + + rd->collector.counter++; + + collected_number v = (value >= 0) ? value : -value; + if(unlikely(v > rd->collector.collected_value_max)) rd->collector.collected_value_max = v; +} + +struct dbengine_chart_thread { + uv_thread_t thread; + RRDHOST *host; + char *chartname; /* Will be prefixed by type, e.g. "example_local1.", "example_local2." 
etc */ + unsigned dset_charts; /* number of charts */ + unsigned dset_dims; /* dimensions per chart */ + unsigned chart_i; /* current chart offset */ + time_t time_present; /* current virtual time of the benchmark */ + volatile time_t time_max; /* latest timestamp of stored values */ + unsigned history_seconds; /* how far back in the past to go */ + + volatile long done; /* initialize to 0, set to 1 to stop thread */ + struct completion charts_initialized; + unsigned long errors, stored_metrics_nr; /* statistics */ + + RRDSET *st; + RRDDIM *rd[]; /* dset_dims elements */ +}; + +collected_number generate_dbengine_chart_value(int chart_i, int dim_i, time_t time_current) +{ + collected_number value; + + value = ((collected_number)time_current) * (chart_i + 1); + value += ((collected_number)time_current) * (dim_i + 1); + value %= 1024LLU; + + return value; +} + +static void generate_dbengine_chart(void *arg) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + struct dbengine_chart_thread *thread_info = (struct dbengine_chart_thread *)arg; + RRDHOST *host = thread_info->host; + char *chartname = thread_info->chartname; + const unsigned DSET_DIMS = thread_info->dset_dims; + unsigned history_seconds = thread_info->history_seconds; + time_t time_present = thread_info->time_present; + + unsigned j, update_every = 1; + RRDSET *st; + RRDDIM *rd[DSET_DIMS]; + char name[RRD_ID_LENGTH_MAX + 1]; + time_t time_current; + + // create the chart + snprintfz(name, RRD_ID_LENGTH_MAX, "example_local%u", thread_info->chart_i + 1); + thread_info->st = st = rrdset_create(host, name, chartname, chartname, "example", NULL, chartname, chartname, + chartname, NULL, 1, update_every, RRDSET_TYPE_LINE); + for (j = 0 ; j < DSET_DIMS ; ++j) { + snprintfz(name, RRD_ID_LENGTH_MAX, "%s%u", chartname, j + 1); + + thread_info->rd[j] = rd[j] = rrddim_add(st, name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + completion_mark_complete(&thread_info->charts_initialized); + + // feed it with the test 
data + time_current = time_present - history_seconds; + for (j = 0 ; j < DSET_DIMS ; ++j) { + rd[j]->collector.last_collected_time.tv_sec = + st->last_collected_time.tv_sec = st->last_updated.tv_sec = time_current - update_every; + rd[j]->collector.last_collected_time.tv_usec = + st->last_collected_time.tv_usec = st->last_updated.tv_usec = 0; + } + for( ; !thread_info->done && time_current < time_present ; time_current += update_every) { + st->usec_since_last_update = USEC_PER_SEC * update_every; + + for (j = 0; j < DSET_DIMS; ++j) { + collected_number value; + + value = generate_dbengine_chart_value(thread_info->chart_i, j, time_current); + rrddim_set_by_pointer_fake_time(rd[j], value, time_current); + ++thread_info->stored_metrics_nr; + } + rrdset_done(st); + thread_info->time_max = time_current; + } + for (j = 0; j < DSET_DIMS; ++j) { + rrdeng_store_metric_finalize((rd[j])->tiers[0].sch); + } +} + +void generate_dbengine_dataset(unsigned history_seconds) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + const int DSET_CHARTS = 16; + const int DSET_DIMS = 128; + const uint64_t EXPECTED_COMPRESSION_RATIO = 20; + RRDHOST *host = NULL; + struct dbengine_chart_thread **thread_info; + int i; + time_t time_present; + + default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; + default_rrdeng_page_cache_mb = 128; + // Worst case for uncompressible data + default_rrdeng_disk_quota_mb = (((uint64_t)DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * history_seconds) / + (1024 * 1024); + default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100; + + nd_log_limits_unlimited(); + fprintf(stderr, "Initializing localhost with hostname 'dbengine-dataset'"); + + host = dbengine_rrdhost_find_or_create("dbengine-dataset"); + if (NULL == host) + return; + + thread_info = mallocz(sizeof(*thread_info) * DSET_CHARTS); + for (i = 0 ; i < DSET_CHARTS ; ++i) { + thread_info[i] = mallocz(sizeof(*thread_info[i]) + sizeof(RRDDIM *) * DSET_DIMS); + 
} + fprintf(stderr, "\nRunning DB-engine workload generator\n"); + + time_present = now_realtime_sec(); + for (i = 0 ; i < DSET_CHARTS ; ++i) { + thread_info[i]->host = host; + thread_info[i]->chartname = "random"; + thread_info[i]->dset_charts = DSET_CHARTS; + thread_info[i]->chart_i = i; + thread_info[i]->dset_dims = DSET_DIMS; + thread_info[i]->history_seconds = history_seconds; + thread_info[i]->time_present = time_present; + thread_info[i]->time_max = 0; + thread_info[i]->done = 0; + completion_init(&thread_info[i]->charts_initialized); + fatal_assert(0 == uv_thread_create(&thread_info[i]->thread, generate_dbengine_chart, thread_info[i])); + completion_wait_for(&thread_info[i]->charts_initialized); + completion_destroy(&thread_info[i]->charts_initialized); + } + for (i = 0 ; i < DSET_CHARTS ; ++i) { + fatal_assert(0 == uv_thread_join(&thread_info[i]->thread)); + } + + for (i = 0 ; i < DSET_CHARTS ; ++i) { + freez(thread_info[i]); + } + freez(thread_info); + rrd_wrlock(); + rrdhost_free___while_having_rrd_wrlock(localhost, true); + rrd_unlock(); +} + +struct dbengine_query_thread { + uv_thread_t thread; + RRDHOST *host; + char *chartname; /* Will be prefixed by type, e.g. "example_local1.", "example_local2." 
etc */ + unsigned dset_charts; /* number of charts */ + unsigned dset_dims; /* dimensions per chart */ + time_t time_present; /* current virtual time of the benchmark */ + unsigned history_seconds; /* how far back in the past to go */ + volatile long done; /* initialize to 0, set to 1 to stop thread */ + unsigned long errors, queries_nr, queried_metrics_nr; /* statistics */ + uint8_t delete_old_data; /* if non zero then data are deleted when disk space is exhausted */ + + struct dbengine_chart_thread *chart_threads[]; /* dset_charts elements */ +}; + +static void query_dbengine_chart(void *arg) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + struct dbengine_query_thread *thread_info = (struct dbengine_query_thread *)arg; + const int DSET_CHARTS = thread_info->dset_charts; + const int DSET_DIMS = thread_info->dset_dims; + time_t time_after, time_before, time_min, time_approx_min, time_max, duration; + int i, j, update_every = 1; + RRDSET *st; + RRDDIM *rd; + uint8_t same; + time_t time_now, time_retrieved, end_time; + collected_number generatedv; + NETDATA_DOUBLE value, expected; + struct storage_engine_query_handle seqh; + size_t value_errors = 0, time_errors = 0; + + do { + // pick a chart and dimension + i = random() % DSET_CHARTS; + st = thread_info->chart_threads[i]->st; + j = random() % DSET_DIMS; + rd = thread_info->chart_threads[i]->rd[j]; + + time_min = thread_info->time_present - thread_info->history_seconds + 1; + time_max = thread_info->chart_threads[i]->time_max; + + if (thread_info->delete_old_data) { + /* A time window of twice the disk space is sufficient for compression space savings of up to 50% */ + time_approx_min = time_max - (default_rrdeng_disk_quota_mb * 2 * 1024 * 1024) / + (((uint64_t) DSET_DIMS * DSET_CHARTS) * sizeof(storage_number)); + time_min = MAX(time_min, time_approx_min); + } + if (!time_max) { + time_before = time_after = time_min; + } else { + time_after = time_min + random() % (MAX(time_max - time_min, 1)); + 
duration = random() % 3600; + time_before = MIN(time_after + duration, time_max); /* up to 1 hour queries */ + } + + storage_engine_query_init(rd->tiers[0].seb, rd->tiers[0].smh, &seqh, time_after, time_before, STORAGE_PRIORITY_NORMAL); + ++thread_info->queries_nr; + for (time_now = time_after ; time_now <= time_before ; time_now += update_every) { + generatedv = generate_dbengine_chart_value(i, j, time_now); + expected = unpack_storage_number(pack_storage_number((NETDATA_DOUBLE) generatedv, SN_DEFAULT_FLAGS)); + + if (unlikely(storage_engine_query_is_finished(&seqh))) { + if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ + fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT + ", found data gap, ### ERROR 12 ###\n", + rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected); + ++thread_info->errors; + } + break; + } + + STORAGE_POINT sp = storage_engine_query_next_metric(&seqh); + value = sp.sum; + time_retrieved = sp.start_time_s; + end_time = sp.end_time_s; + + if (!netdata_double_isnumber(value)) { + if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ + fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT + ", found data gap, ### ERROR 13 ###\n", + rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected); + ++thread_info->errors; + } + break; + } + ++thread_info->queried_metrics_nr; + + same = (roundndd(value) == roundndd(expected)) ? 
1 : 0; + if (!same) { + if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ + if(!value_errors) + fprintf(stderr, " DB-engine stresstest %s/%s: at %lu secs, expecting value " NETDATA_DOUBLE_FORMAT + ", found " NETDATA_DOUBLE_FORMAT ", ### ERROR 14 ###\n", + rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, expected, value); + value_errors++; + thread_info->errors++; + } + } + if (end_time != time_now) { + if (!thread_info->delete_old_data) { /* data validation only when we don't delete */ + if(!time_errors) + fprintf(stderr, + " DB-engine stresstest %s/%s: at %lu secs, found timestamp %lu ### ERROR 15 ###\n", + rrdset_name(st), rrddim_name(rd), (unsigned long) time_now, (unsigned long) time_retrieved); + time_errors++; + thread_info->errors++; + } + } + } + storage_engine_query_finalize(&seqh); + } while(!thread_info->done); + + if(value_errors) + fprintf(stderr, "%zu value errors encountered\n", value_errors); + + if(time_errors) + fprintf(stderr, "%zu time errors encountered\n", time_errors); +} + +void dbengine_stress_test(unsigned TEST_DURATION_SEC, unsigned DSET_CHARTS, unsigned QUERY_THREADS, + unsigned RAMP_UP_SECONDS, unsigned PAGE_CACHE_MB, unsigned DISK_SPACE_MB) +{ + fprintf(stderr, "%s() running...\n", __FUNCTION__ ); + const unsigned DSET_DIMS = 128; + const uint64_t EXPECTED_COMPRESSION_RATIO = 20; + const unsigned HISTORY_SECONDS = 3600 * 24 * 365 * 50; /* 50 year of history */ + RRDHOST *host = NULL; + struct dbengine_chart_thread **chart_threads; + struct dbengine_query_thread **query_threads; + unsigned i, j; + time_t time_start, test_duration; + + nd_log_limits_unlimited(); + + if (!TEST_DURATION_SEC) + TEST_DURATION_SEC = 10; + if (!DSET_CHARTS) + DSET_CHARTS = 1; + if (!QUERY_THREADS) + QUERY_THREADS = 1; + if (PAGE_CACHE_MB < RRDENG_MIN_PAGE_CACHE_SIZE_MB) + PAGE_CACHE_MB = RRDENG_MIN_PAGE_CACHE_SIZE_MB; + + default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; + default_rrdeng_page_cache_mb = 
PAGE_CACHE_MB; + if (DISK_SPACE_MB) { + fprintf(stderr, "By setting disk space limit data are allowed to be deleted. " + "Data validation is turned off for this run.\n"); + default_rrdeng_disk_quota_mb = DISK_SPACE_MB; + } else { + // Worst case for uncompressible data + default_rrdeng_disk_quota_mb = + (((uint64_t) DSET_DIMS * DSET_CHARTS) * sizeof(storage_number) * HISTORY_SECONDS) / (1024 * 1024); + default_rrdeng_disk_quota_mb -= default_rrdeng_disk_quota_mb * EXPECTED_COMPRESSION_RATIO / 100; + } + + fprintf(stderr, "Initializing localhost with hostname 'dbengine-stress-test'\n"); + + (void)sql_init_meta_database(DB_CHECK_NONE, 1); + host = dbengine_rrdhost_find_or_create("dbengine-stress-test"); + if (NULL == host) + return; + + chart_threads = mallocz(sizeof(*chart_threads) * DSET_CHARTS); + for (i = 0 ; i < DSET_CHARTS ; ++i) { + chart_threads[i] = mallocz(sizeof(*chart_threads[i]) + sizeof(RRDDIM *) * DSET_DIMS); + } + query_threads = mallocz(sizeof(*query_threads) * QUERY_THREADS); + for (i = 0 ; i < QUERY_THREADS ; ++i) { + query_threads[i] = mallocz(sizeof(*query_threads[i]) + sizeof(struct dbengine_chart_thread *) * DSET_CHARTS); + } + fprintf(stderr, "\nRunning DB-engine stress test, %u seconds writers ramp-up time,\n" + "%u seconds of concurrent readers and writers, %u writer threads, %u reader threads,\n" + "%u MiB of page cache.\n", + RAMP_UP_SECONDS, TEST_DURATION_SEC, DSET_CHARTS, QUERY_THREADS, PAGE_CACHE_MB); + + time_start = now_realtime_sec() + HISTORY_SECONDS; /* move history to the future */ + for (i = 0 ; i < DSET_CHARTS ; ++i) { + chart_threads[i]->host = host; + chart_threads[i]->chartname = "random"; + chart_threads[i]->dset_charts = DSET_CHARTS; + chart_threads[i]->chart_i = i; + chart_threads[i]->dset_dims = DSET_DIMS; + chart_threads[i]->history_seconds = HISTORY_SECONDS; + chart_threads[i]->time_present = time_start; + chart_threads[i]->time_max = 0; + chart_threads[i]->done = 0; + chart_threads[i]->errors = 
chart_threads[i]->stored_metrics_nr = 0; + completion_init(&chart_threads[i]->charts_initialized); + fatal_assert(0 == uv_thread_create(&chart_threads[i]->thread, generate_dbengine_chart, chart_threads[i])); + } + /* barrier so that subsequent queries can access valid chart data */ + for (i = 0 ; i < DSET_CHARTS ; ++i) { + completion_wait_for(&chart_threads[i]->charts_initialized); + completion_destroy(&chart_threads[i]->charts_initialized); + } + sleep(RAMP_UP_SECONDS); + /* at this point data have already began being written to the database */ + for (i = 0 ; i < QUERY_THREADS ; ++i) { + query_threads[i]->host = host; + query_threads[i]->chartname = "random"; + query_threads[i]->dset_charts = DSET_CHARTS; + query_threads[i]->dset_dims = DSET_DIMS; + query_threads[i]->history_seconds = HISTORY_SECONDS; + query_threads[i]->time_present = time_start; + query_threads[i]->done = 0; + query_threads[i]->errors = query_threads[i]->queries_nr = query_threads[i]->queried_metrics_nr = 0; + for (j = 0 ; j < DSET_CHARTS ; ++j) { + query_threads[i]->chart_threads[j] = chart_threads[j]; + } + query_threads[i]->delete_old_data = DISK_SPACE_MB ? 
1 : 0; + fatal_assert(0 == uv_thread_create(&query_threads[i]->thread, query_dbengine_chart, query_threads[i])); + } + sleep(TEST_DURATION_SEC); + /* stop workload */ + for (i = 0 ; i < DSET_CHARTS ; ++i) { + chart_threads[i]->done = 1; + } + for (i = 0 ; i < QUERY_THREADS ; ++i) { + query_threads[i]->done = 1; + } + for (i = 0 ; i < DSET_CHARTS ; ++i) { + assert(0 == uv_thread_join(&chart_threads[i]->thread)); + } + for (i = 0 ; i < QUERY_THREADS ; ++i) { + assert(0 == uv_thread_join(&query_threads[i]->thread)); + } + test_duration = now_realtime_sec() - (time_start - HISTORY_SECONDS); + if (!test_duration) + test_duration = 1; + fprintf(stderr, "\nDB-engine stress test finished in %lld seconds.\n", (long long)test_duration); + unsigned long stored_metrics_nr = 0; + for (i = 0 ; i < DSET_CHARTS ; ++i) { + stored_metrics_nr += chart_threads[i]->stored_metrics_nr; + } + unsigned long queried_metrics_nr = 0; + for (i = 0 ; i < QUERY_THREADS ; ++i) { + queried_metrics_nr += query_threads[i]->queried_metrics_nr; + } + fprintf(stderr, "%u metrics were stored (dataset size of %lu MiB) in %u charts by 1 writer thread per chart.\n", + DSET_CHARTS * DSET_DIMS, stored_metrics_nr * sizeof(storage_number) / (1024 * 1024), DSET_CHARTS); + fprintf(stderr, "Metrics were being generated per 1 emulated second and time was accelerated.\n"); + fprintf(stderr, "%lu metric data points were queried by %u reader threads.\n", queried_metrics_nr, QUERY_THREADS); + fprintf(stderr, "Query starting time is randomly chosen from the beginning of the time-series up to the time of\n" + "the latest data point, and ending time from 1 second up to 1 hour after the starting time.\n"); + fprintf(stderr, "Performance is %lld written data points/sec and %lld read data points/sec.\n", + (long long)(stored_metrics_nr / test_duration), (long long)(queried_metrics_nr / test_duration)); + + for (i = 0 ; i < DSET_CHARTS ; ++i) { + freez(chart_threads[i]); + } + freez(chart_threads); + for (i = 0 ; i < 
QUERY_THREADS ; ++i) { + freez(query_threads[i]); + } + freez(query_threads); + rrd_wrlock(); + rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].si); + rrdeng_exit((struct rrdengine_instance *)host->db[0].si); + rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL); + rrd_unlock(); +} + +#endif
\ No newline at end of file diff --git a/src/database/engine/dbengine-unittest.c b/src/database/engine/dbengine-unittest.c new file mode 100644 index 000000000..4c4d312c0 --- /dev/null +++ b/src/database/engine/dbengine-unittest.c @@ -0,0 +1,419 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../../daemon/common.h" + +#ifdef ENABLE_DBENGINE + +#define CHARTS 64 +#define DIMS 16 // CHARTS * DIMS dimensions +#define REGIONS 11 +#define POINTS_PER_REGION 16384 +static const int REGION_UPDATE_EVERY[REGIONS] = {1, 15, 3, 20, 2, 6, 30, 12, 5, 4, 10}; + +#define START_TIMESTAMP MAX(2 * API_RELATIVE_TIME_MAX, 200000000) + +static time_t region_start_time(time_t previous_region_end_time, time_t update_every) { + // leave a small gap between regions + // but keep them close together, so that cross-region queries will be fast + + time_t rc = previous_region_end_time + update_every; + rc += update_every - (rc % update_every); + rc += update_every; + return rc; +} + +static inline collected_number point_value_get(size_t region, size_t chart, size_t dim, size_t point) { + // calculate the value to be stored for each point in the database + + collected_number r = (collected_number)region; + collected_number c = (collected_number)chart; + collected_number d = (collected_number)dim; + collected_number p = (collected_number)point; + + return (r * CHARTS * DIMS * POINTS_PER_REGION + + c * DIMS * POINTS_PER_REGION + + d * POINTS_PER_REGION + + p) % 10000000; +} + +static inline void storage_point_check(size_t region, size_t chart, size_t dim, size_t point, time_t now, time_t update_every, STORAGE_POINT sp, size_t *value_errors, size_t *time_errors, size_t *update_every_errors) { + // check the supplied STORAGE_POINT retrieved from the database + // against the computed timestamp, update_every and expected value + + if(storage_point_is_gap(sp)) sp.min = sp.max = sp.sum = NAN; + + collected_number expected = point_value_get(region, chart, dim, point); + + 
if(roundndd(expected) != roundndd(sp.sum)) { + if(*value_errors < DIMS * 2) { + fprintf(stderr, " >>> DBENGINE: VALUE DOES NOT MATCH: " + "region %zu, chart %zu, dimension %zu, point %zu, time %ld: " + "expected %lld, found %f\n", + region, chart, dim, point, now, expected, sp.sum); + } + + (*value_errors)++; + } + + if(sp.start_time_s > now || sp.end_time_s < now) { + if(*time_errors < DIMS * 2) { + fprintf(stderr, " >>> DBENGINE: TIMESTAMP DOES NOT MATCH: " + "region %zu, chart %zu, dimension %zu, point %zu, timestamp %ld: " + "expected %ld, found %ld - %ld\n", + region, chart, dim, point, now, now, sp.start_time_s, sp.end_time_s); + } + + (*time_errors)++; + } + + if(update_every != sp.end_time_s - sp.start_time_s) { + if(*update_every_errors < DIMS * 2) { + fprintf(stderr, " >>> DBENGINE: UPDATE EVERY DOES NOT MATCH: " + "region %zu, chart %zu, dimension %zu, point %zu, timestamp %ld: " + "expected %ld, found %ld\n", + region, chart, dim, point, now, update_every, sp.end_time_s - sp.start_time_s); + } + + (*update_every_errors)++; + } +} + +static inline void rrddim_set_by_pointer_fake_time(RRDDIM *rd, collected_number value, time_t now) { + rd->collector.last_collected_time.tv_sec = now; + rd->collector.last_collected_time.tv_usec = 0; + rd->collector.collected_value = value; + rrddim_set_updated(rd); + + rd->collector.counter++; + + collected_number v = (value >= 0) ? 
value : -value; + if(unlikely(v > rd->collector.collected_value_max)) rd->collector.collected_value_max = v; +} + +static RRDHOST *dbengine_rrdhost_find_or_create(char *name) { + /* We don't want to drop metrics when generating load, + * we prefer to block data generation itself */ + + return rrdhost_find_or_create( + name, + name, + name, + os_type, + netdata_configured_timezone, + netdata_configured_abbrev_timezone, + netdata_configured_utc_offset, + program_name, + program_version, + default_rrd_update_every, + default_rrd_history_entries, + RRD_MEMORY_MODE_DBENGINE, + health_plugin_enabled(), + default_rrdpush_enabled, + default_rrdpush_destination, + default_rrdpush_api_key, + default_rrdpush_send_charts_matching, + default_rrdpush_enable_replication, + default_rrdpush_seconds_to_replicate, + default_rrdpush_replication_step, + NULL, + 0 + ); +} + +static void test_dbengine_create_charts(RRDHOST *host, RRDSET *st[CHARTS], RRDDIM *rd[CHARTS][DIMS], + int update_every) { + fprintf(stderr, "DBENGINE Creating Test Charts...\n"); + + int i, j; + char name[101]; + + for (i = 0 ; i < CHARTS ; ++i) { + snprintfz(name, sizeof(name) - 1, "dbengine-chart-%d", i); + + // create the chart + st[i] = rrdset_create(host, "netdata", name, name, "netdata", NULL, "Unit Testing", "a value", "unittest", + NULL, 1, update_every, RRDSET_TYPE_LINE); + rrdset_flag_set(st[i], RRDSET_FLAG_DEBUG); + rrdset_flag_set(st[i], RRDSET_FLAG_STORE_FIRST); + for (j = 0 ; j < DIMS ; ++j) { + snprintfz(name, sizeof(name) - 1, "dim-%d", j); + + rd[i][j] = rrddim_add(st[i], name, NULL, 1, 1, RRD_ALGORITHM_ABSOLUTE); + } + } + + // Initialize DB with the very first entries + for (i = 0 ; i < CHARTS ; ++i) { + for (j = 0 ; j < DIMS ; ++j) { + rd[i][j]->collector.last_collected_time.tv_sec = + st[i]->last_collected_time.tv_sec = st[i]->last_updated.tv_sec = START_TIMESTAMP - 1; + rd[i][j]->collector.last_collected_time.tv_usec = + st[i]->last_collected_time.tv_usec = st[i]->last_updated.tv_usec = 0; + } 
+ } + for (i = 0 ; i < CHARTS ; ++i) { + st[i]->usec_since_last_update = USEC_PER_SEC; + + for (j = 0; j < DIMS; ++j) { + rrddim_set_by_pointer_fake_time(rd[i][j], 69, START_TIMESTAMP); // set first value to 69 + } + + struct timeval now; + now_realtime_timeval(&now); + rrdset_timed_done(st[i], now, false); + } + // Flush pages for subsequent real values + for (i = 0 ; i < CHARTS ; ++i) { + for (j = 0; j < DIMS; ++j) { + rrdeng_store_metric_flush_current_page((rd[i][j])->tiers[0].sch); + } + } +} + +static time_t test_dbengine_create_metrics( + RRDSET *st[CHARTS], + RRDDIM *rd[CHARTS][DIMS], + size_t current_region, + time_t time_start) { + + time_t update_every = REGION_UPDATE_EVERY[current_region]; + fprintf(stderr, "DBENGINE Single Region Write to " + "region %zu, from %ld to %ld, with update every %ld...\n", + current_region, time_start, time_start + POINTS_PER_REGION * update_every, update_every); + + // for the database to save the metrics at the right time, we need to set + // the last data collection time to be just before the first data collection. + time_t time_now = time_start; + for (size_t c = 0 ; c < CHARTS ; ++c) { + for (size_t d = 0 ; d < DIMS ; ++d) { + storage_engine_store_change_collection_frequency(rd[c][d]->tiers[0].sch, (int)update_every); + + // setting these timestamps, to the data collection time, prevents interpolation + // during data collection, so that our value will be written as-is to the + // database. 
+ + rd[c][d]->collector.last_collected_time.tv_sec = + st[c]->last_collected_time.tv_sec = st[c]->last_updated.tv_sec = time_now; + + rd[c][d]->collector.last_collected_time.tv_usec = + st[c]->last_collected_time.tv_usec = st[c]->last_updated.tv_usec = 0; + } + } + + // set the samples to the database + for (size_t p = 0; p < POINTS_PER_REGION ; ++p) { + for (size_t c = 0 ; c < CHARTS ; ++c) { + st[c]->usec_since_last_update = USEC_PER_SEC * update_every; + + for (size_t d = 0; d < DIMS; ++d) + rrddim_set_by_pointer_fake_time(rd[c][d], point_value_get(current_region, c, d, p), time_now); + + rrdset_timed_done(st[c], (struct timeval){ .tv_sec = time_now, .tv_usec = 0 }, false); + } + + time_now += update_every; + } + + return time_now; +} + +// Checks the metric data for the given region, returns number of errors +static size_t test_dbengine_check_metrics( + RRDSET *st[CHARTS] __maybe_unused, + RRDDIM *rd[CHARTS][DIMS], + size_t current_region, + time_t time_start, + time_t time_end) { + + time_t update_every = REGION_UPDATE_EVERY[current_region]; + fprintf(stderr, "DBENGINE Single Region Read from " + "region %zu, from %ld to %ld, with update every %ld...\n", + current_region, time_start, time_end, update_every); + + // initialize all queries + struct storage_engine_query_handle handles[CHARTS * DIMS] = { 0 }; + for (size_t c = 0 ; c < CHARTS ; ++c) { + for (size_t d = 0; d < DIMS; ++d) { + storage_engine_query_init(rd[c][d]->tiers[0].seb, + rd[c][d]->tiers[0].smh, + &handles[c * DIMS + d], + time_start, + time_end, + STORAGE_PRIORITY_NORMAL); + } + } + + // check the stored samples + size_t value_errors = 0, time_errors = 0, update_every_errors = 0; + time_t time_now = time_start; + for(size_t p = 0; p < POINTS_PER_REGION ;p++) { + for (size_t c = 0 ; c < CHARTS ; ++c) { + for (size_t d = 0; d < DIMS; ++d) { + STORAGE_POINT sp = storage_engine_query_next_metric(&handles[c * DIMS + d]); + storage_point_check(current_region, c, d, p, time_now, update_every, sp, + 
&value_errors, &time_errors, &update_every_errors); + } + } + + time_now += update_every; + } + + // finalize the queries + for (size_t c = 0 ; c < CHARTS ; ++c) { + for (size_t d = 0; d < DIMS; ++d) { + storage_engine_query_finalize(&handles[c * DIMS + d]); + } + } + + if(value_errors) + fprintf(stderr, "%zu value errors encountered (out of %d checks)\n", value_errors, POINTS_PER_REGION * CHARTS * DIMS); + + if(time_errors) + fprintf(stderr, "%zu time errors encountered (out of %d checks)\n", time_errors, POINTS_PER_REGION * CHARTS * DIMS); + + if(update_every_errors) + fprintf(stderr, "%zu update every errors encountered (out of %d checks)\n", update_every_errors, POINTS_PER_REGION * CHARTS * DIMS); + + return value_errors + time_errors + update_every_errors; +} + +static size_t dbengine_test_rrdr_single_region( + RRDSET *st[CHARTS], + RRDDIM *rd[CHARTS][DIMS], + size_t current_region, + time_t time_start, + time_t time_end) { + + time_t update_every = REGION_UPDATE_EVERY[current_region]; + fprintf(stderr, "RRDR Single Region Test on " + "region %zu, start time %lld, end time %lld, update every %ld, on %d dimensions...\n", + current_region, (long long)time_start, (long long)time_end, update_every, CHARTS * DIMS); + + size_t errors = 0, value_errors = 0, time_errors = 0, update_every_errors = 0; + long points = (time_end - time_start) / update_every; + for(size_t c = 0; c < CHARTS ;c++) { + ONEWAYALLOC *owa = onewayalloc_create(0); + RRDR *r = rrd2rrdr_legacy(owa, st[c], points, time_start, time_end, + RRDR_GROUPING_AVERAGE, 0, RRDR_OPTION_NATURAL_POINTS, + NULL, NULL, 0, 0, + QUERY_SOURCE_UNITTEST, STORAGE_PRIORITY_NORMAL); + if (!r) { + fprintf(stderr, " >>> DBENGINE: %s: empty RRDR on region %zu\n", rrdset_name(st[c]), current_region); + onewayalloc_destroy(owa); + errors++; + continue; + } + + if(r->internal.qt->request.st != st[c]) + fatal("queried wrong chart"); + + if(rrdr_rows(r) != POINTS_PER_REGION) + fatal("query returned wrong number of points 
(expected %d, got %zu)", POINTS_PER_REGION, rrdr_rows(r)); + + time_t time_now = time_start; + for (size_t p = 0; p < rrdr_rows(r); p++) { + size_t d = 0; + RRDDIM *dim; + rrddim_foreach_read(dim, r->internal.qt->request.st) { + if(unlikely(d >= r->d)) + fatal("got more dimensions (%zu) than expected (%zu)", d, r->d); + + if(rd[c][d] != dim) + fatal("queried wrong dimension"); + + RRDR_VALUE_FLAGS *co = &r->o[ p * r->d ]; + NETDATA_DOUBLE *cn = &r->v[ p * r->d ]; + + STORAGE_POINT sp = STORAGE_POINT_UNSET; + sp.min = sp.max = sp.sum = (co[d] & RRDR_VALUE_EMPTY) ? NAN :cn[d]; + sp.count = 1; + sp.end_time_s = r->t[p]; + sp.start_time_s = sp.end_time_s - r->view.update_every; + + storage_point_check(current_region, c, d, p, time_now, update_every, sp, &value_errors, &time_errors, &update_every_errors); + d++; + } + rrddim_foreach_done(dim); + time_now += update_every; + } + + rrdr_free(owa, r); + onewayalloc_destroy(owa); + } + + if(value_errors) + fprintf(stderr, "%zu value errors encountered (out of %d checks)\n", value_errors, POINTS_PER_REGION * CHARTS * DIMS); + + if(time_errors) + fprintf(stderr, "%zu time errors encountered (out of %d checks)\n", time_errors, POINTS_PER_REGION * CHARTS * DIMS); + + if(update_every_errors) + fprintf(stderr, "%zu update every errors encountered (out of %d checks)\n", update_every_errors, POINTS_PER_REGION * CHARTS * DIMS); + + return errors + value_errors + time_errors + update_every_errors; +} + +int test_dbengine(void) { + // provide enough threads to dbengine + setenv("UV_THREADPOOL_SIZE", "48", 1); + + size_t errors = 0, value_errors = 0, time_errors = 0; + + nd_log_limits_unlimited(); + fprintf(stderr, "\nRunning DB-engine test\n"); + + default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; + fprintf(stderr, "Initializing localhost with hostname 'unittest-dbengine'"); + RRDHOST *host = dbengine_rrdhost_find_or_create("unittest-dbengine"); + if(!host) + fatal("Failed to initialize host"); + + RRDSET *st[CHARTS] = { 0 }; + 
RRDDIM *rd[CHARTS][DIMS] = { 0 }; + time_t time_start[REGIONS] = { 0 }, time_end[REGIONS] = { 0 }; + + // create the charts and dimensions we need + test_dbengine_create_charts(host, st, rd, REGION_UPDATE_EVERY[0]); + + time_t now = START_TIMESTAMP; + time_t update_every_old = REGION_UPDATE_EVERY[0]; + for(size_t current_region = 0; current_region < REGIONS ;current_region++) { + time_t update_every = REGION_UPDATE_EVERY[current_region]; + + if(update_every != update_every_old) { + for (size_t c = 0 ; c < CHARTS ; ++c) + rrdset_set_update_every_s(st[c], update_every); + } + + time_start[current_region] = region_start_time(now, update_every); + now = time_end[current_region] = test_dbengine_create_metrics(st,rd, current_region, time_start[current_region]); + + errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region], time_end[current_region]); + } + + // check everything again + for(size_t current_region = 0; current_region < REGIONS ;current_region++) + errors += test_dbengine_check_metrics(st, rd, current_region, time_start[current_region], time_end[current_region]); + + // check again in reverse order + for(size_t current_region = 0; current_region < REGIONS ;current_region++) { + size_t region = REGIONS - 1 - current_region; + errors += test_dbengine_check_metrics(st, rd, region, time_start[region], time_end[region]); + } + + // check all the regions using RRDR + // this also checks the query planner and the query engine of Netdata + for (size_t current_region = 0 ; current_region < REGIONS ; current_region++) { + errors += dbengine_test_rrdr_single_region(st, rd, current_region, time_start[current_region], time_end[current_region]); + } + + rrd_wrlock(); + rrdeng_prepare_exit((struct rrdengine_instance *)host->db[0].si); + rrdeng_exit((struct rrdengine_instance *)host->db[0].si); + rrdeng_enq_cmd(NULL, RRDENG_OPCODE_SHUTDOWN_EVLOOP, NULL, NULL, STORAGE_PRIORITY_BEST_EFFORT, NULL, NULL); + rrd_unlock(); + + return (int)(errors + 
value_errors + time_errors); +} + +#endif diff --git a/database/engine/journalfile.c b/src/database/engine/journalfile.c index 9005b81ca..8099d017f 100644 --- a/database/engine/journalfile.c +++ b/src/database/engine/journalfile.c @@ -637,9 +637,12 @@ static int journalfile_check_superblock(uv_file file) fatal_assert(req.result >= 0); uv_fs_req_cleanup(&req); - if (strncmp(superblock->magic_number, RRDENG_JF_MAGIC, RRDENG_MAGIC_SZ) || - strncmp(superblock->version, RRDENG_JF_VER, RRDENG_VER_SZ)) { - netdata_log_error("DBENGINE: File has invalid superblock."); + + char jf_magic[RRDENG_MAGIC_SZ] = RRDENG_JF_MAGIC; + char jf_ver[RRDENG_VER_SZ] = RRDENG_JF_VER; + if (strncmp(superblock->magic_number, jf_magic, RRDENG_MAGIC_SZ) != 0 || + strncmp(superblock->version, jf_ver, RRDENG_VER_SZ) != 0) { + nd_log(NDLS_DAEMON, NDLP_ERR, "DBENGINE: File has invalid superblock."); ret = UV_EINVAL; } else { ret = 0; @@ -669,7 +672,7 @@ static void journalfile_restore_extent_metadata(struct rrdengine_instance *ctx, uuid_t *temp_id; uint8_t page_type = jf_metric_data->descr[i].type; - if (page_type > PAGE_TYPE_MAX) { + if (page_type > RRDENG_PAGE_TYPE_MAX) { if (!bitmap256_get_bit(&page_error_map, page_type)) { netdata_log_error("DBENGINE: unknown page type %d encountered.", page_type); bitmap256_set_bit(&page_error_map, page_type, 1); @@ -700,13 +703,19 @@ static void journalfile_restore_extent_metadata(struct rrdengine_instance *ctx, .section = (Word_t)ctx, .first_time_s = vd.start_time_s, .last_time_s = vd.end_time_s, - .latest_update_every_s = (uint32_t) vd.update_every_s, + .latest_update_every_s = vd.update_every_s, }; bool added; metric = mrg_metric_add_and_acquire(main_mrg, entry, &added); - if(added) + if(added) { + __atomic_add_fetch(&ctx->atomic.metrics, 1, __ATOMIC_RELAXED); update_metric_time = false; + } + if (vd.update_every_s) { + uint64_t samples = (vd.end_time_s - vd.start_time_s) / vd.update_every_s; + __atomic_add_fetch(&ctx->atomic.samples, samples, 
__ATOMIC_RELAXED); + } } Word_t metric_id = mrg_metric_id(main_mrg, metric); @@ -1005,7 +1014,7 @@ void journalfile_v2_populate_retention_to_mrg(struct rrdengine_instance *ctx, st time_t end_time_s = header_start_time_s + metric->delta_end_s; mrg_update_metric_retention_and_granularity_by_uuid( - main_mrg, (Word_t)ctx, &metric->uuid, start_time_s, end_time_s, (time_t) metric->update_every_s, now_s); + main_mrg, (Word_t)ctx, &metric->uuid, start_time_s, end_time_s, metric->update_every_s, now_s); metric++; } @@ -1042,7 +1051,7 @@ int journalfile_v2_load(struct rrdengine_instance *ctx, struct rrdengine_journal journal_v1_file_size = (uint32_t)statbuf.st_size; journalfile_v2_generate_path(datafile, path_v2, sizeof(path_v2)); - fd = open(path_v2, O_RDONLY); + fd = open(path_v2, O_RDONLY | O_CLOEXEC); if (fd < 0) { if (errno == ENOENT) return 1; @@ -1226,7 +1235,7 @@ void *journalfile_v2_write_data_page(struct journal_v2_header *j2_header, void * data_page->delta_end_s = (uint32_t) (page_info->end_time_s - (time_t) (j2_header->start_time_ut) / USEC_PER_SEC); data_page->extent_index = page_info->extent_index; - data_page->update_every_s = (uint32_t) page_info->update_every_s; + data_page->update_every_s = page_info->update_every_s; data_page->page_length = (uint16_t) (ei ? 
ei->page_length : page_info->page_length); data_page->type = 0; @@ -1252,7 +1261,7 @@ static void *journalfile_v2_write_descriptors(struct journal_v2_header *j2_heade page_info = *PValue; // Write one descriptor and return the next data page location data_page = journalfile_v2_write_data_page(j2_header, (void *) data_page, page_info); - update_every_s = (uint32_t) page_info->update_every_s; + update_every_s = page_info->update_every_s; if (NULL == data_page) break; } diff --git a/database/engine/journalfile.h b/src/database/engine/journalfile.h index 5cdf72b9d..3f881ee16 100644 --- a/database/engine/journalfile.h +++ b/src/database/engine/journalfile.h @@ -7,7 +7,6 @@ /* Forward declarations */ struct rrdengine_instance; -struct rrdengine_worker_config; struct rrdengine_datafile; struct rrdengine_journalfile; diff --git a/database/engine/metric.c b/src/database/engine/metric.c index 2e132612e..01eb22fbc 100644 --- a/database/engine/metric.c +++ b/src/database/engine/metric.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-3.0-or-later #include "metric.h" +#include "cache.h" +#include "libnetdata/locks/locks.h" +#include "rrddiskprotocol.h" typedef int32_t REFCOUNT; #define REFCOUNT_DELETING (-100) @@ -104,8 +107,11 @@ static inline void mrg_stats_size_judyhs_removed_uuid(MRG *mrg, size_t partition static inline size_t uuid_partition(MRG *mrg __maybe_unused, uuid_t *uuid) { uint8_t *u = (uint8_t *)uuid; - size_t *n = (size_t *)&u[UUID_SZ - sizeof(size_t)]; - return *n % mrg->partitions; + + size_t n; + memcpy(&n, &u[UUID_SZ - sizeof(size_t)], sizeof(size_t)); + + return n % mrg->partitions; } static inline time_t mrg_metric_get_first_time_s_smart(MRG *mrg __maybe_unused, METRIC *metric) { @@ -125,87 +131,174 @@ static inline time_t mrg_metric_get_first_time_s_smart(MRG *mrg __maybe_unused, return first_time_s; } -static inline REFCOUNT metric_acquire(MRG *mrg __maybe_unused, METRIC *metric) { +static void metric_log(MRG *mrg __maybe_unused, METRIC *metric, const 
char *msg) { + struct rrdengine_instance *ctx = (struct rrdengine_instance *)metric->section; + + char uuid[UUID_STR_LEN]; + uuid_unparse_lower(metric->uuid, uuid); + nd_log(NDLS_DAEMON, NDLP_ERR, + "METRIC: %s on %s at tier %d, refcount %d, partition %u, " + "retention [%ld - %ld (hot), %ld (clean)], update every %"PRIu32", " + "writer pid %d " + "--- PLEASE OPEN A GITHUB ISSUE TO REPORT THIS LOG LINE TO NETDATA --- ", + msg, + uuid, + ctx->config.tier, + metric->refcount, + metric->partition, + metric->first_time_s, + metric->latest_time_s_hot, + metric->latest_time_s_clean, + metric->latest_update_every_s, + (int)metric->writer + ); +} + +static inline bool acquired_metric_has_retention(MRG *mrg, METRIC *metric) { + time_t first, last; + mrg_metric_get_retention(mrg, metric, &first, &last, NULL); + return (!first || !last || first > last); +} + +static inline void acquired_for_deletion_metric_delete(MRG *mrg, METRIC *metric) { size_t partition = metric->partition; - REFCOUNT expected = __atomic_load_n(&metric->refcount, __ATOMIC_RELAXED); - REFCOUNT refcount; + + size_t mem_before_judyl, mem_after_judyl; + + mrg_index_write_lock(mrg, partition); + + Pvoid_t *sections_judy_pptr = JudyHSGet(mrg->index[partition].uuid_judy, &metric->uuid, sizeof(uuid_t)); + if(unlikely(!sections_judy_pptr || !*sections_judy_pptr)) { + MRG_STATS_DELETE_MISS(mrg, partition); + mrg_index_write_unlock(mrg, partition); + return; + } + + mem_before_judyl = JudyLMemUsed(*sections_judy_pptr); + int rc = JudyLDel(sections_judy_pptr, metric->section, PJE0); + mem_after_judyl = JudyLMemUsed(*sections_judy_pptr); + mrg_stats_size_judyl_change(mrg, mem_before_judyl, mem_after_judyl, partition); + + if(unlikely(!rc)) { + MRG_STATS_DELETE_MISS(mrg, partition); + mrg_index_write_unlock(mrg, partition); + return; + } + + if(!*sections_judy_pptr) { + rc = JudyHSDel(&mrg->index[partition].uuid_judy, &metric->uuid, sizeof(uuid_t), PJE0); + if(unlikely(!rc)) + fatal("DBENGINE METRIC: cannot delete UUID 
from JudyHS"); + mrg_stats_size_judyhs_removed_uuid(mrg, partition); + } + + MRG_STATS_DELETED_METRIC(mrg, partition); + + mrg_index_write_unlock(mrg, partition); + + aral_freez(mrg->index[partition].aral, metric); +} + +static inline bool metric_acquire(MRG *mrg, METRIC *metric) { + REFCOUNT expected, desired; + + expected = __atomic_load_n(&metric->refcount, __ATOMIC_RELAXED); do { - if(expected < 0) - fatal("METRIC: refcount is %d (negative) during acquire", metric->refcount); + if(unlikely(expected < 0)) + return false; + + desired = expected + 1; - refcount = expected + 1; - } while(!__atomic_compare_exchange_n(&metric->refcount, &expected, refcount, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)); + } while(!__atomic_compare_exchange_n(&metric->refcount, &expected, desired, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)); + + size_t partition = metric->partition; - if(refcount == 1) + if(desired == 1) __atomic_add_fetch(&mrg->index[partition].stats.entries_referenced, 1, __ATOMIC_RELAXED); __atomic_add_fetch(&mrg->index[partition].stats.current_references, 1, __ATOMIC_RELAXED); - return refcount; + return true; } -static inline bool metric_release_and_can_be_deleted(MRG *mrg __maybe_unused, METRIC *metric) { +static inline bool metric_release(MRG *mrg, METRIC *metric, bool delete_if_last_without_retention) { size_t partition = metric->partition; - REFCOUNT expected = __atomic_load_n(&metric->refcount, __ATOMIC_RELAXED); - REFCOUNT refcount; + REFCOUNT expected, desired; + + expected = __atomic_load_n(&metric->refcount, __ATOMIC_RELAXED); do { - if(expected <= 0) - fatal("METRIC: refcount is %d (zero or negative) during release", metric->refcount); + if(expected <= 0) { + metric_log(mrg, metric, "refcount is zero or negative during release"); + fatal("METRIC: refcount is %d (zero or negative) during release", expected); + } - refcount = expected - 1; - } while(!__atomic_compare_exchange_n(&metric->refcount, &expected, refcount, false, __ATOMIC_RELAXED, 
__ATOMIC_RELAXED)); + if(expected == 1 && delete_if_last_without_retention && !acquired_metric_has_retention(mrg, metric)) + desired = REFCOUNT_DELETING; + else + desired = expected - 1; + + } while(!__atomic_compare_exchange_n(&metric->refcount, &expected, desired, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED)); - if(unlikely(!refcount)) + if(desired == 0 || desired == REFCOUNT_DELETING) { __atomic_sub_fetch(&mrg->index[partition].stats.entries_referenced, 1, __ATOMIC_RELAXED); + if(desired == REFCOUNT_DELETING) + acquired_for_deletion_metric_delete(mrg, metric); + } + __atomic_sub_fetch(&mrg->index[partition].stats.current_references, 1, __ATOMIC_RELAXED); - time_t first, last, ue; - mrg_metric_get_retention(mrg, metric, &first, &last, &ue); - return (!first || !last || first > last); + return desired == REFCOUNT_DELETING; } static inline METRIC *metric_add_and_acquire(MRG *mrg, MRG_ENTRY *entry, bool *ret) { size_t partition = uuid_partition(mrg, entry->uuid); METRIC *allocation = aral_mallocz(mrg->index[partition].aral); + Pvoid_t *PValue; - mrg_index_write_lock(mrg, partition); + while(1) { + mrg_index_write_lock(mrg, partition); - size_t mem_before_judyl, mem_after_judyl; + size_t mem_before_judyl, mem_after_judyl; - Pvoid_t *sections_judy_pptr = JudyHSIns(&mrg->index[partition].uuid_judy, entry->uuid, sizeof(uuid_t), PJE0); - if(unlikely(!sections_judy_pptr || sections_judy_pptr == PJERR)) - fatal("DBENGINE METRIC: corrupted UUIDs JudyHS array"); + Pvoid_t *sections_judy_pptr = JudyHSIns(&mrg->index[partition].uuid_judy, entry->uuid, sizeof(uuid_t), PJE0); + if (unlikely(!sections_judy_pptr || sections_judy_pptr == PJERR)) + fatal("DBENGINE METRIC: corrupted UUIDs JudyHS array"); - if(unlikely(!*sections_judy_pptr)) - mrg_stats_size_judyhs_added_uuid(mrg, partition); + if (unlikely(!*sections_judy_pptr)) + mrg_stats_size_judyhs_added_uuid(mrg, partition); - mem_before_judyl = JudyLMemUsed(*sections_judy_pptr); - Pvoid_t *PValue = JudyLIns(sections_judy_pptr, 
entry->section, PJE0); - mem_after_judyl = JudyLMemUsed(*sections_judy_pptr); - mrg_stats_size_judyl_change(mrg, mem_before_judyl, mem_after_judyl, partition); + mem_before_judyl = JudyLMemUsed(*sections_judy_pptr); + PValue = JudyLIns(sections_judy_pptr, entry->section, PJE0); + mem_after_judyl = JudyLMemUsed(*sections_judy_pptr); + mrg_stats_size_judyl_change(mrg, mem_before_judyl, mem_after_judyl, partition); - if(unlikely(!PValue || PValue == PJERR)) - fatal("DBENGINE METRIC: corrupted section JudyL array"); + if (unlikely(!PValue || PValue == PJERR)) + fatal("DBENGINE METRIC: corrupted section JudyL array"); - if(unlikely(*PValue != NULL)) { - METRIC *metric = *PValue; + if (unlikely(*PValue != NULL)) { + METRIC *metric = *PValue; - metric_acquire(mrg, metric); + if(!metric_acquire(mrg, metric)) { + mrg_index_write_unlock(mrg, partition); + continue; + } - MRG_STATS_DUPLICATE_ADD(mrg, partition); + MRG_STATS_DUPLICATE_ADD(mrg, partition); + mrg_index_write_unlock(mrg, partition); - mrg_index_write_unlock(mrg, partition); + if (ret) + *ret = false; - if(ret) - *ret = false; + aral_freez(mrg->index[partition].aral, allocation); - aral_freez(mrg->index[partition].aral, allocation); + return metric; + } - return metric; + break; } METRIC *metric = allocation; @@ -216,9 +309,8 @@ static inline METRIC *metric_add_and_acquire(MRG *mrg, MRG_ENTRY *entry, bool *r metric->latest_time_s_hot = 0; metric->latest_update_every_s = entry->latest_update_every_s; metric->writer = 0; - metric->refcount = 0; + metric->refcount = 1; metric->partition = partition; - metric_acquire(mrg, metric); *PValue = metric; MRG_STATS_ADDED_METRIC(mrg, partition); @@ -234,77 +326,35 @@ static inline METRIC *metric_add_and_acquire(MRG *mrg, MRG_ENTRY *entry, bool *r static inline METRIC *metric_get_and_acquire(MRG *mrg, uuid_t *uuid, Word_t section) { size_t partition = uuid_partition(mrg, uuid); - mrg_index_read_lock(mrg, partition); + while(1) { + mrg_index_read_lock(mrg, partition); - Pvoid_t 
*sections_judy_pptr = JudyHSGet(mrg->index[partition].uuid_judy, uuid, sizeof(uuid_t)); - if(unlikely(!sections_judy_pptr)) { - mrg_index_read_unlock(mrg, partition); - MRG_STATS_SEARCH_MISS(mrg, partition); - return NULL; - } - - Pvoid_t *PValue = JudyLGet(*sections_judy_pptr, section, PJE0); - if(unlikely(!PValue)) { - mrg_index_read_unlock(mrg, partition); - MRG_STATS_SEARCH_MISS(mrg, partition); - return NULL; - } - - METRIC *metric = *PValue; - - metric_acquire(mrg, metric); - - mrg_index_read_unlock(mrg, partition); - - MRG_STATS_SEARCH_HIT(mrg, partition); - return metric; -} - -static inline bool acquired_metric_del(MRG *mrg, METRIC *metric) { - size_t partition = metric->partition; - - size_t mem_before_judyl, mem_after_judyl; - - mrg_index_write_lock(mrg, partition); + Pvoid_t *sections_judy_pptr = JudyHSGet(mrg->index[partition].uuid_judy, uuid, sizeof(uuid_t)); + if (unlikely(!sections_judy_pptr)) { + mrg_index_read_unlock(mrg, partition); + MRG_STATS_SEARCH_MISS(mrg, partition); + return NULL; + } - if(!metric_release_and_can_be_deleted(mrg, metric)) { - mrg->index[partition].stats.delete_having_retention_or_referenced++; - mrg_index_write_unlock(mrg, partition); - return false; - } + Pvoid_t *PValue = JudyLGet(*sections_judy_pptr, section, PJE0); + if (unlikely(!PValue)) { + mrg_index_read_unlock(mrg, partition); + MRG_STATS_SEARCH_MISS(mrg, partition); + return NULL; + } - Pvoid_t *sections_judy_pptr = JudyHSGet(mrg->index[partition].uuid_judy, &metric->uuid, sizeof(uuid_t)); - if(unlikely(!sections_judy_pptr || !*sections_judy_pptr)) { - MRG_STATS_DELETE_MISS(mrg, partition); - mrg_index_write_unlock(mrg, partition); - return false; - } + METRIC *metric = *PValue; - mem_before_judyl = JudyLMemUsed(*sections_judy_pptr); - int rc = JudyLDel(sections_judy_pptr, metric->section, PJE0); - mem_after_judyl = JudyLMemUsed(*sections_judy_pptr); - mrg_stats_size_judyl_change(mrg, mem_before_judyl, mem_after_judyl, partition); + if(metric && 
!metric_acquire(mrg, metric)) + metric = NULL; - if(unlikely(!rc)) { - MRG_STATS_DELETE_MISS(mrg, partition); - mrg_index_write_unlock(mrg, partition); - return false; - } + mrg_index_read_unlock(mrg, partition); - if(!*sections_judy_pptr) { - rc = JudyHSDel(&mrg->index[partition].uuid_judy, &metric->uuid, sizeof(uuid_t), PJE0); - if(unlikely(!rc)) - fatal("DBENGINE METRIC: cannot delete UUID from JudyHS"); - mrg_stats_size_judyhs_removed_uuid(mrg, partition); + if(metric) { + MRG_STATS_SEARCH_HIT(mrg, partition); + return metric; + } } - - MRG_STATS_DELETED_METRIC(mrg, partition); - - mrg_index_write_unlock(mrg, partition); - - aral_freez(mrg->index[partition].aral, metric); - - return true; } // ---------------------------------------------------------------------------- @@ -359,7 +409,7 @@ inline METRIC *mrg_metric_get_and_acquire(MRG *mrg, uuid_t *uuid, Word_t section } inline bool mrg_metric_release_and_delete(MRG *mrg, METRIC *metric) { - return acquired_metric_del(mrg, metric); + return metric_release(mrg, metric, true); } inline METRIC *mrg_metric_dup(MRG *mrg, METRIC *metric) { @@ -367,8 +417,8 @@ inline METRIC *mrg_metric_dup(MRG *mrg, METRIC *metric) { return metric; } -inline bool mrg_metric_release(MRG *mrg, METRIC *metric) { - return metric_release_and_can_be_deleted(mrg, metric); +inline void mrg_metric_release(MRG *mrg, METRIC *metric) { + metric_release(mrg, metric, false); } inline Word_t mrg_metric_id(MRG *mrg __maybe_unused, METRIC *metric) { @@ -394,8 +444,8 @@ inline bool mrg_metric_set_first_time_s(MRG *mrg __maybe_unused, METRIC *metric, return true; } -inline void mrg_metric_expand_retention(MRG *mrg __maybe_unused, METRIC *metric, time_t first_time_s, time_t last_time_s, time_t update_every_s) { - internal_fatal(first_time_s < 0 || last_time_s < 0 || update_every_s < 0, +inline void mrg_metric_expand_retention(MRG *mrg __maybe_unused, METRIC *metric, time_t first_time_s, time_t last_time_s, uint32_t update_every_s) { + 
internal_fatal(first_time_s < 0 || last_time_s < 0, "DBENGINE METRIC: timestamp is negative"); internal_fatal(first_time_s > max_acceptable_collected_time(), "DBENGINE METRIC: metric first time is in the future"); @@ -425,13 +475,14 @@ inline time_t mrg_metric_get_first_time_s(MRG *mrg __maybe_unused, METRIC *metri return mrg_metric_get_first_time_s_smart(mrg, metric); } -inline void mrg_metric_get_retention(MRG *mrg __maybe_unused, METRIC *metric, time_t *first_time_s, time_t *last_time_s, time_t *update_every_s) { +inline void mrg_metric_get_retention(MRG *mrg __maybe_unused, METRIC *metric, time_t *first_time_s, time_t *last_time_s, uint32_t *update_every_s) { time_t clean = __atomic_load_n(&metric->latest_time_s_clean, __ATOMIC_RELAXED); time_t hot = __atomic_load_n(&metric->latest_time_s_hot, __ATOMIC_RELAXED); *last_time_s = MAX(clean, hot); *first_time_s = mrg_metric_get_first_time_s_smart(mrg, metric); - *update_every_s = __atomic_load_n(&metric->latest_update_every_s, __ATOMIC_RELAXED); + if (update_every_s) + *update_every_s = __atomic_load_n(&metric->latest_update_every_s, __ATOMIC_RELAXED); } inline bool mrg_metric_set_clean_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric, time_t latest_time_s) { @@ -498,8 +549,8 @@ inline bool mrg_metric_zero_disk_retention(MRG *mrg __maybe_unused, METRIC *metr } } while(do_again); - time_t first, last, ue; - mrg_metric_get_retention(mrg, metric, &first, &last, &ue); + time_t first, last; + mrg_metric_get_retention(mrg, metric, &first, &last, NULL); return (first && last && first < last); } @@ -517,6 +568,11 @@ inline bool mrg_metric_set_hot_latest_time_s(MRG *mrg __maybe_unused, METRIC *me return false; } +inline time_t mrg_metric_get_latest_clean_time_s(MRG *mrg __maybe_unused, METRIC *metric) { + time_t clean = __atomic_load_n(&metric->latest_time_s_clean, __ATOMIC_RELAXED); + return clean; +} + inline time_t mrg_metric_get_latest_time_s(MRG *mrg __maybe_unused, METRIC *metric) { time_t clean = 
__atomic_load_n(&metric->latest_time_s_clean, __ATOMIC_RELAXED); time_t hot = __atomic_load_n(&metric->latest_time_s_hot, __ATOMIC_RELAXED); @@ -524,25 +580,21 @@ inline time_t mrg_metric_get_latest_time_s(MRG *mrg __maybe_unused, METRIC *metr return MAX(clean, hot); } -inline bool mrg_metric_set_update_every(MRG *mrg __maybe_unused, METRIC *metric, time_t update_every_s) { - internal_fatal(update_every_s < 0, "DBENGINE METRIC: timestamp is negative"); - +inline bool mrg_metric_set_update_every(MRG *mrg __maybe_unused, METRIC *metric, uint32_t update_every_s) { if(update_every_s > 0) return set_metric_field_with_condition(metric->latest_update_every_s, update_every_s, true); return false; } -inline bool mrg_metric_set_update_every_s_if_zero(MRG *mrg __maybe_unused, METRIC *metric, time_t update_every_s) { - internal_fatal(update_every_s < 0, "DBENGINE METRIC: timestamp is negative"); - +inline bool mrg_metric_set_update_every_s_if_zero(MRG *mrg __maybe_unused, METRIC *metric, uint32_t update_every_s) { if(update_every_s > 0) return set_metric_field_with_condition(metric->latest_update_every_s, update_every_s, _current <= 0); return false; } -inline time_t mrg_metric_get_update_every_s(MRG *mrg __maybe_unused, METRIC *metric) { +inline uint32_t mrg_metric_get_update_every_s(MRG *mrg __maybe_unused, METRIC *metric) { return __atomic_load_n(&metric->latest_update_every_s, __ATOMIC_RELAXED); } @@ -589,7 +641,7 @@ inline bool mrg_metric_clear_writer(MRG *mrg, METRIC *metric) { inline void mrg_update_metric_retention_and_granularity_by_uuid( MRG *mrg, Word_t section, uuid_t *uuid, time_t first_time_s, time_t last_time_s, - time_t update_every_s, time_t now_s) + uint32_t update_every_s, time_t now_s) { if(unlikely(last_time_s > now_s)) { nd_log_limit_static_global_var(erl, 1, 0); @@ -626,14 +678,35 @@ inline void mrg_update_metric_retention_and_granularity_by_uuid( .section = section, .first_time_s = first_time_s, .last_time_s = last_time_s, - .latest_update_every_s = 
(uint32_t) update_every_s + .latest_update_every_s = update_every_s }; metric = mrg_metric_add_and_acquire(mrg, entry, &added); } - if (likely(!added)) + struct rrdengine_instance *ctx = (struct rrdengine_instance *) section; + if (likely(!added)) { + uint64_t old_samples = 0; + + if (update_every_s && metric->latest_update_every_s && metric->latest_time_s_clean) + old_samples = (metric->latest_time_s_clean - metric->first_time_s) / metric->latest_update_every_s; + mrg_metric_expand_retention(mrg, metric, first_time_s, last_time_s, update_every_s); + uint64_t new_samples = 0; + if (update_every_s && metric->latest_update_every_s && metric->latest_time_s_clean) + new_samples = (metric->latest_time_s_clean - metric->first_time_s) / metric->latest_update_every_s; + + __atomic_add_fetch(&ctx->atomic.samples, new_samples - old_samples, __ATOMIC_RELAXED); + } + else { + // Newly added + if (update_every_s) { + uint64_t samples = (last_time_s - first_time_s) / update_every_s; + __atomic_add_fetch(&ctx->atomic.samples, samples, __ATOMIC_RELAXED); + } + __atomic_add_fetch(&ctx->atomic.metrics, 1, __ATOMIC_RELAXED); + } + mrg_metric_release(mrg, metric); } diff --git a/database/engine/metric.h b/src/database/engine/metric.h index dbb949301..3bace9057 100644 --- a/database/engine/metric.h +++ b/src/database/engine/metric.h @@ -52,7 +52,7 @@ MRG *mrg_create(ssize_t partitions); void mrg_destroy(MRG *mrg); METRIC *mrg_metric_dup(MRG *mrg, METRIC *metric); -bool mrg_metric_release(MRG *mrg, METRIC *metric); +void mrg_metric_release(MRG *mrg, METRIC *metric); METRIC *mrg_metric_add_and_acquire(MRG *mrg, MRG_ENTRY entry, bool *ret); METRIC *mrg_metric_get_and_acquire(MRG *mrg, uuid_t *uuid, Word_t section); @@ -69,13 +69,14 @@ time_t mrg_metric_get_first_time_s(MRG *mrg, METRIC *metric); bool mrg_metric_set_clean_latest_time_s(MRG *mrg, METRIC *metric, time_t latest_time_s); bool mrg_metric_set_hot_latest_time_s(MRG *mrg, METRIC *metric, time_t latest_time_s); time_t 
mrg_metric_get_latest_time_s(MRG *mrg, METRIC *metric); +time_t mrg_metric_get_latest_clean_time_s(MRG *mrg, METRIC *metric); -bool mrg_metric_set_update_every(MRG *mrg, METRIC *metric, time_t update_every_s); -bool mrg_metric_set_update_every_s_if_zero(MRG *mrg, METRIC *metric, time_t update_every_s); -time_t mrg_metric_get_update_every_s(MRG *mrg, METRIC *metric); +bool mrg_metric_set_update_every(MRG *mrg, METRIC *metric, uint32_t update_every_s); +bool mrg_metric_set_update_every_s_if_zero(MRG *mrg, METRIC *metric, uint32_t update_every_s); +uint32_t mrg_metric_get_update_every_s(MRG *mrg, METRIC *metric); -void mrg_metric_expand_retention(MRG *mrg, METRIC *metric, time_t first_time_s, time_t last_time_s, time_t update_every_s); -void mrg_metric_get_retention(MRG *mrg, METRIC *metric, time_t *first_time_s, time_t *last_time_s, time_t *update_every_s); +void mrg_metric_expand_retention(MRG *mrg, METRIC *metric, time_t first_time_s, time_t last_time_s, uint32_t update_every_s); +void mrg_metric_get_retention(MRG *mrg, METRIC *metric, time_t *first_time_s, time_t *last_time_s, uint32_t *update_every_s); bool mrg_metric_zero_disk_retention(MRG *mrg __maybe_unused, METRIC *metric); bool mrg_metric_set_writer(MRG *mrg, METRIC *metric); @@ -89,6 +90,6 @@ size_t mrg_aral_overhead(void); void mrg_update_metric_retention_and_granularity_by_uuid( MRG *mrg, Word_t section, uuid_t *uuid, time_t first_time_s, time_t last_time_s, - time_t update_every_s, time_t now_s); + uint32_t update_every_s, time_t now_s); #endif // DBENGINE_METRIC_H diff --git a/database/engine/page.c b/src/database/engine/page.c index b7a393483..13fe90f7f 100644 --- a/database/engine/page.c +++ b/src/database/engine/page.c @@ -111,9 +111,9 @@ void pgd_init_arals(void) // FIXME: add stats pgd_alloc_globals.aral_gorilla_buffer[i] = aral_create( buf, - GORILLA_BUFFER_SIZE, + RRDENG_GORILLA_32BIT_BUFFER_SIZE, 64, - 512 * GORILLA_BUFFER_SIZE, + 512 * RRDENG_GORILLA_32BIT_BUFFER_SIZE, pgc_aral_statistics(), 
NULL, NULL, false, false); } @@ -165,8 +165,8 @@ PGD *pgd_create(uint8_t type, uint32_t slots) pg->states = PGD_STATE_CREATED_FROM_COLLECTOR; switch (type) { - case PAGE_METRICS: - case PAGE_TIER: { + case RRDENG_PAGE_TYPE_ARRAY_32BIT: + case RRDENG_PAGE_TYPE_ARRAY_TIER1: { uint32_t size = slots * page_type_size[type]; internal_fatal(!size || slots == 1, @@ -176,11 +176,11 @@ PGD *pgd_create(uint8_t type, uint32_t slots) pg->raw.data = pgd_data_aral_alloc(size); break; } - case PAGE_GORILLA_METRICS: { + case RRDENG_PAGE_TYPE_GORILLA_32BIT: { internal_fatal(slots == 1, "DBENGINE: invalid number of slots (%u) or page type (%u)", slots, type); - pg->slots = 8 * GORILLA_BUFFER_SLOTS; + pg->slots = 8 * RRDENG_GORILLA_32BIT_BUFFER_SLOTS; // allocate new gorilla writer pg->gorilla.aral_index = gettid() % 4; @@ -188,16 +188,19 @@ PGD *pgd_create(uint8_t type, uint32_t slots) // allocate new gorilla buffer gorilla_buffer_t *gbuf = aral_mallocz(pgd_alloc_globals.aral_gorilla_buffer[pg->gorilla.aral_index]); - memset(gbuf, 0, GORILLA_BUFFER_SIZE); + memset(gbuf, 0, RRDENG_GORILLA_32BIT_BUFFER_SIZE); global_statistics_gorilla_buffer_add_hot(); - *pg->gorilla.writer = gorilla_writer_init(gbuf, GORILLA_BUFFER_SLOTS); + *pg->gorilla.writer = gorilla_writer_init(gbuf, RRDENG_GORILLA_32BIT_BUFFER_SLOTS); pg->gorilla.num_buffers = 1; break; } default: - fatal("Unknown page type: %uc", type); + netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, type); + aral_freez(pgd_alloc_globals.aral_pgd, pg); + pg = PGD_EMPTY; + break; } return pg; @@ -219,8 +222,8 @@ PGD *pgd_create_from_disk_data(uint8_t type, void *base, uint32_t size) switch (type) { - case PAGE_METRICS: - case PAGE_TIER: + case RRDENG_PAGE_TYPE_ARRAY_32BIT: + case RRDENG_PAGE_TYPE_ARRAY_TIER1: pg->raw.size = size; pg->used = size / page_type_size[type]; pg->slots = pg->used; @@ -228,10 +231,11 @@ PGD *pgd_create_from_disk_data(uint8_t type, void *base, uint32_t size) pg->raw.data = pgd_data_aral_alloc(size); 
memcpy(pg->raw.data, base, size); break; - case PAGE_GORILLA_METRICS: + case RRDENG_PAGE_TYPE_GORILLA_32BIT: internal_fatal(size == 0, "Asked to create page with 0 data!!!"); internal_fatal(size % sizeof(uint32_t), "Unaligned gorilla buffer size"); - internal_fatal(size % GORILLA_BUFFER_SIZE, "Expected size to be a multiple of %zu-bytes", GORILLA_BUFFER_SIZE); + internal_fatal(size % RRDENG_GORILLA_32BIT_BUFFER_SIZE, "Expected size to be a multiple of %zu-bytes", + RRDENG_GORILLA_32BIT_BUFFER_SIZE); pg->raw.data = mallocz(size); pg->raw.size = size; @@ -246,7 +250,10 @@ PGD *pgd_create_from_disk_data(uint8_t type, void *base, uint32_t size) pg->slots = pg->used; break; default: - fatal("Unknown page type: %uc", type); + netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, type); + aral_freez(pgd_alloc_globals.aral_pgd, pg); + pg = PGD_EMPTY; + break; } return pg; @@ -262,11 +269,11 @@ void pgd_free(PGD *pg) switch (pg->type) { - case PAGE_METRICS: - case PAGE_TIER: + case RRDENG_PAGE_TYPE_ARRAY_32BIT: + case RRDENG_PAGE_TYPE_ARRAY_TIER1: pgd_data_aral_free(pg->raw.data, pg->raw.size); break; - case PAGE_GORILLA_METRICS: { + case RRDENG_PAGE_TYPE_GORILLA_32BIT: { if (pg->states & PGD_STATE_CREATED_FROM_DISK) { internal_fatal(pg->raw.data == NULL, "Tried to free gorilla PGD loaded from disk with NULL data"); @@ -306,7 +313,8 @@ void pgd_free(PGD *pg) break; } default: - fatal("Unknown page type: %uc", pg->type); + netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, pg->type); + break; } aral_freez(pgd_alloc_globals.aral_pgd, pg); @@ -358,20 +366,21 @@ uint32_t pgd_memory_footprint(PGD *pg) size_t footprint = 0; switch (pg->type) { - case PAGE_METRICS: - case PAGE_TIER: + case RRDENG_PAGE_TYPE_ARRAY_32BIT: + case RRDENG_PAGE_TYPE_ARRAY_TIER1: footprint = sizeof(PGD) + pg->raw.size; break; - case PAGE_GORILLA_METRICS: { + case RRDENG_PAGE_TYPE_GORILLA_32BIT: { if (pg->states & PGD_STATE_CREATED_FROM_DISK) footprint = sizeof(PGD) + pg->raw.size; 
else - footprint = sizeof(PGD) + sizeof(gorilla_writer_t) + (pg->gorilla.num_buffers * GORILLA_BUFFER_SIZE); + footprint = sizeof(PGD) + sizeof(gorilla_writer_t) + (pg->gorilla.num_buffers * RRDENG_GORILLA_32BIT_BUFFER_SIZE); break; } default: - fatal("Unknown page type: %uc", pg->type); + netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, pg->type); + break; } return footprint; @@ -385,15 +394,15 @@ uint32_t pgd_disk_footprint(PGD *pg) size_t size = 0; switch (pg->type) { - case PAGE_METRICS: - case PAGE_TIER: { + case RRDENG_PAGE_TYPE_ARRAY_32BIT: + case RRDENG_PAGE_TYPE_ARRAY_TIER1: { uint32_t used_size = pg->used * page_type_size[pg->type]; internal_fatal(used_size > pg->raw.size, "Wrong disk footprint page size"); size = used_size; break; } - case PAGE_GORILLA_METRICS: { + case RRDENG_PAGE_TYPE_GORILLA_32BIT: { if (pg->states & PGD_STATE_CREATED_FROM_COLLECTOR || pg->states & PGD_STATE_SCHEDULED_FOR_FLUSHING || pg->states & PGD_STATE_FLUSHED_TO_DISK) @@ -404,7 +413,7 @@ uint32_t pgd_disk_footprint(PGD *pg) internal_fatal(pg->gorilla.num_buffers == 0, "Gorilla writer does not have any buffers"); - size = pg->gorilla.num_buffers * GORILLA_BUFFER_SIZE; + size = pg->gorilla.num_buffers * RRDENG_GORILLA_32BIT_BUFFER_SIZE; if (pg->states & PGD_STATE_CREATED_FROM_COLLECTOR) { global_statistics_tier0_disk_compressed_bytes(gorilla_writer_nbytes(pg->gorilla.writer)); @@ -419,7 +428,8 @@ uint32_t pgd_disk_footprint(PGD *pg) break; } default: - fatal("Unknown page type: %uc", pg->type); + netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, pg->type); + break; } internal_fatal(pg->states & PGD_STATE_CREATED_FROM_DISK, @@ -434,11 +444,11 @@ void pgd_copy_to_extent(PGD *pg, uint8_t *dst, uint32_t dst_size) pgd_disk_footprint(pg), dst_size); switch (pg->type) { - case PAGE_METRICS: - case PAGE_TIER: + case RRDENG_PAGE_TYPE_ARRAY_32BIT: + case RRDENG_PAGE_TYPE_ARRAY_TIER1: memcpy(dst, pg->raw.data, dst_size); break; - case PAGE_GORILLA_METRICS: { + 
case RRDENG_PAGE_TYPE_GORILLA_32BIT: { if ((pg->states & PGD_STATE_SCHEDULED_FOR_FLUSHING) == 0) fatal("Copying to extent is supported only for PGDs that are scheduled for flushing."); @@ -456,7 +466,8 @@ void pgd_copy_to_extent(PGD *pg, uint8_t *dst, uint32_t dst_size) break; } default: - fatal("Unknown page type: %uc", pg->type); + netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, pg->type); + break; } pg->states = PGD_STATE_FLUSHED_TO_DISK; @@ -490,7 +501,7 @@ void pgd_append_point(PGD *pg, fatal("Data collection on page already scheduled for flushing"); switch (pg->type) { - case PAGE_METRICS: { + case RRDENG_PAGE_TYPE_ARRAY_32BIT: { storage_number *tier0_metric_data = (storage_number *)pg->raw.data; storage_number t = pack_storage_number(n, flags); tier0_metric_data[pg->used++] = t; @@ -500,7 +511,7 @@ void pgd_append_point(PGD *pg, break; } - case PAGE_TIER: { + case RRDENG_PAGE_TYPE_ARRAY_TIER1: { storage_number_tier1_t *tier12_metric_data = (storage_number_tier1_t *)pg->raw.data; storage_number_tier1_t t; t.sum_value = (float) n; @@ -515,7 +526,7 @@ void pgd_append_point(PGD *pg, break; } - case PAGE_GORILLA_METRICS: { + case RRDENG_PAGE_TYPE_GORILLA_32BIT: { pg->used++; storage_number t = pack_storage_number(n, flags); @@ -525,9 +536,9 @@ void pgd_append_point(PGD *pg, bool ok = gorilla_writer_write(pg->gorilla.writer, t); if (!ok) { gorilla_buffer_t *new_buffer = aral_mallocz(pgd_alloc_globals.aral_gorilla_buffer[pg->gorilla.aral_index]); - memset(new_buffer, 0, GORILLA_BUFFER_SIZE); + memset(new_buffer, 0, RRDENG_GORILLA_32BIT_BUFFER_SIZE); - gorilla_writer_add_buffer(pg->gorilla.writer, new_buffer, GORILLA_BUFFER_SLOTS); + gorilla_writer_add_buffer(pg->gorilla.writer, new_buffer, RRDENG_GORILLA_32BIT_BUFFER_SLOTS); pg->gorilla.num_buffers += 1; global_statistics_gorilla_buffer_add_hot(); @@ -537,7 +548,7 @@ void pgd_append_point(PGD *pg, break; } default: - fatal("DBENGINE: unknown page type id %d", pg->type); + netdata_log_error("%s() - 
Unknown page type: %uc", __FUNCTION__, pg->type); break; } } @@ -550,11 +561,11 @@ static void pgdc_seek(PGDC *pgdc, uint32_t position) PGD *pg = pgdc->pgd; switch (pg->type) { - case PAGE_METRICS: - case PAGE_TIER: + case RRDENG_PAGE_TYPE_ARRAY_32BIT: + case RRDENG_PAGE_TYPE_ARRAY_TIER1: pgdc->slots = pgdc->pgd->used; break; - case PAGE_GORILLA_METRICS: { + case RRDENG_PAGE_TYPE_GORILLA_32BIT: { if (pg->states & PGD_STATE_CREATED_FROM_DISK) { pgdc->slots = pgdc->pgd->slots; pgdc->gr = gorilla_reader_init((void *) pg->raw.data); @@ -588,7 +599,7 @@ static void pgdc_seek(PGDC *pgdc, uint32_t position) break; } default: - fatal("DBENGINE: unknown page type id %d", pg->type); + netdata_log_error("%s() - Unknown page type: %uc", __FUNCTION__, pg->type); break; } } @@ -612,7 +623,7 @@ void pgdc_reset(PGDC *pgdc, PGD *pgd, uint32_t position) pgdc_seek(pgdc, position); } -bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position, STORAGE_POINT *sp) +bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position __maybe_unused, STORAGE_POINT *sp) { if (!pgdc->pgd || pgdc->pgd == PGD_EMPTY || pgdc->position >= pgdc->slots) { @@ -624,7 +635,7 @@ bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position, STORAGE_POINT * switch (pgdc->pgd->type) { - case PAGE_METRICS: { + case RRDENG_PAGE_TYPE_ARRAY_32BIT: { storage_number *array = (storage_number *) pgdc->pgd->raw.data; storage_number n = array[pgdc->position++]; @@ -635,7 +646,7 @@ bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position, STORAGE_POINT * return true; } - case PAGE_TIER: { + case RRDENG_PAGE_TYPE_ARRAY_TIER1: { storage_number_tier1_t *array = (storage_number_tier1_t *) pgdc->pgd->raw.data; storage_number_tier1_t n = array[pgdc->position++]; @@ -648,7 +659,7 @@ bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position, STORAGE_POINT * return true; } - case PAGE_GORILLA_METRICS: { + case RRDENG_PAGE_TYPE_GORILLA_32BIT: { pgdc->position++; uint32_t n = 666666666; @@ -668,7 +679,8 @@ 
bool pgdc_get_next_point(PGDC *pgdc, uint32_t expected_position, STORAGE_POINT * static bool logged = false; if (!logged) { - netdata_log_error("DBENGINE: unknown page type %d found. Cannot decode it. Ignoring its metrics.", pgd_type(pgdc->pgd)); + netdata_log_error("DBENGINE: unknown page type %"PRIu32" found. Cannot decode it. Ignoring its metrics.", + pgd_type(pgdc->pgd)); logged = true; } diff --git a/database/engine/page.h b/src/database/engine/page.h index 32c87c580..32c87c580 100644 --- a/database/engine/page.h +++ b/src/database/engine/page.h diff --git a/database/engine/page_test.cc b/src/database/engine/page_test.cc index d61299bc4..d61299bc4 100644 --- a/database/engine/page_test.cc +++ b/src/database/engine/page_test.cc diff --git a/database/engine/page_test.h b/src/database/engine/page_test.h index 30837f0ab..30837f0ab 100644 --- a/database/engine/page_test.h +++ b/src/database/engine/page_test.h diff --git a/database/engine/pagecache.c b/src/database/engine/pagecache.c index dab9cdd0d..452fdc50b 100644 --- a/database/engine/pagecache.c +++ b/src/database/engine/pagecache.c @@ -222,7 +222,7 @@ static size_t get_page_list_from_pgc(PGC *cache, METRIC *metric, struct rrdengin Word_t metric_id = mrg_metric_id(main_mrg, metric); time_t now_s = wanted_start_time_s; - time_t dt_s = mrg_metric_get_update_every_s(main_mrg, metric); + uint32_t dt_s = mrg_metric_get_update_every_s(main_mrg, metric); if(!dt_s) dt_s = default_rrd_update_every; @@ -246,7 +246,7 @@ static size_t get_page_list_from_pgc(PGC *cache, METRIC *metric, struct rrdengin time_t page_start_time_s = pgc_page_start_time_s(page); time_t page_end_time_s = pgc_page_end_time_s(page); - time_t page_update_every_s = pgc_page_update_every_s(page); + uint32_t page_update_every_s = pgc_page_update_every_s(page); if(!page_update_every_s) page_update_every_s = dt_s; @@ -282,7 +282,7 @@ static size_t get_page_list_from_pgc(PGC *cache, METRIC *metric, struct rrdengin pd->metric_id = metric_id; 
pd->first_time_s = page_start_time_s; pd->last_time_s = page_end_time_s; - pd->update_every_s = (uint32_t) page_update_every_s; + pd->update_every_s = page_update_every_s; pd->page = (open_cache_mode) ? NULL : page; pd->status |= tags; @@ -332,8 +332,8 @@ static size_t get_page_list_from_pgc(PGC *cache, METRIC *metric, struct rrdengin static void pgc_inject_gap(struct rrdengine_instance *ctx, METRIC *metric, time_t start_time_s, time_t end_time_s) { - time_t db_first_time_s, db_last_time_s, db_update_every_s; - mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s); + time_t db_first_time_s, db_last_time_s; + mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, NULL); if(is_page_in_time_range(start_time_s, end_time_s, db_first_time_s, db_last_time_s) != PAGE_IS_IN_RANGE) return; @@ -547,7 +547,7 @@ static size_t get_page_list_from_journal_v2(struct rrdengine_instance *ctx, METR if(prc == PAGE_IS_IN_THE_FUTURE) break; - time_t page_update_every_s = page_entry_in_journal->update_every_s; + uint32_t page_update_every_s = page_entry_in_journal->update_every_s; size_t page_length = page_entry_in_journal->page_length; if(datafile_acquire(datafile, DATAFILE_ACQUIRE_OPEN_CACHE)) { //for open cache item @@ -567,7 +567,7 @@ static size_t get_page_list_from_journal_v2(struct rrdengine_instance *ctx, METR .metric_id = metric_id, .start_time_s = page_first_time_s, .end_time_s = page_last_time_s, - .update_every_s = (uint32_t) page_update_every_s, + .update_every_s = page_update_every_s, .data = datafile, .size = 0, .custom_data = (uint8_t *) &ei, @@ -845,7 +845,7 @@ struct pgc_page *pg_cache_lookup_next( struct rrdengine_instance *ctx, PDC *pdc, time_t now_s, - time_t last_update_every_s, + uint32_t last_update_every_s, size_t *entries ) { if (unlikely(!pdc)) @@ -905,7 +905,7 @@ struct pgc_page *pg_cache_lookup_next( time_t page_start_time_s = pgc_page_start_time_s(page); time_t page_end_time_s = 
pgc_page_end_time_s(page); - time_t page_update_every_s = pgc_page_update_every_s(page); + uint32_t page_update_every_s = pgc_page_update_every_s(page); if(unlikely(page_start_time_s == INVALID_TIME || page_end_time_s == INVALID_TIME)) { __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_zero_time_skipped, 1, __ATOMIC_RELAXED); @@ -918,7 +918,7 @@ struct pgc_page *pg_cache_lookup_next( if (unlikely(page_update_every_s <= 0 || page_update_every_s > 86400)) { __atomic_add_fetch(&rrdeng_cache_efficiency_stats.pages_invalid_update_every_fixed, 1, __ATOMIC_RELAXED); page_update_every_s = pgc_page_fix_update_every(page, last_update_every_s); - pd->update_every_s = (uint32_t) page_update_every_s; + pd->update_every_s = page_update_every_s; } size_t entries_by_size = pgd_slots_used(pgc_page_data(page)); @@ -983,7 +983,7 @@ struct pgc_page *pg_cache_lookup_next( return page; } -void pgc_open_add_hot_page(Word_t section, Word_t metric_id, time_t start_time_s, time_t end_time_s, time_t update_every_s, +void pgc_open_add_hot_page(Word_t section, Word_t metric_id, time_t start_time_s, time_t end_time_s, uint32_t update_every_s, struct rrdengine_datafile *datafile, uint64_t extent_offset, unsigned extent_size, uint32_t page_length) { if(!datafile_acquire(datafile, DATAFILE_ACQUIRE_OPEN_CACHE)) // for open cache item @@ -1003,7 +1003,7 @@ void pgc_open_add_hot_page(Word_t section, Word_t metric_id, time_t start_time_s .metric_id = metric_id, .start_time_s = start_time_s, .end_time_s = end_time_s, - .update_every_s = (uint32_t) update_every_s, + .update_every_s = update_every_s, .size = 0, .data = datafile, .custom_data = (uint8_t *) &ext_io_data, diff --git a/database/engine/pagecache.h b/src/database/engine/pagecache.h index dbcbea53a..103d36484 100644 --- a/database/engine/pagecache.h +++ b/src/database/engine/pagecache.h @@ -14,8 +14,6 @@ extern struct pgc *extent_cache; struct rrdengine_instance; #define INVALID_TIME (0) -#define MAX_PAGE_CACHE_FETCH_RETRIES (3) -#define 
PAGE_CACHE_FETCH_WAIT_TIMEOUT (3) extern struct rrdeng_cache_efficiency_stats rrdeng_cache_efficiency_stats; @@ -54,9 +52,9 @@ struct page_details_control; void rrdeng_prep_wait(struct page_details_control *pdc); void rrdeng_prep_query(struct page_details_control *pdc, bool worker); void pg_cache_preload(struct rrdeng_query_handle *handle); -struct pgc_page *pg_cache_lookup_next(struct rrdengine_instance *ctx, struct page_details_control *pdc, time_t now_s, time_t last_update_every_s, size_t *entries); +struct pgc_page *pg_cache_lookup_next(struct rrdengine_instance *ctx, struct page_details_control *pdc, time_t now_s, uint32_t last_update_every_s, size_t *entries); void pgc_and_mrg_initialize(void); -void pgc_open_add_hot_page(Word_t section, Word_t metric_id, time_t start_time_s, time_t end_time_s, time_t update_every_s, struct rrdengine_datafile *datafile, uint64_t extent_offset, unsigned extent_size, uint32_t page_length); +void pgc_open_add_hot_page(Word_t section, Word_t metric_id, time_t start_time_s, time_t end_time_s, uint32_t update_every_s, struct rrdengine_datafile *datafile, uint64_t extent_offset, unsigned extent_size, uint32_t page_length); #endif /* NETDATA_PAGECACHE_H */ diff --git a/database/engine/pdc.c b/src/database/engine/pdc.c index 5fe205e64..79a424b77 100644 --- a/database/engine/pdc.c +++ b/src/database/engine/pdc.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later #define NETDATA_RRD_INTERNALS #include "pdc.h" +#include "dbengine-compression.h" struct extent_page_details_list { uv_file file; @@ -628,24 +629,25 @@ void collect_page_flags_to_buffer(BUFFER *wb, RRDENG_COLLECT_PAGE_FLAGS flags) { buffer_strcat(wb, "STEP_UNALIGNED"); } -inline VALIDATED_PAGE_DESCRIPTOR validate_extent_page_descr(const struct rrdeng_extent_page_descr *descr, time_t now_s, time_t overwrite_zero_update_every_s, bool have_read_error) { +inline VALIDATED_PAGE_DESCRIPTOR validate_extent_page_descr(const struct rrdeng_extent_page_descr *descr, time_t 
now_s, uint32_t overwrite_zero_update_every_s, bool have_read_error) { time_t start_time_s = (time_t) (descr->start_time_ut / USEC_PER_SEC); - time_t end_time_s; - size_t entries; + time_t end_time_s = 0; + size_t entries = 0; switch (descr->type) { - case PAGE_METRICS: - case PAGE_TIER: + case RRDENG_PAGE_TYPE_ARRAY_32BIT: + case RRDENG_PAGE_TYPE_ARRAY_TIER1: end_time_s = descr->end_time_ut / USEC_PER_SEC; entries = 0; break; - case PAGE_GORILLA_METRICS: + case RRDENG_PAGE_TYPE_GORILLA_32BIT: end_time_s = start_time_s + descr->gorilla.delta_time_s; entries = descr->gorilla.entries; break; default: - fatal("Unknown page type: %uc\n", descr->type); + // Nothing to do. Validate page will notify the user. + break; } return validate_page( @@ -666,29 +668,30 @@ VALIDATED_PAGE_DESCRIPTOR validate_page( uuid_t *uuid, time_t start_time_s, time_t end_time_s, - time_t update_every_s, // can be zero, if unknown + uint32_t update_every_s, // can be zero, if unknown size_t page_length, uint8_t page_type, size_t entries, // can be zero, if unknown time_t now_s, // can be zero, to disable future timestamp check - time_t overwrite_zero_update_every_s, // can be zero, if unknown + uint32_t overwrite_zero_update_every_s, // can be zero, if unknown bool have_read_error, const char *msg, - RRDENG_COLLECT_PAGE_FLAGS flags) { - + RRDENG_COLLECT_PAGE_FLAGS flags) +{ VALIDATED_PAGE_DESCRIPTOR vd = { .start_time_s = start_time_s, .end_time_s = end_time_s, .update_every_s = update_every_s, .page_length = page_length, + .point_size = page_type_size[page_type], .type = page_type, .is_valid = true, }; - vd.point_size = page_type_size[vd.type]; + bool known_page_type = true; switch (page_type) { - case PAGE_METRICS: - case PAGE_TIER: + case RRDENG_PAGE_TYPE_ARRAY_32BIT: + case RRDENG_PAGE_TYPE_ARRAY_TIER1: // always calculate entries by size vd.entries = page_entries_by_size(vd.page_length, vd.point_size); @@ -696,13 +699,13 @@ VALIDATED_PAGE_DESCRIPTOR validate_page( if(!entries) entries = 
vd.entries; break; - case PAGE_GORILLA_METRICS: + case RRDENG_PAGE_TYPE_GORILLA_32BIT: internal_fatal(entries == 0, "0 number of entries found on gorilla page"); vd.entries = entries; break; default: - // TODO: should set vd.is_valid false instead? - fatal("Unknown page type: %uc", page_type); + known_page_type = false; + break; } // allow to be called without update every (when loading pages from disk) @@ -723,16 +726,16 @@ VALIDATED_PAGE_DESCRIPTOR validate_page( // If gorilla can not compress the data we might end up needing slightly more // than 4KiB. However, gorilla pages extend the page length by increments of // 512 bytes. - max_page_length += ((page_type == PAGE_GORILLA_METRICS) * GORILLA_BUFFER_SIZE); + max_page_length += ((page_type == RRDENG_PAGE_TYPE_GORILLA_32BIT) * RRDENG_GORILLA_32BIT_BUFFER_SIZE); - if( have_read_error || + if (!known_page_type || + have_read_error || vd.page_length == 0 || vd.page_length > max_page_length || vd.start_time_s > vd.end_time_s || (now_s && vd.end_time_s > now_s) || vd.start_time_s <= 0 || vd.end_time_s <= 0 || - vd.update_every_s < 0 || (vd.start_time_s == vd.end_time_s && vd.entries > 1) || (vd.update_every_s == 0 && vd.entries > 1)) { @@ -791,13 +794,13 @@ VALIDATED_PAGE_DESCRIPTOR validate_page( nd_log_limit(&erl, NDLS_DAEMON, NDLP_ERR, #endif "DBENGINE: metric '%s' %s invalid page of type %u " - "from %ld to %ld (now %ld), update every %ld, page length %zu, entries %zu (flags: %s)", + "from %ld to %ld (now %ld), update every %u, page length %zu, entries %zu (flags: %s)", uuid_str, msg, vd.type, vd.start_time_s, vd.end_time_s, now_s, vd.update_every_s, vd.page_length, vd.entries, wb?buffer_tostring(wb):"" ); } else { - const char *err_valid = (vd.is_valid) ? "" : "found invalid, "; + const char *err_valid = ""; const char *err_start = (vd.start_time_s == start_time_s) ? "" : "start time updated, "; const char *err_end = (vd.end_time_s == end_time_s) ? 
"" : "end time updated, "; const char *err_update = (vd.update_every_s == update_every_s) ? "" : "update every updated, "; @@ -811,9 +814,9 @@ VALIDATED_PAGE_DESCRIPTOR validate_page( nd_log_limit(&erl, NDLS_DAEMON, NDLP_ERR, #endif "DBENGINE: metric '%s' %s page of type %u " - "from %ld to %ld (now %ld), update every %ld, page length %zu, entries %zu (flags: %s), " + "from %ld to %ld (now %ld), update every %u, page length %zu, entries %zu (flags: %s), " "found inconsistent - the right is " - "from %ld to %ld, update every %ld, page length %zu, entries %zu: " + "from %ld to %ld, update every %u, page length %zu, entries %zu: " "%s%s%s%s%s%s%s", uuid_str, msg, vd.type, start_time_s, end_time_s, now_s, update_every_s, page_length, entries, wb?buffer_tostring(wb):"", @@ -871,11 +874,11 @@ static void epdl_extent_loading_error_log(struct rrdengine_instance *ctx, EPDL * if (descr) { start_time_s = (time_t)(descr->start_time_ut / USEC_PER_SEC); switch (descr->type) { - case PAGE_METRICS: - case PAGE_TIER: + case RRDENG_PAGE_TYPE_ARRAY_32BIT: + case RRDENG_PAGE_TYPE_ARRAY_TIER1: end_time_s = (time_t)(descr->end_time_ut / USEC_PER_SEC); break; - case PAGE_GORILLA_METRICS: + case RRDENG_PAGE_TYPE_GORILLA_32BIT: end_time_s = (time_t) start_time_s + (descr->gorilla.delta_time_s); break; } @@ -938,7 +941,6 @@ static bool epdl_populate_pages_from_extent_data( PDC_PAGE_STATUS tags, bool cached_extent) { - int ret; unsigned i, count; void *uncompressed_buf = NULL; uint32_t payload_length, payload_offset, trailer_offset, uncompressed_payload_length = 0; @@ -973,18 +975,17 @@ static bool epdl_populate_pages_from_extent_data( if( !can_use_data || count < 1 || count > MAX_PAGES_PER_EXTENT || - (header->compression_algorithm != RRD_NO_COMPRESSION && header->compression_algorithm != RRD_LZ4) || + !dbengine_valid_compression_algorithm(header->compression_algorithm) || (payload_length != trailer_offset - payload_offset) || (data_length != payload_offset + payload_length + 
sizeof(*trailer)) - ) { + ) { epdl_extent_loading_error_log(ctx, epdl, NULL, "header is INVALID"); return false; } crc = crc32(0L, Z_NULL, 0); crc = crc32(crc, data, epdl->extent_size - sizeof(*trailer)); - ret = crc32cmp(trailer->checksum, crc); - if (unlikely(ret)) { + if (unlikely(crc32cmp(trailer->checksum, crc))) { ctx_io_error(ctx); have_read_error = true; epdl_extent_loading_error_log(ctx, epdl, NULL, "CRC32 checksum FAILED"); @@ -993,14 +994,15 @@ static bool epdl_populate_pages_from_extent_data( if(worker) worker_is_busy(UV_EVENT_DBENGINE_EXTENT_DECOMPRESSION); - if (likely(!have_read_error && RRD_NO_COMPRESSION != header->compression_algorithm)) { + if (likely(!have_read_error && RRDENG_COMPRESSION_NONE != header->compression_algorithm)) { // find the uncompressed extent size uncompressed_payload_length = 0; for (i = 0; i < count; ++i) { size_t page_length = header->descr[i].page_length; - if (page_length > RRDENG_BLOCK_SIZE && (header->descr[i].type != PAGE_GORILLA_METRICS || - (header->descr[i].type == PAGE_GORILLA_METRICS && - (page_length - RRDENG_BLOCK_SIZE) % GORILLA_BUFFER_SIZE))) { + if (page_length > RRDENG_BLOCK_SIZE && + (header->descr[i].type != RRDENG_PAGE_TYPE_GORILLA_32BIT || + (header->descr[i].type == RRDENG_PAGE_TYPE_GORILLA_32BIT && + (page_length - RRDENG_BLOCK_SIZE) % RRDENG_GORILLA_32BIT_BUFFER_SIZE))) { have_read_error = true; break; } @@ -1015,11 +1017,16 @@ static bool epdl_populate_pages_from_extent_data( eb = extent_buffer_get(uncompressed_payload_length); uncompressed_buf = eb->data; - ret = LZ4_decompress_safe(data + payload_offset, uncompressed_buf, - (int) payload_length, (int) uncompressed_payload_length); + size_t bytes = dbengine_decompress(uncompressed_buf, data + payload_offset, + uncompressed_payload_length, payload_length, + header->compression_algorithm); - __atomic_add_fetch(&ctx->stats.before_decompress_bytes, payload_length, __ATOMIC_RELAXED); - __atomic_add_fetch(&ctx->stats.after_decompress_bytes, ret, 
__ATOMIC_RELAXED); + if(!bytes) + have_read_error = true; + else { + __atomic_add_fetch(&ctx->stats.before_decompress_bytes, payload_length, __ATOMIC_RELAXED); + __atomic_add_fetch(&ctx->stats.after_decompress_bytes, bytes, __ATOMIC_RELAXED); + } } } @@ -1075,7 +1082,7 @@ static bool epdl_populate_pages_from_extent_data( stats_load_invalid_page++; } else { - if (RRD_NO_COMPRESSION == header->compression_algorithm) { + if (RRDENG_COMPRESSION_NONE == header->compression_algorithm) { pgd = pgd_create_from_disk_data(header->descr[i].type, data + payload_offset + page_offset, vd.page_length); diff --git a/database/engine/pdc.h b/src/database/engine/pdc.h index 9bae39ade..9bae39ade 100644 --- a/database/engine/pdc.h +++ b/src/database/engine/pdc.h diff --git a/database/engine/rrddiskprotocol.h b/src/database/engine/rrddiskprotocol.h index 86b41f0b3..dc1a4c980 100644 --- a/database/engine/rrddiskprotocol.h +++ b/src/database/engine/rrddiskprotocol.h @@ -19,13 +19,16 @@ #define UUID_SZ (16) #define CHECKSUM_SZ (4) /* CRC32 */ -#define RRD_NO_COMPRESSION (0) -#define RRD_LZ4 (1) +#define RRDENG_COMPRESSION_NONE (0) +#define RRDENG_COMPRESSION_LZ4 (1) +#define RRDENG_COMPRESSION_ZSTD (2) #define RRDENG_DF_SB_PADDING_SZ (RRDENG_BLOCK_SIZE - (RRDENG_MAGIC_SZ + RRDENG_VER_SZ + sizeof(uint8_t))) + /* * Data file persistent super-block */ + struct rrdeng_df_sb { char magic_number[RRDENG_MAGIC_SZ]; char version[RRDENG_VER_SZ]; @@ -36,10 +39,11 @@ struct rrdeng_df_sb { /* * Page types */ -#define PAGE_METRICS (0) -#define PAGE_TIER (1) -#define PAGE_GORILLA_METRICS (2) -#define PAGE_TYPE_MAX 2 // Maximum page type (inclusive) + +#define RRDENG_PAGE_TYPE_ARRAY_32BIT (0) +#define RRDENG_PAGE_TYPE_ARRAY_TIER1 (1) +#define RRDENG_PAGE_TYPE_GORILLA_32BIT (2) +#define RRDENG_PAGE_TYPE_MAX (2) // Maximum page type (inclusive) /* * Data file page descriptor diff --git a/database/engine/rrdengine.c b/src/database/engine/rrdengine.c index b82cc1ad1..7b2137436 100644 --- 
a/database/engine/rrdengine.c +++ b/src/database/engine/rrdengine.c @@ -3,6 +3,7 @@ #include "rrdengine.h" #include "pdc.h" +#include "dbengine-compression.h" rrdeng_stats_t global_io_errors = 0; rrdeng_stats_t global_fs_errors = 0; @@ -229,7 +230,7 @@ static void after_work_standard_callback(uv_work_t* req, int status) { worker_is_idle(); } -static bool work_dispatch(struct rrdengine_instance *ctx, void *data, struct completion *completion, enum rrdeng_opcode opcode, work_cb work_cb, after_work_cb after_work_cb) { +static bool work_dispatch(struct rrdengine_instance *ctx, void *data, struct completion *completion, enum rrdeng_opcode opcode, work_cb do_work_cb, after_work_cb do_after_work_cb) { struct rrdeng_work *work_request = NULL; internal_fatal(rrdeng_main.tid != gettid(), "work_dispatch() can only be run from the event loop thread"); @@ -240,8 +241,8 @@ static bool work_dispatch(struct rrdengine_instance *ctx, void *data, struct com work_request->ctx = ctx; work_request->data = data; work_request->completion = completion; - work_request->work_cb = work_cb; - work_request->after_work_cb = after_work_cb; + work_request->work_cb = do_work_cb; + work_request->after_work_cb = do_after_work_cb; work_request->opcode = opcode; if(uv_queue_work(&rrdeng_main.loop, &work_request->req, work_standard_worker, after_work_standard_callback)) { @@ -772,13 +773,10 @@ static struct rrdengine_datafile *get_datafile_to_write_extent(struct rrdengine_ */ static struct extent_io_descriptor *datafile_extent_build(struct rrdengine_instance *ctx, struct page_descr_with_data *base, struct completion *completion) { int ret; - int compressed_size, max_compressed_size = 0; unsigned i, count, size_bytes, pos, real_io_size; - uint32_t uncompressed_payload_length, payload_offset; + uint32_t uncompressed_payload_length, max_compressed_size, payload_offset; struct page_descr_with_data *descr, *eligible_pages[MAX_PAGES_PER_EXTENT]; struct extent_io_descriptor *xt_io_descr; - struct extent_buffer 
*eb = NULL; - void *compressed_buf = NULL; Word_t Index; uint8_t compression_algorithm = ctx->config.global_compress_alg; struct rrdengine_datafile *datafile; @@ -807,20 +805,8 @@ static struct extent_io_descriptor *datafile_extent_build(struct rrdengine_insta xt_io_descr = extent_io_descriptor_get(); xt_io_descr->ctx = ctx; payload_offset = sizeof(*header) + count * sizeof(header->descr[0]); - switch (compression_algorithm) { - case RRD_NO_COMPRESSION: - size_bytes = payload_offset + uncompressed_payload_length + sizeof(*trailer); - break; - - default: /* Compress */ - fatal_assert(uncompressed_payload_length < LZ4_MAX_INPUT_SIZE); - max_compressed_size = LZ4_compressBound(uncompressed_payload_length); - eb = extent_buffer_get(max_compressed_size); - compressed_buf = eb->data; - size_bytes = payload_offset + MAX(uncompressed_payload_length, (unsigned)max_compressed_size) + sizeof(*trailer); - break; - } - + max_compressed_size = dbengine_max_compressed_size(uncompressed_payload_length, compression_algorithm); + size_bytes = payload_offset + MAX(uncompressed_payload_length, max_compressed_size) + sizeof(*trailer); ret = posix_memalign((void *)&xt_io_descr->buf, RRDFILE_ALIGNMENT, ALIGN_BYTES_CEILING(size_bytes)); if (unlikely(ret)) { fatal("DBENGINE: posix_memalign:%s", strerror(ret)); @@ -832,7 +818,6 @@ static struct extent_io_descriptor *datafile_extent_build(struct rrdengine_insta pos = 0; header = xt_io_descr->buf; - header->compression_algorithm = compression_algorithm; header->number_of_pages = count; pos += sizeof(*header); @@ -844,11 +829,11 @@ static struct extent_io_descriptor *datafile_extent_build(struct rrdengine_insta header->descr[i].start_time_ut = descr->start_time_ut; switch (descr->type) { - case PAGE_METRICS: - case PAGE_TIER: + case RRDENG_PAGE_TYPE_ARRAY_32BIT: + case RRDENG_PAGE_TYPE_ARRAY_TIER1: header->descr[i].end_time_ut = descr->end_time_ut; break; - case PAGE_GORILLA_METRICS: + case RRDENG_PAGE_TYPE_GORILLA_32BIT: 
header->descr[i].gorilla.delta_time_s = (uint32_t) ((descr->end_time_ut - descr->start_time_ut) / USEC_PER_SEC); header->descr[i].gorilla.entries = pgd_slots_used(descr->pgd); break; @@ -858,29 +843,40 @@ static struct extent_io_descriptor *datafile_extent_build(struct rrdengine_insta pos += sizeof(header->descr[i]); } + + // build the extent payload for (i = 0 ; i < count ; ++i) { descr = xt_io_descr->descr_array[i]; pgd_copy_to_extent(descr->pgd, xt_io_descr->buf + pos, descr->page_length); pos += descr->page_length; } - if(likely(compression_algorithm == RRD_LZ4)) { - compressed_size = LZ4_compress_default( - xt_io_descr->buf + payload_offset, - compressed_buf, - (int)uncompressed_payload_length, - max_compressed_size); + // compress the payload + size_t compressed_size = + (int)dbengine_compress(xt_io_descr->buf + payload_offset, + uncompressed_payload_length, + compression_algorithm); - __atomic_add_fetch(&ctx->stats.before_compress_bytes, uncompressed_payload_length, __ATOMIC_RELAXED); - __atomic_add_fetch(&ctx->stats.after_compress_bytes, compressed_size, __ATOMIC_RELAXED); + internal_fatal(compressed_size > max_compressed_size, "DBENGINE: compression returned more data than the max allowed"); + internal_fatal(compressed_size > uncompressed_payload_length, "DBENGINE: compression returned more data than the uncompressed extent"); - (void) memcpy(xt_io_descr->buf + payload_offset, compressed_buf, compressed_size); - extent_buffer_release(eb); - size_bytes = payload_offset + compressed_size + sizeof(*trailer); + if(compressed_size) { + header->compression_algorithm = compression_algorithm; header->payload_length = compressed_size; } - else { // RRD_NO_COMPRESSION - header->payload_length = uncompressed_payload_length; + else { + // compression failed, or generated bigger pages + // so it didn't touch our uncompressed buffer + header->compression_algorithm = RRDENG_COMPRESSION_NONE; + header->payload_length = compressed_size = uncompressed_payload_length; + } + 
+ // set the correct size + size_bytes = payload_offset + compressed_size + sizeof(*trailer); + + if(compression_algorithm != RRDENG_COMPRESSION_NONE) { + __atomic_add_fetch(&ctx->stats.before_compress_bytes, uncompressed_payload_length, __ATOMIC_RELAXED); + __atomic_add_fetch(&ctx->stats.after_compress_bytes, compressed_size, __ATOMIC_RELAXED); } real_io_size = ALIGN_BYTES_CEILING(size_bytes); @@ -1171,7 +1167,17 @@ static void update_metrics_first_time_s(struct rrdengine_instance *ctx, struct r for (size_t index = 0; index < added; ++index) { uuid_first_t_entry = &uuid_first_entry_list[index]; if (likely(uuid_first_t_entry->first_time_s != LONG_MAX)) { - mrg_metric_set_first_time_s_if_bigger(main_mrg, uuid_first_t_entry->metric, uuid_first_t_entry->first_time_s); + + time_t old_first_time_s = mrg_metric_get_first_time_s(main_mrg, uuid_first_t_entry->metric); + + bool changed = mrg_metric_set_first_time_s_if_bigger(main_mrg, uuid_first_t_entry->metric, uuid_first_t_entry->first_time_s); + if (changed) { + uint32_t update_every_s = mrg_metric_get_update_every_s(main_mrg, uuid_first_t_entry->metric); + if (update_every_s && old_first_time_s && uuid_first_t_entry->first_time_s > old_first_time_s) { + uint64_t remove_samples = (uuid_first_t_entry->first_time_s - old_first_time_s) / update_every_s; + __atomic_sub_fetch(&ctx->atomic.samples, remove_samples, __ATOMIC_RELAXED); + } + } mrg_metric_release(main_mrg, uuid_first_t_entry->metric); } else { @@ -1180,6 +1186,14 @@ static void update_metrics_first_time_s(struct rrdengine_instance *ctx, struct r // there is no retention for this metric bool has_retention = mrg_metric_zero_disk_retention(main_mrg, uuid_first_t_entry->metric); if (!has_retention) { + time_t first_time_s = mrg_metric_get_first_time_s(main_mrg, uuid_first_t_entry->metric); + time_t last_time_s = mrg_metric_get_latest_time_s(main_mrg, uuid_first_t_entry->metric); + time_t update_every_s = mrg_metric_get_update_every_s(main_mrg, 
uuid_first_t_entry->metric); + if (update_every_s && first_time_s && last_time_s) { + uint64_t remove_samples = (first_time_s - last_time_s) / update_every_s; + __atomic_sub_fetch(&ctx->atomic.samples, remove_samples, __ATOMIC_RELAXED); + } + bool deleted = mrg_metric_release_and_delete(main_mrg, uuid_first_t_entry->metric); if(deleted) deleted_metrics++; diff --git a/database/engine/rrdengine.h b/src/database/engine/rrdengine.h index cd3352f12..3047e0c6a 100644 --- a/database/engine/rrdengine.h +++ b/src/database/engine/rrdengine.h @@ -153,9 +153,9 @@ struct jv2_metrics_info { struct jv2_page_info { time_t start_time_s; time_t end_time_s; - time_t update_every_s; - size_t page_length; + uint32_t update_every_s; uint32_t extent_index; + size_t page_length; void *custom_data; // private @@ -217,7 +217,7 @@ struct rrdeng_query_handle { // internal data time_t now_s; - time_t dt_s; + uint32_t dt_s; unsigned position; unsigned entries; @@ -387,6 +387,8 @@ struct rrdengine_instance { unsigned extents_currently_being_flushed; // non-zero until we commit data to disk (both datafile and journal file) time_t first_time_s; + uint64_t metrics; + uint64_t samples; } atomic; struct { @@ -482,7 +484,7 @@ struct page_descr_with_data *page_descriptor_get(void); typedef struct validated_page_descriptor { time_t start_time_s; time_t end_time_s; - time_t update_every_s; + uint32_t update_every_s; size_t page_length; size_t point_size; size_t entries; @@ -499,16 +501,16 @@ typedef struct validated_page_descriptor { VALIDATED_PAGE_DESCRIPTOR validate_page(uuid_t *uuid, time_t start_time_s, time_t end_time_s, - time_t update_every_s, + uint32_t update_every_s, size_t page_length, uint8_t page_type, size_t entries, time_t now_s, - time_t overwrite_zero_update_every_s, + uint32_t overwrite_zero_update_every_s, bool have_read_error, const char *msg, RRDENG_COLLECT_PAGE_FLAGS flags); -VALIDATED_PAGE_DESCRIPTOR validate_extent_page_descr(const struct rrdeng_extent_page_descr *descr, time_t 
now_s, time_t overwrite_zero_update_every_s, bool have_read_error); +VALIDATED_PAGE_DESCRIPTOR validate_extent_page_descr(const struct rrdeng_extent_page_descr *descr, time_t now_s, uint32_t overwrite_zero_update_every_s, bool have_read_error); void collect_page_flags_to_buffer(BUFFER *wb, RRDENG_COLLECT_PAGE_FLAGS flags); typedef enum { diff --git a/database/engine/rrdengineapi.c b/src/database/engine/rrdengineapi.c index 1ddce5243..43fed492b 100755 --- a/database/engine/rrdengineapi.c +++ b/src/database/engine/rrdengineapi.c @@ -2,6 +2,7 @@ #include "database/engine/rrddiskprotocol.h" #include "rrdengine.h" +#include "dbengine-compression.h" /* Default global database instance */ struct rrdengine_instance multidb_ctx_storage_tier0; @@ -16,7 +17,12 @@ struct rrdengine_instance multidb_ctx_storage_tier4; #error RRD_STORAGE_TIERS is not 5 - you need to add allocations here #endif struct rrdengine_instance *multidb_ctx[RRD_STORAGE_TIERS]; -uint8_t tier_page_type[RRD_STORAGE_TIERS] = {PAGE_METRICS, PAGE_TIER, PAGE_TIER, PAGE_TIER, PAGE_TIER}; +uint8_t tier_page_type[RRD_STORAGE_TIERS] = { + RRDENG_PAGE_TYPE_GORILLA_32BIT, + RRDENG_PAGE_TYPE_ARRAY_TIER1, + RRDENG_PAGE_TYPE_ARRAY_TIER1, + RRDENG_PAGE_TYPE_ARRAY_TIER1, + RRDENG_PAGE_TYPE_ARRAY_TIER1}; #if defined(ENV32BIT) size_t tier_page_size[RRD_STORAGE_TIERS] = {2048, 1024, 192, 192, 192}; @@ -24,14 +30,14 @@ size_t tier_page_size[RRD_STORAGE_TIERS] = {2048, 1024, 192, 192, 192}; size_t tier_page_size[RRD_STORAGE_TIERS] = {4096, 2048, 384, 384, 384}; #endif -#if PAGE_TYPE_MAX != 2 +#if RRDENG_PAGE_TYPE_MAX != 2 #error PAGE_TYPE_MAX is not 2 - you need to add allocations here #endif size_t page_type_size[256] = { - [PAGE_METRICS] = sizeof(storage_number), - [PAGE_TIER] = sizeof(storage_number_tier1_t), - [PAGE_GORILLA_METRICS] = sizeof(storage_number) + [RRDENG_PAGE_TYPE_ARRAY_32BIT] = sizeof(storage_number), + [RRDENG_PAGE_TYPE_ARRAY_TIER1] = sizeof(storage_number_tier1_t), + [RRDENG_PAGE_TYPE_GORILLA_32BIT] = 
sizeof(storage_number) }; __attribute__((constructor)) void initialize_multidb_ctx(void) { @@ -74,14 +80,14 @@ static inline bool rrdeng_page_alignment_release(struct pg_alignment *pa) { } // charts call this -STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *db_instance __maybe_unused, uuid_t *uuid __maybe_unused) { +STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *si __maybe_unused, uuid_t *uuid __maybe_unused) { struct pg_alignment *pa = callocz(1, sizeof(struct pg_alignment)); rrdeng_page_alignment_acquire(pa); return (STORAGE_METRICS_GROUP *)pa; } // charts call this -void rrdeng_metrics_group_release(STORAGE_INSTANCE *db_instance __maybe_unused, STORAGE_METRICS_GROUP *smg) { +void rrdeng_metrics_group_release(STORAGE_INSTANCE *si __maybe_unused, STORAGE_METRICS_GROUP *smg) { if(unlikely(!smg)) return; struct pg_alignment *pa = (struct pg_alignment *)smg; @@ -108,8 +114,8 @@ void rrdeng_generate_legacy_uuid(const char *dim_id, const char *chart_id, uuid_ memcpy(ret_uuid, hash_value, sizeof(uuid_t)); } -static METRIC *rrdeng_metric_get_legacy(STORAGE_INSTANCE *db_instance, const char *rd_id, const char *st_id) { - struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; +static METRIC *rrdeng_metric_get_legacy(STORAGE_INSTANCE *si, const char *rd_id, const char *st_id) { + struct rrdengine_instance *ctx = (struct rrdengine_instance *)si; uuid_t legacy_uuid; rrdeng_generate_legacy_uuid(rd_id, st_id, &legacy_uuid); return mrg_metric_get_and_acquire(main_mrg, &legacy_uuid, (Word_t) ctx); @@ -118,25 +124,25 @@ static METRIC *rrdeng_metric_get_legacy(STORAGE_INSTANCE *db_instance, const cha // ---------------------------------------------------------------------------- // metric handle -void rrdeng_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle) { - METRIC *metric = (METRIC *)db_metric_handle; +void rrdeng_metric_release(STORAGE_METRIC_HANDLE *smh) { + METRIC *metric = (METRIC *)smh; 
mrg_metric_release(main_mrg, metric); } -STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle) { - METRIC *metric = (METRIC *)db_metric_handle; +STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *smh) { + METRIC *metric = (METRIC *)smh; return (STORAGE_METRIC_HANDLE *) mrg_metric_dup(main_mrg, metric); } -STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid) { - struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; +STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *si, uuid_t *uuid) { + struct rrdengine_instance *ctx = (struct rrdengine_instance *)si; return (STORAGE_METRIC_HANDLE *) mrg_metric_get_and_acquire(main_mrg, uuid, (Word_t) ctx); } -static METRIC *rrdeng_metric_create(STORAGE_INSTANCE *db_instance, uuid_t *uuid) { - internal_fatal(!db_instance, "DBENGINE: db_instance is NULL"); +static METRIC *rrdeng_metric_create(STORAGE_INSTANCE *si, uuid_t *uuid) { + internal_fatal(!si, "DBENGINE: STORAGE_INSTANCE is NULL"); - struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; + struct rrdengine_instance *ctx = (struct rrdengine_instance *)si; MRG_ENTRY entry = { .uuid = uuid, .section = (Word_t)ctx, @@ -145,12 +151,15 @@ static METRIC *rrdeng_metric_create(STORAGE_INSTANCE *db_instance, uuid_t *uuid) .latest_update_every_s = 0, }; - METRIC *metric = mrg_metric_add_and_acquire(main_mrg, entry, NULL); + bool added; + METRIC *metric = mrg_metric_add_and_acquire(main_mrg, entry, &added); + if (added) + __atomic_add_fetch(&ctx->atomic.metrics, 1, __ATOMIC_RELAXED); return metric; } -STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance) { - struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; +STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *si) { + struct rrdengine_instance *ctx = (struct rrdengine_instance *)si; METRIC *metric; metric = 
mrg_metric_get_and_acquire(main_mrg, &rd->metric_uuid, (Word_t) ctx); @@ -160,13 +169,13 @@ STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE // this is a single host database // generate uuid from the chart and dimensions ids // and overwrite the one supplied by rrddim - metric = rrdeng_metric_get_legacy(db_instance, rrddim_id(rd), rrdset_id(rd->rrdset)); + metric = rrdeng_metric_get_legacy(si, rrddim_id(rd), rrdset_id(rd->rrdset)); if (metric) uuid_copy(rd->metric_uuid, *mrg_metric_uuid(main_mrg, metric)); } if(likely(!metric)) - metric = rrdeng_metric_create(db_instance, &rd->metric_uuid); + metric = rrdeng_metric_create(si, &rd->metric_uuid); } #ifdef NETDATA_INTERNAL_CHECKS @@ -192,14 +201,14 @@ STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE // collect ops static inline void check_and_fix_mrg_update_every(struct rrdeng_collect_handle *handle) { - if(unlikely((time_t)(handle->update_every_ut / USEC_PER_SEC) != mrg_metric_get_update_every_s(main_mrg, handle->metric))) { - internal_error(true, "DBENGINE: collection handle has update every %ld, but the metric registry has %ld. Fixing it.", - (time_t)(handle->update_every_ut / USEC_PER_SEC), mrg_metric_get_update_every_s(main_mrg, handle->metric)); + if(unlikely((uint32_t)(handle->update_every_ut / USEC_PER_SEC) != mrg_metric_get_update_every_s(main_mrg, handle->metric))) { + internal_error(true, "DBENGINE: collection handle has update every %u, but the metric registry has %u. 
Fixing it.", + (uint32_t)(handle->update_every_ut / USEC_PER_SEC), mrg_metric_get_update_every_s(main_mrg, handle->metric)); if(unlikely(!handle->update_every_ut)) handle->update_every_ut = (usec_t)mrg_metric_get_update_every_s(main_mrg, handle->metric) * USEC_PER_SEC; else - mrg_metric_set_update_every(main_mrg, handle->metric, (time_t)(handle->update_every_ut / USEC_PER_SEC)); + mrg_metric_set_update_every(main_mrg, handle->metric, (uint32_t)(handle->update_every_ut / USEC_PER_SEC)); } } @@ -213,7 +222,7 @@ static inline bool check_completed_page_consistency(struct rrdeng_collect_handle uuid_t *uuid = mrg_metric_uuid(main_mrg, handle->metric); time_t start_time_s = pgc_page_start_time_s(handle->pgc_page); time_t end_time_s = pgc_page_end_time_s(handle->pgc_page); - time_t update_every_s = pgc_page_update_every_s(handle->pgc_page); + uint32_t update_every_s = pgc_page_update_every_s(handle->pgc_page); size_t page_length = handle->page_position * CTX_POINT_SIZE_BYTES(ctx); size_t entries = handle->page_position; time_t overwrite_zero_update_every_s = (time_t)(handle->update_every_ut / USEC_PER_SEC); @@ -245,8 +254,8 @@ static inline bool check_completed_page_consistency(struct rrdeng_collect_handle * Gets a handle for storing metrics to the database. * The handle must be released with rrdeng_store_metric_final(). 
*/ -STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg) { - METRIC *metric = (METRIC *)db_metric_handle; +STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *smh, uint32_t update_every, STORAGE_METRICS_GROUP *smg) { + METRIC *metric = (METRIC *)smh; struct rrdengine_instance *ctx = mrg_metric_ctx(metric); bool is_1st_metric_writer = true; @@ -262,7 +271,7 @@ STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metri struct rrdeng_collect_handle *handle; handle = callocz(1, sizeof(struct rrdeng_collect_handle)); - handle->common.backend = STORAGE_ENGINE_BACKEND_DBENGINE; + handle->common.seb = STORAGE_ENGINE_BACKEND_DBENGINE; handle->metric = metric; handle->pgc_page = NULL; @@ -288,15 +297,15 @@ STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metri // data collection may be able to go back in time and during the addition of new pages // clean pages may be found matching ours! 
- time_t db_first_time_s, db_last_time_s, db_update_every_s; - mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s); + time_t db_first_time_s, db_last_time_s; + mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, NULL); handle->page_end_time_ut = (usec_t)db_last_time_s * USEC_PER_SEC; return (STORAGE_COLLECT_HANDLE *)handle; } -void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle) { - struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; +void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *sch) { + struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)sch; if (unlikely(!handle->pgc_page)) return; @@ -307,7 +316,17 @@ void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_h else { check_completed_page_consistency(handle); mrg_metric_set_clean_latest_time_s(main_mrg, handle->metric, pgc_page_end_time_s(handle->pgc_page)); - pgc_page_hot_to_dirty_and_release(main_cache, handle->pgc_page); + + struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric); + time_t start_time_s = pgc_page_start_time_s(handle->pgc_page); + time_t end_time_s = pgc_page_end_time_s(handle->pgc_page); + uint32_t update_every_s = mrg_metric_get_update_every_s(main_mrg, handle->metric); + if (end_time_s && start_time_s && end_time_s > start_time_s && update_every_s) { + uint64_t add_samples = (end_time_s - start_time_s) / update_every_s; + __atomic_add_fetch(&ctx->atomic.samples, add_samples, __ATOMIC_RELAXED); + } + + pgc_page_hot_to_dirty_and_release(main_cache, handle->pgc_page, false); } mrg_metric_set_hot_latest_time_s(main_mrg, handle->metric, 0); @@ -336,7 +355,7 @@ static void rrdeng_store_metric_create_new_page(struct rrdeng_collect_handle *ha PGD *data, size_t data_size) { time_t point_in_time_s = (time_t)(point_in_time_ut / USEC_PER_SEC); - const time_t update_every_s = 
(time_t)(handle->update_every_ut / USEC_PER_SEC); + const uint32_t update_every_s = (uint32_t)(handle->update_every_ut / USEC_PER_SEC); PGC_ENTRY page_entry = { .section = (Word_t) ctx, @@ -345,7 +364,7 @@ static void rrdeng_store_metric_create_new_page(struct rrdeng_collect_handle *ha .end_time_s = point_in_time_s, .size = data_size, .data = data, - .update_every_s = (uint32_t) update_every_s, + .update_every_s = update_every_s, .hot = true }; @@ -364,11 +383,11 @@ static void rrdeng_store_metric_create_new_page(struct rrdeng_collect_handle *ha nd_log_limit_static_global_var(erl, 1, 0); nd_log_limit(&erl, NDLS_DAEMON, NDLP_WARNING, #endif - "DBENGINE: metric '%s' new page from %ld to %ld, update every %ld, has a conflict in main cache " - "with existing %s%s page from %ld to %ld, update every %ld - " + "DBENGINE: metric '%s' new page from %ld to %ld, update every %u, has a conflict in main cache " + "with existing %s%s page from %ld to %ld, update every %u - " "is it collected more than once?", uuid, - page_entry.start_time_s, page_entry.end_time_s, (time_t)page_entry.update_every_s, + page_entry.start_time_s, page_entry.end_time_s, page_entry.update_every_s, pgc_is_page_hot(pgc_page) ? "hot" : "not-hot", pgc_page_data(pgc_page) == PGD_EMPTY ? " gap" : "", pgc_page_start_time_s(pgc_page), pgc_page_end_time_s(pgc_page), pgc_page_update_every_s(pgc_page) @@ -444,14 +463,14 @@ static PGD *rrdeng_alloc_new_page_data(struct rrdeng_collect_handle *handle, siz *data_size = size; switch (ctx->config.page_type) { - case PAGE_METRICS: - case PAGE_TIER: + case RRDENG_PAGE_TYPE_ARRAY_32BIT: + case RRDENG_PAGE_TYPE_ARRAY_TIER1: d = pgd_create(ctx->config.page_type, slots); break; - case PAGE_GORILLA_METRICS: + case RRDENG_PAGE_TYPE_GORILLA_32BIT: // ignore slots, and use the fixed number of slots per gorilla buffer. // gorilla will automatically add more buffers if needed. 
- d = pgd_create(ctx->config.page_type, GORILLA_BUFFER_SLOTS); + d = pgd_create(ctx->config.page_type, RRDENG_GORILLA_32BIT_BUFFER_SLOTS); break; default: fatal("Unknown page type: %uc\n", ctx->config.page_type); @@ -461,7 +480,7 @@ static PGD *rrdeng_alloc_new_page_data(struct rrdeng_collect_handle *handle, siz return d; } -static void rrdeng_store_metric_append_point(STORAGE_COLLECT_HANDLE *collection_handle, +static void rrdeng_store_metric_append_point(STORAGE_COLLECT_HANDLE *sch, const usec_t point_in_time_ut, const NETDATA_DOUBLE n, const NETDATA_DOUBLE min_value, @@ -470,7 +489,7 @@ static void rrdeng_store_metric_append_point(STORAGE_COLLECT_HANDLE *collection_ const uint16_t anomaly_count, const SN_FLAGS flags) { - struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; + struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)sch; struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric); if(unlikely(!handle->page_data)) @@ -497,7 +516,7 @@ static void rrdeng_store_metric_append_point(STORAGE_COLLECT_HANDLE *collection_ if(unlikely(++handle->page_position >= handle->page_entries_max)) { internal_fatal(handle->page_position > handle->page_entries_max, "DBENGINE: exceeded page max number of points"); handle->page_flags |= RRDENG_PAGE_FULL; - rrdeng_store_metric_flush_current_page(collection_handle); + rrdeng_store_metric_flush_current_page(sch); } } @@ -543,7 +562,7 @@ static void store_metric_next_error_log(struct rrdeng_collect_handle *handle __m #endif } -void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, +void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *sch, const usec_t point_in_time_ut, const NETDATA_DOUBLE n, const NETDATA_DOUBLE min_value, @@ -554,7 +573,7 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, { timing_step(TIMING_STEP_RRDSET_STORE_METRIC); - struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; 
+ struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)sch; #ifdef NETDATA_INTERNAL_CHECKS if(unlikely(point_in_time_ut > (usec_t)max_acceptable_collected_time() * USEC_PER_SEC)) @@ -571,11 +590,11 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, if(handle->pgc_page) { if (unlikely(delta_ut < handle->update_every_ut)) { handle->page_flags |= RRDENG_PAGE_STEP_TOO_SMALL; - rrdeng_store_metric_flush_current_page(collection_handle); + rrdeng_store_metric_flush_current_page(sch); } else if (unlikely(delta_ut % handle->update_every_ut)) { handle->page_flags |= RRDENG_PAGE_STEP_UNALIGNED; - rrdeng_store_metric_flush_current_page(collection_handle); + rrdeng_store_metric_flush_current_page(sch); } else { size_t points_gap = delta_ut / handle->update_every_ut; @@ -583,7 +602,7 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, if (points_gap >= page_remaining_points) { handle->page_flags |= RRDENG_PAGE_BIG_GAP; - rrdeng_store_metric_flush_current_page(collection_handle); + rrdeng_store_metric_flush_current_page(sch); } else { // loop to fill the gap @@ -594,7 +613,7 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, this_ut <= stop_ut; this_ut = handle->page_end_time_ut + handle->update_every_ut) { rrdeng_store_metric_append_point( - collection_handle, + sch, this_ut, NAN, NAN, NAN, 1, 0, @@ -618,7 +637,7 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, timing_step(TIMING_STEP_DBENGINE_FIRST_CHECK); - rrdeng_store_metric_append_point(collection_handle, + rrdeng_store_metric_append_point(sch, point_in_time_ut, n, min_value, max_value, count, anomaly_count, @@ -629,12 +648,12 @@ void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, * Releases the database reference from the handle for storing metrics. * Returns 1 if it's safe to delete the dimension. 
*/ -int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) { - struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; +int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *sch) { + struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)sch; struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric); handle->page_flags |= RRDENG_PAGE_COLLECT_FINALIZE; - rrdeng_store_metric_flush_current_page(collection_handle); + rrdeng_store_metric_flush_current_page(sch); rrdeng_page_alignment_release(handle->alignment); __atomic_sub_fetch(&ctx->atomic.collectors_running, 1, __ATOMIC_RELAXED); @@ -644,8 +663,8 @@ int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) { if((handle->options & RRDENG_1ST_METRIC_WRITER) && !mrg_metric_clear_writer(main_mrg, handle->metric)) internal_fatal(true, "DBENGINE: metric is already released"); - time_t first_time_s, last_time_s, update_every_s; - mrg_metric_get_retention(main_mrg, handle->metric, &first_time_s, &last_time_s, &update_every_s); + time_t first_time_s, last_time_s; + mrg_metric_get_retention(main_mrg, handle->metric, &first_time_s, &last_time_s, NULL); mrg_metric_release(main_mrg, handle->metric); freez(handle); @@ -656,8 +675,8 @@ int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle) { return 0; } -void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every) { - struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)collection_handle; +void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *sch, int update_every) { + struct rrdeng_collect_handle *handle = (struct rrdeng_collect_handle *)sch; check_and_fix_mrg_update_every(handle); METRIC *metric = handle->metric; @@ -667,7 +686,7 @@ void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *col return; handle->page_flags |= 
RRDENG_PAGE_UPDATE_EVERY_CHANGE; - rrdeng_store_metric_flush_current_page(collection_handle); + rrdeng_store_metric_flush_current_page(sch); mrg_metric_set_update_every(main_mrg, metric, update_every); handle->update_every_ut = update_every_ut; } @@ -704,8 +723,8 @@ static void unregister_query_handle(struct rrdeng_query_handle *handle __maybe_u * Gets a handle for loading metrics from the database. * The handle must be released with rrdeng_load_metric_final(). */ -void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, - struct storage_engine_query_handle *rrddim_handle, +void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *smh, + struct storage_engine_query_handle *seqh, time_t start_time_s, time_t end_time_s, STORAGE_PRIORITY priority) @@ -714,7 +733,7 @@ void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, netdata_thread_disable_cancelability(); - METRIC *metric = (METRIC *)db_metric_handle; + METRIC *metric = (METRIC *)smh; struct rrdengine_instance *ctx = mrg_metric_ctx(metric); struct rrdeng_query_handle *handle; @@ -736,7 +755,8 @@ void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, // is inserted into the main cache, to avoid scanning the journals // again for pages matching the gap. 
- time_t db_first_time_s, db_last_time_s, db_update_every_s; + time_t db_first_time_s, db_last_time_s; + uint32_t db_update_every_s; mrg_metric_get_retention(main_mrg, metric, &db_first_time_s, &db_last_time_s, &db_update_every_s); if(is_page_in_time_range(start_time_s, end_time_s, db_first_time_s, db_last_time_s) == PAGE_IS_IN_RANGE) { @@ -750,11 +770,11 @@ void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, mrg_metric_set_update_every_s_if_zero(main_mrg, metric, default_rrd_update_every); } - rrddim_handle->handle = (STORAGE_QUERY_HANDLE *) handle; - rrddim_handle->start_time_s = handle->start_time_s; - rrddim_handle->end_time_s = handle->end_time_s; - rrddim_handle->priority = priority; - rrddim_handle->backend = STORAGE_ENGINE_BACKEND_DBENGINE; + seqh->handle = (STORAGE_QUERY_HANDLE *) handle; + seqh->start_time_s = handle->start_time_s; + seqh->end_time_s = handle->end_time_s; + seqh->priority = priority; + seqh->seb = STORAGE_ENGINE_BACKEND_DBENGINE; pg_cache_preload(handle); @@ -766,16 +786,16 @@ void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, handle->now_s = start_time_s; handle->dt_s = db_update_every_s; - rrddim_handle->handle = (STORAGE_QUERY_HANDLE *) handle; - rrddim_handle->start_time_s = handle->start_time_s; - rrddim_handle->end_time_s = 0; - rrddim_handle->priority = priority; - rrddim_handle->backend = STORAGE_ENGINE_BACKEND_DBENGINE; + seqh->handle = (STORAGE_QUERY_HANDLE *) handle; + seqh->start_time_s = handle->start_time_s; + seqh->end_time_s = 0; + seqh->priority = priority; + seqh->seb = STORAGE_ENGINE_BACKEND_DBENGINE; } } -static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_handle, bool debug_this __maybe_unused) { - struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle; +static bool rrdeng_load_page_next(struct storage_engine_query_handle *seqh, bool debug_this __maybe_unused) { + struct rrdeng_query_handle *handle = (struct 
rrdeng_query_handle *)seqh->handle; struct rrdengine_instance *ctx = mrg_metric_ctx(handle->metric); if (likely(handle->page)) { @@ -785,7 +805,7 @@ static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_han pgdc_reset(&handle->pgdc, NULL, UINT32_MAX); } - if (unlikely(handle->now_s > rrddim_handle->end_time_s)) + if (unlikely(handle->now_s > seqh->end_time_s)) return false; size_t entries = 0; @@ -799,7 +819,7 @@ static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_han time_t page_start_time_s = pgc_page_start_time_s(handle->page); time_t page_end_time_s = pgc_page_end_time_s(handle->page); - time_t page_update_every_s = pgc_page_update_every_s(handle->page); + uint32_t page_update_every_s = pgc_page_update_every_s(handle->page); unsigned position; if(likely(handle->now_s >= page_start_time_s && handle->now_s <= page_end_time_s)) { @@ -810,13 +830,13 @@ static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_han } else { position = (handle->now_s - page_start_time_s) * (entries - 1) / (page_end_time_s - page_start_time_s); - time_t point_end_time_s = page_start_time_s + position * page_update_every_s; + time_t point_end_time_s = page_start_time_s + position * (time_t) page_update_every_s; while(point_end_time_s < handle->now_s && position + 1 < entries) { // https://github.com/netdata/netdata/issues/14411 // we really need a while() here, because the delta may be // 2 points at higher tiers position++; - point_end_time_s = page_start_time_s + position * page_update_every_s; + point_end_time_s = page_start_time_s + position * (time_t) page_update_every_s; } handle->now_s = point_end_time_s; } @@ -845,11 +865,11 @@ static bool rrdeng_load_page_next(struct storage_engine_query_handle *rrddim_han // Returns the metric and sets its timestamp into current_time // IT IS REQUIRED TO **ALWAYS** SET ALL RETURN VALUES (current_time, end_time, flags) // IT IS REQUIRED TO **ALWAYS** KEEP TRACK OF TIME, EVEN 
OUTSIDE THE DATABASE BOUNDARIES -STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim_handle) { - struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle; +STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *seqh) { + struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)seqh->handle; STORAGE_POINT sp; - if (unlikely(handle->now_s > rrddim_handle->end_time_s)) { + if (unlikely(handle->now_s > seqh->end_time_s)) { storage_point_empty(sp, handle->now_s - handle->dt_s, handle->now_s); goto prepare_for_next_iteration; } @@ -857,8 +877,8 @@ STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim if (unlikely(!handle->page || handle->position >= handle->entries)) { // We need to get a new page - if (!rrdeng_load_page_next(rrddim_handle, false)) { - handle->now_s = rrddim_handle->end_time_s; + if (!rrdeng_load_page_next(seqh, false)) { + handle->now_s = seqh->end_time_s; storage_point_empty(sp, handle->now_s - handle->dt_s, handle->now_s); goto prepare_for_next_iteration; } @@ -870,7 +890,7 @@ STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim pgdc_get_next_point(&handle->pgdc, handle->position, &sp); prepare_for_next_iteration: - internal_fatal(sp.end_time_s < rrddim_handle->start_time_s, "DBENGINE: this point is too old for this query"); + internal_fatal(sp.end_time_s < seqh->start_time_s, "DBENGINE: this point is too old for this query"); internal_fatal(sp.end_time_s < handle->now_s, "DBENGINE: this point is too old for this point in time"); handle->now_s += handle->dt_s; @@ -879,17 +899,17 @@ prepare_for_next_iteration: return sp; } -int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *rrddim_handle) { - struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle; - return (handle->now_s > rrddim_handle->end_time_s); +int rrdeng_load_metric_is_finished(struct 
storage_engine_query_handle *seqh) { + struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)seqh->handle; + return (handle->now_s > seqh->end_time_s); } /* * Releases the database reference from the handle for loading metrics. */ -void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrddim_handle) +void rrdeng_load_metric_finalize(struct storage_engine_query_handle *seqh) { - struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle; + struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)seqh->handle; if (handle->page) { pgc_page_release(main_cache, handle->page); @@ -901,24 +921,24 @@ void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrddim_hand unregister_query_handle(handle); rrdeng_query_handle_release(handle); - rrddim_handle->handle = NULL; + seqh->handle = NULL; netdata_thread_enable_cancelability(); } -time_t rrdeng_load_align_to_optimal_before(struct storage_engine_query_handle *rrddim_handle) { - struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)rrddim_handle->handle; +time_t rrdeng_load_align_to_optimal_before(struct storage_engine_query_handle *seqh) { + struct rrdeng_query_handle *handle = (struct rrdeng_query_handle *)seqh->handle; if(handle->pdc) { rrdeng_prep_wait(handle->pdc); - if (handle->pdc->optimal_end_time_s > rrddim_handle->end_time_s) - rrddim_handle->end_time_s = handle->pdc->optimal_end_time_s; + if (handle->pdc->optimal_end_time_s > seqh->end_time_s) + seqh->end_time_s = handle->pdc->optimal_end_time_s; } - return rrddim_handle->end_time_s; + return seqh->end_time_s; } -time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle) { - METRIC *metric = (METRIC *)db_metric_handle; +time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *smh) { + METRIC *metric = (METRIC *)smh; time_t latest_time_s = 0; if (metric) @@ -927,8 +947,8 @@ time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle) { return 
latest_time_s; } -time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle) { - METRIC *metric = (METRIC *)db_metric_handle; +time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *smh) { + METRIC *metric = (METRIC *)smh; time_t oldest_time_s = 0; if (metric) @@ -937,9 +957,9 @@ time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle) { return oldest_time_s; } -bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *db_instance, uuid_t *dim_uuid, time_t *first_entry_s, time_t *last_entry_s) +bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *si, uuid_t *dim_uuid, time_t *first_entry_s, time_t *last_entry_s) { - struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; + struct rrdengine_instance *ctx = (struct rrdengine_instance *)si; if (unlikely(!ctx)) { netdata_log_error("DBENGINE: invalid STORAGE INSTANCE to %s()", __FUNCTION__); return false; @@ -949,26 +969,35 @@ bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *db_instance, uuid_t *dim_ if (unlikely(!metric)) return false; - time_t update_every_s; - mrg_metric_get_retention(main_mrg, metric, first_entry_s, last_entry_s, &update_every_s); + mrg_metric_get_retention(main_mrg, metric, first_entry_s, last_entry_s, NULL); mrg_metric_release(main_mrg, metric); return true; } -uint64_t rrdeng_disk_space_max(STORAGE_INSTANCE *db_instance) { - struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; +uint64_t rrdeng_disk_space_max(STORAGE_INSTANCE *si) { + struct rrdengine_instance *ctx = (struct rrdengine_instance *)si; return ctx->config.max_disk_space; } -uint64_t rrdeng_disk_space_used(STORAGE_INSTANCE *db_instance) { - struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; +uint64_t rrdeng_disk_space_used(STORAGE_INSTANCE *si) { + struct rrdengine_instance *ctx = (struct rrdengine_instance *)si; return __atomic_load_n(&ctx->atomic.current_disk_space, __ATOMIC_RELAXED); } -time_t 
rrdeng_global_first_time_s(STORAGE_INSTANCE *db_instance) { - struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; +uint64_t rrdeng_metrics(STORAGE_INSTANCE *si) { + struct rrdengine_instance *ctx = (struct rrdengine_instance *)si; + return __atomic_load_n(&ctx->atomic.metrics, __ATOMIC_RELAXED); +} + +uint64_t rrdeng_samples(STORAGE_INSTANCE *si) { + struct rrdengine_instance *ctx = (struct rrdengine_instance *)si; + return __atomic_load_n(&ctx->atomic.samples, __ATOMIC_RELAXED); +} + +time_t rrdeng_global_first_time_s(STORAGE_INSTANCE *si) { + struct rrdengine_instance *ctx = (struct rrdengine_instance *)si; time_t t = __atomic_load_n(&ctx->atomic.first_time_s, __ATOMIC_RELAXED); if(t == LONG_MAX || t < 0) @@ -977,8 +1006,8 @@ time_t rrdeng_global_first_time_s(STORAGE_INSTANCE *db_instance) { return t; } -size_t rrdeng_currently_collected_metrics(STORAGE_INSTANCE *db_instance) { - struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; +size_t rrdeng_currently_collected_metrics(STORAGE_INSTANCE *si) { + struct rrdengine_instance *ctx = (struct rrdengine_instance *)si; return __atomic_load_n(&ctx->atomic.collectors_running, __ATOMIC_RELAXED); } @@ -1099,8 +1128,8 @@ void rrdeng_readiness_wait(struct rrdengine_instance *ctx) { netdata_log_info("DBENGINE: tier %d is ready for data collection and queries", ctx->config.tier); } -bool rrdeng_is_legacy(STORAGE_INSTANCE *db_instance) { - struct rrdengine_instance *ctx = (struct rrdengine_instance *)db_instance; +bool rrdeng_is_legacy(STORAGE_INSTANCE *si) { + struct rrdengine_instance *ctx = (struct rrdengine_instance *)si; return ctx->config.legacy; } @@ -1142,7 +1171,7 @@ int rrdeng_init(struct rrdengine_instance **ctxp, const char *dbfiles_path, ctx->config.tier = (int)tier; ctx->config.page_type = tier_page_type[tier]; - ctx->config.global_compress_alg = RRD_LZ4; + ctx->config.global_compress_alg = dbengine_default_compression(); if (disk_space_mb < RRDENG_MIN_DISK_SPACE_MB) 
disk_space_mb = RRDENG_MIN_DISK_SPACE_MB; ctx->config.max_disk_space = disk_space_mb * 1048576LLU; @@ -1154,6 +1183,8 @@ int rrdeng_init(struct rrdengine_instance **ctxp, const char *dbfiles_path, rw_spinlock_init(&ctx->njfv2idx.spinlock); ctx->atomic.first_time_s = LONG_MAX; + ctx->atomic.metrics = 0; + ctx->atomic.samples = 0; if (rrdeng_dbengine_spawn(ctx) && !init_rrd_files(ctx)) { // success - we run this ctx too diff --git a/database/engine/rrdengineapi.h b/src/database/engine/rrdengineapi.h index 7ae0e7079..fb449cd9b 100644 --- a/database/engine/rrdengineapi.h +++ b/src/database/engine/rrdengineapi.h @@ -26,32 +26,32 @@ extern uint8_t tier_page_type[]; void rrdeng_generate_legacy_uuid(const char *dim_id, const char *chart_id, uuid_t *ret_uuid); -STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *db_instance); -STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid); -void rrdeng_metric_release(STORAGE_METRIC_HANDLE *db_metric_handle); -STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *db_metric_handle); - -STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg); -void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle); -void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every); -void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time_ut, NETDATA_DOUBLE n, +STORAGE_METRIC_HANDLE *rrdeng_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *si); +STORAGE_METRIC_HANDLE *rrdeng_metric_get(STORAGE_INSTANCE *si, uuid_t *uuid); +void rrdeng_metric_release(STORAGE_METRIC_HANDLE *smh); +STORAGE_METRIC_HANDLE *rrdeng_metric_dup(STORAGE_METRIC_HANDLE *smh); + +STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *smh, uint32_t update_every, STORAGE_METRICS_GROUP *smg); +void 
rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *sch); +void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *sch, int update_every); +void rrdeng_store_metric_next(STORAGE_COLLECT_HANDLE *sch, usec_t point_in_time_ut, NETDATA_DOUBLE n, NETDATA_DOUBLE min_value, NETDATA_DOUBLE max_value, uint16_t count, uint16_t anomaly_count, SN_FLAGS flags); -int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle); +int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *sch); -void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *rrddim_handle, +void rrdeng_load_metric_init(STORAGE_METRIC_HANDLE *smh, struct storage_engine_query_handle *seqh, time_t start_time_s, time_t end_time_s, STORAGE_PRIORITY priority); -STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim_handle); +STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *seqh); -int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *rrddim_handle); -void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrddim_handle); -time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle); -time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle); -time_t rrdeng_load_align_to_optimal_before(struct storage_engine_query_handle *rrddim_handle); +int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *seqh); +void rrdeng_load_metric_finalize(struct storage_engine_query_handle *seqh); +time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *smh); +time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *smh); +time_t rrdeng_load_align_to_optimal_before(struct storage_engine_query_handle *seqh); void rrdeng_get_37_statistics(struct rrdengine_instance *ctx, unsigned long long *array); @@ -64,10 +64,10 @@ void rrdeng_exit_mode(struct rrdengine_instance *ctx); int rrdeng_exit(struct rrdengine_instance *ctx); void 
rrdeng_prepare_exit(struct rrdengine_instance *ctx); -bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *db_instance, uuid_t *dim_uuid, time_t *first_entry_s, time_t *last_entry_s); +bool rrdeng_metric_retention_by_uuid(STORAGE_INSTANCE *si, uuid_t *dim_uuid, time_t *first_entry_s, time_t *last_entry_s); -extern STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid); -extern void rrdeng_metrics_group_release(STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg); +extern STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *si, uuid_t *uuid); +extern void rrdeng_metrics_group_release(STORAGE_INSTANCE *si, STORAGE_METRICS_GROUP *smg); typedef struct rrdengine_size_statistics { size_t default_granularity_secs; @@ -221,9 +221,6 @@ struct rrdeng_cache_efficiency_stats rrdeng_get_cache_efficiency_stats(void); RRDENG_SIZE_STATS rrdeng_size_statistics(struct rrdengine_instance *ctx); size_t rrdeng_collectors_running(struct rrdengine_instance *ctx); -bool rrdeng_is_legacy(STORAGE_INSTANCE *db_instance); - -uint64_t rrdeng_disk_space_max(STORAGE_INSTANCE *db_instance); -uint64_t rrdeng_disk_space_used(STORAGE_INSTANCE *db_instance); +bool rrdeng_is_legacy(STORAGE_INSTANCE *si); #endif /* NETDATA_RRDENGINEAPI_H */ diff --git a/database/engine/rrdenginelib.c b/src/database/engine/rrdenginelib.c index dc581d98d..dc581d98d 100644 --- a/database/engine/rrdenginelib.c +++ b/src/database/engine/rrdenginelib.c diff --git a/database/engine/rrdenginelib.h b/src/database/engine/rrdenginelib.h index a0febd4f4..a0febd4f4 100644 --- a/database/engine/rrdenginelib.h +++ b/src/database/engine/rrdenginelib.h diff --git a/src/database/ram/README.md b/src/database/ram/README.md new file mode 100644 index 000000000..6ece6d0f4 --- /dev/null +++ b/src/database/ram/README.md @@ -0,0 +1,11 @@ +<!-- +title: "RAM database modes" +description: "Netdata's RAM database modes." 
+custom_edit_url: https://github.com/netdata/netdata/edit/master/src/database/ram/README.md +sidebar_label: "RAM database modes" +learn_status: "Published" +learn_topic_type: "References" +learn_rel_path: "Developers/Database" +--> + +# RAM database modes diff --git a/src/database/ram/rrddim_mem.c b/src/database/ram/rrddim_mem.c new file mode 100644 index 000000000..718dd357c --- /dev/null +++ b/src/database/ram/rrddim_mem.c @@ -0,0 +1,437 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrddim_mem.h" +#include "Judy.h" + +static Pvoid_t rrddim_JudyHS_array = NULL; +static netdata_rwlock_t rrddim_JudyHS_rwlock = NETDATA_RWLOCK_INITIALIZER; + +// ---------------------------------------------------------------------------- +// metrics groups + +STORAGE_METRICS_GROUP *rrddim_metrics_group_get(STORAGE_INSTANCE *si __maybe_unused, uuid_t *uuid __maybe_unused) { + return NULL; +} + +void rrddim_metrics_group_release(STORAGE_INSTANCE *si __maybe_unused, STORAGE_METRICS_GROUP *smg __maybe_unused) { + // if(!smg) return; // smg may be NULL + ; +} + +// ---------------------------------------------------------------------------- +// RRDDIM legacy data collection functions + +struct mem_metric_handle { + RRDDIM *rd; + + size_t counter; + size_t entries; + size_t current_entry; + time_t last_updated_s; + time_t update_every_s; + + int32_t refcount; +}; + +static void update_metric_handle_from_rrddim(struct mem_metric_handle *mh, RRDDIM *rd) { + mh->counter = rd->rrdset->counter; + mh->entries = rd->rrdset->db.entries; + mh->current_entry = rd->rrdset->db.current_entry; + mh->last_updated_s = rd->rrdset->last_updated.tv_sec; + mh->update_every_s = rd->rrdset->update_every; +} + +static void check_metric_handle_from_rrddim(struct mem_metric_handle *mh) { + RRDDIM *rd = mh->rd; (void)rd; + internal_fatal(mh->entries != (size_t)rd->rrdset->db.entries, "RRDDIM: entries do not match"); + internal_fatal(mh->update_every_s != rd->rrdset->update_every, "RRDDIM: update 
every does not match"); +} + +STORAGE_METRIC_HANDLE * +rrddim_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *si __maybe_unused) { + struct mem_metric_handle *mh = (struct mem_metric_handle *)rrddim_metric_get(si, &rd->metric_uuid); + while(!mh) { + netdata_rwlock_wrlock(&rrddim_JudyHS_rwlock); + Pvoid_t *PValue = JudyHSIns(&rrddim_JudyHS_array, &rd->metric_uuid, sizeof(uuid_t), PJE0); + mh = *PValue; + if(!mh) { + mh = callocz(1, sizeof(struct mem_metric_handle)); + mh->rd = rd; + mh->refcount = 1; + update_metric_handle_from_rrddim(mh, rd); + *PValue = mh; + __atomic_add_fetch(&rrddim_db_memory_size, sizeof(struct mem_metric_handle) + JUDYHS_INDEX_SIZE_ESTIMATE(sizeof(uuid_t)), __ATOMIC_RELAXED); + } + else { + if(__atomic_add_fetch(&mh->refcount, 1, __ATOMIC_RELAXED) <= 0) + mh = NULL; + } + netdata_rwlock_unlock(&rrddim_JudyHS_rwlock); + } + + internal_fatal(mh->rd != rd, "RRDDIM_MEM: incorrect pointer returned from index."); + + return (STORAGE_METRIC_HANDLE *)mh; +} + +STORAGE_METRIC_HANDLE * +rrddim_metric_get(STORAGE_INSTANCE *si __maybe_unused, uuid_t *uuid) { + struct mem_metric_handle *mh = NULL; + netdata_rwlock_rdlock(&rrddim_JudyHS_rwlock); + Pvoid_t *PValue = JudyHSGet(rrddim_JudyHS_array, uuid, sizeof(uuid_t)); + if (likely(NULL != PValue)) { + mh = *PValue; + if(__atomic_add_fetch(&mh->refcount, 1, __ATOMIC_RELAXED) <= 0) + mh = NULL; + } + netdata_rwlock_unlock(&rrddim_JudyHS_rwlock); + + return (STORAGE_METRIC_HANDLE *)mh; +} + +STORAGE_METRIC_HANDLE *rrddim_metric_dup(STORAGE_METRIC_HANDLE *smh) { + struct mem_metric_handle *mh = (struct mem_metric_handle *)smh; + __atomic_add_fetch(&mh->refcount, 1, __ATOMIC_RELAXED); + return smh; +} + +void rrddim_metric_release(STORAGE_METRIC_HANDLE *smh __maybe_unused) { + struct mem_metric_handle *mh = (struct mem_metric_handle *)smh; + + if(__atomic_sub_fetch(&mh->refcount, 1, __ATOMIC_RELAXED) == 0) { + // we are the last one holding this + + int32_t expected = 0; + 
if(__atomic_compare_exchange_n(&mh->refcount, &expected, -99999, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { + // we can delete it + + RRDDIM *rd = mh->rd; + netdata_rwlock_wrlock(&rrddim_JudyHS_rwlock); + JudyHSDel(&rrddim_JudyHS_array, &rd->metric_uuid, sizeof(uuid_t), PJE0); + netdata_rwlock_unlock(&rrddim_JudyHS_rwlock); + + freez(mh); + __atomic_sub_fetch(&rrddim_db_memory_size, sizeof(struct mem_metric_handle) + JUDYHS_INDEX_SIZE_ESTIMATE(sizeof(uuid_t)), __ATOMIC_RELAXED); + } + } +} + +bool rrddim_metric_retention_by_uuid(STORAGE_INSTANCE *si __maybe_unused, uuid_t *uuid, time_t *first_entry_s, time_t *last_entry_s) { + STORAGE_METRIC_HANDLE *smh = rrddim_metric_get(si, uuid); + if(!smh) + return false; + + *first_entry_s = rrddim_query_oldest_time_s(smh); + *last_entry_s = rrddim_query_latest_time_s(smh); + + return true; +} + +void rrddim_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *sch, int update_every) { + struct mem_collect_handle *ch = (struct mem_collect_handle *)sch; + struct mem_metric_handle *mh = (struct mem_metric_handle *)ch->smh; + + rrddim_store_metric_flush(sch); + mh->update_every_s = update_every; +} + +STORAGE_COLLECT_HANDLE *rrddim_collect_init(STORAGE_METRIC_HANDLE *smh, uint32_t update_every __maybe_unused, STORAGE_METRICS_GROUP *smg __maybe_unused) { + struct mem_metric_handle *mh = (struct mem_metric_handle *)smh; + RRDDIM *rd = mh->rd; + + update_metric_handle_from_rrddim(mh, rd); + internal_fatal((uint32_t)mh->update_every_s != update_every, "RRDDIM: update requested does not match the dimension"); + + struct mem_collect_handle *ch = callocz(1, sizeof(struct mem_collect_handle)); + ch->common.seb = STORAGE_ENGINE_BACKEND_RRDDIM; + ch->rd = rd; + ch->smh = smh; + + __atomic_add_fetch(&rrddim_db_memory_size, sizeof(struct mem_collect_handle), __ATOMIC_RELAXED); + + return (STORAGE_COLLECT_HANDLE *)ch; +} + +void rrddim_store_metric_flush(STORAGE_COLLECT_HANDLE *sch) { + struct mem_collect_handle *ch = (struct 
mem_collect_handle *)sch; + struct mem_metric_handle *mh = (struct mem_metric_handle *)ch->smh; + + RRDDIM *rd = mh->rd; + size_t entries = mh->entries; + storage_number empty = pack_storage_number(NAN, SN_FLAG_NONE); + + for(size_t i = 0; i < entries ;i++) + rd->db.data[i] = empty; + + mh->counter = 0; + mh->last_updated_s = 0; + mh->current_entry = 0; +} + +static inline void rrddim_fill_the_gap(STORAGE_COLLECT_HANDLE *sch, time_t now_collect_s) { + struct mem_collect_handle *ch = (struct mem_collect_handle *)sch; + struct mem_metric_handle *mh = (struct mem_metric_handle *)ch->smh; + + RRDDIM *rd = mh->rd; + + internal_fatal(ch->rd != mh->rd, "RRDDIM: dimensions do not match"); + check_metric_handle_from_rrddim(mh); + + size_t entries = mh->entries; + time_t update_every_s = mh->update_every_s; + time_t last_stored_s = mh->last_updated_s; + size_t gap_entries = (now_collect_s - last_stored_s) / update_every_s; + if(gap_entries >= entries) + rrddim_store_metric_flush(sch); + + else { + storage_number empty = pack_storage_number(NAN, SN_FLAG_NONE); + size_t current_entry = mh->current_entry; + time_t now_store_s = last_stored_s + update_every_s; + + // fill the dimension + size_t c; + for(c = 0; c < entries && now_store_s <= now_collect_s ; now_store_s += update_every_s, c++) { + rd->db.data[current_entry++] = empty; + + if(unlikely(current_entry >= entries)) + current_entry = 0; + } + mh->counter += c; + mh->current_entry = current_entry; + mh->last_updated_s = now_store_s; + } +} + +void rrddim_collect_store_metric(STORAGE_COLLECT_HANDLE *sch, + usec_t point_in_time_ut, + NETDATA_DOUBLE n, + NETDATA_DOUBLE min_value __maybe_unused, + NETDATA_DOUBLE max_value __maybe_unused, + uint16_t count __maybe_unused, + uint16_t anomaly_count __maybe_unused, + SN_FLAGS flags) +{ + struct mem_collect_handle *ch = (struct mem_collect_handle *)sch; + struct mem_metric_handle *mh = (struct mem_metric_handle *)ch->smh; + + RRDDIM *rd = ch->rd; + time_t point_in_time_s = 
(time_t)(point_in_time_ut / USEC_PER_SEC); + + internal_fatal(ch->rd != mh->rd, "RRDDIM: dimensions do not match"); + check_metric_handle_from_rrddim(mh); + + if(unlikely(point_in_time_s <= mh->last_updated_s)) + return; + + if(unlikely(mh->last_updated_s && point_in_time_s - mh->update_every_s > mh->last_updated_s)) + rrddim_fill_the_gap(sch, point_in_time_s); + + rd->db.data[mh->current_entry] = pack_storage_number(n, flags); + mh->counter++; + mh->current_entry = (mh->current_entry + 1) >= mh->entries ? 0 : mh->current_entry + 1; + mh->last_updated_s = point_in_time_s; +} + +int rrddim_collect_finalize(STORAGE_COLLECT_HANDLE *sch) { + freez(sch); + __atomic_sub_fetch(&rrddim_db_memory_size, sizeof(struct mem_collect_handle), __ATOMIC_RELAXED); + return 0; +} + +// ---------------------------------------------------------------------------- + +// get the total duration in seconds of the round-robin database +#define metric_duration(mh) (( (time_t)(mh)->counter >= (time_t)(mh)->entries ? (time_t)(mh)->entries : (time_t)(mh)->counter ) * (time_t)(mh)->update_every_s) + +// get the last slot updated in the round-robin database +#define rrddim_last_slot(mh) ((size_t)(((mh)->current_entry == 0) ? (mh)->entries - 1 : (mh)->current_entry - 1)) + +// return the slot that has the oldest value +#define rrddim_first_slot(mh) ((size_t)((mh)->counter >= (size_t)(mh)->entries ? 
(mh)->current_entry : 0)) + +// get the slot of the round-robin database, for the given timestamp (t) +// it always returns a valid slot, although it may not be for the time requested if the time is outside the round-robin database +// only valid when not using dbengine +static inline size_t rrddim_time2slot(STORAGE_METRIC_HANDLE *smh, time_t t) { + struct mem_metric_handle *mh = (struct mem_metric_handle *)smh; + RRDDIM *rd = mh->rd; + + size_t ret = 0; + time_t last_entry_s = rrddim_query_latest_time_s(smh); + time_t first_entry_s = rrddim_query_oldest_time_s(smh); + size_t entries = mh->entries; + size_t first_slot = rrddim_first_slot(mh); + size_t last_slot = rrddim_last_slot(mh); + size_t update_every = mh->update_every_s; + + if(t >= last_entry_s) { + // the requested time is after the last entry we have + ret = last_slot; + } + else { + if(t <= first_entry_s) { + // the requested time is before the first entry we have + ret = first_slot; + } + else { + if(last_slot >= (size_t)((last_entry_s - t) / update_every)) + ret = last_slot - ((last_entry_s - t) / update_every); + else + ret = last_slot - ((last_entry_s - t) / update_every) + entries; + } + } + + if(unlikely(ret >= entries)) { + netdata_log_error("INTERNAL ERROR: rrddim_time2slot() on %s returns values outside entries", rrddim_name(rd)); + ret = entries - 1; + } + + return ret; +} + +// get the timestamp of a specific slot in the round-robin database +// only valid when not using dbengine +static inline time_t rrddim_slot2time(STORAGE_METRIC_HANDLE *smh, size_t slot) { + struct mem_metric_handle *mh = (struct mem_metric_handle *)smh; + RRDDIM *rd = mh->rd; + + time_t ret; + time_t last_entry_s = rrddim_query_latest_time_s(smh); + time_t first_entry_s = rrddim_query_oldest_time_s(smh); + size_t entries = mh->entries; + size_t last_slot = rrddim_last_slot(mh); + size_t update_every = mh->update_every_s; + + if(slot >= entries) { + netdata_log_error("INTERNAL ERROR: caller of rrddim_slot2time() gives 
invalid slot %zu", slot); + slot = entries - 1; + } + + if(slot > last_slot) + ret = last_entry_s - (time_t)(update_every * (last_slot - slot + entries)); + else + ret = last_entry_s - (time_t)(update_every * (last_slot - slot)); + + if(unlikely(ret < first_entry_s)) { + netdata_log_error("INTERNAL ERROR: rrddim_slot2time() on dimension '%s' of chart '%s' returned time (%ld) too far in the past (before first_entry_s %ld) for slot %zu", + rrddim_name(rd), rrdset_id(rd->rrdset), ret, first_entry_s, slot); + + ret = first_entry_s; + } + + if(unlikely(ret > last_entry_s)) { + netdata_log_error("INTERNAL ERROR: rrddim_slot2time() on dimension '%s' of chart '%s' returned time (%ld) too far into the future (after last_entry_s %ld) for slot %zu", + rrddim_name(rd), rrdset_id(rd->rrdset), ret, last_entry_s, slot); + + ret = last_entry_s; + } + + return ret; +} + +// ---------------------------------------------------------------------------- +// RRDDIM legacy database query functions + +void rrddim_query_init(STORAGE_METRIC_HANDLE *smh, struct storage_engine_query_handle *seqh, time_t start_time_s, time_t end_time_s, STORAGE_PRIORITY priority __maybe_unused) { + struct mem_metric_handle *mh = (struct mem_metric_handle *)smh; + + check_metric_handle_from_rrddim(mh); + + seqh->start_time_s = start_time_s; + seqh->end_time_s = end_time_s; + seqh->priority = priority; + seqh->seb = STORAGE_ENGINE_BACKEND_RRDDIM; + struct mem_query_handle* h = mallocz(sizeof(struct mem_query_handle)); + h->smh = smh; + + h->slot = rrddim_time2slot(smh, start_time_s); + h->last_slot = rrddim_time2slot(smh, end_time_s); + h->dt = mh->update_every_s; + + h->next_timestamp = start_time_s; + h->slot_timestamp = rrddim_slot2time(smh, h->slot); + h->last_timestamp = rrddim_slot2time(smh, h->last_slot); + + // netdata_log_info("RRDDIM QUERY INIT: start %ld, end %ld, next %ld, first %ld, last %ld, dt %ld", start_time, end_time, h->next_timestamp, h->slot_timestamp, h->last_timestamp, h->dt); + + 
__atomic_add_fetch(&rrddim_db_memory_size, sizeof(struct mem_query_handle), __ATOMIC_RELAXED); + seqh->handle = (STORAGE_QUERY_HANDLE *)h; +} + +// Returns the metric and sets its timestamp into current_time +// IT IS REQUIRED TO **ALWAYS** SET ALL RETURN VALUES (current_time, end_time, flags) +// IT IS REQUIRED TO **ALWAYS** KEEP TRACK OF TIME, EVEN OUTSIDE THE DATABASE BOUNDARIES +STORAGE_POINT rrddim_query_next_metric(struct storage_engine_query_handle *seqh) { + struct mem_query_handle* h = (struct mem_query_handle*)seqh->handle; + struct mem_metric_handle *mh = (struct mem_metric_handle *)h->smh; + RRDDIM *rd = mh->rd; + + size_t entries = mh->entries; + size_t slot = h->slot; + + STORAGE_POINT sp; + sp.count = 1; + + time_t this_timestamp = h->next_timestamp; + h->next_timestamp += h->dt; + + // set this timestamp for our caller + sp.start_time_s = this_timestamp - h->dt; + sp.end_time_s = this_timestamp; + + if(unlikely(this_timestamp < h->slot_timestamp)) { + storage_point_empty(sp, sp.start_time_s, sp.end_time_s); + return sp; + } + + if(unlikely(this_timestamp > h->last_timestamp)) { + storage_point_empty(sp, sp.start_time_s, sp.end_time_s); + return sp; + } + + storage_number n = rd->db.data[slot++]; + if(unlikely(slot >= entries)) slot = 0; + + h->slot = slot; + h->slot_timestamp += h->dt; + + sp.anomaly_count = is_storage_number_anomalous(n) ? 
1 : 0; + sp.flags = (n & SN_USER_FLAGS); + sp.min = sp.max = sp.sum = unpack_storage_number(n); + + return sp; +} + +int rrddim_query_is_finished(struct storage_engine_query_handle *seqh) { + struct mem_query_handle *h = (struct mem_query_handle*)seqh->handle; + return (h->next_timestamp > seqh->end_time_s); +} + +void rrddim_query_finalize(struct storage_engine_query_handle *seqh) { +#ifdef NETDATA_INTERNAL_CHECKS + struct mem_query_handle *h = (struct mem_query_handle*)seqh->handle; + struct mem_metric_handle *mh = (struct mem_metric_handle *)h->smh; + + internal_error(!rrddim_query_is_finished(seqh), + "QUERY: query for chart '%s' dimension '%s' has been stopped unfinished", + rrdset_id(mh->rd->rrdset), rrddim_name(mh->rd)); + +#endif + freez(seqh->handle); + __atomic_sub_fetch(&rrddim_db_memory_size, sizeof(struct mem_query_handle), __ATOMIC_RELAXED); +} + +time_t rrddim_query_align_to_optimal_before(struct storage_engine_query_handle *seqh) { + return seqh->end_time_s; +} + +time_t rrddim_query_latest_time_s(STORAGE_METRIC_HANDLE *smh) { + struct mem_metric_handle *mh = (struct mem_metric_handle *)smh; + return mh->last_updated_s; +} + +time_t rrddim_query_oldest_time_s(STORAGE_METRIC_HANDLE *smh) { + struct mem_metric_handle *mh = (struct mem_metric_handle *)smh; + return (time_t)(mh->last_updated_s - metric_duration(mh)); +} diff --git a/src/database/ram/rrddim_mem.h b/src/database/ram/rrddim_mem.h new file mode 100644 index 000000000..166b8777a --- /dev/null +++ b/src/database/ram/rrddim_mem.h @@ -0,0 +1,54 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDDIMMEM_H +#define NETDATA_RRDDIMMEM_H + +#include "database/rrd.h" + +struct mem_collect_handle { + struct storage_collect_handle common; // has to be first item + + STORAGE_METRIC_HANDLE *smh; + RRDDIM *rd; +}; + +struct mem_query_handle { + STORAGE_METRIC_HANDLE *smh; + time_t dt; + time_t next_timestamp; + time_t last_timestamp; + time_t slot_timestamp; + size_t slot; + size_t 
last_slot; +}; + +STORAGE_METRIC_HANDLE *rrddim_metric_get_or_create(RRDDIM *rd, STORAGE_INSTANCE *si); +STORAGE_METRIC_HANDLE *rrddim_metric_get(STORAGE_INSTANCE *si, uuid_t *uuid); +STORAGE_METRIC_HANDLE *rrddim_metric_dup(STORAGE_METRIC_HANDLE *smh); +void rrddim_metric_release(STORAGE_METRIC_HANDLE *smh); + +bool rrddim_metric_retention_by_uuid(STORAGE_INSTANCE *si, uuid_t *uuid, time_t *first_entry_s, time_t *last_entry_s); + +STORAGE_METRICS_GROUP *rrddim_metrics_group_get(STORAGE_INSTANCE *si, uuid_t *uuid); +void rrddim_metrics_group_release(STORAGE_INSTANCE *si, STORAGE_METRICS_GROUP *smg); + +STORAGE_COLLECT_HANDLE *rrddim_collect_init(STORAGE_METRIC_HANDLE *smh, uint32_t update_every, STORAGE_METRICS_GROUP *smg); +void rrddim_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *sch, int update_every); +void rrddim_collect_store_metric(STORAGE_COLLECT_HANDLE *sch, usec_t point_in_time_ut, NETDATA_DOUBLE n, + NETDATA_DOUBLE min_value, + NETDATA_DOUBLE max_value, + uint16_t count, + uint16_t anomaly_count, + SN_FLAGS flags); +void rrddim_store_metric_flush(STORAGE_COLLECT_HANDLE *sch); +int rrddim_collect_finalize(STORAGE_COLLECT_HANDLE *sch); + +void rrddim_query_init(STORAGE_METRIC_HANDLE *smh, struct storage_engine_query_handle *seqh, time_t start_time_s, time_t end_time_s, STORAGE_PRIORITY priority); +STORAGE_POINT rrddim_query_next_metric(struct storage_engine_query_handle *seqh); +int rrddim_query_is_finished(struct storage_engine_query_handle *seqh); +void rrddim_query_finalize(struct storage_engine_query_handle *seqh); +time_t rrddim_query_latest_time_s(STORAGE_METRIC_HANDLE *smh); +time_t rrddim_query_oldest_time_s(STORAGE_METRIC_HANDLE *smh); +time_t rrddim_query_align_to_optimal_before(struct storage_engine_query_handle *seqh); + +#endif diff --git a/database/rrd.c b/src/database/rrd.c index 5b7752a5e..b664ad3ae 100644 --- a/database/rrd.c +++ b/src/database/rrd.c @@ -19,7 +19,7 @@ int default_rrd_history_entries = 
RRD_DEFAULT_HISTORY_ENTRIES; #ifdef ENABLE_DBENGINE RRD_MEMORY_MODE default_rrd_memory_mode = RRD_MEMORY_MODE_DBENGINE; #else -RRD_MEMORY_MODE default_rrd_memory_mode = RRD_MEMORY_MODE_SAVE; +RRD_MEMORY_MODE default_rrd_memory_mode = RRD_MEMORY_MODE_RAM; #endif int gap_when_lost_iterations_above = 1; @@ -32,15 +32,9 @@ inline const char *rrd_memory_mode_name(RRD_MEMORY_MODE id) { case RRD_MEMORY_MODE_RAM: return RRD_MEMORY_MODE_RAM_NAME; - case RRD_MEMORY_MODE_MAP: - return RRD_MEMORY_MODE_MAP_NAME; - case RRD_MEMORY_MODE_NONE: return RRD_MEMORY_MODE_NONE_NAME; - case RRD_MEMORY_MODE_SAVE: - return RRD_MEMORY_MODE_SAVE_NAME; - case RRD_MEMORY_MODE_ALLOC: return RRD_MEMORY_MODE_ALLOC_NAME; @@ -53,7 +47,7 @@ inline const char *rrd_memory_mode_name(RRD_MEMORY_MODE id) { return eng->name; } - return RRD_MEMORY_MODE_SAVE_NAME; + return RRD_MEMORY_MODE_RAM_NAME; } RRD_MEMORY_MODE rrd_memory_mode_id(const char *name) { @@ -62,7 +56,7 @@ RRD_MEMORY_MODE rrd_memory_mode_id(const char *name) { return eng->id; } - return RRD_MEMORY_MODE_SAVE; + return RRD_MEMORY_MODE_RAM; } @@ -133,28 +127,6 @@ const char *rrdset_type_name(RRDSET_TYPE chart_type) { } // ---------------------------------------------------------------------------- -// RRD - cache directory - -char *rrdhost_cache_dir_for_rrdset_alloc(RRDHOST *host, const char *id) { - char *ret = NULL; - - char b[FILENAME_MAX + 1]; - char n[FILENAME_MAX + 1]; - rrdset_strncpyz_name(b, id, FILENAME_MAX); - - snprintfz(n, FILENAME_MAX, "%s/%s", host->cache_dir, b); - ret = strdupz(n); - - if(host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE) { - int r = mkdir(ret, 0775); - if(r != 0 && errno != EEXIST) - netdata_log_error("Cannot create directory '%s'", ret); - } - - return ret; -} - -// ---------------------------------------------------------------------------- // RRD - string management STRING *rrd_string_strdupz(const char *s) { diff --git a/database/rrd.h b/src/database/rrd.h index 
5f4bee037..3295f036c 100644 --- a/database/rrd.h +++ b/src/database/rrd.h @@ -20,14 +20,11 @@ typedef struct rrdhost RRDHOST; typedef struct rrddim RRDDIM; typedef struct rrdset RRDSET; typedef struct rrdcalc RRDCALC; -typedef struct rrdcalctemplate RRDCALCTEMPLATE; typedef struct alarm_entry ALARM_ENTRY; typedef struct rrdlabels RRDLABELS; -typedef struct rrdfamily_acquired RRDFAMILY_ACQUIRED; typedef struct rrdvar_acquired RRDVAR_ACQUIRED; -typedef struct rrdsetvar_acquired RRDSETVAR_ACQUIRED; typedef struct rrdcalc_acquired RRDCALC_ACQUIRED; typedef struct rrdhost_acquired RRDHOST_ACQUIRED; @@ -68,9 +65,7 @@ typedef enum __attribute__ ((__packed__)) storage_priority { struct rrddim_tier; #ifdef ENABLE_DBENGINE -struct rrdeng_page_descr; struct rrdengine_instance; -struct pg_cache_page_index; #endif // ---------------------------------------------------------------------------- @@ -79,8 +74,6 @@ struct pg_cache_page_index; typedef enum __attribute__ ((__packed__)) rrd_memory_mode { RRD_MEMORY_MODE_NONE = 0, RRD_MEMORY_MODE_RAM = 1, - RRD_MEMORY_MODE_MAP = 2, - RRD_MEMORY_MODE_SAVE = 3, RRD_MEMORY_MODE_ALLOC = 4, RRD_MEMORY_MODE_DBENGINE = 5, @@ -89,8 +82,6 @@ typedef enum __attribute__ ((__packed__)) rrd_memory_mode { #define RRD_MEMORY_MODE_NONE_NAME "none" #define RRD_MEMORY_MODE_RAM_NAME "ram" -#define RRD_MEMORY_MODE_MAP_NAME "map" -#define RRD_MEMORY_MODE_SAVE_NAME "save" #define RRD_MEMORY_MODE_ALLOC_NAME "alloc" #define RRD_MEMORY_MODE_DBENGINE_NAME "dbengine" @@ -110,11 +101,8 @@ struct ml_metrics_statistics { #include "daemon/common.h" #include "web/api/queries/query.h" #include "web/api/queries/rrdr.h" -#include "rrdvar.h" -#include "rrdsetvar.h" -#include "rrddimvar.h" -#include "rrdcalc.h" -#include "rrdcalctemplate.h" +#include "health/rrdvar.h" +#include "health/rrdcalc.h" #include "rrdlabels.h" #include "streaming/rrdpush.h" #include "aclk/aclk_rrdhost_state.h" @@ -134,7 +122,7 @@ struct storage_engine_query_handle { time_t start_time_s; time_t 
end_time_s; STORAGE_PRIORITY priority; - STORAGE_ENGINE_BACKEND backend; + STORAGE_ENGINE_BACKEND seb; STORAGE_QUERY_HANDLE *handle; }; @@ -156,7 +144,6 @@ const char *rrdset_type_name(RRDSET_TYPE chart_type); #include "contexts/rrdcontext.h" -extern bool unittest_running; extern bool dbengine_enabled; extern size_t storage_tiers; extern bool use_direct_io; @@ -194,10 +181,9 @@ extern time_t rrdset_free_obsolete_time_s; extern int libuv_worker_threads; extern bool ieee754_doubles; -#define RRD_ID_LENGTH_MAX 1000 +#define RRD_ID_LENGTH_MAX 1200 typedef long long total_number; -#define TOTAL_NUMBER_FORMAT "%lld" // ---------------------------------------------------------------------------- // algorithms types @@ -220,16 +206,6 @@ RRD_ALGORITHM rrd_algorithm_id(const char *name); const char *rrd_algorithm_name(RRD_ALGORITHM algorithm); // ---------------------------------------------------------------------------- -// RRD FAMILY - -const RRDFAMILY_ACQUIRED *rrdfamily_add_and_acquire(RRDHOST *host, const char *id); -void rrdfamily_release(RRDHOST *host, const RRDFAMILY_ACQUIRED *rfa); -void rrdfamily_index_init(RRDHOST *host); -void rrdfamily_index_destroy(RRDHOST *host); -DICTIONARY *rrdfamily_rrdvars_dict(const RRDFAMILY_ACQUIRED *rf); - - -// ---------------------------------------------------------------------------- // flags & options // options are permanent configuration options (no atomics to alter/access them) @@ -272,7 +248,7 @@ typedef enum __attribute__ ((__packed__)) rrddim_flags { // ---------------------------------------------------------------------------- // engine-specific iterator state for dimension data collection typedef struct storage_collect_handle { - STORAGE_ENGINE_BACKEND backend; + STORAGE_ENGINE_BACKEND seb; } STORAGE_COLLECT_HANDLE; // ---------------------------------------------------------------------------- @@ -280,11 +256,12 @@ typedef struct storage_collect_handle { struct rrddim_tier { STORAGE_POINT virtual_point; - 
STORAGE_ENGINE_BACKEND backend; + STORAGE_ENGINE_BACKEND seb; + SPINLOCK spinlock; uint32_t tier_grouping; time_t next_point_end_time_s; - STORAGE_METRIC_HANDLE *db_metric_handle; // the metric handle inside the database - STORAGE_COLLECT_HANDLE *db_collection_handle; // the data collection handle + STORAGE_METRIC_HANDLE *smh; // the metric handle inside the database + STORAGE_COLLECT_HANDLE *sch; // the data collection handle }; void rrdr_fill_tier_gap_from_smaller_tiers(RRDDIM *rd, size_t tier, time_t now_s); @@ -326,13 +303,12 @@ struct rrddim { #endif // ------------------------------------------------------------------------ - // db mode RAM, SAVE, MAP, ALLOC, NONE specifics + // db mode RAM, ALLOC, NONE specifics // TODO - they should be managed by storage engine // (RRDDIM_DB_STATE ptr to an undefined structure, and a call to clean this up during destruction) struct { size_t memsize; // the memory allocated for this dimension (without RRDDIM) - void *rd_on_file; // pointer to the header written on disk storage_number *data; // the array of values } db; @@ -382,287 +358,292 @@ size_t rrddim_size(void); #define rrddim_set_updated(rd) (rd)->collector.options |= RRDDIM_OPTION_UPDATED #define rrddim_clear_updated(rd) (rd)->collector.options &= ~RRDDIM_OPTION_UPDATED -// returns the RRDDIM cache filename, or NULL if it does not exist -const char *rrddim_cache_filename(RRDDIM *rd); - -// updated the header with the latest RRDDIM value, for memory mode MAP and SAVE -void rrddim_memory_file_update(RRDDIM *rd); - -// free the memory file structures for memory mode MAP and SAVE -void rrddim_memory_file_free(RRDDIM *rd); - -bool rrddim_memory_load_or_create_map_save(RRDSET *st, RRDDIM *rd, RRD_MEMORY_MODE memory_mode); - -// return the v019 header size of RRDDIM files -size_t rrddim_memory_file_header_size(void); - -void rrddim_memory_file_save(RRDDIM *rd); - // ------------------------------------------------------------------------ // DATA COLLECTION STORAGE OPS 
-STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid); -STORAGE_METRICS_GROUP *rrddim_metrics_group_get(STORAGE_INSTANCE *db_instance, uuid_t *uuid); -static inline STORAGE_METRICS_GROUP *storage_engine_metrics_group_get(STORAGE_ENGINE_BACKEND backend __maybe_unused, STORAGE_INSTANCE *db_instance, uuid_t *uuid) { - internal_fatal(!is_valid_backend(backend), "STORAGE: invalid backend"); +STORAGE_METRICS_GROUP *rrdeng_metrics_group_get(STORAGE_INSTANCE *si, uuid_t *uuid); +STORAGE_METRICS_GROUP *rrddim_metrics_group_get(STORAGE_INSTANCE *si, uuid_t *uuid); +static inline STORAGE_METRICS_GROUP *storage_engine_metrics_group_get(STORAGE_ENGINE_BACKEND seb __maybe_unused, STORAGE_INSTANCE *si, uuid_t *uuid) { + internal_fatal(!is_valid_backend(seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - if(likely(backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return rrdeng_metrics_group_get(db_instance, uuid); + if(likely(seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_metrics_group_get(si, uuid); #endif - return rrddim_metrics_group_get(db_instance, uuid); + return rrddim_metrics_group_get(si, uuid); } -void rrdeng_metrics_group_release(STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg); -void rrddim_metrics_group_release(STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg); -static inline void storage_engine_metrics_group_release(STORAGE_ENGINE_BACKEND backend __maybe_unused, STORAGE_INSTANCE *db_instance, STORAGE_METRICS_GROUP *smg) { - internal_fatal(!is_valid_backend(backend), "STORAGE: invalid backend"); +void rrdeng_metrics_group_release(STORAGE_INSTANCE *si, STORAGE_METRICS_GROUP *smg); +void rrddim_metrics_group_release(STORAGE_INSTANCE *si, STORAGE_METRICS_GROUP *smg); +static inline void storage_engine_metrics_group_release(STORAGE_ENGINE_BACKEND seb __maybe_unused, STORAGE_INSTANCE *si, STORAGE_METRICS_GROUP *smg) { + internal_fatal(!is_valid_backend(seb), "STORAGE: invalid backend"); #ifdef 
ENABLE_DBENGINE - if(likely(backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - rrdeng_metrics_group_release(db_instance, smg); + if(likely(seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + rrdeng_metrics_group_release(si, smg); else #endif - rrddim_metrics_group_release(db_instance, smg); + rrddim_metrics_group_release(si, smg); } -STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg); -STORAGE_COLLECT_HANDLE *rrddim_collect_init(STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg); -static inline STORAGE_COLLECT_HANDLE *storage_metric_store_init(STORAGE_ENGINE_BACKEND backend __maybe_unused, STORAGE_METRIC_HANDLE *db_metric_handle, uint32_t update_every, STORAGE_METRICS_GROUP *smg) { - internal_fatal(!is_valid_backend(backend), "STORAGE: invalid backend"); +STORAGE_COLLECT_HANDLE *rrdeng_store_metric_init(STORAGE_METRIC_HANDLE *smh, uint32_t update_every, STORAGE_METRICS_GROUP *smg); +STORAGE_COLLECT_HANDLE *rrddim_collect_init(STORAGE_METRIC_HANDLE *smh, uint32_t update_every, STORAGE_METRICS_GROUP *smg); +static inline STORAGE_COLLECT_HANDLE *storage_metric_store_init(STORAGE_ENGINE_BACKEND seb __maybe_unused, STORAGE_METRIC_HANDLE *smh, uint32_t update_every, STORAGE_METRICS_GROUP *smg) { + internal_fatal(!is_valid_backend(seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - if(likely(backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return rrdeng_store_metric_init(db_metric_handle, update_every, smg); + if(likely(seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_store_metric_init(smh, update_every, smg); #endif - return rrddim_collect_init(db_metric_handle, update_every, smg); + return rrddim_collect_init(smh, update_every, smg); } void rrdeng_store_metric_next( - STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time_ut, + STORAGE_COLLECT_HANDLE *sch, usec_t point_in_time_ut, NETDATA_DOUBLE n, NETDATA_DOUBLE min_value, 
NETDATA_DOUBLE max_value, uint16_t count, uint16_t anomaly_count, SN_FLAGS flags); void rrddim_collect_store_metric( - STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time_ut, + STORAGE_COLLECT_HANDLE *sch, usec_t point_in_time_ut, NETDATA_DOUBLE n, NETDATA_DOUBLE min_value, NETDATA_DOUBLE max_value, uint16_t count, uint16_t anomaly_count, SN_FLAGS flags); static inline void storage_engine_store_metric( - STORAGE_COLLECT_HANDLE *collection_handle, usec_t point_in_time_ut, + STORAGE_COLLECT_HANDLE *sch, usec_t point_in_time_ut, NETDATA_DOUBLE n, NETDATA_DOUBLE min_value, NETDATA_DOUBLE max_value, uint16_t count, uint16_t anomaly_count, SN_FLAGS flags) { - internal_fatal(!is_valid_backend(collection_handle->backend), "STORAGE: invalid backend"); + internal_fatal(!is_valid_backend(sch->seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - if(likely(collection_handle->backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return rrdeng_store_metric_next(collection_handle, point_in_time_ut, + if(likely(sch->seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_store_metric_next(sch, point_in_time_ut, n, min_value, max_value, count, anomaly_count, flags); #endif - return rrddim_collect_store_metric(collection_handle, point_in_time_ut, + return rrddim_collect_store_metric(sch, point_in_time_ut, n, min_value, max_value, count, anomaly_count, flags); } -uint64_t rrdeng_disk_space_max(STORAGE_INSTANCE *db_instance); -static inline uint64_t storage_engine_disk_space_max(STORAGE_ENGINE_BACKEND backend __maybe_unused, STORAGE_INSTANCE *db_instance __maybe_unused) { +uint64_t rrdeng_disk_space_max(STORAGE_INSTANCE *si); +static inline uint64_t storage_engine_disk_space_max(STORAGE_ENGINE_BACKEND seb __maybe_unused, STORAGE_INSTANCE *si __maybe_unused) { +#ifdef ENABLE_DBENGINE + if(likely(seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_disk_space_max(si); +#endif + + return 0; +} + +uint64_t rrdeng_disk_space_used(STORAGE_INSTANCE *si); +static inline 
uint64_t storage_engine_disk_space_used(STORAGE_ENGINE_BACKEND seb __maybe_unused, STORAGE_INSTANCE *si __maybe_unused) { #ifdef ENABLE_DBENGINE - if(likely(backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return rrdeng_disk_space_max(db_instance); + if(likely(seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_disk_space_used(si); #endif + // TODO - calculate the total host disk space for memory mode save and map return 0; } -uint64_t rrdeng_disk_space_used(STORAGE_INSTANCE *db_instance); -static inline uint64_t storage_engine_disk_space_used(STORAGE_ENGINE_BACKEND backend __maybe_unused, STORAGE_INSTANCE *db_instance __maybe_unused) { +uint64_t rrdeng_metrics(STORAGE_INSTANCE *si); +static inline uint64_t storage_engine_metrics(STORAGE_ENGINE_BACKEND seb __maybe_unused, STORAGE_INSTANCE *si __maybe_unused) { #ifdef ENABLE_DBENGINE - if(likely(backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return rrdeng_disk_space_used(db_instance); + if(likely(seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_metrics(si); #endif // TODO - calculate the total host disk space for memory mode save and map return 0; } -time_t rrdeng_global_first_time_s(STORAGE_INSTANCE *db_instance); -static inline time_t storage_engine_global_first_time_s(STORAGE_ENGINE_BACKEND backend __maybe_unused, STORAGE_INSTANCE *db_instance __maybe_unused) { +uint64_t rrdeng_samples(STORAGE_INSTANCE *si); +static inline uint64_t storage_engine_samples(STORAGE_ENGINE_BACKEND seb __maybe_unused, STORAGE_INSTANCE *si __maybe_unused) { +#ifdef ENABLE_DBENGINE + if(likely(seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_samples(si); +#endif + return 0; +} + + +time_t rrdeng_global_first_time_s(STORAGE_INSTANCE *si); +static inline time_t storage_engine_global_first_time_s(STORAGE_ENGINE_BACKEND seb __maybe_unused, STORAGE_INSTANCE *si __maybe_unused) { #ifdef ENABLE_DBENGINE - if(likely(backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return rrdeng_global_first_time_s(db_instance); + if(likely(seb 
== STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_global_first_time_s(si); #endif return now_realtime_sec() - (time_t)(default_rrd_history_entries * default_rrd_update_every); } -size_t rrdeng_currently_collected_metrics(STORAGE_INSTANCE *db_instance); -static inline size_t storage_engine_collected_metrics(STORAGE_ENGINE_BACKEND backend __maybe_unused, STORAGE_INSTANCE *db_instance __maybe_unused) { +size_t rrdeng_currently_collected_metrics(STORAGE_INSTANCE *si); +static inline size_t storage_engine_collected_metrics(STORAGE_ENGINE_BACKEND seb __maybe_unused, STORAGE_INSTANCE *si __maybe_unused) { #ifdef ENABLE_DBENGINE - if(likely(backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return rrdeng_currently_collected_metrics(db_instance); + if(likely(seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_currently_collected_metrics(si); #endif // TODO - calculate the total host disk space for memory mode save and map return 0; } -void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *collection_handle); -void rrddim_store_metric_flush(STORAGE_COLLECT_HANDLE *collection_handle); -static inline void storage_engine_store_flush(STORAGE_COLLECT_HANDLE *collection_handle) { - if(unlikely(!collection_handle)) +void rrdeng_store_metric_flush_current_page(STORAGE_COLLECT_HANDLE *sch); +void rrddim_store_metric_flush(STORAGE_COLLECT_HANDLE *sch); +static inline void storage_engine_store_flush(STORAGE_COLLECT_HANDLE *sch) { + if(unlikely(!sch)) return; - internal_fatal(!is_valid_backend(collection_handle->backend), "STORAGE: invalid backend"); + internal_fatal(!is_valid_backend(sch->seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - if(likely(collection_handle->backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - rrdeng_store_metric_flush_current_page(collection_handle); + if(likely(sch->seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + rrdeng_store_metric_flush_current_page(sch); else #endif - rrddim_store_metric_flush(collection_handle); + 
rrddim_store_metric_flush(sch); } -int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *collection_handle); -int rrddim_collect_finalize(STORAGE_COLLECT_HANDLE *collection_handle); +int rrdeng_store_metric_finalize(STORAGE_COLLECT_HANDLE *sch); +int rrddim_collect_finalize(STORAGE_COLLECT_HANDLE *sch); // a finalization function to run after collection is over // returns 1 if it's safe to delete the dimension -static inline int storage_engine_store_finalize(STORAGE_COLLECT_HANDLE *collection_handle) { - internal_fatal(!is_valid_backend(collection_handle->backend), "STORAGE: invalid backend"); +static inline int storage_engine_store_finalize(STORAGE_COLLECT_HANDLE *sch) { + internal_fatal(!is_valid_backend(sch->seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - if(likely(collection_handle->backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return rrdeng_store_metric_finalize(collection_handle); + if(likely(sch->seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_store_metric_finalize(sch); #endif - return rrddim_collect_finalize(collection_handle); + return rrddim_collect_finalize(sch); } -void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every); -void rrddim_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every); -static inline void storage_engine_store_change_collection_frequency(STORAGE_COLLECT_HANDLE *collection_handle, int update_every) { - internal_fatal(!is_valid_backend(collection_handle->backend), "STORAGE: invalid backend"); +void rrdeng_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *sch, int update_every); +void rrddim_store_metric_change_collection_frequency(STORAGE_COLLECT_HANDLE *sch, int update_every); +static inline void storage_engine_store_change_collection_frequency(STORAGE_COLLECT_HANDLE *sch, int update_every) { + internal_fatal(!is_valid_backend(sch->seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - 
if(likely(collection_handle->backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - rrdeng_store_metric_change_collection_frequency(collection_handle, update_every); + if(likely(sch->seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + rrdeng_store_metric_change_collection_frequency(sch, update_every); else #endif - rrddim_store_metric_change_collection_frequency(collection_handle, update_every); + rrddim_store_metric_change_collection_frequency(sch, update_every); } // ---------------------------------------------------------------------------- // STORAGE ENGINE QUERY OPS -time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *db_metric_handle); -time_t rrddim_query_oldest_time_s(STORAGE_METRIC_HANDLE *db_metric_handle); -static inline time_t storage_engine_oldest_time_s(STORAGE_ENGINE_BACKEND backend __maybe_unused, STORAGE_METRIC_HANDLE *db_metric_handle) { - internal_fatal(!is_valid_backend(backend), "STORAGE: invalid backend"); +time_t rrdeng_metric_oldest_time(STORAGE_METRIC_HANDLE *smh); +time_t rrddim_query_oldest_time_s(STORAGE_METRIC_HANDLE *smh); +static inline time_t storage_engine_oldest_time_s(STORAGE_ENGINE_BACKEND seb __maybe_unused, STORAGE_METRIC_HANDLE *smh) { + internal_fatal(!is_valid_backend(seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - if(likely(backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return rrdeng_metric_oldest_time(db_metric_handle); + if(likely(seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_metric_oldest_time(smh); #endif - return rrddim_query_oldest_time_s(db_metric_handle); + return rrddim_query_oldest_time_s(smh); } -time_t rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *db_metric_handle); -time_t rrddim_query_latest_time_s(STORAGE_METRIC_HANDLE *db_metric_handle); -static inline time_t storage_engine_latest_time_s(STORAGE_ENGINE_BACKEND backend __maybe_unused, STORAGE_METRIC_HANDLE *db_metric_handle) { - internal_fatal(!is_valid_backend(backend), "STORAGE: invalid backend"); +time_t 
rrdeng_metric_latest_time(STORAGE_METRIC_HANDLE *smh); +time_t rrddim_query_latest_time_s(STORAGE_METRIC_HANDLE *smh); +static inline time_t storage_engine_latest_time_s(STORAGE_ENGINE_BACKEND seb __maybe_unused, STORAGE_METRIC_HANDLE *smh) { + internal_fatal(!is_valid_backend(seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - if(likely(backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return rrdeng_metric_latest_time(db_metric_handle); + if(likely(seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_metric_latest_time(smh); #endif - return rrddim_query_latest_time_s(db_metric_handle); + return rrddim_query_latest_time_s(smh); } void rrdeng_load_metric_init( - STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *rrddim_handle, + STORAGE_METRIC_HANDLE *smh, struct storage_engine_query_handle *seqh, time_t start_time_s, time_t end_time_s, STORAGE_PRIORITY priority); void rrddim_query_init( - STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *handle, + STORAGE_METRIC_HANDLE *smh, struct storage_engine_query_handle *seqh, time_t start_time_s, time_t end_time_s, STORAGE_PRIORITY priority); static inline void storage_engine_query_init( - STORAGE_ENGINE_BACKEND backend __maybe_unused, - STORAGE_METRIC_HANDLE *db_metric_handle, struct storage_engine_query_handle *handle, + STORAGE_ENGINE_BACKEND seb __maybe_unused, + STORAGE_METRIC_HANDLE *smh, struct storage_engine_query_handle *seqh, time_t start_time_s, time_t end_time_s, STORAGE_PRIORITY priority) { - internal_fatal(!is_valid_backend(backend), "STORAGE: invalid backend"); + internal_fatal(!is_valid_backend(seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - if(likely(backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - rrdeng_load_metric_init(db_metric_handle, handle, start_time_s, end_time_s, priority); + if(likely(seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + rrdeng_load_metric_init(smh, seqh, start_time_s, end_time_s, priority); else #endif - 
rrddim_query_init(db_metric_handle, handle, start_time_s, end_time_s, priority); + rrddim_query_init(smh, seqh, start_time_s, end_time_s, priority); } -STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *rrddim_handle); -STORAGE_POINT rrddim_query_next_metric(struct storage_engine_query_handle *handle); -static inline STORAGE_POINT storage_engine_query_next_metric(struct storage_engine_query_handle *handle) { - internal_fatal(!is_valid_backend(handle->backend), "STORAGE: invalid backend"); +STORAGE_POINT rrdeng_load_metric_next(struct storage_engine_query_handle *seqh); +STORAGE_POINT rrddim_query_next_metric(struct storage_engine_query_handle *seqh); +static inline STORAGE_POINT storage_engine_query_next_metric(struct storage_engine_query_handle *seqh) { + internal_fatal(!is_valid_backend(seqh->seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - if(likely(handle->backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return rrdeng_load_metric_next(handle); + if(likely(seqh->seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_load_metric_next(seqh); #endif - return rrddim_query_next_metric(handle); + return rrddim_query_next_metric(seqh); } -int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *rrddim_handle); -int rrddim_query_is_finished(struct storage_engine_query_handle *handle); -static inline int storage_engine_query_is_finished(struct storage_engine_query_handle *handle) { - internal_fatal(!is_valid_backend(handle->backend), "STORAGE: invalid backend"); +int rrdeng_load_metric_is_finished(struct storage_engine_query_handle *seqh); +int rrddim_query_is_finished(struct storage_engine_query_handle *seqh); +static inline int storage_engine_query_is_finished(struct storage_engine_query_handle *seqh) { + internal_fatal(!is_valid_backend(seqh->seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - if(likely(handle->backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return rrdeng_load_metric_is_finished(handle); + 
if(likely(seqh->seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_load_metric_is_finished(seqh); #endif - return rrddim_query_is_finished(handle); + return rrddim_query_is_finished(seqh); } -void rrdeng_load_metric_finalize(struct storage_engine_query_handle *rrddim_handle); -void rrddim_query_finalize(struct storage_engine_query_handle *handle); -static inline void storage_engine_query_finalize(struct storage_engine_query_handle *handle) { - internal_fatal(!is_valid_backend(handle->backend), "STORAGE: invalid backend"); +void rrdeng_load_metric_finalize(struct storage_engine_query_handle *seqh); +void rrddim_query_finalize(struct storage_engine_query_handle *seqh); +static inline void storage_engine_query_finalize(struct storage_engine_query_handle *seqh) { + internal_fatal(!is_valid_backend(seqh->seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - if(likely(handle->backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - rrdeng_load_metric_finalize(handle); + if(likely(seqh->seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + rrdeng_load_metric_finalize(seqh); else #endif - rrddim_query_finalize(handle); + rrddim_query_finalize(seqh); } -time_t rrdeng_load_align_to_optimal_before(struct storage_engine_query_handle *rrddim_handle); -time_t rrddim_query_align_to_optimal_before(struct storage_engine_query_handle *rrddim_handle); -static inline time_t storage_engine_align_to_optimal_before(struct storage_engine_query_handle *handle) { - internal_fatal(!is_valid_backend(handle->backend), "STORAGE: invalid backend"); +time_t rrdeng_load_align_to_optimal_before(struct storage_engine_query_handle *seqh); +time_t rrddim_query_align_to_optimal_before(struct storage_engine_query_handle *seqh); +static inline time_t storage_engine_align_to_optimal_before(struct storage_engine_query_handle *seqh) { + internal_fatal(!is_valid_backend(seqh->seb), "STORAGE: invalid backend"); #ifdef ENABLE_DBENGINE - if(likely(handle->backend == STORAGE_ENGINE_BACKEND_DBENGINE)) - return 
rrdeng_load_align_to_optimal_before(handle); + if(likely(seqh->seb == STORAGE_ENGINE_BACKEND_DBENGINE)) + return rrdeng_load_align_to_optimal_before(seqh); #endif - return rrddim_query_align_to_optimal_before(handle); + return rrddim_query_align_to_optimal_before(seqh); } // ------------------------------------------------------------------------ // function pointers for all APIs provided by a storage engine typedef struct storage_engine_api { // metric management - STORAGE_METRIC_HANDLE *(*metric_get)(STORAGE_INSTANCE *instance, uuid_t *uuid); - STORAGE_METRIC_HANDLE *(*metric_get_or_create)(RRDDIM *rd, STORAGE_INSTANCE *instance); + STORAGE_METRIC_HANDLE *(*metric_get)(STORAGE_INSTANCE *si, uuid_t *uuid); + STORAGE_METRIC_HANDLE *(*metric_get_or_create)(RRDDIM *rd, STORAGE_INSTANCE *si); void (*metric_release)(STORAGE_METRIC_HANDLE *); STORAGE_METRIC_HANDLE *(*metric_dup)(STORAGE_METRIC_HANDLE *); - bool (*metric_retention_by_uuid)(STORAGE_INSTANCE *db_instance, uuid_t *uuid, time_t *first_entry_s, time_t *last_entry_s); + bool (*metric_retention_by_uuid)(STORAGE_INSTANCE *si, uuid_t *uuid, time_t *first_entry_s, time_t *last_entry_s); } STORAGE_ENGINE_API; typedef struct storage_engine { - STORAGE_ENGINE_BACKEND backend; + STORAGE_ENGINE_BACKEND seb; RRD_MEMORY_MODE id; const char* name; STORAGE_ENGINE_API api; @@ -762,9 +743,6 @@ struct rrdset { int32_t update_every; // data collection frequency RRDLABELS *rrdlabels; // chart labels - DICTIONARY *rrdsetvar_root_index; // chart variables - DICTIONARY *rrddimvar_root_index; // dimension variables - // we use this dictionary to manage their allocation uint32_t version; // the metadata version (auto-increment) @@ -780,7 +758,7 @@ struct rrdset { rrd_ml_chart_t *ml_chart; - STORAGE_METRICS_GROUP *storage_metrics_groups[RRD_STORAGE_TIERS]; + STORAGE_METRICS_GROUP *smg[RRD_STORAGE_TIERS]; // ------------------------------------------------------------------------ // linking to siblings and parents @@ -830,11 +808,7 @@ 
struct rrdset { // (RRDSET_DB_STATE ptr to an undefined structure, and a call to clean this up during destruction) struct { - char *cache_dir; // the directory to store dimensions - void *st_on_file; // compatibility with V019 RRDSET files - int32_t entries; // total number of entries in the data set - int32_t current_entry; // the entry that is currently being updated // it goes around in a round-robin fashion } db; @@ -855,7 +829,6 @@ struct rrdset { NETDATA_DOUBLE red; // red threshold for this chart DICTIONARY *rrdvars; // RRDVAR index for this chart - const RRDFAMILY_ACQUIRED *rrdfamily; // pointer to RRDFAMILY dictionary item, this chart belongs to struct { RW_SPINLOCK spinlock; // protection for RRDCALC *base @@ -961,12 +934,7 @@ STRING *rrd_string_strdupz(const char *s); #define rrdset_number_of_dimensions(st) \ dictionary_entries((st)->rrddim_root_index) -void rrdset_memory_file_save(RRDSET *st); -void rrdset_memory_file_free(RRDSET *st); -void rrdset_memory_file_update(RRDSET *st); -const char *rrdset_cache_filename(RRDSET *st); -bool rrdset_memory_load_or_create_map_save(RRDSET *st_on_file, RRD_MEMORY_MODE memory_mode); - +#include "rrdcollector.h" #include "rrdfunctions.h" // ---------------------------------------------------------------------------- @@ -1002,19 +970,18 @@ typedef enum __attribute__ ((__packed__)) rrdhost_flags { // ACLK RRDHOST_FLAG_ACLK_STREAM_CONTEXTS = (1 << 21), // when set, we should send ACLK stream context updates - RRDHOST_FLAG_ACLK_STREAM_ALERTS = (1 << 22), // set when the receiver part is disconnected + RRDHOST_FLAG_ACLK_STREAM_ALERTS = (1 << 22), // Host should stream alerts // Metadata RRDHOST_FLAG_METADATA_UPDATE = (1 << 23), // metadata needs to be stored in the database RRDHOST_FLAG_METADATA_LABELS = (1 << 24), // metadata needs to be stored in the database RRDHOST_FLAG_METADATA_INFO = (1 << 25), // metadata needs to be stored in the database - RRDHOST_FLAG_PENDING_CONTEXT_LOAD = (1 << 26), // metadata needs to be 
stored in the database - RRDHOST_FLAG_CONTEXT_LOAD_IN_PROGRESS = (1 << 27), // metadata needs to be stored in the database + RRDHOST_FLAG_PENDING_CONTEXT_LOAD = (1 << 26), // Context needs to be loaded - RRDHOST_FLAG_METADATA_CLAIMID = (1 << 28), // metadata needs to be stored in the database - RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED = (1 << 29), // set when the receiver part is disconnected + RRDHOST_FLAG_METADATA_CLAIMID = (1 << 27), // metadata needs to be stored in the database + RRDHOST_FLAG_RRDPUSH_RECEIVER_DISCONNECTED = (1 << 28), // set when the receiver part is disconnected - RRDHOST_FLAG_GLOBAL_FUNCTIONS_UPDATED = (1 << 30), // set when the host has updated global functions + RRDHOST_FLAG_GLOBAL_FUNCTIONS_UPDATED = (1 << 29), // set when the host has updated global functions } RRDHOST_FLAGS; #define rrdhost_flag_check(host, flag) (__atomic_load_n(&((host)->flags), __ATOMIC_SEQ_CST) & (flag)) @@ -1035,15 +1002,11 @@ typedef enum __attribute__ ((__packed__)) { // Streaming configuration RRDHOST_OPTION_SENDER_ENABLED = (1 << 2), // set when the host is configured to send metrics to a parent + RRDHOST_OPTION_REPLICATION = (1 << 3), // when set, we support replication for this host - // Configuration options - RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS = (1 << 3), // delete files of obsolete charts - RRDHOST_OPTION_DELETE_ORPHAN_HOST = (1 << 4), // delete the entire host when orphan - - RRDHOST_OPTION_REPLICATION = (1 << 5), // when set, we support replication for this host - - RRDHOST_OPTION_VIRTUAL_HOST = (1 << 6), // when set, this host is a virtual one - RRDHOST_OPTION_EPHEMERAL_HOST = (1 << 7), // when set, this host is an ephemeral one + // Other options + RRDHOST_OPTION_VIRTUAL_HOST = (1 << 4), // when set, this host is a virtual one + RRDHOST_OPTION_EPHEMERAL_HOST = (1 << 5), // when set, this host is an ephemeral one } RRDHOST_OPTIONS; #define rrdhost_option_check(host, flag) ((host)->options & (flag)) @@ -1141,8 +1104,6 @@ typedef struct health { 
time_t health_delay_up_to; // a timestamp to delay alarms processing up to STRING *health_default_exec; // the full path of the alarms notifications program STRING *health_default_recipient; // the default recipient for all alarms - uint32_t health_default_warn_repeat_every; // the default value for the interval between repeating warning notifications - uint32_t health_default_crit_repeat_every; // the default value for the interval between repeating critical notifications unsigned int health_enabled; // 1 when this host has health enabled bool use_summary_for_notifications; // whether or not to use the summary field as a subject for notifications } HEALTH; @@ -1199,7 +1160,6 @@ struct rrdhost { STRING *hostname; // the hostname of this host STRING *registry_hostname; // the registry hostname for this host STRING *os; // the O/S type of the host - STRING *tags; // tags for this host STRING *timezone; // the timezone of the host STRING *abbrev_timezone; // the abbriviated timezone of the host STRING *program_name; // the program name that collects metrics for this host @@ -1222,7 +1182,7 @@ struct rrdhost { struct { RRD_MEMORY_MODE mode; // the db mode for this tier STORAGE_ENGINE *eng; // the storage engine API for this tier - STORAGE_INSTANCE *instance; // the db instance for this tier + STORAGE_INSTANCE *si; // the db instance for this tier uint32_t tier_grouping; // tier 0 iterations aggregated on this tier } db[RRD_STORAGE_TIERS]; @@ -1301,9 +1261,6 @@ struct rrdhost { // all RRDCALCs are primarily allocated and linked here DICTIONARY *rrdcalc_root_index; - // templates of alarms - DICTIONARY *rrdcalctemplate_root_index; - ALARM_LOG health_log; // alarms historical events (event log) uint32_t health_last_processed_id; // the last processed health id from the log uint32_t health_max_unique_id; // the max alarm log unique id given for the host @@ -1333,7 +1290,6 @@ struct rrdhost { DICTIONARY *rrdset_root_index; // the host's charts index (by id) DICTIONARY 
*rrdset_root_index_name; // the host's charts index (by name) - DICTIONARY *rrdfamily_root_index; // the host's chart families index DICTIONARY *rrdvars; // the host's chart variables index // this includes custom host variables @@ -1357,8 +1313,6 @@ struct rrdhost { netdata_mutex_t aclk_state_lock; aclk_rrdhost_state aclk_state; - DICTIONARY *configurable_plugins; // configurable plugins for this host - struct rrdhost *next; struct rrdhost *prev; }; @@ -1367,7 +1321,6 @@ extern RRDHOST *localhost; #define rrdhost_hostname(host) string2str((host)->hostname) #define rrdhost_registry_hostname(host) string2str((host)->registry_hostname) #define rrdhost_os(host) string2str((host)->os) -#define rrdhost_tags(host) string2str((host)->tags) #define rrdhost_timezone(host) string2str((host)->timezone) #define rrdhost_abbrev_timezone(host) string2str((host)->abbrev_timezone) #define rrdhost_program_name(host) string2str((host)->program_name) @@ -1416,7 +1369,7 @@ extern netdata_rwlock_t rrd_rwlock; // ---------------------------------------------------------------------------- -bool is_storage_engine_shared(STORAGE_INSTANCE *engine); +bool is_storage_engine_shared(STORAGE_INSTANCE *si); void rrdset_index_init(RRDHOST *host); void rrdset_index_destroy(RRDHOST *host); @@ -1442,9 +1395,8 @@ RRDHOST *rrdhost_find_or_create( const char *timezone, const char *abbrev_timezone, int32_t utc_offset, - const char *tags, - const char *program_name, - const char *program_version, + const char *prog_name, + const char *prog_version, int update_every, long history, RRD_MEMORY_MODE mode, @@ -1489,13 +1441,9 @@ RRDSET *rrdset_create_custom(RRDHOST *host rrdset_create(localhost, type, id, name, family, context, title, units, plugin, module, priority, update_every, chart_type) void rrdhost_free_all(void); -void rrdhost_save_all(void); -void rrdhost_cleanup_all(void); void rrdhost_system_info_free(struct rrdhost_system_info *system_info); void rrdhost_free___while_having_rrd_wrlock(RRDHOST 
*host, bool force); -void rrdhost_save_charts(RRDHOST *host); -void rrdhost_delete_charts(RRDHOST *host); int rrdhost_should_be_removed(RRDHOST *host, RRDHOST *protected_host, time_t now_s); @@ -1624,8 +1572,6 @@ void rrdhost_set_is_parent_label(void); // ---------------------------------------------------------------------------- // RRD internal functions -void rrdset_delete_files(RRDSET *st); -void rrdset_save(RRDSET *st); void rrdset_free(RRDSET *st); void rrddim_free(RRDSET *st, RRDDIM *rd); @@ -1633,17 +1579,15 @@ void rrddim_free(RRDSET *st, RRDDIM *rd); #ifdef NETDATA_RRD_INTERNALS char *rrdhost_cache_dir_for_rrdset_alloc(RRDHOST *host, const char *id); -const char *rrdset_cache_dir(RRDSET *st); void rrdset_reset(RRDSET *st); -void rrdset_delete_obsolete_dimensions(RRDSET *st); #endif /* NETDATA_RRD_INTERNALS */ void set_host_properties( RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode, const char *registry_hostname, - const char *os, const char *tags, const char *tzone, const char *abbrev_tzone, int32_t utc_offset, - const char *program_name, const char *program_version); + const char *os, const char *tzone, const char *abbrev_tzone, int32_t utc_offset, + const char *prog_name, const char *prog_version); size_t get_tier_grouping(size_t tier); void store_metric_collection_completed(void); diff --git a/src/database/rrdcollector-internals.h b/src/database/rrdcollector-internals.h new file mode 100644 index 000000000..d63ef6a76 --- /dev/null +++ b/src/database/rrdcollector-internals.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDCOLLECTOR_INTERNALS_H +#define NETDATA_RRDCOLLECTOR_INTERNALS_H + +#include "rrd.h" + +struct rrd_collector; +struct rrd_collector *rrd_collector_acquire_current_thread(void); +void rrd_collector_release(struct rrd_collector *rdc); +extern __thread struct rrd_collector *thread_rrd_collector; +bool rrd_collector_running(struct rrd_collector *rdc); +pid_t rrd_collector_tid(struct 
rrd_collector *rdc); +bool rrd_collector_dispatcher_acquire(struct rrd_collector *rdc); +void rrd_collector_dispatcher_release(struct rrd_collector *rdc); + +#endif //NETDATA_RRDCOLLECTOR_INTERNALS_H diff --git a/src/database/rrdcollector.c b/src/database/rrdcollector.c new file mode 100644 index 000000000..1a116c0c2 --- /dev/null +++ b/src/database/rrdcollector.c @@ -0,0 +1,137 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrdcollector.h" +#include "rrdcollector-internals.h" + +// Each function points to this collector structure +// so that when the collector exits, all of them will +// be invalidated (running == false) +// The last function using this collector +// frees the structure too (or when the collector calls +// rrdset_collector_finished()). + +struct rrd_collector { + int32_t refcount; + int32_t refcount_dispatcher; + pid_t tid; + bool running; +}; + +// Each thread that adds RRDSET functions has to call +// rrdset_collector_started() and rrdset_collector_finished() +// to create the collector structure. 
+ +__thread struct rrd_collector *thread_rrd_collector = NULL; + +inline bool rrd_collector_running(struct rrd_collector *rdc) { + return __atomic_load_n(&rdc->running, __ATOMIC_RELAXED); +} + +inline pid_t rrd_collector_tid(struct rrd_collector *rdc) { + return rdc->tid; +} + +bool rrd_collector_dispatcher_acquire(struct rrd_collector *rdc) { + int32_t expected = __atomic_load_n(&rdc->refcount_dispatcher, __ATOMIC_RELAXED); + int32_t wanted; + do { + if(expected < 0) + return false; + + wanted = expected + 1; + } while(!__atomic_compare_exchange_n(&rdc->refcount_dispatcher, &expected, wanted, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)); + + return true; +} + +void rrd_collector_dispatcher_release(struct rrd_collector *rdc) { + __atomic_sub_fetch(&rdc->refcount_dispatcher, 1, __ATOMIC_RELAXED); +} + +static void rrd_collector_free(struct rrd_collector *rdc) { + if(rdc->running) + return; + + int32_t expected = 0; + if(!__atomic_compare_exchange_n(&rdc->refcount, &expected, -1, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { + // the collector is still referenced by charts. + // leave it hanging there, the last chart will actually free it. + return; + } + + // we can free it now + freez(rdc); +} + +// called once per collector +void rrd_collector_started(void) { + if(!thread_rrd_collector) + thread_rrd_collector = callocz(1, sizeof(struct rrd_collector)); + + thread_rrd_collector->tid = gettid(); + __atomic_store_n(&thread_rrd_collector->running, true, __ATOMIC_RELAXED); +} + +// called once per collector +void rrd_collector_finished(void) { + if(!thread_rrd_collector) + return; + + __atomic_store_n(&thread_rrd_collector->running, false, __ATOMIC_RELAXED); + + // wait for any cancellation requests to be dispatched; + // the problem is that cancellation requests require a structure allocated by the collector, + // so, while cancellation requests are being dispatched, this structure is accessed. 
+ // delaying the exit of the thread is required to avoid cleaning up this structure. + + int32_t expected = 0; + while(!__atomic_compare_exchange_n(&thread_rrd_collector->refcount_dispatcher, &expected, -1, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) { + expected = 0; + sleep_usec(1 * USEC_PER_MS); + } + + rrd_collector_free(thread_rrd_collector); + thread_rrd_collector = NULL; +} + +bool rrd_collector_acquire(struct rrd_collector *rdc) { + + int32_t expected = __atomic_load_n(&rdc->refcount, __ATOMIC_RELAXED), wanted = 0; + do { + if(expected < 0 || !rrd_collector_running(rdc)) + return false; + + wanted = expected + 1; + } while(!__atomic_compare_exchange_n(&rdc->refcount, &expected, wanted, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)); + + return true; +} + +struct rrd_collector *rrd_collector_acquire_current_thread(void) { + rrd_collector_started(); + + if(!rrd_collector_acquire(thread_rrd_collector)) + internal_fatal(true, "FUNCTIONS: Trying to acquire a the current thread collector, that is currently exiting."); + + return thread_rrd_collector; +} + +void rrd_collector_release(struct rrd_collector *rdc) { + if(unlikely(!rdc)) return; + + int32_t expected = __atomic_load_n(&rdc->refcount, __ATOMIC_RELAXED), wanted = 0; + do { + if(expected < 0) + return; + + if(expected == 0) { + internal_fatal(true, "FUNCTIONS: Trying to release a collector that is not acquired."); + return; + } + + wanted = expected - 1; + } while(!__atomic_compare_exchange_n(&rdc->refcount, &expected, wanted, false, __ATOMIC_RELEASE, __ATOMIC_RELAXED)); + + if(wanted == 0) + rrd_collector_free(rdc); +} diff --git a/src/database/rrdcollector.h b/src/database/rrdcollector.h new file mode 100644 index 000000000..f1bbcbb97 --- /dev/null +++ b/src/database/rrdcollector.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDCOLLECTOR_H +#define NETDATA_RRDCOLLECTOR_H + +#include "rrd.h" + +// 
---------------------------------------------------------------------------- +// public API + +void rrd_collector_started(void); +void rrd_collector_finished(void); + +#endif //NETDATA_RRDCOLLECTOR_H diff --git a/src/database/rrddim.c b/src/database/rrddim.c new file mode 100644 index 000000000..c02b48789 --- /dev/null +++ b/src/database/rrddim.c @@ -0,0 +1,586 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS +#include "rrd.h" +#include "storage_engine.h" + +void rrddim_metadata_updated(RRDDIM *rd) { + rrdcontext_updated_rrddim(rd); + rrdset_metadata_updated(rd->rrdset); +} + +// ---------------------------------------------------------------------------- +// RRDDIM index + +struct rrddim_constructor { + RRDSET *st; + const char *id; + const char *name; + collected_number multiplier; + collected_number divisor; + RRD_ALGORITHM algorithm; + RRD_MEMORY_MODE memory_mode; + + enum { + RRDDIM_REACT_NONE = 0, + RRDDIM_REACT_NEW = (1 << 0), + RRDDIM_REACT_UPDATED = (1 << 2), + } react_action; + +}; + +// isolated call to appear +// separate in statistics +static void *rrddim_alloc_db(size_t entries) { + return callocz(entries, sizeof(storage_number)); +} + +static void rrddim_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddim, void *constructor_data) { + struct rrddim_constructor *ctr = constructor_data; + RRDDIM *rd = rrddim; + RRDSET *st = ctr->st; + RRDHOST *host = st->rrdhost; + + rd->flags = RRDDIM_FLAG_NONE; + + rd->id = string_strdupz(ctr->id); + rd->name = (ctr->name && *ctr->name)?rrd_string_strdupz(ctr->name):string_dup(rd->id); + + rd->algorithm = ctr->algorithm; + rd->multiplier = ctr->multiplier; + rd->divisor = ctr->divisor; + if(!rd->divisor) rd->divisor = 1; + + rd->rrdset = st; + + rd->rrdpush.sender.dim_slot = __atomic_add_fetch(&st->rrdpush.sender.dim_last_slot_used, 1, __ATOMIC_RELAXED); + + if(rrdset_flag_check(st, RRDSET_FLAG_STORE_FIRST)) + rd->collector.counter = 1; + + if(ctr->memory_mode == 
RRD_MEMORY_MODE_RAM) { + size_t entries = st->db.entries; + if(!entries) entries = 5; + + rd->db.data = netdata_mmap(NULL, entries * sizeof(storage_number), MAP_PRIVATE, 1, false, NULL); + if(!rd->db.data) { + netdata_log_info("Failed to use memory mode ram for chart '%s', dimension '%s', falling back to alloc", rrdset_name(st), rrddim_name(rd)); + ctr->memory_mode = RRD_MEMORY_MODE_ALLOC; + } + else { + rd->db.memsize = entries * sizeof(storage_number); + __atomic_add_fetch(&rrddim_db_memory_size, rd->db.memsize, __ATOMIC_RELAXED); + } + } + + if(ctr->memory_mode == RRD_MEMORY_MODE_ALLOC || ctr->memory_mode == RRD_MEMORY_MODE_NONE) { + size_t entries = st->db.entries; + if(entries < 5) entries = 5; + + rd->db.data = rrddim_alloc_db(entries); + rd->db.memsize = entries * sizeof(storage_number); + __atomic_add_fetch(&rrddim_db_memory_size, rd->db.memsize, __ATOMIC_RELAXED); + } + + rd->rrd_memory_mode = ctr->memory_mode; + + if (unlikely(rrdcontext_find_dimension_uuid(st, rrddim_id(rd), &(rd->metric_uuid)))) + uuid_generate(rd->metric_uuid); + + // initialize the db tiers + { + size_t initialized = 0; + for(size_t tier = 0; tier < storage_tiers ; tier++) { + STORAGE_ENGINE *eng = host->db[tier].eng; + rd->tiers[tier].seb = eng->seb; + rd->tiers[tier].tier_grouping = host->db[tier].tier_grouping; + rd->tiers[tier].smh = eng->api.metric_get_or_create(rd, host->db[tier].si); + spinlock_init(&rd->tiers[tier].spinlock); + storage_point_unset(rd->tiers[tier].virtual_point); + initialized++; + + // internal_error(true, "TIER GROUPING of chart '%s', dimension '%s' for tier %d is set to %d", rd->rrdset->name, rd->name, tier, rd->tiers[tier]->tier_grouping); + } + + if(!initialized) + netdata_log_error("Failed to initialize all db tiers for chart '%s', dimension '%s", rrdset_name(st), rrddim_name(rd)); + + if(!rd->tiers[0].smh) + netdata_log_error("Failed to initialize the first db tier for chart '%s', dimension '%s", rrdset_name(st), rrddim_name(rd)); + } + + // initialize 
data collection for all tiers + { + size_t initialized = 0; + for (size_t tier = 0; tier < storage_tiers; tier++) { + if (rd->tiers[tier].smh) { + rd->tiers[tier].sch = + storage_metric_store_init(rd->tiers[tier].seb, rd->tiers[tier].smh, st->rrdhost->db[tier].tier_grouping * st->update_every, rd->rrdset->smg[tier]); + initialized++; + } + } + + if(!initialized) + netdata_log_error("Failed to initialize data collection for all db tiers for chart '%s', dimension '%s", rrdset_name(st), rrddim_name(rd)); + } + + if(rrdset_number_of_dimensions(st) != 0) { + RRDDIM *td; + dfe_start_write(st->rrddim_root_index, td) { + if(td) break; + } + dfe_done(td); + + if(td && (td->algorithm != rd->algorithm || ABS(td->multiplier) != ABS(rd->multiplier) || ABS(td->divisor) != ABS(rd->divisor))) { + if(!rrdset_flag_check(st, RRDSET_FLAG_HETEROGENEOUS)) { +#ifdef NETDATA_INTERNAL_CHECKS + netdata_log_info("Dimension '%s' added on chart '%s' of host '%s' is not homogeneous to other dimensions already " + "present (algorithm is '%s' vs '%s', multiplier is %d vs %d, " + "divisor is %d vs %d).", + rrddim_name(rd), + rrdset_name(st), + rrdhost_hostname(host), + rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(td->algorithm), + rd->multiplier, td->multiplier, + rd->divisor, td->divisor + ); +#endif + rrdset_flag_set(st, RRDSET_FLAG_HETEROGENEOUS); + } + } + } + + rrddim_flag_set(rd, RRDDIM_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdset_flag_set(rd->rrdset, RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION); + + // let the chart resync + rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); + + ml_dimension_new(rd); + + ctr->react_action = RRDDIM_REACT_NEW; + + internal_error(false, "RRDDIM: inserted dimension '%s' of chart '%s' of host '%s'", + rrddim_name(rd), rrdset_name(st), rrdhost_hostname(st->rrdhost)); + +} + +bool rrddim_finalize_collection_and_check_retention(RRDDIM *rd) { + ND_LOG_STACK lgs[] = { + 
ND_LOG_FIELD_TXT(NDF_NIDL_NODE, rrdhost_hostname(rd->rrdset->rrdhost)), + ND_LOG_FIELD_TXT(NDF_NIDL_CONTEXT, rrdset_context(rd->rrdset)), + ND_LOG_FIELD_TXT(NDF_NIDL_INSTANCE, rrdset_name(rd->rrdset)), + ND_LOG_FIELD_TXT(NDF_NIDL_DIMENSION, rrddim_name(rd)), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + size_t tiers_available = 0, tiers_said_no_retention = 0; + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + spinlock_lock(&rd->tiers[tier].spinlock); + + if(rd->tiers[tier].sch) { + tiers_available++; + + if (storage_engine_store_finalize(rd->tiers[tier].sch)) + tiers_said_no_retention++; + + rd->tiers[tier].sch = NULL; + } + + spinlock_unlock(&rd->tiers[tier].spinlock); + } + + // return true if the dimension has retention in the db + return (!tiers_said_no_retention || tiers_available > tiers_said_no_retention); +} + +static void rrddim_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddim, void *rrdset) { + RRDDIM *rd = rrddim; + RRDSET *st = rrdset; + RRDHOST *host = st->rrdhost; + + internal_error(false, "RRDDIM: deleting dimension '%s' of chart '%s' of host '%s'", + rrddim_name(rd), rrdset_name(st), rrdhost_hostname(host)); + + rrdcontext_removed_rrddim(rd); + + ml_dimension_delete(rd); + + netdata_log_debug(D_RRD_CALLS, "rrddim_free() %s.%s", rrdset_name(st), rrddim_name(rd)); + + if (!rrddim_finalize_collection_and_check_retention(rd) && rd->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { + /* This metric has no data and no references */ + metaqueue_delete_dimension_uuid(&rd->metric_uuid); + } + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + spinlock_lock(&rd->tiers[tier].spinlock); + if(rd->tiers[tier].smh) { + STORAGE_ENGINE *eng = host->db[tier].eng; + eng->api.metric_release(rd->tiers[tier].smh); + rd->tiers[tier].smh = NULL; + } + spinlock_unlock(&rd->tiers[tier].spinlock); + } + + if(rd->db.data) { + __atomic_sub_fetch(&rrddim_db_memory_size, rd->db.memsize, __ATOMIC_RELAXED); + + if(rd->rrd_memory_mode == 
RRD_MEMORY_MODE_RAM) + netdata_munmap(rd->db.data, rd->db.memsize); + else + freez(rd->db.data); + } + + string_freez(rd->id); + string_freez(rd->name); +} + +static bool rrddim_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddim, void *new_rrddim, void *constructor_data) { + (void)new_rrddim; // it is NULL + + struct rrddim_constructor *ctr = constructor_data; + RRDDIM *rd = rrddim; + RRDSET *st = ctr->st; + + ctr->react_action = RRDDIM_REACT_NONE; + + int rc = rrddim_reset_name(st, rd, ctr->name); + rc += rrddim_set_algorithm(st, rd, ctr->algorithm); + rc += rrddim_set_multiplier(st, rd, ctr->multiplier); + rc += rrddim_set_divisor(st, rd, ctr->divisor); + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + if (!rd->tiers[tier].sch) + rd->tiers[tier].sch = + storage_metric_store_init(rd->tiers[tier].seb, rd->tiers[tier].smh, st->rrdhost->db[tier].tier_grouping * st->update_every, rd->rrdset->smg[tier]); + } + + if(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) { + rrddim_flag_clear(rd, RRDDIM_FLAG_ARCHIVED); + + rrddim_flag_set(rd, RRDDIM_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdset_flag_set(rd->rrdset, RRDSET_FLAG_PENDING_HEALTH_INITIALIZATION); + rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_PENDING_HEALTH_INITIALIZATION); + } + + if(unlikely(rc)) + ctr->react_action = RRDDIM_REACT_UPDATED; + + return ctr->react_action == RRDDIM_REACT_UPDATED; +} + +static void rrddim_react_callback(const DICTIONARY_ITEM *item __maybe_unused, void *rrddim, void *constructor_data) { + struct rrddim_constructor *ctr = constructor_data; + RRDDIM *rd = rrddim; + RRDSET *st = ctr->st; + + if(ctr->react_action & (RRDDIM_REACT_UPDATED | RRDDIM_REACT_NEW)) { + rrddim_flag_set(rd, RRDDIM_FLAG_METADATA_UPDATE); + rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_METADATA_UPDATE); + } + + if(ctr->react_action == RRDDIM_REACT_UPDATED) { + // the chart needs to be updated to the parent + rrdset_flag_set(st, RRDSET_FLAG_SYNC_CLOCK); + } + + 
rrddim_metadata_updated(rd); +} + +size_t rrddim_size(void) { + return sizeof(RRDDIM) + storage_tiers * sizeof(struct rrddim_tier); +} + +void rrddim_index_init(RRDSET *st) { + if(!st->rrddim_root_index) { + st->rrddim_root_index = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + &dictionary_stats_category_rrdset_rrddim, rrddim_size()); + + dictionary_register_insert_callback(st->rrddim_root_index, rrddim_insert_callback, NULL); + dictionary_register_conflict_callback(st->rrddim_root_index, rrddim_conflict_callback, NULL); + dictionary_register_delete_callback(st->rrddim_root_index, rrddim_delete_callback, st); + dictionary_register_react_callback(st->rrddim_root_index, rrddim_react_callback, st); + } +} + +void rrddim_index_destroy(RRDSET *st) { + dictionary_destroy(st->rrddim_root_index); + st->rrddim_root_index = NULL; +} + +static inline RRDDIM *rrddim_index_find(RRDSET *st, const char *id) { + return dictionary_get(st->rrddim_root_index, id); +} + +// ---------------------------------------------------------------------------- +// RRDDIM - find a dimension + +inline RRDDIM *rrddim_find(RRDSET *st, const char *id) { + netdata_log_debug(D_RRD_CALLS, "rrddim_find() for chart %s, dimension %s", rrdset_name(st), id); + + return rrddim_index_find(st, id); +} + +inline RRDDIM_ACQUIRED *rrddim_find_and_acquire(RRDSET *st, const char *id) { + netdata_log_debug(D_RRD_CALLS, "rrddim_find_and_acquire() for chart %s, dimension %s", rrdset_name(st), id); + + return (RRDDIM_ACQUIRED *)dictionary_get_and_acquire_item(st->rrddim_root_index, id); +} + +RRDDIM *rrddim_acquired_to_rrddim(RRDDIM_ACQUIRED *rda) { + if(unlikely(!rda)) + return NULL; + + return (RRDDIM *) dictionary_acquired_item_value((const DICTIONARY_ITEM *)rda); +} + +void rrddim_acquired_release(RRDDIM_ACQUIRED *rda) { + if(unlikely(!rda)) + return; + + RRDDIM *rd = rrddim_acquired_to_rrddim(rda); + dictionary_acquired_item_release(rd->rrdset->rrddim_root_index, (const 
DICTIONARY_ITEM *)rda); +} + +// This will not return dimensions that are archived +RRDDIM *rrddim_find_active(RRDSET *st, const char *id) { + RRDDIM *rd = rrddim_find(st, id); + + if (unlikely(rd && rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED))) + return NULL; + + return rd; +} + +// ---------------------------------------------------------------------------- +// RRDDIM rename a dimension + +inline int rrddim_reset_name(RRDSET *st __maybe_unused, RRDDIM *rd, const char *name) { + if(unlikely(!name || !*name || !strcmp(rrddim_name(rd), name))) + return 0; + + netdata_log_debug(D_RRD_CALLS, "rrddim_reset_name() from %s.%s to %s.%s", rrdset_name(st), rrddim_name(rd), rrdset_name(st), name); + + STRING *old = rd->name; + rd->name = rrd_string_strdupz(name); + string_freez(old); + + rrddim_metadata_updated(rd); + + return 1; +} + +inline int rrddim_set_algorithm(RRDSET *st, RRDDIM *rd, RRD_ALGORITHM algorithm) { + if(unlikely(rd->algorithm == algorithm)) + return 0; + + netdata_log_debug(D_RRD_CALLS, "Updating algorithm of dimension '%s/%s' from %s to %s", rrdset_id(st), rrddim_name(rd), rrd_algorithm_name(rd->algorithm), rrd_algorithm_name(algorithm)); + rd->algorithm = algorithm; + rrddim_metadata_updated(rd); + rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); + rrdcontext_updated_rrddim_algorithm(rd); + return 1; +} + +inline int rrddim_set_multiplier(RRDSET *st, RRDDIM *rd, int32_t multiplier) { + if(unlikely(rd->multiplier == multiplier)) + return 0; + + netdata_log_debug(D_RRD_CALLS, "Updating multiplier of dimension '%s/%s' from %d to %d", + rrdset_id(st), rrddim_name(rd), rd->multiplier, multiplier); + rd->multiplier = multiplier; + rrddim_metadata_updated(rd); + rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); + rrdcontext_updated_rrddim_multiplier(rd); + return 1; +} + +inline int rrddim_set_divisor(RRDSET *st, RRDDIM *rd, int32_t divisor) { + if(unlikely(rd->divisor == divisor)) + return 0; + + netdata_log_debug(D_RRD_CALLS, "Updating divisor of 
dimension '%s/%s' from %d to %d", + rrdset_id(st), rrddim_name(rd), rd->divisor, divisor); + rd->divisor = divisor; + rrddim_metadata_updated(rd); + rrdset_flag_set(st, RRDSET_FLAG_HOMOGENEOUS_CHECK); + rrdcontext_updated_rrddim_divisor(rd); + return 1; +} + +// ---------------------------------------------------------------------------- + +time_t rrddim_last_entry_s_of_tier(RRDDIM *rd, size_t tier) { + if(unlikely(tier > storage_tiers || !rd->tiers[tier].smh)) + return 0; + + return storage_engine_latest_time_s(rd->tiers[tier].seb, rd->tiers[tier].smh); +} + +// get the timestamp of the last entry in the round-robin database +time_t rrddim_last_entry_s(RRDDIM *rd) { + time_t latest_time_s = rrddim_last_entry_s_of_tier(rd, 0); + + for(size_t tier = 1; tier < storage_tiers ;tier++) { + if(unlikely(!rd->tiers[tier].smh)) continue; + + time_t t = rrddim_last_entry_s_of_tier(rd, tier); + if(t > latest_time_s) + latest_time_s = t; + } + + return latest_time_s; +} + +time_t rrddim_first_entry_s_of_tier(RRDDIM *rd, size_t tier) { + if(unlikely(tier > storage_tiers || !rd->tiers[tier].smh)) + return 0; + + return storage_engine_oldest_time_s(rd->tiers[tier].seb, rd->tiers[tier].smh); +} + +time_t rrddim_first_entry_s(RRDDIM *rd) { + time_t oldest_time_s = 0; + + for(size_t tier = 0; tier < storage_tiers ;tier++) { + time_t t = rrddim_first_entry_s_of_tier(rd, tier); + if(t != 0 && (oldest_time_s == 0 || t < oldest_time_s)) + oldest_time_s = t; + } + + return oldest_time_s; +} + +RRDDIM *rrddim_add_custom(RRDSET *st + , const char *id + , const char *name + , collected_number multiplier + , collected_number divisor + , RRD_ALGORITHM algorithm + , RRD_MEMORY_MODE memory_mode + ) { + struct rrddim_constructor tmp = { + .st = st, + .id = id, + .name = name, + .multiplier = multiplier, + .divisor = divisor, + .algorithm = algorithm, + .memory_mode = memory_mode, + }; + + RRDDIM *rd = dictionary_set_advanced(st->rrddim_root_index, tmp.id, -1, NULL, rrddim_size(), &tmp); + 
return(rd); +} + +// ---------------------------------------------------------------------------- +// RRDDIM remove / free a dimension + +void rrddim_free(RRDSET *st, RRDDIM *rd) { + dictionary_del(st->rrddim_root_index, string2str(rd->id)); +} + + +// ---------------------------------------------------------------------------- +// RRDDIM - set dimension options + +int rrddim_hide(RRDSET *st, const char *id) { + netdata_log_debug(D_RRD_CALLS, "rrddim_hide() for chart %s, dimension %s", rrdset_name(st), id); + + RRDHOST *host = st->rrdhost; + + RRDDIM *rd = rrddim_find(st, id); + if(unlikely(!rd)) { + netdata_log_error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, rrdset_name(st), rrdset_id(st), rrdhost_hostname(host)); + return 1; + } + if (!rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) { + rrddim_flag_set(rd, RRDDIM_FLAG_META_HIDDEN | RRDDIM_FLAG_METADATA_UPDATE); + rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_METADATA_UPDATE); + } + + rrddim_option_set(rd, RRDDIM_OPTION_HIDDEN); + rrdcontext_updated_rrddim_flags(rd); + return 0; +} + +int rrddim_unhide(RRDSET *st, const char *id) { + netdata_log_debug(D_RRD_CALLS, "rrddim_unhide() for chart %s, dimension %s", rrdset_name(st), id); + + RRDHOST *host = st->rrdhost; + RRDDIM *rd = rrddim_find(st, id); + if(unlikely(!rd)) { + netdata_log_error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, rrdset_name(st), rrdset_id(st), rrdhost_hostname(host)); + return 1; + } + if (rrddim_flag_check(rd, RRDDIM_FLAG_META_HIDDEN)) { + rrddim_flag_clear(rd, RRDDIM_FLAG_META_HIDDEN); + rrddim_flag_set(rd, RRDDIM_FLAG_METADATA_UPDATE); + rrdhost_flag_set(rd->rrdset->rrdhost, RRDHOST_FLAG_METADATA_UPDATE); + } + + rrddim_option_clear(rd, RRDDIM_OPTION_HIDDEN); + rrdcontext_updated_rrddim_flags(rd); + return 0; +} + +inline void rrddim_is_obsolete___safe_from_collector_thread(RRDSET *st, RRDDIM *rd) { + netdata_log_debug(D_RRD_CALLS, 
"rrddim_is_obsolete___safe_from_collector_thread() for chart %s, dimension %s", rrdset_name(st), rrddim_name(rd)); + + if(unlikely(rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED))) { + netdata_log_info("Cannot obsolete already archived dimension %s from chart %s", rrddim_name(rd), rrdset_name(st)); + return; + } + rrddim_flag_set(rd, RRDDIM_FLAG_OBSOLETE); + rrdset_flag_set(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS); + rrdhost_flag_set(st->rrdhost, RRDHOST_FLAG_PENDING_OBSOLETE_DIMENSIONS); + rrdcontext_updated_rrddim_flags(rd); +} + +inline void rrddim_isnot_obsolete___safe_from_collector_thread(RRDSET *st __maybe_unused, RRDDIM *rd) { + netdata_log_debug(D_RRD_CALLS, "rrddim_isnot_obsolete___safe_from_collector_thread() for chart %s, dimension %s", rrdset_name(st), rrddim_name(rd)); + + rrddim_flag_clear(rd, RRDDIM_FLAG_OBSOLETE); + rrdcontext_updated_rrddim_flags(rd); +} + +// ---------------------------------------------------------------------------- +// RRDDIM - collect values for a dimension + +inline collected_number rrddim_set_by_pointer(RRDSET *st, RRDDIM *rd, collected_number value) { + struct timeval now; + now_realtime_timeval(&now); + + return rrddim_timed_set_by_pointer(st, rd, now, value); +} + +collected_number rrddim_timed_set_by_pointer(RRDSET *st __maybe_unused, RRDDIM *rd, struct timeval collected_time, collected_number value) { + netdata_log_debug(D_RRD_CALLS, "rrddim_set_by_pointer() for chart %s, dimension %s, value " COLLECTED_NUMBER_FORMAT, rrdset_name(st), rrddim_name(rd), value); + + rd->collector.last_collected_time = collected_time; + rd->collector.collected_value = value; + rrddim_set_updated(rd); + rd->collector.counter++; + + collected_number v = (value >= 0) ? 
value : -value; + if (unlikely(v > rd->collector.collected_value_max)) + rd->collector.collected_value_max = v; + + return rd->collector.last_collected_value; +} + + +collected_number rrddim_set(RRDSET *st, const char *id, collected_number value) { + RRDHOST *host = st->rrdhost; + RRDDIM *rd = rrddim_find(st, id); + if(unlikely(!rd)) { + netdata_log_error("Cannot find dimension with id '%s' on stats '%s' (%s) on host '%s'.", id, rrdset_name(st), rrdset_id(st), rrdhost_hostname(host)); + return 0; + } + + return rrddim_set_by_pointer(st, rd, value); +} diff --git a/src/database/rrdfunctions-exporters.c b/src/database/rrdfunctions-exporters.c new file mode 100644 index 000000000..afcdc8a98 --- /dev/null +++ b/src/database/rrdfunctions-exporters.c @@ -0,0 +1,164 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS + +#include "rrdfunctions-internals.h" +#include "rrdfunctions-exporters.h" + +void rrd_chart_functions_expose_rrdpush(RRDSET *st, BUFFER *wb) { + if(!st->functions_view) + return; + + struct rrd_host_function *t; + dfe_start_read(st->functions_view, t) { + if(t->options & RRD_FUNCTION_DYNCFG) continue; + + buffer_sprintf(wb + , PLUGINSD_KEYWORD_FUNCTION " \"%s\" %d \"%s\" \"%s\" "HTTP_ACCESS_FORMAT" %d\n" + , t_dfe.name + , t->timeout + , string2str(t->help) + , string2str(t->tags) + , (HTTP_ACCESS_FORMAT_CAST)t->access + , t->priority + ); + } + dfe_done(t); +} + +void rrd_global_functions_expose_rrdpush(RRDHOST *host, BUFFER *wb, bool dyncfg) { + rrdhost_flag_clear(host, RRDHOST_FLAG_GLOBAL_FUNCTIONS_UPDATED); + + size_t configs = 0; + + struct rrd_host_function *tmp; + dfe_start_read(host->functions, tmp) { + if(tmp->options & RRD_FUNCTION_LOCAL) continue; + if(tmp->options & RRD_FUNCTION_DYNCFG) { + // we should not send dyncfg to this parent + configs++; + continue; + } + + buffer_sprintf(wb + , PLUGINSD_KEYWORD_FUNCTION " GLOBAL \"%s\" %d \"%s\" \"%s\" "HTTP_ACCESS_FORMAT" %d\n" + , tmp_dfe.name + , tmp->timeout + , 
string2str(tmp->help) + , string2str(tmp->tags) + , (HTTP_ACCESS_FORMAT_CAST)tmp->access + , tmp->priority + ); + } + dfe_done(tmp); + + if(dyncfg && configs) + dyncfg_add_streaming(wb); +} + +static void functions2json(DICTIONARY *functions, BUFFER *wb) { + struct rrd_host_function *t; + dfe_start_read(functions, t) { + if (!rrd_collector_running(t->collector)) continue; + if(t->options & RRD_FUNCTION_DYNCFG) continue; + + buffer_json_member_add_object(wb, t_dfe.name); + { + buffer_json_member_add_string_or_empty(wb, "help", string2str(t->help)); + buffer_json_member_add_int64(wb, "timeout", (int64_t) t->timeout); + + char options[65]; + snprintfz( + options, 64 + , "%s%s" + , (t->options & RRD_FUNCTION_LOCAL) ? "LOCAL " : "" + , (t->options & RRD_FUNCTION_GLOBAL) ? "GLOBAL" : "" + ); + + buffer_json_member_add_string_or_empty(wb, "options", options); + buffer_json_member_add_string_or_empty(wb, "tags", string2str(t->tags)); + http_access2buffer_json_array(wb, "access", t->access); + buffer_json_member_add_uint64(wb, "priority", t->priority); + } + buffer_json_object_close(wb); + } + dfe_done(t); +} + +void chart_functions2json(RRDSET *st, BUFFER *wb) { + if(!st || !st->functions_view) return; + + functions2json(st->functions_view, wb); +} + +void host_functions2json(RRDHOST *host, BUFFER *wb) { + if(!host || !host->functions) return; + + buffer_json_member_add_object(wb, "functions"); + + struct rrd_host_function *t; + dfe_start_read(host->functions, t) { + if(!rrd_collector_running(t->collector)) continue; + if(t->options & RRD_FUNCTION_DYNCFG) continue; + + buffer_json_member_add_object(wb, t_dfe.name); + { + buffer_json_member_add_string(wb, "help", string2str(t->help)); + buffer_json_member_add_int64(wb, "timeout", t->timeout); + buffer_json_member_add_array(wb, "options"); + { + if (t->options & RRD_FUNCTION_GLOBAL) + buffer_json_add_array_item_string(wb, "GLOBAL"); + if (t->options & RRD_FUNCTION_LOCAL) + buffer_json_add_array_item_string(wb, "LOCAL"); + } 
+ buffer_json_array_close(wb); + buffer_json_member_add_string(wb, "tags", string2str(t->tags)); + http_access2buffer_json_array(wb, "access", t->access); + buffer_json_member_add_uint64(wb, "priority", t->priority); + } + buffer_json_object_close(wb); + } + dfe_done(t); + + buffer_json_object_close(wb); +} + +void chart_functions_to_dict(DICTIONARY *rrdset_functions_view, DICTIONARY *dst, void *value, size_t value_size) { + if(!rrdset_functions_view || !dst) return; + + struct rrd_host_function *t; + dfe_start_read(rrdset_functions_view, t) { + if(!rrd_collector_running(t->collector)) continue; + if(t->options & RRD_FUNCTION_DYNCFG) continue; + + dictionary_set(dst, t_dfe.name, value, value_size); + } + dfe_done(t); +} + +void host_functions_to_dict(RRDHOST *host, DICTIONARY *dst, void *value, size_t value_size, + STRING **help, STRING **tags, HTTP_ACCESS *access, int *priority) { + if(!host || !host->functions || !dictionary_entries(host->functions) || !dst) return; + + struct rrd_host_function *t; + dfe_start_read(host->functions, t) { + if(!rrd_collector_running(t->collector)) continue; + if(t->options & RRD_FUNCTION_DYNCFG) continue; + + if(help) + *help = t->help; + + if(tags) + *tags = t->tags; + + if(access) + *access = t->access; + + if(priority) + *priority = t->priority; + + dictionary_set(dst, t_dfe.name, value, value_size); + } + dfe_done(t); +} diff --git a/src/database/rrdfunctions-exporters.h b/src/database/rrdfunctions-exporters.h new file mode 100644 index 000000000..43bb660eb --- /dev/null +++ b/src/database/rrdfunctions-exporters.h @@ -0,0 +1,17 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDFUNCTIONS_EXPORTERS_H +#define NETDATA_RRDFUNCTIONS_EXPORTERS_H + +#include "rrd.h" + +void rrd_chart_functions_expose_rrdpush(RRDSET *st, BUFFER *wb); +void rrd_global_functions_expose_rrdpush(RRDHOST *host, BUFFER *wb, bool dyncfg); + +void chart_functions2json(RRDSET *st, BUFFER *wb); +void chart_functions_to_dict(DICTIONARY 
*rrdset_functions_view, DICTIONARY *dst, void *value, size_t value_size); +void host_functions_to_dict(RRDHOST *host, DICTIONARY *dst, void *value, size_t value_size, STRING **help, STRING **tags, + HTTP_ACCESS *access, int *priority); +void host_functions2json(RRDHOST *host, BUFFER *wb); + +#endif //NETDATA_RRDFUNCTIONS_EXPORTERS_H diff --git a/src/database/rrdfunctions-inflight.c b/src/database/rrdfunctions-inflight.c new file mode 100644 index 000000000..585a6d269 --- /dev/null +++ b/src/database/rrdfunctions-inflight.c @@ -0,0 +1,691 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS + +#include "rrdcollector-internals.h" +#include "rrdfunctions-internals.h" +#include "rrdfunctions-inflight.h" + +struct rrd_function_inflight { + bool used; + + RRDHOST *host; + uuid_t transaction_uuid; + const char *transaction; + const char *cmd; + const char *sanitized_cmd; + const char *source; + size_t sanitized_cmd_length; + int timeout; + bool cancelled; + usec_t stop_monotonic_ut; + + HTTP_ACCESS user_access; + + BUFFER *payload; + + const DICTIONARY_ITEM *host_function_acquired; + + // the collector + // we acquire this structure at the beginning, + // and we release it at the end + struct rrd_host_function *rdcf; + + struct { + BUFFER *wb; + + // in async mode, + // the function to call to send the result back + rrd_function_result_callback_t cb; + void *data; + } result; + + struct { + // to be called in sync mode + // while the function is running + // to check if the function has been canceled + rrd_function_is_cancelled_cb_t cb; + void *data; + } is_cancelled; + + struct { + // to be registered by the function itself + // used to signal the function to cancel + rrd_function_cancel_cb_t cb; + void *data; + } canceller; + + struct { + // callback to receive progress reports from function + rrd_function_progress_cb_t cb; + void *data; + } progress; + + struct { + // to be registered by the function itself + // used to send progress 
requests to function + rrd_function_progresser_cb_t cb; + void *data; + } progresser; +}; + +static DICTIONARY *rrd_functions_inflight_requests = NULL; + +static void rrd_function_cancel_inflight(struct rrd_function_inflight *r); + +// ---------------------------------------------------------------------------- + +static void rrd_functions_inflight_cleanup(struct rrd_function_inflight *r) { + buffer_free(r->payload); + freez((void *)r->transaction); + freez((void *)r->cmd); + freez((void *)r->sanitized_cmd); + freez((void *)r->source); + + r->payload = NULL; + r->transaction = NULL; + r->cmd = NULL; + r->sanitized_cmd = NULL; +} + +static void rrd_functions_inflight_delete_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct rrd_function_inflight *r = value; + + // internal_error(true, "FUNCTIONS: transaction '%s' finished", r->transaction); + + rrd_functions_inflight_cleanup(r); + dictionary_acquired_item_release(r->host->functions, r->host_function_acquired); +} + +void rrd_functions_inflight_init(void) { + if(rrd_functions_inflight_requests) + return; + + rrd_functions_inflight_requests = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct rrd_function_inflight)); + + dictionary_register_delete_callback(rrd_functions_inflight_requests, rrd_functions_inflight_delete_cb, NULL); +} + +void rrd_functions_inflight_destroy(void) { + if(!rrd_functions_inflight_requests) + return; + + dictionary_destroy(rrd_functions_inflight_requests); + rrd_functions_inflight_requests = NULL; +} + +static void rrd_inflight_async_function_register_canceller_cb(void *register_canceller_cb_data, rrd_function_cancel_cb_t canceller_cb, void *canceller_cb_data) { + struct rrd_function_inflight *r = register_canceller_cb_data; + r->canceller.cb = canceller_cb; + r->canceller.data = canceller_cb_data; +} + +static void rrd_inflight_async_function_register_progresser_cb(void 
*register_progresser_cb_data, rrd_function_progresser_cb_t progresser_cb, void *progresser_cb_data) { + struct rrd_function_inflight *r = register_progresser_cb_data; + r->progresser.cb = progresser_cb; + r->progresser.data = progresser_cb_data; +} + +// ---------------------------------------------------------------------------- +// waiting for async function completion + +struct rrd_function_call_wait { + RRDHOST *host; + const DICTIONARY_ITEM *host_function_acquired; + char *transaction; + + bool free_with_signal; + bool data_are_ready; + netdata_mutex_t mutex; + pthread_cond_t cond; + int code; +}; + +static void rrd_inflight_function_cleanup(RRDHOST *host __maybe_unused, const char *transaction) { + dictionary_del(rrd_functions_inflight_requests, transaction); + dictionary_garbage_collect(rrd_functions_inflight_requests); +} + +static void rrd_function_call_wait_free(struct rrd_function_call_wait *tmp) { + rrd_inflight_function_cleanup(tmp->host, tmp->transaction); + freez(tmp->transaction); + + pthread_cond_destroy(&tmp->cond); + netdata_mutex_destroy(&tmp->mutex); + freez(tmp); +} + +static void rrd_async_function_signal_when_ready(BUFFER *temp_wb __maybe_unused, int code, void *callback_data) { + struct rrd_function_call_wait *tmp = callback_data; + bool we_should_free = false; + + netdata_mutex_lock(&tmp->mutex); + + // since we got the mutex, + // the waiting thread is either in pthread_cond_timedwait() + // or gave up and left. 
+ + tmp->code = code; + tmp->data_are_ready = true; + + if(tmp->free_with_signal) + we_should_free = true; + + pthread_cond_signal(&tmp->cond); + + netdata_mutex_unlock(&tmp->mutex); + + if(we_should_free) { + buffer_free(temp_wb); + rrd_function_call_wait_free(tmp); + } +} + +static void rrd_inflight_async_function_nowait_finished(BUFFER *wb, int code, void *data) { + struct rrd_function_inflight *r = data; + + if(r->result.cb) + r->result.cb(wb, code, r->result.data); + + rrd_inflight_function_cleanup(r->host, r->transaction); +} + +static bool rrd_inflight_async_function_is_cancelled(void *data) { + struct rrd_function_inflight *r = data; + return __atomic_load_n(&r->cancelled, __ATOMIC_RELAXED); +} + +static inline int rrd_call_function_async_and_dont_wait(struct rrd_function_inflight *r) { + struct rrd_function_execute rfe = { + .transaction = &r->transaction_uuid, + .function = r->sanitized_cmd, + .payload = r->payload, + .user_access = r->user_access, + .source = r->source, + .stop_monotonic_ut = &r->stop_monotonic_ut, + .result = { + .wb = r->result.wb, + .cb = rrd_inflight_async_function_nowait_finished, + .data = r, + }, + .progress = { + .cb = r->progress.cb, + .data = r->progress.data, + }, + .is_cancelled = { + .cb = rrd_inflight_async_function_is_cancelled, + .data = r, + }, + .register_canceller = { + .cb = rrd_inflight_async_function_register_canceller_cb, + .data = r, + }, + .register_progresser = { + .cb = rrd_inflight_async_function_register_progresser_cb, + .data = r, + }, + }; + int code = r->rdcf->execute_cb(&rfe, r->rdcf->execute_cb_data); + + return code; +} + +static int rrd_call_function_async_and_wait(struct rrd_function_inflight *r) { + struct rrd_function_call_wait *tmp = mallocz(sizeof(struct rrd_function_call_wait)); + tmp->free_with_signal = false; + tmp->data_are_ready = false; + tmp->host = r->host; + tmp->host_function_acquired = r->host_function_acquired; + tmp->transaction = strdupz(r->transaction); + 
netdata_mutex_init(&tmp->mutex); + pthread_cond_init(&tmp->cond, NULL); + + // we need a temporary BUFFER, because we may time out and the caller supplied one may vanish, + // so we create a new one we guarantee will survive until the collector finishes... + + bool we_should_free = false; + BUFFER *temp_wb = buffer_create(1024, &netdata_buffers_statistics.buffers_functions); // we need it because we may give up on it + temp_wb->content_type = r->result.wb->content_type; + + struct rrd_function_execute rfe = { + .transaction = &r->transaction_uuid, + .function = r->sanitized_cmd, + .payload = r->payload, + .user_access = r->user_access, + .source = r->source, + .stop_monotonic_ut = &r->stop_monotonic_ut, + .result = { + .wb = temp_wb, + + // we overwrite the result callbacks, + // so that we can clean up the allocations made + .cb = rrd_async_function_signal_when_ready, + .data = tmp, + }, + .progress = { + .cb = r->progress.cb, + .data = r->progress.data, + }, + .is_cancelled = { + .cb = rrd_inflight_async_function_is_cancelled, + .data = r, + }, + .register_canceller = { + .cb = rrd_inflight_async_function_register_canceller_cb, + .data = r, + }, + .register_progresser = { + .cb = rrd_inflight_async_function_register_progresser_cb, + .data = r, + }, + }; + int code = r->rdcf->execute_cb(&rfe, r->rdcf->execute_cb_data); + + // this has to happen after we execute the callback + // because if an async call is responded in sync mode, there will be a deadlock. + netdata_mutex_lock(&tmp->mutex); + + if (code == HTTP_RESP_OK || tmp->data_are_ready) { + bool cancelled = false; + int rc = 0; + while (rc == 0 && !cancelled && !tmp->data_are_ready) { + usec_t now_mono_ut = now_monotonic_usec(); + usec_t stop_mono_ut = __atomic_load_n(&r->stop_monotonic_ut, __ATOMIC_RELAXED) + RRDFUNCTIONS_TIMEOUT_EXTENSION_UT; + if(now_mono_ut > stop_mono_ut) { + rc = ETIMEDOUT; + break; + } + + // wait for 10ms, and loop again... 
+ struct timespec tp; + clock_gettime(CLOCK_REALTIME, &tp); + tp.tv_nsec += 10 * NSEC_PER_MSEC; + if(tp.tv_nsec > (long)(1 * NSEC_PER_SEC)) { + tp.tv_sec++; + tp.tv_nsec -= 1 * NSEC_PER_SEC; + } + + // the mutex is unlocked within pthread_cond_timedwait() + rc = pthread_cond_timedwait(&tmp->cond, &tmp->mutex, &tp); + // the mutex is again ours + + if(rc == ETIMEDOUT) { + // 10ms have passed + + rc = 0; + if (!tmp->data_are_ready && r->is_cancelled.cb && + r->is_cancelled.cb(r->is_cancelled.data)) { + // internal_error(true, "FUNCTIONS: transaction '%s' is cancelled while waiting for response", + // r->transaction); + cancelled = true; + rrd_function_cancel_inflight(r); + break; + } + } + } + + if (tmp->data_are_ready) { + // we have a response + + buffer_contents_replace(r->result.wb, buffer_tostring(temp_wb), buffer_strlen(temp_wb)); + r->result.wb->content_type = temp_wb->content_type; + r->result.wb->expires = temp_wb->expires; + + if(r->result.wb->expires) + buffer_cacheable(r->result.wb); + else + buffer_no_cacheable(r->result.wb); + + code = tmp->code; + + tmp->free_with_signal = false; + we_should_free = true; + } + else if (rc == ETIMEDOUT || cancelled) { + // timeout + // we will go away and let the callback free the structure + + if(cancelled) + code = rrd_call_function_error(r->result.wb, + "Request cancelled", + HTTP_RESP_CLIENT_CLOSED_REQUEST); + else + code = rrd_call_function_error(r->result.wb, + "Timeout while waiting for a response from the collector.", + HTTP_RESP_GATEWAY_TIMEOUT); + + tmp->free_with_signal = true; + we_should_free = false; + } + else { + code = rrd_call_function_error( + r->result.wb, "Internal error while communicating with the collector", + HTTP_RESP_INTERNAL_SERVER_ERROR); + + tmp->free_with_signal = true; + we_should_free = false; + } + } + else { + // the response is not ok, and we don't have the data + tmp->free_with_signal = true; + we_should_free = false; + } + + netdata_mutex_unlock(&tmp->mutex); + + if (we_should_free) 
{ + rrd_function_call_wait_free(tmp); + buffer_free(temp_wb); + } + + return code; +} + +static inline int rrd_call_function_async(struct rrd_function_inflight *r, bool wait) { + if(wait) + return rrd_call_function_async_and_wait(r); + else + return rrd_call_function_async_and_dont_wait(r); +} + + +// ---------------------------------------------------------------------------- + +int rrd_function_run(RRDHOST *host, BUFFER *result_wb, int timeout_s, + HTTP_ACCESS user_access, const char *cmd, + bool wait, const char *transaction, + rrd_function_result_callback_t result_cb, void *result_cb_data, + rrd_function_progress_cb_t progress_cb, void *progress_cb_data, + rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, + BUFFER *payload, const char *source) { + + int code; + char sanitized_cmd[PLUGINSD_LINE_MAX + 1]; + const DICTIONARY_ITEM *host_function_acquired = NULL; + + char sanitized_source[(source ? strlen(source) : 0) + 1]; + rrd_functions_sanitize(sanitized_source, source ? 
source : "", sizeof(sanitized_source)); + + // ------------------------------------------------------------------------ + // check for the host + if(!host) { + code = HTTP_RESP_INTERNAL_SERVER_ERROR; + + rrd_call_function_error(result_wb, "no host given for running the function", code); + + if(result_cb) + result_cb(result_wb, code, result_cb_data); + + return code; + } + + // ------------------------------------------------------------------------ + // find the function + + size_t sanitized_cmd_length = rrd_functions_sanitize(sanitized_cmd, cmd, sizeof(sanitized_cmd)); + + code = rrd_functions_find_by_name(host, result_wb, sanitized_cmd, sanitized_cmd_length, &host_function_acquired); + if(code != HTTP_RESP_OK) { + + if(result_cb) + result_cb(result_wb, code, result_cb_data); + + return code; + } + + struct rrd_host_function *rdcf = dictionary_acquired_item_value(host_function_acquired); + + if(!http_access_user_has_enough_access_level_for_endpoint(user_access, rdcf->access)) { + + if(!aclk_connected) + code = rrd_call_function_error(result_wb, + "This Netdata must be connected to Netdata Cloud for Single-Sign-On (SSO) " + "access this feature. Claim this Netdata to Netdata Cloud to enable access.", + HTTP_ACCESS_PERMISSION_DENIED_HTTP_CODE(user_access)); + + else if((rdcf->access & HTTP_ACCESS_SIGNED_ID) && !(user_access & HTTP_ACCESS_SIGNED_ID)) + code = rrd_call_function_error(result_wb, + "You need to be authenticated via Netdata Cloud Single-Sign-On (SSO) " + "to access this feature. 
Sign-in on this dashboard, " + "or access your Netdata via https://app.netdata.cloud.", + HTTP_ACCESS_PERMISSION_DENIED_HTTP_CODE(user_access)); + + else if((rdcf->access & HTTP_ACCESS_SAME_SPACE) && !(user_access & HTTP_ACCESS_SAME_SPACE)) + code = rrd_call_function_error(result_wb, + "You need to login to the Netdata Cloud space this agent is claimed to, " + "to access this feature.", + HTTP_ACCESS_PERMISSION_DENIED_HTTP_CODE(user_access)); + + else if((rdcf->access & HTTP_ACCESS_COMMERCIAL_SPACE) && !(user_access & HTTP_ACCESS_COMMERCIAL_SPACE)) + code = rrd_call_function_error(result_wb, + "This feature is only available for commercial users and supporters " + "of Netdata. To use it, please upgrade your space. " + "Thank you for supporting Netdata.", + HTTP_ACCESS_PERMISSION_DENIED_HTTP_CODE(user_access)); + + else { + HTTP_ACCESS missing_access = (~user_access) & rdcf->access; + char perms_str[1024]; + http_access2txt(perms_str, sizeof(perms_str), ", ", missing_access); + + char msg[2048]; + snprintfz(msg, sizeof(msg), "This feature requires additional permissions: %s.", perms_str); + + code = rrd_call_function_error(result_wb, msg, + HTTP_ACCESS_PERMISSION_DENIED_HTTP_CODE(user_access)); + } + + dictionary_acquired_item_release(host->functions, host_function_acquired); + + if(result_cb) + result_cb(result_wb, code, result_cb_data); + + return code; + } + + if(timeout_s <= 0) + timeout_s = rdcf->timeout; + + // ------------------------------------------------------------------------ + // validate and parse the transaction, or generate a new transaction id + + char uuid_str[UUID_COMPACT_STR_LEN]; + uuid_t uuid; + + if(!transaction || !*transaction || uuid_parse_flexi(transaction, uuid) != 0) + uuid_generate_random(uuid); + + uuid_unparse_lower_compact(uuid, uuid_str); + transaction = uuid_str; + + // ------------------------------------------------------------------------ + // the function can only be executed in async mode + // put the function into the 
inflight requests + + struct rrd_function_inflight t = { + .used = false, + .host = host, + .cmd = strdupz(cmd), + .sanitized_cmd = strdupz(sanitized_cmd), + .sanitized_cmd_length = sanitized_cmd_length, + .transaction = strdupz(transaction), + .user_access = user_access, + .source = strdupz(sanitized_source), + .payload = buffer_dup(payload), + .timeout = timeout_s, + .cancelled = false, + .stop_monotonic_ut = now_monotonic_usec() + timeout_s * USEC_PER_SEC, + .host_function_acquired = host_function_acquired, + .rdcf = rdcf, + .result = { + .wb = result_wb, + .cb = result_cb, + .data = result_cb_data, + }, + .is_cancelled = { + .cb = is_cancelled_cb, + .data = is_cancelled_cb_data, + }, + .progress = { + .cb = progress_cb, + .data = progress_cb_data, + }, + }; + uuid_copy(t.transaction_uuid, uuid); + + struct rrd_function_inflight *r = dictionary_set(rrd_functions_inflight_requests, transaction, &t, sizeof(t)); + if(r->used) { + nd_log(NDLS_DAEMON, NDLP_NOTICE, + "FUNCTIONS: duplicate transaction '%s', function: '%s'", + t.transaction, t.cmd); + + code = rrd_call_function_error(result_wb, "duplicate transaction", HTTP_RESP_BAD_REQUEST); + + rrd_functions_inflight_cleanup(&t); + dictionary_acquired_item_release(r->host->functions, t.host_function_acquired); + + if(result_cb) + result_cb(result_wb, code, result_cb_data); + + return code; + } + r->used = true; + // internal_error(true, "FUNCTIONS: transaction '%s' started", r->transaction); + + if(r->rdcf->sync) { + // the caller has to wait + + struct rrd_function_execute rfe = { + .transaction = &r->transaction_uuid, + .function = r->sanitized_cmd, + .payload = r->payload, + .user_access = r->user_access, + .source = r->source, + .stop_monotonic_ut = &r->stop_monotonic_ut, + .result = { + .wb = r->result.wb, + + // we overwrite the result callbacks, + // so that we can clean up the allocations made + .cb = r->result.cb, + .data = r->result.data, + }, + .progress = { + .cb = r->progress.cb, + .data = 
r->progress.data, + }, + .is_cancelled = { + .cb = r->is_cancelled.cb, + .data = r->is_cancelled.data, + }, + .register_canceller = { + .cb = NULL, + .data = NULL, + }, + .register_progresser = { + .cb = NULL, + .data = NULL, + }, + }; + code = r->rdcf->execute_cb(&rfe, r->rdcf->execute_cb_data); + + rrd_inflight_function_cleanup(host, r->transaction); + return code; + } + + return rrd_call_function_async(r, wait); +} + +bool rrd_function_has_this_original_result_callback(uuid_t *transaction, rrd_function_result_callback_t cb) { + bool ret = false; + char str[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(*transaction, str); + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(rrd_functions_inflight_requests, str); + if(item) { + struct rrd_function_inflight *r = dictionary_acquired_item_value(item); + if(r->result.cb == cb) + ret = true; + + dictionary_acquired_item_release(rrd_functions_inflight_requests, item); + } + return ret; +} + +static void rrd_function_cancel_inflight(struct rrd_function_inflight *r) { + if(!r) + return; + + bool cancelled = __atomic_load_n(&r->cancelled, __ATOMIC_RELAXED); + if(cancelled) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: received a CANCEL request for transaction '%s', but it is already cancelled.", + r->transaction); + return; + } + + __atomic_store_n(&r->cancelled, true, __ATOMIC_RELAXED); + + if(!rrd_collector_dispatcher_acquire(r->rdcf->collector)) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: received a CANCEL request for transaction '%s', but the collector is not running.", + r->transaction); + return; + } + + if(r->canceller.cb) + r->canceller.cb(r->canceller.data); + + rrd_collector_dispatcher_release(r->rdcf->collector); +} + +void rrd_function_cancel(const char *transaction) { + // internal_error(true, "FUNCTIONS: request to cancel transaction '%s'", transaction); + + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(rrd_functions_inflight_requests, transaction); + if(!item) { + 
nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: received a CANCEL request for transaction '%s', but the transaction is not running.", + transaction); + return; + } + + struct rrd_function_inflight *r = dictionary_acquired_item_value(item); + rrd_function_cancel_inflight(r); + dictionary_acquired_item_release(rrd_functions_inflight_requests, item); +} + +void rrd_function_progress(const char *transaction) { + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(rrd_functions_inflight_requests, transaction); + if(!item) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: received a PROGRESS request for transaction '%s', but the transaction is not running.", + transaction); + return; + } + + struct rrd_function_inflight *r = dictionary_acquired_item_value(item); + + if(!rrd_collector_dispatcher_acquire(r->rdcf->collector)) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: received a PROGRESS request for transaction '%s', but the collector is not running.", + transaction); + goto cleanup; + } + + functions_stop_monotonic_update_on_progress(&r->stop_monotonic_ut); + + if(r->progresser.cb) + r->progresser.cb(r->progresser.data); + + rrd_collector_dispatcher_release(r->rdcf->collector); + +cleanup: + dictionary_acquired_item_release(rrd_functions_inflight_requests, item); +} + +void rrd_function_call_progresser(uuid_t *transaction) { + char str[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(*transaction, str); + rrd_function_progress(str); +} diff --git a/src/database/rrdfunctions-inflight.h b/src/database/rrdfunctions-inflight.h new file mode 100644 index 000000000..3cad31784 --- /dev/null +++ b/src/database/rrdfunctions-inflight.h @@ -0,0 +1,16 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDFUNCTIONS_INFLIGHT_H +#define NETDATA_RRDFUNCTIONS_INFLIGHT_H + +#include "rrd.h" + +void rrd_functions_inflight_init(void); + +// cancel a running function, to be run from anywhere +void rrd_function_cancel(const char *transaction); + +void 
rrd_function_progress(const char *transaction); +void rrd_function_call_progresser(uuid_t *transaction); + +#endif //NETDATA_RRDFUNCTIONS_INFLIGHT_H diff --git a/src/database/rrdfunctions-inline.c b/src/database/rrdfunctions-inline.c new file mode 100644 index 000000000..3eb30e7b5 --- /dev/null +++ b/src/database/rrdfunctions-inline.c @@ -0,0 +1,44 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrdfunctions-inline.h" + +struct rrd_function_inline { + rrd_function_execute_inline_cb_t cb; +}; + +static int rrd_function_run_inline(struct rrd_function_execute *rfe, void *data) { + + // IMPORTANT: this function MUST call the result_cb even on failures + + struct rrd_function_inline *fi = data; + + int code; + + if(rfe->is_cancelled.cb && rfe->is_cancelled.cb(rfe->is_cancelled.data)) + code = HTTP_RESP_CLIENT_CLOSED_REQUEST; + else + code = fi->cb(rfe->result.wb, rfe->function); + + if(code == HTTP_RESP_CLIENT_CLOSED_REQUEST || (rfe->is_cancelled.cb && rfe->is_cancelled.cb(rfe->is_cancelled.data))) { + buffer_flush(rfe->result.wb); + code = HTTP_RESP_CLIENT_CLOSED_REQUEST; + } + + if(rfe->result.cb) + rfe->result.cb(rfe->result.wb, code, rfe->result.data); + + return code; +} + +void rrd_function_add_inline(RRDHOST *host, RRDSET *st, const char *name, int timeout, int priority, + const char *help, const char *tags, + HTTP_ACCESS access, rrd_function_execute_inline_cb_t execute_cb) { + + rrd_collector_started(); // this creates a collector that runs for as long as netdata runs + + struct rrd_function_inline *fi = callocz(1, sizeof(struct rrd_function_inline)); + fi->cb = execute_cb; + + rrd_function_add(host, st, name, timeout, priority, help, tags, access, true, + rrd_function_run_inline, fi); +} diff --git a/src/database/rrdfunctions-inline.h b/src/database/rrdfunctions-inline.h new file mode 100644 index 000000000..9948edbef --- /dev/null +++ b/src/database/rrdfunctions-inline.h @@ -0,0 +1,14 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef 
NETDATA_RRDFUNCTIONS_INLINE_H +#define NETDATA_RRDFUNCTIONS_INLINE_H + +#include "rrd.h" + +typedef int (*rrd_function_execute_inline_cb_t)(BUFFER *wb, const char *function); + +void rrd_function_add_inline(RRDHOST *host, RRDSET *st, const char *name, int timeout, int priority, + const char *help, const char *tags, + HTTP_ACCESS access, rrd_function_execute_inline_cb_t execute_cb); + +#endif //NETDATA_RRDFUNCTIONS_INLINE_H diff --git a/src/database/rrdfunctions-internals.h b/src/database/rrdfunctions-internals.h new file mode 100644 index 000000000..a846e4de0 --- /dev/null +++ b/src/database/rrdfunctions-internals.h @@ -0,0 +1,36 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDFUNCTIONS_INTERNALS_H +#define NETDATA_RRDFUNCTIONS_INTERNALS_H + +#include "rrd.h" + +#include "rrdcollector-internals.h" + +typedef enum __attribute__((packed)) { + RRD_FUNCTION_LOCAL = (1 << 0), + RRD_FUNCTION_GLOBAL = (1 << 1), + RRD_FUNCTION_DYNCFG = (1 << 2), + + // this is 8-bit +} RRD_FUNCTION_OPTIONS; + +struct rrd_host_function { + bool sync; // when true, the function is called synchronously + RRD_FUNCTION_OPTIONS options; // RRD_FUNCTION_OPTIONS + HTTP_ACCESS access; + STRING *help; + STRING *tags; + int timeout; // the default timeout of the function + int priority; + + rrd_function_execute_cb_t execute_cb; + void *execute_cb_data; + + struct rrd_collector *collector; +}; + +size_t rrd_functions_sanitize(char *dst, const char *src, size_t dst_len); +int rrd_functions_find_by_name(RRDHOST *host, BUFFER *wb, const char *name, size_t key_length, const DICTIONARY_ITEM **item); + +#endif //NETDATA_RRDFUNCTIONS_INTERNALS_H diff --git a/src/database/rrdfunctions-progress.c b/src/database/rrdfunctions-progress.c new file mode 100644 index 000000000..81d663909 --- /dev/null +++ b/src/database/rrdfunctions-progress.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrdfunctions-progress.h" + +int rrdhost_function_progress(BUFFER *wb, const 
char *function __maybe_unused) { + return progress_function_result(wb, rrdhost_hostname(localhost)); +} + diff --git a/src/database/rrdfunctions-progress.h b/src/database/rrdfunctions-progress.h new file mode 100644 index 000000000..8f97bf7e9 --- /dev/null +++ b/src/database/rrdfunctions-progress.h @@ -0,0 +1,10 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDFUNCTIONS_PROGRESS_H +#define NETDATA_RRDFUNCTIONS_PROGRESS_H + +#include "rrd.h" + +int rrdhost_function_progress(BUFFER *wb, const char *function __maybe_unused); + +#endif //NETDATA_RRDFUNCTIONS_PROGRESS_H diff --git a/src/database/rrdfunctions-streaming.c b/src/database/rrdfunctions-streaming.c new file mode 100644 index 000000000..baf3ebc38 --- /dev/null +++ b/src/database/rrdfunctions-streaming.c @@ -0,0 +1,627 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "rrdfunctions-streaming.h" + +int rrdhost_function_streaming(BUFFER *wb, const char *function __maybe_unused) { + + time_t now = now_realtime_sec(); + + buffer_flush(wb); + wb->content_type = CT_APPLICATION_JSON; + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); + + buffer_json_member_add_string(wb, "hostname", rrdhost_hostname(localhost)); + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_time_t(wb, "update_every", 1); + buffer_json_member_add_boolean(wb, "has_history", false); + buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_STREAMING_HELP); + buffer_json_member_add_array(wb, "data"); + + size_t max_sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_MAX] = { 0 }; + size_t max_db_metrics = 0, max_db_instances = 0, max_db_contexts = 0; + size_t max_collection_replication_instances = 0, max_streaming_replication_instances = 0; + size_t max_ml_anomalous = 0, max_ml_normal = 0, max_ml_trained = 0, max_ml_pending = 0, max_ml_silenced = 0; + { + RRDHOST *host; + 
dfe_start_read(rrdhost_root_index, host) { + RRDHOST_STATUS s; + rrdhost_status(host, now, &s); + buffer_json_add_array_item_array(wb); + + if(s.db.metrics > max_db_metrics) + max_db_metrics = s.db.metrics; + + if(s.db.instances > max_db_instances) + max_db_instances = s.db.instances; + + if(s.db.contexts > max_db_contexts) + max_db_contexts = s.db.contexts; + + if(s.ingest.replication.instances > max_collection_replication_instances) + max_collection_replication_instances = s.ingest.replication.instances; + + if(s.stream.replication.instances > max_streaming_replication_instances) + max_streaming_replication_instances = s.stream.replication.instances; + + for(int i = 0; i < STREAM_TRAFFIC_TYPE_MAX ;i++) { + if (s.stream.sent_bytes_on_this_connection_per_type[i] > + max_sent_bytes_on_this_connection_per_type[i]) + max_sent_bytes_on_this_connection_per_type[i] = + s.stream.sent_bytes_on_this_connection_per_type[i]; + } + + // retention + buffer_json_add_array_item_string(wb, rrdhost_hostname(s.host)); // Node + buffer_json_add_array_item_uint64(wb, s.db.first_time_s * MSEC_PER_SEC); // dbFrom + buffer_json_add_array_item_uint64(wb, s.db.last_time_s * MSEC_PER_SEC); // dbTo + + if(s.db.first_time_s && s.db.last_time_s && s.db.last_time_s > s.db.first_time_s) + buffer_json_add_array_item_uint64(wb, s.db.last_time_s - s.db.first_time_s); // dbDuration + else + buffer_json_add_array_item_string(wb, NULL); // dbDuration + + buffer_json_add_array_item_uint64(wb, s.db.metrics); // dbMetrics + buffer_json_add_array_item_uint64(wb, s.db.instances); // dbInstances + buffer_json_add_array_item_uint64(wb, s.db.contexts); // dbContexts + + // statuses + buffer_json_add_array_item_string(wb, rrdhost_ingest_status_to_string(s.ingest.status)); // InStatus + buffer_json_add_array_item_string(wb, rrdhost_streaming_status_to_string(s.stream.status)); // OutStatus + buffer_json_add_array_item_string(wb, rrdhost_ml_status_to_string(s.ml.status)); // MLStatus + + // collection + 
if(s.ingest.since) { + buffer_json_add_array_item_uint64(wb, s.ingest.since * MSEC_PER_SEC); // InSince + buffer_json_add_array_item_time_t(wb, s.now - s.ingest.since); // InAge + } + else { + buffer_json_add_array_item_string(wb, NULL); // InSince + buffer_json_add_array_item_string(wb, NULL); // InAge + } + buffer_json_add_array_item_string(wb, stream_handshake_error_to_string(s.ingest.reason)); // InReason + buffer_json_add_array_item_uint64(wb, s.ingest.hops); // InHops + buffer_json_add_array_item_double(wb, s.ingest.replication.completion); // InReplCompletion + buffer_json_add_array_item_uint64(wb, s.ingest.replication.instances); // InReplInstances + buffer_json_add_array_item_string(wb, s.ingest.peers.local.ip); // InLocalIP + buffer_json_add_array_item_uint64(wb, s.ingest.peers.local.port); // InLocalPort + buffer_json_add_array_item_string(wb, s.ingest.peers.peer.ip); // InRemoteIP + buffer_json_add_array_item_uint64(wb, s.ingest.peers.peer.port); // InRemotePort + buffer_json_add_array_item_string(wb, s.ingest.ssl ? 
"SSL" : "PLAIN"); // InSSL + stream_capabilities_to_json_array(wb, s.ingest.capabilities, NULL); // InCapabilities + + // streaming + if(s.stream.since) { + buffer_json_add_array_item_uint64(wb, s.stream.since * MSEC_PER_SEC); // OutSince + buffer_json_add_array_item_time_t(wb, s.now - s.stream.since); // OutAge + } + else { + buffer_json_add_array_item_string(wb, NULL); // OutSince + buffer_json_add_array_item_string(wb, NULL); // OutAge + } + buffer_json_add_array_item_string(wb, stream_handshake_error_to_string(s.stream.reason)); // OutReason + buffer_json_add_array_item_uint64(wb, s.stream.hops); // OutHops + buffer_json_add_array_item_double(wb, s.stream.replication.completion); // OutReplCompletion + buffer_json_add_array_item_uint64(wb, s.stream.replication.instances); // OutReplInstances + buffer_json_add_array_item_string(wb, s.stream.peers.local.ip); // OutLocalIP + buffer_json_add_array_item_uint64(wb, s.stream.peers.local.port); // OutLocalPort + buffer_json_add_array_item_string(wb, s.stream.peers.peer.ip); // OutRemoteIP + buffer_json_add_array_item_uint64(wb, s.stream.peers.peer.port); // OutRemotePort + buffer_json_add_array_item_string(wb, s.stream.ssl ? "SSL" : "PLAIN"); // OutSSL + buffer_json_add_array_item_string(wb, s.stream.compression ? 
"COMPRESSED" : "UNCOMPRESSED"); // OutCompression + stream_capabilities_to_json_array(wb, s.stream.capabilities, NULL); // OutCapabilities + buffer_json_add_array_item_uint64(wb, s.stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_DATA]); + buffer_json_add_array_item_uint64(wb, s.stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_METADATA]); + buffer_json_add_array_item_uint64(wb, s.stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_REPLICATION]); + buffer_json_add_array_item_uint64(wb, s.stream.sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_FUNCTIONS]); + + buffer_json_add_array_item_array(wb); // OutAttemptHandshake + time_t last_attempt = 0; + for(struct rrdpush_destinations *d = host->destinations; d ; d = d->next) { + if(d->since > last_attempt) + last_attempt = d->since; + + buffer_json_add_array_item_string(wb, stream_handshake_error_to_string(d->reason)); + } + buffer_json_array_close(wb); // // OutAttemptHandshake + + if(!last_attempt) { + buffer_json_add_array_item_string(wb, NULL); // OutAttemptSince + buffer_json_add_array_item_string(wb, NULL); // OutAttemptAge + } + else { + buffer_json_add_array_item_uint64(wb, last_attempt * 1000); // OutAttemptSince + buffer_json_add_array_item_time_t(wb, s.now - last_attempt); // OutAttemptAge + } + + // ML + if(s.ml.status == RRDHOST_ML_STATUS_RUNNING) { + buffer_json_add_array_item_uint64(wb, s.ml.metrics.anomalous); // MlAnomalous + buffer_json_add_array_item_uint64(wb, s.ml.metrics.normal); // MlNormal + buffer_json_add_array_item_uint64(wb, s.ml.metrics.trained); // MlTrained + buffer_json_add_array_item_uint64(wb, s.ml.metrics.pending); // MlPending + buffer_json_add_array_item_uint64(wb, s.ml.metrics.silenced); // MlSilenced + + if(s.ml.metrics.anomalous > max_ml_anomalous) + max_ml_anomalous = s.ml.metrics.anomalous; + + if(s.ml.metrics.normal > max_ml_normal) + max_ml_normal = s.ml.metrics.normal; + + if(s.ml.metrics.trained > max_ml_trained) + 
max_ml_trained = s.ml.metrics.trained; + + if(s.ml.metrics.pending > max_ml_pending) + max_ml_pending = s.ml.metrics.pending; + + if(s.ml.metrics.silenced > max_ml_silenced) + max_ml_silenced = s.ml.metrics.silenced; + + } + else { + buffer_json_add_array_item_string(wb, NULL); // MlAnomalous + buffer_json_add_array_item_string(wb, NULL); // MlNormal + buffer_json_add_array_item_string(wb, NULL); // MlTrained + buffer_json_add_array_item_string(wb, NULL); // MlPending + buffer_json_add_array_item_string(wb, NULL); // MlSilenced + } + + // close + buffer_json_array_close(wb); + } + dfe_done(host); + } + buffer_json_array_close(wb); // data + buffer_json_member_add_object(wb, "columns"); + { + size_t field_id = 0; + + // Node + buffer_rrdf_table_add_field(wb, field_id++, "Node", "Node's Hostname", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY | RRDF_FIELD_OPTS_STICKY, + NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "dbFrom", "DB Data Retention From", + RRDF_FIELD_TYPE_TIMESTAMP, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_DATETIME_MS, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_MIN, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "dbTo", "DB Data Retention To", + RRDF_FIELD_TYPE_TIMESTAMP, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_DATETIME_MS, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "dbDuration", "DB Data Retention Duration", + RRDF_FIELD_TYPE_DURATION, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_DURATION_S, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + 
+ buffer_rrdf_table_add_field(wb, field_id++, "dbMetrics", "Time-series Metrics in the DB", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, NULL, (double)max_db_metrics, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "dbInstances", "Instances in the DB", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, NULL, (double)max_db_instances, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "dbContexts", "Contexts in the DB", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, NULL, (double)max_db_contexts, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + // --- statuses --- + + buffer_rrdf_table_add_field(wb, field_id++, "InStatus", "Data Collection Online Status", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + + buffer_rrdf_table_add_field(wb, field_id++, "OutStatus", "Streaming Online Status", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "MlStatus", "ML Status", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + // --- collection --- + + buffer_rrdf_table_add_field(wb, field_id++, "InSince", 
"Last Data Collection Status Change", + RRDF_FIELD_TYPE_TIMESTAMP, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_DATETIME_MS, + 0, NULL, NAN, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_MIN, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "InAge", "Last Data Collection Online Status Change Age", + RRDF_FIELD_TYPE_DURATION, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_DURATION_S, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "InReason", "Data Collection Online Status Reason", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "InHops", "Data Collection Distance Hops from Origin Node", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_MIN, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "InReplCompletion", "Inbound Replication Completion", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 1, "%", 100.0, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_MIN, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "InReplInstances", "Inbound Replicating Instances", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "instances", (double)max_collection_replication_instances, RRDF_FIELD_SORT_DESCENDING, + NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "InLocalIP", "Inbound Local IP", + 
RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "InLocalPort", "Inbound Local Port", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "InRemoteIP", "Inbound Remote IP", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "InRemotePort", "Inbound Remote Port", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "InSSL", "Inbound SSL Connection", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "InCapabilities", "Inbound Connection Capabilities", + RRDF_FIELD_TYPE_ARRAY, RRDF_FIELD_VISUAL_PILL, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + + // --- streaming --- + + buffer_rrdf_table_add_field(wb, field_id++, "OutSince", "Last Streaming Status Change", + RRDF_FIELD_TYPE_TIMESTAMP, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_DATETIME_MS, + 0, NULL, NAN, RRDF_FIELD_SORT_DESCENDING, NULL, + 
RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutAge", "Last Streaming Status Change Age", + RRDF_FIELD_TYPE_DURATION, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_DURATION_S, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_MIN, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutReason", "Streaming Status Reason", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutHops", "Streaming Distance Hops from Origin Node", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_MIN, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutReplCompletion", "Outbound Replication Completion", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_BAR, RRDF_FIELD_TRANSFORM_NUMBER, + 1, "%", 100.0, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_MIN, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutReplInstances", "Outbound Replicating Instances", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "instances", (double)max_streaming_replication_instances, RRDF_FIELD_SORT_DESCENDING, + NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutLocalIP", "Outbound Local IP", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, 
NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutLocalPort", "Outbound Local Port", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutRemoteIP", "Outbound Remote IP", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutRemotePort", "Outbound Remote Port", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutSSL", "Outbound SSL Connection", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutCompression", "Outbound Compressed Connection", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutCapabilities", "Outbound Connection Capabilities", + RRDF_FIELD_TYPE_ARRAY, RRDF_FIELD_VISUAL_PILL, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutTrafficData", "Outbound Metric Data Traffic", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, 
RRDF_FIELD_TRANSFORM_NUMBER, + 0, "bytes", (double)max_sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_DATA], + RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutTrafficMetadata", "Outbound Metric Metadata Traffic", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "bytes", + (double)max_sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_METADATA], + RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutTrafficReplication", "Outbound Metric Replication Traffic", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "bytes", + (double)max_sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_REPLICATION], + RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutTrafficFunctions", "Outbound Metric Functions Traffic", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "bytes", + (double)max_sent_bytes_on_this_connection_per_type[STREAM_TRAFFIC_TYPE_FUNCTIONS], + RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutAttemptHandshake", + "Outbound Connection Attempt Handshake Status", + RRDF_FIELD_TYPE_ARRAY, RRDF_FIELD_VISUAL_PILL, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutAttemptSince", + "Last Outbound Connection Attempt Status Change Time", + RRDF_FIELD_TYPE_TIMESTAMP, RRDF_FIELD_VISUAL_VALUE, 
RRDF_FIELD_TRANSFORM_DATETIME_MS, + 0, NULL, NAN, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "OutAttemptAge", + "Last Outbound Connection Attempt Status Change Age", + RRDF_FIELD_TYPE_DURATION, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_DURATION_S, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_MIN, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + // --- ML --- + + buffer_rrdf_table_add_field(wb, field_id++, "MlAnomalous", "Number of Anomalous Metrics", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "metrics", + (double)max_ml_anomalous, + RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "MlNormal", "Number of Not Anomalous Metrics", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "metrics", + (double)max_ml_normal, + RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "MlTrained", "Number of Trained Metrics", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "metrics", + (double)max_ml_trained, + RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "MlPending", "Number of Pending Metrics", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "metrics", + (double)max_ml_pending, + RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + buffer_rrdf_table_add_field(wb, field_id++, "MlSilenced", "Number of Silenced Metrics", + RRDF_FIELD_TYPE_INTEGER, 
RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 0, "metrics", + (double)max_ml_silenced, + RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + } + buffer_json_object_close(wb); // columns + buffer_json_member_add_string(wb, "default_sort_column", "Node"); + buffer_json_member_add_object(wb, "charts"); + { + // Data Collection Age chart + buffer_json_member_add_object(wb, "InAge"); + { + buffer_json_member_add_string(wb, "name", "Data Collection Age"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "InAge"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + // Streaming Age chart + buffer_json_member_add_object(wb, "OutAge"); + { + buffer_json_member_add_string(wb, "name", "Streaming Age"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "OutAge"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + // DB Duration + buffer_json_member_add_object(wb, "dbDuration"); + { + buffer_json_member_add_string(wb, "name", "Retention Duration"); + buffer_json_member_add_string(wb, "type", "stacked-bar"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "dbDuration"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // charts + + buffer_json_member_add_array(wb, "default_charts"); + { + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "InAge"); + buffer_json_add_array_item_string(wb, "Node"); + buffer_json_array_close(wb); + + buffer_json_add_array_item_array(wb); + buffer_json_add_array_item_string(wb, "OutAge"); + buffer_json_add_array_item_string(wb, "Node"); + buffer_json_array_close(wb); + } + buffer_json_array_close(wb); + 
+ buffer_json_member_add_object(wb, "group_by"); + { + buffer_json_member_add_object(wb, "Node"); + { + buffer_json_member_add_string(wb, "name", "Node"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "Node"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "InStatus"); + { + buffer_json_member_add_string(wb, "name", "Nodes by Collection Status"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "InStatus"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "OutStatus"); + { + buffer_json_member_add_string(wb, "name", "Nodes by Streaming Status"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "OutStatus"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "MlStatus"); + { + buffer_json_member_add_string(wb, "name", "Nodes by ML Status"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "MlStatus"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "InRemoteIP"); + { + buffer_json_member_add_string(wb, "name", "Nodes by Inbound IP"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "InRemoteIP"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + + buffer_json_member_add_object(wb, "OutRemoteIP"); + { + buffer_json_member_add_string(wb, "name", "Nodes by Outbound IP"); + buffer_json_member_add_array(wb, "columns"); + { + buffer_json_add_array_item_string(wb, "OutRemoteIP"); + } + buffer_json_array_close(wb); + } + buffer_json_object_close(wb); + } + buffer_json_object_close(wb); // group_by + + buffer_json_member_add_time_t(wb, "expires", now_realtime_sec() + 1); + buffer_json_finalize(wb); + + 
return HTTP_RESP_OK; +} diff --git a/src/database/rrdfunctions-streaming.h b/src/database/rrdfunctions-streaming.h new file mode 100644 index 000000000..cfa15bdb5 --- /dev/null +++ b/src/database/rrdfunctions-streaming.h @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_RRDFUNCTIONS_STREAMING_H +#define NETDATA_RRDFUNCTIONS_STREAMING_H + +#include "rrd.h" + +#define RRDFUNCTIONS_STREAMING_HELP "Streaming status for parents and children." + +int rrdhost_function_streaming(BUFFER *wb, const char *function); + +#endif //NETDATA_RRDFUNCTIONS_STREAMING_H diff --git a/src/database/rrdfunctions.c b/src/database/rrdfunctions.c new file mode 100644 index 000000000..cb7cdab36 --- /dev/null +++ b/src/database/rrdfunctions.c @@ -0,0 +1,374 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define NETDATA_RRD_INTERNALS + +#include "rrd.h" +#include "rrdfunctions-internals.h" + +#define MAX_FUNCTION_LENGTH (PLUGINSD_LINE_MAX - 512) // we need some space for the rest of the line + +static unsigned char functions_allowed_chars[256] = { + [0] = '\0', [1] = '_', [2] = '_', [3] = '_', [4] = '_', [5] = '_', [6] = '_', [7] = '_', [8] = '_', + + // control + ['\t'] = ' ', ['\n'] = ' ', ['\v'] = ' ', [12] = ' ', ['\r'] = ' ', + + [14] = '_', [15] = '_', [16] = '_', [17] = '_', [18] = '_', [19] = '_', [20] = '_', [21] = '_', + [22] = '_', [23] = '_', [24] = '_', [25] = '_', [26] = '_', [27] = '_', [28] = '_', [29] = '_', + [30] = '_', [31] = '_', + + // symbols + [' '] = ' ', ['!'] = '!', ['"'] = '\'', ['#'] = '#', ['$'] = '$', ['%'] = '%', ['&'] = '&', ['\''] = '\'', + ['('] = '(', [')'] = ')', ['*'] = '*', ['+'] = '+', [','] = ',', ['-'] = '-', ['.'] = '.', ['/'] = '/', + + // numbers + ['0'] = '0', ['1'] = '1', ['2'] = '2', ['3'] = '3', ['4'] = '4', ['5'] = '5', ['6'] = '6', ['7'] = '7', + ['8'] = '8', ['9'] = '9', + + // symbols + [':'] = ':', [';'] = ';', ['<'] = '<', ['='] = '=', ['>'] = '>', ['?'] = '?', ['@'] = '@', + + // capitals + ['A'] = 'A', 
['B'] = 'B', ['C'] = 'C', ['D'] = 'D', ['E'] = 'E', ['F'] = 'F', ['G'] = 'G', ['H'] = 'H', + ['I'] = 'I', ['J'] = 'J', ['K'] = 'K', ['L'] = 'L', ['M'] = 'M', ['N'] = 'N', ['O'] = 'O', ['P'] = 'P', + ['Q'] = 'Q', ['R'] = 'R', ['S'] = 'S', ['T'] = 'T', ['U'] = 'U', ['V'] = 'V', ['W'] = 'W', ['X'] = 'X', + ['Y'] = 'Y', ['Z'] = 'Z', + + // symbols + ['['] = '[', ['\\'] = '\\', [']'] = ']', ['^'] = '^', ['_'] = '_', ['`'] = '`', + + // lower + ['a'] = 'a', ['b'] = 'b', ['c'] = 'c', ['d'] = 'd', ['e'] = 'e', ['f'] = 'f', ['g'] = 'g', ['h'] = 'h', + ['i'] = 'i', ['j'] = 'j', ['k'] = 'k', ['l'] = 'l', ['m'] = 'm', ['n'] = 'n', ['o'] = 'o', ['p'] = 'p', + ['q'] = 'q', ['r'] = 'r', ['s'] = 's', ['t'] = 't', ['u'] = 'u', ['v'] = 'v', ['w'] = 'w', ['x'] = 'x', + ['y'] = 'y', ['z'] = 'z', + + // symbols + ['{'] = '{', ['|'] = '|', ['}'] = '}', ['~'] = '~', + + // rest + [127] = '_', [128] = '_', [129] = '_', [130] = '_', [131] = '_', [132] = '_', [133] = '_', [134] = '_', + [135] = '_', [136] = '_', [137] = '_', [138] = '_', [139] = '_', [140] = '_', [141] = '_', [142] = '_', + [143] = '_', [144] = '_', [145] = '_', [146] = '_', [147] = '_', [148] = '_', [149] = '_', [150] = '_', + [151] = '_', [152] = '_', [153] = '_', [154] = '_', [155] = '_', [156] = '_', [157] = '_', [158] = '_', + [159] = '_', [160] = '_', [161] = '_', [162] = '_', [163] = '_', [164] = '_', [165] = '_', [166] = '_', + [167] = '_', [168] = '_', [169] = '_', [170] = '_', [171] = '_', [172] = '_', [173] = '_', [174] = '_', + [175] = '_', [176] = '_', [177] = '_', [178] = '_', [179] = '_', [180] = '_', [181] = '_', [182] = '_', + [183] = '_', [184] = '_', [185] = '_', [186] = '_', [187] = '_', [188] = '_', [189] = '_', [190] = '_', + [191] = '_', [192] = '_', [193] = '_', [194] = '_', [195] = '_', [196] = '_', [197] = '_', [198] = '_', + [199] = '_', [200] = '_', [201] = '_', [202] = '_', [203] = '_', [204] = '_', [205] = '_', [206] = '_', + [207] = '_', [208] = '_', [209] = '_', [210] = '_', [211] = '_', 
[212] = '_', [213] = '_', [214] = '_', + [215] = '_', [216] = '_', [217] = '_', [218] = '_', [219] = '_', [220] = '_', [221] = '_', [222] = '_', + [223] = '_', [224] = '_', [225] = '_', [226] = '_', [227] = '_', [228] = '_', [229] = '_', [230] = '_', + [231] = '_', [232] = '_', [233] = '_', [234] = '_', [235] = '_', [236] = '_', [237] = '_', [238] = '_', + [239] = '_', [240] = '_', [241] = '_', [242] = '_', [243] = '_', [244] = '_', [245] = '_', [246] = '_', + [247] = '_', [248] = '_', [249] = '_', [250] = '_', [251] = '_', [252] = '_', [253] = '_', [254] = '_', + [255] = '_' +}; + +size_t rrd_functions_sanitize(char *dst, const char *src, size_t dst_len) { + return text_sanitize((unsigned char *)dst, (const unsigned char *)src, dst_len, + functions_allowed_chars, true, "", NULL); +} + +// ---------------------------------------------------------------------------- + +// we keep a dictionary per RRDSET with these functions +// the dictionary is created on demand (only when a function is added to an RRDSET) + +// ---------------------------------------------------------------------------- + +static void rrd_functions_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func, void *rrdhost) { + RRDHOST *host = rrdhost; (void)host; + struct rrd_host_function *rdcf = func; + + rrd_collector_started(); + rdcf->collector = rrd_collector_acquire_current_thread(); + + if(!rdcf->priority) + rdcf->priority = RRDFUNCTIONS_PRIORITY_DEFAULT; + +// internal_error(true, "FUNCTIONS: adding function '%s' on host '%s', collection tid %d, %s", +// dictionary_acquired_item_name(item), rrdhost_hostname(host), +// rdcf->collector->tid, rdcf->collector->running ? 
"running" : "NOT running"); +} + +static void rrd_functions_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func, + void *rrdhost __maybe_unused) { + struct rrd_host_function *rdcf = func; + rrd_collector_release(rdcf->collector); +} + +static bool rrd_functions_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, void *func, + void *new_func, void *rrdhost) { + RRDHOST *host = rrdhost; (void)host; + struct rrd_host_function *rdcf = func; + struct rrd_host_function *new_rdcf = new_func; + + rrd_collector_started(); + + bool changed = false; + + if(rdcf->collector != thread_rrd_collector) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: function '%s' of host '%s' changed collector from %d to %d", + dictionary_acquired_item_name(item), rrdhost_hostname(host), + rrd_collector_tid(rdcf->collector), rrd_collector_tid(thread_rrd_collector)); + + struct rrd_collector *old_rdc = rdcf->collector; + rdcf->collector = rrd_collector_acquire_current_thread(); + rrd_collector_release(old_rdc); + changed = true; + } + + if(rdcf->execute_cb != new_rdcf->execute_cb) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: function '%s' of host '%s' changed execute callback", + dictionary_acquired_item_name(item), rrdhost_hostname(host)); + + rdcf->execute_cb = new_rdcf->execute_cb; + changed = true; + } + + if(rdcf->help != new_rdcf->help) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: function '%s' of host '%s' changed help text", + dictionary_acquired_item_name(item), rrdhost_hostname(host)); + + STRING *old = rdcf->help; + rdcf->help = new_rdcf->help; + string_freez(old); + changed = true; + } + else + string_freez(new_rdcf->help); + + if(rdcf->tags != new_rdcf->tags) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: function '%s' of host '%s' changed tags", + dictionary_acquired_item_name(item), rrdhost_hostname(host)); + + STRING *old = rdcf->tags; + rdcf->tags = new_rdcf->tags; + string_freez(old); + changed = true; + } + else + 
string_freez(new_rdcf->tags); + + if(rdcf->timeout != new_rdcf->timeout) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: function '%s' of host '%s' changed timeout", + dictionary_acquired_item_name(item), rrdhost_hostname(host)); + + rdcf->timeout = new_rdcf->timeout; + changed = true; + } + + if(rdcf->priority != new_rdcf->priority) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: function '%s' of host '%s' changed priority", + dictionary_acquired_item_name(item), rrdhost_hostname(host)); + + rdcf->priority = new_rdcf->priority; + changed = true; + } + + if(rdcf->access != new_rdcf->access) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: function '%s' of host '%s' changed access level", + dictionary_acquired_item_name(item), rrdhost_hostname(host)); + + rdcf->access = new_rdcf->access; + changed = true; + } + + if(rdcf->sync != new_rdcf->sync) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: function '%s' of host '%s' changed sync/async mode", + dictionary_acquired_item_name(item), rrdhost_hostname(host)); + + rdcf->sync = new_rdcf->sync; + changed = true; + } + + if(rdcf->execute_cb_data != new_rdcf->execute_cb_data) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, + "FUNCTIONS: function '%s' of host '%s' changed execute callback data", + dictionary_acquired_item_name(item), rrdhost_hostname(host)); + + rdcf->execute_cb_data = new_rdcf->execute_cb_data; + changed = true; + } + +// internal_error(true, "FUNCTIONS: adding function '%s' on host '%s', collection tid %d, %s", +// dictionary_acquired_item_name(item), rrdhost_hostname(host), +// rdcf->collector->tid, rdcf->collector->running ? 
"running" : "NOT running"); + + return changed; +} + +void rrd_functions_host_init(RRDHOST *host) { + if(host->functions) return; + + host->functions = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE | DICT_OPTION_FIXED_SIZE, + &dictionary_stats_category_functions, sizeof(struct rrd_host_function)); + + dictionary_register_insert_callback(host->functions, rrd_functions_insert_callback, host); + dictionary_register_delete_callback(host->functions, rrd_functions_delete_callback, host); + dictionary_register_conflict_callback(host->functions, rrd_functions_conflict_callback, host); +} + +void rrd_functions_host_destroy(RRDHOST *host) { + dictionary_destroy(host->functions); +} + +// ---------------------------------------------------------------------------- + +static inline bool is_function_dyncfg(const char *name) { + if(!name || !*name) + return false; + + if(strncmp(name, PLUGINSD_FUNCTION_CONFIG, sizeof(PLUGINSD_FUNCTION_CONFIG) - 1) != 0) + return false; + + char c = name[sizeof(PLUGINSD_FUNCTION_CONFIG) - 1]; + if(c == 0 || isspace(c)) + return true; + + return false; +} + +void rrd_function_add(RRDHOST *host, RRDSET *st, const char *name, int timeout, int priority, + const char *help, const char *tags, + HTTP_ACCESS access, bool sync, + rrd_function_execute_cb_t execute_cb, void *execute_cb_data) { + + // RRDSET *st may be NULL in this function + // to create a GLOBAL function + + if(!tags || !*tags) { + if(strcmp(name, "systemd-journal") == 0) + tags = "logs"; + else + tags = "top"; + } + + if(st && !st->functions_view) + st->functions_view = dictionary_create_view(host->functions); + + char key[strlen(name) + 1]; + rrd_functions_sanitize(key, name, sizeof(key)); + + struct rrd_host_function tmp = { + .sync = sync, + .timeout = timeout, + .options = st ? RRD_FUNCTION_LOCAL: (is_function_dyncfg(name) ? 
RRD_FUNCTION_DYNCFG : RRD_FUNCTION_GLOBAL), + .access = access, + .execute_cb = execute_cb, + .execute_cb_data = execute_cb_data, + .help = string_strdupz(help), + .tags = string_strdupz(tags), + .priority = priority, + }; + const DICTIONARY_ITEM *item = dictionary_set_and_acquire_item(host->functions, key, &tmp, sizeof(tmp)); + + if(st) + dictionary_view_set(st->functions_view, key, item); + else + rrdhost_flag_set(host, RRDHOST_FLAG_GLOBAL_FUNCTIONS_UPDATED); + + dictionary_acquired_item_release(host->functions, item); +} + +void rrd_function_del(RRDHOST *host, RRDSET *st, const char *name) { + char key[strlen(name) + 1]; + rrd_functions_sanitize(key, name, sizeof(key)); + dictionary_del(host->functions, key); + + if(st) + dictionary_del(st->functions_view, key); + else + rrdhost_flag_set(host, RRDHOST_FLAG_GLOBAL_FUNCTIONS_UPDATED); + + dictionary_garbage_collect(host->functions); +} + +int rrd_call_function_error(BUFFER *wb, const char *msg, int code) { + char buffer[PLUGINSD_LINE_MAX]; + json_escape_string(buffer, msg, PLUGINSD_LINE_MAX); + + buffer_flush(wb); + buffer_sprintf(wb, "{\"status\":%d,\"error_message\":\"%s\"}", code, buffer); + wb->content_type = CT_APPLICATION_JSON; + buffer_no_cacheable(wb); + return code; +} + +int rrd_functions_find_by_name(RRDHOST *host, BUFFER *wb, const char *name, size_t key_length, const DICTIONARY_ITEM **item) { + char buffer[MAX_FUNCTION_LENGTH + 1]; + strncpyz(buffer, name, sizeof(buffer) - 1); + char *s = NULL; + + bool found = false; + *item = NULL; + if(host->functions) { + while (buffer[0]) { + if((*item = dictionary_get_and_acquire_item(host->functions, buffer))) { + found = true; + + struct rrd_host_function *rdcf = dictionary_acquired_item_value(*item); + if(rrd_collector_running(rdcf->collector)) { + break; + } + else { + dictionary_acquired_item_release(host->functions, *item); + *item = NULL; + } + } + + // if s == NULL, set it to the end of the buffer; + // this should happen only the first time + if 
(unlikely(!s)) + s = &buffer[key_length - 1]; + + // skip a word from the end + while (s >= buffer && !isspace(*s)) *s-- = '\0'; + + // skip all spaces + while (s >= buffer && isspace(*s)) *s-- = '\0'; + } + } + + buffer_flush(wb); + + if(!(*item)) { + if(found) + return rrd_call_function_error(wb, + "The collector that registered this function, is not currently running.", + HTTP_RESP_SERVICE_UNAVAILABLE); + else + return rrd_call_function_error(wb, + "No collector is supplying this function on this host at this time.", + HTTP_RESP_NOT_FOUND); + } + + return HTTP_RESP_OK; +} + +bool rrd_function_available(RRDHOST *host, const char *function) { + if(!host || !host->functions) + return false; + + bool ret = false; + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(host->functions, function); + if(item) { + struct rrd_host_function *rdcf = dictionary_acquired_item_value(item); + if(rrd_collector_running(rdcf->collector)) + ret = true; + + dictionary_acquired_item_release(host->functions, item); + } + + return ret; +} diff --git a/src/database/rrdfunctions.h b/src/database/rrdfunctions.h new file mode 100644 index 000000000..c9df26f4d --- /dev/null +++ b/src/database/rrdfunctions.h @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-3.0-or-later +#ifndef NETDATA_RRDFUNCTIONS_H +#define NETDATA_RRDFUNCTIONS_H 1 + +// ---------------------------------------------------------------------------- + +#include "libnetdata/libnetdata.h" + +#define RRDFUNCTIONS_PRIORITY_DEFAULT 100 + +#define RRDFUNCTIONS_TIMEOUT_EXTENSION_UT (1 * USEC_PER_SEC) + +typedef void (*rrd_function_result_callback_t)(BUFFER *wb, int code, void *result_cb_data); +typedef bool (*rrd_function_is_cancelled_cb_t)(void *is_cancelled_cb_data); +typedef void (*rrd_function_cancel_cb_t)(void *data); +typedef void (*rrd_function_register_canceller_cb_t)(void *register_cancel_cb_data, rrd_function_cancel_cb_t cancel_cb, void *cancel_cb_data); +typedef void (*rrd_function_progress_cb_t)(void *data, 
size_t done, size_t all); +typedef void (*rrd_function_progresser_cb_t)(void *data); +typedef void (*rrd_function_register_progresser_cb_t)(void *register_progresser_cb_data, rrd_function_progresser_cb_t progresser_cb, void *progresser_cb_data); + +struct rrd_function_execute { + uuid_t *transaction; + const char *function; + BUFFER *payload; + const char *source; + + HTTP_ACCESS user_access; + + usec_t *stop_monotonic_ut; + + struct { + BUFFER *wb; // the response should be written here + rrd_function_result_callback_t cb; + void *data; + } result; + + struct { + rrd_function_progress_cb_t cb; + void *data; + } progress; + + struct { + rrd_function_is_cancelled_cb_t cb; + void *data; + } is_cancelled; + + struct { + rrd_function_register_canceller_cb_t cb; + void *data; + } register_canceller; + + struct { + rrd_function_register_progresser_cb_t cb; + void *data; + } register_progresser; +}; + +typedef int (*rrd_function_execute_cb_t)(struct rrd_function_execute *rfe, void *data); + + +// ---------------------------------------------------------------------------- + +#include "rrd.h" + +void rrd_functions_host_init(RRDHOST *host); +void rrd_functions_host_destroy(RRDHOST *host); + +// add a function, to be run from the collector +void rrd_function_add(RRDHOST *host, RRDSET *st, const char *name, int timeout, int priority, const char *help, const char *tags, + HTTP_ACCESS access, bool sync, rrd_function_execute_cb_t execute_cb, + void *execute_cb_data); + +void rrd_function_del(RRDHOST *host, RRDSET *st, const char *name); + +// call a function, to be run from anywhere +int rrd_function_run(RRDHOST *host, BUFFER *result_wb, int timeout_s, + HTTP_ACCESS user_access, const char *cmd, + bool wait, const char *transaction, + rrd_function_result_callback_t result_cb, void *result_cb_data, + rrd_function_progress_cb_t progress_cb, void *progress_cb_data, + rrd_function_is_cancelled_cb_t is_cancelled_cb, void *is_cancelled_cb_data, + BUFFER *payload, const char *source); 
+ +int rrd_call_function_error(BUFFER *wb, const char *msg, int code); + +bool rrd_function_available(RRDHOST *host, const char *function); + +bool rrd_function_has_this_original_result_callback(uuid_t *transaction, rrd_function_result_callback_t cb); + +#include "rrdfunctions-inline.h" +#include "rrdfunctions-inflight.h" +#include "rrdfunctions-exporters.h" +#include "rrdfunctions-streaming.h" +#include "rrdfunctions-progress.h" + +#endif // NETDATA_RRDFUNCTIONS_H diff --git a/database/rrdhost.c b/src/database/rrdhost.c index a3c272153..9c818618f 100644 --- a/database/rrdhost.c +++ b/src/database/rrdhost.c @@ -33,9 +33,9 @@ time_t rrdset_free_obsolete_time_s = 3600; time_t rrdhost_free_orphan_time_s = 3600; time_t rrdhost_free_ephemeral_time_s = 86400; -bool is_storage_engine_shared(STORAGE_INSTANCE *engine __maybe_unused) { +bool is_storage_engine_shared(STORAGE_INSTANCE *si __maybe_unused) { #ifdef ENABLE_DBENGINE - if(!rrdeng_is_legacy(engine)) + if(!rrdeng_is_legacy(si)) return true; #endif @@ -178,15 +178,6 @@ static inline RRDHOST *rrdhost_index_add_hostname(RRDHOST *host) { // ---------------------------------------------------------------------------- // RRDHOST - internal helpers -static inline void rrdhost_init_tags(RRDHOST *host, const char *tags) { - if(host->tags && tags && !strcmp(rrdhost_tags(host), tags)) - return; - - STRING *old = host->tags; - host->tags = string_strdupz((tags && *tags)?tags:NULL); - string_freez(old); -} - static inline void rrdhost_init_hostname(RRDHOST *host, const char *hostname, bool add_to_index) { if(unlikely(hostname && !*hostname)) hostname = NULL; @@ -229,9 +220,9 @@ static inline void rrdhost_init_timezone(RRDHOST *host, const char *timezone, co } void set_host_properties(RRDHOST *host, int update_every, RRD_MEMORY_MODE memory_mode, - const char *registry_hostname, const char *os, const char *tags, - const char *tzone, const char *abbrev_tzone, int32_t utc_offset, const char *program_name, - const char 
*program_version) + const char *registry_hostname, const char *os, const char *tzone, + const char *abbrev_tzone, int32_t utc_offset, const char *prog_name, + const char *prog_version) { host->rrd_update_every = update_every; @@ -239,10 +230,9 @@ void set_host_properties(RRDHOST *host, int update_every, RRD_MEMORY_MODE memory rrdhost_init_os(host, os); rrdhost_init_timezone(host, tzone, abbrev_tzone, utc_offset); - rrdhost_init_tags(host, tags); - host->program_name = string_strdupz((program_name && *program_name) ? program_name : "unknown"); - host->program_version = string_strdupz((program_version && *program_version) ? program_version : "unknown"); + host->program_name = string_strdupz((prog_name && *prog_name) ? prog_name : "unknown"); + host->program_version = string_strdupz((prog_version && *prog_version) ? prog_version : "unknown"); host->registry_hostname = string_strdupz((registry_hostname && *registry_hostname) ? registry_hostname : rrdhost_hostname(host)); } @@ -287,9 +277,8 @@ static RRDHOST *rrdhost_create( const char *timezone, const char *abbrev_timezone, int32_t utc_offset, - const char *tags, - const char *program_name, - const char *program_version, + const char *prog_name, + const char *prog_version, int update_every, long entries, RRD_MEMORY_MODE memory_mode, @@ -326,7 +315,9 @@ int is_legacy = 1; strncpyz(host->machine_guid, guid, GUID_LEN + 1); set_host_properties(host, (update_every > 0)?update_every:1, memory_mode, registry_hostname, os, - tags, timezone, abbrev_timezone, utc_offset, program_name, program_version); + timezone, abbrev_timezone, utc_offset, + prog_name, + prog_version); rrdhost_init_hostname(host, hostname, false); @@ -337,7 +328,7 @@ int is_legacy = 1; netdata_mutex_init(&host->receiver_lock); if (likely(!archived)) { - rrdfunctions_host_init(host); + rrd_functions_host_init(host); host->last_connected = now_realtime_sec(); host->rrdlabels = rrdlabels_create(); rrdhost_initialize_rrdpush_sender( @@ -356,8 +347,6 @@ int 
is_legacy = 1; switch(memory_mode) { default: case RRD_MEMORY_MODE_ALLOC: - case RRD_MEMORY_MODE_MAP: - case RRD_MEMORY_MODE_SAVE: case RRD_MEMORY_MODE_RAM: if(host->rrdpush_seconds_to_replicate > (time_t) host->rrd_history_entries * (time_t) host->rrd_update_every) host->rrdpush_seconds_to_replicate = (time_t) host->rrd_history_entries * (time_t) host->rrd_update_every; @@ -371,12 +360,6 @@ int is_legacy = 1; rrdset_index_init(host); - if(config_get_boolean(CONFIG_SECTION_DB, "delete obsolete charts files", 1)) - rrdhost_option_set(host, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS); - - if(config_get_boolean(CONFIG_SECTION_DB, "delete orphan hosts files", 1) && !is_localhost) - rrdhost_option_set(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST); - char filename[FILENAME_MAX + 1]; if(is_localhost) host->cache_dir = strdupz(netdata_configured_cache_dir); @@ -390,8 +373,8 @@ int is_legacy = 1; host->cache_dir = strdupz(filename); } - if((host->rrd_memory_mode == RRD_MEMORY_MODE_MAP || host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || - (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_legacy))) { + if(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_legacy) + { int r = mkdir(host->cache_dir, 0775); if(r != 0 && errno != EEXIST) nd_log(NDLS_DAEMON, NDLP_CRIT, @@ -409,8 +392,6 @@ int is_legacy = 1; else error_report("Host machine GUID %s is not valid", host->machine_guid); - rrdfamily_index_init(host); - rrdcalctemplate_index_init(host); rrdcalc_rrdhost_index_init(host); if (host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { @@ -436,13 +417,13 @@ int is_legacy = 1; host->db[0].tier_grouping = get_tier_grouping(0); ret = rrdeng_init( - (struct rrdengine_instance **)&host->db[0].instance, + (struct rrdengine_instance **)&host->db[0].si, dbenginepath, default_rrdeng_disk_quota_mb, 0); // may fail here for legacy dbengine initialization if(ret == 0) { - rrdeng_readiness_wait((struct rrdengine_instance *)host->db[0].instance); + rrdeng_readiness_wait((struct 
rrdengine_instance *)host->db[0].si); // assign the rest of the shared storage instances to it // to allow them collect its metrics too @@ -450,7 +431,7 @@ int is_legacy = 1; for(size_t tier = 1; tier < storage_tiers ; tier++) { host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE; host->db[tier].eng = storage_engine_get(host->db[tier].mode); - host->db[tier].instance = (STORAGE_INSTANCE *) multidb_ctx[tier]; + host->db[tier].si = (STORAGE_INSTANCE *) multidb_ctx[tier]; host->db[tier].tier_grouping = get_tier_grouping(tier); } } @@ -459,7 +440,7 @@ int is_legacy = 1; for(size_t tier = 0; tier < storage_tiers ; tier++) { host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE; host->db[tier].eng = storage_engine_get(host->db[tier].mode); - host->db[tier].instance = (STORAGE_INSTANCE *)multidb_ctx[tier]; + host->db[tier].si = (STORAGE_INSTANCE *)multidb_ctx[tier]; host->db[tier].tier_grouping = get_tier_grouping(tier); } } @@ -483,7 +464,7 @@ int is_legacy = 1; else { host->db[0].mode = host->rrd_memory_mode; host->db[0].eng = storage_engine_get(host->db[0].mode); - host->db[0].instance = NULL; + host->db[0].si = NULL; host->db[0].tier_grouping = get_tier_grouping(0); #ifdef ENABLE_DBENGINE @@ -491,7 +472,7 @@ int is_legacy = 1; for(size_t tier = 1; tier < storage_tiers ; tier++) { host->db[tier].mode = RRD_MEMORY_MODE_DBENGINE; host->db[tier].eng = storage_engine_get(host->db[tier].mode); - host->db[tier].instance = (STORAGE_INSTANCE *) multidb_ctx[tier]; + host->db[tier].si = (STORAGE_INSTANCE *) multidb_ctx[tier]; host->db[tier].tier_grouping = get_tier_grouping(tier); } #endif @@ -541,7 +522,6 @@ int is_legacy = 1; "Host '%s' (at registry as '%s') with guid '%s' initialized" ", os '%s'" ", timezone '%s'" - ", tags '%s'" ", program_name '%s'" ", program_version '%s'" ", update every %d" @@ -558,7 +538,6 @@ int is_legacy = 1; , host->machine_guid , rrdhost_os(host) , rrdhost_timezone(host) - , rrdhost_tags(host) , rrdhost_program_name(host) , rrdhost_program_version(host) , 
host->rrd_update_every @@ -573,9 +552,6 @@ int is_legacy = 1; , string2str(host->health.health_default_recipient) ); - host->configurable_plugins = dyncfg_dictionary_create(); - dictionary_register_delete_callback(host->configurable_plugins, plugin_del_cb, NULL); - if(!archived) { metaqueue_host_update_info(host); rrdhost_load_rrdcontext_data(host); @@ -588,28 +564,27 @@ int is_legacy = 1; } static void rrdhost_update(RRDHOST *host - , const char *hostname - , const char *registry_hostname - , const char *guid - , const char *os - , const char *timezone - , const char *abbrev_timezone - , int32_t utc_offset - , const char *tags - , const char *program_name - , const char *program_version - , int update_every - , long history - , RRD_MEMORY_MODE mode - , unsigned int health_enabled - , unsigned int rrdpush_enabled - , char *rrdpush_destination - , char *rrdpush_api_key - , char *rrdpush_send_charts_matching - , bool rrdpush_enable_replication - , time_t rrdpush_seconds_to_replicate - , time_t rrdpush_replication_step - , struct rrdhost_system_info *system_info + , const char *hostname + , const char *registry_hostname + , const char *guid + , const char *os + , const char *timezone + , const char *abbrev_timezone + , int32_t utc_offset + , const char *prog_name + , const char *prog_version + , int update_every + , long history + , RRD_MEMORY_MODE mode + , unsigned int health_enabled + , unsigned int rrdpush_enabled + , char *rrdpush_destination + , char *rrdpush_api_key + , char *rrdpush_send_charts_matching + , bool rrdpush_enable_replication + , time_t rrdpush_seconds_to_replicate + , time_t rrdpush_replication_step + , struct rrdhost_system_info *system_info ) { UNUSED(guid); @@ -641,23 +616,25 @@ static void rrdhost_update(RRDHOST *host rrdhost_index_add_hostname(host); } - if(strcmp(rrdhost_program_name(host), program_name) != 0) { + if(strcmp(rrdhost_program_name(host), prog_name) != 0) { nd_log(NDLS_DAEMON, NDLP_NOTICE, "Host '%s' switched program name from 
'%s' to '%s'", - rrdhost_hostname(host), rrdhost_program_name(host), program_name); + rrdhost_hostname(host), rrdhost_program_name(host), + prog_name); STRING *t = host->program_name; - host->program_name = string_strdupz(program_name); + host->program_name = string_strdupz(prog_name); string_freez(t); } - if(strcmp(rrdhost_program_version(host), program_version) != 0) { + if(strcmp(rrdhost_program_version(host), prog_version) != 0) { nd_log(NDLS_DAEMON, NDLP_NOTICE, "Host '%s' switched program version from '%s' to '%s'", - rrdhost_hostname(host), rrdhost_program_version(host), program_version); + rrdhost_hostname(host), rrdhost_program_version(host), + prog_version); STRING *t = host->program_version; - host->program_version = string_strdupz(program_version); + host->program_version = string_strdupz(prog_version); string_freez(t); } @@ -683,9 +660,6 @@ static void rrdhost_update(RRDHOST *host host->rrd_history_entries, history); - // update host tags - rrdhost_init_tags(host, tags); - if(!host->rrdvars) host->rrdvars = rrdvariables_create(); @@ -694,7 +668,7 @@ static void rrdhost_update(RRDHOST *host if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) { rrdhost_flag_clear(host, RRDHOST_FLAG_ARCHIVED); - rrdfunctions_host_init(host); + rrd_functions_host_init(host); if(!host->rrdlabels) host->rrdlabels = rrdlabels_create(); @@ -708,8 +682,6 @@ static void rrdhost_update(RRDHOST *host rrdpush_api_key, rrdpush_send_charts_matching); - rrdfamily_index_init(host); - rrdcalctemplate_index_init(host); rrdcalc_rrdhost_index_init(host); if(rrdpush_enable_replication) @@ -732,29 +704,28 @@ static void rrdhost_update(RRDHOST *host } RRDHOST *rrdhost_find_or_create( - const char *hostname - , const char *registry_hostname - , const char *guid - , const char *os - , const char *timezone - , const char *abbrev_timezone - , int32_t utc_offset - , const char *tags - , const char *program_name - , const char *program_version - , int update_every - , long history - , 
RRD_MEMORY_MODE mode - , unsigned int health_enabled - , unsigned int rrdpush_enabled - , char *rrdpush_destination - , char *rrdpush_api_key - , char *rrdpush_send_charts_matching - , bool rrdpush_enable_replication - , time_t rrdpush_seconds_to_replicate - , time_t rrdpush_replication_step - , struct rrdhost_system_info *system_info - , bool archived + const char *hostname + , const char *registry_hostname + , const char *guid + , const char *os + , const char *timezone + , const char *abbrev_timezone + , int32_t utc_offset + , const char *prog_name + , const char *prog_version + , int update_every + , long history + , RRD_MEMORY_MODE mode + , unsigned int health_enabled + , unsigned int rrdpush_enabled + , char *rrdpush_destination + , char *rrdpush_api_key + , char *rrdpush_send_charts_matching + , bool rrdpush_enable_replication + , time_t rrdpush_seconds_to_replicate + , time_t rrdpush_replication_step + , struct rrdhost_system_info *system_info + , bool archived ) { RRDHOST *host = rrdhost_find_by_guid(guid); if (unlikely(host && host->rrd_memory_mode != mode && rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED))) { @@ -784,9 +755,8 @@ RRDHOST *rrdhost_find_or_create( , timezone , abbrev_timezone , utc_offset - , tags - , program_name - , program_version + , prog_name + , prog_version , update_every , history , mode @@ -813,9 +783,8 @@ RRDHOST *rrdhost_find_or_create( , timezone , abbrev_timezone , utc_offset - , tags - , program_name - , program_version + , prog_name + , prog_version , update_every , history , mode @@ -904,23 +873,12 @@ void dbengine_init(char *hostname) { struct dbengine_initialization tiers_init[RRD_STORAGE_TIERS] = {}; + bool tiers_adjusted = false; size_t created_tiers = 0; char dbenginepath[FILENAME_MAX + 1]; char dbengineconfig[200 + 1]; int divisor = 1; for(size_t tier = 0; tier < storage_tiers ;tier++) { - if(tier == 0) - snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", netdata_configured_cache_dir); - else - 
snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine-tier%zu", netdata_configured_cache_dir, tier); - - int ret = mkdir(dbenginepath, 0775); - if (ret != 0 && errno != EEXIST) { - nd_log(NDLS_DAEMON, NDLP_CRIT, - "DBENGINE on '%s': cannot create directory '%s'", - hostname, dbenginepath); - break; - } if(tier > 0) divisor *= 2; @@ -949,10 +907,7 @@ void dbengine_init(char *hostname) { else if(strcmp(bf, "full") == 0) backfill = RRD_BACKFILL_FULL; else if(strcmp(bf, "none") == 0) backfill = RRD_BACKFILL_NONE; else { - nd_log(NDLS_DAEMON, NDLP_WARNING, - "DBENGINE: unknown backfill value '%s', assuming 'new'", - bf); - + nd_log(NDLS_DAEMON, NDLP_WARNING, "DBENGINE: unknown backfill value '%s', assuming 'new'", bf); config_set(CONFIG_SECTION_DB, dbengineconfig, "new"); backfill = RRD_BACKFILL_NEW; } @@ -965,8 +920,21 @@ void dbengine_init(char *hostname) { storage_tiers_grouping_iterations[tier] = 1; nd_log(NDLS_DAEMON, NDLP_WARNING, "DBENGINE on '%s': dbengine tier %zu gives aggregation of more than 65535 points of tier 0. 
" - "Disabling tiers above %zu", + "Disabling tiers %zu and above", hostname, tier, tier); + storage_tiers = tier; + tiers_adjusted = true; + break; + } + + if(tier == 0) + snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine", netdata_configured_cache_dir); + else + snprintfz(dbenginepath, FILENAME_MAX, "%s/dbengine-tier%zu", netdata_configured_cache_dir, tier); + + int ret = mkdir(dbenginepath, 0775); + if (ret != 0 && errno != EEXIST) { + nd_log(NDLS_DAEMON, NDLP_CRIT, "DBENGINE on '%s': cannot create directory '%s'", hostname, dbenginepath); break; } @@ -986,6 +954,8 @@ void dbengine_init(char *hostname) { else dbengine_tier_init(&tiers_init[tier]); } + if (tiers_adjusted) + config_set_number(CONFIG_SECTION_DB, "storage tiers", storage_tiers); for(size_t tier = 0; tier < storage_tiers ;tier++) { void *ptr; @@ -1033,7 +1003,7 @@ void dbengine_init(char *hostname) { int rrd_init(char *hostname, struct rrdhost_system_info *system_info, bool unittest) { rrdhost_init(); - if (unlikely(sql_init_database(DB_CHECK_NONE, system_info ? 0 : 1))) { + if (unlikely(sql_init_meta_database(DB_CHECK_NONE, system_info ? 
0 : 1))) { if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) { set_late_global_environment(system_info); fatal("Failed to initialize SQLite"); @@ -1051,7 +1021,6 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info, bool unitt dbengine_enabled = true; } else { - health_init(); rrdpush_init(); if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE || rrdpush_receiver_needs_dbengine()) { @@ -1093,13 +1062,12 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info, bool unitt , netdata_configured_timezone , netdata_configured_abbrev_timezone , netdata_configured_utc_offset - , "" , program_name , program_version , default_rrd_update_every , default_rrd_history_entries , default_rrd_memory_mode - , default_health_enabled + , health_plugin_enabled() , default_rrdpush_enabled , default_rrdpush_destination , default_rrdpush_api_key @@ -1112,23 +1080,34 @@ int rrd_init(char *hostname, struct rrdhost_system_info *system_info, bool unitt , 0 ); - if (unlikely(!localhost)) { + if (unlikely(!localhost)) return 1; + + dyncfg_host_init(localhost); + + if(!unittest) { + health_plugin_init(); } // we register this only on localhost // for the other nodes, the origin server should register it - rrd_collector_started(); // this creates a collector that runs for as long as netdata runs - rrd_function_add(localhost, NULL, "streaming", 10, - RRDFUNCTIONS_STREAMING_HELP, true, - rrdhost_function_streaming, NULL); + rrd_function_add_inline(localhost, NULL, "streaming", 10, + RRDFUNCTIONS_PRIORITY_DEFAULT + 1, RRDFUNCTIONS_STREAMING_HELP, "top", + HTTP_ACCESS_SIGNED_ID | HTTP_ACCESS_SAME_SPACE | HTTP_ACCESS_SENSITIVE_DATA, + rrdhost_function_streaming); + + rrd_function_add_inline(localhost, NULL, "netdata-api-calls", 10, + RRDFUNCTIONS_PRIORITY_DEFAULT + 2, RRDFUNCTIONS_PROGRESS_HELP, "top", + HTTP_ACCESS_SIGNED_ID | HTTP_ACCESS_SAME_SPACE | HTTP_ACCESS_SENSITIVE_DATA, + rrdhost_function_progress); if (likely(system_info)) { - 
migrate_localhost(&localhost->host_uuid); + detect_machine_guid_change(&localhost->host_uuid); sql_aclk_sync_init(); web_client_api_v1_management_init(); } - return localhost==NULL; + + return 0; } // ---------------------------------------------------------------------------- @@ -1261,16 +1240,15 @@ void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) { #ifdef ENABLE_DBENGINE for(size_t tier = 0; tier < storage_tiers ;tier++) { if(host->db[tier].mode == RRD_MEMORY_MODE_DBENGINE - && host->db[tier].instance - && !is_storage_engine_shared(host->db[tier].instance)) - rrdeng_prepare_exit((struct rrdengine_instance *)host->db[tier].instance); + && host->db[tier].si + && !is_storage_engine_shared(host->db[tier].si)) + rrdeng_prepare_exit((struct rrdengine_instance *)host->db[tier].si); } #endif // delete all the RRDSETs of the host rrdset_index_destroy(host); rrdcalc_rrdhost_index_destroy(host); - rrdcalctemplate_index_destroy(host); // cleanup ML resources ml_host_delete(host); @@ -1282,9 +1260,9 @@ void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) { #ifdef ENABLE_DBENGINE for(size_t tier = 0; tier < storage_tiers ;tier++) { if(host->db[tier].mode == RRD_MEMORY_MODE_DBENGINE - && host->db[tier].instance - && !is_storage_engine_shared(host->db[tier].instance)) - rrdeng_exit((struct rrdengine_instance *)host->db[tier].instance); + && host->db[tier].si + && !is_storage_engine_shared(host->db[tier].si)) + rrdeng_exit((struct rrdengine_instance *)host->db[tier].si); } #endif @@ -1303,7 +1281,6 @@ void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) { pthread_mutex_destroy(&host->aclk_state_lock); freez(host->aclk_state.claimed_id); freez(host->aclk_state.prev_claimed_id); - string_freez(host->tags); rrdlabels_destroy(host->rrdlabels); string_freez(host->os); string_freez(host->timezone); @@ -1321,11 +1298,10 @@ void rrdhost_free___while_having_rrd_wrlock(RRDHOST *host, bool force) { 
simple_pattern_free(host->rrdpush_send_charts_matching); freez(host->node_id); - rrdfamily_index_destroy(host); - rrdfunctions_host_destroy(host); + rrd_functions_host_destroy(host); rrdvariables_destroy(host->rrdvars); if (host == localhost) - rrdvariables_destroy(health_rrdvars); + health_plugin_destroy(); rrdhost_destroy_rrdcontexts(host); @@ -1356,26 +1332,6 @@ void rrd_finalize_collection_for_all_hosts(void) { dfe_done(host); } -// ---------------------------------------------------------------------------- -// RRDHOST - save host files - -void rrdhost_save_charts(RRDHOST *host) { - if(!host) return; - - nd_log(NDLS_DAEMON, NDLP_DEBUG, - "RRD: 'host:%s' saving / closing database...", - rrdhost_hostname(host)); - - RRDSET *st; - - // we get a write lock - // to ensure only one thread is saving the database - rrdset_foreach_write(st, host) { - rrdset_save(st); - } - rrdset_foreach_done(st); -} - struct rrdhost_system_info *rrdhost_labels_to_system_info(RRDLABELS *labels) { struct rrdhost_system_info *info = callocz(1, sizeof(struct rrdhost_system_info)); info->hops = 1; @@ -1476,6 +1432,9 @@ static void rrdhost_load_auto_labels(void) { rrdlabels_add(labels, "_is_parent", (localhost->connected_children_count > 0) ? 
"true" : "false", RRDLABEL_SRC_AUTO); + rrdlabels_add(labels, "_hostname", string2str(localhost->hostname), RRDLABEL_SRC_AUTO); + rrdlabels_add(labels, "_os", string2str(localhost->os), RRDLABEL_SRC_AUTO); + if (localhost->rrdpush_send_destination) rrdlabels_add(labels, "_streams_to", localhost->rrdpush_send_destination, RRDLABEL_SRC_AUTO); } @@ -1564,6 +1523,12 @@ void reload_host_labels(void) { } void rrdhost_finalize_collection(RRDHOST *host) { + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_TXT(NDF_NIDL_NODE, rrdhost_hostname(host)), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + nd_log(NDLS_DAEMON, NDLP_DEBUG, "RRD: 'host:%s' stopping data collection...", rrdhost_hostname(host)); @@ -1575,103 +1540,6 @@ void rrdhost_finalize_collection(RRDHOST *host) { } // ---------------------------------------------------------------------------- -// RRDHOST - delete host files - -void rrdhost_delete_charts(RRDHOST *host) { - if(!host) return; - - nd_log(NDLS_DAEMON, NDLP_DEBUG, - "RRD: 'host:%s' deleting disk files...", - rrdhost_hostname(host)); - - RRDSET *st; - - if(host->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || host->rrd_memory_mode == RRD_MEMORY_MODE_MAP) { - // we get a write lock - // to ensure only one thread is saving the database - rrdset_foreach_write(st, host){ - rrdset_delete_files(st); - } - rrdset_foreach_done(st); - } - - recursively_delete_dir(host->cache_dir, "left over host"); -} - -// ---------------------------------------------------------------------------- -// RRDHOST - cleanup host files - -void rrdhost_cleanup_charts(RRDHOST *host) { - if(!host) return; - - nd_log(NDLS_DAEMON, NDLP_DEBUG, - "RRD: 'host:%s' cleaning up disk files...", - rrdhost_hostname(host)); - - RRDSET *st; - uint32_t rrdhost_delete_obsolete_charts = rrdhost_option_check(host, RRDHOST_OPTION_DELETE_OBSOLETE_CHARTS); - - // we get a write lock - // to ensure only one thread is saving the database - rrdset_foreach_write(st, host) { - - if(rrdhost_delete_obsolete_charts && 
rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE)) - rrdset_delete_files(st); - - else if(rrdhost_delete_obsolete_charts && rrdset_flag_check(st, RRDSET_FLAG_OBSOLETE_DIMENSIONS)) - rrdset_delete_obsolete_dimensions(st); - - else - rrdset_save(st); - - } - rrdset_foreach_done(st); -} - - -// ---------------------------------------------------------------------------- -// RRDHOST - save all hosts to disk - -void rrdhost_save_all(void) { - nd_log(NDLS_DAEMON, NDLP_DEBUG, - "RRD: saving databases [%zu hosts(s)]...", - rrdhost_hosts_available()); - - rrd_rdlock(); - - RRDHOST *host; - rrdhost_foreach_read(host) - rrdhost_save_charts(host); - - rrd_unlock(); -} - -// ---------------------------------------------------------------------------- -// RRDHOST - save or delete all hosts from disk - -void rrdhost_cleanup_all(void) { - nd_log(NDLS_DAEMON, NDLP_DEBUG, - "RRD: cleaning up database [%zu hosts(s)]...", - rrdhost_hosts_available()); - - rrd_rdlock(); - - RRDHOST *host; - rrdhost_foreach_read(host) { - if (host != localhost && rrdhost_option_check(host, RRDHOST_OPTION_DELETE_ORPHAN_HOST) && !host->receiver - /* don't delete multi-host DB host files */ - && !(host->rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE && is_storage_engine_shared(host->db[0].instance)) - ) - rrdhost_delete_charts(host); - else - rrdhost_cleanup_charts(host); - } - - rrd_unlock(); -} - - -// ---------------------------------------------------------------------------- // RRDHOST - set system info from environment variables // system_info fields must be heap allocated or NULL int rrdhost_set_system_info_variable(struct rrdhost_system_info *system_info, char *name, char *value) { @@ -1843,6 +1711,10 @@ void rrdhost_status(RRDHOST *host, time_t now, RRDHOST_STATUS *s) { RRDHOST_FLAGS flags = __atomic_load_n(&host->flags, __ATOMIC_RELAXED); + // --- dyncfg --- + + s->dyncfg.status = dyncfg_available_for_rrdhost(host) ? 
RRDHOST_DYNCFG_STATUS_AVAILABLE : RRDHOST_DYNCFG_STATUS_UNAVAILABLE; + // --- db --- bool online = rrdhost_is_online(host); @@ -1852,7 +1724,7 @@ void rrdhost_status(RRDHOST *host, time_t now, RRDHOST_STATUS *s) { s->db.instances = host->rrdctx.instances; s->db.contexts = dictionary_entries(host->rrdctx.contexts); if(!s->db.first_time_s || !s->db.last_time_s || !s->db.metrics || !s->db.instances || !s->db.contexts || - (flags & (RRDHOST_FLAG_PENDING_CONTEXT_LOAD|RRDHOST_FLAG_CONTEXT_LOAD_IN_PROGRESS))) + (flags & (RRDHOST_FLAG_PENDING_CONTEXT_LOAD))) s->db.status = RRDHOST_DB_STATUS_INITIALIZING; else s->db.status = RRDHOST_DB_STATUS_QUERYABLE; diff --git a/database/rrdlabels.c b/src/database/rrdlabels.c index 69ee55526..9ea1d7c58 100644 --- a/database/rrdlabels.c +++ b/src/database/rrdlabels.c @@ -448,7 +448,7 @@ __attribute__((constructor)) void initialize_labels_keys_char_map(void) { label_names_char_map[' '] = '_'; label_names_char_map['\\'] = '/'; - // create the spaces map + // create the space map for(i = 0; i < 256 ;i++) label_spaces_char_map[i] = (isspace(i) || iscntrl(i) || !isprint(i))?1:0; @@ -460,8 +460,8 @@ __attribute__((constructor)) void initialize_label_stats(void) { dictionary_stats_category_rrdlabels.memory.values = 0; } -size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length) { - if(unlikely(!dst_size)) return 0; +size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, const unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length) { + if(unlikely(!src || !dst_size)) return 0; if(unlikely(!src || !*src)) { strncpyz((char *)dst, empty, dst_size); @@ -476,7 +476,7 @@ size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_si // make room for the final string termination unsigned char *end = &d[dst_size - 1]; - // copy while converting, but keep only one white space + // 
copy while converting, but keep only one space // we start wil last_is_space = 1 to skip leading spaces int last_is_space = 1; @@ -671,8 +671,11 @@ void rrdlabels_destroy(RRDLABELS *labels) freez(labels); } +// // Check in labels to see if we have the key specified in label -static RRDLABEL *rrdlabels_find_label_with_key_unsafe(RRDLABELS *labels, RRDLABEL *label) +// same_value indicates if the value should also be matched +// +static RRDLABEL *rrdlabels_find_label_with_key_unsafe(RRDLABELS *labels, RRDLABEL *label, bool same_value) { if (unlikely(!labels)) return NULL; @@ -683,7 +686,7 @@ static RRDLABEL *rrdlabels_find_label_with_key_unsafe(RRDLABELS *labels, RRDLABE RRDLABEL *found = NULL; while ((PValue = JudyLFirstThenNext(labels->JudyL, &Index, &first_then_next))) { RRDLABEL *lb = (RRDLABEL *)Index; - if (lb->index.key == label->index.key && lb != label) { + if (lb->index.key == label->index.key && ((lb == label) == same_value)) { found = (RRDLABEL *)Index; break; } @@ -718,7 +721,7 @@ static void labels_add_already_sanitized(RRDLABELS *labels, const char *key, con new_ls |= RRDLABEL_FLAG_NEW; *((RRDLABEL_SRC *)PValue) = new_ls; - RRDLABEL *old_label_with_same_key = rrdlabels_find_label_with_key_unsafe(labels, new_label); + RRDLABEL *old_label_with_same_key = rrdlabels_find_label_with_key_unsafe(labels, new_label, false); if (old_label_with_same_key) { (void) JudyLDel(&labels->JudyL, (Word_t) old_label_with_same_key, PJE0); delete_label(old_label_with_same_key); @@ -982,6 +985,25 @@ int rrdlabels_walkthrough_read(RRDLABELS *labels, int (*callback)(const char *na return ret; } +static SIMPLE_PATTERN_RESULT rrdlabels_walkthrough_read_sp(RRDLABELS *labels, SIMPLE_PATTERN_RESULT (*callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *data) +{ + SIMPLE_PATTERN_RESULT ret = SP_NOT_MATCHED; + + if(unlikely(!labels || !callback)) return 0; + + RRDLABEL *lb; + RRDLABEL_SRC ls; + lfe_start_read(labels, lb, ls) + { + ret = 
callback(string2str(lb->index.key), string2str(lb->index.value), ls, data); + if (ret != SP_NOT_MATCHED) + break; + } + lfe_done(labels); + + return ret; +} + // ---------------------------------------------------------------------------- // rrdlabels_migrate_to_these() // migrate an existing label list to a new list @@ -1027,6 +1049,39 @@ void rrdlabels_migrate_to_these(RRDLABELS *dst, RRDLABELS *src) { spinlock_unlock(&dst->spinlock); } +// +// +// Return the common labels count in labels1, labels2 +// +size_t rrdlabels_common_count(RRDLABELS *labels1, RRDLABELS *labels2) +{ + if (!labels1 || !labels2) + return 0; + + if (labels1 == labels2) + return rrdlabels_entries(labels1); + + RRDLABEL *label; + RRDLABEL_SRC ls; + + spinlock_lock(&labels1->spinlock); + spinlock_lock(&labels2->spinlock); + + size_t count = 0; + lfe_start_nolock(labels2, label, ls) + { + RRDLABEL *old_label_with_key = rrdlabels_find_label_with_key_unsafe(labels1, label, true); + if (old_label_with_key) + count++; + } + lfe_done_nolock(); + + spinlock_unlock(&labels2->spinlock); + spinlock_unlock(&labels1->spinlock); + return count; +} + + void rrdlabels_copy(RRDLABELS *dst, RRDLABELS *src) { if (!dst || !src || (dst == src)) @@ -1042,7 +1097,7 @@ void rrdlabels_copy(RRDLABELS *dst, RRDLABELS *src) bool update_statistics = false; lfe_start_nolock(src, label, ls) { - RRDLABEL *old_label_with_key = rrdlabels_find_label_with_key_unsafe(dst, label); + RRDLABEL *old_label_with_key = rrdlabels_find_label_with_key_unsafe(dst, label, false); Pvoid_t *PValue = JudyLIns(&dst->JudyL, (Word_t)label, PJE0); if(unlikely(!PValue || PValue == PJERR)) fatal("RRDLABELS: corrupted labels array"); @@ -1083,18 +1138,16 @@ struct simple_pattern_match_name_value { char equal; }; -static int simple_pattern_match_name_only_callback(const char *name, const char *value, RRDLABEL_SRC ls __maybe_unused, void *data) { +static SIMPLE_PATTERN_RESULT simple_pattern_match_name_only_callback(const char *name, const char *value, 
RRDLABEL_SRC ls __maybe_unused, void *data) { struct simple_pattern_match_name_value *t = (struct simple_pattern_match_name_value *)data; (void)value; // we return -1 to stop the walkthrough on first match t->searches++; - if(simple_pattern_matches(t->pattern, name)) return -1; - - return 0; + return simple_pattern_matches_extract(t->pattern, name, NULL, 0); } -static int simple_pattern_match_name_and_value_callback(const char *name, const char *value, RRDLABEL_SRC ls __maybe_unused, void *data) { +static SIMPLE_PATTERN_RESULT simple_pattern_match_name_and_value_callback(const char *name, const char *value, RRDLABEL_SRC ls __maybe_unused, void *data) { struct simple_pattern_match_name_value *t = (struct simple_pattern_match_name_value *)data; // we return -1 to stop the walkthrough on first match @@ -1118,13 +1171,10 @@ static int simple_pattern_match_name_and_value_callback(const char *name, const *dst = '\0'; t->searches++; - if(simple_pattern_matches_length_extract(t->pattern, tmp, dst - tmp, NULL, 0) == SP_MATCHED_POSITIVE) - return -1; - - return 0; + return simple_pattern_matches_length_extract(t->pattern, tmp, dst - tmp, NULL, 0); } -bool rrdlabels_match_simple_pattern_parsed(RRDLABELS *labels, SIMPLE_PATTERN *pattern, char equal, size_t *searches) { +SIMPLE_PATTERN_RESULT rrdlabels_match_simple_pattern_parsed(RRDLABELS *labels, SIMPLE_PATTERN *pattern, char equal, size_t *searches) { if (!labels) return false; struct simple_pattern_match_name_value t = { @@ -1133,12 +1183,12 @@ bool rrdlabels_match_simple_pattern_parsed(RRDLABELS *labels, SIMPLE_PATTERN *pa .equal = equal }; - int ret = rrdlabels_walkthrough_read(labels, equal?simple_pattern_match_name_and_value_callback:simple_pattern_match_name_only_callback, &t); + SIMPLE_PATTERN_RESULT ret = rrdlabels_walkthrough_read_sp(labels, equal?simple_pattern_match_name_and_value_callback:simple_pattern_match_name_only_callback, &t); if(searches) *searches = t.searches; - return (ret == -1)?true:false; + return 
ret; } bool rrdlabels_match_simple_pattern(RRDLABELS *labels, const char *simple_pattern_txt) { @@ -1155,11 +1205,11 @@ bool rrdlabels_match_simple_pattern(RRDLABELS *labels, const char *simple_patter } } - bool ret = rrdlabels_match_simple_pattern_parsed(labels, pattern, equal, NULL); + SIMPLE_PATTERN_RESULT ret = rrdlabels_match_simple_pattern_parsed(labels, pattern, equal, NULL); simple_pattern_free(pattern); - return ret; + return ret == SP_MATCHED_POSITIVE; } @@ -1311,6 +1361,153 @@ void rrdset_update_rrdlabels(RRDSET *st, RRDLABELS *new_rrdlabels) { rrdset_metadata_updated(st); } +struct pattern_array *pattern_array_allocate() +{ + struct pattern_array *pa = callocz(1, sizeof(*pa)); + return pa; +} + +static void pattern_array_add_lblkey_with_sp(struct pattern_array *pa, const char *key, SIMPLE_PATTERN *sp) +{ + if (!pa || !key || !sp) + return; + + STRING *string_key = string_strdupz(key); + Pvoid_t *Pvalue = JudyLIns(&pa->JudyL, (Word_t) string_key, PJE0); + if (!Pvalue) { + string_freez(string_key); + simple_pattern_free(sp); + return; + } + + struct pattern_array_item *pai; + if (*Pvalue) { + pai = *Pvalue; + } else { + *Pvalue = pai = callocz(1, sizeof(*pai)); + pa->key_count++; + } + + pai->size++; + Pvalue = JudyLIns(&pai->JudyL, (Word_t) pai->size, PJE0); + if (!Pvalue) { + simple_pattern_free(sp); + return; + } + + *Pvalue = sp; +} + +bool pattern_array_label_match( + struct pattern_array *pa, + RRDLABELS *labels, + char eq, + size_t *searches) +{ + if (!pa || !labels) + return true; + + Pvoid_t *Pvalue; + Word_t Index = 0; + bool first_then_next = true; + while ((Pvalue = JudyLFirstThenNext(pa->JudyL, &Index, &first_then_next))) { + // for each label key in the patterns array + + struct pattern_array_item *pai = *Pvalue; + SIMPLE_PATTERN_RESULT match = SP_NOT_MATCHED ; + for (Word_t i = 1; i <= pai->size; i++) { + // for each pattern in the label key pattern list + + if (!(Pvalue = JudyLGet(pai->JudyL, i, PJE0)) || !*Pvalue) + continue; + + match = 
rrdlabels_match_simple_pattern_parsed(labels, (SIMPLE_PATTERN *)(*Pvalue), eq, searches); + + if(match != SP_NOT_MATCHED) + break; + } + + if (match != SP_MATCHED_POSITIVE) + return false; + } + return true; +} + +struct pattern_array *pattern_array_add_key_simple_pattern(struct pattern_array *pa, const char *key, SIMPLE_PATTERN *pattern) +{ + if (unlikely(!pattern || !key)) + return pa; + + if (!pa) + pa = pattern_array_allocate(); + + pattern_array_add_lblkey_with_sp(pa, key, pattern); + return pa; +} + +struct pattern_array *pattern_array_add_simple_pattern(struct pattern_array *pa, SIMPLE_PATTERN *pattern, char sep) +{ + if (unlikely(!pattern)) + return pa; + + if (!pa) + pa = pattern_array_allocate(); + + char *label_key; + while (pattern && (label_key = simple_pattern_iterate(&pattern))) { + char key[RRDLABELS_MAX_NAME_LENGTH + 1], *key_sep; + + if (unlikely(!label_key || !(key_sep = strchr(label_key, sep)))) + return pa; + + *key_sep = '\0'; + strncpyz(key, label_key, RRDLABELS_MAX_NAME_LENGTH); + *key_sep = sep; + + pattern_array_add_lblkey_with_sp(pa, key, string_to_simple_pattern(label_key)); + } + return pa; +} + +struct pattern_array *pattern_array_add_key_value(struct pattern_array *pa, const char *key, const char *value, char sep) +{ + if (unlikely(!key || !value)) + return pa; + + if (!pa) + pa = pattern_array_allocate(); + + char label_key[RRDLABELS_MAX_NAME_LENGTH + RRDLABELS_MAX_VALUE_LENGTH + 2]; + snprintfz(label_key, sizeof(label_key) - 1, "%s%c%s", key, sep, value); + pattern_array_add_lblkey_with_sp( + pa, key, simple_pattern_create(label_key, SIMPLE_PATTERN_DEFAULT_WEB_SEPARATORS, SIMPLE_PATTERN_EXACT, true)); + return pa; +} + +void pattern_array_free(struct pattern_array *pa) +{ + if (!pa) + return; + + Pvoid_t *Pvalue; + Word_t Index = 0; + while ((Pvalue = JudyLFirst(pa->JudyL, &Index, PJE0))) { + struct pattern_array_item *pai = *Pvalue; + + for (Word_t i = 1; i <= pai->size; i++) { + if (!(Pvalue = JudyLGet(pai->JudyL, i, PJE0))) + 
continue; + simple_pattern_free((SIMPLE_PATTERN *) (*Pvalue)); + } + JudyLFreeArray(&(pai->JudyL), PJE0); + + string_freez((STRING *)Index); + (void) JudyLDel(&(pa->JudyL), Index, PJE0); + freez(pai); + Index = 0; + } + freez(pa); +} // ---------------------------------------------------------------------------- // rrdlabels unit test @@ -1497,6 +1694,7 @@ static int rrdlabels_walkthrough_index_read(RRDLABELS *labels, int (*callback)(c ret = callback(string2str(lb->index.key), string2str(lb->index.value), ls, index, data); if (ret < 0) break; + index++; } lfe_done(labels); @@ -1513,6 +1711,70 @@ static int unittest_dump_labels(const char *name, const char *value, RRDLABEL_SR return 1; } +static int rrdlabels_unittest_pattern_check() +{ + fprintf(stderr, "\n%s() tests\n", __FUNCTION__); + int rc = 0; + + RRDLABELS *labels = NULL; + + labels = rrdlabels_create(); + + rrdlabels_add(labels, "_module", "disk_detection", RRDLABEL_SRC_CONFIG); + rrdlabels_add(labels, "_plugin", "super_plugin", RRDLABEL_SRC_CONFIG); + rrdlabels_add(labels, "key1", "value1", RRDLABEL_SRC_CONFIG); + rrdlabels_add(labels, "key2", "caterpillar", RRDLABEL_SRC_CONFIG); + rrdlabels_add(labels, "key3", "elephant", RRDLABEL_SRC_CONFIG); + rrdlabels_add(labels, "key4", "value4", RRDLABEL_SRC_CONFIG); + + bool match; + struct pattern_array *pa = pattern_array_add_key_value(NULL, "_module", "wrong_module", '='); + match = pattern_array_label_match(pa, labels, '=', NULL); + // This should not match: _module in ("wrong_module") + if (match) + rc++; + + pattern_array_add_key_value(pa, "_module", "disk_detection", '='); + match = pattern_array_label_match(pa, labels, '=', NULL); + // This should match: _module in ("wrong_module","disk_detection") + if (!match) + rc++; + + pattern_array_add_key_value(pa, "key1", "wrong_key1_value", '='); + match = pattern_array_label_match(pa, labels, '=', NULL); + // This should not match: _module in ("wrong_module","disk_detection") AND key1 in ("wrong_key1_value") + if 
(match) + rc++; + + pattern_array_add_key_value(pa, "key1", "value1", '='); + match = pattern_array_label_match(pa, labels, '=', NULL); + // This should match: _module in ("wrong_module","disk_detection") AND key1 in ("wrong_key1_value", "value1") + if (!match) + rc++; + + SIMPLE_PATTERN *sp = simple_pattern_create("key2=cat*,!d*", SIMPLE_PATTERN_DEFAULT_WEB_SEPARATORS, SIMPLE_PATTERN_EXACT, true); + pattern_array_add_lblkey_with_sp(pa, "key2", sp); + + sp = simple_pattern_create("key3=*phant", SIMPLE_PATTERN_DEFAULT_WEB_SEPARATORS, SIMPLE_PATTERN_EXACT, true); + pattern_array_add_lblkey_with_sp(pa, "key3", sp); + + match = pattern_array_label_match(pa, labels, '=', NULL); + // This should match: _module in ("wrong_module","disk_detection") AND key1 in ("wrong_key1_value", "value1") AND key2 in ("cat* !d*") AND key3 in ("*phant") + if (!match) + rc++; + + rrdlabels_add(labels, "key3", "now_fail", RRDLABEL_SRC_CONFIG); + match = pattern_array_label_match(pa, labels, '=', NULL); + // This should not match: _module in ("wrong_module","disk_detection") AND key1 in ("wrong_key1_value", "value1") AND key2 in ("cat* !d*") AND key3 in ("*phant") + if (match) + rc++; + + pattern_array_free(pa); + rrdlabels_destroy(labels); + + return rc; +} + static int rrdlabels_unittest_migrate_check() { fprintf(stderr, "\n%s() tests\n", __FUNCTION__); @@ -1594,6 +1856,69 @@ static int rrdlabels_unittest_migrate_check() return rc; } +struct pattern_array *trim_and_add_key_to_values(struct pattern_array *pa, const char *key, STRING *input); +static int rrdlabels_unittest_check_pattern_list(RRDLABELS *labels, const char *pattern, bool expected) { + fprintf(stderr, "rrdlabels_match_simple_pattern(labels, \"%s\") ... 
", pattern); + + STRING *str = string_strdupz(pattern); + struct pattern_array *pa = trim_and_add_key_to_values(NULL, NULL, str); + + bool ret = pattern_array_label_match(pa, labels, '=', NULL); + + fprintf(stderr, "%s, got %s expected %s\n", (ret == expected)?"OK":"FAILED", ret?"true":"false", expected?"true":"false"); + + string_freez(str); + pattern_array_free(pa); + + return (ret == expected)?0:1; +} + +static int rrdlabels_unittest_host_chart_labels() { + fprintf(stderr, "\n%s() tests\n", __FUNCTION__); + + int errors = 0; + + RRDLABELS *labels = rrdlabels_create(); + rrdlabels_add(labels, "_hostname", "hostname1", RRDLABEL_SRC_CONFIG); + rrdlabels_add(labels, "_os", "linux", RRDLABEL_SRC_CONFIG); + rrdlabels_add(labels, "_distro", "ubuntu", RRDLABEL_SRC_CONFIG); + + // match a single key + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=*", true); + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=!*", false); + + // conflicting keys (some positive, some negative) + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=* _os=!*", false); + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=!* _os=*", false); + + // the user uses a key that is not there + errors += rrdlabels_unittest_check_pattern_list(labels, "_not_a_key=*", false); + errors += rrdlabels_unittest_check_pattern_list(labels, "_not_a_key=!*", false); + errors += rrdlabels_unittest_check_pattern_list(labels, "_not_a_key=* _hostname=* _os=*", false); + errors += rrdlabels_unittest_check_pattern_list(labels, "_not_a_key=!* _hostname=* _os=*", false); + + // positive and negative matches on the same key + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=!*invalid* !*bad* *name*", true); + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=*name* !*invalid* !*bad*", true); + + // positive and negative matches on the same key with catch all + errors += rrdlabels_unittest_check_pattern_list(labels, 
"_hostname=!*invalid* !*bad* *", true); + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=* !*invalid* !*bad*", true); + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=!*invalid* !*name* *", false); + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=* !*invalid* !*name*", true); + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=*name* !*", true); + + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=!*name* _os=l*", false); + errors += rrdlabels_unittest_check_pattern_list(labels, "_os=l* hostname=!*name*", false); + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=*name* _hostname=*", true); + errors += rrdlabels_unittest_check_pattern_list(labels, "_hostname=*name* _os=l*", true); + errors += rrdlabels_unittest_check_pattern_list(labels, "_os=l* _hostname=*name*", true); + + rrdlabels_destroy(labels); + + return errors; +} + static int rrdlabels_unittest_check_simple_pattern(RRDLABELS *labels, const char *pattern, bool expected) { fprintf(stderr, "rrdlabels_match_simple_pattern(labels, \"%s\") ... 
", pattern); @@ -1686,9 +2011,12 @@ int rrdlabels_unittest(void) { errors += rrdlabels_unittest_sanitization(); errors += rrdlabels_unittest_add_pairs(); errors += rrdlabels_unittest_simple_pattern(); + errors += rrdlabels_unittest_host_chart_labels(); errors += rrdlabels_unittest_double_check(); errors += rrdlabels_unittest_migrate_check(); + errors += rrdlabels_unittest_pattern_check(); fprintf(stderr, "%d errors found\n", errors); return errors; } + diff --git a/database/rrdlabels.h b/src/database/rrdlabels.h index 64a0e2384..88b35cf92 100644 --- a/database/rrdlabels.h +++ b/src/database/rrdlabels.h @@ -5,6 +5,16 @@ #include "rrd.h" +struct pattern_array_item { + Word_t size; + Pvoid_t JudyL; +}; + +struct pattern_array { + Word_t key_count; + Pvoid_t JudyL; +}; + typedef enum __attribute__ ((__packed__)) rrdlabel_source { RRDLABEL_SRC_AUTO = (1 << 0), // set when Netdata found the label by some automation RRDLABEL_SRC_CONFIG = (1 << 1), // set when the user configured the label @@ -20,7 +30,7 @@ typedef enum __attribute__ ((__packed__)) rrdlabel_source { #define RRDLABEL_FLAG_INTERNAL (RRDLABEL_FLAG_OLD | RRDLABEL_FLAG_NEW | RRDLABEL_FLAG_DONT_DELETE) -size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length); +size_t text_sanitize(unsigned char *dst, const unsigned char *src, size_t dst_size, const unsigned char *char_map, bool utf, const char *empty, size_t *multibyte_length); RRDLABELS *rrdlabels_create(void); void rrdlabels_destroy(RRDLABELS *labels_dict); @@ -41,7 +51,7 @@ int rrdlabels_walkthrough_read(RRDLABELS *labels, int (*callback)(const char *na void rrdlabels_log_to_buffer(RRDLABELS *labels, BUFFER *wb); bool rrdlabels_match_simple_pattern(RRDLABELS *labels, const char *simple_pattern_txt); -bool rrdlabels_match_simple_pattern_parsed(RRDLABELS *labels, SIMPLE_PATTERN *pattern, char equal, size_t *searches); +SIMPLE_PATTERN_RESULT 
rrdlabels_match_simple_pattern_parsed(RRDLABELS *labels, SIMPLE_PATTERN *pattern, char equal, size_t *searches); int rrdlabels_to_buffer(RRDLABELS *labels, BUFFER *wb, const char *before_each, const char *equal, const char *quote, const char *between_them, bool (*filter_callback)(const char *name, const char *value, RRDLABEL_SRC ls, void *data), void *filter_data, void (*name_sanitizer)(char *dst, const char *src, size_t dst_size), @@ -50,6 +60,20 @@ void rrdlabels_to_buffer_json_members(RRDLABELS *labels, BUFFER *wb); void rrdlabels_migrate_to_these(RRDLABELS *dst, RRDLABELS *src); void rrdlabels_copy(RRDLABELS *dst, RRDLABELS *src); +size_t rrdlabels_common_count(RRDLABELS *labels1, RRDLABELS *labels2); + +struct pattern_array *pattern_array_allocate(); +struct pattern_array * +pattern_array_add_key_value(struct pattern_array *pa, const char *key, const char *value, char sep); +bool pattern_array_label_match( + struct pattern_array *pa, + RRDLABELS *labels, + char eq, + size_t *searches); +struct pattern_array *pattern_array_add_simple_pattern(struct pattern_array *pa, SIMPLE_PATTERN *pattern, char sep); +struct pattern_array * +pattern_array_add_key_simple_pattern(struct pattern_array *pa, const char *key, SIMPLE_PATTERN *pattern); +void pattern_array_free(struct pattern_array *pa); int rrdlabels_unittest(void); diff --git a/database/rrdset.c b/src/database/rrdset.c index f4bb48aa7..fc206585d 100644 --- a/database/rrdset.c +++ b/src/database/rrdset.c @@ -2,7 +2,6 @@ #define NETDATA_RRD_INTERNALS #include "rrd.h" -#include <sched.h> #include "storage_engine.h" @@ -270,34 +269,19 @@ static void rrdset_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, v rw_spinlock_init(&st->alerts.spinlock); - if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || st->rrd_memory_mode == RRD_MEMORY_MODE_MAP) { - if(!rrdset_memory_load_or_create_map_save(st, st->rrd_memory_mode)) { - netdata_log_info("Failed to use db mode %s for chart '%s', falling back to ram mode.", 
(st->rrd_memory_mode == RRD_MEMORY_MODE_MAP)?"map":"save", rrdset_name(st)); - st->rrd_memory_mode = RRD_MEMORY_MODE_RAM; - } - } - // initialize the db tiers { for(size_t tier = 0; tier < storage_tiers ; tier++) { STORAGE_ENGINE *eng = st->rrdhost->db[tier].eng; if(!eng) continue; - st->storage_metrics_groups[tier] = storage_engine_metrics_group_get(eng->backend, host->db[tier].instance, &st->chart_uuid); + st->smg[tier] = storage_engine_metrics_group_get(eng->seb, host->db[tier].si, &st->chart_uuid); } } rrddim_index_init(st); - // chart variables - we need this for data collection to work (collector given chart variables) - not only health - rrdsetvar_index_init(st); - - if (host->health.health_enabled) { - st->rrdfamily = rrdfamily_add_and_acquire(host, rrdset_family(st)); - st->rrdvars = rrdvariables_create(); - rrddimvar_index_init(st); - } - + st->rrdvars = rrdvariables_create(); st->rrdlabels = rrdlabels_create(); rrdset_update_permanent_labels(st); @@ -312,6 +296,14 @@ static void rrdset_insert_callback(const DICTIONARY_ITEM *item __maybe_unused, v } void rrdset_finalize_collection(RRDSET *st, bool dimensions_too) { + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_TXT(NDF_NIDL_NODE, rrdhost_hostname(st->rrdhost)), + ND_LOG_FIELD_TXT(NDF_NIDL_CONTEXT, rrdset_context(st)), + ND_LOG_FIELD_TXT(NDF_NIDL_INSTANCE, rrdset_name(st)), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + RRDHOST *host = st->rrdhost; rrdset_flag_set(st, RRDSET_FLAG_COLLECTION_FINISHED); @@ -327,9 +319,9 @@ void rrdset_finalize_collection(RRDSET *st, bool dimensions_too) { STORAGE_ENGINE *eng = st->rrdhost->db[tier].eng; if(!eng) continue; - if(st->storage_metrics_groups[tier]) { - storage_engine_metrics_group_release(eng->backend, host->db[tier].instance, st->storage_metrics_groups[tier]); - st->storage_metrics_groups[tier] = NULL; + if(st->smg[tier]) { + storage_engine_metrics_group_release(eng->seb, host->db[tier].si, st->smg[tier]); + st->smg[tier] = NULL; } } @@ -353,40 +345,26 @@ 
static void rrdset_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, v // release the collector info dictionary_destroy(st->functions_view); - rrdcalc_unlink_all_rrdset_alerts(st); + rrdcalc_unlink_and_delete_all_rrdset_alerts(st); // ------------------------------------------------------------------------ // the order of destruction is important here - // 1. delete RRDDIMVAR index - this will speed up the destruction of RRDDIMs - // because each dimension loops to find its own variables in this index. - // There are no references to the items on this index from the dimensions. - // To find their own, they have to walk-through the dictionary. - rrddimvar_index_destroy(st); // destroy the rrddimvar index - - // 2. delete RRDSETVAR index - rrdsetvar_index_destroy(st); // destroy the rrdsetvar index - - // 3. delete RRDVAR index after the above, to avoid triggering its garbage collector (they have references on this) + // 1. delete RRDVAR index after the above, to avoid triggering its garbage collector (they have references on this) rrdvariables_destroy(st->rrdvars); // free all variables and destroy the rrdvar dictionary - // 4. delete RRDFAMILY - this has to be last, because RRDDIMVAR and RRDSETVAR need the reference counter - rrdfamily_release(host, st->rrdfamily); // release the acquired rrdfamily -- has to be after all variables - - // 5. delete RRDDIMs, now their variables are not existing, so this is fast + // 2. delete RRDDIMs, now their variables are not existing, so this is fast rrddim_index_destroy(st); // free all the dimensions and destroy the dimensions index - // 6. this has to be after the dimensions are freed, but before labels are freed (contexts need the labels) + // 3. this has to be after the dimensions are freed, but before labels are freed (contexts need the labels) rrdcontext_removed_rrdset(st); // let contexts know - // 7. destroy the chart labels + // 4. 
destroy the chart labels rrdlabels_destroy(st->rrdlabels); // destroy the labels, after letting the contexts know - // 8. destroy the ml handle + // 5. destroy the ml handle ml_chart_delete(st); - rrdset_memory_file_free(st); // remove files of db mode save and map - // ------------------------------------------------------------------------ // free it @@ -403,7 +381,6 @@ static void rrdset_delete_callback(const DICTIONARY_ITEM *item __maybe_unused, v string_freez(st->module_name); freez(st->exporting_flags); - freez(st->db.cache_dir); } // the item to be inserted, is already in the dictionary @@ -470,8 +447,6 @@ static bool rrdset_conflict_callback(const DICTIONARY_ITEM *item __maybe_unused, if(old_family != st->family) ctr->react_action |= RRDSET_REACT_UPDATED; string_freez(old_family); - - // TODO - we should rename RRDFAMILY variables } if(ctr->context && *ctr->context) { @@ -652,16 +627,10 @@ int rrdset_reset_name(RRDSET *st, const char *name) { rrdset_index_del_name(host, st); string_freez(st->name); st->name = name_string; - rrdsetvar_rename_all(st); } else st->name = name_string; - RRDDIM *rd; - rrddim_foreach_read(rd, st) - rrddimvar_rename_all(rd); - rrddim_foreach_done(rd); - rrdset_index_add_name(host, st); rrdset_flag_clear(st, RRDSET_FLAG_EXPORTING_SEND); @@ -888,7 +857,7 @@ void rrdset_reset(RRDSET *st) { if(!rrddim_flag_check(rd, RRDDIM_FLAG_ARCHIVED)) { for(size_t tier = 0; tier < storage_tiers ;tier++) - storage_engine_store_flush(rd->tiers[tier].db_collection_handle); + storage_engine_store_flush(rd->tiers[tier].sch); } } rrddim_foreach_done(rd); @@ -904,12 +873,9 @@ inline long align_entries_to_pagesize(RRD_MEMORY_MODE mode, long entries) { if(entries < 5) entries = 5; if(entries > RRD_HISTORY_ENTRIES_MAX) entries = RRD_HISTORY_ENTRIES_MAX; - if(mode == RRD_MEMORY_MODE_MAP || mode == RRD_MEMORY_MODE_SAVE || mode == RRD_MEMORY_MODE_RAM) { + if(mode == RRD_MEMORY_MODE_RAM) { long header_size = 0; - if(mode == RRD_MEMORY_MODE_MAP || mode == 
RRD_MEMORY_MODE_SAVE) - header_size = (long)rrddim_memory_file_header_size(); - long page = (long)sysconf(_SC_PAGESIZE); long size = (long)(header_size + entries * sizeof(storage_number)); if (unlikely(size % page)) { @@ -946,62 +912,6 @@ void rrdset_free(RRDSET *st) { rrdset_index_del(st->rrdhost, st); } -void rrdset_save(RRDSET *st) { - rrdset_memory_file_save(st); - - RRDDIM *rd; - rrddim_foreach_read(rd, st) - rrddim_memory_file_save(rd); - rrddim_foreach_done(rd); -} - -void rrdset_delete_files(RRDSET *st) { - RRDDIM *rd; - - netdata_log_info("Deleting chart '%s' ('%s') from disk...", rrdset_id(st), rrdset_name(st)); - - if(st->rrd_memory_mode == RRD_MEMORY_MODE_SAVE || st->rrd_memory_mode == RRD_MEMORY_MODE_MAP) { - const char *cache_filename = rrdset_cache_filename(st); - if(cache_filename) { - netdata_log_info("Deleting chart header file '%s'.", cache_filename); - if (unlikely(unlink(cache_filename) == -1)) - netdata_log_error("Cannot delete chart header file '%s'", cache_filename); - } - else - netdata_log_error("Cannot find the cache filename of chart '%s'", rrdset_id(st)); - } - - rrddim_foreach_read(rd, st) { - const char *cache_filename = rrddim_cache_filename(rd); - if(!cache_filename) continue; - - netdata_log_info("Deleting dimension file '%s'.", cache_filename); - if(unlikely(unlink(cache_filename) == -1)) - netdata_log_error("Cannot delete dimension file '%s'", cache_filename); - } - rrddim_foreach_done(rd); - - if(st->db.cache_dir) - recursively_delete_dir(st->db.cache_dir, "left-over chart"); -} - -void rrdset_delete_obsolete_dimensions(RRDSET *st) { - RRDDIM *rd; - - netdata_log_info("Deleting dimensions of chart '%s' ('%s') from disk...", rrdset_id(st), rrdset_name(st)); - - rrddim_foreach_read(rd, st) { - if(rrddim_flag_check(rd, RRDDIM_FLAG_OBSOLETE)) { - const char *cache_filename = rrddim_cache_filename(rd); - if(!cache_filename) continue; - netdata_log_info("Deleting dimension file '%s'.", cache_filename); - 
if(unlikely(unlink(cache_filename) == -1)) - netdata_log_error("Cannot delete dimension file '%s'", cache_filename); - } - } - rrddim_foreach_done(rd); -} - // ---------------------------------------------------------------------------- // RRDSET - create a chart @@ -1267,7 +1177,7 @@ void store_metric_at_tier(RRDDIM *rd, size_t tier, struct rrddim_tier *t, STORAG if (likely(!storage_point_is_unset(t->virtual_point))) { storage_engine_store_metric( - t->db_collection_handle, + t->sch, t->next_point_end_time_s * USEC_PER_SEC, t->virtual_point.sum, t->virtual_point.min, @@ -1278,7 +1188,7 @@ void store_metric_at_tier(RRDDIM *rd, size_t tier, struct rrddim_tier *t, STORAG } else { storage_engine_store_metric( - t->db_collection_handle, + t->sch, t->next_point_end_time_s * USEC_PER_SEC, NAN, NAN, @@ -1357,7 +1267,7 @@ void rrddim_store_metric(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, #endif // NETDATA_LOG_COLLECTION_ERRORS // store the metric on tier 0 - storage_engine_store_metric(rd->tiers[0].db_collection_handle, point_end_time_ut, + storage_engine_store_metric(rd->tiers[0].sch, point_end_time_ut, n, 0, 0, 1, 0, flags); @@ -1377,7 +1287,7 @@ void rrddim_store_metric(RRDDIM *rd, usec_t point_end_time_ut, NETDATA_DOUBLE n, }; for(size_t tier = 1; tier < storage_tiers ;tier++) { - if(unlikely(!rd->tiers[tier].db_metric_handle)) continue; + if(unlikely(!rd->tiers[tier].smh)) continue; struct rrddim_tier *t = &rd->tiers[tier]; @@ -1718,8 +1628,7 @@ void rrdset_timed_done(RRDSET *st, struct timeval now, bool pending_rrdset_next) // check if we will re-write the entire data set if(unlikely(dt_usec(&st->last_collected_time, &st->last_updated) > st->db.entries * update_every_ut && st->rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE)) { - netdata_log_info( - "'%s': too old data (last updated at %"PRId64".%"PRId64", last collected at %"PRId64".%"PRId64"). 
" + nd_log_daemon(NDLP_DEBUG, "'%s': too old data (last updated at %" PRId64 ".%" PRId64 ", last collected at %" PRId64 ".%" PRId64 "). " "Resetting it. Will not store the next entry.", rrdset_id(st), (int64_t)st->last_updated.tv_sec, @@ -2101,18 +2010,6 @@ void rrdset_timed_done(RRDSET *st, struct timeval now, bool pending_rrdset_next) // ALL DONE ABOUT THE DATA UPDATE // -------------------------------------------------------------------- - if(unlikely(st->rrd_memory_mode == RRD_MEMORY_MODE_MAP)) { - // update the memory mapped files with the latest values - - rrdset_memory_file_update(st); - - for(dim_id = 0, rda = rda_base; dim_id < rda_slots ; ++dim_id, ++rda) { - rd = rda->rd; - if(unlikely(!rd)) continue; - rrddim_memory_file_update(rd); - } - } - for(dim_id = 0, rda = rda_base; dim_id < rda_slots ; ++dim_id, ++rda) { rd = rda->rd; if(unlikely(!rd)) continue; @@ -2141,9 +2038,9 @@ time_t rrdset_set_update_every_s(RRDSET *st, time_t update_every_s) { RRDDIM *rd; rrddim_foreach_read(rd, st) { for (size_t tier = 0; tier < storage_tiers; tier++) { - if (rd->tiers[tier].db_collection_handle) + if (rd->tiers[tier].sch) storage_engine_store_change_collection_frequency( - rd->tiers[tier].db_collection_handle, + rd->tiers[tier].sch, (int)(st->rrdhost->db[tier].tier_grouping * st->update_every)); } } @@ -2151,211 +2048,3 @@ time_t rrdset_set_update_every_s(RRDSET *st, time_t update_every_s) { return prev_update_every_s; } - -// ---------------------------------------------------------------------------- -// compatibility layer for RRDSET files v019 - -#define RRDSET_MAGIC_V019 "NETDATA RRD SET FILE V019" -#define RRD_ID_LENGTH_MAX_V019 200 - -struct avl_element_v019 { - void *avl_link[2]; - signed char avl_balance; -}; -struct avl_tree_type_v019 { - void *root; - int (*compar)(void *a, void *b); -}; -struct avl_tree_lock_v019 { - struct avl_tree_type_v019 avl_tree; - pthread_rwlock_t rwlock; -}; -struct rrdset_map_save_v019 { - struct avl_element_v019 avl; // ignored 
- struct avl_element_v019 avlname; // ignored - char id[RRD_ID_LENGTH_MAX_V019 + 1]; // check to reset all - update on load - void *name; // ignored - void *unused_ptr; // ignored - void *type; // ignored - void *family; // ignored - void *title; // ignored - void *units; // ignored - void *context; // ignored - uint32_t hash_context; // ignored - uint32_t chart_type; // ignored - int update_every; // check to reset all - update on load - long entries; // check to reset all - update on load - long current_entry; // NEEDS TO BE UPDATED - FIXED ON LOAD - uint32_t flags; // ignored - void *exporting_flags; // ignored - int gap_when_lost_iterations_above; // ignored - long priority; // ignored - uint32_t rrd_memory_mode; // ignored - void *cache_dir; // ignored - char cache_filename[FILENAME_MAX+1]; // ignored - update on load - pthread_rwlock_t rrdset_rwlock; // ignored - size_t counter; // NEEDS TO BE UPDATED - maintained on load - size_t counter_done; // ignored - union { // - time_t last_accessed_time_s; // ignored - time_t last_entry_s; // ignored - }; // - time_t upstream_resync_time; // ignored - void *plugin_name; // ignored - void *module_name; // ignored - void *chart_uuid; // ignored - void *state; // ignored - size_t unused[3]; // ignored - size_t rrddim_page_alignment; // ignored - uint32_t hash; // ignored - uint32_t hash_name; // ignored - usec_t usec_since_last_update; // NEEDS TO BE UPDATED - maintained on load - struct timeval last_updated; // NEEDS TO BE UPDATED - check to reset all - fixed on load - struct timeval last_collected_time; // ignored - long long collected_total; // ignored - long long last_collected_total; // ignored - void *rrdfamily; // ignored - void *rrdhost; // ignored - void *next; // ignored - long double green; // ignored - long double red; // ignored - struct avl_tree_lock_v019 rrdvar_root_index; // ignored - void *variables; // ignored - void *alarms; // ignored - unsigned long memsize; // check to reset all - update on load - 
char magic[sizeof(RRDSET_MAGIC_V019) + 1]; // check to reset all - update on load - struct avl_tree_lock_v019 dimensions_index; // ignored - void *dimensions; // ignored -}; - -void rrdset_memory_file_update(RRDSET *st) { - if(!st->db.st_on_file) return; - struct rrdset_map_save_v019 *st_on_file = st->db.st_on_file; - - st_on_file->current_entry = st->db.current_entry; - st_on_file->counter = st->counter; - st_on_file->usec_since_last_update = st->usec_since_last_update; - st_on_file->last_updated.tv_sec = st->last_updated.tv_sec; - st_on_file->last_updated.tv_usec = st->last_updated.tv_usec; -} - -const char *rrdset_cache_filename(RRDSET *st) { - if(!st->db.st_on_file) return NULL; - struct rrdset_map_save_v019 *st_on_file = st->db.st_on_file; - return st_on_file->cache_filename; -} - -const char *rrdset_cache_dir(RRDSET *st) { - if(!st->db.cache_dir) - st->db.cache_dir = rrdhost_cache_dir_for_rrdset_alloc(st->rrdhost, rrdset_id(st)); - - return st->db.cache_dir; -} - -void rrdset_memory_file_free(RRDSET *st) { - if(!st->db.st_on_file) return; - - // needed for memory mode map, to save the latest state - rrdset_memory_file_update(st); - - struct rrdset_map_save_v019 *st_on_file = st->db.st_on_file; - __atomic_sub_fetch(&rrddim_db_memory_size, st_on_file->memsize, __ATOMIC_RELAXED); - netdata_munmap(st_on_file, st_on_file->memsize); - - // remove the pointers from the RRDDIM - st->db.st_on_file = NULL; -} - -void rrdset_memory_file_save(RRDSET *st) { - if(!st->db.st_on_file) return; - - rrdset_memory_file_update(st); - - struct rrdset_map_save_v019 *st_on_file = st->db.st_on_file; - if(st_on_file->rrd_memory_mode != RRD_MEMORY_MODE_SAVE) return; - - memory_file_save(st_on_file->cache_filename, st->db.st_on_file, st_on_file->memsize); -} - -bool rrdset_memory_load_or_create_map_save(RRDSET *st, RRD_MEMORY_MODE memory_mode) { - if(memory_mode != RRD_MEMORY_MODE_SAVE && memory_mode != RRD_MEMORY_MODE_MAP) - return false; - - char fullfilename[FILENAME_MAX + 1]; - 
snprintfz(fullfilename, FILENAME_MAX, "%s/main.db", rrdset_cache_dir(st)); - - unsigned long size = sizeof(struct rrdset_map_save_v019); - struct rrdset_map_save_v019 *st_on_file = (struct rrdset_map_save_v019 *)netdata_mmap( - fullfilename, size, ((memory_mode == RRD_MEMORY_MODE_MAP) ? MAP_SHARED : MAP_PRIVATE), 0, false, NULL); - - if(!st_on_file) return false; - - time_t now_s = now_realtime_sec(); - - st_on_file->magic[sizeof(RRDSET_MAGIC_V019)] = '\0'; - if(strcmp(st_on_file->magic, RRDSET_MAGIC_V019) != 0) { - netdata_log_info("Initializing file '%s'.", fullfilename); - memset(st_on_file, 0, size); - } - else if(strncmp(st_on_file->id, rrdset_id(st), RRD_ID_LENGTH_MAX_V019) != 0) { - netdata_log_error("File '%s' contents are not for chart '%s'. Clearing it.", fullfilename, rrdset_id(st)); - memset(st_on_file, 0, size); - } - else if(st_on_file->memsize != size || st_on_file->entries != st->db.entries) { - netdata_log_error("File '%s' does not have the desired size. Clearing it.", fullfilename); - memset(st_on_file, 0, size); - } - else if(st_on_file->update_every != st->update_every) { - netdata_log_error("File '%s' does not have the desired granularity. Clearing it.", fullfilename); - memset(st_on_file, 0, size); - } - else if((now_s - st_on_file->last_updated.tv_sec) > (long)st->update_every * (long)st->db.entries) { - netdata_log_info("File '%s' is too old. Clearing it.", fullfilename); - memset(st_on_file, 0, size); - } - else if(st_on_file->last_updated.tv_sec > now_s + st->update_every) { - netdata_log_error("File '%s' refers to the future by %zd secs. 
Resetting it to now.", fullfilename, (ssize_t)(st_on_file->last_updated.tv_sec - now_s)); - st_on_file->last_updated.tv_sec = now_s; - } - - if(st_on_file->current_entry >= st_on_file->entries) - st_on_file->current_entry = 0; - - // make sure the database is aligned - bool align_last_updated = false; - if(st_on_file->last_updated.tv_sec) { - st_on_file->update_every = st->update_every; - align_last_updated = true; - } - - // copy the useful values to st - st->db.current_entry = st_on_file->current_entry; - st->counter = st_on_file->counter; - st->usec_since_last_update = st_on_file->usec_since_last_update; - st->last_updated.tv_sec = st_on_file->last_updated.tv_sec; - st->last_updated.tv_usec = st_on_file->last_updated.tv_usec; - - // link it to st - st->db.st_on_file = st_on_file; - - // clear everything - memset(st_on_file, 0, size); - - // set the values we need - strncpyz(st_on_file->id, rrdset_id(st), RRD_ID_LENGTH_MAX_V019); - strcpy(st_on_file->cache_filename, fullfilename); - strcpy(st_on_file->magic, RRDSET_MAGIC_V019); - st_on_file->memsize = size; - st_on_file->entries = st->db.entries; - st_on_file->update_every = st->update_every; - st_on_file->rrd_memory_mode = memory_mode; - - if(align_last_updated) - last_updated_time_align(st); - - // copy the useful values back to st_on_file - rrdset_memory_file_update(st); - - __atomic_add_fetch(&rrddim_db_memory_size, st_on_file->memsize, __ATOMIC_RELAXED); - return true; -} diff --git a/database/sqlite/dbdata.c b/src/database/sqlite/dbdata.c index 1ad742e04..1ad742e04 100644 --- a/database/sqlite/dbdata.c +++ b/src/database/sqlite/dbdata.c diff --git a/database/sqlite/sqlite3.c b/src/database/sqlite/sqlite3.c index da8c38d09..da8c38d09 100644 --- a/database/sqlite/sqlite3.c +++ b/src/database/sqlite/sqlite3.c diff --git a/database/sqlite/sqlite3.h b/src/database/sqlite/sqlite3.h index 48effe202..48effe202 100644 --- a/database/sqlite/sqlite3.h +++ b/src/database/sqlite/sqlite3.h diff --git 
a/database/sqlite/sqlite3recover.c b/src/database/sqlite/sqlite3recover.c index 3dae0b7a9..3dae0b7a9 100644 --- a/database/sqlite/sqlite3recover.c +++ b/src/database/sqlite/sqlite3recover.c diff --git a/database/sqlite/sqlite3recover.h b/src/database/sqlite/sqlite3recover.h index 7a1cd1cd8..7a1cd1cd8 100644 --- a/database/sqlite/sqlite3recover.h +++ b/src/database/sqlite/sqlite3recover.h diff --git a/database/sqlite/sqlite_aclk.c b/src/database/sqlite/sqlite_aclk.c index ac574879c..c410406b2 100644 --- a/database/sqlite/sqlite_aclk.c +++ b/src/database/sqlite/sqlite_aclk.c @@ -61,7 +61,6 @@ enum { IDX_UPDATE_EVERY, IDX_OS, IDX_TIMEZONE, - IDX_TAGS, IDX_HOPS, IDX_MEMORY_MODE, IDX_ABBREV_TIMEZONE, @@ -72,6 +71,7 @@ enum { IDX_HEALTH_ENABLED, IDX_LAST_CONNECTED, IDX_IS_EPHEMERAL, + IDX_IS_REGISTERED, }; static int create_host_callback(void *data, int argc, char **argv, char **column) @@ -88,21 +88,27 @@ static int create_host_callback(void *data, int argc, char **argv, char **column time_t age = now_realtime_sec() - last_connected; int is_ephemeral = 0; + int is_registered = 0; if (argv[IDX_IS_EPHEMERAL]) is_ephemeral = str2i(argv[IDX_IS_EPHEMERAL]); + if (argv[IDX_IS_REGISTERED]) + is_registered = str2i(argv[IDX_IS_REGISTERED]); + char guid[UUID_STR_LEN]; uuid_unparse_lower(*(uuid_t *)argv[IDX_HOST_ID], guid); if (is_ephemeral && age > rrdhost_free_ephemeral_time_s) { netdata_log_info( - "Skipping ephemeral hostname \"%s\" with GUID \"%s\", age = %ld seconds (limit %ld seconds)", + "%s ephemeral hostname \"%s\" with GUID \"%s\", age = %ld seconds (limit %ld seconds)", + is_registered ? 
"Loading registered" : "Skipping unregistered", (const char *)argv[IDX_HOSTNAME], guid, age, rrdhost_free_ephemeral_time_s); - return 0; + if (!is_registered) + return 0; } struct rrdhost_system_info *system_info = callocz(1, sizeof(struct rrdhost_system_info)); @@ -120,7 +126,6 @@ static int create_host_callback(void *data, int argc, char **argv, char **column (const char *)argv[IDX_TIMEZONE], (const char *)argv[IDX_ABBREV_TIMEZONE], (int32_t)(argv[IDX_UTC_OFFSET] ? str2uint32_t(argv[IDX_UTC_OFFSET], NULL) : 0), - (const char *)argv[IDX_TAGS], (const char *)(argv[IDX_PROGRAM_NAME] ? argv[IDX_PROGRAM_NAME] : "unknown"), (const char *)(argv[IDX_PROGRAM_VERSION] ? argv[IDX_PROGRAM_VERSION] : "unknown"), argv[IDX_UPDATE_EVERY] ? str2i(argv[IDX_UPDATE_EVERY]) : 1, @@ -557,11 +562,12 @@ void sql_create_aclk_table(RRDHOST *host __maybe_unused, uuid_t *host_uuid __may #define SQL_FETCH_ALL_HOSTS \ "SELECT host_id, hostname, registry_hostname, update_every, os, " \ - "timezone, tags, hops, memory_mode, abbrev_timezone, utc_offset, program_name, " \ + "timezone, hops, memory_mode, abbrev_timezone, utc_offset, program_name, " \ "program_version, entries, health_enabled, last_connected, " \ "(SELECT CASE WHEN hl.label_value = 'true' THEN 1 ELSE 0 END FROM " \ - "host_label hl WHERE hl.host_id = h.host_id AND hl.label_key = '_is_ephemeral') " \ - "FROM host h WHERE hops > 0" + "host_label hl WHERE hl.host_id = h.host_id AND hl.label_key = '_is_ephemeral'), " \ + "(SELECT CASE WHEN ni.node_id is NULL THEN 0 ELSE 1 END FROM " \ + "node_instance ni WHERE ni.host_id = h.host_id) FROM host h WHERE hops > 0" #define SQL_FETCH_ALL_INSTANCES \ "SELECT ni.host_id, ni.node_id FROM host h, node_instance ni " \ @@ -675,4 +681,4 @@ void unregister_node(const char *machine_guid) cmd.completion = NULL; aclk_database_enq_cmd(&cmd); } -#endif
\ No newline at end of file +#endif diff --git a/database/sqlite/sqlite_aclk.h b/src/database/sqlite/sqlite_aclk.h index 0db2647bf..0db2647bf 100644 --- a/database/sqlite/sqlite_aclk.h +++ b/src/database/sqlite/sqlite_aclk.h diff --git a/database/sqlite/sqlite_aclk_alert.c b/src/database/sqlite/sqlite_aclk_alert.c index 9bd060f96..c96f0eef8 100644 --- a/database/sqlite/sqlite_aclk_alert.c +++ b/src/database/sqlite/sqlite_aclk_alert.c @@ -201,7 +201,7 @@ void sql_queue_alarm_to_aclk(RRDHOST *host, ALARM_ENTRY *ae, bool skip_filter) ae->flags |= HEALTH_ENTRY_FLAG_ACLK_QUEUED; rrdhost_flag_set(host, RRDHOST_FLAG_ACLK_STREAM_ALERTS); } else - error_report("Failed to store alert event %"PRId64", rc = %d", ae->unique_id, rc); + error_report("Failed to store alert event %"PRIu32", rc = %d", ae->unique_id, rc); done: if (unlikely(sqlite3_finalize(res_alert) != SQLITE_OK)) @@ -351,7 +351,8 @@ static void aclk_push_alert_event(struct aclk_sync_cfg_t *wc __maybe_unused) strdupz("UNKNOWN=0=UNKNOWN"); alarm_log.command = strdupz(edit_command); - alarm_log.duration = (time_t) sqlite3_column_int64(res, 6); + time_t duration = (time_t) sqlite3_column_int64(res, 6); + alarm_log.duration = (duration > 0) ? 
duration : 0; alarm_log.non_clear_duration = (time_t) sqlite3_column_int64(res, 7); alarm_log.status = rrdcalc_status_to_proto_enum((RRDCALC_STATUS) sqlite3_column_int(res, 19)); alarm_log.old_status = rrdcalc_status_to_proto_enum((RRDCALC_STATUS) sqlite3_column_int(res, 20)); @@ -609,7 +610,7 @@ void aclk_push_alert_config_event(char *node_id __maybe_unused, char *config_has netdata_log_error("aclk_push_alert_config_event: Unexpected param number %d", param); BUFFER *tmp_buf = buffer_create(1024, &netdata_buffers_statistics.buffers_sqlite); - buffer_data_options2string(tmp_buf, sqlite3_column_int(res, 28)); + rrdr_options_to_buffer(tmp_buf, sqlite3_column_int(res, 28)); alarm_config.p_db_lookup_options = strdupz((char *)buffer_tostring(tmp_buf)); buffer_free(tmp_buf); @@ -821,7 +822,7 @@ void health_alarm_entry2proto_nolock(struct alarm_log_entry *alarm_log, ALARM_EN #endif #ifdef ENABLE_ACLK -static bool have_recent_alarm(RRDHOST *host, int64_t alarm_id, int64_t mark) +static bool have_recent_alarm_unsafe(RRDHOST *host, int64_t alarm_id, int64_t mark) { ALARM_ENTRY *ae = host->health_log.alarms; @@ -882,7 +883,7 @@ void aclk_push_alert_snapshot_event(char *node_id __maybe_unused) if (unlikely(ae->new_status == RRDCALC_STATUS_UNINITIALIZED)) continue; - if (have_recent_alarm(host, ae->alarm_id, ae->unique_id)) + if (have_recent_alarm_unsafe(host, ae->alarm_id, ae->unique_id)) continue; if (is_event_from_alert_variable_config(ae->unique_id, &host->host_uuid)) @@ -911,7 +912,7 @@ void aclk_push_alert_snapshot_event(char *node_id __maybe_unused) if (likely(ae->updated_by_id) || unlikely(ae->new_status == RRDCALC_STATUS_UNINITIALIZED)) continue; - if (have_recent_alarm(host, ae->alarm_id, ae->unique_id)) + if (have_recent_alarm_unsafe(host, ae->alarm_id, ae->unique_id)) continue; if (is_event_from_alert_variable_config(ae->unique_id, &host->host_uuid)) @@ -1090,7 +1091,7 @@ void aclk_push_alarm_checkpoint(RRDHOST *host __maybe_unused) } active_alerts[cnt].name = (char 
*)rrdcalc_name(rc); - len += string_strlen(rc->name); + len += string_strlen(rc->config.name); active_alerts[cnt].chart = (char *)rrdcalc_chart_name(rc); len += string_strlen(rc->chart); active_alerts[cnt].status = rc->status; diff --git a/database/sqlite/sqlite_aclk_alert.h b/src/database/sqlite/sqlite_aclk_alert.h index cfb3468b9..cfb3468b9 100644 --- a/database/sqlite/sqlite_aclk_alert.h +++ b/src/database/sqlite/sqlite_aclk_alert.h diff --git a/database/sqlite/sqlite_aclk_node.c b/src/database/sqlite/sqlite_aclk_node.c index dcc8c375c..dcc8c375c 100644 --- a/database/sqlite/sqlite_aclk_node.c +++ b/src/database/sqlite/sqlite_aclk_node.c diff --git a/database/sqlite/sqlite_aclk_node.h b/src/database/sqlite/sqlite_aclk_node.h index 6afdf8d78..6afdf8d78 100644 --- a/database/sqlite/sqlite_aclk_node.h +++ b/src/database/sqlite/sqlite_aclk_node.h diff --git a/database/sqlite/sqlite_context.c b/src/database/sqlite/sqlite_context.c index 26ed8a96a..ad76a1ee2 100644 --- a/database/sqlite/sqlite_context.c +++ b/src/database/sqlite/sqlite_context.c @@ -52,7 +52,7 @@ int sql_init_context_database(int memory) if (likely(!memory)) target_version = perform_context_database_migration(db_context_meta, DB_CONTEXT_METADATA_VERSION); - if (configure_sqlite_database(db_context_meta, target_version)) + if (configure_sqlite_database(db_context_meta, target_version, "context_config")) return 1; if (likely(!memory)) @@ -60,34 +60,17 @@ int sql_init_context_database(int memory) else snprintfz(buf, sizeof(buf) - 1, "ATTACH DATABASE ':memory:' as meta"); - if(init_database_batch(db_context_meta, list)) return 1; + if(init_database_batch(db_context_meta, list, "context")) return 1; - if (init_database_batch(db_context_meta, &database_context_config[0])) + if (init_database_batch(db_context_meta, &database_context_config[0], "context_init")) return 1; - if (init_database_batch(db_context_meta, &database_context_cleanup[0])) + if (init_database_batch(db_context_meta, 
&database_context_cleanup[0], "context_cleanup")) return 1; return 0; } -/* - * Close the sqlite database - */ - -void sql_close_context_database(void) -{ - int rc; - if (unlikely(!db_context_meta)) - return; - - netdata_log_info("Closing context SQLite database"); - - rc = sqlite3_close_v2(db_context_meta); - if (unlikely(rc != SQLITE_OK)) - error_report("Error %d while closing the context SQLite database, %s", rc, sqlite3_errstr(rc)); -} - // // Fetching data // @@ -421,6 +404,12 @@ int sql_context_cache_stats(int op) return count; } + +uint64_t sqlite_get_context_space(void) +{ + return sqlite_get_db_space(db_context_meta); +} + // // TESTING FUNCTIONS // @@ -455,7 +444,8 @@ int ctx_unittest(void) uuid_t host_uuid; uuid_generate(host_uuid); - initialize_thread_key_pool(); + if (sqlite_library_init()) + return 1; int rc = sql_init_context_database(1); @@ -531,7 +521,8 @@ int ctx_unittest(void) ctx_get_context_list(&host_uuid, dict_ctx_get_context_list_cb, NULL); netdata_log_info("List context end after delete"); - sql_close_context_database(); + sql_close_database(db_context_meta, "CONTEXT"); + sqlite_library_shutdown(); return 0; } diff --git a/database/sqlite/sqlite_context.h b/src/database/sqlite/sqlite_context.h index 2586916ea..92d02fdd2 100644 --- a/database/sqlite/sqlite_context.h +++ b/src/database/sqlite/sqlite_context.h @@ -65,6 +65,7 @@ int ctx_store_context(uuid_t *host_uuid, VERSIONED_CONTEXT_DATA *context_data); int ctx_delete_context(uuid_t *host_id, VERSIONED_CONTEXT_DATA *context_data); int sql_init_context_database(int memory); +uint64_t sqlite_get_context_space(void); void sql_close_context_database(void); int ctx_unittest(void); #endif //NETDATA_SQLITE_CONTEXT_H diff --git a/database/sqlite/sqlite_db_migration.c b/src/database/sqlite/sqlite_db_migration.c index 29da6c249..0131c4bf6 100644 --- a/database/sqlite/sqlite_db_migration.c +++ b/src/database/sqlite/sqlite_db_migration.c @@ -153,17 +153,36 @@ const char *database_migrate_v13_v14[] = { 
NULL }; +const char *database_migrate_v16_v17[] = { + "ALTER TABLE alert_hash ADD time_group_condition INT", + "ALTER TABLE alert_hash ADD time_group_value DOUBLE", + "ALTER TABLE alert_hash ADD dims_group INT", + "ALTER TABLE alert_hash ADD data_source INT", + NULL +}; + +// Note: Same as database_migrate_v16_v17. This is not wrong +// Do additional migration to handle agents that created wrong alert_hash table +const char *database_migrate_v17_v18[] = { + "ALTER TABLE alert_hash ADD time_group_condition INT", + "ALTER TABLE alert_hash ADD time_group_value DOUBLE", + "ALTER TABLE alert_hash ADD dims_group INT", + "ALTER TABLE alert_hash ADD data_source INT", + NULL +}; + + static int do_migration_v1_v2(sqlite3 *database) { if (table_exists_in_database(database, "host") && !column_exists_in_table(database, "host", "hops")) - return init_database_batch(database, &database_migrate_v1_v2[0]); + return init_database_batch(database, &database_migrate_v1_v2[0], "meta_migrate"); return 0; } static int do_migration_v2_v3(sqlite3 *database) { if (table_exists_in_database(database, "host") && !column_exists_in_table(database, "host", "memory_mode")) - return init_database_batch(database, &database_migrate_v2_v3[0]); + return init_database_batch(database, &database_migrate_v2_v3[0], "meta_migrate"); return 0; } @@ -198,12 +217,12 @@ static int do_migration_v3_v4(sqlite3 *database) static int do_migration_v4_v5(sqlite3 *database) { - return init_database_batch(database, &database_migrate_v4_v5[0]); + return init_database_batch(database, &database_migrate_v4_v5[0], "meta_migrate"); } static int do_migration_v5_v6(sqlite3 *database) { - return init_database_batch(database, &database_migrate_v5_v6[0]); + return init_database_batch(database, &database_migrate_v5_v6[0], "meta_migrate"); } static int do_migration_v6_v7(sqlite3 *database) @@ -341,14 +360,14 @@ static int do_migration_v8_v9(sqlite3 *database) static int do_migration_v9_v10(sqlite3 *database) { if 
(table_exists_in_database(database, "alert_hash") && !column_exists_in_table(database, "alert_hash", "chart_labels")) - return init_database_batch(database, &database_migrate_v9_v10[0]); + return init_database_batch(database, &database_migrate_v9_v10[0], "meta_migrate"); return 0; } static int do_migration_v10_v11(sqlite3 *database) { if (table_exists_in_database(database, "health_log") && !column_exists_in_table(database, "health_log", "chart_name")) - return init_database_batch(database, &database_migrate_v10_v11[0]); + return init_database_batch(database, &database_migrate_v10_v11[0], "meta_migrate"); return 0; } @@ -360,7 +379,7 @@ static int do_migration_v11_v12(sqlite3 *database) if (table_exists_in_database(database, "health_log_detail") && !column_exists_in_table(database, "health_log_detail", "summary") && table_exists_in_database(database, "alert_hash") && !column_exists_in_table(database, "alert_hash", "summary")) - rc = init_database_batch(database, &database_migrate_v11_v12[0]); + rc = init_database_batch(database, &database_migrate_v11_v12[0], "meta_migrate"); if (!rc) sqlite3_exec_monitored(database, MIGR_11_12_UPD_HEALTH_LOG_DETAIL, 0, 0, NULL); @@ -430,17 +449,34 @@ static int do_migration_v15_v16(sqlite3 *database) return 0; } +static int do_migration_v16_v17(sqlite3 *database) +{ + if (table_exists_in_database(database, "alert_hash") && !column_exists_in_table(database, "alert_hash", "time_group_condition")) + return init_database_batch(database, &database_migrate_v16_v17[0], "meta_migrate"); + + return 0; +} + +static int do_migration_v17_v18(sqlite3 *database) +{ + if (table_exists_in_database(database, "alert_hash") && !column_exists_in_table(database, "alert_hash", "time_group_condition")) + return init_database_batch(database, &database_migrate_v17_v18[0], "meta_migrate"); + + return 0; +} + + static int do_migration_v12_v13(sqlite3 *database) { int rc = 0; if (table_exists_in_database(database, "health_log_detail") && 
!column_exists_in_table(database, "health_log_detail", "summary")) { - rc = init_database_batch(database, &database_migrate_v12_v13_detail[0]); + rc = init_database_batch(database, &database_migrate_v12_v13_detail[0], "meta_migrate"); sqlite3_exec_monitored(database, MIGR_11_12_UPD_HEALTH_LOG_DETAIL, 0, 0, NULL); } if (table_exists_in_database(database, "alert_hash") && !column_exists_in_table(database, "alert_hash", "summary")) - rc = init_database_batch(database, &database_migrate_v12_v13_hash[0]); + rc = init_database_batch(database, &database_migrate_v12_v13_hash[0], "meta_migrate"); return rc; } @@ -448,7 +484,7 @@ static int do_migration_v12_v13(sqlite3 *database) static int do_migration_v13_v14(sqlite3 *database) { if (table_exists_in_database(database, "host") && !column_exists_in_table(database, "host", "last_connected")) - return init_database_batch(database, &database_migrate_v13_v14[0]); + return init_database_batch(database, &database_migrate_v13_v14[0], "meta_migrate"); return 0; } @@ -466,7 +502,7 @@ const char *database_ml_migrate_v1_v2[] = { static int do_ml_migration_v1_v2(sqlite3 *database) { if (get_auto_vaccum(database) != 2) - return init_database_batch(database, &database_ml_migrate_v1_v2[0]); + return init_database_batch(database, &database_ml_migrate_v1_v2[0], "ml_migrate"); return 0; } @@ -527,6 +563,8 @@ DATABASE_FUNC_MIGRATION_LIST migration_action[] = { {.name = "v13 to v14", .func = do_migration_v13_v14}, {.name = "v14 to v15", .func = do_migration_v14_v15}, {.name = "v15 to v16", .func = do_migration_v15_v16}, + {.name = "v16 to v17", .func = do_migration_v16_v17}, + {.name = "v17 to v18", .func = do_migration_v17_v18}, // the terminator of this array {.name = NULL, .func = NULL} }; diff --git a/database/sqlite/sqlite_db_migration.h b/src/database/sqlite/sqlite_db_migration.h index e3c1be84f..e3c1be84f 100644 --- a/database/sqlite/sqlite_db_migration.h +++ b/src/database/sqlite/sqlite_db_migration.h diff --git 
a/src/database/sqlite/sqlite_functions.c b/src/database/sqlite/sqlite_functions.c new file mode 100644 index 000000000..1dc2022b3 --- /dev/null +++ b/src/database/sqlite/sqlite_functions.c @@ -0,0 +1,373 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sqlite_functions.h" + +#define MAX_PREPARED_STATEMENTS (32) +pthread_key_t key_pool[MAX_PREPARED_STATEMENTS]; + +SQLITE_API int sqlite3_exec_monitored( + sqlite3 *db, /* An open database */ + const char *sql, /* SQL to be evaluated */ + int (*callback)(void*,int,char**,char**), /* Callback function */ + void *data, /* 1st argument to callback */ + char **errmsg /* Error msg written here */ +) { + int rc = sqlite3_exec(db, sql, callback, data, errmsg); + global_statistics_sqlite3_query_completed(rc == SQLITE_OK, rc == SQLITE_BUSY, rc == SQLITE_LOCKED); + return rc; +} + +SQLITE_API int sqlite3_step_monitored(sqlite3_stmt *stmt) { + int rc; + int cnt = 0; + + while (cnt++ < SQL_MAX_RETRY) { + rc = sqlite3_step(stmt); + switch (rc) { + case SQLITE_DONE: + global_statistics_sqlite3_query_completed(1, 0, 0); + break; + case SQLITE_ROW: + global_statistics_sqlite3_row_completed(); + break; + case SQLITE_BUSY: + case SQLITE_LOCKED: + global_statistics_sqlite3_query_completed(false, rc == SQLITE_BUSY, rc == SQLITE_LOCKED); + usleep(SQLITE_INSERT_DELAY * USEC_PER_MS); + continue; + default: + break; + } + break; + } + return rc; +} + +static bool mark_database_to_recover(sqlite3_stmt *res, sqlite3 *database) +{ + + if (!res && !database) + return false; + + if (!database) + database = sqlite3_db_handle(res); + + if (db_meta == database) { + char recover_file[FILENAME_MAX + 1]; + snprintfz(recover_file, FILENAME_MAX, "%s/.netdata-meta.db.recover", netdata_configured_cache_dir); + int fd = open(recover_file, O_WRONLY | O_CREAT | O_TRUNC | O_CLOEXEC, 444); + if (fd >= 0) { + close(fd); + return true; + } + } + return false; +} + +int execute_insert(sqlite3_stmt *res) +{ + int rc; + rc = sqlite3_step_monitored(res); 
+ if (rc == SQLITE_CORRUPT) { + (void)mark_database_to_recover(res, NULL); + error_report("SQLite error %d", rc); + } + return rc; +} + +int configure_sqlite_database(sqlite3 *database, int target_version, const char *description) +{ + char buf[1024 + 1] = ""; + const char *list[2] = { buf, NULL }; + + // https://www.sqlite.org/pragma.html#pragma_auto_vacuum + // PRAGMA schema.auto_vacuum = 0 | NONE | 1 | FULL | 2 | INCREMENTAL; + snprintfz(buf, sizeof(buf) - 1, "PRAGMA auto_vacuum=%s", config_get(CONFIG_SECTION_SQLITE, "auto vacuum", "INCREMENTAL")); + if (init_database_batch(database, list, description)) + return 1; + + // https://www.sqlite.org/pragma.html#pragma_synchronous + // PRAGMA schema.synchronous = 0 | OFF | 1 | NORMAL | 2 | FULL | 3 | EXTRA; + snprintfz(buf, sizeof(buf) - 1, "PRAGMA synchronous=%s", config_get(CONFIG_SECTION_SQLITE, "synchronous", "NORMAL")); + if (init_database_batch(database, list, description)) + return 1; + + // https://www.sqlite.org/pragma.html#pragma_journal_mode + // PRAGMA schema.journal_mode = DELETE | TRUNCATE | PERSIST | MEMORY | WAL | OFF + snprintfz(buf, sizeof(buf) - 1, "PRAGMA journal_mode=%s", config_get(CONFIG_SECTION_SQLITE, "journal mode", "WAL")); + if (init_database_batch(database, list, description)) + return 1; + + // https://www.sqlite.org/pragma.html#pragma_temp_store + // PRAGMA temp_store = 0 | DEFAULT | 1 | FILE | 2 | MEMORY; + snprintfz(buf, sizeof(buf) - 1, "PRAGMA temp_store=%s", config_get(CONFIG_SECTION_SQLITE, "temp store", "MEMORY")); + if (init_database_batch(database, list, description)) + return 1; + + // https://www.sqlite.org/pragma.html#pragma_journal_size_limit + // PRAGMA schema.journal_size_limit = N ; + snprintfz(buf, sizeof(buf) - 1, "PRAGMA journal_size_limit=%lld", config_get_number(CONFIG_SECTION_SQLITE, "journal size limit", 16777216)); + if (init_database_batch(database, list, description)) + return 1; + + // https://www.sqlite.org/pragma.html#pragma_cache_size + // PRAGMA 
schema.cache_size = pages; + // PRAGMA schema.cache_size = -kibibytes; + snprintfz(buf, sizeof(buf) - 1, "PRAGMA cache_size=%lld", config_get_number(CONFIG_SECTION_SQLITE, "cache size", -2000)); + if (init_database_batch(database, list, description)) + return 1; + + snprintfz(buf, sizeof(buf) - 1, "PRAGMA user_version=%d", target_version); + if (init_database_batch(database, list, description)) + return 1; + + return 0; +} + +#define MAX_OPEN_STATEMENTS (512) + +static void add_stmt_to_list(sqlite3_stmt *res) +{ + static int idx = 0; + static sqlite3_stmt *statements[MAX_OPEN_STATEMENTS]; + + if (unlikely(!res)) { + if (idx) + netdata_log_info("Finilizing %d statements", idx); + else + netdata_log_info("No statements pending to finalize"); + while (idx > 0) { + int rc; + rc = sqlite3_finalize(statements[--idx]); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to finalize statement during shutdown, rc = %d", rc); + } + return; + } + + if (unlikely(idx == MAX_OPEN_STATEMENTS)) + return; +} + +static void release_statement(void *statement) +{ + int rc; + if (unlikely(rc = sqlite3_finalize((sqlite3_stmt *) statement) != SQLITE_OK)) + error_report("Failed to finalize statement, rc = %d", rc); +} + +static void initialize_thread_key_pool(void) +{ + for (int i = 0; i < MAX_PREPARED_STATEMENTS; i++) + (void)pthread_key_create(&key_pool[i], release_statement); +} + +int prepare_statement(sqlite3 *database, const char *query, sqlite3_stmt **statement) +{ + static __thread uint32_t keys_used = 0; + + pthread_key_t *key = NULL; + int ret = 1; + + if (likely(keys_used < MAX_PREPARED_STATEMENTS)) + key = &key_pool[keys_used++]; + + int rc = sqlite3_prepare_v2(database, query, -1, statement, 0); + if (rc == SQLITE_OK) { + if (key) + ret = pthread_setspecific(*key, *statement); + if (ret) + add_stmt_to_list(*statement); + } + return rc; +} + +char *get_database_extented_error(sqlite3 *database, int i, const char *description) +{ + const char *err = 
sqlite3_errstr(sqlite3_extended_errcode(database)); + + if (!err) + return NULL; + + size_t len = strlen(err)+ strlen(description) + 32; + char *full_err = mallocz(len); + + snprintfz(full_err, len - 1, "%s: %d: %s", description, i, err); + return full_err; +} + +int init_database_batch(sqlite3 *database, const char *batch[], const char *description) +{ + int rc; + char *err_msg = NULL; + for (int i = 0; batch[i]; i++) { + rc = sqlite3_exec_monitored(database, batch[i], 0, 0, &err_msg); + if (rc != SQLITE_OK) { + error_report("SQLite error during database initialization, rc = %d (%s)", rc, err_msg); + error_report("SQLite failed statement %s", batch[i]); + char *error_str = get_database_extented_error(database, i, description); + if (error_str) + analytics_set_data_str(&analytics_data.netdata_fail_reason, error_str); + sqlite3_free(err_msg); + freez(error_str); + if (SQLITE_CORRUPT == rc) { + if (mark_database_to_recover(NULL, database)) + error_report("Database is corrupted will attempt to fix"); + return SQLITE_CORRUPT; + } + return 1; + } + } + return 0; +} + +// Return 0 OK +// Return 1 Failed +int db_execute(sqlite3 *db, const char *cmd) +{ + int rc; + int cnt = 0; + + while (cnt < SQL_MAX_RETRY) { + char *err_msg; + rc = sqlite3_exec_monitored(db, cmd, 0, 0, &err_msg); + if (likely(rc == SQLITE_OK)) + break; + + ++cnt; + error_report("Failed to execute '%s', rc = %d (%s) -- attempt %d", cmd, rc, err_msg, cnt); + sqlite3_free(err_msg); + + if (likely(rc == SQLITE_BUSY || rc == SQLITE_LOCKED)) { + usleep(SQLITE_INSERT_DELAY * USEC_PER_MS); + continue; + } + + if (rc == SQLITE_CORRUPT) + mark_database_to_recover(NULL, db); + break; + } + return (rc != SQLITE_OK); +} + +// Utils +int bind_text_null(sqlite3_stmt *res, int position, const char *text, bool can_be_null) +{ + if (likely(text)) + return sqlite3_bind_text(res, position, text, -1, SQLITE_STATIC); + if (!can_be_null) + return 1; + return sqlite3_bind_null(res, position); +} + +#define SQL_DROP_TABLE "DROP 
table %s" + +void sql_drop_table(const char *table) +{ + if (!table) + return; + + char wstr[255]; + snprintfz(wstr, sizeof(wstr) - 1, SQL_DROP_TABLE, table); + + int rc = sqlite3_exec_monitored(db_meta, wstr, 0, 0, NULL); + if (rc != SQLITE_OK) { + error_report("DES SQLite error during drop table operation for %s, rc = %d", table, rc); + } +} + +static int get_pragma_value(sqlite3 *database, const char *sql) +{ + sqlite3_stmt *res = NULL; + int rc = sqlite3_prepare_v2(database, sql, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) + return -1; + + int result = -1; + rc = sqlite3_step_monitored(res); + if (likely(rc == SQLITE_ROW)) + result = sqlite3_column_int(res, 0); + + rc = sqlite3_finalize(res); + (void) rc; + + return result; +} + +int get_free_page_count(sqlite3 *database) +{ + return get_pragma_value(database, "PRAGMA freelist_count"); +} + +int get_database_page_count(sqlite3 *database) +{ + return get_pragma_value(database, "PRAGMA page_count"); +} + +uint64_t sqlite_get_db_space(sqlite3 *db) +{ + if (!db) + return 0; + + uint64_t page_size = (uint64_t) get_pragma_value(db, "PRAGMA page_size"); + uint64_t page_count = (uint64_t) get_pragma_value(db, "PRAGMA page_count"); + + return page_size * page_count; +} + +/* + * Close the sqlite database + */ + +void sql_close_database(sqlite3 *database, const char *database_name) +{ + int rc; + if (unlikely(!database)) + return; + + (void) db_execute(database, "PRAGMA analysis_limit=10000"); + (void) db_execute(database, "PRAGMA optimize"); + + netdata_log_info("%s: Closing sqlite database", database_name); + +#ifdef NETDATA_DEV_MODE + int t_count_used,t_count_hit,t_count_miss,t_count_full, dummy; + (void) sqlite3_db_status(database, SQLITE_DBSTATUS_LOOKASIDE_USED, &dummy, &t_count_used, 0); + (void) sqlite3_db_status(database, SQLITE_DBSTATUS_LOOKASIDE_HIT, &dummy,&t_count_hit, 0); + (void) sqlite3_db_status(database, SQLITE_DBSTATUS_LOOKASIDE_MISS_SIZE, &dummy,&t_count_miss, 0); + (void) 
sqlite3_db_status(database, SQLITE_DBSTATUS_LOOKASIDE_MISS_FULL, &dummy,&t_count_full, 0); + + netdata_log_info("%s: Database lookaside allocation statistics: Used slots %d, Hit %d, Misses due to small slot size %d, Misses due to slots full %d", database_name, + t_count_used,t_count_hit, t_count_miss, t_count_full); + + (void) sqlite3_db_release_memory(database); +#endif + + rc = sqlite3_close_v2(database); + if (unlikely(rc != SQLITE_OK)) + error_report("%s: Error while closing the sqlite database: rc %d, error \"%s\"", database_name, rc, sqlite3_errstr(rc)); +} + +extern sqlite3 *db_context_meta; + +void sqlite_close_databases(void) +{ + add_stmt_to_list(NULL); + + sql_close_database(db_context_meta, "CONTEXT"); + sql_close_database(db_meta, "METADATA"); +} + +int sqlite_library_init(void) +{ + initialize_thread_key_pool(); + + int rc = sqlite3_initialize(); + + return (SQLITE_OK != rc); +} + +void sqlite_library_shutdown(void) +{ + (void) sqlite3_shutdown(); +} diff --git a/src/database/sqlite/sqlite_functions.h b/src/database/sqlite/sqlite_functions.h new file mode 100644 index 000000000..2841b27f6 --- /dev/null +++ b/src/database/sqlite/sqlite_functions.h @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SQLITE_FUNCTIONS_H +#define NETDATA_SQLITE_FUNCTIONS_H + +#include "daemon/common.h" +#include "sqlite3.h" + +void analytics_set_data_str(char **name, const char *value); + +#define SQL_MAX_RETRY (100) +#define SQLITE_INSERT_DELAY (10) // Insert delay in case of lock + +SQLITE_API int sqlite3_step_monitored(sqlite3_stmt *stmt); +SQLITE_API int sqlite3_exec_monitored( + sqlite3 *db, /* An open database */ + const char *sql, /* SQL to be evaluated */ + int (*callback)(void*,int,char**,char**), /* Callback function */ + void *data, /* 1st argument to callback */ + char **errmsg /* Error msg written here */ + ); + +// Initialization and shutdown +int init_database_batch(sqlite3 *database, const char *batch[], const char 
*description); +int configure_sqlite_database(sqlite3 *database, int target_version, const char *description); + +// Helpers +int bind_text_null(sqlite3_stmt *res, int position, const char *text, bool can_be_null); +int prepare_statement(sqlite3 *database, const char *query, sqlite3_stmt **statement); +int execute_insert(sqlite3_stmt *res); +int db_execute(sqlite3 *database, const char *cmd); +char *get_database_extented_error(sqlite3 *database, int i, const char *description); + +void sql_drop_table(const char *table); +void sqlite_now_usec(sqlite3_context *context, int argc, sqlite3_value **argv); + +uint64_t sqlite_get_db_space(sqlite3 *db); + +int get_free_page_count(sqlite3 *database); +int get_database_page_count(sqlite3 *database); + +int sqlite_library_init(void); +void sqlite_library_shutdown(void); + +void sql_close_database(sqlite3 *database, const char *database_name); +void sqlite_close_databases(void); +#endif //NETDATA_SQLITE_FUNCTIONS_H diff --git a/database/sqlite/sqlite_health.c b/src/database/sqlite/sqlite_health.c index 7d79ff70b..ea883c51b 100644 --- a/database/sqlite/sqlite_health.c +++ b/src/database/sqlite/sqlite_health.c @@ -3,6 +3,7 @@ #include "sqlite_health.h" #include "sqlite_functions.h" #include "sqlite_db_migration.h" +#include "health/health_internals.h" #define MAX_HEALTH_SQL_SIZE 2048 #define SQLITE3_BIND_STRING_OR_NULL(res, key, param) \ @@ -101,7 +102,8 @@ failed: "config_hash_id, name, chart, exec, recipient, units, chart_context, last_transition_id, chart_name) " \ "VALUES (@host_id,@alarm_id, @config_hash_id,@name,@chart,@exec,@recipient,@units,@chart_context," \ "@last_transition_id,@chart_name) ON CONFLICT (host_id, alarm_id) DO UPDATE " \ - "SET last_transition_id = excluded.last_transition_id, chart_name = excluded.chart_name RETURNING health_log_id" + "SET last_transition_id = excluded.last_transition_id, chart_name = excluded.chart_name, " \ + "config_hash_id=excluded.config_hash_id RETURNING health_log_id" #define 
SQL_INSERT_HEALTH_LOG_DETAIL \ "INSERT INTO health_log_detail (health_log_id, unique_id, alarm_id, alarm_event_id, " \ @@ -893,18 +895,21 @@ void sql_health_alarm_log_load(RRDHOST *host) /* * Store an alert config hash in the database */ -#define SQL_STORE_ALERT_CONFIG_HASH \ - "insert or replace into alert_hash (hash_id, date_updated, alarm, template, " \ - "on_key, class, component, type, os, hosts, lookup, every, units, calc, plugin, module, " \ - "charts, green, red, warn, crit, exec, to_key, info, delay, options, repeat, host_labels, " \ - "p_db_lookup_dimensions, p_db_lookup_method, p_db_lookup_options, p_db_lookup_after, " \ - "p_db_lookup_before, p_update_every, source, chart_labels, summary) values (@hash_id,UNIXEPOCH(),@alarm,@template," \ - "@on_key,@class,@component,@type,@os,@hosts,@lookup,@every,@units,@calc,@plugin,@module," \ - "@charts,@green,@red,@warn,@crit,@exec,@to_key,@info,@delay,@options,@repeat,@host_labels," \ - "@p_db_lookup_dimensions,@p_db_lookup_method,@p_db_lookup_options,@p_db_lookup_after," \ - "@p_db_lookup_before,@p_update_every,@source,@chart_labels,@summary)" - -int sql_store_alert_config_hash(uuid_t *hash_id, struct alert_config *cfg) +#define SQL_STORE_ALERT_CONFIG_HASH \ + "insert or replace into alert_hash (hash_id, date_updated, alarm, template, " \ + "on_key, class, component, type, lookup, every, units, calc, " \ + "green, red, warn, crit, exec, to_key, info, delay, options, repeat, host_labels, " \ + "p_db_lookup_dimensions, p_db_lookup_method, p_db_lookup_options, p_db_lookup_after, " \ + "p_db_lookup_before, p_update_every, source, chart_labels, summary, time_group_condition, " \ + "time_group_value, dims_group, data_source) " \ + "values (@hash_id,UNIXEPOCH(),@alarm,@template," \ + "@on_key,@class,@component,@type,@lookup,@every,@units,@calc," \ + "@green,@red,@warn,@crit,@exec,@to_key,@info,@delay,@options,@repeat,@host_labels," \ + 
"@p_db_lookup_dimensions,@p_db_lookup_method,@p_db_lookup_options,@p_db_lookup_after," \ + "@p_db_lookup_before,@p_update_every,@source,@chart_labels,@summary, @time_group_condition, " \ + "@time_group_value, @dims_group, @data_source)" + +int sql_alert_store_config(RRD_ALERT_PROTOTYPE *ap __maybe_unused) { static __thread sqlite3_stmt *res = NULL; int rc, param = 0; @@ -923,133 +928,153 @@ int sql_store_alert_config_hash(uuid_t *hash_id, struct alert_config *cfg) return 1; } } + BUFFER *buf = buffer_create(128, NULL); - rc = sqlite3_bind_blob(res, ++param, hash_id, sizeof(*hash_id), SQLITE_STATIC); + rc = sqlite3_bind_blob(res, ++param, &ap->config.hash_id, sizeof(ap->config.hash_id), SQLITE_STATIC); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->alarm, ++param); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; + if (ap->match.is_template) + rc = SQLITE3_BIND_STRING_OR_NULL(res, NULL, ++param); + else + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.name, ++param); - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->template_key, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->on, ++param); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; + if (ap->match.is_template) + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.name, ++param); + else + rc = SQLITE3_BIND_STRING_OR_NULL(res, NULL, ++param); - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->classification, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->component, ++param); + if (ap->match.is_template) + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.on.context, ++param); + else + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.on.chart, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->type, ++param); + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.classification, ++param); if (unlikely(rc != SQLITE_OK)) goto 
bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->os, ++param); + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.component, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->host, ++param); + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.type, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->lookup, ++param); + // Rebuild lookup + rc = SQLITE3_BIND_STRING_OR_NULL(res, NULL, ++param); // lookup line if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->every, ++param); + rc = sqlite3_bind_int(res, ++param, ap->config.update_every); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->units, ++param); + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.units, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->calc, ++param); + if (ap->config.calculation) + rc = sqlite3_bind_text(res, ++param, expression_source(ap->config.calculation), -1, SQLITE_STATIC); + else + rc = sqlite3_bind_null(res, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->plugin, ++param); + NETDATA_DOUBLE green = NAN; + rc = sqlite3_bind_double(res, ++param, green); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->module, ++param); + NETDATA_DOUBLE red = NAN; + rc = sqlite3_bind_double(res, ++param, red); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->charts, ++param); + if (ap->config.warning) + rc = sqlite3_bind_text(res, ++param, expression_source(ap->config.warning), -1, SQLITE_STATIC); + else + rc = sqlite3_bind_null(res, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->green, ++param); + if (ap->config.critical) + rc = sqlite3_bind_text(res, ++param, 
expression_source(ap->config.critical), -1, SQLITE_STATIC); + else + rc = sqlite3_bind_null(res, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->red, ++param); + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.exec, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->warn, ++param); + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.recipient, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->crit, ++param); + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.info, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->exec, ++param); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; + if (ap->config.delay_up_duration) + buffer_sprintf(buf, "up %ds ", ap->config.delay_up_duration); - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->to, ++param); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; + if (ap->config.delay_down_duration) + buffer_sprintf(buf, "down %ds ", ap->config.delay_down_duration); - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->info, ++param); - if (unlikely(rc != SQLITE_OK)) - goto bind_fail; + if (ap->config.delay_multiplier) + buffer_sprintf(buf, "multiplier %.1f ", ap->config.delay_multiplier); + + if (ap->config.delay_max_duration) + buffer_sprintf(buf, "max %ds", ap->config.delay_max_duration); - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->delay, ++param); + // delay + rc = sqlite3_bind_text(res, ++param, buffer_tostring(buf), -1, SQLITE_STATIC); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->options, ++param); + if (ap->config.alert_action_options & ALERT_ACTION_OPTION_NO_CLEAR_NOTIFICATION) + rc = sqlite3_bind_text(res, ++param, "no-clear-notification", -1, SQLITE_STATIC); + else + rc = sqlite3_bind_null(res, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, 
cfg->repeat, ++param); + rc = sqlite3_bind_int(res, ++param, ap->config.update_every); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->host_labels, ++param); + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.host_labels, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - if (cfg->p_db_lookup_after) { - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->p_db_lookup_dimensions, ++param); + if (ap->config.after) { + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.dimensions, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->p_db_lookup_method, ++param); + rc = sqlite3_bind_text(res, ++param, time_grouping_id2txt(ap->config.time_group), -1, SQLITE_STATIC); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = sqlite3_bind_int(res, ++param, (int) cfg->p_db_lookup_options); + rc = sqlite3_bind_int(res, ++param, (int) RRDR_OPTIONS_REMOVE_OVERLAPPING(ap->config.options)); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = sqlite3_bind_int(res, ++param, (int) cfg->p_db_lookup_after); + rc = sqlite3_bind_int64(res, ++param, (int) ap->config.after); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = sqlite3_bind_int(res, ++param, (int) cfg->p_db_lookup_before); + rc = sqlite3_bind_int64(res, ++param, (int) ap->config.before); if (unlikely(rc != SQLITE_OK)) goto bind_fail; } else { @@ -1074,19 +1099,35 @@ int sql_store_alert_config_hash(uuid_t *hash_id, struct alert_config *cfg) goto bind_fail; } - rc = sqlite3_bind_int(res, ++param, cfg->p_update_every); + rc = sqlite3_bind_int(res, ++param, ap->config.update_every); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->source, ++param); + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.source, ++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->chart_labels, ++param); + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->match.chart_labels, 
++param); if (unlikely(rc != SQLITE_OK)) goto bind_fail; - rc = SQLITE3_BIND_STRING_OR_NULL(res, cfg->summary, ++param); + rc = SQLITE3_BIND_STRING_OR_NULL(res, ap->config.summary, ++param); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, ap->config.time_group_condition); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_double(res, ++param, ap->config.time_group_value); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, ap->config.dims_group); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, ap->config.data_source); if (unlikely(rc != SQLITE_OK)) goto bind_fail; @@ -1098,9 +1139,11 @@ int sql_store_alert_config_hash(uuid_t *hash_id, struct alert_config *cfg) if (unlikely(rc != SQLITE_OK)) error_report("Failed to reset statement in alert hash_id store function, rc = %d", rc); + buffer_free(buf); return 0; bind_fail: + buffer_free(buf); error_report("Failed to bind parameter %d to store alert hash_id, rc = %d", param, rc); rc = sqlite3_reset(res); if (unlikely(rc != SQLITE_OK)) @@ -1108,75 +1151,6 @@ bind_fail: return 1; } -/* - alert hashes are used for cloud communication. - if cloud is disabled or openssl is not available (which will prevent cloud connectivity) - skip hash calculations -*/ -#if defined ENABLE_HTTPS -#define DIGEST_ALERT_CONFIG_VAL(v) ((v) ? 
EVP_DigestUpdate(evpctx, (string2str(v)), string_strlen((v))) : EVP_DigestUpdate(evpctx, "", 1)) -#endif -int alert_hash_and_store_config( - uuid_t hash_id, - struct alert_config *cfg, - int store_hash) -{ -#if defined ENABLE_HTTPS - EVP_MD_CTX *evpctx; - unsigned char hash_value[EVP_MAX_MD_SIZE]; - unsigned int hash_len; - evpctx = EVP_MD_CTX_create(); - EVP_DigestInit_ex(evpctx, EVP_sha256(), NULL); - - DIGEST_ALERT_CONFIG_VAL(cfg->alarm); - DIGEST_ALERT_CONFIG_VAL(cfg->template_key); - DIGEST_ALERT_CONFIG_VAL(cfg->os); - DIGEST_ALERT_CONFIG_VAL(cfg->host); - DIGEST_ALERT_CONFIG_VAL(cfg->on); - DIGEST_ALERT_CONFIG_VAL(cfg->plugin); - DIGEST_ALERT_CONFIG_VAL(cfg->module); - DIGEST_ALERT_CONFIG_VAL(cfg->charts); - DIGEST_ALERT_CONFIG_VAL(cfg->lookup); - DIGEST_ALERT_CONFIG_VAL(cfg->calc); - DIGEST_ALERT_CONFIG_VAL(cfg->every); - DIGEST_ALERT_CONFIG_VAL(cfg->green); - DIGEST_ALERT_CONFIG_VAL(cfg->red); - DIGEST_ALERT_CONFIG_VAL(cfg->warn); - DIGEST_ALERT_CONFIG_VAL(cfg->crit); - DIGEST_ALERT_CONFIG_VAL(cfg->exec); - DIGEST_ALERT_CONFIG_VAL(cfg->to); - DIGEST_ALERT_CONFIG_VAL(cfg->units); - DIGEST_ALERT_CONFIG_VAL(cfg->info); - DIGEST_ALERT_CONFIG_VAL(cfg->classification); - DIGEST_ALERT_CONFIG_VAL(cfg->component); - DIGEST_ALERT_CONFIG_VAL(cfg->type); - DIGEST_ALERT_CONFIG_VAL(cfg->delay); - DIGEST_ALERT_CONFIG_VAL(cfg->options); - DIGEST_ALERT_CONFIG_VAL(cfg->repeat); - DIGEST_ALERT_CONFIG_VAL(cfg->host_labels); - DIGEST_ALERT_CONFIG_VAL(cfg->chart_labels); - DIGEST_ALERT_CONFIG_VAL(cfg->summary); - - EVP_DigestFinal_ex(evpctx, hash_value, &hash_len); - EVP_MD_CTX_destroy(evpctx); - fatal_assert(hash_len > sizeof(uuid_t)); - - char uuid_str[UUID_STR_LEN]; - uuid_unparse_lower(*((uuid_t *)&hash_value), uuid_str); - uuid_copy(hash_id, *((uuid_t *)&hash_value)); - - /* store everything, so it can be recreated when not in memory or just a subset ? 
*/ - if (store_hash) - (void)sql_store_alert_config_hash( (uuid_t *)&hash_value, cfg); -#else - UNUSED(hash_id); - UNUSED(cfg); - UNUSED(store_hash); -#endif - - return 1; -} - #define SQL_SELECT_HEALTH_LAST_EXECUTED_EVENT \ "SELECT hld.new_status FROM health_log hl, health_log_detail hld " \ "WHERE hl.host_id = @host_id AND hl.alarm_id = @alarm_id AND hld.unique_id != @unique_id AND hld.flags & @flags " \ @@ -1580,10 +1554,9 @@ static uint32_t get_next_alarm_event_id(uint64_t health_log_id, uint32_t alarm_i } #define SQL_GET_ALARM_ID \ - "SELECT alarm_id, health_log_id FROM health_log WHERE host_id = @host_id AND chart = @chart " \ - "AND name = @name AND config_hash_id = @config_hash_id" + "SELECT alarm_id, health_log_id FROM health_log WHERE host_id = @host_id AND chart = @chart AND name = @name" -uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id) +uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id) { int rc = 0; sqlite3_stmt *res = NULL; @@ -1617,13 +1590,6 @@ uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t * return alarm_id; } - rc = sqlite3_bind_blob(res, 4, config_hash_id, sizeof(*config_hash_id), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind config_hash_id parameter for SQL_GET_ALARM_ID."); - sqlite3_finalize(res); - return alarm_id; - } - while (sqlite3_step_monitored(res) == SQLITE_ROW) { alarm_id = (uint32_t) sqlite3_column_int64(res, 0); health_log_id = (uint64_t) sqlite3_column_int64(res, 1); @@ -1639,111 +1605,6 @@ uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t * return alarm_id; } -#define SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH \ - "UPDATE health_log SET config_hash_id = @config_hash_id WHERE host_id = @host_id AND alarm_id = @alarm_id " \ - "AND health_log_id = @health_log_id" - -void sql_update_alarm_with_config_hash(RRDHOST *host, uint32_t 
alarm_id, uint64_t health_log_id, uuid_t *config_hash_id) -{ - int rc = 0; - sqlite3_stmt *res = NULL; - - rc = sqlite3_prepare_v2(db_meta, SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH, -1, &res, 0); - if (rc != SQLITE_OK) { - error_report("Failed to prepare statement when trying to update an alarm id with a config hash."); - return; - } - - rc = sqlite3_bind_blob(res, 1, config_hash_id, sizeof(*config_hash_id), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind config_hash_id parameter for SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH."); - goto done; - } - - rc = sqlite3_bind_blob(res, 2, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind host_id parameter for SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH."); - goto done; - } - - rc = sqlite3_bind_int64(res, 3, (sqlite3_int64) alarm_id); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind alarm_id parameter for SQL_GET_ALARM_ID."); - goto done; - } - - rc = sqlite3_bind_int64(res, 4, (sqlite3_int64) health_log_id); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind alarm_id parameter for SQL_GET_ALARM_ID."); - goto done; - } - - rc = execute_insert(res); - if (unlikely(rc != SQLITE_DONE)) - error_report("Failed to execute SQL_UPDATE_ALARM_ID_WITH_CONFIG_HASH, rc = %d", rc); - -done: - rc = sqlite3_finalize(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to reset statement to update health log detail table with config hash ids, rc = %d", rc); - -} - -#define SQL_GET_ALARM_ID_CHECK_ZERO_HASH \ - "SELECT alarm_id, health_log_id FROM health_log WHERE host_id = @host_id AND chart = @chart " \ - "AND name = @name AND (config_hash_id IS NULL OR config_hash_id = ZEROBLOB(16))" - -uint32_t sql_get_alarm_id_check_zero_hash(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id) -{ - int rc = 0; - sqlite3_stmt *res = NULL; - uint32_t alarm_id = 0; - uint64_t 
health_log_id = 0; - - rc = sqlite3_prepare_v2(db_meta, SQL_GET_ALARM_ID_CHECK_ZERO_HASH, -1, &res, 0); - if (rc != SQLITE_OK) { - error_report("Failed to prepare statement when trying to get an alarm id with zero hash"); - return alarm_id; - } - - rc = sqlite3_bind_blob(res, 1, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind host_id parameter for SQL_GET_ALARM_ID_CHECK_ZERO_HASH."); - sqlite3_finalize(res); - return alarm_id; - } - - rc = SQLITE3_BIND_STRING_OR_NULL(res, chart, 2); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind char parameter for SQL_GET_ALARM_ID_CHECK_ZERO_HASH."); - sqlite3_finalize(res); - return alarm_id; - } - - rc = SQLITE3_BIND_STRING_OR_NULL(res, name, 3); - if (unlikely(rc != SQLITE_OK)) { - error_report("Failed to bind name parameter for SQL_GET_ALARM_ID_CHECK_ZERO_HASH."); - sqlite3_finalize(res); - return alarm_id; - } - - while (sqlite3_step_monitored(res) == SQLITE_ROW) { - alarm_id = (uint32_t) sqlite3_column_int64(res, 0); - health_log_id = (uint64_t) sqlite3_column_int64(res, 1); - } - - rc = sqlite3_finalize(res); - if (unlikely(rc != SQLITE_OK)) - error_report("Failed to finalize the statement while getting an alarm id."); - - if (alarm_id) { - sql_update_alarm_with_config_hash(host, alarm_id, health_log_id, config_hash_id); - *next_event_id = get_next_alarm_event_id(health_log_id, alarm_id); - } - - return alarm_id; -} - #define SQL_GET_ALARM_ID_FROM_TRANSITION_ID \ "SELECT hld.alarm_id, hl.host_id, hl.chart_context FROM health_log_detail hld, health_log hl " \ "WHERE hld.transition_id = @transition_id " \ @@ -1996,10 +1857,11 @@ done_only_drop: #define SQL_POPULATE_TEMP_CONFIG_TARGET_TABLE "INSERT INTO c_%p (hash_id) VALUES (@hash_id)" #define SQL_SEARCH_CONFIG_LIST \ - "SELECT ah.hash_id, alarm, template, on_key, class, component, type, os, hosts, lookup, every, " \ - " units, calc, families, plugin, module, charts, green, red, 
warn, crit, " \ + "SELECT ah.hash_id, alarm, template, on_key, class, component, type, lookup, every, " \ + " units, calc, families, green, red, warn, crit, " \ " exec, to_key, info, delay, options, repeat, host_labels, p_db_lookup_dimensions, p_db_lookup_method, " \ - " p_db_lookup_options, p_db_lookup_after, p_db_lookup_before, p_update_every, source, chart_labels, summary " \ + " p_db_lookup_options, p_db_lookup_after, p_db_lookup_before, p_update_every, source, chart_labels, summary, " \ + " time_group_condition, time_group_value, dims_group, data_source " \ " FROM alert_hash ah, c_%p t where ah.hash_id = t.hash_id" int sql_get_alert_configuration( @@ -2079,16 +1941,11 @@ int sql_get_alert_configuration( acd.classification = (const char *) sqlite3_column_text(res, param++); acd.component = (const char *) sqlite3_column_text(res, param++); acd.type = (const char *) sqlite3_column_text(res, param++); - acd.selectors.os = (const char *) sqlite3_column_text(res, param++); - acd.selectors.hosts = (const char *) sqlite3_column_text(res, param++); acd.value.db.lookup = (const char *) sqlite3_column_text(res, param++); acd.value.every = (const char *) sqlite3_column_text(res, param++); acd.value.units = (const char *) sqlite3_column_text(res, param++); acd.value.calc = (const char *) sqlite3_column_text(res, param++); acd.selectors.families = (const char *) sqlite3_column_text(res, param++); - acd.selectors.plugin = (const char *) sqlite3_column_text(res, param++); - acd.selectors.module = (const char *) sqlite3_column_text(res, param++); - acd.selectors.charts = (const char *) sqlite3_column_text(res, param++); acd.status.green = (const char *) sqlite3_column_text(res, param++); acd.status.red = (const char *) sqlite3_column_text(res, param++); acd.status.warn = (const char *) sqlite3_column_text(res, param++); @@ -2109,6 +1966,10 @@ int sql_get_alert_configuration( acd.source = (const char *) sqlite3_column_text(res, param++); acd.selectors.chart_labels = (const char 
*) sqlite3_column_text(res, param++); acd.summary = (const char *) sqlite3_column_text(res, param++); + acd.value.db.time_group_condition =(int32_t) sqlite3_column_int(res, param++); + acd.value.db.time_group_value = sqlite3_column_double(res, param++); + acd.value.db.dims_group = (int32_t) sqlite3_column_int(res, param++); + acd.value.db.data_source = (int32_t) sqlite3_column_int(res, param++); cb(&acd, data); added++; diff --git a/database/sqlite/sqlite_health.h b/src/database/sqlite/sqlite_health.h index 5549b7525..1b889436e 100644 --- a/database/sqlite/sqlite_health.h +++ b/src/database/sqlite/sqlite_health.h @@ -2,21 +2,22 @@ #ifndef NETDATA_SQLITE_HEALTH_H #define NETDATA_SQLITE_HEALTH_H -#include "../../daemon/common.h" + +#include "daemon/common.h" #include "sqlite3.h" struct sql_alert_transition_data; struct sql_alert_config_data; +struct rrd_alert_prototype; void sql_health_alarm_log_load(RRDHOST *host); void sql_health_alarm_log_save(RRDHOST *host, ALARM_ENTRY *ae); void sql_health_alarm_log_cleanup(RRDHOST *host, bool claimed); -int alert_hash_and_store_config(uuid_t hash_id, struct alert_config *cfg, int store_hash); +int sql_alert_store_config(struct rrd_alert_prototype *ap); void sql_aclk_alert_clean_dead_entries(RRDHOST *host); int sql_health_get_last_executed_event(RRDHOST *host, ALARM_ENTRY *ae, RRDCALC_STATUS *last_executed_status); void sql_health_alarm_log2json(RRDHOST *host, BUFFER *wb, time_t after, const char *chart); int health_migrate_old_health_log_table(char *table); -uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id); -uint32_t sql_get_alarm_id_check_zero_hash(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id, uuid_t *config_hash_id); +uint32_t sql_get_alarm_id(RRDHOST *host, STRING *chart, STRING *name, uint32_t *next_event_id); void sql_alert_transitions( DICTIONARY *nodes, time_t after, diff --git a/src/database/sqlite/sqlite_metadata.c 
b/src/database/sqlite/sqlite_metadata.c new file mode 100644 index 000000000..86ecee1f6 --- /dev/null +++ b/src/database/sqlite/sqlite_metadata.c @@ -0,0 +1,2645 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "sqlite_metadata.h" +#include "sqlite3recover.h" +//#include "sqlite_db_migration.h" + +#define DB_METADATA_VERSION 18 + +const char *database_config[] = { + "CREATE TABLE IF NOT EXISTS host(host_id BLOB PRIMARY KEY, hostname TEXT NOT NULL, " + "registry_hostname TEXT NOT NULL default 'unknown', update_every INT NOT NULL default 1, " + "os TEXT NOT NULL default 'unknown', timezone TEXT NOT NULL default 'unknown', tags TEXT NOT NULL default ''," + "hops INT NOT NULL DEFAULT 0," + "memory_mode INT DEFAULT 0, abbrev_timezone TEXT DEFAULT '', utc_offset INT NOT NULL DEFAULT 0," + "program_name TEXT NOT NULL DEFAULT 'unknown', program_version TEXT NOT NULL DEFAULT 'unknown', " + "entries INT NOT NULL DEFAULT 0," + "health_enabled INT NOT NULL DEFAULT 0, last_connected INT NOT NULL DEFAULT 0)", + + "CREATE TABLE IF NOT EXISTS chart(chart_id blob PRIMARY KEY, host_id blob, type text, id text, name text, " + "family text, context text, title text, unit text, plugin text, module text, priority int, update_every int, " + "chart_type int, memory_mode int, history_entries)", + + "CREATE TABLE IF NOT EXISTS dimension(dim_id blob PRIMARY KEY, chart_id blob, id text, name text, " + "multiplier int, divisor int , algorithm int, options text)", + + "CREATE TABLE IF NOT EXISTS metadata_migration(filename text, file_size, date_created int)", + + "CREATE TABLE IF NOT EXISTS chart_label(chart_id blob, source_type int, label_key text, " + "label_value text, date_created int, PRIMARY KEY (chart_id, label_key))", + + "CREATE TABLE IF NOT EXISTS node_instance (host_id blob PRIMARY KEY, claim_id, node_id, date_created)", + + "CREATE TABLE IF NOT EXISTS alert_hash(hash_id blob PRIMARY KEY, date_updated int, alarm text, template text, " + "on_key text, class text, component 
text, type text, os text, hosts text, lookup text, " + "every text, units text, calc text, families text, plugin text, module text, charts text, green text, " + "red text, warn text, crit text, exec text, to_key text, info text, delay text, options text, " + "repeat text, host_labels text, p_db_lookup_dimensions text, p_db_lookup_method text, p_db_lookup_options int, " + "p_db_lookup_after int, p_db_lookup_before int, p_update_every int, source text, chart_labels text, " + "summary text, time_group_condition INT, time_group_value DOUBLE, dims_group INT, data_source INT)", + + "CREATE TABLE IF NOT EXISTS host_info(host_id blob, system_key text NOT NULL, system_value text NOT NULL, " + "date_created INT, PRIMARY KEY(host_id, system_key))", + + "CREATE TABLE IF NOT EXISTS host_label(host_id blob, source_type int, label_key text NOT NULL, " + "label_value text NOT NULL, date_created INT, PRIMARY KEY (host_id, label_key))", + + "CREATE TRIGGER IF NOT EXISTS ins_host AFTER INSERT ON host BEGIN INSERT INTO node_instance (host_id, date_created)" + " SELECT new.host_id, unixepoch() WHERE new.host_id NOT IN (SELECT host_id FROM node_instance); END", + + "CREATE TABLE IF NOT EXISTS health_log (health_log_id INTEGER PRIMARY KEY, host_id blob, alarm_id int, " + "config_hash_id blob, name text, chart text, family text, recipient text, units text, exec text, " + "chart_context text, last_transition_id blob, chart_name text, UNIQUE (host_id, alarm_id))", + + "CREATE TABLE IF NOT EXISTS health_log_detail (health_log_id int, unique_id int, alarm_id int, alarm_event_id int, " + "updated_by_id int, updates_id int, when_key int, duration int, non_clear_duration int, " + "flags int, exec_run_timestamp int, delay_up_to_timestamp int, " + "info text, exec_code int, new_status real, old_status real, delay int, " + "new_value double, old_value double, last_repeat int, transition_id blob, global_id int, summary text)", + + "CREATE INDEX IF NOT EXISTS ind_d2 on dimension (chart_id)", + 
"CREATE INDEX IF NOT EXISTS ind_c3 on chart (host_id)", + "CREATE INDEX IF NOT EXISTS health_log_ind_1 ON health_log (host_id)", + "CREATE INDEX IF NOT EXISTS health_log_d_ind_2 ON health_log_detail (global_id)", + "CREATE INDEX IF NOT EXISTS health_log_d_ind_3 ON health_log_detail (transition_id)", + "CREATE INDEX IF NOT EXISTS health_log_d_ind_9 ON health_log_detail (unique_id DESC, health_log_id)", + "CREATE INDEX IF NOT EXISTS health_log_d_ind_6 on health_log_detail (health_log_id, when_key)", + "CREATE INDEX IF NOT EXISTS health_log_d_ind_7 on health_log_detail (alarm_id)", + "CREATE INDEX IF NOT EXISTS health_log_d_ind_8 on health_log_detail (new_status, updated_by_id)", + + NULL +}; + +const char *database_cleanup[] = { + "DELETE FROM host WHERE host_id NOT IN (SELECT host_id FROM chart)", + "DELETE FROM node_instance WHERE host_id NOT IN (SELECT host_id FROM host)", + "DELETE FROM host_info WHERE host_id NOT IN (SELECT host_id FROM host)", + "DELETE FROM host_label WHERE host_id NOT IN (SELECT host_id FROM host)", + "DROP TRIGGER IF EXISTS tr_dim_del", + "DROP INDEX IF EXISTS ind_d1", + "DROP INDEX IF EXISTS ind_c1", + "DROP INDEX IF EXISTS ind_c2", + "DROP INDEX IF EXISTS alert_hash_index", + "DROP INDEX IF EXISTS health_log_d_ind_4", + "DROP INDEX IF EXISTS health_log_d_ind_1", + "DROP INDEX IF EXISTS health_log_d_ind_5", + NULL +}; + +sqlite3 *db_meta = NULL; + +// SQL statements + +#define SQL_STORE_CLAIM_ID \ + "INSERT INTO node_instance " \ + "(host_id, claim_id, date_created) VALUES (@host_id, @claim_id, UNIXEPOCH()) " \ + "ON CONFLICT(host_id) DO UPDATE SET claim_id = excluded.claim_id" + +#define SQL_DELETE_HOST_LABELS "DELETE FROM host_label WHERE host_id = @uuid" + +#define STORE_HOST_LABEL \ + "INSERT INTO host_label (host_id, source_type, label_key, label_value, date_created) VALUES " + +#define STORE_CHART_LABEL \ + "INSERT INTO chart_label (chart_id, source_type, label_key, label_value, date_created) VALUES " + +#define 
STORE_HOST_OR_CHART_LABEL_VALUE "(u2h('%s'), %d,'%s','%s', unixepoch())" + +#define DELETE_DIMENSION_UUID "DELETE FROM dimension WHERE dim_id = @uuid" + +#define SQL_STORE_HOST_INFO \ + "INSERT OR REPLACE INTO host (host_id, hostname, registry_hostname, update_every, os, timezone, tags, hops, " \ + "memory_mode, abbrev_timezone, utc_offset, program_name, program_version, entries, health_enabled, last_connected) " \ + "VALUES (@host_id, @hostname, @registry_hostname, @update_every, @os, @timezone, @tags, @hops, " \ + "@memory_mode, @abbrev_tz, @utc_offset, @prog_name, @prog_version, @entries, @health_enabled, @last_connected)" + +#define SQL_STORE_CHART \ + "INSERT INTO chart (chart_id, host_id, type, id, name, family, context, title, unit, plugin, module, priority, " \ + "update_every, chart_type, memory_mode, history_entries) " \ + "values (@chart_id, @host_id, @type, @id, @name, @family, @context, @title, @unit, @plugin, @module, @priority, " \ + "@update_every, @chart_type, @memory_mode, @history_entries) " \ + "ON CONFLICT(chart_id) DO UPDATE SET type=excluded.type, id=excluded.id, name=excluded.name, " \ + "family=excluded.family, context=excluded.context, title=excluded.title, unit=excluded.unit, " \ + "plugin=excluded.plugin, module=excluded.module, priority=excluded.priority, update_every=excluded.update_every, " \ + "chart_type=excluded.chart_type, memory_mode = excluded.memory_mode, history_entries = excluded.history_entries" + +#define SQL_STORE_DIMENSION \ + "INSERT INTO dimension (dim_id, chart_id, id, name, multiplier, divisor , algorithm, options) " \ + "VALUES (@dim_id, @chart_id, @id, @name, @multiplier, @divisor, @algorithm, @options) " \ + "ON CONFLICT(dim_id) DO UPDATE SET id=excluded.id, name=excluded.name, multiplier=excluded.multiplier, " \ + "divisor=excluded.divisor, algorithm=excluded.algorithm, options=excluded.options" + +#define SELECT_DIMENSION_LIST "SELECT dim_id, rowid FROM dimension WHERE rowid > @row_id" +#define SELECT_CHART_LIST 
"SELECT chart_id, rowid FROM chart WHERE rowid > @row_id" +#define SELECT_CHART_LABEL_LIST "SELECT chart_id, rowid FROM chart_label WHERE rowid > @row_id" + +#define SQL_STORE_HOST_SYSTEM_INFO_VALUES \ + "INSERT OR REPLACE INTO host_info (host_id, system_key, system_value, date_created) VALUES " \ + "(@uuid, @name, @value, UNIXEPOCH())" + +#define CONVERT_EXISTING_LOCALHOST "UPDATE host SET hops = 1 WHERE hops = 0 AND host_id <> @host_id" +#define DELETE_MISSING_NODE_INSTANCES "DELETE FROM node_instance WHERE host_id NOT IN (SELECT host_id FROM host)" + +#define METADATA_MAINTENANCE_FIRST_CHECK (1800) // Maintenance first run after agent startup in seconds +#define METADATA_MAINTENANCE_REPEAT (60) // Repeat if last run for dimensions, charts, labels needs more work +#define METADATA_HEALTH_LOG_INTERVAL (3600) // Repeat maintenance for health +#define METADATA_DIM_CHECK_INTERVAL (3600) // Repeat maintenance for dimensions +#define METADATA_CHART_CHECK_INTERVAL (3600) // Repeat maintenance for charts +#define METADATA_LABEL_CHECK_INTERVAL (3600) // Repeat maintenance for labels +#define METADATA_RUNTIME_THRESHOLD (5) // Run time threshold for cleanup task + +#define METADATA_HOST_CHECK_FIRST_CHECK (5) // First check for pending metadata +#define METADATA_HOST_CHECK_INTERVAL (30) // Repeat check for pending metadata +#define METADATA_HOST_CHECK_IMMEDIATE (5) // Repeat immediate run because we have more metadata to write +#define MAX_METADATA_CLEANUP (500) // Maximum metadata write operations (e.g deletes before retrying) +#define METADATA_MAX_BATCH_SIZE (512) // Maximum commands to execute before running the event loop + +#define DATABASE_FREE_PAGES_THRESHOLD_PC (5) // Percentage of free pages to trigger vacuum +#define DATABASE_FREE_PAGES_VACUUM_PC (10) // Percentage of free pages to vacuum + +enum metadata_opcode { + METADATA_DATABASE_NOOP = 0, + METADATA_DATABASE_TIMER, + METADATA_DEL_DIMENSION, + METADATA_STORE_CLAIM_ID, + METADATA_ADD_HOST_INFO, + 
METADATA_SCAN_HOSTS, + METADATA_LOAD_HOST_CONTEXT, + METADATA_DELETE_HOST_CHART_LABELS, + METADATA_MAINTENANCE, + METADATA_SYNC_SHUTDOWN, + METADATA_UNITTEST, + // leave this last + // we need it to check for worker utilization + METADATA_MAX_ENUMERATIONS_DEFINED +}; + +#define MAX_PARAM_LIST (2) +struct metadata_cmd { + enum metadata_opcode opcode; + struct completion *completion; + const void *param[MAX_PARAM_LIST]; + struct metadata_cmd *prev, *next; +}; + +typedef enum { + METADATA_FLAG_PROCESSING = (1 << 0), // store or cleanup + METADATA_FLAG_SHUTDOWN = (1 << 1), // Shutting down +} METADATA_FLAG; + +struct metadata_wc { + uv_thread_t thread; + uv_loop_t *loop; + uv_async_t async; + uv_timer_t timer_req; + time_t metadata_check_after; + METADATA_FLAG flags; + struct completion start_stop_complete; + struct completion *scan_complete; + /* FIFO command queue */ + SPINLOCK cmd_queue_lock; + struct metadata_cmd *cmd_base; +}; + +#define metadata_flag_check(target_flags, flag) (__atomic_load_n(&((target_flags)->flags), __ATOMIC_SEQ_CST) & (flag)) +#define metadata_flag_set(target_flags, flag) __atomic_or_fetch(&((target_flags)->flags), (flag), __ATOMIC_SEQ_CST) +#define metadata_flag_clear(target_flags, flag) __atomic_and_fetch(&((target_flags)->flags), ~(flag), __ATOMIC_SEQ_CST) + +struct metadata_wc metasync_worker = {.loop = NULL}; + +// +// For unittest +// +struct thread_unittest { + int join; + unsigned added; + unsigned processed; + unsigned *done; +}; + +int sql_metadata_cache_stats(int op) +{ + int count, dummy; + + if (unlikely(!db_meta)) + return 0; + + netdata_thread_disable_cancelability(); + sqlite3_db_status(db_meta, op, &count, &dummy, 0); + netdata_thread_enable_cancelability(); + return count; +} + +static inline void set_host_node_id(RRDHOST *host, uuid_t *node_id) +{ + if (unlikely(!host)) + return; + + if (unlikely(!node_id)) { + freez(host->node_id); + __atomic_store_n(&host->node_id, NULL, __ATOMIC_RELAXED); + return; + } + + struct 
aclk_sync_cfg_t *wc = host->aclk_config; + + if (unlikely(!host->node_id)) { + uuid_t *t = mallocz(sizeof(*host->node_id)); + uuid_copy(*t, *node_id); + __atomic_store_n(&host->node_id, t, __ATOMIC_RELAXED); + } + else { + uuid_copy(*(host->node_id), *node_id); + } + + if (unlikely(!wc)) + sql_create_aclk_table(host, &host->host_uuid, node_id); + else + uuid_unparse_lower(*node_id, wc->node_id); +} + +#define SQL_UPDATE_NODE_ID "UPDATE node_instance SET node_id = @node_id WHERE host_id = @host_id" + +int update_node_id(uuid_t *host_id, uuid_t *node_id) +{ + sqlite3_stmt *res = NULL; + RRDHOST *host = NULL; + int rc = 2; + + char host_guid[GUID_LEN + 1]; + uuid_unparse_lower(*host_id, host_guid); + rrd_wrlock(); + host = rrdhost_find_by_guid(host_guid); + if (likely(host)) + set_host_node_id(host, node_id); + rrd_unlock(); + + if (unlikely(!db_meta)) { + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) + error_report("Database has not been initialized"); + return 1; + } + + rc = sqlite3_prepare_v2(db_meta, SQL_UPDATE_NODE_ID, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to store node instance information"); + return 1; + } + + rc = sqlite3_bind_blob(res, 1, node_id, sizeof(*node_id), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind host_id parameter to store node instance information"); + goto failed; + } + + rc = sqlite3_bind_blob(res, 2, host_id, sizeof(*host_id), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind host_id parameter to store node instance information"); + goto failed; + } + + rc = execute_insert(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to store node instance information, rc = %d", rc); + rc = sqlite3_changes(db_meta); + +failed: + if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) + error_report("Failed to finalize the prepared statement when storing node instance information"); + + return rc - 1; +} + 
+#define SQL_SELECT_NODE_ID "SELECT node_id FROM node_instance WHERE host_id = @host_id AND node_id IS NOT NULL" + +int get_node_id(uuid_t *host_id, uuid_t *node_id) +{ + sqlite3_stmt *res = NULL; + int rc; + + if (unlikely(!db_meta)) { + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) + error_report("Database has not been initialized"); + return 1; + } + + rc = sqlite3_prepare_v2(db_meta, SQL_SELECT_NODE_ID, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to select node instance information for a host"); + return 1; + } + + rc = sqlite3_bind_blob(res, 1, host_id, sizeof(*host_id), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind host_id parameter to select node instance information"); + goto failed; + } + + rc = sqlite3_step_monitored(res); + if (likely(rc == SQLITE_ROW && node_id)) + uuid_copy(*node_id, *((uuid_t *) sqlite3_column_blob(res, 0))); + +failed: + if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) + error_report("Failed to finalize the prepared statement when selecting node instance information"); + + return (rc == SQLITE_ROW) ? 
0 : -1; +} + +#define SQL_INVALIDATE_NODE_INSTANCES \ + "UPDATE node_instance SET node_id = NULL WHERE EXISTS " \ + "(SELECT host_id FROM node_instance WHERE host_id = @host_id AND (@claim_id IS NULL OR claim_id <> @claim_id))" + +void invalidate_node_instances(uuid_t *host_id, uuid_t *claim_id) +{ + sqlite3_stmt *res = NULL; + int rc; + + if (unlikely(!db_meta)) { + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) + error_report("Database has not been initialized"); + return; + } + + rc = sqlite3_prepare_v2(db_meta, SQL_INVALIDATE_NODE_INSTANCES, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to invalidate node instance ids"); + return; + } + + rc = sqlite3_bind_blob(res, 1, host_id, sizeof(*host_id), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind host_id parameter to invalidate node instance information"); + goto failed; + } + + if (claim_id) + rc = sqlite3_bind_blob(res, 2, claim_id, sizeof(*claim_id), SQLITE_STATIC); + else + rc = sqlite3_bind_null(res, 2); + + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind claim_id parameter to invalidate node instance information"); + goto failed; + } + + rc = execute_insert(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to invalidate node instance information, rc = %d", rc); + +failed: + if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) + error_report("Failed to finalize the prepared statement when invalidating node instance information"); +} + +#define SQL_GET_NODE_INSTANCE_LIST \ + "SELECT ni.node_id, ni.host_id, h.hostname " \ + "FROM node_instance ni, host h WHERE ni.host_id = h.host_id AND h.hops >=0" + +struct node_instance_list *get_node_list(void) +{ + struct node_instance_list *node_list = NULL; + sqlite3_stmt *res = NULL; + int rc; + + if (unlikely(!db_meta)) { + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) + error_report("Database has not been initialized"); + return NULL; + } + + 
rc = sqlite3_prepare_v2(db_meta, SQL_GET_NODE_INSTANCE_LIST, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to get node instance information"); + return NULL; + } + + int row = 0; + char host_guid[UUID_STR_LEN]; + while (sqlite3_step_monitored(res) == SQLITE_ROW) + row++; + + if (sqlite3_reset(res) != SQLITE_OK) { + error_report("Failed to reset the prepared statement while fetching node instance information"); + goto failed; + } + node_list = callocz(row + 1, sizeof(*node_list)); + int max_rows = row; + row = 0; + // TODO: Check to remove lock + rrd_rdlock(); + while (sqlite3_step_monitored(res) == SQLITE_ROW) { + if (sqlite3_column_bytes(res, 0) == sizeof(uuid_t)) + uuid_copy(node_list[row].node_id, *((uuid_t *)sqlite3_column_blob(res, 0))); + if (sqlite3_column_bytes(res, 1) == sizeof(uuid_t)) { + uuid_t *host_id = (uuid_t *)sqlite3_column_blob(res, 1); + uuid_unparse_lower(*host_id, host_guid); + RRDHOST *host = rrdhost_find_by_guid(host_guid); + if (!host) + continue; + if (rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD)) { + netdata_log_info("ACLK: 'host:%s' skipping get node list because context is initializing", rrdhost_hostname(host)); + continue; + } + uuid_copy(node_list[row].host_id, *host_id); + node_list[row].queryable = 1; + node_list[row].live = (host && (host == localhost || host->receiver + || !(rrdhost_flag_check(host, RRDHOST_FLAG_ORPHAN)))) ? 1 : 0; + node_list[row].hops = (host && host->system_info) ? host->system_info->hops : + uuid_memcmp(host_id, &localhost->host_uuid) ? 1 : 0; + node_list[row].hostname = + sqlite3_column_bytes(res, 2) ? 
strdupz((char *)sqlite3_column_text(res, 2)) : NULL; + } + row++; + if (row == max_rows) + break; + } + rrd_unlock(); + +failed: + if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) + error_report("Failed to finalize the prepared statement when fetching node instance information"); + + return node_list; +} + +#define SQL_GET_HOST_NODE_ID "SELECT node_id FROM node_instance WHERE host_id = @host_id" + +void sql_load_node_id(RRDHOST *host) +{ + sqlite3_stmt *res = NULL; + int rc; + + if (unlikely(!db_meta)) { + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) + error_report("Database has not been initialized"); + return; + } + + rc = sqlite3_prepare_v2(db_meta, SQL_GET_HOST_NODE_ID, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to fetch node id"); + return; + } + + rc = sqlite3_bind_blob(res, 1, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind host_id parameter to load node instance information"); + goto failed; + } + + rc = sqlite3_step_monitored(res); + if (likely(rc == SQLITE_ROW)) { + if (likely(sqlite3_column_bytes(res, 0) == sizeof(uuid_t))) + set_host_node_id(host, (uuid_t *)sqlite3_column_blob(res, 0)); + else + set_host_node_id(host, NULL); + } + +failed: + if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) + error_report("Failed to finalize the prepared statement when loading node instance information"); +} + +#define SELECT_HOST_INFO "SELECT system_key, system_value FROM host_info WHERE host_id = @host_id" + +void sql_build_host_system_info(uuid_t *host_id, struct rrdhost_system_info *system_info) +{ + int rc; + + sqlite3_stmt *res = NULL; + + rc = sqlite3_prepare_v2(db_meta, SELECT_HOST_INFO, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to read host information"); + return; + } + + rc = sqlite3_bind_blob(res, 1, host_id, sizeof(*host_id), SQLITE_STATIC); + if (unlikely(rc != 
SQLITE_OK)) { + error_report("Failed to bind host parameter host information"); + goto skip; + } + + while (sqlite3_step_monitored(res) == SQLITE_ROW) { + rrdhost_set_system_info_variable(system_info, (char *) sqlite3_column_text(res, 0), + (char *) sqlite3_column_text(res, 1)); + } + +skip: + if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) + error_report("Failed to finalize the prepared statement when reading host information"); +} + +#define SELECT_HOST_LABELS "SELECT label_key, label_value, source_type FROM host_label WHERE host_id = @host_id " \ + "AND label_key IS NOT NULL AND label_value IS NOT NULL" + +RRDLABELS *sql_load_host_labels(uuid_t *host_id) +{ + int rc; + + RRDLABELS *labels = NULL; + sqlite3_stmt *res = NULL; + + rc = sqlite3_prepare_v2(db_meta, SELECT_HOST_LABELS, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to read host information"); + return NULL; + } + + rc = sqlite3_bind_blob(res, 1, host_id, sizeof(*host_id), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind host parameter host information"); + goto skip; + } + + labels = rrdlabels_create(); + + while (sqlite3_step_monitored(res) == SQLITE_ROW) { + rrdlabels_add(labels, (const char *)sqlite3_column_text(res, 0), (const char *)sqlite3_column_text(res, 1), sqlite3_column_int(res, 2)); + } + +skip: + if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) + error_report("Failed to finalize the prepared statement when reading host information"); + return labels; +} + +static int exec_statement_with_uuid(const char *sql, uuid_t *uuid) +{ + int rc, result = 1; + sqlite3_stmt *res = NULL; + + rc = sqlite3_prepare_v2(db_meta, sql, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement %s, rc = %d", sql, rc); + return 1; + } + + rc = sqlite3_bind_blob(res, 1, uuid, sizeof(*uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind UUID parameter to %s, rc = 
%d", sql, rc); + goto skip; + } + + rc = execute_insert(res); + if (likely(rc == SQLITE_DONE)) + result = SQLITE_OK; + else + error_report("Failed to execute %s, rc = %d", sql, rc); + +skip: + rc = sqlite3_finalize(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to finalize statement %s, rc = %d", sql, rc); + return result; +} + +static void recover_database(const char *sqlite_database, const char *new_sqlite_database) +{ + sqlite3 *database; + int rc = sqlite3_open(sqlite_database, &database); + if (rc != SQLITE_OK) + return; + + netdata_log_info("Recover %s", sqlite_database); + netdata_log_info(" to %s", new_sqlite_database); + + // This will remove the -shm and -wal files when we close the database + (void) db_execute(database, "select count(*) from sqlite_master limit 0"); + + sqlite3_recover *recover = sqlite3_recover_init(database, "main", new_sqlite_database); + if (recover) { + + rc = sqlite3_recover_run(recover); + + if (rc == SQLITE_OK) + netdata_log_info("Recover complete"); + else + netdata_log_error("Recover encountered an error but the database may be usable"); + + rc = sqlite3_recover_finish(recover); + + (void) sqlite3_close(database); + + if (rc == SQLITE_OK) { + rc = rename(new_sqlite_database, sqlite_database); + if (rc == 0) { + netdata_log_info("Renamed %s", new_sqlite_database); + netdata_log_info(" to %s", sqlite_database); + } + } + else + netdata_log_error("Recover failed to free resources"); + } + else + (void) sqlite3_close(database); +} + + +static void sqlite_uuid_parse(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + uuid_t uuid; + + if ( argc != 1 ){ + sqlite3_result_null(context); + return ; + } + int rc = uuid_parse((const char *) sqlite3_value_text(argv[0]), uuid); + if (rc == -1) { + sqlite3_result_null(context); + return ; + } + + sqlite3_result_blob(context, &uuid, sizeof(uuid_t), SQLITE_TRANSIENT); +} + +void sqlite_now_usec(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + if (argc != 
1 ){ + sqlite3_result_null(context); + return ; + } + + if (sqlite3_value_int(argv[0]) != 0) { + struct timespec req = {.tv_sec = 0, .tv_nsec = 1}; + nanosleep(&req, NULL); + } + + sqlite3_result_int64(context, (sqlite_int64) now_realtime_usec()); +} + +void sqlite_uuid_random(sqlite3_context *context, int argc, sqlite3_value **argv) +{ + (void)argc; + (void)argv; + + uuid_t uuid; + uuid_generate_random(uuid); + sqlite3_result_blob(context, &uuid, sizeof(uuid_t), SQLITE_TRANSIENT); +} + +// Init +/* + * Initialize the SQLite database + * Return 0 on success + */ +int sql_init_meta_database(db_check_action_type_t rebuild, int memory) +{ + char *err_msg = NULL; + char sqlite_database[FILENAME_MAX + 1]; + int rc; + + if (likely(!memory)) { + snprintfz(sqlite_database, sizeof(sqlite_database) - 1, "%s/.netdata-meta.db.recover", netdata_configured_cache_dir); + rc = unlink(sqlite_database); + snprintfz(sqlite_database, FILENAME_MAX, "%s/netdata-meta.db", netdata_configured_cache_dir); + + if (rc == 0 || (rebuild & DB_CHECK_RECOVER)) { + char new_sqlite_database[FILENAME_MAX + 1]; + snprintfz(new_sqlite_database, sizeof(new_sqlite_database) - 1, "%s/netdata-meta-recover.db", netdata_configured_cache_dir); + recover_database(sqlite_database, new_sqlite_database); + if (rebuild & DB_CHECK_RECOVER) + return 0; + } + } + else + strncpyz(sqlite_database, ":memory:", sizeof(sqlite_database) - 1); + + rc = sqlite3_open(sqlite_database, &db_meta); + if (rc != SQLITE_OK) { + error_report("Failed to initialize database at %s, due to \"%s\"", sqlite_database, sqlite3_errstr(rc)); + char *error_str = get_database_extented_error(db_meta, 0, "meta_open"); + if (error_str) + analytics_set_data_str(&analytics_data.netdata_fail_reason, error_str); + freez(error_str); + sqlite3_close(db_meta); + db_meta = NULL; + return 1; + } + + if (rebuild & DB_CHECK_RECLAIM_SPACE) { + netdata_log_info("Reclaiming space of %s", sqlite_database); + rc = sqlite3_exec_monitored(db_meta, "VACUUM", 0, 0, 
&err_msg); + if (rc != SQLITE_OK) { + error_report("Failed to execute VACUUM rc = %d (%s)", rc, err_msg); + sqlite3_free(err_msg); + } + else { + (void) db_execute(db_meta, "select count(*) from sqlite_master limit 0"); + (void) sqlite3_close(db_meta); + } + return 1; + } + + if (rebuild & DB_CHECK_ANALYZE) { + errno = 0; + netdata_log_info("Running ANALYZE on %s", sqlite_database); + rc = sqlite3_exec_monitored(db_meta, "ANALYZE", 0, 0, &err_msg); + if (rc != SQLITE_OK) { + error_report("Failed to execute ANALYZE rc = %d (%s)", rc, err_msg); + sqlite3_free(err_msg); + } + else { + (void) db_execute(db_meta, "select count(*) from sqlite_master limit 0"); + (void) sqlite3_close(db_meta); + } + return 1; + } + + netdata_log_info("SQLite database %s initialization", sqlite_database); + + rc = sqlite3_create_function(db_meta, "u2h", 1, SQLITE_ANY | SQLITE_DETERMINISTIC, 0, sqlite_uuid_parse, 0, 0); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to register internal u2h function"); + + rc = sqlite3_create_function(db_meta, "now_usec", 1, SQLITE_ANY, 0, sqlite_now_usec, 0, 0); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to register internal now_usec function"); + + rc = sqlite3_create_function(db_meta, "uuid_random", 0, SQLITE_ANY, 0, sqlite_uuid_random, 0, 0); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to register internal uuid_random function"); + + int target_version = DB_METADATA_VERSION; + + if (likely(!memory)) + target_version = perform_database_migration(db_meta, DB_METADATA_VERSION); + + if (configure_sqlite_database(db_meta, target_version, "meta_config")) + return 1; + + if (init_database_batch(db_meta, &database_config[0], "meta_init")) + return 1; + + if (init_database_batch(db_meta, &database_cleanup[0], "meta_cleanup")) + return 1; + + netdata_log_info("SQLite database initialization completed"); + + return 0; +} + +// Metadata functions + +struct query_build { + BUFFER *sql; + int count; + char uuid_str[UUID_STR_LEN]; +}; 
+ +#define SQL_DELETE_CHART_LABELS_BY_HOST \ + "DELETE FROM chart_label WHERE chart_id in (SELECT chart_id FROM chart WHERE host_id = @host_id)" + +static void delete_host_chart_labels(uuid_t *host_uuid) +{ + sqlite3_stmt *res = NULL; + + int rc = sqlite3_prepare_v2(db_meta, SQL_DELETE_CHART_LABELS_BY_HOST, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to delete chart labels by host"); + return; + } + + rc = sqlite3_bind_blob(res, 1, host_uuid, sizeof(*host_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind host_id parameter to host chart labels"); + goto failed; + } + rc = sqlite3_step_monitored(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to execute command to remove host chart labels"); + +failed: + if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) + error_report("Failed to finalize statement to remove host chart labels"); +} + +static int host_label_store_to_sql_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { + struct query_build *lb = data; + if (unlikely(!lb->count)) + buffer_sprintf(lb->sql, STORE_HOST_LABEL); + else + buffer_strcat(lb->sql, ", "); + buffer_sprintf(lb->sql, STORE_HOST_OR_CHART_LABEL_VALUE, lb->uuid_str, (int) (ls & ~(RRDLABEL_FLAG_INTERNAL)), name, value); + lb->count++; + return 1; +} + +static int chart_label_store_to_sql_callback(const char *name, const char *value, RRDLABEL_SRC ls, void *data) { + struct query_build *lb = data; + if (unlikely(!lb->count)) + buffer_sprintf(lb->sql, STORE_CHART_LABEL); + else + buffer_strcat(lb->sql, ", "); + buffer_sprintf(lb->sql, STORE_HOST_OR_CHART_LABEL_VALUE, lb->uuid_str, (int) (ls & ~(RRDLABEL_FLAG_INTERNAL)), name, value); + lb->count++; + return 1; +} + +#define SQL_DELETE_CHART_LABEL "DELETE FROM chart_label WHERE chart_id = @chart_id" +#define SQL_DELETE_CHART_LABEL_HISTORY "DELETE FROM chart_label WHERE date_created < %ld AND chart_id = @chart_id" + +static void 
clean_old_chart_labels(RRDSET *st) +{ + char sql[512]; + time_t first_time_s = rrdset_first_entry_s(st); + + if (unlikely(!first_time_s)) + snprintfz(sql, sizeof(sql) - 1, SQL_DELETE_CHART_LABEL); + else + snprintfz(sql, sizeof(sql) - 1, SQL_DELETE_CHART_LABEL_HISTORY, first_time_s); + + int rc = exec_statement_with_uuid(sql, &st->chart_uuid); + if (unlikely(rc)) + error_report("METADATA: 'host:%s' Failed to clean old labels for chart %s", rrdhost_hostname(st->rrdhost), rrdset_name(st)); +} + +static int check_and_update_chart_labels(RRDSET *st, BUFFER *work_buffer, size_t *query_counter) +{ + size_t old_version = st->rrdlabels_last_saved_version; + size_t new_version = rrdlabels_version(st->rrdlabels); + + if (new_version == old_version) + return 0; + + struct query_build tmp = {.sql = work_buffer, .count = 0}; + uuid_unparse_lower(st->chart_uuid, tmp.uuid_str); + rrdlabels_walkthrough_read(st->rrdlabels, chart_label_store_to_sql_callback, &tmp); + buffer_strcat(work_buffer, " ON CONFLICT (chart_id, label_key) DO UPDATE SET source_type = excluded.source_type, label_value=excluded.label_value, date_created=UNIXEPOCH()"); + int rc = db_execute(db_meta, buffer_tostring(work_buffer)); + if (likely(!rc)) { + st->rrdlabels_last_saved_version = new_version; + (*query_counter)++; + } + + clean_old_chart_labels(st); + return rc; +} + +// If the machine guid has changed, then existing one with hops 0 will be marked as hops 1 (child) +void detect_machine_guid_change(uuid_t *host_uuid) +{ + int rc; + + rc = exec_statement_with_uuid(CONVERT_EXISTING_LOCALHOST, host_uuid); + if (!rc) { + if (unlikely(db_execute(db_meta, DELETE_MISSING_NODE_INSTANCES))) + error_report("Failed to remove deleted hosts from node instances"); + } +} + +static int store_claim_id(uuid_t *host_id, uuid_t *claim_id) +{ + sqlite3_stmt *res = NULL; + int rc; + + if (unlikely(!db_meta)) { + if (default_rrd_memory_mode == RRD_MEMORY_MODE_DBENGINE) + error_report("Database has not been initialized"); + 
return 1; + } + + rc = sqlite3_prepare_v2(db_meta, SQL_STORE_CLAIM_ID, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to store host claim id"); + return 1; + } + + rc = sqlite3_bind_blob(res, 1, host_id, sizeof(*host_id), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind host_id parameter to store claim id"); + goto failed; + } + + if (claim_id) + rc = sqlite3_bind_blob(res, 2, claim_id, sizeof(*claim_id), SQLITE_STATIC); + else + rc = sqlite3_bind_null(res, 2); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind claim_id parameter to host claim id"); + goto failed; + } + + rc = execute_insert(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to store host claim id rc = %d", rc); + +failed: + if (unlikely(sqlite3_finalize(res) != SQLITE_OK)) + error_report("Failed to finalize the prepared statement when storing a host claim id"); + + return rc != SQLITE_DONE; +} + +static void delete_dimension_uuid(uuid_t *dimension_uuid, sqlite3_stmt **action_res __maybe_unused, bool flag __maybe_unused) +{ + static __thread sqlite3_stmt *res = NULL; + int rc; + + if (unlikely(!res)) { + rc = prepare_statement(db_meta, DELETE_DIMENSION_UUID, &res); + if (rc != SQLITE_OK) { + error_report("Failed to prepare statement to delete a dimension uuid"); + return; + } + } + + rc = sqlite3_bind_blob(res, 1, dimension_uuid, sizeof(*dimension_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto skip_execution; + + rc = sqlite3_step_monitored(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to delete dimension uuid, rc = %d", rc); + +skip_execution: + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement when deleting dimension UUID, rc = %d", rc); +} + +// +// Store host and host system info information in the database +static int store_host_metadata(RRDHOST *host) +{ + static __thread sqlite3_stmt *res = NULL; + int 
rc, param = 0; + + if (unlikely((!res))) { + rc = prepare_statement(db_meta, SQL_STORE_HOST_INFO, &res); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to store host, rc = %d", rc); + return 1; + } + } + + rc = sqlite3_bind_blob(res, ++param, &host->host_uuid, sizeof(host->host_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_hostname(host), 0); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_registry_hostname(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, host->rrd_update_every); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_os(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_timezone(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + // TODO: remove/migrate + rc = bind_text_null(res, ++param, "", 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, host->system_info ? 
host->system_info->hops : 0); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, host->rrd_memory_mode); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_abbrev_timezone(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, host->utc_offset); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_program_name(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, rrdhost_program_version(host), 1); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int64(res, ++param, host->rrd_history_entries); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, (int ) host->health.health_enabled); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int64(res, ++param, (sqlite3_int64) host->last_connected); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + int store_rc = sqlite3_step_monitored(res); + if (unlikely(store_rc != SQLITE_DONE)) + error_report("Failed to store host %s, rc = %d", rrdhost_hostname(host), rc); + + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement to store host %s, rc = %d", rrdhost_hostname(host), rc); + + return store_rc != SQLITE_DONE; +bind_fail: + error_report("Failed to bind %d parameter to store host %s, rc = %d", param, rrdhost_hostname(host), rc); + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement to store host %s, rc = %d", rrdhost_hostname(host), rc); + return 1; +} + +static int add_host_sysinfo_key_value(const char *name, const char *value, uuid_t *uuid) +{ + static __thread sqlite3_stmt *res = NULL; + int rc, param = 0; + + if (unlikely(!db_meta)) { + if (default_rrd_memory_mode != RRD_MEMORY_MODE_DBENGINE) + return 0; + 
error_report("Database has not been initialized"); + return 0; + } + + if (unlikely((!res))) { + rc = prepare_statement(db_meta, SQL_STORE_HOST_SYSTEM_INFO_VALUES, &res); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to store host info values, rc = %d", rc); + return 0; + } + } + + rc = sqlite3_bind_blob(res, ++param, uuid, sizeof(*uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, name, 0); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = bind_text_null(res, ++param, value ? value : "unknown", 0); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + int store_rc = sqlite3_step_monitored(res); + if (unlikely(store_rc != SQLITE_DONE)) + error_report("Failed to store host info value %s, rc = %d", name, rc); + + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement to store host info value %s, rc = %d", name, rc); + + return store_rc == SQLITE_DONE; +bind_fail: + error_report("Failed to bind %d parameter to store host info values %s, rc = %d", param, name, rc); + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement to store host info values %s, rc = %d", name, rc); + return 0; +} + +static bool store_host_systeminfo(RRDHOST *host) +{ + struct rrdhost_system_info *system_info = host->system_info; + + if (unlikely(!system_info)) + return false; + + int ret = 0; + + ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_NAME", system_info->container_os_name, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_ID", system_info->container_os_id, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_ID_LIKE", system_info->container_os_id_like, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_VERSION", system_info->container_os_version, &host->host_uuid); + ret += 
add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_VERSION_ID", system_info->container_os_version_id, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_CONTAINER_OS_DETECTION", system_info->host_os_detection, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_NAME", system_info->host_os_name, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_ID", system_info->host_os_id, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_ID_LIKE", system_info->host_os_id_like, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_VERSION", system_info->host_os_version, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_VERSION_ID", system_info->host_os_version_id, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_OS_DETECTION", system_info->host_os_detection, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_KERNEL_NAME", system_info->kernel_name, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_CPU_LOGICAL_CPU_COUNT", system_info->host_cores, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_CPU_FREQ", system_info->host_cpu_freq, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_TOTAL_RAM", system_info->host_ram_total, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_TOTAL_DISK_SIZE", system_info->host_disk_space, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_KERNEL_VERSION", system_info->kernel_version, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_ARCHITECTURE", system_info->architecture, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_VIRTUALIZATION", system_info->virtualization, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_VIRT_DETECTION", system_info->virt_detection, &host->host_uuid); + ret += 
add_host_sysinfo_key_value("NETDATA_SYSTEM_CONTAINER", system_info->container, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_SYSTEM_CONTAINER_DETECTION", system_info->container_detection, &host->host_uuid); + ret += add_host_sysinfo_key_value("NETDATA_HOST_IS_K8S_NODE", system_info->is_k8s_node, &host->host_uuid); + + return !(24 == ret); +} + + +/* + * Store a chart in the database + */ + +static int store_chart_metadata(RRDSET *st) +{ + static __thread sqlite3_stmt *res = NULL; + int rc, param = 0, store_rc = 0; + + if (unlikely(!res)) { + rc = prepare_statement(db_meta, SQL_STORE_CHART, &res); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to store chart, rc = %d", rc); + return 1; + } + } + + rc = sqlite3_bind_blob(res, ++param, &st->chart_uuid, sizeof(st->chart_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_blob(res, ++param, &st->rrdhost->host_uuid, sizeof(st->rrdhost->host_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_text(res, ++param, string2str(st->parts.type), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_text(res, ++param, string2str(st->parts.id), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + const char *name = string2str(st->parts.name); + if (name && *name) + rc = sqlite3_bind_text(res, ++param, name, -1, SQLITE_STATIC); + else + rc = sqlite3_bind_null(res, ++param); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_text(res, ++param, rrdset_family(st), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_text(res, ++param, rrdset_context(st), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_text(res, ++param, rrdset_title(st), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = 
sqlite3_bind_text(res, ++param, rrdset_units(st), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_text(res, ++param, rrdset_plugin_name(st), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_text(res, ++param, rrdset_module_name(st), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, (int) st->priority); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, st->update_every); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, st->chart_type); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, st->rrd_memory_mode); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, (int) st->db.entries); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + store_rc = execute_insert(res); + if (unlikely(store_rc != SQLITE_DONE)) + error_report("Failed to store chart, rc = %d", store_rc); + + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement in chart store function, rc = %d", rc); + + return store_rc != SQLITE_DONE; + +bind_fail: + error_report("Failed to bind parameter %d to store chart, rc = %d", param, rc); + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement in chart store function, rc = %d", rc); + return 1; +} + +/* + * Store a dimension + */ +static int store_dimension_metadata(RRDDIM *rd) +{ + static __thread sqlite3_stmt *res = NULL; + int rc, param = 0; + + if (unlikely(!res)) { + rc = prepare_statement(db_meta, SQL_STORE_DIMENSION, &res); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to store dimension, rc = %d", rc); + return 1; + } + } + + rc = sqlite3_bind_blob(res, ++param, &rd->metric_uuid, sizeof(rd->metric_uuid), SQLITE_STATIC); + if 
(unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_blob(res, ++param, &rd->rrdset->chart_uuid, sizeof(rd->rrdset->chart_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_text(res, ++param, string2str(rd->id), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_text(res, ++param, string2str(rd->name), -1, SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, (int) rd->multiplier); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, (int ) rd->divisor); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = sqlite3_bind_int(res, ++param, rd->algorithm); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + if (rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN)) + rc = sqlite3_bind_text(res, ++param, "hidden", -1, SQLITE_STATIC); + else + rc = sqlite3_bind_null(res, ++param); + if (unlikely(rc != SQLITE_OK)) + goto bind_fail; + + rc = execute_insert(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to store dimension, rc = %d", rc); + + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement in store dimension, rc = %d", rc); + return 0; + +bind_fail: + error_report("Failed to bind parameter %d to store dimension, rc = %d", param, rc); + rc = sqlite3_reset(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to reset statement in store dimension, rc = %d", rc); + return 1; +} + +static bool dimension_can_be_deleted(uuid_t *dim_uuid __maybe_unused, sqlite3_stmt **res __maybe_unused, bool flag __maybe_unused) +{ +#ifdef ENABLE_DBENGINE + if(dbengine_enabled) { + bool no_retention = true; + for (size_t tier = 0; tier < storage_tiers; tier++) { + if (!multidb_ctx[tier]) + continue; + time_t first_time_t = 0, last_time_t = 0; + if (rrdeng_metric_retention_by_uuid((void *) multidb_ctx[tier], dim_uuid, 
&first_time_t, &last_time_t)) { + if (first_time_t > 0) { + no_retention = false; + break; + } + } + } + return no_retention; + } + else + return false; +#else + return false; +#endif +} + +static bool run_cleanup_loop( + sqlite3_stmt *res, + struct metadata_wc *wc, + bool (*check_cb)(uuid_t *, sqlite3_stmt **, bool), + void (*action_cb)(uuid_t *, sqlite3_stmt **, bool), + uint32_t *total_checked, + uint32_t *total_deleted, + uint64_t *row_id, + sqlite3_stmt **check_stmt, + sqlite3_stmt **action_stmt, + bool check_flag, + bool action_flag) +{ + if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) + return true; + + int rc = sqlite3_bind_int64(res, 1, (sqlite3_int64) *row_id); + if (unlikely(rc != SQLITE_OK)) + return true; + + time_t start_running = now_monotonic_sec(); + bool time_expired = false; + while (!time_expired && sqlite3_step_monitored(res) == SQLITE_ROW && + (*total_deleted < MAX_METADATA_CLEANUP && *total_checked < MAX_METADATA_CLEANUP)) { + if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) + break; + + *row_id = sqlite3_column_int64(res, 1); + rc = check_cb((uuid_t *)sqlite3_column_blob(res, 0), check_stmt, check_flag); + + if (rc == true) { + action_cb((uuid_t *)sqlite3_column_blob(res, 0), action_stmt, action_flag); + (*total_deleted)++; + } + + (*total_checked)++; + time_expired = ((now_monotonic_sec() - start_running) > METADATA_RUNTIME_THRESHOLD); + } + return time_expired || (*total_checked == MAX_METADATA_CLEANUP) || (*total_deleted == MAX_METADATA_CLEANUP); +} + + +#define SQL_CHECK_CHART_EXISTENCE_IN_DIMENSION "SELECT count(1) FROM dimension WHERE chart_id = @chart_id" +#define SQL_CHECK_CHART_EXISTENCE_IN_CHART "SELECT count(1) FROM chart WHERE chart_id = @chart_id" + +static bool chart_can_be_deleted(uuid_t *chart_uuid, sqlite3_stmt **check_res, bool check_in_dimension) +{ + int rc, result = 1; + sqlite3_stmt *res = check_res ? 
*check_res : NULL; + + if (!res) { + if (check_in_dimension) + rc = sqlite3_prepare_v2(db_meta, SQL_CHECK_CHART_EXISTENCE_IN_DIMENSION, -1, &res, 0); + else + rc = sqlite3_prepare_v2(db_meta, SQL_CHECK_CHART_EXISTENCE_IN_CHART, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to check for chart existence, rc = %d", rc); + return 0; + } + if (check_res) + *check_res = res; + } + + rc = sqlite3_bind_blob(res, 1, chart_uuid, sizeof(*chart_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind chart uuid parameter, rc = %d", rc); + goto skip; + } + + rc = sqlite3_step_monitored(res); + if (likely(rc == SQLITE_ROW)) + result = sqlite3_column_int(res, 0); + +skip: + if (check_res) + rc = sqlite3_reset(res); + else + rc = sqlite3_finalize(res); + + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to %s statement that checks chart uuid existence rc = %d", check_res ? "reset" : "finalize", rc); + return result == 0; +} + +#define SQL_DELETE_CHART_BY_UUID "DELETE FROM chart WHERE chart_id = @chart_id" +#define SQL_DELETE_CHART_LABEL_BY_UUID "DELETE FROM chart_label WHERE chart_id = @chart_id" + +static void delete_chart_uuid(uuid_t *chart_uuid, sqlite3_stmt **action_res, bool label_only) +{ + int rc; + sqlite3_stmt *res = action_res ? 
*action_res : NULL; + + if (!res) { + if (label_only) + rc = sqlite3_prepare_v2(db_meta, SQL_DELETE_CHART_LABEL_BY_UUID, -1, &res, 0); + else + rc = sqlite3_prepare_v2(db_meta, SQL_DELETE_CHART_BY_UUID, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to check for chart existence, rc = %d", rc); + return; + } + if (action_res) + *action_res = res; + } + + rc = sqlite3_bind_blob(res, 1, chart_uuid, sizeof(*chart_uuid), SQLITE_STATIC); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to bind chart uuid parameter, rc = %d", rc); + goto skip; + } + + rc = sqlite3_step_monitored(res); + if (unlikely(rc != SQLITE_DONE)) + error_report("Failed to delete a chart uuid from the %s table, rc = %d", label_only ? "labels" : "chart", rc); + +skip: + if (action_res) + rc = sqlite3_reset(res); + else + rc = sqlite3_finalize(res); + + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to %s statement that deletes a chart uuid rc = %d", action_res ? 
"reset" : "finalize", rc); +} + +static void check_dimension_metadata(struct metadata_wc *wc) +{ + static time_t next_execution_t = 0; + static uint64_t last_row_id = 0; + + time_t now = now_realtime_sec(); + + if (!next_execution_t) + next_execution_t = now + METADATA_MAINTENANCE_FIRST_CHECK; + + if (next_execution_t && next_execution_t > now) + return; + + int rc; + sqlite3_stmt *res = NULL; + + rc = sqlite3_prepare_v2(db_meta, SELECT_DIMENSION_LIST, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to fetch host dimensions"); + return; + } + + uint32_t total_checked = 0; + uint32_t total_deleted = 0; + + nd_log(NDLS_DAEMON, NDLP_DEBUG, "Checking dimensions starting after row %" PRIu64, last_row_id); + + bool more_to_do = run_cleanup_loop( + res, + wc, + dimension_can_be_deleted, + delete_dimension_uuid, + &total_checked, + &total_deleted, + &last_row_id, + NULL, + NULL, + false, + false); + + now = now_realtime_sec(); + if (more_to_do) + next_execution_t = now + METADATA_MAINTENANCE_REPEAT; + else { + last_row_id = 0; + next_execution_t = now + METADATA_DIM_CHECK_INTERVAL; + } + + nd_log( + NDLS_DAEMON, + NDLP_DEBUG, + "Dimensions checked %u, deleted %u. Checks will %s in %lld seconds", + total_checked, + total_deleted, + last_row_id ? 
"resume" : "restart", + (long long)(next_execution_t - now)); + + rc = sqlite3_finalize(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to finalize the prepared statement to check dimensions"); +} + +static void check_chart_metadata(struct metadata_wc *wc) +{ + static time_t next_execution_t = 0; + static uint64_t last_row_id = 0; + + time_t now = now_realtime_sec(); + + if (!next_execution_t) + next_execution_t = now + METADATA_MAINTENANCE_FIRST_CHECK; + + if (next_execution_t && next_execution_t > now) + return; + + sqlite3_stmt *res = NULL; + + int rc = sqlite3_prepare_v2(db_meta, SELECT_CHART_LIST, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to fetch charts"); + return; + } + + uint32_t total_checked = 0; + uint32_t total_deleted = 0; + + nd_log(NDLS_DAEMON, NDLP_DEBUG, "Checking charts starting after row %" PRIu64, last_row_id); + + sqlite3_stmt *check_res = NULL; + sqlite3_stmt *action_res = NULL; + bool more_to_do = run_cleanup_loop( + res, + wc, + chart_can_be_deleted, + delete_chart_uuid, + &total_checked, + &total_deleted, + &last_row_id, + &check_res, + &action_res, + true, + false); + + if (check_res) + sqlite3_finalize(check_res); + + if (action_res) + sqlite3_finalize(action_res); + + now = now_realtime_sec(); + if (more_to_do) + next_execution_t = now + METADATA_MAINTENANCE_REPEAT; + else { + last_row_id = 0; + next_execution_t = now + METADATA_CHART_CHECK_INTERVAL; + } + + nd_log( + NDLS_DAEMON, + NDLP_DEBUG, + "Charts checked %u, deleted %u. Checks will %s in %lld seconds", + total_checked, + total_deleted, + last_row_id ? 
"resume" : "restart", + (long long)(next_execution_t - now)); + + rc = sqlite3_finalize(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to finalize the prepared statement when reading charts"); +} + +static void check_label_metadata(struct metadata_wc *wc) +{ + static time_t next_execution_t = 0; + static uint64_t last_row_id = 0; + + time_t now = now_realtime_sec(); + + if (!next_execution_t) + next_execution_t = now + METADATA_MAINTENANCE_FIRST_CHECK; + + if (next_execution_t && next_execution_t > now) + return; + + int rc; + sqlite3_stmt *res = NULL; + + rc = sqlite3_prepare_v2(db_meta, SELECT_CHART_LABEL_LIST, -1, &res, 0); + if (unlikely(rc != SQLITE_OK)) { + error_report("Failed to prepare statement to fetch charts"); + return; + } + + uint32_t total_checked = 0; + uint32_t total_deleted = 0; + + nd_log(NDLS_DAEMON, NDLP_DEBUG, "Checking charts labels starting after row %" PRIu64, last_row_id); + + sqlite3_stmt *check_res = NULL; + sqlite3_stmt *action_res = NULL; + + bool more_to_do = run_cleanup_loop( + res, + wc, + chart_can_be_deleted, + delete_chart_uuid, + &total_checked, + &total_deleted, + &last_row_id, + &check_res, + &action_res, + false, + true); + + if (check_res) + sqlite3_finalize(check_res); + + if (action_res) + sqlite3_finalize(action_res); + + now = now_realtime_sec(); + if (more_to_do) + next_execution_t = now + METADATA_MAINTENANCE_REPEAT; + else { + last_row_id = 0; + next_execution_t = now + METADATA_LABEL_CHECK_INTERVAL; + } + + nd_log( + NDLS_DAEMON, + NDLP_DEBUG, + "Chart labels checked %u, deleted %u. Checks will %s in %lld seconds", + total_checked, + total_deleted, + last_row_id ? 
"resume" : "restart", + (long long)(next_execution_t - now)); + + rc = sqlite3_finalize(res); + if (unlikely(rc != SQLITE_OK)) + error_report("Failed to finalize the prepared statement when checking charts"); +} + + +static void cleanup_health_log(struct metadata_wc *wc) +{ + static time_t next_execution_t = 0; + + time_t now = now_realtime_sec(); + + if (!next_execution_t) + next_execution_t = now + METADATA_MAINTENANCE_FIRST_CHECK; + + if (next_execution_t && next_execution_t > now) + return; + + next_execution_t = now + METADATA_HEALTH_LOG_INTERVAL; + + RRDHOST *host; + + bool is_claimed = claimed(); + dfe_start_reentrant(rrdhost_root_index, host){ + if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED)) + continue; + sql_health_alarm_log_cleanup(host, is_claimed); + if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) + break; + } + dfe_done(host); + + if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) + return; + + (void) db_execute(db_meta,"DELETE FROM health_log WHERE host_id NOT IN (SELECT host_id FROM host)"); + (void) db_execute(db_meta,"DELETE FROM health_log_detail WHERE health_log_id NOT IN (SELECT health_log_id FROM health_log)"); +} + +// +// EVENT LOOP STARTS HERE +// + +static void metadata_free_cmd_queue(struct metadata_wc *wc) +{ + spinlock_lock(&wc->cmd_queue_lock); + while(wc->cmd_base) { + struct metadata_cmd *t = wc->cmd_base; + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(wc->cmd_base, t, prev, next); + freez(t); + } + spinlock_unlock(&wc->cmd_queue_lock); +} + +static void metadata_enq_cmd(struct metadata_wc *wc, struct metadata_cmd *cmd) +{ + if (cmd->opcode == METADATA_SYNC_SHUTDOWN) { + metadata_flag_set(wc, METADATA_FLAG_SHUTDOWN); + goto wakeup_event_loop; + } + + if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) + goto wakeup_event_loop; + + struct metadata_cmd *t = mallocz(sizeof(*t)); + *t = *cmd; + t->prev = t->next = NULL; + + spinlock_lock(&wc->cmd_queue_lock); + 
DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(wc->cmd_base, t, prev, next); + spinlock_unlock(&wc->cmd_queue_lock); + +wakeup_event_loop: + (void) uv_async_send(&wc->async); +} + +static struct metadata_cmd metadata_deq_cmd(struct metadata_wc *wc) +{ + struct metadata_cmd ret; + + spinlock_lock(&wc->cmd_queue_lock); + if(wc->cmd_base) { + struct metadata_cmd *t = wc->cmd_base; + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(wc->cmd_base, t, prev, next); + ret = *t; + freez(t); + } + else { + ret.opcode = METADATA_DATABASE_NOOP; + ret.completion = NULL; + } + spinlock_unlock(&wc->cmd_queue_lock); + + return ret; +} + +static void async_cb(uv_async_t *handle) +{ + uv_stop(handle->loop); + uv_update_time(handle->loop); +} + +#define TIMER_INITIAL_PERIOD_MS (1000) +#define TIMER_REPEAT_PERIOD_MS (1000) + +static void timer_cb(uv_timer_t* handle) +{ + uv_stop(handle->loop); + uv_update_time(handle->loop); + + struct metadata_wc *wc = handle->data; + struct metadata_cmd cmd; + memset(&cmd, 0, sizeof(cmd)); + + if (wc->metadata_check_after < now_realtime_sec()) { + cmd.opcode = METADATA_SCAN_HOSTS; + metadata_enq_cmd(wc, &cmd); + } +} + +void vacuum_database(sqlite3 *database, const char *db_alias, int threshold, int vacuum_pc) +{ + int free_pages = get_free_page_count(database); + int total_pages = get_database_page_count(database); + + if (!threshold) + threshold = DATABASE_FREE_PAGES_THRESHOLD_PC; + + if (!vacuum_pc) + vacuum_pc = DATABASE_FREE_PAGES_VACUUM_PC; + + if (free_pages > (total_pages * threshold / 100)) { + + int do_free_pages = (int) (free_pages * vacuum_pc / 100); + nd_log(NDLS_DAEMON, NDLP_DEBUG, "%s: Freeing %d database pages", db_alias, do_free_pages); + + char sql[128]; + snprintfz(sql, sizeof(sql) - 1, "PRAGMA incremental_vacuum(%d)", do_free_pages); + (void) db_execute(database, sql); + } +} + +void run_metadata_cleanup(struct metadata_wc *wc) +{ + if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) + return; + + check_dimension_metadata(wc); + 
check_chart_metadata(wc); + check_label_metadata(wc); + cleanup_health_log(wc); + + if (unlikely(metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) + return; + + vacuum_database(db_meta, "METADATA", DATABASE_FREE_PAGES_THRESHOLD_PC, DATABASE_FREE_PAGES_VACUUM_PC); + + (void) sqlite3_wal_checkpoint(db_meta, NULL); +} + +struct scan_metadata_payload { + uv_work_t request; + struct metadata_wc *wc; + void *data; + BUFFER *work_buffer; + uint32_t max_count; +}; + +struct host_context_load_thread { + uv_thread_t thread; + RRDHOST *host; + bool busy; + bool finished; +}; + +static void restore_host_context(void *arg) +{ + struct host_context_load_thread *hclt = arg; + RRDHOST *host = hclt->host; + + usec_t started_ut = now_monotonic_usec(); (void)started_ut; + rrdhost_load_rrdcontext_data(host); + usec_t ended_ut = now_monotonic_usec(); (void)ended_ut; + + rrdhost_flag_clear(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD); + +#ifdef ENABLE_ACLK + aclk_queue_node_info(host, false); +#endif + + nd_log( + NDLS_DAEMON, + NDLP_DEBUG, + "Contexts for host %s loaded in %0.2f ms", + rrdhost_hostname(host), + (double)(ended_ut - started_ut) / USEC_PER_MS); + + __atomic_store_n(&hclt->finished, true, __ATOMIC_RELEASE); +} + +// Callback after scan of hosts is done +static void after_start_host_load_context(uv_work_t *req, int status __maybe_unused) +{ + struct scan_metadata_payload *data = req->data; + freez(data); +} + +#define MAX_FIND_THREAD_RETRIES (10) + +static void cleanup_finished_threads(struct host_context_load_thread *hclt, size_t max_thread_slots, bool wait) +{ + if (!hclt) + return; + + for (size_t index = 0; index < max_thread_slots; index++) { + if (__atomic_load_n(&(hclt[index].finished), __ATOMIC_RELAXED) + || (wait && __atomic_load_n(&(hclt[index].busy), __ATOMIC_ACQUIRE))) { + int rc = uv_thread_join(&(hclt[index].thread)); + if (rc) + nd_log(NDLS_DAEMON, NDLP_WARNING, "Failed to join thread, rc = %d", rc); + __atomic_store_n(&(hclt[index].busy), false, 
__ATOMIC_RELEASE); + __atomic_store_n(&(hclt[index].finished), false, __ATOMIC_RELEASE); + } + } +} + +static size_t find_available_thread_slot(struct host_context_load_thread *hclt, size_t max_thread_slots, size_t *found_index) +{ + size_t retries = MAX_FIND_THREAD_RETRIES; + while (retries--) { + size_t index = 0; + while (index < max_thread_slots) { + if (false == __atomic_load_n(&(hclt[index].busy), __ATOMIC_ACQUIRE)) { + *found_index = index; + return true; + } + index++; + } + sleep_usec(10 * USEC_PER_MS); + } + return false; +} + +static void start_all_host_load_context(uv_work_t *req __maybe_unused) +{ + register_libuv_worker_jobs(); + + struct scan_metadata_payload *data = req->data; + struct metadata_wc *wc = data->wc; + + worker_is_busy(UV_EVENT_HOST_CONTEXT_LOAD); + usec_t started_ut = now_monotonic_usec(); (void)started_ut; + + RRDHOST *host; + + size_t max_threads = MIN(get_netdata_cpus() / 2, 6); + if (max_threads < 1) + max_threads = 1; + + nd_log(NDLS_DAEMON, NDLP_DEBUG, "Using %zu threads for context loading", max_threads); + struct host_context_load_thread *hclt = max_threads > 1 ? 
callocz(max_threads, sizeof(*hclt)) : NULL; + + size_t thread_index = 0; + dfe_start_reentrant(rrdhost_root_index, host) { + if (!rrdhost_flag_check(host, RRDHOST_FLAG_PENDING_CONTEXT_LOAD)) + continue; + + nd_log(NDLS_DAEMON, NDLP_DEBUG, "Loading context for host %s", rrdhost_hostname(host)); + + int rc = 0; + if (hclt) { + bool found_slot = false; + do { + if (metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN)) + break; + + cleanup_finished_threads(hclt, max_threads, false); + found_slot = find_available_thread_slot(hclt, max_threads, &thread_index); + } while (!found_slot); + + if (metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN)) + break; + + __atomic_store_n(&hclt[thread_index].busy, true, __ATOMIC_RELAXED); + hclt[thread_index].host = host; + rc = uv_thread_create(&hclt[thread_index].thread, restore_host_context, &hclt[thread_index]); + } + // if single thread or thread creation failed + if (rc || !hclt) { + struct host_context_load_thread hclt_sync = {.host = host}; + restore_host_context(&hclt_sync); + + if (metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN)) + break; + } + } + dfe_done(host); + + cleanup_finished_threads(hclt, max_threads, true); + freez(hclt); + usec_t ended_ut = now_monotonic_usec(); (void)ended_ut; + nd_log(NDLS_DAEMON, NDLP_DEBUG, "Host contexts loaded in %0.2f ms", (double)(ended_ut - started_ut) / USEC_PER_MS); + + worker_is_idle(); +} + +// Callback after scan of hosts is done +static void after_metadata_hosts(uv_work_t *req, int status __maybe_unused) +{ + struct scan_metadata_payload *data = req->data; + struct metadata_wc *wc = data->wc; + + metadata_flag_clear(wc, METADATA_FLAG_PROCESSING); + + if (unlikely(wc->scan_complete)) + completion_mark_complete(wc->scan_complete); + + freez(data); +} + +static bool metadata_scan_host(RRDHOST *host, uint32_t max_count, bool use_transaction, BUFFER *work_buffer, size_t *query_counter) { + RRDSET *st; + int rc; + + bool more_to_do = false; + uint32_t scan_count = 1; + + sqlite3_stmt *ml_load_stmt 
= NULL; + + bool load_ml_models = max_count; + + if (use_transaction) + (void)db_execute(db_meta, "BEGIN TRANSACTION"); + + rrdset_foreach_reentrant(st, host) { + if (scan_count == max_count) { + more_to_do = true; + break; + } + if(rrdset_flag_check(st, RRDSET_FLAG_METADATA_UPDATE)) { + (*query_counter)++; + + rrdset_flag_clear(st, RRDSET_FLAG_METADATA_UPDATE); + scan_count++; + + buffer_flush(work_buffer); + rc = check_and_update_chart_labels(st, work_buffer, query_counter); + if (unlikely(rc)) + error_report("METADATA: 'host:%s': Failed to update labels for chart %s", rrdhost_hostname(host), rrdset_name(st)); + else + (*query_counter)++; + + rc = store_chart_metadata(st); + if (unlikely(rc)) + error_report("METADATA: 'host:%s': Failed to store metadata for chart %s", rrdhost_hostname(host), rrdset_name(st)); + } + + RRDDIM *rd; + rrddim_foreach_read(rd, st) { + if(rrddim_flag_check(rd, RRDDIM_FLAG_METADATA_UPDATE)) { + (*query_counter)++; + + rrddim_flag_clear(rd, RRDDIM_FLAG_METADATA_UPDATE); + + if (rrddim_option_check(rd, RRDDIM_OPTION_HIDDEN)) + rrddim_flag_set(rd, RRDDIM_FLAG_META_HIDDEN); + else + rrddim_flag_clear(rd, RRDDIM_FLAG_META_HIDDEN); + + rc = store_dimension_metadata(rd); + if (unlikely(rc)) + error_report("METADATA: 'host:%s': Failed to dimension metadata for chart %s. 
dimension %s", + rrdhost_hostname(host), rrdset_name(st), + rrddim_name(rd)); + } + + if(rrddim_flag_check(rd, RRDDIM_FLAG_ML_MODEL_LOAD)) { + rrddim_flag_clear(rd, RRDDIM_FLAG_ML_MODEL_LOAD); + if (likely(load_ml_models)) + (void) ml_dimension_load_models(rd, &ml_load_stmt); + } + + worker_is_idle(); + } + rrddim_foreach_done(rd); + } + rrdset_foreach_done(st); + + if (use_transaction) + (void)db_execute(db_meta, "COMMIT TRANSACTION"); + + if (ml_load_stmt) { + sqlite3_finalize(ml_load_stmt); + ml_load_stmt = NULL; + } + + return more_to_do; +} + +static void store_host_and_system_info(RRDHOST *host, size_t *query_counter) +{ + if (unlikely(store_host_systeminfo(host))) { + error_report("METADATA: 'host:%s': Failed to store host updated system information in the database", rrdhost_hostname(host)); + rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_INFO | RRDHOST_FLAG_METADATA_UPDATE); + } + else { + if (likely(query_counter)) + (*query_counter)++; + } + + if (unlikely(store_host_metadata(host))) { + error_report("METADATA: 'host:%s': Failed to store host info in the database", rrdhost_hostname(host)); + rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_INFO | RRDHOST_FLAG_METADATA_UPDATE); + } + else { + if (likely(query_counter)) + (*query_counter)++; + } +} + +struct host_chart_label_cleanup { + Pvoid_t JudyL; + Word_t count; +}; + +static void do_chart_label_cleanup(struct host_chart_label_cleanup *cl_cleanup_data) +{ + if (!cl_cleanup_data) + return; + + Word_t Index = 0; + bool first = true; + Pvoid_t *PValue; + while ((PValue = JudyLFirstThenNext(cl_cleanup_data->JudyL, &Index, &first))) { + char *machine_guid = *PValue; + + RRDHOST *host = rrdhost_find_by_guid(machine_guid); + if (likely(!host)) { + uuid_t host_uuid; + if (!uuid_parse(machine_guid, host_uuid)) + delete_host_chart_labels(&host_uuid); + } + + freez(machine_guid); + } + JudyLFreeArray(&cl_cleanup_data->JudyL, PJE0); + freez(cl_cleanup_data); +} + +// Worker thread to scan hosts for pending metadata to 
store +static void start_metadata_hosts(uv_work_t *req __maybe_unused) +{ + register_libuv_worker_jobs(); + + RRDHOST *host; + int transaction_started = 0; + + struct scan_metadata_payload *data = req->data; + struct metadata_wc *wc = data->wc; + + BUFFER *work_buffer = data->work_buffer; + usec_t all_started_ut = now_monotonic_usec(); (void)all_started_ut; + nd_log(NDLS_DAEMON, NDLP_DEBUG, "Checking all hosts started"); + usec_t started_ut = now_monotonic_usec(); (void)started_ut; + + do_chart_label_cleanup((struct host_chart_label_cleanup *) data->data); + + bool run_again = false; + worker_is_busy(UV_EVENT_METADATA_STORE); + + if (!data->max_count) + transaction_started = !db_execute(db_meta, "BEGIN TRANSACTION"); + + dfe_start_reentrant(rrdhost_root_index, host) { + if (rrdhost_flag_check(host, RRDHOST_FLAG_ARCHIVED) || !rrdhost_flag_check(host, RRDHOST_FLAG_METADATA_UPDATE)) + continue; + + size_t query_counter = 0; (void)query_counter; + + rrdhost_flag_clear(host,RRDHOST_FLAG_METADATA_UPDATE); + + if (unlikely(rrdhost_flag_check(host, RRDHOST_FLAG_METADATA_LABELS))) { + rrdhost_flag_clear(host, RRDHOST_FLAG_METADATA_LABELS); + + int rc = exec_statement_with_uuid(SQL_DELETE_HOST_LABELS, &host->host_uuid); + if (likely(!rc)) { + query_counter++; + + buffer_flush(work_buffer); + struct query_build tmp = {.sql = work_buffer, .count = 0}; + uuid_unparse_lower(host->host_uuid, tmp.uuid_str); + rrdlabels_walkthrough_read(host->rrdlabels, host_label_store_to_sql_callback, &tmp); + buffer_strcat(work_buffer, " ON CONFLICT (host_id, label_key) DO UPDATE SET source_type = excluded.source_type, label_value=excluded.label_value, date_created=UNIXEPOCH()"); + rc = db_execute(db_meta, buffer_tostring(work_buffer)); + + if (unlikely(rc)) { + error_report("METADATA: 'host:%s': failed to update metadata host labels", rrdhost_hostname(host)); + rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_LABELS | RRDHOST_FLAG_METADATA_UPDATE); + } + else + query_counter++; + } else { + 
error_report("METADATA: 'host:%s': failed to delete old host labels", rrdhost_hostname(host)); + rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_LABELS | RRDHOST_FLAG_METADATA_UPDATE); + } + } + + if (unlikely(rrdhost_flag_check(host, RRDHOST_FLAG_METADATA_CLAIMID))) { + rrdhost_flag_clear(host, RRDHOST_FLAG_METADATA_CLAIMID); + uuid_t uuid; + int rc; + if (likely(host->aclk_state.claimed_id && !uuid_parse(host->aclk_state.claimed_id, uuid))) + rc = store_claim_id(&host->host_uuid, &uuid); + else + rc = store_claim_id(&host->host_uuid, NULL); + + if (unlikely(rc)) + rrdhost_flag_set(host, RRDHOST_FLAG_METADATA_CLAIMID | RRDHOST_FLAG_METADATA_UPDATE); + else + query_counter++; + } + if (unlikely(rrdhost_flag_check(host, RRDHOST_FLAG_METADATA_INFO))) { + rrdhost_flag_clear(host, RRDHOST_FLAG_METADATA_INFO); + store_host_and_system_info(host, &query_counter); + } + + // For clarity + bool use_transaction = data->max_count; + if (unlikely(metadata_scan_host(host, data->max_count, use_transaction, work_buffer, &query_counter))) { + run_again = true; + rrdhost_flag_set(host,RRDHOST_FLAG_METADATA_UPDATE); + } + usec_t ended_ut = now_monotonic_usec(); (void)ended_ut; + nd_log( + NDLS_DAEMON, + NDLP_DEBUG, + "Host %s saved metadata with %zu SQL statements, in %0.2f ms", + rrdhost_hostname(host), + query_counter, + (double)(ended_ut - started_ut) / USEC_PER_MS); + } + dfe_done(host); + + if (!data->max_count && transaction_started) + transaction_started = db_execute(db_meta, "COMMIT TRANSACTION"); + + usec_t all_ended_ut = now_monotonic_usec(); (void)all_ended_ut; + nd_log( + NDLS_DAEMON, + NDLP_DEBUG, + "Checking all hosts completed in %0.2f ms", + (double)(all_ended_ut - all_started_ut) / USEC_PER_MS); + + if (unlikely(run_again)) + wc->metadata_check_after = now_realtime_sec() + METADATA_HOST_CHECK_IMMEDIATE; + else { + wc->metadata_check_after = now_realtime_sec() + METADATA_HOST_CHECK_INTERVAL; + run_metadata_cleanup(wc); + } + worker_is_idle(); +} + +static void 
// Dedicated metadata-sync thread: owns a private libuv loop and drains the
// metadata command queue in batches. Commands arrive via metadata_enq_cmd()
// from other threads; uv_async/timer callbacks wake uv_run so the dequeue
// loop below runs. Long-running work (host scans, context loads) is pushed
// to the libuv threadpool with uv_queue_work; METADATA_FLAG_PROCESSING
// guards against overlapping scans. The thread exits only after a shutdown
// command AND any in-flight worker has finished.
metadata_event_loop(void *arg)
{
    worker_register("METASYNC");
    worker_register_job_name(METADATA_DATABASE_NOOP, "noop");
    worker_register_job_name(METADATA_DATABASE_TIMER, "timer");
    worker_register_job_name(METADATA_DEL_DIMENSION, "delete dimension");
    worker_register_job_name(METADATA_STORE_CLAIM_ID, "add claim id");
    worker_register_job_name(METADATA_ADD_HOST_INFO, "add host info");
    worker_register_job_name(METADATA_MAINTENANCE, "maintenance");

    int ret;
    uv_loop_t *loop;
    unsigned cmd_batch_size;
    struct metadata_wc *wc = arg;
    enum metadata_opcode opcode;

    uv_thread_set_name_np(wc->thread, "METASYNC");
    loop = wc->loop = mallocz(sizeof(uv_loop_t));
    ret = uv_loop_init(loop);
    if (ret) {
        netdata_log_error("uv_loop_init(): %s", uv_strerror(ret));
        goto error_after_loop_init;
    }
    loop->data = wc;

    // async handle: lets producers wake this loop after enqueueing a command.
    ret = uv_async_init(wc->loop, &wc->async, async_cb);
    if (ret) {
        netdata_log_error("uv_async_init(): %s", uv_strerror(ret));
        goto error_after_async_init;
    }
    wc->async.data = wc;

    // periodic timer: drives time-based maintenance (timer_cb enqueues work).
    ret = uv_timer_init(loop, &wc->timer_req);
    if (ret) {
        netdata_log_error("uv_timer_init(): %s", uv_strerror(ret));
        goto error_after_timer_init;
    }
    wc->timer_req.data = wc;
    fatal_assert(0 == uv_timer_start(&wc->timer_req, timer_cb, TIMER_INITIAL_PERIOD_MS, TIMER_REPEAT_PERIOD_MS));

    nd_log(NDLS_DAEMON, NDLP_DEBUG, "Starting metadata sync thread");

    struct metadata_cmd cmd;
    memset(&cmd, 0, sizeof(cmd));
    metadata_flag_clear(wc, METADATA_FLAG_PROCESSING);

    wc->metadata_check_after = now_realtime_sec() + METADATA_HOST_CHECK_FIRST_CHECK;

    int shutdown = 0;
    // Unblock metadata_sync_init(), which waits for thread startup.
    completion_mark_complete(&wc->start_stop_complete);
    BUFFER *work_buffer = buffer_create(1024, &netdata_buffers_statistics.buffers_sqlite);
    struct scan_metadata_payload *data;
    struct host_chart_label_cleanup *cl_cleanup_data = NULL;

    // Keep spinning while not shut down, or while a threadpool worker is
    // still flagged as PROCESSING (so we never exit under an active worker).
    while (shutdown == 0 || (wc->flags & METADATA_FLAG_PROCESSING)) {
        uuid_t *uuid;
        RRDHOST *host = NULL;

        worker_is_idle();
        uv_run(loop, UV_RUN_DEFAULT);

        /* wait for commands */
        cmd_batch_size = 0;
        do {
            // Bound the batch so timer/async callbacks are serviced regularly.
            if (unlikely(cmd_batch_size >= METADATA_MAX_BATCH_SIZE))
                break;

            cmd = metadata_deq_cmd(wc);
            opcode = cmd.opcode;

            if (unlikely(opcode == METADATA_DATABASE_NOOP && metadata_flag_check(wc, METADATA_FLAG_SHUTDOWN))) {
                shutdown = 1;
                continue;
            }

            ++cmd_batch_size;

            if (likely(opcode != METADATA_DATABASE_NOOP))
                worker_is_busy(opcode);

            switch (opcode) {
                case METADATA_DATABASE_NOOP:
                case METADATA_DATABASE_TIMER:
                    break;
                case METADATA_DEL_DIMENSION:
                    // param[0] ownership transferred by the producer; freed here.
                    uuid = (uuid_t *) cmd.param[0];
                    if (likely(dimension_can_be_deleted(uuid, NULL, false)))
                        delete_dimension_uuid(uuid, NULL, false);
                    freez(uuid);
                    break;
                case METADATA_STORE_CLAIM_ID:
                    // Both params are heap copies made by metaqueue_store_claim_id.
                    store_claim_id((uuid_t *) cmd.param[0], (uuid_t *) cmd.param[1]);
                    freez((void *) cmd.param[0]);
                    freez((void *) cmd.param[1]);
                    break;
                case METADATA_ADD_HOST_INFO:
                    host = (RRDHOST *) cmd.param[0];
                    store_host_and_system_info(host, NULL);
                    break;
                case METADATA_SCAN_HOSTS:
                    // Only one scan at a time; drop the request if one is running.
                    if (unlikely(metadata_flag_check(wc, METADATA_FLAG_PROCESSING)))
                        break;

                    if (unittest_running)
                        break;

                    data = mallocz(sizeof(*data));
                    data->request.data = data;
                    data->wc = wc;
                    data->data = cl_cleanup_data;
                    data->work_buffer = work_buffer;
                    cl_cleanup_data = NULL;

                    if (unlikely(cmd.completion)) {
                        data->max_count = 0; // 0 will process all pending updates
                        cmd.completion = NULL; // Do not complete after launching worker (worker will do)
                    }
                    else
                        data->max_count = 5000;

                    metadata_flag_set(wc, METADATA_FLAG_PROCESSING);
                    if (unlikely(
                            uv_queue_work(loop,&data->request,
                                          start_metadata_hosts,
                                          after_metadata_hosts))) {
                        // Failed to launch worker -- let the event loop handle completion
                        cmd.completion = wc->scan_complete;
                        cl_cleanup_data = data->data;
                        freez(data);
                        metadata_flag_clear(wc, METADATA_FLAG_PROCESSING);
                    }
                    break;
                case METADATA_LOAD_HOST_CONTEXT:;
                    if (unittest_running)
                        break;

                    data = callocz(1,sizeof(*data));
                    data->request.data = data;
                    data->wc = wc;
                    if (unlikely(
                            uv_queue_work(loop,&data->request, start_all_host_load_context,
                                          after_start_host_load_context))) {
                        freez(data);
                    }
                    break;
                case METADATA_DELETE_HOST_CHART_LABELS:;
                    // Accumulate machine GUIDs in a Judy array; consumed by the
                    // next METADATA_SCAN_HOSTS via do_chart_label_cleanup().
                    if (!cl_cleanup_data)
                        cl_cleanup_data = callocz(1,sizeof(*cl_cleanup_data));

                    Pvoid_t *PValue = JudyLIns(&cl_cleanup_data->JudyL, (Word_t) ++cl_cleanup_data->count, PJE0);
                    if (PValue)
                        *PValue = (void *) cmd.param[0];

                    break;
                case METADATA_UNITTEST:;
                    struct thread_unittest *tu = (struct thread_unittest *) cmd.param[0];
                    sleep_usec(1000); // processing takes 1ms
                    __atomic_fetch_add(&tu->processed, 1, __ATOMIC_SEQ_CST);
                    break;
                default:
                    break;
            }

            if (cmd.completion)
                completion_mark_complete(cmd.completion);
        } while (opcode != METADATA_DATABASE_NOOP);
    }

    // ---- shutdown path: tear down handles and the loop ----
    if (!uv_timer_stop(&wc->timer_req))
        uv_close((uv_handle_t *)&wc->timer_req, NULL);

    uv_close((uv_handle_t *)&wc->async, NULL);
    int rc;
    // NOTE(review): this loops UNTIL uv_loop_close() returns UV_EBUSY.
    // The common libuv idiom is the opposite (retry WHILE it returns UV_EBUSY,
    // running uv_run between attempts to let close callbacks fire). Confirm
    // the intended termination condition — as written, a successful close (0)
    // re-enters the loop on an already-closed loop.
    do {
        rc = uv_loop_close(loop);
    } while (rc != UV_EBUSY);

    buffer_free(work_buffer);
    freez(loop);
    worker_unregister();

    nd_log(NDLS_DAEMON, NDLP_DEBUG, "Shutting down metadata thread");
    // Unblock metadata_sync_shutdown(), which waits on the same completion.
    completion_mark_complete(&wc->start_stop_complete);
    if (wc->scan_complete) {
        completion_destroy(wc->scan_complete);
        freez(wc->scan_complete);
    }
    metadata_free_cmd_queue(wc);
    return;

error_after_timer_init:
    uv_close((uv_handle_t *)&wc->async, NULL);
error_after_async_init:
    fatal_assert(0 == uv_loop_close(loop));
error_after_loop_init:
    freez(loop);
    worker_unregister();
}

// Synchronous shutdown: post METADATA_SYNC_SHUTDOWN and block until the
// metadata thread acknowledges via start_stop_complete.
void metadata_sync_shutdown(void)
{
    completion_init(&metasync_worker.start_stop_complete);

    struct metadata_cmd cmd;
    memset(&cmd, 0, sizeof(cmd));
    nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Sending a shutdown command");
    cmd.opcode = METADATA_SYNC_SHUTDOWN;
    metadata_enq_cmd(&metasync_worker, &cmd);

    /* wait for metadata thread to shut down */
    nd_log(NDLS_DAEMON,
NDLP_DEBUG, "METADATA: Waiting for shutdown ACK"); + completion_wait_for(&metasync_worker.start_stop_complete); + completion_destroy(&metasync_worker.start_stop_complete); + nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Shutdown complete"); +} + +void metadata_sync_shutdown_prepare(void) +{ + static bool running = false; + if (unlikely(!metasync_worker.loop || running)) + return; + + running = true; + + struct metadata_cmd cmd; + memset(&cmd, 0, sizeof(cmd)); + + struct metadata_wc *wc = &metasync_worker; + + struct completion *compl = mallocz(sizeof(*compl)); + completion_init(compl); + __atomic_store_n(&wc->scan_complete, compl, __ATOMIC_RELAXED); + + nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Sending a scan host command"); + uint32_t max_wait_iterations = 2000; + while (unlikely(metadata_flag_check(&metasync_worker, METADATA_FLAG_PROCESSING)) && max_wait_iterations--) { + if (max_wait_iterations == 1999) + nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Current worker is running; waiting to finish"); + sleep_usec(1000); + } + + cmd.opcode = METADATA_SCAN_HOSTS; + metadata_enq_cmd(&metasync_worker, &cmd); + + nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Waiting for host scan completion"); + completion_wait_for(wc->scan_complete); + nd_log(NDLS_DAEMON, NDLP_DEBUG, "METADATA: Host scan complete; can continue with shutdown"); +} + +// ------------------------------------------------------------- +// Init function called on agent startup + +void metadata_sync_init(void) +{ + struct metadata_wc *wc = &metasync_worker; + + memset(wc, 0, sizeof(*wc)); + completion_init(&wc->start_stop_complete); + + fatal_assert(0 == uv_thread_create(&(wc->thread), metadata_event_loop, wc)); + + completion_wait_for(&wc->start_stop_complete); + completion_destroy(&wc->start_stop_complete); + + nd_log(NDLS_DAEMON, NDLP_DEBUG, "SQLite metadata sync initialization complete"); +} + + +// Helpers + +static inline void queue_metadata_cmd(enum metadata_opcode opcode, const void *param0, const void 
*param1) +{ + struct metadata_cmd cmd; + cmd.opcode = opcode; + cmd.param[0] = param0; + cmd.param[1] = param1; + cmd.completion = NULL; + metadata_enq_cmd(&metasync_worker, &cmd); +} + +// Public +void metaqueue_delete_dimension_uuid(uuid_t *uuid) +{ + if (unlikely(!metasync_worker.loop)) + return; + uuid_t *use_uuid = mallocz(sizeof(*uuid)); + uuid_copy(*use_uuid, *uuid); + queue_metadata_cmd(METADATA_DEL_DIMENSION, use_uuid, NULL); +} + +void metaqueue_store_claim_id(uuid_t *host_uuid, uuid_t *claim_uuid) +{ + if (unlikely(!host_uuid)) + return; + + uuid_t *local_host_uuid = mallocz(sizeof(*host_uuid)); + uuid_t *local_claim_uuid = NULL; + + uuid_copy(*local_host_uuid, *host_uuid); + if (likely(claim_uuid)) { + local_claim_uuid = mallocz(sizeof(*claim_uuid)); + uuid_copy(*local_claim_uuid, *claim_uuid); + } + queue_metadata_cmd(METADATA_STORE_CLAIM_ID, local_host_uuid, local_claim_uuid); +} + +void metaqueue_host_update_info(RRDHOST *host) +{ + if (unlikely(!metasync_worker.loop)) + return; + queue_metadata_cmd(METADATA_ADD_HOST_INFO, host, NULL); +} + +void metaqueue_ml_load_models(RRDDIM *rd) +{ + rrddim_flag_set(rd, RRDDIM_FLAG_ML_MODEL_LOAD); +} + +void metadata_queue_load_host_context(RRDHOST *host) +{ + if (unlikely(!metasync_worker.loop)) + return; + queue_metadata_cmd(METADATA_LOAD_HOST_CONTEXT, host, NULL); + nd_log(NDLS_DAEMON, NDLP_DEBUG, "Queued command to load host contexts"); +} + +void metadata_delete_host_chart_labels(char *machine_guid) +{ + if (unlikely(!metasync_worker.loop)) { + freez(machine_guid); + return; + } + + // Node machine guid is already strdup-ed + queue_metadata_cmd(METADATA_DELETE_HOST_CHART_LABELS, machine_guid, NULL); + nd_log(NDLS_DAEMON, NDLP_DEBUG, "Queued command delete chart labels for host %s", machine_guid); +} + + +// +// unitests +// + +static void *unittest_queue_metadata(void *arg) { + struct thread_unittest *tu = arg; + + struct metadata_cmd cmd; + cmd.opcode = METADATA_UNITTEST; + cmd.param[0] = tu; + cmd.param[1] 
= NULL; + cmd.completion = NULL; + metadata_enq_cmd(&metasync_worker, &cmd); + + do { + __atomic_fetch_add(&tu->added, 1, __ATOMIC_SEQ_CST); + metadata_enq_cmd(&metasync_worker, &cmd); + sleep_usec(10000); + } while (!__atomic_load_n(&tu->join, __ATOMIC_RELAXED)); + return arg; +} + +static void *metadata_unittest_threads(void) +{ + + unsigned done; + + struct thread_unittest tu = { + .join = 0, + .added = 0, + .processed = 0, + .done = &done, + }; + + // Queue messages / Time it + time_t seconds_to_run = 5; + int threads_to_create = 4; + fprintf( + stderr, + "\nChecking metadata queue using %d threads for %lld seconds...\n", + threads_to_create, + (long long)seconds_to_run); + + netdata_thread_t threads[threads_to_create]; + tu.join = 0; + for (int i = 0; i < threads_to_create; i++) { + char buf[100 + 1]; + snprintf(buf, sizeof(buf) - 1, "META[%d]", i); + netdata_thread_create( + &threads[i], + buf, + NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE, + unittest_queue_metadata, + &tu); + } + (void) uv_async_send(&metasync_worker.async); + sleep_usec(seconds_to_run * USEC_PER_SEC); + + __atomic_store_n(&tu.join, 1, __ATOMIC_RELAXED); + for (int i = 0; i < threads_to_create; i++) { + void *retval; + netdata_thread_join(threads[i], &retval); + } + sleep_usec(5 * USEC_PER_SEC); + + fprintf(stderr, "Added %u elements, processed %u\n", tu.added, tu.processed); + + return 0; +} + +int metadata_unittest(void) +{ + metadata_sync_init(); + + // Queue items for a specific period of time + metadata_unittest_threads(); + + metadata_sync_shutdown(); + + return 0; +} diff --git a/src/database/sqlite/sqlite_metadata.h b/src/database/sqlite/sqlite_metadata.h new file mode 100644 index 000000000..a276f5c4d --- /dev/null +++ b/src/database/sqlite/sqlite_metadata.h @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SQLITE_METADATA_H +#define NETDATA_SQLITE_METADATA_H + +#include "sqlite3.h" +#include "sqlite_functions.h" + +// return a 
node list +struct node_instance_list { + uuid_t node_id; + uuid_t host_id; + char *hostname; + int live; + int queryable; + int hops; +}; + +typedef enum db_check_action_type { + DB_CHECK_NONE = (1 << 0), + DB_CHECK_RECLAIM_SPACE = (1 << 1), + DB_CHECK_ANALYZE = (1 << 2), + DB_CHECK_CONT = (1 << 3), + DB_CHECK_RECOVER = (1 << 4), +} db_check_action_type_t; + +// To initialize and shutdown +void metadata_sync_init(void); +void metadata_sync_shutdown(void); +void metadata_sync_shutdown_prepare(void); + +void metaqueue_delete_dimension_uuid(uuid_t *uuid); +void metaqueue_store_claim_id(uuid_t *host_uuid, uuid_t *claim_uuid); +void metaqueue_host_update_info(RRDHOST *host); +void metaqueue_ml_load_models(RRDDIM *rd); +void detect_machine_guid_change(uuid_t *host_uuid); +void metadata_queue_load_host_context(RRDHOST *host); +void metadata_delete_host_chart_labels(char *machine_guid); +void vacuum_database(sqlite3 *database, const char *db_alias, int threshold, int vacuum_pc); + +int sql_metadata_cache_stats(int op); + +int get_node_id(uuid_t *host_id, uuid_t *node_id); +int update_node_id(uuid_t *host_id, uuid_t *node_id); +struct node_instance_list *get_node_list(void); +void sql_load_node_id(RRDHOST *host); + +// Help build archived hosts in memory when agent starts +void sql_build_host_system_info(uuid_t *host_id, struct rrdhost_system_info *system_info); +void invalidate_node_instances(uuid_t *host_id, uuid_t *claim_id); +RRDLABELS *sql_load_host_labels(uuid_t *host_id); + +uint64_t sqlite_get_meta_space(void); +int sql_init_meta_database(db_check_action_type_t rebuild, int memory); +void sql_close_meta_database(void); + +// UNIT TEST +int metadata_unittest(void); +#endif //NETDATA_SQLITE_METADATA_H diff --git a/src/database/storage_engine.c b/src/database/storage_engine.c new file mode 100644 index 000000000..88a35b2e4 --- /dev/null +++ b/src/database/storage_engine.c @@ -0,0 +1,94 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "storage_engine.h" 
+#include "ram/rrddim_mem.h" +#ifdef ENABLE_DBENGINE +#include "engine/rrdengineapi.h" +#endif + +static STORAGE_ENGINE engines[] = { + { + .id = RRD_MEMORY_MODE_NONE, + .name = RRD_MEMORY_MODE_NONE_NAME, + .seb = STORAGE_ENGINE_BACKEND_RRDDIM, + .api = { + .metric_get = rrddim_metric_get, + .metric_get_or_create = rrddim_metric_get_or_create, + .metric_dup = rrddim_metric_dup, + .metric_release = rrddim_metric_release, + .metric_retention_by_uuid = rrddim_metric_retention_by_uuid, + } + }, + { + .id = RRD_MEMORY_MODE_RAM, + .name = RRD_MEMORY_MODE_RAM_NAME, + .seb = STORAGE_ENGINE_BACKEND_RRDDIM, + .api = { + .metric_get = rrddim_metric_get, + .metric_get_or_create = rrddim_metric_get_or_create, + .metric_dup = rrddim_metric_dup, + .metric_release = rrddim_metric_release, + .metric_retention_by_uuid = rrddim_metric_retention_by_uuid, + } + }, + { + .id = RRD_MEMORY_MODE_ALLOC, + .name = RRD_MEMORY_MODE_ALLOC_NAME, + .seb = STORAGE_ENGINE_BACKEND_RRDDIM, + .api = { + .metric_get = rrddim_metric_get, + .metric_get_or_create = rrddim_metric_get_or_create, + .metric_dup = rrddim_metric_dup, + .metric_release = rrddim_metric_release, + .metric_retention_by_uuid = rrddim_metric_retention_by_uuid, + } + }, +#ifdef ENABLE_DBENGINE + { + .id = RRD_MEMORY_MODE_DBENGINE, + .name = RRD_MEMORY_MODE_DBENGINE_NAME, + .seb = STORAGE_ENGINE_BACKEND_DBENGINE, + .api = { + .metric_get = rrdeng_metric_get, + .metric_get_or_create = rrdeng_metric_get_or_create, + .metric_dup = rrdeng_metric_dup, + .metric_release = rrdeng_metric_release, + .metric_retention_by_uuid = rrdeng_metric_retention_by_uuid, + } + }, +#endif + { .id = RRD_MEMORY_MODE_NONE, .name = NULL } +}; + +STORAGE_ENGINE* storage_engine_find(const char* name) +{ + for (STORAGE_ENGINE* it = engines; it->name; it++) { + if (strcmp(it->name, name) == 0) + return it; + } + return NULL; +} + +STORAGE_ENGINE* storage_engine_get(RRD_MEMORY_MODE mmode) +{ + for (STORAGE_ENGINE* it = engines; it->name; it++) { + if (it->id == 
mmode) + return it; + } + return NULL; +} + +STORAGE_ENGINE* storage_engine_foreach_init() +{ + // Assuming at least one engine exists + return &engines[0]; +} + +STORAGE_ENGINE* storage_engine_foreach_next(STORAGE_ENGINE* it) +{ + if (!it || !it->name) + return NULL; + + it++; + return it->name ? it : NULL; +} diff --git a/database/storage_engine.h b/src/database/storage_engine.h index b7fb7383a..b7fb7383a 100644 --- a/database/storage_engine.h +++ b/src/database/storage_engine.h |