From be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97 Mon Sep 17 00:00:00 2001
From: Daniel Baumann
Date: Fri, 19 Apr 2024 04:57:58 +0200
Subject: Adding upstream version 1.44.3.

Signed-off-by: Daniel Baumann
---
 libnetdata/adaptive_resortable_list/Makefile.am |   8 +
 libnetdata/adaptive_resortable_list/README.md   | 103 ++++++++
 .../adaptive_resortable_list.c                  | 280 +++++++++++++++++++++
 .../adaptive_resortable_list.h                  | 138 ++++++++++
 4 files changed, 529 insertions(+)
 create mode 100644 libnetdata/adaptive_resortable_list/Makefile.am
 create mode 100644 libnetdata/adaptive_resortable_list/README.md
 create mode 100644 libnetdata/adaptive_resortable_list/adaptive_resortable_list.c
 create mode 100644 libnetdata/adaptive_resortable_list/adaptive_resortable_list.h

(limited to 'libnetdata/adaptive_resortable_list')

diff --git a/libnetdata/adaptive_resortable_list/Makefile.am b/libnetdata/adaptive_resortable_list/Makefile.am
new file mode 100644
index 00000000..161784b8
--- /dev/null
+++ b/libnetdata/adaptive_resortable_list/Makefile.am
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+AUTOMAKE_OPTIONS = subdir-objects
+MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
+
+dist_noinst_DATA = \
+    README.md \
+    $(NULL)
diff --git a/libnetdata/adaptive_resortable_list/README.md b/libnetdata/adaptive_resortable_list/README.md
new file mode 100644
index 00000000..ceed467d
--- /dev/null
+++ b/libnetdata/adaptive_resortable_list/README.md
@@ -0,0 +1,103 @@
+
+
+# Adaptive Re-sortable List (ARL)
+
+This library allows Netdata to read a series of `name - value` pairs
+in the **fastest possible way**.
+
+ARLs are used all over Netdata, as they are the most CPU-efficient way
+to process `/proc` files. They are used to process both vertical (csv-like)
+and horizontal (one pair per line) `name - value` pairs.
+
+## How ARL works
+
+It maintains a linked list of all `NAME` keywords, sorted in the
+order found in the data source. The linked list is kept
+sorted at all times - the data source may change at any time, and the
+linked list will adapt at the next iteration.
+
+### Initialization
+
+During initialization (just once), the caller:
+
+- calls `arl_create()` to create the ARL
+
+- calls `arl_expect()` multiple times to register the expected keywords
+
+The library will call the `processor()` function (given to
+`arl_create()`) for each expected keyword found.
+The default `processor()` expects `dst` to be an `unsigned long long *`.
+
+Each `name` keyword may have a different `processor()` (by calling
+`arl_expect_custom()` instead of `arl_expect()`).
+
+### Data collection iterations
+
+For each iteration through the data source, the caller:
+
+- calls `arl_begin()` to initiate a data collection iteration.
+  This is to be called just ONCE every time the source is re-evaluated.
+
+- calls `arl_check()` for each entry read from the file.
+
+### Cleanup
+
+When the caller exits:
+
+- calls `arl_free()` to destroy the ARL and free all memory.
+
+A complete usage sketch is shown in the example at the end of this document.
+
+### Performance
+
+ARL maintains a list of `name` keywords found in the data source (even the ones
+that are not useful for data collection).
+
+If the data source maintains the same order of the `name-value` pairs, each
+call to `arl_check()` executes only a single `strcmp()` to verify that the
+expected order has not changed; a counter is incremented and a pointer is changed.
+So, if the data source has 100 `name-value` pairs and their order remains constant
+over time, 100 successful `strcmp()` calls are executed.
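+
+In effect, the in-order fast path is nothing more than the following
+trimmed-down excerpt of `arl_check()` (the full inline implementation is in
+`adaptive_resortable_list.h`; this is a simplified illustration, not a
+standalone function):
+
+```c
+// simplified excerpt: the next expected keyword is verified with one strcmp()
+ARL_ENTRY *e = base->next_keyword;
+
+if(likely(!strcmp(keyword, e->name))) {
+    e->flags |= ARL_ENTRY_FLAG_FOUND;
+
+    if(unlikely(e->dst)) {
+        e->processor(e->name, e->hash, value, e->dst); // parse and store the value
+        base->found++;                                 // the counter that is incremented
+    }
+
+    base->next_keyword = e->next;                      // the pointer that is changed
+    if(unlikely(!base->next_keyword))
+        base->next_keyword = base->head;
+}
+```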
+
+In the unlikely event that an iteration sees the data source with a different order,
+a full search of the remaining keywords is made for each out-of-order keyword. But
+this search uses 32-bit hashes, not string comparisons, so it should also be fast.
+
+When all expectations are satisfied (even in the middle of an iteration),
+the call to `arl_check()` returns 1 to signal the caller to stop the loop,
+saving valuable CPU resources for the rest of the data source.
+
+In the following test we used alternative methods to process, **1M times**,
+a data source like `/proc/meminfo`, already tokenized, in memory,
+to extract the same number of expected metrics:
+
+|test|code|string comparison|number parsing|duration|
+|:--:|:--:|:---------------:|:------------:|:------:|
+|1|if-else-if-else-if|`strcmp()`|`strtoull()`|4630.337 ms|
+|2|nested loops|inline `simple_hash()` and `strcmp()`|`strtoull()`|1597.481 ms|
+|3|nested loops|inline `simple_hash()` and `strcmp()`|`str2ull()`|923.523 ms|
+|4|if-else-if-else-if|inline `simple_hash()` and `strcmp()`|`strtoull()`|854.574 ms|
+|5|if-else-if-else-if|statement expression `simple_hash()` and `strcmp()`|`strtoull()`|912.013 ms|
+|6|if-continue|inline `simple_hash()` and `strcmp()`|`strtoull()`|842.279 ms|
+|7|if-else-if-else-if|inline `simple_hash()` and `strcmp()`|`str2ull()`|602.837 ms|
+|8|ARL|ARL|`strtoull()`|350.360 ms|
+|9|ARL|ARL|`str2ull()`|157.237 ms|
+
+Compared to the unoptimized code (test No 1: 4.6 sec):
+
+- before ARL, Netdata was using test No **7**, with hashing and a custom `str2ull()`, to achieve 602 ms.
+- the current ARL implementation is test No **9**, which needs only 157 ms (29 times faster than the unoptimized code, about 4 times faster than the optimized code).
+
+[Check the source code of this test](https://raw.githubusercontent.com/netdata/netdata/master/tests/profile/benchmark-value-pairs.c).
+
+## Limitations
+
+Do not use ARL if a name/keyword may appear more than once in the source data.
+ARL keeps a single entry per keyword, so a repeated keyword would be matched
+against the same (already found) entry again, confusing both the ordering and
+the found/wanted accounting.
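+
+## Example
+
+The following is a minimal usage sketch for a hypothetical `/proc/meminfo`
+collector. The function and variable names (`meminfo_collect()`, `MemTotal`,
+etc.) and the include path are illustrative only - adjust them to the
+surrounding code base. Error handling is kept to a minimum.
+
+```c
+#include "libnetdata/libnetdata.h"  // assumed include path for the ARL API
+#include <stdio.h>
+#include <string.h>
+
+static ARL_BASE *arl_meminfo = NULL;
+static unsigned long long MemTotal = 0, MemFree = 0, Cached = 0;
+
+void meminfo_collect(void) {
+    // initialization - done only once
+    if(unlikely(!arl_meminfo)) {
+        arl_meminfo = arl_create("meminfo", NULL, 60); // default processor, re-check every 60 iterations
+        arl_expect(arl_meminfo, "MemTotal", &MemTotal);
+        arl_expect(arl_meminfo, "MemFree",  &MemFree);
+        arl_expect(arl_meminfo, "Cached",   &Cached);
+    }
+
+    FILE *fp = fopen("/proc/meminfo", "r");
+    if(!fp) return;
+
+    // one data collection iteration
+    arl_begin(arl_meminfo);
+
+    char line[256];
+    while(fgets(line, sizeof(line), fp)) {
+        char *name  = strtok(line, ": \t\n");
+        char *value = strtok(NULL, " \t\n");
+        if(!name || !value) continue;
+
+        // stop reading as soon as all expected keywords have been found
+        if(arl_check(arl_meminfo, name, value)) break;
+    }
+
+    fclose(fp);
+
+    // MemTotal, MemFree and Cached now hold the parsed values;
+    // call arl_free(arl_meminfo) once, at shutdown
+}
+```
+
+The third argument of `arl_create()` is the `rechecks` interval: every that many
+iterations the ARL re-checks how many of the expected keywords are still present
+in the source (see the `rechecks` comment in `adaptive_resortable_list.h`).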
+
+
diff --git a/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c b/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c
new file mode 100644
index 00000000..b645927d
--- /dev/null
+++ b/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "../libnetdata.h"
+
+// the default processor() of the ARL
+// can be overwritten at arl_create()
+inline void arl_callback_str2ull(const char *name, uint32_t hash, const char *value, void *dst) {
+    (void)name;
+    (void)hash;
+
+    register unsigned long long *d = dst;
+    *d = str2ull(value, NULL);
+    // fprintf(stderr, "name '%s' with hash %u and value '%s' is %llu\n", name, hash, value, *d);
+}
+
+inline void arl_callback_str2kernel_uint_t(const char *name, uint32_t hash, const char *value, void *dst) {
+    (void)name;
+    (void)hash;
+
+    register kernel_uint_t *d = dst;
+    *d = str2kernel_uint_t(value);
+    // fprintf(stderr, "name '%s' with hash %u and value '%s' is %llu\n", name, hash, value, (unsigned long long)*d);
+}
+
+inline void arl_callback_ssize_t(const char *name, uint32_t hash, const char *value, void *dst) {
+    (void)name;
+    (void)hash;
+
+    register ssize_t *d = dst;
+    *d = (ssize_t)str2ll(value, NULL);
+    // fprintf(stderr, "name '%s' with hash %u and value '%s' is %zd\n", name, hash, value, *d);
+}
+
+// create a new ARL
+ARL_BASE *arl_create(const char *name, void (*processor)(const char *, uint32_t, const char *, void *), size_t rechecks) {
+    ARL_BASE *base = callocz(1, sizeof(ARL_BASE));
+
+    base->name = strdupz(name);
+
+    if(!processor)
+        base->processor = arl_callback_str2ull;
+    else
+        base->processor = processor;
+
+    base->rechecks = rechecks;
+
+    return base;
+}
+
+void arl_free(ARL_BASE *arl_base) {
+    if(unlikely(!arl_base))
+        return;
+
+    while(arl_base->head) {
+        ARL_ENTRY *e = arl_base->head;
+        arl_base->head = e->next;
+
+        freez(e->name);
+#ifdef NETDATA_INTERNAL_CHECKS
+        memset(e, 0, sizeof(ARL_ENTRY));
+#endif
+        freez(e);
+    }
+
+    freez(arl_base->name);
+
+#ifdef NETDATA_INTERNAL_CHECKS
+    memset(arl_base, 0, sizeof(ARL_BASE));
+#endif
+
+    freez(arl_base);
+}
+
+void arl_begin(ARL_BASE *base) {
+
+#ifdef NETDATA_INTERNAL_CHECKS
+    if(likely(base->iteration > 10)) {
+        // do these checks after the ARL has been sorted
+
+        if(unlikely(base->relinkings > (base->expected + base->allocated)))
+            netdata_log_info("ARL '%s' has %zu relinkings with %zu expected and %zu allocated entries. Is the source changing so fast?"
+                             , base->name, base->relinkings, base->expected, base->allocated);
+
+        if(unlikely(base->slow > base->fast))
+            netdata_log_info("ARL '%s' has %zu fast searches and %zu slow searches. Is the source really changing so fast?"
+                             , base->name, base->fast, base->slow);
+
+        /*
+        if(unlikely(base->iteration % 60 == 0)) {
+            netdata_log_info("ARL '%s' statistics: iteration %zu, expected %zu, wanted %zu, allocated %zu, fred %zu, relinkings %zu, found %zu, added %zu, fast %zu, slow %zu"
+                 , base->name
+                 , base->iteration
+                 , base->expected
+                 , base->wanted
+                 , base->allocated
+                 , base->fred
+                 , base->relinkings
+                 , base->found
+                 , base->added
+                 , base->fast
+                 , base->slow
+            );
+            // for(e = base->head; e; e = e->next) fprintf(stderr, "%s ", e->name);
+            // fprintf(stderr, "\n");
+        }
+        */
+    }
+#endif
+
+    if(unlikely(base->iteration > 0 && (base->added || (base->iteration % base->rechecks) == 0))) {
+        int wanted_equals_expected = ((base->iteration % base->rechecks) == 0);
+
+        // fprintf(stderr, "\n\narl_begin() rechecking, added %zu, iteration %zu, rechecks %zu, wanted_equals_expected %d\n\n\n", base->added, base->iteration, base->rechecks, wanted_equals_expected);
+
+        base->added = 0;
+        base->wanted = (wanted_equals_expected)?base->expected:0;
+
+        ARL_ENTRY *e = base->head;
+        while(e) {
+            if(e->flags & ARL_ENTRY_FLAG_FOUND) {
+
+                // remove the found flag
+                e->flags &= ~ARL_ENTRY_FLAG_FOUND;
+
+                // count it in wanted
+                if(!wanted_equals_expected && e->flags & ARL_ENTRY_FLAG_EXPECTED)
+                    base->wanted++;
+
+            }
+            else if(e->flags & ARL_ENTRY_FLAG_DYNAMIC && !(base->head == e && !e->next)) { // not last entry
+                // we can remove this entry
+                // it is not found, and it was created because
+                // it was found in the source file
+
+                // remember the next one
+                ARL_ENTRY *t = e->next;
+
+                // remove it from the list
+                if(e->next) e->next->prev = e->prev;
+                if(e->prev) e->prev->next = e->next;
+                if(base->head == e) base->head = e->next;
+
+                // free it
+                freez(e->name);
+                freez(e);
+
+                // count it
+                base->fred++;
+
+                // continue
+                e = t;
+                continue;
+            }
+
+            e = e->next;
+        }
+    }
+
+    if(unlikely(!base->head)) {
+        // hm... no nodes at all in the list #1700
+        // add a fake one to prevent a crash
+        // this is better than checking for the existence of nodes all the time
+        arl_expect(base, "a-really-not-existing-source-keyword", NULL);
+    }
+
+    base->iteration++;
+    base->next_keyword = base->head;
+    base->found = 0;
+
+}
+
+// register an expected keyword to the ARL
+// together with its destination ( i.e. the output of the processor() )
+ARL_ENTRY *arl_expect_custom(ARL_BASE *base, const char *keyword, void (*processor)(const char *name, uint32_t hash, const char *value, void *dst), void *dst) {
+    ARL_ENTRY *e = callocz(1, sizeof(ARL_ENTRY));
+    e->name = strdupz(keyword);
+    e->hash = simple_hash(e->name);
+    e->processor = (processor)?processor:base->processor;
+    e->dst = dst;
+    e->flags = ARL_ENTRY_FLAG_EXPECTED;
+    e->prev = NULL;
+    e->next = base->head;
+
+    if(base->head) base->head->prev = e;
+    else base->next_keyword = e;
+
+    base->head = e;
+    base->expected++;
+    base->allocated++;
+
+    base->wanted = base->expected;
+
+    return e;
+}
+
+int arl_find_or_create_and_relink(ARL_BASE *base, const char *s, const char *value) {
+    ARL_ENTRY *e;
+
+    uint32_t hash = simple_hash(s);
+
+    // find if it already exists in the data
+    for(e = base->head; e ; e = e->next)
+        if(e->hash == hash && !strcmp(e->name, s))
+            break;
+
+#ifdef NETDATA_INTERNAL_CHECKS
+    if(unlikely(base->next_keyword && e == base->next_keyword))
+        fatal("Internal Error: e == base->last");
+#endif
+
+    if(e) {
+        // found it in the keywords
+
+        base->relinkings++;
+
+        // run the processor for it
+        if(unlikely(e->dst)) {
+            e->processor(e->name, hash, value, e->dst);
+            base->found++;
+        }
+
+        // unlink it - we will relink it below
+        if(e->next) e->next->prev = e->prev;
+        if(e->prev) e->prev->next = e->next;
+
+        // make sure the head is properly linked
+        if(base->head == e)
+            base->head = e->next;
+    }
+    else {
+        // not found
+
+        // create it
+        e = callocz(1, sizeof(ARL_ENTRY));
+        e->name = strdupz(s);
+        e->hash = hash;
+        e->flags = ARL_ENTRY_FLAG_DYNAMIC;
+
+        base->allocated++;
+        base->added++;
+    }
+
+#ifdef NETDATA_INTERNAL_CHECKS
+    if(unlikely(base->iteration % 60 == 0 && e->flags & ARL_ENTRY_FLAG_FOUND))
+        netdata_log_info("ARL '%s': entry '%s' is already found. Did you forget to call arl_begin()?", base->name, s);
+#endif
+
+    e->flags |= ARL_ENTRY_FLAG_FOUND;
+
+    // link it here
+    e->next = base->next_keyword;
+    if(base->next_keyword) {
+        e->prev = base->next_keyword->prev;
+        base->next_keyword->prev = e;
+
+        if(e->prev)
+            e->prev->next = e;
+
+        if(base->head == base->next_keyword)
+            base->head = e;
+    }
+    else {
+        e->prev = NULL;
+
+        if(!base->head)
+            base->head = e;
+    }
+
+    // prepare the next iteration
+    base->next_keyword = e->next;
+    if(unlikely(!base->next_keyword))
+        base->next_keyword = base->head;
+
+    if(unlikely(base->found == base->wanted)) {
+        // fprintf(stderr, "FOUND ALL WANTED 1: found = %zu, wanted = %zu, expected %zu\n", base->found, base->wanted, base->expected);
+        return 1;
+    }
+
+    return 0;
+}
diff --git a/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h b/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h
new file mode 100644
index 00000000..bca0ff27
--- /dev/null
+++ b/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "../libnetdata.h"
+
+#ifndef NETDATA_ADAPTIVE_RESORTABLE_LIST_H
+#define NETDATA_ADAPTIVE_RESORTABLE_LIST_H 1
+
+#define ARL_ENTRY_FLAG_FOUND    0x01    // the entry has been found in the source data
+#define ARL_ENTRY_FLAG_EXPECTED 0x02    // the entry is expected by the program
+#define ARL_ENTRY_FLAG_DYNAMIC  0x04    // the entry was dynamically allocated, from source data
+
+typedef struct arl_entry {
+    char *name;             // the keyword
+    uint32_t hash;          // the hash of the keyword
+
+    void *dst;              // the dst to pass to the processor
+
+    uint8_t flags;          // ARL_ENTRY_FLAG_*
+
+    // the processor to do the job
+    void (*processor)(const char *name, uint32_t hash, const char *value, void *dst);
+
+    // double linked list for fast re-linkings
+    struct arl_entry *prev, *next;
+} ARL_ENTRY;
+
+typedef struct arl_base {
+    char *name;
+
+    size_t iteration;   // incremented on each iteration (arl_begin())
+    size_t found;       // the number of expected keywords found in this iteration
+    size_t expected;    // the number of expected keywords
+    size_t wanted;      // the number of wanted keywords
+                        // i.e. the number of keywords found and expected
+
+    size_t relinkings;  // the number of relinkings we have made so far
+
+    size_t allocated;   // the number of keywords allocated
+    size_t fred;        // the number of keywords cleaned up
+
+    size_t rechecks;    // the number of iterations between re-checks of the
+                        // wanted number of keywords
+                        // this is only needed in cases where the source
+                        // has fewer lines over time.
+
+    size_t added;       // non-zero if new keywords have been added
+                        // this is only needed to detect that new lines
+                        // have been added to the file, over time.
+
+#ifdef NETDATA_INTERNAL_CHECKS
+    size_t fast;        // the number of times we have taken the fast path
+    size_t slow;        // the number of times we have taken the slow path
+#endif
+
+    // the processor to do the job
+    void (*processor)(const char *name, uint32_t hash, const char *value, void *dst);
+
+    // the linked list of the keywords
+    ARL_ENTRY *head;
+
+    // since we keep the list of keywords sorted (as found in the source data)
+    // this is the next keyword that we expect to find in the source data.
+    ARL_ENTRY *next_keyword;
+} ARL_BASE;
+
+// create a new ARL
+ARL_BASE *arl_create(const char *name, void (*processor)(const char *, uint32_t, const char *, void *), size_t rechecks);
+
+// free an ARL
+void arl_free(ARL_BASE *arl_base);
+
+// register an expected keyword to the ARL
+// together with its destination ( i.e. the output of the processor() )
+ARL_ENTRY *arl_expect_custom(ARL_BASE *base, const char *keyword, void (*processor)(const char *name, uint32_t hash, const char *value, void *dst), void *dst);
+#define arl_expect(base, keyword, dst) arl_expect_custom(base, keyword, NULL, dst)
+
+// an internal call to complete the check() call
+int arl_find_or_create_and_relink(ARL_BASE *base, const char *s, const char *value);
+
+// begin an ARL iteration
+void arl_begin(ARL_BASE *base);
+
+void arl_callback_str2ull(const char *name, uint32_t hash, const char *value, void *dst);
+void arl_callback_str2kernel_uint_t(const char *name, uint32_t hash, const char *value, void *dst);
+void arl_callback_ssize_t(const char *name, uint32_t hash, const char *value, void *dst);
+
+// check a keyword against the ARL
+// this is to be called for each keyword read from the source data
+// keyword = the keyword, as collected
+// value   = the value, as collected, to be passed to the processor
+// it is defined in the header file in order to be inlined
+static inline int arl_check(ARL_BASE *base, const char *keyword, const char *value) {
+    ARL_ENTRY *e = base->next_keyword;
+
+#ifdef NETDATA_INTERNAL_CHECKS
+    if(unlikely((base->fast + base->slow) % (base->expected + base->allocated) == 0 && (base->fast + base->slow) > (base->expected + base->allocated) * base->iteration))
+        netdata_log_info("ARL '%s': Did you forget to call arl_begin()?", base->name);
+#endif
+
+    // it should be the first entry (pointed by base->next_keyword)
+    if(likely(!strcmp(keyword, e->name))) {
+        // it is
+
+#ifdef NETDATA_INTERNAL_CHECKS
+        base->fast++;
+#endif
+
+        e->flags |= ARL_ENTRY_FLAG_FOUND;
+
+        // execute the processor
+        if(unlikely(e->dst)) {
+            e->processor(e->name, e->hash, value, e->dst);
+            base->found++;
+        }
+
+        // be prepared for the next iteration
+        base->next_keyword = e->next;
+        if(unlikely(!base->next_keyword))
+            base->next_keyword = base->head;
+
+        // stop if we collected all the values for this iteration
+        if(unlikely(base->found == base->wanted)) {
+            // fprintf(stderr, "FOUND ALL WANTED 2: found = %zu, wanted = %zu, expected %zu\n", base->found, base->wanted, base->expected);
+            return 1;
+        }
+
+        return 0;
+    }
+
+#ifdef NETDATA_INTERNAL_CHECKS
+    base->slow++;
+#endif
+
+    // we read a not-expected keyword from the source
+    return arl_find_or_create_and_relink(base, keyword, value);
+}
+
+#endif //NETDATA_ADAPTIVE_RESORTABLE_LIST_H
-- 
cgit v1.2.3