summaryrefslogtreecommitdiffstats
path: root/libnetdata/adaptive_resortable_list
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 02:57:58 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 02:57:58 +0000
commitbe1c7e50e1e8809ea56f2c9d472eccd8ffd73a97 (patch)
tree9754ff1ca740f6346cf8483ec915d4054bc5da2d /libnetdata/adaptive_resortable_list
parentInitial commit. (diff)
downloadnetdata-be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97.tar.xz
netdata-be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97.zip
Adding upstream version 1.44.3.upstream/1.44.3upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'libnetdata/adaptive_resortable_list')
-rw-r--r--libnetdata/adaptive_resortable_list/Makefile.am8
-rw-r--r--libnetdata/adaptive_resortable_list/README.md103
-rw-r--r--libnetdata/adaptive_resortable_list/adaptive_resortable_list.c280
-rw-r--r--libnetdata/adaptive_resortable_list/adaptive_resortable_list.h138
4 files changed, 529 insertions, 0 deletions
diff --git a/libnetdata/adaptive_resortable_list/Makefile.am b/libnetdata/adaptive_resortable_list/Makefile.am
new file mode 100644
index 00000000..161784b8
--- /dev/null
+++ b/libnetdata/adaptive_resortable_list/Makefile.am
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+
+AUTOMAKE_OPTIONS = subdir-objects
+MAINTAINERCLEANFILES = $(srcdir)/Makefile.in
+
+dist_noinst_DATA = \
+ README.md \
+ $(NULL)
diff --git a/libnetdata/adaptive_resortable_list/README.md b/libnetdata/adaptive_resortable_list/README.md
new file mode 100644
index 00000000..ceed467d
--- /dev/null
+++ b/libnetdata/adaptive_resortable_list/README.md
@@ -0,0 +1,103 @@
+<!--
+title: "Adaptive Re-sortable List (ARL)"
+custom_edit_url: https://github.com/netdata/netdata/edit/master/libnetdata/adaptive_resortable_list/README.md
+sidebar_label: "Adaptive Re-sortable List (ARL)"
+learn_status: "Published"
+learn_topic_type: "Tasks"
+learn_rel_path: "Developers/libnetdata"
+-->
+
+# Adaptive Re-sortable List (ARL)
+
+This library allows Netdata to read a series of `name - value` pairs
+in the **fastest possible way**.
+
+ARLs are used all over Netdata, as they are the most
+CPU-efficient way to process `/proc` files. They are used to
+process both vertical (csv like) and horizontal (one pair per line) `name - value` pairs.
+
+## How ARL works
+
+It maintains a linked list of all `NAME` (keywords), sorted in the
+order found in the data source. The linked list is kept
+sorted at all times - the data source may change at any time, the
+linked list will adapt at the next iteration.
+
+### Initialization
+
+During initialization (just once), the caller:
+
+- calls `arl_create()` to create the ARL
+
+- calls `arl_expect()` multiple times to register the expected keywords
+
+The library will call the `processor()` function (given to
+`arl_create()`), for each expected keyword found.
+The default `processor()` expects `dst` to be an `unsigned long long *`.
+
+Each `name` keyword may have a different `processor()` (by calling
+`arl_expect_custom()` instead of `arl_expect()`).
+
+### Data collection iterations
+
+For each iteration through the data source, the caller:
+
+- calls `arl_begin()` to initiate a data collection iteration.
+ This is to be called just ONCE every time the source is re-evaluated.
+
+- calls `arl_check()` for each entry read from the file.
+
+### Cleanup
+
+When the caller exits:
+
+- calls `arl_free()` to destroy the ARL and free all its memory.
+
+### Performance
+
+ARL maintains a list of `name` keywords found in the data source (even the ones
+that are not useful for data collection).
+
+If the data source maintains the same order on the `name-value` pairs, for
+each call to `arl_check()` only an `strcmp()` is executed to verify the
+expected order has not changed, a counter is incremented and a pointer is changed.
+So, if the data source has 100 `name-value` pairs, and their order remains constant
+over time, 100 successful `strcmp()` are executed.
+
+In the unlikely event that an iteration sees the data source with a different order,
+for each out-of-order keyword, a full search of the remaining keywords is made. But
+this search uses 32bit hashes, not string comparisons, so it should also be fast.
+
+When all expectations are satisfied (even in the middle of an iteration),
+the call to `arl_check()` will return 1, to signal the caller to stop the loop,
+saving valuable CPU resources for the rest of the data source.
+
+In the following test we used alternative methods to process, **1M times**,
+a data source like `/proc/meminfo`, already tokenized, in memory,
+to extract the same number of expected metrics:
+
+|test|code|string comparison|number parsing|duration|
+|:--:|:--:|:---------------:|:------------:|:------:|
+|1|if-else-if-else-if|`strcmp()`|`strtoull()`|4630.337 ms|
+|2|nested loops|inline `simple_hash()` and `strcmp()`|`strtoull()`|1597.481 ms|
+|3|nested loops|inline `simple_hash()` and `strcmp()`|`str2ull()`|923.523 ms|
+|4|if-else-if-else-if|inline `simple_hash()` and `strcmp()`|`strtoull()`|854.574 ms|
+|5|if-else-if-else-if|statement expression `simple_hash()` and `strcmp()`|`strtoull()`|912.013 ms|
+|6|if-continue|inline `simple_hash()` and `strcmp()`|`strtoull()`|842.279 ms|
+|7|if-else-if-else-if|inline `simple_hash()` and `strcmp()`|`str2ull()`|602.837 ms|
+|8|ARL|ARL|`strtoull()`|350.360 ms|
+|9|ARL|ARL|`str2ull()`|157.237 ms|
+
+Compared to unoptimized code (test No 1: 4.6sec):
+
+- before ARL Netdata was using test No **7** with hashing and a custom `str2ull()` to achieve 602ms.
+- the current ARL implementation is test No **9** that needs only 157ms (29 times faster vs unoptimized code, about 4 times faster vs optimized code).
+
+[Check the source code of this test](https://raw.githubusercontent.com/netdata/netdata/master/tests/profile/benchmark-value-pairs.c).
+
+## Limitations
+
+Do not use ARL if a name/keyword may appear more than once in the
+source data.
+
+
diff --git a/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c b/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c
new file mode 100644
index 00000000..b645927d
--- /dev/null
+++ b/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c
@@ -0,0 +1,280 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "../libnetdata.h"
+
+// the default processor() of the ARL: parses value with str2ull() and stores the
+// result through dst (an unsigned long long *); can be overridden at arl_create()
+inline void arl_callback_str2ull(const char *name, uint32_t hash, const char *value, void *dst) {
+ (void)name; // unused by this processor
+ (void)hash; // unused by this processor
+
+ register unsigned long long *d = dst;
+ *d = str2ull(value, NULL);
+ // fprintf(stderr, "name '%s' with hash %u and value '%s' is %llu\n", name, hash, value, *d);
+}
+
+inline void arl_callback_str2kernel_uint_t(const char *name, uint32_t hash, const char *value, void *dst) { // processor(): parses value with str2kernel_uint_t() into *dst
+ (void)name; // unused by this processor
+ (void)hash; // unused by this processor
+
+ register kernel_uint_t *d = dst; // dst is treated as a kernel_uint_t *
+ *d = str2kernel_uint_t(value);
+ // fprintf(stderr, "name '%s' with hash %u and value '%s' is %llu\n", name, hash, value, (unsigned long long)*d);
+}
+
+inline void arl_callback_ssize_t(const char *name, uint32_t hash, const char *value, void *dst) { // processor(): parses value with str2ll() into *dst
+ (void)name; // unused by this processor
+ (void)hash; // unused by this processor
+
+ register ssize_t *d = dst; // dst is treated as a ssize_t *
+ *d = (ssize_t)str2ll(value, NULL); // the str2ll() result is cast to ssize_t before it is stored
+ // fprintf(stderr, "name '%s' with hash %u and value '%s' is %zd\n", name, hash, value, *d);
+}
+
+// create a new ARL called 'name'; a NULL processor selects arl_callback_str2ull() as the default
+ARL_BASE *arl_create(const char *name, void (*processor)(const char *, uint32_t, const char *, void *), size_t rechecks) {
+ ARL_BASE *base = callocz(1, sizeof(ARL_BASE)); // presumably zero-filled like calloc(), so counters and list pointers start at 0/NULL
+
+ base->name = strdupz(name); // the base keeps its own copy of the name; arl_free() releases it
+
+ if(!processor)
+ base->processor = arl_callback_str2ull;
+ else
+ base->processor = processor;
+
+ base->rechecks = rechecks; // period, in iterations, of the 'wanted' re-check done by arl_begin()
+
+ return base; // to be released with arl_free()
+}
+
+void arl_free(ARL_BASE *arl_base) { // frees every entry of the list, the name and the base itself
+ if(unlikely(!arl_base)) // a NULL base is accepted and ignored
+ return;
+
+ while(arl_base->head) { // detach and free the head entry until the list is empty
+ ARL_ENTRY *e = arl_base->head;
+ arl_base->head = e->next;
+
+ freez(e->name);
+#ifdef NETDATA_INTERNAL_CHECKS
+ memset(e, 0, sizeof(ARL_ENTRY)); // internal checks only: wipe the entry before releasing it
+#endif
+ freez(e);
+ }
+
+ freez(arl_base->name);
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ memset(arl_base, 0, sizeof(ARL_BASE)); // internal checks only: wipe the base before releasing it
+#endif
+
+ freez(arl_base);
+}
+
+void arl_begin(ARL_BASE *base) {
+ // begin a data collection iteration: re-evaluate 'wanted', drop stale dynamic entries, rewind to the head of the list
+#ifdef NETDATA_INTERNAL_CHECKS
+ if(likely(base->iteration > 10)) {
+ // do these checks after the ARL has been sorted
+
+ if(unlikely(base->relinkings > (base->expected + base->allocated)))
+ netdata_log_info("ARL '%s' has %zu relinkings with %zu expected and %zu allocated entries. Is the source changing so fast?"
+ , base->name, base->relinkings, base->expected, base->allocated);
+
+ if(unlikely(base->slow > base->fast))
+ netdata_log_info("ARL '%s' has %zu fast searches and %zu slow searches. Is the source really changing so fast?"
+ , base->name, base->fast, base->slow);
+
+ /*
+ if(unlikely(base->iteration % 60 == 0)) {
+ netdata_log_info("ARL '%s' statistics: iteration %zu, expected %zu, wanted %zu, allocated %zu, fred %zu, relinkings %zu, found %zu, added %zu, fast %zu, slow %zu"
+ , base->name
+ , base->iteration
+ , base->expected
+ , base->wanted
+ , base->allocated
+ , base->fred
+ , base->relinkings
+ , base->found
+ , base->added
+ , base->fast
+ , base->slow
+ );
+ // for(e = base->head; e; e = e->next) fprintf(stderr, "%s ", e->name);
+ // fprintf(stderr, "\n");
+ }
+ */
+ }
+#endif
+
+ if(unlikely(base->iteration > 0 && (base->added || (base->rechecks && (base->iteration % base->rechecks) == 0)))) {
+ int wanted_equals_expected = (base->rechecks && (base->iteration % base->rechecks) == 0); // rechecks == 0 disables the periodic re-check instead of dividing by zero
+
+ // fprintf(stderr, "\n\narl_begin() rechecking, added %zu, iteration %zu, rechecks %zu, wanted_equals_expected %d\n\n\n", base->added, base->iteration, base->rechecks, wanted_equals_expected);
+
+ base->added = 0;
+ base->wanted = (wanted_equals_expected)?base->expected:0; // periodic re-check: want everything expected; otherwise recount below
+
+ ARL_ENTRY *e = base->head;
+ while(e) {
+ if(e->flags & ARL_ENTRY_FLAG_FOUND) {
+
+ // remove the found flag
+ e->flags &= ~ARL_ENTRY_FLAG_FOUND;
+
+ // count it in wanted
+ if(!wanted_equals_expected && e->flags & ARL_ENTRY_FLAG_EXPECTED)
+ base->wanted++;
+
+ }
+ else if(e->flags & ARL_ENTRY_FLAG_DYNAMIC && !(base->head == e && !e->next)) { // not the only entry in the list
+ // we can remove this entry
+ // it is not found, and it was created because
+ // it was found in the source file
+
+ // remember the next one
+ ARL_ENTRY *t = e->next;
+
+ // remove it from the list
+ if(e->next) e->next->prev = e->prev;
+ if(e->prev) e->prev->next = e->next;
+ if(base->head == e) base->head = e->next;
+
+ // free it
+ freez(e->name);
+ freez(e);
+
+ // count it
+ base->fred++;
+
+ // continue
+ e = t;
+ continue;
+ }
+
+ e = e->next;
+ }
+ }
+
+ if(unlikely(!base->head)) {
+ // hm... no nodes at all in the list #1700
+ // add a fake one to prevent a crash
+ // this is better than checking for the existence of nodes all the time
+ arl_expect(base, "a-really-not-existing-source-keyword", NULL);
+ }
+
+ base->iteration++;
+ base->next_keyword = base->head; // the first keyword of the source is expected to match the head
+ base->found = 0; // nothing has been collected yet in this iteration
+
+}
+
+// register an expected keyword to the ARL, together with its destination
+// ( i.e. the output of the processor() ); a NULL processor selects the default processor of the base
+ARL_ENTRY *arl_expect_custom(ARL_BASE *base, const char *keyword, void (*processor)(const char *name, uint32_t hash, const char *value, void *dst), void *dst) {
+ ARL_ENTRY *e = callocz(1, sizeof(ARL_ENTRY));
+ e->name = strdupz(keyword); // the entry keeps its own copy of the keyword
+ e->hash = simple_hash(e->name);
+ e->processor = (processor)?processor:base->processor;
+ e->dst = dst;
+ e->flags = ARL_ENTRY_FLAG_EXPECTED;
+ e->prev = NULL;
+ e->next = base->head; // new entries are inserted at the head of the list
+
+ if(base->head) base->head->prev = e;
+ else base->next_keyword = e; // the list was empty: this entry is also the next keyword to check
+
+ base->head = e;
+ base->expected++;
+ base->allocated++;
+
+ base->wanted = base->expected; // every expected keyword is wanted until arl_begin() re-evaluates it
+
+ return e;
+}
+
+int arl_find_or_create_and_relink(ARL_BASE *base, const char *s, const char *value) { // slow path of arl_check(); returns 1 when found == wanted, 0 otherwise
+ ARL_ENTRY *e;
+
+ uint32_t hash = simple_hash(s);
+
+ // search the whole list for this keyword: compare hashes first, strcmp() only on a hash match
+ for(e = base->head; e ; e = e->next)
+ if(e->hash == hash && !strcmp(e->name, s))
+ break;
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ if(unlikely(base->next_keyword && e == base->next_keyword))
+ fatal("Internal Error: e == base->last"); // NOTE(review): the message says base->last, but the field compared is base->next_keyword
+#endif
+
+ if(e) {
+ // found it in the keywords
+
+ base->relinkings++;
+
+ // run the processor for it
+ if(unlikely(e->dst)) { // entries without a destination are neither processed nor counted as found
+ e->processor(e->name, hash, value, e->dst);
+ base->found++;
+ }
+
+ // unlink it - we will relink it below
+ if(e->next) e->next->prev = e->prev;
+ if(e->prev) e->prev->next = e->next;
+
+ // make sure the head is properly linked
+ if(base->head == e)
+ base->head = e->next;
+ }
+ else {
+ // not found
+
+ // create it
+ e = callocz(1, sizeof(ARL_ENTRY));
+ e->name = strdupz(s);
+ e->hash = hash;
+ e->flags = ARL_ENTRY_FLAG_DYNAMIC; // dynamic entries get no dst and no processor here
+
+ base->allocated++;
+ base->added++; // makes the next arl_begin() re-evaluate the wanted keywords
+ }
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ if(unlikely(base->iteration % 60 == 0 && e->flags & ARL_ENTRY_FLAG_FOUND))
+ netdata_log_info("ARL '%s': entry '%s' is already found. Did you forget to call arl_begin()?", base->name, s);
+#endif
+
+ e->flags |= ARL_ENTRY_FLAG_FOUND;
+
+ // link it just before base->next_keyword
+ e->next = base->next_keyword;
+ if(base->next_keyword) {
+ e->prev = base->next_keyword->prev;
+ base->next_keyword->prev = e;
+
+ if(e->prev)
+ e->prev->next = e;
+
+ if(base->head == base->next_keyword)
+ base->head = e;
+ }
+ else {
+ e->prev = NULL;
+
+ if(!base->head)
+ base->head = e;
+ }
+
+ // prepare for the next keyword: continue after e, wrapping to the head at the end of the list
+ base->next_keyword = e->next;
+ if(unlikely(!base->next_keyword))
+ base->next_keyword = base->head;
+
+ if(unlikely(base->found == base->wanted)) {
+ // fprintf(stderr, "FOUND ALL WANTED 1: found = %zu, wanted = %zu, expected %zu\n", base->found, base->wanted, base->expected);
+ return 1;
+ }
+
+ return 0;
+}
diff --git a/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h b/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h
new file mode 100644
index 00000000..bca0ff27
--- /dev/null
+++ b/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#include "../libnetdata.h"
+
+#ifndef NETDATA_ADAPTIVE_RESORTABLE_LIST_H
+#define NETDATA_ADAPTIVE_RESORTABLE_LIST_H 1
+
+#define ARL_ENTRY_FLAG_FOUND 0x01 // the entry has been found in the source data
+#define ARL_ENTRY_FLAG_EXPECTED 0x02 // the entry is expected by the program
+#define ARL_ENTRY_FLAG_DYNAMIC 0x04 // the entry was dynamically allocated, from source data
+
+typedef struct arl_entry {
+ char *name; // the keyword (a copy owned by the entry)
+ uint32_t hash; // the simple_hash() of the keyword
+
+ void *dst; // the dst to pass to the processor; when NULL the processor is not called
+
+ uint8_t flags; // ARL_ENTRY_FLAG_*
+
+ // the processor to do the job, called as processor(name, hash, value, dst)
+ void (*processor)(const char *name, uint32_t hash, const char *value, void *dst);
+
+ // doubly linked list for fast re-linkings
+ struct arl_entry *prev, *next;
+} ARL_ENTRY;
+
+typedef struct arl_base {
+ char *name; // the name of this ARL, used in log messages
+
+ size_t iteration; // incremented on each iteration (arl_begin())
+ size_t found; // the number of expected keywords found in this iteration
+ size_t expected; // the number of expected keywords
+ size_t wanted; // the number of wanted keywords
+ // i.e. the number of keywords found and expected
+
+ size_t relinkings; // the number of relinkings we have made so far
+
+ size_t allocated; // the number of keywords allocated
+ size_t fred; // the number of keywords freed (cleaned up) by arl_begin()
+
+ size_t rechecks; // the number of iterations between re-checks of the
+ // wanted number of keywords
+ // this is only needed in cases where the source
+ // has fewer lines over time.
+
+ size_t added; // it is non-zero if new keywords have been added
+ // this is only needed to detect that new lines have
+ // been added to the file, over time.
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ size_t fast; // the number of times we have taken the fast path
+ size_t slow; // the number of times we have taken the slow path
+#endif
+
+ // the default processor, used by entries registered without their own
+ void (*processor)(const char *name, uint32_t hash, const char *value, void *dst);
+
+ // the linked list of the keywords
+ ARL_ENTRY *head;
+
+ // since we keep the list of keywords sorted (as found in the source data)
+ // this is the next keyword that we expect to find in the source data.
+ ARL_ENTRY *next_keyword;
+} ARL_BASE;
+
+// create a new ARL
+ARL_BASE *arl_create(const char *name, void (*processor)(const char *, uint32_t, const char *, void *), size_t rechecks);
+
+// free an ARL
+void arl_free(ARL_BASE *arl_base);
+
+// register an expected keyword to the ARL
+// together with its destination ( i.e. the output of the processor() )
+ARL_ENTRY *arl_expect_custom(ARL_BASE *base, const char *keyword, void (*processor)(const char *name, uint32_t hash, const char *value, void *dst), void *dst);
+#define arl_expect(base, keyword, dst) arl_expect_custom(base, keyword, NULL, dst)
+
+// an internal call to complete the check() call
+int arl_find_or_create_and_relink(ARL_BASE *base, const char *s, const char *value);
+
+// begin an ARL iteration
+void arl_begin(ARL_BASE *base);
+
+void arl_callback_str2ull(const char *name, uint32_t hash, const char *value, void *dst);
+void arl_callback_str2kernel_uint_t(const char *name, uint32_t hash, const char *value, void *dst);
+void arl_callback_ssize_t(const char *name, uint32_t hash, const char *value, void *dst);
+
+// check a keyword against the ARL
+// this is to be called for each keyword read from source data
+// keyword = the keyword, as collected
+// value = the value read from the source, passed as-is to the processor
+// returns 1 when all wanted keywords have been found, 0 otherwise; defined in the header file in order to be inlined
+static inline int arl_check(ARL_BASE *base, const char *keyword, const char *value) {
+ ARL_ENTRY *e = base->next_keyword; // arl_begin() guarantees a non-empty list and points this to its head
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ if(unlikely((base->fast + base->slow) % (base->expected + base->allocated) == 0 && (base->fast + base->slow) > (base->expected + base->allocated) * base->iteration))
+ netdata_log_info("ARL '%s': Did you forget to call arl_begin()?", base->name);
+#endif
+
+ // it should be the first entry (pointed by base->next_keyword)
+ if(likely(!strcmp(keyword, e->name))) {
+ // it is
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ base->fast++;
+#endif
+
+ e->flags |= ARL_ENTRY_FLAG_FOUND;
+
+ // execute the processor
+ if(unlikely(e->dst)) { // entries without a destination are matched, but neither processed nor counted
+ e->processor(e->name, e->hash, value, e->dst);
+ base->found++;
+ }
+
+ // be prepared for the next keyword, wrapping to the head at the end of the list
+ base->next_keyword = e->next;
+ if(unlikely(!base->next_keyword))
+ base->next_keyword = base->head;
+
+ // stop if we collected all the values for this iteration
+ if(unlikely(base->found == base->wanted)) {
+ // fprintf(stderr, "FOUND ALL WANTED 2: found = %zu, wanted = %zu, expected %zu\n", base->found, base->wanted, base->expected);
+ return 1;
+ }
+
+ return 0;
+ }
+
+#ifdef NETDATA_INTERNAL_CHECKS
+ base->slow++;
+#endif
+
+ // the keyword is not the one expected at this position: search the list, or add it
+ return arl_find_or_create_and_relink(base, keyword, value);
+}
+
+#endif //NETDATA_ADAPTIVE_RESORTABLE_LIST_H