From 8020f71afd34d7696d7933659df2d763ab05542f Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 16:31:17 +0200 Subject: Adding upstream version 1.37.1. Signed-off-by: Daniel Baumann --- libnetdata/Makefile.am | 37 + libnetdata/README.md | 10 + libnetdata/adaptive_resortable_list/Makefile.am | 8 + libnetdata/adaptive_resortable_list/README.md | 99 + .../adaptive_resortable_list.c | 280 ++ .../adaptive_resortable_list.h | 138 + libnetdata/arrayalloc/Makefile.am | 8 + libnetdata/arrayalloc/README.md | 7 + libnetdata/arrayalloc/arrayalloc.c | 489 +++ libnetdata/arrayalloc/arrayalloc.h | 48 + libnetdata/avl/Makefile.am | 8 + libnetdata/avl/README.md | 17 + libnetdata/avl/avl.c | 420 +++ libnetdata/avl/avl.h | 92 + libnetdata/buffer/Makefile.am | 8 + libnetdata/buffer/README.md | 16 + libnetdata/buffer/buffer.c | 525 +++ libnetdata/buffer/buffer.h | 89 + libnetdata/circular_buffer/Makefile.am | 8 + libnetdata/circular_buffer/README.md | 10 + libnetdata/circular_buffer/circular_buffer.c | 96 + libnetdata/circular_buffer/circular_buffer.h | 19 + libnetdata/clocks/Makefile.am | 8 + libnetdata/clocks/README.md | 5 + libnetdata/clocks/clocks.c | 432 +++ libnetdata/clocks/clocks.h | 160 + libnetdata/completion/Makefile.am | 4 + libnetdata/completion/completion.c | 34 + libnetdata/completion/completion.h | 22 + libnetdata/config/Makefile.am | 8 + libnetdata/config/README.md | 54 + libnetdata/config/appconfig.c | 960 ++++++ libnetdata/config/appconfig.h | 219 ++ libnetdata/dictionary/Makefile.am | 8 + libnetdata/dictionary/README.md | 231 ++ libnetdata/dictionary/dictionary.c | 3620 ++++++++++++++++++++ libnetdata/dictionary/dictionary.h | 323 ++ libnetdata/ebpf/Makefile.am | 8 + libnetdata/ebpf/README.md | 5 + libnetdata/ebpf/ebpf.c | 1427 ++++++++ libnetdata/ebpf/ebpf.h | 371 ++ libnetdata/eval/Makefile.am | 8 + libnetdata/eval/README.md | 1 + libnetdata/eval/eval.c | 1201 +++++++ libnetdata/eval/eval.h | 87 + libnetdata/health/Makefile.am | 7 + libnetdata/health/health.c | 173 + libnetdata/health/health.h | 55 + libnetdata/inlined.h | 269 ++ libnetdata/json/Makefile.am | 8 + libnetdata/json/README.md | 10 + libnetdata/json/jsmn.c | 328 ++ libnetdata/json/jsmn.h | 75 + libnetdata/json/json.c | 557 +++ libnetdata/json/json.h | 77 + libnetdata/libjudy/src/Judy.h | 622 ++++ libnetdata/libjudy/src/JudyCommon/JudyMalloc.c | 87 + libnetdata/libjudy/src/JudyCommon/JudyPrivate.h | 1613 +++++++++ libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h | 485 +++ .../libjudy/src/JudyCommon/JudyPrivateBranch.h | 788 +++++ libnetdata/libjudy/src/JudyHS/JudyHS.c | 771 +++++ libnetdata/libjudy/src/JudyL/JudyL.h | 505 +++ libnetdata/libjudy/src/JudyL/JudyLByCount.c | 954 ++++++ libnetdata/libjudy/src/JudyL/JudyLCascade.c | 1942 +++++++++++ libnetdata/libjudy/src/JudyL/JudyLCount.c | 1195 +++++++ libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c | 314 ++ libnetdata/libjudy/src/JudyL/JudyLDecascade.c | 1206 +++++++ libnetdata/libjudy/src/JudyL/JudyLDel.c | 2146 ++++++++++++ libnetdata/libjudy/src/JudyL/JudyLFirst.c | 213 ++ libnetdata/libjudy/src/JudyL/JudyLFreeArray.c | 363 ++ libnetdata/libjudy/src/JudyL/JudyLGet.c | 1094 ++++++ libnetdata/libjudy/src/JudyL/JudyLIns.c | 1873 ++++++++++ libnetdata/libjudy/src/JudyL/JudyLInsArray.c | 1178 +++++++ libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c | 135 + libnetdata/libjudy/src/JudyL/JudyLMallocIF.c | 782 +++++ libnetdata/libjudy/src/JudyL/JudyLMemActive.c | 259 ++ libnetdata/libjudy/src/JudyL/JudyLMemUsed.c | 61 + libnetdata/libjudy/src/JudyL/JudyLNext.c | 1890 ++++++++++ libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c | 1390 ++++++++ libnetdata/libjudy/src/JudyL/JudyLPrev.c | 1890 ++++++++++ libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c | 1390 ++++++++ libnetdata/libjudy/src/JudyL/JudyLTablesGen.c | 296 ++ libnetdata/libjudy/src/JudyL/j__udyLGet.c | 1094 ++++++ libnetdata/libnetdata.c | 1936 +++++++++++ libnetdata/libnetdata.h | 507 +++ libnetdata/locks/Makefile.am | 8 + libnetdata/locks/README.md | 100 + libnetdata/locks/locks.c | 757 ++++ libnetdata/locks/locks.h | 127 + libnetdata/log/Makefile.am | 8 + libnetdata/log/README.md | 5 + libnetdata/log/log.c | 1084 ++++++ libnetdata/log/log.h | 141 + libnetdata/onewayalloc/Makefile.am | 8 + libnetdata/onewayalloc/README.md | 71 + libnetdata/onewayalloc/onewayalloc.c | 193 ++ libnetdata/onewayalloc/onewayalloc.h | 19 + libnetdata/os.c | 230 ++ libnetdata/os.h | 67 + libnetdata/popen/Makefile.am | 8 + libnetdata/popen/README.md | 5 + libnetdata/popen/popen.c | 460 +++ libnetdata/popen/popen.h | 40 + libnetdata/procfile/Makefile.am | 8 + libnetdata/procfile/README.md | 67 + libnetdata/procfile/procfile.c | 479 +++ libnetdata/procfile/procfile.h | 108 + libnetdata/required_dummies.h | 43 + libnetdata/simple_pattern/Makefile.am | 8 + libnetdata/simple_pattern/README.md | 43 + libnetdata/simple_pattern/simple_pattern.c | 363 ++ libnetdata/simple_pattern/simple_pattern.h | 42 + libnetdata/socket/Makefile.am | 8 + libnetdata/socket/README.md | 5 + libnetdata/socket/security.c | 390 +++ libnetdata/socket/security.h | 63 + libnetdata/socket/socket.c | 1914 +++++++++++ libnetdata/socket/socket.h | 215 ++ libnetdata/statistical/Makefile.am | 8 + libnetdata/statistical/README.md | 5 + libnetdata/statistical/statistical.c | 460 +++ libnetdata/statistical/statistical.h | 34 + libnetdata/storage_number/Makefile.am | 12 + libnetdata/storage_number/README.md | 17 + libnetdata/storage_number/storage_number.c | 231 ++ libnetdata/storage_number/storage_number.h | 232 ++ libnetdata/storage_number/tests/Makefile.am | 4 + .../storage_number/tests/test_storage_number.c | 52 + libnetdata/string/Makefile.am | 8 + libnetdata/string/README.md | 20 + libnetdata/string/string.c | 596 ++++ libnetdata/string/string.h | 30 + libnetdata/string/utf8.h | 9 + libnetdata/tests/Makefile.am | 4 + libnetdata/tests/test_str2ld.c | 48 + libnetdata/threads/Makefile.am | 8 + libnetdata/threads/README.md | 5 + libnetdata/threads/threads.c | 267 ++ libnetdata/threads/threads.h | 49 + libnetdata/url/Makefile.am | 8 + libnetdata/url/README.md | 5 + libnetdata/url/url.c | 391 +++ libnetdata/url/url.h | 35 + libnetdata/worker_utilization/Makefile.am | 8 + libnetdata/worker_utilization/README.md | 90 + libnetdata/worker_utilization/worker_utilization.c | 362 ++ libnetdata/worker_utilization/worker_utilization.h | 47 + 147 files changed, 52298 insertions(+) create mode 100644 libnetdata/Makefile.am create mode 100644 libnetdata/README.md create mode 100644 libnetdata/adaptive_resortable_list/Makefile.am create mode 100644 libnetdata/adaptive_resortable_list/README.md create mode 100644 libnetdata/adaptive_resortable_list/adaptive_resortable_list.c create mode 100644 libnetdata/adaptive_resortable_list/adaptive_resortable_list.h create mode 100644 libnetdata/arrayalloc/Makefile.am create mode 100644 libnetdata/arrayalloc/README.md create mode 100644 libnetdata/arrayalloc/arrayalloc.c create mode 100644 libnetdata/arrayalloc/arrayalloc.h create mode 100644 libnetdata/avl/Makefile.am create mode 100644 libnetdata/avl/README.md create mode 100644 libnetdata/avl/avl.c create mode 100644 libnetdata/avl/avl.h create mode 100644 libnetdata/buffer/Makefile.am create mode 100644 libnetdata/buffer/README.md create mode 100644 libnetdata/buffer/buffer.c create mode 100644 libnetdata/buffer/buffer.h create mode 100644 libnetdata/circular_buffer/Makefile.am create mode 100644 libnetdata/circular_buffer/README.md create mode 100644 libnetdata/circular_buffer/circular_buffer.c create mode 100644 libnetdata/circular_buffer/circular_buffer.h create mode 100644 libnetdata/clocks/Makefile.am create mode 100644 libnetdata/clocks/README.md create mode 100644 libnetdata/clocks/clocks.c create mode 100644 libnetdata/clocks/clocks.h create mode 100644 libnetdata/completion/Makefile.am create mode 100644 libnetdata/completion/completion.c create mode 100644 libnetdata/completion/completion.h create mode 100644 libnetdata/config/Makefile.am create mode 100644 libnetdata/config/README.md create mode 100644 libnetdata/config/appconfig.c create mode 100644 libnetdata/config/appconfig.h create mode 100644 libnetdata/dictionary/Makefile.am create mode 100644 libnetdata/dictionary/README.md create mode 100644 libnetdata/dictionary/dictionary.c create mode 100644 libnetdata/dictionary/dictionary.h create mode 100644 libnetdata/ebpf/Makefile.am create mode 100644 libnetdata/ebpf/README.md create mode 100644 libnetdata/ebpf/ebpf.c create mode 100644 libnetdata/ebpf/ebpf.h create mode 100644 libnetdata/eval/Makefile.am create mode 100644 libnetdata/eval/README.md create mode 100644 libnetdata/eval/eval.c create mode 100644 libnetdata/eval/eval.h create mode 100644 libnetdata/health/Makefile.am create mode 100644 libnetdata/health/health.c create mode 100644 libnetdata/health/health.h create mode 100644 libnetdata/inlined.h create mode 100644 libnetdata/json/Makefile.am create mode 100644 libnetdata/json/README.md create mode 100644 libnetdata/json/jsmn.c create mode 100644 libnetdata/json/jsmn.h create mode 100644 libnetdata/json/json.c create mode 100644 libnetdata/json/json.h create mode 100644 libnetdata/libjudy/src/Judy.h create mode 100644 libnetdata/libjudy/src/JudyCommon/JudyMalloc.c create mode 100644 libnetdata/libjudy/src/JudyCommon/JudyPrivate.h create mode 100644 libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h create mode 100644 libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h create mode 100644 libnetdata/libjudy/src/JudyHS/JudyHS.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyL.h create mode 100644 libnetdata/libjudy/src/JudyL/JudyLByCount.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLCascade.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLCount.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLDecascade.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLDel.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLFirst.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLFreeArray.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLGet.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLIns.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLInsArray.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLMallocIF.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLMemActive.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLMemUsed.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLNext.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLPrev.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c create mode 100644 libnetdata/libjudy/src/JudyL/JudyLTablesGen.c create mode 100644 libnetdata/libjudy/src/JudyL/j__udyLGet.c create mode 100644 libnetdata/libnetdata.c create mode 100644 libnetdata/libnetdata.h create mode 100644 libnetdata/locks/Makefile.am create mode 100644 libnetdata/locks/README.md create mode 100644 libnetdata/locks/locks.c create mode 100644 libnetdata/locks/locks.h create mode 100644 libnetdata/log/Makefile.am create mode 100644 libnetdata/log/README.md create mode 100644 libnetdata/log/log.c create mode 100644 libnetdata/log/log.h create mode 100644 libnetdata/onewayalloc/Makefile.am create mode 100644 libnetdata/onewayalloc/README.md create mode 100644 libnetdata/onewayalloc/onewayalloc.c create mode 100644 libnetdata/onewayalloc/onewayalloc.h create mode 100644 libnetdata/os.c create mode 100644 libnetdata/os.h create mode 100644 libnetdata/popen/Makefile.am create mode 100644 libnetdata/popen/README.md create mode 100644 libnetdata/popen/popen.c create mode 100644 libnetdata/popen/popen.h create mode 100644 libnetdata/procfile/Makefile.am create mode 100644 libnetdata/procfile/README.md create mode 100644 libnetdata/procfile/procfile.c create mode 100644 libnetdata/procfile/procfile.h create mode 100644 libnetdata/required_dummies.h create mode 100644 libnetdata/simple_pattern/Makefile.am create mode 100644 libnetdata/simple_pattern/README.md create mode 100644 libnetdata/simple_pattern/simple_pattern.c create mode 100644 libnetdata/simple_pattern/simple_pattern.h create mode 100644 libnetdata/socket/Makefile.am create mode 100644 libnetdata/socket/README.md create mode 100644 libnetdata/socket/security.c create mode 100644 libnetdata/socket/security.h create mode 100644 libnetdata/socket/socket.c create mode 100644 libnetdata/socket/socket.h create mode 100644 libnetdata/statistical/Makefile.am create mode 100644 libnetdata/statistical/README.md create mode 100644 libnetdata/statistical/statistical.c create mode 100644 libnetdata/statistical/statistical.h create mode 100644 libnetdata/storage_number/Makefile.am create mode 100644 libnetdata/storage_number/README.md create mode 100644 libnetdata/storage_number/storage_number.c create mode 100644 libnetdata/storage_number/storage_number.h create mode 100644 libnetdata/storage_number/tests/Makefile.am create mode 100644 libnetdata/storage_number/tests/test_storage_number.c create mode 100644 libnetdata/string/Makefile.am create mode 100644 libnetdata/string/README.md create mode 100644 libnetdata/string/string.c create mode 100644 libnetdata/string/string.h create mode 100644 libnetdata/string/utf8.h create mode 100644 libnetdata/tests/Makefile.am create mode 100644 libnetdata/tests/test_str2ld.c create mode 100644 libnetdata/threads/Makefile.am create mode 100644 libnetdata/threads/README.md create mode 100644 libnetdata/threads/threads.c create mode 100644 libnetdata/threads/threads.h create mode 100644 libnetdata/url/Makefile.am create mode 100644 libnetdata/url/README.md create mode 100644 libnetdata/url/url.c create mode 100644 libnetdata/url/url.h create mode 100644 libnetdata/worker_utilization/Makefile.am create mode 100644 libnetdata/worker_utilization/README.md create mode 100644 libnetdata/worker_utilization/worker_utilization.c create mode 100644 libnetdata/worker_utilization/worker_utilization.h (limited to 'libnetdata') diff --git a/libnetdata/Makefile.am b/libnetdata/Makefile.am new file mode 100644 index 0000000..1208d16 --- /dev/null +++ b/libnetdata/Makefile.am @@ -0,0 +1,37 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +SUBDIRS = \ + adaptive_resortable_list \ + arrayalloc \ + avl \ + buffer \ + clocks \ + completion \ + config \ + dictionary \ + ebpf \ + eval \ + json \ + health \ + locks \ + log \ + onewayalloc \ + popen \ + procfile \ + simple_pattern \ + socket \ + statistical \ + storage_number \ + string \ + threads \ + url \ + worker_utilization \ + tests \ + $(NULL) + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/README.md b/libnetdata/README.md new file mode 100644 index 0000000..fe0690d --- /dev/null +++ b/libnetdata/README.md @@ -0,0 +1,10 @@ + + +# libnetdata + +`libnetdata` is a collection of library code that is used by all Netdata `C` programs. + + diff --git a/libnetdata/adaptive_resortable_list/Makefile.am b/libnetdata/adaptive_resortable_list/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/adaptive_resortable_list/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/adaptive_resortable_list/README.md b/libnetdata/adaptive_resortable_list/README.md new file mode 100644 index 0000000..9eb942b --- /dev/null +++ b/libnetdata/adaptive_resortable_list/README.md @@ -0,0 +1,99 @@ + + +# Adaptive Re-sortable List (ARL) + +This library allows Netdata to read a series of `name - value` pairs +in the **fastest possible way**. + +ARLs are used all over Netdata, as they are the most +CPU utilization efficient way to process `/proc` files. They are used to +process both vertical (csv like) and horizontal (one pair per line) `name - value` pairs. + +## How ARL works + +It maintains a linked list of all `NAME` (keywords), sorted in the +order found in the data source. The linked list is kept +sorted at all times - the data source may change at any time, the +linked list will adapt at the next iteration. + +### Initialization + +During initialization (just once), the caller: + +- calls `arl_create()` to create the ARL + +- calls `arl_expect()` multiple times to register the expected keywords + +The library will call the `processor()` function (given to +`arl_create()`), for each expected keyword found. +The default `processor()` expects `dst` to be an `unsigned long long *`. + +Each `name` keyword may have a different `processor()` (by calling +`arl_expect_custom()` instead of `arl_expect()`). + +### Data collection iterations + +For each iteration through the data source, the caller: + +- calls `arl_begin()` to initiate a data collection iteration. + This is to be called just ONCE every time the source is re-evaluated. + +- calls `arl_check()` for each entry read from the file. + +### Cleanup + +When the caller exits: + +- calls `arl_free()` to destroy this and free all memory. + +### Performance + +ARL maintains a list of `name` keywords found in the data source (even the ones +that are not useful for data collection). + +If the data source maintains the same order on the `name-value` pairs, for each +each call to `arl_check()` only an `strcmp()` is executed to verify the +expected order has not changed, a counter is incremented and a pointer is changed. +So, if the data source has 100 `name-value` pairs, and their order remains constant +over time, 100 successful `strcmp()` are executed. + +In the unlikely event that an iteration sees the data source with a different order, +for each out-of-order keyword, a full search of the remaining keywords is made. But +this search uses 32bit hashes, not string comparisons, so it should also be fast. + +When all expectations are satisfied (even in the middle of an iteration), +the call to `arl_check()` will return 1, to signal the caller to stop the loop, +saving valuable CPU resources for the rest of the data source. + +In the following test we used alternative methods to process, **1M times**, +a data source like `/proc/meminfo`, already tokenized, in memory, +to extract the same number of expected metrics: + +|test|code|string comparison|number parsing|duration| +|:--:|:--:|:---------------:|:------------:|:------:| +|1|if-else-if-else-if|`strcmp()`|`strtoull()`|4630.337 ms| +|2|nested loops|inline `simple_hash()` and `strcmp()`|`strtoull()`|1597.481 ms| +|3|nested loops|inline `simple_hash()` and `strcmp()`|`str2ull()`|923.523 ms| +|4|if-else-if-else-if|inline `simple_hash()` and `strcmp()`|`strtoull()`|854.574 ms| +|5|if-else-if-else-if|statement expression `simple_hash()` and `strcmp()`|`strtoull()`|912.013 ms| +|6|if-continue|inline `simple_hash()` and `strcmp()`|`strtoull()`|842.279 ms| +|7|if-else-if-else-if|inline `simple_hash()` and `strcmp()`|`str2ull()`|602.837 ms| +|8|ARL|ARL|`strtoull()`|350.360 ms| +|9|ARL|ARL|`str2ull()`|157.237 ms| + +Compared to unoptimized code (test No 1: 4.6sec): + +- before ARL Netdata was using test No **7** with hashing and a custom `str2ull()` to achieve 602ms. +- the current ARL implementation is test No **9** that needs only 157ms (29 times faster vs unoptimized code, about 4 times faster vs optimized code). + +[Check the source code of this test](https://raw.githubusercontent.com/netdata/netdata/master/tests/profile/benchmark-value-pairs.c). + +## Limitations + +Do not use ARL if the a name/keyword may appear more than once in the +source data. + + diff --git a/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c b/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c new file mode 100644 index 0000000..7f4c6c5 --- /dev/null +++ b/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c @@ -0,0 +1,280 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +// the default processor() of the ARL +// can be overwritten at arl_create() +inline void arl_callback_str2ull(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; + (void)hash; + + register unsigned long long *d = dst; + *d = str2ull(value); + // fprintf(stderr, "name '%s' with hash %u and value '%s' is %llu\n", name, hash, value, *d); +} + +inline void arl_callback_str2kernel_uint_t(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; + (void)hash; + + register kernel_uint_t *d = dst; + *d = str2kernel_uint_t(value); + // fprintf(stderr, "name '%s' with hash %u and value '%s' is %llu\n", name, hash, value, (unsigned long long)*d); +} + +inline void arl_callback_ssize_t(const char *name, uint32_t hash, const char *value, void *dst) { + (void)name; + (void)hash; + + register ssize_t *d = dst; + *d = (ssize_t)str2ll(value, NULL); + // fprintf(stderr, "name '%s' with hash %u and value '%s' is %zd\n", name, hash, value, *d); +} + +// create a new ARL +ARL_BASE *arl_create(const char *name, void (*processor)(const char *, uint32_t, const char *, void *), size_t rechecks) { + ARL_BASE *base = callocz(1, sizeof(ARL_BASE)); + + base->name = strdupz(name); + + if(!processor) + base->processor = arl_callback_str2ull; + else + base->processor = processor; + + base->rechecks = rechecks; + + return base; +} + +void arl_free(ARL_BASE *arl_base) { + if(unlikely(!arl_base)) + return; + + while(arl_base->head) { + ARL_ENTRY *e = arl_base->head; + arl_base->head = e->next; + + freez(e->name); +#ifdef NETDATA_INTERNAL_CHECKS + memset(e, 0, sizeof(ARL_ENTRY)); +#endif + freez(e); + } + + freez(arl_base->name); + +#ifdef NETDATA_INTERNAL_CHECKS + memset(arl_base, 0, sizeof(ARL_BASE)); +#endif + + freez(arl_base); +} + +void arl_begin(ARL_BASE *base) { + +#ifdef NETDATA_INTERNAL_CHECKS + if(likely(base->iteration > 10)) { + // do these checks after the ARL has been sorted + + if(unlikely(base->relinkings > (base->expected + base->allocated))) + info("ARL '%s' has %zu relinkings with %zu expected and %zu allocated entries. Is the source changing so fast?" + , base->name, base->relinkings, base->expected, base->allocated); + + if(unlikely(base->slow > base->fast)) + info("ARL '%s' has %zu fast searches and %zu slow searches. Is the source really changing so fast?" + , base->name, base->fast, base->slow); + + /* + if(unlikely(base->iteration % 60 == 0)) { + info("ARL '%s' statistics: iteration %zu, expected %zu, wanted %zu, allocated %zu, fred %zu, relinkings %zu, found %zu, added %zu, fast %zu, slow %zu" + , base->name + , base->iteration + , base->expected + , base->wanted + , base->allocated + , base->fred + , base->relinkings + , base->found + , base->added + , base->fast + , base->slow + ); + // for(e = base->head; e; e = e->next) fprintf(stderr, "%s ", e->name); + // fprintf(stderr, "\n"); + } + */ + } +#endif + + if(unlikely(base->iteration > 0 && (base->added || (base->iteration % base->rechecks) == 0))) { + int wanted_equals_expected = ((base->iteration % base->rechecks) == 0); + + // fprintf(stderr, "\n\narl_begin() rechecking, added %zu, iteration %zu, rechecks %zu, wanted_equals_expected %d\n\n\n", base->added, base->iteration, base->rechecks, wanted_equals_expected); + + base->added = 0; + base->wanted = (wanted_equals_expected)?base->expected:0; + + ARL_ENTRY *e = base->head; + while(e) { + if(e->flags & ARL_ENTRY_FLAG_FOUND) { + + // remove the found flag + e->flags &= ~ARL_ENTRY_FLAG_FOUND; + + // count it in wanted + if(!wanted_equals_expected && e->flags & ARL_ENTRY_FLAG_EXPECTED) + base->wanted++; + + } + else if(e->flags & ARL_ENTRY_FLAG_DYNAMIC && !(base->head == e && !e->next)) { // not last entry + // we can remove this entry + // it is not found, and it was created because + // it was found in the source file + + // remember the next one + ARL_ENTRY *t = e->next; + + // remove it from the list + if(e->next) e->next->prev = e->prev; + if(e->prev) e->prev->next = e->next; + if(base->head == e) base->head = e->next; + + // free it + freez(e->name); + freez(e); + + // count it + base->fred++; + + // continue + e = t; + continue; + } + + e = e->next; + } + } + + if(unlikely(!base->head)) { + // hm... no nodes at all in the list #1700 + // add a fake one to prevent a crash + // this is better than checking for the existence of nodes all the time + arl_expect(base, "a-really-not-existing-source-keyword", NULL); + } + + base->iteration++; + base->next_keyword = base->head; + base->found = 0; + +} + +// register an expected keyword to the ARL +// together with its destination ( i.e. the output of the processor() ) +ARL_ENTRY *arl_expect_custom(ARL_BASE *base, const char *keyword, void (*processor)(const char *name, uint32_t hash, const char *value, void *dst), void *dst) { + ARL_ENTRY *e = callocz(1, sizeof(ARL_ENTRY)); + e->name = strdupz(keyword); + e->hash = simple_hash(e->name); + e->processor = (processor)?processor:base->processor; + e->dst = dst; + e->flags = ARL_ENTRY_FLAG_EXPECTED; + e->prev = NULL; + e->next = base->head; + + if(base->head) base->head->prev = e; + else base->next_keyword = e; + + base->head = e; + base->expected++; + base->allocated++; + + base->wanted = base->expected; + + return e; +} + +int arl_find_or_create_and_relink(ARL_BASE *base, const char *s, const char *value) { + ARL_ENTRY *e; + + uint32_t hash = simple_hash(s); + + // find if it already exists in the data + for(e = base->head; e ; e = e->next) + if(e->hash == hash && !strcmp(e->name, s)) + break; + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(base->next_keyword && e == base->next_keyword)) + fatal("Internal Error: e == base->last"); +#endif + + if(e) { + // found it in the keywords + + base->relinkings++; + + // run the processor for it + if(unlikely(e->dst)) { + e->processor(e->name, hash, value, e->dst); + base->found++; + } + + // unlink it - we will relink it below + if(e->next) e->next->prev = e->prev; + if(e->prev) e->prev->next = e->next; + + // make sure the head is properly linked + if(base->head == e) + base->head = e->next; + } + else { + // not found + + // create it + e = callocz(1, sizeof(ARL_ENTRY)); + e->name = strdupz(s); + e->hash = hash; + e->flags = ARL_ENTRY_FLAG_DYNAMIC; + + base->allocated++; + base->added++; + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(base->iteration % 60 == 0 && e->flags & ARL_ENTRY_FLAG_FOUND)) + info("ARL '%s': entry '%s' is already found. Did you forget to call arl_begin()?", base->name, s); +#endif + + e->flags |= ARL_ENTRY_FLAG_FOUND; + + // link it here + e->next = base->next_keyword; + if(base->next_keyword) { + e->prev = base->next_keyword->prev; + base->next_keyword->prev = e; + + if(e->prev) + e->prev->next = e; + + if(base->head == base->next_keyword) + base->head = e; + } + else { + e->prev = NULL; + + if(!base->head) + base->head = e; + } + + // prepare the next iteration + base->next_keyword = e->next; + if(unlikely(!base->next_keyword)) + base->next_keyword = base->head; + + if(unlikely(base->found == base->wanted)) { + // fprintf(stderr, "FOUND ALL WANTED 1: found = %zu, wanted = %zu, expected %zu\n", base->found, base->wanted, base->expected); + return 1; + } + + return 0; +} diff --git a/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h b/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h new file mode 100644 index 0000000..294c52e --- /dev/null +++ b/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h @@ -0,0 +1,138 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +#ifndef NETDATA_ADAPTIVE_RESORTABLE_LIST_H +#define NETDATA_ADAPTIVE_RESORTABLE_LIST_H 1 + +#define ARL_ENTRY_FLAG_FOUND 0x01 // the entry has been found in the source data +#define ARL_ENTRY_FLAG_EXPECTED 0x02 // the entry is expected by the program +#define ARL_ENTRY_FLAG_DYNAMIC 0x04 // the entry was dynamically allocated, from source data + +typedef struct arl_entry { + char *name; // the keywords + uint32_t hash; // the hash of the keyword + + void *dst; // the dst to pass to the processor + + uint8_t flags; // ARL_ENTRY_FLAG_* + + // the processor to do the job + void (*processor)(const char *name, uint32_t hash, const char *value, void *dst); + + // double linked list for fast re-linkings + struct arl_entry *prev, *next; +} ARL_ENTRY; + +typedef struct arl_base { + char *name; + + size_t iteration; // incremented on each iteration (arl_begin()) + size_t found; // the number of expected keywords found in this iteration + size_t expected; // the number of expected keywords + size_t wanted; // the number of wanted keywords + // i.e. the number of keywords found and expected + + size_t relinkings; // the number of relinkings we have made so far + + size_t allocated; // the number of keywords allocated + size_t fred; // the number of keywords cleaned up + + size_t rechecks; // the number of iterations between re-checks of the + // wanted number of keywords + // this is only needed in cases where the source + // is having less lines over time. + + size_t added; // it is non-zero if new keywords have been added + // this is only needed to detect new lines have + // been added to the file, over time. + +#ifdef NETDATA_INTERNAL_CHECKS + size_t fast; // the number of times we have taken the fast path + size_t slow; // the number of times we have taken the slow path +#endif + + // the processor to do the job + void (*processor)(const char *name, uint32_t hash, const char *value, void *dst); + + // the linked list of the keywords + ARL_ENTRY *head; + + // since we keep the list of keywords sorted (as found in the source data) + // this is next keyword that we expect to find in the source data. + ARL_ENTRY *next_keyword; +} ARL_BASE; + +// create a new ARL +ARL_BASE *arl_create(const char *name, void (*processor)(const char *, uint32_t, const char *, void *), size_t rechecks); + +// free an ARL +void arl_free(ARL_BASE *arl_base); + +// register an expected keyword to the ARL +// together with its destination ( i.e. the output of the processor() ) +ARL_ENTRY *arl_expect_custom(ARL_BASE *base, const char *keyword, void (*processor)(const char *name, uint32_t hash, const char *value, void *dst), void *dst); +#define arl_expect(base, keyword, dst) arl_expect_custom(base, keyword, NULL, dst) + +// an internal call to complete the check() call +int arl_find_or_create_and_relink(ARL_BASE *base, const char *s, const char *value); + +// begin an ARL iteration +void arl_begin(ARL_BASE *base); + +void arl_callback_str2ull(const char *name, uint32_t hash, const char *value, void *dst); +void arl_callback_str2kernel_uint_t(const char *name, uint32_t hash, const char *value, void *dst); +void arl_callback_ssize_t(const char *name, uint32_t hash, const char *value, void *dst); + +// check a keyword against the ARL +// this is to be called for each keyword read from source data +// s = the keyword, as collected +// src = the src data to be passed to the processor +// it is defined in the header file in order to be inlined +static inline int arl_check(ARL_BASE *base, const char *keyword, const char *value) { + ARL_ENTRY *e = base->next_keyword; + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely((base->fast + base->slow) % (base->expected + base->allocated) == 0 && (base->fast + base->slow) > (base->expected + base->allocated) * base->iteration)) + info("ARL '%s': Did you forget to call arl_begin()?", base->name); +#endif + + // it should be the first entry (pointed by base->next_keyword) + if(likely(!strcmp(keyword, e->name))) { + // it is + +#ifdef NETDATA_INTERNAL_CHECKS + base->fast++; +#endif + + e->flags |= ARL_ENTRY_FLAG_FOUND; + + // execute the processor + if(unlikely(e->dst)) { + e->processor(e->name, e->hash, value, e->dst); + base->found++; + } + + // be prepared for the next iteration + base->next_keyword = e->next; + if(unlikely(!base->next_keyword)) + base->next_keyword = base->head; + + // stop if we collected all the values for this iteration + if(unlikely(base->found == base->wanted)) { + // fprintf(stderr, "FOUND ALL WANTED 2: found = %zu, wanted = %zu, expected %zu\n", base->found, base->wanted, base->expected); + return 1; + } + + return 0; + } + +#ifdef NETDATA_INTERNAL_CHECKS + base->slow++; +#endif + + // we read from source, a not-expected keyword + return arl_find_or_create_and_relink(base, keyword, value); +} + +#endif //NETDATA_ADAPTIVE_RESORTABLE_LIST_H diff --git a/libnetdata/arrayalloc/Makefile.am b/libnetdata/arrayalloc/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/arrayalloc/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/arrayalloc/README.md b/libnetdata/arrayalloc/README.md new file mode 100644 index 0000000..2f21bf3 --- /dev/null +++ b/libnetdata/arrayalloc/README.md @@ -0,0 +1,7 @@ + + +# Array Allocator + diff --git a/libnetdata/arrayalloc/arrayalloc.c b/libnetdata/arrayalloc/arrayalloc.c new file mode 100644 index 0000000..f337279 --- /dev/null +++ b/libnetdata/arrayalloc/arrayalloc.c @@ -0,0 +1,489 @@ +#include "../libnetdata.h" +#include "arrayalloc.h" +#include "daemon/common.h" + +// max file size +#define ARAL_MAX_PAGE_SIZE_MMAP (1*1024*1024*1024) + +// max malloc size +// optimal at current versions of libc is up to 256k +// ideal to have the same overhead as libc is 4k +#define ARAL_MAX_PAGE_SIZE_MALLOC (64*1024) + +typedef struct arrayalloc_free { + size_t size; + struct arrayalloc_page *page; + struct arrayalloc_free *next; +} ARAL_FREE; + +typedef struct arrayalloc_page { + const char *filename; + size_t size; // the total size of the page + size_t used_elements; // the total number of used elements on this page + uint8_t *data; + ARAL_FREE *free_list; + struct arrayalloc_page *prev; // the prev page on the list + struct arrayalloc_page *next; // the next page on the list +} ARAL_PAGE; + +#define ARAL_NATURAL_ALIGNMENT (sizeof(uintptr_t) * 2) +static inline size_t natural_alignment(size_t size, size_t alignment) { + if(unlikely(size % alignment)) + size = size + alignment - (size % alignment); + + return size; +} + +static void arrayalloc_delete_leftover_files(const char *path, const char *required_prefix) { + DIR *dir = opendir(path); + if(!dir) return; + + char fullpath[FILENAME_MAX + 1]; + size_t len = strlen(required_prefix); + + struct dirent *de = NULL; + while((de = readdir(dir))) { + if(de->d_type == DT_DIR) + continue; + + if(strncmp(de->d_name, required_prefix, len) != 0) + continue; + + snprintfz(fullpath, FILENAME_MAX, "%s/%s", path, de->d_name); + info("ARRAYALLOC: removing left-over file '%s'", fullpath); + if(unlikely(unlink(fullpath) == -1)) + error("Cannot delete file '%s'", fullpath); + } + + closedir(dir); +} + +// ---------------------------------------------------------------------------- +// arrayalloc_init() + +static void arrayalloc_init(ARAL *ar) { + static netdata_mutex_t mutex = NETDATA_MUTEX_INITIALIZER; + netdata_mutex_lock(&mutex); + + if(!ar->internal.initialized) { + netdata_mutex_init(&ar->internal.mutex); + + long int page_size = sysconf(_SC_PAGE_SIZE); + if (unlikely(page_size == -1)) + ar->internal.natural_page_size = 4096; + else + ar->internal.natural_page_size = page_size; + + // we need to add a page pointer after the element + // so, first align the element size to the pointer size + ar->internal.element_size = natural_alignment(ar->requested_element_size, sizeof(uintptr_t)); + + // then add the size of a pointer to it + ar->internal.element_size += sizeof(uintptr_t); + + // make sure it is at least what we need for an ARAL_FREE slot + if (ar->internal.element_size < sizeof(ARAL_FREE)) + ar->internal.element_size = sizeof(ARAL_FREE); + + // and finally align it to the natural alignment + ar->internal.element_size = natural_alignment(ar->internal.element_size, ARAL_NATURAL_ALIGNMENT); + + // we write the page pointer just after each element + ar->internal.page_ptr_offset = ar->internal.element_size - sizeof(uintptr_t); + + if(ar->requested_element_size + sizeof(uintptr_t) > ar->internal.element_size) + fatal("ARRAYALLOC: failed to calculate properly page_ptr_offset: element size %zu, sizeof(uintptr_t) %zu, natural alignment %zu, final element size %zu, page_ptr_offset %zu", + ar->requested_element_size, sizeof(uintptr_t), ARAL_NATURAL_ALIGNMENT, ar->internal.element_size, ar->internal.page_ptr_offset); + + //info("ARRAYALLOC: element size %zu, sizeof(uintptr_t) %zu, natural alignment %zu, final element size %zu, page_ptr_offset %zu", + // ar->element_size, sizeof(uintptr_t), ARAL_NATURAL_ALIGNMENT, ar->internal.element_size, ar->internal.page_ptr_offset); + + if (ar->initial_elements < 10) + ar->initial_elements = 10; + + ar->internal.mmap = (ar->use_mmap && ar->cache_dir && *ar->cache_dir) ? true : false; + ar->internal.max_alloc_size = ar->internal.mmap ? ARAL_MAX_PAGE_SIZE_MMAP : ARAL_MAX_PAGE_SIZE_MALLOC; + + if(ar->internal.max_alloc_size % ar->internal.natural_page_size) + ar->internal.max_alloc_size += ar->internal.natural_page_size - (ar->internal.max_alloc_size % ar->internal.natural_page_size) ; + + if(ar->internal.max_alloc_size % ar->internal.element_size) + ar->internal.max_alloc_size -= ar->internal.max_alloc_size % ar->internal.element_size; + + ar->internal.pages = NULL; + ar->internal.allocation_multiplier = 1; + ar->internal.file_number = 0; + + if(ar->internal.mmap) { + char directory_name[FILENAME_MAX + 1]; + snprintfz(directory_name, FILENAME_MAX, "%s/array_alloc.mmap", *ar->cache_dir); + int r = mkdir(directory_name, 0775); + if (r != 0 && errno != EEXIST) + fatal("Cannot create directory '%s'", directory_name); + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s.", ar->filename); + arrayalloc_delete_leftover_files(directory_name, filename); + } + + ar->internal.initialized = true; + } + + netdata_mutex_unlock(&mutex); +} + +// ---------------------------------------------------------------------------- +// check a free slot + +#ifdef NETDATA_INTERNAL_CHECKS +static inline void arrayalloc_free_validate_internal_check(ARAL *ar, ARAL_FREE *fr) { + if(fr->size < ar->internal.element_size) + fatal("ARRAYALLOC: free item of size %zu, less than the expected element size %zu", fr->size, ar->internal.element_size); + + if(fr->size % ar->internal.element_size) + fatal("ARRAYALLOC: free item of size %zu is not multiple to element size %zu", fr->size, ar->internal.element_size); +} +#else +#define arrayalloc_free_validate_internal_check(ar, fr) debug_dummy() +#endif + +// ---------------------------------------------------------------------------- +// find the page a pointer belongs to + +#ifdef NETDATA_INTERNAL_CHECKS +static inline ARAL_PAGE *find_page_with_allocation_internal_check(ARAL *ar, void *ptr) { + uintptr_t seeking = (uintptr_t)ptr; + ARAL_PAGE *page; + + for(page = ar->internal.pages; page ; page = page->next) { + if(unlikely(seeking >= (uintptr_t)page->data && seeking < (uintptr_t)page->data + page->size)) + break; + } + + return page; +} +#endif + +// ---------------------------------------------------------------------------- +// find a page with a free slot (there shouldn't be any) + +#ifdef NETDATA_INTERNAL_CHECKS +static inline ARAL_PAGE *find_page_with_free_slots_internal_check(ARAL *ar) { + ARAL_PAGE *page; + + for(page = ar->internal.pages; page ; page = page->next) { + if(page->free_list) + break; + + internal_fatal(page->size - page->used_elements * ar->internal.element_size >= ar->internal.element_size, + "ARRAYALLOC: a page is marked full, but it is not!"); + + internal_fatal(page->size < page->used_elements * ar->internal.element_size, + "ARRAYALLOC: a page has been overflown!"); + } + + return page; +} +#endif + +#ifdef NETDATA_TRACE_ALLOCATIONS +static void arrayalloc_add_page(ARAL *ar, const char *file, const char *function, size_t line) { +#else +static void arrayalloc_add_page(ARAL *ar) { +#endif + if(unlikely(!ar->internal.initialized)) + arrayalloc_init(ar); + + ARAL_PAGE *page = callocz(1, sizeof(ARAL_PAGE)); + page->size = ar->initial_elements * ar->internal.element_size * ar->internal.allocation_multiplier; + if(page->size > ar->internal.max_alloc_size) + page->size = ar->internal.max_alloc_size; + else + ar->internal.allocation_multiplier *= 2; + + if(ar->internal.mmap) { + ar->internal.file_number++; + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/array_alloc.mmap/%s.%zu", *ar->cache_dir, ar->filename, ar->internal.file_number); + page->filename = strdupz(filename); + page->data = netdata_mmap(page->filename, page->size, MAP_SHARED, 0); + if (unlikely(!page->data)) + fatal("Cannot allocate arrayalloc buffer of size %zu on filename '%s'", page->size, page->filename); + } + else { +#ifdef NETDATA_TRACE_ALLOCATIONS + page->data = mallocz_int(page->size, file, function, line); +#else + page->data = mallocz(page->size); +#endif + } + + // link the free space to its page + ARAL_FREE *fr = (ARAL_FREE *)page->data; + fr->size = page->size; + fr->page = page; + fr->next = NULL; + page->free_list = fr; + + // link the new page at the front of the list of pages + DOUBLE_LINKED_LIST_PREPEND_UNSAFE(ar->internal.pages, page, prev, next); + + arrayalloc_free_validate_internal_check(ar, fr); +} + +static void arrayalloc_lock(ARAL *ar) { + if(!ar->internal.lockless) + netdata_mutex_lock(&ar->internal.mutex); +} + +static void arrayalloc_unlock(ARAL *ar) { + if(!ar->internal.lockless) + netdata_mutex_unlock(&ar->internal.mutex); +} + +ARAL *arrayalloc_create(size_t element_size, size_t elements, const char *filename, char **cache_dir, bool mmap) { + ARAL *ar = callocz(1, sizeof(ARAL)); + ar->requested_element_size = element_size; + ar->initial_elements = elements; + ar->filename = filename; + ar->cache_dir = cache_dir; + ar->use_mmap = mmap; + return ar; +} + +#ifdef NETDATA_TRACE_ALLOCATIONS +void *arrayalloc_mallocz_int(ARAL *ar, const char *file, const char *function, size_t line) { +#else +void *arrayalloc_mallocz(ARAL *ar) { +#endif + if(unlikely(!ar->internal.initialized)) + arrayalloc_init(ar); + + arrayalloc_lock(ar); + + if(unlikely(!ar->internal.pages || !ar->internal.pages->free_list)) { + internal_fatal(find_page_with_free_slots_internal_check(ar) != NULL, + "ARRAYALLOC: first page does not have any free slots, but there is another that has!"); + +#ifdef NETDATA_TRACE_ALLOCATIONS + arrayalloc_add_page(ar, file, function, line); +#else + arrayalloc_add_page(ar); +#endif + } + + ARAL_PAGE *page = ar->internal.pages; + ARAL_FREE *found_fr = page->free_list; + + internal_fatal(!found_fr, + "ARRAYALLOC: free item to use, cannot be NULL."); + + internal_fatal(found_fr->size < ar->internal.element_size, + "ARRAYALLOC: free item size %zu, cannot be smaller than %zu", + found_fr->size, ar->internal.element_size); + + if(unlikely(found_fr->size - ar->internal.element_size < ar->internal.element_size)) { + // we can use the entire free space entry + + page->free_list = found_fr->next; + + if(unlikely(!page->free_list)) { + // we are done with this page + // move the full page last + // so that pages with free items remain first in the list + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(ar->internal.pages, page, prev, next); + DOUBLE_LINKED_LIST_APPEND_UNSAFE(ar->internal.pages, page, prev, next); + } + } + else { + // we can split the free space entry + + uint8_t *data = (uint8_t *)found_fr; + ARAL_FREE *fr = (ARAL_FREE *)&data[ar->internal.element_size]; + fr->page = page; + fr->size = found_fr->size - ar->internal.element_size; + + // link the free slot first in the page + fr->next = found_fr->next; + page->free_list = fr; + + arrayalloc_free_validate_internal_check(ar, fr); + } + + page->used_elements++; + + // put the page pointer after the element + uint8_t *data = (uint8_t *)found_fr; + ARAL_PAGE **page_ptr = (ARAL_PAGE **)&data[ar->internal.page_ptr_offset]; + *page_ptr = page; + + arrayalloc_unlock(ar); + return (void *)found_fr; +} + +#ifdef NETDATA_TRACE_ALLOCATIONS +void arrayalloc_freez_int(ARAL *ar, void *ptr, const char *file, const char *function, size_t line) { +#else +void arrayalloc_freez(ARAL *ar, void *ptr) { +#endif + if(unlikely(!ptr)) return; + arrayalloc_lock(ar); + + // get the page pointer + ARAL_PAGE *page; + { + uint8_t *data = (uint8_t *)ptr; + ARAL_PAGE **page_ptr = (ARAL_PAGE **)&data[ar->internal.page_ptr_offset]; + page = *page_ptr; + +#ifdef NETDATA_INTERNAL_CHECKS + // make it NULL so that we will fail on double free + // do not enable this on production, because the MMAP file + // will need to be saved again! + *page_ptr = NULL; +#endif + } + +#ifdef NETDATA_ARRAYALLOC_INTERNAL_CHECKS + { + // find the page ptr belongs + ARAL_PAGE *page2 = find_page_with_allocation_internal_check(ar, ptr); + + if(unlikely(page != page2)) + fatal("ARRAYALLOC: page pointers do not match!"); + + if (unlikely(!page2)) + fatal("ARRAYALLOC: free of pointer %p is not in arrayalloc address space.", ptr); + } +#endif + + if(unlikely(!page)) + fatal("ARRAYALLOC: possible corruption or double free of pointer %p", ptr); + + if (unlikely(!page->used_elements)) + fatal("ARRAYALLOC: free of pointer %p is inside a page without any active allocations.", ptr); + + page->used_elements--; + + // make this element available + ARAL_FREE *fr = (ARAL_FREE *)ptr; + fr->page = page; + fr->size = ar->internal.element_size; + fr->next = page->free_list; + page->free_list = fr; + + // if the page is empty, release it + if(!page->used_elements) { + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(ar->internal.pages, page, prev, next); + + // free it + if(ar->internal.mmap) { + netdata_munmap(page->data, page->size); + if (unlikely(unlink(page->filename) == 1)) + error("Cannot delete file '%s'", page->filename); + freez((void *)page->filename); + } + else { +#ifdef NETDATA_TRACE_ALLOCATIONS + freez_int(page->data, file, function, line); +#else + freez(page->data); +#endif + } + + freez(page); + } + else if(page != ar->internal.pages) { + // move the page with free item first + // so that the next allocation will use this page + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(ar->internal.pages, page, prev, next); + DOUBLE_LINKED_LIST_PREPEND_UNSAFE(ar->internal.pages, page, prev, next); + } + + arrayalloc_unlock(ar); +} + +int aral_unittest(size_t elements) { + char *cache_dir = "/tmp/"; + ARAL *ar = arrayalloc_create(20, 10, "test-aral", &cache_dir, false); + + void *pointers[elements]; + + for(size_t i = 0; i < elements ;i++) { + pointers[i] = arrayalloc_mallocz(ar); + } + + for(size_t div = 5; div >= 2 ;div--) { + for (size_t i = 0; i < elements / div; i++) { + arrayalloc_freez(ar, pointers[i]); + } + + for (size_t i = 0; i < elements / div; i++) { + pointers[i] = arrayalloc_mallocz(ar); + } + } + + for(size_t step = 50; step >= 10 ;step -= 10) { + for (size_t i = 0; i < elements; i += step) { + arrayalloc_freez(ar, pointers[i]); + } + + for (size_t i = 0; i < elements; i += step) { + pointers[i] = arrayalloc_mallocz(ar); + } + } + + for(size_t i = 0; i < elements ;i++) { + arrayalloc_freez(ar, pointers[i]); + } + + if(ar->internal.pages) { + fprintf(stderr, "ARAL leftovers detected (1)"); + return 1; + } + + size_t ops = 0; + size_t increment = elements / 10; + size_t allocated = 0; + for(size_t all = increment; all <= elements ; all += increment) { + + for(; allocated < all ; allocated++) { + pointers[allocated] = arrayalloc_mallocz(ar); + ops++; + } + + size_t to_free = now_realtime_usec() % all; + size_t free_list[to_free]; + for(size_t i = 0; i < to_free ;i++) { + size_t pos; + do { + pos = now_realtime_usec() % all; + } while(!pointers[pos]); + + arrayalloc_freez(ar, pointers[pos]); + pointers[pos] = NULL; + free_list[i] = pos; + ops++; + } + + for(size_t i = 0; i < to_free ;i++) { + size_t pos = free_list[i]; + pointers[pos] = arrayalloc_mallocz(ar); + ops++; + } + } + + for(size_t i = 0; i < allocated - 1 ;i++) { + arrayalloc_freez(ar, pointers[i]); + ops++; + } + + arrayalloc_freez(ar, pointers[allocated - 1]); + + if(ar->internal.pages) { + fprintf(stderr, "ARAL leftovers detected (2)"); + return 1; + } + + return 0; +} diff --git a/libnetdata/arrayalloc/arrayalloc.h b/libnetdata/arrayalloc/arrayalloc.h new file mode 100644 index 0000000..cf80b73 --- /dev/null +++ b/libnetdata/arrayalloc/arrayalloc.h @@ -0,0 +1,48 @@ + +#ifndef ARRAYALLOC_H +#define ARRAYALLOC_H 1 + +#include "../libnetdata.h" + +typedef struct arrayalloc { + size_t requested_element_size; + size_t initial_elements; + const char *filename; + char **cache_dir; + bool use_mmap; + + // private members - do not touch + struct { + bool mmap; + bool lockless; + bool initialized; + size_t element_size; + size_t page_ptr_offset; + size_t file_number; + size_t natural_page_size; + size_t allocation_multiplier; + size_t max_alloc_size; + netdata_mutex_t mutex; + struct arrayalloc_page *pages; + } internal; +} ARAL; + +ARAL *arrayalloc_create(size_t element_size, size_t elements, const char *filename, char **cache_dir, bool mmap); +int aral_unittest(size_t elements); + +#ifdef NETDATA_TRACE_ALLOCATIONS + +#define arrayalloc_mallocz(ar) arrayalloc_mallocz_int(ar, __FILE__, __FUNCTION__, __LINE__) +#define arrayalloc_freez(ar, ptr) arrayalloc_freez_int(ar, ptr, __FILE__, __FUNCTION__, __LINE__) + +void *arrayalloc_mallocz_int(ARAL *ar, const char *file, const char *function, size_t line); +void arrayalloc_freez_int(ARAL *ar, void *ptr, const char *file, const char *function, size_t line); + +#else // NETDATA_TRACE_ALLOCATIONS + +void *arrayalloc_mallocz(ARAL *ar); +void arrayalloc_freez(ARAL *ar, void *ptr); + +#endif // NETDATA_TRACE_ALLOCATIONS + +#endif // ARRAYALLOC_H diff --git a/libnetdata/avl/Makefile.am b/libnetdata/avl/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/avl/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/avl/README.md b/libnetdata/avl/README.md new file mode 100644 index 0000000..36392bd --- /dev/null +++ b/libnetdata/avl/README.md @@ -0,0 +1,17 @@ + + +# AVL + +AVL is a library indexing objects in B-Trees. + +`avl_insert()`, `avl_remove()` and `avl_search()` are adaptations +of the AVL algorithm found in `libavl` v2.0.3, so that they do not +use any memory allocations and their memory footprint is optimized +(by eliminating non-necessary data members). + +In addition to the above, this version of AVL, provides versions using locks +and traversal functions. + diff --git a/libnetdata/avl/avl.c b/libnetdata/avl/avl.c new file mode 100644 index 0000000..5a4c1a9 --- /dev/null +++ b/libnetdata/avl/avl.c @@ -0,0 +1,420 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later + +#include "../libnetdata.h" + +/* ------------------------------------------------------------------------- */ +/* + * avl_insert(), avl_remove() and avl_search() + * are adaptations (by Costa Tsaousis) of the AVL algorithm found in libavl + * v2.0.3, so that they do not use any memory allocations and their memory + * footprint is optimized (by eliminating non-necessary data members). + * + * libavl - library for manipulation of binary trees. + * Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004 Free Software + * Foundation, Inc. +*/ + + +/* Search |tree| for an item matching |item|, and return it if found. + Otherwise return |NULL|. */ +avl_t *avl_search(avl_tree_type *tree, avl_t *item) { + avl_t *p; + + // assert (tree != NULL && item != NULL); + + for (p = tree->root; p != NULL; ) { + int cmp = tree->compar(item, p); + + if (cmp < 0) + p = p->avl_link[0]; + else if (cmp > 0) + p = p->avl_link[1]; + else /* |cmp == 0| */ + return p; + } + + return NULL; +} + +/* Inserts |item| into |tree| and returns a pointer to |item|'s address. + If a duplicate item is found in the tree, + returns a pointer to the duplicate without inserting |item|. + */ +avl_t *avl_insert(avl_tree_type *tree, avl_t *item) { + avl_t *y, *z; /* Top node to update balance factor, and parent. */ + avl_t *p, *q; /* Iterator, and parent. */ + avl_t *n; /* Newly inserted node. */ + avl_t *w; /* New root of rebalanced subtree. */ + unsigned char dir; /* Direction to descend. */ + + unsigned char da[AVL_MAX_HEIGHT]; /* Cached comparison results. */ + int k = 0; /* Number of cached results. */ + + // assert(tree != NULL && item != NULL); + + z = (avl_t *) &tree->root; + y = tree->root; + dir = 0; + for (q = z, p = y; p != NULL; q = p, p = p->avl_link[dir]) { + int cmp = tree->compar(item, p); + if (cmp == 0) + return p; + + if (p->avl_balance != 0) + z = q, y = p, k = 0; + da[k++] = dir = (unsigned char)(cmp > 0); + } + + n = q->avl_link[dir] = item; + + // tree->avl_count++; + n->avl_link[0] = n->avl_link[1] = NULL; + n->avl_balance = 0; + if (y == NULL) return n; + + for (p = y, k = 0; p != n; p = p->avl_link[da[k]], k++) + if (da[k] == 0) + p->avl_balance--; + else + p->avl_balance++; + + if (y->avl_balance == -2) { + avl_t *x = y->avl_link[0]; + if (x->avl_balance == -1) { + w = x; + y->avl_link[0] = x->avl_link[1]; + x->avl_link[1] = y; + x->avl_balance = y->avl_balance = 0; + } + else { + // assert (x->avl_balance == +1); + w = x->avl_link[1]; + x->avl_link[1] = w->avl_link[0]; + w->avl_link[0] = x; + y->avl_link[0] = w->avl_link[1]; + w->avl_link[1] = y; + if (w->avl_balance == -1) + x->avl_balance = 0, y->avl_balance = +1; + else if (w->avl_balance == 0) + x->avl_balance = y->avl_balance = 0; + else /* |w->avl_balance == +1| */ + x->avl_balance = -1, y->avl_balance = 0; + w->avl_balance = 0; + } + } + else if (y->avl_balance == +2) { + avl_t *x = y->avl_link[1]; + if (x->avl_balance == +1) { + w = x; + y->avl_link[1] = x->avl_link[0]; + x->avl_link[0] = y; + x->avl_balance = y->avl_balance = 0; + } + else { + // assert (x->avl_balance == -1); + w = x->avl_link[0]; + x->avl_link[0] = w->avl_link[1]; + w->avl_link[1] = x; + y->avl_link[1] = w->avl_link[0]; + w->avl_link[0] = y; + if (w->avl_balance == +1) + x->avl_balance = 0, y->avl_balance = -1; + else if (w->avl_balance == 0) + x->avl_balance = y->avl_balance = 0; + else /* |w->avl_balance == -1| */ + x->avl_balance = +1, y->avl_balance = 0; + w->avl_balance = 0; + } + } + else return n; + + z->avl_link[y != z->avl_link[0]] = w; + + // tree->avl_generation++; + return n; +} + +/* Deletes from |tree| and returns an item matching |item|. + Returns a null pointer if no matching item found. */ +avl_t *avl_remove(avl_tree_type *tree, avl_t *item) { + /* Stack of nodes. */ + avl_t *pa[AVL_MAX_HEIGHT]; /* Nodes. */ + unsigned char da[AVL_MAX_HEIGHT]; /* |avl_link[]| indexes. */ + int k; /* Stack pointer. */ + + avl_t *p; /* Traverses tree to find node to delete. */ + int cmp; /* Result of comparison between |item| and |p|. */ + + // assert (tree != NULL && item != NULL); + + k = 0; + p = (avl_t *) &tree->root; + for(cmp = -1; cmp != 0; cmp = tree->compar(item, p)) { + unsigned char dir = (unsigned char)(cmp > 0); + + pa[k] = p; + da[k++] = dir; + + p = p->avl_link[dir]; + if(p == NULL) return NULL; + } + + item = p; + + if (p->avl_link[1] == NULL) + pa[k - 1]->avl_link[da[k - 1]] = p->avl_link[0]; + else { + avl_t *r = p->avl_link[1]; + if (r->avl_link[0] == NULL) { + r->avl_link[0] = p->avl_link[0]; + r->avl_balance = p->avl_balance; + pa[k - 1]->avl_link[da[k - 1]] = r; + da[k] = 1; + pa[k++] = r; + } + else { + avl_t *s; + int j = k++; + + for (;;) { + da[k] = 0; + pa[k++] = r; + s = r->avl_link[0]; + if (s->avl_link[0] == NULL) break; + + r = s; + } + + s->avl_link[0] = p->avl_link[0]; + r->avl_link[0] = s->avl_link[1]; + s->avl_link[1] = p->avl_link[1]; + s->avl_balance = p->avl_balance; + + pa[j - 1]->avl_link[da[j - 1]] = s; + da[j] = 1; + pa[j] = s; + } + } + + // assert (k > 0); + while (--k > 0) { + avl_t *y = pa[k]; + + if (da[k] == 0) { + y->avl_balance++; + if (y->avl_balance == +1) break; + else if (y->avl_balance == +2) { + avl_t *x = y->avl_link[1]; + if (x->avl_balance == -1) { + avl_t *w; + // assert (x->avl_balance == -1); + w = x->avl_link[0]; + x->avl_link[0] = w->avl_link[1]; + w->avl_link[1] = x; + y->avl_link[1] = w->avl_link[0]; + w->avl_link[0] = y; + if (w->avl_balance == +1) + x->avl_balance = 0, y->avl_balance = -1; + else if (w->avl_balance == 0) + x->avl_balance = y->avl_balance = 0; + else /* |w->avl_balance == -1| */ + x->avl_balance = +1, y->avl_balance = 0; + w->avl_balance = 0; + pa[k - 1]->avl_link[da[k - 1]] = w; + } + else { + y->avl_link[1] = x->avl_link[0]; + x->avl_link[0] = y; + pa[k - 1]->avl_link[da[k - 1]] = x; + if (x->avl_balance == 0) { + x->avl_balance = -1; + y->avl_balance = +1; + break; + } + else x->avl_balance = y->avl_balance = 0; + } + } + } + else + { + y->avl_balance--; + if (y->avl_balance == -1) break; + else if (y->avl_balance == -2) { + avl_t *x = y->avl_link[0]; + if (x->avl_balance == +1) { + avl_t *w; + // assert (x->avl_balance == +1); + w = x->avl_link[1]; + x->avl_link[1] = w->avl_link[0]; + w->avl_link[0] = x; + y->avl_link[0] = w->avl_link[1]; + w->avl_link[1] = y; + if (w->avl_balance == -1) + x->avl_balance = 0, y->avl_balance = +1; + else if (w->avl_balance == 0) + x->avl_balance = y->avl_balance = 0; + else /* |w->avl_balance == +1| */ + x->avl_balance = -1, y->avl_balance = 0; + w->avl_balance = 0; + pa[k - 1]->avl_link[da[k - 1]] = w; + } + else { + y->avl_link[0] = x->avl_link[1]; + x->avl_link[1] = y; + pa[k - 1]->avl_link[da[k - 1]] = x; + if (x->avl_balance == 0) { + x->avl_balance = +1; + y->avl_balance = -1; + break; + } + else x->avl_balance = y->avl_balance = 0; + } + } + } + } + + // tree->avl_count--; + // tree->avl_generation++; + return item; +} + +/* ------------------------------------------------------------------------- */ +// below are functions by (C) Costa Tsaousis + +// --------------------------- +// traversing + +int avl_walker(avl_t *node, int (*callback)(void * /*entry*/, void * /*data*/), void *data) { + int total = 0, ret = 0; + + if(node->avl_link[0]) { + ret = avl_walker(node->avl_link[0], callback, data); + if(ret < 0) return ret; + total += ret; + } + + ret = callback(node, data); + if(ret < 0) return ret; + total += ret; + + if(node->avl_link[1]) { + ret = avl_walker(node->avl_link[1], callback, data); + if (ret < 0) return ret; + total += ret; + } + + return total; +} + +int avl_traverse(avl_tree_type *tree, int (*callback)(void * /*entry*/, void * /*data*/), void *data) { + if(tree->root) + return avl_walker(tree->root, callback, data); + else + return 0; +} + +// --------------------------- +// locks + +void avl_read_lock(avl_tree_lock *t) { +#ifndef AVL_WITHOUT_PTHREADS +#ifdef AVL_LOCK_WITH_MUTEX + netdata_mutex_lock(&t->mutex); +#else + netdata_rwlock_rdlock(&t->rwlock); +#endif +#endif /* AVL_WITHOUT_PTHREADS */ +} + +void avl_write_lock(avl_tree_lock *t) { +#ifndef AVL_WITHOUT_PTHREADS +#ifdef AVL_LOCK_WITH_MUTEX + netdata_mutex_lock(&t->mutex); +#else + netdata_rwlock_wrlock(&t->rwlock); +#endif +#endif /* AVL_WITHOUT_PTHREADS */ +} + +void avl_unlock(avl_tree_lock *t) { +#ifndef AVL_WITHOUT_PTHREADS +#ifdef AVL_LOCK_WITH_MUTEX + netdata_mutex_unlock(&t->mutex); +#else + netdata_rwlock_unlock(&t->rwlock); +#endif +#endif /* AVL_WITHOUT_PTHREADS */ +} + +// --------------------------- +// operations with locking + +void avl_init_lock(avl_tree_lock *tree, int (*compar)(void * /*a*/, void * /*b*/)) { + avl_init(&tree->avl_tree, compar); + +#ifndef AVL_WITHOUT_PTHREADS + int lock; + +#ifdef AVL_LOCK_WITH_MUTEX + lock = netdata_mutex_init(&tree->mutex, NULL); +#else + lock = netdata_rwlock_init(&tree->rwlock); +#endif + + if(lock != 0) + fatal("Failed to initialize AVL mutex/rwlock, error: %d", lock); + +#endif /* AVL_WITHOUT_PTHREADS */ +} + +void avl_destroy_lock(avl_tree_lock *tree) { +#ifndef AVL_WITHOUT_PTHREADS + int lock; + +#ifdef AVL_LOCK_WITH_MUTEX + lock = netdata_mutex_destroy(&tree->mutex); +#else + lock = netdata_rwlock_destroy(&tree->rwlock); +#endif + + if(lock != 0) + fatal("Failed to destroy AVL mutex/rwlock, error: %d", lock); + +#endif /* AVL_WITHOUT_PTHREADS */ +} + +avl_t *avl_search_lock(avl_tree_lock *tree, avl_t *item) { + avl_read_lock(tree); + avl_t *ret = avl_search(&tree->avl_tree, item); + avl_unlock(tree); + return ret; +} + +avl_t * avl_remove_lock(avl_tree_lock *tree, avl_t *item) { + avl_write_lock(tree); + avl_t *ret = avl_remove(&tree->avl_tree, item); + avl_unlock(tree); + return ret; +} + +avl_t *avl_insert_lock(avl_tree_lock *tree, avl_t *item) { + avl_write_lock(tree); + avl_t * ret = avl_insert(&tree->avl_tree, item); + avl_unlock(tree); + return ret; +} + +int avl_traverse_lock(avl_tree_lock *tree, int (*callback)(void * /*entry*/, void * /*data*/), void *data) { + int ret; + avl_read_lock(tree); + ret = avl_traverse(&tree->avl_tree, callback, data); + avl_unlock(tree); + return ret; +} + +void avl_init(avl_tree_type *tree, int (*compar)(void * /*a*/, void * /*b*/)) { + tree->root = NULL; + tree->compar = compar; +} + +// ------------------ diff --git a/libnetdata/avl/avl.h b/libnetdata/avl/avl.h new file mode 100644 index 0000000..eba967f --- /dev/null +++ b/libnetdata/avl/avl.h @@ -0,0 +1,92 @@ +// SPDX-License-Identifier: LGPL-3.0-or-later + +#ifndef _AVL_H +#define _AVL_H 1 + +#include "../libnetdata.h" + +/* Maximum AVL tree height. */ +#ifndef AVL_MAX_HEIGHT +#define AVL_MAX_HEIGHT 92 +#endif + +#ifndef AVL_WITHOUT_PTHREADS +#include + +// #define AVL_LOCK_WITH_MUTEX 1 + +#ifdef AVL_LOCK_WITH_MUTEX +#define AVL_LOCK_INITIALIZER NETDATA_MUTEX_INITIALIZER +#else /* AVL_LOCK_WITH_MUTEX */ +#define AVL_LOCK_INITIALIZER NETDATA_RWLOCK_INITIALIZER +#endif /* AVL_LOCK_WITH_MUTEX */ + +#else /* AVL_WITHOUT_PTHREADS */ +#define AVL_LOCK_INITIALIZER +#endif /* AVL_WITHOUT_PTHREADS */ + +/* Data structures */ + +/* One element of the AVL tree */ +typedef struct avl_element { + struct avl_element *avl_link[2]; /* Subtrees. */ + signed char avl_balance; /* Balance factor. */ +} avl_t; + +/* An AVL tree */ +typedef struct avl_tree_type { + avl_t *root; + int (*compar)(void *a, void *b); +} avl_tree_type; + +typedef struct avl_tree_lock { + avl_tree_type avl_tree; + +#ifndef AVL_WITHOUT_PTHREADS +#ifdef AVL_LOCK_WITH_MUTEX + netdata_mutex_t mutex; +#else /* AVL_LOCK_WITH_MUTEX */ + netdata_rwlock_t rwlock; +#endif /* AVL_LOCK_WITH_MUTEX */ +#endif /* AVL_WITHOUT_PTHREADS */ +} avl_tree_lock; + +/* Public methods */ + +/* Insert element a into the AVL tree t + * returns the added element a, or a pointer the + * element that is equal to a (as returned by t->compar()) + * a is linked directly to the tree, so it has to + * be properly allocated by the caller. + */ +avl_t *avl_insert_lock(avl_tree_lock *tree, avl_t *item) NEVERNULL WARNUNUSED; +avl_t *avl_insert(avl_tree_type *tree, avl_t *item) NEVERNULL WARNUNUSED; + +/* Remove an element a from the AVL tree t + * returns a pointer to the removed element + * or NULL if an element equal to a is not found + * (equal as returned by t->compar()) + */ +avl_t *avl_remove_lock(avl_tree_lock *tree, avl_t *item) WARNUNUSED; +avl_t *avl_remove(avl_tree_type *tree, avl_t *item) WARNUNUSED; + +/* Find the element into the tree that equal to a + * (equal as returned by t->compar()) + * returns NULL is no element is equal to a + */ +avl_t *avl_search_lock(avl_tree_lock *tree, avl_t *item); +avl_t *avl_search(avl_tree_type *tree, avl_t *item); + +/* Initialize the avl_tree_lock + */ +void avl_init_lock(avl_tree_lock *tree, int (*compar)(void *a, void *b)); +void avl_init(avl_tree_type *tree, int (*compar)(void *a, void *b)); + +/* Destroy the avl_tree_lock locks + */ +void avl_destroy_lock(avl_tree_lock *tree); + +int avl_traverse_lock(avl_tree_lock *tree, int (*callback)(void *entry, void *data), void *data); +int avl_traverse(avl_tree_type *tree, int (*callback)(void *entry, void *data), void *data); + +#endif /* avl.h */ diff --git a/libnetdata/buffer/Makefile.am b/libnetdata/buffer/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/buffer/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/buffer/README.md b/libnetdata/buffer/README.md new file mode 100644 index 0000000..c5f66e6 --- /dev/null +++ b/libnetdata/buffer/README.md @@ -0,0 +1,16 @@ + + +# BUFFER + +`BUFFER` is a convenience library for working with strings in `C`. +Mainly, `BUFFER`s eliminate the need for tracking the string length, thus providing +a safe alternative for string operations. + +Also, they are super fast in printing and appending data to the string and its `buffer_strlen()` +is just a lookup (it does not traverse the string). + +Netdata uses `BUFFER`s for preparing web responses and buffering data to be sent upstream or +to external databases. diff --git a/libnetdata/buffer/buffer.c b/libnetdata/buffer/buffer.c new file mode 100644 index 0000000..d094058 --- /dev/null +++ b/libnetdata/buffer/buffer.c @@ -0,0 +1,525 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +#define BUFFER_OVERFLOW_EOF "EOF" + +static inline void buffer_overflow_init(BUFFER *b) +{ + b->buffer[b->size] = '\0'; + strcpy(&b->buffer[b->size + 1], BUFFER_OVERFLOW_EOF); +} + +#ifdef NETDATA_INTERNAL_CHECKS +#define buffer_overflow_check(b) _buffer_overflow_check(b, __FILE__, __FUNCTION__, __LINE__) +#else +#define buffer_overflow_check(b) +#endif + +static inline void _buffer_overflow_check(BUFFER *b, const char *file, const char *function, const unsigned long line) +{ + if(b->len > b->size) { + error("BUFFER: length %zu is above size %zu, at line %lu, at function %s() of file '%s'.", b->len, b->size, line, function, file); + b->len = b->size; + } + + if(b->buffer[b->size] != '\0' || strcmp(&b->buffer[b->size + 1], BUFFER_OVERFLOW_EOF) != 0) { + error("BUFFER: detected overflow at line %lu, at function %s() of file '%s'.", line, function, file); + buffer_overflow_init(b); + } +} + + +void buffer_reset(BUFFER *wb) +{ + buffer_flush(wb); + + wb->contenttype = CT_TEXT_PLAIN; + wb->options = 0; + wb->date = 0; + wb->expires = 0; + + buffer_overflow_check(wb); +} + +const char *buffer_tostring(BUFFER *wb) +{ + buffer_need_bytes(wb, 1); + wb->buffer[wb->len] = '\0'; + + buffer_overflow_check(wb); + + return(wb->buffer); +} + +void buffer_char_replace(BUFFER *wb, char from, char to) +{ + char *s = wb->buffer, *end = &wb->buffer[wb->len]; + + while(s != end) { + if(*s == from) *s = to; + s++; + } + + buffer_overflow_check(wb); +} + +// This trick seems to give an 80% speed increase in 32bit systems +// print_number_llu_r() will just print the digits up to the +// point the remaining value fits in 32 bits, and then calls +// print_number_lu_r() to print the rest with 32 bit arithmetic. + +inline char *print_number_lu_r(char *str, unsigned long uvalue) { + char *wstr = str; + + // print each digit + do *wstr++ = (char)('0' + (uvalue % 10)); while(uvalue /= 10); + return wstr; +} + +inline char *print_number_llu_r(char *str, unsigned long long uvalue) { + char *wstr = str; + + // print each digit + do *wstr++ = (char)('0' + (uvalue % 10)); while((uvalue /= 10) && uvalue > (unsigned long long)0xffffffff); + if(uvalue) return print_number_lu_r(wstr, uvalue); + return wstr; +} + +inline char *print_number_llu_r_smart(char *str, unsigned long long uvalue) { + switch (sizeof(void *)) { + case 4: + str = (uvalue > (unsigned long long) 0xffffffff) ? print_number_llu_r(str, uvalue) : + print_number_lu_r(str, uvalue); + break; + case 8: + do { + *str++ = (char) ('0' + (uvalue % 10)); + } while (uvalue /= 10); + break; + default: + fatal("Netdata supports only 32-bit & 64-bit systems."); + } + + return str; +} + +void buffer_print_llu(BUFFER *wb, unsigned long long uvalue) +{ + buffer_need_bytes(wb, 50); + + char *str = &wb->buffer[wb->len]; + char *wstr = str; + + switch (sizeof(void *)) { + case 4: + wstr = (uvalue > (unsigned long long) 0xffffffff) ? print_number_llu_r(wstr, uvalue) : + print_number_lu_r(wstr, uvalue); + break; + case 8: + do { + *wstr++ = (char) ('0' + (uvalue % 10)); + } while (uvalue /= 10); + break; + default: + fatal("Netdata supports only 32-bit & 64-bit systems."); + } + + // terminate it + *wstr = '\0'; + + // reverse it + char *begin = str, *end = wstr - 1, aux; + while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux; + + // return the buffer length + wb->len += wstr - str; +} + +void buffer_print_ll(BUFFER *wb, long long value) +{ + buffer_need_bytes(wb, 50); + + if(value < 0) { + buffer_fast_strcat(wb, "-", 1); + value = -value; + } + + buffer_print_llu(wb, value); +} + +static unsigned char bits03_to_hex[16] = { + [0] = '0', + [1] = '1', + [2] = '2', + [3] = '3', + [4] = '4', + [5] = '5', + [6] = '6', + [7] = '7', + [8] = '8', + [9] = '9', + [10] = 'A', + [11] = 'B', + [12] = 'C', + [13] = 'D', + [14] = 'E', + [15] = 'F' +}; + +void buffer_print_llu_hex(BUFFER *wb, unsigned long long value) +{ + unsigned char buffer[sizeof(unsigned long long) * 2 + 2 + 1]; // 8 bytes * 2 + '0x' + '\0' + unsigned char *e = &buffer[sizeof(unsigned long long) * 2 + 2]; + unsigned char *p = e; + + *p-- = '\0'; + *p-- = bits03_to_hex[value & 0xF]; + value >>= 4; + if(value) { + *p-- = bits03_to_hex[value & 0xF]; + value >>= 4; + + while(value) { + *p-- = bits03_to_hex[value & 0xF]; + value >>= 4; + + if(value) { + *p-- = bits03_to_hex[value & 0xF]; + value >>= 4; + } + } + } + *p-- = 'x'; + *p = '0'; + + buffer_fast_strcat(wb, (char *)p, e - p); +} + +void buffer_fast_strcat(BUFFER *wb, const char *txt, size_t len) { + if(unlikely(!txt || !*txt)) return; + + buffer_need_bytes(wb, len + 1); + + char *s = &wb->buffer[wb->len]; + const char *end = &txt[len + 1]; + + while(txt != end) + *s++ = *txt++; + + wb->len += len; + + // keep it NULL terminating + // not counting it at wb->len + wb->buffer[wb->len] = '\0'; +} + +void buffer_strcat(BUFFER *wb, const char *txt) +{ + // buffer_sprintf(wb, "%s", txt); + + if(unlikely(!txt || !*txt)) return; + + buffer_need_bytes(wb, 1); + + char *s = &wb->buffer[wb->len], *start, *end = &wb->buffer[wb->size]; + size_t len = wb->len; + + start = s; + while(*txt && s != end) + *s++ = *txt++; + + len += s - start; + + wb->len = len; + buffer_overflow_check(wb); + + if(*txt) { + debug(D_WEB_BUFFER, "strcat(): increasing web_buffer at position %zu, size = %zu\n", wb->len, wb->size); + len = strlen(txt); + buffer_fast_strcat(wb, txt, len); + } + else { + // terminate the string + // without increasing the length + buffer_need_bytes(wb, (size_t)1); + wb->buffer[wb->len] = '\0'; + } +} + +void buffer_strcat_jsonescape(BUFFER *wb, const char *txt) +{ + while(*txt) { + switch(*txt) { + case '\\': + buffer_need_bytes(wb, 2); + wb->buffer[wb->len++] = '\\'; + wb->buffer[wb->len++] = '\\'; + break; + case '"': + buffer_need_bytes(wb, 2); + wb->buffer[wb->len++] = '\\'; + wb->buffer[wb->len++] = '"'; + break; + default: { + buffer_need_bytes(wb, 1); + wb->buffer[wb->len++] = *txt; + } + } + txt++; + } + + buffer_overflow_check(wb); +} + +void buffer_strcat_htmlescape(BUFFER *wb, const char *txt) +{ + while(*txt) { + switch(*txt) { + case '&': buffer_strcat(wb, "&"); break; + case '<': buffer_strcat(wb, "<"); break; + case '>': buffer_strcat(wb, ">"); break; + case '"': buffer_strcat(wb, """); break; + case '/': buffer_strcat(wb, "/"); break; + case '\'': buffer_strcat(wb, "'"); break; + default: { + buffer_need_bytes(wb, 1); + wb->buffer[wb->len++] = *txt; + } + } + txt++; + } + + buffer_overflow_check(wb); +} + +void buffer_snprintf(BUFFER *wb, size_t len, const char *fmt, ...) +{ + if(unlikely(!fmt || !*fmt)) return; + + buffer_need_bytes(wb, len + 1); + + va_list args; + va_start(args, fmt); + wb->len += vsnprintfz(&wb->buffer[wb->len], len, fmt, args); + va_end(args); + + buffer_overflow_check(wb); + + // the buffer is \0 terminated by vsnprintfz +} + +void buffer_vsprintf(BUFFER *wb, const char *fmt, va_list args) +{ + if(unlikely(!fmt || !*fmt)) return; + + size_t wrote = 0, need = 2, space_remaining = 0; + + do { + need += space_remaining * 2; + + debug(D_WEB_BUFFER, "web_buffer_sprintf(): increasing web_buffer at position %zu, size = %zu, by %zu bytes (wrote = %zu)\n", wb->len, wb->size, need, wrote); + buffer_need_bytes(wb, need); + + space_remaining = wb->size - wb->len - 1; + + wrote = (size_t) vsnprintfz(&wb->buffer[wb->len], space_remaining, fmt, args); + + } while(wrote >= space_remaining); + + wb->len += wrote; + + // the buffer is \0 terminated by vsnprintf +} + +void buffer_sprintf(BUFFER *wb, const char *fmt, ...) +{ + if(unlikely(!fmt || !*fmt)) return; + + va_list args; + size_t wrote = 0, need = 2, space_remaining = 0; + + do { + need += space_remaining * 2; + + debug(D_WEB_BUFFER, "web_buffer_sprintf(): increasing web_buffer at position %zu, size = %zu, by %zu bytes (wrote = %zu)\n", wb->len, wb->size, need, wrote); + buffer_need_bytes(wb, need); + + space_remaining = wb->size - wb->len - 1; + + va_start(args, fmt); + wrote = (size_t) vsnprintfz(&wb->buffer[wb->len], space_remaining, fmt, args); + va_end(args); + + } while(wrote >= space_remaining); + + wb->len += wrote; + + // the buffer is \0 terminated by vsnprintf +} + + +void buffer_rrd_value(BUFFER *wb, NETDATA_DOUBLE value) +{ + buffer_need_bytes(wb, 50); + + if(isnan(value) || isinf(value)) { + buffer_strcat(wb, "null"); + return; + } + else + wb->len += print_netdata_double(&wb->buffer[wb->len], value); + + // terminate it + buffer_need_bytes(wb, 1); + wb->buffer[wb->len] = '\0'; + + buffer_overflow_check(wb); +} + +// generate a javascript date, the fastest possible way... +void buffer_jsdate(BUFFER *wb, int year, int month, int day, int hours, int minutes, int seconds) +{ + // 10 20 30 = 35 + // 01234567890123456789012345678901234 + // Date(2014,04,01,03,28,20) + + buffer_need_bytes(wb, 30); + + char *b = &wb->buffer[wb->len], *p; + unsigned int *q = (unsigned int *)b; + + #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + *q++ = 0x65746144; // "Date" backwards. + #else + *q++ = 0x44617465; // "Date" + #endif + p = (char *)q; + + *p++ = '('; + *p++ = '0' + year / 1000; year %= 1000; + *p++ = '0' + year / 100; year %= 100; + *p++ = '0' + year / 10; + *p++ = '0' + year % 10; + *p++ = ','; + *p = '0' + month / 10; if (*p != '0') p++; + *p++ = '0' + month % 10; + *p++ = ','; + *p = '0' + day / 10; if (*p != '0') p++; + *p++ = '0' + day % 10; + *p++ = ','; + *p = '0' + hours / 10; if (*p != '0') p++; + *p++ = '0' + hours % 10; + *p++ = ','; + *p = '0' + minutes / 10; if (*p != '0') p++; + *p++ = '0' + minutes % 10; + *p++ = ','; + *p = '0' + seconds / 10; if (*p != '0') p++; + *p++ = '0' + seconds % 10; + + unsigned short *r = (unsigned short *)p; + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + *r++ = 0x0029; // ")\0" backwards. + #else + *r++ = 0x2900; // ")\0" + #endif + + wb->len += (size_t)((char *)r - b - 1); + + // terminate it + wb->buffer[wb->len] = '\0'; + buffer_overflow_check(wb); +} + +// generate a date, the fastest possible way... +void buffer_date(BUFFER *wb, int year, int month, int day, int hours, int minutes, int seconds) +{ + // 10 20 30 = 35 + // 01234567890123456789012345678901234 + // 2014-04-01 03:28:20 + + buffer_need_bytes(wb, 36); + + char *b = &wb->buffer[wb->len]; + char *p = b; + + *p++ = '0' + year / 1000; year %= 1000; + *p++ = '0' + year / 100; year %= 100; + *p++ = '0' + year / 10; + *p++ = '0' + year % 10; + *p++ = '-'; + *p++ = '0' + month / 10; + *p++ = '0' + month % 10; + *p++ = '-'; + *p++ = '0' + day / 10; + *p++ = '0' + day % 10; + *p++ = ' '; + *p++ = '0' + hours / 10; + *p++ = '0' + hours % 10; + *p++ = ':'; + *p++ = '0' + minutes / 10; + *p++ = '0' + minutes % 10; + *p++ = ':'; + *p++ = '0' + seconds / 10; + *p++ = '0' + seconds % 10; + *p = '\0'; + + wb->len += (size_t)(p - b); + + // terminate it + wb->buffer[wb->len] = '\0'; + buffer_overflow_check(wb); +} + +BUFFER *buffer_create(size_t size) +{ + BUFFER *b; + + debug(D_WEB_BUFFER, "Creating new web buffer of size %zu.", size); + + b = callocz(1, sizeof(BUFFER)); + b->buffer = mallocz(size + sizeof(BUFFER_OVERFLOW_EOF) + 2); + b->buffer[0] = '\0'; + b->size = size; + b->contenttype = CT_TEXT_PLAIN; + buffer_overflow_init(b); + buffer_overflow_check(b); + + return(b); +} + +void buffer_free(BUFFER *b) { + if(unlikely(!b)) return; + + buffer_overflow_check(b); + + debug(D_WEB_BUFFER, "Freeing web buffer of size %zu.", b->size); + + freez(b->buffer); + freez(b); +} + +void buffer_increase(BUFFER *b, size_t free_size_required) { + buffer_overflow_check(b); + + size_t left = b->size - b->len; + if(left >= free_size_required) return; + + size_t wanted = free_size_required - left; + size_t minimum = WEB_DATA_LENGTH_INCREASE_STEP; + if(minimum > wanted) wanted = minimum; + + size_t optimal = b->size; + if(b->size > 5*1024*1024) optimal = b->size / 2; + + if(optimal > wanted) wanted = optimal; + + debug(D_WEB_BUFFER, "Increasing data buffer from size %zu to %zu.", b->size, b->size + wanted); + + b->buffer = reallocz(b->buffer, b->size + wanted + sizeof(BUFFER_OVERFLOW_EOF) + 2); + b->size += wanted; + + buffer_overflow_init(b); + buffer_overflow_check(b); +} diff --git a/libnetdata/buffer/buffer.h b/libnetdata/buffer/buffer.h new file mode 100644 index 0000000..ce6f528 --- /dev/null +++ b/libnetdata/buffer/buffer.h @@ -0,0 +1,89 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_WEB_BUFFER_H +#define NETDATA_WEB_BUFFER_H 1 + +#include "../libnetdata.h" + +#define WEB_DATA_LENGTH_INCREASE_STEP 1024 + +typedef struct web_buffer { + size_t size; // allocation size of buffer, in bytes + size_t len; // current data length in buffer, in bytes + char *buffer; // the buffer itself + uint8_t contenttype; // the content type of the data in the buffer + uint8_t options; // options related to the content + time_t date; // the timestamp this content has been generated + time_t expires; // the timestamp this content expires +} BUFFER; + +// options +#define WB_CONTENT_CACHEABLE 1 +#define WB_CONTENT_NO_CACHEABLE 2 + +// content-types +#define CT_APPLICATION_JSON 1 +#define CT_TEXT_PLAIN 2 +#define CT_TEXT_HTML 3 +#define CT_APPLICATION_X_JAVASCRIPT 4 +#define CT_TEXT_CSS 5 +#define CT_TEXT_XML 6 +#define CT_APPLICATION_XML 7 +#define CT_TEXT_XSL 8 +#define CT_APPLICATION_OCTET_STREAM 9 +#define CT_APPLICATION_X_FONT_TRUETYPE 10 +#define CT_APPLICATION_X_FONT_OPENTYPE 11 +#define CT_APPLICATION_FONT_WOFF 12 +#define CT_APPLICATION_FONT_WOFF2 13 +#define CT_APPLICATION_VND_MS_FONTOBJ 14 +#define CT_IMAGE_SVG_XML 15 +#define CT_IMAGE_PNG 16 +#define CT_IMAGE_JPG 17 +#define CT_IMAGE_GIF 18 +#define CT_IMAGE_XICON 19 +#define CT_IMAGE_ICNS 20 +#define CT_IMAGE_BMP 21 +#define CT_PROMETHEUS 22 + +#define buffer_cacheable(wb) do { (wb)->options |= WB_CONTENT_CACHEABLE; if((wb)->options & WB_CONTENT_NO_CACHEABLE) (wb)->options &= ~WB_CONTENT_NO_CACHEABLE; } while(0) +#define buffer_no_cacheable(wb) do { (wb)->options |= WB_CONTENT_NO_CACHEABLE; if((wb)->options & WB_CONTENT_CACHEABLE) (wb)->options &= ~WB_CONTENT_CACHEABLE; (wb)->expires = 0; } while(0) + +#define buffer_strlen(wb) ((wb)->len) +const char *buffer_tostring(BUFFER *wb); + +#define buffer_flush(wb) wb->buffer[(wb)->len = 0] = '\0' +void buffer_reset(BUFFER *wb); + +void buffer_strcat(BUFFER *wb, const char *txt); +void buffer_fast_strcat(BUFFER *wb, const char *txt, size_t len); +void buffer_rrd_value(BUFFER *wb, NETDATA_DOUBLE value); + +void buffer_date(BUFFER *wb, int year, int month, int day, int hours, int minutes, int seconds); +void buffer_jsdate(BUFFER *wb, int year, int month, int day, int hours, int minutes, int seconds); + +BUFFER *buffer_create(size_t size); +void buffer_free(BUFFER *b); +void buffer_increase(BUFFER *b, size_t free_size_required); + +void buffer_snprintf(BUFFER *wb, size_t len, const char *fmt, ...) PRINTFLIKE(3, 4); +void buffer_vsprintf(BUFFER *wb, const char *fmt, va_list args); +void buffer_sprintf(BUFFER *wb, const char *fmt, ...) PRINTFLIKE(2,3); +void buffer_strcat_jsonescape(BUFFER *wb, const char *txt); +void buffer_strcat_htmlescape(BUFFER *wb, const char *txt); + +void buffer_char_replace(BUFFER *wb, char from, char to); + +char *print_number_lu_r(char *str, unsigned long uvalue); +char *print_number_llu_r(char *str, unsigned long long uvalue); +char *print_number_llu_r_smart(char *str, unsigned long long uvalue); + +void buffer_print_llu(BUFFER *wb, unsigned long long uvalue); +void buffer_print_ll(BUFFER *wb, long long value); +void buffer_print_llu_hex(BUFFER *wb, unsigned long long value); + +static inline void buffer_need_bytes(BUFFER *buffer, size_t needed_free_size) { + if(unlikely(buffer->size - buffer->len < needed_free_size)) + buffer_increase(buffer, needed_free_size); +} + +#endif /* NETDATA_WEB_BUFFER_H */ diff --git a/libnetdata/circular_buffer/Makefile.am b/libnetdata/circular_buffer/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/circular_buffer/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/circular_buffer/README.md b/libnetdata/circular_buffer/README.md new file mode 100644 index 0000000..4482173 --- /dev/null +++ b/libnetdata/circular_buffer/README.md @@ -0,0 +1,10 @@ + + +# Circular Buffer + +`struct circular_buffer` is an adaptive circular buffer. It will start at an initial size +and grow up to a maximum size as it fills. Two indices within the structure track the current +`read` and `write` position for data. diff --git a/libnetdata/circular_buffer/circular_buffer.c b/libnetdata/circular_buffer/circular_buffer.c new file mode 100644 index 0000000..c791b42 --- /dev/null +++ b/libnetdata/circular_buffer/circular_buffer.c @@ -0,0 +1,96 @@ +#include "../libnetdata.h" + +struct circular_buffer *cbuffer_new(size_t initial, size_t max) { + struct circular_buffer *result = mallocz(sizeof(*result)); + result->size = initial; + result->data = mallocz(initial); + result->write = 0; + result->read = 0; + result->max_size = max; + return result; +} + +void cbuffer_free(struct circular_buffer *buf) { + freez(buf->data); + freez(buf); +} + +static int cbuffer_realloc_unsafe(struct circular_buffer *buf) { + // Check that we can grow + if (buf->size >= buf->max_size) + return 1; + size_t new_size = buf->size * 2; + if (new_size > buf->max_size) + new_size = buf->max_size; + + // We know that: size < new_size <= max_size + // For simplicity align the current data at the bottom of the new buffer + char *new_data = mallocz(new_size); + if (buf->read == buf->write) + buf->write = 0; // buffer is empty + else if (buf->read < buf->write) { + memcpy(new_data, buf->data + buf->read, buf->write - buf->read); + buf->write -= buf->read; + } else { + size_t top_part = buf->size - buf->read; + memcpy(new_data, buf->data + buf->read, top_part); + memcpy(new_data + top_part, buf->data, buf->write); + buf->write = top_part + buf->write; + } + buf->read = 0; + + // Switch buffers + freez(buf->data); + buf->data = new_data; + buf->size = new_size; + return 0; +} + +size_t cbuffer_available_size_unsafe(struct circular_buffer *buf) { + size_t len = (buf->write >= buf->read) ? (buf->write - buf->read) : (buf->size - buf->read + buf->write); + return buf->max_size - len; +} + +int cbuffer_add_unsafe(struct circular_buffer *buf, const char *d, size_t d_len) { + size_t len = (buf->write >= buf->read) ? (buf->write - buf->read) : (buf->size - buf->read + buf->write); + while (d_len + len >= buf->size) { + if (cbuffer_realloc_unsafe(buf)) { + return 1; + } + } + // Guarantee: write + d_len cannot hit read + if (buf->write + d_len < buf->size) { + memcpy(buf->data + buf->write, d, d_len); + buf->write += d_len; + } + else { + size_t top_part = buf->size - buf->write; + memcpy(buf->data + buf->write, d, top_part); + memcpy(buf->data, d + top_part, d_len - top_part); + buf->write = d_len - top_part; + } + return 0; +} + +// Assume caller does not remove too many bytes (i.e. read will jump over write) +void cbuffer_remove_unsafe(struct circular_buffer *buf, size_t num) { + buf->read += num; + // Assume num < size (i.e. caller cannot remove more bytes than are in the buffer) + if (buf->read >= buf->size) + buf->read -= buf->size; +} + +size_t cbuffer_next_unsafe(struct circular_buffer *buf, char **start) { + if (start != NULL) + *start = buf->data + buf->read; + + if (buf->read <= buf->write) { + return buf->write - buf->read; // Includes empty case + } + return buf->size - buf->read; +} + +void cbuffer_flush(struct circular_buffer*buf) { + buf->write = 0; + buf->read = 0; +} \ No newline at end of file diff --git a/libnetdata/circular_buffer/circular_buffer.h b/libnetdata/circular_buffer/circular_buffer.h new file mode 100644 index 0000000..8c42aa8 --- /dev/null +++ b/libnetdata/circular_buffer/circular_buffer.h @@ -0,0 +1,19 @@ +#ifndef CIRCULAR_BUFFER_H +#define CIRCULAR_BUFFER_H 1 + +#include + +struct circular_buffer { + size_t size, write, read, max_size; + char *data; +}; + +struct circular_buffer *cbuffer_new(size_t initial, size_t max); +void cbuffer_free(struct circular_buffer *buf); +int cbuffer_add_unsafe(struct circular_buffer *buf, const char *d, size_t d_len); +void cbuffer_remove_unsafe(struct circular_buffer *buf, size_t num); +size_t cbuffer_next_unsafe(struct circular_buffer *buf, char **start); +size_t cbuffer_available_size_unsafe(struct circular_buffer *buf); +void cbuffer_flush(struct circular_buffer*buf); + +#endif diff --git a/libnetdata/clocks/Makefile.am b/libnetdata/clocks/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/clocks/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/clocks/README.md b/libnetdata/clocks/README.md new file mode 100644 index 0000000..3a7ce55 --- /dev/null +++ b/libnetdata/clocks/README.md @@ -0,0 +1,5 @@ + + + diff --git a/libnetdata/clocks/clocks.c b/libnetdata/clocks/clocks.c new file mode 100644 index 0000000..cabc000 --- /dev/null +++ b/libnetdata/clocks/clocks.c @@ -0,0 +1,432 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +// defaults are for compatibility +// call clocks_init() once, to optimize these default settings +static clockid_t clock_boottime_to_use = CLOCK_MONOTONIC; +static clockid_t clock_monotonic_to_use = CLOCK_MONOTONIC; + +usec_t clock_monotonic_resolution = 1000; +usec_t clock_realtime_resolution = 1000; + +#ifndef HAVE_CLOCK_GETTIME +inline int clock_gettime(clockid_t clk_id __maybe_unused, struct timespec *ts) { + struct timeval tv; + if(unlikely(gettimeofday(&tv, NULL) == -1)) { + error("gettimeofday() failed."); + return -1; + } + ts->tv_sec = tv.tv_sec; + ts->tv_nsec = (long)((tv.tv_usec % USEC_PER_SEC) * NSEC_PER_USEC); + return 0; +} +#endif + +// Similar to CLOCK_MONOTONIC, but provides access to a raw hardware-based time that is not subject to NTP adjustments +// or the incremental adjustments performed by adjtime(3). This clock does not count time that the system is suspended + +static void test_clock_monotonic_raw(void) { +#ifdef CLOCK_MONOTONIC_RAW + struct timespec ts; + if(clock_gettime(CLOCK_MONOTONIC_RAW, &ts) == -1 && errno == EINVAL) + clock_monotonic_to_use = CLOCK_MONOTONIC; + else + clock_monotonic_to_use = CLOCK_MONOTONIC_RAW; +#else + clock_monotonic_to_use = CLOCK_MONOTONIC; +#endif +} + +// When running a binary with CLOCK_BOOTTIME defined on a system with a linux kernel older than Linux 2.6.39 the +// clock_gettime(2) system call fails with EINVAL. In that case it must fall-back to CLOCK_MONOTONIC. + +static void test_clock_boottime(void) { + struct timespec ts; + if(clock_gettime(CLOCK_BOOTTIME, &ts) == -1 && errno == EINVAL) + clock_boottime_to_use = clock_monotonic_to_use; + else + clock_boottime_to_use = CLOCK_BOOTTIME; +} + +static usec_t get_clock_resolution(clockid_t clock) { + struct timespec ts; + clock_getres(clock, &ts); + return ts.tv_sec * USEC_PER_SEC + ts.tv_nsec * NSEC_PER_USEC; +} + +// perform any initializations required for clocks + +void clocks_init(void) { + // monotonic raw has to be tested before boottime + test_clock_monotonic_raw(); + + // boottime has to be tested after monotonic coarse + test_clock_boottime(); + + clock_monotonic_resolution = get_clock_resolution(clock_monotonic_to_use); + clock_realtime_resolution = get_clock_resolution(CLOCK_REALTIME); + + // if for any reason these are zero, netdata will crash + // since we use them as modulo to calculations + if(!clock_realtime_resolution) + clock_realtime_resolution = 1000; + + if(!clock_monotonic_resolution) + clock_monotonic_resolution = 1000; +} + +inline time_t now_sec(clockid_t clk_id) { + struct timespec ts; + if(unlikely(clock_gettime(clk_id, &ts) == -1)) { + error("clock_gettime(%d, ×pec) failed.", clk_id); + return 0; + } + return ts.tv_sec; +} + +inline usec_t now_usec(clockid_t clk_id) { + struct timespec ts; + if(unlikely(clock_gettime(clk_id, &ts) == -1)) { + error("clock_gettime(%d, ×pec) failed.", clk_id); + return 0; + } + return (usec_t)ts.tv_sec * USEC_PER_SEC + (ts.tv_nsec % NSEC_PER_SEC) / NSEC_PER_USEC; +} + +inline int now_timeval(clockid_t clk_id, struct timeval *tv) { + struct timespec ts; + + if(unlikely(clock_gettime(clk_id, &ts) == -1)) { + error("clock_gettime(%d, ×pec) failed.", clk_id); + tv->tv_sec = 0; + tv->tv_usec = 0; + return -1; + } + + tv->tv_sec = ts.tv_sec; + tv->tv_usec = (suseconds_t)((ts.tv_nsec % NSEC_PER_SEC) / NSEC_PER_USEC); + return 0; +} + +inline time_t now_realtime_sec(void) { + return now_sec(CLOCK_REALTIME); +} + +inline usec_t now_realtime_usec(void) { + return now_usec(CLOCK_REALTIME); +} + +inline int now_realtime_timeval(struct timeval *tv) { + return now_timeval(CLOCK_REALTIME, tv); +} + +inline time_t now_monotonic_sec(void) { + return now_sec(clock_monotonic_to_use); +} + +inline usec_t now_monotonic_usec(void) { + return now_usec(clock_monotonic_to_use); +} + +inline int now_monotonic_timeval(struct timeval *tv) { + return now_timeval(clock_monotonic_to_use, tv); +} + +inline time_t now_monotonic_high_precision_sec(void) { + return now_sec(CLOCK_MONOTONIC); +} + +inline usec_t now_monotonic_high_precision_usec(void) { + return now_usec(CLOCK_MONOTONIC); +} + +inline int now_monotonic_high_precision_timeval(struct timeval *tv) { + return now_timeval(CLOCK_MONOTONIC, tv); +} + +inline time_t now_boottime_sec(void) { + return now_sec(clock_boottime_to_use); +} + +inline usec_t now_boottime_usec(void) { + return now_usec(clock_boottime_to_use); +} + +inline int now_boottime_timeval(struct timeval *tv) { + return now_timeval(clock_boottime_to_use, tv); +} + +inline usec_t timeval_usec(struct timeval *tv) { + return (usec_t)tv->tv_sec * USEC_PER_SEC + (tv->tv_usec % USEC_PER_SEC); +} + +inline msec_t timeval_msec(struct timeval *tv) { + return (msec_t)tv->tv_sec * MSEC_PER_SEC + ((tv->tv_usec % USEC_PER_SEC) / MSEC_PER_SEC); +} + +inline susec_t dt_usec_signed(struct timeval *now, struct timeval *old) { + usec_t ts1 = timeval_usec(now); + usec_t ts2 = timeval_usec(old); + + if(likely(ts1 >= ts2)) return (susec_t)(ts1 - ts2); + return -((susec_t)(ts2 - ts1)); +} + +inline usec_t dt_usec(struct timeval *now, struct timeval *old) { + usec_t ts1 = timeval_usec(now); + usec_t ts2 = timeval_usec(old); + return (ts1 > ts2) ? (ts1 - ts2) : (ts2 - ts1); +} + +#ifdef __linux__ +void sleep_to_absolute_time(usec_t usec) { + static int einval_printed = 0, enotsup_printed = 0, eunknown_printed = 0; + clockid_t clock = CLOCK_REALTIME; + + struct timespec req = { + .tv_sec = (time_t)(usec / USEC_PER_SEC), + .tv_nsec = (suseconds_t)((usec % USEC_PER_SEC) * NSEC_PER_USEC) + }; + + int ret = 0; + while( (ret = clock_nanosleep(clock, TIMER_ABSTIME, &req, NULL)) != 0 ) { + if(ret == EINTR) continue; + else { + if (ret == EINVAL) { + if (!einval_printed) { + einval_printed++; + error( + "Invalid time given to clock_nanosleep(): clockid = %d, tv_sec = %lld, tv_nsec = %ld", + clock, + (long long)req.tv_sec, + req.tv_nsec); + } + } else if (ret == ENOTSUP) { + if (!enotsup_printed) { + enotsup_printed++; + error( + "Invalid clock id given to clock_nanosleep(): clockid = %d, tv_sec = %lld, tv_nsec = %ld", + clock, + (long long)req.tv_sec, + req.tv_nsec); + } + } else { + if (!eunknown_printed) { + eunknown_printed++; + error( + "Unknown return value %d from clock_nanosleep(): clockid = %d, tv_sec = %lld, tv_nsec = %ld", + ret, + clock, + (long long)req.tv_sec, + req.tv_nsec); + } + } + sleep_usec(usec); + } + } +}; +#endif + +#define HEARTBEAT_ALIGNMENT_STATISTICS_SIZE 10 +netdata_mutex_t heartbeat_alignment_mutex = NETDATA_MUTEX_INITIALIZER; +static size_t heartbeat_alignment_id = 0; + +struct heartbeat_thread_statistics { + size_t sequence; + usec_t dt; +}; +static struct heartbeat_thread_statistics heartbeat_alignment_values[HEARTBEAT_ALIGNMENT_STATISTICS_SIZE] = { 0 }; + +void heartbeat_statistics(usec_t *min_ptr, usec_t *max_ptr, usec_t *average_ptr, size_t *count_ptr) { + struct heartbeat_thread_statistics current[HEARTBEAT_ALIGNMENT_STATISTICS_SIZE]; + static struct heartbeat_thread_statistics old[HEARTBEAT_ALIGNMENT_STATISTICS_SIZE] = { 0 }; + + memcpy(current, heartbeat_alignment_values, sizeof(struct heartbeat_thread_statistics) * HEARTBEAT_ALIGNMENT_STATISTICS_SIZE); + + usec_t min = 0, max = 0, total = 0, average = 0; + size_t i, count = 0; + for(i = 0; i < HEARTBEAT_ALIGNMENT_STATISTICS_SIZE ;i++) { + if(current[i].sequence == old[i].sequence) continue; + usec_t value = current[i].dt - old[i].dt; + + if(!count) { + min = max = total = value; + count = 1; + } + else { + total += value; + if(value < min) min = value; + if(value > max) max = value; + count++; + } + } + if(count) + average = total / count; + + if(min_ptr) *min_ptr = min; + if(max_ptr) *max_ptr = max; + if(average_ptr) *average_ptr = average; + if(count_ptr) *count_ptr = count; + + memcpy(old, current, sizeof(struct heartbeat_thread_statistics) * HEARTBEAT_ALIGNMENT_STATISTICS_SIZE); +} + +inline void heartbeat_init(heartbeat_t *hb) { + hb->realtime = 0ULL; + hb->randomness = 250 * USEC_PER_MS + ((now_realtime_usec() * clock_realtime_resolution) % (250 * USEC_PER_MS)); + hb->randomness -= (hb->randomness % clock_realtime_resolution); + + netdata_mutex_lock(&heartbeat_alignment_mutex); + hb->statistics_id = heartbeat_alignment_id; + heartbeat_alignment_id++; + netdata_mutex_unlock(&heartbeat_alignment_mutex); + + if(hb->statistics_id < HEARTBEAT_ALIGNMENT_STATISTICS_SIZE) { + heartbeat_alignment_values[hb->statistics_id].dt = 0; + heartbeat_alignment_values[hb->statistics_id].sequence = 0; + } +} + +// waits for the next heartbeat +// it waits using the monotonic clock +// it returns the dt using the realtime clock + +usec_t heartbeat_next(heartbeat_t *hb, usec_t tick) { + if(unlikely(hb->randomness > tick / 2)) { + // TODO: The heartbeat tick should be specified at the heartbeat_init() function + usec_t tmp = (now_realtime_usec() * clock_realtime_resolution) % (tick / 2); + info("heartbeat randomness of %llu is too big for a tick of %llu - setting it to %llu", hb->randomness, tick, tmp); + hb->randomness = tmp; + } + + usec_t dt; + usec_t now = now_realtime_usec(); + usec_t next = now - (now % tick) + tick + hb->randomness; + + // align the next time we want to the clock resolution + if(next % clock_realtime_resolution) + next = next - (next % clock_realtime_resolution) + clock_realtime_resolution; + + // sleep_usec() has a loop to guarantee we will sleep for at least the requested time. + // According the specs, when we sleep for a relative time, clock adjustments should not affect the duration + // we sleep. + sleep_usec(next - now); + now = now_realtime_usec(); + dt = now - hb->realtime; + + if(hb->statistics_id < HEARTBEAT_ALIGNMENT_STATISTICS_SIZE) { + heartbeat_alignment_values[hb->statistics_id].dt += now - next; + heartbeat_alignment_values[hb->statistics_id].sequence++; + } + + if(unlikely(now < next)) { + errno = 0; + error("heartbeat clock: woke up %llu microseconds earlier than expected (can be due to the CLOCK_REALTIME set to the past).", next - now); + } + else if(unlikely(now - next > tick / 2)) { + errno = 0; + error("heartbeat clock: woke up %llu microseconds later than expected (can be due to system load or the CLOCK_REALTIME set to the future).", now - next); + } + + if(unlikely(!hb->realtime)) { + // the first time return zero + dt = 0; + } + + hb->realtime = now; + return dt; +} + +void sleep_usec(usec_t usec) { + // we expect microseconds (1.000.000 per second) + // but timespec is nanoseconds (1.000.000.000 per second) + struct timespec rem = { 0, 0 }, req = { + .tv_sec = (time_t) (usec / USEC_PER_SEC), + .tv_nsec = (suseconds_t) ((usec % USEC_PER_SEC) * NSEC_PER_USEC) + }; + +#ifdef __linux__ + while (clock_nanosleep(CLOCK_REALTIME, 0, &req, &rem) != 0) { +#else + while (nanosleep(&req, &rem) != 0) { +#endif + if (likely(errno == EINTR && (rem.tv_sec || rem.tv_nsec))) { + req = rem; + rem = (struct timespec){ 0, 0 }; + } + else { +#ifdef __linux__ + error("Cannot clock_nanosleep(CLOCK_REALTIME) for %llu microseconds.", usec); +#else + error("Cannot nanosleep() for %llu microseconds.", usec); +#endif + break; + } + } +} + +static inline collected_number uptime_from_boottime(void) { +#ifdef CLOCK_BOOTTIME_IS_AVAILABLE + return (collected_number)(now_boottime_usec() / USEC_PER_MS); +#else + error("uptime cannot be read from CLOCK_BOOTTIME on this system."); + return 0; +#endif +} + +static procfile *read_proc_uptime_ff = NULL; +static inline collected_number read_proc_uptime(char *filename) { + if(unlikely(!read_proc_uptime_ff)) { + read_proc_uptime_ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!read_proc_uptime_ff)) return 0; + } + + read_proc_uptime_ff = procfile_readall(read_proc_uptime_ff); + if(unlikely(!read_proc_uptime_ff)) return 0; + + if(unlikely(procfile_lines(read_proc_uptime_ff) < 1)) { + error("/proc/uptime has no lines."); + return 0; + } + if(unlikely(procfile_linewords(read_proc_uptime_ff, 0) < 1)) { + error("/proc/uptime has less than 1 word in it."); + return 0; + } + + return (collected_number)(strtondd(procfile_lineword(read_proc_uptime_ff, 0, 0), NULL) * 1000.0); +} + +inline collected_number uptime_msec(char *filename){ + static int use_boottime = -1; + + if(unlikely(use_boottime == -1)) { + collected_number uptime_boottime = uptime_from_boottime(); + collected_number uptime_proc = read_proc_uptime(filename); + + long long delta = (long long)uptime_boottime - (long long)uptime_proc; + if(delta < 0) delta = -delta; + + if(delta <= 1000 && uptime_boottime != 0) { + procfile_close(read_proc_uptime_ff); + info("Using now_boottime_usec() for uptime (dt is %lld ms)", delta); + use_boottime = 1; + } + else if(uptime_proc != 0) { + info("Using /proc/uptime for uptime (dt is %lld ms)", delta); + use_boottime = 0; + } + else { + error("Cannot find any way to read uptime on this system."); + return 1; + } + } + + collected_number uptime; + if(use_boottime) + uptime = uptime_from_boottime(); + else + uptime = read_proc_uptime(filename); + + return uptime; +} diff --git a/libnetdata/clocks/clocks.h b/libnetdata/clocks/clocks.h new file mode 100644 index 0000000..7738a2c --- /dev/null +++ b/libnetdata/clocks/clocks.h @@ -0,0 +1,160 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_CLOCKS_H +#define NETDATA_CLOCKS_H 1 + +#include "../libnetdata.h" + +#ifndef HAVE_STRUCT_TIMESPEC +struct timespec { + time_t tv_sec; /* seconds */ + long tv_nsec; /* nanoseconds */ +}; +#endif + +#ifndef HAVE_CLOCKID_T +typedef int clockid_t; +#endif + +typedef unsigned long long nsec_t; +typedef unsigned long long msec_t; +typedef unsigned long long usec_t; +typedef long long susec_t; + +typedef struct heartbeat { + usec_t realtime; + usec_t randomness; + size_t statistics_id; +} heartbeat_t; + +/* Linux value is as good as any other */ +#ifndef CLOCK_REALTIME +#define CLOCK_REALTIME 0 +#endif + +#ifndef CLOCK_MONOTONIC +/* fallback to CLOCK_REALTIME if not available */ +#define CLOCK_MONOTONIC CLOCK_REALTIME +#endif + +#ifndef CLOCK_BOOTTIME + +#ifdef CLOCK_UPTIME +/* CLOCK_BOOTTIME falls back to CLOCK_UPTIME on FreeBSD */ +#define CLOCK_BOOTTIME CLOCK_UPTIME +#else // CLOCK_UPTIME +/* CLOCK_BOOTTIME falls back to CLOCK_REALTIME */ +#define CLOCK_BOOTTIME CLOCK_REALTIME +#endif // CLOCK_UPTIME + +#else // CLOCK_BOOTTIME + +#ifdef HAVE_CLOCK_GETTIME +#define CLOCK_BOOTTIME_IS_AVAILABLE 1 // required for /proc/uptime +#endif // HAVE_CLOCK_GETTIME + +#endif // CLOCK_BOOTTIME + +#ifndef NSEC_PER_MSEC +#define NSEC_PER_MSEC 1000000ULL +#endif + +#ifndef NSEC_PER_SEC +#define NSEC_PER_SEC 1000000000ULL +#endif +#ifndef NSEC_PER_USEC +#define NSEC_PER_USEC 1000ULL +#endif + +#ifndef USEC_PER_SEC +#define USEC_PER_SEC 1000000ULL +#endif +#ifndef MSEC_PER_SEC +#define MSEC_PER_SEC 1000ULL +#endif + +#define USEC_PER_MS 1000ULL + +#ifndef HAVE_CLOCK_GETTIME +/* Fallback function for POSIX.1-2001 clock_gettime() function. + * + * We use a realtime clock from gettimeofday(), this will + * make systems without clock_gettime() support sensitive + * to time jumps or hibernation/suspend side effects. + */ +int clock_gettime(clockid_t clk_id, struct timespec *ts); +#endif + +/* + * Three clocks are available (cf. man 3 clock_gettime): + * + * REALTIME clock (i.e. wall-clock): + * This clock is affected by discontinuous jumps in the system time + * (e.g., if the system administrator manually changes the clock), and by the incremental adjustments performed by adjtime(3) and NTP. + * + * MONOTONIC clock + * Clock that cannot be set and represents monotonic time since some unspecified starting point. + * This clock is not affected by discontinuous jumps in the system time + * (e.g., if the system administrator manually changes the clock), but is affected by the incremental adjustments performed by adjtime(3) and NTP. + * If not available on the system, this clock falls back to REALTIME clock. + * + * BOOTTIME clock + * Identical to CLOCK_MONOTONIC, except it also includes any time that the system is suspended. + * This allows applications to get a suspend-aware monotonic clock without having to deal with the complications of CLOCK_REALTIME, + * which may have discontinuities if the time is changed using settimeofday(2). + * If not available on the system, this clock falls back to MONOTONIC clock. + * + * All now_*_timeval() functions fill the `struct timeval` with the time from the appropriate clock. + * Those functions return 0 on success, -1 else with errno set appropriately. + * + * All now_*_sec() functions return the time in seconds from the appropriate clock, or 0 on error. + * All now_*_usec() functions return the time in microseconds from the appropriate clock, or 0 on error. + * + */ +int now_realtime_timeval(struct timeval *tv); +time_t now_realtime_sec(void); +usec_t now_realtime_usec(void); + +int now_monotonic_timeval(struct timeval *tv); +time_t now_monotonic_sec(void); +usec_t now_monotonic_usec(void); +int now_monotonic_high_precision_timeval(struct timeval *tv); +time_t now_monotonic_high_precision_sec(void); +usec_t now_monotonic_high_precision_usec(void); + +int now_boottime_timeval(struct timeval *tv); +time_t now_boottime_sec(void); +usec_t now_boottime_usec(void); + +usec_t timeval_usec(struct timeval *tv); +msec_t timeval_msec(struct timeval *tv); + +usec_t dt_usec(struct timeval *now, struct timeval *old); +susec_t dt_usec_signed(struct timeval *now, struct timeval *old); + +void heartbeat_init(heartbeat_t *hb); + +/* Sleeps until next multiple of tick using monotonic clock. + * Returns elapsed time in microseconds since previous heartbeat + */ +usec_t heartbeat_next(heartbeat_t *hb, usec_t tick); + +void heartbeat_statistics(usec_t *min_ptr, usec_t *max_ptr, usec_t *average_ptr, size_t *count_ptr); + +void sleep_usec(usec_t usec); + +void clocks_init(void); + +// lower level functions - avoid using directly +time_t now_sec(clockid_t clk_id); +usec_t now_usec(clockid_t clk_id); +int now_timeval(clockid_t clk_id, struct timeval *tv); + +collected_number uptime_msec(char *filename); + +extern usec_t clock_monotonic_resolution; +extern usec_t clock_realtime_resolution; + +void sleep_to_absolute_time(usec_t usec); + +#endif /* NETDATA_CLOCKS_H */ diff --git a/libnetdata/completion/Makefile.am b/libnetdata/completion/Makefile.am new file mode 100644 index 0000000..babdcf0 --- /dev/null +++ b/libnetdata/completion/Makefile.am @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in diff --git a/libnetdata/completion/completion.c b/libnetdata/completion/completion.c new file mode 100644 index 0000000..b5ac86e --- /dev/null +++ b/libnetdata/completion/completion.c @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "completion.h" + +void completion_init(struct completion *p) +{ + p->completed = 0; + fatal_assert(0 == uv_cond_init(&p->cond)); + fatal_assert(0 == uv_mutex_init(&p->mutex)); +} + +void completion_destroy(struct completion *p) +{ + uv_cond_destroy(&p->cond); + uv_mutex_destroy(&p->mutex); +} + +void completion_wait_for(struct completion *p) +{ + uv_mutex_lock(&p->mutex); + while (0 == p->completed) { + uv_cond_wait(&p->cond, &p->mutex); + } + fatal_assert(1 == p->completed); + uv_mutex_unlock(&p->mutex); +} + +void completion_mark_complete(struct completion *p) +{ + uv_mutex_lock(&p->mutex); + p->completed = 1; + uv_cond_broadcast(&p->cond); + uv_mutex_unlock(&p->mutex); +} diff --git a/libnetdata/completion/completion.h b/libnetdata/completion/completion.h new file mode 100644 index 0000000..667360a --- /dev/null +++ b/libnetdata/completion/completion.h @@ -0,0 +1,22 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_COMPLETION_H +#define NETDATA_COMPLETION_H + +#include "../libnetdata.h" + +struct completion { + uv_mutex_t mutex; + uv_cond_t cond; + volatile unsigned completed; +}; + +void completion_init(struct completion *p); + +void completion_destroy(struct completion *p); + +void completion_wait_for(struct completion *p); + +void completion_mark_complete(struct completion *p); + +#endif /* NETDATA_COMPLETION_H */ diff --git a/libnetdata/config/Makefile.am b/libnetdata/config/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/config/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/config/README.md b/libnetdata/config/README.md new file mode 100644 index 0000000..2eccf7a --- /dev/null +++ b/libnetdata/config/README.md @@ -0,0 +1,54 @@ + + +# Netdata ini config files + +Configuration files `netdata.conf` and `stream.conf` are Netdata ini files. + +## Motivation + +The whole idea came up when we were evaluating the documentation involved +in maintaining a complex configuration system. Our intention was to give +configuration options for everything imaginable. But then, documenting all +these options would require a tremendous amount of time, users would have +to search through endless pages for the option they need, etc. + +We concluded then that **configuring software like that is a waste of time +and effort**. Of course there must be plenty of configuration options, but +the implementation itself should require a lot less effort for both the +developers and the users. + +So, we did this: + +1. No configuration is required to run Netdata +2. There are plenty of options to tweak +3. There is minimal documentation (or no at all) + +## Why this works? + +The configuration file is a `name = value` dictionary with `[sections]`. +Write whatever you like there as long as it follows this simple format. + +Netdata loads this dictionary and then when the code needs a value from +it, it just looks up the `name` in the dictionary at the proper `section`. +In all places, in the code, there are both the `names` and their +`default values`, so if something is not found in the configuration +file, the default is used. The lookup is made using B-Trees and hashes +(no string comparisons), so they are super fast. Also the `names` of the +settings can be `my super duper setting that once set to yes, will turn the world upside down = no` + +- so goodbye to most of the documentation involved. + +Next, Netdata can generate a valid configuration for the user to edit. +No need to remember anything or copy and paste settings. Just get the +configuration from the server (`/netdata.conf` on your Netdata server), +edit it and save it. + +Last, what about options you believe you have set, but you misspelled? +When you get the configuration file from the server, there will be a +comment above all `name = value` pairs the server does not use. +So you know that whatever you wrote there, is not used. + + diff --git a/libnetdata/config/appconfig.c b/libnetdata/config/appconfig.c new file mode 100644 index 0000000..938c7dd --- /dev/null +++ b/libnetdata/config/appconfig.c @@ -0,0 +1,960 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +/* + * @Input: + * Connector / instance to add to an internal structure + * @Return + * The current head of the linked list of connector_instance + * + */ + +_CONNECTOR_INSTANCE *add_connector_instance(struct section *connector, struct section *instance) +{ + static struct _connector_instance *global_connector_instance = NULL; + struct _connector_instance *local_ci, *local_ci_tmp; + + if (unlikely(!connector)) { + if (unlikely(!instance)) + return global_connector_instance; + + local_ci = global_connector_instance; + while (local_ci) { + local_ci_tmp = local_ci->next; + freez(local_ci); + local_ci = local_ci_tmp; + } + global_connector_instance = NULL; + return NULL; + } + + local_ci = callocz(1, sizeof(struct _connector_instance)); + local_ci->instance = instance; + local_ci->connector = connector; + strncpyz(local_ci->instance_name, instance->name, CONFIG_MAX_NAME); + strncpyz(local_ci->connector_name, connector->name, CONFIG_MAX_NAME); + local_ci->next = global_connector_instance; + global_connector_instance = local_ci; + + return global_connector_instance; +} + +int is_valid_connector(char *type, int check_reserved) +{ + int rc = 1; + + if (unlikely(!type)) + return 0; + + if (!check_reserved) { + if (unlikely(is_valid_connector(type,1))) { + return 0; + } + //if (unlikely(*type == ':') + // return 0; + char *separator = strrchr(type, ':'); + if (likely(separator)) { + *separator = '\0'; + rc = separator - type; + } else + return 0; + } +// else { +// if (unlikely(is_valid_connector(type,1))) { +// error("Section %s invalid -- reserved name", type); +// return 0; +// } +// } + + if (!strcmp(type, "graphite") || !strcmp(type, "graphite:plaintext")) { + return rc; + } else if (!strcmp(type, "graphite:http") || !strcmp(type, "graphite:https")) { + return rc; + } else if (!strcmp(type, "json") || !strcmp(type, "json:plaintext")) { + return rc; + } else if (!strcmp(type, "json:http") || !strcmp(type, "json:https")) { + return rc; + } else if (!strcmp(type, "opentsdb") || !strcmp(type, "opentsdb:telnet")) { + return rc; + } else if (!strcmp(type, "opentsdb:http") || !strcmp(type, "opentsdb:https")) { + return rc; + } else if (!strcmp(type, "prometheus_remote_write")) { + return rc; + } else if (!strcmp(type, "prometheus_remote_write:http") || !strcmp(type, "prometheus_remote_write:https")) { + return rc; + } else if (!strcmp(type, "kinesis") || !strcmp(type, "kinesis:plaintext")) { + return rc; + } else if (!strcmp(type, "pubsub") || !strcmp(type, "pubsub:plaintext")) { + return rc; + } else if (!strcmp(type, "mongodb") || !strcmp(type, "mongodb:plaintext")) { + return rc; + } + + return 0; +} + +// ---------------------------------------------------------------------------- +// locking + +inline void appconfig_wrlock(struct config *root) { + netdata_mutex_lock(&root->mutex); +} + +inline void appconfig_unlock(struct config *root) { + netdata_mutex_unlock(&root->mutex); +} + +inline void config_section_wrlock(struct section *co) { + netdata_mutex_lock(&co->mutex); +} + +inline void config_section_unlock(struct section *co) { + netdata_mutex_unlock(&co->mutex); +} + + +// ---------------------------------------------------------------------------- +// config name-value index + +static int appconfig_option_compare(void *a, void *b) { + if(((struct config_option *)a)->hash < ((struct config_option *)b)->hash) return -1; + else if(((struct config_option *)a)->hash > ((struct config_option *)b)->hash) return 1; + else return strcmp(((struct config_option *)a)->name, ((struct config_option *)b)->name); +} + +#define appconfig_option_index_add(co, cv) (struct config_option *)avl_insert_lock(&((co)->values_index), (avl_t *)(cv)) +#define appconfig_option_index_del(co, cv) (struct config_option *)avl_remove_lock(&((co)->values_index), (avl_t *)(cv)) + +static struct config_option *appconfig_option_index_find(struct section *co, const char *name, uint32_t hash) { + struct config_option tmp; + tmp.hash = (hash)?hash:simple_hash(name); + tmp.name = (char *)name; + + return (struct config_option *)avl_search_lock(&(co->values_index), (avl_t *) &tmp); +} + + +// ---------------------------------------------------------------------------- +// config sections index + +int appconfig_section_compare(void *a, void *b) { + if(((struct section *)a)->hash < ((struct section *)b)->hash) return -1; + else if(((struct section *)a)->hash > ((struct section *)b)->hash) return 1; + else return strcmp(((struct section *)a)->name, ((struct section *)b)->name); +} + +#define appconfig_index_add(root, cfg) (struct section *)avl_insert_lock(&(root)->index, (avl_t *)(cfg)) +#define appconfig_index_del(root, cfg) (struct section *)avl_remove_lock(&(root)->index, (avl_t *)(cfg)) + +static struct section *appconfig_index_find(struct config *root, const char *name, uint32_t hash) { + struct section tmp; + tmp.hash = (hash)?hash:simple_hash(name); + tmp.name = (char *)name; + + return (struct section *)avl_search_lock(&root->index, (avl_t *) &tmp); +} + + +// ---------------------------------------------------------------------------- +// config section methods + +static inline struct section *appconfig_section_find(struct config *root, const char *section) { + return appconfig_index_find(root, section, 0); +} + +static inline struct section *appconfig_section_create(struct config *root, const char *section) { + debug(D_CONFIG, "Creating section '%s'.", section); + + struct section *co = callocz(1, sizeof(struct section)); + co->name = strdupz(section); + co->hash = simple_hash(co->name); + netdata_mutex_init(&co->mutex); + + avl_init_lock(&co->values_index, appconfig_option_compare); + + if(unlikely(appconfig_index_add(root, co) != co)) + error("INTERNAL ERROR: indexing of section '%s', already exists.", co->name); + + appconfig_wrlock(root); + struct section *co2 = root->last_section; + if(co2) { + co2->next = co; + } else { + root->first_section = co; + } + root->last_section = co; + appconfig_unlock(root); + + return co; +} + +void appconfig_section_destroy_non_loaded(struct config *root, const char *section) +{ + struct section *co; + struct config_option *cv, *cv_next; + + debug(D_CONFIG, "Destroying section '%s'.", section); + + co = appconfig_section_find(root, section); + if(!co) { + error("Could not destroy section '%s'. Not found.", section); + return; + } + + config_section_wrlock(co); + for(cv = co->values; cv ; cv = cv->next) { + if (cv->flags & CONFIG_VALUE_LOADED) { + /* Do not destroy values that were loaded from the configuration files. */ + config_section_unlock(co); + return; + } + } + for(cv = co->values ; cv ; cv = cv_next) { + cv_next = cv->next; + if(unlikely(!appconfig_option_index_del(co, cv))) + error("Cannot remove config option '%s' from section '%s'.", cv->name, co->name); + freez(cv->value); + freez(cv->name); + freez(cv); + } + co->values = NULL; + config_section_unlock(co); + + if (unlikely(!appconfig_index_del(root, co))) { + error("Cannot remove section '%s' from config.", section); + return; + } + + appconfig_wrlock(root); + + if (root->first_section == co) { + root->first_section = co->next; + + if (root->last_section == co) + root->last_section = root->first_section; + } else { + struct section *co_cur = root->first_section, *co_prev = NULL; + + while(co_cur && co_cur != co) { + co_prev = co_cur; + co_cur = co_cur->next; + } + + if (co_cur) { + co_prev->next = co_cur->next; + + if (root->last_section == co_cur) + root->last_section = co_prev; + } + } + + appconfig_unlock(root); + + avl_destroy_lock(&co->values_index); + freez(co->name); + pthread_mutex_destroy(&co->mutex); + freez(co); +} + +void appconfig_section_option_destroy_non_loaded(struct config *root, const char *section, const char *name) +{ + debug(D_CONFIG, "Destroying section option '%s -> %s'.", section, name); + + struct section *co; + co = appconfig_section_find(root, section); + if (!co) { + error("Could not destroy section option '%s -> %s'. The section not found.", section, name); + return; + } + + config_section_wrlock(co); + + struct config_option *cv; + + cv = appconfig_option_index_find(co, name, simple_hash(name)); + + if (cv && cv->flags & CONFIG_VALUE_LOADED) { + config_section_unlock(co); + return; + } + + if (unlikely(!(cv && appconfig_option_index_del(co, cv)))) { + config_section_unlock(co); + error("Could not destroy section option '%s -> %s'. The option not found.", section, name); + return; + } + + if (co->values == cv) { + co->values = co->values->next; + } else { + struct config_option *cv_cur = co->values, *cv_prev = NULL; + while (cv_cur && cv_cur != cv) { + cv_prev = cv_cur; + cv_cur = cv_cur->next; + } + if (cv_cur) { + cv_prev->next = cv_cur->next; + } + } + + freez(cv->value); + freez(cv->name); + freez(cv); + + config_section_unlock(co); + return; +} + +// ---------------------------------------------------------------------------- +// config name-value methods + +static inline struct config_option *appconfig_value_create(struct section *co, const char *name, const char *value) { + debug(D_CONFIG, "Creating config entry for name '%s', value '%s', in section '%s'.", name, value, co->name); + + struct config_option *cv = callocz(1, sizeof(struct config_option)); + cv->name = strdupz(name); + cv->hash = simple_hash(cv->name); + cv->value = strdupz(value); + + struct config_option *found = appconfig_option_index_add(co, cv); + if(found != cv) { + error("indexing of config '%s' in section '%s': already exists - using the existing one.", cv->name, co->name); + freez(cv->value); + freez(cv->name); + freez(cv); + return found; + } + + config_section_wrlock(co); + struct config_option *cv2 = co->values; + if(cv2) { + while (cv2->next) cv2 = cv2->next; + cv2->next = cv; + } + else co->values = cv; + config_section_unlock(co); + + return cv; +} + +int appconfig_exists(struct config *root, const char *section, const char *name) { + struct config_option *cv; + + debug(D_CONFIG, "request to get config in section '%s', name '%s'", section, name); + + struct section *co = appconfig_section_find(root, section); + if(!co) return 0; + + cv = appconfig_option_index_find(co, name, 0); + if(!cv) return 0; + + return 1; +} + +int appconfig_move(struct config *root, const char *section_old, const char *name_old, const char *section_new, const char *name_new) { + struct config_option *cv_old, *cv_new; + int ret = -1; + + debug(D_CONFIG, "request to rename config in section '%s', old name '%s', to section '%s', new name '%s'", section_old, name_old, section_new, name_new); + + struct section *co_old = appconfig_section_find(root, section_old); + if(!co_old) return ret; + + struct section *co_new = appconfig_section_find(root, section_new); + if(!co_new) co_new = appconfig_section_create(root, section_new); + + config_section_wrlock(co_old); + if(co_old != co_new) + config_section_wrlock(co_new); + + cv_old = appconfig_option_index_find(co_old, name_old, 0); + if(!cv_old) goto cleanup; + + cv_new = appconfig_option_index_find(co_new, name_new, 0); + if(cv_new) goto cleanup; + + if(unlikely(appconfig_option_index_del(co_old, cv_old) != cv_old)) + error("INTERNAL ERROR: deletion of config '%s' from section '%s', deleted the wrong config entry.", cv_old->name, co_old->name); + + if(co_old->values == cv_old) { + co_old->values = cv_old->next; + } + else { + struct config_option *t; + for(t = co_old->values; t && t->next != cv_old ;t = t->next) ; + if(!t || t->next != cv_old) + error("INTERNAL ERROR: cannot find variable '%s' in section '%s' of the config - but it should be there.", cv_old->name, co_old->name); + else + t->next = cv_old->next; + } + + freez(cv_old->name); + cv_old->name = strdupz(name_new); + cv_old->hash = simple_hash(cv_old->name); + + cv_new = cv_old; + cv_new->next = co_new->values; + co_new->values = cv_new; + + if(unlikely(appconfig_option_index_add(co_new, cv_old) != cv_old)) + error("INTERNAL ERROR: re-indexing of config '%s' in section '%s', already exists.", cv_old->name, co_new->name); + + ret = 0; + +cleanup: + if(co_old != co_new) + config_section_unlock(co_new); + config_section_unlock(co_old); + return ret; +} + +char *appconfig_get_by_section(struct section *co, const char *name, const char *default_value) +{ + struct config_option *cv; + + // Only calls internal to this file check for a NULL result and they do not supply a NULL arg. + // External caller should treat NULL as an error case. + cv = appconfig_option_index_find(co, name, 0); + if (!cv) { + if (!default_value) return NULL; + cv = appconfig_value_create(co, name, default_value); + if (!cv) return NULL; + } + cv->flags |= CONFIG_VALUE_USED; + + if((cv->flags & CONFIG_VALUE_LOADED) || (cv->flags & CONFIG_VALUE_CHANGED)) { + // this is a loaded value from the config file + // if it is different than the default, mark it + if(!(cv->flags & CONFIG_VALUE_CHECKED)) { + if(default_value && strcmp(cv->value, default_value) != 0) cv->flags |= CONFIG_VALUE_CHANGED; + cv->flags |= CONFIG_VALUE_CHECKED; + } + } + + return(cv->value); +} + + +char *appconfig_get(struct config *root, const char *section, const char *name, const char *default_value) +{ + if (default_value == NULL) + debug(D_CONFIG, "request to get config in section '%s', name '%s' or fail", section, name); + else + debug(D_CONFIG, "request to get config in section '%s', name '%s', default_value '%s'", section, name, default_value); + + struct section *co = appconfig_section_find(root, section); + if (!co && !default_value) + return NULL; + if(!co) co = appconfig_section_create(root, section); + + return appconfig_get_by_section(co, name, default_value); +} + +long long appconfig_get_number(struct config *root, const char *section, const char *name, long long value) +{ + char buffer[100], *s; + sprintf(buffer, "%lld", value); + + s = appconfig_get(root, section, name, buffer); + if(!s) return value; + + return strtoll(s, NULL, 0); +} + +NETDATA_DOUBLE appconfig_get_float(struct config *root, const char *section, const char *name, NETDATA_DOUBLE value) +{ + char buffer[100], *s; + sprintf(buffer, "%0.5" NETDATA_DOUBLE_MODIFIER, value); + + s = appconfig_get(root, section, name, buffer); + if(!s) return value; + + return str2ndd(s, NULL); +} + +inline int appconfig_test_boolean_value(char *s) { + if(!strcasecmp(s, "yes") || !strcasecmp(s, "true") || !strcasecmp(s, "on") + || !strcasecmp(s, "auto") || !strcasecmp(s, "on demand")) + return 1; + + return 0; +} + +int appconfig_get_boolean_by_section(struct section *co, const char *name, int value) { + char *s; + + s = appconfig_get_by_section(co, name, (!value)?"no":"yes"); + if(!s) return value; + + return appconfig_test_boolean_value(s); +} + +int appconfig_get_boolean(struct config *root, const char *section, const char *name, int value) +{ + char *s; + if(value) s = "yes"; + else s = "no"; + + s = appconfig_get(root, section, name, s); + if(!s) return value; + + return appconfig_test_boolean_value(s); +} + +int appconfig_get_boolean_ondemand(struct config *root, const char *section, const char *name, int value) +{ + char *s; + + if(value == CONFIG_BOOLEAN_AUTO) + s = "auto"; + + else if(value == CONFIG_BOOLEAN_NO) + s = "no"; + + else + s = "yes"; + + s = appconfig_get(root, section, name, s); + if(!s) return value; + + if(!strcmp(s, "yes")) + return CONFIG_BOOLEAN_YES; + else if(!strcmp(s, "no")) + return CONFIG_BOOLEAN_NO; + else if(!strcmp(s, "auto") || !strcmp(s, "on demand")) + return CONFIG_BOOLEAN_AUTO; + + return value; +} + +const char *appconfig_set_default(struct config *root, const char *section, const char *name, const char *value) +{ + struct config_option *cv; + + debug(D_CONFIG, "request to set default config in section '%s', name '%s', value '%s'", section, name, value); + + struct section *co = appconfig_section_find(root, section); + if(!co) return appconfig_set(root, section, name, value); + + cv = appconfig_option_index_find(co, name, 0); + if(!cv) return appconfig_set(root, section, name, value); + + cv->flags |= CONFIG_VALUE_USED; + + if(cv->flags & CONFIG_VALUE_LOADED) + return cv->value; + + if(strcmp(cv->value, value) != 0) { + cv->flags |= CONFIG_VALUE_CHANGED; + + freez(cv->value); + cv->value = strdupz(value); + } + + return cv->value; +} + +const char *appconfig_set(struct config *root, const char *section, const char *name, const char *value) +{ + struct config_option *cv; + + debug(D_CONFIG, "request to set config in section '%s', name '%s', value '%s'", section, name, value); + + struct section *co = appconfig_section_find(root, section); + if(!co) co = appconfig_section_create(root, section); + + cv = appconfig_option_index_find(co, name, 0); + if(!cv) cv = appconfig_value_create(co, name, value); + cv->flags |= CONFIG_VALUE_USED; + + if(strcmp(cv->value, value) != 0) { + cv->flags |= CONFIG_VALUE_CHANGED; + + freez(cv->value); + cv->value = strdupz(value); + } + + return value; +} + +long long appconfig_set_number(struct config *root, const char *section, const char *name, long long value) +{ + char buffer[100]; + sprintf(buffer, "%lld", value); + + appconfig_set(root, section, name, buffer); + + return value; +} + +NETDATA_DOUBLE appconfig_set_float(struct config *root, const char *section, const char *name, NETDATA_DOUBLE value) +{ + char buffer[100]; + sprintf(buffer, "%0.5" NETDATA_DOUBLE_MODIFIER, value); + + appconfig_set(root, section, name, buffer); + + return value; +} + +int appconfig_set_boolean(struct config *root, const char *section, const char *name, int value) +{ + char *s; + if(value) s = "yes"; + else s = "no"; + + appconfig_set(root, section, name, s); + + return value; +} + +int appconfig_get_duration(struct config *root, const char *section, const char *name, const char *value) +{ + int result = 0; + const char *s; + + s = appconfig_get(root, section, name, value); + if(!s) goto fallback; + + if(!config_parse_duration(s, &result)) { + error("config option '[%s].%s = %s' is configured with an valid duration", section, name, s); + goto fallback; + } + + return result; + + fallback: + if(!config_parse_duration(value, &result)) + error("INTERNAL ERROR: default duration supplied for option '[%s].%s = %s' is not a valid duration", section, name, value); + + return result; +} + +// ---------------------------------------------------------------------------- +// config load/save + +int appconfig_load(struct config *root, char *filename, int overwrite_used, const char *section_name) +{ + int line = 0; + struct section *co = NULL; + int is_exporter_config = 0; + int _connectors = 0; // number of exporting connector sections we have + char working_instance[CONFIG_MAX_NAME + 1]; + char working_connector[CONFIG_MAX_NAME + 1]; + struct section *working_connector_section = NULL; + int global_exporting_section = 0; + + char buffer[CONFIG_FILE_LINE_MAX + 1], *s; + + if(!filename) filename = CONFIG_DIR "/" CONFIG_FILENAME; + + debug(D_CONFIG, "CONFIG: opening config file '%s'", filename); + + FILE *fp = fopen(filename, "r"); + if(!fp) { + // info("CONFIG: cannot open file '%s'. Using internal defaults.", filename); + return 0; + } + + uint32_t section_hash = 0; + if(section_name) { + section_hash = simple_hash(section_name); + } + is_exporter_config = (strstr(filename, EXPORTING_CONF) != NULL); + + while(fgets(buffer, CONFIG_FILE_LINE_MAX, fp) != NULL) { + buffer[CONFIG_FILE_LINE_MAX] = '\0'; + line++; + + s = trim(buffer); + if(!s || *s == '#') { + debug(D_CONFIG, "CONFIG: ignoring line %d of file '%s', it is empty.", line, filename); + continue; + } + + int len = (int) strlen(s); + if(*s == '[' && s[len - 1] == ']') { + // new section + s[len - 1] = '\0'; + s++; + + if (is_exporter_config) { + global_exporting_section = + !(strcmp(s, CONFIG_SECTION_EXPORTING)) || !(strcmp(s, CONFIG_SECTION_PROMETHEUS)); + if (unlikely(!global_exporting_section)) { + int rc; + rc = is_valid_connector(s, 0); + if (likely(rc)) { + strncpyz(working_connector, s, CONFIG_MAX_NAME); + s = s + rc + 1; + if (unlikely(!(*s))) { + _connectors++; + sprintf(buffer, "instance_%d", _connectors); + s = buffer; + } + strncpyz(working_instance, s, CONFIG_MAX_NAME); + working_connector_section = NULL; + if (unlikely(appconfig_section_find(root, working_instance))) { + error("Instance (%s) already exists", working_instance); + co = NULL; + continue; + } + } else { + co = NULL; + error("Section (%s) does not specify a valid connector", s); + continue; + } + } + } + + co = appconfig_section_find(root, s); + if(!co) co = appconfig_section_create(root, s); + + if(co && section_name && overwrite_used && section_hash == co->hash && !strcmp(section_name, co->name)) { + config_section_wrlock(co); + struct config_option *cv2 = co->values; + while (cv2) { + struct config_option *save = cv2->next; + struct config_option *found = appconfig_option_index_del(co, cv2); + if(found != cv2) + error("INTERNAL ERROR: Cannot remove '%s' from section '%s', it was not inserted before.", + cv2->name, co->name); + + freez(cv2->name); + freez(cv2->value); + freez(cv2); + cv2 = save; + } + co->values = NULL; + config_section_unlock(co); + } + + continue; + } + + if(!co) { + // line outside a section + error("CONFIG: ignoring line %d ('%s') of file '%s', it is outside all sections.", line, s, filename); + continue; + } + + if(section_name && overwrite_used && section_hash != co->hash && strcmp(section_name, co->name)) { + continue; + } + + char *name = s; + char *value = strchr(s, '='); + if(!value) { + error("CONFIG: ignoring line %d ('%s') of file '%s', there is no = in it.", line, s, filename); + continue; + } + *value = '\0'; + value++; + + name = trim(name); + value = trim(value); + + if(!name || *name == '#') { + error("CONFIG: ignoring line %d of file '%s', name is empty.", line, filename); + continue; + } + + if(!value) value = ""; + + struct config_option *cv = appconfig_option_index_find(co, name, 0); + + if (!cv) { + cv = appconfig_value_create(co, name, value); + if (likely(is_exporter_config) && unlikely(!global_exporting_section)) { + if (unlikely(!working_connector_section)) { + working_connector_section = appconfig_section_find(root, working_connector); + if (!working_connector_section) + working_connector_section = appconfig_section_create(root, working_connector); + if (likely(working_connector_section)) { + add_connector_instance(working_connector_section, co); + } + } + } + } else { + if (((cv->flags & CONFIG_VALUE_USED) && overwrite_used) || !(cv->flags & CONFIG_VALUE_USED)) { + debug( + D_CONFIG, "CONFIG: line %d of file '%s', overwriting '%s/%s'.", line, filename, co->name, cv->name); + freez(cv->value); + cv->value = strdupz(value); + } else + debug( + D_CONFIG, + "CONFIG: ignoring line %d of file '%s', '%s/%s' is already present and used.", + line, + filename, + co->name, + cv->name); + } + cv->flags |= CONFIG_VALUE_LOADED; + } + + fclose(fp); + + return 1; +} + +void appconfig_generate(struct config *root, BUFFER *wb, int only_changed) +{ + int i, pri; + struct section *co; + struct config_option *cv; + + { + int found_host_labels = 0; + for (co = root->first_section; co; co = co->next) + if(!strcmp(co->name, CONFIG_SECTION_HOST_LABEL)) + found_host_labels = 1; + + if(!found_host_labels) { + appconfig_section_create(root, CONFIG_SECTION_HOST_LABEL); + appconfig_get(root, CONFIG_SECTION_HOST_LABEL, "name", "value"); + } + } + + buffer_strcat(wb, + "# netdata configuration\n" + "#\n" + "# You can download the latest version of this file, using:\n" + "#\n" + "# wget -O /etc/netdata/netdata.conf http://localhost:19999/netdata.conf\n" + "# or\n" + "# curl -o /etc/netdata/netdata.conf http://localhost:19999/netdata.conf\n" + "#\n" + "# You can uncomment and change any of the options below.\n" + "# The value shown in the commented settings, is the default value.\n" + "#\n" + "\n# global netdata configuration\n"); + + for(i = 0; i <= 16 ;i++) { + appconfig_wrlock(root); + for(co = root->first_section; co ; co = co->next) { + if(!strcmp(co->name, CONFIG_SECTION_GLOBAL)) pri = 0; + else if(!strcmp(co->name, CONFIG_SECTION_DB)) pri = 1; + else if(!strcmp(co->name, CONFIG_SECTION_DIRECTORIES)) pri = 2; + else if(!strcmp(co->name, CONFIG_SECTION_LOGS)) pri = 3; + else if(!strcmp(co->name, CONFIG_SECTION_ENV_VARS)) pri = 4; + else if(!strcmp(co->name, CONFIG_SECTION_HOST_LABEL)) pri = 5; + else if(!strcmp(co->name, CONFIG_SECTION_SQLITE)) pri = 6; + else if(!strcmp(co->name, CONFIG_SECTION_CLOUD)) pri = 7; + else if(!strcmp(co->name, CONFIG_SECTION_ML)) pri = 8; + else if(!strcmp(co->name, CONFIG_SECTION_HEALTH)) pri = 9; + else if(!strcmp(co->name, CONFIG_SECTION_WEB)) pri = 10; + // by default, new sections will get pri = 11 (set at the end, below) + else if(!strcmp(co->name, CONFIG_SECTION_REGISTRY)) pri = 12; + else if(!strcmp(co->name, CONFIG_SECTION_GLOBAL_STATISTICS)) pri = 13; + else if(!strcmp(co->name, CONFIG_SECTION_PLUGINS)) pri = 14; + else if(!strcmp(co->name, CONFIG_SECTION_STATSD)) pri = 15; + else if(!strncmp(co->name, "plugin:", 7)) pri = 16; // << change the loop too if you change this + else pri = 11; // this is used for any new (currently unknown) sections + + if(i == pri) { + int loaded = 0; + int used = 0; + int changed = 0; + int count = 0; + + config_section_wrlock(co); + for(cv = co->values; cv ; cv = cv->next) { + used += (cv->flags & CONFIG_VALUE_USED)?1:0; + loaded += (cv->flags & CONFIG_VALUE_LOADED)?1:0; + changed += (cv->flags & CONFIG_VALUE_CHANGED)?1:0; + count++; + } + config_section_unlock(co); + + if(!count) continue; + if(only_changed && !changed && !loaded) continue; + + if(!used) { + buffer_sprintf(wb, "\n# section '%s' is not used.", co->name); + } + + buffer_sprintf(wb, "\n[%s]\n", co->name); + + config_section_wrlock(co); + for(cv = co->values; cv ; cv = cv->next) { + + if(used && !(cv->flags & CONFIG_VALUE_USED)) { + buffer_sprintf(wb, "\n\t# option '%s' is not used.\n", cv->name); + } + buffer_sprintf(wb, "\t%s%s = %s\n", ((!(cv->flags & CONFIG_VALUE_LOADED)) && (!(cv->flags & CONFIG_VALUE_CHANGED)) && (cv->flags & CONFIG_VALUE_USED))?"# ":"", cv->name, cv->value); + } + config_section_unlock(co); + } + } + appconfig_unlock(root); + } +} + +/** + * Parse Duration + * + * Parse the string setting the result + * + * @param string the timestamp string + * @param result the output variable + * + * @return It returns 1 on success and 0 otherwise + */ +int config_parse_duration(const char* string, int* result) { + while(*string && isspace(*string)) string++; + + if(unlikely(!*string)) goto fallback; + + if(*string == 'n' && !strcmp(string, "never")) { + // this is a valid option + *result = 0; + return 1; + } + + // make sure it is a number + if(!(isdigit(*string) || *string == '+' || *string == '-')) goto fallback; + + char *e = NULL; + NETDATA_DOUBLE n = str2ndd(string, &e); + if(e && *e) { + switch (*e) { + case 'Y': + *result = (int) (n * 31536000); + break; + case 'M': + *result = (int) (n * 2592000); + break; + case 'w': + *result = (int) (n * 604800); + break; + case 'd': + *result = (int) (n * 86400); + break; + case 'h': + *result = (int) (n * 3600); + break; + case 'm': + *result = (int) (n * 60); + break; + case 's': + default: + *result = (int) (n); + break; + } + } + else + *result = (int)(n); + + return 1; + + fallback: + *result = 0; + return 0; +} + +struct section *appconfig_get_section(struct config *root, const char *name) +{ + return appconfig_section_find(root, name); +} diff --git a/libnetdata/config/appconfig.h b/libnetdata/config/appconfig.h new file mode 100644 index 0000000..2828e10 --- /dev/null +++ b/libnetdata/config/appconfig.h @@ -0,0 +1,219 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +/* + * This section manages ini config files, like netdata.conf and stream.conf + * + * It is organized like this: + * + * struct config (i.e. netdata.conf or stream.conf) + * .sections = a linked list of struct section + * .mutex = a mutex to protect the above linked list due to multi-threading + * .index = an AVL tree of struct section + * + * struct section (i.e. [global] or [health] of netdata.conf) + * .value = a linked list of struct config_option + * .mutex = a mutex to protect the above linked list due to multi-threading + * .value_index = an AVL tree of struct config_option + * + * struct config_option (ie. a name-value pair for each ini file option) + * + * The following operations on name-value options are supported: + * SET to set the value of an option + * SET DEFAULT to set the value and the default value of an option + * GET to get the value of an option + * EXISTS to check if an option exists + * MOVE to move an option from a section to another section, and/or rename it + * + * GET and SET operations are provided for the following data types: + * STRING + * NUMBER (long long) + * FLOAT (long double) + * BOOLEAN (false, true) + * BOOLEAN ONDEMAND (false, true, auto) + * + * GET and SET operations create struct config_option, if it is not already present. + * This allows netdata to run even without netdata.conf and stream.conf. The internal + * defaults are used to create the structure that should exist in the ini file and the config + * file can be downloaded from the server. + * + * Also 2 operations are supported for the whole config file: + * + * LOAD To load the ini file from disk + * GENERATE To generate the ini file (this is used to download the ini file from the server) + * + * For each option (name-value pair), the system maintains 4 flags: + * LOADED to indicate that the value has been loaded from the file + * USED to indicate that netdata used the value + * CHANGED to indicate that the value has been changed from the loaded value or the internal default value + * CHECKED is used internally for optimization (to avoid an strcmp() every time GET is called). + * + * TODO: + * 1. The linked lists and the mutexes can be removed and the AVL trees can become DICTIONARY. + * This part of the code was written before we add traversal to AVL. + * + * 2. High level data types could be supported, to simplify the rest of the code: + * MULTIPLE CHOICE to let the user select one of the supported keywords + * this would allow users see in comments the available options + * + * SIMPLE PATTERN to let the user define netdata SIMPLE PATTERNS + * + * 3. Sorting of options should be supported. + * Today, when the ini file is downloaded from the server, the options are shown in the order + * they appear in the linked list (the order they were added, listing changed options first). + * If we remove the linked list, the order they appear in the AVL tree will be used (which is + * random due to simple_hash()). + * Ideally, we support sorting of options when generating the ini file. + * + * 4. There is no free() operation. So, memory is freed on netdata exit. + * + * 5. Avoid memory fragmentation + * Since entries are created from multiple threads and a lot of allocations are required + * for each config_option, fragmentation can be a problem for IoT. + * + * 6. Although this way of managing options is quite flexible and dynamic, it wastes memory + * for the names of the options. Since most of the option names are static, we could provide + * a method to allocate only the dynamic option names. + */ + +#ifndef NETDATA_CONFIG_H +#define NETDATA_CONFIG_H 1 + +#include "../libnetdata.h" + +#define CONFIG_FILENAME "netdata.conf" + +#define CONFIG_SECTION_GLOBAL "global" +#define CONFIG_SECTION_DIRECTORIES "directories" +#define CONFIG_SECTION_LOGS "logs" +#define CONFIG_SECTION_ENV_VARS "environment variables" +#define CONFIG_SECTION_SQLITE "sqlite" +#define CONFIG_SECTION_WEB "web" +#define CONFIG_SECTION_STATSD "statsd" +#define CONFIG_SECTION_PLUGINS "plugins" +#define CONFIG_SECTION_CLOUD "cloud" +#define CONFIG_SECTION_REGISTRY "registry" +#define CONFIG_SECTION_HEALTH "health" +#define CONFIG_SECTION_STREAM "stream" +#define CONFIG_SECTION_ML "ml" +#define CONFIG_SECTION_EXPORTING "exporting:global" +#define CONFIG_SECTION_PROMETHEUS "prometheus:exporter" +#define CONFIG_SECTION_HOST_LABEL "host labels" +#define EXPORTING_CONF "exporting.conf" +#define CONFIG_SECTION_GLOBAL_STATISTICS "global statistics" +#define CONFIG_SECTION_DB "db" + + +// these are used to limit the configuration names and values lengths +// they are not enforced by config.c functions (they will strdup() all strings, no matter of their length) +#define CONFIG_MAX_NAME 1024 +#define CONFIG_MAX_VALUE 2048 + +// ---------------------------------------------------------------------------- +// Config definitions +#define CONFIG_FILE_LINE_MAX ((CONFIG_MAX_NAME + CONFIG_MAX_VALUE + 1024) * 2) + +#define CONFIG_VALUE_LOADED 0x01 // has been loaded from the config +#define CONFIG_VALUE_USED 0x02 // has been accessed from the program +#define CONFIG_VALUE_CHANGED 0x04 // has been changed from the loaded value or the internal default value +#define CONFIG_VALUE_CHECKED 0x08 // has been checked if the value is different from the default + +struct config_option { + avl_t avl_node; // the index entry of this entry - this has to be first! + + uint8_t flags; + uint32_t hash; // a simple hash to speed up searching + // we first compare hashes, and only if the hashes are equal we do string comparisons + + char *name; + char *value; + + struct config_option *next; // config->mutex protects just this +}; + +struct section { + avl_t avl_node; // the index entry of this section - this has to be first! + + uint32_t hash; // a simple hash to speed up searching + // we first compare hashes, and only if the hashes are equal we do string comparisons + + char *name; + + struct section *next; // global config_mutex protects just this + + struct config_option *values; + avl_tree_lock values_index; + + netdata_mutex_t mutex; // this locks only the writers, to ensure atomic updates + // readers are protected using the rwlock in avl_tree_lock +}; + +struct config { + struct section *first_section; + struct section *last_section; // optimize inserting at the end + netdata_mutex_t mutex; + avl_tree_lock index; +}; + +#define CONFIG_BOOLEAN_INVALID 100 // an invalid value to check for validity (used as default initialization when needed) + +#define CONFIG_BOOLEAN_NO 0 // disabled +#define CONFIG_BOOLEAN_YES 1 // enabled + +#ifndef CONFIG_BOOLEAN_AUTO +#define CONFIG_BOOLEAN_AUTO 2 // enabled if it has useful info when enabled +#endif + +int appconfig_load(struct config *root, char *filename, int overwrite_used, const char *section_name); +void config_section_wrlock(struct section *co); +void config_section_unlock(struct section *co); + +char *appconfig_get_by_section(struct section *co, const char *name, const char *default_value); +char *appconfig_get(struct config *root, const char *section, const char *name, const char *default_value); +long long appconfig_get_number(struct config *root, const char *section, const char *name, long long value); +NETDATA_DOUBLE appconfig_get_float(struct config *root, const char *section, const char *name, NETDATA_DOUBLE value); +int appconfig_get_boolean_by_section(struct section *co, const char *name, int value); +int appconfig_get_boolean(struct config *root, const char *section, const char *name, int value); +int appconfig_get_boolean_ondemand(struct config *root, const char *section, const char *name, int value); +int appconfig_get_duration(struct config *root, const char *section, const char *name, const char *value); + +const char *appconfig_set(struct config *root, const char *section, const char *name, const char *value); +const char *appconfig_set_default(struct config *root, const char *section, const char *name, const char *value); +long long appconfig_set_number(struct config *root, const char *section, const char *name, long long value); +NETDATA_DOUBLE appconfig_set_float(struct config *root, const char *section, const char *name, NETDATA_DOUBLE value); +int appconfig_set_boolean(struct config *root, const char *section, const char *name, int value); + +int appconfig_exists(struct config *root, const char *section, const char *name); +int appconfig_move(struct config *root, const char *section_old, const char *name_old, const char *section_new, const char *name_new); + +void appconfig_generate(struct config *root, BUFFER *wb, int only_changed); + +int appconfig_section_compare(void *a, void *b); + +void appconfig_section_destroy_non_loaded(struct config *root, const char *section); +void appconfig_section_option_destroy_non_loaded(struct config *root, const char *section, const char *name); + +int config_parse_duration(const char* string, int* result); + +struct section *appconfig_get_section(struct config *root, const char *name); + +void appconfig_wrlock(struct config *root); +void appconfig_unlock(struct config *root); + +int appconfig_test_boolean_value(char *s); + +struct connector_instance { + char instance_name[CONFIG_MAX_NAME + 1]; + char connector_name[CONFIG_MAX_NAME + 1]; +}; + +typedef struct _connector_instance { + struct section *connector; // actual connector + struct section *instance; // This instance + char instance_name[CONFIG_MAX_NAME + 1]; + char connector_name[CONFIG_MAX_NAME + 1]; + struct _connector_instance *next; // Next instance +} _CONNECTOR_INSTANCE; + +_CONNECTOR_INSTANCE *add_connector_instance(struct section *connector, struct section *instance); + +#endif /* NETDATA_CONFIG_H */ diff --git a/libnetdata/dictionary/Makefile.am b/libnetdata/dictionary/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/dictionary/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/dictionary/README.md b/libnetdata/dictionary/README.md new file mode 100644 index 0000000..6d7e553 --- /dev/null +++ b/libnetdata/dictionary/README.md @@ -0,0 +1,231 @@ + + +# Dictionaries + +Netdata dictionaries associate a `name` with a `value`: + +- A `name` can be any string. +- A `value` can be anything. + +Such a pair of a `name` and a `value` consists of an `item` or an `entry` in the dictionary. + +Dictionaries provide an interface to: + +- **Add** an item to the dictionary +- **Get** an item from the dictionary (provided its `name`) +- **Delete** an item from the dictionary (provided its `name`) +- **Traverse** the list of items in the dictionary + +Dictionaries are **ordered**, meaning that the order they have been added, is preserved while traversing them. The caller may reverse this order by passing the flag `DICT_OPTION_ADD_IN_FRONT` when creating the dictionary. + +Dictionaries guarantee **uniqueness** of all items added to them, meaning that only one item with a given `name` can exist in the dictionary at any given time. + +Dictionaries are extremely fast in all operations. They are indexing the keys with `JudyHS` and they utilize a double-linked-list for the traversal operations. Deletion is the most expensive operation, usually somewhat slower than insertion. + +## Memory management + +Dictionaries come with 2 memory management options: + +- **Clone** (copy) the `name` and/or the `value` to memory allocated by the dictionary. +- **Link** the `name` and/or the `value`, without allocating any memory about them. + +In **clone** mode, the dictionary guarantees that all operations on the dictionary items, will automatically take care of the memory used by the `name` and/or the `value`. In case the `value` is an object that needs to have user allocated memory, the following callback functions can be registered: + +1. `dictionary_register_insert_callback()` that can be called just after the insertion of an item to the dictionary, or after the replacement of the value of a dictionary item. +2. `dictionary_register_delete_callback()` that will be called just prior to the deletion of an item from the dictionary, or prior to the replacement of the value of a dictionary item. +3. `dictionary_register_conflict_callback()` that will be called when `DICT_OPTION_DONT_OVERWRITE_VALUE` is set, and another `value` is attempted to be inserted for the same key. +4. `dictionary_register_react_callback()` that will be called after the the `insert` and the `conflict` callbacks. The `conflict` callback is called while the dictionary hash table is available for other threads. + +In **link** mode, the `name` and/or the `value` are just linked to the dictionary item, and it is the user's responsibility to free the memory they use after an item is deleted from the dictionary or when the dictionary is destroyed. + +By default, **clone** mode is used for both the name and the value. + +To use **link** mode for names, add `DICT_OPTION_NAME_LINK_DONT_CLONE` to the flags when creating the dictionary. + +To use **link** mode for values, add `DICT_OPTION_VALUE_LINK_DONT_CLONE` to the flags when creating the dictionary. + +## Locks + +The dictionary allows both **single-threaded** operation (no locks - faster) and **multi-threaded** operation utilizing a read-write lock. + +The default is **multi-threaded**. To enable **single-threaded** add `DICT_OPTION_SINGLE_THREADED` to the flags when creating the dictionary. + +When in **multi-threaded** mode, the dictionaries have 2 independent R/W locks. One for the linked list and one for the hash table (index). An insertion and a deletion will acquire both independently (one after another) for as long as they are needed, but a traversal may hold the the linked list for longer durations. The hash table (index) lock may be acquired while the linked list is acquired, but not the other way around (and the way the code is structured, it is not technically possible to hold and index lock and then lock the linked list one). + +These locks are R/W locks. They allow multiple readers, but only one writer. + +Unlike POSIX standards, the linked-list lock, allows one writer to lock it multiple times. This has been implemented in such a way, so that a traversal to the items of the dictionary in write-lock mode, allows the writing thread to call `dictionary_set()` or `dictionary_del()`, which alter the dictionary index and the linked list. Especially for the deletion of the currently working item, the dictionary support delayed removal, so it will remove it from the index immediately and mark it as deleted, so that it can be added to the dictionary again with a different value and the traversal will still proceed from the point it was. + +## Hash table operations + +The dictionary supports the following operations supported by the hash table: + +- `dictionary_set()` to add an item to the dictionary, or change its value. +- `dictionary_get()` and `dictionary_get_and_acquire_item()` to get an item from the dictionary. +- `dictionary_del()` to delete an item from the dictionary. + +For all the calls, there are also `*_advanced()` versions of them, that support more parameters. Check the header file for more information about them. + +## Creation and destruction + +Use `dictionary_create()` to create a dictionary. + +Use `dictionary_destroy()` to destroy a dictionary. When destroyed, a dictionary frees all the memory it has allocated on its own. This can be complemented by the registration of a deletion callback function that can be called upon deletion of each item in the dictionary, which may free additional resources linked to it. + +### dictionary_set() + +This call is used to: + +- **add** an item to the dictionary. +- **reset** the value of an existing item in the dictionary. + +If **resetting** is not desired, add `DICT_OPTION_DONT_OVERWRITE_VALUE` to the flags when creating the dictionary. In this case, `dictionary_set()` will return the value of the original item found in the dictionary instead of resetting it and the value passed to the call will be ignored. Optionally a conflict callback function can be registered, to manipulate (probably merge or extend) the original value, based on the new value attempted to be added to the dictionary. + +The format is: + +```c +value = dictionary_set(dict, name, value, value_len); +``` + +Where: + +* `dict` is a pointer to the dictionary previously created. +* `name` is a pointer to a string to be used as the key of this item. The name must not be `NULL` and must not be an empty string `""`. +* `value` is a pointer to the value associated with this item. In **clone** mode, if `value` is `NULL`, a new memory allocation will be made of `value_len` size and will be initialized to zero. +* `value_len` is the size of the `value` data in bytes. If `value_len` is zero, no allocation will be done and the dictionary item will permanently have the `NULL` value. + +### dictionary_get() + +This call is used to get the `value` of an item, given its `name`. It utilizes the hash table (index) for making the lookup. + +For **multi-threaded** operation, the `dictionary_get()` call gets a shared read lock on the index lock (multiple readers are allowed). The linked-list lock is not used. + +In clone mode, the value returned is not guaranteed to be valid, as any other thread may delete the item from the dictionary at any time. To ensure the value will be available, use `dictionary_get_and_acquire_item()`, which uses a reference counter to defer deletes until the item is released with `dictionary_acquired_item_release()`. + +The format is: + +```c +value = dictionary_get(dict, name); +``` + +Where: + +* `dict` is a pointer to the dictionary previously created. +* `name` is a pointer to a string to be used as the key of this item. The name must not be `NULL` and must not be an empty string `""`. + +### dictionary_del() + +This call is used to delete an item from the dictionary, given its name. + +If there is a deletion callback registered to the dictionary (`dictionary_register_delete_callback()`), it is called prior to the actual deletion of the item. + +The format is: + +```c +value = dictionary_del(dict, name); +``` + +Where: + +* `dict` is a pointer to the dictionary previously created. +* `name` is a pointer to a string to be used as the key of this item. The name must not be `NULL` and must not be an empty string `""`. + +### dictionary_get_and_acquire_item() + +This call can be used to search and acquire a dictionary item, while ensuring that it will be available for use, until `dictionary_acquired_item_release()` is called. + +This call **does not return the value** of the dictionary item. It returns an internal pointer to a structure that maintains the reference counter used to protect the actual value. To get the value of the item (the same value as returned by `dictionary_get()`), the function `dictionary_acquired_item_value()` has to be called. + +Example: + +```c +// create the dictionary +DICTIONARY *dict = dictionary_create(DICT_OPTION_NONE); + +// add an item to it +dictionary_set(dict, "name", "value", 6); + +// find the item we added and acquire it +const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dict, "name"); + +// extract its value +char *value = (char *)dictionary_acquired_item_value(dict, item); + +// now value points to the string "value" +printf("I got value = '%s'\n", value); + +// release the item, so that it can deleted +dictionary_acquired_item_release(dict, item); + +// destroy the dictionary +dictionary_destroy(dict); +``` + +When items are acquired, a reference counter is maintained to keep track of how many users exist for it. If an item with a non-zero number of users is deleted, it is removed from the index, it can be added again to the index (without conflict), and although it exists in the linked-list, it is not offered during traversal. Garbage collection to actually delete the item happens every time another item is added or removed from the linked-list and items are deleted only if no users are using them. + +If any item is still acquired when the dictionary is destroyed, the destruction of the dictionary is also deferred until all the acquired items are released. When the dictionary is destroyed like that, all operations on the dictionary fail (traversals do not traverse, insertions do not insert, deletions do not delete, searches do not find any items, etc). Once the last item in the dictionary is released, the dictionary is automatically destroyed too. + +## Traversal + +Dictionaries offer 3 ways to traverse the entire dictionary: + +- **walkthrough**, implemented by setting a callback function to be called for every item. +- **sorted walkthrough**, which first sorts the dictionary and then call a callback function for every item. +- **foreach**, a way to traverse the dictionary with a for-next loop. + +All these methods are available in **read**, **write**, or **reentrant** mode. In **read** mode only lookups are allowed to the dictionary. In **write** lookups but also insertions and deletions are allowed, and in **reentrant** mode the dictionary is unlocked outside dictionary code. + +### walkthrough (callback) + +There are 4 calls: + +- `dictionary_walkthrough_read()` and `dictionary_sorted_walkthrough_read()` acquire a shared read lock on the linked-list, and they call a callback function for every item of the dictionary. +- `dictionary_walkthrough_write()` and `dictionary_sorted_walkthrough_write()` acquire a write lock on the linked-list, and they call a callback function for every item of the dictionary. This is to be used when items need to be added to or removed from the dictionary. The `write` versions can be used to delete any or all the items from the dictionary, including the currently working one. For the `sorted` version, all items in the dictionary maintain a reference counter, so all deletions are deferred until the sorted walkthrough finishes. + +The non sorted versions traverse the items in the same order they have been added to the dictionary (or the reverse order if the flag `DICT_OPTION_ADD_IN_FRONT` is set during dictionary creation). The sorted versions sort alphabetically the items based on their name, and then they traverse them in the sorted order. + +The callback function returns an `int`. If this value is negative, traversal of the dictionary is stopped immediately and the negative value is returned to the caller. If the returned value of all callback calls is zero or positive, the walkthrough functions return the sum of the return values of all callbacks. So, if you are just interested to know how many items fall into some condition, write a callback function that returns 1 when the item satisfies that condition and 0 when it does not and the walkthrough function will return how many tested positive. + +### foreach (for-next loop) + +The following is a snippet of such a loop: + +```c +MY_STRUCTURE *x; +dfe_start_read(dict, x) { + printf("hey, I got an item named '%s' with value ptr %08X", x_dfe.name, x); +} +dfe_done(x); +``` + +The `x` parameter gives the name of the pointer to be used while iterating the items. Any name is accepted. `x` points to the `value` of the item in the dictionary. + +The `x_dfe.name` is a variable that is automatically created, by concatenating whatever is given as `x` and `_dfe`. It is an object and it has a few members, including `x_dfe.counter` that counts the iterations made so far, `x_dfe.item` that provides the acquired item from the dictionary and which can be used to pass it over for further processing, etc. Check the header file for more info. So, if you call `dfe_start_read(dict, myvar)`, the name will be `myvar_dfe`. + +Both `dfe_start_read(dict, item)` and `dfe_done(item)` are together inside a `do { ... } while(0)` loop, so that the following will work: + +```c +MY_ITEM *item; + +if(a = 1) + // do { + dfe_start_read(dict, x) + printf("hey, I got an item named '%s' with value ptr %08X", x_dfe.name, x); + dfe_done(x); + // } while(0); +else + something else; +``` + +In the above, the `if(a == 1)` condition will work as expected. It will do the foreach loop when a is 1, otherwise it will run `something else`. + +There are 2 versions of `dfe_start`: + +- `dfe_start_read()` that acquires a shared read linked-list lock to the dictionary. +- `dfe_start_write()` that acquires an exclusive write linked-list lock to the dictionary. + +While in the loop, depending on the read or write versions of `dfe_start`, the caller may lookup or manipulate the dictionary. The rules are the same with the unsorted walkthrough callback functions. + +PS: DFE is Dictionary For Each. diff --git a/libnetdata/dictionary/dictionary.c b/libnetdata/dictionary/dictionary.c new file mode 100644 index 0000000..0277e06 --- /dev/null +++ b/libnetdata/dictionary/dictionary.c @@ -0,0 +1,3620 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#define DICTIONARY_INTERNALS + +#include "../libnetdata.h" + +// runtime flags of the dictionary - must be checked with atomics +typedef enum __attribute__ ((__packed__)) { + DICT_FLAG_NONE = 0, + DICT_FLAG_DESTROYED = (1 << 0), // this dictionary has been destroyed +} DICT_FLAGS; + +#define dict_flag_check(dict, flag) (__atomic_load_n(&((dict)->flags), __ATOMIC_SEQ_CST) & (flag)) +#define dict_flag_set(dict, flag) __atomic_or_fetch(&((dict)->flags), flag, __ATOMIC_SEQ_CST) +#define dict_flag_clear(dict, flag) __atomic_and_fetch(&((dict)->flags), ~(flag), __ATOMIC_SEQ_CST) + +// flags macros +#define is_dictionary_destroyed(dict) dict_flag_check(dict, DICT_FLAG_DESTROYED) + +// configuration options macros +#define is_dictionary_single_threaded(dict) ((dict)->options & DICT_OPTION_SINGLE_THREADED) +#define is_view_dictionary(dict) ((dict)->master) +#define is_master_dictionary(dict) (!is_view_dictionary(dict)) + +typedef enum __attribute__ ((__packed__)) item_options { + ITEM_OPTION_NONE = 0, + ITEM_OPTION_ALLOCATED_NAME = (1 << 0), // the name pointer is a STRING + + // IMPORTANT: This is 1-bit - to add more change ITEM_OPTIONS_BITS +} ITEM_OPTIONS; + +typedef enum __attribute__ ((__packed__)) item_flags { + ITEM_FLAG_NONE = 0, + ITEM_FLAG_DELETED = (1 << 0), // this item is marked deleted, so it is not available for traversal (deleted from the index too) + ITEM_FLAG_BEING_CREATED = (1 << 1), // this item is currently being created - this flag is removed when construction finishes + + // IMPORTANT: This is 8-bit +} ITEM_FLAGS; + +#define item_flag_check(item, flag) (__atomic_load_n(&((item)->flags), __ATOMIC_SEQ_CST) & (flag)) +#define item_flag_set(item, flag) __atomic_or_fetch(&((item)->flags), flag, __ATOMIC_SEQ_CST) +#define item_flag_clear(item, flag) __atomic_and_fetch(&((item)->flags), ~(flag), __ATOMIC_SEQ_CST) + +#define item_shared_flag_check(item, flag) (__atomic_load_n(&((item)->shared->flags), __ATOMIC_SEQ_CST) & (flag)) +#define item_shared_flag_set(item, flag) __atomic_or_fetch(&((item)->shared->flags), flag, __ATOMIC_SEQ_CST) +#define item_shared_flag_clear(item, flag) __atomic_and_fetch(&((item)->shared->flags), ~(flag), __ATOMIC_SEQ_CST) + +#define REFCOUNT_DELETING (-100) + +#define ITEM_FLAGS_TYPE uint8_t +#define KEY_LEN_TYPE uint32_t +#define VALUE_LEN_TYPE uint32_t + +#define ITEM_OPTIONS_BITS 1 +#define KEY_LEN_BITS ((sizeof(KEY_LEN_TYPE) * 8) - (sizeof(ITEM_FLAGS_TYPE) * 8) - ITEM_OPTIONS_BITS) +#define KEY_LEN_MAX ((1 << KEY_LEN_BITS) - 1) + +#define VALUE_LEN_BITS ((sizeof(VALUE_LEN_TYPE) * 8) - (sizeof(ITEM_FLAGS_TYPE) * 8)) +#define VALUE_LEN_MAX ((1 << VALUE_LEN_BITS) - 1) + + +/* + * Every item in the dictionary has the following structure. + */ + +typedef int32_t REFCOUNT; + +typedef struct dictionary_item_shared { + void *value; // the value of the dictionary item + + // the order of the following items is important! + // The total of their storage should be 64-bits + + REFCOUNT links; // how many links this item has + VALUE_LEN_TYPE value_len:VALUE_LEN_BITS; // the size of the value + ITEM_FLAGS_TYPE flags; // shared flags +} DICTIONARY_ITEM_SHARED; + +struct dictionary_item { +#ifdef NETDATA_INTERNAL_CHECKS + DICTIONARY *dict; + pid_t creator_pid; + pid_t deleter_pid; + pid_t ll_adder_pid; + pid_t ll_remover_pid; +#endif + + DICTIONARY_ITEM_SHARED *shared; + + struct dictionary_item *next; // a double linked list to allow fast insertions and deletions + struct dictionary_item *prev; + + union { + STRING *string_name; // the name of the dictionary item + char *caller_name; // the user supplied string pointer +// void *key_ptr; // binary key pointer + }; + + // the order of the following items is important! + // The total of their storage should be 64-bits + + REFCOUNT refcount; // the private reference counter + + KEY_LEN_TYPE key_len:KEY_LEN_BITS; // the size of key indexed (for strings, including the null terminator) + // this is (2^23 - 1) = 8.388.607 bytes max key length. + + ITEM_OPTIONS options:ITEM_OPTIONS_BITS; // permanent configuration options + // (no atomic operations on this - they never change) + + ITEM_FLAGS_TYPE flags; // runtime changing flags for this item (atomic operations on this) + // cannot be a bit field because of atomics. +}; + +struct dictionary_hooks { + REFCOUNT links; + usec_t last_master_deletion_us; + + void (*ins_callback)(const DICTIONARY_ITEM *item, void *value, void *data); + void *ins_callback_data; + + bool (*conflict_callback)(const DICTIONARY_ITEM *item, void *old_value, void *new_value, void *data); + void *conflict_callback_data; + + void (*react_callback)(const DICTIONARY_ITEM *item, void *value, void *data); + void *react_callback_data; + + void (*del_callback)(const DICTIONARY_ITEM *item, void *value, void *data); + void *del_callback_data; +}; + +struct dictionary_stats dictionary_stats_category_other = { + .name = "other", +}; + +struct dictionary { +#ifdef NETDATA_INTERNAL_CHECKS + const char *creation_function; + const char *creation_file; + size_t creation_line; +#endif + + usec_t last_gc_run_us; + DICT_OPTIONS options; // the configuration flags of the dictionary (they never change - no atomics) + DICT_FLAGS flags; // run time flags for the dictionary (they change all the time - atomics needed) + + struct { // support for multiple indexing engines + Pvoid_t JudyHSArray; // the hash table + netdata_rwlock_t rwlock; // protect the index + } index; + + struct { + DICTIONARY_ITEM *list; // the double linked list of all items in the dictionary + netdata_rwlock_t rwlock; // protect the linked-list + pid_t writer_pid; // the gettid() of the writer + size_t writer_depth; // nesting of write locks + } items; + + struct dictionary_hooks *hooks; // pointer to external function callbacks to be called at certain points + struct dictionary_stats *stats; // statistics data, when DICT_OPTION_STATS is set + + DICTIONARY *master; // the master dictionary + DICTIONARY *next; // linked list for delayed destruction (garbage collection of whole dictionaries) + + size_t version; // the current version of the dictionary + // it is incremented when: + // - item added + // - item removed + // - item value reset + // - conflict callback returns true + // - function dictionary_version_increment() is called + + long int entries; // how many items are currently in the index (the linked list may have more) + long int referenced_items; // how many items of the dictionary are currently being used by 3rd parties + long int pending_deletion_items; // how many items of the dictionary have been deleted, but have not been removed yet + +#ifdef NETDATA_INTERNAL_CHECKS + netdata_mutex_t global_pointer_registry_mutex; + Pvoid_t global_pointer_registry; +#endif +}; + +// forward definitions of functions used in reverse order in the code +static void garbage_collect_pending_deletes(DICTIONARY *dict); +static inline void item_linked_list_remove(DICTIONARY *dict, DICTIONARY_ITEM *item); +static size_t dict_item_free_with_hooks(DICTIONARY *dict, DICTIONARY_ITEM *item); +static inline const char *item_get_name(const DICTIONARY_ITEM *item); +static inline int hashtable_delete_unsafe(DICTIONARY *dict, const char *name, size_t name_len, void *item); +static void item_release(DICTIONARY *dict, DICTIONARY_ITEM *item); +static bool dict_item_set_deleted(DICTIONARY *dict, DICTIONARY_ITEM *item); + +#define RC_ITEM_OK ( 0) +#define RC_ITEM_MARKED_FOR_DELETION (-1) // the item is marked for deletion +#define RC_ITEM_IS_CURRENTLY_BEING_DELETED (-2) // the item is currently being deleted +#define RC_ITEM_IS_CURRENTLY_BEING_CREATED (-3) // the item is currently being deleted +#define RC_ITEM_IS_REFERENCED (-4) // the item is currently referenced +#define item_check_and_acquire(dict, item) (item_check_and_acquire_advanced(dict, item, false) == RC_ITEM_OK) +static int item_check_and_acquire_advanced(DICTIONARY *dict, DICTIONARY_ITEM *item, bool having_index_lock); +#define item_is_not_referenced_and_can_be_removed(dict, item) (item_is_not_referenced_and_can_be_removed_advanced(dict, item) == RC_ITEM_OK) +static inline int item_is_not_referenced_and_can_be_removed_advanced(DICTIONARY *dict, DICTIONARY_ITEM *item); + +// ---------------------------------------------------------------------------- +// validate each pointer is indexed once - internal checks only + +static inline void pointer_index_init(DICTIONARY *dict __maybe_unused) { +#ifdef NETDATA_INTERNAL_CHECKS + netdata_mutex_init(&dict->global_pointer_registry_mutex); +#else + ; +#endif +} + +static inline void pointer_destroy_index(DICTIONARY *dict __maybe_unused) { +#ifdef NETDATA_INTERNAL_CHECKS + netdata_mutex_lock(&dict->global_pointer_registry_mutex); + JudyHSFreeArray(&dict->global_pointer_registry, PJE0); + netdata_mutex_unlock(&dict->global_pointer_registry_mutex); +#else + ; +#endif +} +static inline void pointer_add(DICTIONARY *dict __maybe_unused, DICTIONARY_ITEM *item __maybe_unused) { +#ifdef NETDATA_INTERNAL_CHECKS + netdata_mutex_lock(&dict->global_pointer_registry_mutex); + Pvoid_t *PValue = JudyHSIns(&dict->global_pointer_registry, &item, sizeof(void *), PJE0); + if(*PValue != NULL) + fatal("pointer already exists in registry"); + *PValue = item; + netdata_mutex_unlock(&dict->global_pointer_registry_mutex); +#else + ; +#endif +} + +static inline void pointer_check(DICTIONARY *dict __maybe_unused, DICTIONARY_ITEM *item __maybe_unused) { +#ifdef NETDATA_INTERNAL_CHECKS + netdata_mutex_lock(&dict->global_pointer_registry_mutex); + Pvoid_t *PValue = JudyHSGet(dict->global_pointer_registry, &item, sizeof(void *)); + if(PValue == NULL) + fatal("pointer is not found in registry"); + netdata_mutex_unlock(&dict->global_pointer_registry_mutex); +#else + ; +#endif +} + +static inline void pointer_del(DICTIONARY *dict __maybe_unused, DICTIONARY_ITEM *item __maybe_unused) { +#ifdef NETDATA_INTERNAL_CHECKS + netdata_mutex_lock(&dict->global_pointer_registry_mutex); + int ret = JudyHSDel(&dict->global_pointer_registry, &item, sizeof(void *), PJE0); + if(!ret) + fatal("pointer to be deleted does not exist in registry"); + netdata_mutex_unlock(&dict->global_pointer_registry_mutex); +#else + ; +#endif +} + +// ---------------------------------------------------------------------------- +// memory statistics + +static inline void DICTIONARY_STATS_PLUS_MEMORY(DICTIONARY *dict, size_t key_size, size_t item_size, size_t value_size) { + if(key_size) + __atomic_fetch_add(&dict->stats->memory.indexed, (long)key_size, __ATOMIC_RELAXED); + + if(item_size) + __atomic_fetch_add(&dict->stats->memory.dict, (long)item_size, __ATOMIC_RELAXED); + + if(value_size) + __atomic_fetch_add(&dict->stats->memory.values, (long)value_size, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_MINUS_MEMORY(DICTIONARY *dict, size_t key_size, size_t item_size, size_t value_size) { + if(key_size) + __atomic_fetch_sub(&dict->stats->memory.indexed, (long)key_size, __ATOMIC_RELAXED); + + if(item_size) + __atomic_fetch_sub(&dict->stats->memory.dict, (long)item_size, __ATOMIC_RELAXED); + + if(value_size) + __atomic_fetch_sub(&dict->stats->memory.values, (long)value_size, __ATOMIC_RELAXED); +} + +// ---------------------------------------------------------------------------- +// callbacks registration + +static inline void dictionary_hooks_allocate(DICTIONARY *dict) { + if(dict->hooks) return; + + dict->hooks = callocz(1, sizeof(struct dictionary_hooks)); + dict->hooks->links = 1; + + DICTIONARY_STATS_PLUS_MEMORY(dict, 0, sizeof(struct dictionary_hooks), 0); +} + +static inline size_t dictionary_hooks_free(DICTIONARY *dict) { + if(!dict->hooks) return 0; + + REFCOUNT links = __atomic_sub_fetch(&dict->hooks->links, 1, __ATOMIC_SEQ_CST); + if(links == 0) { + freez(dict->hooks); + dict->hooks = NULL; + + DICTIONARY_STATS_MINUS_MEMORY(dict, 0, sizeof(struct dictionary_hooks), 0); + return sizeof(struct dictionary_hooks); + } + + return 0; +} + +void dictionary_register_insert_callback(DICTIONARY *dict, void (*ins_callback)(const DICTIONARY_ITEM *item, void *value, void *data), void *data) { + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + dictionary_hooks_allocate(dict); + dict->hooks->ins_callback = ins_callback; + dict->hooks->ins_callback_data = data; +} + +void dictionary_register_conflict_callback(DICTIONARY *dict, bool (*conflict_callback)(const DICTIONARY_ITEM *item, void *old_value, void *new_value, void *data), void *data) { + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + internal_error(!(dict->options & DICT_OPTION_DONT_OVERWRITE_VALUE), "DICTIONARY: registering conflict callback without DICT_OPTION_DONT_OVERWRITE_VALUE"); + dict->options |= DICT_OPTION_DONT_OVERWRITE_VALUE; + + dictionary_hooks_allocate(dict); + dict->hooks->conflict_callback = conflict_callback; + dict->hooks->conflict_callback_data = data; +} + +void dictionary_register_react_callback(DICTIONARY *dict, void (*react_callback)(const DICTIONARY_ITEM *item, void *value, void *data), void *data) { + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + dictionary_hooks_allocate(dict); + dict->hooks->react_callback = react_callback; + dict->hooks->react_callback_data = data; +} + +void dictionary_register_delete_callback(DICTIONARY *dict, void (*del_callback)(const DICTIONARY_ITEM *item, void *value, void *data), void *data) { + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + dictionary_hooks_allocate(dict); + dict->hooks->del_callback = del_callback; + dict->hooks->del_callback_data = data; +} + +// ---------------------------------------------------------------------------- +// dictionary statistics API + +size_t dictionary_version(DICTIONARY *dict) { + if(unlikely(!dict)) return 0; + + // this is required for views to return the right number + garbage_collect_pending_deletes(dict); + + return __atomic_load_n(&dict->version, __ATOMIC_SEQ_CST); +} +size_t dictionary_entries(DICTIONARY *dict) { + if(unlikely(!dict)) return 0; + + // this is required for views to return the right number + garbage_collect_pending_deletes(dict); + + long int entries = __atomic_load_n(&dict->entries, __ATOMIC_SEQ_CST); + if(entries < 0) + fatal("DICTIONARY: entries is negative: %ld", entries); + + return entries; +} +size_t dictionary_referenced_items(DICTIONARY *dict) { + if(unlikely(!dict)) return 0; + + long int referenced_items = __atomic_load_n(&dict->referenced_items, __ATOMIC_SEQ_CST); + if(referenced_items < 0) + fatal("DICTIONARY: referenced items is negative: %ld", referenced_items); + + return referenced_items; +} + +long int dictionary_stats_for_registry(DICTIONARY *dict) { + if(unlikely(!dict)) return 0; + return (dict->stats->memory.indexed + dict->stats->memory.dict); +} +void dictionary_version_increment(DICTIONARY *dict) { + __atomic_fetch_add(&dict->version, 1, __ATOMIC_SEQ_CST); +} + +// ---------------------------------------------------------------------------- +// internal statistics API + +static inline void DICTIONARY_STATS_SEARCHES_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->ops.searches, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_ENTRIES_PLUS1(DICTIONARY *dict) { + // statistics + __atomic_fetch_add(&dict->stats->items.entries, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&dict->stats->items.referenced, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&dict->stats->ops.inserts, 1, __ATOMIC_RELAXED); + + if(unlikely(is_dictionary_single_threaded(dict))) { + dict->version++; + dict->entries++; + dict->referenced_items++; + + } + else { + __atomic_fetch_add(&dict->version, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&dict->entries, 1, __ATOMIC_SEQ_CST); + __atomic_fetch_add(&dict->referenced_items, 1, __ATOMIC_SEQ_CST); + } +} +static inline void DICTIONARY_ENTRIES_MINUS1(DICTIONARY *dict) { + // statistics + __atomic_fetch_add(&dict->stats->ops.deletes, 1, __ATOMIC_RELAXED); + __atomic_fetch_sub(&dict->stats->items.entries, 1, __ATOMIC_RELAXED); + + size_t entries; (void)entries; + if(unlikely(is_dictionary_single_threaded(dict))) { + dict->version++; + entries = dict->entries++; + } + else { + __atomic_fetch_add(&dict->version, 1, __ATOMIC_SEQ_CST); + entries = __atomic_fetch_sub(&dict->entries, 1, __ATOMIC_SEQ_CST); + } + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(entries == 0)) + fatal("DICT: negative number of entries in dictionary created from %s() (%zu@%s)", + dict->creation_function, + dict->creation_line, + dict->creation_file); +#endif +} +static inline void DICTIONARY_VALUE_RESETS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->ops.resets, 1, __ATOMIC_RELAXED); + + if(unlikely(is_dictionary_single_threaded(dict))) + dict->version++; + else + __atomic_fetch_add(&dict->version, 1, __ATOMIC_SEQ_CST); +} +static inline void DICTIONARY_STATS_TRAVERSALS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->ops.traversals, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_WALKTHROUGHS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->ops.walkthroughs, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_CHECK_SPINS_PLUS(DICTIONARY *dict, size_t count) { + __atomic_fetch_add(&dict->stats->spin_locks.use_spins, count, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_INSERT_SPINS_PLUS(DICTIONARY *dict, size_t count) { + __atomic_fetch_add(&dict->stats->spin_locks.insert_spins, count, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_DELETE_SPINS_PLUS(DICTIONARY *dict, size_t count) { + __atomic_fetch_add(&dict->stats->spin_locks.delete_spins, count, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_SEARCH_IGNORES_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->spin_locks.search_spins, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_CALLBACK_INSERTS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->callbacks.inserts, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_CALLBACK_CONFLICTS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->callbacks.conflicts, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_CALLBACK_REACTS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->callbacks.reacts, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_CALLBACK_DELETES_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->callbacks.deletes, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_GARBAGE_COLLECTIONS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->ops.garbage_collections, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_DICT_CREATIONS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->dictionaries.active, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&dict->stats->ops.creations, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_DICT_DESTRUCTIONS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_sub(&dict->stats->dictionaries.active, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&dict->stats->ops.destructions, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_DICT_DESTROY_QUEUED_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->dictionaries.deleted, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_DICT_DESTROY_QUEUED_MINUS1(DICTIONARY *dict) { + __atomic_fetch_sub(&dict->stats->dictionaries.deleted, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_DICT_FLUSHES_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->ops.flushes, 1, __ATOMIC_RELAXED); +} + +static inline long int DICTIONARY_REFERENCED_ITEMS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->items.referenced, 1, __ATOMIC_RELAXED); + + if(unlikely(is_dictionary_single_threaded(dict))) + return ++dict->referenced_items; + else + return __atomic_add_fetch(&dict->referenced_items, 1, __ATOMIC_SEQ_CST); +} + +static inline long int DICTIONARY_REFERENCED_ITEMS_MINUS1(DICTIONARY *dict) { + __atomic_fetch_sub(&dict->stats->items.referenced, 1, __ATOMIC_RELAXED); + + long int referenced_items; + if(unlikely(is_dictionary_single_threaded(dict))) + referenced_items = --dict->referenced_items; + else + referenced_items = __atomic_sub_fetch(&dict->referenced_items, 1, __ATOMIC_SEQ_CST); + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(referenced_items < 0)) + fatal("DICT: negative number of referenced items (%ld) in dictionary created from %s() (%zu@%s)", + referenced_items, + dict->creation_function, + dict->creation_line, + dict->creation_file); +#endif + + return referenced_items; +} + +static inline long int DICTIONARY_PENDING_DELETES_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->items.pending_deletion, 1, __ATOMIC_RELAXED); + + if(unlikely(is_dictionary_single_threaded(dict))) + return ++dict->pending_deletion_items; + else + return __atomic_add_fetch(&dict->pending_deletion_items, 1, __ATOMIC_SEQ_CST); +} + +static inline long int DICTIONARY_PENDING_DELETES_MINUS1(DICTIONARY *dict) { + __atomic_fetch_sub(&dict->stats->items.pending_deletion, 1, __ATOMIC_RELAXED); + + if(unlikely(is_dictionary_single_threaded(dict))) + return --dict->pending_deletion_items; + else + return __atomic_sub_fetch(&dict->pending_deletion_items, 1, __ATOMIC_SEQ_CST); +} + +static inline long int DICTIONARY_PENDING_DELETES_GET(DICTIONARY *dict) { + if(unlikely(is_dictionary_single_threaded(dict))) + return dict->pending_deletion_items; + else + return __atomic_load_n(&dict->pending_deletion_items, __ATOMIC_SEQ_CST); +} + +static inline REFCOUNT DICTIONARY_ITEM_REFCOUNT_GET(DICTIONARY *dict, DICTIONARY_ITEM *item) { + if(unlikely(dict && is_dictionary_single_threaded(dict))) // this is an exception, dict can be null + return item->refcount; + else + return (REFCOUNT)__atomic_load_n(&item->refcount, __ATOMIC_SEQ_CST); +} + +static inline REFCOUNT DICTIONARY_ITEM_REFCOUNT_GET_SOLE(DICTIONARY_ITEM *item) { + return (REFCOUNT)__atomic_load_n(&item->refcount, __ATOMIC_SEQ_CST); +} + +// ---------------------------------------------------------------------------- +// callbacks execution + +static void dictionary_execute_insert_callback(DICTIONARY *dict, DICTIONARY_ITEM *item, void *constructor_data) { + if(likely(!dict->hooks || !dict->hooks->ins_callback)) + return; + + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + internal_error(false, + "DICTIONARY: Running insert callback on item '%s' of dictionary created from %s() %zu@%s.", + item_get_name(item), + dict->creation_function, + dict->creation_line, + dict->creation_file); + + DICTIONARY_STATS_CALLBACK_INSERTS_PLUS1(dict); + dict->hooks->ins_callback(item, item->shared->value, constructor_data?constructor_data:dict->hooks->ins_callback_data); +} + +static bool dictionary_execute_conflict_callback(DICTIONARY *dict, DICTIONARY_ITEM *item, void *new_value, void *constructor_data) { + if(likely(!dict->hooks || !dict->hooks->conflict_callback)) + return false; + + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + internal_error(false, + "DICTIONARY: Running conflict callback on item '%s' of dictionary created from %s() %zu@%s.", + item_get_name(item), + dict->creation_function, + dict->creation_line, + dict->creation_file); + + DICTIONARY_STATS_CALLBACK_CONFLICTS_PLUS1(dict); + return dict->hooks->conflict_callback( + item, item->shared->value, new_value, + constructor_data ? constructor_data : dict->hooks->conflict_callback_data); +} + +static void dictionary_execute_react_callback(DICTIONARY *dict, DICTIONARY_ITEM *item, void *constructor_data) { + if(likely(!dict->hooks || !dict->hooks->react_callback)) + return; + + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + internal_error(false, + "DICTIONARY: Running react callback on item '%s' of dictionary created from %s() %zu@%s.", + item_get_name(item), + dict->creation_function, + dict->creation_line, + dict->creation_file); + + DICTIONARY_STATS_CALLBACK_REACTS_PLUS1(dict); + dict->hooks->react_callback(item, item->shared->value, + constructor_data?constructor_data:dict->hooks->react_callback_data); +} + +static void dictionary_execute_delete_callback(DICTIONARY *dict, DICTIONARY_ITEM *item) { + if(likely(!dict->hooks || !dict->hooks->del_callback)) + return; + + // We may execute delete callback on items deleted from a view, + // because we may have references to it, after the master is gone + // so, the shared structure will remain until the last reference is released. + + internal_error(false, + "DICTIONARY: Running delete callback on item '%s' of dictionary created from %s() %zu@%s.", + item_get_name(item), + dict->creation_function, + dict->creation_line, + dict->creation_file); + + DICTIONARY_STATS_CALLBACK_DELETES_PLUS1(dict); + dict->hooks->del_callback(item, item->shared->value, dict->hooks->del_callback_data); +} + +// ---------------------------------------------------------------------------- +// dictionary locks + +static inline size_t dictionary_locks_init(DICTIONARY *dict) { + if(likely(!is_dictionary_single_threaded(dict))) { + netdata_rwlock_init(&dict->index.rwlock); + netdata_rwlock_init(&dict->items.rwlock); + return 0; + } + return 0; +} + +static inline size_t dictionary_locks_destroy(DICTIONARY *dict) { + if(likely(!is_dictionary_single_threaded(dict))) { + netdata_rwlock_destroy(&dict->index.rwlock); + netdata_rwlock_destroy(&dict->items.rwlock); + return 0; + } + return 0; +} + +static inline void ll_recursive_lock_set_thread_as_writer(DICTIONARY *dict) { + pid_t expected = 0, desired = gettid(); + if(!__atomic_compare_exchange_n(&dict->items.writer_pid, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + fatal("DICTIONARY: Cannot set thread %d as exclusive writer, expected %d, desired %d, found %d.", gettid(), expected, desired, __atomic_load_n(&dict->items.writer_pid, __ATOMIC_SEQ_CST)); +} + +static inline void ll_recursive_unlock_unset_thread_writer(DICTIONARY *dict) { + pid_t expected = gettid(), desired = 0; + if(!__atomic_compare_exchange_n(&dict->items.writer_pid, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) + fatal("DICTIONARY: Cannot unset thread %d as exclusive writer, expected %d, desired %d, found %d.", gettid(), expected, desired, __atomic_load_n(&dict->items.writer_pid, __ATOMIC_SEQ_CST)); +} + +static inline bool ll_recursive_lock_is_thread_the_writer(DICTIONARY *dict) { + pid_t tid = gettid(); + return tid > 0 && tid == __atomic_load_n(&dict->items.writer_pid, __ATOMIC_SEQ_CST); +} + +static void ll_recursive_lock(DICTIONARY *dict, char rw) { + if(unlikely(is_dictionary_single_threaded(dict))) + return; + + if(ll_recursive_lock_is_thread_the_writer(dict)) { + dict->items.writer_depth++; + return; + } + + if(rw == DICTIONARY_LOCK_READ || rw == DICTIONARY_LOCK_REENTRANT || rw == 'R') { + // read lock + netdata_rwlock_rdlock(&dict->items.rwlock); + } + else { + // write lock + netdata_rwlock_wrlock(&dict->items.rwlock); + ll_recursive_lock_set_thread_as_writer(dict); + } +} + +static void ll_recursive_unlock(DICTIONARY *dict, char rw) { + if(unlikely(is_dictionary_single_threaded(dict))) + return; + + if(ll_recursive_lock_is_thread_the_writer(dict) && dict->items.writer_depth > 0) { + dict->items.writer_depth--; + return; + } + + if(rw == DICTIONARY_LOCK_READ || rw == DICTIONARY_LOCK_REENTRANT || rw == 'R') { + // read unlock + + netdata_rwlock_unlock(&dict->items.rwlock); + } + else { + // write unlock + + ll_recursive_unlock_unset_thread_writer(dict); + + netdata_rwlock_unlock(&dict->items.rwlock); + } +} + +void dictionary_write_lock(DICTIONARY *dict) { + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); +} +void dictionary_write_unlock(DICTIONARY *dict) { + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); +} + +static inline void dictionary_index_lock_rdlock(DICTIONARY *dict) { + if(unlikely(is_dictionary_single_threaded(dict))) + return; + + netdata_rwlock_rdlock(&dict->index.rwlock); +} + +static inline void dictionary_index_rdlock_unlock(DICTIONARY *dict) { + if(unlikely(is_dictionary_single_threaded(dict))) + return; + + netdata_rwlock_unlock(&dict->index.rwlock); +} + +static inline void dictionary_index_lock_wrlock(DICTIONARY *dict) { + if(unlikely(is_dictionary_single_threaded(dict))) + return; + + netdata_rwlock_wrlock(&dict->index.rwlock); +} +static inline void dictionary_index_wrlock_unlock(DICTIONARY *dict) { + if(unlikely(is_dictionary_single_threaded(dict))) + return; + + netdata_rwlock_unlock(&dict->index.rwlock); +} + +// ---------------------------------------------------------------------------- +// items garbage collector + +static void garbage_collect_pending_deletes(DICTIONARY *dict) { + usec_t last_master_deletion_us = dict->hooks?__atomic_load_n(&dict->hooks->last_master_deletion_us, __ATOMIC_SEQ_CST):0; + usec_t last_gc_run_us = __atomic_load_n(&dict->last_gc_run_us, __ATOMIC_SEQ_CST); + + bool is_view = is_view_dictionary(dict); + + if(likely(!( + DICTIONARY_PENDING_DELETES_GET(dict) > 0 || + (is_view && last_master_deletion_us > last_gc_run_us) + ))) + return; + + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); + + __atomic_store_n(&dict->last_gc_run_us, now_realtime_usec(), __ATOMIC_SEQ_CST); + + if(is_view) + dictionary_index_lock_wrlock(dict); + + DICTIONARY_STATS_GARBAGE_COLLECTIONS_PLUS1(dict); + + size_t deleted = 0, pending = 0, examined = 0; + DICTIONARY_ITEM *item = dict->items.list, *item_next; + while(item) { + examined++; + + // this will clean up + item_next = item->next; + int rc = item_check_and_acquire_advanced(dict, item, is_view); + + if(rc == RC_ITEM_MARKED_FOR_DELETION) { + // we didn't get a reference + + if(item_is_not_referenced_and_can_be_removed(dict, item)) { + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(dict->items.list, item, prev, next); + dict_item_free_with_hooks(dict, item); + deleted++; + + pending = DICTIONARY_PENDING_DELETES_MINUS1(dict); + if (!pending) + break; + } + } + else if(rc == RC_ITEM_IS_CURRENTLY_BEING_DELETED) + ; // do not touch this item (we didn't get a reference) + + else if(rc == RC_ITEM_OK) + item_release(dict, item); + + item = item_next; + } + + if(is_view) + dictionary_index_wrlock_unlock(dict); + + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); + + (void)deleted; + (void)examined; + + internal_error(false, "DICTIONARY: garbage collected dictionary created by %s (%zu@%s), examined %zu items, deleted %zu items, still pending %zu items", + dict->creation_function, dict->creation_line, dict->creation_file, examined, deleted, pending); + +} + +// ---------------------------------------------------------------------------- +// reference counters + +static inline size_t reference_counter_init(DICTIONARY *dict) { + (void)dict; + + // allocate memory required for reference counters + // return number of bytes + return 0; +} + +static inline size_t reference_counter_free(DICTIONARY *dict) { + (void)dict; + + // free memory required for reference counters + // return number of bytes + return 0; +} + +static void item_acquire(DICTIONARY *dict, DICTIONARY_ITEM *item) { + REFCOUNT refcount; + + if(unlikely(is_dictionary_single_threaded(dict))) { + refcount = ++item->refcount; + } + else { + // increment the refcount + refcount = __atomic_add_fetch(&item->refcount, 1, __ATOMIC_SEQ_CST); + } + + if(refcount <= 0) { + internal_error( + true, + "DICTIONARY: attempted to acquire item which is deleted (refcount = %d): " + "'%s' on dictionary created by %s() (%zu@%s)", + refcount - 1, + item_get_name(item), + dict->creation_function, + dict->creation_line, + dict->creation_file); + + fatal( + "DICTIONARY: request to acquire item '%s', which is deleted (refcount = %d)!", + item_get_name(item), + refcount - 1); + } + + if(refcount == 1) { + // referenced items counts number of unique items referenced + // so, we increase it only when refcount == 1 + DICTIONARY_REFERENCED_ITEMS_PLUS1(dict); + + // if this is a deleted item, but the counter increased to 1 + // we need to remove it from the pending items to delete + if(item_flag_check(item, ITEM_FLAG_DELETED)) + DICTIONARY_PENDING_DELETES_MINUS1(dict); + } +} + +static void item_release(DICTIONARY *dict, DICTIONARY_ITEM *item) { + // this function may be called without any lock on the dictionary + // or even when someone else has 'write' lock on the dictionary + + bool is_deleted; + REFCOUNT refcount; + + if(unlikely(is_dictionary_single_threaded(dict))) { + is_deleted = item->flags & ITEM_FLAG_DELETED; + refcount = --item->refcount; + } + else { + // get the flags before decrementing any reference counters + // (the other way around may lead to use-after-free) + is_deleted = item_flag_check(item, ITEM_FLAG_DELETED); + + // decrement the refcount + refcount = __atomic_sub_fetch(&item->refcount, 1, __ATOMIC_SEQ_CST); + } + + if(refcount < 0) { + internal_error( + true, + "DICTIONARY: attempted to release item without references (refcount = %d): " + "'%s' on dictionary created by %s() (%zu@%s)", + refcount + 1, + item_get_name(item), + dict->creation_function, + dict->creation_line, + dict->creation_file); + + fatal( + "DICTIONARY: attempted to release item '%s' without references (refcount = %d)", + item_get_name(item), + refcount + 1); + } + + if(refcount == 0) { + + if(is_deleted) + DICTIONARY_PENDING_DELETES_PLUS1(dict); + + // referenced items counts number of unique items referenced + // so, we decrease it only when refcount == 0 + DICTIONARY_REFERENCED_ITEMS_MINUS1(dict); + } +} + +static int item_check_and_acquire_advanced(DICTIONARY *dict, DICTIONARY_ITEM *item, bool having_index_lock) { + size_t spins = 0; + REFCOUNT refcount, desired; + + int ret = RC_ITEM_OK; + + refcount = DICTIONARY_ITEM_REFCOUNT_GET(dict, item); + + do { + spins++; + + if(refcount < 0) { + // we can't use this item + ret = RC_ITEM_IS_CURRENTLY_BEING_DELETED; + break; + } + + if(item_flag_check(item, ITEM_FLAG_DELETED)) { + // we can't use this item + ret = RC_ITEM_MARKED_FOR_DELETION; + break; + } + + desired = refcount + 1; + + } while(!__atomic_compare_exchange_n(&item->refcount, &refcount, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); + + // if ret == ITEM_OK, we acquired the item + + if(ret == RC_ITEM_OK) { + if (is_view_dictionary(dict) && + item_shared_flag_check(item, ITEM_FLAG_DELETED) && + !item_flag_check(item, ITEM_FLAG_DELETED)) { + // but, we can't use this item + + if (having_index_lock) { + // delete it from the hashtable + if(hashtable_delete_unsafe(dict, item_get_name(item), item->key_len, item) == 0) + error("DICTIONARY: INTERNAL ERROR VIEW: tried to delete item with name '%s', name_len %u that is not in the index", item_get_name(item), (KEY_LEN_TYPE)(item->key_len - 1)); + else + pointer_del(dict, item); + + // mark it in our dictionary as deleted too, + // this is safe to be done here, because we have got + // a reference counter on item + dict_item_set_deleted(dict, item); + + // decrement the refcount we incremented above + if (__atomic_sub_fetch(&item->refcount, 1, __ATOMIC_SEQ_CST) == 0) { + // this is a deleted item, and we are the last one + DICTIONARY_PENDING_DELETES_PLUS1(dict); + } + + // do not touch the item below this point + } else { + // this is traversal / walkthrough + // decrement the refcount we incremented above + __atomic_sub_fetch(&item->refcount, 1, __ATOMIC_SEQ_CST); + } + + return RC_ITEM_MARKED_FOR_DELETION; + } + + if(desired == 1) + DICTIONARY_REFERENCED_ITEMS_PLUS1(dict); + } + + + if(unlikely(spins > 1 && dict->stats)) + DICTIONARY_STATS_CHECK_SPINS_PLUS(dict, spins - 1); + + return ret; +} + +// if a dictionary item can be deleted, return true, otherwise return false +// we use the private reference counter +static inline int item_is_not_referenced_and_can_be_removed_advanced(DICTIONARY *dict, DICTIONARY_ITEM *item) { + // if we can set refcount to REFCOUNT_DELETING, we can delete this item + + size_t spins = 0; + REFCOUNT refcount, desired = REFCOUNT_DELETING; + + int ret = RC_ITEM_OK; + + refcount = DICTIONARY_ITEM_REFCOUNT_GET(dict, item); + + do { + spins++; + + if(refcount < 0) { + // we can't use this item + ret = RC_ITEM_IS_CURRENTLY_BEING_DELETED; + break; + } + + if(refcount > 0) { + // we can't delete this + ret = RC_ITEM_IS_REFERENCED; + break; + } + + if(item_flag_check(item, ITEM_FLAG_BEING_CREATED)) { + // we can't use this item + ret = RC_ITEM_IS_CURRENTLY_BEING_CREATED; + break; + } + } while(!__atomic_compare_exchange_n(&item->refcount, &refcount, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); + +#ifdef NETDATA_INTERNAL_CHECKS + if(ret == RC_ITEM_OK) + item->deleter_pid = gettid(); +#endif + + if(unlikely(spins > 1 && dict->stats)) + DICTIONARY_STATS_DELETE_SPINS_PLUS(dict, spins - 1); + + return ret; +} + +// if a dictionary item can be freed, return true, otherwise return false +// we use the shared reference counter +static inline bool item_shared_release_and_check_if_it_can_be_freed(DICTIONARY *dict __maybe_unused, DICTIONARY_ITEM *item) { + // if we can set refcount to REFCOUNT_DELETING, we can delete this item + + REFCOUNT links = __atomic_sub_fetch(&item->shared->links, 1, __ATOMIC_SEQ_CST); + if(links == 0 && __atomic_compare_exchange_n(&item->shared->links, &links, REFCOUNT_DELETING, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)) { + + // we can delete it + return true; + } + + // we can't delete it + return false; +} + + +// ---------------------------------------------------------------------------- +// hash table operations + +static size_t hashtable_init_unsafe(DICTIONARY *dict) { + dict->index.JudyHSArray = NULL; + return 0; +} + +static size_t hashtable_destroy_unsafe(DICTIONARY *dict) { + if(unlikely(!dict->index.JudyHSArray)) return 0; + + pointer_destroy_index(dict); + + JError_t J_Error; + Word_t ret = JudyHSFreeArray(&dict->index.JudyHSArray, &J_Error); + if(unlikely(ret == (Word_t) JERR)) { + error("DICTIONARY: Cannot destroy JudyHS, JU_ERRNO_* == %u, ID == %d", + JU_ERRNO(&J_Error), JU_ERRID(&J_Error)); + } + + debug(D_DICTIONARY, "Dictionary: hash table freed %lu bytes", ret); + + dict->index.JudyHSArray = NULL; + return (size_t)ret; +} + +static inline void **hashtable_insert_unsafe(DICTIONARY *dict, const char *name, size_t name_len) { + JError_t J_Error; + Pvoid_t *Rc = JudyHSIns(&dict->index.JudyHSArray, (void *)name, name_len, &J_Error); + if (unlikely(Rc == PJERR)) { + error("DICTIONARY: Cannot insert entry with name '%s' to JudyHS, JU_ERRNO_* == %u, ID == %d", + name, JU_ERRNO(&J_Error), JU_ERRID(&J_Error)); + } + + // if *Rc == 0, new item added to the array + // otherwise the existing item value is returned in *Rc + + // we return a pointer to a pointer, so that the caller can + // put anything needed at the value of the index. + // The pointer to pointer we return has to be used before + // any other operation that may change the index (insert/delete). + return Rc; +} + +static inline int hashtable_delete_unsafe(DICTIONARY *dict, const char *name, size_t name_len, void *item) { + (void)item; + if(unlikely(!dict->index.JudyHSArray)) return 0; + + JError_t J_Error; + int ret = JudyHSDel(&dict->index.JudyHSArray, (void *)name, name_len, &J_Error); + if(unlikely(ret == JERR)) { + error("DICTIONARY: Cannot delete entry with name '%s' from JudyHS, JU_ERRNO_* == %u, ID == %d", name, + JU_ERRNO(&J_Error), JU_ERRID(&J_Error)); + return 0; + } + + // Hey, this is problematic! We need the value back, not just an int with a status! + // https://sourceforge.net/p/judy/feature-requests/23/ + + if(unlikely(ret == 0)) { + // not found in the dictionary + return 0; + } + else { + // found and deleted from the dictionary + return 1; + } +} + +static inline DICTIONARY_ITEM *hashtable_get_unsafe(DICTIONARY *dict, const char *name, size_t name_len) { + if(unlikely(!dict->index.JudyHSArray)) return NULL; + + DICTIONARY_STATS_SEARCHES_PLUS1(dict); + + Pvoid_t *Rc; + Rc = JudyHSGet(dict->index.JudyHSArray, (void *)name, name_len); + if(likely(Rc)) { + // found in the hash table + pointer_check(dict, (DICTIONARY_ITEM *)*Rc); + return (DICTIONARY_ITEM *)*Rc; + } + else { + // not found in the hash table + return NULL; + } +} + +static inline void hashtable_inserted_item_unsafe(DICTIONARY *dict, void *item) { + (void)dict; + (void)item; + + // this is called just after an item is successfully inserted to the hashtable + // we don't need this for judy, but we may need it if we integrate more hash tables + + ; +} + +// ---------------------------------------------------------------------------- +// linked list management + +static inline void item_linked_list_add(DICTIONARY *dict, DICTIONARY_ITEM *item) { + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); + + if(dict->options & DICT_OPTION_ADD_IN_FRONT) + DOUBLE_LINKED_LIST_PREPEND_UNSAFE(dict->items.list, item, prev, next); + else + DOUBLE_LINKED_LIST_APPEND_UNSAFE(dict->items.list, item, prev, next); + +#ifdef NETDATA_INTERNAL_CHECKS + item->ll_adder_pid = gettid(); +#endif + + // clear the BEING created flag, + // after it has been inserted into the linked list + item_flag_clear(item, ITEM_FLAG_BEING_CREATED); + + garbage_collect_pending_deletes(dict); + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); +} + +static inline void item_linked_list_remove(DICTIONARY *dict, DICTIONARY_ITEM *item) { + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); + + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(dict->items.list, item, prev, next); + +#ifdef NETDATA_INTERNAL_CHECKS + item->ll_remover_pid = gettid(); +#endif + + garbage_collect_pending_deletes(dict); + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); +} + +// ---------------------------------------------------------------------------- +// ITEM initialization and updates + +static inline size_t item_set_name(DICTIONARY *dict, DICTIONARY_ITEM *item, const char *name, size_t name_len) { + if(likely(dict->options & DICT_OPTION_NAME_LINK_DONT_CLONE)) { + item->caller_name = (char *)name; + item->key_len = name_len; + } + else { + item->string_name = string_strdupz(name); + item->key_len = string_strlen(item->string_name) + 1; + item->options |= ITEM_OPTION_ALLOCATED_NAME; + } + + return item->key_len; +} + +static inline size_t item_free_name(DICTIONARY *dict, DICTIONARY_ITEM *item) { + if(likely(!(dict->options & DICT_OPTION_NAME_LINK_DONT_CLONE))) + string_freez(item->string_name); + + return item->key_len; +} + +static inline const char *item_get_name(const DICTIONARY_ITEM *item) { + if(item->options & ITEM_OPTION_ALLOCATED_NAME) + return string2str(item->string_name); + else + return item->caller_name; +} + +static inline size_t item_get_name_len(const DICTIONARY_ITEM *item) { + if(item->options & ITEM_OPTION_ALLOCATED_NAME) + return string_strlen(item->string_name); + else + return strlen(item->caller_name); +} + +static DICTIONARY_ITEM *dict_item_create(DICTIONARY *dict __maybe_unused, size_t *allocated_bytes, DICTIONARY_ITEM *master_item) { + DICTIONARY_ITEM *item; + + size_t size = sizeof(DICTIONARY_ITEM); + item = callocz(1, size); + +#ifdef NETDATA_INTERNAL_CHECKS + item->creator_pid = gettid(); +#endif + + item->refcount = 1; + item->flags = ITEM_FLAG_BEING_CREATED; + + *allocated_bytes += size; + + if(master_item) { + item->shared = master_item->shared; + + if(unlikely(__atomic_add_fetch(&item->shared->links, 1, __ATOMIC_SEQ_CST) <= 1)) + fatal("DICTIONARY: attempted to link to a shared item structure that had zero references"); + } + else { + size = sizeof(DICTIONARY_ITEM_SHARED); + item->shared = callocz(1, size); + item->shared->links = 1; + *allocated_bytes += size; + } + +#ifdef NETDATA_INTERNAL_CHECKS + item->dict = dict; +#endif + return item; +} + +static void *dict_item_value_create(void *value, size_t value_len) { + void *ptr = NULL; + + if(likely(value_len)) { + if (likely(value)) { + // a value has been supplied + // copy it + ptr = mallocz(value_len); + memcpy(ptr, value, value_len); + } + else { + // no value has been supplied + // allocate a clear memory block + ptr = callocz(1, value_len); + } + } + // else + // the caller wants an item without any value + + return ptr; +} + +static DICTIONARY_ITEM *dict_item_create_with_hooks(DICTIONARY *dict, const char *name, size_t name_len, void *value, size_t value_len, void *constructor_data, DICTIONARY_ITEM *master_item) { +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(name_len > KEY_LEN_MAX)) + fatal("DICTIONARY: tried to index a key of size %zu, but the maximum acceptable is %zu", name_len, (size_t)KEY_LEN_MAX); + + if(unlikely(value_len > VALUE_LEN_MAX)) + fatal("DICTIONARY: tried to add an item of size %zu, but the maximum acceptable is %zu", value_len, (size_t)VALUE_LEN_MAX); +#endif + + size_t item_size = 0, key_size = 0, value_size = 0; + + DICTIONARY_ITEM *item = dict_item_create(dict, &item_size, master_item); + key_size += item_set_name(dict, item, name, name_len); + + if(unlikely(is_view_dictionary(dict))) { + // we are on a view dictionary + // do not touch the value + ; + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(!master_item)) + fatal("DICTIONARY: cannot add an item to a view without a master item."); +#endif + } + else { + // we are on the master dictionary + + if(unlikely(dict->options & DICT_OPTION_VALUE_LINK_DONT_CLONE)) + item->shared->value = value; + else + item->shared->value = dict_item_value_create(value, value_len); + + item->shared->value_len = value_len; + value_size += value_len; + + dictionary_execute_insert_callback(dict, item, constructor_data); + } + + DICTIONARY_ENTRIES_PLUS1(dict); + DICTIONARY_STATS_PLUS_MEMORY(dict, key_size, item_size, value_size); + + return item; +} + +static void dict_item_reset_value_with_hooks(DICTIONARY *dict, DICTIONARY_ITEM *item, void *value, size_t value_len, void *constructor_data) { + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: %s() should never be called on views.", __FUNCTION__ ); + + debug(D_DICTIONARY, "Dictionary entry with name '%s' found. Changing its value.", item_get_name(item)); + + DICTIONARY_VALUE_RESETS_PLUS1(dict); + + if(item->shared->value_len != value_len) { + DICTIONARY_STATS_PLUS_MEMORY(dict, 0, 0, value_len); + DICTIONARY_STATS_MINUS_MEMORY(dict, 0, 0, item->shared->value_len); + } + + dictionary_execute_delete_callback(dict, item); + + if(likely(dict->options & DICT_OPTION_VALUE_LINK_DONT_CLONE)) { + debug(D_DICTIONARY, "Dictionary: linking value to '%s'", item_get_name(item)); + item->shared->value = value; + item->shared->value_len = value_len; + } + else { + debug(D_DICTIONARY, "Dictionary: cloning value to '%s'", item_get_name(item)); + + void *old_value = item->shared->value; + void *new_value = NULL; + if(value_len) { + new_value = mallocz(value_len); + if(value) memcpy(new_value, value, value_len); + else memset(new_value, 0, value_len); + } + item->shared->value = new_value; + item->shared->value_len = value_len; + + debug(D_DICTIONARY, "Dictionary: freeing old value of '%s'", item_get_name(item)); + freez(old_value); + } + + dictionary_execute_insert_callback(dict, item, constructor_data); +} + +static size_t dict_item_free_with_hooks(DICTIONARY *dict, DICTIONARY_ITEM *item) { + debug(D_DICTIONARY, "Destroying name value entry for name '%s'.", item_get_name(item)); + + if(!item_flag_check(item, ITEM_FLAG_DELETED)) + DICTIONARY_ENTRIES_MINUS1(dict); + + size_t item_size = 0, key_size = 0, value_size = 0; + + key_size += item->key_len; + if(unlikely(!(dict->options & DICT_OPTION_NAME_LINK_DONT_CLONE))) + item_free_name(dict, item); + + if(item_shared_release_and_check_if_it_can_be_freed(dict, item)) { + dictionary_execute_delete_callback(dict, item); + + if(unlikely(!(dict->options & DICT_OPTION_VALUE_LINK_DONT_CLONE))) { + debug(D_DICTIONARY, "Dictionary freeing value of '%s'", item_get_name(item)); + freez(item->shared->value); + item->shared->value = NULL; + } + value_size += item->shared->value_len; + + freez(item->shared); + item->shared = NULL; + item_size += sizeof(DICTIONARY_ITEM_SHARED); + } + + freez(item); + item_size += sizeof(DICTIONARY_ITEM); + + DICTIONARY_STATS_MINUS_MEMORY(dict, key_size, item_size, value_size); + + // we return the memory we actually freed + return item_size + ((dict->options & DICT_OPTION_VALUE_LINK_DONT_CLONE) ? 0 : value_size); +} + +// ---------------------------------------------------------------------------- +// item operations + +static void dict_item_shared_set_deleted(DICTIONARY *dict, DICTIONARY_ITEM *item) { + if(is_master_dictionary(dict)) { + item_shared_flag_set(item, ITEM_FLAG_DELETED); + + if(dict->hooks) + __atomic_store_n(&dict->hooks->last_master_deletion_us, now_realtime_usec(), __ATOMIC_SEQ_CST); + } +} + +// returns true if we set the deleted flag on this item +static bool dict_item_set_deleted(DICTIONARY *dict, DICTIONARY_ITEM *item) { + ITEM_FLAGS expected, desired; + + expected = __atomic_load_n(&item->flags, __ATOMIC_SEQ_CST); + + do { + + if (expected & ITEM_FLAG_DELETED) + return false; + + desired = expected | ITEM_FLAG_DELETED; + + } while(!__atomic_compare_exchange_n(&item->flags, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); + + DICTIONARY_ENTRIES_MINUS1(dict); + return true; +} + +static inline void dict_item_free_or_mark_deleted(DICTIONARY *dict, DICTIONARY_ITEM *item) { + int rc = item_is_not_referenced_and_can_be_removed_advanced(dict, item); + switch(rc) { + case RC_ITEM_OK: + // the item is ours, refcount set to -100 + dict_item_shared_set_deleted(dict, item); + item_linked_list_remove(dict, item); + dict_item_free_with_hooks(dict, item); + break; + + case RC_ITEM_IS_REFERENCED: + case RC_ITEM_IS_CURRENTLY_BEING_CREATED: + // the item is currently referenced by others + dict_item_shared_set_deleted(dict, item); + dict_item_set_deleted(dict, item); + // after this point do not touch the item + break; + + case RC_ITEM_IS_CURRENTLY_BEING_DELETED: + // an item that is currently being deleted by someone else - don't touch it + break; + + default: + internal_error(true, "Hey dev! You forgot to add the new condition here!"); + break; + } +} + +// this is used by traversal functions to remove the current item +// if it is deleted, and it has zero references. This will eliminate +// the need for the garbage collector to kick-in later. +// Most deletions happen during traversal, so this is a nice hack +// to speed up everything! +static inline void dict_item_release_and_check_if_it_is_deleted_and_can_be_removed_under_this_lock_mode(DICTIONARY *dict, DICTIONARY_ITEM *item, char rw) { + if(rw == DICTIONARY_LOCK_WRITE) { + bool should_be_deleted = item_flag_check(item, ITEM_FLAG_DELETED); + + item_release(dict, item); + + if(should_be_deleted && item_is_not_referenced_and_can_be_removed(dict, item)) { + // this has to be before removing from the linked list, + // otherwise the garbage collector will also kick in! + DICTIONARY_PENDING_DELETES_MINUS1(dict); + + item_linked_list_remove(dict, item); + dict_item_free_with_hooks(dict, item); + } + } + else { + // we can't do anything under this mode + item_release(dict, item); + } +} + +static bool dict_item_del(DICTIONARY *dict, const char *name, ssize_t name_len) { + if(unlikely(!name || !*name)) { + internal_error( + true, + "DICTIONARY: attempted to %s() without a name on a dictionary created from %s() %zu@%s.", + __FUNCTION__, + dict->creation_function, + dict->creation_line, + dict->creation_file); + return false; + } + + if(unlikely(is_dictionary_destroyed(dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_del() on a destroyed dictionary"); + return false; + } + + if(name_len == -1) + name_len = (ssize_t)strlen(name) + 1; // we need the terminating null too + + debug(D_DICTIONARY, "DEL dictionary entry with name '%s'.", name); + + // Unfortunately, the JudyHSDel() does not return the value of the + // item that was deleted, so we have to find it before we delete it, + // since we need to release our structures too. + + dictionary_index_lock_wrlock(dict); + + int ret; + DICTIONARY_ITEM *item = hashtable_get_unsafe(dict, name, name_len); + if(unlikely(!item)) { + dictionary_index_wrlock_unlock(dict); + ret = false; + } + else { + if(hashtable_delete_unsafe(dict, name, name_len, item) == 0) + error("DICTIONARY: INTERNAL ERROR: tried to delete item with name '%s', name_len %zd that is not in the index", name, name_len - 1); + else + pointer_del(dict, item); + + dictionary_index_wrlock_unlock(dict); + + dict_item_free_or_mark_deleted(dict, item); + ret = true; + } + + return ret; +} + +static DICTIONARY_ITEM *dict_item_add_or_reset_value_and_acquire(DICTIONARY *dict, const char *name, ssize_t name_len, void *value, size_t value_len, void *constructor_data, DICTIONARY_ITEM *master_item) { + if(unlikely(!name || !*name)) { + internal_error( + true, + "DICTIONARY: attempted to %s() without a name on a dictionary created from %s() %zu@%s.", + __FUNCTION__, + dict->creation_function, + dict->creation_line, + dict->creation_file); + return NULL; + } + + if(unlikely(is_dictionary_destroyed(dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_set() on a destroyed dictionary"); + return NULL; + } + + if(name_len == -1) + name_len = (ssize_t)strlen(name) + 1; // we need the terminating null too + + debug(D_DICTIONARY, "SET dictionary entry with name '%s'.", name); + + // DISCUSSION: + // Is it better to gain a read-lock and do a hashtable_get_unsafe() + // before we write lock to do hashtable_insert_unsafe()? + // + // Probably this depends on the use case. + // For statsd for example that does dictionary_set() to update received values, + // it could be beneficial to do a get() before we insert(). + // + // But the caller has the option to do this on his/her own. + // So, let's do the fastest here and let the caller decide the flow of calls. + + dictionary_index_lock_wrlock(dict); + + bool added_or_updated = false; + size_t spins = 0; + DICTIONARY_ITEM *item = NULL; + do { + DICTIONARY_ITEM **item_pptr = (DICTIONARY_ITEM **)hashtable_insert_unsafe(dict, name, name_len); + if (likely(*item_pptr == NULL)) { + // a new item added to the index + + // create the dictionary item + item = *item_pptr = + dict_item_create_with_hooks(dict, name, name_len, value, value_len, constructor_data, master_item); + + pointer_add(dict, item); + + // call the hashtable react + hashtable_inserted_item_unsafe(dict, item); + + // unlock the index lock, before we add it to the linked list + // DON'T DO IT THE OTHER WAY AROUND - DO NOT CROSS THE LOCKS! + dictionary_index_wrlock_unlock(dict); + + item_linked_list_add(dict, item); + + added_or_updated = true; + } + else { + pointer_check(dict, *item_pptr); + + if(item_check_and_acquire_advanced(dict, *item_pptr, true) != RC_ITEM_OK) { + spins++; + continue; + } + + // the item is already in the index + // so, either we will return the old one + // or overwrite the value, depending on dictionary flags + + // We should not compare the values here! + // even if they are the same, we have to do the whole job + // so that the callbacks will be called. + + item = *item_pptr; + + if(is_view_dictionary(dict)) { + // view dictionary + // the item is already there and can be used + if(item->shared != master_item->shared) + error("DICTIONARY: changing the master item on a view is not supported. The previous item will remain. To change the key of an item in a view, delete it and add it again."); + } + else { + // master dictionary + // the user wants to reset its value + + if (!(dict->options & DICT_OPTION_DONT_OVERWRITE_VALUE)) { + dict_item_reset_value_with_hooks(dict, item, value, value_len, constructor_data); + added_or_updated = true; + } + + else if (dictionary_execute_conflict_callback(dict, item, value, constructor_data)) { + dictionary_version_increment(dict); + added_or_updated = true; + } + + else { + // conflict callback returned false + // we did really nothing! + ; + } + } + + dictionary_index_wrlock_unlock(dict); + } + } while(!item); + + + if(unlikely(spins > 0 && dict->stats)) + DICTIONARY_STATS_INSERT_SPINS_PLUS(dict, spins); + + if(is_master_dictionary(dict) && added_or_updated) + dictionary_execute_react_callback(dict, item, constructor_data); + + return item; +} + +static DICTIONARY_ITEM *dict_item_find_and_acquire(DICTIONARY *dict, const char *name, ssize_t name_len) { + if(unlikely(!name || !*name)) { + internal_error( + true, + "DICTIONARY: attempted to %s() without a name on a dictionary created from %s() %zu@%s.", + __FUNCTION__, + dict->creation_function, + dict->creation_line, + dict->creation_file); + return NULL; + } + + if(unlikely(is_dictionary_destroyed(dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_get() on a destroyed dictionary"); + return NULL; + } + + if(name_len == -1) + name_len = (ssize_t)strlen(name) + 1; // we need the terminating null too + + debug(D_DICTIONARY, "GET dictionary entry with name '%s'.", name); + + dictionary_index_lock_rdlock(dict); + + DICTIONARY_ITEM *item = hashtable_get_unsafe(dict, name, name_len); + if(unlikely(item && !item_check_and_acquire(dict, item))) { + item = NULL; + DICTIONARY_STATS_SEARCH_IGNORES_PLUS1(dict); + } + + dictionary_index_rdlock_unlock(dict); + + return item; +} + +// ---------------------------------------------------------------------------- +// delayed destruction of dictionaries + +static bool dictionary_free_all_resources(DICTIONARY *dict, size_t *mem, bool force) { + if(mem) + *mem = 0; + + if(!force && dictionary_referenced_items(dict)) + return false; + + size_t dict_size = 0, counted_items = 0, item_size = 0, index_size = 0; + (void)counted_items; + +#ifdef NETDATA_INTERNAL_CHECKS + long int entries = dict->entries; + long int referenced_items = dict->referenced_items; + long int pending_deletion_items = dict->pending_deletion_items; + const char *creation_function = dict->creation_function; + const char *creation_file = dict->creation_file; + size_t creation_line = dict->creation_line; +#endif + + // destroy the index + dictionary_index_lock_wrlock(dict); + index_size += hashtable_destroy_unsafe(dict); + dictionary_index_wrlock_unlock(dict); + + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); + DICTIONARY_ITEM *item = dict->items.list; + while (item) { + // cache item->next + // because we are going to free item + DICTIONARY_ITEM *item_next = item->next; + + item_size += dict_item_free_with_hooks(dict, item); + item = item_next; + + // to speed up destruction, we don't + // unlink item from the linked-list here + + counted_items++; + } + dict->items.list = NULL; + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); + + dict_size += dictionary_locks_destroy(dict); + dict_size += reference_counter_free(dict); + dict_size += dictionary_hooks_free(dict); + dict_size += sizeof(DICTIONARY); + DICTIONARY_STATS_MINUS_MEMORY(dict, 0, sizeof(DICTIONARY), 0); + + freez(dict); + + internal_error( + false, + "DICTIONARY: Freed dictionary created from %s() %zu@%s, having %ld (counted %zu) entries, %ld referenced, %ld pending deletion, total freed memory: %zu bytes (sizeof(dict) = %zu, sizeof(item) = %zu).", + creation_function, + creation_line, + creation_file, + entries, counted_items, referenced_items, pending_deletion_items, + dict_size + item_size, sizeof(DICTIONARY), sizeof(DICTIONARY_ITEM) + sizeof(DICTIONARY_ITEM_SHARED)); + + if(mem) + *mem = dict_size + item_size + index_size; + + return true; +} + +netdata_mutex_t dictionaries_waiting_to_be_destroyed_mutex = NETDATA_MUTEX_INITIALIZER; +static DICTIONARY *dictionaries_waiting_to_be_destroyed = NULL; + +void dictionary_queue_for_destruction(DICTIONARY *dict) { + if(is_dictionary_destroyed(dict)) + return; + + DICTIONARY_STATS_DICT_DESTROY_QUEUED_PLUS1(dict); + dict_flag_set(dict, DICT_FLAG_DESTROYED); + + netdata_mutex_lock(&dictionaries_waiting_to_be_destroyed_mutex); + + dict->next = dictionaries_waiting_to_be_destroyed; + dictionaries_waiting_to_be_destroyed = dict; + + netdata_mutex_unlock(&dictionaries_waiting_to_be_destroyed_mutex); +} + +void cleanup_destroyed_dictionaries(void) { + if(!dictionaries_waiting_to_be_destroyed) + return; + + netdata_mutex_lock(&dictionaries_waiting_to_be_destroyed_mutex); + + DICTIONARY *dict, *last = NULL, *next = NULL; + for(dict = dictionaries_waiting_to_be_destroyed; dict ; dict = next) { + next = dict->next; + +#ifdef NETDATA_INTERNAL_CHECKS + size_t line = dict->creation_line; + const char *file = dict->creation_file; + const char *function = dict->creation_function; +#endif + + DICTIONARY_STATS_DICT_DESTROY_QUEUED_MINUS1(dict); + if(dictionary_free_all_resources(dict, NULL, false)) { + + internal_error( + true, + "DICTIONARY: freed dictionary with delayed destruction, created from %s() %zu@%s.", + function, line, file); + + if(last) last->next = next; + else dictionaries_waiting_to_be_destroyed = next; + } + else { + DICTIONARY_STATS_DICT_DESTROY_QUEUED_PLUS1(dict); + last = dict; + } + } + + netdata_mutex_unlock(&dictionaries_waiting_to_be_destroyed_mutex); +} + +// ---------------------------------------------------------------------------- +// API internal checks + +#ifdef NETDATA_INTERNAL_CHECKS +#define api_internal_check(dict, item, allow_null_dict, allow_null_item) api_internal_check_with_trace(dict, item, __FUNCTION__, allow_null_dict, allow_null_item) +static inline void api_internal_check_with_trace(DICTIONARY *dict, DICTIONARY_ITEM *item, const char *function, bool allow_null_dict, bool allow_null_item) { + if(!allow_null_dict && !dict) { + internal_error( + item, + "DICTIONARY: attempted to %s() with a NULL dictionary, passing an item created from %s() %zu@%s.", + function, + item->dict->creation_function, + item->dict->creation_line, + item->dict->creation_file); + fatal("DICTIONARY: attempted to %s() but dict is NULL", function); + } + + if(!allow_null_item && !item) { + internal_error( + true, + "DICTIONARY: attempted to %s() without an item on a dictionary created from %s() %zu@%s.", + function, + dict?dict->creation_function:"unknown", + dict?dict->creation_line:0, + dict?dict->creation_file:"unknown"); + fatal("DICTIONARY: attempted to %s() but item is NULL", function); + } + + if(dict && item && dict != item->dict) { + internal_error( + true, + "DICTIONARY: attempted to %s() an item on a dictionary created from %s() %zu@%s, but the item belongs to the dictionary created from %s() %zu@%s.", + function, + dict->creation_function, + dict->creation_line, + dict->creation_file, + item->dict->creation_function, + item->dict->creation_line, + item->dict->creation_file + ); + fatal("DICTIONARY: %s(): item does not belong to this dictionary.", function); + } + + if(item) { + REFCOUNT refcount = DICTIONARY_ITEM_REFCOUNT_GET(dict, item); + if (unlikely(refcount <= 0)) { + internal_error( + true, + "DICTIONARY: attempted to %s() of an item with reference counter = %d on a dictionary created from %s() %zu@%s", + function, + refcount, + item->dict->creation_function, + item->dict->creation_line, + item->dict->creation_file); + fatal("DICTIONARY: attempted to %s but item is having refcount = %d", function, refcount); + } + } +} +#else +#define api_internal_check(dict, item, allow_null_dict, allow_null_item) debug_dummy() +#endif + +#define api_is_name_good(dict, name, name_len) api_is_name_good_with_trace(dict, name, name_len, __FUNCTION__) +static bool api_is_name_good_with_trace(DICTIONARY *dict __maybe_unused, const char *name, ssize_t name_len __maybe_unused, const char *function __maybe_unused) { + if(unlikely(!name)) { + internal_error( + true, + "DICTIONARY: attempted to %s() with name = NULL on a dictionary created from %s() %zu@%s.", + function, + dict?dict->creation_function:"unknown", + dict?dict->creation_line:0, + dict?dict->creation_file:"unknown"); + return false; + } + + if(unlikely(!*name)) { + internal_error( + true, + "DICTIONARY: attempted to %s() with empty name on a dictionary created from %s() %zu@%s.", + function, + dict?dict->creation_function:"unknown", + dict?dict->creation_line:0, + dict?dict->creation_file:"unknown"); + return false; + } + + internal_error( + name_len > 0 && name_len != (ssize_t)(strlen(name) + 1), + "DICTIONARY: attempted to %s() with a name of '%s', having length of %zu (incl. '\\0'), but the supplied name_len = %ld, on a dictionary created from %s() %zu@%s.", + function, + name, + strlen(name) + 1, + (long int) name_len, + dict?dict->creation_function:"unknown", + dict?dict->creation_line:0, + dict?dict->creation_file:"unknown"); + + internal_error( + name_len <= 0 && name_len != -1, + "DICTIONARY: attempted to %s() with a name of '%s', having length of %zu (incl. '\\0'), but the supplied name_len = %ld, on a dictionary created from %s() %zu@%s.", + function, + name, + strlen(name) + 1, + (long int) name_len, + dict?dict->creation_function:"unknown", + dict?dict->creation_line:0, + dict?dict->creation_file:"unknown"); + + return true; +} + +// ---------------------------------------------------------------------------- +// API - dictionary management + +static DICTIONARY *dictionary_create_internal(DICT_OPTIONS options, struct dictionary_stats *stats) { + cleanup_destroyed_dictionaries(); + + DICTIONARY *dict = callocz(1, sizeof(DICTIONARY)); + dict->options = options; + dict->stats = stats; + + size_t dict_size = 0; + dict_size += sizeof(DICTIONARY); + dict_size += dictionary_locks_init(dict); + dict_size += reference_counter_init(dict); + dict_size += hashtable_init_unsafe(dict); + + pointer_index_init(dict); + + DICTIONARY_STATS_PLUS_MEMORY(dict, 0, dict_size, 0); + + return dict; +} + +#ifdef NETDATA_INTERNAL_CHECKS +DICTIONARY *dictionary_create_advanced_with_trace(DICT_OPTIONS options, struct dictionary_stats *stats, const char *function, size_t line, const char *file) { +#else +DICTIONARY *dictionary_create_advanced(DICT_OPTIONS options, struct dictionary_stats *stats) { +#endif + + DICTIONARY *dict = dictionary_create_internal(options, stats?stats:&dictionary_stats_category_other); + +#ifdef NETDATA_INTERNAL_CHECKS + dict->creation_function = function; + dict->creation_file = file; + dict->creation_line = line; +#endif + + DICTIONARY_STATS_DICT_CREATIONS_PLUS1(dict); + return dict; +} + +#ifdef NETDATA_INTERNAL_CHECKS +DICTIONARY *dictionary_create_view_with_trace(DICTIONARY *master, const char *function, size_t line, const char *file) { +#else +DICTIONARY *dictionary_create_view(DICTIONARY *master) { +#endif + + DICTIONARY *dict = dictionary_create_internal(master->options, master->stats); + dict->master = master; + + dictionary_hooks_allocate(master); + + if(unlikely(__atomic_load_n(&master->hooks->links, __ATOMIC_SEQ_CST)) < 1) + fatal("DICTIONARY: attempted to create a view that has %d links", master->hooks->links); + + dict->hooks = master->hooks; + __atomic_add_fetch(&master->hooks->links, 1, __ATOMIC_SEQ_CST); + +#ifdef NETDATA_INTERNAL_CHECKS + dict->creation_function = function; + dict->creation_file = file; + dict->creation_line = line; +#endif + + DICTIONARY_STATS_DICT_CREATIONS_PLUS1(dict); + return dict; +} + +void dictionary_flush(DICTIONARY *dict) { + if(unlikely(!dict)) + return; + + void *value; + dfe_start_write(dict, value) { + dictionary_del_advanced(dict, item_get_name(value_dfe.item), (ssize_t)item_get_name_len(value_dfe.item) + 1); + } + dfe_done(value); + + DICTIONARY_STATS_DICT_FLUSHES_PLUS1(dict); +} + +size_t dictionary_destroy(DICTIONARY *dict) { + cleanup_destroyed_dictionaries(); + + if(!dict) return 0; + + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); + + dict_flag_set(dict, DICT_FLAG_DESTROYED); + DICTIONARY_STATS_DICT_DESTRUCTIONS_PLUS1(dict); + + size_t referenced_items = dictionary_referenced_items(dict); + if(referenced_items) { + dictionary_flush(dict); + dictionary_queue_for_destruction(dict); + + internal_error( + true, + "DICTIONARY: delaying destruction of dictionary created from %s() %zu@%s, because it has %ld referenced items in it (%ld total).", + dict->creation_function, + dict->creation_line, + dict->creation_file, + dict->referenced_items, + dict->entries); + + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); + return 0; + } + + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); + + size_t freed; + dictionary_free_all_resources(dict, &freed, true); + + return freed; +} + +// ---------------------------------------------------------------------------- +// SET an item to the dictionary + +DICT_ITEM_CONST DICTIONARY_ITEM *dictionary_set_and_acquire_item_advanced(DICTIONARY *dict, const char *name, ssize_t name_len, void *value, size_t value_len, void *constructor_data) { + if(unlikely(!api_is_name_good(dict, name, name_len))) + return NULL; + + api_internal_check(dict, NULL, false, true); + + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: this dictionary is a view, you cannot add items other than the ones from the master dictionary."); + + DICTIONARY_ITEM *item = + dict_item_add_or_reset_value_and_acquire(dict, name, name_len, value, value_len, constructor_data, NULL); + api_internal_check(dict, item, false, false); + return item; +} + +void *dictionary_set_advanced(DICTIONARY *dict, const char *name, ssize_t name_len, void *value, size_t value_len, void *constructor_data) { + DICTIONARY_ITEM *item = dictionary_set_and_acquire_item_advanced(dict, name, name_len, value, value_len, constructor_data); + + if(likely(item)) { + void *v = item->shared->value; + item_release(dict, item); + return v; + } + + return NULL; +} + +DICT_ITEM_CONST DICTIONARY_ITEM *dictionary_view_set_and_acquire_item_advanced(DICTIONARY *dict, const char *name, ssize_t name_len, DICTIONARY_ITEM *master_item) { + if(unlikely(!api_is_name_good(dict, name, name_len))) + return NULL; + + api_internal_check(dict, NULL, false, true); + + if(unlikely(is_master_dictionary(dict))) + fatal("DICTIONARY: this dictionary is a master, you cannot add items from other dictionaries."); + + dictionary_acquired_item_dup(dict->master, master_item); + DICTIONARY_ITEM *item = dict_item_add_or_reset_value_and_acquire(dict, name, name_len, NULL, 0, NULL, master_item); + dictionary_acquired_item_release(dict->master, master_item); + + api_internal_check(dict, item, false, false); + return item; +} + +void *dictionary_view_set_advanced(DICTIONARY *dict, const char *name, ssize_t name_len, DICTIONARY_ITEM *master_item) { + DICTIONARY_ITEM *item = dictionary_view_set_and_acquire_item_advanced(dict, name, name_len, master_item); + + if(likely(item)) { + void *v = item->shared->value; + item_release(dict, item); + return v; + } + + return NULL; +} + +// ---------------------------------------------------------------------------- +// GET an item from the dictionary + +DICT_ITEM_CONST DICTIONARY_ITEM *dictionary_get_and_acquire_item_advanced(DICTIONARY *dict, const char *name, ssize_t name_len) { + if(unlikely(!api_is_name_good(dict, name, name_len))) + return NULL; + + api_internal_check(dict, NULL, false, true); + DICTIONARY_ITEM *item = dict_item_find_and_acquire(dict, name, name_len); + api_internal_check(dict, item, false, true); + return item; +} + +void *dictionary_get_advanced(DICTIONARY *dict, const char *name, ssize_t name_len) { + DICTIONARY_ITEM *item = dictionary_get_and_acquire_item_advanced(dict, name, name_len); + + if(likely(item)) { + void *v = item->shared->value; + item_release(dict, item); + return v; + } + + return NULL; +} + +// ---------------------------------------------------------------------------- +// DUP/REL an item (increase/decrease its reference counter) + +DICT_ITEM_CONST DICTIONARY_ITEM *dictionary_acquired_item_dup(DICTIONARY *dict, DICT_ITEM_CONST DICTIONARY_ITEM *item) { + // we allow the item to be NULL here + api_internal_check(dict, item, false, true); + + if(likely(item)) { + item_acquire(dict, item); + api_internal_check(dict, item, false, false); + } + + return item; +} + +void dictionary_acquired_item_release(DICTIONARY *dict, DICT_ITEM_CONST DICTIONARY_ITEM *item) { + // we allow the item to be NULL here + api_internal_check(dict, item, false, true); + + // no need to get a lock here + // we pass the last parameter to reference_counter_release() as true + // so that the release may get a write-lock if required to clean up + + if(likely(item)) + item_release(dict, item); +} + +// ---------------------------------------------------------------------------- +// get the name/value of an item + +const char *dictionary_acquired_item_name(DICT_ITEM_CONST DICTIONARY_ITEM *item) { + return item_get_name(item); +} + +void *dictionary_acquired_item_value(DICT_ITEM_CONST DICTIONARY_ITEM *item) { + if(likely(item)) + return item->shared->value; + + return NULL; +} + +size_t dictionary_acquired_item_references(DICT_ITEM_CONST DICTIONARY_ITEM *item) { + if(likely(item)) + return DICTIONARY_ITEM_REFCOUNT_GET_SOLE(item); + + return 0; +} + +// ---------------------------------------------------------------------------- +// DEL an item + +bool dictionary_del_advanced(DICTIONARY *dict, const char *name, ssize_t name_len) { + if(unlikely(!api_is_name_good(dict, name, name_len))) + return false; + + api_internal_check(dict, NULL, false, true); + return dict_item_del(dict, name, name_len); +} + +// ---------------------------------------------------------------------------- +// traversal with loop + +void *dictionary_foreach_start_rw(DICTFE *dfe, DICTIONARY *dict, char rw) { + if(unlikely(!dfe || !dict)) return NULL; + + if(unlikely(is_dictionary_destroyed(dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_foreach_start_rw() on a destroyed dictionary"); + dfe->counter = 0; + dfe->item = NULL; + dfe->name = NULL; + dfe->value = NULL; + return NULL; + } + + dfe->counter = 0; + dfe->dict = dict; + dfe->rw = rw; + + ll_recursive_lock(dict, dfe->rw); + + DICTIONARY_STATS_TRAVERSALS_PLUS1(dict); + + // get the first item from the list + DICTIONARY_ITEM *item = dict->items.list; + + // skip all the deleted items + while(item && !item_check_and_acquire(dict, item)) + item = item->next; + + if(likely(item)) { + dfe->item = item; + dfe->name = (char *)item_get_name(item); + dfe->value = item->shared->value; + } + else { + dfe->item = NULL; + dfe->name = NULL; + dfe->value = NULL; + } + + if(unlikely(dfe->rw == DICTIONARY_LOCK_REENTRANT)) + ll_recursive_unlock(dfe->dict, dfe->rw); + + return dfe->value; +} + +void *dictionary_foreach_next(DICTFE *dfe) { + if(unlikely(!dfe || !dfe->dict)) return NULL; + + if(unlikely(is_dictionary_destroyed(dfe->dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_foreach_next() on a destroyed dictionary"); + dfe->item = NULL; + dfe->name = NULL; + dfe->value = NULL; + return NULL; + } + + if(unlikely(dfe->rw == DICTIONARY_LOCK_REENTRANT)) + ll_recursive_lock(dfe->dict, dfe->rw); + + // the item we just did + DICTIONARY_ITEM *item = dfe->item; + + // get the next item from the list + DICTIONARY_ITEM *item_next = (item) ? item->next : NULL; + + // skip all the deleted items until one that can be acquired is found + while(item_next && !item_check_and_acquire(dfe->dict, item_next)) + item_next = item_next->next; + + if(likely(item)) { + dict_item_release_and_check_if_it_is_deleted_and_can_be_removed_under_this_lock_mode(dfe->dict, item, dfe->rw); + // item_release(dfe->dict, item); + } + + item = item_next; + if(likely(item)) { + dfe->item = item; + dfe->name = (char *)item_get_name(item); + dfe->value = item->shared->value; + dfe->counter++; + } + else { + dfe->item = NULL; + dfe->name = NULL; + dfe->value = NULL; + } + + if(unlikely(dfe->rw == DICTIONARY_LOCK_REENTRANT)) + ll_recursive_unlock(dfe->dict, dfe->rw); + + return dfe->value; +} + +void dictionary_foreach_done(DICTFE *dfe) { + if(unlikely(!dfe || !dfe->dict)) return; + + if(unlikely(is_dictionary_destroyed(dfe->dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_foreach_next() on a destroyed dictionary"); + return; + } + + // the item we just did + DICTIONARY_ITEM *item = dfe->item; + + // release it, so that it can possibly be deleted + if(likely(item)) { + dict_item_release_and_check_if_it_is_deleted_and_can_be_removed_under_this_lock_mode(dfe->dict, item, dfe->rw); + // item_release(dfe->dict, item); + } + + if(likely(dfe->rw != DICTIONARY_LOCK_REENTRANT)) + ll_recursive_unlock(dfe->dict, dfe->rw); + + dfe->dict = NULL; + dfe->item = NULL; + dfe->name = NULL; + dfe->value = NULL; + dfe->counter = 0; +} + +// ---------------------------------------------------------------------------- +// API - walk through the dictionary. +// The dictionary is locked for reading while this happens +// do not use other dictionary calls while walking the dictionary - deadlock! + +int dictionary_walkthrough_rw(DICTIONARY *dict, char rw, int (*callback)(const DICTIONARY_ITEM *item, void *entry, void *data), void *data) { + if(unlikely(!dict || !callback)) return 0; + + if(unlikely(is_dictionary_destroyed(dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_walkthrough_rw() on a destroyed dictionary"); + return 0; + } + + ll_recursive_lock(dict, rw); + + DICTIONARY_STATS_WALKTHROUGHS_PLUS1(dict); + + // written in such a way, that the callback can delete the active element + + int ret = 0; + DICTIONARY_ITEM *item = dict->items.list, *item_next; + while(item) { + + // skip the deleted items + if(unlikely(!item_check_and_acquire(dict, item))) { + item = item->next; + continue; + } + + if(unlikely(rw == DICTIONARY_LOCK_REENTRANT)) + ll_recursive_unlock(dict, rw); + + int r = callback(item, item->shared->value, data); + + if(unlikely(rw == DICTIONARY_LOCK_REENTRANT)) + ll_recursive_lock(dict, rw); + + // since we have a reference counter, this item cannot be deleted + // until we release the reference counter, so the pointers are there + item_next = item->next; + + dict_item_release_and_check_if_it_is_deleted_and_can_be_removed_under_this_lock_mode(dict, item, rw); + // item_release(dict, item); + + if(unlikely(r < 0)) { + ret = r; + break; + } + + ret += r; + + item = item_next; + } + + ll_recursive_unlock(dict, rw); + + return ret; +} + +// ---------------------------------------------------------------------------- +// sorted walkthrough + +typedef int (*qsort_compar)(const void *item1, const void *item2); + +static int dictionary_sort_compar(const void *item1, const void *item2) { + return strcmp(item_get_name((*(DICTIONARY_ITEM **)item1)), item_get_name((*(DICTIONARY_ITEM **)item2))); +} + +int dictionary_sorted_walkthrough_rw(DICTIONARY *dict, char rw, int (*callback)(const DICTIONARY_ITEM *item, void *entry, void *data), void *data, dictionary_sorted_compar compar) { + if(unlikely(!dict || !callback)) return 0; + + if(unlikely(is_dictionary_destroyed(dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_sorted_walkthrough_rw() on a destroyed dictionary"); + return 0; + } + + DICTIONARY_STATS_WALKTHROUGHS_PLUS1(dict); + + ll_recursive_lock(dict, rw); + size_t entries = __atomic_load_n(&dict->entries, __ATOMIC_SEQ_CST); + DICTIONARY_ITEM **array = mallocz(sizeof(DICTIONARY_ITEM *) * entries); + + size_t i; + DICTIONARY_ITEM *item; + for(item = dict->items.list, i = 0; item && i < entries; item = item->next) { + if(likely(item_check_and_acquire(dict, item))) + array[i++] = item; + } + ll_recursive_unlock(dict, rw); + + if(unlikely(i != entries)) + entries = i; + + if(compar) + qsort(array, entries, sizeof(DICTIONARY_ITEM *), (qsort_compar)compar); + else + qsort(array, entries, sizeof(DICTIONARY_ITEM *), dictionary_sort_compar); + + bool callit = true; + int ret = 0, r; + for(i = 0; i < entries ;i++) { + item = array[i]; + + if(callit) + r = callback(item, item->shared->value, data); + + dict_item_release_and_check_if_it_is_deleted_and_can_be_removed_under_this_lock_mode(dict, item, rw); + // item_release(dict, item); + + if(r < 0) { + ret = r; + r = 0; + + // stop calling the callback, + // but we have to continue, to release all the reference counters + callit = false; + } + else + ret += r; + } + + freez(array); + + return ret; +} + +// ---------------------------------------------------------------------------- +// THREAD_CACHE + +static __thread Pvoid_t thread_cache_judy_array = NULL; + +void *thread_cache_entry_get_or_set(void *key, + ssize_t key_length, + void *value, + void *(*transform_the_value_before_insert)(void *key, size_t key_length, void *value) + ) { + if(unlikely(!key || !key_length)) return NULL; + + if(key_length == -1) + key_length = (ssize_t)strlen((char *)key) + 1; + + JError_t J_Error; + Pvoid_t *Rc = JudyHSIns(&thread_cache_judy_array, key, key_length, &J_Error); + if (unlikely(Rc == PJERR)) { + fatal("THREAD_CACHE: Cannot insert entry to JudyHS, JU_ERRNO_* == %u, ID == %d", + JU_ERRNO(&J_Error), JU_ERRID(&J_Error)); + } + + if(*Rc == 0) { + // new item added + + *Rc = (transform_the_value_before_insert) ? transform_the_value_before_insert(key, key_length, value) : value; + } + + return *Rc; +} + +void thread_cache_destroy(void) { + if(unlikely(!thread_cache_judy_array)) return; + + JError_t J_Error; + Word_t ret = JudyHSFreeArray(&thread_cache_judy_array, &J_Error); + if(unlikely(ret == (Word_t) JERR)) { + error("THREAD_CACHE: Cannot destroy JudyHS, JU_ERRNO_* == %u, ID == %d", + JU_ERRNO(&J_Error), JU_ERRID(&J_Error)); + } + + internal_error(true, "THREAD_CACHE: hash table freed %lu bytes", ret); + + thread_cache_judy_array = NULL; +} + +// ---------------------------------------------------------------------------- +// unit test + +static void dictionary_unittest_free_char_pp(char **pp, size_t entries) { + for(size_t i = 0; i < entries ;i++) + freez(pp[i]); + + freez(pp); +} + +static char **dictionary_unittest_generate_names(size_t entries) { + char **names = mallocz(sizeof(char *) * entries); + for(size_t i = 0; i < entries ;i++) { + char buf[25 + 1] = ""; + snprintfz(buf, 25, "name.%zu.0123456789.%zu!@#$%%^&*(),./[]{}\\|~`", i, entries / 2 + i); + names[i] = strdupz(buf); + } + return names; +} + +static char **dictionary_unittest_generate_values(size_t entries) { + char **values = mallocz(sizeof(char *) * entries); + for(size_t i = 0; i < entries ;i++) { + char buf[25 + 1] = ""; + snprintfz(buf, 25, "value-%zu-0987654321.%zu%%^&*(),. \t !@#$/[]{}\\|~`", i, entries / 2 + i); + values[i] = strdupz(buf); + } + return values; +} + +static size_t dictionary_unittest_set_clone(DICTIONARY *dict, char **names, char **values, size_t entries) { + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + size_t vallen = strlen(values[i]) + 1; + char *val = (char *)dictionary_set(dict, names[i], values[i], vallen); + if(val == values[i]) { fprintf(stderr, ">>> %s() returns reference to value\n", __FUNCTION__); errors++; } + if(!val || memcmp(val, values[i], vallen) != 0) { fprintf(stderr, ">>> %s() returns invalid value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_set_null(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)values; + size_t errors = 0; + size_t i = 0; + for(; i < entries ;i++) { + void *val = dictionary_set(dict, names[i], NULL, 0); + if(val != NULL) { fprintf(stderr, ">>> %s() returns a non NULL value\n", __FUNCTION__); errors++; } + } + if(dictionary_entries(dict) != i) { + fprintf(stderr, ">>> %s() dictionary items do not match\n", __FUNCTION__); + errors++; + } + return errors; +} + + +static size_t dictionary_unittest_set_nonclone(DICTIONARY *dict, char **names, char **values, size_t entries) { + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + size_t vallen = strlen(values[i]) + 1; + char *val = (char *)dictionary_set(dict, names[i], values[i], vallen); + if(val != values[i]) { fprintf(stderr, ">>> %s() returns invalid pointer to value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_get_clone(DICTIONARY *dict, char **names, char **values, size_t entries) { + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + size_t vallen = strlen(values[i]) + 1; + char *val = (char *)dictionary_get(dict, names[i]); + if(val == values[i]) { fprintf(stderr, ">>> %s() returns reference to value\n", __FUNCTION__); errors++; } + if(!val || memcmp(val, values[i], vallen) != 0) { fprintf(stderr, ">>> %s() returns invalid value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_get_nonclone(DICTIONARY *dict, char **names, char **values, size_t entries) { + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + char *val = (char *)dictionary_get(dict, names[i]); + if(val != values[i]) { fprintf(stderr, ">>> %s() returns invalid pointer to value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_get_nonexisting(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + char *val = (char *)dictionary_get(dict, values[i]); + if(val) { fprintf(stderr, ">>> %s() returns non-existing item\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_del_nonexisting(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + bool ret = dictionary_del(dict, values[i]); + if(ret) { fprintf(stderr, ">>> %s() deleted non-existing item\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_del_existing(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)values; + size_t errors = 0; + + size_t forward_from = 0, forward_to = entries / 3; + size_t middle_from = forward_to, middle_to = entries * 2 / 3; + size_t backward_from = middle_to, backward_to = entries; + + for(size_t i = forward_from; i < forward_to ;i++) { + bool ret = dictionary_del(dict, names[i]); + if(!ret) { fprintf(stderr, ">>> %s() didn't delete (forward) existing item\n", __FUNCTION__); errors++; } + } + + for(size_t i = middle_to - 1; i >= middle_from ;i--) { + bool ret = dictionary_del(dict, names[i]); + if(!ret) { fprintf(stderr, ">>> %s() didn't delete (middle) existing item\n", __FUNCTION__); errors++; } + } + + for(size_t i = backward_to - 1; i >= backward_from ;i--) { + bool ret = dictionary_del(dict, names[i]); + if(!ret) { fprintf(stderr, ">>> %s() didn't delete (backward) existing item\n", __FUNCTION__); errors++; } + } + + return errors; +} + +static size_t dictionary_unittest_reset_clone(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)values; + // set the name as value too + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + size_t vallen = strlen(names[i]) + 1; + char *val = (char *)dictionary_set(dict, names[i], names[i], vallen); + if(val == names[i]) { fprintf(stderr, ">>> %s() returns reference to value\n", __FUNCTION__); errors++; } + if(!val || memcmp(val, names[i], vallen) != 0) { fprintf(stderr, ">>> %s() returns invalid value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_reset_nonclone(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)values; + // set the name as value too + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + size_t vallen = strlen(names[i]) + 1; + char *val = (char *)dictionary_set(dict, names[i], names[i], vallen); + if(val != names[i]) { fprintf(stderr, ">>> %s() returns invalid pointer to value\n", __FUNCTION__); errors++; } + if(!val) { fprintf(stderr, ">>> %s() returns invalid value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_reset_dont_overwrite_nonclone(DICTIONARY *dict, char **names, char **values, size_t entries) { + // set the name as value too + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + size_t vallen = strlen(names[i]) + 1; + char *val = (char *)dictionary_set(dict, names[i], names[i], vallen); + if(val != values[i]) { fprintf(stderr, ">>> %s() returns invalid pointer to value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static int dictionary_unittest_walkthrough_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value __maybe_unused, void *data __maybe_unused) { + return 1; +} + +static size_t dictionary_unittest_walkthrough(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + int sum = dictionary_walkthrough_read(dict, dictionary_unittest_walkthrough_callback, NULL); + if(sum < (int)entries) return entries - sum; + else return sum - entries; +} + +static int dictionary_unittest_walkthrough_delete_this_callback(const DICTIONARY_ITEM *item, void *value __maybe_unused, void *data) { + const char *name = dictionary_acquired_item_name((DICTIONARY_ITEM *)item); + + if(!dictionary_del((DICTIONARY *)data, name)) + return 0; + + return 1; +} + +static size_t dictionary_unittest_walkthrough_delete_this(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + int sum = dictionary_walkthrough_write(dict, dictionary_unittest_walkthrough_delete_this_callback, dict); + if(sum < (int)entries) return entries - sum; + else return sum - entries; +} + +static int dictionary_unittest_walkthrough_stop_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value __maybe_unused, void *data __maybe_unused) { + return -1; +} + +static size_t dictionary_unittest_walkthrough_stop(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + (void)entries; + int sum = dictionary_walkthrough_read(dict, dictionary_unittest_walkthrough_stop_callback, NULL); + if(sum != -1) return 1; + return 0; +} + +static size_t dictionary_unittest_foreach(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + (void)entries; + size_t count = 0; + char *item; + dfe_start_read(dict, item) + count++; + dfe_done(item); + + if(count > entries) return count - entries; + return entries - count; +} + +static size_t dictionary_unittest_foreach_delete_this(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + (void)entries; + size_t count = 0; + char *item; + dfe_start_write(dict, item) + if(dictionary_del(dict, item_dfe.name)) count++; + dfe_done(item); + + if(count > entries) return count - entries; + return entries - count; +} + +static size_t dictionary_unittest_destroy(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + (void)entries; + size_t bytes = dictionary_destroy(dict); + fprintf(stderr, " %s() freed %zu bytes,", __FUNCTION__, bytes); + return 0; +} + +static usec_t dictionary_unittest_run_and_measure_time(DICTIONARY *dict, char *message, char **names, char **values, size_t entries, size_t *errors, size_t (*callback)(DICTIONARY *dict, char **names, char **values, size_t entries)) { + fprintf(stderr, "%40s ... ", message); + + usec_t started = now_realtime_usec(); + size_t errs = callback(dict, names, values, entries); + usec_t ended = now_realtime_usec(); + usec_t dt = ended - started; + + if(callback == dictionary_unittest_destroy) dict = NULL; + + long int found_ok = 0, found_deleted = 0, found_referenced = 0; + if(dict) { + DICTIONARY_ITEM *item; + DOUBLE_LINKED_LIST_FOREACH_FORWARD(dict->items.list, item, prev, next) { + if(item->refcount >= 0 && !(item ->flags & ITEM_FLAG_DELETED)) + found_ok++; + else + found_deleted++; + + if(item->refcount > 0) + found_referenced++; + } + } + + fprintf(stderr, " %zu errors, %ld (found %ld) items in dictionary, %ld (found %ld) referenced, %ld (found %ld) deleted, %llu usec \n", + errs, dict?dict->entries:0, found_ok, dict?dict->referenced_items:0, found_referenced, dict?dict->pending_deletion_items:0, found_deleted, dt); + *errors += errs; + return dt; +} + +static void dictionary_unittest_clone(DICTIONARY *dict, char **names, char **values, size_t entries, size_t *errors) { + dictionary_unittest_run_and_measure_time(dict, "adding entries", names, values, entries, errors, dictionary_unittest_set_clone); + dictionary_unittest_run_and_measure_time(dict, "getting entries", names, values, entries, errors, dictionary_unittest_get_clone); + dictionary_unittest_run_and_measure_time(dict, "getting non-existing entries", names, values, entries, errors, dictionary_unittest_get_nonexisting); + dictionary_unittest_run_and_measure_time(dict, "resetting entries", names, values, entries, errors, dictionary_unittest_reset_clone); + dictionary_unittest_run_and_measure_time(dict, "deleting non-existing entries", names, values, entries, errors, dictionary_unittest_del_nonexisting); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach read loop", names, values, entries, errors, dictionary_unittest_foreach); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback", names, values, entries, errors, dictionary_unittest_walkthrough); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback stop", names, values, entries, errors, dictionary_unittest_walkthrough_stop); + dictionary_unittest_run_and_measure_time(dict, "deleting existing entries", names, values, entries, errors, dictionary_unittest_del_existing); + dictionary_unittest_run_and_measure_time(dict, "walking through empty", names, values, 0, errors, dictionary_unittest_walkthrough); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach empty", names, values, 0, errors, dictionary_unittest_foreach); + dictionary_unittest_run_and_measure_time(dict, "destroying empty dictionary", names, values, entries, errors, dictionary_unittest_destroy); +} + +static void dictionary_unittest_nonclone(DICTIONARY *dict, char **names, char **values, size_t entries, size_t *errors) { + dictionary_unittest_run_and_measure_time(dict, "adding entries", names, values, entries, errors, dictionary_unittest_set_nonclone); + dictionary_unittest_run_and_measure_time(dict, "getting entries", names, values, entries, errors, dictionary_unittest_get_nonclone); + dictionary_unittest_run_and_measure_time(dict, "getting non-existing entries", names, values, entries, errors, dictionary_unittest_get_nonexisting); + dictionary_unittest_run_and_measure_time(dict, "resetting entries", names, values, entries, errors, dictionary_unittest_reset_nonclone); + dictionary_unittest_run_and_measure_time(dict, "deleting non-existing entries", names, values, entries, errors, dictionary_unittest_del_nonexisting); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach read loop", names, values, entries, errors, dictionary_unittest_foreach); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback", names, values, entries, errors, dictionary_unittest_walkthrough); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback stop", names, values, entries, errors, dictionary_unittest_walkthrough_stop); + dictionary_unittest_run_and_measure_time(dict, "deleting existing entries", names, values, entries, errors, dictionary_unittest_del_existing); + dictionary_unittest_run_and_measure_time(dict, "walking through empty", names, values, 0, errors, dictionary_unittest_walkthrough); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach empty", names, values, 0, errors, dictionary_unittest_foreach); + dictionary_unittest_run_and_measure_time(dict, "destroying empty dictionary", names, values, entries, errors, dictionary_unittest_destroy); +} + +struct dictionary_unittest_sorting { + const char *old_name; + const char *old_value; + size_t count; +}; + +static int dictionary_unittest_sorting_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name((DICTIONARY_ITEM *)item); + struct dictionary_unittest_sorting *t = (struct dictionary_unittest_sorting *)data; + const char *v = (const char *)value; + + int ret = 0; + if(t->old_name && strcmp(t->old_name, name) > 0) { + fprintf(stderr, "name '%s' should be after '%s'\n", t->old_name, name); + ret = 1; + } + t->count++; + t->old_name = name; + t->old_value = v; + + return ret; +} + +static size_t dictionary_unittest_sorted_walkthrough(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + struct dictionary_unittest_sorting tmp = { .old_name = NULL, .old_value = NULL, .count = 0 }; + size_t errors; + errors = dictionary_sorted_walkthrough_read(dict, dictionary_unittest_sorting_callback, &tmp); + + if(tmp.count != entries) { + fprintf(stderr, "Expected %zu entries, counted %zu\n", entries, tmp.count); + errors++; + } + return errors; +} + +static void dictionary_unittest_sorting(DICTIONARY *dict, char **names, char **values, size_t entries, size_t *errors) { + dictionary_unittest_run_and_measure_time(dict, "adding entries", names, values, entries, errors, dictionary_unittest_set_clone); + dictionary_unittest_run_and_measure_time(dict, "sorted walkthrough", names, values, entries, errors, dictionary_unittest_sorted_walkthrough); +} + +static void dictionary_unittest_null_dfe(DICTIONARY *dict, char **names, char **values, size_t entries, size_t *errors) { + dictionary_unittest_run_and_measure_time(dict, "adding null value entries", names, values, entries, errors, dictionary_unittest_set_null); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach read loop", names, values, entries, errors, dictionary_unittest_foreach); +} + + +static int unittest_check_dictionary_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value __maybe_unused, void *data __maybe_unused) { + return 1; +} + +static size_t unittest_check_dictionary(const char *label, DICTIONARY *dict, size_t traversable, size_t active_items, size_t deleted_items, size_t referenced_items, size_t pending_deletion) { + size_t errors = 0; + + size_t ll = 0; + void *t; + dfe_start_read(dict, t) + ll++; + dfe_done(t); + + fprintf(stderr, "DICT %-20s: dictionary foreach entries %zu, expected %zu...\t\t\t\t\t", + label, ll, traversable); + if(ll != traversable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + ll = dictionary_walkthrough_read(dict, unittest_check_dictionary_callback, NULL); + fprintf(stderr, "DICT %-20s: dictionary walkthrough entries %zu, expected %zu...\t\t\t\t", + label, ll, traversable); + if(ll != traversable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + ll = dictionary_sorted_walkthrough_read(dict, unittest_check_dictionary_callback, NULL); + fprintf(stderr, "DICT %-20s: dictionary sorted walkthrough entries %zu, expected %zu...\t\t\t", + label, ll, traversable); + if(ll != traversable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + DICTIONARY_ITEM *item; + size_t active = 0, deleted = 0, referenced = 0, pending = 0; + for(item = dict->items.list; item; item = item->next) { + if(!(item->flags & ITEM_FLAG_DELETED) && !(item->shared->flags & ITEM_FLAG_DELETED)) + active++; + else { + deleted++; + + if(item->refcount == 0) + pending++; + } + + if(item->refcount > 0) + referenced++; + } + + fprintf(stderr, "DICT %-20s: dictionary active items reported %ld, counted %zu, expected %zu...\t\t\t", + label, dict->entries, active, active_items); + if(active != active_items || active != (size_t)dict->entries) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + fprintf(stderr, "DICT %-20s: dictionary deleted items counted %zu, expected %zu...\t\t\t\t", + label, deleted, deleted_items); + if(deleted != deleted_items) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + fprintf(stderr, "DICT %-20s: dictionary referenced items reported %ld, counted %zu, expected %zu...\t\t", + label, dict->referenced_items, referenced, referenced_items); + if(referenced != referenced_items || dict->referenced_items != (long int)referenced) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + fprintf(stderr, "DICT %-20s: dictionary pending deletion items reported %ld, counted %zu, expected %zu...\t", + label, dict->pending_deletion_items, pending, pending_deletion); + if(pending != pending_deletion || pending != (size_t)dict->pending_deletion_items) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + return errors; +} + +static int check_item_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data) { + return value == data; +} + +static size_t unittest_check_item(const char *label, DICTIONARY *dict, + DICTIONARY_ITEM *item, const char *name, const char *value, int refcount, + ITEM_FLAGS deleted_flags, bool searchable, bool browsable, bool linked) { + size_t errors = 0; + + fprintf(stderr, "ITEM %-20s: name is '%s', expected '%s'...\t\t\t\t\t\t", label, item_get_name(item), name); + if(strcmp(item_get_name(item), name) != 0) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + fprintf(stderr, "ITEM %-20s: value is '%s', expected '%s'...\t\t\t\t\t", label, (const char *)item->shared->value, value); + if(strcmp((const char *)item->shared->value, value) != 0) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + fprintf(stderr, "ITEM %-20s: refcount is %d, expected %d...\t\t\t\t\t\t\t", label, item->refcount, refcount); + if (item->refcount != refcount) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + fprintf(stderr, "ITEM %-20s: deleted flag is %s, expected %s...\t\t\t\t\t", label, + (item->flags & ITEM_FLAG_DELETED || item->shared->flags & ITEM_FLAG_DELETED)?"true":"false", + (deleted_flags & ITEM_FLAG_DELETED)?"true":"false"); + + if ((item->flags & ITEM_FLAG_DELETED || item->shared->flags & ITEM_FLAG_DELETED) != (deleted_flags & ITEM_FLAG_DELETED)) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + void *v = dictionary_get(dict, name); + bool found = v == item->shared->value; + fprintf(stderr, "ITEM %-20s: searchable %5s, expected %5s...\t\t\t\t\t\t", label, + found?"true":"false", searchable?"true":"false"); + if(found != searchable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + found = false; + void *t; + dfe_start_read(dict, t) { + if(t == item->shared->value) found = true; + } + dfe_done(t); + + fprintf(stderr, "ITEM %-20s: dfe browsable %5s, expected %5s...\t\t\t\t\t", label, + found?"true":"false", browsable?"true":"false"); + if(found != browsable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + found = dictionary_walkthrough_read(dict, check_item_callback, item->shared->value); + fprintf(stderr, "ITEM %-20s: walkthrough browsable %5s, expected %5s...\t\t\t\t", label, + found?"true":"false", browsable?"true":"false"); + if(found != browsable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + found = dictionary_sorted_walkthrough_read(dict, check_item_callback, item->shared->value); + fprintf(stderr, "ITEM %-20s: sorted walkthrough browsable %5s, expected %5s...\t\t\t", label, + found?"true":"false", browsable?"true":"false"); + if(found != browsable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + found = false; + DICTIONARY_ITEM *n; + for(n = dict->items.list; n ;n = n->next) + if(n == item) found = true; + + fprintf(stderr, "ITEM %-20s: linked %5s, expected %5s...\t\t\t\t\t\t", label, + found?"true":"false", linked?"true":"false"); + if(found != linked) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + return errors; +} + +struct thread_unittest { + int join; + DICTIONARY *dict; + int dups; +}; + +static void *unittest_dict_thread(void *arg) { + struct thread_unittest *tu = arg; + for(; 1 ;) { + if(__atomic_load_n(&tu->join, __ATOMIC_RELAXED)) + break; + + DICT_ITEM_CONST DICTIONARY_ITEM *item = + dictionary_set_and_acquire_item_advanced(tu->dict, "dict thread checking 1234567890", + -1, NULL, 0, NULL); + + + dictionary_get(tu->dict, dictionary_acquired_item_name(item)); + + void *t1; + dfe_start_write(tu->dict, t1) { + + // this should delete the referenced item + dictionary_del(tu->dict, t1_dfe.name); + + void *t2; + dfe_start_write(tu->dict, t2) { + // this should add another + dictionary_set(tu->dict, t2_dfe.name, NULL, 0); + + dictionary_get(tu->dict, dictionary_acquired_item_name(item)); + + // and this should delete it again + dictionary_del(tu->dict, t2_dfe.name); + } + dfe_done(t2); + + // this should fail to add it + dictionary_set(tu->dict, t1_dfe.name, NULL, 0); + dictionary_del(tu->dict, t1_dfe.name); + } + dfe_done(t1); + + for(int i = 0; i < tu->dups ; i++) { + dictionary_acquired_item_dup(tu->dict, item); + dictionary_get(tu->dict, dictionary_acquired_item_name(item)); + } + + for(int i = 0; i < tu->dups ; i++) { + dictionary_acquired_item_release(tu->dict, item); + dictionary_del(tu->dict, dictionary_acquired_item_name(item)); + } + + dictionary_acquired_item_release(tu->dict, item); + dictionary_del(tu->dict, "dict thread checking 1234567890"); + + // test concurrent deletions and flushes + { + if(gettid() % 2) { + char buf [256 + 1]; + + for (int i = 0; i < 1000; i++) { + snprintfz(buf, 256, "del/flush test %d", i); + dictionary_set(tu->dict, buf, NULL, 0); + } + + for (int i = 0; i < 1000; i++) { + snprintfz(buf, 256, "del/flush test %d", i); + dictionary_del(tu->dict, buf); + } + } + else { + for (int i = 0; i < 10; i++) { + dictionary_flush(tu->dict); + } + } + } + } + + return arg; +} + +static int dictionary_unittest_threads() { + + struct thread_unittest tu = { + .join = 0, + .dict = NULL, + .dups = 1, + }; + + // threads testing of dictionary + tu.dict = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + time_t seconds_to_run = 5; + int threads_to_create = 2; + fprintf( + stderr, + "\nChecking dictionary concurrency with %d threads for %lld seconds...\n", + threads_to_create, + (long long)seconds_to_run); + + netdata_thread_t threads[threads_to_create]; + tu.join = 0; + for (int i = 0; i < threads_to_create; i++) { + char buf[100 + 1]; + snprintf(buf, 100, "dict%d", i); + netdata_thread_create( + &threads[i], + buf, + NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE, + unittest_dict_thread, + &tu); + } + sleep_usec(seconds_to_run * USEC_PER_SEC); + + __atomic_store_n(&tu.join, 1, __ATOMIC_RELAXED); + for (int i = 0; i < threads_to_create; i++) { + void *retval; + netdata_thread_join(threads[i], &retval); + } + + fprintf(stderr, + "inserts %zu" + ", deletes %zu" + ", searches %zu" + ", resets %zu" + ", flushes %zu" + ", entries %ld" + ", referenced_items %ld" + ", pending deletions %ld" + ", check spins %zu" + ", insert spins %zu" + ", delete spins %zu" + ", search ignores %zu" + "\n", + tu.dict->stats->ops.inserts, + tu.dict->stats->ops.deletes, + tu.dict->stats->ops.searches, + tu.dict->stats->ops.resets, + tu.dict->stats->ops.flushes, + tu.dict->entries, + tu.dict->referenced_items, + tu.dict->pending_deletion_items, + tu.dict->stats->spin_locks.use_spins, + tu.dict->stats->spin_locks.insert_spins, + tu.dict->stats->spin_locks.delete_spins, + tu.dict->stats->spin_locks.search_spins + ); + dictionary_destroy(tu.dict); + tu.dict = NULL; + + return 0; +} + +struct thread_view_unittest { + int join; + DICTIONARY *master; + DICTIONARY *view; + DICTIONARY_ITEM *item_master; + int dups; +}; + +static void *unittest_dict_master_thread(void *arg) { + struct thread_view_unittest *tv = arg; + + DICTIONARY_ITEM *item = NULL; + int loops = 0; + while(!__atomic_load_n(&tv->join, __ATOMIC_SEQ_CST)) { + + if(!item) + item = dictionary_set_and_acquire_item(tv->master, "ITEM1", "123", strlen("123") + 1); + + if(__atomic_load_n(&tv->item_master, __ATOMIC_SEQ_CST) != NULL) { + dictionary_acquired_item_release(tv->master, item); + dictionary_del(tv->master, "ITEM1"); + item = NULL; + loops++; + continue; + } + + dictionary_acquired_item_dup(tv->master, item); // for the view thread + __atomic_store_n(&tv->item_master, item, __ATOMIC_SEQ_CST); + dictionary_del(tv->master, "ITEM1"); + + + for(int i = 0; i < tv->dups + loops ; i++) { + dictionary_acquired_item_dup(tv->master, item); + } + + for(int i = 0; i < tv->dups + loops ; i++) { + dictionary_acquired_item_release(tv->master, item); + } + + dictionary_acquired_item_release(tv->master, item); + + item = NULL; + loops = 0; + } + + return arg; +} + +static void *unittest_dict_view_thread(void *arg) { + struct thread_view_unittest *tv = arg; + + DICTIONARY_ITEM *m_item = NULL; + + while(!__atomic_load_n(&tv->join, __ATOMIC_SEQ_CST)) { + if(!(m_item = __atomic_load_n(&tv->item_master, __ATOMIC_SEQ_CST))) + continue; + + DICTIONARY_ITEM *v_item = dictionary_view_set_and_acquire_item(tv->view, "ITEM2", m_item); + dictionary_acquired_item_release(tv->master, m_item); + __atomic_store_n(&tv->item_master, NULL, __ATOMIC_SEQ_CST); + + for(int i = 0; i < tv->dups ; i++) { + dictionary_acquired_item_dup(tv->view, v_item); + } + + for(int i = 0; i < tv->dups ; i++) { + dictionary_acquired_item_release(tv->view, v_item); + } + + dictionary_del(tv->view, "ITEM2"); + + while(!__atomic_load_n(&tv->join, __ATOMIC_SEQ_CST) && !(m_item = __atomic_load_n(&tv->item_master, __ATOMIC_SEQ_CST))) { + dictionary_acquired_item_dup(tv->view, v_item); + dictionary_acquired_item_release(tv->view, v_item); + } + + dictionary_acquired_item_release(tv->view, v_item); + } + + return arg; +} + +static int dictionary_unittest_view_threads() { + + struct thread_view_unittest tv = { + .join = 0, + .master = NULL, + .view = NULL, + .item_master = NULL, + .dups = 1, + }; + + // threads testing of dictionary + struct dictionary_stats stats_master = {}; + struct dictionary_stats stats_view = {}; + tv.master = dictionary_create_advanced(DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE, &stats_master); + tv.view = dictionary_create_view(tv.master); + tv.view->stats = &stats_view; + + time_t seconds_to_run = 5; + fprintf( + stderr, + "\nChecking dictionary concurrency with 1 master and 1 view threads for %lld seconds...\n", + (long long)seconds_to_run); + + netdata_thread_t master_thread, view_thread; + tv.join = 0; + + netdata_thread_create( + &master_thread, + "master", + NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE, + unittest_dict_master_thread, + &tv); + + netdata_thread_create( + &view_thread, + "view", + NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE, + unittest_dict_view_thread, + &tv); + + sleep_usec(seconds_to_run * USEC_PER_SEC); + + __atomic_store_n(&tv.join, 1, __ATOMIC_RELAXED); + void *retval; + netdata_thread_join(view_thread, &retval); + netdata_thread_join(master_thread, &retval); + + fprintf(stderr, + "MASTER: inserts %zu" + ", deletes %zu" + ", searches %zu" + ", resets %zu" + ", entries %ld" + ", referenced_items %ld" + ", pending deletions %ld" + ", check spins %zu" + ", insert spins %zu" + ", delete spins %zu" + ", search ignores %zu" + "\n", + stats_master.ops.inserts, + stats_master.ops.deletes, + stats_master.ops.searches, + stats_master.ops.resets, + tv.master->entries, + tv.master->referenced_items, + tv.master->pending_deletion_items, + stats_master.spin_locks.use_spins, + stats_master.spin_locks.insert_spins, + stats_master.spin_locks.delete_spins, + stats_master.spin_locks.search_spins + ); + fprintf(stderr, + "VIEW : inserts %zu" + ", deletes %zu" + ", searches %zu" + ", resets %zu" + ", entries %ld" + ", referenced_items %ld" + ", pending deletions %ld" + ", check spins %zu" + ", insert spins %zu" + ", delete spins %zu" + ", search ignores %zu" + "\n", + stats_view.ops.inserts, + stats_view.ops.deletes, + stats_view.ops.searches, + stats_view.ops.resets, + tv.view->entries, + tv.view->referenced_items, + tv.view->pending_deletion_items, + stats_view.spin_locks.use_spins, + stats_view.spin_locks.insert_spins, + stats_view.spin_locks.delete_spins, + stats_view.spin_locks.search_spins + ); + dictionary_destroy(tv.master); + dictionary_destroy(tv.view); + + return 0; +} + +size_t dictionary_unittest_views(void) { + size_t errors = 0; + struct dictionary_stats stats = {}; + DICTIONARY *master = dictionary_create_advanced(DICT_OPTION_NONE, &stats); + DICTIONARY *view = dictionary_create_view(master); + + fprintf(stderr, "\n\nChecking dictionary views...\n"); + + // Add an item to both master and view, then remove the view first and the master second + fprintf(stderr, "\nPASS 1: Adding 1 item to master:\n"); + DICTIONARY_ITEM *item1_on_master = dictionary_set_and_acquire_item(master, "KEY 1", "VALUE1", strlen("VALUE1") + 1); + errors += unittest_check_dictionary("master", master, 1, 1, 0, 1, 0); + errors += unittest_check_item("master", master, item1_on_master, "KEY 1", item1_on_master->shared->value, 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nPASS 1: Adding master item to view:\n"); + DICTIONARY_ITEM *item1_on_view = dictionary_view_set_and_acquire_item(view, "KEY 1 ON VIEW", item1_on_master); + errors += unittest_check_dictionary("view", view, 1, 1, 0, 1, 0); + errors += unittest_check_item("view", view, item1_on_view, "KEY 1 ON VIEW", item1_on_master->shared->value, 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nPASS 1: Deleting view item:\n"); + dictionary_del(view, "KEY 1 ON VIEW"); + errors += unittest_check_dictionary("master", master, 1, 1, 0, 1, 0); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 1, 0); + errors += unittest_check_item("master", master, item1_on_master, "KEY 1", item1_on_master->shared->value, 1, ITEM_FLAG_NONE, true, true, true); + errors += unittest_check_item("view", view, item1_on_view, "KEY 1 ON VIEW", item1_on_master->shared->value, 1, ITEM_FLAG_DELETED, false, false, true); + + fprintf(stderr, "\nPASS 1: Releasing the deleted view item:\n"); + dictionary_acquired_item_release(view, item1_on_view); + errors += unittest_check_dictionary("master", master, 1, 1, 0, 1, 0); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 0, 1); + errors += unittest_check_item("master", master, item1_on_master, "KEY 1", item1_on_master->shared->value, 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nPASS 1: Releasing the acquired master item:\n"); + dictionary_acquired_item_release(master, item1_on_master); + errors += unittest_check_dictionary("master", master, 1, 1, 0, 0, 0); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 0, 1); + errors += unittest_check_item("master", master, item1_on_master, "KEY 1", item1_on_master->shared->value, 0, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nPASS 1: Deleting the released master item:\n"); + dictionary_del(master, "KEY 1"); + errors += unittest_check_dictionary("master", master, 0, 0, 0, 0, 0); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 0, 1); + + // The other way now: + // Add an item to both master and view, then remove the master first and verify it is deleted on the view also + fprintf(stderr, "\nPASS 2: Adding 1 item to master:\n"); + item1_on_master = dictionary_set_and_acquire_item(master, "KEY 1", "VALUE1", strlen("VALUE1") + 1); + errors += unittest_check_dictionary("master", master, 1, 1, 0, 1, 0); + errors += unittest_check_item("master", master, item1_on_master, "KEY 1", item1_on_master->shared->value, 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nPASS 2: Adding master item to view:\n"); + item1_on_view = dictionary_view_set_and_acquire_item(view, "KEY 1 ON VIEW", item1_on_master); + errors += unittest_check_dictionary("view", view, 1, 1, 0, 1, 0); + errors += unittest_check_item("view", view, item1_on_view, "KEY 1 ON VIEW", item1_on_master->shared->value, 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nPASS 2: Deleting master item:\n"); + dictionary_del(master, "KEY 1"); + dictionary_version(view); + errors += unittest_check_dictionary("master", master, 0, 0, 1, 1, 0); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 1, 0); + errors += unittest_check_item("master", master, item1_on_master, "KEY 1", item1_on_master->shared->value, 1, ITEM_FLAG_DELETED, false, false, true); + errors += unittest_check_item("view", view, item1_on_view, "KEY 1 ON VIEW", item1_on_master->shared->value, 1, ITEM_FLAG_DELETED, false, false, true); + + fprintf(stderr, "\nPASS 2: Releasing the acquired master item:\n"); + dictionary_acquired_item_release(master, item1_on_master); + errors += unittest_check_dictionary("master", master, 0, 0, 1, 0, 1); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 1, 0); + errors += unittest_check_item("view", view, item1_on_view, "KEY 1 ON VIEW", item1_on_master->shared->value, 1, ITEM_FLAG_DELETED, false, false, true); + + fprintf(stderr, "\nPASS 2: Releasing the deleted view item:\n"); + dictionary_acquired_item_release(view, item1_on_view); + errors += unittest_check_dictionary("master", master, 0, 0, 1, 0, 1); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 0, 1); + + dictionary_destroy(master); + dictionary_destroy(view); + return errors; +} + +int dictionary_unittest(size_t entries) { + if(entries < 10) entries = 10; + + DICTIONARY *dict; + size_t errors = 0; + + fprintf(stderr, "Generating %zu names and values...\n", entries); + char **names = dictionary_unittest_generate_names(entries); + char **values = dictionary_unittest_generate_values(entries); + + fprintf(stderr, "\nCreating dictionary single threaded, clone, %zu items\n", entries); + dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); + dictionary_unittest_clone(dict, names, values, entries, &errors); + + fprintf(stderr, "\nCreating dictionary multi threaded, clone, %zu items\n", entries); + dict = dictionary_create(DICT_OPTION_NONE); + dictionary_unittest_clone(dict, names, values, entries, &errors); + + fprintf(stderr, "\nCreating dictionary single threaded, non-clone, add-in-front options, %zu items\n", entries); + dict = dictionary_create( + DICT_OPTION_SINGLE_THREADED | DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | + DICT_OPTION_ADD_IN_FRONT); + dictionary_unittest_nonclone(dict, names, values, entries, &errors); + + fprintf(stderr, "\nCreating dictionary multi threaded, non-clone, add-in-front options, %zu items\n", entries); + dict = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_ADD_IN_FRONT); + dictionary_unittest_nonclone(dict, names, values, entries, &errors); + + fprintf(stderr, "\nCreating dictionary single-threaded, non-clone, don't overwrite options, %zu items\n", entries); + dict = dictionary_create( + DICT_OPTION_SINGLE_THREADED | DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | + DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_unittest_run_and_measure_time(dict, "adding entries", names, values, entries, &errors, dictionary_unittest_set_nonclone); + dictionary_unittest_run_and_measure_time(dict, "resetting non-overwrite entries", names, values, entries, &errors, dictionary_unittest_reset_dont_overwrite_nonclone); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach read loop", names, values, entries, &errors, dictionary_unittest_foreach); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback", names, values, entries, &errors, dictionary_unittest_walkthrough); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback stop", names, values, entries, &errors, dictionary_unittest_walkthrough_stop); + dictionary_unittest_run_and_measure_time(dict, "destroying full dictionary", names, values, entries, &errors, dictionary_unittest_destroy); + + fprintf(stderr, "\nCreating dictionary multi-threaded, non-clone, don't overwrite options, %zu items\n", entries); + dict = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_unittest_run_and_measure_time(dict, "adding entries", names, values, entries, &errors, dictionary_unittest_set_nonclone); + dictionary_unittest_run_and_measure_time(dict, "walkthrough write delete this", names, values, entries, &errors, dictionary_unittest_walkthrough_delete_this); + dictionary_unittest_run_and_measure_time(dict, "destroying empty dictionary", names, values, entries, &errors, dictionary_unittest_destroy); + + fprintf(stderr, "\nCreating dictionary multi-threaded, non-clone, don't overwrite options, %zu items\n", entries); + dict = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_unittest_run_and_measure_time(dict, "adding entries", names, values, entries, &errors, dictionary_unittest_set_nonclone); + dictionary_unittest_run_and_measure_time(dict, "foreach write delete this", names, values, entries, &errors, dictionary_unittest_foreach_delete_this); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach read loop empty", names, values, 0, &errors, dictionary_unittest_foreach); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback empty", names, values, 0, &errors, dictionary_unittest_walkthrough); + dictionary_unittest_run_and_measure_time(dict, "destroying empty dictionary", names, values, entries, &errors, dictionary_unittest_destroy); + + fprintf(stderr, "\nCreating dictionary single threaded, clone, %zu items\n", entries); + dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); + dictionary_unittest_sorting(dict, names, values, entries, &errors); + dictionary_unittest_run_and_measure_time(dict, "destroying full dictionary", names, values, entries, &errors, dictionary_unittest_destroy); + + fprintf(stderr, "\nCreating dictionary single threaded, clone, %zu items\n", entries); + dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); + dictionary_unittest_null_dfe(dict, names, values, entries, &errors); + dictionary_unittest_run_and_measure_time(dict, "destroying full dictionary", names, values, entries, &errors, dictionary_unittest_destroy); + + fprintf(stderr, "\nCreating dictionary single threaded, noclone, %zu items\n", entries); + dict = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_VALUE_LINK_DONT_CLONE); + dictionary_unittest_null_dfe(dict, names, values, entries, &errors); + dictionary_unittest_run_and_measure_time(dict, "destroying full dictionary", names, values, entries, &errors, dictionary_unittest_destroy); + + // check reference counters + { + fprintf(stderr, "\nTesting reference counters:\n"); + dict = dictionary_create(DICT_OPTION_NONE | DICT_OPTION_NAME_LINK_DONT_CLONE); + errors += unittest_check_dictionary("", dict, 0, 0, 0, 0, 0); + + fprintf(stderr, "\nAdding test item to dictionary and acquiring it\n"); + dictionary_set(dict, "test", "ITEM1", 6); + DICTIONARY_ITEM *item = (DICTIONARY_ITEM *)dictionary_get_and_acquire_item(dict, "test"); + + errors += unittest_check_dictionary("", dict, 1, 1, 0, 1, 0); + errors += unittest_check_item("ACQUIRED", dict, item, "test", "ITEM1", 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nChecking that reference counters are increased:\n"); + void *t; + dfe_start_read(dict, t) { + errors += unittest_check_dictionary("", dict, 1, 1, 0, 1, 0); + errors += unittest_check_item("ACQUIRED TRAVERSAL", dict, item, "test", "ITEM1", 2, ITEM_FLAG_NONE, true, true, true); + } + dfe_done(t); + + fprintf(stderr, "\nChecking that reference counters are decreased:\n"); + errors += unittest_check_dictionary("", dict, 1, 1, 0, 1, 0); + errors += unittest_check_item("ACQUIRED TRAVERSAL 2", dict, item, "test", "ITEM1", 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nDeleting the item we have acquired:\n"); + dictionary_del(dict, "test"); + + errors += unittest_check_dictionary("", dict, 0, 0, 1, 1, 0); + errors += unittest_check_item("DELETED", dict, item, "test", "ITEM1", 1, ITEM_FLAG_DELETED, false, false, true); + + fprintf(stderr, "\nAdding another item with the same name of the item we deleted, while being acquired:\n"); + dictionary_set(dict, "test", "ITEM2", 6); + errors += unittest_check_dictionary("", dict, 1, 1, 1, 1, 0); + + fprintf(stderr, "\nAcquiring the second item:\n"); + DICTIONARY_ITEM *item2 = (DICTIONARY_ITEM *)dictionary_get_and_acquire_item(dict, "test"); + errors += unittest_check_item("FIRST", dict, item, "test", "ITEM1", 1, ITEM_FLAG_DELETED, false, false, true); + errors += unittest_check_item("SECOND", dict, item2, "test", "ITEM2", 1, ITEM_FLAG_NONE, true, true, true); + errors += unittest_check_dictionary("", dict, 1, 1, 1, 2, 0); + + fprintf(stderr, "\nReleasing the second item (the first is still acquired):\n"); + dictionary_acquired_item_release(dict, (DICTIONARY_ITEM *)item2); + errors += unittest_check_dictionary("", dict, 1, 1, 1, 1, 0); + errors += unittest_check_item("FIRST", dict, item, "test", "ITEM1", 1, ITEM_FLAG_DELETED, false, false, true); + errors += unittest_check_item("SECOND RELEASED", dict, item2, "test", "ITEM2", 0, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nDeleting the second item (the first is still acquired):\n"); + dictionary_del(dict, "test"); + errors += unittest_check_dictionary("", dict, 0, 0, 1, 1, 0); + errors += unittest_check_item("ACQUIRED DELETED", dict, item, "test", "ITEM1", 1, ITEM_FLAG_DELETED, false, false, true); + + fprintf(stderr, "\nReleasing the first item (which we have already deleted):\n"); + dictionary_acquired_item_release(dict, (DICTIONARY_ITEM *)item); + dfe_start_write(dict, item) ; dfe_done(item); + errors += unittest_check_dictionary("", dict, 0, 0, 1, 0, 1); + + fprintf(stderr, "\nAdding again the test item to dictionary and acquiring it\n"); + dictionary_set(dict, "test", "ITEM1", 6); + item = (DICTIONARY_ITEM *)dictionary_get_and_acquire_item(dict, "test"); + + errors += unittest_check_dictionary("", dict, 1, 1, 0, 1, 0); + errors += unittest_check_item("RE-ADDITION", dict, item, "test", "ITEM1", 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nDestroying the dictionary while we have acquired an item\n"); + dictionary_destroy(dict); + + fprintf(stderr, "Releasing the item (on a destroyed dictionary)\n"); + dictionary_acquired_item_release(dict, (DICTIONARY_ITEM *)item); + item = NULL; + dict = NULL; + } + + dictionary_unittest_free_char_pp(names, entries); + dictionary_unittest_free_char_pp(values, entries); + + errors += dictionary_unittest_views(); + errors += dictionary_unittest_threads(); + errors += dictionary_unittest_view_threads(); + + fprintf(stderr, "\n%zu errors found\n", errors); + return errors ? 1 : 0; +} diff --git a/libnetdata/dictionary/dictionary.h b/libnetdata/dictionary/dictionary.h new file mode 100644 index 0000000..0e7b3d3 --- /dev/null +++ b/libnetdata/dictionary/dictionary.h @@ -0,0 +1,323 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DICTIONARY_H +#define NETDATA_DICTIONARY_H 1 + +#include "../libnetdata.h" + + +/* + * Netdata DICTIONARY features: + * + * CLONE or LINK + * Names and Values in the dictionary can be cloned or linked. + * In clone mode, the dictionary does all the memory management. + * The default is clone for both names and values. + * Set DICT_OPTION_NAME_LINK_DONT_CLONE to link names. + * Set DICT_OPTION_VALUE_LINK_DONT_CLONE to link names. + * + * ORDERED + * Items are ordered in the order they are added (new items are appended at the end). + * You may reverse the order by setting the flag DICT_OPTION_ADD_IN_FRONT. + * + * LOOKUP + * The dictionary uses JudyHS to maintain a very fast randomly accessible hash table. + * + * MULTI-THREADED and SINGLE-THREADED + * Each dictionary may be single threaded (no locks), or multi-threaded (multiple readers or one writer). + * The default is multi-threaded. Add the flag DICT_OPTION_SINGLE_THREADED for single-threaded. + * + * WALK-THROUGH and FOREACH traversal + * The dictionary can be traversed on read or write mode, either with a callback (walkthrough) or with + * a loop (foreach). + * + * In write mode traversal, the caller may delete only the current item, but may add as many items as needed. + * + */ + +#ifdef DICTIONARY_INTERNALS +#define DICTFE_CONST +#define DICT_ITEM_CONST +#else +#define DICTFE_CONST const +#define DICT_ITEM_CONST const +#endif + +typedef struct dictionary DICTIONARY; +typedef struct dictionary_item DICTIONARY_ITEM; + +typedef enum dictionary_options { + DICT_OPTION_NONE = 0, // the default is the opposite of all below + DICT_OPTION_SINGLE_THREADED = (1 << 0), // don't use any locks (default: use locks) + DICT_OPTION_VALUE_LINK_DONT_CLONE = (1 << 1), // don't copy the value, just point to the one provided (default: copy) + DICT_OPTION_NAME_LINK_DONT_CLONE = (1 << 2), // don't copy the name, just point to the one provided (default: copy) + DICT_OPTION_DONT_OVERWRITE_VALUE = (1 << 3), // don't overwrite values of dictionary items (default: overwrite) + DICT_OPTION_ADD_IN_FRONT = (1 << 4), // add dictionary items at the front of the linked list (default: at the end) +} DICT_OPTIONS; + +struct dictionary_stats { + const char *name; // the name of the category + + struct { + size_t active; // the number of active dictionaries + size_t deleted; // the number of dictionaries queued for destruction + } dictionaries; + + struct { + long entries; // active items in the dictionary + long pending_deletion; // pending deletion items in the dictionary + long referenced; // referenced items in the dictionary + } items; + + struct { + size_t creations; // dictionary creations + size_t destructions; // dictionary destructions + size_t flushes; // dictionary flushes + size_t traversals; // dictionary foreach + size_t walkthroughs; // dictionary walkthrough + size_t garbage_collections; // dictionary garbage collections + size_t searches; // item searches + size_t inserts; // item inserts + size_t resets; // item resets + size_t deletes; // item deletes + } ops; + + struct { + size_t inserts; // number of times the insert callback is called + size_t conflicts; // number of times the conflict callback is called + size_t reacts; // number of times the react callback is called + size_t deletes; // number of times the delete callback is called + } callbacks; + + // memory + struct { + long indexed; // bytes of keys indexed (indication of the index size) + long values; // bytes of caller structures + long dict; // bytes of the structures dictionary needs + } memory; + + // spin locks + struct { + size_t use_spins; // number of times a reference to item had to spin to acquire it or ignore it + size_t search_spins; // number of times a successful search result had to be thrown away + size_t insert_spins; // number of times an insertion to the hash table had to be repeated + size_t delete_spins; // number of times a deletion had to spin to get a decision + } spin_locks; +}; + +// Create a dictionary +#ifdef NETDATA_INTERNAL_CHECKS +#define dictionary_create(options) dictionary_create_advanced_with_trace(options, NULL, __FUNCTION__, __LINE__, __FILE__) +#define dictionary_create_advanced(options, stats) dictionary_create_advanced_with_trace(options, stats, __FUNCTION__, __LINE__, __FILE__) +DICTIONARY *dictionary_create_advanced_with_trace(DICT_OPTIONS options, struct dictionary_stats *stats, const char *function, size_t line, const char *file); +#else +#define dictionary_create(options) dictionary_create_advanced(options, NULL); +DICTIONARY *dictionary_create_advanced(DICT_OPTIONS options, struct dictionary_stats *stats); +#endif + +// Create a view on a dictionary +#ifdef NETDATA_INTERNAL_CHECKS +#define dictionary_create_view(master) dictionary_create_view_with_trace(master, __FUNCTION__, __LINE__, __FILE__) +DICTIONARY *dictionary_create_view_with_trace(DICTIONARY *master, const char *function, size_t line, const char *file); +#else +DICTIONARY *dictionary_create_view(DICTIONARY *master); +#endif + +// an insert callback to be called just after an item is added to the dictionary +// this callback is called while the dictionary is write locked! +void dictionary_register_insert_callback(DICTIONARY *dict, void (*ins_callback)(const DICTIONARY_ITEM *item, void *value, void *data), void *data); + +// a delete callback to be called just before an item is deleted forever +// this callback is called while the dictionary is write locked! +void dictionary_register_delete_callback(DICTIONARY *dict, void (*del_callback)(const DICTIONARY_ITEM *item, void *value, void *data), void *data); + +// a merge callback to be called when DICT_OPTION_DONT_OVERWRITE_VALUE +// and an item is already found in the dictionary - the dictionary does nothing else in this case +// the old_value will remain in the dictionary - the new_value is ignored +// The callback should return true if the value has been updated (it increases the dictionary version). +void dictionary_register_conflict_callback(DICTIONARY *dict, bool (*conflict_callback)(const DICTIONARY_ITEM *item, void *old_value, void *new_value, void *data), void *data); + +// a reaction callback to be called after every item insertion or conflict +// after the constructors have finished and the items are fully available for use +// and the dictionary is not write locked anymore +void dictionary_register_react_callback(DICTIONARY *dict, void (*react_callback)(const DICTIONARY_ITEM *item, void *value, void *data), void *data); + +// Destroy a dictionary +// Returns the number of bytes freed +// The returned value will not include name/key sizes +// Registered delete callbacks will be run for each item in the dictionary. +size_t dictionary_destroy(DICTIONARY *dict); + +// Empties a dictionary +// Referenced items will survive, but are not offered anymore. +// Registered delete callbacks will be run for each item in the dictionary. +void dictionary_flush(DICTIONARY *dict); + +void dictionary_version_increment(DICTIONARY *dict); + +// ---------------------------------------------------------------------------- +// Set an item in the dictionary +// +// - if an item with the same name does not exist, create one +// - if an item with the same name exists, then: +// a) if DICT_OPTION_DONT_OVERWRITE_VALUE is set, just return the existing value (ignore the new value) +// else b) reset the value to the new value passed at the call +// +// When DICT_OPTION_VALUE_LINK_DONT_CLONE is set, the value is linked, otherwise it is copied +// When DICT_OPTION_NAME_LINK_DONT_CLONE is set, the name is linked, otherwise it is copied +// +// When neither DICT_OPTION_VALUE_LINK_DONT_CLONE nor DICT_OPTION_NAME_LINK_DONT_CLONE are set, all the +// memory management for names and values is done by the dictionary. +// +// Passing NULL as value, the dictionary will callocz() the newly allocated value, otherwise it will copy it. +// Passing 0 as value_len, the dictionary will set the value to NULL (no allocations for value will be made). +#define dictionary_set(dict, name, value, value_len) dictionary_set_advanced(dict, name, -1, value, value_len, NULL) +void *dictionary_set_advanced(DICTIONARY *dict, const char *name, ssize_t name_len, void *value, size_t value_len, void *constructor_data); + +#define dictionary_set_and_acquire_item(dict, name, value, value_len) dictionary_set_and_acquire_item_advanced(dict, name, -1, value, value_len, NULL) +DICT_ITEM_CONST DICTIONARY_ITEM *dictionary_set_and_acquire_item_advanced(DICTIONARY *dict, const char *name, ssize_t name_len, void *value, size_t value_len, void *constructor_data); + +// set an item in a dictionary view +#define dictionary_view_set_and_acquire_item(dict, name, master_item) dictionary_view_set_and_acquire_item_advanced(dict, name, -1, master_item) +DICT_ITEM_CONST DICTIONARY_ITEM *dictionary_view_set_and_acquire_item_advanced(DICTIONARY *dict, const char *name, ssize_t name_len, DICTIONARY_ITEM *master_item); +#define dictionary_view_set(dict, name, master_item) dictionary_view_set_advanced(dict, name, -1, master_item) +void *dictionary_view_set_advanced(DICTIONARY *dict, const char *name, ssize_t name_len, DICT_ITEM_CONST DICTIONARY_ITEM *master_item); + +// ---------------------------------------------------------------------------- +// Get an item from the dictionary +// If it returns NULL, the item is not found + +#define dictionary_get(dict, name) dictionary_get_advanced(dict, name, -1) +void *dictionary_get_advanced(DICTIONARY *dict, const char *name, ssize_t name_len); + +#define dictionary_get_and_acquire_item(dict, name) dictionary_get_and_acquire_item_advanced(dict, name, -1) +DICT_ITEM_CONST DICTIONARY_ITEM *dictionary_get_and_acquire_item_advanced(DICTIONARY *dict, const char *name, ssize_t name_len); + + +// ---------------------------------------------------------------------------- +// Delete an item from the dictionary +// returns true if the item was found and has been deleted +// returns false if the item was not found in the index + +#define dictionary_del(dict, name) dictionary_del_advanced(dict, name, -1) +bool dictionary_del_advanced(DICTIONARY *dict, const char *name, ssize_t name_len); + +// ---------------------------------------------------------------------------- +// reference counters management + +void dictionary_acquired_item_release(DICTIONARY *dict, DICT_ITEM_CONST DICTIONARY_ITEM *item); + +DICT_ITEM_CONST DICTIONARY_ITEM *dictionary_acquired_item_dup(DICTIONARY *dict, DICT_ITEM_CONST DICTIONARY_ITEM *item); + +const char *dictionary_acquired_item_name(DICT_ITEM_CONST DICTIONARY_ITEM *item); +void *dictionary_acquired_item_value(DICT_ITEM_CONST DICTIONARY_ITEM *item); + +size_t dictionary_acquired_item_references(DICT_ITEM_CONST DICTIONARY_ITEM *item); + +// ---------------------------------------------------------------------------- +// Traverse (walk through) the items of the dictionary. +// The order of traversal is currently the order of insertion. +// +// The callback function may return a negative number to stop the traversal, +// in which case that negative value is returned to the caller. +// +// If all callback calls return zero or positive numbers, the sum of all of +// them is returned to the caller. +// +// You cannot alter the dictionary from inside a dictionary_walkthrough_read() - deadlock! +// You can only delete the current item from inside a dictionary_walkthrough_write() - you can add as many as you want. +// +#define dictionary_walkthrough_read(dict, callback, data) dictionary_walkthrough_rw(dict, 'r', callback, data) +#define dictionary_walkthrough_write(dict, callback, data) dictionary_walkthrough_rw(dict, 'w', callback, data) +int dictionary_walkthrough_rw(DICTIONARY *dict, char rw, int (*callback)(const DICTIONARY_ITEM *item, void *value, void *data), void *data); + +typedef int (*dictionary_sorted_compar)(const DICTIONARY_ITEM **item1, const DICTIONARY_ITEM **item2); + +#define dictionary_sorted_walkthrough_read(dict, callback, data) dictionary_sorted_walkthrough_rw(dict, 'r', callback, data, NULL) +#define dictionary_sorted_walkthrough_write(dict, callback, data) dictionary_sorted_walkthrough_rw(dict, 'w', callback, data, NULL) +int dictionary_sorted_walkthrough_rw(DICTIONARY *dict, char rw, int (*callback)(const DICTIONARY_ITEM *item, void *entry, void *data), void *data, dictionary_sorted_compar compar); + +// ---------------------------------------------------------------------------- +// Traverse with foreach +// +// Use like this: +// +// DICTFE dfe = {}; +// for(MY_ITEM *item = dfe_start_read(&dfe, dict); item ; item = dfe_next(&dfe)) { +// // do things with the item and its dfe.name +// } +// dfe_done(&dfe); +// +// You cannot alter the dictionary from within a dfe_read_start() - deadlock! +// You can only delete the current item from inside a dfe_start_write() - you can add as many as you want. +// + +#define DICTIONARY_LOCK_READ 'r' +#define DICTIONARY_LOCK_WRITE 'w' +#define DICTIONARY_LOCK_REENTRANT 'z' + +void dictionary_write_lock(DICTIONARY *dict); +void dictionary_write_unlock(DICTIONARY *dict); + +typedef DICTFE_CONST struct dictionary_foreach { + DICTIONARY *dict; // the dictionary upon we work + + DICTIONARY_ITEM *item; // the item we work on, to remember the position we are at + // this can be used with dictionary_acquired_item_dup() to + // acquire the currently working item. + + DICTFE_CONST char *name; // the dictionary name of the last item used + void *value; // the dictionary value of the last item used + // same as the return value of dictfe_start() and dictfe_next() + + size_t counter; // counts the number of iterations made, starting from zero + + char rw; // the lock mode 'r' or 'w' +} DICTFE; + +#define dfe_start_read(dict, value) dfe_start_rw(dict, value, DICTIONARY_LOCK_READ) +#define dfe_start_write(dict, value) dfe_start_rw(dict, value, DICTIONARY_LOCK_WRITE) +#define dfe_start_reentrant(dict, value) dfe_start_rw(dict, value, DICTIONARY_LOCK_REENTRANT) + +#define dfe_start_rw(dict, value, mode) \ + do { \ + DICTFE value ## _dfe = {}; \ + (void)(value); /* needed to avoid warning when looping without using this */ \ + for((value) = dictionary_foreach_start_rw(&value ## _dfe, (dict), (mode)); \ + (value ## _dfe.item) ; \ + (value) = dictionary_foreach_next(&value ## _dfe)) \ + { + +#define dfe_done(value) \ + } \ + dictionary_foreach_done(&value ## _dfe); \ + } while(0) + +void *dictionary_foreach_start_rw(DICTFE *dfe, DICTIONARY *dict, char rw); +void *dictionary_foreach_next(DICTFE *dfe); +void dictionary_foreach_done(DICTFE *dfe); + +// ---------------------------------------------------------------------------- +// Get statistics about the dictionary + +size_t dictionary_version(DICTIONARY *dict); +size_t dictionary_entries(DICTIONARY *dict); +size_t dictionary_referenced_items(DICTIONARY *dict); +long int dictionary_stats_for_registry(DICTIONARY *dict); + +// for all cases that the caller does not provide a stats structure, this is where they are accumulated. +extern struct dictionary_stats dictionary_stats_category_other; + +int dictionary_unittest(size_t entries); + +// ---------------------------------------------------------------------------- +// THREAD CACHE + +void *thread_cache_entry_get_or_set(void *key, + ssize_t key_length, + void *value, + void *(*transform_the_value_before_insert)(void *key, size_t key_length, void *value)); + +void thread_cache_destroy(void); + +#endif /* NETDATA_DICTIONARY_H */ diff --git a/libnetdata/ebpf/Makefile.am b/libnetdata/ebpf/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/ebpf/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/ebpf/README.md b/libnetdata/ebpf/README.md new file mode 100644 index 0000000..534867f --- /dev/null +++ b/libnetdata/ebpf/README.md @@ -0,0 +1,5 @@ + + + diff --git a/libnetdata/ebpf/ebpf.c b/libnetdata/ebpf/ebpf.c new file mode 100644 index 0000000..382485e --- /dev/null +++ b/libnetdata/ebpf/ebpf.c @@ -0,0 +1,1427 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include +#include +#include +#include + +#include "../libnetdata.h" + +char *ebpf_user_config_dir = CONFIG_DIR; +char *ebpf_stock_config_dir = LIBCONFIG_DIR; + +/* +static int clean_kprobe_event(FILE *out, char *filename, char *father_pid, netdata_ebpf_events_t *ptr) +{ + int fd = open(filename, O_WRONLY | O_APPEND, 0); + if (fd < 0) { + if (out) { + fprintf(out, "Cannot open %s : %s\n", filename, strerror(errno)); + } + return 1; + } + + char cmd[1024]; + int length = snprintf(cmd, 1023, "-:kprobes/%c_netdata_%s_%s", ptr->type, ptr->name, father_pid); + int ret = 0; + if (length > 0) { + ssize_t written = write(fd, cmd, strlen(cmd)); + if (written < 0) { + if (out) { + fprintf( + out, "Cannot remove the event (%d, %d) '%s' from %s : %s\n", getppid(), getpid(), cmd, filename, + strerror((int)errno)); + } + ret = 1; + } + } + + close(fd); + + return ret; +} + +int clean_kprobe_events(FILE *out, int pid, netdata_ebpf_events_t *ptr) +{ + debug(D_EXIT, "Cleaning parent process events."); + char filename[FILENAME_MAX + 1]; + snprintf(filename, FILENAME_MAX, "%s%s", NETDATA_DEBUGFS, "kprobe_events"); + + char removeme[16]; + snprintf(removeme, 15, "%d", pid); + + int i; + for (i = 0; ptr[i].name; i++) { + if (clean_kprobe_event(out, filename, removeme, &ptr[i])) { + break; + } + } + + return 0; +} +*/ + +//---------------------------------------------------------------------------------------------------------------------- + +/** + * Get Kernel version + * + * Get the current kernel from /proc and returns an integer value representing it + * + * @return it returns a value representing the kernel version. + */ +int ebpf_get_kernel_version() +{ + char major[16], minor[16], patch[16]; + char ver[VERSION_STRING_LEN]; + char *version = ver; + + int fd = open("/proc/sys/kernel/osrelease", O_RDONLY); + if (fd < 0) + return -1; + + ssize_t len = read(fd, ver, sizeof(ver)); + if (len < 0) { + close(fd); + return -1; + } + + close(fd); + + char *move = major; + while (*version && *version != '.') + *move++ = *version++; + *move = '\0'; + + version++; + move = minor; + while (*version && *version != '.') + *move++ = *version++; + *move = '\0'; + + if (*version) + version++; + else + return -1; + + move = patch; + while (*version && *version != '\n' && *version != '-') + *move++ = *version++; + *move = '\0'; + + // This new rule is fixing kernel version according the formula: + // KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c))) + // that was extracted from /usr/include/linux/version.h + int ipatch = (int)str2l(patch); + if (ipatch > 255) + ipatch = 255; + + return ((int)(str2l(major) * 65536) + (int)(str2l(minor) * 256) + ipatch); +} + +/** + * Get RH release + * + * Read Red Hat release from /etc/redhat-release + * + * @return It returns RH release on success and -1 otherwise + */ +int get_redhat_release() +{ + char buffer[VERSION_STRING_LEN + 1]; + int major, minor; + FILE *fp = fopen("/etc/redhat-release", "r"); + + if (fp) { + major = 0; + minor = -1; + size_t length = fread(buffer, sizeof(char), VERSION_STRING_LEN, fp); + if (length > 4) { + buffer[length] = '\0'; + char *end = strchr(buffer, '.'); + char *start; + if (end) { + *end = 0x0; + + if (end > buffer) { + start = end - 1; + + major = strtol(start, NULL, 10); + start = ++end; + + end++; + if (end) { + end = 0x00; + minor = strtol(start, NULL, 10); + } else { + minor = -1; + } + } + } + } + + fclose(fp); + return ((major * 256) + minor); + } else { + return -1; + } +} + +/** + * Check if the kernel is in a list of rejected ones + * + * @return Returns 1 if the kernel is rejected, 0 otherwise. + */ +static int kernel_is_rejected() +{ + // Get kernel version from system + char version_string[VERSION_STRING_LEN + 1]; + int version_string_len = 0; + + if (read_file("/proc/version_signature", version_string, VERSION_STRING_LEN)) { + if (read_file("/proc/version", version_string, VERSION_STRING_LEN)) { + struct utsname uname_buf; + if (!uname(&uname_buf)) { + info("Cannot check kernel version"); + return 0; + } + version_string_len = + snprintfz(version_string, VERSION_STRING_LEN, "%s %s", uname_buf.release, uname_buf.version); + } + } + + if (!version_string_len) + version_string_len = strlen(version_string); + + // Open a file with a list of rejected kernels + char *config_dir = getenv("NETDATA_USER_CONFIG_DIR"); + if (config_dir == NULL) { + config_dir = CONFIG_DIR; + } + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/ebpf.d/%s", config_dir, EBPF_KERNEL_REJECT_LIST_FILE); + FILE *kernel_reject_list = fopen(filename, "r"); + + if (!kernel_reject_list) { + // Keep this to have compatibility with old versions + snprintfz(filename, FILENAME_MAX, "%s/%s", config_dir, EBPF_KERNEL_REJECT_LIST_FILE); + kernel_reject_list = fopen(filename, "r"); + + if (!kernel_reject_list) { + config_dir = getenv("NETDATA_STOCK_CONFIG_DIR"); + if (config_dir == NULL) { + config_dir = LIBCONFIG_DIR; + } + + snprintfz(filename, FILENAME_MAX, "%s/ebpf.d/%s", config_dir, EBPF_KERNEL_REJECT_LIST_FILE); + kernel_reject_list = fopen(filename, "r"); + + if (!kernel_reject_list) + return 0; + } + } + + // Find if the kernel is in the reject list + char *reject_string = NULL; + size_t buf_len = 0; + ssize_t reject_string_len; + while ((reject_string_len = getline(&reject_string, &buf_len, kernel_reject_list) - 1) > 0) { + if (version_string_len >= reject_string_len) { + if (!strncmp(version_string, reject_string, reject_string_len)) { + info("A buggy kernel is detected"); + fclose(kernel_reject_list); + freez(reject_string); + return 1; + } + } + } + + fclose(kernel_reject_list); + free(reject_string); + + return 0; +} + +static int has_ebpf_kernel_version(int version) +{ + if (kernel_is_rejected()) + return 0; + + // Kernel 4.11.0 or RH > 7.5 + return (version >= NETDATA_MINIMUM_EBPF_KERNEL || get_redhat_release() >= NETDATA_MINIMUM_RH_VERSION); +} + +int has_condition_to_run(int version) +{ + if (!has_ebpf_kernel_version(version)) + return 0; + + return 1; +} + +//---------------------------------------------------------------------------------------------------------------------- + +/** + * Kernel Name + * + * Select kernel name used by eBPF programs + * + * Netdata delivers for users eBPF programs with specific suffixes that represent the kernels they were + * compiled, when we load the eBPF program, the suffix must be the nereast possible of the kernel running. + * + * @param selector select the kernel version. + * + * @return It returns the string to load kernel. + */ +static char *ebpf_select_kernel_name(uint32_t selector) +{ + static char *kernel_names[] = { NETDATA_IDX_STR_V3_10, NETDATA_IDX_STR_V4_14, NETDATA_IDX_STR_V4_16, + NETDATA_IDX_STR_V4_18, NETDATA_IDX_STR_V5_4, NETDATA_IDX_STR_V5_10, + NETDATA_IDX_STR_V5_11, NETDATA_IDX_STR_V5_14, NETDATA_IDX_STR_V5_15, + NETDATA_IDX_STR_V5_16 + }; + + return kernel_names[selector]; +} + +/** + * Select Max Index + * + * Select last index that will be tested on host. + * + * @param is_rhf is Red Hat fammily? + * @param kver the kernel version + * + * @return it returns the index to access kernel string. + */ +static int ebpf_select_max_index(int is_rhf, uint32_t kver) +{ + if (is_rhf > 0) { // Is Red Hat family + if (kver >= NETDATA_EBPF_KERNEL_5_14) + return NETDATA_IDX_V5_14; + else if (kver >= NETDATA_EBPF_KERNEL_5_4 && kver < NETDATA_EBPF_KERNEL_5_5) // For Oracle Linux + return NETDATA_IDX_V5_4; + else if (kver >= NETDATA_EBPF_KERNEL_4_11) + return NETDATA_IDX_V4_18; + } else { // Kernels from kernel.org + if (kver >= NETDATA_EBPF_KERNEL_5_16) + return NETDATA_IDX_V5_16; + else if (kver >= NETDATA_EBPF_KERNEL_5_15) + return NETDATA_IDX_V5_15; + else if (kver >= NETDATA_EBPF_KERNEL_5_11) + return NETDATA_IDX_V5_11; + else if (kver >= NETDATA_EBPF_KERNEL_5_10) + return NETDATA_IDX_V5_10; + else if (kver >= NETDATA_EBPF_KERNEL_4_17) + return NETDATA_IDX_V5_4; + else if (kver >= NETDATA_EBPF_KERNEL_4_15) + return NETDATA_IDX_V4_16; + else if (kver >= NETDATA_EBPF_KERNEL_4_11) + return NETDATA_IDX_V4_14; + } + + return NETDATA_IDX_V3_10; +} + +/** + * Select Index + * + * Select index to load data. + * + * @param kernels is the variable with kernel versions. + * @param is_rhf is Red Hat fammily? + * param kver the kernel version + */ +static uint32_t ebpf_select_index(uint32_t kernels, int is_rhf, uint32_t kver) +{ + uint32_t start = ebpf_select_max_index(is_rhf, kver); + uint32_t idx; + + if (is_rhf == -1) + kernels &= ~NETDATA_V5_14; + + for (idx = start; idx; idx--) { + if (kernels & 1 << idx) + break; + } + + return idx; +} + +/** + * Mount Name + * + * Mount name of eBPF program to be loaded. + * + * Netdata eBPF programs has the following format: + * + * Tnetdata_ebpf_N.V.o + * + * where: + * T - Is the eBPF type. When starts with 'p', this means we are only adding probes, + * and when they start with 'r' we are using retprobes. + * N - The eBPF program name. + * V - The kernel version in string format. + * + * @param out the vector where the name will be stored + * @param path + * @param len the size of the out vector. + * @param kver the kernel version + * @param name the eBPF program name. + * @param is_return is return or entry ? + */ +static void ebpf_mount_name(char *out, size_t len, char *path, uint32_t kver, const char *name, int is_return) +{ + char *version = ebpf_select_kernel_name(kver); + snprintfz(out, len, "%s/ebpf.d/%cnetdata_ebpf_%s.%s.o", + path, + (is_return) ? 'r' : 'p', + name, + version); +} + +//---------------------------------------------------------------------------------------------------------------------- + +/** + * Statistics from targets + * + * Count the information from targets. + * + * @param report the output structure + * @param targets vector with information about the eBPF plugin. + */ +static void ebpf_stats_targets(ebpf_plugin_stats_t *report, netdata_ebpf_targets_t *targets) +{ + if (!targets) { + report->probes = report->tracepoints = report->trampolines = 0; + return; + } + + int i = 0; + while (targets[i].name) { + switch (targets[i].mode) { + case EBPF_LOAD_PROBE: { + report->probes++; + break; + } + case EBPF_LOAD_RETPROBE: { + report->retprobes++; + break; + } + case EBPF_LOAD_TRACEPOINT: { + report->tracepoints++; + break; + } + case EBPF_LOAD_TRAMPOLINE: { + report->trampolines++; + break; + } + } + + i++; + } +} + +/** + * Update General stats + * + * Update eBPF plugin statistics that has relationship with the thread. + * + * This function must be called with mutex associated to charts is locked. + * + * @param report the output structure + * @param em the structure with information about how the module/thread is working. + */ +void ebpf_update_stats(ebpf_plugin_stats_t *report, ebpf_module_t *em) +{ + report->threads++; + + // It is not necessary to report more information. + if (!em->enabled) + return; + + report->running++; + + // In theory the `else if` is useless, because when this function is called, the module should not stay in + // EBPF_LOAD_PLAY_DICE. We have this additional condition to detect errors from developers. + if (em->load & EBPF_LOAD_LEGACY) + report->legacy++; + else if (em->load & EBPF_LOAD_CORE) + report->core++; + + ebpf_stats_targets(report, em->targets); +} + +//---------------------------------------------------------------------------------------------------------------------- + +void ebpf_update_pid_table(ebpf_local_maps_t *pid, ebpf_module_t *em) +{ + pid->user_input = em->pid_map_size; +} + +/** + * Update map size + * + * Update map size with information read from configuration files. + * + * @param map the structure with file descriptor to update. + * @param lmap the structure with information from configuration files. + * @param em the structure with information about how the module/thread is working. + * @param map_name the name of the file used to log. + */ +void ebpf_update_map_size(struct bpf_map *map, ebpf_local_maps_t *lmap, ebpf_module_t *em, const char *map_name __maybe_unused) +{ + uint32_t define_size = 0; + uint32_t apps_type = NETDATA_EBPF_MAP_PID | NETDATA_EBPF_MAP_RESIZABLE; + if (lmap->user_input && lmap->user_input != lmap->internal_input) { + define_size = lmap->internal_input; +#ifdef NETDATA_INTERNAL_CHECKS + info("Changing map %s from size %u to %u ", map_name, lmap->internal_input, lmap->user_input); +#endif + } else if (((lmap->type & apps_type) == apps_type) && (!em->apps_charts) && (!em->cgroup_charts)) { + lmap->user_input = ND_EBPF_DEFAULT_MIN_PID; + } else if (((em->apps_charts) || (em->cgroup_charts)) && (em->apps_level != NETDATA_APPS_NOT_SET)) { + switch (em->apps_level) { + case NETDATA_APPS_LEVEL_ALL: { + define_size = lmap->user_input; + break; + } + case NETDATA_APPS_LEVEL_PARENT: { + define_size = ND_EBPF_DEFAULT_PID_SIZE / 2; + break; + } + case NETDATA_APPS_LEVEL_REAL_PARENT: + default: { + define_size = ND_EBPF_DEFAULT_PID_SIZE / 3; + } + } + } + + if (!define_size) + return; + +#ifdef LIBBPF_MAJOR_VERSION + bpf_map__set_max_entries(map, define_size); +#else + bpf_map__resize(map, define_size); +#endif +} + +/** + * Update Legacy map sizes + * + * Update map size for eBPF legacy code. + * + * @param program the structure with values read from binary. + * @param em the structure with information about how the module/thread is working. + */ +static void ebpf_update_legacy_map_sizes(struct bpf_object *program, ebpf_module_t *em) +{ + struct bpf_map *map; + ebpf_local_maps_t *maps = em->maps; + if (!maps) + return; + + bpf_map__for_each(map, program) + { + const char *map_name = bpf_map__name(map); + int i = 0; ; + while (maps[i].name) { + ebpf_local_maps_t *w = &maps[i]; + if (w->type & NETDATA_EBPF_MAP_RESIZABLE) { + if (!strcmp(w->name, map_name)) { + ebpf_update_map_size(map, w, em, map_name); + } + } + + i++; + } + } +} + +size_t ebpf_count_programs(struct bpf_object *obj) +{ + size_t tot = 0; + struct bpf_program *prog; + bpf_object__for_each_program(prog, obj) + { + tot++; + } + + return tot; +} + +static ebpf_specify_name_t *ebpf_find_names(ebpf_specify_name_t *names, const char *prog_name) +{ + size_t i = 0; + while (names[i].program_name) { + if (!strcmp(prog_name, names[i].program_name)) + return &names[i]; + + i++; + } + + return NULL; +} + +static struct bpf_link **ebpf_attach_programs(struct bpf_object *obj, size_t length, ebpf_specify_name_t *names) +{ + struct bpf_link **links = callocz(length , sizeof(struct bpf_link *)); + size_t i = 0; + struct bpf_program *prog; + ebpf_specify_name_t *w; + bpf_object__for_each_program(prog, obj) + { + if (names) { + const char *name = bpf_program__name(prog); + w = ebpf_find_names(names, name); + } else + w = NULL; + + if (w) { + enum bpf_prog_type type = bpf_program__get_type(prog); + if (type == BPF_PROG_TYPE_KPROBE) + links[i] = bpf_program__attach_kprobe(prog, w->retprobe, w->optional); + } else + links[i] = bpf_program__attach(prog); + + if (libbpf_get_error(links[i])) { + links[i] = NULL; + } + + i++; + } + + return links; +} + +static void ebpf_update_maps(ebpf_module_t *em, struct bpf_object *obj) +{ + if (!em->maps) + return; + + ebpf_local_maps_t *maps = em->maps; + struct bpf_map *map; + bpf_map__for_each(map, obj) + { + int fd = bpf_map__fd(map); + if (maps) { + const char *map_name = bpf_map__name(map); + int j = 0; ; + while (maps[j].name) { + ebpf_local_maps_t *w = &maps[j]; + if (w->map_fd == ND_EBPF_MAP_FD_NOT_INITIALIZED && !strcmp(map_name, w->name)) + w->map_fd = fd; + + j++; + } + } + } +} + +/** + * Update Controller + * + * Update controller value with user input. + * + * @param fd the table file descriptor + * @param em structure with information about eBPF program we will load. + */ +void ebpf_update_controller(int fd, ebpf_module_t *em) +{ + uint32_t values[NETDATA_CONTROLLER_END] = { + (em->apps_charts & NETDATA_EBPF_APPS_FLAG_YES) | em->cgroup_charts, + em->apps_level + }; + uint32_t key; + uint32_t end = (em->apps_level != NETDATA_APPS_NOT_SET) ? NETDATA_CONTROLLER_END : NETDATA_CONTROLLER_APPS_LEVEL; + + for (key = NETDATA_CONTROLLER_APPS_ENABLED; key < end; key++) { + int ret = bpf_map_update_elem(fd, &key, &values[key], 0); + if (ret) + error("Add key(%u) for controller table failed.", key); + } +} + +/** + * Update Legacy controller + * + * Update legacy controller table when eBPF program has it. + * + * @param em structure with information about eBPF program we will load. + * @param obj bpf object with tables. + */ +static void ebpf_update_legacy_controller(ebpf_module_t *em, struct bpf_object *obj) +{ + ebpf_local_maps_t *maps = em->maps; + if (!maps) + return; + + struct bpf_map *map; + bpf_map__for_each(map, obj) + { + size_t i = 0; + while (maps[i].name) { + ebpf_local_maps_t *w = &maps[i]; + if (w->map_fd != ND_EBPF_MAP_FD_NOT_INITIALIZED && (w->type & NETDATA_EBPF_MAP_CONTROLLER)) { + w->type &= ~NETDATA_EBPF_MAP_CONTROLLER; + w->type |= NETDATA_EBPF_MAP_CONTROLLER_UPDATED; + + ebpf_update_controller(w->map_fd, em); + } + i++; + } + } +} + +/** + * Load Program + * + * Load eBPF program into kernel + * + * @param plugins_dir directory where binary are stored + * @param em structure with information about eBPF program we will load. + * @param kver the kernel version according /usr/include/linux/version.h + * @param is_rhf is a kernel from Red Hat Family? + * @param obj structure where we will store object loaded. + * + * @return it returns a link for each target we associated an eBPF program. + */ +struct bpf_link **ebpf_load_program(char *plugins_dir, ebpf_module_t *em, int kver, int is_rhf, + struct bpf_object **obj) +{ + char lpath[4096]; + + uint32_t idx = ebpf_select_index(em->kernels, is_rhf, kver); + + ebpf_mount_name(lpath, 4095, plugins_dir, idx, em->thread_name, em->mode); + + // When this function is called ebpf.plugin is using legacy code, so we should reset the variable + em->load &= ~ NETDATA_EBPF_LOAD_METHODS; + em->load |= EBPF_LOAD_LEGACY; + + *obj = bpf_object__open_file(lpath, NULL); + if (libbpf_get_error(obj)) { + error("Cannot open BPF object %s", lpath); + bpf_object__close(*obj); + return NULL; + } + + ebpf_update_legacy_map_sizes(*obj, em); + + if (bpf_object__load(*obj)) { + error("ERROR: loading BPF object file failed %s\n", lpath); + bpf_object__close(*obj); + return NULL; + } + + ebpf_update_maps(em, *obj); + ebpf_update_legacy_controller(em, *obj); + + size_t count_programs = ebpf_count_programs(*obj); + +#ifdef NETDATA_INTERNAL_CHECKS + info("eBPF program %s loaded with success!", lpath); +#endif + + return ebpf_attach_programs(*obj, count_programs, em->names); +} + +char *ebpf_find_symbol(char *search) +{ + char filename[FILENAME_MAX + 1]; + char *ret = NULL; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, NETDATA_KALLSYMS); + procfile *ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + error("Cannot open %s%s", netdata_configured_host_prefix, NETDATA_KALLSYMS); + return ret; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return ret; + + unsigned long i, lines = procfile_lines(ff); + size_t length = strlen(search); + for(i = 0; i < lines ; i++) { + char *cmp = procfile_lineword(ff, i,2);; + if (!strncmp(search, cmp, length)) { + ret = strdupz(cmp); + break; + } + } + + procfile_close(ff); + + return ret; +} + +void ebpf_update_names(ebpf_specify_name_t *opt, ebpf_module_t *em) +{ + int mode = em->mode; + em->names = opt; + + size_t i = 0; + while (opt[i].program_name) { + opt[i].retprobe = (mode == MODE_RETURN); + opt[i].optional = ebpf_find_symbol(opt[i].function_to_attach); + + i++; + } +} + +//---------------------------------------------------------------------------------------------------------------------- + +void ebpf_mount_config_name(char *filename, size_t length, char *path, const char *config) +{ + snprintf(filename, length, "%s/ebpf.d/%s", path, config); +} + +int ebpf_load_config(struct config *config, char *filename) +{ + return appconfig_load(config, filename, 0, NULL); +} + + +static netdata_run_mode_t ebpf_select_mode(char *mode) +{ + if (!strcasecmp(mode,EBPF_CFG_LOAD_MODE_RETURN )) + return MODE_RETURN; + else if (!strcasecmp(mode, "dev")) + return MODE_DEVMODE; + + return MODE_ENTRY; +} + +static void ebpf_select_mode_string(char *output, size_t len, netdata_run_mode_t sel) +{ + if (sel == MODE_RETURN) + strncpyz(output, EBPF_CFG_LOAD_MODE_RETURN, len); + else + strncpyz(output, EBPF_CFG_LOAD_MODE_DEFAULT, len); +} + +/** + * Convert string to load mode + * + * Convert the string given as argument to value present in enum. + * + * @param str value read from configuraion file. + * + * @return It returns the value to be used. + */ +netdata_ebpf_load_mode_t epbf_convert_string_to_load_mode(char *str) +{ + if (!strcasecmp(str, EBPF_CFG_CORE_PROGRAM)) + return EBPF_LOAD_CORE; + else if (!strcasecmp(str, EBPF_CFG_LEGACY_PROGRAM)) + return EBPF_LOAD_LEGACY; + + return EBPF_LOAD_PLAY_DICE; +} + +/** + * Convert load mode to string + * + * @param mode value that will select the string + * + * @return It returns the string associated to mode. + */ +static char *ebpf_convert_load_mode_to_string(netdata_ebpf_load_mode_t mode) +{ + if (mode & EBPF_LOAD_CORE) + return EBPF_CFG_CORE_PROGRAM; + else if (mode & EBPF_LOAD_LEGACY) + return EBPF_CFG_LEGACY_PROGRAM; + + return EBPF_CFG_DEFAULT_PROGRAM; +} + +/** + * Convert collect pid to string + * + * @param level value that will select the string + * + * @return It returns the string associated to level. + */ +static char *ebpf_convert_collect_pid_to_string(netdata_apps_level_t level) +{ + if (level == NETDATA_APPS_LEVEL_REAL_PARENT) + return EBPF_CFG_PID_REAL_PARENT; + else if (level == NETDATA_APPS_LEVEL_PARENT) + return EBPF_CFG_PID_PARENT; + else if (level == NETDATA_APPS_LEVEL_ALL) + return EBPF_CFG_PID_ALL; + + return EBPF_CFG_PID_INTERNAL_USAGE; +} + +/** + * Convert string to apps level + * + * @param str the argument read from config files + * + * @return it returns the level associated to the string or default when it is a wrong value + */ +netdata_apps_level_t ebpf_convert_string_to_apps_level(char *str) +{ + if (!strcasecmp(str, EBPF_CFG_PID_REAL_PARENT)) + return NETDATA_APPS_LEVEL_REAL_PARENT; + else if (!strcasecmp(str, EBPF_CFG_PID_PARENT)) + return NETDATA_APPS_LEVEL_PARENT; + else if (!strcasecmp(str, EBPF_CFG_PID_ALL)) + return NETDATA_APPS_LEVEL_ALL; + + return NETDATA_APPS_NOT_SET; +} + +/** + * CO-RE type + * + * Select the preferential type of CO-RE + * + * @param str value read from configuration file. + * @param lmode load mode used by collector. + */ +netdata_ebpf_program_loaded_t ebpf_convert_core_type(char *str, netdata_run_mode_t lmode) +{ + if (!strcasecmp(str, EBPF_CFG_ATTACH_TRACEPOINT)) + return EBPF_LOAD_TRACEPOINT; + else if (!strcasecmp(str, EBPF_CFG_ATTACH_PROBE)) { + return (lmode == MODE_ENTRY) ? EBPF_LOAD_PROBE : EBPF_LOAD_RETPROBE; + } + + return EBPF_LOAD_TRAMPOLINE; +} + +#ifdef LIBBPF_MAJOR_VERSION +/** + * Adjust Thread Load + * + * Adjust thread configuraton according specified load. + * + * @param mod the main structure that will be adjusted. + * @param file the btf file used with thread. + */ +void ebpf_adjust_thread_load(ebpf_module_t *mod, struct btf *file) +{ + if (!file) { + mod->load &= ~EBPF_LOAD_CORE; + mod->load |= EBPF_LOAD_LEGACY; + } else if (mod->load == EBPF_LOAD_PLAY_DICE && file) { + mod->load &= ~EBPF_LOAD_LEGACY; + mod->load |= EBPF_LOAD_CORE; + } +} + +/** + * Parse BTF file + * + * Parse a specific BTF file present on filesystem + * + * @param filename the file that will be parsed. + * + * @return It returns a pointer for the file on success and NULL otherwise. + */ +struct btf *ebpf_parse_btf_file(const char *filename) +{ + struct btf *bf = btf__parse(filename, NULL); + if (libbpf_get_error(bf)) { + fprintf(stderr, "Cannot parse btf file"); + btf__free(bf); + return NULL; + } + + return bf; +} + +/** + * Load default btf file + * + * Load the default BTF file on environment. + * + * @param path is the fullpath + * @param filename is the file inside BTF path. + */ +struct btf *ebpf_load_btf_file(char *path, char *filename) +{ + char fullpath[PATH_MAX + 1]; + snprintfz(fullpath, PATH_MAX, "%s/%s", path, filename); + struct btf *ret = ebpf_parse_btf_file(fullpath); + if (!ret) + info("Your environment does not have BTF file %s/%s. The plugin will work with 'legacy' code.", + path, filename); + + return ret; +} + +/** + * Find BTF attach type + * + * Search type fr current btf file. + * + * @param file is the structure for the btf file already parsed. + */ +static inline const struct btf_type *ebpf_find_btf_attach_type(struct btf *file) +{ + int id = btf__find_by_name_kind(file, "bpf_attach_type", BTF_KIND_ENUM); + if (id < 0) { + fprintf(stderr, "Cannot find 'bpf_attach_type'"); + + return NULL; + } + + return btf__type_by_id(file, id); +} + +/** + * Is function inside BTF + * + * Look for a specific function inside the given BTF file. + * + * @param file is the structure for the btf file already parsed. + * @param function is the function that we want to find. + */ +int ebpf_is_function_inside_btf(struct btf *file, char *function) +{ + const struct btf_type *type = ebpf_find_btf_attach_type(file); + if (!type) + return -1; + + const struct btf_enum *e = btf_enum(type); + int i, id; + for (id = -1, i = 0; i < btf_vlen(type); i++, e++) { + if (!strcmp(btf__name_by_offset(file, e->name_off), "BPF_TRACE_FENTRY")) { + id = btf__find_by_name_kind(file, function, BTF_KIND_FUNC); + break; + } + } + + return (id > 0) ? 1 : 0; +} +#endif + +/** + * Update target with configuration + * + * Update target load mode with value. + * + * @param em the module structure + * @param value value used to update. + */ +static void ebpf_update_target_with_conf(ebpf_module_t *em, netdata_ebpf_program_loaded_t value) +{ + netdata_ebpf_targets_t *targets = em->targets; + if (!targets) { + return; + } + + int i = 0; + while (targets[i].name) { + targets[i].mode = value; + i++; + } +} + +/** + * Select Load Mode + * + * Select the load mode according the given inputs. + * + * @param btf_file a pointer to the loaded btf file. + * @parma load current value. + * @param btf_file a pointer to the loaded btf file. + * @param is_rhf is Red Hat family? + * + * @return it returns the new load mode. + */ +static netdata_ebpf_load_mode_t ebpf_select_load_mode(struct btf *btf_file, netdata_ebpf_load_mode_t load, + int kver, int is_rh) +{ +#ifdef LIBBPF_MAJOR_VERSION + if ((load & EBPF_LOAD_CORE) || (load & EBPF_LOAD_PLAY_DICE)) { + // Quick fix for Oracle linux 8.x + load = (!btf_file || (is_rh && (kver >= NETDATA_EBPF_KERNEL_5_4 && kver < NETDATA_EBPF_KERNEL_5_5))) ? + EBPF_LOAD_LEGACY : EBPF_LOAD_CORE; + } +#else + load = EBPF_LOAD_LEGACY; +#endif + + return load; +} + +/** + * Update Module using config + * + * Update configuration for a specific thread. + * + * @param modules structure that will be updated + * @oaram origin specify the configuration file loaded + * @param btf_file a pointer to the loaded btf file. + * @param is_rhf is Red Hat family? + */ +void ebpf_update_module_using_config(ebpf_module_t *modules, netdata_ebpf_load_mode_t origin, struct btf *btf_file, + int kver, int is_rh) +{ + char default_value[EBPF_MAX_MODE_LENGTH + 1]; + ebpf_select_mode_string(default_value, EBPF_MAX_MODE_LENGTH, modules->mode); + char *value = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_LOAD_MODE, default_value); + modules->mode = ebpf_select_mode(value); + + modules->update_every = (int)appconfig_get_number(modules->cfg, EBPF_GLOBAL_SECTION, + EBPF_CFG_UPDATE_EVERY, modules->update_every); + + modules->apps_charts = appconfig_get_boolean(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_APPLICATION, + (int) (modules->apps_charts & NETDATA_EBPF_APPS_FLAG_YES)); + + modules->cgroup_charts = appconfig_get_boolean(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_CGROUP, + modules->cgroup_charts); + + modules->pid_map_size = (uint32_t)appconfig_get_number(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_PID_SIZE, + modules->pid_map_size); + + value = ebpf_convert_load_mode_to_string(modules->load & NETDATA_EBPF_LOAD_METHODS); + value = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_TYPE_FORMAT, value); + netdata_ebpf_load_mode_t load = epbf_convert_string_to_load_mode(value); + load = ebpf_select_load_mode(btf_file, load, kver, is_rh); + modules->load = origin | load; + + value = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_CORE_ATTACH, EBPF_CFG_ATTACH_TRAMPOLINE); + netdata_ebpf_program_loaded_t fill_lm = ebpf_convert_core_type(value, modules->mode); + ebpf_update_target_with_conf(modules, fill_lm); + + value = ebpf_convert_collect_pid_to_string(modules->apps_level); + value = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_COLLECT_PID, value); + modules->apps_level = ebpf_convert_string_to_apps_level(value); +} + +/** + * Update module + * + * When this function is called, it will load the configuration file and after this + * it updates the global information of ebpf_module. + * If the module has specific configuration, this function will load it, but it will not + * update the variables. + * + * @param em the module structure + * @param btf_file a pointer to the loaded btf file. + * @param is_rhf is Red Hat family? + * @param kver the kernel version + */ +void ebpf_update_module(ebpf_module_t *em, struct btf *btf_file, int kver, int is_rh) +{ + char filename[FILENAME_MAX+1]; + netdata_ebpf_load_mode_t origin; + + ebpf_mount_config_name(filename, FILENAME_MAX, ebpf_user_config_dir, em->config_file); + if (!ebpf_load_config(em->cfg, filename)) { + ebpf_mount_config_name(filename, FILENAME_MAX, ebpf_stock_config_dir, em->config_file); + if (!ebpf_load_config(em->cfg, filename)) { + error("Cannot load the ebpf configuration file %s", em->config_file); + return; + } + // If user defined data globaly, we will have here EBPF_LOADED_FROM_USER, we need to consider this, to avoid + // forcing users to configure thread by thread. + origin = (!(em->load & NETDATA_EBPF_LOAD_SOURCE)) ? EBPF_LOADED_FROM_STOCK : em->load & NETDATA_EBPF_LOAD_SOURCE; + } else + origin = EBPF_LOADED_FROM_USER; + + ebpf_update_module_using_config(em, origin, btf_file, kver, is_rh); +} + +/** + * Adjust Apps Cgroup + * + * Apps and cgroup has internal cleanup that needs attaching tracers to release_task, to avoid overload the function + * we will enable this integration by default, if and only if, we are running with trampolines. + * + * @param em a poiter to the main thread structure. + * @param mode is the mode used with different + */ +void ebpf_adjust_apps_cgroup(ebpf_module_t *em, netdata_ebpf_program_loaded_t mode) +{ + if ((em->load & EBPF_LOADED_FROM_STOCK) && + (em->apps_charts || em->cgroup_charts) && + mode != EBPF_LOAD_TRAMPOLINE) { + em->apps_charts = NETDATA_EBPF_APPS_FLAG_NO; + em->cgroup_charts = 0; + } +} + +//---------------------------------------------------------------------------------------------------------------------- + +/** + * Load Address + * + * Helper used to get address from /proc/kallsym + * + * @param fa address structure + * @param fd file descriptor loaded inside kernel. + */ +void ebpf_load_addresses(ebpf_addresses_t *fa, int fd) +{ + if (fa->addr) + return ; + + procfile *ff = procfile_open("/proc/kallsyms", " \t:", PROCFILE_FLAG_DEFAULT); + if (!ff) + return; + + ff = procfile_readall(ff); + if (!ff) + return; + + fa->hash = simple_hash(fa->function); + + size_t lines = procfile_lines(ff), l; + for(l = 0; l < lines ;l++) { + char *fcnt = procfile_lineword(ff, l, 2); + uint32_t hash = simple_hash(fcnt); + if (fa->hash == hash && !strcmp(fcnt, fa->function)) { + char addr[128]; + snprintf(addr, 127, "0x%s", procfile_lineword(ff, l, 0)); + fa->addr = (unsigned long) strtoul(addr, NULL, 16); + uint32_t key = 0; + bpf_map_update_elem(fd, &key, &fa->addr, BPF_ANY); + } + } + + procfile_close(ff); +} + +//---------------------------------------------------------------------------------------------------------------------- + +/** + * Fill Algorithms + * + * Set one unique dimension for all vector position. + * + * @param algorithms the output vector + * @param length number of elements of algorithms vector + * @param algorithm algorithm used on charts. +*/ +void ebpf_fill_algorithms(int *algorithms, size_t length, int algorithm) +{ + size_t i; + for (i = 0; i < length; i++) { + algorithms[i] = algorithm; + } +} + +/** + * Fill Histogram dimension + * + * Fill the histogram dimension with the specified ranges + */ +char **ebpf_fill_histogram_dimension(size_t maximum) +{ + char *dimensions[] = { "us", "ms", "s"}; + int previous_dim = 0, current_dim = 0; + uint32_t previous_level = 1000, current_level = 1000; + uint32_t previous_divisor = 1, current_divisor = 1; + uint32_t current = 1, previous = 0; + uint32_t selector; + char **out = callocz(maximum, sizeof(char *)); + char range[128]; + size_t end = maximum - 1; + for (selector = 0; selector < end; selector++) { + snprintf(range, 127, "%u%s->%u%s", previous/previous_divisor, dimensions[previous_dim], + current/current_divisor, dimensions[current_dim]); + out[selector] = strdupz(range); + previous = current; + current <<= 1; + + if (previous_dim != 2 && previous > previous_level) { + previous_dim++; + + previous_divisor *= 1000; + previous_level *= 1000; + } + + if (current_dim != 2 && current > current_level) { + current_dim++; + + current_divisor *= 1000; + current_level *= 1000; + } + } + snprintf(range, 127, "%u%s->+Inf", previous/previous_divisor, dimensions[previous_dim]); + out[selector] = strdupz(range); + + return out; +} + +/** + * Histogram dimension cleanup + * + * Cleanup dimensions allocated with function ebpf_fill_histogram_dimension + * + * @param ptr + * @param length + */ +void ebpf_histogram_dimension_cleanup(char **ptr, size_t length) +{ + size_t i; + for (i = 0; i < length; i++) { + freez(ptr[i]); + } + freez(ptr); +} + +//---------------------------------------------------------------------------------------------------------------------- + +/** + * Open tracepoint path + * + * @param filename pointer to store the path + * @param length file length + * @param subsys is the name of your subsystem. + * @param eventname is the name of the event to trace. + * @param flags flags used with syscall open + * + * @return it returns a positive value on success and a negative otherwise. + */ +static inline int ebpf_open_tracepoint_path(char *filename, size_t length, char *subsys, char *eventname, int flags) +{ + snprintfz(filename, length, "%s/events/%s/%s/enable", NETDATA_DEBUGFS, subsys, eventname); + return open(filename, flags, 0); +} + +/** + * Is tracepoint enabled + * + * Check whether the tracepoint is enabled. + * + * @param subsys is the name of your subsystem. + * @param eventname is the name of the event to trace. + * + * @return it returns 1 when it is enabled, 0 when it is disabled and -1 on error. + */ +int ebpf_is_tracepoint_enabled(char *subsys, char *eventname) +{ + char text[FILENAME_MAX + 1]; + int fd = ebpf_open_tracepoint_path(text, FILENAME_MAX, subsys, eventname, O_RDONLY); + if (fd < 0) { + return -1; + } + + ssize_t length = read(fd, text, 1); + if (length != 1) { + close(fd); + return -1; + } + close(fd); + + return (text[0] == '1') ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO; +} + +/** + * Change Tracing values + * + * Change value for specific tracepoint enabling or disabling it according value given. + * + * @param subsys is the name of your subsystem. + * @param eventname is the name of the event to trace. + * @param value a value to enable (1) or disable (0) a tracepoint. + * + * @return It returns 0 on success and -1 otherwise + */ +static int ebpf_change_tracing_values(char *subsys, char *eventname, char *value) +{ + if (strcmp("0", value) && strcmp("1", value)) { + error("Invalid value given to either enable or disable a tracepoint."); + return -1; + } + + char filename[1024]; + int fd = ebpf_open_tracepoint_path(filename, 1023, subsys, eventname, O_WRONLY); + if (fd < 0) { + return -1; + } + + ssize_t written = write(fd, value, strlen(value)); + if (written < 0) { + close(fd); + return -1; + } + + close(fd); + return 0; +} + +/** + * Enable tracing values + * + * Enable a tracepoint on a system + * + * @param subsys is the name of your subsystem. + * @param eventname is the name of the event to trace. + * + * @return It returns 0 on success and -1 otherwise + */ +int ebpf_enable_tracing_values(char *subsys, char *eventname) +{ + return ebpf_change_tracing_values(subsys, eventname, "1"); +} + +/** + * Disable tracing values + * + * Disable tracing points enabled by collector + * + * @param subsys is the name of your subsystem. + * @param eventname is the name of the event to trace. + * + * @return It returns 0 on success and -1 otherwise + */ +int ebpf_disable_tracing_values(char *subsys, char *eventname) +{ + return ebpf_change_tracing_values(subsys, eventname, "0"); +} + +/** + * Select PC prefix + * + * Identify the prefix to run on PC architecture. + * + * @return It returns 32 or 64 according to host arch. + */ +static uint32_t ebpf_select_pc_prefix() +{ + long counter = 1; + uint32_t i; + for (i = 0; i < 128; i++) { + counter <<= 1; + if (counter < 0) + break; + } + + return counter; +} + +/** + * Select Host Prefix + * + * Select prefix to syscall when host is running a kernel newer than 4.17.0 + * + * @param output the vector to store data. + * @param length length of output vector. + * @param syscall the syscall that prefix will be attached; + * @param kver the current kernel version in format MAJOR*65536 + MINOR*256 + PATCH + */ +void ebpf_select_host_prefix(char *output, size_t length, char *syscall, int kver) +{ + if (kver < NETDATA_EBPF_KERNEL_4_17) + snprintfz(output, length, "sys_%s", syscall); + else { + uint32_t arch = ebpf_select_pc_prefix(); + // Prefix selected according https://www.kernel.org/doc/html/latest/process/adding-syscalls.html + char *prefix = (arch == 32) ? "__ia32" : "__x64"; + snprintfz(output, length, "%s_sys_%s", prefix, syscall); + } +} + diff --git a/libnetdata/ebpf/ebpf.h b/libnetdata/ebpf/ebpf.h new file mode 100644 index 0000000..5cff513 --- /dev/null +++ b/libnetdata/ebpf/ebpf.h @@ -0,0 +1,371 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_H +#define NETDATA_EBPF_H 1 + +#include +#include +#ifdef LIBBPF_DEPRECATED +#include +#include +#endif +#include // Necessary for stdtoul + +#define NETDATA_DEBUGFS "/sys/kernel/debug/tracing/" +#define NETDATA_KALLSYMS "/proc/kallsyms" + +// Config files +#define EBPF_GLOBAL_SECTION "global" +#define EBPF_CFG_LOAD_MODE "ebpf load mode" +#define EBPF_CFG_LOAD_MODE_DEFAULT "entry" +#define EBPF_CFG_LOAD_MODE_RETURN "return" +#define EBPF_MAX_MODE_LENGTH 6 + +#define EBPF_CFG_TYPE_FORMAT "ebpf type format" +#define EBPF_CFG_DEFAULT_PROGRAM "auto" +#define EBPF_CFG_CORE_PROGRAM "CO-RE" +#define EBPF_CFG_LEGACY_PROGRAM "legacy" + +#define EBPF_CFG_COLLECT_PID "collect pid" +#define EBPF_CFG_PID_REAL_PARENT "real parent" +#define EBPF_CFG_PID_PARENT "parent" +#define EBPF_CFG_PID_ALL "all" +#define EBPF_CFG_PID_INTERNAL_USAGE "not used" + +#define EBPF_CFG_CORE_ATTACH "ebpf co-re tracing" +#define EBPF_CFG_ATTACH_TRAMPOLINE "trampoline" +#define EBPF_CFG_ATTACH_TRACEPOINT "tracepoint" +#define EBPF_CFG_ATTACH_PROBE "probe" + +#define EBPF_CFG_PROGRAM_PATH "btf path" + +#define EBPF_CFG_UPDATE_EVERY "update every" +#define EBPF_CFG_UPDATE_APPS_EVERY_DEFAULT 10 +#define EBPF_CFG_PID_SIZE "pid table size" +#define EBPF_CFG_APPLICATION "apps" +#define EBPF_CFG_CGROUP "cgroups" + +#define EBPF_COMMON_FNCT_CLEAN_UP "release_task" + +/** + * The RedHat magic number was got doing: + * + * 1797 = 7*256 + 5 + * + * For more details, please, read /usr/include/linux/version.h + * in any Red Hat installation. + */ +#define NETDATA_MINIMUM_RH_VERSION 1797 + +/** + * 2048 = 8*256 + 0 + */ +#define NETDATA_RH_8 2048 + +/** + * Kernel Version + * + * Kernel versions are calculated using the following formula: + * + * VERSION = LINUX_VERSION_MAJOR*65536 + LINUX_VERSION_PATCHLEVEL*256 + LINUX_VERSION_SUBLEVEL + * + * Where LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, and LINUX_VERSION_SUBLEVEL are extracted + * from /usr/include/linux/version.h. + * + * LINUX_VERSION_SUBLEVEL has the maximum value 255, but linux can have more SUBLEVELS. + * + */ +enum netdata_ebpf_kernel_versions { + NETDATA_EBPF_KERNEL_4_11 = 264960, // 264960 = 4 * 65536 + 15 * 256 + NETDATA_EBPF_KERNEL_4_14 = 265728, // 264960 = 4 * 65536 + 14 * 256 + NETDATA_EBPF_KERNEL_4_15 = 265984, // 265984 = 4 * 65536 + 15 * 256 + NETDATA_EBPF_KERNEL_4_17 = 266496, // 266496 = 4 * 65536 + 17 * 256 + NETDATA_EBPF_KERNEL_5_0 = 327680, // 327680 = 5 * 65536 + 0 * 256 + NETDATA_EBPF_KERNEL_5_4 = 328704, // 327680 = 5 * 65536 + 4 * 256 + NETDATA_EBPF_KERNEL_5_5 = 328960, // 327680 = 5 * 65536 + 5 * 256 + NETDATA_EBPF_KERNEL_5_10 = 330240, // 330240 = 5 * 65536 + 10 * 256 + NETDATA_EBPF_KERNEL_5_11 = 330496, // 330240 = 5 * 65536 + 11 * 256 + NETDATA_EBPF_KERNEL_5_14 = 331264, // 331264 = 5 * 65536 + 14 * 256 + NETDATA_EBPF_KERNEL_5_15 = 331520, // 331520 = 5 * 65536 + 15 * 256 + NETDATA_EBPF_KERNEL_5_16 = 331776 // 331776 = 5 * 65536 + 16 * 256 +}; + +enum netdata_kernel_flag { + NETDATA_V3_10 = 1 << 0, + NETDATA_V4_14 = 1 << 1, + NETDATA_V4_16 = 1 << 2, + NETDATA_V4_18 = 1 << 3, + NETDATA_V5_4 = 1 << 4, + NETDATA_V5_10 = 1 << 5, + NETDATA_V5_11 = 1 << 6, + NETDATA_V5_14 = 1 << 7, + NETDATA_V5_15 = 1 << 8, + NETDATA_V5_16 = 1 << 9 +}; + +enum netdata_kernel_idx { + NETDATA_IDX_V3_10, + NETDATA_IDX_V4_14, + NETDATA_IDX_V4_16, + NETDATA_IDX_V4_18, + NETDATA_IDX_V5_4 , + NETDATA_IDX_V5_10, + NETDATA_IDX_V5_11, + NETDATA_IDX_V5_14, + NETDATA_IDX_V5_15, + NETDATA_IDX_V5_16 +}; + +#define NETDATA_IDX_STR_V3_10 "3.10" +#define NETDATA_IDX_STR_V4_14 "4.14" +#define NETDATA_IDX_STR_V4_16 "4.16" +#define NETDATA_IDX_STR_V4_18 "4.18" +#define NETDATA_IDX_STR_V5_4 "5.4" +#define NETDATA_IDX_STR_V5_10 "5.10" +#define NETDATA_IDX_STR_V5_11 "5.11" +#define NETDATA_IDX_STR_V5_14 "5.14" +#define NETDATA_IDX_STR_V5_15 "5.15" +#define NETDATA_IDX_STR_V5_16 "5.16" + +/** + * Minimum value has relationship with libbpf support. + */ +#define NETDATA_MINIMUM_EBPF_KERNEL NETDATA_EBPF_KERNEL_4_11 + +#define VERSION_STRING_LEN 256 +#define EBPF_KERNEL_REJECT_LIST_FILE "ebpf_kernel_reject_list.txt" + +#define ND_EBPF_DEFAULT_MIN_PID 1U +#define ND_EBPF_MAP_FD_NOT_INITIALIZED (int)-1 + +typedef struct ebpf_addresses { + char *function; + uint32_t hash; + // We use long as address, because it matches system length + unsigned long addr; +} ebpf_addresses_t; + +extern char *ebpf_user_config_dir; +extern char *ebpf_stock_config_dir; + +typedef struct ebpf_data { + int *map_fd; + + char *kernel_string; + uint32_t running_on_kernel; + int isrh; +} ebpf_data_t; + +typedef enum { + MODE_RETURN = 0, // This attaches kprobe when the function returns + MODE_DEVMODE, // This stores log given description about the errors raised + MODE_ENTRY // This attaches kprobe when the function is called +} netdata_run_mode_t; + +#define ND_EBPF_DEFAULT_PID_SIZE 32768U + +enum netdata_ebpf_map_type { + NETDATA_EBPF_MAP_STATIC = 0, + NETDATA_EBPF_MAP_RESIZABLE = 1, + NETDATA_EBPF_MAP_CONTROLLER = 2, + NETDATA_EBPF_MAP_CONTROLLER_UPDATED = 4, + NETDATA_EBPF_MAP_PID = 8 +}; + +enum netdata_controller { + NETDATA_CONTROLLER_APPS_ENABLED, + NETDATA_CONTROLLER_APPS_LEVEL, + + NETDATA_CONTROLLER_END +}; + +// Control how Netdata will monitor PIDs (apps and cgroups) +typedef enum netdata_apps_level { + NETDATA_APPS_LEVEL_REAL_PARENT, + NETDATA_APPS_LEVEL_PARENT, + NETDATA_APPS_LEVEL_ALL, + + // Present only in user ring + NETDATA_APPS_NOT_SET +} netdata_apps_level_t; + +typedef struct ebpf_local_maps { + char *name; + uint32_t internal_input; + uint32_t user_input; + uint32_t type; + int map_fd; +} ebpf_local_maps_t; + +typedef struct ebpf_specify_name { + char *program_name; + char *function_to_attach; + char *optional; + bool retprobe; +} ebpf_specify_name_t; + +typedef enum netdata_ebpf_load_mode { + EBPF_LOAD_LEGACY = 1<<0, // Select legacy mode, this means we will load binaries + EBPF_LOAD_CORE = 1<<1, // When CO-RE is used, it is necessary to use the souce code + EBPF_LOAD_PLAY_DICE = 1<<2, // Take a look on environment and choose the best option + EBPF_LOADED_FROM_STOCK = 1<<3, // Configuration loaded from Stock file + EBPF_LOADED_FROM_USER = 1<<4 // Configuration loaded from user +} netdata_ebpf_load_mode_t; +#define NETDATA_EBPF_LOAD_METHODS (EBPF_LOAD_LEGACY|EBPF_LOAD_CORE|EBPF_LOAD_PLAY_DICE) +#define NETDATA_EBPF_LOAD_SOURCE (EBPF_LOADED_FROM_STOCK|EBPF_LOADED_FROM_USER) + +typedef enum netdata_ebpf_program_loaded { + EBPF_LOAD_PROBE, // Attach probes on targets + EBPF_LOAD_RETPROBE, // Attach retprobes on targets + EBPF_LOAD_TRACEPOINT, // This stores log given description about the errors raised + EBPF_LOAD_TRAMPOLINE, // This attaches kprobe when the function is called +} netdata_ebpf_program_loaded_t; + +typedef struct netdata_ebpf_targets { + char *name; + netdata_ebpf_program_loaded_t mode; +} netdata_ebpf_targets_t; + +typedef struct ebpf_plugin_stats { + // Load options + uint32_t legacy; // Legacy codes + uint32_t core; // CO-RE codes, this means we are using source code compiled. + + uint32_t threads; // Total number of threads + uint32_t running; // total number of threads running + + uint32_t probes; // Number of kprobes loaded + uint32_t retprobes; // Number of kretprobes loaded + uint32_t tracepoints; // Number of tracepoints used + uint32_t trampolines; // Number of trampolines used +} ebpf_plugin_stats_t; + +typedef enum netdata_apps_integration_flags { + NETDATA_EBPF_APPS_FLAG_NO, + NETDATA_EBPF_APPS_FLAG_YES, + NETDATA_EBPF_APPS_FLAG_CHART_CREATED +} netdata_apps_integration_flags_t; + +typedef struct ebpf_module { + const char *thread_name; + const char *config_name; + int enabled; + void *(*start_routine)(void *); + int update_every; + int global_charts; + netdata_apps_integration_flags_t apps_charts; + netdata_apps_level_t apps_level; + int cgroup_charts; + netdata_run_mode_t mode; + uint32_t thread_id; + int optional; + void (*apps_routine)(struct ebpf_module *em, void *ptr); + ebpf_local_maps_t *maps; + ebpf_specify_name_t *names; + uint32_t pid_map_size; + struct config *cfg; + const char *config_file; + uint64_t kernels; + netdata_ebpf_load_mode_t load; + netdata_ebpf_targets_t *targets; + struct bpf_link **probe_links; + struct bpf_object *objects; + struct netdata_static_thread *thread; +} ebpf_module_t; + +int ebpf_get_kernel_version(); +int get_redhat_release(); +int has_condition_to_run(int version); +char *ebpf_kernel_suffix(int version, int isrh); +struct bpf_link **ebpf_load_program(char *plugins_dir, ebpf_module_t *em, int kver, int is_rhf, + struct bpf_object **obj); + +void ebpf_mount_config_name(char *filename, size_t length, char *path, const char *config); +int ebpf_load_config(struct config *config, char *filename); +void ebpf_update_module(ebpf_module_t *em, struct btf *btf_file, int kver, int is_rh); +void ebpf_update_names(ebpf_specify_name_t *opt, ebpf_module_t *em); +void ebpf_adjust_apps_cgroup(ebpf_module_t *em, netdata_ebpf_program_loaded_t mode); +char *ebpf_find_symbol(char *search); +void ebpf_load_addresses(ebpf_addresses_t *fa, int fd); +void ebpf_fill_algorithms(int *algorithms, size_t length, int algorithm); +char **ebpf_fill_histogram_dimension(size_t maximum); +void ebpf_update_stats(ebpf_plugin_stats_t *report, ebpf_module_t *em); +void ebpf_update_controller(int fd, ebpf_module_t *em); +void ebpf_update_map_size(struct bpf_map *map, ebpf_local_maps_t *lmap, ebpf_module_t *em, const char *map_name); + +// Histogram +#define NETDATA_EBPF_HIST_MAX_BINS 24UL +#define NETDATA_DISK_MAX 256U +#define NETDATA_DISK_HISTOGRAM_LENGTH (NETDATA_DISK_MAX * NETDATA_EBPF_HIST_MAX_BINS) + +typedef struct netdata_ebpf_histogram { + char *name; + char *title; + int order; + uint64_t histogram[NETDATA_EBPF_HIST_MAX_BINS]; +} netdata_ebpf_histogram_t; + +typedef struct ebpf_filesystem_partitions { + char *filesystem; + char *optional_filesystem; + char *family; + char *family_name; + struct bpf_object *objects; + struct bpf_link **probe_links; + + netdata_ebpf_histogram_t hread; + netdata_ebpf_histogram_t hwrite; + netdata_ebpf_histogram_t hopen; + netdata_ebpf_histogram_t hadditional; + + uint32_t flags; + uint32_t enabled; + + ebpf_addresses_t addresses; + uint64_t kernels; +} ebpf_filesystem_partitions_t; + +typedef struct ebpf_sync_syscalls { + char *syscall; + int enabled; + uint32_t flags; + + // BTF structure + struct bpf_object *objects; + struct bpf_link **probe_links; + + // BPF structure +#ifdef LIBBPF_MAJOR_VERSION + struct sync_bpf *sync_obj; +#else + void *sync_obj; +#endif +} ebpf_sync_syscalls_t; + +void ebpf_histogram_dimension_cleanup(char **ptr, size_t length); + +// Tracepoint helpers +// For more information related to tracepoints read https://www.kernel.org/doc/html/latest/trace/tracepoints.html +int ebpf_is_tracepoint_enabled(char *subsys, char *eventname); +int ebpf_enable_tracing_values(char *subsys, char *eventname); +int ebpf_disable_tracing_values(char *subsys, char *eventname); + +// BTF Section +#define EBPF_DEFAULT_BTF_FILE "vmlinux" +#define EBPF_DEFAULT_BTF_PATH "/sys/kernel/btf" +#define EBPF_DEFAULT_ERROR_MSG "Cannot open or load BPF file for thread" + +// BTF helpers +#define NETDATA_EBPF_MAX_SYSCALL_LENGTH 255 + +netdata_ebpf_load_mode_t epbf_convert_string_to_load_mode(char *str); +netdata_ebpf_program_loaded_t ebpf_convert_core_type(char *str, netdata_run_mode_t lmode); +void ebpf_select_host_prefix(char *output, size_t length, char *syscall, int kver); +#ifdef LIBBPF_MAJOR_VERSION +void ebpf_adjust_thread_load(ebpf_module_t *mod, struct btf *file); +struct btf *ebpf_parse_btf_file(const char *filename); +struct btf *ebpf_load_btf_file(char *path, char *filename); +int ebpf_is_function_inside_btf(struct btf *file, char *function); +#endif + +#endif /* NETDATA_EBPF_H */ diff --git a/libnetdata/eval/Makefile.am b/libnetdata/eval/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/eval/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/eval/README.md b/libnetdata/eval/README.md new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/libnetdata/eval/README.md @@ -0,0 +1 @@ + diff --git a/libnetdata/eval/eval.c b/libnetdata/eval/eval.c new file mode 100644 index 0000000..0e429a0 --- /dev/null +++ b/libnetdata/eval/eval.c @@ -0,0 +1,1201 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +// ---------------------------------------------------------------------------- +// data structures for storing the parsed expression in memory + +typedef struct eval_value { + int type; + + union { + NETDATA_DOUBLE number; + EVAL_VARIABLE *variable; + struct eval_node *expression; + }; +} EVAL_VALUE; + +typedef struct eval_node { + int id; + unsigned char operator; + int precedence; + + int count; + EVAL_VALUE ops[]; +} EVAL_NODE; + +// these are used for EVAL_NODE.operator +// they are used as internal IDs to identify an operator +// THEY ARE NOT USED FOR PARSING OPERATORS LIKE THAT +#define EVAL_OPERATOR_NOP '\0' +#define EVAL_OPERATOR_EXPRESSION_OPEN '(' +#define EVAL_OPERATOR_EXPRESSION_CLOSE ')' +#define EVAL_OPERATOR_NOT '!' +#define EVAL_OPERATOR_PLUS '+' +#define EVAL_OPERATOR_MINUS '-' +#define EVAL_OPERATOR_AND '&' +#define EVAL_OPERATOR_OR '|' +#define EVAL_OPERATOR_GREATER_THAN_OR_EQUAL 'G' +#define EVAL_OPERATOR_LESS_THAN_OR_EQUAL 'L' +#define EVAL_OPERATOR_NOT_EQUAL '~' +#define EVAL_OPERATOR_EQUAL '=' +#define EVAL_OPERATOR_LESS '<' +#define EVAL_OPERATOR_GREATER '>' +#define EVAL_OPERATOR_MULTIPLY '*' +#define EVAL_OPERATOR_DIVIDE '/' +#define EVAL_OPERATOR_SIGN_PLUS 'P' +#define EVAL_OPERATOR_SIGN_MINUS 'M' +#define EVAL_OPERATOR_ABS 'A' +#define EVAL_OPERATOR_IF_THEN_ELSE '?' + +// ---------------------------------------------------------------------------- +// forward function definitions + +static inline void eval_node_free(EVAL_NODE *op); +static inline EVAL_NODE *parse_full_expression(const char **string, int *error); +static inline EVAL_NODE *parse_one_full_operand(const char **string, int *error); +static inline NETDATA_DOUBLE eval_node(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error); +static inline void print_parsed_as_node(BUFFER *out, EVAL_NODE *op, int *error); +static inline void print_parsed_as_constant(BUFFER *out, NETDATA_DOUBLE n); + +// ---------------------------------------------------------------------------- +// evaluation of expressions + +static inline NETDATA_DOUBLE eval_variable(EVAL_EXPRESSION *exp, EVAL_VARIABLE *v, int *error) { + static STRING + *this_string = NULL, + *now_string = NULL, + *after_string = NULL, + *before_string = NULL, + *status_string = NULL, + *removed_string = NULL, + *uninitialized_string = NULL, + *undefined_string = NULL, + *clear_string = NULL, + *warning_string = NULL, + *critical_string = NULL; + + NETDATA_DOUBLE n; + + if(unlikely(this_string == NULL)) { + this_string = string_strdupz("this"); + now_string = string_strdupz("now"); + after_string = string_strdupz("after"); + before_string = string_strdupz("before"); + status_string = string_strdupz("status"); + removed_string = string_strdupz("REMOVED"); + uninitialized_string = string_strdupz("UNINITIALIZED"); + undefined_string = string_strdupz("UNDEFINED"); + clear_string = string_strdupz("CLEAR"); + warning_string = string_strdupz("WARNING"); + critical_string = string_strdupz("CRITICAL"); + } + + if(unlikely(v->name == this_string)) { + n = (exp->myself)?*exp->myself:NAN; + buffer_strcat(exp->error_msg, "[ $this = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->name == after_string)) { + n = (exp->after && *exp->after)?*exp->after:NAN; + buffer_strcat(exp->error_msg, "[ $after = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->name == before_string)) { + n = (exp->before && *exp->before)?*exp->before:NAN; + buffer_strcat(exp->error_msg, "[ $before = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->name == now_string)) { + n = (NETDATA_DOUBLE)now_realtime_sec(); + buffer_strcat(exp->error_msg, "[ $now = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->name == status_string)) { + n = (exp->status)?*exp->status:RRDCALC_STATUS_UNINITIALIZED; + buffer_strcat(exp->error_msg, "[ $status = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->name == removed_string)) { + n = RRDCALC_STATUS_REMOVED; + buffer_strcat(exp->error_msg, "[ $REMOVED = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->name == uninitialized_string)) { + n = RRDCALC_STATUS_UNINITIALIZED; + buffer_strcat(exp->error_msg, "[ $UNINITIALIZED = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->name == undefined_string)) { + n = RRDCALC_STATUS_UNDEFINED; + buffer_strcat(exp->error_msg, "[ $UNDEFINED = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->name == clear_string)) { + n = RRDCALC_STATUS_CLEAR; + buffer_strcat(exp->error_msg, "[ $CLEAR = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->name == warning_string)) { + n = RRDCALC_STATUS_WARNING; + buffer_strcat(exp->error_msg, "[ $WARNING = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(unlikely(v->name == critical_string)) { + n = RRDCALC_STATUS_CRITICAL; + buffer_strcat(exp->error_msg, "[ $CRITICAL = "); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + if(exp->rrdcalc && health_variable_lookup(v->name, exp->rrdcalc, &n)) { + buffer_sprintf(exp->error_msg, "[ ${%s} = ", string2str(v->name)); + print_parsed_as_constant(exp->error_msg, n); + buffer_strcat(exp->error_msg, " ] "); + return n; + } + + *error = EVAL_ERROR_UNKNOWN_VARIABLE; + buffer_sprintf(exp->error_msg, "[ undefined variable '%s' ] ", string2str(v->name)); + return NAN; +} + +static inline NETDATA_DOUBLE eval_value(EVAL_EXPRESSION *exp, EVAL_VALUE *v, int *error) { + NETDATA_DOUBLE n; + + switch(v->type) { + case EVAL_VALUE_EXPRESSION: + n = eval_node(exp, v->expression, error); + break; + + case EVAL_VALUE_NUMBER: + n = v->number; + break; + + case EVAL_VALUE_VARIABLE: + n = eval_variable(exp, v->variable, error); + break; + + default: + *error = EVAL_ERROR_INVALID_VALUE; + n = 0; + break; + } + + return n; +} + +static inline int is_true(NETDATA_DOUBLE n) { + if(isnan(n)) return 0; + if(isinf(n)) return 1; + if(n == 0) return 0; + return 1; +} + +NETDATA_DOUBLE eval_and(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + return is_true(eval_value(exp, &op->ops[0], error)) && is_true(eval_value(exp, &op->ops[1], error)); +} +NETDATA_DOUBLE eval_or(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + return is_true(eval_value(exp, &op->ops[0], error)) || is_true(eval_value(exp, &op->ops[1], error)); +} +NETDATA_DOUBLE eval_greater_than_or_equal(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + NETDATA_DOUBLE n1 = eval_value(exp, &op->ops[0], error); + NETDATA_DOUBLE n2 = eval_value(exp, &op->ops[1], error); + return isgreaterequal(n1, n2); +} +NETDATA_DOUBLE eval_less_than_or_equal(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + NETDATA_DOUBLE n1 = eval_value(exp, &op->ops[0], error); + NETDATA_DOUBLE n2 = eval_value(exp, &op->ops[1], error); + return islessequal(n1, n2); +} +NETDATA_DOUBLE eval_equal(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + NETDATA_DOUBLE n1 = eval_value(exp, &op->ops[0], error); + NETDATA_DOUBLE n2 = eval_value(exp, &op->ops[1], error); + if(isnan(n1) && isnan(n2)) return 1; + if(isinf(n1) && isinf(n2)) return 1; + if(isnan(n1) || isnan(n2)) return 0; + if(isinf(n1) || isinf(n2)) return 0; + return considered_equal_ndd(n1, n2); +} +NETDATA_DOUBLE eval_not_equal(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + return !eval_equal(exp, op, error); +} +NETDATA_DOUBLE eval_less(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + NETDATA_DOUBLE n1 = eval_value(exp, &op->ops[0], error); + NETDATA_DOUBLE n2 = eval_value(exp, &op->ops[1], error); + return isless(n1, n2); +} +NETDATA_DOUBLE eval_greater(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + NETDATA_DOUBLE n1 = eval_value(exp, &op->ops[0], error); + NETDATA_DOUBLE n2 = eval_value(exp, &op->ops[1], error); + return isgreater(n1, n2); +} +NETDATA_DOUBLE eval_plus(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + NETDATA_DOUBLE n1 = eval_value(exp, &op->ops[0], error); + NETDATA_DOUBLE n2 = eval_value(exp, &op->ops[1], error); + if(isnan(n1) || isnan(n2)) return NAN; + if(isinf(n1) || isinf(n2)) return INFINITY; + return n1 + n2; +} +NETDATA_DOUBLE eval_minus(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + NETDATA_DOUBLE n1 = eval_value(exp, &op->ops[0], error); + NETDATA_DOUBLE n2 = eval_value(exp, &op->ops[1], error); + if(isnan(n1) || isnan(n2)) return NAN; + if(isinf(n1) || isinf(n2)) return INFINITY; + return n1 - n2; +} +NETDATA_DOUBLE eval_multiply(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + NETDATA_DOUBLE n1 = eval_value(exp, &op->ops[0], error); + NETDATA_DOUBLE n2 = eval_value(exp, &op->ops[1], error); + if(isnan(n1) || isnan(n2)) return NAN; + if(isinf(n1) || isinf(n2)) return INFINITY; + return n1 * n2; +} +NETDATA_DOUBLE eval_divide(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + NETDATA_DOUBLE n1 = eval_value(exp, &op->ops[0], error); + NETDATA_DOUBLE n2 = eval_value(exp, &op->ops[1], error); + if(isnan(n1) || isnan(n2)) return NAN; + if(isinf(n1) || isinf(n2)) return INFINITY; + return n1 / n2; +} +NETDATA_DOUBLE eval_nop(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + return eval_value(exp, &op->ops[0], error); +} +NETDATA_DOUBLE eval_not(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + return !is_true(eval_value(exp, &op->ops[0], error)); +} +NETDATA_DOUBLE eval_sign_plus(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + return eval_value(exp, &op->ops[0], error); +} +NETDATA_DOUBLE eval_sign_minus(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + NETDATA_DOUBLE n1 = eval_value(exp, &op->ops[0], error); + if(isnan(n1)) return NAN; + if(isinf(n1)) return INFINITY; + return -n1; +} +NETDATA_DOUBLE eval_abs(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + NETDATA_DOUBLE n1 = eval_value(exp, &op->ops[0], error); + if(isnan(n1)) return NAN; + if(isinf(n1)) return INFINITY; + return ABS(n1); +} +NETDATA_DOUBLE eval_if_then_else(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + if(is_true(eval_value(exp, &op->ops[0], error))) + return eval_value(exp, &op->ops[1], error); + else + return eval_value(exp, &op->ops[2], error); +} + +static struct operator { + const char *print_as; + char precedence; + char parameters; + char isfunction; + NETDATA_DOUBLE (*eval)(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error); +} operators[256] = { + // this is a random access array + // we always access it with a known EVAL_OPERATOR_X + + [EVAL_OPERATOR_AND] = { "&&", 2, 2, 0, eval_and }, + [EVAL_OPERATOR_OR] = { "||", 2, 2, 0, eval_or }, + [EVAL_OPERATOR_GREATER_THAN_OR_EQUAL] = { ">=", 3, 2, 0, eval_greater_than_or_equal }, + [EVAL_OPERATOR_LESS_THAN_OR_EQUAL] = { "<=", 3, 2, 0, eval_less_than_or_equal }, + [EVAL_OPERATOR_NOT_EQUAL] = { "!=", 3, 2, 0, eval_not_equal }, + [EVAL_OPERATOR_EQUAL] = { "==", 3, 2, 0, eval_equal }, + [EVAL_OPERATOR_LESS] = { "<", 3, 2, 0, eval_less }, + [EVAL_OPERATOR_GREATER] = { ">", 3, 2, 0, eval_greater }, + [EVAL_OPERATOR_PLUS] = { "+", 4, 2, 0, eval_plus }, + [EVAL_OPERATOR_MINUS] = { "-", 4, 2, 0, eval_minus }, + [EVAL_OPERATOR_MULTIPLY] = { "*", 5, 2, 0, eval_multiply }, + [EVAL_OPERATOR_DIVIDE] = { "/", 5, 2, 0, eval_divide }, + [EVAL_OPERATOR_NOT] = { "!", 6, 1, 0, eval_not }, + [EVAL_OPERATOR_SIGN_PLUS] = { "+", 6, 1, 0, eval_sign_plus }, + [EVAL_OPERATOR_SIGN_MINUS] = { "-", 6, 1, 0, eval_sign_minus }, + [EVAL_OPERATOR_ABS] = { "abs(",6,1, 1, eval_abs }, + [EVAL_OPERATOR_IF_THEN_ELSE] = { "?", 7, 3, 0, eval_if_then_else }, + [EVAL_OPERATOR_NOP] = { NULL, 8, 1, 0, eval_nop }, + [EVAL_OPERATOR_EXPRESSION_OPEN] = { NULL, 8, 1, 0, eval_nop }, + + // this should exist in our evaluation list + [EVAL_OPERATOR_EXPRESSION_CLOSE] = { NULL, 99, 1, 0, eval_nop } +}; + +#define eval_precedence(operator) (operators[(unsigned char)(operator)].precedence) + +static inline NETDATA_DOUBLE eval_node(EVAL_EXPRESSION *exp, EVAL_NODE *op, int *error) { + if(unlikely(op->count != operators[op->operator].parameters)) { + *error = EVAL_ERROR_INVALID_NUMBER_OF_OPERANDS; + return 0; + } + + NETDATA_DOUBLE n = operators[op->operator].eval(exp, op, error); + + return n; +} + +// ---------------------------------------------------------------------------- +// parsed-as generation + +static inline void print_parsed_as_variable(BUFFER *out, EVAL_VARIABLE *v, int *error) { + (void)error; + buffer_sprintf(out, "${%s}", string2str(v->name)); +} + +static inline void print_parsed_as_constant(BUFFER *out, NETDATA_DOUBLE n) { + if(unlikely(isnan(n))) { + buffer_strcat(out, "nan"); + return; + } + + if(unlikely(isinf(n))) { + buffer_strcat(out, "inf"); + return; + } + + char b[100+1], *s; + snprintfz(b, 100, NETDATA_DOUBLE_FORMAT, n); + + s = &b[strlen(b) - 1]; + while(s > b && *s == '0') { + *s ='\0'; + s--; + } + + if(s > b && *s == '.') + *s = '\0'; + + buffer_strcat(out, b); +} + +static inline void print_parsed_as_value(BUFFER *out, EVAL_VALUE *v, int *error) { + switch(v->type) { + case EVAL_VALUE_EXPRESSION: + print_parsed_as_node(out, v->expression, error); + break; + + case EVAL_VALUE_NUMBER: + print_parsed_as_constant(out, v->number); + break; + + case EVAL_VALUE_VARIABLE: + print_parsed_as_variable(out, v->variable, error); + break; + + default: + *error = EVAL_ERROR_INVALID_VALUE; + break; + } +} + +static inline void print_parsed_as_node(BUFFER *out, EVAL_NODE *op, int *error) { + if(unlikely(op->count != operators[op->operator].parameters)) { + *error = EVAL_ERROR_INVALID_NUMBER_OF_OPERANDS; + return; + } + + if(operators[op->operator].parameters == 1) { + + if(operators[op->operator].print_as) + buffer_sprintf(out, "%s", operators[op->operator].print_as); + + //if(op->operator == EVAL_OPERATOR_EXPRESSION_OPEN) + // buffer_strcat(out, "("); + + print_parsed_as_value(out, &op->ops[0], error); + + //if(op->operator == EVAL_OPERATOR_EXPRESSION_OPEN) + // buffer_strcat(out, ")"); + } + + else if(operators[op->operator].parameters == 2) { + buffer_strcat(out, "("); + print_parsed_as_value(out, &op->ops[0], error); + + if(operators[op->operator].print_as) + buffer_sprintf(out, " %s ", operators[op->operator].print_as); + + print_parsed_as_value(out, &op->ops[1], error); + buffer_strcat(out, ")"); + } + else if(op->operator == EVAL_OPERATOR_IF_THEN_ELSE && operators[op->operator].parameters == 3) { + buffer_strcat(out, "("); + print_parsed_as_value(out, &op->ops[0], error); + + if(operators[op->operator].print_as) + buffer_sprintf(out, " %s ", operators[op->operator].print_as); + + print_parsed_as_value(out, &op->ops[1], error); + buffer_strcat(out, " : "); + print_parsed_as_value(out, &op->ops[2], error); + buffer_strcat(out, ")"); + } + + if(operators[op->operator].isfunction) + buffer_strcat(out, ")"); +} + +// ---------------------------------------------------------------------------- +// parsing expressions + +// skip spaces +static inline void skip_spaces(const char **string) { + const char *s = *string; + while(isspace(*s)) s++; + *string = s; +} + +// what character can appear just after an operator keyword +// like NOT AND OR ? +static inline int isoperatorterm_word(const char s) { + if(isspace(s) || s == '(' || s == '$' || s == '!' || s == '-' || s == '+' || isdigit(s) || !s) + return 1; + + return 0; +} + +// what character can appear just after an operator symbol? +static inline int isoperatorterm_symbol(const char s) { + if(isoperatorterm_word(s) || isalpha(s)) + return 1; + + return 0; +} + +// return 1 if the character should never appear in a variable +static inline int isvariableterm(const char s) { + if(isalnum(s) || s == '.' || s == '_') + return 0; + + return 1; +} + +// ---------------------------------------------------------------------------- +// parse operators + +static inline int parse_and(const char **string) { + const char *s = *string; + + // AND + if((s[0] == 'A' || s[0] == 'a') && (s[1] == 'N' || s[1] == 'n') && (s[2] == 'D' || s[2] == 'd') && isoperatorterm_word(s[3])) { + *string = &s[4]; + return 1; + } + + // && + if(s[0] == '&' && s[1] == '&' && isoperatorterm_symbol(s[2])) { + *string = &s[2]; + return 1; + } + + return 0; +} + +static inline int parse_or(const char **string) { + const char *s = *string; + + // OR + if((s[0] == 'O' || s[0] == 'o') && (s[1] == 'R' || s[1] == 'r') && isoperatorterm_word(s[2])) { + *string = &s[3]; + return 1; + } + + // || + if(s[0] == '|' && s[1] == '|' && isoperatorterm_symbol(s[2])) { + *string = &s[2]; + return 1; + } + + return 0; +} + +static inline int parse_greater_than_or_equal(const char **string) { + const char *s = *string; + + // >= + if(s[0] == '>' && s[1] == '=' && isoperatorterm_symbol(s[2])) { + *string = &s[2]; + return 1; + } + + return 0; +} + +static inline int parse_less_than_or_equal(const char **string) { + const char *s = *string; + + // <= + if (s[0] == '<' && s[1] == '=' && isoperatorterm_symbol(s[2])) { + *string = &s[2]; + return 1; + } + + return 0; +} + +static inline int parse_greater(const char **string) { + const char *s = *string; + + // > + if(s[0] == '>' && isoperatorterm_symbol(s[1])) { + *string = &s[1]; + return 1; + } + + return 0; +} + +static inline int parse_less(const char **string) { + const char *s = *string; + + // < + if(s[0] == '<' && isoperatorterm_symbol(s[1])) { + *string = &s[1]; + return 1; + } + + return 0; +} + +static inline int parse_equal(const char **string) { + const char *s = *string; + + // == + if(s[0] == '=' && s[1] == '=' && isoperatorterm_symbol(s[2])) { + *string = &s[2]; + return 1; + } + + // = + if(s[0] == '=' && isoperatorterm_symbol(s[1])) { + *string = &s[1]; + return 1; + } + + return 0; +} + +static inline int parse_not_equal(const char **string) { + const char *s = *string; + + // != + if(s[0] == '!' && s[1] == '=' && isoperatorterm_symbol(s[2])) { + *string = &s[2]; + return 1; + } + + // <> + if(s[0] == '<' && s[1] == '>' && isoperatorterm_symbol(s[2])) { + *string = &s[2]; + } + + return 0; +} + +static inline int parse_not(const char **string) { + const char *s = *string; + + // NOT + if((s[0] == 'N' || s[0] == 'n') && (s[1] == 'O' || s[1] == 'o') && (s[2] == 'T' || s[2] == 't') && isoperatorterm_word(s[3])) { + *string = &s[3]; + return 1; + } + + if(s[0] == '!') { + *string = &s[1]; + return 1; + } + + return 0; +} + +static inline int parse_multiply(const char **string) { + const char *s = *string; + + // * + if(s[0] == '*' && isoperatorterm_symbol(s[1])) { + *string = &s[1]; + return 1; + } + + return 0; +} + +static inline int parse_divide(const char **string) { + const char *s = *string; + + // / + if(s[0] == '/' && isoperatorterm_symbol(s[1])) { + *string = &s[1]; + return 1; + } + + return 0; +} + +static inline int parse_minus(const char **string) { + const char *s = *string; + + // - + if(s[0] == '-' && isoperatorterm_symbol(s[1])) { + *string = &s[1]; + return 1; + } + + return 0; +} + +static inline int parse_plus(const char **string) { + const char *s = *string; + + // + + if(s[0] == '+' && isoperatorterm_symbol(s[1])) { + *string = &s[1]; + return 1; + } + + return 0; +} + +static inline int parse_open_subexpression(const char **string) { + const char *s = *string; + + // ( + if(s[0] == '(') { + *string = &s[1]; + return 1; + } + + return 0; +} + +#define parse_close_function(x) parse_close_subexpression(x) + +static inline int parse_close_subexpression(const char **string) { + const char *s = *string; + + // ) + if(s[0] == ')') { + *string = &s[1]; + return 1; + } + + return 0; +} + +static inline int parse_variable(const char **string, char *buffer, size_t len) { + const char *s = *string; + + // $ + if(*s == '$') { + size_t i = 0; + s++; + + if(*s == '{') { + // ${variable_name} + + s++; + while (*s && *s != '}' && i < len) + buffer[i++] = *s++; + + if(*s == '}') + s++; + } + else { + // $variable_name + + while (*s && !isvariableterm(*s) && i < len) + buffer[i++] = *s++; + } + + buffer[i] = '\0'; + + if (buffer[0]) { + *string = s; + return 1; + } + } + + return 0; +} + +static inline int parse_constant(const char **string, NETDATA_DOUBLE *number) { + char *end = NULL; + NETDATA_DOUBLE n = str2ndd(*string, &end); + if(unlikely(!end || *string == end)) { + *number = 0; + return 0; + } + *number = n; + *string = end; + return 1; +} + +static inline int parse_abs(const char **string) { + const char *s = *string; + + // ABS + if((s[0] == 'A' || s[0] == 'a') && (s[1] == 'B' || s[1] == 'b') && (s[2] == 'S' || s[2] == 's') && s[3] == '(') { + *string = &s[3]; + return 1; + } + + return 0; +} + +static inline int parse_if_then_else(const char **string) { + const char *s = *string; + + // ? + if(s[0] == '?') { + *string = &s[1]; + return 1; + } + + return 0; +} + +static struct operator_parser { + unsigned char id; + int (*parse)(const char **); +} operator_parsers[] = { + // the order in this list is important! + // the first matching will be used + // so place the longer of overlapping ones + // at the top + + { EVAL_OPERATOR_AND, parse_and }, + { EVAL_OPERATOR_OR, parse_or }, + { EVAL_OPERATOR_GREATER_THAN_OR_EQUAL, parse_greater_than_or_equal }, + { EVAL_OPERATOR_LESS_THAN_OR_EQUAL, parse_less_than_or_equal }, + { EVAL_OPERATOR_NOT_EQUAL, parse_not_equal }, + { EVAL_OPERATOR_EQUAL, parse_equal }, + { EVAL_OPERATOR_LESS, parse_less }, + { EVAL_OPERATOR_GREATER, parse_greater }, + { EVAL_OPERATOR_PLUS, parse_plus }, + { EVAL_OPERATOR_MINUS, parse_minus }, + { EVAL_OPERATOR_MULTIPLY, parse_multiply }, + { EVAL_OPERATOR_DIVIDE, parse_divide }, + { EVAL_OPERATOR_IF_THEN_ELSE, parse_if_then_else }, + + /* we should not put in this list the following: + * + * - NOT + * - ( + * - ) + * + * these are handled in code + */ + + // termination + { EVAL_OPERATOR_NOP, NULL } +}; + +static inline unsigned char parse_operator(const char **string, int *precedence) { + skip_spaces(string); + + int i; + for(i = 0 ; operator_parsers[i].parse != NULL ; i++) + if(operator_parsers[i].parse(string)) { + if(precedence) *precedence = eval_precedence(operator_parsers[i].id); + return operator_parsers[i].id; + } + + return EVAL_OPERATOR_NOP; +} + +// ---------------------------------------------------------------------------- +// memory management + +static inline EVAL_NODE *eval_node_alloc(int count) { + static int id = 1; + + EVAL_NODE *op = callocz(1, sizeof(EVAL_NODE) + (sizeof(EVAL_VALUE) * count)); + + op->id = id++; + op->operator = EVAL_OPERATOR_NOP; + op->precedence = eval_precedence(EVAL_OPERATOR_NOP); + op->count = count; + return op; +} + +static inline void eval_node_set_value_to_node(EVAL_NODE *op, int pos, EVAL_NODE *value) { + if(pos >= op->count) + fatal("Invalid request to set position %d of OPERAND that has only %d values", pos + 1, op->count + 1); + + op->ops[pos].type = EVAL_VALUE_EXPRESSION; + op->ops[pos].expression = value; +} + +static inline void eval_node_set_value_to_constant(EVAL_NODE *op, int pos, NETDATA_DOUBLE value) { + if(pos >= op->count) + fatal("Invalid request to set position %d of OPERAND that has only %d values", pos + 1, op->count + 1); + + op->ops[pos].type = EVAL_VALUE_NUMBER; + op->ops[pos].number = value; +} + +static inline void eval_node_set_value_to_variable(EVAL_NODE *op, int pos, const char *variable) { + if(pos >= op->count) + fatal("Invalid request to set position %d of OPERAND that has only %d values", pos + 1, op->count + 1); + + op->ops[pos].type = EVAL_VALUE_VARIABLE; + op->ops[pos].variable = callocz(1, sizeof(EVAL_VARIABLE)); + op->ops[pos].variable->name = string_strdupz(variable); +} + +static inline void eval_variable_free(EVAL_VARIABLE *v) { + string_freez(v->name); + freez(v); +} + +static inline void eval_value_free(EVAL_VALUE *v) { + switch(v->type) { + case EVAL_VALUE_EXPRESSION: + eval_node_free(v->expression); + break; + + case EVAL_VALUE_VARIABLE: + eval_variable_free(v->variable); + break; + + default: + break; + } +} + +static inline void eval_node_free(EVAL_NODE *op) { + if(op->count) { + int i; + for(i = op->count - 1; i >= 0 ;i--) + eval_value_free(&op->ops[i]); + } + + freez(op); +} + +// ---------------------------------------------------------------------------- +// the parsing logic + +// helper function to avoid allocations all over the place +static inline EVAL_NODE *parse_next_operand_given_its_operator(const char **string, unsigned char operator_type, int *error) { + EVAL_NODE *sub = parse_one_full_operand(string, error); + if(!sub) return NULL; + + EVAL_NODE *op = eval_node_alloc(1); + op->operator = operator_type; + eval_node_set_value_to_node(op, 0, sub); + return op; +} + +// parse a full operand, including its sign or other associative operator (e.g. NOT) +static inline EVAL_NODE *parse_one_full_operand(const char **string, int *error) { + char variable_buffer[EVAL_MAX_VARIABLE_NAME_LENGTH + 1]; + EVAL_NODE *op1 = NULL; + NETDATA_DOUBLE number; + + *error = EVAL_ERROR_OK; + + skip_spaces(string); + if(!(**string)) { + *error = EVAL_ERROR_MISSING_OPERAND; + return NULL; + } + + if(parse_not(string)) { + op1 = parse_next_operand_given_its_operator(string, EVAL_OPERATOR_NOT, error); + op1->precedence = eval_precedence(EVAL_OPERATOR_NOT); + } + else if(parse_plus(string)) { + op1 = parse_next_operand_given_its_operator(string, EVAL_OPERATOR_SIGN_PLUS, error); + op1->precedence = eval_precedence(EVAL_OPERATOR_SIGN_PLUS); + } + else if(parse_minus(string)) { + op1 = parse_next_operand_given_its_operator(string, EVAL_OPERATOR_SIGN_MINUS, error); + op1->precedence = eval_precedence(EVAL_OPERATOR_SIGN_MINUS); + } + else if(parse_abs(string)) { + op1 = parse_next_operand_given_its_operator(string, EVAL_OPERATOR_ABS, error); + op1->precedence = eval_precedence(EVAL_OPERATOR_ABS); + } + else if(parse_open_subexpression(string)) { + EVAL_NODE *sub = parse_full_expression(string, error); + if(sub) { + op1 = eval_node_alloc(1); + op1->operator = EVAL_OPERATOR_EXPRESSION_OPEN; + op1->precedence = eval_precedence(EVAL_OPERATOR_EXPRESSION_OPEN); + eval_node_set_value_to_node(op1, 0, sub); + if(!parse_close_subexpression(string)) { + *error = EVAL_ERROR_MISSING_CLOSE_SUBEXPRESSION; + eval_node_free(op1); + return NULL; + } + } + } + else if(parse_variable(string, variable_buffer, EVAL_MAX_VARIABLE_NAME_LENGTH)) { + op1 = eval_node_alloc(1); + op1->operator = EVAL_OPERATOR_NOP; + eval_node_set_value_to_variable(op1, 0, variable_buffer); + } + else if(parse_constant(string, &number)) { + op1 = eval_node_alloc(1); + op1->operator = EVAL_OPERATOR_NOP; + eval_node_set_value_to_constant(op1, 0, number); + } + else if(**string) + *error = EVAL_ERROR_UNKNOWN_OPERAND; + else + *error = EVAL_ERROR_MISSING_OPERAND; + + return op1; +} + +// parse an operator and the rest of the expression +// precedence processing is handled here +static inline EVAL_NODE *parse_rest_of_expression(const char **string, int *error, EVAL_NODE *op1) { + EVAL_NODE *op2 = NULL; + unsigned char operator; + int precedence; + + operator = parse_operator(string, &precedence); + skip_spaces(string); + + if(operator != EVAL_OPERATOR_NOP) { + op2 = parse_one_full_operand(string, error); + if(!op2) { + // error is already reported + eval_node_free(op1); + return NULL; + } + + EVAL_NODE *op = eval_node_alloc(operators[operator].parameters); + op->operator = operator; + op->precedence = precedence; + + if(operator == EVAL_OPERATOR_IF_THEN_ELSE && op->count == 3) { + skip_spaces(string); + + if(**string != ':') { + eval_node_free(op); + eval_node_free(op1); + eval_node_free(op2); + *error = EVAL_ERROR_IF_THEN_ELSE_MISSING_ELSE; + return NULL; + } + (*string)++; + + skip_spaces(string); + + EVAL_NODE *op3 = parse_one_full_operand(string, error); + if(!op3) { + eval_node_free(op); + eval_node_free(op1); + eval_node_free(op2); + // error is already reported + return NULL; + } + + eval_node_set_value_to_node(op, 2, op3); + } + + eval_node_set_value_to_node(op, 1, op2); + + // precedence processing + // if this operator has a higher precedence compared to its next + // put the next operator on top of us (top = evaluated later) + // function recursion does the rest... + if(op->precedence > op1->precedence && op1->count == 2 && op1->operator != '(' && op1->ops[1].type == EVAL_VALUE_EXPRESSION) { + eval_node_set_value_to_node(op, 0, op1->ops[1].expression); + op1->ops[1].expression = op; + op = op1; + } + else + eval_node_set_value_to_node(op, 0, op1); + + return parse_rest_of_expression(string, error, op); + } + else if(**string == ')') { + ; + } + else if(**string) { + eval_node_free(op1); + op1 = NULL; + *error = EVAL_ERROR_MISSING_OPERATOR; + } + + return op1; +} + +// high level function to parse an expression or a sub-expression +static inline EVAL_NODE *parse_full_expression(const char **string, int *error) { + EVAL_NODE *op1 = parse_one_full_operand(string, error); + if(!op1) { + *error = EVAL_ERROR_MISSING_OPERAND; + return NULL; + } + + return parse_rest_of_expression(string, error, op1); +} + +// ---------------------------------------------------------------------------- +// public API + +int expression_evaluate(EVAL_EXPRESSION *expression) { + expression->error = EVAL_ERROR_OK; + + buffer_reset(expression->error_msg); + expression->result = eval_node(expression, (EVAL_NODE *)expression->nodes, &expression->error); + + if(unlikely(isnan(expression->result))) { + if(expression->error == EVAL_ERROR_OK) + expression->error = EVAL_ERROR_VALUE_IS_NAN; + } + else if(unlikely(isinf(expression->result))) { + if(expression->error == EVAL_ERROR_OK) + expression->error = EVAL_ERROR_VALUE_IS_INFINITE; + } + else if(unlikely(expression->error == EVAL_ERROR_UNKNOWN_VARIABLE)) { + // although there is an unknown variable + // the expression was evaluated successfully + expression->error = EVAL_ERROR_OK; + } + + if(expression->error != EVAL_ERROR_OK) { + expression->result = NAN; + + if(buffer_strlen(expression->error_msg)) + buffer_strcat(expression->error_msg, "; "); + + buffer_sprintf(expression->error_msg, "failed to evaluate expression with error %d (%s)", expression->error, expression_strerror(expression->error)); + return 0; + } + + return 1; +} + +EVAL_EXPRESSION *expression_parse(const char *string, const char **failed_at, int *error) { + const char *s = string; + int err = EVAL_ERROR_OK; + + EVAL_NODE *op = parse_full_expression(&s, &err); + + if(*s) { + if(op) { + eval_node_free(op); + op = NULL; + } + err = EVAL_ERROR_REMAINING_GARBAGE; + } + + if (failed_at) *failed_at = s; + if (error) *error = err; + + if(!op) { + unsigned long pos = s - string + 1; + error("failed to parse expression '%s': %s at character %lu (i.e.: '%s').", string, expression_strerror(err), pos, s); + return NULL; + } + + BUFFER *out = buffer_create(1024); + print_parsed_as_node(out, op, &err); + if(err != EVAL_ERROR_OK) { + error("failed to re-generate expression '%s' with reason: %s", string, expression_strerror(err)); + eval_node_free(op); + buffer_free(out); + return NULL; + } + + EVAL_EXPRESSION *exp = callocz(1, sizeof(EVAL_EXPRESSION)); + + exp->source = strdupz(string); + exp->parsed_as = strdupz(buffer_tostring(out)); + buffer_free(out); + + exp->error_msg = buffer_create(100); + exp->nodes = (void *)op; + + return exp; +} + +void expression_free(EVAL_EXPRESSION *expression) { + if(!expression) return; + + if(expression->nodes) eval_node_free((EVAL_NODE *)expression->nodes); + freez((void *)expression->source); + freez((void *)expression->parsed_as); + buffer_free(expression->error_msg); + freez(expression); +} + +const char *expression_strerror(int error) { + switch(error) { + case EVAL_ERROR_OK: + return "success"; + + case EVAL_ERROR_MISSING_CLOSE_SUBEXPRESSION: + return "missing closing parenthesis"; + + case EVAL_ERROR_UNKNOWN_OPERAND: + return "unknown operand"; + + case EVAL_ERROR_MISSING_OPERAND: + return "expected operand"; + + case EVAL_ERROR_MISSING_OPERATOR: + return "expected operator"; + + case EVAL_ERROR_REMAINING_GARBAGE: + return "remaining characters after expression"; + + case EVAL_ERROR_INVALID_VALUE: + return "invalid value structure - internal error"; + + case EVAL_ERROR_INVALID_NUMBER_OF_OPERANDS: + return "wrong number of operands for operation - internal error"; + + case EVAL_ERROR_VALUE_IS_NAN: + return "value is unset"; + + case EVAL_ERROR_VALUE_IS_INFINITE: + return "computed value is infinite"; + + case EVAL_ERROR_UNKNOWN_VARIABLE: + return "undefined variable"; + + case EVAL_ERROR_IF_THEN_ELSE_MISSING_ELSE: + return "missing second sub-expression of inline conditional"; + + default: + return "unknown error"; + } +} diff --git a/libnetdata/eval/eval.h b/libnetdata/eval/eval.h new file mode 100644 index 0000000..1633ec5 --- /dev/null +++ b/libnetdata/eval/eval.h @@ -0,0 +1,87 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EVAL_H +#define NETDATA_EVAL_H 1 + +#include "../libnetdata.h" + +#define EVAL_MAX_VARIABLE_NAME_LENGTH 300 + +typedef enum rrdcalc_status { + RRDCALC_STATUS_REMOVED = -2, + RRDCALC_STATUS_UNDEFINED = -1, + RRDCALC_STATUS_UNINITIALIZED = 0, + RRDCALC_STATUS_CLEAR = 1, + RRDCALC_STATUS_RAISED = 2, + RRDCALC_STATUS_WARNING = 3, + RRDCALC_STATUS_CRITICAL = 4 +} RRDCALC_STATUS; + +typedef struct eval_variable { + STRING *name; + struct eval_variable *next; +} EVAL_VARIABLE; + +typedef struct eval_expression { + const char *source; + const char *parsed_as; + + RRDCALC_STATUS *status; + NETDATA_DOUBLE *myself; + time_t *after; + time_t *before; + + NETDATA_DOUBLE result; + + int error; + BUFFER *error_msg; + + // hidden EVAL_NODE * + void *nodes; + + // custom data to be used for looking up variables + struct rrdcalc *rrdcalc; +} EVAL_EXPRESSION; + +#define EVAL_VALUE_INVALID 0 +#define EVAL_VALUE_NUMBER 1 +#define EVAL_VALUE_VARIABLE 2 +#define EVAL_VALUE_EXPRESSION 3 + +// parsing and evaluation +#define EVAL_ERROR_OK 0 + +// parsing errors +#define EVAL_ERROR_MISSING_CLOSE_SUBEXPRESSION 1 +#define EVAL_ERROR_UNKNOWN_OPERAND 2 +#define EVAL_ERROR_MISSING_OPERAND 3 +#define EVAL_ERROR_MISSING_OPERATOR 4 +#define EVAL_ERROR_REMAINING_GARBAGE 5 +#define EVAL_ERROR_IF_THEN_ELSE_MISSING_ELSE 6 + +// evaluation errors +#define EVAL_ERROR_INVALID_VALUE 101 +#define EVAL_ERROR_INVALID_NUMBER_OF_OPERANDS 102 +#define EVAL_ERROR_VALUE_IS_NAN 103 +#define EVAL_ERROR_VALUE_IS_INFINITE 104 +#define EVAL_ERROR_UNKNOWN_VARIABLE 105 + +// parse the given string as an expression and return: +// a pointer to an expression if it parsed OK +// NULL in which case the pointer to error has the error code +EVAL_EXPRESSION *expression_parse(const char *string, const char **failed_at, int *error); + +// free all resources allocated for an expression +void expression_free(EVAL_EXPRESSION *expression); + +// convert an error code to a message +const char *expression_strerror(int error); + +// evaluate an expression and return +// 1 = OK, the result is in: expression->result +// 2 = FAILED, the error message is in: buffer_tostring(expression->error_msg) +int expression_evaluate(EVAL_EXPRESSION *expression); + +int health_variable_lookup(STRING *variable, struct rrdcalc *rc, NETDATA_DOUBLE *result); + +#endif //NETDATA_EVAL_H diff --git a/libnetdata/health/Makefile.am b/libnetdata/health/Makefile.am new file mode 100644 index 0000000..643458b --- /dev/null +++ b/libnetdata/health/Makefile.am @@ -0,0 +1,7 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + $(NULL) diff --git a/libnetdata/health/health.c b/libnetdata/health/health.c new file mode 100644 index 0000000..c44ba08 --- /dev/null +++ b/libnetdata/health/health.c @@ -0,0 +1,173 @@ +#include "health.h" + +SILENCERS *silencers; + +/** + * Create Silencer + * + * Allocate a new silencer to Netdata. + * + * @return It returns the address off the silencer on success and NULL otherwise + */ +SILENCER *create_silencer(void) { + SILENCER *t = callocz(1, sizeof(SILENCER)); + debug(D_HEALTH, "HEALTH command API: Created empty silencer"); + + return t; +} + +/** + * Health Silencers add + * + * Add more one silencer to the list of silencers. + * + * @param silencer + */ +void health_silencers_add(SILENCER *silencer) { + // Add the created instance to the linked list in silencers + silencer->next = silencers->silencers; + silencers->silencers = silencer; + debug(D_HEALTH, "HEALTH command API: Added silencer %s:%s:%s:%s:%s", silencer->alarms, + silencer->charts, silencer->contexts, silencer->hosts, silencer->families + ); +} + +/** + * Silencers Add Parameter + * + * Create a new silencer and adjust the variables + * + * @param silencer a pointer to the silencer that will be adjusted + * @param key the key value sent by client + * @param value the value sent to the key + * + * @return It returns the silencer configured on success and NULL otherwise + */ +SILENCER *health_silencers_addparam(SILENCER *silencer, char *key, char *value) { + static uint32_t + hash_alarm = 0, + hash_template = 0, + hash_chart = 0, + hash_context = 0, + hash_host = 0, + hash_families = 0; + + if (unlikely(!hash_alarm)) { + hash_alarm = simple_uhash(HEALTH_ALARM_KEY); + hash_template = simple_uhash(HEALTH_TEMPLATE_KEY); + hash_chart = simple_uhash(HEALTH_CHART_KEY); + hash_context = simple_uhash(HEALTH_CONTEXT_KEY); + hash_host = simple_uhash(HEALTH_HOST_KEY); + hash_families = simple_uhash(HEALTH_FAMILIES_KEY); + } + + uint32_t hash = simple_uhash(key); + if (unlikely(silencer == NULL)) { + if ( + (hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) || + (hash == hash_template && !strcasecmp(key, HEALTH_TEMPLATE_KEY)) || + (hash == hash_chart && !strcasecmp(key, HEALTH_CHART_KEY)) || + (hash == hash_context && !strcasecmp(key, HEALTH_CONTEXT_KEY)) || + (hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY)) || + (hash == hash_families && !strcasecmp(key, HEALTH_FAMILIES_KEY)) + ) { + silencer = create_silencer(); + if(!silencer) { + error("Cannot add a new silencer to Netdata"); + return NULL; + } + } + } + + if (hash == hash_alarm && !strcasecmp(key, HEALTH_ALARM_KEY)) { + silencer->alarms = strdupz(value); + silencer->alarms_pattern = simple_pattern_create(silencer->alarms, NULL, SIMPLE_PATTERN_EXACT); + } else if (hash == hash_chart && !strcasecmp(key, HEALTH_CHART_KEY)) { + silencer->charts = strdupz(value); + silencer->charts_pattern = simple_pattern_create(silencer->charts, NULL, SIMPLE_PATTERN_EXACT); + } else if (hash == hash_context && !strcasecmp(key, HEALTH_CONTEXT_KEY)) { + silencer->contexts = strdupz(value); + silencer->contexts_pattern = simple_pattern_create(silencer->contexts, NULL, SIMPLE_PATTERN_EXACT); + } else if (hash == hash_host && !strcasecmp(key, HEALTH_HOST_KEY)) { + silencer->hosts = strdupz(value); + silencer->hosts_pattern = simple_pattern_create(silencer->hosts, NULL, SIMPLE_PATTERN_EXACT); + } else if (hash == hash_families && !strcasecmp(key, HEALTH_FAMILIES_KEY)) { + silencer->families = strdupz(value); + silencer->families_pattern = simple_pattern_create(silencer->families, NULL, SIMPLE_PATTERN_EXACT); + } + + return silencer; +} + +/** + * JSON Read Callback + * + * Callback called by netdata to create the silencer. + * + * @param e the main json structure + * + * @return It always return 0. + */ +int health_silencers_json_read_callback(JSON_ENTRY *e) +{ + switch(e->type) { + case JSON_OBJECT: +#ifndef ENABLE_JSONC + e->callback_function = health_silencers_json_read_callback; + if(strcmp(e->name,"")) { + // init silencer + debug(D_HEALTH, "JSON: Got object with a name, initializing new silencer for %s",e->name); +#endif + e->callback_data = create_silencer(); + if(e->callback_data) { + health_silencers_add(e->callback_data); + } +#ifndef ENABLE_JSONC + } +#endif + break; + + case JSON_ARRAY: + e->callback_function = health_silencers_json_read_callback; + break; + + case JSON_STRING: + if(!strcmp(e->name,"type")) { + debug(D_HEALTH, "JSON: Processing type=%s",e->data.string); + if (!strcmp(e->data.string,"SILENCE")) silencers->stype = STYPE_SILENCE_NOTIFICATIONS; + else if (!strcmp(e->data.string,"DISABLE")) silencers->stype = STYPE_DISABLE_ALARMS; + } else { + debug(D_HEALTH, "JSON: Adding %s=%s", e->name, e->data.string); + if (e->callback_data) + (void)health_silencers_addparam(e->callback_data, e->name, e->data.string); + } + break; + + case JSON_BOOLEAN: + debug(D_HEALTH, "JSON: Processing all_alarms"); + silencers->all_alarms=e->data.boolean?1:0; + break; + + case JSON_NUMBER: + case JSON_NULL: + break; + } + + return 0; +} + +/** + * Initialize Global Silencers + * + * Initialize the silencer for the whole netdata system. + * + * @return It returns 0 on success and -1 otherwise + */ +int health_initialize_global_silencers() { + silencers = mallocz(sizeof(SILENCERS)); + silencers->all_alarms=0; + silencers->stype=STYPE_NONE; + silencers->silencers=NULL; + + return 0; +} \ No newline at end of file diff --git a/libnetdata/health/health.h b/libnetdata/health/health.h new file mode 100644 index 0000000..6b8f9b3 --- /dev/null +++ b/libnetdata/health/health.h @@ -0,0 +1,55 @@ +#ifndef NETDATA_HEALTH_LIB +# define NETDATA_HEALTH_LIB 1 + +# include "../libnetdata.h" + +#define HEALTH_ALARM_KEY "alarm" +#define HEALTH_TEMPLATE_KEY "template" +#define HEALTH_CONTEXT_KEY "context" +#define HEALTH_CHART_KEY "chart" +#define HEALTH_HOST_KEY "hosts" +#define HEALTH_OS_KEY "os" +#define HEALTH_FAMILIES_KEY "families" +#define HEALTH_LOOKUP_KEY "lookup" +#define HEALTH_CALC_KEY "calc" + +typedef struct silencer { + char *alarms; + SIMPLE_PATTERN *alarms_pattern; + + char *hosts; + SIMPLE_PATTERN *hosts_pattern; + + char *contexts; + SIMPLE_PATTERN *contexts_pattern; + + char *charts; + SIMPLE_PATTERN *charts_pattern; + + char *families; + SIMPLE_PATTERN *families_pattern; + + struct silencer *next; +} SILENCER; + +typedef enum silence_type { + STYPE_NONE, + STYPE_DISABLE_ALARMS, + STYPE_SILENCE_NOTIFICATIONS +} SILENCE_TYPE; + +typedef struct silencers { + int all_alarms; + SILENCE_TYPE stype; + SILENCER *silencers; +} SILENCERS; + +extern SILENCERS *silencers; + +SILENCER *create_silencer(void); +int health_silencers_json_read_callback(JSON_ENTRY *e); +void health_silencers_add(SILENCER *silencer); +SILENCER * health_silencers_addparam(SILENCER *silencer, char *key, char *value); +int health_initialize_global_silencers(); + +#endif diff --git a/libnetdata/inlined.h b/libnetdata/inlined.h new file mode 100644 index 0000000..aa7f3c2 --- /dev/null +++ b/libnetdata/inlined.h @@ -0,0 +1,269 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_INLINED_H +#define NETDATA_INLINED_H 1 + +#include "libnetdata.h" + +#ifdef KERNEL_32BIT +typedef uint32_t kernel_uint_t; +#define str2kernel_uint_t(string) str2uint32_t(string) +#define KERNEL_UINT_FORMAT "%u" +#else +typedef uint64_t kernel_uint_t; +#define str2kernel_uint_t(string) str2uint64_t(string) +#define KERNEL_UINT_FORMAT "%" PRIu64 +#endif + +#define str2pid_t(string) str2uint32_t(string) + + +// for faster execution, allow the compiler to inline +// these functions that are called thousands of times per second + +static inline uint32_t simple_hash(const char *name) { + unsigned char *s = (unsigned char *) name; + uint32_t hval = 0x811c9dc5; + while (*s) { + hval *= 16777619; + hval ^= (uint32_t) *s++; + } + return hval; +} + +static inline uint32_t simple_uhash(const char *name) { + unsigned char *s = (unsigned char *) name; + uint32_t hval = 0x811c9dc5, c; + while ((c = *s++)) { + if (unlikely(c >= 'A' && c <= 'Z')) c += 'a' - 'A'; + hval *= 16777619; + hval ^= c; + } + return hval; +} + +static inline int str2i(const char *s) { + int n = 0; + char c, negative = (char)(*s == '-'); + const char *e = s + 30; // max number of character to iterate + + for(c = (char)((negative)?*(++s):*s); c >= '0' && c <= '9' && s < e ; c = *(++s)) { + n *= 10; + n += c - '0'; + } + + if(unlikely(negative)) + return -n; + + return n; +} + +static inline long str2l(const char *s) { + long n = 0; + char c, negative = (*s == '-'); + const char *e = &s[30]; // max number of character to iterate + + for(c = (negative)?*(++s):*s; c >= '0' && c <= '9' && s < e ; c = *(++s)) { + n *= 10; + n += c - '0'; + } + + if(unlikely(negative)) + return -n; + + return n; +} + +static inline uint32_t str2uint32_t(const char *s) { + uint32_t n = 0; + char c; + const char *e = &s[30]; // max number of character to iterate + + for(c = *s; c >= '0' && c <= '9' && s < e ; c = *(++s)) { + n *= 10; + n += c - '0'; + } + return n; +} + +static inline uint64_t str2uint64_t(const char *s) { + uint64_t n = 0; + char c; + const char *e = &s[30]; // max number of character to iterate + + for(c = *s; c >= '0' && c <= '9' && s < e ; c = *(++s)) { + n *= 10; + n += c - '0'; + } + return n; +} + +static inline unsigned long str2ul(const char *s) { + unsigned long n = 0; + char c; + const char *e = &s[30]; // max number of character to iterate + + for(c = *s; c >= '0' && c <= '9' && s < e ; c = *(++s)) { + n *= 10; + n += c - '0'; + } + return n; +} + +static inline unsigned long long str2ull(const char *s) { + unsigned long long n = 0; + char c; + const char *e = &s[30]; // max number of character to iterate + + for(c = *s; c >= '0' && c <= '9' && s < e ; c = *(++s)) { + n *= 10; + n += c - '0'; + } + return n; +} + +static inline long long str2ll(const char *s, char **endptr) { + int negative = 0; + + if(unlikely(*s == '-')) { + s++; + negative = 1; + } + else if(unlikely(*s == '+')) + s++; + + long long n = 0; + char c; + const char *e = &s[30]; // max number of character to iterate + + for(c = *s; c >= '0' && c <= '9' && s < e ; c = *(++s)) { + n *= 10; + n += c - '0'; + } + + if(unlikely(endptr)) + *endptr = (char *)s; + + if(unlikely(negative)) + return -n; + else + return n; +} + +static inline char *strncpyz(char *dst, const char *src, size_t n) { + char *p = dst; + + while (*src && n--) + *dst++ = *src++; + + *dst = '\0'; + + return p; +} + +static inline void sanitize_json_string(char *dst, const char *src, size_t dst_size) { + while (*src != '\0' && dst_size > 1) { + if (*src < 0x1F) { + *dst++ = '_'; + src++; + dst_size--; + } + else if (*src == '\\' || *src == '\"') { + *dst++ = '\\'; + *dst++ = *src++; + dst_size -= 2; + } + else { + *dst++ = *src++; + dst_size--; + } + } + *dst = '\0'; +} + +static inline bool sanitize_command_argument_string(char *dst, const char *src, size_t dst_size) { + // skip leading dashes + while (src[0] == '-') + src++; + + // escape single quotes + while (src[0] != '\0') { + if (src[0] == '\'') { + if (dst_size < 4) + return false; + + dst[0] = '\''; dst[1] = '\\'; dst[2] = '\''; dst[3] = '\''; + + dst += 4; + dst_size -= 4; + } else { + if (dst_size < 1) + return false; + + dst[0] = src[0]; + + dst += 1; + dst_size -= 1; + } + + src++; + } + + // make sure we have space to terminate the string + if (dst_size == 0) + return false; + *dst = '\0'; + + return true; +} + +static inline int read_file(const char *filename, char *buffer, size_t size) { + if(unlikely(!size)) return 3; + + int fd = open(filename, O_RDONLY, 0666); + if(unlikely(fd == -1)) { + buffer[0] = '\0'; + return 1; + } + + ssize_t r = read(fd, buffer, size); + if(unlikely(r == -1)) { + buffer[0] = '\0'; + close(fd); + return 2; + } + buffer[r] = '\0'; + + close(fd); + return 0; +} + +static inline int read_single_number_file(const char *filename, unsigned long long *result) { + char buffer[30 + 1]; + + int ret = read_file(filename, buffer, 30); + if(unlikely(ret)) { + *result = 0; + return ret; + } + + buffer[30] = '\0'; + *result = str2ull(buffer); + return 0; +} + +static inline int read_single_signed_number_file(const char *filename, long long *result) { + char buffer[30 + 1]; + + int ret = read_file(filename, buffer, 30); + if(unlikely(ret)) { + *result = 0; + return ret; + } + + buffer[30] = '\0'; + *result = atoll(buffer); + return 0; +} + +#endif //NETDATA_INLINED_H diff --git a/libnetdata/json/Makefile.am b/libnetdata/json/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/json/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/json/README.md b/libnetdata/json/README.md new file mode 100644 index 0000000..2e04b8b --- /dev/null +++ b/libnetdata/json/README.md @@ -0,0 +1,10 @@ + + +# json + +`json` contains a parser for json strings, based on `jsmn` (), but case you have installed the JSON-C library, the installation script will prefer it, you can also force its use with `--enable-jsonc` in the compilation time. + + diff --git a/libnetdata/json/jsmn.c b/libnetdata/json/jsmn.c new file mode 100644 index 0000000..2f48bd6 --- /dev/null +++ b/libnetdata/json/jsmn.c @@ -0,0 +1,328 @@ +#include + +#include "jsmn.h" + +/** + * Alloc token + * + * Allocates a fresh unused token from the token pull. + * + * @param parser the controller + * @param tokens the tokens I am working + * @param num_tokens the number total of tokens. + * + * @return it returns the next token to work. + */ +static jsmntok_t *jsmn_alloc_token(jsmn_parser *parser, + jsmntok_t *tokens, size_t num_tokens) { + jsmntok_t *tok; + if (parser->toknext >= num_tokens) { + return NULL; + } + tok = &tokens[parser->toknext++]; + tok->start = tok->end = -1; + tok->size = 0; +#ifdef JSMN_PARENT_LINKS + tok->parent = -1; +#endif + return tok; +} + +/** + * Fill Token + * + * Fills token type and boundaries. + * + * @param token the structure to set the values + * @param type is the token type + * @param start is the first position of the value + * @param end is the end of the value + */ +static void jsmn_fill_token(jsmntok_t *token, jsmntype_t type, + int start, int end) { + token->type = type; + token->start = start; + token->end = end; + token->size = 0; +} + +/** + * Parse primitive + * + * Fills next available token with JSON primitive. + * + * @param parser is the control structure + * @param js is the json string + * @param type is the token type + */ +static jsmnerr_t jsmn_parse_primitive(jsmn_parser *parser, const char *js, + size_t len, jsmntok_t *tokens, size_t num_tokens) { + jsmntok_t *token; + int start; + + start = parser->pos; + + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + switch (js[parser->pos]) { +#ifndef JSMN_STRICT + /* In strict mode primitive must be followed by "," or "}" or "]" */ + case ':': +#endif + case '\t' : case '\r' : case '\n' : case ' ' : + case ',' : case ']' : case '}' : + goto found; + } + if (js[parser->pos] < 32 || js[parser->pos] >= 127) { + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } +#ifdef JSMN_STRICT + /* In strict mode primitive must be followed by a comma/object/array */ + parser->pos = start; + return JSMN_ERROR_PART; +#endif + + found: + if (tokens == NULL) { + parser->pos--; + return 0; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_PRIMITIVE, start, parser->pos); +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + parser->pos--; + return 0; +} + +/** + * Parse string + * + * Fills next token with JSON string. + * + * @param parser is the control structure + * @param js is the json string + * @param len is the js length + * @param tokens is structure with the tokens mapped. + * @param num_tokens is the total number of tokens + * + * @return It returns 0 on success and another integer otherwise + */ +static jsmnerr_t jsmn_parse_string(jsmn_parser *parser, const char *js, + size_t len, jsmntok_t *tokens, size_t num_tokens) { + jsmntok_t *token; + + int start = parser->pos; + + parser->pos++; + + /* Skip starting quote */ + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + char c = js[parser->pos]; + + /* Quote: end of string */ + if (c == '\"') { + if (tokens == NULL) { + return 0; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) { + parser->pos = start; + return JSMN_ERROR_NOMEM; + } + jsmn_fill_token(token, JSMN_STRING, start+1, parser->pos); +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + return 0; + } + + /* Backslash: Quoted symbol expected */ + if (c == '\\') { + parser->pos++; + switch (js[parser->pos]) { + /* Allowed escaped symbols */ + case '\"': case '/' : case '\\' : case 'b' : + case 'f' : case 'r' : case 'n' : case 't' : + break; + /* Allows escaped symbol \uXXXX */ + case 'u': + parser->pos++; + int i = 0; + for(; i < 4 && js[parser->pos] != '\0'; i++) { + /* If it isn't a hex character we have an error */ + if(!((js[parser->pos] >= 48 && js[parser->pos] <= 57) || /* 0-9 */ + (js[parser->pos] >= 65 && js[parser->pos] <= 70) || /* A-F */ + (js[parser->pos] >= 97 && js[parser->pos] <= 102))) { /* a-f */ + parser->pos = start; + return JSMN_ERROR_INVAL; + } + parser->pos++; + } + parser->pos--; + break; + /* Unexpected symbol */ + default: + parser->pos = start; + return JSMN_ERROR_INVAL; + } + } + } + parser->pos = start; + return JSMN_ERROR_PART; +} + +/** + * JSMN Parse + * + * Parse JSON string and fill tokens. + * + * @param parser the auxiliary vector used to parser + * @param js the string to parse + * @param len the string length + * @param tokens the place to map the tokens + * @param num_tokens the number of tokens present in the tokens structure. + * + * @return It returns the number of tokens present in the string on success or a negative number otherwise + */ +jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, + jsmntok_t *tokens, unsigned int num_tokens) { + jsmnerr_t r; + int i; + jsmntok_t *token; + int count = 0; + + for (; parser->pos < len && js[parser->pos] != '\0'; parser->pos++) { + char c; + jsmntype_t type; + + c = js[parser->pos]; + switch (c) { + case '{': case '[': + count++; + if (tokens == NULL) { + break; + } + token = jsmn_alloc_token(parser, tokens, num_tokens); + if (token == NULL) + return JSMN_ERROR_NOMEM; + if (parser->toksuper != -1) { + tokens[parser->toksuper].size++; +#ifdef JSMN_PARENT_LINKS + token->parent = parser->toksuper; +#endif + } + token->type = (c == '{' ? JSMN_OBJECT : JSMN_ARRAY); + token->start = parser->pos; + parser->toksuper = parser->toknext - 1; + break; + case '}': case ']': + if (tokens == NULL) + break; + type = (c == '}' ? JSMN_OBJECT : JSMN_ARRAY); +#ifdef JSMN_PARENT_LINKS + if (parser->toknext < 1) { + return JSMN_ERROR_INVAL; + } + token = &tokens[parser->toknext - 1]; + for (;;) { + if (token->start != -1 && token->end == -1) { + if (token->type != type) { + return JSMN_ERROR_INVAL; + } + token->end = parser->pos + 1; + parser->toksuper = token->parent; + break; + } + if (token->parent == -1) { + break; + } + token = &tokens[token->parent]; + } +#else + for (i = parser->toknext - 1; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + if (token->type != type) { + return JSMN_ERROR_INVAL; + } + parser->toksuper = -1; + token->end = parser->pos + 1; + break; + } + } + /* Error if unmatched closing bracket */ + if (i == -1) return JSMN_ERROR_INVAL; + for (; i >= 0; i--) { + token = &tokens[i]; + if (token->start != -1 && token->end == -1) { + parser->toksuper = i; + break; + } + } +#endif + break; + case '\"': + r = jsmn_parse_string(parser, js, len, tokens, num_tokens); + if (r < 0) return r; + count++; + if (parser->toksuper != -1 && tokens != NULL) + tokens[parser->toksuper].size++; + break; + case '\t' : case '\r' : case '\n' : case ':' : case ',': case ' ': + break; +#ifdef JSMN_STRICT + /* In strict mode primitives are: numbers and booleans */ + case '-': case '0': case '1' : case '2': case '3' : case '4': + case '5': case '6': case '7' : case '8': case '9': + case 't': case 'f': case 'n' : +#else + /* In non-strict mode every unquoted value is a primitive */ + default: +#endif + r = jsmn_parse_primitive(parser, js, len, tokens, num_tokens); + if (r < 0) return r; + count++; + if (parser->toksuper != -1 && tokens != NULL) + tokens[parser->toksuper].size++; + break; + +#ifdef JSMN_STRICT + /* Unexpected char in strict mode */ + default: + return JSMN_ERROR_INVAL; +#endif + } + } + + if (tokens) { + for (i = parser->toknext - 1; i >= 0; i--) { + /* Unmatched opened object or array */ + if (tokens[i].start != -1 && tokens[i].end == -1) { + return JSMN_ERROR_PART; + } + } + } + + return count; +} + +/** + * JSMN Init + * + * Creates a new parser based over a given buffer with an array of tokens + * available. + * + * @param parser is the structure with values to reset + */ +void jsmn_init(jsmn_parser *parser) { + parser->pos = 0; + parser->toknext = 0; + parser->toksuper = -1; +} \ No newline at end of file diff --git a/libnetdata/json/jsmn.h b/libnetdata/json/jsmn.h new file mode 100644 index 0000000..beff586 --- /dev/null +++ b/libnetdata/json/jsmn.h @@ -0,0 +1,75 @@ +#ifndef __JSMN_H_ +#define __JSMN_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +/** + * JSON type identifier. Basic types are: + * o Object + * o Array + * o String + * o Other primitive: number, boolean (true/false) or null + */ +typedef enum { + JSMN_PRIMITIVE = 0, + JSMN_OBJECT = 1, + JSMN_ARRAY = 2, + JSMN_STRING = 3 +} jsmntype_t; + +typedef enum { + /* Not enough tokens were provided */ + JSMN_ERROR_NOMEM = -1, + /* Invalid character inside JSON string */ + JSMN_ERROR_INVAL = -2, + /* The string is not a full JSON packet, more bytes expected */ + JSMN_ERROR_PART = -3, +} jsmnerr_t; + +/** + * JSON token description. + * + * @param type type (object, array, string etc.) + * @param start start position in JSON data string + * @param end end position in JSON data string + */ +typedef struct { + jsmntype_t type; + int start; + int end; + int size; +#ifdef JSMN_PARENT_LINKS + int parent; +#endif +} jsmntok_t; + +/** + * JSON parser. Contains an array of token blocks available. Also stores + * the string being parsed now and current position in that string + */ +typedef struct { + unsigned int pos; /* offset in the JSON string */ + unsigned int toknext; /* next token to allocate */ + int toksuper; /* superior token node, e.g parent object or array */ +} jsmn_parser; + +/** + * Create JSON parser over an array of tokens + */ +void jsmn_init(jsmn_parser *parser); + +/** + * Run JSON parser. It parses a JSON data string into and array of tokens, each describing + * a single JSON object. + */ +jsmnerr_t jsmn_parse(jsmn_parser *parser, const char *js, size_t len, + jsmntok_t *tokens, unsigned int num_tokens); + +#ifdef __cplusplus +} +#endif + +#endif /* __JSMN_H_ */ \ No newline at end of file diff --git a/libnetdata/json/json.c b/libnetdata/json/json.c new file mode 100644 index 0000000..d5f62ed --- /dev/null +++ b/libnetdata/json/json.c @@ -0,0 +1,557 @@ +#include "jsmn.h" +#include "../libnetdata.h" +#include "json.h" +#include "libnetdata/libnetdata.h" +#include "../../health/health.h" + +#define JSON_TOKENS 1024 + +int json_tokens = JSON_TOKENS; + +/** + * Json Tokenise + * + * Map the string given inside tokens. + * + * @param js is the string used to create the tokens + * @param len is the string length + * @param count the number of tokens present in the string + * + * @return it returns the json parsed in tokens + */ +#ifdef ENABLE_JSONC +json_object *json_tokenise(char *js) { + if(!js) { + error("JSON: json string is empty."); + return NULL; + } + + json_object *token = json_tokener_parse(js); + if(!token) { + error("JSON: Invalid json string."); + return NULL; + } + + return token; +} +#else +jsmntok_t *json_tokenise(char *js, size_t len, size_t *count) +{ + int n = json_tokens; + if(!js || !len) { + error("JSON: json string is empty."); + return NULL; + } + + jsmn_parser parser; + jsmn_init(&parser); + + jsmntok_t *tokens = mallocz(sizeof(jsmntok_t) * n); + if(!tokens) return NULL; + + int ret = jsmn_parse(&parser, js, len, tokens, n); + while (ret == JSMN_ERROR_NOMEM) { + n *= 2; + jsmntok_t *new = reallocz(tokens, sizeof(jsmntok_t) * n); + if(!new) { + freez(tokens); + return NULL; + } + tokens = new; + ret = jsmn_parse(&parser, js, len, tokens, n); + } + + if (ret == JSMN_ERROR_INVAL) { + error("JSON: Invalid json string."); + freez(tokens); + return NULL; + } + else if (ret == JSMN_ERROR_PART) { + error("JSON: Truncated JSON string."); + freez(tokens); + return NULL; + } + + if(count) *count = (size_t)ret; + + if(json_tokens < n) json_tokens = n; + return tokens; +} +#endif + +/** + * Callback Print + * + * Set callback print case necessary and wrinte an information inside a buffer to write in the log. + * + * @param e a pointer for a structure that has the complete information about json structure. + * + * @return It always return 0 + */ +int json_callback_print(JSON_ENTRY *e) +{ + BUFFER *wb=buffer_create(300); + + buffer_sprintf(wb,"%s = ", e->name); + char txt[50]; + switch(e->type) { + case JSON_OBJECT: + e->callback_function = json_callback_print; + buffer_strcat(wb,"OBJECT"); + break; + + case JSON_ARRAY: + e->callback_function = json_callback_print; + sprintf(txt,"ARRAY[%lu]", (long unsigned int) e->data.items); + buffer_strcat(wb, txt); + break; + + case JSON_STRING: + buffer_strcat(wb, e->data.string); + break; + + case JSON_NUMBER: + sprintf(txt, NETDATA_DOUBLE_FORMAT_AUTO, e->data.number); + buffer_strcat(wb,txt); + + break; + + case JSON_BOOLEAN: + buffer_strcat(wb, e->data.boolean?"TRUE":"FALSE"); + break; + + case JSON_NULL: + buffer_strcat(wb,"NULL"); + break; + } + info("JSON: %s", buffer_tostring(wb)); + buffer_free(wb); + return 0; +} + +/** + * JSONC Set String + * + * Set the string value of the structure JSON_ENTRY. + * + * @param e the output structure + */ +static inline void json_jsonc_set_string(JSON_ENTRY *e,char *key,const char *value) { + size_t len = strlen(key); + if(len > JSON_NAME_LEN) + len = JSON_NAME_LEN; + e->type = JSON_STRING; + memcpy(e->name,key,len); + e->name[len] = 0x00; + e->data.string = (char *) value; +} + + +#ifdef ENABLE_JSONC +/** + * JSONC set Boolean + * + * Set the boolean value of the structure JSON_ENTRY + * + * @param e the output structure + * @param value the input value + */ +static inline void json_jsonc_set_boolean(JSON_ENTRY *e,int value) { + e->type = JSON_BOOLEAN; + e->data.boolean = value; +} + +static inline void json_jsonc_set_integer(JSON_ENTRY *e, char *key, int64_t value) { + size_t len = strlen(key); + if(len > JSON_NAME_LEN) + len = JSON_NAME_LEN; + e->type = JSON_NUMBER; + memcpy(e->name, key, len); + e->name[len] = 0; + e->data.number = (NETDATA_DOUBLE)value; +} + +/** + * Parse Array + * + * Parse the array object. + * + * @param ptr the pointer for the object that we will parse. + * @param callback_data additional data to be used together the callback function + * @param callback_function function used to create a silencer. + */ +static inline void json_jsonc_parse_array(json_object *ptr, void *callback_data,int (*callback_function)(struct json_entry *)) { + int end = json_object_array_length(ptr); + JSON_ENTRY e; + + if(end) { + int i; + i = 0; + + enum json_type type; + do { + json_object *jvalue = json_object_array_get_idx(ptr, i); + if(jvalue) { + e.callback_data = callback_data; + e.type = JSON_OBJECT; + callback_function(&e); + json_object_object_foreach(jvalue, key, val) { + type = json_object_get_type(val); + if (type == json_type_array) { + e.type = JSON_ARRAY; + json_jsonc_parse_array(val, callback_data, callback_function); + } else if (type == json_type_object) { + json_walk(val,callback_data,callback_function); + } else if (type == json_type_string) { + json_jsonc_set_string(&e,key,json_object_get_string(val)); + callback_function(&e); + } else if (type == json_type_boolean) { + json_jsonc_set_boolean(&e,json_object_get_boolean(val)); + callback_function(&e); + } + } + } + + } while (++i < end); + } +} +#else + +/** + * Walk string + * + * Set JSON_ENTRY to string and map the values from jsmntok_t. + * + * @param js the original string + * @param t the tokens + * @param start the first position + * @param e the output structure. + * + * @return It always return 1 + */ +size_t json_walk_string(char *js, jsmntok_t *t, size_t start, JSON_ENTRY *e) +{ + char old = js[t[start].end]; + js[t[start].end] = '\0'; + e->original_string = &js[t[start].start]; + + e->type = JSON_STRING; + e->data.string = e->original_string; + if(e->callback_function) e->callback_function(e); + js[t[start].end] = old; + return 1; +} + +/** + * Walk Primitive + * + * Define the data type of the string + * + * @param js the original string + * @param t the tokens + * @param start the first position + * @param e the output structure. + * + * @return It always return 1 + */ +size_t json_walk_primitive(char *js, jsmntok_t *t, size_t start, JSON_ENTRY *e) +{ + char old = js[t[start].end]; + js[t[start].end] = '\0'; + e->original_string = &js[t[start].start]; + + switch(e->original_string[0]) { + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': + case '8': case '9': case '-': case '.': + e->type = JSON_NUMBER; + e->data.number = strtold(e->original_string, NULL); + break; + + case 't': case 'T': + e->type = JSON_BOOLEAN; + e->data.boolean = 1; + break; + + case 'f': case 'F': + e->type = JSON_BOOLEAN; + e->data.boolean = 0; + break; + + case 'n': case 'N': + default: + e->type = JSON_NULL; + break; + } + if(e->callback_function) e->callback_function(e); + js[t[start].end] = old; + return 1; +} + +/** + * Array + * + * Measure the array length + * + * @param js the original string + * @param t the tokens + * @param nest the length of structure t + * @param start the first position + * @param e the structure with values and callback to be used inside the function. + * + * @return It returns the array length + */ +size_t json_walk_array(char *js, jsmntok_t *t, size_t nest, size_t start, JSON_ENTRY *e) +{ + JSON_ENTRY ne; + + char old = js[t[start].end]; + js[t[start].end] = '\0'; + ne.original_string = &js[t[start].start]; + + memcpy(&ne, e, sizeof(JSON_ENTRY)); + ne.type = JSON_ARRAY; + ne.data.items = t[start].size; + ne.callback_function = e->callback_function; + ne.name[0]='\0'; + ne.fullname[0]='\0'; + if(e->callback_function) e->callback_function(&ne); + js[t[start].end] = old; + + size_t i, init = start, size = t[start].size; + + start++; + for(i = 0; i < size ; i++) { + ne.pos = i; + if (strlen(e->name) > JSON_NAME_LEN - 24 || strlen(e->fullname) > JSON_FULLNAME_LEN -24) { + info("JSON: JSON walk_array ignoring element with name:%s fullname:%s",e->name, e->fullname); + continue; + } + snprintfz(ne.name, JSON_NAME_LEN, "%s[%lu]", e->name, i); + snprintfz(ne.fullname, JSON_FULLNAME_LEN, "%s[%lu]", e->fullname, i); + + switch(t[start].type) { + case JSMN_PRIMITIVE: + start += json_walk_primitive(js, t, start, &ne); + break; + + case JSMN_OBJECT: + start += json_walk_object(js, t, nest + 1, start, &ne); + break; + + case JSMN_ARRAY: + start += json_walk_array(js, t, nest + 1, start, &ne); + break; + + case JSMN_STRING: + start += json_walk_string(js, t, start, &ne); + break; + } + } + return start - init; +} + +/** + * Object + * + * Measure the Object length + * + * @param js the original string + * @param t the tokens + * @param nest the length of structure t + * @param start the first position + * @param e the output structure. + * + * @return It returns the Object length + */ +size_t json_walk_object(char *js, jsmntok_t *t, size_t nest, size_t start, JSON_ENTRY *e) +{ + JSON_ENTRY ne = { + .name = "", + .fullname = "", + .callback_data = NULL, + .callback_function = NULL + }; + + char old = js[t[start].end]; + js[t[start].end] = '\0'; + ne.original_string = &js[t[start].start]; + memcpy(&ne, e, sizeof(JSON_ENTRY)); + ne.type = JSON_OBJECT; + ne.callback_function = e->callback_function; + if(e->callback_function) e->callback_function(&ne); + js[t[start].end] = old; + + int key = 1; + size_t i, init = start, size = t[start].size; + + start++; + for(i = 0; i < size ; i++) { + switch(t[start].type) { + case JSMN_PRIMITIVE: + start += json_walk_primitive(js, t, start, &ne); + key = 1; + break; + + case JSMN_OBJECT: + start += json_walk_object(js, t, nest + 1, start, &ne); + key = 1; + break; + + case JSMN_ARRAY: + start += json_walk_array(js, t, nest + 1, start, &ne); + key = 1; + break; + + case JSMN_STRING: + default: + if(key) { + int len = t[start].end - t[start].start; + if (unlikely(len>JSON_NAME_LEN)) len=JSON_NAME_LEN; + strncpy(ne.name, &js[t[start].start], len); + ne.name[len] = '\0'; + len=strlen(e->fullname) + strlen(e->fullname[0]?".":"") + strlen(ne.name); + char *c = mallocz((len+1)*sizeof(char)); + sprintf(c,"%s%s%s", e->fullname, e->fullname[0]?".":"", ne.name); + if (unlikely(len>JSON_FULLNAME_LEN)) len=JSON_FULLNAME_LEN; + strncpy(ne.fullname, c, len); + freez(c); + start++; + key = 0; + } + else { + start += json_walk_string(js, t, start, &ne); + key = 1; + } + break; + } + } + return start - init; +} +#endif + +/** + * Tree + * + * Call the correct walk function according its type. + * + * @param t the json object to work + * @param callback_data additional data to be used together the callback function + * @param callback_function function used to create a silencer. + * + * @return It always return 1 + */ +#ifdef ENABLE_JSONC +size_t json_walk(json_object *t, void *callback_data, int (*callback_function)(struct json_entry *)) { + JSON_ENTRY e; + + e.callback_data = callback_data; + enum json_type type; + json_object_object_foreach(t, key, val) { + type = json_object_get_type(val); + if (type == json_type_array) { + e.type = JSON_ARRAY; + json_jsonc_parse_array(val,NULL,health_silencers_json_read_callback); + } else if (type == json_type_object) { + e.type = JSON_OBJECT; + } else if (type == json_type_string) { + json_jsonc_set_string(&e,key,json_object_get_string(val)); + callback_function(&e); + } else if (type == json_type_boolean) { + json_jsonc_set_boolean(&e,json_object_get_boolean(val)); + callback_function(&e); + } else if (type == json_type_int) { + json_jsonc_set_integer(&e,key,json_object_get_int64(val)); + callback_function(&e); + } + } + + return 1; +} +#else +/** + * Tree + * + * Call the correct walk function according its type. + * + * @param js the original string + * @param t the tokens + * @param callback_data additional data to be used together the callback function + * @param callback_function function used to create a silencer. + * + * @return It always return 1 + */ +size_t json_walk_tree(char *js, jsmntok_t *t, void *callback_data, int (*callback_function)(struct json_entry *)) +{ + JSON_ENTRY e = { + .name = "", + .fullname = "", + .callback_data = callback_data, + .callback_function = callback_function + }; + + switch (t[0].type) { + case JSMN_OBJECT: + e.type = JSON_OBJECT; + json_walk_object(js, t, 0, 0, &e); + break; + + case JSMN_ARRAY: + e.type = JSON_ARRAY; + json_walk_array(js, t, 0, 0, &e); + break; + + case JSMN_PRIMITIVE: + case JSMN_STRING: + break; + } + + return 1; +} +#endif + +/** + * JSON Parse + * + * Parse the json message with the callback function + * + * @param js the string that the callback function will parse + * @param callback_data additional data to be used together the callback function + * @param callback_function function used to create a silencer. + * + * @return JSON_OK case everything happened as expected, JSON_CANNOT_PARSE case there were errors in the + * parsing process and JSON_CANNOT_DOWNLOAD case the string given(js) is NULL. + */ +int json_parse(char *js, void *callback_data, int (*callback_function)(JSON_ENTRY *)) +{ + if(js) { +#ifdef ENABLE_JSONC + json_object *tokens = json_tokenise(js); +#else + size_t count; + jsmntok_t *tokens = json_tokenise(js, strlen(js), &count); +#endif + + if(tokens) { +#ifdef ENABLE_JSONC + json_walk(tokens, callback_data, callback_function); + json_object_put(tokens); +#else + json_walk_tree(js, tokens, callback_data, callback_function); + freez(tokens); +#endif + return JSON_OK; + } + + return JSON_CANNOT_PARSE; + } + + return JSON_CANNOT_DOWNLOAD; +} + +/* +int json_test(char *str) +{ + return json_parse(str, NULL, json_callback_print); +} + */ + diff --git a/libnetdata/json/json.h b/libnetdata/json/json.h new file mode 100644 index 0000000..b43f06b --- /dev/null +++ b/libnetdata/json/json.h @@ -0,0 +1,77 @@ +#ifndef CHECKIN_JSON_H +#define CHECKIN_JSON_H 1 + + +#if ENABLE_JSONC +#include +// fix an older json-c bug +// https://github.com/json-c/json-c/issues/135 +#ifdef error_description +#undef error_description +#endif // error_description +#endif // ENABLE_JSONC + +#include "jsmn.h" + +//https://www.ibm.com/support/knowledgecenter/en/SS9H2Y_7.6.0/com.ibm.dp.doc/json_parserlimits.html +#define JSON_NAME_LEN 256 +#define JSON_FULLNAME_LEN 1024 + +typedef enum { + JSON_OBJECT = 0, + JSON_ARRAY = 1, + JSON_STRING = 2, + JSON_NUMBER = 3, + JSON_BOOLEAN = 4, + JSON_NULL = 5, +} JSON_ENTRY_TYPE; + +typedef struct json_entry { + JSON_ENTRY_TYPE type; + char name[JSON_NAME_LEN + 1]; + char fullname[JSON_FULLNAME_LEN + 1]; + union { + char *string; // type == JSON_STRING + NETDATA_DOUBLE number; // type == JSON_NUMBER + int boolean; // type == JSON_BOOLEAN + size_t items; // type == JSON_ARRAY + } data; + size_t pos; // the position of this item in its parent + + char *original_string; + + void *callback_data; + int (*callback_function)(struct json_entry *); +} JSON_ENTRY; + +// ---------------------------------------------------------------------------- +// public functions + +#define JSON_OK 0 +#define JSON_CANNOT_DOWNLOAD 1 +#define JSON_CANNOT_PARSE 2 + +int json_parse(char *js, void *callback_data, int (*callback_function)(JSON_ENTRY *)); + + +// ---------------------------------------------------------------------------- +// private functions + +#ifdef ENABLE_JSONC +json_object *json_tokenise(char *js); +size_t json_walk(json_object *t, void *callback_data, int (*callback_function)(struct json_entry *)); +#else +jsmntok_t *json_tokenise(char *js, size_t len, size_t *count); +size_t json_walk_tree(char *js, jsmntok_t *t, void *callback_data, int (*callback_function)(struct json_entry *)); +#endif + +size_t json_walk_object(char *js, jsmntok_t *t, size_t nest, size_t start, JSON_ENTRY *e); +size_t json_walk_array(char *js, jsmntok_t *t, size_t nest, size_t start, JSON_ENTRY *e); +size_t json_walk_string(char *js, jsmntok_t *t, size_t start, JSON_ENTRY *e); +size_t json_walk_primitive(char *js, jsmntok_t *t, size_t start, JSON_ENTRY *e); + +int json_callback_print(JSON_ENTRY *e); + + + +#endif \ No newline at end of file diff --git a/libnetdata/libjudy/src/Judy.h b/libnetdata/libjudy/src/Judy.h new file mode 100644 index 0000000..adfb5b5 --- /dev/null +++ b/libnetdata/libjudy/src/Judy.h @@ -0,0 +1,622 @@ +#ifndef _JUDY_INCLUDED +#define _JUDY_INCLUDED +// _________________ +// +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.52 $ $Source: /judy/src/Judy.h $ +// +// HEADER FILE FOR EXPORTED FEATURES IN JUDY LIBRARY, libJudy.* +// +// See the manual entries for details. +// +// Note: This header file uses old-style comments on #-directive lines and +// avoids "()" on macro names in comments for compatibility with older cc -Aa +// and some tools on some platforms. + + +// PLATFORM-SPECIFIC + +#ifdef JU_WIN /* =============================================== */ + +typedef __int8 int8_t; +typedef __int16 int16_t; +typedef __int32 int32_t; +typedef __int64 int64_t; + +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +typedef unsigned __int64 uint64_t; + +#else /* ================ ! JU_WIN ============================= */ + +// ISO C99: 7.8 Format conversion of integer types +#include /* if this FAILS, try #include */ + +// ISO C99: 7.18 Integer types uint*_t +//#include + +#endif /* ================ ! JU_WIN ============================= */ + +// ISO C99 Standard: 7.20 General utilities +#include + +// ISO C99 Standard: 7.10/5.2.4.2.1 Sizes of integer types +#include + +#ifdef __cplusplus /* support use by C++ code */ +extern "C" { +#endif + + +// **************************************************************************** +// DECLARE SOME BASE TYPES IN CASE THEY ARE MISSING: +// +// These base types include "const" where appropriate, but only where of +// interest to the caller. For example, a caller cares that a variable passed +// by reference will not be modified, such as, "const void * Pindex", but not +// that the called function internally does not modify the pointer itself, such +// as, "void * const Pindex". +// +// Note that its OK to pass a Pvoid_t to a Pcvoid_t; the latter is the same, +// only constant. Callers need to do this so they can also pass & Pvoid_t to +// PPvoid_t (non-constant). + +#ifndef _PCVOID_T +#define _PCVOID_T +typedef const void * Pcvoid_t; +#endif + +#ifndef _PVOID_T +#define _PVOID_T +typedef void * Pvoid_t; +typedef void ** PPvoid_t; +#endif + +#ifndef _WORD_T +#define _WORD_T +typedef unsigned long Word_t, * PWord_t; // expect 32-bit or 64-bit words. +#endif + +#ifndef NULL +#define NULL 0 +#endif + + +// **************************************************************************** +// SUPPORT FOR ERROR HANDLING: +// +// Judy error numbers: +// +// Note: These are an enum so theres a related typedef, but the numbers are +// spelled out so you can map a number back to its name. + +typedef enum // uint8_t -- but C does not support this type of enum. +{ + +// Note: JU_ERRNO_NONE and JU_ERRNO_FULL are not real errors. They specify +// conditions which are otherwise impossible return values from 32-bit +// Judy1Count, which has 2^32 + 1 valid returns (0..2^32) plus one error +// return. These pseudo-errors support the return values that cannot otherwise +// be unambiguously represented in a 32-bit word, and will never occur on a +// 64-bit system. + + JU_ERRNO_NONE = 0, + JU_ERRNO_FULL = 1, + JU_ERRNO_NFMAX = JU_ERRNO_FULL, + +// JU_ERRNO_NOMEM comes from malloc(3C) when Judy cannot obtain needed memory. +// The system errno value is also set to ENOMEM. This error can be recoverable +// if the calling application frees other memory. +// +// TBD: Currently there is no guarantee the Judy array has no memory leaks +// upon JU_ERRNO_NOMEM. + + JU_ERRNO_NOMEM = 2, + +// Problems with parameters from the calling program: +// +// JU_ERRNO_NULLPPARRAY means PPArray was null; perhaps PArray was passed where +// &PArray was intended. Similarly, JU_ERRNO_NULLPINDEX means PIndex was null; +// perhaps &Index was intended. Also, JU_ERRNO_NONNULLPARRAY, +// JU_ERRNO_NULLPVALUE, and JU_ERRNO_UNSORTED, all added later (hence with +// higher numbers), mean: A non-null array was passed in where a null pointer +// was required; PValue was null; and unsorted indexes were detected. + + JU_ERRNO_NULLPPARRAY = 3, // see above. + JU_ERRNO_NONNULLPARRAY = 10, // see above. + JU_ERRNO_NULLPINDEX = 4, // see above. + JU_ERRNO_NULLPVALUE = 11, // see above. + JU_ERRNO_NOTJUDY1 = 5, // PArray is not to a Judy1 array. + JU_ERRNO_NOTJUDYL = 6, // PArray is not to a JudyL array. + JU_ERRNO_NOTJUDYSL = 7, // PArray is not to a JudySL array. + JU_ERRNO_UNSORTED = 12, // see above. + +// Errors below this point are not recoverable; further tries to access the +// Judy array might result in EFAULT and a core dump: +// +// JU_ERRNO_OVERRUN occurs when Judy detects, upon reallocation, that a block +// of memory in its own freelist was modified since being freed. + + JU_ERRNO_OVERRUN = 8, + +// JU_ERRNO_CORRUPT occurs when Judy detects an impossible value in a Judy data +// structure: +// +// Note: The Judy data structure contains some redundant elements that support +// this type of checking. + + JU_ERRNO_CORRUPT = 9 + +// Warning: At least some C or C++ compilers do not tolerate a trailing comma +// above here. At least we know of one case, in aCC; see JAGad58928. + +} JU_Errno_t; + + +// Judy errno structure: +// +// WARNING: For compatibility with possible future changes, the fields of this +// struct should not be referenced directly. Instead use the macros supplied +// below. + +// This structure should be declared on the stack in a threaded process. + +typedef struct J_UDY_ERROR_STRUCT +{ + JU_Errno_t je_Errno; // one of the enums above. + int je_ErrID; // often an internal source line number. + Word_t je_reserved[4]; // for future backward compatibility. + +} JError_t, * PJError_t; + + +// Related macros: +// +// Fields from error struct: + +#define JU_ERRNO(PJError) ((PJError)->je_Errno) +#define JU_ERRID(PJError) ((PJError)->je_ErrID) + +// For checking return values from various Judy functions: +// +// Note: Define JERR as -1, not as the seemingly more portable (Word_t) +// (~0UL), to avoid a compiler "overflow in implicit constant conversion" +// warning. + +#define JERR (-1) /* functions returning int or Word_t */ +#define PJERR ((Pvoid_t) (~0UL)) /* mainly for use here, see below */ +#define PPJERR ((PPvoid_t) (~0UL)) /* functions that return PPvoid_t */ + +// Convenience macro for when detailed error information (PJError_t) is not +// desired by the caller; a purposely short name: + +#define PJE0 ((PJError_t) NULL) + + +// **************************************************************************** +// JUDY FUNCTIONS: +// +// P_JE is a shorthand for use below: + +#define P_JE PJError_t PJError + +// **************************************************************************** +// JUDY1 FUNCTIONS: + +extern int Judy1Test( Pcvoid_t PArray, Word_t Index, P_JE); +extern int Judy1Set( PPvoid_t PPArray, Word_t Index, P_JE); +extern int Judy1SetArray( PPvoid_t PPArray, Word_t Count, + const Word_t * const PIndex, + P_JE); +extern int Judy1Unset( PPvoid_t PPArray, Word_t Index, P_JE); +extern Word_t Judy1Count( Pcvoid_t PArray, Word_t Index1, + Word_t Index2, P_JE); +extern int Judy1ByCount( Pcvoid_t PArray, Word_t Count, + Word_t * PIndex, P_JE); +extern Word_t Judy1FreeArray( PPvoid_t PPArray, P_JE); +extern Word_t Judy1MemUsed( Pcvoid_t PArray); +extern Word_t Judy1MemActive( Pcvoid_t PArray); +extern int Judy1First( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern int Judy1Next( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern int Judy1Last( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern int Judy1Prev( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern int Judy1FirstEmpty( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern int Judy1NextEmpty( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern int Judy1LastEmpty( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern int Judy1PrevEmpty( Pcvoid_t PArray, Word_t * PIndex, P_JE); + +extern PPvoid_t JudyLGet( Pcvoid_t PArray, Word_t Index, P_JE); +extern PPvoid_t JudyLIns( PPvoid_t PPArray, Word_t Index, P_JE); +extern int JudyLInsArray( PPvoid_t PPArray, Word_t Count, + const Word_t * const PIndex, + const Word_t * const PValue, + +// **************************************************************************** +// JUDYL FUNCTIONS: + P_JE); +extern int JudyLDel( PPvoid_t PPArray, Word_t Index, P_JE); +extern Word_t JudyLCount( Pcvoid_t PArray, Word_t Index1, + Word_t Index2, P_JE); +extern PPvoid_t JudyLByCount( Pcvoid_t PArray, Word_t Count, + Word_t * PIndex, P_JE); +extern Word_t JudyLFreeArray( PPvoid_t PPArray, P_JE); +extern Word_t JudyLMemUsed( Pcvoid_t PArray); +extern Word_t JudyLMemActive( Pcvoid_t PArray); +extern PPvoid_t JudyLFirst( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern PPvoid_t JudyLNext( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern PPvoid_t JudyLLast( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern PPvoid_t JudyLPrev( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern int JudyLFirstEmpty( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern int JudyLNextEmpty( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern int JudyLLastEmpty( Pcvoid_t PArray, Word_t * PIndex, P_JE); +extern int JudyLPrevEmpty( Pcvoid_t PArray, Word_t * PIndex, P_JE); + +// **************************************************************************** +// JUDYSL FUNCTIONS: + +extern PPvoid_t JudySLGet( Pcvoid_t, const uint8_t * Index, P_JE); +extern PPvoid_t JudySLIns( PPvoid_t, const uint8_t * Index, P_JE); +extern int JudySLDel( PPvoid_t, const uint8_t * Index, P_JE); +extern Word_t JudySLFreeArray( PPvoid_t, P_JE); +extern PPvoid_t JudySLFirst( Pcvoid_t, uint8_t * Index, P_JE); +extern PPvoid_t JudySLNext( Pcvoid_t, uint8_t * Index, P_JE); +extern PPvoid_t JudySLLast( Pcvoid_t, uint8_t * Index, P_JE); +extern PPvoid_t JudySLPrev( Pcvoid_t, uint8_t * Index, P_JE); + +// **************************************************************************** +// JUDYHSL FUNCTIONS: + +extern PPvoid_t JudyHSGet( Pcvoid_t, void *, Word_t); +extern PPvoid_t JudyHSIns( PPvoid_t, void *, Word_t, P_JE); +extern int JudyHSDel( PPvoid_t, void *, Word_t, P_JE); +extern Word_t JudyHSFreeArray( PPvoid_t, P_JE); + +extern const char *Judy1MallocSizes; +extern const char *JudyLMallocSizes; + +// **************************************************************************** +// JUDY memory interface to malloc() FUNCTIONS: + +extern Word_t JudyMalloc(Word_t); // words reqd => words allocd. +extern Word_t JudyMallocVirtual(Word_t); // words reqd => words allocd. +extern void JudyFree(Pvoid_t, Word_t); // free, size in words. +extern void JudyFreeVirtual(Pvoid_t, Word_t); // free, size in words. + +#define JLAP_INVALID 0x1 /* flag to mark pointer "not a Judy array" */ + +// **************************************************************************** +// MACRO EQUIVALENTS FOR JUDY FUNCTIONS: +// +// The following macros, such as J1T, are shorthands for calling Judy functions +// with parameter address-of and detailed error checking included. Since they +// are macros, the error checking code is replicated each time the macro is +// used, but it runs fast in the normal case of no error. +// +// If the caller does not like the way the default JUDYERROR macro handles +// errors (such as an exit(1) call when out of memory), they may define their +// own before the "#include ". A routine such as HandleJudyError +// could do checking on specific error numbers and print a different message +// dependent on the error. The following is one example: +// +// Note: the back-slashes are removed because some compilers will not accept +// them in comments. +// +// void HandleJudyError(uint8_t *, int, uint8_t *, int, int); +// #define JUDYERROR(CallerFile, CallerLine, JudyFunc, JudyErrno, JudyErrID) +// { +// HandleJudyError(CallerFile, CallerLine, JudyFunc, JudyErrno, JudyErrID); +// } +// +// The routine HandleJudyError could do checking on specific error numbers and +// print a different message dependent on the error. +// +// The macro receives five parameters that are: +// +// 1. CallerFile: Source filename where a Judy call returned a serious error. +// 2. CallerLine: Line number in that source file. +// 3. JudyFunc: Name of Judy function reporting the error. +// 4. JudyErrno: One of the JU_ERRNO* values enumerated above. +// 5. JudyErrID: The je_ErrID field described above. + +#ifndef JUDYERROR_NOTEST +#ifndef JUDYERROR /* supply a default error macro */ +#include + +#define JUDYERROR(CallerFile, CallerLine, JudyFunc, JudyErrno, JudyErrID) \ + { \ + (void) fprintf(stderr, "File '%s', line %d: %s(), " \ + "JU_ERRNO_* == %d, ID == %d\n", \ + CallerFile, CallerLine, \ + JudyFunc, JudyErrno, JudyErrID); \ + exit(1); \ + } + +#endif /* JUDYERROR */ +#endif /* JUDYERROR_NOTEST */ + +// If the JUDYERROR macro is not desired at all, then the following eliminates +// it. However, the return code from each Judy function (that is, the first +// parameter of each macro) must be checked by the caller to assure that an +// error did not occur. +// +// Example: +// +// #define JUDYERROR_NOTEST 1 +// #include +// +// or use this cc option at compile time: +// +// cc -DJUDYERROR_NOTEST ... +// +// Example code: +// +// J1S(Rc, PArray, Index); +// if (Rc == JERR) goto ...error +// +// or: +// +// JLI(PValue, PArray, Index); +// if (PValue == PJERR) goto ...error + + +// Internal shorthand macros for writing the J1S, etc. macros: + +#ifdef JUDYERROR_NOTEST /* ============================================ */ + +// "Judy Set Error": + +#define J_SE(FuncName,Errno) ((void) 0) + +// Note: In each J_*() case below, the digit is the number of key parameters +// to the Judy*() call. Just assign the Func result to the callers Rc value +// without a cast because none is required, and this keeps the API simpler. +// However, a family of different J_*() macros is needed to support the +// different numbers of key parameters (0,1,2) and the Func return type. +// +// In the names below, "I" = integer result; "P" = pointer result. Note, the +// Funcs for J_*P() return PPvoid_t, but cast this to a Pvoid_t for flexible, +// error-free assignment, and then compare to PJERR. + +#define J_0I(Rc,PArray,Func,FuncName) \ + { (Rc) = Func(PArray, PJE0); } + +#define J_1I(Rc,PArray,Index,Func,FuncName) \ + { (Rc) = Func(PArray, Index, PJE0); } + +#define J_1P(PV,PArray,Index,Func,FuncName) \ + { (PV) = (Pvoid_t) Func(PArray, Index, PJE0); } + +#define J_2I(Rc,PArray,Index,Arg2,Func,FuncName) \ + { (Rc) = Func(PArray, Index, Arg2, PJE0); } + +#define J_2C(Rc,PArray,Index1,Index2,Func,FuncName) \ + { (Rc) = Func(PArray, Index1, Index2, PJE0); } + +#define J_2P(PV,PArray,Index,Arg2,Func,FuncName) \ + { (PV) = (Pvoid_t) Func(PArray, Index, Arg2, PJE0); } + +// Variations for Judy*Set/InsArray functions: + +#define J_2AI(Rc,PArray,Count,PIndex,Func,FuncName) \ + { (Rc) = Func(PArray, Count, PIndex, PJE0); } +#define J_3AI(Rc,PArray,Count,PIndex,PValue,Func,FuncName) \ + { (Rc) = Func(PArray, Count, PIndex, PValue, PJE0); } + +#else /* ================ ! JUDYERROR_NOTEST ============================= */ + +#define J_E(FuncName,PJE) \ + JUDYERROR(__FILE__, __LINE__, FuncName, JU_ERRNO(PJE), JU_ERRID(PJE)) + +#define J_SE(FuncName,Errno) \ + { \ + JError_t J_Error; \ + JU_ERRNO(&J_Error) = (Errno); \ + JU_ERRID(&J_Error) = __LINE__; \ + J_E(FuncName, &J_Error); \ + } + +// Note: In each J_*() case below, the digit is the number of key parameters +// to the Judy*() call. Just assign the Func result to the callers Rc value +// without a cast because none is required, and this keeps the API simpler. +// However, a family of different J_*() macros is needed to support the +// different numbers of key parameters (0,1,2) and the Func return type. +// +// In the names below, "I" = integer result; "P" = pointer result. Note, the +// Funcs for J_*P() return PPvoid_t, but cast this to a Pvoid_t for flexible, +// error-free assignment, and then compare to PJERR. + +#define J_0I(Rc,PArray,Func,FuncName) \ + { \ + JError_t J_Error; \ + if (((Rc) = Func(PArray, &J_Error)) == JERR) \ + J_E(FuncName, &J_Error); \ + } + +#define J_1I(Rc,PArray,Index,Func,FuncName) \ + { \ + JError_t J_Error; \ + if (((Rc) = Func(PArray, Index, &J_Error)) == JERR) \ + J_E(FuncName, &J_Error); \ + } + +#define J_1P(Rc,PArray,Index,Func,FuncName) \ + { \ + JError_t J_Error; \ + if (((Rc) = (Pvoid_t) Func(PArray, Index, &J_Error)) == PJERR) \ + J_E(FuncName, &J_Error); \ + } + +#define J_2I(Rc,PArray,Index,Arg2,Func,FuncName) \ + { \ + JError_t J_Error; \ + if (((Rc) = Func(PArray, Index, Arg2, &J_Error)) == JERR) \ + J_E(FuncName, &J_Error); \ + } + +// Variation for Judy*Count functions, which return 0, not JERR, for error (and +// also for other non-error cases): +// +// Note: JU_ERRNO_NFMAX should only apply to 32-bit Judy1, but this header +// file lacks the necessary ifdefs to make it go away otherwise, so always +// check against it. + +#define J_2C(Rc,PArray,Index1,Index2,Func,FuncName) \ + { \ + JError_t J_Error; \ + if ((((Rc) = Func(PArray, Index1, Index2, &J_Error)) == 0) \ + && (JU_ERRNO(&J_Error) > JU_ERRNO_NFMAX)) \ + { \ + J_E(FuncName, &J_Error); \ + } \ + } + +#define J_2P(PV,PArray,Index,Arg2,Func,FuncName) \ + { \ + JError_t J_Error; \ + if (((PV) = (Pvoid_t) Func(PArray, Index, Arg2, &J_Error)) \ + == PJERR) J_E(FuncName, &J_Error); \ + } + +// Variations for Judy*Set/InsArray functions: + +#define J_2AI(Rc,PArray,Count,PIndex,Func,FuncName) \ + { \ + JError_t J_Error; \ + if (((Rc) = Func(PArray, Count, PIndex, &J_Error)) == JERR) \ + J_E(FuncName, &J_Error); \ + } + +#define J_3AI(Rc,PArray,Count,PIndex,PValue,Func,FuncName) \ + { \ + JError_t J_Error; \ + if (((Rc) = Func(PArray, Count, PIndex, PValue, &J_Error)) \ + == JERR) J_E(FuncName, &J_Error); \ + } + +#endif /* ================ ! JUDYERROR_NOTEST ============================= */ + +// Some of the macros are special cases that use inlined shortcuts for speed +// with root-level leaves: + +// This is a slower version with current processors, but in the future... + +#define J1T(Rc,PArray,Index) \ + (Rc) = Judy1Test((Pvoid_t)(PArray), Index, PJE0) + +#define J1S( Rc, PArray, Index) \ + J_1I(Rc, (&(PArray)), Index, Judy1Set, "Judy1Set") +#define J1SA(Rc, PArray, Count, PIndex) \ + J_2AI(Rc,(&(PArray)), Count, PIndex, Judy1SetArray, "Judy1SetArray") +#define J1U( Rc, PArray, Index) \ + J_1I(Rc, (&(PArray)), Index, Judy1Unset, "Judy1Unset") +#define J1F( Rc, PArray, Index) \ + J_1I(Rc, PArray, &(Index), Judy1First, "Judy1First") +#define J1N( Rc, PArray, Index) \ + J_1I(Rc, PArray, &(Index), Judy1Next, "Judy1Next") +#define J1L( Rc, PArray, Index) \ + J_1I(Rc, PArray, &(Index), Judy1Last, "Judy1Last") +#define J1P( Rc, PArray, Index) \ + J_1I(Rc, PArray, &(Index), Judy1Prev, "Judy1Prev") +#define J1FE(Rc, PArray, Index) \ + J_1I(Rc, PArray, &(Index), Judy1FirstEmpty, "Judy1FirstEmpty") +#define J1NE(Rc, PArray, Index) \ + J_1I(Rc, PArray, &(Index), Judy1NextEmpty, "Judy1NextEmpty") +#define J1LE(Rc, PArray, Index) \ + J_1I(Rc, PArray, &(Index), Judy1LastEmpty, "Judy1LastEmpty") +#define J1PE(Rc, PArray, Index) \ + J_1I(Rc, PArray, &(Index), Judy1PrevEmpty, "Judy1PrevEmpty") +#define J1C( Rc, PArray, Index1, Index2) \ + J_2C(Rc, PArray, Index1, Index2, Judy1Count, "Judy1Count") +#define J1BC(Rc, PArray, Count, Index) \ + J_2I(Rc, PArray, Count, &(Index), Judy1ByCount, "Judy1ByCount") +#define J1FA(Rc, PArray) \ + J_0I(Rc, (&(PArray)), Judy1FreeArray, "Judy1FreeArray") +#define J1MU(Rc, PArray) \ + (Rc) = Judy1MemUsed(PArray) + +#define JLG(PV,PArray,Index) \ + (PV) = (Pvoid_t)JudyLGet((Pvoid_t)PArray, Index, PJE0) + +#define JLI( PV, PArray, Index) \ + J_1P(PV, (&(PArray)), Index, JudyLIns, "JudyLIns") + +#define JLIA(Rc, PArray, Count, PIndex, PValue) \ + J_3AI(Rc,(&(PArray)), Count, PIndex, PValue, JudyLInsArray, \ + "JudyLInsArray") +#define JLD( Rc, PArray, Index) \ + J_1I(Rc, (&(PArray)), Index, JudyLDel, "JudyLDel") + +#define JLF( PV, PArray, Index) \ + J_1P(PV, PArray, &(Index), JudyLFirst, "JudyLFirst") + +#define JLN( PV, PArray, Index) \ + J_1P(PV, PArray, &(Index), JudyLNext, "JudyLNext") + +#define JLL( PV, PArray, Index) \ + J_1P(PV, PArray, &(Index), JudyLLast, "JudyLLast") +#define JLP( PV, PArray, Index) \ + J_1P(PV, PArray, &(Index), JudyLPrev, "JudyLPrev") +#define JLFE(Rc, PArray, Index) \ + J_1I(Rc, PArray, &(Index), JudyLFirstEmpty, "JudyLFirstEmpty") +#define JLNE(Rc, PArray, Index) \ + J_1I(Rc, PArray, &(Index), JudyLNextEmpty, "JudyLNextEmpty") +#define JLLE(Rc, PArray, Index) \ + J_1I(Rc, PArray, &(Index), JudyLLastEmpty, "JudyLLastEmpty") +#define JLPE(Rc, PArray, Index) \ + J_1I(Rc, PArray, &(Index), JudyLPrevEmpty, "JudyLPrevEmpty") +#define JLC( Rc, PArray, Index1, Index2) \ + J_2C(Rc, PArray, Index1, Index2, JudyLCount, "JudyLCount") +#define JLBC(PV, PArray, Count, Index) \ + J_2P(PV, PArray, Count, &(Index), JudyLByCount, "JudyLByCount") +#define JLFA(Rc, PArray) \ + J_0I(Rc, (&(PArray)), JudyLFreeArray, "JudyLFreeArray") +#define JLMU(Rc, PArray) \ + (Rc) = JudyLMemUsed(PArray) + +#define JHSI(PV, PArray, PIndex, Count) \ + J_2P(PV, (&(PArray)), PIndex, Count, JudyHSIns, "JudyHSIns") +#define JHSG(PV, PArray, PIndex, Count) \ + (PV) = (Pvoid_t) JudyHSGet(PArray, PIndex, Count) +#define JHSD(Rc, PArray, PIndex, Count) \ + J_2I(Rc, (&(PArray)), PIndex, Count, JudyHSDel, "JudyHSDel") +#define JHSFA(Rc, PArray) \ + J_0I(Rc, (&(PArray)), JudyHSFreeArray, "JudyHSFreeArray") + +#define JSLG( PV, PArray, Index) \ + J_1P( PV, PArray, Index, JudySLGet, "JudySLGet") +#define JSLI( PV, PArray, Index) \ + J_1P( PV, (&(PArray)), Index, JudySLIns, "JudySLIns") +#define JSLD( Rc, PArray, Index) \ + J_1I( Rc, (&(PArray)), Index, JudySLDel, "JudySLDel") +#define JSLF( PV, PArray, Index) \ + J_1P( PV, PArray, Index, JudySLFirst, "JudySLFirst") +#define JSLN( PV, PArray, Index) \ + J_1P( PV, PArray, Index, JudySLNext, "JudySLNext") +#define JSLL( PV, PArray, Index) \ + J_1P( PV, PArray, Index, JudySLLast, "JudySLLast") +#define JSLP( PV, PArray, Index) \ + J_1P( PV, PArray, Index, JudySLPrev, "JudySLPrev") +#define JSLFA(Rc, PArray) \ + J_0I( Rc, (&(PArray)), JudySLFreeArray, "JudySLFreeArray") + +#ifdef __cplusplus +} +#endif +#endif /* ! _JUDY_INCLUDED */ diff --git a/libnetdata/libjudy/src/JudyCommon/JudyMalloc.c b/libnetdata/libjudy/src/JudyCommon/JudyMalloc.c new file mode 100644 index 0000000..09a20e3 --- /dev/null +++ b/libnetdata/libjudy/src/JudyCommon/JudyMalloc.c @@ -0,0 +1,87 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.33 $ $Source: /judy/src/JudyCommon/JudyMalloc.c $ +// ************************************************************************ // +// JUDY - Memory Allocater // +// -by- // +// Douglas L. Baskins // +// Hewlett Packard // +// Fort Collins, Co // +// (970) 229-2027 // +// // +// ************************************************************************ // + +// JUDY INCLUDE FILES +#include "Judy.h" + +// **************************************************************************** +// J U D Y M A L L O C +// +// Allocate RAM. This is the single location in Judy code that calls +// malloc(3C). Note: JPM accounting occurs at a higher level. + +Word_t JudyMalloc( + Word_t Words) +{ + Word_t Addr; + + Addr = (Word_t) malloc(Words * sizeof(Word_t)); + return(Addr); + +} // JudyMalloc() + + +// **************************************************************************** +// J U D Y F R E E + +void JudyFree( + void * PWord, + Word_t Words) +{ + (void) Words; + free(PWord); + +} // JudyFree() + + +// **************************************************************************** +// J U D Y M A L L O C +// +// Higher-level "wrapper" for allocating objects that need not be in RAM, +// although at this time they are in fact only in RAM. Later we hope that some +// entire subtrees (at a JPM or branch) can be "virtual", so their allocations +// and frees should go through this level. + +Word_t JudyMallocVirtual( + Word_t Words) +{ + return(JudyMalloc(Words)); + +} // JudyMallocVirtual() + + +// **************************************************************************** +// J U D Y F R E E + +void JudyFreeVirtual( + void * PWord, + Word_t Words) +{ + JudyFree(PWord, Words); + +} // JudyFreeVirtual() diff --git a/libnetdata/libjudy/src/JudyCommon/JudyPrivate.h b/libnetdata/libjudy/src/JudyCommon/JudyPrivate.h new file mode 100644 index 0000000..350631f --- /dev/null +++ b/libnetdata/libjudy/src/JudyCommon/JudyPrivate.h @@ -0,0 +1,1613 @@ +#ifndef _JUDYPRIVATE_INCLUDED +#define _JUDYPRIVATE_INCLUDED +// _________________ +// +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.77 $ $Source: /judy/src/JudyCommon/JudyPrivate.h $ +// +// Header file for all Judy sources, for global but private (non-exported) +// declarations. + +#include "Judy.h" + +// **************************************************************************** +// A VERY BRIEF EXPLANATION OF A JUDY ARRAY +// +// A Judy array is, effectively, a digital tree (or Trie) with 256 element +// branches (nodes), and with "compression tricks" applied to low-population +// branches or leaves to save a lot of memory at the cost of relatively little +// CPU time or cache fills. +// +// In the actual implementation, a Judy array is level-less, and traversing the +// "tree" actually means following the states in a state machine (SM) as +// directed by the Index. A Judy array is referred to here as an "SM", rather +// than as a "tree"; having "states", rather than "levels". +// +// Each branch or leaf in the SM decodes a portion ("digit") of the original +// Index; with 256-way branches there are 8 bits per digit. There are 3 kinds +// of branches, called: Linear, Bitmap and Uncompressed, of which the first 2 +// are compressed to contain no NULL entries. +// +// An Uncompressed branch has a 1.0 cache line fill cost to decode 8 bits of +// (digit, part of an Index), but it might contain many NULL entries, and is +// therefore inefficient with memory if lightly populated. +// +// A Linear branch has a ~1.75 cache line fill cost when at maximum population. +// A Bitmap branch has ~2.0 cache line fills. Linear and Bitmap branches are +// converted to Uncompressed branches when the additional memory can be +// amortized with larger populations. Higher-state branches have higher +// priority to be converted. +// +// Linear branches can hold 28 elements (based on detailed analysis) -- thus 28 +// expanses. A Linear branch is converted to a Bitmap branch when the 29th +// expanse is required. +// +// A Bitmap branch could hold 256 expanses, but is forced to convert to an +// Uncompressed branch when 185 expanses are required. Hopefully, it is +// converted before that because of population growth (again, based on detailed +// analysis and heuristics in the code). +// +// A path through the SM terminates to a leaf when the Index (or key) +// population in the expanse below a pointer will fit into 1 or 2 cache lines +// (~31..255 Indexes). A maximum-population Leaf has ~1.5 cache line fill +// cost. +// +// Leaves are sorted arrays of Indexes, where the Index Sizes (IS) are: 0, 1, +// 8, 16, 24, 32, [40, 48, 56, 64] bits. The IS depends on the "density" +// (population/expanse) of the values in the Leaf. Zero bits are possible if +// population == expanse in the SM (that is, a full small expanse). +// +// Elements of a branches are called Judy Pointers (JPs). Each JP object +// points to the next object in the SM, plus, a JP can decode an additional +// 2[6] bytes of an Index, but at the cost of "narrowing" the expanse +// represented by the next object in the SM. A "narrow" JP (one which has +// decode bytes/digits) is a way of skipping states in the SM. +// +// Although counterintuitive, we think a Judy SM is optimal when the Leaves are +// stored at MINIMUM compression (narrowing, or use of Decode bytes). If more +// aggressive compression was used, decompression of a leaf be required to +// insert an index. Additional compression would save a little memory but not +// help performance significantly. + + +#ifdef A_PICTURE_IS_WORTH_1000_WORDS +******************************************************************************* + +JUDY 32-BIT STATE MACHINE (SM) EXAMPLE, FOR INDEX = 0x02040103 + +The Index used in this example is purposely chosen to allow small, simple +examples below; each 1-byte "digit" from the Index has a small numeric value +that fits in one column. In the drawing below: + + JRP == Judy Root Pointer; + + C == 1 byte of a 1..3 byte Population (count of Indexes) below this + pointer. Since this is shared with the Decode field, the combined + sizes must be 3[7], that is, 1 word less 1 byte for the JP Type. + + The 1-byte field jp_Type is represented as: + + 1..3 == Number of bytes in the population (Pop0) word of the Branch or Leaf + below the pointer (note: 1..7 on 64-bit); indicates: + - number of bytes in Decode field == 3 - this number; + - number of bytes remaining to decode. + Note: The maximum is 3, not 4, because the 1st byte of the Index is + always decoded digitally in the top branch. + -B- == JP points to a Branch (there are many kinds of Branches). + -L- == JP points to a Leaf (there are many kinds of Leaves). + + (2) == Digit of Index decoded by position offset in branch (really + 0..0xff). + + 4* == Digit of Index necessary for decoding a "narrow" pointer, in a + Decode field; replaces 1 missing branch (really 0..0xff). + + 4+ == Digit of Index NOT necessary for decoding a "narrow" pointer, but + used for fast traversal of the SM by Judy1Test() and JudyLGet() + (see the code) (really 0..0xff). + + 0 == Byte in a JPs Pop0 field that is always ignored, because a leaf + can never contain more than 256 Indexes (Pop0 <= 255). + + +----- == A Branch or Leaf; drawn open-ended to remind you that it could + | have up to 256 columns. + +----- + + | + | == Pointer to next Branch or Leaf. + V + + | + O == A state is skipped by using a "narrow" pointer. + | + + < 1 > == Digit (Index) shown as an example is not necessarily in the + position shown; is sorted in order with neighbor Indexes. + (Really 0..0xff.) + +Note that this example shows every possibly topology to reach a leaf in a +32-bit Judy SM, although this is a very subtle point! + + STATE or` + LEVEL + +---+ +---+ +---+ +---+ +---+ +---+ +---+ +---+ + |RJP| |RJP| |RJP| |RJP| |RJP| |RJP| |RJP| |RJP| + L---+ B---+ B---+ B---+ B---+ B---+ B---+ B---+ + | | | | | | | | + | | | | | | | | + V V (2) V (2) V (2) V (2) V (2) V (2) V (2) + +------ +------ +------ +------ +------ +------ +------ +------ +Four |< 2 > | 0 | 4* | C | 4* | 4* | C | C +byte |< 4 > | 0 | 0 | C | 1* | C | C | C 4 +Index|< 1 > | C | C | C | C | C | C | C +Leaf |< 3 > | 3 | 2 | 3 | 1 | 2 | 3 | 3 + +------ +--L--- +--L--- +--B--- +--L--- +--B--- +--B--- +--B--- + | | | | | | | + / | / | | / / + / | / | | / / + | | | | | | | + V | V (4) | | V (4) V (4) + +------ | +------ | | +------ +------ + Three |< 4 > | | 4+ | | | 4+ | 4+ + byte Index|< 1 > O | 0 O O | 1* | C 3 + Leaf |< 3 > | | C | | | C | C + +------ | | 2 | | | 1 | 2 + / +----L- | | +----L- +----B- + / | | | | | + | / | / / / + | / | / / / + | / | | / / + | / | | / / + | | | | | | + V V | V(1) | V(1) + +------ +------ | +------ | +------ + Two byte |< 1 > |< 1 > | | 4+ | | 4+ + Index Leaf |< 3 > |< 3 > O | 1+ O | 1+ 2 + +------ +------ / | C | | C + / | 1 | | 1 + | +-L---- | +-L---- + | | | | + | / | / + | | | | + V V V V + +------ +------ +------ +------ + One byte Index Leaf |< 3 > |< 3 > |< 3 > |< 3 > 1 + +------ +------ +------ +------ + + +#endif // A_PICTURE_IS_WORTH_1000_WORDS + + +// **************************************************************************** +// MISCELLANEOUS GLOBALS: +// +// PLATFORM-SPECIFIC CONVENIENCE MACROS: +// +// These are derived from context (set by cc or in system header files) or +// based on JU_ macros from make_includes/platform.*.mk. We decided +// on 011018 that any macro reliably derivable from context (cc or headers) for +// ALL platforms supported by Judy is based on that derivation, but ANY +// exception means to stop using the external macro completely and derive from +// JU_ instead. + +// Other miscellaneous stuff: + +#ifndef _BOOL_T +#define _BOOL_T +typedef int bool_t; +#endif + +#define FUNCTION // null; easy to find functions. + +#ifndef TRUE +#define TRUE 1 +#endif + +#ifndef FALSE +#define FALSE 0 +#endif + +#ifdef TRACE // turn on all other tracing in the code: +#define TRACEJP 1 // JP traversals in JudyIns.c and JudyDel.c. +#define TRACEJPR 1 // JP traversals in retrieval code, JudyGet.c. +#define TRACECF 1 // cache fills in JudyGet.c. +#define TRACEMI 1 // malloc calls in JudyMallocIF.c. +#define TRACEMF 1 // malloc calls at a lower level in JudyMalloc.c. +#endif + + +// SUPPORT FOR DEBUG-ONLY CODE: +// +// By convention, use -DDEBUG to enable both debug-only code AND assertions in +// the Judy sources. +// +// Invert the sense of assertions, so they are off unless explicitly requested, +// in a uniform way. +// +// Note: It is NOT appropriate to put this in Judy.h; it would mess up +// application code. + +#ifndef DEBUG +#define NDEBUG 1 // must be 1 for "#if". +#endif + +// Shorthand notations to avoid #ifdefs for single-line conditional statements: +// +// Warning: These cannot be used around compiler directives, such as +// "#include", nor in the case where Code contains a comma other than nested +// within parentheses or quotes. + +#ifndef DEBUG +#define DBGCODE(Code) // null. +#else +#define DBGCODE(Code) Code +#endif + +#ifdef JUDY1 +#define JUDY1CODE(Code) Code +#define JUDYLCODE(Code) // null. +#endif + +#ifdef JUDYL +#define JUDYLCODE(Code) Code +#define JUDY1CODE(Code) // null. +#endif + +#include + +// **************************************************************************** +// FUNDAMENTAL CONSTANTS FOR MACHINE +// **************************************************************************** + +// Machine (CPU) cache line size: +// +// NOTE: A leaf size of 2 cache lines maximum is the target (optimal) for +// Judy. Its hard to obtain a machines cache line size at compile time, but +// if the machine has an unexpected cache line size, its not devastating if +// the following constants end up causing leaves that are 1 cache line in size, +// or even 4 cache lines in size. The assumed 32-bit system has 16-word = +// 64-byte cache lines, and the assumed 64-bit system has 16-word = 128-byte +// cache lines. + +#ifdef JU_64BIT +#define cJU_BYTESPERCL 128 // cache line size in bytes. +#else +#define cJU_BYTESPERCL 64 // cache line size in bytes. +#endif + +// Bits Per Byte: + +#define cJU_BITSPERBYTE 0x8 + +// Bytes Per Word and Bits Per Word, latter assuming sizeof(byte) is 8 bits: +// +// Expect 32 [64] bits per word. + +#define cJU_BYTESPERWORD (sizeof(Word_t)) +#define cJU_BITSPERWORD (sizeof(Word_t) * cJU_BITSPERBYTE) + +#define JU_BYTESTOWORDS(BYTES) \ + (((BYTES) + cJU_BYTESPERWORD - 1) / cJU_BYTESPERWORD) + +// A word that is all-ones, normally equal to -1UL, but safer with ~0: + +#define cJU_ALLONES (~0UL) + +// Note, these are forward references, but thats OK: + +#define cJU_FULLBITMAPB ((BITMAPB_t) cJU_ALLONES) +#define cJU_FULLBITMAPL ((BITMAPL_t) cJU_ALLONES) + + +// **************************************************************************** +// MISCELLANEOUS JUDY-SPECIFIC DECLARATIONS +// **************************************************************************** + +// ROOT STATE: +// +// State at the start of the Judy SM, based on 1 byte decoded per state; equal +// to the number of bytes per Index to decode. + +#define cJU_ROOTSTATE (sizeof(Word_t)) + + +// SUBEXPANSES PER STATE: +// +// Number of subexpanses per state traversed, which is the number of JPs in a +// branch (actual or theoretical) and the number of bits in a bitmap. + +#define cJU_SUBEXPPERSTATE 256 + + +// LEAF AND VALUE POINTERS: +// +// Some other basic object types are in declared in JudyPrivateBranch.h +// (Pjbl_t, Pjbb_t, Pjbu_t, Pjp_t) or are Judy1/L-specific (Pjlb_t). The +// few remaining types are declared below. +// +// Note: Leaf pointers are cast to different-sized objects depending on the +// leafs level, but are at least addresses (not just numbers), so use void * +// (Pvoid_t), not PWord_t or Word_t for them, except use Pjlw_t for whole-word +// (top-level, root-level) leaves. Value areas, however, are always whole +// words. +// +// Furthermore, use Pjll_t only for generic leaf pointers (for various size +// LeafLs). Use Pjlw_t for LeafWs. Use Pleaf (with type uint8_t *, uint16_t +// *, etc) when the leaf index size is known. + +typedef PWord_t Pjlw_t; // pointer to root-level leaf (whole-word indexes). +typedef Pvoid_t Pjll_t; // pointer to lower-level linear leaf. + +#ifdef JUDYL +typedef PWord_t Pjv_t; // pointer to JudyL value area. +#endif + + +// POINTER PREPARATION MACROS: +// +// These macros are used to strip malloc-namespace-type bits from a pointer + +// malloc-type word (which references any Judy mallocd object that might be +// obtained from other than a direct call of malloc()), prior to dereferencing +// the pointer as an address. The malloc-type bits allow Judy mallocd objects +// to come from different "malloc() namespaces". +// +// (root pointer) (JRP, see above) +// jp.jp_Addr generic pointer to next-level node, except when used +// as a JudyL Immed01 value area +// JU_JBB_PJP macro hides jbbs_Pjp (pointer to JP subarray) +// JL_JLB_PVALUE macro hides jLlbs_PValue (pointer to value subarray) +// +// When setting one of these fields or passing an address to j__udyFree*(), the +// "raw" memory address is used; otherwise the memory address must be passed +// through one of the macros below before its dereferenced. +// +// Note: After much study, the typecasts below appear in the macros rather +// than at the point of use, which is both simpler and allows the compiler to +// do type-checking. + + +#define P_JLW( ADDR) ((Pjlw_t) (ADDR)) // root leaf. +#define P_JPM( ADDR) ((Pjpm_t) (ADDR)) // root JPM. +#define P_JBL( ADDR) ((Pjbl_t) (ADDR)) // BranchL. +#define P_JBB( ADDR) ((Pjbb_t) (ADDR)) // BranchB. +#define P_JBU( ADDR) ((Pjbu_t) (ADDR)) // BranchU. +#define P_JLL( ADDR) ((Pjll_t) (ADDR)) // LeafL. +#define P_JLB( ADDR) ((Pjlb_t) (ADDR)) // LeafB1. +#define P_JP( ADDR) ((Pjp_t) (ADDR)) // JP. + +#ifdef JUDYL +#define P_JV( ADDR) ((Pjv_t) (ADDR)) // &value. +#endif + + +// LEAST BYTES: +// +// Mask for least bytes of a word, and a macro to perform this mask on an +// Index. +// +// Note: This macro has been problematic in the past to get right and to make +// portable. Its not OK on all systems to shift by the full word size. This +// macro should allow shifting by 1..N bytes, where N is the word size, but +// should produce a compiler warning if the macro is called with Bytes == 0. +// +// Warning: JU_LEASTBYTESMASK() is not a constant macro unless Bytes is a +// constant; otherwise it is a variable shift, which is expensive on some +// processors. + +#define JU_LEASTBYTESMASK(BYTES) \ + ((0x100UL << (cJU_BITSPERBYTE * ((BYTES) - 1))) - 1) + +#define JU_LEASTBYTES(INDEX,BYTES) ((INDEX) & JU_LEASTBYTESMASK(BYTES)) + + +// BITS IN EACH BITMAP SUBEXPANSE FOR BITMAP BRANCH AND LEAF: +// +// The bits per bitmap subexpanse times the number of subexpanses equals a +// constant (cJU_SUBEXPPERSTATE). You can also think of this as a compile-time +// choice of "aspect ratio" for bitmap branches and leaves (which can be set +// independently for each). +// +// A default aspect ratio is hardwired here if not overridden at compile time, +// such as by "EXTCCOPTS=-DBITMAP_BRANCH16x16 make". + +#if (! (defined(BITMAP_BRANCH8x32) || defined(BITMAP_BRANCH16x16) || defined(BITMAP_BRANCH32x8))) +#define BITMAP_BRANCH32x8 1 // 32 bits per subexpanse, 8 subexpanses. +#endif + +#ifdef BITMAP_BRANCH8x32 +#define BITMAPB_t uint8_t +#endif + +#ifdef BITMAP_BRANCH16x16 +#define BITMAPB_t uint16_t +#endif + +#ifdef BITMAP_BRANCH32x8 +#define BITMAPB_t uint32_t +#endif + +// Note: For bitmap leaves, BITMAP_LEAF64x4 is only valid for 64 bit: +// +// Note: Choice of aspect ratio mostly matters for JudyL bitmap leaves. For +// Judy1 the choice doesnt matter much -- the code generated for different +// BITMAP_LEAF* values choices varies, but correctness and performance are the +// same. + +#ifndef JU_64BIT + +#if (! (defined(BITMAP_LEAF8x32) || defined(BITMAP_LEAF16x16) || defined(BITMAP_LEAF32x8))) +#define BITMAP_LEAF32x8 // 32 bits per subexpanse, 8 subexpanses. +#endif + +#else // 32BIT + +#if (! (defined(BITMAP_LEAF8x32) || defined(BITMAP_LEAF16x16) || defined(BITMAP_LEAF32x8) || defined(BITMAP_LEAF64x4))) +#define BITMAP_LEAF64x4 // 64 bits per subexpanse, 4 subexpanses. + +#endif +#endif // JU_64BIT + +#ifdef BITMAP_LEAF8x32 +#define BITMAPL_t uint8_t +#endif + +#ifdef BITMAP_LEAF16x16 +#define BITMAPL_t uint16_t +#endif + +#ifdef BITMAP_LEAF32x8 +#define BITMAPL_t uint32_t +#endif + +#ifdef BITMAP_LEAF64x4 +#define BITMAPL_t uint64_t +#endif + + +// EXPORTED DATA AND FUNCTIONS: + +#ifdef JUDY1 +extern const uint8_t j__1_BranchBJPPopToWords[]; +#endif + +#ifdef JUDYL +extern const uint8_t j__L_BranchBJPPopToWords[]; +#endif + +// Fast LeafL search routine used for inlined code: + +#if (! defined(SEARCH_BINARY)) || (! defined(SEARCH_LINEAR)) +// default a binary search leaf method +#define SEARCH_BINARY 1 +//#define SEARCH_LINEAR 1 +#endif + +#ifdef SEARCH_LINEAR + +#define SEARCHLEAFNATIVE(LEAFTYPE,ADDR,POP1,INDEX) \ + LEAFTYPE *P_leaf = (LEAFTYPE *)(ADDR); \ + LEAFTYPE I_ndex = (INDEX); /* with masking */ \ + if (I_ndex > P_leaf[(POP1) - 1]) return(~(POP1)); \ + while(I_ndex > *P_leaf) P_leaf++; \ + if (I_ndex == *P_leaf) return(P_leaf - (LEAFTYPE *)(ADDR)); \ + return(~(P_leaf - (LEAFTYPE *)(ADDR))); + + +#define SEARCHLEAFNONNAT(ADDR,POP1,INDEX,LFBTS,COPYINDEX) \ +{ \ + uint8_t *P_leaf, *P_leafEnd; \ + Word_t i_ndex; \ + Word_t I_ndex = JU_LEASTBYTES((INDEX), (LFBTS)); \ + Word_t p_op1; \ + \ + P_leaf = (uint8_t *)(ADDR); \ + P_leafEnd = P_leaf + ((POP1) * (LFBTS)); \ + \ + do { \ + JU_COPY3_PINDEX_TO_LONG(i_ndex, P_leaf); \ + if (I_ndex <= i_ndex) break; \ + P_leaf += (LFBTS); \ + } while (P_leaf < P_leafEnd); \ + \ + p_op1 = (P_leaf - (uint8_t *) (ADDR)) / (LFBTS); \ + if (I_ndex == i_ndex) return(p_op1); \ + return(~p_op1); \ +} +#endif // SEARCH_LINEAR + +#ifdef SEARCH_BINARY + +#define SEARCHLEAFNATIVE(LEAFTYPE,ADDR,POP1,INDEX) \ + LEAFTYPE *P_leaf = (LEAFTYPE *)(ADDR); \ + LEAFTYPE I_ndex = (LEAFTYPE)INDEX; /* truncate hi bits */ \ + Word_t l_ow = cJU_ALLONES; \ + Word_t m_id; \ + Word_t h_igh = POP1; \ + \ + while ((h_igh - l_ow) > 1UL) \ + { \ + m_id = (h_igh + l_ow) / 2; \ + if (P_leaf[m_id] > I_ndex) \ + h_igh = m_id; \ + else \ + l_ow = m_id; \ + } \ + if (l_ow == cJU_ALLONES || P_leaf[l_ow] != I_ndex) \ + return(~h_igh); \ + return(l_ow) + + +#define SEARCHLEAFNONNAT(ADDR,POP1,INDEX,LFBTS,COPYINDEX) \ + uint8_t *P_leaf = (uint8_t *)(ADDR); \ + Word_t l_ow = cJU_ALLONES; \ + Word_t m_id; \ + Word_t h_igh = POP1; \ + Word_t I_ndex = JU_LEASTBYTES((INDEX), (LFBTS)); \ + Word_t i_ndex; \ + \ + I_ndex = JU_LEASTBYTES((INDEX), (LFBTS)); \ + \ + while ((h_igh - l_ow) > 1UL) \ + { \ + m_id = (h_igh + l_ow) / 2; \ + COPYINDEX(i_ndex, &P_leaf[m_id * (LFBTS)]); \ + if (i_ndex > I_ndex) \ + h_igh = m_id; \ + else \ + l_ow = m_id; \ + } \ + if (l_ow == cJU_ALLONES) return(~h_igh); \ + \ + COPYINDEX(i_ndex, &P_leaf[l_ow * (LFBTS)]); \ + if (i_ndex != I_ndex) return(~h_igh); \ + return(l_ow) + +#endif // SEARCH_BINARY + +// Fast way to count bits set in 8..32[64]-bit int: +// +// For performance, j__udyCountBits*() are written to take advantage of +// platform-specific features where available. +// + +#ifdef JU_NOINLINE + +extern BITMAPB_t j__udyCountBitsB(BITMAPB_t word); +extern BITMAPL_t j__udyCountBitsL(BITMAPL_t word); + +// Compiler supports inline + +#elif defined(JU_HPUX_IPF) + +#define j__udyCountBitsB(WORD) _Asm_popcnt(WORD) +#define j__udyCountBitsL(WORD) _Asm_popcnt(WORD) + +#elif defined(JU_LINUX_IPF) + +static inline BITMAPB_t j__udyCountBitsB(BITMAPB_t word) +{ + BITMAPB_t result; + __asm__ ("popcnt %0=%1" : "=r" (result) : "r" (word)); + return(result); +} + +static inline BITMAPL_t j__udyCountBitsL(BITMAPL_t word) +{ + BITMAPL_t result; + __asm__ ("popcnt %0=%1" : "=r" (result) : "r" (word)); + return(result); +} + + +#else // No instructions available, use inline code + +// **************************************************************************** +// __ J U D Y C O U N T B I T S B +// +// Return the number of bits set in "Word", for a bitmap branch. +// +// Note: Bitmap branches have maximum bitmap size = 32 bits. + +#ifdef JU_WIN +static __inline BITMAPB_t j__udyCountBitsB(BITMAPB_t word) +#else +static inline BITMAPB_t j__udyCountBitsB(BITMAPB_t word) +#endif +{ + word = (word & 0x55555555) + ((word & 0xAAAAAAAA) >> 1); + word = (word & 0x33333333) + ((word & 0xCCCCCCCC) >> 2); + word = (word & 0x0F0F0F0F) + ((word & 0xF0F0F0F0) >> 4); // >= 8 bits. +#if defined(BITMAP_BRANCH16x16) || defined(BITMAP_BRANCH32x8) + word = (word & 0x00FF00FF) + ((word & 0xFF00FF00) >> 8); // >= 16 bits. +#endif + +#ifdef BITMAP_BRANCH32x8 + word = (word & 0x0000FFFF) + ((word & 0xFFFF0000) >> 16); // >= 32 bits. +#endif + return(word); + +} // j__udyCountBitsB() + + +// **************************************************************************** +// __ J U D Y C O U N T B I T S L +// +// Return the number of bits set in "Word", for a bitmap leaf. +// +// Note: Bitmap branches have maximum bitmap size = 32 bits. + +// Note: Need both 32-bit and 64-bit versions of j__udyCountBitsL() because +// bitmap leaves can have 64-bit bitmaps. + +#ifdef JU_WIN +static __inline BITMAPL_t j__udyCountBitsL(BITMAPL_t word) +#else +static inline BITMAPL_t j__udyCountBitsL(BITMAPL_t word) +#endif +{ +#ifndef JU_64BIT + + word = (word & 0x55555555) + ((word & 0xAAAAAAAA) >> 1); + word = (word & 0x33333333) + ((word & 0xCCCCCCCC) >> 2); + word = (word & 0x0F0F0F0F) + ((word & 0xF0F0F0F0) >> 4); // >= 8 bits. +#if defined(BITMAP_LEAF16x16) || defined(BITMAP_LEAF32x8) + word = (word & 0x00FF00FF) + ((word & 0xFF00FF00) >> 8); // >= 16 bits. +#endif +#ifdef BITMAP_LEAF32x8 + word = (word & 0x0000FFFF) + ((word & 0xFFFF0000) >> 16); // >= 32 bits. +#endif + +#else // JU_64BIT + + word = (word & 0x5555555555555555) + ((word & 0xAAAAAAAAAAAAAAAA) >> 1); + word = (word & 0x3333333333333333) + ((word & 0xCCCCCCCCCCCCCCCC) >> 2); + word = (word & 0x0F0F0F0F0F0F0F0F) + ((word & 0xF0F0F0F0F0F0F0F0) >> 4); +#if defined(BITMAP_LEAF16x16) || defined(BITMAP_LEAF32x8) || defined(BITMAP_LEAF64x4) + word = (word & 0x00FF00FF00FF00FF) + ((word & 0xFF00FF00FF00FF00) >> 8); +#endif +#if defined(BITMAP_LEAF32x8) || defined(BITMAP_LEAF64x4) + word = (word & 0x0000FFFF0000FFFF) + ((word & 0xFFFF0000FFFF0000) >>16); +#endif +#ifdef BITMAP_LEAF64x4 + word = (word & 0x00000000FFFFFFFF) + ((word & 0xFFFFFFFF00000000) >>32); +#endif +#endif // JU_64BIT + + return(word); + +} // j__udyCountBitsL() + +#endif // Compiler supports inline + +// GET POP0: +// +// Get from jp_DcdPopO the Pop0 for various JP Types. +// +// Notes: +// +// - Different macros require different parameters... +// +// - There are no simple macros for cJU_BRANCH* Types because their +// populations must be added up and dont reside in an already-calculated +// place. (TBD: This is no longer true, now its in the JPM.) +// +// - cJU_JPIMM_POP0() is not defined because it would be redundant because the +// Pop1 is already encoded in each enum name. +// +// - A linear or bitmap leaf Pop0 cannot exceed cJU_SUBEXPPERSTATE - 1 (Pop0 = +// 0..255), so use a simpler, faster macro for it than for other JP Types. +// +// - Avoid any complex calculations that would slow down the compiled code. +// Assume these macros are only called for the appropriate JP Types. +// Unfortunately theres no way to trigger an assertion here if the JP type +// is incorrect for the macro, because these are merely expressions, not +// statements. + +#define JU_LEAFW_POP0(JRP) (*P_JLW(JRP)) +#define cJU_JPFULLPOPU1_POP0 (cJU_SUBEXPPERSTATE - 1) + +// GET JP Type: +// Since bit fields greater than 32 bits are not supported in some compilers +// the jp_DcdPopO field is expanded to include the jp_Type in the high 8 bits +// of the Word_t. +// First the read macro: + +#define JU_JPTYPE(PJP) ((PJP)->jp_Type) + +#define JU_JPLEAF_POP0(PJP) ((PJP)->jp_DcdP0[sizeof(Word_t) - 2]) + +#ifdef JU_64BIT + +#define JU_JPDCDPOP0(PJP) \ + ((Word_t)(PJP)->jp_DcdP0[0] << 48 | \ + (Word_t)(PJP)->jp_DcdP0[1] << 40 | \ + (Word_t)(PJP)->jp_DcdP0[2] << 32 | \ + (Word_t)(PJP)->jp_DcdP0[3] << 24 | \ + (Word_t)(PJP)->jp_DcdP0[4] << 16 | \ + (Word_t)(PJP)->jp_DcdP0[5] << 8 | \ + (Word_t)(PJP)->jp_DcdP0[6]) + + +#define JU_JPSETADT(PJP,ADDR,DCDPOP0,TYPE) \ +{ \ + (PJP)->jp_Addr = (ADDR); \ + (PJP)->jp_DcdP0[0] = (uint8_t)((Word_t)(DCDPOP0) >> 48); \ + (PJP)->jp_DcdP0[1] = (uint8_t)((Word_t)(DCDPOP0) >> 40); \ + (PJP)->jp_DcdP0[2] = (uint8_t)((Word_t)(DCDPOP0) >> 32); \ + (PJP)->jp_DcdP0[3] = (uint8_t)((Word_t)(DCDPOP0) >> 24); \ + (PJP)->jp_DcdP0[4] = (uint8_t)((Word_t)(DCDPOP0) >> 16); \ + (PJP)->jp_DcdP0[5] = (uint8_t)((Word_t)(DCDPOP0) >> 8); \ + (PJP)->jp_DcdP0[6] = (uint8_t)((Word_t)(DCDPOP0)); \ + (PJP)->jp_Type = (TYPE); \ +} + +#else // 32 Bit + +#define JU_JPDCDPOP0(PJP) \ + ((Word_t)(PJP)->jp_DcdP0[0] << 16 | \ + (Word_t)(PJP)->jp_DcdP0[1] << 8 | \ + (Word_t)(PJP)->jp_DcdP0[2]) + + +#define JU_JPSETADT(PJP,ADDR,DCDPOP0,TYPE) \ +{ \ + (PJP)->jp_Addr = (ADDR); \ + (PJP)->jp_DcdP0[0] = (uint8_t)((Word_t)(DCDPOP0) >> 16); \ + (PJP)->jp_DcdP0[1] = (uint8_t)((Word_t)(DCDPOP0) >> 8); \ + (PJP)->jp_DcdP0[2] = (uint8_t)((Word_t)(DCDPOP0)); \ + (PJP)->jp_Type = (TYPE); \ +} + +#endif // 32 Bit + +// NUMBER OF BITS IN A BRANCH OR LEAF BITMAP AND SUBEXPANSE: +// +// Note: cJU_BITSPERBITMAP must be the same as the number of JPs in a branch. + +#define cJU_BITSPERBITMAP cJU_SUBEXPPERSTATE + +// Bitmaps are accessed in units of "subexpanses": + +#define cJU_BITSPERSUBEXPB (sizeof(BITMAPB_t) * cJU_BITSPERBYTE) +#define cJU_NUMSUBEXPB (cJU_BITSPERBITMAP / cJU_BITSPERSUBEXPB) + +#define cJU_BITSPERSUBEXPL (sizeof(BITMAPL_t) * cJU_BITSPERBYTE) +#define cJU_NUMSUBEXPL (cJU_BITSPERBITMAP / cJU_BITSPERSUBEXPL) + + +// MASK FOR A SPECIFIED BIT IN A BITMAP: +// +// Warning: If BitNum is a variable, this results in a variable shift that is +// expensive, at least on some processors. Use with caution. +// +// Warning: BitNum must be less than cJU_BITSPERWORD, that is, 0 .. +// cJU_BITSPERWORD - 1, to avoid a truncated shift on some machines. +// +// TBD: Perhaps use an array[32] of masks instead of calculating them. + +#define JU_BITPOSMASKB(BITNUM) (1L << ((BITNUM) % cJU_BITSPERSUBEXPB)) +#define JU_BITPOSMASKL(BITNUM) (1L << ((BITNUM) % cJU_BITSPERSUBEXPL)) + + +// TEST/SET/CLEAR A BIT IN A BITMAP LEAF: +// +// Test if a byte-sized Digit (portion of Index) has a corresponding bit set in +// a bitmap, or set a byte-sized Digits bit into a bitmap, by looking up the +// correct subexpanse and then checking/setting the correct bit. +// +// Note: Mask higher bits, if any, for the convenience of the user of this +// macro, in case they pass a full Index, not just a digit. If the caller has +// a true 8-bit digit, make it of type uint8_t and the compiler should skip the +// unnecessary mask step. + +#define JU_SUBEXPL(DIGIT) (((DIGIT) / cJU_BITSPERSUBEXPL) & (cJU_NUMSUBEXPL-1)) + +#define JU_BITMAPTESTL(PJLB, INDEX) \ + (JU_JLB_BITMAP(PJLB, JU_SUBEXPL(INDEX)) & JU_BITPOSMASKL(INDEX)) + +#define JU_BITMAPSETL(PJLB, INDEX) \ + (JU_JLB_BITMAP(PJLB, JU_SUBEXPL(INDEX)) |= JU_BITPOSMASKL(INDEX)) + +#define JU_BITMAPCLEARL(PJLB, INDEX) \ + (JU_JLB_BITMAP(PJLB, JU_SUBEXPL(INDEX)) ^= JU_BITPOSMASKL(INDEX)) + + +// MAP BITMAP BIT OFFSET TO DIGIT: +// +// Given a digit variable to set, a bitmap branch or leaf subexpanse (base 0), +// the bitmap (BITMAP*_t) for that subexpanse, and an offset (Nth set bit in +// the bitmap, base 0), compute the digit (also base 0) corresponding to the +// subexpanse and offset by counting all bits in the bitmap until offset+1 set +// bits are seen. Avoid expensive variable shifts. Offset should be less than +// the number of set bits in the bitmap; assert this. +// +// If theres a better way to do this, I dont know what it is. + +#define JU_BITMAPDIGITB(DIGIT,SUBEXP,BITMAP,OFFSET) \ + { \ + BITMAPB_t bitmap = (BITMAP); int remain = (OFFSET); \ + (DIGIT) = (SUBEXP) * cJU_BITSPERSUBEXPB; \ + \ + while ((remain -= (bitmap & 1)) >= 0) \ + { \ + bitmap >>= 1; ++(DIGIT); \ + assert((DIGIT) < ((SUBEXP) + 1) * cJU_BITSPERSUBEXPB); \ + } \ + } + +#define JU_BITMAPDIGITL(DIGIT,SUBEXP,BITMAP,OFFSET) \ + { \ + BITMAPL_t bitmap = (BITMAP); int remain = (OFFSET); \ + (DIGIT) = (SUBEXP) * cJU_BITSPERSUBEXPL; \ + \ + while ((remain -= (bitmap & 1)) >= 0) \ + { \ + bitmap >>= 1; ++(DIGIT); \ + assert((DIGIT) < ((SUBEXP) + 1) * cJU_BITSPERSUBEXPL); \ + } \ + } + + +// MASKS FOR PORTIONS OF 32-BIT WORDS: +// +// These are useful for bitmap subexpanses. +// +// "LOWER"/"HIGHER" means bits representing lower/higher-valued Indexes. The +// exact order of bits in the word is explicit here but is hidden from the +// caller. +// +// "EXC" means exclusive of the specified bit; "INC" means inclusive. +// +// In each case, BitPos is either "JU_BITPOSMASK*(BitNum)", or a variable saved +// from an earlier call of that macro; either way, it must be a 32-bit word +// with a single bit set. In the first case, assume the compiler is smart +// enough to optimize out common subexpressions. +// +// The expressions depend on unsigned decimal math that should be universal. + +#define JU_MASKLOWEREXC( BITPOS) ((BITPOS) - 1) +#define JU_MASKLOWERINC( BITPOS) (JU_MASKLOWEREXC(BITPOS) | (BITPOS)) +#define JU_MASKHIGHERINC(BITPOS) (-(BITPOS)) +#define JU_MASKHIGHEREXC(BITPOS) (JU_MASKHIGHERINC(BITPOS) ^ (BITPOS)) + + +// **************************************************************************** +// SUPPORT FOR NATIVE INDEX SIZES +// **************************************************************************** +// +// Copy a series of generic objects (uint8_t, uint16_t, uint32_t, Word_t) from +// one place to another. + +#define JU_COPYMEM(PDST,PSRC,POP1) \ + { \ + Word_t i_ndex = 0; \ + assert((POP1) > 0); \ + do { (PDST)[i_ndex] = (PSRC)[i_ndex]; } \ + while (++i_ndex < (POP1)); \ + } + + +// **************************************************************************** +// SUPPORT FOR NON-NATIVE INDEX SIZES +// **************************************************************************** +// +// Copy a 3-byte Index pointed by a uint8_t * to a Word_t: +// +#define JU_COPY3_PINDEX_TO_LONG(DESTLONG,PINDEX) \ + DESTLONG = (Word_t)(PINDEX)[0] << 16; \ + DESTLONG += (Word_t)(PINDEX)[1] << 8; \ + DESTLONG += (Word_t)(PINDEX)[2] + +// Copy a Word_t to a 3-byte Index pointed at by a uint8_t *: + +#define JU_COPY3_LONG_TO_PINDEX(PINDEX,SOURCELONG) \ + (PINDEX)[0] = (uint8_t)((SOURCELONG) >> 16); \ + (PINDEX)[1] = (uint8_t)((SOURCELONG) >> 8); \ + (PINDEX)[2] = (uint8_t)((SOURCELONG)) + +#ifdef JU_64BIT + +// Copy a 5-byte Index pointed by a uint8_t * to a Word_t: +// +#define JU_COPY5_PINDEX_TO_LONG(DESTLONG,PINDEX) \ + DESTLONG = (Word_t)(PINDEX)[0] << 32; \ + DESTLONG += (Word_t)(PINDEX)[1] << 24; \ + DESTLONG += (Word_t)(PINDEX)[2] << 16; \ + DESTLONG += (Word_t)(PINDEX)[3] << 8; \ + DESTLONG += (Word_t)(PINDEX)[4] + +// Copy a Word_t to a 5-byte Index pointed at by a uint8_t *: + +#define JU_COPY5_LONG_TO_PINDEX(PINDEX,SOURCELONG) \ + (PINDEX)[0] = (uint8_t)((SOURCELONG) >> 32); \ + (PINDEX)[1] = (uint8_t)((SOURCELONG) >> 24); \ + (PINDEX)[2] = (uint8_t)((SOURCELONG) >> 16); \ + (PINDEX)[3] = (uint8_t)((SOURCELONG) >> 8); \ + (PINDEX)[4] = (uint8_t)((SOURCELONG)) + +// Copy a 6-byte Index pointed by a uint8_t * to a Word_t: +// +#define JU_COPY6_PINDEX_TO_LONG(DESTLONG,PINDEX) \ + DESTLONG = (Word_t)(PINDEX)[0] << 40; \ + DESTLONG += (Word_t)(PINDEX)[1] << 32; \ + DESTLONG += (Word_t)(PINDEX)[2] << 24; \ + DESTLONG += (Word_t)(PINDEX)[3] << 16; \ + DESTLONG += (Word_t)(PINDEX)[4] << 8; \ + DESTLONG += (Word_t)(PINDEX)[5] + +// Copy a Word_t to a 6-byte Index pointed at by a uint8_t *: + +#define JU_COPY6_LONG_TO_PINDEX(PINDEX,SOURCELONG) \ + (PINDEX)[0] = (uint8_t)((SOURCELONG) >> 40); \ + (PINDEX)[1] = (uint8_t)((SOURCELONG) >> 32); \ + (PINDEX)[2] = (uint8_t)((SOURCELONG) >> 24); \ + (PINDEX)[3] = (uint8_t)((SOURCELONG) >> 16); \ + (PINDEX)[4] = (uint8_t)((SOURCELONG) >> 8); \ + (PINDEX)[5] = (uint8_t)((SOURCELONG)) + +// Copy a 7-byte Index pointed by a uint8_t * to a Word_t: +// +#define JU_COPY7_PINDEX_TO_LONG(DESTLONG,PINDEX) \ + DESTLONG = (Word_t)(PINDEX)[0] << 48; \ + DESTLONG += (Word_t)(PINDEX)[1] << 40; \ + DESTLONG += (Word_t)(PINDEX)[2] << 32; \ + DESTLONG += (Word_t)(PINDEX)[3] << 24; \ + DESTLONG += (Word_t)(PINDEX)[4] << 16; \ + DESTLONG += (Word_t)(PINDEX)[5] << 8; \ + DESTLONG += (Word_t)(PINDEX)[6] + +// Copy a Word_t to a 7-byte Index pointed at by a uint8_t *: + +#define JU_COPY7_LONG_TO_PINDEX(PINDEX,SOURCELONG) \ + (PINDEX)[0] = (uint8_t)((SOURCELONG) >> 48); \ + (PINDEX)[1] = (uint8_t)((SOURCELONG) >> 40); \ + (PINDEX)[2] = (uint8_t)((SOURCELONG) >> 32); \ + (PINDEX)[3] = (uint8_t)((SOURCELONG) >> 24); \ + (PINDEX)[4] = (uint8_t)((SOURCELONG) >> 16); \ + (PINDEX)[5] = (uint8_t)((SOURCELONG) >> 8); \ + (PINDEX)[6] = (uint8_t)((SOURCELONG)) + +#endif // JU_64BIT + +// **************************************************************************** +// COMMON CODE FRAGMENTS (MACROS) +// **************************************************************************** +// +// These code chunks are shared between various source files. + + +// SET (REPLACE) ONE DIGIT IN AN INDEX: +// +// To avoid endian issues, use masking and ORing, which operates in a +// big-endian register, rather than treating the Index as an array of bytes, +// though that would be simpler, but would operate in endian-specific memory. +// +// TBD: This contains two variable shifts, is that bad? + +#define JU_SETDIGIT(INDEX,DIGIT,STATE) \ + (INDEX) = ((INDEX) & (~cJU_MASKATSTATE(STATE))) \ + | (((Word_t) (DIGIT)) \ + << (((STATE) - 1) * cJU_BITSPERBYTE)) + +// Fast version for single LSB: + +#define JU_SETDIGIT1(INDEX,DIGIT) (INDEX) = ((INDEX) & ~0xff) | (DIGIT) + + +// SET (REPLACE) "N" LEAST DIGITS IN AN INDEX: + +#define JU_SETDIGITS(INDEX,INDEX2,cSTATE) \ + (INDEX) = ((INDEX ) & (~JU_LEASTBYTESMASK(cSTATE))) \ + | ((INDEX2) & ( JU_LEASTBYTESMASK(cSTATE))) + +// COPY DECODE BYTES FROM JP TO INDEX: +// +// Modify Index digit(s) to match the bytes in jp_DcdPopO in case one or more +// branches are skipped and the digits are significant. Its probably faster +// to just do this unconditionally than to check if its necessary. +// +// To avoid endian issues, use masking and ORing, which operates in a +// big-endian register, rather than treating the Index as an array of bytes, +// though that would be simpler, but would operate in endian-specific memory. +// +// WARNING: Must not call JU_LEASTBYTESMASK (via cJU_DCDMASK) with Bytes = +// cJU_ROOTSTATE or a bad mask is generated, but there are no Dcd bytes to copy +// in this case anyway. In fact there are no Dcd bytes unless State < +// cJU_ROOTSTATE - 1, so dont call this macro except in those cases. +// +// TBD: It would be nice to validate jp_DcdPopO against known digits to ensure +// no corruption, but this is non-trivial. + +#define JU_SETDCD(INDEX,PJP,cSTATE) \ + (INDEX) = ((INDEX) & ~cJU_DCDMASK(cSTATE)) \ + | (JU_JPDCDPOP0(PJP) & cJU_DCDMASK(cSTATE)) + +// INSERT/DELETE AN INDEX IN-PLACE IN MEMORY: +// +// Given a pointer to an array of "even" (native), same-sized objects +// (indexes), the current population of the array, an offset in the array, and +// a new Index to insert, "shift up" the array elements (Indexes) above the +// insertion point and insert the new Index. Assume there is sufficient memory +// to do this. +// +// In these macros, "i_offset" is an index offset, and "b_off" is a byte +// offset for odd Index sizes. +// +// Note: Endian issues only arise fro insertion, not deletion, and even for +// insertion, they are transparent when native (even) objects are used, and +// handled explicitly for odd (non-native) Index sizes. +// +// Note: The following macros are tricky enough that there is some test code +// for them appended to this file. + +#define JU_INSERTINPLACE(PARRAY,POP1,OFFSET,INDEX) \ + assert((long) (POP1) > 0); \ + assert((Word_t) (OFFSET) <= (Word_t) (POP1)); \ + { \ + Word_t i_offset = (POP1); \ + \ + while (i_offset-- > (OFFSET)) \ + (PARRAY)[i_offset + 1] = (PARRAY)[i_offset]; \ + \ + (PARRAY)[OFFSET] = (INDEX); \ + } + + +// Variation for non-native Indexes, where cIS = Index Size +// and PByte must point to a uint8_t (byte); shift byte-by-byte: +// + +#define JU_INSERTINPLACE3(PBYTE,POP1,OFFSET,INDEX) \ +{ \ + Word_t i_off = POP1; \ + \ + while (i_off-- > (OFFSET)) \ + { \ + Word_t i_dx = i_off * 3; \ + (PBYTE)[i_dx + 0 + 3] = (PBYTE)[i_dx + 0]; \ + (PBYTE)[i_dx + 1 + 3] = (PBYTE)[i_dx + 1]; \ + (PBYTE)[i_dx + 2 + 3] = (PBYTE)[i_dx + 2]; \ + } \ + JU_COPY3_LONG_TO_PINDEX(&((PBYTE)[(OFFSET) * 3]), INDEX); \ +} + +#ifdef JU_64BIT + +#define JU_INSERTINPLACE5(PBYTE,POP1,OFFSET,INDEX) \ +{ \ + Word_t i_off = POP1; \ + \ + while (i_off-- > (OFFSET)) \ + { \ + Word_t i_dx = i_off * 5; \ + (PBYTE)[i_dx + 0 + 5] = (PBYTE)[i_dx + 0]; \ + (PBYTE)[i_dx + 1 + 5] = (PBYTE)[i_dx + 1]; \ + (PBYTE)[i_dx + 2 + 5] = (PBYTE)[i_dx + 2]; \ + (PBYTE)[i_dx + 3 + 5] = (PBYTE)[i_dx + 3]; \ + (PBYTE)[i_dx + 4 + 5] = (PBYTE)[i_dx + 4]; \ + } \ + JU_COPY5_LONG_TO_PINDEX(&((PBYTE)[(OFFSET) * 5]), INDEX); \ +} + +#define JU_INSERTINPLACE6(PBYTE,POP1,OFFSET,INDEX) \ +{ \ + Word_t i_off = POP1; \ + \ + while (i_off-- > (OFFSET)) \ + { \ + Word_t i_dx = i_off * 6; \ + (PBYTE)[i_dx + 0 + 6] = (PBYTE)[i_dx + 0]; \ + (PBYTE)[i_dx + 1 + 6] = (PBYTE)[i_dx + 1]; \ + (PBYTE)[i_dx + 2 + 6] = (PBYTE)[i_dx + 2]; \ + (PBYTE)[i_dx + 3 + 6] = (PBYTE)[i_dx + 3]; \ + (PBYTE)[i_dx + 4 + 6] = (PBYTE)[i_dx + 4]; \ + (PBYTE)[i_dx + 5 + 6] = (PBYTE)[i_dx + 5]; \ + } \ + JU_COPY6_LONG_TO_PINDEX(&((PBYTE)[(OFFSET) * 6]), INDEX); \ +} + +#define JU_INSERTINPLACE7(PBYTE,POP1,OFFSET,INDEX) \ +{ \ + Word_t i_off = POP1; \ + \ + while (i_off-- > (OFFSET)) \ + { \ + Word_t i_dx = i_off * 7; \ + (PBYTE)[i_dx + 0 + 7] = (PBYTE)[i_dx + 0]; \ + (PBYTE)[i_dx + 1 + 7] = (PBYTE)[i_dx + 1]; \ + (PBYTE)[i_dx + 2 + 7] = (PBYTE)[i_dx + 2]; \ + (PBYTE)[i_dx + 3 + 7] = (PBYTE)[i_dx + 3]; \ + (PBYTE)[i_dx + 4 + 7] = (PBYTE)[i_dx + 4]; \ + (PBYTE)[i_dx + 5 + 7] = (PBYTE)[i_dx + 5]; \ + (PBYTE)[i_dx + 6 + 7] = (PBYTE)[i_dx + 6]; \ + } \ + JU_COPY7_LONG_TO_PINDEX(&((PBYTE)[(OFFSET) * 7]), INDEX); \ +} +#endif // JU_64BIT + +// Counterparts to the above for deleting an Index: +// +// "Shift down" the array elements starting at the Index to be deleted. + +#define JU_DELETEINPLACE(PARRAY,POP1,OFFSET,IGNORE) \ + assert((long) (POP1) > 0); \ + assert((Word_t) (OFFSET) < (Word_t) (POP1)); \ + { \ + Word_t i_offset = (OFFSET); \ + \ + while (++i_offset < (POP1)) \ + (PARRAY)[i_offset - 1] = (PARRAY)[i_offset]; \ + } + +// Variation for odd-byte-sized (non-native) Indexes, where cIS = Index Size +// and PByte must point to a uint8_t (byte); copy byte-by-byte: +// +// Note: If cIS == 1, JU_DELETEINPLACE_ODD == JU_DELETEINPLACE. +// +// Note: There are no endian issues here because bytes are just shifted as-is, +// not converted to/from an Index. + +#define JU_DELETEINPLACE_ODD(PBYTE,POP1,OFFSET,cIS) \ + assert((long) (POP1) > 0); \ + assert((Word_t) (OFFSET) < (Word_t) (POP1)); \ + { \ + Word_t b_off = (((OFFSET) + 1) * (cIS)) - 1; \ + \ + while (++b_off < ((POP1) * (cIS))) \ + (PBYTE)[b_off - (cIS)] = (PBYTE)[b_off]; \ + } + + +// INSERT/DELETE AN INDEX WHILE COPYING OTHERS: +// +// Copy PSource[] to PDest[], where PSource[] has Pop1 elements (Indexes), +// inserting Index at PDest[Offset]. Unlike JU_*INPLACE*() above, these macros +// are used when moving Indexes from one memory object to another. + +#define JU_INSERTCOPY(PDEST,PSOURCE,POP1,OFFSET,INDEX) \ + assert((long) (POP1) > 0); \ + assert((Word_t) (OFFSET) <= (Word_t) (POP1)); \ + { \ + Word_t i_offset; \ + \ + for (i_offset = 0; i_offset < (OFFSET); ++i_offset) \ + (PDEST)[i_offset] = (PSOURCE)[i_offset]; \ + \ + (PDEST)[i_offset] = (INDEX); \ + \ + for (/* null */; i_offset < (POP1); ++i_offset) \ + (PDEST)[i_offset + 1] = (PSOURCE)[i_offset]; \ + } + +#define JU_INSERTCOPY3(PDEST,PSOURCE,POP1,OFFSET,INDEX) \ +assert((long) (POP1) > 0); \ +assert((Word_t) (OFFSET) <= (Word_t) (POP1)); \ +{ \ + Word_t o_ff; \ + \ + for (o_ff = 0; o_ff < (OFFSET); o_ff++) \ + { \ + Word_t i_dx = o_ff * 3; \ + (PDEST)[i_dx + 0] = (PSOURCE)[i_dx + 0]; \ + (PDEST)[i_dx + 1] = (PSOURCE)[i_dx + 1]; \ + (PDEST)[i_dx + 2] = (PSOURCE)[i_dx + 2]; \ + } \ + JU_COPY3_LONG_TO_PINDEX(&((PDEST)[(OFFSET) * 3]), INDEX); \ + \ + for (/* null */; o_ff < (POP1); o_ff++) \ + { \ + Word_t i_dx = o_ff * 3; \ + (PDEST)[i_dx + 0 + 3] = (PSOURCE)[i_dx + 0]; \ + (PDEST)[i_dx + 1 + 3] = (PSOURCE)[i_dx + 1]; \ + (PDEST)[i_dx + 2 + 3] = (PSOURCE)[i_dx + 2]; \ + } \ +} + +#ifdef JU_64BIT + +#define JU_INSERTCOPY5(PDEST,PSOURCE,POP1,OFFSET,INDEX) \ +assert((long) (POP1) > 0); \ +assert((Word_t) (OFFSET) <= (Word_t) (POP1)); \ +{ \ + Word_t o_ff; \ + \ + for (o_ff = 0; o_ff < (OFFSET); o_ff++) \ + { \ + Word_t i_dx = o_ff * 5; \ + (PDEST)[i_dx + 0] = (PSOURCE)[i_dx + 0]; \ + (PDEST)[i_dx + 1] = (PSOURCE)[i_dx + 1]; \ + (PDEST)[i_dx + 2] = (PSOURCE)[i_dx + 2]; \ + (PDEST)[i_dx + 3] = (PSOURCE)[i_dx + 3]; \ + (PDEST)[i_dx + 4] = (PSOURCE)[i_dx + 4]; \ + } \ + JU_COPY5_LONG_TO_PINDEX(&((PDEST)[(OFFSET) * 5]), INDEX); \ + \ + for (/* null */; o_ff < (POP1); o_ff++) \ + { \ + Word_t i_dx = o_ff * 5; \ + (PDEST)[i_dx + 0 + 5] = (PSOURCE)[i_dx + 0]; \ + (PDEST)[i_dx + 1 + 5] = (PSOURCE)[i_dx + 1]; \ + (PDEST)[i_dx + 2 + 5] = (PSOURCE)[i_dx + 2]; \ + (PDEST)[i_dx + 3 + 5] = (PSOURCE)[i_dx + 3]; \ + (PDEST)[i_dx + 4 + 5] = (PSOURCE)[i_dx + 4]; \ + } \ +} + +#define JU_INSERTCOPY6(PDEST,PSOURCE,POP1,OFFSET,INDEX) \ +assert((long) (POP1) > 0); \ +assert((Word_t) (OFFSET) <= (Word_t) (POP1)); \ +{ \ + Word_t o_ff; \ + \ + for (o_ff = 0; o_ff < (OFFSET); o_ff++) \ + { \ + Word_t i_dx = o_ff * 6; \ + (PDEST)[i_dx + 0] = (PSOURCE)[i_dx + 0]; \ + (PDEST)[i_dx + 1] = (PSOURCE)[i_dx + 1]; \ + (PDEST)[i_dx + 2] = (PSOURCE)[i_dx + 2]; \ + (PDEST)[i_dx + 3] = (PSOURCE)[i_dx + 3]; \ + (PDEST)[i_dx + 4] = (PSOURCE)[i_dx + 4]; \ + (PDEST)[i_dx + 5] = (PSOURCE)[i_dx + 5]; \ + } \ + JU_COPY6_LONG_TO_PINDEX(&((PDEST)[(OFFSET) * 6]), INDEX); \ + \ + for (/* null */; o_ff < (POP1); o_ff++) \ + { \ + Word_t i_dx = o_ff * 6; \ + (PDEST)[i_dx + 0 + 6] = (PSOURCE)[i_dx + 0]; \ + (PDEST)[i_dx + 1 + 6] = (PSOURCE)[i_dx + 1]; \ + (PDEST)[i_dx + 2 + 6] = (PSOURCE)[i_dx + 2]; \ + (PDEST)[i_dx + 3 + 6] = (PSOURCE)[i_dx + 3]; \ + (PDEST)[i_dx + 4 + 6] = (PSOURCE)[i_dx + 4]; \ + (PDEST)[i_dx + 5 + 6] = (PSOURCE)[i_dx + 5]; \ + } \ +} + +#define JU_INSERTCOPY7(PDEST,PSOURCE,POP1,OFFSET,INDEX) \ +assert((long) (POP1) > 0); \ +assert((Word_t) (OFFSET) <= (Word_t) (POP1)); \ +{ \ + Word_t o_ff; \ + \ + for (o_ff = 0; o_ff < (OFFSET); o_ff++) \ + { \ + Word_t i_dx = o_ff * 7; \ + (PDEST)[i_dx + 0] = (PSOURCE)[i_dx + 0]; \ + (PDEST)[i_dx + 1] = (PSOURCE)[i_dx + 1]; \ + (PDEST)[i_dx + 2] = (PSOURCE)[i_dx + 2]; \ + (PDEST)[i_dx + 3] = (PSOURCE)[i_dx + 3]; \ + (PDEST)[i_dx + 4] = (PSOURCE)[i_dx + 4]; \ + (PDEST)[i_dx + 5] = (PSOURCE)[i_dx + 5]; \ + (PDEST)[i_dx + 6] = (PSOURCE)[i_dx + 6]; \ + } \ + JU_COPY7_LONG_TO_PINDEX(&((PDEST)[(OFFSET) * 7]), INDEX); \ + \ + for (/* null */; o_ff < (POP1); o_ff++) \ + { \ + Word_t i_dx = o_ff * 7; \ + (PDEST)[i_dx + 0 + 7] = (PSOURCE)[i_dx + 0]; \ + (PDEST)[i_dx + 1 + 7] = (PSOURCE)[i_dx + 1]; \ + (PDEST)[i_dx + 2 + 7] = (PSOURCE)[i_dx + 2]; \ + (PDEST)[i_dx + 3 + 7] = (PSOURCE)[i_dx + 3]; \ + (PDEST)[i_dx + 4 + 7] = (PSOURCE)[i_dx + 4]; \ + (PDEST)[i_dx + 5 + 7] = (PSOURCE)[i_dx + 5]; \ + (PDEST)[i_dx + 6 + 7] = (PSOURCE)[i_dx + 6]; \ + } \ +} + +#endif // JU_64BIT + +// Counterparts to the above for deleting an Index: + +#define JU_DELETECOPY(PDEST,PSOURCE,POP1,OFFSET,IGNORE) \ + assert((long) (POP1) > 0); \ + assert((Word_t) (OFFSET) < (Word_t) (POP1)); \ + { \ + Word_t i_offset; \ + \ + for (i_offset = 0; i_offset < (OFFSET); ++i_offset) \ + (PDEST)[i_offset] = (PSOURCE)[i_offset]; \ + \ + for (++i_offset; i_offset < (POP1); ++i_offset) \ + (PDEST)[i_offset - 1] = (PSOURCE)[i_offset]; \ + } + +// Variation for odd-byte-sized (non-native) Indexes, where cIS = Index Size; +// copy byte-by-byte: +// +// Note: There are no endian issues here because bytes are just shifted as-is, +// not converted to/from an Index. +// +// Note: If cIS == 1, JU_DELETECOPY_ODD == JU_DELETECOPY, at least in concept. + +#define JU_DELETECOPY_ODD(PDEST,PSOURCE,POP1,OFFSET,cIS) \ + assert((long) (POP1) > 0); \ + assert((Word_t) (OFFSET) < (Word_t) (POP1)); \ + { \ + uint8_t *_Pdest = (uint8_t *) (PDEST); \ + uint8_t *_Psource = (uint8_t *) (PSOURCE); \ + Word_t b_off; \ + \ + for (b_off = 0; b_off < ((OFFSET) * (cIS)); ++b_off) \ + *_Pdest++ = *_Psource++; \ + \ + _Psource += (cIS); \ + \ + for (b_off += (cIS); b_off < ((POP1) * (cIS)); ++b_off) \ + *_Pdest++ = *_Psource++; \ + } + + +// GENERIC RETURN CODE HANDLING FOR JUDY1 (NO VALUE AREAS) AND JUDYL (VALUE +// AREAS): +// +// This common code hides Judy1 versus JudyL details of how to return various +// conditions, including a pointer to a value area for JudyL. +// +// First, define an internal variation of JERR called JERRI (I = int) to make +// lint happy. We accidentally shipped to 11.11 OEUR with all functions that +// return int or Word_t using JERR, which is type Word_t, for errors. Lint +// complains about this for functions that return int. So, internally use +// JERRI for error returns from the int functions. Experiments show that +// callers which compare int Foo() to (Word_t) JERR (~0UL) are OK, since JERRI +// sign-extends to match JERR. + +#define JERRI ((int) ~0) // see above. + +#ifdef JUDY1 + +#define JU_RET_FOUND return(1) +#define JU_RET_NOTFOUND return(0) + +// For Judy1, these all "fall through" to simply JU_RET_FOUND, since there is no +// value area pointer to return: + +#define JU_RET_FOUND_LEAFW(PJLW,POP1,OFFSET) JU_RET_FOUND + +#define JU_RET_FOUND_JPM(Pjpm) JU_RET_FOUND +#define JU_RET_FOUND_PVALUE(Pjv,OFFSET) JU_RET_FOUND +#ifndef JU_64BIT +#define JU_RET_FOUND_LEAF1(Pjll,POP1,OFFSET) JU_RET_FOUND +#endif +#define JU_RET_FOUND_LEAF2(Pjll,POP1,OFFSET) JU_RET_FOUND +#define JU_RET_FOUND_LEAF3(Pjll,POP1,OFFSET) JU_RET_FOUND +#ifdef JU_64BIT +#define JU_RET_FOUND_LEAF4(Pjll,POP1,OFFSET) JU_RET_FOUND +#define JU_RET_FOUND_LEAF5(Pjll,POP1,OFFSET) JU_RET_FOUND +#define JU_RET_FOUND_LEAF6(Pjll,POP1,OFFSET) JU_RET_FOUND +#define JU_RET_FOUND_LEAF7(Pjll,POP1,OFFSET) JU_RET_FOUND +#endif +#define JU_RET_FOUND_IMM_01(Pjp) JU_RET_FOUND +#define JU_RET_FOUND_IMM(Pjp,OFFSET) JU_RET_FOUND + +// Note: No JudyL equivalent: + +#define JU_RET_FOUND_FULLPOPU1 JU_RET_FOUND +#define JU_RET_FOUND_LEAF_B1(PJLB,SUBEXP,OFFSET) JU_RET_FOUND + +#else // JUDYL + +// JU_RET_FOUND // see below; must NOT be defined for JudyL. +#define JU_RET_NOTFOUND return((PPvoid_t) NULL) + +// For JudyL, the location of the value area depends on the JP type and other +// factors: +// +// TBD: The value areas should be accessed via data structures, here and in +// Dougs code, not by hard-coded address calculations. +// +// This is useful in insert/delete code when the value area is returned from +// lower levels in the JPM: + +#define JU_RET_FOUND_JPM(Pjpm) return((PPvoid_t) ((Pjpm)->jpm_PValue)) + +// This is useful in insert/delete code when the value area location is already +// computed: + +#define JU_RET_FOUND_PVALUE(Pjv,OFFSET) return((PPvoid_t) ((Pjv) + OFFSET)) + +#define JU_RET_FOUND_LEAFW(PJLW,POP1,OFFSET) \ + return((PPvoid_t) (JL_LEAFWVALUEAREA(PJLW, POP1) + (OFFSET))) + +#define JU_RET_FOUND_LEAF1(Pjll,POP1,OFFSET) \ + return((PPvoid_t) (JL_LEAF1VALUEAREA(Pjll, POP1) + (OFFSET))) +#define JU_RET_FOUND_LEAF2(Pjll,POP1,OFFSET) \ + return((PPvoid_t) (JL_LEAF2VALUEAREA(Pjll, POP1) + (OFFSET))) +#define JU_RET_FOUND_LEAF3(Pjll,POP1,OFFSET) \ + return((PPvoid_t) (JL_LEAF3VALUEAREA(Pjll, POP1) + (OFFSET))) +#ifdef JU_64BIT +#define JU_RET_FOUND_LEAF4(Pjll,POP1,OFFSET) \ + return((PPvoid_t) (JL_LEAF4VALUEAREA(Pjll, POP1) + (OFFSET))) +#define JU_RET_FOUND_LEAF5(Pjll,POP1,OFFSET) \ + return((PPvoid_t) (JL_LEAF5VALUEAREA(Pjll, POP1) + (OFFSET))) +#define JU_RET_FOUND_LEAF6(Pjll,POP1,OFFSET) \ + return((PPvoid_t) (JL_LEAF6VALUEAREA(Pjll, POP1) + (OFFSET))) +#define JU_RET_FOUND_LEAF7(Pjll,POP1,OFFSET) \ + return((PPvoid_t) (JL_LEAF7VALUEAREA(Pjll, POP1) + (OFFSET))) +#endif + +// Note: Here jp_Addr is a value area itself and not an address, so P_JV() is +// not needed: + +#define JU_RET_FOUND_IMM_01(PJP) return((PPvoid_t) (&((PJP)->jp_Addr))) + +// Note: Here jp_Addr is a pointer to a separately-mallocd value area, so +// P_JV() is required; likewise for JL_JLB_PVALUE: + +#define JU_RET_FOUND_IMM(PJP,OFFSET) \ + return((PPvoid_t) (P_JV((PJP)->jp_Addr) + (OFFSET))) + +#define JU_RET_FOUND_LEAF_B1(PJLB,SUBEXP,OFFSET) \ + return((PPvoid_t) (P_JV(JL_JLB_PVALUE(PJLB, SUBEXP)) + (OFFSET))) + +#endif // JUDYL + + +// GENERIC ERROR HANDLING: +// +// This is complicated by variations in the needs of the callers of these +// macros. Only use JU_SET_ERRNO() for PJError, because it can be null; use +// JU_SET_ERRNO_NONNULL() for Pjpm, which is never null, and also in other +// cases where the pointer is known not to be null (to save dead branches). +// +// Note: Most cases of JU_ERRNO_OVERRUN or JU_ERRNO_CORRUPT should result in +// an assertion failure in debug code, so they are more likely to be caught, so +// do that here in each macro. + +#define JU_SET_ERRNO(PJError, JErrno) \ + { \ + assert((JErrno) != JU_ERRNO_OVERRUN); \ + assert((JErrno) != JU_ERRNO_CORRUPT); \ + \ + if (PJError != (PJError_t) NULL) \ + { \ + JU_ERRNO(PJError) = (JErrno); \ + JU_ERRID(PJError) = __LINE__; \ + } \ + } + +// Variation for callers who know already that PJError is non-null; and, it can +// also be Pjpm (both PJError_t and Pjpm_t have je_* fields), so only assert it +// for null, not cast to any specific pointer type: + +#define JU_SET_ERRNO_NONNULL(PJError, JErrno) \ + { \ + assert((JErrno) != JU_ERRNO_OVERRUN); \ + assert((JErrno) != JU_ERRNO_CORRUPT); \ + assert(PJError); \ + \ + JU_ERRNO(PJError) = (JErrno); \ + JU_ERRID(PJError) = __LINE__; \ + } + +// Variation to copy error info from a (required) JPM to an (optional) +// PJError_t: +// +// Note: The assertions above about JU_ERRNO_OVERRUN and JU_ERRNO_CORRUPT +// should have already popped, so they are not needed here. + +#define JU_COPY_ERRNO(PJError, Pjpm) \ + { \ + if (PJError) \ + { \ + JU_ERRNO(PJError) = (uint8_t)JU_ERRNO(Pjpm); \ + JU_ERRID(PJError) = JU_ERRID(Pjpm); \ + } \ + } + +// For JErrno parameter to previous macros upon return from Judy*Alloc*(): +// +// The memory allocator returns an address of 0 for out of memory, +// 1..sizeof(Word_t)-1 for corruption (an invalid pointer), otherwise a valid +// pointer. + +#define JU_ALLOC_ERRNO(ADDR) \ + (((void *) (ADDR) != (void *) NULL) ? JU_ERRNO_OVERRUN : JU_ERRNO_NOMEM) + +#define JU_CHECKALLOC(Type,Ptr,Retval) \ + if ((Ptr) < (Type) sizeof(Word_t)) \ + { \ + JU_SET_ERRNO(PJError, JU_ALLOC_ERRNO(Ptr)); \ + return(Retval); \ + } + +// Leaf search routines + +#ifdef JU_NOINLINE + +int j__udySearchLeaf1(Pjll_t Pjll, Word_t LeafPop1, Word_t Index); +int j__udySearchLeaf2(Pjll_t Pjll, Word_t LeafPop1, Word_t Index); +int j__udySearchLeaf3(Pjll_t Pjll, Word_t LeafPop1, Word_t Index); + +#ifdef JU_64BIT + +int j__udySearchLeaf4(Pjll_t Pjll, Word_t LeafPop1, Word_t Index); +int j__udySearchLeaf5(Pjll_t Pjll, Word_t LeafPop1, Word_t Index); +int j__udySearchLeaf6(Pjll_t Pjll, Word_t LeafPop1, Word_t Index); +int j__udySearchLeaf7(Pjll_t Pjll, Word_t LeafPop1, Word_t Index); + +#endif // JU_64BIT + +int j__udySearchLeafW(Pjlw_t Pjlw, Word_t LeafPop1, Word_t Index); + +#else // complier support for inline + +#ifdef JU_WIN +static __inline int j__udySearchLeaf1(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#else +static inline int j__udySearchLeaf1(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#endif +{ SEARCHLEAFNATIVE(uint8_t, Pjll, LeafPop1, Index); } + +#ifdef JU_WIN +static __inline int j__udySearchLeaf2(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#else +static inline int j__udySearchLeaf2(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#endif +{ SEARCHLEAFNATIVE(uint16_t, Pjll, LeafPop1, Index); } + +#ifdef JU_WIN +static __inline int j__udySearchLeaf3(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#else +static inline int j__udySearchLeaf3(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#endif +{ SEARCHLEAFNONNAT(Pjll, LeafPop1, Index, 3, JU_COPY3_PINDEX_TO_LONG); } + +#ifdef JU_64BIT + +#ifdef JU_WIN +static __inline int j__udySearchLeaf4(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#else +static inline int j__udySearchLeaf4(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#endif +{ SEARCHLEAFNATIVE(uint32_t, Pjll, LeafPop1, Index); } + +#ifdef JU_WIN +static __inline int j__udySearchLeaf5(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#else +static inline int j__udySearchLeaf5(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#endif +{ SEARCHLEAFNONNAT(Pjll, LeafPop1, Index, 5, JU_COPY5_PINDEX_TO_LONG); } + +#ifdef JU_WIN +static __inline int j__udySearchLeaf6(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#else +static inline int j__udySearchLeaf6(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#endif +{ SEARCHLEAFNONNAT(Pjll, LeafPop1, Index, 6, JU_COPY6_PINDEX_TO_LONG); } + +#ifdef JU_WIN +static __inline int j__udySearchLeaf7(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#else +static inline int j__udySearchLeaf7(Pjll_t Pjll, Word_t LeafPop1, Word_t Index) +#endif +{ SEARCHLEAFNONNAT(Pjll, LeafPop1, Index, 7, JU_COPY7_PINDEX_TO_LONG); } + +#endif // JU_64BIT + +#ifdef JU_WIN +static __inline int j__udySearchLeafW(Pjlw_t Pjlw, Word_t LeafPop1, Word_t Index) +#else +static inline int j__udySearchLeafW(Pjlw_t Pjlw, Word_t LeafPop1, Word_t Index) +#endif +{ SEARCHLEAFNATIVE(Word_t, Pjlw, LeafPop1, Index); } + +#endif // compiler support for inline + +#endif // ! _JUDYPRIVATE_INCLUDED diff --git a/libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h b/libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h new file mode 100644 index 0000000..5b47048 --- /dev/null +++ b/libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h @@ -0,0 +1,485 @@ +#ifndef _JUDYPRIVATE1L_INCLUDED +#define _JUDYPRIVATE1L_INCLUDED +// _________________ +// +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.31 $ $Source: /judy/src/JudyCommon/JudyPrivate1L.h $ + +// **************************************************************************** +// Declare common cJU_* names for JP Types that occur in both Judy1 and JudyL, +// for use by code that ifdefs JUDY1 and JUDYL. Only JP Types common to both +// Judy1 and JudyL are #defined here with equivalent cJU_* names. JP Types +// unique to only Judy1 or JudyL are listed in comments, so the type lists +// match the Judy1.h and JudyL.h files. +// +// This file also defines cJU_* for other JP-related constants and functions +// that some shared JUDY1/JUDYL code finds handy. +// +// At least in principle this file should be included AFTER Judy1.h or JudyL.h. +// +// WARNING: This file must be kept consistent with the enums in Judy1.h and +// JudyL.h. +// +// TBD: You might think, why not define common cJU_* enums in, say, +// JudyPrivate.h, and then inherit them into superset enums in Judy1.h and +// JudyL.h? The problem is that the enum lists for each class (cJ1_* and +// cJL_*) must be numerically "packed" into the correct order, for two reasons: +// (1) allow the compiler to generate "tight" switch statements with no wasted +// slots (although this is not very big), and (2) allow calculations using the +// enum values, although this is also not an issue if the calculations are only +// within each cJ*_JPIMMED_*_* class and the members are packed within the +// class. + +#ifdef JUDY1 + +#define cJU_JRPNULL cJ1_JRPNULL +#define cJU_JPNULL1 cJ1_JPNULL1 +#define cJU_JPNULL2 cJ1_JPNULL2 +#define cJU_JPNULL3 cJ1_JPNULL3 +#ifdef JU_64BIT +#define cJU_JPNULL4 cJ1_JPNULL4 +#define cJU_JPNULL5 cJ1_JPNULL5 +#define cJU_JPNULL6 cJ1_JPNULL6 +#define cJU_JPNULL7 cJ1_JPNULL7 +#endif +#define cJU_JPNULLMAX cJ1_JPNULLMAX +#define cJU_JPBRANCH_L2 cJ1_JPBRANCH_L2 +#define cJU_JPBRANCH_L3 cJ1_JPBRANCH_L3 +#ifdef JU_64BIT +#define cJU_JPBRANCH_L4 cJ1_JPBRANCH_L4 +#define cJU_JPBRANCH_L5 cJ1_JPBRANCH_L5 +#define cJU_JPBRANCH_L6 cJ1_JPBRANCH_L6 +#define cJU_JPBRANCH_L7 cJ1_JPBRANCH_L7 +#endif +#define cJU_JPBRANCH_L cJ1_JPBRANCH_L +#define j__U_BranchBJPPopToWords j__1_BranchBJPPopToWords +#define cJU_JPBRANCH_B2 cJ1_JPBRANCH_B2 +#define cJU_JPBRANCH_B3 cJ1_JPBRANCH_B3 +#ifdef JU_64BIT +#define cJU_JPBRANCH_B4 cJ1_JPBRANCH_B4 +#define cJU_JPBRANCH_B5 cJ1_JPBRANCH_B5 +#define cJU_JPBRANCH_B6 cJ1_JPBRANCH_B6 +#define cJU_JPBRANCH_B7 cJ1_JPBRANCH_B7 +#endif +#define cJU_JPBRANCH_B cJ1_JPBRANCH_B +#define cJU_JPBRANCH_U2 cJ1_JPBRANCH_U2 +#define cJU_JPBRANCH_U3 cJ1_JPBRANCH_U3 +#ifdef JU_64BIT +#define cJU_JPBRANCH_U4 cJ1_JPBRANCH_U4 +#define cJU_JPBRANCH_U5 cJ1_JPBRANCH_U5 +#define cJU_JPBRANCH_U6 cJ1_JPBRANCH_U6 +#define cJU_JPBRANCH_U7 cJ1_JPBRANCH_U7 +#endif +#define cJU_JPBRANCH_U cJ1_JPBRANCH_U +#ifndef JU_64BIT +#define cJU_JPLEAF1 cJ1_JPLEAF1 +#endif +#define cJU_JPLEAF2 cJ1_JPLEAF2 +#define cJU_JPLEAF3 cJ1_JPLEAF3 +#ifdef JU_64BIT +#define cJU_JPLEAF4 cJ1_JPLEAF4 +#define cJU_JPLEAF5 cJ1_JPLEAF5 +#define cJU_JPLEAF6 cJ1_JPLEAF6 +#define cJU_JPLEAF7 cJ1_JPLEAF7 +#endif +#define cJU_JPLEAF_B1 cJ1_JPLEAF_B1 +// cJ1_JPFULLPOPU1 +#define cJU_JPIMMED_1_01 cJ1_JPIMMED_1_01 +#define cJU_JPIMMED_2_01 cJ1_JPIMMED_2_01 +#define cJU_JPIMMED_3_01 cJ1_JPIMMED_3_01 +#ifdef JU_64BIT +#define cJU_JPIMMED_4_01 cJ1_JPIMMED_4_01 +#define cJU_JPIMMED_5_01 cJ1_JPIMMED_5_01 +#define cJU_JPIMMED_6_01 cJ1_JPIMMED_6_01 +#define cJU_JPIMMED_7_01 cJ1_JPIMMED_7_01 +#endif +#define cJU_JPIMMED_1_02 cJ1_JPIMMED_1_02 +#define cJU_JPIMMED_1_03 cJ1_JPIMMED_1_03 +#define cJU_JPIMMED_1_04 cJ1_JPIMMED_1_04 +#define cJU_JPIMMED_1_05 cJ1_JPIMMED_1_05 +#define cJU_JPIMMED_1_06 cJ1_JPIMMED_1_06 +#define cJU_JPIMMED_1_07 cJ1_JPIMMED_1_07 +#ifdef JU_64BIT +// cJ1_JPIMMED_1_08 +// cJ1_JPIMMED_1_09 +// cJ1_JPIMMED_1_10 +// cJ1_JPIMMED_1_11 +// cJ1_JPIMMED_1_12 +// cJ1_JPIMMED_1_13 +// cJ1_JPIMMED_1_14 +// cJ1_JPIMMED_1_15 +#endif +#define cJU_JPIMMED_2_02 cJ1_JPIMMED_2_02 +#define cJU_JPIMMED_2_03 cJ1_JPIMMED_2_03 +#ifdef JU_64BIT +// cJ1_JPIMMED_2_04 +// cJ1_JPIMMED_2_05 +// cJ1_JPIMMED_2_06 +// cJ1_JPIMMED_2_07 +#endif +#define cJU_JPIMMED_3_02 cJ1_JPIMMED_3_02 +#ifdef JU_64BIT +// cJ1_JPIMMED_3_03 +// cJ1_JPIMMED_3_04 +// cJ1_JPIMMED_3_05 +// cJ1_JPIMMED_4_02 +// cJ1_JPIMMED_4_03 +// cJ1_JPIMMED_5_02 +// cJ1_JPIMMED_5_03 +// cJ1_JPIMMED_6_02 +// cJ1_JPIMMED_7_02 +#endif +#define cJU_JPIMMED_CAP cJ1_JPIMMED_CAP + +#else // JUDYL **************************************************************** + +#define cJU_JRPNULL cJL_JRPNULL +#define cJU_JPNULL1 cJL_JPNULL1 +#define cJU_JPNULL2 cJL_JPNULL2 +#define cJU_JPNULL3 cJL_JPNULL3 +#ifdef JU_64BIT +#define cJU_JPNULL4 cJL_JPNULL4 +#define cJU_JPNULL5 cJL_JPNULL5 +#define cJU_JPNULL6 cJL_JPNULL6 +#define cJU_JPNULL7 cJL_JPNULL7 +#endif +#define cJU_JPNULLMAX cJL_JPNULLMAX +#define cJU_JPBRANCH_L2 cJL_JPBRANCH_L2 +#define cJU_JPBRANCH_L3 cJL_JPBRANCH_L3 +#ifdef JU_64BIT +#define cJU_JPBRANCH_L4 cJL_JPBRANCH_L4 +#define cJU_JPBRANCH_L5 cJL_JPBRANCH_L5 +#define cJU_JPBRANCH_L6 cJL_JPBRANCH_L6 +#define cJU_JPBRANCH_L7 cJL_JPBRANCH_L7 +#endif +#define cJU_JPBRANCH_L cJL_JPBRANCH_L +#define j__U_BranchBJPPopToWords j__L_BranchBJPPopToWords +#define cJU_JPBRANCH_B2 cJL_JPBRANCH_B2 +#define cJU_JPBRANCH_B3 cJL_JPBRANCH_B3 +#ifdef JU_64BIT +#define cJU_JPBRANCH_B4 cJL_JPBRANCH_B4 +#define cJU_JPBRANCH_B5 cJL_JPBRANCH_B5 +#define cJU_JPBRANCH_B6 cJL_JPBRANCH_B6 +#define cJU_JPBRANCH_B7 cJL_JPBRANCH_B7 +#endif +#define cJU_JPBRANCH_B cJL_JPBRANCH_B +#define cJU_JPBRANCH_U2 cJL_JPBRANCH_U2 +#define cJU_JPBRANCH_U3 cJL_JPBRANCH_U3 +#ifdef JU_64BIT +#define cJU_JPBRANCH_U4 cJL_JPBRANCH_U4 +#define cJU_JPBRANCH_U5 cJL_JPBRANCH_U5 +#define cJU_JPBRANCH_U6 cJL_JPBRANCH_U6 +#define cJU_JPBRANCH_U7 cJL_JPBRANCH_U7 +#endif +#define cJU_JPBRANCH_U cJL_JPBRANCH_U +#define cJU_JPLEAF1 cJL_JPLEAF1 +#define cJU_JPLEAF2 cJL_JPLEAF2 +#define cJU_JPLEAF3 cJL_JPLEAF3 +#ifdef JU_64BIT +#define cJU_JPLEAF4 cJL_JPLEAF4 +#define cJU_JPLEAF5 cJL_JPLEAF5 +#define cJU_JPLEAF6 cJL_JPLEAF6 +#define cJU_JPLEAF7 cJL_JPLEAF7 +#endif +#define cJU_JPLEAF_B1 cJL_JPLEAF_B1 +#define cJU_JPIMMED_1_01 cJL_JPIMMED_1_01 +#define cJU_JPIMMED_2_01 cJL_JPIMMED_2_01 +#define cJU_JPIMMED_3_01 cJL_JPIMMED_3_01 +#ifdef JU_64BIT +#define cJU_JPIMMED_4_01 cJL_JPIMMED_4_01 +#define cJU_JPIMMED_5_01 cJL_JPIMMED_5_01 +#define cJU_JPIMMED_6_01 cJL_JPIMMED_6_01 +#define cJU_JPIMMED_7_01 cJL_JPIMMED_7_01 +#endif +#define cJU_JPIMMED_1_02 cJL_JPIMMED_1_02 +#define cJU_JPIMMED_1_03 cJL_JPIMMED_1_03 +#ifdef JU_64BIT +#define cJU_JPIMMED_1_04 cJL_JPIMMED_1_04 +#define cJU_JPIMMED_1_05 cJL_JPIMMED_1_05 +#define cJU_JPIMMED_1_06 cJL_JPIMMED_1_06 +#define cJU_JPIMMED_1_07 cJL_JPIMMED_1_07 +#define cJU_JPIMMED_2_02 cJL_JPIMMED_2_02 +#define cJU_JPIMMED_2_03 cJL_JPIMMED_2_03 +#define cJU_JPIMMED_3_02 cJL_JPIMMED_3_02 +#endif +#define cJU_JPIMMED_CAP cJL_JPIMMED_CAP + +#endif // JUDYL + + +// **************************************************************************** +// cJU*_ other than JP types: + +#ifdef JUDY1 + +#define cJU_LEAFW_MAXPOP1 cJ1_LEAFW_MAXPOP1 +#ifndef JU_64BIT +#define cJU_LEAF1_MAXPOP1 cJ1_LEAF1_MAXPOP1 +#endif +#define cJU_LEAF2_MAXPOP1 cJ1_LEAF2_MAXPOP1 +#define cJU_LEAF3_MAXPOP1 cJ1_LEAF3_MAXPOP1 +#ifdef JU_64BIT +#define cJU_LEAF4_MAXPOP1 cJ1_LEAF4_MAXPOP1 +#define cJU_LEAF5_MAXPOP1 cJ1_LEAF5_MAXPOP1 +#define cJU_LEAF6_MAXPOP1 cJ1_LEAF6_MAXPOP1 +#define cJU_LEAF7_MAXPOP1 cJ1_LEAF7_MAXPOP1 +#endif +#define cJU_IMMED1_MAXPOP1 cJ1_IMMED1_MAXPOP1 +#define cJU_IMMED2_MAXPOP1 cJ1_IMMED2_MAXPOP1 +#define cJU_IMMED3_MAXPOP1 cJ1_IMMED3_MAXPOP1 +#ifdef JU_64BIT +#define cJU_IMMED4_MAXPOP1 cJ1_IMMED4_MAXPOP1 +#define cJU_IMMED5_MAXPOP1 cJ1_IMMED5_MAXPOP1 +#define cJU_IMMED6_MAXPOP1 cJ1_IMMED6_MAXPOP1 +#define cJU_IMMED7_MAXPOP1 cJ1_IMMED7_MAXPOP1 +#endif + +#define JU_LEAF1POPTOWORDS(Pop1) J1_LEAF1POPTOWORDS(Pop1) +#define JU_LEAF2POPTOWORDS(Pop1) J1_LEAF2POPTOWORDS(Pop1) +#define JU_LEAF3POPTOWORDS(Pop1) J1_LEAF3POPTOWORDS(Pop1) +#ifdef JU_64BIT +#define JU_LEAF4POPTOWORDS(Pop1) J1_LEAF4POPTOWORDS(Pop1) +#define JU_LEAF5POPTOWORDS(Pop1) J1_LEAF5POPTOWORDS(Pop1) +#define JU_LEAF6POPTOWORDS(Pop1) J1_LEAF6POPTOWORDS(Pop1) +#define JU_LEAF7POPTOWORDS(Pop1) J1_LEAF7POPTOWORDS(Pop1) +#endif +#define JU_LEAFWPOPTOWORDS(Pop1) J1_LEAFWPOPTOWORDS(Pop1) + +#ifndef JU_64BIT +#define JU_LEAF1GROWINPLACE(Pop1) J1_LEAF1GROWINPLACE(Pop1) +#endif +#define JU_LEAF2GROWINPLACE(Pop1) J1_LEAF2GROWINPLACE(Pop1) +#define JU_LEAF3GROWINPLACE(Pop1) J1_LEAF3GROWINPLACE(Pop1) +#ifdef JU_64BIT +#define JU_LEAF4GROWINPLACE(Pop1) J1_LEAF4GROWINPLACE(Pop1) +#define JU_LEAF5GROWINPLACE(Pop1) J1_LEAF5GROWINPLACE(Pop1) +#define JU_LEAF6GROWINPLACE(Pop1) J1_LEAF6GROWINPLACE(Pop1) +#define JU_LEAF7GROWINPLACE(Pop1) J1_LEAF7GROWINPLACE(Pop1) +#endif +#define JU_LEAFWGROWINPLACE(Pop1) J1_LEAFWGROWINPLACE(Pop1) + +#define j__udyCreateBranchL j__udy1CreateBranchL +#define j__udyCreateBranchB j__udy1CreateBranchB +#define j__udyCreateBranchU j__udy1CreateBranchU +#define j__udyCascade1 j__udy1Cascade1 +#define j__udyCascade2 j__udy1Cascade2 +#define j__udyCascade3 j__udy1Cascade3 +#ifdef JU_64BIT +#define j__udyCascade4 j__udy1Cascade4 +#define j__udyCascade5 j__udy1Cascade5 +#define j__udyCascade6 j__udy1Cascade6 +#define j__udyCascade7 j__udy1Cascade7 +#endif +#define j__udyCascadeL j__udy1CascadeL +#define j__udyInsertBranch j__udy1InsertBranch + +#define j__udyBranchBToBranchL j__udy1BranchBToBranchL +#ifndef JU_64BIT +#define j__udyLeafB1ToLeaf1 j__udy1LeafB1ToLeaf1 +#endif +#define j__udyLeaf1ToLeaf2 j__udy1Leaf1ToLeaf2 +#define j__udyLeaf2ToLeaf3 j__udy1Leaf2ToLeaf3 +#ifndef JU_64BIT +#define j__udyLeaf3ToLeafW j__udy1Leaf3ToLeafW +#else +#define j__udyLeaf3ToLeaf4 j__udy1Leaf3ToLeaf4 +#define j__udyLeaf4ToLeaf5 j__udy1Leaf4ToLeaf5 +#define j__udyLeaf5ToLeaf6 j__udy1Leaf5ToLeaf6 +#define j__udyLeaf6ToLeaf7 j__udy1Leaf6ToLeaf7 +#define j__udyLeaf7ToLeafW j__udy1Leaf7ToLeafW +#endif + +#define jpm_t j1pm_t +#define Pjpm_t Pj1pm_t + +#define jlb_t j1lb_t +#define Pjlb_t Pj1lb_t + +#define JU_JLB_BITMAP J1_JLB_BITMAP + +#define j__udyAllocJPM j__udy1AllocJ1PM +#define j__udyAllocJBL j__udy1AllocJBL +#define j__udyAllocJBB j__udy1AllocJBB +#define j__udyAllocJBBJP j__udy1AllocJBBJP +#define j__udyAllocJBU j__udy1AllocJBU +#ifndef JU_64BIT +#define j__udyAllocJLL1 j__udy1AllocJLL1 +#endif +#define j__udyAllocJLL2 j__udy1AllocJLL2 +#define j__udyAllocJLL3 j__udy1AllocJLL3 +#ifdef JU_64BIT +#define j__udyAllocJLL4 j__udy1AllocJLL4 +#define j__udyAllocJLL5 j__udy1AllocJLL5 +#define j__udyAllocJLL6 j__udy1AllocJLL6 +#define j__udyAllocJLL7 j__udy1AllocJLL7 +#endif +#define j__udyAllocJLW j__udy1AllocJLW +#define j__udyAllocJLB1 j__udy1AllocJLB1 +#define j__udyFreeJPM j__udy1FreeJ1PM +#define j__udyFreeJBL j__udy1FreeJBL +#define j__udyFreeJBB j__udy1FreeJBB +#define j__udyFreeJBBJP j__udy1FreeJBBJP +#define j__udyFreeJBU j__udy1FreeJBU +#ifndef JU_64BIT +#define j__udyFreeJLL1 j__udy1FreeJLL1 +#endif +#define j__udyFreeJLL2 j__udy1FreeJLL2 +#define j__udyFreeJLL3 j__udy1FreeJLL3 +#ifdef JU_64BIT +#define j__udyFreeJLL4 j__udy1FreeJLL4 +#define j__udyFreeJLL5 j__udy1FreeJLL5 +#define j__udyFreeJLL6 j__udy1FreeJLL6 +#define j__udyFreeJLL7 j__udy1FreeJLL7 +#endif +#define j__udyFreeJLW j__udy1FreeJLW +#define j__udyFreeJLB1 j__udy1FreeJLB1 +#define j__udyFreeSM j__udy1FreeSM + +#define j__uMaxWords j__u1MaxWords + +#ifdef DEBUG +#define JudyCheckPop Judy1CheckPop +#endif + +#else // JUDYL **************************************************************** + +#define cJU_LEAFW_MAXPOP1 cJL_LEAFW_MAXPOP1 +#define cJU_LEAF1_MAXPOP1 cJL_LEAF1_MAXPOP1 +#define cJU_LEAF2_MAXPOP1 cJL_LEAF2_MAXPOP1 +#define cJU_LEAF3_MAXPOP1 cJL_LEAF3_MAXPOP1 +#ifdef JU_64BIT +#define cJU_LEAF4_MAXPOP1 cJL_LEAF4_MAXPOP1 +#define cJU_LEAF5_MAXPOP1 cJL_LEAF5_MAXPOP1 +#define cJU_LEAF6_MAXPOP1 cJL_LEAF6_MAXPOP1 +#define cJU_LEAF7_MAXPOP1 cJL_LEAF7_MAXPOP1 +#endif +#define cJU_IMMED1_MAXPOP1 cJL_IMMED1_MAXPOP1 +#define cJU_IMMED2_MAXPOP1 cJL_IMMED2_MAXPOP1 +#define cJU_IMMED3_MAXPOP1 cJL_IMMED3_MAXPOP1 +#ifdef JU_64BIT +#define cJU_IMMED4_MAXPOP1 cJL_IMMED4_MAXPOP1 +#define cJU_IMMED5_MAXPOP1 cJL_IMMED5_MAXPOP1 +#define cJU_IMMED6_MAXPOP1 cJL_IMMED6_MAXPOP1 +#define cJU_IMMED7_MAXPOP1 cJL_IMMED7_MAXPOP1 +#endif + +#define JU_LEAF1POPTOWORDS(Pop1) JL_LEAF1POPTOWORDS(Pop1) +#define JU_LEAF2POPTOWORDS(Pop1) JL_LEAF2POPTOWORDS(Pop1) +#define JU_LEAF3POPTOWORDS(Pop1) JL_LEAF3POPTOWORDS(Pop1) +#ifdef JU_64BIT +#define JU_LEAF4POPTOWORDS(Pop1) JL_LEAF4POPTOWORDS(Pop1) +#define JU_LEAF5POPTOWORDS(Pop1) JL_LEAF5POPTOWORDS(Pop1) +#define JU_LEAF6POPTOWORDS(Pop1) JL_LEAF6POPTOWORDS(Pop1) +#define JU_LEAF7POPTOWORDS(Pop1) JL_LEAF7POPTOWORDS(Pop1) +#endif +#define JU_LEAFWPOPTOWORDS(Pop1) JL_LEAFWPOPTOWORDS(Pop1) + +#define JU_LEAF1GROWINPLACE(Pop1) JL_LEAF1GROWINPLACE(Pop1) +#define JU_LEAF2GROWINPLACE(Pop1) JL_LEAF2GROWINPLACE(Pop1) +#define JU_LEAF3GROWINPLACE(Pop1) JL_LEAF3GROWINPLACE(Pop1) +#ifdef JU_64BIT +#define JU_LEAF4GROWINPLACE(Pop1) JL_LEAF4GROWINPLACE(Pop1) +#define JU_LEAF5GROWINPLACE(Pop1) JL_LEAF5GROWINPLACE(Pop1) +#define JU_LEAF6GROWINPLACE(Pop1) JL_LEAF6GROWINPLACE(Pop1) +#define JU_LEAF7GROWINPLACE(Pop1) JL_LEAF7GROWINPLACE(Pop1) +#endif +#define JU_LEAFWGROWINPLACE(Pop1) JL_LEAFWGROWINPLACE(Pop1) + +#define j__udyCreateBranchL j__udyLCreateBranchL +#define j__udyCreateBranchB j__udyLCreateBranchB +#define j__udyCreateBranchU j__udyLCreateBranchU +#define j__udyCascade1 j__udyLCascade1 +#define j__udyCascade2 j__udyLCascade2 +#define j__udyCascade3 j__udyLCascade3 +#ifdef JU_64BIT +#define j__udyCascade4 j__udyLCascade4 +#define j__udyCascade5 j__udyLCascade5 +#define j__udyCascade6 j__udyLCascade6 +#define j__udyCascade7 j__udyLCascade7 +#endif +#define j__udyCascadeL j__udyLCascadeL +#define j__udyInsertBranch j__udyLInsertBranch + +#define j__udyBranchBToBranchL j__udyLBranchBToBranchL +#define j__udyLeafB1ToLeaf1 j__udyLLeafB1ToLeaf1 +#define j__udyLeaf1ToLeaf2 j__udyLLeaf1ToLeaf2 +#define j__udyLeaf2ToLeaf3 j__udyLLeaf2ToLeaf3 +#ifndef JU_64BIT +#define j__udyLeaf3ToLeafW j__udyLLeaf3ToLeafW +#else +#define j__udyLeaf3ToLeaf4 j__udyLLeaf3ToLeaf4 +#define j__udyLeaf4ToLeaf5 j__udyLLeaf4ToLeaf5 +#define j__udyLeaf5ToLeaf6 j__udyLLeaf5ToLeaf6 +#define j__udyLeaf6ToLeaf7 j__udyLLeaf6ToLeaf7 +#define j__udyLeaf7ToLeafW j__udyLLeaf7ToLeafW +#endif + +#define jpm_t jLpm_t +#define Pjpm_t PjLpm_t + +#define jlb_t jLlb_t +#define Pjlb_t PjLlb_t + +#define JU_JLB_BITMAP JL_JLB_BITMAP + +#define j__udyAllocJPM j__udyLAllocJLPM +#define j__udyAllocJBL j__udyLAllocJBL +#define j__udyAllocJBB j__udyLAllocJBB +#define j__udyAllocJBBJP j__udyLAllocJBBJP +#define j__udyAllocJBU j__udyLAllocJBU +#define j__udyAllocJLL1 j__udyLAllocJLL1 +#define j__udyAllocJLL2 j__udyLAllocJLL2 +#define j__udyAllocJLL3 j__udyLAllocJLL3 +#ifdef JU_64BIT +#define j__udyAllocJLL4 j__udyLAllocJLL4 +#define j__udyAllocJLL5 j__udyLAllocJLL5 +#define j__udyAllocJLL6 j__udyLAllocJLL6 +#define j__udyAllocJLL7 j__udyLAllocJLL7 +#endif +#define j__udyAllocJLW j__udyLAllocJLW +#define j__udyAllocJLB1 j__udyLAllocJLB1 +// j__udyLAllocJV +#define j__udyFreeJPM j__udyLFreeJLPM +#define j__udyFreeJBL j__udyLFreeJBL +#define j__udyFreeJBB j__udyLFreeJBB +#define j__udyFreeJBBJP j__udyLFreeJBBJP +#define j__udyFreeJBU j__udyLFreeJBU +#define j__udyFreeJLL1 j__udyLFreeJLL1 +#define j__udyFreeJLL2 j__udyLFreeJLL2 +#define j__udyFreeJLL3 j__udyLFreeJLL3 +#ifdef JU_64BIT +#define j__udyFreeJLL4 j__udyLFreeJLL4 +#define j__udyFreeJLL5 j__udyLFreeJLL5 +#define j__udyFreeJLL6 j__udyLFreeJLL6 +#define j__udyFreeJLL7 j__udyLFreeJLL7 +#endif +#define j__udyFreeJLW j__udyLFreeJLW +#define j__udyFreeJLB1 j__udyLFreeJLB1 +#define j__udyFreeSM j__udyLFreeSM +// j__udyLFreeJV + +#define j__uMaxWords j__uLMaxWords + +#ifdef DEBUG +#define JudyCheckPop JudyLCheckPop +#endif + +#endif // JUDYL + +#endif // _JUDYPRIVATE1L_INCLUDED diff --git a/libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h b/libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h new file mode 100644 index 0000000..10295ba --- /dev/null +++ b/libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h @@ -0,0 +1,788 @@ +#ifndef _JUDY_PRIVATE_BRANCH_INCLUDED +#define _JUDY_PRIVATE_BRANCH_INCLUDED +// _________________ +// +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 1.2 $ $Source: /home/doug/judy-1.0.5_min/test/../src/JudyCommon/RCS/JudyPrivateBranch.h,v $ +// +// Header file for all Judy sources, for global but private (non-exported) +// declarations specific to branch support. +// +// See also the "Judy Shop Manual" (try judy/doc/int/JudyShopManual.*). + + +// **************************************************************************** +// JUDY POINTER (JP) SUPPORT +// **************************************************************************** +// +// This "rich pointer" object is pivotal to Judy execution. +// +// JP CONTAINING OTHER THAN IMMEDIATE INDEXES: +// +// If the JP points to a linear or bitmap leaf, jp_DcdPopO contains the +// Population-1 in LSbs and Decode (Dcd) bytes in the MSBs. (In practice the +// Decode bits are masked off while accessing the Pop0 bits.) +// +// The Decode Size, the number of Dcd bytes available, is encoded in jpo_Type. +// It can also be thought of as the number of states "skipped" in the SM, where +// each state decodes 8 bits = 1 byte. +// +// TBD: Dont need two structures, except possibly to force jp_Type to highest +// address! +// +// Note: The jpo_u union is not required by HP-UX or Linux but Win32 because +// the cl.exe compiler otherwise refuses to pack a bitfield (DcdPopO) with +// anything else, even with the -Zp option. This is pretty ugly, but +// fortunately portable, and its all hide-able by macros (see below). + +typedef struct J_UDY_POINTER_OTHERS // JPO. + { + Word_t j_po_Addr; // first word: Pjp_t, Word_t, etc. + union { + Word_t j_po_Addr1; + uint8_t j_po_DcdP0[sizeof(Word_t) - 1]; + uint8_t j_po_Bytes[sizeof(Word_t)]; // last byte = jp_Type. + } jpo_u; + } jpo_t; + + +// JP CONTAINING IMMEDIATE INDEXES: +// +// j_pi_1Index[] plus j_pi_LIndex[] together hold as many N-byte (1..3-byte +// [1..7-byte]) Indexes as will fit in sizeof(jpi_t) less 1 byte for j_pi_Type +// (that is, 7..1 [15..1] Indexes). +// +// For Judy1, j_pi_1Index[] is used and j_pi_LIndex[] is not used. +// For JudyL, j_pi_LIndex[] is used and j_pi_1Index[] is not used. +// +// Note: Actually when Pop1 = 1, jpi_t is not used, and the least bytes of the +// single Index are stored in j_po_DcdPopO, for both Judy1 and JudyL, so for +// JudyL the j_po_Addr field can hold the target value. +// +// TBD: Revise this structure to not overload j_po_DcdPopO this way? The +// current arrangement works, its just confusing. + +typedef struct _JUDY_POINTER_IMMEDL + { + Word_t j_pL_Addr; + uint8_t j_pL_LIndex[sizeof(Word_t) - 1]; // see above. + uint8_t j_pL_Type; + } jpL_t; + +typedef struct _JUDY_POINTER_IMMED1 + { + uint8_t j_p1_1Index[(2 * sizeof(Word_t)) - 1]; + uint8_t j_p1_Type; + } jp1_t; + +// UNION OF JP TYPES: +// +// A branch is an array of cJU_BRANCHUNUMJPS (256) of this object, or an +// alternate data type such as: A linear branch which is a list of 2..7 JPs, +// or a bitmap branch which contains 8 lists of 0..32 JPs. JPs reside only in +// branches of a Judy SM. + +typedef union J_UDY_POINTER // JP. + { + jpo_t j_po; // other than immediate indexes. + jpL_t j_pL; // immediate indexes. + jp1_t j_p1; // immediate indexes. + } jp_t, *Pjp_t; + +// For coding convenience: +// +// Note, jp_Type has the same bits in jpo_t jpL_t and jp1_t. + +#define jp_1Index j_p1.j_p1_1Index // for storing Indexes in first word. +#define jp_LIndex j_pL.j_pL_LIndex // for storing Indexes in second word. +#define jp_Addr j_po.j_po_Addr +#define jp_Addr1 j_po.jpo_u.j_po_Addr1 +//#define jp_DcdPop0 j_po.jpo_u.j_po_DcdPop0 +#define jp_Addr1 j_po.jpo_u.j_po_Addr1 +//#define jp_Type j_po.jpo_u.j_po_Bytes[sizeof(Word_t) - 1] +#define jp_Type j_p1.j_p1_Type +#define jp_DcdP0 j_po.jpo_u.j_po_DcdP0 + + +// **************************************************************************** +// JUDY POINTER (JP) -- RELATED MACROS AND CONSTANTS +// **************************************************************************** + +// EXTRACT VALUES FROM JP: +// +// Masks for the bytes in the Dcd and Pop0 parts of jp_DcdPopO: +// +// cJU_DCDMASK() consists of a mask that excludes the (LSb) Pop0 bytes and +// also, just to be safe, the top byte of the word, since jp_DcdPopO is 1 byte +// less than a full word. +// +// Note: These are constant macros (cJU) because cPopBytes should be a +// constant. Also note cPopBytes == state in the SM. + +#define cJU_POP0MASK(cPopBytes) JU_LEASTBYTESMASK(cPopBytes) + +#define cJU_DCDMASK(cPopBytes) \ + ((cJU_ALLONES >> cJU_BITSPERBYTE) & (~cJU_POP0MASK(cPopBytes))) + +// Mask off the high byte from INDEX to it can be compared to DcdPopO: + +#define JU_TRIMTODCDSIZE(INDEX) ((cJU_ALLONES >> cJU_BITSPERBYTE) & (INDEX)) + +// Get from jp_DcdPopO the Pop0 for various branch JP Types: +// +// Note: There are no simple macros for cJU_BRANCH* Types because their +// populations must be added up and dont reside in an already-calculated +// place. + +#define JU_JPBRANCH_POP0(PJP,cPopBytes) \ + (JU_JPDCDPOP0(PJP) & cJU_POP0MASK(cPopBytes)) + +// METHOD FOR DETERMINING IF OBJECTS HAVE ROOM TO GROW: +// +// J__U_GROWCK() is a generic method to determine if an object can grow in +// place, based on whether the next population size (one more) would use the +// same space. + +#define J__U_GROWCK(POP1,MAXPOP1,POPTOWORDS) \ + (((POP1) != (MAXPOP1)) && (POPTOWORDS[POP1] == POPTOWORDS[(POP1) + 1])) + +#define JU_BRANCHBJPGROWINPLACE(NumJPs) \ + J__U_GROWCK(NumJPs, cJU_BITSPERSUBEXPB, j__U_BranchBJPPopToWords) + + +// DETERMINE IF AN INDEX IS (NOT) IN A JPS EXPANSE: + +#define JU_DCDNOTMATCHINDEX(INDEX,PJP,POP0BYTES) \ + (((INDEX) ^ JU_JPDCDPOP0(PJP)) & cJU_DCDMASK(POP0BYTES)) + + +// NUMBER OF JPs IN AN UNCOMPRESSED BRANCH: +// +// An uncompressed branch is simply an array of 256 Judy Pointers (JPs). It is +// a minimum cacheline fill object. Define it here before its first needed. + +#define cJU_BRANCHUNUMJPS cJU_SUBEXPPERSTATE + + +// **************************************************************************** +// JUDY BRANCH LINEAR (JBL) SUPPORT +// **************************************************************************** +// +// A linear branch is a way of compressing empty expanses (null JPs) out of an +// uncompressed 256-way branch, when the number of populated expanses is so +// small that even a bitmap branch is excessive. +// +// The maximum number of JPs in a Judy linear branch: +// +// Note: This number results in a 1-cacheline sized structure. Previous +// versions had a larger struct so a linear branch didnt become a bitmap +// branch until the memory consumed was even, but for speed, its better to +// switch "sooner" and keep a linear branch fast. + +#define cJU_BRANCHLMAXJPS 7 + + +// LINEAR BRANCH STRUCT: +// +// 1-byte count, followed by array of byte-sized expanses, followed by JPs. + +typedef struct J__UDY_BRANCH_LINEAR + { + uint8_t jbl_NumJPs; // num of JPs (Pjp_t), 1..N. + uint8_t jbl_Expanse[cJU_BRANCHLMAXJPS]; // 1..7 MSbs of pop exps. + jp_t jbl_jp [cJU_BRANCHLMAXJPS]; // JPs for populated exps. + } jbl_t, * Pjbl_t; + + +// **************************************************************************** +// JUDY BRANCH BITMAP (JBB) SUPPORT +// **************************************************************************** +// +// A bitmap branch is a way of compressing empty expanses (null JPs) out of +// uncompressed 256-way branch. This costs 1 additional cache line fill, but +// can save a lot of memory when it matters most, near the leaves, and +// typically there will be only one at most in the path to any Index (leaf). +// +// The bitmap indicates which of the cJU_BRANCHUNUMJPS (256) JPs in the branch +// are NOT null, that is, their expanses are populated. The jbb_t also +// contains N pointers to "mini" Judy branches ("subexpanses") of up to M JPs +// each (see BITMAP_BRANCHMxN, for example, BITMAP_BRANCH32x8), where M x N = +// cJU_BRANCHUNUMJPS. These are dynamically allocated and never contain +// cJ*_JPNULL* jp_Types. An empty subexpanse is represented by no bit sets in +// the corresponding subexpanse bitmap, in which case the corresponding +// jbbs_Pjp pointers value is unused. +// +// Note that the number of valid JPs in each 1-of-N subexpanses is determined +// by POPULATION rather than by EXPANSE -- the desired outcome to save memory +// when near the leaves. Note that the memory required for 185 JPs is about as +// much as an uncompressed 256-way branch, therefore 184 is set as the maximum. +// However, it is expected that a conversion to an uncompressed 256-way branch +// will normally take place before this limit is reached for other reasons, +// such as improving performance when the "wasted" memory is well amortized by +// the population under the branch, preserving an acceptable overall +// bytes/Index in the Judy array. +// +// The number of pointers to arrays of JPs in the Judy bitmap branch: +// +// Note: The numbers below are the same in both 32 and 64 bit systems. + +#define cJU_BRANCHBMAXJPS 184 // maximum JPs for bitmap branches. + +// Convenience wrappers for referencing BranchB bitmaps or JP subarray +// pointers: +// +// Note: JU_JBB_PJP produces a "raw" memory address that must pass through +// P_JP before use, except when freeing memory: + +#define JU_JBB_BITMAP(Pjbb, SubExp) ((Pjbb)->jbb_jbbs[SubExp].jbbs_Bitmap) +#define JU_JBB_PJP( Pjbb, SubExp) ((Pjbb)->jbb_jbbs[SubExp].jbbs_Pjp) + +#define JU_SUBEXPB(Digit) (((Digit) / cJU_BITSPERSUBEXPB) & (cJU_NUMSUBEXPB-1)) + +#define JU_BITMAPTESTB(Pjbb, Index) \ + (JU_JBB_BITMAP(Pjbb, JU_SUBEXPB(Index)) & JU_BITPOSMASKB(Index)) + +#define JU_BITMAPSETB(Pjbb, Index) \ + (JU_JBB_BITMAP(Pjbb, JU_SUBEXPB(Index)) |= JU_BITPOSMASKB(Index)) + +// Note: JU_BITMAPCLEARB is not defined because the code does it a faster way. + +typedef struct J__UDY_BRANCH_BITMAP_SUBEXPANSE + { + BITMAPB_t jbbs_Bitmap; + Pjp_t jbbs_Pjp; + + } jbbs_t; + +typedef struct J__UDY_BRANCH_BITMAP + { + jbbs_t jbb_jbbs [cJU_NUMSUBEXPB]; +#ifdef SUBEXPCOUNTS + Word_t jbb_subPop1[cJU_NUMSUBEXPB]; +#endif + } jbb_t, * Pjbb_t; + +#define JU_BRANCHJP_NUMJPSTOWORDS(NumJPs) (j__U_BranchBJPPopToWords[NumJPs]) + +#ifdef SUBEXPCOUNTS +#define cJU_NUMSUBEXPU 16 // number of subexpanse counts. +#endif + + +// **************************************************************************** +// JUDY BRANCH UNCOMPRESSED (JBU) SUPPORT +// **************************************************************************** + +// Convenience wrapper for referencing BranchU JPs: +// +// Note: This produces a non-"raw" address already passed through P_JBU(). + +#define JU_JBU_PJP(Pjp,Index,Level) \ + (&((P_JBU((Pjp)->jp_Addr))->jbu_jp[JU_DIGITATSTATE(Index, Level)])) +#define JU_JBU_PJP0(Pjp) \ + (&((P_JBU((Pjp)->jp_Addr))->jbu_jp[0])) + +typedef struct J__UDY_BRANCH_UNCOMPRESSED + { + jp_t jbu_jp [cJU_BRANCHUNUMJPS]; // JPs for populated exp. +#ifdef SUBEXPCOUNTS + Word_t jbu_subPop1[cJU_NUMSUBEXPU]; +#endif + } jbu_t, * Pjbu_t; + + +// **************************************************************************** +// OTHER SUPPORT FOR JUDY STATE MACHINES (SMs) +// **************************************************************************** + +// OBJECT SIZES IN WORDS: +// +// Word_ts per various JudyL structures that have constant sizes. +// cJU_WORDSPERJP should always be 2; this is fundamental to the Judy +// structures. + +#define cJU_WORDSPERJP (sizeof(jp_t) / cJU_BYTESPERWORD) +#define cJU_WORDSPERCL (cJU_BYTESPERCL / cJU_BYTESPERWORD) + + +// OPPORTUNISTIC UNCOMPRESSION: +// +// Define populations at which a BranchL or BranchB must convert to BranchU. +// Earlier conversion is possible with good memory efficiency -- see below. + +#ifndef NO_BRANCHU + +// Max population below BranchL, then convert to BranchU: + +#define JU_BRANCHL_MAX_POP 1000 + +// Minimum global population increment before next conversion of a BranchB to a +// BranchU: +// +// This is was done to allow malloc() to coalesce memory before the next big +// (~512 words) allocation. + +#define JU_BTOU_POP_INCREMENT 300 + +// Min/max population below BranchB, then convert to BranchU: + +#define JU_BRANCHB_MIN_POP 135 +#define JU_BRANCHB_MAX_POP 750 + +#else // NO_BRANCHU + +// These are set up to have conservative conversion schedules to BranchU: + +#define JU_BRANCHL_MAX_POP (-1UL) +#define JU_BTOU_POP_INCREMENT 300 +#define JU_BRANCHB_MIN_POP 1000 +#define JU_BRANCHB_MAX_POP (-1UL) + +#endif // NO_BRANCHU + + +// MISCELLANEOUS MACROS: + +// Get N most significant bits from the shifted Index word: +// +// As Index words are decoded, they are shifted left so only relevant, +// undecoded Index bits remain. + +#define JU_BITSFROMSFTIDX(SFTIDX, N) ((SFTIDX) >> (cJU_BITSPERWORD - (N))) + +// TBD: I have my doubts about the necessity of these macros (dlb): + +// Produce 1-digit mask at specified state: + +#define cJU_MASKATSTATE(State) (0xffL << (((State) - 1) * cJU_BITSPERBYTE)) + +// Get byte (digit) from Index at the specified state, right justified: +// +// Note: State must be 1..cJU_ROOTSTATE, and Digits must be 1..(cJU_ROOTSTATE +// - 1), but theres no way to assert these within an expression. + +#define JU_DIGITATSTATE(Index,cState) \ + ((uint8_t)((Index) >> (((cState) - 1) * cJU_BITSPERBYTE))) + +// Similarly, place byte (digit) at correct position for the specified state: +// +// Note: Cast digit to a Word_t first so there are no complaints or problems +// about shifting it more than 32 bits on a 64-bit system, say, when it is a +// uint8_t from jbl_Expanse[]. (Believe it or not, the C standard says to +// promote an unsigned char to a signed int; -Ac does not do this, but -Ae +// does.) +// +// Also, to make lint happy, cast the whole result again because apparently +// shifting a Word_t does not result in a Word_t! + +#define JU_DIGITTOSTATE(Digit,cState) \ + ((Word_t) (((Word_t) (Digit)) << (((cState) - 1) * cJU_BITSPERBYTE))) + +#endif // ! _JUDY_PRIVATE_BRANCH_INCLUDED + + +#ifdef TEST_INSDEL + +// **************************************************************************** +// TEST CODE FOR INSERT/DELETE MACROS +// **************************************************************************** +// +// To use this, compile a temporary *.c file containing: +// +// #define DEBUG +// #define JUDY_ASSERT +// #define TEST_INSDEL +// #include "JudyPrivate.h" +// #include "JudyPrivateBranch.h" +// +// Use a command like this: cc -Ae +DD64 -I. -I JudyCommon -o t t.c +// For best results, include +DD64 on a 64-bit system. +// +// This test code exercises some tricky macros, but the output must be studied +// manually to verify it. Assume that for even-index testing, whole words +// (Word_t) suffices. + +#include + +#define INDEXES 3 // in each array. + + +// **************************************************************************** +// I N I T +// +// Set up variables for next test. See usage. + +FUNCTION void Init ( + int base, + PWord_t PeIndex, + PWord_t PoIndex, + PWord_t Peleaf, // always whole words. +#ifndef JU_64BIT + uint8_t * Poleaf3) +#else + uint8_t * Poleaf3, + uint8_t * Poleaf5, + uint8_t * Poleaf6, + uint8_t * Poleaf7) +#endif +{ + int offset; + + *PeIndex = 99; + + for (offset = 0; offset <= INDEXES; ++offset) + Peleaf[offset] = base + offset; + + for (offset = 0; offset < (INDEXES + 1) * 3; ++offset) + Poleaf3[offset] = base + offset; + +#ifndef JU_64BIT + *PoIndex = (91 << 24) | (92 << 16) | (93 << 8) | 94; +#else + + *PoIndex = (91L << 56) | (92L << 48) | (93L << 40) | (94L << 32) + | (95L << 24) | (96L << 16) | (97L << 8) | 98L; + + for (offset = 0; offset < (INDEXES + 1) * 5; ++offset) + Poleaf5[offset] = base + offset; + + for (offset = 0; offset < (INDEXES + 1) * 6; ++offset) + Poleaf6[offset] = base + offset; + + for (offset = 0; offset < (INDEXES + 1) * 7; ++offset) + Poleaf7[offset] = base + offset; +#endif + +} // Init() + + +// **************************************************************************** +// P R I N T L E A F +// +// Print the byte values in a leaf. + +FUNCTION void PrintLeaf ( + char * Label, // for output. + int IOffset, // insertion offset in array. + int Indsize, // index size in bytes. + uint8_t * PLeaf) // array of Index bytes. +{ + int offset; // in PLeaf. + int byte; // in one word. + + (void) printf("%s %u: ", Label, IOffset); + + for (offset = 0; offset <= INDEXES; ++offset) + { + for (byte = 0; byte < Indsize; ++byte) + (void) printf("%2d", PLeaf[(offset * Indsize) + byte]); + + (void) printf(" "); + } + + (void) printf("\n"); + +} // PrintLeaf() + + +// **************************************************************************** +// M A I N +// +// Test program. + +FUNCTION main() +{ + Word_t eIndex; // even, to insert. + Word_t oIndex; // odd, to insert. + Word_t eleaf [ INDEXES + 1]; // even leaf, index size 4. + uint8_t oleaf3[(INDEXES + 1) * 3]; // odd leaf, index size 3. +#ifdef JU_64BIT + uint8_t oleaf5[(INDEXES + 1) * 5]; // odd leaf, index size 5. + uint8_t oleaf6[(INDEXES + 1) * 6]; // odd leaf, index size 6. + uint8_t oleaf7[(INDEXES + 1) * 7]; // odd leaf, index size 7. +#endif + Word_t eleaf_2 [ INDEXES + 1]; // same, but second arrays: + uint8_t oleaf3_2[(INDEXES + 1) * 3]; +#ifdef JU_64BIT + uint8_t oleaf5_2[(INDEXES + 1) * 5]; + uint8_t oleaf6_2[(INDEXES + 1) * 6]; + uint8_t oleaf7_2[(INDEXES + 1) * 7]; +#endif + int ioffset; // index insertion offset. + +#ifndef JU_64BIT +#define INIT Init( 0, & eIndex, & oIndex, eleaf, oleaf3) +#define INIT2 INIT; Init(50, & eIndex, & oIndex, eleaf_2, oleaf3_2) +#else +#define INIT Init( 0, & eIndex, & oIndex, eleaf, oleaf3, \ + oleaf5, oleaf6, oleaf7) +#define INIT2 INIT; Init(50, & eIndex, & oIndex, eleaf_2, oleaf3_2, \ + oleaf5_2, oleaf6_2, oleaf7_2) +#endif + +#define WSIZE sizeof (Word_t) // shorthand. + +#ifdef PRINTALL // to turn on "noisy" printouts. +#define PRINTLEAF(Label,IOffset,Indsize,PLeaf) \ + PrintLeaf(Label,IOffset,Indsize,PLeaf) +#else +#define PRINTLEAF(Label,IOffset,Indsize,PLeaf) \ + if (ioffset == 0) \ + PrintLeaf(Label,IOffset,Indsize,PLeaf) +#endif + + (void) printf( +"In each case, tests operate on an initial array of %d indexes. Even-index\n" +"tests set index values to 0,1,2...; odd-index tests set byte values to\n" +"0,1,2... Inserted indexes have a value of 99 or else byte values 91,92,...\n", + INDEXES); + + (void) puts("\nJU_INSERTINPLACE():"); + + for (ioffset = 0; ioffset <= INDEXES; ++ioffset) + { + INIT; + PRINTLEAF("Before", ioffset, WSIZE, (uint8_t *) eleaf); + JU_INSERTINPLACE(eleaf, INDEXES, ioffset, eIndex); + PrintLeaf("After ", ioffset, WSIZE, (uint8_t *) eleaf); + } + + (void) puts("\nJU_INSERTINPLACE3():"); + + for (ioffset = 0; ioffset <= INDEXES; ++ioffset) + { + INIT; + PRINTLEAF("Before", ioffset, 3, oleaf3); + JU_INSERTINPLACE3(oleaf3, INDEXES, ioffset, oIndex); + PrintLeaf("After ", ioffset, 3, oleaf3); + } + +#ifdef JU_64BIT + (void) puts("\nJU_INSERTINPLACE5():"); + + for (ioffset = 0; ioffset <= INDEXES; ++ioffset) + { + INIT; + PRINTLEAF("Before", ioffset, 5, oleaf5); + JU_INSERTINPLACE5(oleaf5, INDEXES, ioffset, oIndex); + PrintLeaf("After ", ioffset, 5, oleaf5); + } + + (void) puts("\nJU_INSERTINPLACE6():"); + + for (ioffset = 0; ioffset <= INDEXES; ++ioffset) + { + INIT; + PRINTLEAF("Before", ioffset, 6, oleaf6); + JU_INSERTINPLACE6(oleaf6, INDEXES, ioffset, oIndex); + PrintLeaf("After ", ioffset, 6, oleaf6); + } + + (void) puts("\nJU_INSERTINPLACE7():"); + + for (ioffset = 0; ioffset <= INDEXES; ++ioffset) + { + INIT; + PRINTLEAF("Before", ioffset, 7, oleaf7); + JU_INSERTINPLACE7(oleaf7, INDEXES, ioffset, oIndex); + PrintLeaf("After ", ioffset, 7, oleaf7); + } +#endif // JU_64BIT + + (void) puts("\nJU_DELETEINPLACE():"); + + for (ioffset = 0; ioffset < INDEXES; ++ioffset) + { + INIT; + PRINTLEAF("Before", ioffset, WSIZE, (uint8_t *) eleaf); + JU_DELETEINPLACE(eleaf, INDEXES, ioffset); + PrintLeaf("After ", ioffset, WSIZE, (uint8_t *) eleaf); + } + + (void) puts("\nJU_DELETEINPLACE_ODD(3):"); + + for (ioffset = 0; ioffset < INDEXES; ++ioffset) + { + INIT; + PRINTLEAF("Before", ioffset, 3, oleaf3); + JU_DELETEINPLACE_ODD(oleaf3, INDEXES, ioffset, 3); + PrintLeaf("After ", ioffset, 3, oleaf3); + } + +#ifdef JU_64BIT + (void) puts("\nJU_DELETEINPLACE_ODD(5):"); + + for (ioffset = 0; ioffset < INDEXES; ++ioffset) + { + INIT; + PRINTLEAF("Before", ioffset, 5, oleaf5); + JU_DELETEINPLACE_ODD(oleaf5, INDEXES, ioffset, 5); + PrintLeaf("After ", ioffset, 5, oleaf5); + } + + (void) puts("\nJU_DELETEINPLACE_ODD(6):"); + + for (ioffset = 0; ioffset < INDEXES; ++ioffset) + { + INIT; + PRINTLEAF("Before", ioffset, 6, oleaf6); + JU_DELETEINPLACE_ODD(oleaf6, INDEXES, ioffset, 6); + PrintLeaf("After ", ioffset, 6, oleaf6); + } + + (void) puts("\nJU_DELETEINPLACE_ODD(7):"); + + for (ioffset = 0; ioffset < INDEXES; ++ioffset) + { + INIT; + PRINTLEAF("Before", ioffset, 7, oleaf7); + JU_DELETEINPLACE_ODD(oleaf7, INDEXES, ioffset, 7); + PrintLeaf("After ", ioffset, 7, oleaf7); + } +#endif // JU_64BIT + + (void) puts("\nJU_INSERTCOPY():"); + + for (ioffset = 0; ioffset <= INDEXES; ++ioffset) + { + INIT2; + PRINTLEAF("Before, src ", ioffset, WSIZE, (uint8_t *) eleaf); + PRINTLEAF("Before, dest", ioffset, WSIZE, (uint8_t *) eleaf_2); + JU_INSERTCOPY(eleaf_2, eleaf, INDEXES, ioffset, eIndex); + PRINTLEAF("After, src ", ioffset, WSIZE, (uint8_t *) eleaf); + PrintLeaf("After, dest", ioffset, WSIZE, (uint8_t *) eleaf_2); + } + + (void) puts("\nJU_INSERTCOPY3():"); + + for (ioffset = 0; ioffset <= INDEXES; ++ioffset) + { + INIT2; + PRINTLEAF("Before, src ", ioffset, 3, oleaf3); + PRINTLEAF("Before, dest", ioffset, 3, oleaf3_2); + JU_INSERTCOPY3(oleaf3_2, oleaf3, INDEXES, ioffset, oIndex); + PRINTLEAF("After, src ", ioffset, 3, oleaf3); + PrintLeaf("After, dest", ioffset, 3, oleaf3_2); + } + +#ifdef JU_64BIT + (void) puts("\nJU_INSERTCOPY5():"); + + for (ioffset = 0; ioffset <= INDEXES; ++ioffset) + { + INIT2; + PRINTLEAF("Before, src ", ioffset, 5, oleaf5); + PRINTLEAF("Before, dest", ioffset, 5, oleaf5_2); + JU_INSERTCOPY5(oleaf5_2, oleaf5, INDEXES, ioffset, oIndex); + PRINTLEAF("After, src ", ioffset, 5, oleaf5); + PrintLeaf("After, dest", ioffset, 5, oleaf5_2); + } + + (void) puts("\nJU_INSERTCOPY6():"); + + for (ioffset = 0; ioffset <= INDEXES; ++ioffset) + { + INIT2; + PRINTLEAF("Before, src ", ioffset, 6, oleaf6); + PRINTLEAF("Before, dest", ioffset, 6, oleaf6_2); + JU_INSERTCOPY6(oleaf6_2, oleaf6, INDEXES, ioffset, oIndex); + PRINTLEAF("After, src ", ioffset, 6, oleaf6); + PrintLeaf("After, dest", ioffset, 6, oleaf6_2); + } + + (void) puts("\nJU_INSERTCOPY7():"); + + for (ioffset = 0; ioffset <= INDEXES; ++ioffset) + { + INIT2; + PRINTLEAF("Before, src ", ioffset, 7, oleaf7); + PRINTLEAF("Before, dest", ioffset, 7, oleaf7_2); + JU_INSERTCOPY7(oleaf7_2, oleaf7, INDEXES, ioffset, oIndex); + PRINTLEAF("After, src ", ioffset, 7, oleaf7); + PrintLeaf("After, dest", ioffset, 7, oleaf7_2); + } +#endif // JU_64BIT + + (void) puts("\nJU_DELETECOPY():"); + + for (ioffset = 0; ioffset < INDEXES; ++ioffset) + { + INIT2; + PRINTLEAF("Before, src ", ioffset, WSIZE, (uint8_t *) eleaf); + PRINTLEAF("Before, dest", ioffset, WSIZE, (uint8_t *) eleaf_2); + JU_DELETECOPY(eleaf_2, eleaf, INDEXES, ioffset, ignore); + PRINTLEAF("After, src ", ioffset, WSIZE, (uint8_t *) eleaf); + PrintLeaf("After, dest", ioffset, WSIZE, (uint8_t *) eleaf_2); + } + + (void) puts("\nJU_DELETECOPY_ODD(3):"); + + for (ioffset = 0; ioffset < INDEXES; ++ioffset) + { + INIT2; + PRINTLEAF("Before, src ", ioffset, 3, oleaf3); + PRINTLEAF("Before, dest", ioffset, 3, oleaf3_2); + JU_DELETECOPY_ODD(oleaf3_2, oleaf3, INDEXES, ioffset, 3); + PRINTLEAF("After, src ", ioffset, 3, oleaf3); + PrintLeaf("After, dest", ioffset, 3, oleaf3_2); + } + +#ifdef JU_64BIT + (void) puts("\nJU_DELETECOPY_ODD(5):"); + + for (ioffset = 0; ioffset < INDEXES; ++ioffset) + { + INIT2; + PRINTLEAF("Before, src ", ioffset, 5, oleaf5); + PRINTLEAF("Before, dest", ioffset, 5, oleaf5_2); + JU_DELETECOPY_ODD(oleaf5_2, oleaf5, INDEXES, ioffset, 5); + PRINTLEAF("After, src ", ioffset, 5, oleaf5); + PrintLeaf("After, dest", ioffset, 5, oleaf5_2); + } + + (void) puts("\nJU_DELETECOPY_ODD(6):"); + + for (ioffset = 0; ioffset < INDEXES; ++ioffset) + { + INIT2; + PRINTLEAF("Before, src ", ioffset, 6, oleaf6); + PRINTLEAF("Before, dest", ioffset, 6, oleaf6_2); + JU_DELETECOPY_ODD(oleaf6_2, oleaf6, INDEXES, ioffset, 6); + PRINTLEAF("After, src ", ioffset, 6, oleaf6); + PrintLeaf("After, dest", ioffset, 6, oleaf6_2); + } + + (void) puts("\nJU_DELETECOPY_ODD(7):"); + + for (ioffset = 0; ioffset < INDEXES; ++ioffset) + { + INIT2; + PRINTLEAF("Before, src ", ioffset, 7, oleaf7); + PRINTLEAF("Before, dest", ioffset, 7, oleaf7_2); + JU_DELETECOPY_ODD(oleaf7_2, oleaf7, INDEXES, ioffset, 7); + PRINTLEAF("After, src ", ioffset, 7, oleaf7); + PrintLeaf("After, dest", ioffset, 7, oleaf7_2); + } +#endif // JU_64BIT + + return(0); + +} // main() + +#endif // TEST_INSDEL diff --git a/libnetdata/libjudy/src/JudyHS/JudyHS.c b/libnetdata/libjudy/src/JudyHS/JudyHS.c new file mode 100644 index 0000000..21191ba --- /dev/null +++ b/libnetdata/libjudy/src/JudyHS/JudyHS.c @@ -0,0 +1,771 @@ +// @(#) $Revision: 4.1 $ $Source: /judy/src/JudyHS/JudyHS.c +//======================================================================= +// Author Douglas L. Baskins, Dec 2003. +// Permission to use this code is freely granted, provided that this +// statement is retained. +// email - doug@sourcejudy.com -or- dougbaskins@yahoo.com +//======================================================================= + +#include // for memcmp(), memcpy() + +#include // for JudyL* routines/macros + +/* + This routine is a very fast "string" version of an ADT that stores + (JudyHSIns()), retrieves (JudyHSGet()), deletes (JudyHSDel()) and + frees the entire ADT (JudyHSFreeArray()) strings. It uses the "Judy + arrays" JudyL() API as the main workhorse. The length of the string + is included in the calling parameters so that strings with embedded + \0s can be used. The string lengths can be from 0 bytes to whatever + malloc() can handle (~2GB). + + Compile: + + cc -O JudyHS.c -c needs to link with -lJudy (libJudy.a) + + Note: in gcc version 3.3.1, -O2 generates faster code than -O + Note: in gcc version 3.3.2, -O3 generates faster code than -O2 + + NOTES: + +1) There may be some performance issues with 64 bit machines, because I + have not characterized that it yet. + +2) It appears that a modern CPU (>2Ghz) that the instruction times are + much faster that a RAM access, so building up a word from bytes takes + no longer that a whole word access. I am taking advantage of this to + make this code endian neutral. A side effect of this is strings do + not need to be aligned, nor tested to be on to a word boundry. In + older and in slow (RISC) machines, this may be a performance issue. + I have given up trying to optimize for machines that have very slow + mpy, mod, variable shifts and call returns. + +3) JudyHS is very scalable from 1 string to billions (with enough RAM). + The memory usage is also scales with population. I have attempted to + combine the best characteristics of JudyL arrays with Hashing methods + and well designed modern processors (such as the 1.3Ghz Intel + Centrino this is being written on). + + HOW JudyHS WORKS: ( 4[8] means 4 bytes in 32 bit machine and 8 in 64) + + A) A JudyL array is used to separate strings of equal lengths into + their own structures (a different hash table is used for each length + of string). The additional time overhead is very near zero because + of the CPU cache. The space efficiency is improved because the + length need not be stored with the string (ls_t). The "JLHash" ADT + in the test program "StringCompare" is verification of both these + assumptions. + + B) A 32 bit hash value is produced from the string. Many thanks to + the Internet and the author (Bob Jenkins) for coming up with a very + good and fast universal string hash. Next the 32 bit hash number is + used as an Index to another JudyL array. Notice that one (1) JudyL + array is used as a hash table per each string length. If there are + no hash collisions (normally) then the string is copied to a + structure (ls_t) along with room for storing a Value. A flag is + added to the pointer to note it is pointing to a ls_t structure. + Since the lengths of the strings are the same, there is no need to + stored length of string in the ls_t structure. This saves about a + word per string of memory. + + C) When there is a hashing collision (very rare), a JudyL array is + used to decode the next 4[8] bytes of the string. That is, the next + 4[8] bytes of the string are used as the Index. This process is + repeated until the remaining string is unique. The remaining string + (if any) is stored in a (now smaller) ls_t structure. If the + remaining string is less or equal to 4[8] bytes, then the ls_t + structure is not needed and the Value area in the JudyL array is + used. A compile option -DDONOTUSEHASH is available to test this + structure without using hashing (only the JudyL tree is used). This + is equivalent to having all strings hashed to the same bucket. The + speed is still better than all other tree based ADTs I have tested. + An added benefit of this is a very fast "hash collision" resolving. + It could foil hackers that exploit the slow synonym (linked-list) + collision handling property used with most hashing algorithms. If + this is not a necessary property, then a simpler ADT "JLHash" that is + documented the the test program "StringCompare.c" may be used with a + little loss of memory efficiency (because it includes the string + length with the ls_t structure). JudyHS was written to be the + fastest, very scalable, memory efficient, general purpose string ADT + possible. (However, I would like to eat those words someday). (dlb) + +*/ + +#ifdef EXAMPLE_CODE +#include +#include +#include + +#include + +//#include "JudyHS.h" // for Judy.h without JudyHS*() + +// By Doug Baskins Apr 2004 - for JudyHS man page + +#define MAXLINE 1000000 /* max length of line */ +char Index[MAXLINE]; // string to check + +int // Usage: CheckDupLines < file +main() +{ + Pvoid_t PJArray = (PWord_t)NULL; // Judy array. + PWord_t PValue; // ^ Judy array element. + Word_t Bytes; // size of JudyHS array. + Word_t LineNumb = 0; // current line number + Word_t Dups = 0; // number of duplicate lines + + while (fgets(Index, MAXLINE, stdin) != (char *)NULL) + { + LineNumb++; // line number + +// store string into array + JHSI(PValue, PJArray, Index, strlen(Index)); + if (*PValue) // check if duplicate + { + Dups++; // count duplicates + printf("Duplicate lines %lu:%lu:%s", *PValue, LineNumb, Index); + } + else + { + *PValue = LineNumb; // store Line number + } + } + printf("%lu Duplicates, free JudyHS array of %lu Lines\n", + Dups, LineNumb - Dups); + JHSFA(Bytes, PJArray); // free array + printf("The JudyHS array allocated %lu bytes of memory\n", Bytes); + return (0); +} +#endif // EXAMPLE_CODE + +// Note: Use JLAP_INVALID, which is non-zero, to mark pointers to a ls_t +// This makes it compatable with previous versions of JudyL() + +#define IS_PLS(PLS) (((Word_t) (PLS)) & JLAP_INVALID) +#define CLEAR_PLS(PLS) (((Word_t) (PLS)) & (~JLAP_INVALID)) +#define SET_PLS(PLS) (((Word_t) (PLS)) | JLAP_INVALID) + +#define WORDSIZE (sizeof(Word_t)) + +// this is the struct used for "leaf" strings. Note that +// the Value is followed by a "variable" length ls_String array. +// +typedef struct L_EAFSTRING +{ + Word_t ls_Value; // Value area (cannot change size) + uint8_t ls_String[WORDSIZE]; // to fill out to a Word_t size +} ls_t , *Pls_t; + +#define LS_STRUCTOVD (sizeof(ls_t) - WORDSIZE) + +// Calculate size of ls_t including the string of length of LEN. +// +#define LS_WORDLEN(LEN) (((LEN) + LS_STRUCTOVD + WORDSIZE - 1) / WORDSIZE) + +// Copy from 0..4[8] bytes from string to a Word_t +// NOTE: the copy in in little-endian order to take advantage of improved +// memory efficiency of JudyLIns() with smaller numbers +// +#define COPYSTRING4toWORD(WORD,STR,LEN) \ +{ \ + WORD = 0; \ + switch(LEN) \ + { \ + default: /* four and greater */ \ + case 4: \ + WORD += (Word_t)(((uint8_t *)(STR))[3] << 24); \ + case 3: \ + WORD += (Word_t)(((uint8_t *)(STR))[2] << 16); \ + case 2: \ + WORD += (Word_t)(((uint8_t *)(STR))[1] << 8); \ + case 1: \ + WORD += (Word_t)(((uint8_t *)(STR))[0]); \ + case 0: break; \ + } \ +} + +#ifdef JU_64BIT + +// copy from 0..8 bytes from string to Word_t +// +#define COPYSTRING8toWORD(WORD,STR,LEN) \ +{ \ + WORD = 0UL; \ + switch(LEN) \ + { \ + default: /* eight and greater */ \ + case 8: \ + WORD += ((Word_t)((uint8_t *)(STR))[7] << 56); \ + case 7: \ + WORD += ((Word_t)((uint8_t *)(STR))[6] << 48); \ + case 6: \ + WORD += ((Word_t)((uint8_t *)(STR))[5] << 40); \ + case 5: \ + WORD += ((Word_t)((uint8_t *)(STR))[4] << 32); \ + case 4: \ + WORD += ((Word_t)((uint8_t *)(STR))[3] << 24); \ + case 3: \ + WORD += ((Word_t)((uint8_t *)(STR))[2] << 16); \ + case 2: \ + WORD += ((Word_t)((uint8_t *)(STR))[1] << 8); \ + case 1: \ + WORD += ((Word_t)((uint8_t *)(STR))[0]); \ + case 0: break; \ + } \ +} + +#define COPYSTRINGtoWORD COPYSTRING8toWORD + +#else // JU_32BIT + +#define COPYSTRINGtoWORD COPYSTRING4toWORD + +#endif // JU_32BIT + +// set JError_t locally + +#define JU_SET_ERRNO(PJERROR, JERRNO) \ +{ \ + if (PJERROR != (PJError_t) NULL) \ + { \ + if (JERRNO) \ + JU_ERRNO(PJError) = (JERRNO); \ + JU_ERRID(PJERROR) = __LINE__; \ + } \ +} + +//======================================================================= +// This routine must hash string to 24..32 bits. The "goodness" of +// the hash is not as important as its speed. +//======================================================================= + +// hash to no more than 32 bits + +// extern Word_t gHmask; for hash bits experiments + +#define JUDYHASHSTR(HVALUE,STRING,LENGTH) \ +{ \ + uint8_t *p_ = (uint8_t *)(STRING); \ + uint8_t *q_ = p_ + (LENGTH); \ + uint32_t c_ = 0; \ + for (; p_ != q_; ++p_) \ + { \ + c_ = (c_ * 31) + *p_; \ + } \ +/* c_ &= gHmask; see above */ \ + (HVALUE) = c_; \ +} + +// Find String of Len in JudyHS structure, return pointer to associated Value + +PPvoid_t +JudyHSGet(Pcvoid_t PArray, // pointer (^) to structure + void * Str, // pointer to string + Word_t Len // length of string + ) +{ + uint8_t *String = (uint8_t *)Str; + PPvoid_t PPValue; // pointer to Value + Word_t Index; // 4[8] bytes of String + + JLG(PPValue, PArray, Len); // find hash table for strings of Len + if (PPValue == (PPvoid_t) NULL) + return ((PPvoid_t) NULL); // no strings of this Len + +// check for caller error (null pointer) +// + if ((String == (void *) NULL) && (Len != 0)) + return ((PPvoid_t) NULL); // avoid null-pointer dereference + +#ifndef DONOTUSEHASH + if (Len > WORDSIZE) // Hash table not necessary with short + { + uint32_t HValue; // hash of input string + JUDYHASHSTR(HValue, String, Len); // hash to no more than 32 bits + JLG(PPValue, *PPValue, (Word_t)HValue); // get ^ to hash bucket + if (PPValue == (PPvoid_t) NULL) + return ((PPvoid_t) NULL); // no entry in Hash table + } +#endif // DONOTUSEHASH + +/* + Each JudyL array decodes 4[8] bytes of the string. Since the hash + collisions occur very infrequently, the performance is not important. + However, even if the Hash code is not used this method still is + significantly faster than common tree methods (AVL, Red-Black, Splay, + b-tree, etc..). You can compare it yourself with #define DONOTUSEHASH + 1 or putting -DDONOTUSEHASH in the cc line. Use the "StringCompare.c" + code to compare (9Dec2003 dlb). +*/ + while (Len > WORDSIZE) // traverse tree of JudyL arrays + { + if (IS_PLS(*PPValue)) // ^ to JudyL array or ls_t struct? + { + Pls_t Pls; // ls_t struct, termination of tree + Pls = (Pls_t) CLEAR_PLS(*PPValue); // remove flag from ^ + +// if remaining string matches, return ^ to Value, else NULL + + if (memcmp(String, Pls->ls_String, Len) == 0) + return ((PPvoid_t) (&(Pls->ls_Value))); + else + return ((PPvoid_t) NULL); // string does not match + } + else + { + COPYSTRINGtoWORD(Index, String, WORDSIZE); + + JLG(PPValue, *PPValue, Index); // decode next 4[8] bytes + if (PPValue == (PPvoid_t) NULL) // if NULL array, bail out + return ((PPvoid_t) NULL); // string does not match + + String += WORDSIZE; // advance + Len -= WORDSIZE; + } + } + +// Get remaining 1..4[8] bytes left in string + + COPYSTRINGtoWORD(Index, String, Len); + JLG(PPValue, *PPValue, Index); // decode last 1-4[8] bytes + return (PPValue); +} + +// Add string to a tree of JudyL arrays (all lengths must be same) + +static PPvoid_t +insStrJudyLTree(uint8_t * String, // string to add to tree of JudyL arrays + Word_t Len, // length of string + PPvoid_t PPValue, // pointer to root pointer + PJError_t PJError // for returning error info + ) +{ + Word_t Index; // next 4[8] bytes of String + + while (Len > WORDSIZE) // add to JudyL tree + { +// CASE 1, pointer is to a NULL, make a new ls_t leaf + + if (*PPValue == (Pvoid_t)NULL) + { + Pls_t Pls; // memory for a ls_t + Pls = (Pls_t) JudyMalloc(LS_WORDLEN(Len)); + if (Pls == NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NOMEM); + return (PPJERR); + } + Pls->ls_Value = 0; // clear Value word + memcpy(Pls->ls_String, String, Len); // copy to new struct + *PPValue = (Pvoid_t)SET_PLS(Pls); // mark pointer + return ((PPvoid_t) (&Pls->ls_Value)); // return ^ to Value + } // no exit here +// CASE 2: is a ls_t, free (and shorten), then decode into JudyL tree + + if (IS_PLS(*PPValue)) // pointer to a ls_t? (leaf) + { + Pls_t Pls; // ^ to ls_t + uint8_t *String0; // ^ to string in ls_t + Word_t Index0; // 4[8] bytes in string + Word_t FreeLen; // length of ls_t + PPvoid_t PPsplit; + + FreeLen = LS_WORDLEN(Len); // length of ls_t + + Pls = (Pls_t) CLEAR_PLS(*PPValue); // demangle ^ to ls_t + String0 = Pls->ls_String; + if (memcmp(String, String0, Len) == 0) // check if match? + { + return ((PPvoid_t) (&Pls->ls_Value)); // yes, duplicate + } + + *PPValue = NULL; // clear ^ to ls_t and make JudyL + +// This do loop is technically not required, saves multiple JudyFree() +// when storing already sorted strings into structure + + do // decode next 4[8] bytes of string + { // with a JudyL array +// Note: string0 is always aligned + + COPYSTRINGtoWORD(Index0, String0, WORDSIZE); + String0 += WORDSIZE; + COPYSTRINGtoWORD(Index, String, WORDSIZE); + String += WORDSIZE; + Len -= WORDSIZE; + PPsplit = PPValue; // save for split below + PPValue = JudyLIns(PPValue, Index0, PJError); + if (PPValue == PPJERR) + { + JU_SET_ERRNO(PJError, 0); + return (PPJERR); + } + + } while ((Index0 == Index) && (Len > WORDSIZE)); + +// finish storing remainder of string that was in the ls_t + + PPValue = insStrJudyLTree(String0, Len, PPValue, PJError); + if (PPValue == PPJERR) + { + return (PPJERR); + } +// copy old Value to Value in new struct + + *(PWord_t)PPValue = Pls->ls_Value; + +// free the string buffer (ls_t) + + JudyFree((Pvoid_t)Pls, FreeLen); + PPValue = JudyLIns(PPsplit, Index, PJError); + if (PPValue == PPJERR) + { + JU_SET_ERRNO(PJError, 0); + return (PPValue); + } + +// finish remainder of newly inserted string + + PPValue = insStrJudyLTree(String, Len, PPValue, PJError); + return (PPValue); + } // no exit here +// CASE 3, more JudyL arrays, decode to next tree + + COPYSTRINGtoWORD(Index, String, WORDSIZE); + Len -= WORDSIZE; + String += WORDSIZE; + + PPValue = JudyLIns(PPValue, Index, PJError); // next 4[8] bytes + if (PPValue == PPJERR) + { + JU_SET_ERRNO(PJError, 0); + return (PPValue); + } + } +// this is done outside of loop so "Len" can be an unsigned number + + COPYSTRINGtoWORD(Index, String, Len); + PPValue = JudyLIns(PPValue, Index, PJError); // remaining 4[8] bytes + + return (PPValue); +} + + +// Insert string to JudyHS structure, return pointer to associated Value + +PPvoid_t +JudyHSIns(PPvoid_t PPArray, // ^ to JudyHashArray name + void * Str, // pointer to string + Word_t Len, // length of string + PJError_t PJError // optional, for returning error info + ) +{ + uint8_t * String = (uint8_t *)Str; + PPvoid_t PPValue; + +// string can only be NULL if Len is 0. + + if ((String == (uint8_t *) NULL) && (Len != 0UL)) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX); + return (PPJERR); + } + JLG(PPValue, *PPArray, Len); // JudyL hash table for strings of Len + if (PPValue == (PPvoid_t) NULL) // make new if missing, (very rare) + { + PPValue = JudyLIns(PPArray, Len, PJError); + if (PPValue == PPJERR) + { + JU_SET_ERRNO(PJError, 0); + return (PPJERR); + } + } +#ifndef DONOTUSEHASH + if (Len > WORDSIZE) + { + uint32_t HValue; // hash of input string + JUDYHASHSTR(HValue, String, Len); // hash to no more than 32 bits + PPValue = JudyLIns(PPValue, (Word_t)HValue, PJError); + if (PPValue == PPJERR) + { + JU_SET_ERRNO(PJError, 0); + return (PPJERR); + } + } +#endif // DONOTUSEHASH + + PPValue = insStrJudyLTree(String, Len, PPValue, PJError); // add string + return (PPValue); // ^ to Value +} + +// Delete string from tree of JudyL arrays (all Lens must be same) + +static int +delStrJudyLTree(uint8_t * String, // delete from tree of JudyL arrays + Word_t Len, // length of string + PPvoid_t PPValue, // ^ to hash bucket + PJError_t PJError // for returning error info + ) +{ + PPvoid_t PPValueN; // next pointer + Word_t Index; + int Ret; // -1=failed, 1=success, 2=quit del + + if (IS_PLS(*PPValue)) // is pointer to ls_t? + { + Pls_t Pls; + Pls = (Pls_t) CLEAR_PLS(*PPValue); // demangle pointer + JudyFree((Pvoid_t)Pls, LS_WORDLEN(Len)); // free the ls_t + + *PPValue = (Pvoid_t)NULL; // clean pointer + return (1); // successfully deleted + } + + if (Len > WORDSIZE) // delete from JudyL tree, not leaf + { + COPYSTRINGtoWORD(Index, String, WORDSIZE); // get Index + JLG(PPValueN, *PPValue, Index); // get pointer to next JudyL array + + String += WORDSIZE; // advance to next 4[8] bytes + Len -= WORDSIZE; + + Ret = delStrJudyLTree(String, Len, PPValueN, PJError); + if (Ret != 1) return(Ret); + + if (*PPValueN == (PPvoid_t) NULL) + { +// delete JudyL element from tree + + Ret = JudyLDel(PPValue, Index, PJError); + } + } + else + { + COPYSTRINGtoWORD(Index, String, Len); // get leaf element + +// delete last 1-4[8] bytes from leaf element + + Ret = JudyLDel(PPValue, Index, PJError); + } + return (Ret); +} + +// Delete string from JHS structure + +int +JudyHSDel(PPvoid_t PPArray, // ^ to JudyHashArray struct + void * Str, // pointer to string + Word_t Len, // length of string + PJError_t PJError // optional, for returning error info + ) +{ + uint8_t * String = (uint8_t *)Str; + PPvoid_t PPBucket, PPHtble; + int Ret; // return bool from Delete routine +#ifndef DONOTUSEHASH + uint32_t HValue = 0; // hash value of input string +#endif // DONOTUSEHASH + + if (PPArray == NULL) + return (0); // no pointer, return not found + +// This is a little slower than optimum method, but not much in new CPU +// Verify that string is in the structure -- simplifies future assumptions + + if (JudyHSGet(*PPArray, String, Len) == (PPvoid_t) NULL) + return (0); // string not found, return + +// string is in structure, so testing for absence is not necessary + + JLG(PPHtble, *PPArray, Len); // JudyL hash table for strings of Len + +#ifdef DONOTUSEHASH + PPBucket = PPHtble; // simulate below code +#else // USEHASH + if (Len > WORDSIZE) + { + JUDYHASHSTR(HValue, String, Len); // hash to no more than 32 bits + +// get pointer to hash bucket + + JLG(PPBucket, *PPHtble, (Word_t)HValue); + } + else + { + PPBucket = PPHtble; // no bucket to JLGet + } +#endif // USEHASH + +// delete from JudyL tree +// + Ret = delStrJudyLTree(String, Len, PPBucket, PJError); + if (Ret != 1) + { + JU_SET_ERRNO(PJError, 0); + return(-1); + } +// handle case of missing JudyL array from hash table and length table + + if (*PPBucket == (Pvoid_t)NULL) // if JudyL tree gone + { +#ifndef DONOTUSEHASH + if (Len > WORDSIZE) + { +// delete entry in Hash table + + Ret = JudyLDel(PPHtble, (Word_t)HValue, PJError); + if (Ret != 1) + { + JU_SET_ERRNO(PJError, 0); + return(-1); + } + } +#endif // USEHASH + if (*PPHtble == (PPvoid_t) NULL) // if Hash table gone + { +// delete entry from the String length table + + Ret = JudyLDel(PPArray, Len, PJError); + if (Ret != 1) + { + JU_SET_ERRNO(PJError, 0); + return(-1); + } + } + } + return (1); // success +} + +static Word_t +delJudyLTree(PPvoid_t PPValue, // ^ to JudyL root pointer + Word_t Len, // length of string + PJError_t PJError) // for returning error info +{ + Word_t bytes_freed = 0; // bytes freed at point + Word_t bytes_total = 0; // accumulated bytes freed + PPvoid_t PPValueN; + +// Pointer is to another tree of JudyL arrays or ls_t struct + + if (Len > WORDSIZE) // more depth to tree + { + Word_t NEntry; + +// Pointer is to a ls_t struct + + if (IS_PLS(*PPValue)) + { + Pls_t Pls; + Word_t freewords; + + freewords = LS_WORDLEN(Len); // calculate length + Pls = (Pls_t)CLEAR_PLS(*PPValue); // demangle pointer + +// *PPValue = (Pvoid_t)NULL; // clean pointer + JudyFree((Pvoid_t)Pls, freewords); // free the ls_t + + return(freewords * WORDSIZE); + } +// else +// Walk all the entrys in the JudyL array + + NEntry = 0; // start at beginning + for (PPValueN = JudyLFirst(*PPValue, &NEntry, PJError); + (PPValueN != (PPvoid_t) NULL) && (PPValueN != PPJERR); + PPValueN = JudyLNext(*PPValue, &NEntry, PJError)) + { +// recurse to the next level in the tree of arrays + + bytes_freed = delJudyLTree(PPValueN, Len - WORDSIZE, PJError); + if (bytes_freed == JERR) return(JERR); + bytes_total += bytes_freed; + } + if (PPValueN == PPJERR) return(JERR); + +// now free this JudyL array + + bytes_freed = JudyLFreeArray(PPValue, PJError); + if (bytes_freed == JERR) return(JERR); + bytes_total += bytes_freed; + + return(bytes_total); // return amount freed + } +// else + +// Pointer to simple JudyL array + + bytes_freed = JudyLFreeArray(PPValue, PJError); + + return(bytes_freed); +} + + +Word_t // bytes freed +JudyHSFreeArray(PPvoid_t PPArray, // ^ to JudyHashArray struct + PJError_t PJError // optional, for returning error info + ) +{ + Word_t Len; // start at beginning + Word_t bytes_freed; // bytes freed at this level. + Word_t bytes_total; // bytes total at all levels. + PPvoid_t PPHtble; + + if (PPArray == NULL) + return (0); // no pointer, return none + +// Walk the string length table for subsidary hash structs +// NOTE: This is necessary to determine the depth of the tree + + bytes_freed = 0; + bytes_total = 0; + Len = 0; // walk to length table + + for (PPHtble = JudyLFirst(*PPArray, &Len, PJError); + (PPHtble != (PPvoid_t) NULL) && (PPHtble != PPJERR); + PPHtble = JudyLNext(*PPArray, &Len, PJError)) + { + PPvoid_t PPValueH; + +#ifndef DONOTUSEHASH + if (Len > WORDSIZE) + { + Word_t HEntry = 0; // walk the hash tables + + for (PPValueH = JudyLFirst(*PPHtble, &HEntry, PJError); + (PPValueH != (PPvoid_t) NULL) && (PPValueH != PPJERR); + PPValueH = JudyLNext(*PPHtble, &HEntry, PJError)) + { + bytes_freed = delJudyLTree(PPValueH, Len, PJError); + if (bytes_freed == JERR) return(JERR); + bytes_total += bytes_freed; + } + + if (PPValueH == PPJERR) return(JERR); + +// free the Hash table for this length of string + + bytes_freed = JudyLFreeArray(PPHtble, PJError); + if (bytes_freed == JERR) return(JERR); + bytes_total += bytes_freed; + } + else +#endif // DONOTUSEHASH + { + PPValueH = PPHtble; // simulate hash table + + bytes_freed = delJudyLTree(PPValueH, Len, PJError); + if (bytes_freed == JERR) return(JERR); + bytes_total += bytes_freed; + } + } + if (PPHtble == PPJERR) return(JERR); + +// free the length table + + bytes_freed = JudyLFreeArray(PPArray, PJError); + if (bytes_freed == JERR) return(JERR); + + bytes_total += bytes_freed; + + return(bytes_total); // return bytes freed +} diff --git a/libnetdata/libjudy/src/JudyL/JudyL.h b/libnetdata/libjudy/src/JudyL/JudyL.h new file mode 100644 index 0000000..d901969 --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyL.h @@ -0,0 +1,505 @@ +#ifndef _JUDYL_INCLUDED +#define _JUDYL_INCLUDED +// _________________ +// +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.41 $ $Source: /judy/src/JudyL/JudyL.h $ + +// **************************************************************************** +// JUDYL -- SMALL/LARGE AND/OR CLUSTERED/SPARSE ARRAYS +// +// -by- +// +// Douglas L. Baskins +// doug@sourcejudy.com +// +// Judy arrays are designed to be used instead of arrays. The performance +// suggests the reason why Judy arrays are thought of as arrays, instead of +// trees. They are remarkably memory efficient at all populations. +// Implemented as a hybrid digital tree (but really a state machine, see +// below), Judy arrays feature fast insert/retrievals, fast near neighbor +// searching, and contain a population tree for extremely fast ordinal related +// retrievals. +// +// CONVENTIONS: +// +// - The comments here refer to 32-bit [64-bit] systems. +// +// - BranchL, LeafL refer to linear branches and leaves (small populations), +// except LeafL does not actually appear as such; rather, Leaf1..3 [Leaf1..7] +// is used to represent leaf Index sizes, and LeafW refers to a Leaf with +// full (Long) word Indexes, which is also a type of linear leaf. Note that +// root-level LeafW (Leaf4 [Leaf8]) leaves are called LEAFW. +// +// - BranchB, LeafB1 refer to bitmap branches and leaves (intermediate +// populations). +// +// - BranchU refers to uncompressed branches. An uncompressed branch has 256 +// JPs, some of which could be null. Note: All leaves are compressed (and +// sorted), or else an expanse is full (FullPopu), so there is no LeafU +// equivalent to BranchU. +// +// - "Popu" is short for "Population". +// - "Pop1" refers to actual population (base 1). +// - "Pop0" refers to Pop1 - 1 (base 0), the way populations are stored in data +// structures. +// +// - Branches and Leaves are both named by the number of bytes in their Pop0 +// field. In the case of Leaves, the same number applies to the Index sizes. +// +// - The representation of many numbers as hex is a relatively safe and +// portable way to get desired bitpatterns as unsigned longs. +// +// - Some preprocessors cant handle single apostrophe characters within +// #ifndef code, so here, delete all instead. + + +#include "JudyPrivate.h" // includes Judy.h in turn. +#include "JudyPrivateBranch.h" // support for branches. + + +// **************************************************************************** +// JUDYL ROOT POINTER (JRP) AND JUDYL POINTER (JP) TYPE FIELDS +// **************************************************************************** + +typedef enum // uint8_t -- but C does not support this type of enum. +{ + +// JP NULL TYPES: +// +// There is a series of cJL_JPNULL* Types because each one pre-records a +// different Index Size for when the first Index is inserted in the previously +// null JP. They must start >= 8 (three bits). +// +// Note: These Types must be in sequential order for doing relative +// calculations between them. + + cJL_JPNULL1 = 1, + // Index Size 1[1] byte when 1 Index inserted. + cJL_JPNULL2, // Index Size 2[2] bytes when 1 Index inserted. + cJL_JPNULL3, // Index Size 3[3] bytes when 1 Index inserted. + +#ifndef JU_64BIT +#define cJL_JPNULLMAX cJL_JPNULL3 +#else + cJL_JPNULL4, // Index Size 4[4] bytes when 1 Index inserted. + cJL_JPNULL5, // Index Size 5[5] bytes when 1 Index inserted. + cJL_JPNULL6, // Index Size 6[6] bytes when 1 Index inserted. + cJL_JPNULL7, // Index Size 7[7] bytes when 1 Index inserted. +#define cJL_JPNULLMAX cJL_JPNULL7 +#endif + + +// JP BRANCH TYPES: +// +// Note: There are no state-1 branches; only leaves reside at state 1. + +// Linear branches: +// +// Note: These Types must be in sequential order for doing relative +// calculations between them. + + cJL_JPBRANCH_L2, // 2[2] bytes Pop0, 1[5] bytes Dcd. + cJL_JPBRANCH_L3, // 3[3] bytes Pop0, 0[4] bytes Dcd. + +#ifdef JU_64BIT + cJL_JPBRANCH_L4, // [4] bytes Pop0, [3] bytes Dcd. + cJL_JPBRANCH_L5, // [5] bytes Pop0, [2] bytes Dcd. + cJL_JPBRANCH_L6, // [6] bytes Pop0, [1] byte Dcd. + cJL_JPBRANCH_L7, // [7] bytes Pop0, [0] bytes Dcd. +#endif + + cJL_JPBRANCH_L, // note: DcdPopO field not used. + +// Bitmap branches: +// +// Note: These Types must be in sequential order for doing relative +// calculations between them. + + cJL_JPBRANCH_B2, // 2[2] bytes Pop0, 1[5] bytes Dcd. + cJL_JPBRANCH_B3, // 3[3] bytes Pop0, 0[4] bytes Dcd. + +#ifdef JU_64BIT + cJL_JPBRANCH_B4, // [4] bytes Pop0, [3] bytes Dcd. + cJL_JPBRANCH_B5, // [5] bytes Pop0, [2] bytes Dcd. + cJL_JPBRANCH_B6, // [6] bytes Pop0, [1] byte Dcd. + cJL_JPBRANCH_B7, // [7] bytes Pop0, [0] bytes Dcd. +#endif + + cJL_JPBRANCH_B, // note: DcdPopO field not used. + +// Uncompressed branches: +// +// Note: These Types must be in sequential order for doing relative +// calculations between them. + + cJL_JPBRANCH_U2, // 2[2] bytes Pop0, 1[5] bytes Dcd. + cJL_JPBRANCH_U3, // 3[3] bytes Pop0, 0[4] bytes Dcd. + +#ifdef JU_64BIT + cJL_JPBRANCH_U4, // [4] bytes Pop0, [3] bytes Dcd. + cJL_JPBRANCH_U5, // [5] bytes Pop0, [2] bytes Dcd. + cJL_JPBRANCH_U6, // [6] bytes Pop0, [1] byte Dcd. + cJL_JPBRANCH_U7, // [7] bytes Pop0, [0] bytes Dcd. +#endif + + cJL_JPBRANCH_U, // note: DcdPopO field not used. + + +// JP LEAF TYPES: + +// Linear leaves: +// +// Note: These Types must be in sequential order for doing relative +// calculations between them. +// +// Note: There is no full-word (4-byte [8-byte]) Index leaf under a JP because +// non-root-state leaves only occur under branches that decode at least one +// byte. Full-word, root-state leaves are under a JRP, not a JP. However, in +// the code a "fake" JP can be created temporarily above a root-state leaf. + + cJL_JPLEAF1, // 1[1] byte Pop0, 2 bytes Dcd. + cJL_JPLEAF2, // 2[2] bytes Pop0, 1[5] bytes Dcd. + cJL_JPLEAF3, // 3[3] bytes Pop0, 0[4] bytes Dcd. + +#ifdef JU_64BIT + cJL_JPLEAF4, // [4] bytes Pop0, [3] bytes Dcd. + cJL_JPLEAF5, // [5] bytes Pop0, [2] bytes Dcd. + cJL_JPLEAF6, // [6] bytes Pop0, [1] byte Dcd. + cJL_JPLEAF7, // [7] bytes Pop0, [0] bytes Dcd. +#endif + +// Bitmap leaf; Index Size == 1: +// +// Note: These are currently only supported at state 1. At other states the +// bitmap would grow from 256 to 256^2, 256^3, ... bits, which would not be +// efficient.. + + cJL_JPLEAF_B1, // 1[1] byte Pop0, 2[6] bytes Dcd. + +// Full population; Index Size == 1 virtual leaf: +// +// Note: JudyL has no cJL_JPFULLPOPU1 equivalent to cJ1_JPFULLPOPU1, because +// in the JudyL case this could result in a values-only leaf of up to 256 words +// (value areas) that would be slow to insert/delete. + + +// JP IMMEDIATES; leaves (Indexes) stored inside a JP: +// +// The second numeric suffix is the Pop1 for each type. As the Index Size +// increases, the maximum possible population decreases. +// +// Note: These Types must be in sequential order in each group (Index Size), +// and the groups in correct order too, for doing relative calculations between +// them. For example, since these Types enumerate the Pop1 values (unlike +// other JP Types where there is a Pop0 value in the JP), the maximum Pop1 for +// each Index Size is computable. +// +// All enums equal or above this point are cJL_JPIMMEDs. + + cJL_JPIMMED_1_01, // Index Size = 1, Pop1 = 1. + cJL_JPIMMED_2_01, // Index Size = 2, Pop1 = 1. + cJL_JPIMMED_3_01, // Index Size = 3, Pop1 = 1. + +#ifdef JU_64BIT + cJL_JPIMMED_4_01, // Index Size = 4, Pop1 = 1. + cJL_JPIMMED_5_01, // Index Size = 5, Pop1 = 1. + cJL_JPIMMED_6_01, // Index Size = 6, Pop1 = 1. + cJL_JPIMMED_7_01, // Index Size = 7, Pop1 = 1. +#endif + + cJL_JPIMMED_1_02, // Index Size = 1, Pop1 = 2. + cJL_JPIMMED_1_03, // Index Size = 1, Pop1 = 3. + +#ifdef JU_64BIT + cJL_JPIMMED_1_04, // Index Size = 1, Pop1 = 4. + cJL_JPIMMED_1_05, // Index Size = 1, Pop1 = 5. + cJL_JPIMMED_1_06, // Index Size = 1, Pop1 = 6. + cJL_JPIMMED_1_07, // Index Size = 1, Pop1 = 7. + + cJL_JPIMMED_2_02, // Index Size = 2, Pop1 = 2. + cJL_JPIMMED_2_03, // Index Size = 2, Pop1 = 3. + + cJL_JPIMMED_3_02, // Index Size = 3, Pop1 = 2. +#endif + +// This special Type is merely a sentinel for doing relative calculations. +// This value should not be used in switch statements (to avoid allocating code +// for it), which is also why it appears at the end of the enum list. + + cJL_JPIMMED_CAP + +} jpL_Type_t; + + +// RELATED VALUES: + +// Index Size (state) for leaf JP, and JP type based on Index Size (state): + +#define JL_LEAFINDEXSIZE(jpType) ((jpType) - cJL_JPLEAF1 + 1) +#define JL_LEAFTYPE(IndexSize) ((IndexSize) + cJL_JPLEAF1 - 1) + + +// MAXIMUM POPULATIONS OF LINEAR LEAVES: + +#ifndef JU_64BIT // 32-bit + +#define J_L_MAXB (sizeof(Word_t) * 64) +#define ALLOCSIZES { 3, 5, 7, 11, 15, 23, 32, 47, 64, TERMINATOR } // in words. +#define cJL_LEAF1_MAXWORDS (32) // max Leaf1 size in words. + +// Note: cJL_LEAF1_MAXPOP1 is chosen such that the index portion is less than +// 32 bytes -- the number of bytes the index takes in a bitmap leaf. + +#define cJL_LEAF1_MAXPOP1 \ + ((cJL_LEAF1_MAXWORDS * cJU_BYTESPERWORD)/(1 + cJU_BYTESPERWORD)) +#define cJL_LEAF2_MAXPOP1 (J_L_MAXB / (2 + cJU_BYTESPERWORD)) +#define cJL_LEAF3_MAXPOP1 (J_L_MAXB / (3 + cJU_BYTESPERWORD)) +#define cJL_LEAFW_MAXPOP1 \ + ((J_L_MAXB - cJU_BYTESPERWORD) / (2 * cJU_BYTESPERWORD)) + +#else // 64-bit + +#define J_L_MAXB (sizeof(Word_t) * 64) +#define ALLOCSIZES { 3, 5, 7, 11, 15, 23, 32, 47, 64, TERMINATOR } // in words. +#define cJL_LEAF1_MAXWORDS (15) // max Leaf1 size in words. + +#define cJL_LEAF1_MAXPOP1 \ + ((cJL_LEAF1_MAXWORDS * cJU_BYTESPERWORD)/(1 + cJU_BYTESPERWORD)) +#define cJL_LEAF2_MAXPOP1 (J_L_MAXB / (2 + cJU_BYTESPERWORD)) +#define cJL_LEAF3_MAXPOP1 (J_L_MAXB / (3 + cJU_BYTESPERWORD)) +#define cJL_LEAF4_MAXPOP1 (J_L_MAXB / (4 + cJU_BYTESPERWORD)) +#define cJL_LEAF5_MAXPOP1 (J_L_MAXB / (5 + cJU_BYTESPERWORD)) +#define cJL_LEAF6_MAXPOP1 (J_L_MAXB / (6 + cJU_BYTESPERWORD)) +#define cJL_LEAF7_MAXPOP1 (J_L_MAXB / (7 + cJU_BYTESPERWORD)) +#define cJL_LEAFW_MAXPOP1 \ + ((J_L_MAXB - cJU_BYTESPERWORD) / (2 * cJU_BYTESPERWORD)) + +#endif // 64-bit + + +// MAXIMUM POPULATIONS OF IMMEDIATE JPs: +// +// These specify the maximum Population of immediate JPs with various Index +// Sizes (== sizes of remaining undecoded Index bits). Since the JP Types enum +// already lists all the immediates in order by state and size, calculate these +// values from it to avoid redundancy. + +#define cJL_IMMED1_MAXPOP1 ((cJU_BYTESPERWORD - 1) / 1) // 3 [7]. +#define cJL_IMMED2_MAXPOP1 ((cJU_BYTESPERWORD - 1) / 2) // 1 [3]. +#define cJL_IMMED3_MAXPOP1 ((cJU_BYTESPERWORD - 1) / 3) // 1 [2]. + +#ifdef JU_64BIT +#define cJL_IMMED4_MAXPOP1 ((cJU_BYTESPERWORD - 1) / 4) // [1]. +#define cJL_IMMED5_MAXPOP1 ((cJU_BYTESPERWORD - 1) / 5) // [1]. +#define cJL_IMMED6_MAXPOP1 ((cJU_BYTESPERWORD - 1) / 6) // [1]. +#define cJL_IMMED7_MAXPOP1 ((cJU_BYTESPERWORD - 1) / 7) // [1]. +#endif + + +// **************************************************************************** +// JUDYL LEAF BITMAP (JLLB) SUPPORT +// **************************************************************************** +// +// Assemble bitmap leaves out of smaller units that put bitmap subexpanses +// close to their associated pointers. Why not just use a bitmap followed by a +// series of pointers? (See 4.27.) Turns out this wastes a cache fill on +// systems with smaller cache lines than the assumed value cJU_WORDSPERCL. + +#define JL_JLB_BITMAP(Pjlb, Subexp) ((Pjlb)->jLlb_jLlbs[Subexp].jLlbs_Bitmap) +#define JL_JLB_PVALUE(Pjlb, Subexp) ((Pjlb)->jLlb_jLlbs[Subexp].jLlbs_PValue) + +typedef struct J__UDYL_LEAF_BITMAP_SUBEXPANSE +{ + BITMAPL_t jLlbs_Bitmap; + Pjv_t jLlbs_PValue; + +} jLlbs_t; + +typedef struct J__UDYL_LEAF_BITMAP +{ + jLlbs_t jLlb_jLlbs[cJU_NUMSUBEXPL]; + +} jLlb_t, * PjLlb_t; + +// Words per bitmap leaf: + +#define cJL_WORDSPERLEAFB1 (sizeof(jLlb_t) / cJU_BYTESPERWORD) + + +// **************************************************************************** +// MEMORY ALLOCATION SUPPORT +// **************************************************************************** + +// ARRAY-GLOBAL INFORMATION: +// +// At the cost of an occasional additional cache fill, this object, which is +// pointed at by a JRP and in turn points to a JP_BRANCH*, carries array-global +// information about a JudyL array that has sufficient population to amortize +// the cost. The jpm_Pop0 field prevents having to add up the total population +// for the array in insert, delete, and count code. The jpm_JP field prevents +// having to build a fake JP for entry to a state machine; however, the +// jp_DcdPopO field in jpm_JP, being one byte too small, is not used. +// +// Note: Struct fields are ordered to keep "hot" data in the first 8 words +// (see left-margin comments) for machines with 8-word cache lines, and to keep +// sub-word fields together for efficient packing. + +typedef struct J_UDYL_POPULATION_AND_MEMORY +{ +/* 1 */ Word_t jpm_Pop0; // total population-1 in array. +/* 2 */ jp_t jpm_JP; // JP to first branch; see above. +/* 4 */ Word_t jpm_LastUPop0; // last jpm_Pop0 when convert to BranchU +/* 7 */ Pjv_t jpm_PValue; // pointer to value to return. +// Note: Field names match PJError_t for convenience in macros: +/* 8 */ char je_Errno; // one of the enums in Judy.h. +/* 8/9 */ int je_ErrID; // often an internal source line number. +/* 9/10 */ Word_t jpm_TotalMemWords; // words allocated in array. +} jLpm_t, *PjLpm_t; + + +// TABLES FOR DETERMINING IF LEAVES HAVE ROOM TO GROW: +// +// These tables indicate if a given memory chunk can support growth of a given +// object into wasted (rounded-up) memory in the chunk. Note: This violates +// the hiddenness of the JudyMalloc code. + +extern const uint8_t j__L_Leaf1PopToWords[cJL_LEAF1_MAXPOP1 + 1]; +extern const uint8_t j__L_Leaf2PopToWords[cJL_LEAF2_MAXPOP1 + 1]; +extern const uint8_t j__L_Leaf3PopToWords[cJL_LEAF3_MAXPOP1 + 1]; +#ifdef JU_64BIT +extern const uint8_t j__L_Leaf4PopToWords[cJL_LEAF4_MAXPOP1 + 1]; +extern const uint8_t j__L_Leaf5PopToWords[cJL_LEAF5_MAXPOP1 + 1]; +extern const uint8_t j__L_Leaf6PopToWords[cJL_LEAF6_MAXPOP1 + 1]; +extern const uint8_t j__L_Leaf7PopToWords[cJL_LEAF7_MAXPOP1 + 1]; +#endif +extern const uint8_t j__L_LeafWPopToWords[cJL_LEAFW_MAXPOP1 + 1]; +extern const uint8_t j__L_LeafVPopToWords[]; + +// These tables indicate where value areas start: + +extern const uint8_t j__L_Leaf1Offset [cJL_LEAF1_MAXPOP1 + 1]; +extern const uint8_t j__L_Leaf2Offset [cJL_LEAF2_MAXPOP1 + 1]; +extern const uint8_t j__L_Leaf3Offset [cJL_LEAF3_MAXPOP1 + 1]; +#ifdef JU_64BIT +extern const uint8_t j__L_Leaf4Offset [cJL_LEAF4_MAXPOP1 + 1]; +extern const uint8_t j__L_Leaf5Offset [cJL_LEAF5_MAXPOP1 + 1]; +extern const uint8_t j__L_Leaf6Offset [cJL_LEAF6_MAXPOP1 + 1]; +extern const uint8_t j__L_Leaf7Offset [cJL_LEAF7_MAXPOP1 + 1]; +#endif +extern const uint8_t j__L_LeafWOffset [cJL_LEAFW_MAXPOP1 + 1]; + +// Also define macros to hide the details in the code using these tables. + +#define JL_LEAF1GROWINPLACE(Pop1) \ + J__U_GROWCK(Pop1, cJL_LEAF1_MAXPOP1, j__L_Leaf1PopToWords) +#define JL_LEAF2GROWINPLACE(Pop1) \ + J__U_GROWCK(Pop1, cJL_LEAF2_MAXPOP1, j__L_Leaf2PopToWords) +#define JL_LEAF3GROWINPLACE(Pop1) \ + J__U_GROWCK(Pop1, cJL_LEAF3_MAXPOP1, j__L_Leaf3PopToWords) +#ifdef JU_64BIT +#define JL_LEAF4GROWINPLACE(Pop1) \ + J__U_GROWCK(Pop1, cJL_LEAF4_MAXPOP1, j__L_Leaf4PopToWords) +#define JL_LEAF5GROWINPLACE(Pop1) \ + J__U_GROWCK(Pop1, cJL_LEAF5_MAXPOP1, j__L_Leaf5PopToWords) +#define JL_LEAF6GROWINPLACE(Pop1) \ + J__U_GROWCK(Pop1, cJL_LEAF6_MAXPOP1, j__L_Leaf6PopToWords) +#define JL_LEAF7GROWINPLACE(Pop1) \ + J__U_GROWCK(Pop1, cJL_LEAF7_MAXPOP1, j__L_Leaf7PopToWords) +#endif +#define JL_LEAFWGROWINPLACE(Pop1) \ + J__U_GROWCK(Pop1, cJL_LEAFW_MAXPOP1, j__L_LeafWPopToWords) +#define JL_LEAFVGROWINPLACE(Pop1) \ + J__U_GROWCK(Pop1, cJU_BITSPERSUBEXPL, j__L_LeafVPopToWords) + +#define JL_LEAF1VALUEAREA(Pjv,Pop1) (((PWord_t)(Pjv)) + j__L_Leaf1Offset[Pop1]) +#define JL_LEAF2VALUEAREA(Pjv,Pop1) (((PWord_t)(Pjv)) + j__L_Leaf2Offset[Pop1]) +#define JL_LEAF3VALUEAREA(Pjv,Pop1) (((PWord_t)(Pjv)) + j__L_Leaf3Offset[Pop1]) +#ifdef JU_64BIT +#define JL_LEAF4VALUEAREA(Pjv,Pop1) (((PWord_t)(Pjv)) + j__L_Leaf4Offset[Pop1]) +#define JL_LEAF5VALUEAREA(Pjv,Pop1) (((PWord_t)(Pjv)) + j__L_Leaf5Offset[Pop1]) +#define JL_LEAF6VALUEAREA(Pjv,Pop1) (((PWord_t)(Pjv)) + j__L_Leaf6Offset[Pop1]) +#define JL_LEAF7VALUEAREA(Pjv,Pop1) (((PWord_t)(Pjv)) + j__L_Leaf7Offset[Pop1]) +#endif +#define JL_LEAFWVALUEAREA(Pjv,Pop1) (((PWord_t)(Pjv)) + j__L_LeafWOffset[Pop1]) + +#define JL_LEAF1POPTOWORDS(Pop1) (j__L_Leaf1PopToWords[Pop1]) +#define JL_LEAF2POPTOWORDS(Pop1) (j__L_Leaf2PopToWords[Pop1]) +#define JL_LEAF3POPTOWORDS(Pop1) (j__L_Leaf3PopToWords[Pop1]) +#ifdef JU_64BIT +#define JL_LEAF4POPTOWORDS(Pop1) (j__L_Leaf4PopToWords[Pop1]) +#define JL_LEAF5POPTOWORDS(Pop1) (j__L_Leaf5PopToWords[Pop1]) +#define JL_LEAF6POPTOWORDS(Pop1) (j__L_Leaf6PopToWords[Pop1]) +#define JL_LEAF7POPTOWORDS(Pop1) (j__L_Leaf7PopToWords[Pop1]) +#endif +#define JL_LEAFWPOPTOWORDS(Pop1) (j__L_LeafWPopToWords[Pop1]) +#define JL_LEAFVPOPTOWORDS(Pop1) (j__L_LeafVPopToWords[Pop1]) + + +// FUNCTIONS TO ALLOCATE OBJECTS: + +PjLpm_t j__udyLAllocJLPM(void); // constant size. + +Pjbl_t j__udyLAllocJBL( PjLpm_t); // constant size. +Pjbb_t j__udyLAllocJBB( PjLpm_t); // constant size. +Pjp_t j__udyLAllocJBBJP(Word_t, PjLpm_t); +Pjbu_t j__udyLAllocJBU( PjLpm_t); // constant size. + +Pjll_t j__udyLAllocJLL1( Word_t, PjLpm_t); +Pjll_t j__udyLAllocJLL2( Word_t, PjLpm_t); +Pjll_t j__udyLAllocJLL3( Word_t, PjLpm_t); + +#ifdef JU_64BIT +Pjll_t j__udyLAllocJLL4( Word_t, PjLpm_t); +Pjll_t j__udyLAllocJLL5( Word_t, PjLpm_t); +Pjll_t j__udyLAllocJLL6( Word_t, PjLpm_t); +Pjll_t j__udyLAllocJLL7( Word_t, PjLpm_t); +#endif + +Pjlw_t j__udyLAllocJLW( Word_t ); // no PjLpm_t needed. +PjLlb_t j__udyLAllocJLB1( PjLpm_t); // constant size. +Pjv_t j__udyLAllocJV( Word_t, PjLpm_t); + + +// FUNCTIONS TO FREE OBJECTS: + +void j__udyLFreeJLPM( PjLpm_t, PjLpm_t); // constant size. + +void j__udyLFreeJBL( Pjbl_t, PjLpm_t); // constant size. +void j__udyLFreeJBB( Pjbb_t, PjLpm_t); // constant size. +void j__udyLFreeJBBJP(Pjp_t, Word_t, PjLpm_t); +void j__udyLFreeJBU( Pjbu_t, PjLpm_t); // constant size. + +void j__udyLFreeJLL1( Pjll_t, Word_t, PjLpm_t); +void j__udyLFreeJLL2( Pjll_t, Word_t, PjLpm_t); +void j__udyLFreeJLL3( Pjll_t, Word_t, PjLpm_t); + +#ifdef JU_64BIT +void j__udyLFreeJLL4( Pjll_t, Word_t, PjLpm_t); +void j__udyLFreeJLL5( Pjll_t, Word_t, PjLpm_t); +void j__udyLFreeJLL6( Pjll_t, Word_t, PjLpm_t); +void j__udyLFreeJLL7( Pjll_t, Word_t, PjLpm_t); +#endif + +void j__udyLFreeJLW( Pjlw_t, Word_t, PjLpm_t); +void j__udyLFreeJLB1( PjLlb_t, PjLpm_t); // constant size. +void j__udyLFreeJV( Pjv_t, Word_t, PjLpm_t); +void j__udyLFreeSM( Pjp_t, PjLpm_t); // everything below Pjp. + +#endif // ! _JUDYL_INCLUDED diff --git a/libnetdata/libjudy/src/JudyL/JudyLByCount.c b/libnetdata/libjudy/src/JudyL/JudyLByCount.c new file mode 100644 index 0000000..c5a0047 --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLByCount.c @@ -0,0 +1,954 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.28 $ $Source: /judy/src/JudyCommon/JudyByCount.c $ +// +// Judy*ByCount() function for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. +// +// Compile with -DNOSMARTJBB, -DNOSMARTJBU, and/or -DNOSMARTJLB to build a +// version with cache line optimizations deleted, for testing. +// +// Judy*ByCount() is a conceptual although not literal inverse of Judy*Count(). +// Judy*Count() takes a pair of Indexes, and allows finding the ordinal of a +// given Index (that is, its position in the list of valid indexes from the +// beginning) as a degenerate case, because in general the count between two +// Indexes, inclusive, is not always just the difference in their ordinals. +// However, it suffices for Judy*ByCount() to simply be an ordinal-to-Index +// mapper. +// +// Note: Like Judy*Count(), this code must "count sideways" in branches, which +// can result in a lot of cache line fills. However, unlike Judy*Count(), this +// code does not receive a specific Index, hence digit, where to start in each +// branch, so it cant accurately calculate cache line fills required in each +// direction. The best it can do is an approximation based on the total +// population of the expanse (pop1 from Pjp) and the ordinal of the target +// Index (see SETOFFSET()) within the expanse. +// +// Compile with -DSMARTMETRICS to obtain global variables containing smart +// cache line metrics. Note: Dont turn this on simultaneously for this file +// and JudyCount.c because they export the same globals. +// **************************************************************************** + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +// These are imported from JudyCount.c: +// +// TBD: Should this be in common code? Exported from a header file? + +#ifdef JUDY1 +extern Word_t j__udy1JPPop1(const Pjp_t Pjp); +#define j__udyJPPop1 j__udy1JPPop1 +#else +extern Word_t j__udyLJPPop1(const Pjp_t Pjp); +#define j__udyJPPop1 j__udyLJPPop1 +#endif + +// Avoid duplicate symbols since this file is multi-compiled: + +#ifdef SMARTMETRICS +#ifdef JUDY1 +Word_t jbb_upward = 0; // counts of directions taken: +Word_t jbb_downward = 0; +Word_t jbu_upward = 0; +Word_t jbu_downward = 0; +Word_t jlb_upward = 0; +Word_t jlb_downward = 0; +#else +extern Word_t jbb_upward; +extern Word_t jbb_downward; +extern Word_t jbu_upward; +extern Word_t jbu_downward; +extern Word_t jlb_upward; +extern Word_t jlb_downward; +#endif +#endif + + +// **************************************************************************** +// J U D Y 1 B Y C O U N T +// J U D Y L B Y C O U N T +// +// See the manual entry. + +#ifdef JUDY1 +FUNCTION int Judy1ByCount +#else +FUNCTION PPvoid_t JudyLByCount +#endif + ( + Pcvoid_t PArray, // root pointer to first branch/leaf in SM. + Word_t Count, // ordinal of Index to find, 1..MAX. + Word_t * PIndex, // to return found Index. + PJError_t PJError // optional, for returning error info. + ) +{ + Word_t Count0; // Count, base-0, to match pop0. + Word_t state; // current state in SM. + Word_t pop1; // of current branch or leaf, or of expanse. + Word_t pop1lower; // pop1 of expanses (JPs) below that for Count. + Word_t digit; // current word in branch. + Word_t jpcount; // JPs in a BranchB subexpanse. + long jpnum; // JP number in a branch (base 0). + long subexp; // for stepping through layer 1 (subexpanses). + int offset; // index ordinal within a leaf, base 0. + + Pjp_t Pjp; // current JP in branch. + Pjll_t Pjll; // current Judy linear leaf. + + +// CHECK FOR EMPTY ARRAY OR NULL PINDEX: + + if (PArray == (Pvoid_t) NULL) JU_RET_NOTFOUND; + + if (PIndex == (PWord_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + +// Convert Count to Count0; assume special case of Count = 0 maps to ~0, as +// desired, to represent the last index in a full array: +// +// Note: Think of Count0 as a reliable "number of Indexes below the target." + + Count0 = Count - 1; + assert((Count || Count0 == ~0)); // ensure CPU is sane about 0 - 1. + pop1lower = 0; + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlw = P_JLW(PArray); // first word of leaf. + + if (Count0 > Pjlw[0]) JU_RET_NOTFOUND; // too high. + + *PIndex = Pjlw[Count]; // Index, base 1. + + JU_RET_FOUND_LEAFW(Pjlw, Pjlw[0] + 1, Count0); + } + else + { + Pjpm_t Pjpm = P_JPM(PArray); + + if (Count0 > (Pjpm->jpm_Pop0)) JU_RET_NOTFOUND; // too high. + + Pjp = &(Pjpm->jpm_JP); + pop1 = (Pjpm->jpm_Pop0) + 1; + +// goto SMByCount; + } + +// COMMON CODE: +// +// Prepare to handle a root-level or lower-level branch: Save the current +// state, obtain the total population for the branch in a state-dependent way, +// and then branch to common code for multiple cases. +// +// For root-level branches, the state is always cJU_ROOTSTATE, and the array +// population must already be set in pop1; it is not available in jp_DcdPopO. +// +// Note: The total population is only needed in cases where the common code +// "counts down" instead of up to minimize cache line fills. However, its +// available cheaply, and its better to do it with a constant shift (constant +// state value) instead of a variable shift later "when needed". + +#define PREPB_ROOT(Next) \ + state = cJU_ROOTSTATE; \ + goto Next + +// Use PREPB_DCD() to first copy the Dcd bytes to *PIndex if there are any +// (only if state < cJU_ROOTSTATE - 1): + +#define PREPB_DCD(Pjp,cState,Next) \ + JU_SETDCD(*PIndex, Pjp, cState); \ + PREPB((Pjp), cState, Next) + +#define PREPB(Pjp,cState,Next) \ + state = (cState); \ + pop1 = JU_JPBRANCH_POP0(Pjp, (cState)) + 1; \ + goto Next + +// Calculate whether the ordinal of an Index within a given expanse falls in +// the lower or upper half of the expanses population, taking care with +// unsigned math and boundary conditions: +// +// Note: Assume the ordinal falls within the expanses population, that is, +// 0 < (Count - Pop1lower) <= Pop1exp (assuming infinite math). +// +// Note: If the ordinal is the middle element, it doesnt matter whether +// LOWERHALF() is TRUE or FALSE. + +#define LOWERHALF(Count0,Pop1lower,Pop1exp) \ + (((Count0) - (Pop1lower)) < ((Pop1exp) / 2)) + +// Calculate the (signed) offset within a leaf to the desired ordinal (Count - +// Pop1lower; offset is one less), and optionally ensure its in range: + +#define SETOFFSET(Offset,Count0,Pop1lower,Pjp) \ + (Offset) = (Count0) - (Pop1lower); \ + assert((Offset) >= 0); \ + assert((Offset) <= JU_JPLEAF_POP0(Pjp)) + +// Variations for immediate indexes, with and without pop1-specific assertions: + +#define SETOFFSET_IMM_CK(Offset,Count0,Pop1lower,cPop1) \ + (Offset) = (Count0) - (Pop1lower); \ + assert((Offset) >= 0); \ + assert((Offset) < (cPop1)) + +#define SETOFFSET_IMM(Offset,Count0,Pop1lower) \ + (Offset) = (Count0) - (Pop1lower) + + +// STATE MACHINE -- TRAVERSE TREE: +// +// In branches, look for the expanse (digit), if any, where the total pop1 +// below or at that expanse would meet or exceed Count, meaning the Index must +// be in this expanse. + +SMByCount: // return here for next branch/leaf. + + switch (JU_JPTYPE(Pjp)) + { + + +// ---------------------------------------------------------------------------- +// LINEAR BRANCH; count populations in JPs in the JBL upwards until finding the +// expanse (digit) containing Count, and "recurse". +// +// Note: There are no null JPs in a JBL; watch out for pop1 == 0. +// +// Note: A JBL should always fit in one cache line => no need to count up +// versus down to save cache line fills. +// +// TBD: The previous is no longer true. Consider enhancing this code to count +// up/down, but it can wait for a later tuning phase. In the meantime, PREPB() +// sets pop1 for the whole array, but that value is not used here. 001215: +// Maybe its true again? + + case cJU_JPBRANCH_L2: PREPB_DCD(Pjp, 2, BranchL); +#ifndef JU_64BIT + case cJU_JPBRANCH_L3: PREPB( Pjp, 3, BranchL); +#else + case cJU_JPBRANCH_L3: PREPB_DCD(Pjp, 3, BranchL); + case cJU_JPBRANCH_L4: PREPB_DCD(Pjp, 4, BranchL); + case cJU_JPBRANCH_L5: PREPB_DCD(Pjp, 5, BranchL); + case cJU_JPBRANCH_L6: PREPB_DCD(Pjp, 6, BranchL); + case cJU_JPBRANCH_L7: PREPB( Pjp, 7, BranchL); +#endif + case cJU_JPBRANCH_L: PREPB_ROOT( BranchL); + { + Pjbl_t Pjbl; + +// Common code (state-independent) for all cases of linear branches: + +BranchL: + Pjbl = P_JBL(Pjp->jp_Addr); + + for (jpnum = 0; jpnum < (Pjbl->jbl_NumJPs); ++jpnum) + { + if ((pop1 = j__udyJPPop1((Pjbl->jbl_jp) + jpnum)) + == cJU_ALLONES) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + assert(pop1 != 0); + +// Warning: pop1lower and pop1 are unsigned, so do not subtract 1 and compare +// >=, but instead use the following expression: + + if (pop1lower + pop1 > Count0) // Index is in this expanse. + { + JU_SETDIGIT(*PIndex, Pjbl->jbl_Expanse[jpnum], state); + Pjp = (Pjbl->jbl_jp) + jpnum; + goto SMByCount; // look under this expanse. + } + + pop1lower += pop1; // add this JPs pop1. + } + + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); // should never get here. + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // case cJU_JPBRANCH_L + + +// ---------------------------------------------------------------------------- +// BITMAP BRANCH; count populations in JPs in the JBB upwards or downwards +// until finding the expanse (digit) containing Count, and "recurse". +// +// Note: There are no null JPs in a JBB; watch out for pop1 == 0. + + case cJU_JPBRANCH_B2: PREPB_DCD(Pjp, 2, BranchB); +#ifndef JU_64BIT + case cJU_JPBRANCH_B3: PREPB( Pjp, 3, BranchB); +#else + case cJU_JPBRANCH_B3: PREPB_DCD(Pjp, 3, BranchB); + case cJU_JPBRANCH_B4: PREPB_DCD(Pjp, 4, BranchB); + case cJU_JPBRANCH_B5: PREPB_DCD(Pjp, 5, BranchB); + case cJU_JPBRANCH_B6: PREPB_DCD(Pjp, 6, BranchB); + case cJU_JPBRANCH_B7: PREPB( Pjp, 7, BranchB); +#endif + case cJU_JPBRANCH_B: PREPB_ROOT( BranchB); + { + Pjbb_t Pjbb; + +// Common code (state-independent) for all cases of bitmap branches: + +BranchB: + Pjbb = P_JBB(Pjp->jp_Addr); + +// Shorthand for one subexpanse in a bitmap and for one JP in a bitmap branch: +// +// Note: BMPJP0 exists separately to support assertions. + +#define BMPJP0(Subexp) (P_JP(JU_JBB_PJP(Pjbb, Subexp))) +#define BMPJP(Subexp,JPnum) (BMPJP0(Subexp) + (JPnum)) + + +// Common code for descending through a JP: +// +// Determine the digit for the expanse and save it in *PIndex; then "recurse". + +#define JBB_FOUNDEXPANSE \ + { \ + JU_BITMAPDIGITB(digit, subexp, JU_JBB_BITMAP(Pjbb,subexp), jpnum); \ + JU_SETDIGIT(*PIndex, digit, state); \ + Pjp = BMPJP(subexp, jpnum); \ + goto SMByCount; \ + } + + +#ifndef NOSMARTJBB // enable to turn off smart code for comparison purposes. + +// FIGURE OUT WHICH DIRECTION CAUSES FEWER CACHE LINE FILLS; adding the pop1s +// in JPs upwards, or subtracting the pop1s in JPs downwards: +// +// See header comments about limitations of this for Judy*ByCount(). + +#endif + +// COUNT UPWARD, adding each "below" JPs pop1: + +#ifndef NOSMARTJBB // enable to turn off smart code for comparison purposes. + + if (LOWERHALF(Count0, pop1lower, pop1)) + { +#endif +#ifdef SMARTMETRICS + ++jbb_upward; +#endif + for (subexp = 0; subexp < cJU_NUMSUBEXPB; ++subexp) + { + if ((jpcount = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb,subexp))) + && (BMPJP0(subexp) == (Pjp_t) NULL)) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); // null ptr. + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + +// Note: An empty subexpanse (jpcount == 0) is handled "for free": + + for (jpnum = 0; jpnum < jpcount; ++jpnum) + { + if ((pop1 = j__udyJPPop1(BMPJP(subexp, jpnum))) + == cJU_ALLONES) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + assert(pop1 != 0); + +// Warning: pop1lower and pop1 are unsigned, see earlier comment: + + if (pop1lower + pop1 > Count0) + JBB_FOUNDEXPANSE; // Index is in this expanse. + + pop1lower += pop1; // add this JPs pop1. + } + } +#ifndef NOSMARTJBB // enable to turn off smart code for comparison purposes. + } + + +// COUNT DOWNWARD, subtracting each "above" JPs pop1 from the whole expanses +// pop1: + + else + { +#ifdef SMARTMETRICS + ++jbb_downward; +#endif + pop1lower += pop1; // add whole branch to start. + + for (subexp = cJU_NUMSUBEXPB - 1; subexp >= 0; --subexp) + { + if ((jpcount = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp))) + && (BMPJP0(subexp) == (Pjp_t) NULL)) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); // null ptr. + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + +// Note: An empty subexpanse (jpcount == 0) is handled "for free": + + for (jpnum = jpcount - 1; jpnum >= 0; --jpnum) + { + if ((pop1 = j__udyJPPop1(BMPJP(subexp, jpnum))) + == cJU_ALLONES) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + assert(pop1 != 0); + +// Warning: pop1lower and pop1 are unsigned, see earlier comment: + + pop1lower -= pop1; + +// Beware unsigned math problems: + + if ((pop1lower == 0) || (pop1lower - 1 < Count0)) + JBB_FOUNDEXPANSE; // Index is in this expanse. + } + } + } +#endif // NOSMARTJBB + + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); // should never get here. + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // case cJU_JPBRANCH_B + + +// ---------------------------------------------------------------------------- +// UNCOMPRESSED BRANCH; count populations in JPs in the JBU upwards or +// downwards until finding the expanse (digit) containing Count, and "recurse". + + case cJU_JPBRANCH_U2: PREPB_DCD(Pjp, 2, BranchU); +#ifndef JU_64BIT + case cJU_JPBRANCH_U3: PREPB( Pjp, 3, BranchU); +#else + case cJU_JPBRANCH_U3: PREPB_DCD(Pjp, 3, BranchU); + case cJU_JPBRANCH_U4: PREPB_DCD(Pjp, 4, BranchU); + case cJU_JPBRANCH_U5: PREPB_DCD(Pjp, 5, BranchU); + case cJU_JPBRANCH_U6: PREPB_DCD(Pjp, 6, BranchU); + case cJU_JPBRANCH_U7: PREPB( Pjp, 7, BranchU); +#endif + case cJU_JPBRANCH_U: PREPB_ROOT( BranchU); + { + Pjbu_t Pjbu; + +// Common code (state-independent) for all cases of uncompressed branches: + +BranchU: + Pjbu = P_JBU(Pjp->jp_Addr); + +// Common code for descending through a JP: +// +// Save the digit for the expanse in *PIndex, then "recurse". + +#define JBU_FOUNDEXPANSE \ + { \ + JU_SETDIGIT(*PIndex, jpnum, state); \ + Pjp = (Pjbu->jbu_jp) + jpnum; \ + goto SMByCount; \ + } + + +#ifndef NOSMARTJBU // enable to turn off smart code for comparison purposes. + +// FIGURE OUT WHICH DIRECTION CAUSES FEWER CACHE LINE FILLS; adding the pop1s +// in JPs upwards, or subtracting the pop1s in JPs downwards: +// +// See header comments about limitations of this for Judy*ByCount(). + +#endif + +// COUNT UPWARD, simply adding the pop1 of each JP: + +#ifndef NOSMARTJBU // enable to turn off smart code for comparison purposes. + + if (LOWERHALF(Count0, pop1lower, pop1)) + { +#endif +#ifdef SMARTMETRICS + ++jbu_upward; +#endif + + for (jpnum = 0; jpnum < cJU_BRANCHUNUMJPS; ++jpnum) + { + // shortcut, save a function call: + + if ((Pjbu->jbu_jp[jpnum].jp_Type) <= cJU_JPNULLMAX) + continue; + + if ((pop1 = j__udyJPPop1((Pjbu->jbu_jp) + jpnum)) + == cJU_ALLONES) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + assert(pop1 != 0); + +// Warning: pop1lower and pop1 are unsigned, see earlier comment: + + if (pop1lower + pop1 > Count0) + JBU_FOUNDEXPANSE; // Index is in this expanse. + + pop1lower += pop1; // add this JPs pop1. + } +#ifndef NOSMARTJBU // enable to turn off smart code for comparison purposes. + } + + +// COUNT DOWNWARD, subtracting the pop1 of each JP above from the whole +// expanses pop1: + + else + { +#ifdef SMARTMETRICS + ++jbu_downward; +#endif + pop1lower += pop1; // add whole branch to start. + + for (jpnum = cJU_BRANCHUNUMJPS - 1; jpnum >= 0; --jpnum) + { + // shortcut, save a function call: + + if ((Pjbu->jbu_jp[jpnum].jp_Type) <= cJU_JPNULLMAX) + continue; + + if ((pop1 = j__udyJPPop1(Pjbu->jbu_jp + jpnum)) + == cJU_ALLONES) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + assert(pop1 != 0); + +// Warning: pop1lower and pop1 are unsigned, see earlier comment: + + pop1lower -= pop1; + +// Beware unsigned math problems: + + if ((pop1lower == 0) || (pop1lower - 1 < Count0)) + JBU_FOUNDEXPANSE; // Index is in this expanse. + } + } +#endif // NOSMARTJBU + + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); // should never get here. + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // case cJU_JPBRANCH_U + +// ---------------------------------------------------------------------------- +// LINEAR LEAF: +// +// Return the Index at the proper ordinal (see SETOFFSET()) in the leaf. First +// copy Dcd bytes, if there are any (only if state < cJU_ROOTSTATE - 1), to +// *PIndex. +// +// Note: The preceding branch traversal code MIGHT set pop1 for this expanse +// (linear leaf) as a side-effect, but dont depend on that (for JUDYL, which +// is the only cases that need it anyway). + +#define PREPL_DCD(cState) \ + JU_SETDCD(*PIndex, Pjp, cState); \ + PREPL + +#ifdef JUDY1 +#define PREPL_SETPOP1 // not needed in any cases. +#else +#define PREPL_SETPOP1 pop1 = JU_JPLEAF_POP0(Pjp) + 1 +#endif + +#define PREPL \ + Pjll = P_JLL(Pjp->jp_Addr); \ + PREPL_SETPOP1; \ + SETOFFSET(offset, Count0, pop1lower, Pjp) + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: + + PREPL_DCD(1); + JU_SETDIGIT1(*PIndex, ((uint8_t *) Pjll)[offset]); + JU_RET_FOUND_LEAF1(Pjll, pop1, offset); +#endif + + case cJU_JPLEAF2: + + PREPL_DCD(2); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(2))) + | ((uint16_t *) Pjll)[offset]; + JU_RET_FOUND_LEAF2(Pjll, pop1, offset); + +#ifndef JU_64BIT + case cJU_JPLEAF3: + { + Word_t lsb; + PREPL; + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_LEAF3(Pjll, pop1, offset); + } + +#else + case cJU_JPLEAF3: + { + Word_t lsb; + PREPL_DCD(3); + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_LEAF3(Pjll, pop1, offset); + } + + case cJU_JPLEAF4: + + PREPL_DCD(4); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(4))) + | ((uint32_t *) Pjll)[offset]; + JU_RET_FOUND_LEAF4(Pjll, pop1, offset); + + case cJU_JPLEAF5: + { + Word_t lsb; + PREPL_DCD(5); + JU_COPY5_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (5 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(5))) | lsb; + JU_RET_FOUND_LEAF5(Pjll, pop1, offset); + } + + case cJU_JPLEAF6: + { + Word_t lsb; + PREPL_DCD(6); + JU_COPY6_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (6 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(6))) | lsb; + JU_RET_FOUND_LEAF6(Pjll, pop1, offset); + } + + case cJU_JPLEAF7: + { + Word_t lsb; + PREPL; + JU_COPY7_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (7 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(7))) | lsb; + JU_RET_FOUND_LEAF7(Pjll, pop1, offset); + } +#endif + + +// ---------------------------------------------------------------------------- +// BITMAP LEAF: +// +// Return the Index at the proper ordinal (see SETOFFSET()) in the leaf by +// counting bits. First copy Dcd bytes (always present since state 1 < +// cJU_ROOTSTATE) to *PIndex. +// +// Note: The preceding branch traversal code MIGHT set pop1 for this expanse +// (bitmap leaf) as a side-effect, but dont depend on that. + + case cJU_JPLEAF_B1: + { + Pjlb_t Pjlb; + + JU_SETDCD(*PIndex, Pjp, 1); + Pjlb = P_JLB(Pjp->jp_Addr); + pop1 = JU_JPLEAF_POP0(Pjp) + 1; + +// COUNT UPWARD, adding the pop1 of each subexpanse: +// +// The entire bitmap should fit in one cache line, but still try to save some +// CPU time by counting the fewest possible number of subexpanses from the +// bitmap. +// +// See header comments about limitations of this for Judy*ByCount(). + +#ifndef NOSMARTJLB // enable to turn off smart code for comparison purposes. + + if (LOWERHALF(Count0, pop1lower, pop1)) + { +#endif +#ifdef SMARTMETRICS + ++jlb_upward; +#endif + for (subexp = 0; subexp < cJU_NUMSUBEXPL; ++subexp) + { + pop1 = j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, subexp)); + +// Warning: pop1lower and pop1 are unsigned, see earlier comment: + + if (pop1lower + pop1 > Count0) + goto LeafB1; // Index is in this subexpanse. + + pop1lower += pop1; // add this subexpanses pop1. + } +#ifndef NOSMARTJLB // enable to turn off smart code for comparison purposes. + } + + +// COUNT DOWNWARD, subtracting each "above" subexpanses pop1 from the whole +// expanses pop1: + + else + { +#ifdef SMARTMETRICS + ++jlb_downward; +#endif + pop1lower += pop1; // add whole leaf to start. + + for (subexp = cJU_NUMSUBEXPL - 1; subexp >= 0; --subexp) + { + pop1lower -= j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, subexp)); + +// Beware unsigned math problems: + + if ((pop1lower == 0) || (pop1lower - 1 < Count0)) + goto LeafB1; // Index is in this subexpanse. + } + } +#endif // NOSMARTJLB + + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); // should never get here. + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + +// RETURN INDEX FOUND: +// +// Come here with subexp set to the correct subexpanse, and pop1lower set to +// the sum for all lower expanses and subexpanses in the Judy tree. Calculate +// and save in *PIndex the digit corresponding to the ordinal in this +// subexpanse. + +LeafB1: + SETOFFSET(offset, Count0, pop1lower, Pjp); + JU_BITMAPDIGITL(digit, subexp, JU_JLB_BITMAP(Pjlb, subexp), offset); + JU_SETDIGIT1(*PIndex, digit); + JU_RET_FOUND_LEAF_B1(Pjlb, subexp, offset); +// == return((PPvoid_t) (P_JV(JL_JLB_PVALUE(Pjlb, subexp)) + offset)) + + } // case cJU_JPLEAF_B1 + + +#ifdef JUDY1 +// ---------------------------------------------------------------------------- +// FULL POPULATION: +// +// Copy Dcd bytes (always present since state 1 < cJU_ROOTSTATE) to *PIndex, +// then set the appropriate digit for the ordinal (see SETOFFSET()) in the leaf +// as the LSB in *PIndex. + + case cJ1_JPFULLPOPU1: + + JU_SETDCD(*PIndex, Pjp, 1); + SETOFFSET(offset, Count0, pop1lower, Pjp); + assert(offset >= 0); + assert(offset <= cJU_JPFULLPOPU1_POP0); + JU_SETDIGIT1(*PIndex, offset); + JU_RET_FOUND_FULLPOPU1; +#endif + + +// ---------------------------------------------------------------------------- +// IMMEDIATE: +// +// Locate the Index with the proper ordinal (see SETOFFSET()) in the Immediate, +// depending on leaf Index Size and pop1. Note: There are no Dcd bytes in an +// Immediate JP, but in a cJU_JPIMMED_*_01 JP, the field holds the least bytes +// of the immediate Index. + +#define SET_01(cState) JU_SETDIGITS(*PIndex, JU_JPDCDPOP0(Pjp), cState) + + case cJU_JPIMMED_1_01: SET_01(1); goto Imm_01; + case cJU_JPIMMED_2_01: SET_01(2); goto Imm_01; + case cJU_JPIMMED_3_01: SET_01(3); goto Imm_01; +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: SET_01(4); goto Imm_01; + case cJU_JPIMMED_5_01: SET_01(5); goto Imm_01; + case cJU_JPIMMED_6_01: SET_01(6); goto Imm_01; + case cJU_JPIMMED_7_01: SET_01(7); goto Imm_01; +#endif + +Imm_01: + + DBGCODE(SETOFFSET_IMM_CK(offset, Count0, pop1lower, 1);) + JU_RET_FOUND_IMM_01(Pjp); + +// Shorthand for where to find start of Index bytes array: + +#ifdef JUDY1 +#define PJI (Pjp->jp_1Index) +#else +#define PJI (Pjp->jp_LIndex) +#endif + +// Optional code to check the remaining ordinal (see SETOFFSET_IMM()) against +// the Index Size of the Immediate: + +#ifndef DEBUG // simple placeholder: +#define IMM(cPop1,Next) \ + goto Next +#else // extra pop1-specific checking: +#define IMM(cPop1,Next) \ + SETOFFSET_IMM_CK(offset, Count0, pop1lower, cPop1); \ + goto Next +#endif + + case cJU_JPIMMED_1_02: IMM( 2, Imm1); + case cJU_JPIMMED_1_03: IMM( 3, Imm1); +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: IMM( 4, Imm1); + case cJU_JPIMMED_1_05: IMM( 5, Imm1); + case cJU_JPIMMED_1_06: IMM( 6, Imm1); + case cJU_JPIMMED_1_07: IMM( 7, Imm1); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: IMM( 8, Imm1); + case cJ1_JPIMMED_1_09: IMM( 9, Imm1); + case cJ1_JPIMMED_1_10: IMM(10, Imm1); + case cJ1_JPIMMED_1_11: IMM(11, Imm1); + case cJ1_JPIMMED_1_12: IMM(12, Imm1); + case cJ1_JPIMMED_1_13: IMM(13, Imm1); + case cJ1_JPIMMED_1_14: IMM(14, Imm1); + case cJ1_JPIMMED_1_15: IMM(15, Imm1); +#endif + +Imm1: SETOFFSET_IMM(offset, Count0, pop1lower); + JU_SETDIGIT1(*PIndex, ((uint8_t *) PJI)[offset]); + JU_RET_FOUND_IMM(Pjp, offset); + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: IMM(2, Imm2); + case cJU_JPIMMED_2_03: IMM(3, Imm2); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: IMM(4, Imm2); + case cJ1_JPIMMED_2_05: IMM(5, Imm2); + case cJ1_JPIMMED_2_06: IMM(6, Imm2); + case cJ1_JPIMMED_2_07: IMM(7, Imm2); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) +Imm2: SETOFFSET_IMM(offset, Count0, pop1lower); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(2))) + | ((uint16_t *) PJI)[offset]; + JU_RET_FOUND_IMM(Pjp, offset); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: IMM(2, Imm3); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: IMM(3, Imm3); + case cJ1_JPIMMED_3_04: IMM(4, Imm3); + case cJ1_JPIMMED_3_05: IMM(5, Imm3); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) +Imm3: + { + Word_t lsb; + SETOFFSET_IMM(offset, Count0, pop1lower); + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_4_02: IMM(2, Imm4); + case cJ1_JPIMMED_4_03: IMM(3, Imm4); + +Imm4: SETOFFSET_IMM(offset, Count0, pop1lower); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(4))) + | ((uint32_t *) PJI)[offset]; + JU_RET_FOUND_IMM(Pjp, offset); + + case cJ1_JPIMMED_5_02: IMM(2, Imm5); + case cJ1_JPIMMED_5_03: IMM(3, Imm5); + +Imm5: + { + Word_t lsb; + SETOFFSET_IMM(offset, Count0, pop1lower); + JU_COPY5_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (5 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(5))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } + + case cJ1_JPIMMED_6_02: IMM(2, Imm6); + +Imm6: + { + Word_t lsb; + SETOFFSET_IMM(offset, Count0, pop1lower); + JU_COPY6_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (6 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(6))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } + + case cJ1_JPIMMED_7_02: IMM(2, Imm7); + +Imm7: + { + Word_t lsb; + SETOFFSET_IMM(offset, Count0, pop1lower); + JU_COPY7_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (7 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(7))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } +#endif // (JUDY1 && JU_64BIT) + + +// ---------------------------------------------------------------------------- +// UNEXPECTED JP TYPES: + + default: JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // SMByCount switch. + + /*NOTREACHED*/ + +} // Judy1ByCount() / JudyLByCount() diff --git a/libnetdata/libjudy/src/JudyL/JudyLCascade.c b/libnetdata/libjudy/src/JudyL/JudyLCascade.c new file mode 100644 index 0000000..6b52ddf --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLCascade.c @@ -0,0 +1,1942 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.38 $ $Source: /judy/src/JudyCommon/JudyCascade.c $ + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +extern int j__udyCreateBranchL(Pjp_t, Pjp_t, uint8_t *, Word_t, Pvoid_t); +extern int j__udyCreateBranchB(Pjp_t, Pjp_t, uint8_t *, Word_t, Pvoid_t); + +DBGCODE(extern void JudyCheckSorted(Pjll_t Pjll, Word_t Pop1, long IndexSize);) + +static const jbb_t StageJBBZero; // zeroed versions of namesake struct. + +// TBD: There are multiple copies of (some of) these CopyWto3, Copy3toW, +// CopyWto7 and Copy7toW functions in Judy1Cascade.c, JudyLCascade.c, and +// JudyDecascade.c. These static functions should probably be moved to a +// common place, made macros, or something to avoid having four copies. + + +// **************************************************************************** +// __ J U D Y C O P Y X T O W + + +FUNCTION static void j__udyCopy3toW( + PWord_t PDest, + uint8_t * PSrc, + Word_t LeafIndexes) +{ + do + { + JU_COPY3_PINDEX_TO_LONG(*PDest, PSrc); + PSrc += 3; + PDest += 1; + + } while(--LeafIndexes); + +} //j__udyCopy3toW() + + +#ifdef JU_64BIT + +FUNCTION static void j__udyCopy4toW( + PWord_t PDest, + uint32_t * PSrc, + Word_t LeafIndexes) +{ + do { *PDest++ = *PSrc++; + } while(--LeafIndexes); + +} // j__udyCopy4toW() + + +FUNCTION static void j__udyCopy5toW( + PWord_t PDest, + uint8_t * PSrc, + Word_t LeafIndexes) +{ + do + { + JU_COPY5_PINDEX_TO_LONG(*PDest, PSrc); + PSrc += 5; + PDest += 1; + + } while(--LeafIndexes); + +} // j__udyCopy5toW() + + +FUNCTION static void j__udyCopy6toW( + PWord_t PDest, + uint8_t * PSrc, + Word_t LeafIndexes) +{ + do + { + JU_COPY6_PINDEX_TO_LONG(*PDest, PSrc); + PSrc += 6; + PDest += 1; + + } while(--LeafIndexes); + +} // j__udyCopy6toW() + + +FUNCTION static void j__udyCopy7toW( + PWord_t PDest, + uint8_t * PSrc, + Word_t LeafIndexes) +{ + do + { + JU_COPY7_PINDEX_TO_LONG(*PDest, PSrc); + PSrc += 7; + PDest += 1; + + } while(--LeafIndexes); + +} // j__udyCopy7toW() + +#endif // JU_64BIT + + +// **************************************************************************** +// __ J U D Y C O P Y W T O X + + +FUNCTION static void j__udyCopyWto3( + uint8_t * PDest, + PWord_t PSrc, + Word_t LeafIndexes) +{ + do + { + JU_COPY3_LONG_TO_PINDEX(PDest, *PSrc); + PSrc += 1; + PDest += 3; + + } while(--LeafIndexes); + +} // j__udyCopyWto3() + + +#ifdef JU_64BIT + +FUNCTION static void j__udyCopyWto4( + uint8_t * PDest, + PWord_t PSrc, + Word_t LeafIndexes) +{ + uint32_t *PDest32 = (uint32_t *)PDest; + + do + { + *PDest32 = *PSrc; + PSrc += 1; + PDest32 += 1; + } while(--LeafIndexes); + +} // j__udyCopyWto4() + + +FUNCTION static void j__udyCopyWto5( + uint8_t * PDest, + PWord_t PSrc, + Word_t LeafIndexes) +{ + do + { + JU_COPY5_LONG_TO_PINDEX(PDest, *PSrc); + PSrc += 1; + PDest += 5; + + } while(--LeafIndexes); + +} // j__udyCopyWto5() + + +FUNCTION static void j__udyCopyWto6( + uint8_t * PDest, + PWord_t PSrc, + Word_t LeafIndexes) +{ + do + { + JU_COPY6_LONG_TO_PINDEX(PDest, *PSrc); + PSrc += 1; + PDest += 6; + + } while(--LeafIndexes); + +} // j__udyCopyWto6() + + +FUNCTION static void j__udyCopyWto7( + uint8_t * PDest, + PWord_t PSrc, + Word_t LeafIndexes) +{ + do + { + JU_COPY7_LONG_TO_PINDEX(PDest, *PSrc); + PSrc += 1; + PDest += 7; + + } while(--LeafIndexes); + +} // j__udyCopyWto7() + +#endif // JU_64BIT + + +// **************************************************************************** +// COMMON CODE (MACROS): +// +// Free objects in an array of valid JPs, StageJP[ExpCnt] == last one may +// include Immeds, which are ignored. + +#define FREEALLEXIT(ExpCnt,StageJP,Pjpm) \ + { \ + Word_t _expct = (ExpCnt); \ + while (_expct--) j__udyFreeSM(&((StageJP)[_expct]), Pjpm); \ + return(-1); \ + } + +// Clear the array that keeps track of the number of JPs in a subexpanse: + +#define ZEROJP(SubJPCount) \ + { \ + int ii; \ + for (ii = 0; ii < cJU_NUMSUBEXPB; ii++) (SubJPCount[ii]) = 0; \ + } + +// **************************************************************************** +// __ J U D Y S T A G E J B B T O J B B +// +// Create a mallocd BranchB (jbb_t) from a staged BranchB while "splaying" a +// single old leaf. Return -1 if out of memory, otherwise 1. + +static int j__udyStageJBBtoJBB( + Pjp_t PjpLeaf, // JP of leaf being splayed. + Pjbb_t PStageJBB, // temp jbb_t on stack. + Pjp_t PjpArray, // array of JPs to splayed new leaves. + uint8_t * PSubCount, // count of JPs for each subexpanse. + Pjpm_t Pjpm) // the jpm_t for JudyAlloc*(). +{ + Pjbb_t PjbbRaw; // pointer to new bitmap branch. + Pjbb_t Pjbb; + Word_t subexp; + +// Get memory for new BranchB: + + if ((PjbbRaw = j__udyAllocJBB(Pjpm)) == (Pjbb_t) NULL) return(-1); + Pjbb = P_JBB(PjbbRaw); + +// Copy staged BranchB into just-allocated BranchB: + + *Pjbb = *PStageJBB; + +// Allocate the JP subarrays (BJP) for the new BranchB: + + for (subexp = 0; subexp < cJU_NUMSUBEXPB; subexp++) + { + Pjp_t PjpRaw; + Pjp_t Pjp; + Word_t NumJP; // number of JPs in each subexpanse. + + if ((NumJP = PSubCount[subexp]) == 0) continue; // empty. + +// Out of memory, back out previous allocations: + + if ((PjpRaw = j__udyAllocJBBJP(NumJP, Pjpm)) == (Pjp_t) NULL) + { + while(subexp--) + { + if ((NumJP = PSubCount[subexp]) == 0) continue; + + PjpRaw = JU_JBB_PJP(Pjbb, subexp); + j__udyFreeJBBJP(PjpRaw, NumJP, Pjpm); + } + j__udyFreeJBB(PjbbRaw, Pjpm); + return(-1); // out of memory. + } + Pjp = P_JP(PjpRaw); + +// Place the JP subarray pointer in the new BranchB, copy subarray JPs, and +// advance to the next subexpanse: + + JU_JBB_PJP(Pjbb, subexp) = PjpRaw; + JU_COPYMEM(Pjp, PjpArray, NumJP); + PjpArray += NumJP; + + } // for each subexpanse. + +// Change the PjpLeaf from Leaf to BranchB: + + PjpLeaf->jp_Addr = (Word_t) PjbbRaw; + PjpLeaf->jp_Type += cJU_JPBRANCH_B2 - cJU_JPLEAF2; // Leaf to BranchB. + + return(1); + +} // j__udyStageJBBtoJBB() + + +// **************************************************************************** +// __ J U D Y J L L 2 T O J L B 1 +// +// Create a LeafB1 (jlb_t = JLB1) from a Leaf2 (2-byte Indexes and for JudyL, +// Word_t Values). Return NULL if out of memory, else a pointer to the new +// LeafB1. +// +// NOTE: Caller must release the Leaf2 that was passed in. + +FUNCTION static Pjlb_t j__udyJLL2toJLB1( + uint16_t * Pjll, // array of 16-bit indexes. +#ifdef JUDYL + Pjv_t Pjv, // array of associated values. +#endif + Word_t LeafPop1, // number of indexes/values. + Pvoid_t Pjpm) // jpm_t for JudyAlloc*()/JudyFree*(). +{ + Pjlb_t PjlbRaw; + Pjlb_t Pjlb; + int offset; +JUDYLCODE(int subexp;) + +// Allocate the LeafB1: + + if ((PjlbRaw = j__udyAllocJLB1(Pjpm)) == (Pjlb_t) NULL) + return((Pjlb_t) NULL); + Pjlb = P_JLB(PjlbRaw); + +// Copy Leaf2 indexes to LeafB1: + + for (offset = 0; offset < LeafPop1; ++offset) + JU_BITMAPSETL(Pjlb, Pjll[offset]); + +#ifdef JUDYL + +// Build LeafVs from bitmap: + + for (subexp = 0; subexp < cJU_NUMSUBEXPL; ++subexp) + { + struct _POINTER_VALUES + { + Word_t pv_Pop1; // size of value area. + Pjv_t pv_Pjv; // raw pointer to value area. + } pv[cJU_NUMSUBEXPL]; + +// Get the population of the subexpanse, and if any, allocate a LeafV: + + pv[subexp].pv_Pop1 = j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, subexp)); + + if (pv[subexp].pv_Pop1) + { + Pjv_t Pjvnew; + +// TBD: There is an opportunity to put pop == 1 value in pointer: + + pv[subexp].pv_Pjv = j__udyLAllocJV(pv[subexp].pv_Pop1, Pjpm); + +// Upon out of memory, free all previously allocated: + + if (pv[subexp].pv_Pjv == (Pjv_t) NULL) + { + while(subexp--) + { + if (pv[subexp].pv_Pop1) + { + j__udyLFreeJV(pv[subexp].pv_Pjv, pv[subexp].pv_Pop1, + Pjpm); + } + } + j__udyFreeJLB1(PjlbRaw, Pjpm); + return((Pjlb_t) NULL); + } + + Pjvnew = P_JV(pv[subexp].pv_Pjv); + JU_COPYMEM(Pjvnew, Pjv, pv[subexp].pv_Pop1); + Pjv += pv[subexp].pv_Pop1; // advance value pointer. + +// Place raw pointer to value array in bitmap subexpanse: + + JL_JLB_PVALUE(Pjlb, subexp) = pv[subexp].pv_Pjv; + + } // populated subexpanse. + } // each subexpanse. + +#endif // JUDYL + + return(PjlbRaw); // pointer to LeafB1. + +} // j__udyJLL2toJLB1() + + +// **************************************************************************** +// __ J U D Y C A S C A D E 1 +// +// Create bitmap leaf from 1-byte Indexes and Word_t Values. +// +// TBD: There must be a better way. +// +// Only for JudyL 32 bit: (note, unifdef disallows comment on next line) + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + +FUNCTION int j__udyCascade1( + Pjp_t Pjp, + Pvoid_t Pjpm) +{ + Word_t DcdP0; + uint8_t * PLeaf; + Pjlb_t PjlbRaw; + Pjlb_t Pjlb; + Word_t Pop1; + Word_t ii; // temp for loop counter +JUDYLCODE(Pjv_t Pjv;) + + assert(JU_JPTYPE(Pjp) == cJU_JPLEAF1); + assert((JU_JPDCDPOP0(Pjp) & 0xFF) == (cJU_LEAF1_MAXPOP1-1)); + + PjlbRaw = j__udyAllocJLB1(Pjpm); + if (PjlbRaw == (Pjlb_t) NULL) return(-1); + + Pjlb = P_JLB(PjlbRaw); + PLeaf = (uint8_t *) P_JLL(Pjp->jp_Addr); + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + + JUDYLCODE(Pjv = JL_LEAF1VALUEAREA(PLeaf, Pop1);) + +// Copy 1 byte index Leaf to bitmap Leaf + for (ii = 0; ii < Pop1; ii++) JU_BITMAPSETL(Pjlb, PLeaf[ii]); + +#ifdef JUDYL +// Build 8 subexpanse Value leaves from bitmap + for (ii = 0; ii < cJU_NUMSUBEXPL; ii++) + { +// Get number of Indexes in subexpanse + if ((Pop1 = j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, ii)))) + { + Pjv_t PjvnewRaw; // value area of new leaf. + Pjv_t Pjvnew; + + PjvnewRaw = j__udyLAllocJV(Pop1, Pjpm); + if (PjvnewRaw == (Pjv_t) NULL) // out of memory. + { +// Free prevously allocated LeafVs: + while(ii--) + { + if ((Pop1 = j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, ii)))) + { + PjvnewRaw = JL_JLB_PVALUE(Pjlb, ii); + j__udyLFreeJV(PjvnewRaw, Pop1, Pjpm); + } + } +// Free the bitmap leaf + j__udyLFreeJLB1(PjlbRaw,Pjpm); + return(-1); + } + Pjvnew = P_JV(PjvnewRaw); + JU_COPYMEM(Pjvnew, Pjv, Pop1); + + Pjv += Pop1; + JL_JLB_PVALUE(Pjlb, ii) = PjvnewRaw; + } + } +#endif // JUDYL + + DcdP0 = JU_JPDCDPOP0(Pjp) | (PLeaf[0] & cJU_DCDMASK(1)); + JU_JPSETADT(Pjp, (Word_t)PjlbRaw, DcdP0, cJU_JPLEAF_B1); + + return(1); // return success + +} // j__udyCascade1() + +#endif // (!(JUDY1 && JU_64BIT)) + + +// **************************************************************************** +// __ J U D Y C A S C A D E 2 +// +// Entry PLeaf of size LeafPop1 is either compressed or splayed with pointer +// returned in Pjp. Entry Levels sizeof(Word_t) down to level 2. +// +// Splay or compress the 2-byte Index Leaf that Pjp point to. Return *Pjp as a +// (compressed) cJU_LEAFB1 or a cJU_BRANCH_*2 + +FUNCTION int j__udyCascade2( + Pjp_t Pjp, + Pvoid_t Pjpm) +{ + uint16_t * PLeaf; // pointer to leaf, explicit type. + Word_t End, Start; // temporaries. + Word_t ExpCnt; // count of expanses of splay. + Word_t CIndex; // current Index word. +JUDYLCODE(Pjv_t Pjv;) // value area of leaf. + +// Temp staging for parts(Leaves) of newly splayed leaf + jp_t StageJP [cJU_LEAF2_MAXPOP1]; // JPs of new leaves + uint8_t StageExp [cJU_LEAF2_MAXPOP1]; // Expanses of new leaves + uint8_t SubJPCount[cJU_NUMSUBEXPB]; // JPs in each subexpanse + jbb_t StageJBB; // staged bitmap branch + + assert(JU_JPTYPE(Pjp) == cJU_JPLEAF2); + assert((JU_JPDCDPOP0(Pjp) & 0xFFFF) == (cJU_LEAF2_MAXPOP1-1)); + +// Get the address of the Leaf + PLeaf = (uint16_t *) P_JLL(Pjp->jp_Addr); + +// And its Value area + JUDYLCODE(Pjv = JL_LEAF2VALUEAREA(PLeaf, cJU_LEAF2_MAXPOP1);) + +// If Leaf is in 1 expanse -- just compress it to a Bitmap Leaf + + CIndex = PLeaf[0]; + if (!JU_DIGITATSTATE(CIndex ^ PLeaf[cJU_LEAF2_MAXPOP1-1], 2)) + { +// cJU_JPLEAF_B1 + Word_t DcdP0; + Pjlb_t PjlbRaw; + PjlbRaw = j__udyJLL2toJLB1(PLeaf, +#ifdef JUDYL + Pjv, +#endif + cJU_LEAF2_MAXPOP1, Pjpm); + if (PjlbRaw == (Pjlb_t)NULL) return(-1); // out of memory + +// Merge in another Dcd byte because compressing + DcdP0 = (CIndex & cJU_DCDMASK(1)) | JU_JPDCDPOP0(Pjp); + JU_JPSETADT(Pjp, (Word_t)PjlbRaw, DcdP0, cJU_JPLEAF_B1); + + return(1); + } + +// Else in 2+ expanses, splay Leaf into smaller leaves at higher compression + + StageJBB = StageJBBZero; // zero staged bitmap branch + ZEROJP(SubJPCount); + +// Splay the 2 byte index Leaf to 1 byte Index Leaves + for (ExpCnt = Start = 0, End = 1; ; End++) + { +// Check if new expanse or last one + if ( (End == cJU_LEAF2_MAXPOP1) + || + (JU_DIGITATSTATE(CIndex ^ PLeaf[End], 2)) + ) + { +// Build a leaf below the previous expanse +// + Pjp_t PjpJP = StageJP + ExpCnt; + Word_t Pop1 = End - Start; + Word_t expanse = JU_DIGITATSTATE(CIndex, 2); + Word_t subexp = expanse / cJU_BITSPERSUBEXPB; +// +// set the bit that is the current expanse + JU_JBB_BITMAP(&StageJBB, subexp) |= JU_BITPOSMASKB(expanse); +#ifdef SUBEXPCOUNTS + StageJBB.jbb_subPop1[subexp] += Pop1; // pop of subexpanse +#endif +// count number of expanses in each subexpanse + SubJPCount[subexp]++; + +// Save byte expanse of leaf + StageExp[ExpCnt] = JU_DIGITATSTATE(CIndex, 2); + + if (Pop1 == 1) // cJU_JPIMMED_1_01 + { + Word_t DcdP0; + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(1)) | + CIndex; +#ifdef JUDY1 + JU_JPSETADT(PjpJP, 0, DcdP0, cJ1_JPIMMED_1_01); +#else // JUDYL + JU_JPSETADT(PjpJP, Pjv[Start], DcdP0, + cJL_JPIMMED_1_01); +#endif // JUDYL + } + else if (Pop1 <= cJU_IMMED1_MAXPOP1) // bigger + { +// cJL_JPIMMED_1_02..3: JudyL 32 +// cJ1_JPIMMED_1_02..7: Judy1 32 +// cJL_JPIMMED_1_02..7: JudyL 64 +// cJ1_JPIMMED_1_02..15: Judy1 64 +#ifdef JUDYL + Pjv_t PjvnewRaw; // value area of leaf. + Pjv_t Pjvnew; + +// Allocate Value area for Immediate Leaf + PjvnewRaw = j__udyLAllocJV(Pop1, Pjpm); + if (PjvnewRaw == (Pjv_t) NULL) + FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjvnew = P_JV(PjvnewRaw); + +// Copy to Values to Value Leaf + JU_COPYMEM(Pjvnew, Pjv + Start, Pop1); + PjpJP->jp_Addr = (Word_t) PjvnewRaw; + +// Copy to JP as an immediate Leaf + JU_COPYMEM(PjpJP->jp_LIndex, PLeaf + Start, + Pop1); +#else + JU_COPYMEM(PjpJP->jp_1Index, PLeaf + Start, + Pop1); +#endif +// Set Type, Population and Index size + PjpJP->jp_Type = cJU_JPIMMED_1_02 + Pop1 - 2; + } + +// 64Bit Judy1 does not have Leaf1: (note, unifdef disallows comment on next +// line) + +#if (! (defined(JUDY1) && defined(JU_64BIT))) + else if (Pop1 <= cJU_LEAF1_MAXPOP1) // still bigger + { +// cJU_JPLEAF1 + Word_t DcdP0; + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new leaf. + +// Get a new Leaf + PjllRaw = j__udyAllocJLL1(Pop1, Pjpm); + if (PjllRaw == (Pjll_t)NULL) + FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjll = P_JLL(PjllRaw); +#ifdef JUDYL +// Copy to Values to new Leaf + Pjvnew = JL_LEAF1VALUEAREA(Pjll, Pop1); + JU_COPYMEM(Pjvnew, Pjv + Start, Pop1); +#endif +// Copy Indexes to new Leaf + JU_COPYMEM((uint8_t *)Pjll, PLeaf+Start, Pop1); + + DBGCODE(JudyCheckSorted(Pjll, Pop1, 1);) + + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(2)) + | + (CIndex & cJU_DCDMASK(2-1)) + | + (Pop1 - 1); + + JU_JPSETADT(PjpJP, (Word_t)PjllRaw, DcdP0, + cJU_JPLEAF1); + } +#endif // (!(JUDY1 && JU_64BIT)) // Not 64Bit Judy1 + + else // biggest + { +// cJU_JPLEAF_B1 + Word_t DcdP0; + Pjlb_t PjlbRaw; + PjlbRaw = j__udyJLL2toJLB1( + PLeaf + Start, +#ifdef JUDYL + Pjv + Start, +#endif + Pop1, Pjpm); + if (PjlbRaw == (Pjlb_t)NULL) + FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(2)) + | + (CIndex & cJU_DCDMASK(2-1)) + | + (Pop1 - 1); + + JU_JPSETADT(PjpJP, (Word_t)PjlbRaw, DcdP0, + cJU_JPLEAF_B1); + } + ExpCnt++; +// Done? + if (End == cJU_LEAF2_MAXPOP1) break; + +// New Expanse, Start and Count + CIndex = PLeaf[End]; + Start = End; + } + } + +// Now put all the Leaves below a BranchL or BranchB: + if (ExpCnt <= cJU_BRANCHLMAXJPS) // put the Leaves below a BranchL + { + if (j__udyCreateBranchL(Pjp, StageJP, StageExp, ExpCnt, + Pjpm) == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjp->jp_Type = cJU_JPBRANCH_L2; + } + else + { + if (j__udyStageJBBtoJBB(Pjp, &StageJBB, StageJP, SubJPCount, Pjpm) + == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + } + return(1); + +} // j__udyCascade2() + + +// **************************************************************************** +// __ J U D Y C A S C A D E 3 +// +// Return *Pjp as a (compressed) cJU_LEAF2, cJU_BRANCH_L3, cJU_BRANCH_B3. + +FUNCTION int j__udyCascade3( + Pjp_t Pjp, + Pvoid_t Pjpm) +{ + uint8_t * PLeaf; // pointer to leaf, explicit type. + Word_t End, Start; // temporaries. + Word_t ExpCnt; // count of expanses of splay. + Word_t CIndex; // current Index word. +JUDYLCODE(Pjv_t Pjv;) // value area of leaf. + +// Temp staging for parts(Leaves) of newly splayed leaf + jp_t StageJP [cJU_LEAF3_MAXPOP1]; // JPs of new leaves + Word_t StageA [cJU_LEAF3_MAXPOP1]; + uint8_t StageExp [cJU_LEAF3_MAXPOP1]; // Expanses of new leaves + uint8_t SubJPCount[cJU_NUMSUBEXPB]; // JPs in each subexpanse + jbb_t StageJBB; // staged bitmap branch + + assert(JU_JPTYPE(Pjp) == cJU_JPLEAF3); + assert((JU_JPDCDPOP0(Pjp) & 0xFFFFFF) == (cJU_LEAF3_MAXPOP1-1)); + +// Get the address of the Leaf + PLeaf = (uint8_t *) P_JLL(Pjp->jp_Addr); + +// Extract leaf to Word_t and insert-sort Index into it + j__udyCopy3toW(StageA, PLeaf, cJU_LEAF3_MAXPOP1); + +// Get the address of the Leaf and Value area + JUDYLCODE(Pjv = JL_LEAF3VALUEAREA(PLeaf, cJU_LEAF3_MAXPOP1);) + +// If Leaf is in 1 expanse -- just compress it (compare 1st, last & Index) + + CIndex = StageA[0]; + if (!JU_DIGITATSTATE(CIndex ^ StageA[cJU_LEAF3_MAXPOP1-1], 3)) + { + Word_t DcdP0; + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new leaf. + +// Alloc a 2 byte Index Leaf + PjllRaw = j__udyAllocJLL2(cJU_LEAF3_MAXPOP1, Pjpm); + if (PjllRaw == (Pjlb_t)NULL) return(-1); // out of memory + + Pjll = P_JLL(PjllRaw); + +// Copy just 2 bytes Indexes to new Leaf +// j__udyCopyWto2((uint16_t *) Pjll, StageA, cJU_LEAF3_MAXPOP1); + JU_COPYMEM ((uint16_t *) Pjll, StageA, cJU_LEAF3_MAXPOP1); +#ifdef JUDYL +// Copy Value area into new Leaf + Pjvnew = JL_LEAF2VALUEAREA(Pjll, cJU_LEAF3_MAXPOP1); + JU_COPYMEM(Pjvnew, Pjv, cJU_LEAF3_MAXPOP1); +#endif + DBGCODE(JudyCheckSorted(Pjll, cJU_LEAF3_MAXPOP1, 2);) + +// Form new JP, Pop0 field is unchanged +// Add in another Dcd byte because compressing + DcdP0 = (CIndex & cJU_DCDMASK(2)) | JU_JPDCDPOP0(Pjp); + + JU_JPSETADT(Pjp, (Word_t) PjllRaw, DcdP0, cJU_JPLEAF2); + + return(1); // Success + } + +// Else in 2+ expanses, splay Leaf into smaller leaves at higher compression + + StageJBB = StageJBBZero; // zero staged bitmap branch + ZEROJP(SubJPCount); + +// Splay the 3 byte index Leaf to 2 byte Index Leaves + for (ExpCnt = Start = 0, End = 1; ; End++) + { +// Check if new expanse or last one + if ( (End == cJU_LEAF3_MAXPOP1) + || + (JU_DIGITATSTATE(CIndex ^ StageA[End], 3)) + ) + { +// Build a leaf below the previous expanse + + Pjp_t PjpJP = StageJP + ExpCnt; + Word_t Pop1 = End - Start; + Word_t expanse = JU_DIGITATSTATE(CIndex, 3); + Word_t subexp = expanse / cJU_BITSPERSUBEXPB; +// +// set the bit that is the current expanse + JU_JBB_BITMAP(&StageJBB, subexp) |= JU_BITPOSMASKB(expanse); +#ifdef SUBEXPCOUNTS + StageJBB.jbb_subPop1[subexp] += Pop1; // pop of subexpanse +#endif +// count number of expanses in each subexpanse + SubJPCount[subexp]++; + +// Save byte expanse of leaf + StageExp[ExpCnt] = JU_DIGITATSTATE(CIndex, 3); + + if (Pop1 == 1) // cJU_JPIMMED_2_01 + { + Word_t DcdP0; + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(2)) | + CIndex; +#ifdef JUDY1 + JU_JPSETADT(PjpJP, 0, DcdP0, cJ1_JPIMMED_2_01); +#else // JUDYL + JU_JPSETADT(PjpJP, Pjv[Start], DcdP0, + cJL_JPIMMED_2_01); +#endif // JUDYL + } +#if (defined(JUDY1) || defined(JU_64BIT)) + else if (Pop1 <= cJU_IMMED2_MAXPOP1) + { +// cJ1_JPIMMED_2_02..3: Judy1 32 +// cJL_JPIMMED_2_02..3: JudyL 64 +// cJ1_JPIMMED_2_02..7: Judy1 64 +#ifdef JUDYL +// Alloc is 1st in case of malloc fail + Pjv_t PjvnewRaw; // value area of new leaf. + Pjv_t Pjvnew; + +// Allocate Value area for Immediate Leaf + PjvnewRaw = j__udyLAllocJV(Pop1, Pjpm); + if (PjvnewRaw == (Pjv_t) NULL) + FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjvnew = P_JV(PjvnewRaw); + +// Copy to Values to Value Leaf + JU_COPYMEM(Pjvnew, Pjv + Start, Pop1); + + PjpJP->jp_Addr = (Word_t) PjvnewRaw; + +// Copy to Index to JP as an immediate Leaf + JU_COPYMEM((uint16_t *) (PjpJP->jp_LIndex), + StageA + Start, Pop1); +#else // JUDY1 + JU_COPYMEM((uint16_t *) (PjpJP->jp_1Index), + StageA + Start, Pop1); +#endif // JUDY1 +// Set Type, Population and Index size + PjpJP->jp_Type = cJU_JPIMMED_2_02 + Pop1 - 2; + } +#endif // (JUDY1 || JU_64BIT) + + else // Make a linear leaf2 + { +// cJU_JPLEAF2 + Word_t DcdP0; + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new leaf. + + PjllRaw = j__udyAllocJLL2(Pop1, Pjpm); + if (PjllRaw == (Pjll_t) NULL) + FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjll = P_JLL(PjllRaw); +#ifdef JUDYL +// Copy to Values to new Leaf + Pjvnew = JL_LEAF2VALUEAREA(Pjll, Pop1); + JU_COPYMEM(Pjvnew, Pjv + Start, Pop1); +#endif +// Copy least 2 bytes per Index of Leaf to new Leaf + JU_COPYMEM((uint16_t *) Pjll, StageA+Start, + Pop1); + + DBGCODE(JudyCheckSorted(Pjll, Pop1, 2);) + + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(3)) + | + (CIndex & cJU_DCDMASK(3-1)) + | + (Pop1 - 1); + + JU_JPSETADT(PjpJP, (Word_t)PjllRaw, DcdP0, + cJU_JPLEAF2); + } + ExpCnt++; +// Done? + if (End == cJU_LEAF3_MAXPOP1) break; + +// New Expanse, Start and Count + CIndex = StageA[End]; + Start = End; + } + } + +// Now put all the Leaves below a BranchL or BranchB: + if (ExpCnt <= cJU_BRANCHLMAXJPS) // put the Leaves below a BranchL + { + if (j__udyCreateBranchL(Pjp, StageJP, StageExp, ExpCnt, + Pjpm) == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjp->jp_Type = cJU_JPBRANCH_L3; + } + else + { + if (j__udyStageJBBtoJBB(Pjp, &StageJBB, StageJP, SubJPCount, Pjpm) + == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + } + return(1); + +} // j__udyCascade3() + + +#ifdef JU_64BIT // JudyCascade[4567] + +// **************************************************************************** +// __ J U D Y C A S C A D E 4 +// +// Cascade from a cJU_JPLEAF4 to one of the following: +// 1. if leaf is in 1 expanse: +// compress it into a JPLEAF3 +// 2. if leaf contains multiple expanses: +// create linear or bitmap branch containing +// each new expanse is either a: +// JPIMMED_3_01 branch +// JPIMMED_3_02 branch +// JPLEAF3 + +FUNCTION int j__udyCascade4( + Pjp_t Pjp, + Pvoid_t Pjpm) +{ + uint32_t * PLeaf; // pointer to leaf, explicit type. + Word_t End, Start; // temporaries. + Word_t ExpCnt; // count of expanses of splay. + Word_t CIndex; // current Index word. +JUDYLCODE(Pjv_t Pjv;) // value area of leaf. + +// Temp staging for parts(Leaves) of newly splayed leaf + jp_t StageJP [cJU_LEAF4_MAXPOP1]; // JPs of new leaves + Word_t StageA [cJU_LEAF4_MAXPOP1]; + uint8_t StageExp [cJU_LEAF4_MAXPOP1]; // Expanses of new leaves + uint8_t SubJPCount[cJU_NUMSUBEXPB]; // JPs in each subexpanse + jbb_t StageJBB; // staged bitmap branch + + assert(JU_JPTYPE(Pjp) == cJU_JPLEAF4); + assert((JU_JPDCDPOP0(Pjp) & 0xFFFFFFFF) == (cJU_LEAF4_MAXPOP1-1)); + +// Get the address of the Leaf + PLeaf = (uint32_t *) P_JLL(Pjp->jp_Addr); + +// Extract 4 byte index Leaf to Word_t + j__udyCopy4toW(StageA, PLeaf, cJU_LEAF4_MAXPOP1); + +// Get the address of the Leaf and Value area + JUDYLCODE(Pjv = JL_LEAF4VALUEAREA(PLeaf, cJU_LEAF4_MAXPOP1);) + +// If Leaf is in 1 expanse -- just compress it (compare 1st, last & Index) + + CIndex = StageA[0]; + if (!JU_DIGITATSTATE(CIndex ^ StageA[cJU_LEAF4_MAXPOP1-1], 4)) + { + Word_t DcdP0; + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new Leaf. + +// Alloc a 3 byte Index Leaf + PjllRaw = j__udyAllocJLL3(cJU_LEAF4_MAXPOP1, Pjpm); + if (PjllRaw == (Pjlb_t)NULL) return(-1); // out of memory + + Pjll = P_JLL(PjllRaw); + +// Copy Index area into new Leaf + j__udyCopyWto3((uint8_t *) Pjll, StageA, cJU_LEAF4_MAXPOP1); +#ifdef JUDYL +// Copy Value area into new Leaf + Pjvnew = JL_LEAF3VALUEAREA(Pjll, cJU_LEAF4_MAXPOP1); + JU_COPYMEM(Pjvnew, Pjv, cJU_LEAF4_MAXPOP1); +#endif + DBGCODE(JudyCheckSorted(Pjll, cJU_LEAF4_MAXPOP1, 3);) + + DcdP0 = JU_JPDCDPOP0(Pjp) | (CIndex & cJU_DCDMASK(3)); + JU_JPSETADT(Pjp, (Word_t)PjllRaw, DcdP0, cJU_JPLEAF3); + + return(1); + } + +// Else in 2+ expanses, splay Leaf into smaller leaves at higher compression + + StageJBB = StageJBBZero; // zero staged bitmap branch + ZEROJP(SubJPCount); + +// Splay the 4 byte index Leaf to 3 byte Index Leaves + for (ExpCnt = Start = 0, End = 1; ; End++) + { +// Check if new expanse or last one + if ( (End == cJU_LEAF4_MAXPOP1) + || + (JU_DIGITATSTATE(CIndex ^ StageA[End], 4)) + ) + { +// Build a leaf below the previous expanse + + Pjp_t PjpJP = StageJP + ExpCnt; + Word_t Pop1 = End - Start; + Word_t expanse = JU_DIGITATSTATE(CIndex, 4); + Word_t subexp = expanse / cJU_BITSPERSUBEXPB; +// +// set the bit that is the current expanse + JU_JBB_BITMAP(&StageJBB, subexp) |= JU_BITPOSMASKB(expanse); +#ifdef SUBEXPCOUNTS + StageJBB.jbb_subPop1[subexp] += Pop1; // pop of subexpanse +#endif +// count number of expanses in each subexpanse + SubJPCount[subexp]++; + +// Save byte expanse of leaf + StageExp[ExpCnt] = JU_DIGITATSTATE(CIndex, 4); + + if (Pop1 == 1) // cJU_JPIMMED_3_01 + { + Word_t DcdP0; + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(3)) | + CIndex; +#ifdef JUDY1 + JU_JPSETADT(PjpJP, 0, DcdP0, cJ1_JPIMMED_3_01); +#else // JUDYL + JU_JPSETADT(PjpJP, Pjv[Start], DcdP0, + cJL_JPIMMED_3_01); +#endif // JUDYL + } + else if (Pop1 <= cJU_IMMED3_MAXPOP1) + { +// cJ1_JPIMMED_3_02 : Judy1 32 +// cJL_JPIMMED_3_02 : JudyL 64 +// cJ1_JPIMMED_3_02..5: Judy1 64 + +#ifdef JUDYL +// Alloc is 1st in case of malloc fail + Pjv_t PjvnewRaw; // value area of new leaf. + Pjv_t Pjvnew; + +// Allocate Value area for Immediate Leaf + PjvnewRaw = j__udyLAllocJV(Pop1, Pjpm); + if (PjvnewRaw == (Pjv_t) NULL) + FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjvnew = P_JV(PjvnewRaw); + +// Copy to Values to Value Leaf + JU_COPYMEM(Pjvnew, Pjv + Start, Pop1); + PjpJP->jp_Addr = (Word_t) PjvnewRaw; + +// Copy to Index to JP as an immediate Leaf + j__udyCopyWto3(PjpJP->jp_LIndex, + StageA + Start, Pop1); +#else + j__udyCopyWto3(PjpJP->jp_1Index, + StageA + Start, Pop1); +#endif +// Set type, population and Index size + PjpJP->jp_Type = cJU_JPIMMED_3_02 + Pop1 - 2; + } + else + { +// cJU_JPLEAF3 + Word_t DcdP0; + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new leaf. + + PjllRaw = j__udyAllocJLL3(Pop1, Pjpm); + if (PjllRaw == (Pjll_t)NULL) + FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjll = P_JLL(PjllRaw); + +// Copy Indexes to new Leaf + j__udyCopyWto3((uint8_t *) Pjll, StageA + Start, + Pop1); +#ifdef JUDYL +// Copy to Values to new Leaf + Pjvnew = JL_LEAF3VALUEAREA(Pjll, Pop1); + JU_COPYMEM(Pjvnew, Pjv + Start, Pop1); +#endif + DBGCODE(JudyCheckSorted(Pjll, Pop1, 3);) + + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(4)) + | + (CIndex & cJU_DCDMASK(4-1)) + | + (Pop1 - 1); + + JU_JPSETADT(PjpJP, (Word_t)PjllRaw, DcdP0, + cJU_JPLEAF3); + } + ExpCnt++; +// Done? + if (End == cJU_LEAF4_MAXPOP1) break; + +// New Expanse, Start and Count + CIndex = StageA[End]; + Start = End; + } + } + +// Now put all the Leaves below a BranchL or BranchB: + if (ExpCnt <= cJU_BRANCHLMAXJPS) // put the Leaves below a BranchL + { + if (j__udyCreateBranchL(Pjp, StageJP, StageExp, ExpCnt, + Pjpm) == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjp->jp_Type = cJU_JPBRANCH_L4; + } + else + { + if (j__udyStageJBBtoJBB(Pjp, &StageJBB, StageJP, SubJPCount, Pjpm) + == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + } + return(1); + +} // j__udyCascade4() + + +// **************************************************************************** +// __ J U D Y C A S C A D E 5 +// +// Cascade from a cJU_JPLEAF5 to one of the following: +// 1. if leaf is in 1 expanse: +// compress it into a JPLEAF4 +// 2. if leaf contains multiple expanses: +// create linear or bitmap branch containing +// each new expanse is either a: +// JPIMMED_4_01 branch +// JPLEAF4 + +FUNCTION int j__udyCascade5( + Pjp_t Pjp, + Pvoid_t Pjpm) +{ + uint8_t * PLeaf; // pointer to leaf, explicit type. + Word_t End, Start; // temporaries. + Word_t ExpCnt; // count of expanses of splay. + Word_t CIndex; // current Index word. +JUDYLCODE(Pjv_t Pjv;) // value area of leaf. + +// Temp staging for parts(Leaves) of newly splayed leaf + jp_t StageJP [cJU_LEAF5_MAXPOP1]; // JPs of new leaves + Word_t StageA [cJU_LEAF5_MAXPOP1]; + uint8_t StageExp [cJU_LEAF5_MAXPOP1]; // Expanses of new leaves + uint8_t SubJPCount[cJU_NUMSUBEXPB]; // JPs in each subexpanse + jbb_t StageJBB; // staged bitmap branch + + assert(JU_JPTYPE(Pjp) == cJU_JPLEAF5); + assert((JU_JPDCDPOP0(Pjp) & 0xFFFFFFFFFF) == (cJU_LEAF5_MAXPOP1-1)); + +// Get the address of the Leaf + PLeaf = (uint8_t *) P_JLL(Pjp->jp_Addr); + +// Extract 5 byte index Leaf to Word_t + j__udyCopy5toW(StageA, PLeaf, cJU_LEAF5_MAXPOP1); + +// Get the address of the Leaf and Value area + JUDYLCODE(Pjv = JL_LEAF5VALUEAREA(PLeaf, cJU_LEAF5_MAXPOP1);) + +// If Leaf is in 1 expanse -- just compress it (compare 1st, last & Index) + + CIndex = StageA[0]; + if (!JU_DIGITATSTATE(CIndex ^ StageA[cJU_LEAF5_MAXPOP1-1], 5)) + { + Word_t DcdP0; + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new leaf. + +// Alloc a 4 byte Index Leaf + PjllRaw = j__udyAllocJLL4(cJU_LEAF5_MAXPOP1, Pjpm); + if (PjllRaw == (Pjlb_t)NULL) return(-1); // out of memory + + Pjll = P_JLL(PjllRaw); + +// Copy Index area into new Leaf + j__udyCopyWto4((uint8_t *) Pjll, StageA, cJU_LEAF5_MAXPOP1); +#ifdef JUDYL +// Copy Value area into new Leaf + Pjvnew = JL_LEAF4VALUEAREA(Pjll, cJU_LEAF5_MAXPOP1); + JU_COPYMEM(Pjvnew, Pjv, cJU_LEAF5_MAXPOP1); +#endif + DBGCODE(JudyCheckSorted(Pjll, cJU_LEAF5_MAXPOP1, 4);) + + DcdP0 = JU_JPDCDPOP0(Pjp) | (CIndex & cJU_DCDMASK(4)); + JU_JPSETADT(Pjp, (Word_t)PjllRaw, DcdP0, cJU_JPLEAF4); + + return(1); + } + +// Else in 2+ expanses, splay Leaf into smaller leaves at higher compression + + StageJBB = StageJBBZero; // zero staged bitmap branch + ZEROJP(SubJPCount); + +// Splay the 5 byte index Leaf to 4 byte Index Leaves + for (ExpCnt = Start = 0, End = 1; ; End++) + { +// Check if new expanse or last one + if ( (End == cJU_LEAF5_MAXPOP1) + || + (JU_DIGITATSTATE(CIndex ^ StageA[End], 5)) + ) + { +// Build a leaf below the previous expanse + + Pjp_t PjpJP = StageJP + ExpCnt; + Word_t Pop1 = End - Start; + Word_t expanse = JU_DIGITATSTATE(CIndex, 5); + Word_t subexp = expanse / cJU_BITSPERSUBEXPB; +// +// set the bit that is the current expanse + JU_JBB_BITMAP(&StageJBB, subexp) |= JU_BITPOSMASKB(expanse); +#ifdef SUBEXPCOUNTS + StageJBB.jbb_subPop1[subexp] += Pop1; // pop of subexpanse +#endif +// count number of expanses in each subexpanse + SubJPCount[subexp]++; + +// Save byte expanse of leaf + StageExp[ExpCnt] = JU_DIGITATSTATE(CIndex, 5); + + if (Pop1 == 1) // cJU_JPIMMED_4_01 + { + Word_t DcdP0; + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(4)) | + CIndex; +#ifdef JUDY1 + JU_JPSETADT(PjpJP, 0, DcdP0, cJ1_JPIMMED_4_01); +#else // JUDYL + JU_JPSETADT(PjpJP, Pjv[Start], DcdP0, + cJL_JPIMMED_4_01); +#endif // JUDYL + } +#ifdef JUDY1 + else if (Pop1 <= cJ1_IMMED4_MAXPOP1) + { +// cJ1_JPIMMED_4_02..3: Judy1 64 + +// Copy to Index to JP as an immediate Leaf + j__udyCopyWto4(PjpJP->jp_1Index, + StageA + Start, Pop1); + +// Set pointer, type, population and Index size + PjpJP->jp_Type = cJ1_JPIMMED_4_02 + Pop1 - 2; + } +#endif + else + { +// cJU_JPLEAF4 + Word_t DcdP0; + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new leaf. + +// Get a new Leaf + PjllRaw = j__udyAllocJLL4(Pop1, Pjpm); + if (PjllRaw == (Pjll_t)NULL) + FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjll = P_JLL(PjllRaw); + +// Copy Indexes to new Leaf + j__udyCopyWto4((uint8_t *) Pjll, StageA + Start, + Pop1); +#ifdef JUDYL +// Copy to Values to new Leaf + Pjvnew = JL_LEAF4VALUEAREA(Pjll, Pop1); + JU_COPYMEM(Pjvnew, Pjv + Start, Pop1); +#endif + DBGCODE(JudyCheckSorted(Pjll, Pop1, 4);) + + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(5)) + | + (CIndex & cJU_DCDMASK(5-1)) + | + (Pop1 - 1); + + JU_JPSETADT(PjpJP, (Word_t)PjllRaw, DcdP0, + cJU_JPLEAF4); + } + ExpCnt++; +// Done? + if (End == cJU_LEAF5_MAXPOP1) break; + +// New Expanse, Start and Count + CIndex = StageA[End]; + Start = End; + } + } + +// Now put all the Leaves below a BranchL or BranchB: + if (ExpCnt <= cJU_BRANCHLMAXJPS) // put the Leaves below a BranchL + { + if (j__udyCreateBranchL(Pjp, StageJP, StageExp, ExpCnt, + Pjpm) == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjp->jp_Type = cJU_JPBRANCH_L5; + } + else + { + if (j__udyStageJBBtoJBB(Pjp, &StageJBB, StageJP, SubJPCount, Pjpm) + == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + } + return(1); + +} // j__udyCascade5() + + +// **************************************************************************** +// __ J U D Y C A S C A D E 6 +// +// Cascade from a cJU_JPLEAF6 to one of the following: +// 1. if leaf is in 1 expanse: +// compress it into a JPLEAF5 +// 2. if leaf contains multiple expanses: +// create linear or bitmap branch containing +// each new expanse is either a: +// JPIMMED_5_01 ... JPIMMED_5_03 branch +// JPIMMED_5_01 branch +// JPLEAF5 + +FUNCTION int j__udyCascade6( + Pjp_t Pjp, + Pvoid_t Pjpm) +{ + uint8_t * PLeaf; // pointer to leaf, explicit type. + Word_t End, Start; // temporaries. + Word_t ExpCnt; // count of expanses of splay. + Word_t CIndex; // current Index word. +JUDYLCODE(Pjv_t Pjv;) // value area of leaf. + +// Temp staging for parts(Leaves) of newly splayed leaf + jp_t StageJP [cJU_LEAF6_MAXPOP1]; // JPs of new leaves + Word_t StageA [cJU_LEAF6_MAXPOP1]; + uint8_t StageExp [cJU_LEAF6_MAXPOP1]; // Expanses of new leaves + uint8_t SubJPCount[cJU_NUMSUBEXPB]; // JPs in each subexpanse + jbb_t StageJBB; // staged bitmap branch + + assert(JU_JPTYPE(Pjp) == cJU_JPLEAF6); + assert((JU_JPDCDPOP0(Pjp) & 0xFFFFFFFFFFFF) == (cJU_LEAF6_MAXPOP1-1)); + +// Get the address of the Leaf + PLeaf = (uint8_t *) P_JLL(Pjp->jp_Addr); + +// Extract 6 byte index Leaf to Word_t + j__udyCopy6toW(StageA, PLeaf, cJU_LEAF6_MAXPOP1); + +// Get the address of the Leaf and Value area + JUDYLCODE(Pjv = JL_LEAF6VALUEAREA(PLeaf, cJU_LEAF6_MAXPOP1);) + +// If Leaf is in 1 expanse -- just compress it (compare 1st, last & Index) + + CIndex = StageA[0]; + if (!JU_DIGITATSTATE(CIndex ^ StageA[cJU_LEAF6_MAXPOP1-1], 6)) + { + Word_t DcdP0; + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new leaf. + +// Alloc a 5 byte Index Leaf + PjllRaw = j__udyAllocJLL5(cJU_LEAF6_MAXPOP1, Pjpm); + if (PjllRaw == (Pjlb_t)NULL) return(-1); // out of memory + + Pjll = P_JLL(PjllRaw); + +// Copy Index area into new Leaf + j__udyCopyWto5((uint8_t *) Pjll, StageA, cJU_LEAF6_MAXPOP1); +#ifdef JUDYL +// Copy Value area into new Leaf + Pjvnew = JL_LEAF5VALUEAREA(Pjll, cJU_LEAF6_MAXPOP1); + JU_COPYMEM(Pjvnew, Pjv, cJU_LEAF6_MAXPOP1); +#endif + DBGCODE(JudyCheckSorted(Pjll, cJU_LEAF6_MAXPOP1, 5);) + + DcdP0 = JU_JPDCDPOP0(Pjp) | (CIndex & cJU_DCDMASK(5)); + JU_JPSETADT(Pjp, (Word_t)PjllRaw, DcdP0, cJU_JPLEAF5); + + return(1); + } + +// Else in 2+ expanses, splay Leaf into smaller leaves at higher compression + + StageJBB = StageJBBZero; // zero staged bitmap branch + ZEROJP(SubJPCount); + +// Splay the 6 byte index Leaf to 5 byte Index Leaves + for (ExpCnt = Start = 0, End = 1; ; End++) + { +// Check if new expanse or last one + if ( (End == cJU_LEAF6_MAXPOP1) + || + (JU_DIGITATSTATE(CIndex ^ StageA[End], 6)) + ) + { +// Build a leaf below the previous expanse + + Pjp_t PjpJP = StageJP + ExpCnt; + Word_t Pop1 = End - Start; + Word_t expanse = JU_DIGITATSTATE(CIndex, 6); + Word_t subexp = expanse / cJU_BITSPERSUBEXPB; +// +// set the bit that is the current expanse + JU_JBB_BITMAP(&StageJBB, subexp) |= JU_BITPOSMASKB(expanse); +#ifdef SUBEXPCOUNTS + StageJBB.jbb_subPop1[subexp] += Pop1; // pop of subexpanse +#endif +// count number of expanses in each subexpanse + SubJPCount[subexp]++; + +// Save byte expanse of leaf + StageExp[ExpCnt] = JU_DIGITATSTATE(CIndex, 6); + + if (Pop1 == 1) // cJU_JPIMMED_5_01 + { + Word_t DcdP0; + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(5)) | + CIndex; +#ifdef JUDY1 + JU_JPSETADT(PjpJP, 0, DcdP0, cJ1_JPIMMED_5_01); +#else // JUDYL + JU_JPSETADT(PjpJP, Pjv[Start], DcdP0, + cJL_JPIMMED_5_01); +#endif // JUDYL + } +#ifdef JUDY1 + else if (Pop1 <= cJ1_IMMED5_MAXPOP1) + { +// cJ1_JPIMMED_5_02..3: Judy1 64 + +// Copy to Index to JP as an immediate Leaf + j__udyCopyWto5(PjpJP->jp_1Index, + StageA + Start, Pop1); + +// Set pointer, type, population and Index size + PjpJP->jp_Type = cJ1_JPIMMED_5_02 + Pop1 - 2; + } +#endif + else + { +// cJU_JPLEAF5 + Word_t DcdP0; + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new leaf. + +// Get a new Leaf + PjllRaw = j__udyAllocJLL5(Pop1, Pjpm); + if (PjllRaw == (Pjll_t)NULL) + FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjll = P_JLL(PjllRaw); + +// Copy Indexes to new Leaf + j__udyCopyWto5((uint8_t *) Pjll, StageA + Start, + Pop1); + +// Copy to Values to new Leaf +#ifdef JUDYL + Pjvnew = JL_LEAF5VALUEAREA(Pjll, Pop1); + JU_COPYMEM(Pjvnew, Pjv + Start, Pop1); +#endif + DBGCODE(JudyCheckSorted(Pjll, Pop1, 5);) + + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(6)) + | + (CIndex & cJU_DCDMASK(6-1)) + | + (Pop1 - 1); + + JU_JPSETADT(PjpJP, (Word_t)PjllRaw, DcdP0, + cJU_JPLEAF5); + } + ExpCnt++; +// Done? + if (End == cJU_LEAF6_MAXPOP1) break; + +// New Expanse, Start and Count + CIndex = StageA[End]; + Start = End; + } + } + +// Now put all the Leaves below a BranchL or BranchB: + if (ExpCnt <= cJU_BRANCHLMAXJPS) // put the Leaves below a BranchL + { + if (j__udyCreateBranchL(Pjp, StageJP, StageExp, ExpCnt, + Pjpm) == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjp->jp_Type = cJU_JPBRANCH_L6; + } + else + { + if (j__udyStageJBBtoJBB(Pjp, &StageJBB, StageJP, SubJPCount, Pjpm) + == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + } + return(1); + +} // j__udyCascade6() + + +// **************************************************************************** +// __ J U D Y C A S C A D E 7 +// +// Cascade from a cJU_JPLEAF7 to one of the following: +// 1. if leaf is in 1 expanse: +// compress it into a JPLEAF6 +// 2. if leaf contains multiple expanses: +// create linear or bitmap branch containing +// each new expanse is either a: +// JPIMMED_6_01 ... JPIMMED_6_02 branch +// JPIMMED_6_01 branch +// JPLEAF6 + +FUNCTION int j__udyCascade7( + Pjp_t Pjp, + Pvoid_t Pjpm) +{ + uint8_t * PLeaf; // pointer to leaf, explicit type. + Word_t End, Start; // temporaries. + Word_t ExpCnt; // count of expanses of splay. + Word_t CIndex; // current Index word. +JUDYLCODE(Pjv_t Pjv;) // value area of leaf. + +// Temp staging for parts(Leaves) of newly splayed leaf + jp_t StageJP [cJU_LEAF7_MAXPOP1]; // JPs of new leaves + Word_t StageA [cJU_LEAF7_MAXPOP1]; + uint8_t StageExp [cJU_LEAF7_MAXPOP1]; // Expanses of new leaves + uint8_t SubJPCount[cJU_NUMSUBEXPB]; // JPs in each subexpanse + jbb_t StageJBB; // staged bitmap branch + + assert(JU_JPTYPE(Pjp) == cJU_JPLEAF7); + assert(JU_JPDCDPOP0(Pjp) == (cJU_LEAF7_MAXPOP1-1)); + +// Get the address of the Leaf + PLeaf = (uint8_t *) P_JLL(Pjp->jp_Addr); + +// Extract 7 byte index Leaf to Word_t + j__udyCopy7toW(StageA, PLeaf, cJU_LEAF7_MAXPOP1); + +// Get the address of the Leaf and Value area + JUDYLCODE(Pjv = JL_LEAF7VALUEAREA(PLeaf, cJU_LEAF7_MAXPOP1);) + +// If Leaf is in 1 expanse -- just compress it (compare 1st, last & Index) + + CIndex = StageA[0]; + if (!JU_DIGITATSTATE(CIndex ^ StageA[cJU_LEAF7_MAXPOP1-1], 7)) + { + Word_t DcdP0; + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new leaf. + +// Alloc a 6 byte Index Leaf + PjllRaw = j__udyAllocJLL6(cJU_LEAF7_MAXPOP1, Pjpm); + if (PjllRaw == (Pjlb_t)NULL) return(-1); // out of memory + + Pjll = P_JLL(PjllRaw); + +// Copy Index area into new Leaf + j__udyCopyWto6((uint8_t *) Pjll, StageA, cJU_LEAF7_MAXPOP1); +#ifdef JUDYL +// Copy Value area into new Leaf + Pjvnew = JL_LEAF6VALUEAREA(Pjll, cJU_LEAF7_MAXPOP1); + JU_COPYMEM(Pjvnew, Pjv, cJU_LEAF7_MAXPOP1); +#endif + DBGCODE(JudyCheckSorted(Pjll, cJU_LEAF7_MAXPOP1, 6);) + + DcdP0 = JU_JPDCDPOP0(Pjp) | (CIndex & cJU_DCDMASK(6)); + JU_JPSETADT(Pjp, (Word_t)PjllRaw, DcdP0, cJU_JPLEAF6); + + return(1); + } + +// Else in 2+ expanses, splay Leaf into smaller leaves at higher compression + + StageJBB = StageJBBZero; // zero staged bitmap branch + ZEROJP(SubJPCount); + +// Splay the 7 byte index Leaf to 6 byte Index Leaves + for (ExpCnt = Start = 0, End = 1; ; End++) + { +// Check if new expanse or last one + if ( (End == cJU_LEAF7_MAXPOP1) + || + (JU_DIGITATSTATE(CIndex ^ StageA[End], 7)) + ) + { +// Build a leaf below the previous expanse + + Pjp_t PjpJP = StageJP + ExpCnt; + Word_t Pop1 = End - Start; + Word_t expanse = JU_DIGITATSTATE(CIndex, 7); + Word_t subexp = expanse / cJU_BITSPERSUBEXPB; +// +// set the bit that is the current expanse + JU_JBB_BITMAP(&StageJBB, subexp) |= JU_BITPOSMASKB(expanse); +#ifdef SUBEXPCOUNTS + StageJBB.jbb_subPop1[subexp] += Pop1; // pop of subexpanse +#endif +// count number of expanses in each subexpanse + SubJPCount[subexp]++; + +// Save byte expanse of leaf + StageExp[ExpCnt] = JU_DIGITATSTATE(CIndex, 7); + + if (Pop1 == 1) // cJU_JPIMMED_6_01 + { + Word_t DcdP0; + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(6)) | + CIndex; +#ifdef JUDY1 + JU_JPSETADT(PjpJP, 0, DcdP0, cJ1_JPIMMED_6_01); +#else // JUDYL + JU_JPSETADT(PjpJP, Pjv[Start], DcdP0, + cJL_JPIMMED_6_01); +#endif // JUDYL + } +#ifdef JUDY1 + else if (Pop1 == cJ1_IMMED6_MAXPOP1) + { +// cJ1_JPIMMED_6_02: Judy1 64 + +// Copy to Index to JP as an immediate Leaf + j__udyCopyWto6(PjpJP->jp_1Index, + StageA + Start, 2); + +// Set pointer, type, population and Index size + PjpJP->jp_Type = cJ1_JPIMMED_6_02; + } +#endif + else + { +// cJU_JPLEAF6 + Word_t DcdP0; + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new leaf. + +// Get a new Leaf + PjllRaw = j__udyAllocJLL6(Pop1, Pjpm); + if (PjllRaw == (Pjll_t)NULL) + FREEALLEXIT(ExpCnt, StageJP, Pjpm); + Pjll = P_JLL(PjllRaw); + +// Copy Indexes to new Leaf + j__udyCopyWto6((uint8_t *) Pjll, StageA + Start, + Pop1); +#ifdef JUDYL +// Copy to Values to new Leaf + Pjvnew = JL_LEAF6VALUEAREA(Pjll, Pop1); + JU_COPYMEM(Pjvnew, Pjv + Start, Pop1); +#endif + DBGCODE(JudyCheckSorted(Pjll, Pop1, 6);) + + DcdP0 = (JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(7)) + | + (CIndex & cJU_DCDMASK(7-1)) + | + (Pop1 - 1); + + JU_JPSETADT(PjpJP, (Word_t)PjllRaw, DcdP0, + cJU_JPLEAF6); + } + ExpCnt++; +// Done? + if (End == cJU_LEAF7_MAXPOP1) break; + +// New Expanse, Start and Count + CIndex = StageA[End]; + Start = End; + } + } + +// Now put all the Leaves below a BranchL or BranchB: + if (ExpCnt <= cJU_BRANCHLMAXJPS) // put the Leaves below a BranchL + { + if (j__udyCreateBranchL(Pjp, StageJP, StageExp, ExpCnt, + Pjpm) == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjp->jp_Type = cJU_JPBRANCH_L7; + } + else + { + if (j__udyStageJBBtoJBB(Pjp, &StageJBB, StageJP, SubJPCount, Pjpm) + == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + } + return(1); + +} // j__udyCascade7() + +#endif // JU_64BIT + + +// **************************************************************************** +// __ J U D Y C A S C A D E L +// +// (Compressed) cJU_LEAF3[7], cJ1_JPBRANCH_L. +// +// Cascade from a LEAFW (under Pjp) to one of the following: +// 1. if LEAFW is in 1 expanse: +// create linear branch with a JPLEAF3[7] under it +// 2. LEAFW contains multiple expanses: +// create linear or bitmap branch containing new expanses +// each new expanse is either a: 32 64 +// JPIMMED_3_01 branch Y N +// JPIMMED_7_01 branch N Y +// JPLEAF3 Y N +// JPLEAF7 N Y + +FUNCTION int j__udyCascadeL( + Pjp_t Pjp, + Pvoid_t Pjpm) +{ + Pjlw_t Pjlw; // leaf to work on. + Word_t End, Start; // temporaries. + Word_t ExpCnt; // count of expanses of splay. + Word_t CIndex; // current Index word. +JUDYLCODE(Pjv_t Pjv;) // value area of leaf. + +// Temp staging for parts(Leaves) of newly splayed leaf + jp_t StageJP [cJU_LEAFW_MAXPOP1]; + uint8_t StageExp[cJU_LEAFW_MAXPOP1]; + uint8_t SubJPCount[cJU_NUMSUBEXPB]; // JPs in each subexpanse + jbb_t StageJBB; // staged bitmap branch + +// Get the address of the Leaf + Pjlw = P_JLW(Pjp->jp_Addr); + + assert(Pjlw[0] == (cJU_LEAFW_MAXPOP1 - 1)); + +// Get pointer to Value area of old Leaf + JUDYLCODE(Pjv = JL_LEAFWVALUEAREA(Pjlw, cJU_LEAFW_MAXPOP1);) + + Pjlw++; // Now point to Index area + +// If Leaf is in 1 expanse -- first compress it (compare 1st, last & Index): + + CIndex = Pjlw[0]; // also used far below + if (!JU_DIGITATSTATE(CIndex ^ Pjlw[cJU_LEAFW_MAXPOP1 - 1], + cJU_ROOTSTATE)) + { + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new leaf. + +// Get the common expanse to all elements in Leaf + StageExp[0] = JU_DIGITATSTATE(CIndex, cJU_ROOTSTATE); + +// Alloc a 3[7] byte Index Leaf +#ifdef JU_64BIT + PjllRaw = j__udyAllocJLL7(cJU_LEAFW_MAXPOP1, Pjpm); + if (PjllRaw == (Pjlb_t)NULL) return(-1); // out of memory + + Pjll = P_JLL(PjllRaw); + +// Copy LEAFW to a cJU_JPLEAF7 + j__udyCopyWto7((uint8_t *) Pjll, Pjlw, cJU_LEAFW_MAXPOP1); +#ifdef JUDYL +// Get the Value area of new Leaf + Pjvnew = JL_LEAF7VALUEAREA(Pjll, cJU_LEAFW_MAXPOP1); + JU_COPYMEM(Pjvnew, Pjv, cJU_LEAFW_MAXPOP1); +#endif + DBGCODE(JudyCheckSorted(Pjll, cJU_LEAFW_MAXPOP1, 7);) +#else // 32 Bit + PjllRaw = j__udyAllocJLL3(cJU_LEAFW_MAXPOP1, Pjpm); + if (PjllRaw == (Pjll_t) NULL) return(-1); + + Pjll = P_JLL(PjllRaw); + +// Copy LEAFW to a cJU_JPLEAF3 + j__udyCopyWto3((uint8_t *) Pjll, Pjlw, cJU_LEAFW_MAXPOP1); +#ifdef JUDYL +// Get the Value area of new Leaf + Pjvnew = JL_LEAF3VALUEAREA(Pjll, cJU_LEAFW_MAXPOP1); + JU_COPYMEM(Pjvnew, Pjv, cJU_LEAFW_MAXPOP1); +#endif + DBGCODE(JudyCheckSorted(Pjll, cJU_LEAFW_MAXPOP1, 3);) +#endif // 32 Bit + +// Following not needed because cJU_DCDMASK(3[7]) is == 0 +////// StageJP[0].jp_DcdPopO |= (CIndex & cJU_DCDMASK(3[7])); +#ifdef JU_64BIT + JU_JPSETADT(&(StageJP[0]), (Word_t)PjllRaw, cJU_LEAFW_MAXPOP1-1, + cJU_JPLEAF7); +#else // 32BIT + JU_JPSETADT(&(StageJP[0]), (Word_t)PjllRaw, cJU_LEAFW_MAXPOP1-1, + cJU_JPLEAF3); +#endif // 32BIT +// Create a 1 element Linear branch + if (j__udyCreateBranchL(Pjp, StageJP, StageExp, 1, Pjpm) == -1) + return(-1); + +// Change the type of callers JP + Pjp->jp_Type = cJU_JPBRANCH_L; + + return(1); + } + +// Else in 2+ expanses, splay Leaf into smaller leaves at higher compression + + StageJBB = StageJBBZero; // zero staged bitmap branch + ZEROJP(SubJPCount); + +// Splay the 4[8] byte Index Leaf to 3[7] byte Index Leaves + for (ExpCnt = Start = 0, End = 1; ; End++) + { +// Check if new expanse or last one + if ( (End == cJU_LEAFW_MAXPOP1) + || + (JU_DIGITATSTATE(CIndex ^ Pjlw[End], cJU_ROOTSTATE)) + ) + { +// Build a leaf below the previous expanse + + Pjp_t PjpJP = StageJP + ExpCnt; + Word_t Pop1 = End - Start; + Word_t expanse = JU_DIGITATSTATE(CIndex, cJU_ROOTSTATE); + Word_t subexp = expanse / cJU_BITSPERSUBEXPB; +// +// set the bit that is the current expanse + JU_JBB_BITMAP(&StageJBB, subexp) |= JU_BITPOSMASKB(expanse); +#ifdef SUBEXPCOUNTS + StageJBB.jbb_subPop1[subexp] += Pop1; // pop of subexpanse +#endif +// count number of expanses in each subexpanse + SubJPCount[subexp]++; + +// Save byte expanse of leaf + StageExp[ExpCnt] = JU_DIGITATSTATE(CIndex, + cJU_ROOTSTATE); + + if (Pop1 == 1) // cJU_JPIMMED_3[7]_01 + { +#ifdef JU_64BIT +#ifdef JUDY1 + JU_JPSETADT(PjpJP, 0, CIndex, cJ1_JPIMMED_7_01); +#else // JUDYL + JU_JPSETADT(PjpJP, Pjv[Start], CIndex, + cJL_JPIMMED_7_01); +#endif // JUDYL + +#else // JU_32BIT +#ifdef JUDY1 + JU_JPSETADT(PjpJP, 0, CIndex, cJ1_JPIMMED_3_01); +#else // JUDYL + JU_JPSETADT(PjpJP, Pjv[Start], CIndex, + cJL_JPIMMED_3_01); +#endif // JUDYL +#endif // JU_32BIT + } +#ifdef JUDY1 +#ifdef JU_64BIT + else if (Pop1 <= cJ1_IMMED7_MAXPOP1) +#else + else if (Pop1 <= cJ1_IMMED3_MAXPOP1) +#endif + { +// cJ1_JPIMMED_3_02 : Judy1 32 +// cJ1_JPIMMED_7_02 : Judy1 64 +// Copy to JP as an immediate Leaf +#ifdef JU_64BIT + j__udyCopyWto7(PjpJP->jp_1Index, Pjlw+Start, 2); + PjpJP->jp_Type = cJ1_JPIMMED_7_02; +#else + j__udyCopyWto3(PjpJP->jp_1Index, Pjlw+Start, 2); + PjpJP->jp_Type = cJ1_JPIMMED_3_02; +#endif // 32 Bit + } +#endif // JUDY1 + else // Linear Leaf JPLEAF3[7] + { +// cJU_JPLEAF3[7] + Pjll_t PjllRaw; // pointer to new leaf. + Pjll_t Pjll; + JUDYLCODE(Pjv_t Pjvnew;) // value area of new leaf. +#ifdef JU_64BIT + PjllRaw = j__udyAllocJLL7(Pop1, Pjpm); + if (PjllRaw == (Pjll_t) NULL) return(-1); + Pjll = P_JLL(PjllRaw); + + j__udyCopyWto7((uint8_t *) Pjll, Pjlw + Start, + Pop1); +#ifdef JUDYL + Pjvnew = JL_LEAF7VALUEAREA(Pjll, Pop1); + JU_COPYMEM(Pjvnew, Pjv + Start, Pop1); +#endif // JUDYL + DBGCODE(JudyCheckSorted(Pjll, Pop1, 7);) +#else // JU_64BIT - 32 Bit + PjllRaw = j__udyAllocJLL3(Pop1, Pjpm); + if (PjllRaw == (Pjll_t) NULL) return(-1); + Pjll = P_JLL(PjllRaw); + + j__udyCopyWto3((uint8_t *) Pjll, Pjlw + Start, + Pop1); +#ifdef JUDYL + Pjvnew = JL_LEAF3VALUEAREA(Pjll, Pop1); + JU_COPYMEM(Pjvnew, Pjv + Start, Pop1); +#endif // JUDYL + DBGCODE(JudyCheckSorted(Pjll, Pop1, 3);) +#endif // 32 Bit + +#ifdef JU_64BIT + JU_JPSETADT(PjpJP, (Word_t)PjllRaw, Pop1 - 1, + cJU_JPLEAF7); +#else // JU_64BIT - 32 Bit + JU_JPSETADT(PjpJP, (Word_t)PjllRaw, Pop1 - 1, + cJU_JPLEAF3); +#endif // 32 Bit + } + ExpCnt++; +// Done? + if (End == cJU_LEAFW_MAXPOP1) break; + +// New Expanse, Start and Count + CIndex = Pjlw[End]; + Start = End; + } + } + +// Now put all the Leaves below a BranchL or BranchB: + if (ExpCnt <= cJU_BRANCHLMAXJPS) // put the Leaves below a BranchL + { + if (j__udyCreateBranchL(Pjp, StageJP, StageExp, ExpCnt, + Pjpm) == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjp->jp_Type = cJU_JPBRANCH_L; + } + else + { + if (j__udyStageJBBtoJBB(Pjp, &StageJBB, StageJP, SubJPCount, Pjpm) + == -1) FREEALLEXIT(ExpCnt, StageJP, Pjpm); + + Pjp->jp_Type = cJU_JPBRANCH_B; // cJU_LEAFW is out of sequence + } + return(1); + +} // j__udyCascadeL() diff --git a/libnetdata/libjudy/src/JudyL/JudyLCount.c b/libnetdata/libjudy/src/JudyL/JudyLCount.c new file mode 100644 index 0000000..179757f --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLCount.c @@ -0,0 +1,1195 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.78 $ $Source: /judy/src/JudyCommon/JudyCount.c $ +// +// Judy*Count() function for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. +// +// Compile with -DNOSMARTJBB, -DNOSMARTJBU, and/or -DNOSMARTJLB to build a +// version with cache line optimizations deleted, for testing. +// +// Compile with -DSMARTMETRICS to obtain global variables containing smart +// cache line metrics. Note: Dont turn this on simultaneously for this file +// and JudyByCount.c because they export the same globals. +// +// Judy*Count() returns the "count of Indexes" (inclusive) between the two +// specified limits (Indexes). This code is remarkably fast. It traverses the +// "Judy array" data structure. +// +// This count code is the GENERIC untuned version (minimum code size). It +// might be possible to tuned to a specific architecture to be faster. +// However, in real applications, with a modern machine, it is expected that +// the instruction times will be swamped by cache line fills. +// **************************************************************************** + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + + +// define a phoney that is for sure + +#define cJU_LEAFW cJU_JPIMMED_CAP + +// Avoid duplicate symbols since this file is multi-compiled: + +#ifdef SMARTMETRICS +#ifdef JUDY1 +Word_t jbb_upward = 0; // counts of directions taken: +Word_t jbb_downward = 0; +Word_t jbu_upward = 0; +Word_t jbu_downward = 0; +Word_t jlb_upward = 0; +Word_t jlb_downward = 0; +#else +extern Word_t jbb_upward; +extern Word_t jbb_downward; +extern Word_t jbu_upward; +extern Word_t jbu_downward; +extern Word_t jlb_upward; +extern Word_t jlb_downward; +#endif +#endif + + +// FORWARD DECLARATIONS (prototypes): + +static Word_t j__udy1LCountSM(const Pjp_t Pjp, const Word_t Index, + const Pjpm_t Pjpm); + +// Each of Judy1 and JudyL get their own private (static) version of this +// function: + +static int j__udyCountLeafB1(const Pjll_t Pjll, const Word_t Pop1, + const Word_t Index); + +// These functions are not static because they are exported to Judy*ByCount(): +// +// TBD: Should be made static for performance reasons? And thus duplicated? +// +// Note: There really are two different functions, but for convenience they +// are referred to here with a generic name. + +#ifdef JUDY1 +#define j__udyJPPop1 j__udy1JPPop1 +#else +#define j__udyJPPop1 j__udyLJPPop1 +#endif + +Word_t j__udyJPPop1(const Pjp_t Pjp); + + +// LOCAL ERROR HANDLING: +// +// The Judy*Count() functions are unusual because they return 0 instead of JERR +// for an error. In this source file, define C_JERR for clarity. + +#define C_JERR 0 + + +// **************************************************************************** +// J U D Y 1 C O U N T +// J U D Y L C O U N T +// +// See the manual entry for details. +// +// This code is written recursively, at least at first, because thats much +// simpler; hope its fast enough. + +#ifdef JUDY1 +FUNCTION Word_t Judy1Count +#else +FUNCTION Word_t JudyLCount +#endif + ( + Pcvoid_t PArray, // JRP to first branch/leaf in SM. + Word_t Index1, // starting Index. + Word_t Index2, // ending Index. + PJError_t PJError // optional, for returning error info. + ) +{ + jpm_t fakejpm; // local temporary for small arrays. + Pjpm_t Pjpm; // top JPM or local temporary for error info. + jp_t fakejp; // constructed for calling j__udy1LCountSM(). + Pjp_t Pjp; // JP to pass to j__udy1LCountSM(). + Word_t pop1; // total for the array. + Word_t pop1above1; // indexes at or above Index1, inclusive. + Word_t pop1above2; // indexes at or above Index2, exclusive. + int retcode; // from Judy*First() calls. +JUDYLCODE(PPvoid_t PPvalue); // from JudyLFirst() calls. + + +// CHECK FOR SHORTCUTS: +// +// As documented, return C_JERR if the Judy array is empty or Index1 > Index2. + + if ((PArray == (Pvoid_t) NULL) || (Index1 > Index2)) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NONE); + return(C_JERR); + } + +// If Index1 == Index2, simply check if the specified Index is set; pass +// through the return value from Judy1Test() or JudyLGet() with appropriate +// translations. + + if (Index1 == Index2) + { +#ifdef JUDY1 + retcode = Judy1Test(PArray, Index1, PJError); + + if (retcode == JERRI) return(C_JERR); // pass through error. + + if (retcode == 0) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NONE); + return(C_JERR); + } +#else + PPvalue = JudyLGet(PArray, Index1, PJError); + + if (PPvalue == PPJERR) return(C_JERR); // pass through error. + + if (PPvalue == (PPvoid_t) NULL) // Index is not set. + { + JU_SET_ERRNO(PJError, JU_ERRNO_NONE); + return(C_JERR); + } +#endif + return(1); // single index is set. + } + + +// CHECK JRP TYPE: +// +// Use an if/then for speed rather than a switch, and put the most common cases +// first. +// +// Note: Since even cJU_LEAFW types require counting between two Indexes, +// prepare them here for common code below that calls j__udy1LCountSM(), rather +// than handling them even more specially here. + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlw = P_JLW(PArray); // first word of leaf. + Pjpm = & fakejpm; + Pjp = & fakejp; + Pjp->jp_Addr = (Word_t) Pjlw; + Pjp->jp_Type = cJU_LEAFW; + Pjpm->jpm_Pop0 = Pjlw[0]; // from first word of leaf. + pop1 = Pjpm->jpm_Pop0 + 1; + } + else + { + Pjpm = P_JPM(PArray); + Pjp = &(Pjpm->jpm_JP); + pop1 = (Pjpm->jpm_Pop0) + 1; // note: can roll over to 0. + +#if (defined(JUDY1) && (! defined(JU_64BIT))) + if (pop1 == 0) // rare special case of full array: + { + Word_t count = Index2 - Index1 + 1; // can roll over again. + + if (count == 0) + { + JU_SET_ERRNO(PJError, JU_ERRNO_FULL); + return(C_JERR); + } + return(count); + } +#else + assert(pop1); // JudyL or 64-bit cannot create a full array! +#endif + } + + +// COUNT POP1 ABOVE INDEX1, INCLUSIVE: + + assert(pop1); // just to be safe. + + if (Index1 == 0) // shortcut, pop1above1 is entire population: + { + pop1above1 = pop1; + } + else // find first valid Index above Index1, if any: + { +#ifdef JUDY1 + if ((retcode = Judy1First(PArray, & Index1, PJError)) == JERRI) + return(C_JERR); // pass through error. +#else + if ((PPvalue = JudyLFirst(PArray, & Index1, PJError)) == PPJERR) + return(C_JERR); // pass through error. + + retcode = (PPvalue != (PPvoid_t) NULL); // found a next Index. +#endif + +// If theres no Index at or above Index1, just return C_JERR (early exit): + + if (retcode == 0) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NONE); + return(C_JERR); + } + +// If a first/next Index was found, call the counting motor starting with that +// known valid Index, meaning the return should be positive, not C_JERR except +// in case of a real error: + + if ((pop1above1 = j__udy1LCountSM(Pjp, Index1, Pjpm)) == C_JERR) + { + JU_COPY_ERRNO(PJError, Pjpm); // pass through error. + return(C_JERR); + } + } + + +// COUNT POP1 ABOVE INDEX2, EXCLUSIVE, AND RETURN THE DIFFERENCE: +// +// In principle, calculate the ordinal of each Index and take the difference, +// with caution about off-by-one errors due to the specified Indexes being set +// or unset. In practice: +// +// - The ordinals computed here are inverse ordinals, that is, the populations +// ABOVE the specified Indexes (Index1 inclusive, Index2 exclusive), so +// subtract pop1above2 from pop1above1, rather than vice-versa. +// +// - Index1s result already includes a count for Index1 and/or Index2 if +// either is set, so calculate pop1above2 exclusive of Index2. +// +// TBD: If Index1 and Index2 fall in the same expanse in the top-state +// branch(es), would it be faster to walk the SM only once, to their divergence +// point, before calling j__udy1LCountSM() or equivalent? Possibly a non-issue +// if a top-state pop1 becomes stored with each Judy1 array. Also, consider +// whether the first call of j__udy1LCountSM() fills the cache, for common tree +// branches, for the second call. +// +// As for pop1above1, look for shortcuts for special cases when pop1above2 is +// zero. Otherwise call the counting "motor". + + assert(pop1above1); // just to be safe. + + if (Index2++ == cJU_ALLONES) return(pop1above1); // Index2 at limit. + +#ifdef JUDY1 + if ((retcode = Judy1First(PArray, & Index2, PJError)) == JERRI) + return(C_JERR); +#else + if ((PPvalue = JudyLFirst(PArray, & Index2, PJError)) == PPJERR) + return(C_JERR); + + retcode = (PPvalue != (PPvoid_t) NULL); // found a next Index. +#endif + if (retcode == 0) return(pop1above1); // no Index above Index2. + +// Just as for Index1, j__udy1LCountSM() cannot return 0 (locally == C_JERR) +// except in case of a real error: + + if ((pop1above2 = j__udy1LCountSM(Pjp, Index2, Pjpm)) == C_JERR) + { + JU_COPY_ERRNO(PJError, Pjpm); // pass through error. + return(C_JERR); + } + + if (pop1above1 == pop1above2) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NONE); + return(C_JERR); + } + + return(pop1above1 - pop1above2); + +} // Judy1Count() / JudyLCount() + + +// **************************************************************************** +// __ J U D Y 1 L C O U N T S M +// +// Given a pointer to a JP (with invalid jp_DcdPopO at cJU_ROOTSTATE), a known +// valid Index, and a Pjpm for returning error info, recursively visit a Judy +// array state machine (SM) and return the count of Indexes, including Index, +// through the end of the Judy array at this state or below. In case of error +// or a count of 0 (should never happen), return C_JERR with appropriate +// JU_ERRNO in the Pjpm. +// +// Note: This function is not told the current state because its encoded in +// the JP Type. +// +// Method: To minimize cache line fills, while studying each branch, if Index +// resides above the midpoint of the branch (which often consists of multiple +// cache lines), ADD the populations at or above Index; otherwise, SUBTRACT +// from the population of the WHOLE branch (available from the JP) the +// populations at or above Index. This is especially tricky for bitmap +// branches. +// +// Note: Unlike, say, the Ins and Del walk routines, this function returns the +// same type of returns as Judy*Count(), so it can use *_SET_ERRNO*() macros +// the same way. + +FUNCTION static Word_t j__udy1LCountSM( +const Pjp_t Pjp, // top of Judy (sub)SM. +const Word_t Index, // count at or above this Index. +const Pjpm_t Pjpm) // for returning error info. +{ + Pjbl_t Pjbl; // Pjp->jp_Addr masked and cast to types: + Pjbb_t Pjbb; + Pjbu_t Pjbu; + Pjll_t Pjll; // a Judy lower-level linear leaf. + + Word_t digit; // next digit to decode from Index. + long jpnum; // JP number in a branch (base 0). + int offset; // index ordinal within a leaf, base 0. + Word_t pop1; // total population of an expanse. + Word_t pop1above; // to return. + +// Common code to check Decode bits in a JP against the equivalent portion of +// Index; XOR together, then mask bits of interest; must be all 0: +// +// Note: Why does this code only assert() compliance rather than actively +// checking for outliers? Its because Index is supposed to be valid, hence +// always match any Dcd bits traversed. +// +// Note: This assertion turns out to be always true for cState = 3 on 32-bit +// and 7 on 64-bit, but its harmless, probably removed by the compiler. + +#define CHECKDCD(Pjp,cState) \ + assert(! JU_DCDNOTMATCHINDEX(Index, Pjp, cState)) + +// Common code to prepare to handle a root-level or lower-level branch: +// Extract a state-dependent digit from Index in a "constant" way, obtain the +// total population for the branch in a state-dependent way, and then branch to +// common code for multiple cases: +// +// For root-level branches, the state is always cJU_ROOTSTATE, and the +// population is received in Pjpm->jpm_Pop0. +// +// Note: The total population is only needed in cases where the common code +// "counts up" instead of down to minimize cache line fills. However, its +// available cheaply, and its better to do it with a constant shift (constant +// state value) instead of a variable shift later "when needed". + +#define PREPB_ROOT(Pjp,Next) \ + digit = JU_DIGITATSTATE(Index, cJU_ROOTSTATE); \ + pop1 = (Pjpm->jpm_Pop0) + 1; \ + goto Next + +#define PREPB(Pjp,cState,Next) \ + digit = JU_DIGITATSTATE(Index, cState); \ + pop1 = JU_JPBRANCH_POP0(Pjp, (cState)) + 1; \ + goto Next + + +// SWITCH ON JP TYPE: +// +// WARNING: For run-time efficiency the following cases replicate code with +// varying constants, rather than using common code with variable values! + + switch (JU_JPTYPE(Pjp)) + { + + +// ---------------------------------------------------------------------------- +// ROOT-STATE LEAF that starts with a Pop0 word; just count within the leaf: + + case cJU_LEAFW: + { + Pjlw_t Pjlw = P_JLW(Pjp->jp_Addr); // first word of leaf. + + assert((Pjpm->jpm_Pop0) + 1 == Pjlw[0] + 1); // sent correctly. + offset = j__udySearchLeafW(Pjlw + 1, Pjpm->jpm_Pop0 + 1, Index); + assert(offset >= 0); // Index must exist. + assert(offset < (Pjpm->jpm_Pop0) + 1); // Index be in range. + return((Pjpm->jpm_Pop0) + 1 - offset); // INCLUSIVE of Index. + } + +// ---------------------------------------------------------------------------- +// LINEAR BRANCH; count populations in JPs in the JBL ABOVE the next digit in +// Index, and recurse for the next digit in Index: +// +// Note: There are no null JPs in a JBL; watch out for pop1 == 0. +// +// Note: A JBL should always fit in one cache line => no need to count up +// versus down to save cache line fills. (PREPB() sets pop1 for no reason.) + + case cJU_JPBRANCH_L2: CHECKDCD(Pjp, 2); PREPB(Pjp, 2, BranchL); + case cJU_JPBRANCH_L3: CHECKDCD(Pjp, 3); PREPB(Pjp, 3, BranchL); + +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: CHECKDCD(Pjp, 4); PREPB(Pjp, 4, BranchL); + case cJU_JPBRANCH_L5: CHECKDCD(Pjp, 5); PREPB(Pjp, 5, BranchL); + case cJU_JPBRANCH_L6: CHECKDCD(Pjp, 6); PREPB(Pjp, 6, BranchL); + case cJU_JPBRANCH_L7: CHECKDCD(Pjp, 7); PREPB(Pjp, 7, BranchL); +#endif + case cJU_JPBRANCH_L: PREPB_ROOT(Pjp, BranchL); + +// Common code (state-independent) for all cases of linear branches: + +BranchL: + + Pjbl = P_JBL(Pjp->jp_Addr); + jpnum = Pjbl->jbl_NumJPs; // above last JP. + pop1above = 0; + + while (digit < (Pjbl->jbl_Expanse[--jpnum])) // still ABOVE digit. + { + if ((pop1 = j__udyJPPop1((Pjbl->jbl_jp) + jpnum)) == cJU_ALLONES) + { + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); + return(C_JERR); + } + + pop1above += pop1; + assert(jpnum > 0); // should find digit. + } + + assert(digit == (Pjbl->jbl_Expanse[jpnum])); // should find digit. + + pop1 = j__udy1LCountSM((Pjbl->jbl_jp) + jpnum, Index, Pjpm); + if (pop1 == C_JERR) return(C_JERR); // pass error up. + + assert(pop1above + pop1); + return(pop1above + pop1); + + +// ---------------------------------------------------------------------------- +// BITMAP BRANCH; count populations in JPs in the JBB ABOVE the next digit in +// Index, and recurse for the next digit in Index: +// +// Note: There are no null JPs in a JBB; watch out for pop1 == 0. + + case cJU_JPBRANCH_B2: CHECKDCD(Pjp, 2); PREPB(Pjp, 2, BranchB); + case cJU_JPBRANCH_B3: CHECKDCD(Pjp, 3); PREPB(Pjp, 3, BranchB); +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: CHECKDCD(Pjp, 4); PREPB(Pjp, 4, BranchB); + case cJU_JPBRANCH_B5: CHECKDCD(Pjp, 5); PREPB(Pjp, 5, BranchB); + case cJU_JPBRANCH_B6: CHECKDCD(Pjp, 6); PREPB(Pjp, 6, BranchB); + case cJU_JPBRANCH_B7: CHECKDCD(Pjp, 7); PREPB(Pjp, 7, BranchB); +#endif + case cJU_JPBRANCH_B: PREPB_ROOT(Pjp, BranchB); + +// Common code (state-independent) for all cases of bitmap branches: + +BranchB: + { + long subexp; // for stepping through layer 1 (subexpanses). + long findsub; // subexpanse containing Index (digit). + Word_t findbit; // bit representing Index (digit). + Word_t lowermask; // bits for indexes at or below Index. + Word_t jpcount; // JPs in a subexpanse. + Word_t clbelow; // cache lines below digits cache line. + Word_t clabove; // cache lines above digits cache line. + + Pjbb = P_JBB(Pjp->jp_Addr); + findsub = digit / cJU_BITSPERSUBEXPB; + findbit = digit % cJU_BITSPERSUBEXPB; + lowermask = JU_MASKLOWERINC(JU_BITPOSMASKB(findbit)); + clbelow = clabove = 0; // initial/default => always downward. + + assert(JU_BITMAPTESTB(Pjbb, digit)); // digit must have a JP. + assert(findsub < cJU_NUMSUBEXPB); // falls in expected range. + +// Shorthand for one subexpanse in a bitmap and for one JP in a bitmap branch: +// +// Note: BMPJP0 exists separately to support assertions. + +#define BMPJP0(Subexp) (P_JP(JU_JBB_PJP(Pjbb, Subexp))) +#define BMPJP(Subexp,JPnum) (BMPJP0(Subexp) + (JPnum)) + +#ifndef NOSMARTJBB // enable to turn off smart code for comparison purposes. + +// FIGURE OUT WHICH DIRECTION CAUSES FEWER CACHE LINE FILLS; adding the pop1s +// in JPs above Indexs JP, or subtracting the pop1s in JPs below Indexs JP. +// +// This is tricky because, while each set bit in the bitmap represents a JP, +// the JPs are scattered over cJU_NUMSUBEXPB subexpanses, each of which can +// contain JPs packed into multiple cache lines, and this code must visit every +// JP either BELOW or ABOVE the JP for Index. +// +// Number of cache lines required to hold a linear list of the given number of +// JPs, assuming the first JP is at the start of a cache line or the JPs in +// jpcount fit wholly within a single cache line, which is ensured by +// JudyMalloc(): + +#define CLPERJPS(jpcount) \ + ((((jpcount) * cJU_WORDSPERJP) + cJU_WORDSPERCL - 1) / cJU_WORDSPERCL) + +// Count cache lines below/above for each subexpanse: + + for (subexp = 0; subexp < cJU_NUMSUBEXPB; ++subexp) + { + jpcount = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp)); + +// When at the subexpanse containing Index (digit), add cache lines +// below/above appropriately, excluding the cache line containing the JP for +// Index itself: + + if (subexp < findsub) clbelow += CLPERJPS(jpcount); + else if (subexp > findsub) clabove += CLPERJPS(jpcount); + else // (subexp == findsub) + { + Word_t clfind; // cache line containing Index (digit). + + clfind = CLPERJPS(j__udyCountBitsB( + JU_JBB_BITMAP(Pjbb, subexp) & lowermask)); + + assert(clfind > 0); // digit itself should have 1 CL. + clbelow += clfind - 1; + clabove += CLPERJPS(jpcount) - clfind; + } + } +#endif // ! NOSMARTJBB + +// Note: Its impossible to get through the following "if" without setting +// jpnum -- see some of the assertions below -- but gcc -Wall doesnt know +// this, so preset jpnum to make it happy: + + jpnum = 0; + + +// COUNT POPULATION FOR A BITMAP BRANCH, in whichever direction should result +// in fewer cache line fills: +// +// Note: If the remainder of Index is zero, pop1above is the pop1 of the +// entire expanse and theres no point in recursing to lower levels; but this +// should be so rare that its not worth checking for; +// Judy1Count()/JudyLCount() never even calls the motor for Index == 0 (all +// bytes). + + +// COUNT UPWARD, subtracting each "below or at" JPs pop1 from the whole +// expanses pop1: +// +// Note: If this causes clbelow + 1 cache line fills including JPs cache +// line, thats OK; at worst this is the same as clabove. + + if (clbelow < clabove) + { +#ifdef SMARTMETRICS + ++jbb_upward; +#endif + pop1above = pop1; // subtract JPs at/below Index. + +// Count JPs for which to accrue pop1s in this subexpanse: +// +// TBD: If JU_JBB_BITMAP is cJU_FULLBITMAPB, dont bother counting. + + for (subexp = 0; subexp <= findsub; ++subexp) + { + jpcount = j__udyCountBitsB((subexp < findsub) ? + JU_JBB_BITMAP(Pjbb, subexp) : + JU_JBB_BITMAP(Pjbb, subexp) & lowermask); + + // should always find findbit: + assert((subexp < findsub) || jpcount); + +// Subtract pop1s from JPs BELOW OR AT Index (digit): +// +// Note: The pop1 for Indexs JP itself is partially added back later at a +// lower state. +// +// Note: An empty subexpanse (jpcount == 0) is handled "for free". +// +// Note: Must be null JP subexp pointer in empty subexpanse and non-empty in +// non-empty subexpanse: + + assert( jpcount || (BMPJP0(subexp) == (Pjp_t) NULL)); + assert((! jpcount) || (BMPJP0(subexp) != (Pjp_t) NULL)); + + for (jpnum = 0; jpnum < jpcount; ++jpnum) + { + if ((pop1 = j__udyJPPop1(BMPJP(subexp, jpnum))) + == cJU_ALLONES) + { + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); + return(C_JERR); + } + + pop1above -= pop1; + } + + jpnum = jpcount - 1; // make correct for digit. + } + } + +// COUNT DOWNWARD, adding each "above" JPs pop1: + + else + { + long jpcountbf; // below findbit, inclusive. +#ifdef SMARTMETRICS + ++jbb_downward; +#endif + pop1above = 0; // add JPs above Index. + jpcountbf = 0; // until subexp == findsub. + +// Count JPs for which to accrue pop1s in this subexpanse: +// +// This is more complicated than counting upward because the scan of digits +// subexpanse must count ALL JPs, to know where to START counting down, and +// ALSO note the offset of digits JP to know where to STOP counting down. + + for (subexp = cJU_NUMSUBEXPB - 1; subexp >= findsub; --subexp) + { + jpcount = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp)); + + // should always find findbit: + assert((subexp > findsub) || jpcount); + + if (! jpcount) continue; // empty subexpanse, save time. + +// Count JPs below digit, inclusive: + + if (subexp == findsub) + { + jpcountbf = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp) + & lowermask); + } + + // should always find findbit: + assert((subexp > findsub) || jpcountbf); + assert(jpcount >= jpcountbf); // proper relationship. + +// Add pop1s from JPs ABOVE Index (digit): + + // no null JP subexp pointers: + assert(BMPJP0(subexp) != (Pjp_t) NULL); + + for (jpnum = jpcount - 1; jpnum >= jpcountbf; --jpnum) + { + if ((pop1 = j__udyJPPop1(BMPJP(subexp, jpnum))) + == cJU_ALLONES) + { + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); + return(C_JERR); + } + + pop1above += pop1; + } + // jpnum is now correct for digit. + } + } // else. + +// Return the net population ABOVE the digits JP at this state (in this JBB) +// plus the population AT OR ABOVE Index in the SM under the digits JP: + + pop1 = j__udy1LCountSM(BMPJP(findsub, jpnum), Index, Pjpm); + if (pop1 == C_JERR) return(C_JERR); // pass error up. + + assert(pop1above + pop1); + return(pop1above + pop1); + + } // case. + + +// ---------------------------------------------------------------------------- +// UNCOMPRESSED BRANCH; count populations in JPs in the JBU ABOVE the next +// digit in Index, and recurse for the next digit in Index: +// +// Note: If the remainder of Index is zero, pop1above is the pop1 of the +// entire expanse and theres no point in recursing to lower levels; but this +// should be so rare that its not worth checking for; +// Judy1Count()/JudyLCount() never even calls the motor for Index == 0 (all +// bytes). + + case cJU_JPBRANCH_U2: CHECKDCD(Pjp, 2); PREPB(Pjp, 2, BranchU); + case cJU_JPBRANCH_U3: CHECKDCD(Pjp, 3); PREPB(Pjp, 3, BranchU); +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: CHECKDCD(Pjp, 4); PREPB(Pjp, 4, BranchU); + case cJU_JPBRANCH_U5: CHECKDCD(Pjp, 5); PREPB(Pjp, 5, BranchU); + case cJU_JPBRANCH_U6: CHECKDCD(Pjp, 6); PREPB(Pjp, 6, BranchU); + case cJU_JPBRANCH_U7: CHECKDCD(Pjp, 7); PREPB(Pjp, 7, BranchU); +#endif + case cJU_JPBRANCH_U: PREPB_ROOT(Pjp, BranchU); + +// Common code (state-independent) for all cases of uncompressed branches: + +BranchU: + Pjbu = P_JBU(Pjp->jp_Addr); + +#ifndef NOSMARTJBU // enable to turn off smart code for comparison purposes. + +// FIGURE OUT WHICH WAY CAUSES FEWER CACHE LINE FILLS; adding the JPs above +// Indexs JP, or subtracting the JPs below Indexs JP. +// +// COUNT UPWARD, subtracting the pop1 of each JP BELOW OR AT Index, from the +// whole expanses pop1: + + if (digit < (cJU_BRANCHUNUMJPS / 2)) + { + pop1above = pop1; // subtract JPs below Index. +#ifdef SMARTMETRICS + ++jbu_upward; +#endif + for (jpnum = 0; jpnum <= digit; ++jpnum) + { + if ((Pjbu->jbu_jp[jpnum].jp_Type) <= cJU_JPNULLMAX) + continue; // shortcut, save a function call. + + if ((pop1 = j__udyJPPop1(Pjbu->jbu_jp + jpnum)) + == cJU_ALLONES) + { + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); + return(C_JERR); + } + + pop1above -= pop1; + } + } + +// COUNT DOWNWARD, simply adding the pop1 of each JP ABOVE Index: + + else +#endif // NOSMARTJBU + { + assert(digit < cJU_BRANCHUNUMJPS); +#ifdef SMARTMETRICS + ++jbu_downward; +#endif + pop1above = 0; // add JPs above Index. + + for (jpnum = cJU_BRANCHUNUMJPS - 1; jpnum > digit; --jpnum) + { + if ((Pjbu->jbu_jp[jpnum].jp_Type) <= cJU_JPNULLMAX) + continue; // shortcut, save a function call. + + if ((pop1 = j__udyJPPop1(Pjbu->jbu_jp + jpnum)) + == cJU_ALLONES) + { + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); + return(C_JERR); + } + + pop1above += pop1; + } + } + + if ((pop1 = j__udy1LCountSM(Pjbu->jbu_jp + digit, Index, Pjpm)) + == C_JERR) return(C_JERR); // pass error up. + + assert(pop1above + pop1); + return(pop1above + pop1); + + +// ---------------------------------------------------------------------------- +// LEAF COUNT MACROS: +// +// LEAF*ABOVE() are common code for different JP types (linear leaves, bitmap +// leaves, and immediates) and different leaf Index Sizes, which result in +// calling different leaf search functions. Linear leaves get the leaf address +// from jp_Addr and the Population from jp_DcdPopO, while immediates use Pjp +// itself as the leaf address and get Population from jp_Type. + +#define LEAFLABOVE(Func) \ + Pjll = P_JLL(Pjp->jp_Addr); \ + pop1 = JU_JPLEAF_POP0(Pjp) + 1; \ + LEAFABOVE(Func, Pjll, pop1) + +#define LEAFB1ABOVE(Func) LEAFLABOVE(Func) // different Func, otherwise same. + +#ifdef JUDY1 +#define IMMABOVE(Func,Pop1) \ + Pjll = (Pjll_t) Pjp; \ + LEAFABOVE(Func, Pjll, Pop1) +#else +// Note: For JudyL immediates with >= 2 Indexes, the index bytes are in a +// different place than for Judy1: + +#define IMMABOVE(Func,Pop1) \ + LEAFABOVE(Func, (Pjll_t) (Pjp->jp_LIndex), Pop1) +#endif + +// For all leaf types, the population AT OR ABOVE is the total pop1 less the +// offset of Index; and Index should always be found: + +#define LEAFABOVE(Func,Pjll,Pop1) \ + offset = Func(Pjll, Pop1, Index); \ + assert(offset >= 0); \ + assert(offset < (Pop1)); \ + return((Pop1) - offset) + +// IMMABOVE_01 handles the special case of an immediate JP with 1 index, which +// the search functions arent used for anyway: +// +// The target Index should be the one in this Immediate, in which case the +// count above (inclusive) is always 1. + +#define IMMABOVE_01 \ + assert((JU_JPDCDPOP0(Pjp)) == JU_TRIMTODCDSIZE(Index)); \ + return(1) + + +// ---------------------------------------------------------------------------- +// LINEAR LEAF; search the leaf for Index; size is computed from jp_Type: + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: LEAFLABOVE(j__udySearchLeaf1); +#endif + case cJU_JPLEAF2: LEAFLABOVE(j__udySearchLeaf2); + case cJU_JPLEAF3: LEAFLABOVE(j__udySearchLeaf3); + +#ifdef JU_64BIT + case cJU_JPLEAF4: LEAFLABOVE(j__udySearchLeaf4); + case cJU_JPLEAF5: LEAFLABOVE(j__udySearchLeaf5); + case cJU_JPLEAF6: LEAFLABOVE(j__udySearchLeaf6); + case cJU_JPLEAF7: LEAFLABOVE(j__udySearchLeaf7); +#endif + + +// ---------------------------------------------------------------------------- +// BITMAP LEAF; search the leaf for Index: +// +// Since the bitmap describes Indexes digitally rather than linearly, this is +// not really a search, but just a count. + + case cJU_JPLEAF_B1: LEAFB1ABOVE(j__udyCountLeafB1); + + +#ifdef JUDY1 +// ---------------------------------------------------------------------------- +// FULL POPULATION: +// +// Return the count of Indexes AT OR ABOVE Index, which is the total population +// of the expanse (a constant) less the value of the undecoded digit remaining +// in Index (its base-0 offset in the expanse), which yields an inclusive count +// above. +// +// TBD: This only supports a 1-byte full expanse. Should this extract a +// stored value for pop0 and possibly more LSBs of Index, to handle larger full +// expanses? + + case cJ1_JPFULLPOPU1: + return(cJU_JPFULLPOPU1_POP0 + 1 - JU_DIGITATSTATE(Index, 1)); +#endif + + +// ---------------------------------------------------------------------------- +// IMMEDIATE: + + case cJU_JPIMMED_1_01: IMMABOVE_01; + case cJU_JPIMMED_2_01: IMMABOVE_01; + case cJU_JPIMMED_3_01: IMMABOVE_01; +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: IMMABOVE_01; + case cJU_JPIMMED_5_01: IMMABOVE_01; + case cJU_JPIMMED_6_01: IMMABOVE_01; + case cJU_JPIMMED_7_01: IMMABOVE_01; +#endif + + case cJU_JPIMMED_1_02: IMMABOVE(j__udySearchLeaf1, 2); + case cJU_JPIMMED_1_03: IMMABOVE(j__udySearchLeaf1, 3); +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: IMMABOVE(j__udySearchLeaf1, 4); + case cJU_JPIMMED_1_05: IMMABOVE(j__udySearchLeaf1, 5); + case cJU_JPIMMED_1_06: IMMABOVE(j__udySearchLeaf1, 6); + case cJU_JPIMMED_1_07: IMMABOVE(j__udySearchLeaf1, 7); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: IMMABOVE(j__udySearchLeaf1, 8); + case cJ1_JPIMMED_1_09: IMMABOVE(j__udySearchLeaf1, 9); + case cJ1_JPIMMED_1_10: IMMABOVE(j__udySearchLeaf1, 10); + case cJ1_JPIMMED_1_11: IMMABOVE(j__udySearchLeaf1, 11); + case cJ1_JPIMMED_1_12: IMMABOVE(j__udySearchLeaf1, 12); + case cJ1_JPIMMED_1_13: IMMABOVE(j__udySearchLeaf1, 13); + case cJ1_JPIMMED_1_14: IMMABOVE(j__udySearchLeaf1, 14); + case cJ1_JPIMMED_1_15: IMMABOVE(j__udySearchLeaf1, 15); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: IMMABOVE(j__udySearchLeaf2, 2); + case cJU_JPIMMED_2_03: IMMABOVE(j__udySearchLeaf2, 3); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: IMMABOVE(j__udySearchLeaf2, 4); + case cJ1_JPIMMED_2_05: IMMABOVE(j__udySearchLeaf2, 5); + case cJ1_JPIMMED_2_06: IMMABOVE(j__udySearchLeaf2, 6); + case cJ1_JPIMMED_2_07: IMMABOVE(j__udySearchLeaf2, 7); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: IMMABOVE(j__udySearchLeaf3, 2); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: IMMABOVE(j__udySearchLeaf3, 3); + case cJ1_JPIMMED_3_04: IMMABOVE(j__udySearchLeaf3, 4); + case cJ1_JPIMMED_3_05: IMMABOVE(j__udySearchLeaf3, 5); + + case cJ1_JPIMMED_4_02: IMMABOVE(j__udySearchLeaf4, 2); + case cJ1_JPIMMED_4_03: IMMABOVE(j__udySearchLeaf4, 3); + + case cJ1_JPIMMED_5_02: IMMABOVE(j__udySearchLeaf5, 2); + case cJ1_JPIMMED_5_03: IMMABOVE(j__udySearchLeaf5, 3); + + case cJ1_JPIMMED_6_02: IMMABOVE(j__udySearchLeaf6, 2); + + case cJ1_JPIMMED_7_02: IMMABOVE(j__udySearchLeaf7, 2); +#endif + + +// ---------------------------------------------------------------------------- +// OTHER CASES: + + default: JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); return(C_JERR); + + } // switch on JP type + + /*NOTREACHED*/ + +} // j__udy1LCountSM() + + +// **************************************************************************** +// J U D Y C O U N T L E A F B 1 +// +// This is a private analog of the j__udySearchLeaf*() functions for counting +// in bitmap 1-byte leaves. Since a bitmap leaf describes Indexes digitally +// rather than linearly, this is not really a search, but just a count of the +// valid Indexes == set bits below or including Index, which should be valid. +// Return the "offset" (really the ordinal), 0 .. Pop1 - 1, of Index in Pjll; +// if Indexs bit is not set (which should never happen, so this is DEBUG-mode +// only), return the 1s-complement equivalent (== negative offset minus 1). +// +// Note: The source code for this function looks identical for both Judy1 and +// JudyL, but the JU_JLB_BITMAP macro varies. +// +// Note: For simpler calling, the first arg is of type Pjll_t but then cast to +// Pjlb_t. + +FUNCTION static int j__udyCountLeafB1( +const Pjll_t Pjll, // bitmap leaf, as Pjll_t for consistency. +const Word_t Pop1, // Population of whole leaf. +const Word_t Index) // to which to count. +{ + Pjlb_t Pjlb = (Pjlb_t) Pjll; // to proper type. + Word_t digit = Index & cJU_MASKATSTATE(1); + Word_t findsub = digit / cJU_BITSPERSUBEXPL; + Word_t findbit = digit % cJU_BITSPERSUBEXPL; + int count; // in leaf through Index. + long subexp; // for stepping through subexpanses. + + +// COUNT UPWARD: +// +// The entire bitmap should fit in one cache line, but still try to save some +// CPU time by counting the fewest possible number of subexpanses from the +// bitmap. + +#ifndef NOSMARTJLB // enable to turn off smart code for comparison purposes. + + if (findsub < (cJU_NUMSUBEXPL / 2)) + { +#ifdef SMARTMETRICS + ++jlb_upward; +#endif + count = 0; + + for (subexp = 0; subexp < findsub; ++subexp) + { + count += ((JU_JLB_BITMAP(Pjlb, subexp) == cJU_FULLBITMAPL) ? + cJU_BITSPERSUBEXPL : + j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, subexp))); + } + +// This count includes findbit, which should be set, resulting in a base-1 +// offset: + + count += j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, findsub) + & JU_MASKLOWERINC(JU_BITPOSMASKL(findbit))); + + DBGCODE(if (! JU_BITMAPTESTL(Pjlb, digit)) return(~count);) + assert(count >= 1); + return(count - 1); // convert to base-0 offset. + } +#endif // NOSMARTJLB + + +// COUNT DOWNWARD: +// +// Count the valid Indexes above or at Index, and subtract from Pop1. + +#ifdef SMARTMETRICS + ++jlb_downward; +#endif + count = Pop1; // base-1 for now. + + for (subexp = cJU_NUMSUBEXPL - 1; subexp > findsub; --subexp) + { + count -= ((JU_JLB_BITMAP(Pjlb, subexp) == cJU_FULLBITMAPL) ? + cJU_BITSPERSUBEXPL : + j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, subexp))); + } + +// This count includes findbit, which should be set, resulting in a base-0 +// offset: + + count -= j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, findsub) + & JU_MASKHIGHERINC(JU_BITPOSMASKL(findbit))); + + DBGCODE(if (! JU_BITMAPTESTL(Pjlb, digit)) return(~count);) + assert(count >= 0); // should find Index itself. + return(count); // is already a base-0 offset. + +} // j__udyCountLeafB1() + + +// **************************************************************************** +// J U D Y J P P O P 1 +// +// This function takes any type of JP other than a root-level JP (cJU_LEAFW* or +// cJU_JPBRANCH* with no number suffix) and extracts the Pop1 from it. In some +// sense this is a wrapper around the JU_JP*_POP0 macros. Why write it as a +// function instead of a complex macro containing a trinary? (See version +// Judy1.h version 4.17.) We think its cheaper to call a function containing +// a switch statement with "constant" cases than to do the variable +// calculations in a trinary. +// +// For invalid JP Types return cJU_ALLONES. Note that this is an impossibly +// high Pop1 for any JP below a top level branch. + +FUNCTION Word_t j__udyJPPop1( +const Pjp_t Pjp) // JP to count. +{ + switch (JU_JPTYPE(Pjp)) + { +#ifdef notdef // caller should shortcut and not even call with these: + + case cJU_JPNULL1: + case cJU_JPNULL2: + case cJU_JPNULL3: return(0); +#ifdef JU_64BIT + case cJU_JPNULL4: + case cJU_JPNULL5: + case cJU_JPNULL6: + case cJU_JPNULL7: return(0); +#endif +#endif // notdef + + case cJU_JPBRANCH_L2: + case cJU_JPBRANCH_B2: + case cJU_JPBRANCH_U2: return(JU_JPBRANCH_POP0(Pjp,2) + 1); + + case cJU_JPBRANCH_L3: + case cJU_JPBRANCH_B3: + case cJU_JPBRANCH_U3: return(JU_JPBRANCH_POP0(Pjp,3) + 1); + +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: + case cJU_JPBRANCH_B4: + case cJU_JPBRANCH_U4: return(JU_JPBRANCH_POP0(Pjp,4) + 1); + + case cJU_JPBRANCH_L5: + case cJU_JPBRANCH_B5: + case cJU_JPBRANCH_U5: return(JU_JPBRANCH_POP0(Pjp,5) + 1); + + case cJU_JPBRANCH_L6: + case cJU_JPBRANCH_B6: + case cJU_JPBRANCH_U6: return(JU_JPBRANCH_POP0(Pjp,6) + 1); + + case cJU_JPBRANCH_L7: + case cJU_JPBRANCH_B7: + case cJU_JPBRANCH_U7: return(JU_JPBRANCH_POP0(Pjp,7) + 1); +#endif + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: +#endif + case cJU_JPLEAF2: + case cJU_JPLEAF3: +#ifdef JU_64BIT + case cJU_JPLEAF4: + case cJU_JPLEAF5: + case cJU_JPLEAF6: + case cJU_JPLEAF7: +#endif + case cJU_JPLEAF_B1: return(JU_JPLEAF_POP0(Pjp) + 1); + +#ifdef JUDY1 + case cJ1_JPFULLPOPU1: return(cJU_JPFULLPOPU1_POP0 + 1); +#endif + + case cJU_JPIMMED_1_01: + case cJU_JPIMMED_2_01: + case cJU_JPIMMED_3_01: return(1); +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: + case cJU_JPIMMED_5_01: + case cJU_JPIMMED_6_01: + case cJU_JPIMMED_7_01: return(1); +#endif + + case cJU_JPIMMED_1_02: return(2); + case cJU_JPIMMED_1_03: return(3); +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: return(4); + case cJU_JPIMMED_1_05: return(5); + case cJU_JPIMMED_1_06: return(6); + case cJU_JPIMMED_1_07: return(7); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: return(8); + case cJ1_JPIMMED_1_09: return(9); + case cJ1_JPIMMED_1_10: return(10); + case cJ1_JPIMMED_1_11: return(11); + case cJ1_JPIMMED_1_12: return(12); + case cJ1_JPIMMED_1_13: return(13); + case cJ1_JPIMMED_1_14: return(14); + case cJ1_JPIMMED_1_15: return(15); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: return(2); + case cJU_JPIMMED_2_03: return(3); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: return(4); + case cJ1_JPIMMED_2_05: return(5); + case cJ1_JPIMMED_2_06: return(6); + case cJ1_JPIMMED_2_07: return(7); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: return(2); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: return(3); + case cJ1_JPIMMED_3_04: return(4); + case cJ1_JPIMMED_3_05: return(5); + + case cJ1_JPIMMED_4_02: return(2); + case cJ1_JPIMMED_4_03: return(3); + + case cJ1_JPIMMED_5_02: return(2); + case cJ1_JPIMMED_5_03: return(3); + + case cJ1_JPIMMED_6_02: return(2); + + case cJ1_JPIMMED_7_02: return(2); +#endif + + default: return(cJU_ALLONES); + } + + /*NOTREACHED*/ + +} // j__udyJPPop1() diff --git a/libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c b/libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c new file mode 100644 index 0000000..ffe6b3b --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c @@ -0,0 +1,314 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.26 $ $Source: /judy/src/JudyCommon/JudyCreateBranch.c $ + +// Branch creation functions for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + + +// **************************************************************************** +// J U D Y C R E A T E B R A N C H L +// +// Build a BranchL from an array of JPs and associated 1 byte digits +// (expanses). Return with Pjp pointing to the BranchL. Caller must +// deallocate passed arrays, if necessary. +// +// We have no idea what kind of BranchL it is, so caller must set the jp_Type. +// +// Return -1 if error (details in Pjpm), otherwise return 1. + +FUNCTION int j__udyCreateBranchL( + Pjp_t Pjp, // Build JPs from this place + Pjp_t PJPs, // Array of JPs to put into Bitmap branch + uint8_t Exp[], // Array of expanses to put into bitmap + Word_t ExpCnt, // Number of above JPs and Expanses + Pvoid_t Pjpm) +{ + Pjbl_t PjblRaw; // pointer to linear branch. + Pjbl_t Pjbl; + + assert(ExpCnt <= cJU_BRANCHLMAXJPS); + + PjblRaw = j__udyAllocJBL(Pjpm); + if (PjblRaw == (Pjbl_t) NULL) return(-1); + Pjbl = P_JBL(PjblRaw); + +// Build a Linear Branch + Pjbl->jbl_NumJPs = ExpCnt; + +// Copy from the Linear branch from splayed leaves + JU_COPYMEM(Pjbl->jbl_Expanse, Exp, ExpCnt); + JU_COPYMEM(Pjbl->jbl_jp, PJPs, ExpCnt); + +// Pass back new pointer to the Linear branch in JP + Pjp->jp_Addr = (Word_t) PjblRaw; + + return(1); + +} // j__udyCreateBranchL() + + +// **************************************************************************** +// J U D Y C R E A T E B R A N C H B +// +// Build a BranchB from an array of JPs and associated 1 byte digits +// (expanses). Return with Pjp pointing to the BranchB. Caller must +// deallocate passed arrays, if necessary. +// +// We have no idea what kind of BranchB it is, so caller must set the jp_Type. +// +// Return -1 if error (details in Pjpm), otherwise return 1. + +FUNCTION int j__udyCreateBranchB( + Pjp_t Pjp, // Build JPs from this place + Pjp_t PJPs, // Array of JPs to put into Bitmap branch + uint8_t Exp[], // Array of expanses to put into bitmap + Word_t ExpCnt, // Number of above JPs and Expanses + Pvoid_t Pjpm) +{ + Pjbb_t PjbbRaw; // pointer to bitmap branch. + Pjbb_t Pjbb; + Word_t ii, jj; // Temps + uint8_t CurrSubExp; // Current sub expanse for BM + +// This assertion says the number of populated subexpanses is not too large. +// This function is only called when a BranchL overflows to a BranchB or when a +// cascade occurs, meaning a leaf overflows. Either way ExpCnt cant be very +// large, in fact a lot smaller than cJU_BRANCHBMAXJPS. (Otherwise a BranchU +// would be used.) Popping this assertion means something (unspecified) has +// gone very wrong, or else Judys design criteria have changed, although in +// fact there should be no HARM in creating a BranchB with higher actual +// fanout. + + assert(ExpCnt <= cJU_BRANCHBMAXJPS); + +// Get memory for a Bitmap branch + PjbbRaw = j__udyAllocJBB(Pjpm); + if (PjbbRaw == (Pjbb_t) NULL) return(-1); + Pjbb = P_JBB(PjbbRaw); + +// Get 1st "sub" expanse (0..7) of bitmap branch + CurrSubExp = Exp[0] / cJU_BITSPERSUBEXPB; + +// Index thru all 1 byte sized expanses: + + for (jj = ii = 0; ii <= ExpCnt; ii++) + { + Word_t SubExp; // Cannot be a uint8_t + +// Make sure we cover the last one + if (ii == ExpCnt) + { + SubExp = cJU_ALLONES; // Force last one + } + else + { +// Calculate the "sub" expanse of the byte expanse + SubExp = Exp[ii] / cJU_BITSPERSUBEXPB; // Bits 5..7. + +// Set the bit that represents the expanse in Exp[] + JU_JBB_BITMAP(Pjbb, SubExp) |= JU_BITPOSMASKB(Exp[ii]); + } +// Check if a new "sub" expanse range needed + if (SubExp != CurrSubExp) + { +// Get number of JPs in this sub expanse + Word_t NumJP = ii - jj; + Pjp_t PjpRaw; + Pjp_t Pjp; + + PjpRaw = j__udyAllocJBBJP(NumJP, Pjpm); + Pjp = P_JP(PjpRaw); + + if (PjpRaw == (Pjp_t) NULL) // out of memory. + { + +// Free any previous allocations: + + while(CurrSubExp--) + { + NumJP = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, + CurrSubExp)); + if (NumJP) + { + j__udyFreeJBBJP(JU_JBB_PJP(Pjbb, + CurrSubExp), NumJP, Pjpm); + } + } + j__udyFreeJBB(PjbbRaw, Pjpm); + return(-1); + } + +// Place the array of JPs in bitmap branch: + + JU_JBB_PJP(Pjbb, CurrSubExp) = PjpRaw; + +// Copy the JPs to new leaf: + + JU_COPYMEM(Pjp, PJPs + jj, NumJP); + +// On to the next bitmap branch "sub" expanse: + + jj = ii; + CurrSubExp = SubExp; + } + } // for each 1-byte expanse + +// Pass back some of the JP to the new Bitmap branch: + + Pjp->jp_Addr = (Word_t) PjbbRaw; + + return(1); + +} // j__udyCreateBranchB() + + +// **************************************************************************** +// J U D Y C R E A T E B R A N C H U +// +// Build a BranchU from a BranchB. Return with Pjp pointing to the BranchU. +// Free the BranchB and its JP subarrays. +// +// Return -1 if error (details in Pjpm), otherwise return 1. + +FUNCTION int j__udyCreateBranchU( + Pjp_t Pjp, + Pvoid_t Pjpm) +{ + jp_t JPNull; + Pjbu_t PjbuRaw; + Pjbu_t Pjbu; + Pjbb_t PjbbRaw; + Pjbb_t Pjbb; + Word_t ii, jj; + BITMAPB_t BitMap; + Pjp_t PDstJP; +#ifdef JU_STAGED_EXP + jbu_t BranchU; // Staged uncompressed branch +#else + +// Allocate memory for a BranchU: + + PjbuRaw = j__udyAllocJBU(Pjpm); + if (PjbuRaw == (Pjbu_t) NULL) return(-1); + Pjbu = P_JBU(PjbuRaw); +#endif + JU_JPSETADT(&JPNull, 0, 0, JU_JPTYPE(Pjp) - cJU_JPBRANCH_B2 + cJU_JPNULL1); + +// Get the pointer to the BranchB: + + PjbbRaw = (Pjbb_t) (Pjp->jp_Addr); + Pjbb = P_JBB(PjbbRaw); + +// Set the pointer to the Uncompressed branch +#ifdef JU_STAGED_EXP + PDstJP = BranchU.jbu_jp; +#else + PDstJP = Pjbu->jbu_jp; +#endif + for (ii = 0; ii < cJU_NUMSUBEXPB; ii++) + { + Pjp_t PjpA; + Pjp_t PjpB; + + PjpB = PjpA = P_JP(JU_JBB_PJP(Pjbb, ii)); + +// Get the bitmap for this subexpanse + BitMap = JU_JBB_BITMAP(Pjbb, ii); + +// NULL empty subexpanses + if (BitMap == 0) + { +// But, fill with NULLs + for (jj = 0; jj < cJU_BITSPERSUBEXPB; jj++) + { + PDstJP[jj] = JPNull; + } + PDstJP += cJU_BITSPERSUBEXPB; + continue; + } +// Check if Uncompressed subexpanse + if (BitMap == cJU_FULLBITMAPB) + { +// Copy subexpanse to the Uncompressed branch intact + JU_COPYMEM(PDstJP, PjpA, cJU_BITSPERSUBEXPB); + +// Bump to next subexpanse + PDstJP += cJU_BITSPERSUBEXPB; + +// Set length of subexpanse + jj = cJU_BITSPERSUBEXPB; + } + else + { + for (jj = 0; jj < cJU_BITSPERSUBEXPB; jj++) + { +// Copy JP or NULLJP depending on bit + if (BitMap & 1) { *PDstJP = *PjpA++; } + else { *PDstJP = JPNull; } + + PDstJP++; // advance to next JP + BitMap >>= 1; + } + jj = PjpA - PjpB; + } + +// Free the subexpanse: + + j__udyFreeJBBJP(JU_JBB_PJP(Pjbb, ii), jj, Pjpm); + + } // for each JP in BranchU + +#ifdef JU_STAGED_EXP + +// Allocate memory for a BranchU: + + PjbuRaw = j__udyAllocJBU(Pjpm); + if (PjbuRaw == (Pjbu_t) NULL) return(-1); + Pjbu = P_JBU(PjbuRaw); + +// Copy staged branch to newly allocated branch: +// +// TBD: I think this code is broken. + + *Pjbu = BranchU; + +#endif // JU_STAGED_EXP + +// Finally free the BranchB and put the BranchU in its place: + + j__udyFreeJBB(PjbbRaw, Pjpm); + + Pjp->jp_Addr = (Word_t) PjbuRaw; + Pjp->jp_Type += cJU_JPBRANCH_U - cJU_JPBRANCH_B; + + return(1); + +} // j__udyCreateBranchU() diff --git a/libnetdata/libjudy/src/JudyL/JudyLDecascade.c b/libnetdata/libjudy/src/JudyL/JudyLDecascade.c new file mode 100644 index 0000000..39a89ef --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLDecascade.c @@ -0,0 +1,1206 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.25 $ $Source: /judy/src/JudyCommon/JudyDecascade.c $ +// +// "Decascade" support functions for JudyDel.c: These functions convert +// smaller-index-size leaves to larger-index-size leaves, and also, bitmap +// leaves (LeafB1s) to Leaf1s, and some types of branches to smaller branches +// at the same index size. Some "decascading" occurs explicitly in JudyDel.c, +// but rare or large subroutines appear as functions here, and the overhead to +// call them is negligible. +// +// Compile with one of -DJUDY1 or -DJUDYL. Note: Function names are converted +// to Judy1 or JudyL specific values by external #defines. + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#endif +#ifdef JUDYL +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +DBGCODE(extern void JudyCheckSorted(Pjll_t Pjll, Word_t Pop1, long IndexSize);) + + +// **************************************************************************** +// __ J U D Y C O P Y 2 T O 3 +// +// Copy one or more 2-byte Indexes to a series of 3-byte Indexes. + +FUNCTION static void j__udyCopy2to3( + uint8_t * PDest, // to where to copy 3-byte Indexes. + uint16_t * PSrc, // from where to copy 2-byte indexes. + Word_t Pop1, // number of Indexes to copy. + Word_t MSByte) // most-significant byte, prefix to each Index. +{ + Word_t Temp; // for building 3-byte Index. + + assert(Pop1); + + do { + Temp = MSByte | *PSrc++; + JU_COPY3_LONG_TO_PINDEX(PDest, Temp); + PDest += 3; + } while (--Pop1); + +} // j__udyCopy2to3() + + +#ifdef JU_64BIT + +// **************************************************************************** +// __ J U D Y C O P Y 3 T O 4 +// +// Copy one or more 3-byte Indexes to a series of 4-byte Indexes. + +FUNCTION static void j__udyCopy3to4( + uint32_t * PDest, // to where to copy 4-byte Indexes. + uint8_t * PSrc, // from where to copy 3-byte indexes. + Word_t Pop1, // number of Indexes to copy. + Word_t MSByte) // most-significant byte, prefix to each Index. +{ + Word_t Temp; // for building 4-byte Index. + + assert(Pop1); + + do { + JU_COPY3_PINDEX_TO_LONG(Temp, PSrc); + Temp |= MSByte; + PSrc += 3; + *PDest++ = Temp; // truncates to uint32_t. + } while (--Pop1); + +} // j__udyCopy3to4() + + +// **************************************************************************** +// __ J U D Y C O P Y 4 T O 5 +// +// Copy one or more 4-byte Indexes to a series of 5-byte Indexes. + +FUNCTION static void j__udyCopy4to5( + uint8_t * PDest, // to where to copy 4-byte Indexes. + uint32_t * PSrc, // from where to copy 4-byte indexes. + Word_t Pop1, // number of Indexes to copy. + Word_t MSByte) // most-significant byte, prefix to each Index. +{ + Word_t Temp; // for building 5-byte Index. + + assert(Pop1); + + do { + Temp = MSByte | *PSrc++; + JU_COPY5_LONG_TO_PINDEX(PDest, Temp); + PDest += 5; + } while (--Pop1); + +} // j__udyCopy4to5() + + +// **************************************************************************** +// __ J U D Y C O P Y 5 T O 6 +// +// Copy one or more 5-byte Indexes to a series of 6-byte Indexes. + +FUNCTION static void j__udyCopy5to6( + uint8_t * PDest, // to where to copy 6-byte Indexes. + uint8_t * PSrc, // from where to copy 5-byte indexes. + Word_t Pop1, // number of Indexes to copy. + Word_t MSByte) // most-significant byte, prefix to each Index. +{ + Word_t Temp; // for building 6-byte Index. + + assert(Pop1); + + do { + JU_COPY5_PINDEX_TO_LONG(Temp, PSrc); + Temp |= MSByte; + JU_COPY6_LONG_TO_PINDEX(PDest, Temp); + PSrc += 5; + PDest += 6; + } while (--Pop1); + +} // j__udyCopy5to6() + + +// **************************************************************************** +// __ J U D Y C O P Y 6 T O 7 +// +// Copy one or more 6-byte Indexes to a series of 7-byte Indexes. + +FUNCTION static void j__udyCopy6to7( + uint8_t * PDest, // to where to copy 6-byte Indexes. + uint8_t * PSrc, // from where to copy 5-byte indexes. + Word_t Pop1, // number of Indexes to copy. + Word_t MSByte) // most-significant byte, prefix to each Index. +{ + Word_t Temp; // for building 6-byte Index. + + assert(Pop1); + + do { + JU_COPY6_PINDEX_TO_LONG(Temp, PSrc); + Temp |= MSByte; + JU_COPY7_LONG_TO_PINDEX(PDest, Temp); + PSrc += 6; + PDest += 7; + } while (--Pop1); + +} // j__udyCopy6to7() + +#endif // JU_64BIT + + +#ifndef JU_64BIT // 32-bit + +// **************************************************************************** +// __ J U D Y C O P Y 3 T O W +// +// Copy one or more 3-byte Indexes to a series of longs (words, always 4-byte). + +FUNCTION static void j__udyCopy3toW( + PWord_t PDest, // to where to copy full-word Indexes. + uint8_t * PSrc, // from where to copy 3-byte indexes. + Word_t Pop1, // number of Indexes to copy. + Word_t MSByte) // most-significant byte, prefix to each Index. +{ + assert(Pop1); + + do { + JU_COPY3_PINDEX_TO_LONG(*PDest, PSrc); + *PDest++ |= MSByte; + PSrc += 3; + } while (--Pop1); + +} // j__udyCopy3toW() + + +#else // JU_64BIT + +// **************************************************************************** +// __ J U D Y C O P Y 7 T O W +// +// Copy one or more 7-byte Indexes to a series of longs (words, always 8-byte). + +FUNCTION static void j__udyCopy7toW( + PWord_t PDest, // to where to copy full-word Indexes. + uint8_t * PSrc, // from where to copy 7-byte indexes. + Word_t Pop1, // number of Indexes to copy. + Word_t MSByte) // most-significant byte, prefix to each Index. +{ + assert(Pop1); + + do { + JU_COPY7_PINDEX_TO_LONG(*PDest, PSrc); + *PDest++ |= MSByte; + PSrc += 7; + } while (--Pop1); + +} // j__udyCopy7toW() + +#endif // JU_64BIT + + +// **************************************************************************** +// __ J U D Y B R A N C H B T O B R A N C H L +// +// When a BranchB shrinks to have few enough JPs, call this function to convert +// it to a BranchL. Return 1 for success, or -1 for failure (with details in +// Pjpm). + +FUNCTION int j__udyBranchBToBranchL( + Pjp_t Pjp, // points to BranchB to shrink. + Pvoid_t Pjpm) // for global accounting. +{ + Pjbb_t PjbbRaw; // old BranchB to shrink. + Pjbb_t Pjbb; + Pjbl_t PjblRaw; // new BranchL to create. + Pjbl_t Pjbl; + Word_t Digit; // in BranchB. + Word_t NumJPs; // non-null JPs in BranchB. + uint8_t Expanse[cJU_BRANCHLMAXJPS]; // for building jbl_Expanse[]. + Pjp_t Pjpjbl; // current JP in BranchL. + Word_t SubExp; // in BranchB. + + assert(JU_JPTYPE(Pjp) >= cJU_JPBRANCH_B2); + assert(JU_JPTYPE(Pjp) <= cJU_JPBRANCH_B); + + PjbbRaw = (Pjbb_t) (Pjp->jp_Addr); + Pjbb = P_JBB(PjbbRaw); + +// Copy 1-byte subexpanse digits from BranchB to temporary buffer for BranchL, +// for each bit set in the BranchB: +// +// TBD: The following supports variable-sized linear branches, but they are no +// longer variable; this could be simplified to save the copying. +// +// TBD: Since cJU_BRANCHLMAXJP == 7 now, and cJU_BRANCHUNUMJPS == 256, the +// following might be inefficient; is there a faster way to do it? At least +// skip wholly empty subexpanses? + + for (NumJPs = Digit = 0; Digit < cJU_BRANCHUNUMJPS; ++Digit) + { + if (JU_BITMAPTESTB(Pjbb, Digit)) + { + Expanse[NumJPs++] = Digit; + assert(NumJPs <= cJU_BRANCHLMAXJPS); // required of caller. + } + } + +// Allocate and populate the BranchL: + + if ((PjblRaw = j__udyAllocJBL(Pjpm)) == (Pjbl_t) NULL) return(-1); + Pjbl = P_JBL(PjblRaw); + + JU_COPYMEM(Pjbl->jbl_Expanse, Expanse, NumJPs); + + Pjbl->jbl_NumJPs = NumJPs; + DBGCODE(JudyCheckSorted((Pjll_t) (Pjbl->jbl_Expanse), NumJPs, 1);) + +// Copy JPs from each BranchB subexpanse subarray: + + Pjpjbl = P_JP(Pjbl->jbl_jp); // start at first JP in array. + + for (SubExp = 0; SubExp < cJU_NUMSUBEXPB; ++SubExp) + { + Pjp_t PjpRaw = JU_JBB_PJP(Pjbb, SubExp); // current Pjp. + Pjp_t Pjp; + + if (PjpRaw == (Pjp_t) NULL) continue; // skip empty subexpanse. + Pjp = P_JP(PjpRaw); + + NumJPs = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, SubExp)); + assert(NumJPs); + JU_COPYMEM(Pjpjbl, Pjp, NumJPs); // one subarray at a time. + + Pjpjbl += NumJPs; + j__udyFreeJBBJP(PjpRaw, NumJPs, Pjpm); // subarray. + } + j__udyFreeJBB(PjbbRaw, Pjpm); // BranchB itself. + +// Finish up: Calculate new JP type (same index size = level in new class), +// and tie new BranchB into parent JP: + + Pjp->jp_Type += cJU_JPBRANCH_L - cJU_JPBRANCH_B; + Pjp->jp_Addr = (Word_t) PjblRaw; + + return(1); + +} // j__udyBranchBToBranchL() + + +#ifdef notdef + +// **************************************************************************** +// __ J U D Y B R A N C H U T O B R A N C H B +// +// When a BranchU shrinks to need little enough memory, call this function to +// convert it to a BranchB to save memory (at the cost of some speed). Return +// 1 for success, or -1 for failure (with details in Pjpm). +// +// TBD: Fill out if/when needed. Not currently used in JudyDel.c for reasons +// explained there. + +FUNCTION int j__udyBranchUToBranchB( + Pjp_t Pjp, // points to BranchU to shrink. + Pvoid_t Pjpm) // for global accounting. +{ + assert(FALSE); + return(1); +} +#endif // notdef + + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + +// **************************************************************************** +// __ J U D Y L E A F B 1 T O L E A F 1 +// +// Shrink a bitmap leaf (cJU_LEAFB1) to linear leaf (cJU_JPLEAF1). +// Return 1 for success, or -1 for failure (with details in Pjpm). +// +// Note: This function is different than the other JudyLeaf*ToLeaf*() +// functions because it receives a Pjp, not just a leaf, and handles its own +// allocation and free, in order to allow the caller to continue with a LeafB1 +// if allocation fails. + +FUNCTION int j__udyLeafB1ToLeaf1( + Pjp_t Pjp, // points to LeafB1 to shrink. + Pvoid_t Pjpm) // for global accounting. +{ + Pjlb_t PjlbRaw; // bitmap in old leaf. + Pjlb_t Pjlb; + Pjll_t PjllRaw; // new Leaf1. + uint8_t * Pleaf1; // Leaf1 pointer type. + Word_t Digit; // in LeafB1 bitmap. +#ifdef JUDYL + Pjv_t PjvNew; // value area in new Leaf1. + Word_t Pop1; + Word_t SubExp; +#endif + + assert(JU_JPTYPE(Pjp) == cJU_JPLEAF_B1); + assert(((JU_JPDCDPOP0(Pjp) & 0xFF) + 1) == cJU_LEAF1_MAXPOP1); + +// Allocate JPLEAF1 and prepare pointers: + + if ((PjllRaw = j__udyAllocJLL1(cJU_LEAF1_MAXPOP1, Pjpm)) == 0) + return(-1); + + Pleaf1 = (uint8_t *) P_JLL(PjllRaw); + PjlbRaw = (Pjlb_t) (Pjp->jp_Addr); + Pjlb = P_JLB(PjlbRaw); + JUDYLCODE(PjvNew = JL_LEAF1VALUEAREA(Pleaf1, cJL_LEAF1_MAXPOP1);) + +// Copy 1-byte indexes from old LeafB1 to new Leaf1: + + for (Digit = 0; Digit < cJU_BRANCHUNUMJPS; ++Digit) + if (JU_BITMAPTESTL(Pjlb, Digit)) + *Pleaf1++ = Digit; + +#ifdef JUDYL + +// Copy all old-LeafB1 value areas from value subarrays to new Leaf1: + + for (SubExp = 0; SubExp < cJU_NUMSUBEXPL; ++SubExp) + { + Pjv_t PjvRaw = JL_JLB_PVALUE(Pjlb, SubExp); + Pjv_t Pjv = P_JV(PjvRaw); + + if (Pjv == (Pjv_t) NULL) continue; // skip empty subarray. + + Pop1 = j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, SubExp)); // subarray. + assert(Pop1); + + JU_COPYMEM(PjvNew, Pjv, Pop1); // copy value areas. + j__udyLFreeJV(PjvRaw, Pop1, Pjpm); + PjvNew += Pop1; // advance through new. + } + + assert((((Word_t) Pleaf1) - (Word_t) P_JLL(PjllRaw)) + == (PjvNew - JL_LEAF1VALUEAREA(P_JLL(PjllRaw), cJL_LEAF1_MAXPOP1))); +#endif // JUDYL + + DBGCODE(JudyCheckSorted((Pjll_t) P_JLL(PjllRaw), + (((Word_t) Pleaf1) - (Word_t) P_JLL(PjllRaw)), 1);) + +// Finish up: Free the old LeafB1 and plug the new Leaf1 into the JP: +// +// Note: jp_DcdPopO does not change here. + + j__udyFreeJLB1(PjlbRaw, Pjpm); + + Pjp->jp_Addr = (Word_t) PjllRaw; + Pjp->jp_Type = cJU_JPLEAF1; + + return(1); + +} // j__udyLeafB1ToLeaf1() + +#endif // (JUDYL || (! JU_64BIT)) + + +// **************************************************************************** +// __ J U D Y L E A F 1 T O L E A F 2 +// +// Copy 1-byte Indexes from a LeafB1 or Leaf1 to 2-byte Indexes in a Leaf2. +// Pjp MUST be one of: cJU_JPLEAF_B1, cJU_JPLEAF1, or cJU_JPIMMED_1_*. +// Return number of Indexes copied. +// +// TBD: In this and all following functions, the caller should already be able +// to compute the Pop1 return value, so why return it? + +FUNCTION Word_t j__udyLeaf1ToLeaf2( + uint16_t * PLeaf2, // destination uint16_t * Index portion of leaf. +#ifdef JUDYL + Pjv_t Pjv2, // destination value part of leaf. +#endif + Pjp_t Pjp, // 1-byte-index object from which to copy. + Word_t MSByte, // most-significant byte, prefix to each Index. + Pvoid_t Pjpm) // for global accounting. +{ + Word_t Pop1; // Indexes in leaf. + Word_t Offset; // in linear leaf list. +JUDYLCODE(Pjv_t Pjv1Raw;) // source object value area. +JUDYLCODE(Pjv_t Pjv1;) + + switch (JU_JPTYPE(Pjp)) + { + + +// JPLEAF_B1: + + case cJU_JPLEAF_B1: + { + Pjlb_t Pjlb = P_JLB(Pjp->jp_Addr); + Word_t Digit; // in LeafB1 bitmap. + JUDYLCODE(Word_t SubExp;) // in LeafB1. + + Pop1 = JU_JPBRANCH_POP0(Pjp, 1) + 1; assert(Pop1); + +// Copy 1-byte indexes from old LeafB1 to new Leaf2, including splicing in +// the missing MSByte needed in the Leaf2: + + for (Digit = 0; Digit < cJU_BRANCHUNUMJPS; ++Digit) + if (JU_BITMAPTESTL(Pjlb, Digit)) + *PLeaf2++ = MSByte | Digit; + +#ifdef JUDYL + +// Copy all old-LeafB1 value areas from value subarrays to new Leaf2: + + for (SubExp = 0; SubExp < cJU_NUMSUBEXPL; ++SubExp) + { + Word_t SubExpPop1; + + Pjv1Raw = JL_JLB_PVALUE(Pjlb, SubExp); + if (Pjv1Raw == (Pjv_t) NULL) continue; // skip empty. + Pjv1 = P_JV(Pjv1Raw); + + SubExpPop1 = j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, SubExp)); + assert(SubExpPop1); + + JU_COPYMEM(Pjv2, Pjv1, SubExpPop1); // copy value areas. + j__udyLFreeJV(Pjv1Raw, SubExpPop1, Pjpm); + Pjv2 += SubExpPop1; // advance through new. + } +#endif // JUDYL + + j__udyFreeJLB1((Pjlb_t) (Pjp->jp_Addr), Pjpm); // LeafB1 itself. + return(Pop1); + + } // case cJU_JPLEAF_B1 + + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + +// JPLEAF1: + + case cJU_JPLEAF1: + { + uint8_t * PLeaf1 = (uint8_t *) P_JLL(Pjp->jp_Addr); + + Pop1 = JU_JPBRANCH_POP0(Pjp, 1) + 1; assert(Pop1); + JUDYLCODE(Pjv1 = JL_LEAF1VALUEAREA(PLeaf1, Pop1);) + +// Copy all Index bytes including splicing in missing MSByte needed in Leaf2 +// (plus, for JudyL, value areas): + + for (Offset = 0; Offset < Pop1; ++Offset) + { + PLeaf2[Offset] = MSByte | PLeaf1[Offset]; + JUDYLCODE(Pjv2[Offset] = Pjv1[Offset];) + } + j__udyFreeJLL1((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + return(Pop1); + } +#endif // (JUDYL || (! JU_64BIT)) + + +// JPIMMED_1_01: +// +// Note: jp_DcdPopO has 3 [7] bytes of Index (all but most significant byte), +// so the assignment to PLeaf2[] truncates and MSByte is not needed. + + case cJU_JPIMMED_1_01: + { + PLeaf2[0] = JU_JPDCDPOP0(Pjp); // see above. + JUDYLCODE(Pjv2[0] = Pjp->jp_Addr;) + return(1); + } + + +// JPIMMED_1_0[2+]: + + case cJU_JPIMMED_1_02: + case cJU_JPIMMED_1_03: +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: + case cJU_JPIMMED_1_05: + case cJU_JPIMMED_1_06: + case cJU_JPIMMED_1_07: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: + case cJ1_JPIMMED_1_09: + case cJ1_JPIMMED_1_10: + case cJ1_JPIMMED_1_11: + case cJ1_JPIMMED_1_12: + case cJ1_JPIMMED_1_13: + case cJ1_JPIMMED_1_14: + case cJ1_JPIMMED_1_15: +#endif + { + Pop1 = JU_JPTYPE(Pjp) - cJU_JPIMMED_1_02 + 2; assert(Pop1); + JUDYLCODE(Pjv1Raw = (Pjv_t) (Pjp->jp_Addr);) + JUDYLCODE(Pjv1 = P_JV(Pjv1Raw);) + + for (Offset = 0; Offset < Pop1; ++Offset) + { +#ifdef JUDY1 + PLeaf2[Offset] = MSByte | Pjp->jp_1Index[Offset]; +#else + PLeaf2[Offset] = MSByte | Pjp->jp_LIndex[Offset]; + Pjv2 [Offset] = Pjv1[Offset]; +#endif + } + JUDYLCODE(j__udyLFreeJV(Pjv1Raw, Pop1, Pjpm);) + return(Pop1); + } + + +// UNEXPECTED CASES, including JPNULL1, should be handled by caller: + + default: assert(FALSE); break; + + } // switch + + return(0); + +} // j__udyLeaf1ToLeaf2() + + +// ***************************************************************************** +// __ J U D Y L E A F 2 T O L E A F 3 +// +// Copy 2-byte Indexes from a Leaf2 to 3-byte Indexes in a Leaf3. +// Pjp MUST be one of: cJU_JPLEAF2 or cJU_JPIMMED_2_*. +// Return number of Indexes copied. +// +// Note: By the time this function is called to compress a level-3 branch to a +// Leaf3, the branch has no narrow pointers under it, meaning only level-2 +// objects are below it and must be handled here. + +FUNCTION Word_t j__udyLeaf2ToLeaf3( + uint8_t * PLeaf3, // destination "uint24_t *" Index part of leaf. +#ifdef JUDYL + Pjv_t Pjv3, // destination value part of leaf. +#endif + Pjp_t Pjp, // 2-byte-index object from which to copy. + Word_t MSByte, // most-significant byte, prefix to each Index. + Pvoid_t Pjpm) // for global accounting. +{ + Word_t Pop1; // Indexes in leaf. +#if (defined(JUDYL) && defined(JU_64BIT)) + Pjv_t Pjv2Raw; // source object value area. +#endif +JUDYLCODE(Pjv_t Pjv2;) + + switch (JU_JPTYPE(Pjp)) + { + + +// JPLEAF2: + + case cJU_JPLEAF2: + { + uint16_t * PLeaf2 = (uint16_t *) P_JLL(Pjp->jp_Addr); + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; assert(Pop1); + j__udyCopy2to3(PLeaf3, PLeaf2, Pop1, MSByte); +#ifdef JUDYL + Pjv2 = JL_LEAF2VALUEAREA(PLeaf2, Pop1); + JU_COPYMEM(Pjv3, Pjv2, Pop1); +#endif + j__udyFreeJLL2((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + return(Pop1); + } + + +// JPIMMED_2_01: +// +// Note: jp_DcdPopO has 3 [7] bytes of Index (all but most significant byte), +// so the "assignment" to PLeaf3[] is exact [truncates] and MSByte is not +// needed. + + case cJU_JPIMMED_2_01: + { + JU_COPY3_LONG_TO_PINDEX(PLeaf3, JU_JPDCDPOP0(Pjp)); // see above. + JUDYLCODE(Pjv3[0] = Pjp->jp_Addr;) + return(1); + } + + +// JPIMMED_2_0[2+]: + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: + case cJU_JPIMMED_2_03: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: + case cJ1_JPIMMED_2_05: + case cJ1_JPIMMED_2_06: + case cJ1_JPIMMED_2_07: +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + { + JUDY1CODE(uint16_t * PLeaf2 = (uint16_t *) (Pjp->jp_1Index);) + JUDYLCODE(uint16_t * PLeaf2 = (uint16_t *) (Pjp->jp_LIndex);) + + Pop1 = JU_JPTYPE(Pjp) - cJU_JPIMMED_2_02 + 2; assert(Pop1); + j__udyCopy2to3(PLeaf3, PLeaf2, Pop1, MSByte); +#ifdef JUDYL + Pjv2Raw = (Pjv_t) (Pjp->jp_Addr); + Pjv2 = P_JV(Pjv2Raw); + JU_COPYMEM(Pjv3, Pjv2, Pop1); + j__udyLFreeJV(Pjv2Raw, Pop1, Pjpm); +#endif + return(Pop1); + } +#endif // (JUDY1 || JU_64BIT) + + +// UNEXPECTED CASES, including JPNULL2, should be handled by caller: + + default: assert(FALSE); break; + + } // switch + + return(0); + +} // j__udyLeaf2ToLeaf3() + + +#ifdef JU_64BIT + +// **************************************************************************** +// __ J U D Y L E A F 3 T O L E A F 4 +// +// Copy 3-byte Indexes from a Leaf3 to 4-byte Indexes in a Leaf4. +// Pjp MUST be one of: cJU_JPLEAF3 or cJU_JPIMMED_3_*. +// Return number of Indexes copied. +// +// Note: By the time this function is called to compress a level-4 branch to a +// Leaf4, the branch has no narrow pointers under it, meaning only level-3 +// objects are below it and must be handled here. + +FUNCTION Word_t j__udyLeaf3ToLeaf4( + uint32_t * PLeaf4, // destination uint32_t * Index part of leaf. +#ifdef JUDYL + Pjv_t Pjv4, // destination value part of leaf. +#endif + Pjp_t Pjp, // 3-byte-index object from which to copy. + Word_t MSByte, // most-significant byte, prefix to each Index. + Pvoid_t Pjpm) // for global accounting. +{ + Word_t Pop1; // Indexes in leaf. +JUDYLCODE(Pjv_t Pjv3Raw;) // source object value area. +JUDYLCODE(Pjv_t Pjv3;) + + switch (JU_JPTYPE(Pjp)) + { + + +// JPLEAF3: + + case cJU_JPLEAF3: + { + uint8_t * PLeaf3 = (uint8_t *) P_JLL(Pjp->jp_Addr); + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; assert(Pop1); + j__udyCopy3to4(PLeaf4, (uint8_t *) PLeaf3, Pop1, MSByte); +#ifdef JUDYL + Pjv3 = JL_LEAF3VALUEAREA(PLeaf3, Pop1); + JU_COPYMEM(Pjv4, Pjv3, Pop1); +#endif + j__udyFreeJLL3((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + return(Pop1); + } + + +// JPIMMED_3_01: +// +// Note: jp_DcdPopO has 7 bytes of Index (all but most significant byte), so +// the assignment to PLeaf4[] truncates and MSByte is not needed. + + case cJU_JPIMMED_3_01: + { + PLeaf4[0] = JU_JPDCDPOP0(Pjp); // see above. + JUDYLCODE(Pjv4[0] = Pjp->jp_Addr;) + return(1); + } + + +// JPIMMED_3_0[2+]: + + case cJU_JPIMMED_3_02: +#ifdef JUDY1 + case cJ1_JPIMMED_3_03: + case cJ1_JPIMMED_3_04: + case cJ1_JPIMMED_3_05: +#endif + { + JUDY1CODE(uint8_t * PLeaf3 = (uint8_t *) (Pjp->jp_1Index);) + JUDYLCODE(uint8_t * PLeaf3 = (uint8_t *) (Pjp->jp_LIndex);) + + JUDY1CODE(Pop1 = JU_JPTYPE(Pjp) - cJU_JPIMMED_3_02 + 2;) + JUDYLCODE(Pop1 = 2;) + + j__udyCopy3to4(PLeaf4, PLeaf3, Pop1, MSByte); +#ifdef JUDYL + Pjv3Raw = (Pjv_t) (Pjp->jp_Addr); + Pjv3 = P_JV(Pjv3Raw); + JU_COPYMEM(Pjv4, Pjv3, Pop1); + j__udyLFreeJV(Pjv3Raw, Pop1, Pjpm); +#endif + return(Pop1); + } + + +// UNEXPECTED CASES, including JPNULL3, should be handled by caller: + + default: assert(FALSE); break; + + } // switch + + return(0); + +} // j__udyLeaf3ToLeaf4() + + +// Note: In all following j__udyLeaf*ToLeaf*() functions, JPIMMED_*_0[2+] +// cases exist for Judy1 (&& 64-bit) only. JudyL has no equivalent Immeds. + + +// ***************************************************************************** +// __ J U D Y L E A F 4 T O L E A F 5 +// +// Copy 4-byte Indexes from a Leaf4 to 5-byte Indexes in a Leaf5. +// Pjp MUST be one of: cJU_JPLEAF4 or cJU_JPIMMED_4_*. +// Return number of Indexes copied. +// +// Note: By the time this function is called to compress a level-5 branch to a +// Leaf5, the branch has no narrow pointers under it, meaning only level-4 +// objects are below it and must be handled here. + +FUNCTION Word_t j__udyLeaf4ToLeaf5( + uint8_t * PLeaf5, // destination "uint40_t *" Index part of leaf. +#ifdef JUDYL + Pjv_t Pjv5, // destination value part of leaf. +#endif + Pjp_t Pjp, // 4-byte-index object from which to copy. + Word_t MSByte, // most-significant byte, prefix to each Index. + Pvoid_t Pjpm) // for global accounting. +{ + Word_t Pop1; // Indexes in leaf. +JUDYLCODE(Pjv_t Pjv4;) // source object value area. + + switch (JU_JPTYPE(Pjp)) + { + + +// JPLEAF4: + + case cJU_JPLEAF4: + { + uint32_t * PLeaf4 = (uint32_t *) P_JLL(Pjp->jp_Addr); + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; assert(Pop1); + j__udyCopy4to5(PLeaf5, PLeaf4, Pop1, MSByte); +#ifdef JUDYL + Pjv4 = JL_LEAF4VALUEAREA(PLeaf4, Pop1); + JU_COPYMEM(Pjv5, Pjv4, Pop1); +#endif + j__udyFreeJLL4((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + return(Pop1); + } + + +// JPIMMED_4_01: +// +// Note: jp_DcdPopO has 7 bytes of Index (all but most significant byte), so +// the assignment to PLeaf5[] truncates and MSByte is not needed. + + case cJU_JPIMMED_4_01: + { + JU_COPY5_LONG_TO_PINDEX(PLeaf5, JU_JPDCDPOP0(Pjp)); // see above. + JUDYLCODE(Pjv5[0] = Pjp->jp_Addr;) + return(1); + } + + +#ifdef JUDY1 + +// JPIMMED_4_0[4+]: + + case cJ1_JPIMMED_4_02: + case cJ1_JPIMMED_4_03: + { + uint32_t * PLeaf4 = (uint32_t *) (Pjp->jp_1Index); + + Pop1 = JU_JPTYPE(Pjp) - cJ1_JPIMMED_4_02 + 2; + j__udyCopy4to5(PLeaf5, PLeaf4, Pop1, MSByte); + return(Pop1); + } +#endif // JUDY1 + + +// UNEXPECTED CASES, including JPNULL4, should be handled by caller: + + default: assert(FALSE); break; + + } // switch + + return(0); + +} // j__udyLeaf4ToLeaf5() + + +// **************************************************************************** +// __ J U D Y L E A F 5 T O L E A F 6 +// +// Copy 5-byte Indexes from a Leaf5 to 6-byte Indexes in a Leaf6. +// Pjp MUST be one of: cJU_JPLEAF5 or cJU_JPIMMED_5_*. +// Return number of Indexes copied. +// +// Note: By the time this function is called to compress a level-6 branch to a +// Leaf6, the branch has no narrow pointers under it, meaning only level-5 +// objects are below it and must be handled here. + +FUNCTION Word_t j__udyLeaf5ToLeaf6( + uint8_t * PLeaf6, // destination uint8_t * Index part of leaf. +#ifdef JUDYL + Pjv_t Pjv6, // destination value part of leaf. +#endif + Pjp_t Pjp, // 5-byte-index object from which to copy. + Word_t MSByte, // most-significant byte, prefix to each Index. + Pvoid_t Pjpm) // for global accounting. +{ + Word_t Pop1; // Indexes in leaf. +JUDYLCODE(Pjv_t Pjv5;) // source object value area. + + switch (JU_JPTYPE(Pjp)) + { + + +// JPLEAF5: + + case cJU_JPLEAF5: + { + uint8_t * PLeaf5 = (uint8_t *) P_JLL(Pjp->jp_Addr); + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; assert(Pop1); + j__udyCopy5to6(PLeaf6, PLeaf5, Pop1, MSByte); +#ifdef JUDYL + Pjv5 = JL_LEAF5VALUEAREA(PLeaf5, Pop1); + JU_COPYMEM(Pjv6, Pjv5, Pop1); +#endif + j__udyFreeJLL5((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + return(Pop1); + } + + +// JPIMMED_5_01: +// +// Note: jp_DcdPopO has 7 bytes of Index (all but most significant byte), so +// the assignment to PLeaf6[] truncates and MSByte is not needed. + + case cJU_JPIMMED_5_01: + { + JU_COPY6_LONG_TO_PINDEX(PLeaf6, JU_JPDCDPOP0(Pjp)); // see above. + JUDYLCODE(Pjv6[0] = Pjp->jp_Addr;) + return(1); + } + + +#ifdef JUDY1 + +// JPIMMED_5_0[2+]: + + case cJ1_JPIMMED_5_02: + case cJ1_JPIMMED_5_03: + { + uint8_t * PLeaf5 = (uint8_t *) (Pjp->jp_1Index); + + Pop1 = JU_JPTYPE(Pjp) - cJ1_JPIMMED_5_02 + 2; + j__udyCopy5to6(PLeaf6, PLeaf5, Pop1, MSByte); + return(Pop1); + } +#endif // JUDY1 + + +// UNEXPECTED CASES, including JPNULL5, should be handled by caller: + + default: assert(FALSE); break; + + } // switch + + return(0); + +} // j__udyLeaf5ToLeaf6() + + +// ***************************************************************************** +// __ J U D Y L E A F 6 T O L E A F 7 +// +// Copy 6-byte Indexes from a Leaf2 to 7-byte Indexes in a Leaf7. +// Pjp MUST be one of: cJU_JPLEAF6 or cJU_JPIMMED_6_*. +// Return number of Indexes copied. +// +// Note: By the time this function is called to compress a level-7 branch to a +// Leaf7, the branch has no narrow pointers under it, meaning only level-6 +// objects are below it and must be handled here. + +FUNCTION Word_t j__udyLeaf6ToLeaf7( + uint8_t * PLeaf7, // destination "uint24_t *" Index part of leaf. +#ifdef JUDYL + Pjv_t Pjv7, // destination value part of leaf. +#endif + Pjp_t Pjp, // 6-byte-index object from which to copy. + Word_t MSByte, // most-significant byte, prefix to each Index. + Pvoid_t Pjpm) // for global accounting. +{ + Word_t Pop1; // Indexes in leaf. +JUDYLCODE(Pjv_t Pjv6;) // source object value area. + + switch (JU_JPTYPE(Pjp)) + { + + +// JPLEAF6: + + case cJU_JPLEAF6: + { + uint8_t * PLeaf6 = (uint8_t *) P_JLL(Pjp->jp_Addr); + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + j__udyCopy6to7(PLeaf7, PLeaf6, Pop1, MSByte); +#ifdef JUDYL + Pjv6 = JL_LEAF6VALUEAREA(PLeaf6, Pop1); + JU_COPYMEM(Pjv7, Pjv6, Pop1); +#endif + j__udyFreeJLL6((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + return(Pop1); + } + + +// JPIMMED_6_01: +// +// Note: jp_DcdPopO has 7 bytes of Index (all but most significant byte), so +// the "assignment" to PLeaf7[] is exact and MSByte is not needed. + + case cJU_JPIMMED_6_01: + { + JU_COPY7_LONG_TO_PINDEX(PLeaf7, JU_JPDCDPOP0(Pjp)); // see above. + JUDYLCODE(Pjv7[0] = Pjp->jp_Addr;) + return(1); + } + + +#ifdef JUDY1 + +// JPIMMED_6_02: + + case cJ1_JPIMMED_6_02: + { + uint8_t * PLeaf6 = (uint8_t *) (Pjp->jp_1Index); + + j__udyCopy6to7(PLeaf7, PLeaf6, /* Pop1 = */ 2, MSByte); + return(2); + } +#endif // JUDY1 + + +// UNEXPECTED CASES, including JPNULL6, should be handled by caller: + + default: assert(FALSE); break; + + } // switch + + return(0); + +} // j__udyLeaf6ToLeaf7() + +#endif // JU_64BIT + + +#ifndef JU_64BIT // 32-bit version first + +// **************************************************************************** +// __ J U D Y L E A F 3 T O L E A F W +// +// Copy 3-byte Indexes from a Leaf3 to 4-byte Indexes in a LeafW. Pjp MUST be +// one of: cJU_JPLEAF3 or cJU_JPIMMED_3_*. Return number of Indexes copied. +// +// Note: By the time this function is called to compress a level-L branch to a +// LeafW, the branch has no narrow pointers under it, meaning only level-3 +// objects are below it and must be handled here. + +FUNCTION Word_t j__udyLeaf3ToLeafW( + Pjlw_t Pjlw, // destination Index part of leaf. +#ifdef JUDYL + Pjv_t PjvW, // destination value part of leaf. +#endif + Pjp_t Pjp, // 3-byte-index object from which to copy. + Word_t MSByte, // most-significant byte, prefix to each Index. + Pvoid_t Pjpm) // for global accounting. +{ + Word_t Pop1; // Indexes in leaf. +JUDYLCODE(Pjv_t Pjv3;) // source object value area. + + switch (JU_JPTYPE(Pjp)) + { + + +// JPLEAF3: + + case cJU_JPLEAF3: + { + uint8_t * PLeaf3 = (uint8_t *) P_JLL(Pjp->jp_Addr); + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + j__udyCopy3toW((PWord_t) Pjlw, PLeaf3, Pop1, MSByte); +#ifdef JUDYL + Pjv3 = JL_LEAF3VALUEAREA(PLeaf3, Pop1); + JU_COPYMEM(PjvW, Pjv3, Pop1); +#endif + j__udyFreeJLL3((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + return(Pop1); + } + + +// JPIMMED_3_01: +// +// Note: jp_DcdPopO has 3 bytes of Index (all but most significant byte), and +// MSByte must be ord in. + + case cJU_JPIMMED_3_01: + { + Pjlw[0] = MSByte | JU_JPDCDPOP0(Pjp); // see above. + JUDYLCODE(PjvW[0] = Pjp->jp_Addr;) + return(1); + } + + +#ifdef JUDY1 + +// JPIMMED_3_02: + + case cJU_JPIMMED_3_02: + { + uint8_t * PLeaf3 = (uint8_t *) (Pjp->jp_1Index); + + j__udyCopy3toW((PWord_t) Pjlw, PLeaf3, /* Pop1 = */ 2, MSByte); + return(2); + } +#endif // JUDY1 + + +// UNEXPECTED CASES, including JPNULL3, should be handled by caller: + + default: assert(FALSE); break; + + } // switch + + return(0); + +} // j__udyLeaf3ToLeafW() + + +#else // JU_64BIT + + +// **************************************************************************** +// __ J U D Y L E A F 7 T O L E A F W +// +// Copy 7-byte Indexes from a Leaf7 to 8-byte Indexes in a LeafW. +// Pjp MUST be one of: cJU_JPLEAF7 or cJU_JPIMMED_7_*. +// Return number of Indexes copied. +// +// Note: By the time this function is called to compress a level-L branch to a +// LeafW, the branch has no narrow pointers under it, meaning only level-7 +// objects are below it and must be handled here. + +FUNCTION Word_t j__udyLeaf7ToLeafW( + Pjlw_t Pjlw, // destination Index part of leaf. +#ifdef JUDYL + Pjv_t PjvW, // destination value part of leaf. +#endif + Pjp_t Pjp, // 7-byte-index object from which to copy. + Word_t MSByte, // most-significant byte, prefix to each Index. + Pvoid_t Pjpm) // for global accounting. +{ + Word_t Pop1; // Indexes in leaf. +JUDYLCODE(Pjv_t Pjv7;) // source object value area. + + switch (JU_JPTYPE(Pjp)) + { + + +// JPLEAF7: + + case cJU_JPLEAF7: + { + uint8_t * PLeaf7 = (uint8_t *) P_JLL(Pjp->jp_Addr); + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + j__udyCopy7toW((PWord_t) Pjlw, PLeaf7, Pop1, MSByte); +#ifdef JUDYL + Pjv7 = JL_LEAF7VALUEAREA(PLeaf7, Pop1); + JU_COPYMEM(PjvW, Pjv7, Pop1); +#endif + j__udyFreeJLL7((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + return(Pop1); + } + + +// JPIMMED_7_01: +// +// Note: jp_DcdPopO has 7 bytes of Index (all but most significant byte), and +// MSByte must be ord in. + + case cJU_JPIMMED_7_01: + { + Pjlw[0] = MSByte | JU_JPDCDPOP0(Pjp); // see above. + JUDYLCODE(PjvW[0] = Pjp->jp_Addr;) + return(1); + } + + +#ifdef JUDY1 + +// JPIMMED_7_02: + + case cJ1_JPIMMED_7_02: + { + uint8_t * PLeaf7 = (uint8_t *) (Pjp->jp_1Index); + + j__udyCopy7toW((PWord_t) Pjlw, PLeaf7, /* Pop1 = */ 2, MSByte); + return(2); + } +#endif + + +// UNEXPECTED CASES, including JPNULL7, should be handled by caller: + + default: assert(FALSE); break; + + } // switch + + return(0); + +} // j__udyLeaf7ToLeafW() + +#endif // JU_64BIT diff --git a/libnetdata/libjudy/src/JudyL/JudyLDel.c b/libnetdata/libjudy/src/JudyL/JudyLDel.c new file mode 100644 index 0000000..ced4b5f --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLDel.c @@ -0,0 +1,2146 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.68 $ $Source: /judy/src/JudyCommon/JudyDel.c $ +// +// Judy1Unset() and JudyLDel() functions for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. +// +// About HYSTERESIS: In the Judy code, hysteresis means leaving around a +// nominally suboptimal (not maximally compressed) data structure after a +// deletion. As a result, the shape of the tree for two identical index sets +// can differ depending on the insert/delete path taken to arrive at the index +// sets. The purpose is to minimize worst-case behavior (thrashing) that could +// result from a series of intermixed insertions and deletions. It also makes +// for MUCH simpler code, because instead of performing, "delete and then +// compress," it can say, "compress and then delete," where due to hysteresis, +// compression is not even attempted until the object IS compressible. +// +// In some cases the code has no choice and it must "ungrow" a data structure +// across a "phase transition" boundary without hysteresis. In other cases the +// amount (such as "hysteresis = 1") is indicated by the number of JP deletions +// (in branches) or index deletions (in leaves) that can occur in succession +// before compressing the data structure. (It appears that hysteresis <= 1 in +// all cases.) +// +// In general no hysteresis occurs when the data structure type remains the +// same but the allocated memory chunk for the node must shrink, because the +// relationship is hardwired and theres no way to know how much memory is +// allocated to a given data structure. Hysteresis = 0 in all these cases. +// +// TBD: Could this code be faster if memory chunk hysteresis were supported +// somehow along with data structure type hysteresis? +// +// TBD: Should some of the assertions here be converted to product code that +// returns JU_ERRNO_CORRUPT? +// +// TBD: Dougs code had an odd mix of function-wide and limited-scope +// variables. Should some of the function-wide variables appear only in +// limited scopes, or more likely, vice-versa? + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +DBGCODE(extern void JudyCheckPop(Pvoid_t PArray);) +DBGCODE(extern void JudyCheckSorted(Pjll_t Pjll, Word_t Pop1, long IndexSize);) + +#ifdef TRACEJP +#include "JudyPrintJP.c" +#endif + +// These are defined to generic values in JudyCommon/JudyPrivateTypes.h: +// +// TBD: These should be exported from a header file, but perhaps not, as they +// are only used here, and exported from JudyDecascade.c, which is a separate +// file for profiling reasons (to prevent inlining), but which potentially +// could be merged with this file, either in SoftCM or at compile-time: + +#ifdef JUDY1 + +extern int j__udy1BranchBToBranchL(Pjp_t Pjp, Pvoid_t Pjpm); +#ifndef JU_64BIT +extern int j__udy1LeafB1ToLeaf1(Pjp_t, Pvoid_t); +#endif +extern Word_t j__udy1Leaf1ToLeaf2(uint16_t *, Pjp_t, Word_t, Pvoid_t); +extern Word_t j__udy1Leaf2ToLeaf3(uint8_t *, Pjp_t, Word_t, Pvoid_t); +#ifndef JU_64BIT +extern Word_t j__udy1Leaf3ToLeafW(Pjlw_t, Pjp_t, Word_t, Pvoid_t); +#else +extern Word_t j__udy1Leaf3ToLeaf4(uint32_t *, Pjp_t, Word_t, Pvoid_t); +extern Word_t j__udy1Leaf4ToLeaf5(uint8_t *, Pjp_t, Word_t, Pvoid_t); +extern Word_t j__udy1Leaf5ToLeaf6(uint8_t *, Pjp_t, Word_t, Pvoid_t); +extern Word_t j__udy1Leaf6ToLeaf7(uint8_t *, Pjp_t, Word_t, Pvoid_t); +extern Word_t j__udy1Leaf7ToLeafW(Pjlw_t, Pjp_t, Word_t, Pvoid_t); +#endif + +#else // JUDYL + +extern int j__udyLBranchBToBranchL(Pjp_t Pjp, Pvoid_t Pjpm); +extern int j__udyLLeafB1ToLeaf1(Pjp_t, Pvoid_t); +extern Word_t j__udyLLeaf1ToLeaf2(uint16_t *, Pjv_t, Pjp_t, Word_t, Pvoid_t); +extern Word_t j__udyLLeaf2ToLeaf3(uint8_t *, Pjv_t, Pjp_t, Word_t, Pvoid_t); +#ifndef JU_64BIT +extern Word_t j__udyLLeaf3ToLeafW(Pjlw_t, Pjv_t, Pjp_t, Word_t, Pvoid_t); +#else +extern Word_t j__udyLLeaf3ToLeaf4(uint32_t *, Pjv_t, Pjp_t, Word_t, Pvoid_t); +extern Word_t j__udyLLeaf4ToLeaf5(uint8_t *, Pjv_t, Pjp_t, Word_t, Pvoid_t); +extern Word_t j__udyLLeaf5ToLeaf6(uint8_t *, Pjv_t, Pjp_t, Word_t, Pvoid_t); +extern Word_t j__udyLLeaf6ToLeaf7(uint8_t *, Pjv_t, Pjp_t, Word_t, Pvoid_t); +extern Word_t j__udyLLeaf7ToLeafW(Pjlw_t, Pjv_t, Pjp_t, Word_t, Pvoid_t); +#endif + +#endif // JUDYL + +// For convenience in the calling code; "M1" means "minus one": + +#ifndef JU_64BIT +#define j__udyLeafM1ToLeafW j__udyLeaf3ToLeafW +#else +#define j__udyLeafM1ToLeafW j__udyLeaf7ToLeafW +#endif + + +// **************************************************************************** +// __ J U D Y D E L W A L K +// +// Given a pointer to a JP, an Index known to be valid, the number of bytes +// left to decode (== level in the tree), and a pointer to a global JPM, walk a +// Judy (sub)tree to do an unset/delete of that index, and possibly modify the +// JPM. This function is only called internally, and recursively. Unlike +// Judy1Test() and JudyLGet(), the extra time required for recursion should be +// negligible compared with the total. +// +// Return values: +// +// -1 error; details in JPM +// +// 0 Index already deleted (should never happen, Index is known to be valid) +// +// 1 previously valid Index deleted +// +// 2 same as 1, but in addition the JP now points to a BranchL containing a +// single JP, which should be compressed into the parent branch (if there +// is one, which is not the case for a top-level branch under a JPM) + +DBGCODE(uint8_t parentJPtype;) // parent branch JP type. + +FUNCTION static int j__udyDelWalk( + Pjp_t Pjp, // current JP under which to delete. + Word_t Index, // to delete. + Word_t ParentLevel, // of parent branch. + Pjpm_t Pjpm) // for returning info to top level. +{ + Word_t pop1; // of a leaf. + Word_t level; // of a leaf. + uint8_t digit; // from Index, in current branch. + Pjll_t PjllnewRaw; // address of newly allocated leaf. + Pjll_t Pjllnew; + int offset; // within a branch. + int retcode; // return code: -1, 0, 1, 2. +JUDYLCODE(Pjv_t PjvRaw;) // value area. +JUDYLCODE(Pjv_t Pjv;) + + DBGCODE(level = 0;) + +ContinueDelWalk: // for modifying state without recursing. + +#ifdef TRACEJP + JudyPrintJP(Pjp, "d", __LINE__); +#endif + + switch (JU_JPTYPE(Pjp)) // entry: Pjp, Index. + { + + +// **************************************************************************** +// LINEAR BRANCH: +// +// MACROS FOR COMMON CODE: +// +// Check for population too high to compress a branch to a leaf, meaning just +// descend through the branch, with a purposeful off-by-one error that +// constitutes hysteresis = 1. In other words, do not compress until the +// branchs CURRENT population fits in the leaf, even BEFORE deleting one +// index. +// +// Next is a label for branch-type-specific common code. Variables pop1, +// level, digit, and Index are in the context. + +#define JU_BRANCH_KEEP(cLevel,MaxPop1,Next) \ + if (pop1 > (MaxPop1)) /* hysteresis = 1 */ \ + { \ + assert((cLevel) >= 2); \ + level = (cLevel); \ + digit = JU_DIGITATSTATE(Index, cLevel); \ + goto Next; \ + } + +// Support for generic calling of JudyLeaf*ToLeaf*() functions: +// +// Note: Cannot use JUDYLCODE() because this contains a comma. + +#ifdef JUDY1 +#define JU_PVALUEPASS // null. +#else +#define JU_PVALUEPASS Pjv, +#endif + +// During compression to a leaf, check if a JP contains nothing but a +// cJU_JPIMMED_*_01, in which case shortcut calling j__udyLeaf*ToLeaf*(): +// +// Copy the index bytes from the jp_DcdPopO field (with possible truncation), +// and continue the branch-JP-walk loop. Variables Pjp and Pleaf are in the +// context. + +#define JU_BRANCH_COPY_IMMED_EVEN(cLevel,Pjp,ignore) \ + if (JU_JPTYPE(Pjp) == cJU_JPIMMED_1_01 + (cLevel) - 2) \ + { \ + *Pleaf++ = JU_JPDCDPOP0(Pjp); \ + JUDYLCODE(*Pjv++ = (Pjp)->jp_Addr;) \ + continue; /* for-loop */ \ + } + +#define JU_BRANCH_COPY_IMMED_ODD(cLevel,Pjp,CopyIndex) \ + if (JU_JPTYPE(Pjp) == cJU_JPIMMED_1_01 + (cLevel) - 2) \ + { \ + CopyIndex(Pleaf, (Word_t) (JU_JPDCDPOP0(Pjp))); \ + Pleaf += (cLevel); /* index size = level */ \ + JUDYLCODE(*Pjv++ = (Pjp)->jp_Addr;) \ + continue; /* for-loop */ \ + } + +// Compress a BranchL into a leaf one index size larger: +// +// Allocate a new leaf, walk the JPs in the old BranchL and pack their contents +// into the new leaf (of type NewJPType), free the old BranchL, and finally +// restart the switch to delete Index from the new leaf. (Note that all +// BranchLs are the same size.) Variables Pjp, Pjpm, Pleaf, digit, and pop1 +// are in the context. + +#define JU_BRANCHL_COMPRESS(cLevel,LeafType,MaxPop1,NewJPType, \ + LeafToLeaf,Alloc,ValueArea, \ + CopyImmed,CopyIndex) \ + { \ + LeafType Pleaf; \ + Pjbl_t PjblRaw; \ + Pjbl_t Pjbl; \ + Word_t numJPs; \ + \ + if ((PjllnewRaw = Alloc(MaxPop1, Pjpm)) == 0) return(-1); \ + Pjllnew = P_JLL(PjllnewRaw); \ + Pleaf = (LeafType) Pjllnew; \ + JUDYLCODE(Pjv = ValueArea(Pleaf, MaxPop1);) \ + \ + PjblRaw = (Pjbl_t) (Pjp->jp_Addr); \ + Pjbl = P_JBL(PjblRaw); \ + numJPs = Pjbl->jbl_NumJPs; \ + \ + for (offset = 0; offset < numJPs; ++offset) \ + { \ + CopyImmed(cLevel, (Pjbl->jbl_jp) + offset, CopyIndex); \ + \ + pop1 = LeafToLeaf(Pleaf, JU_PVALUEPASS \ + (Pjbl->jbl_jp) + offset, \ + JU_DIGITTOSTATE(Pjbl->jbl_Expanse[offset], \ + cLevel), (Pvoid_t) Pjpm); \ + Pleaf = (LeafType) (((Word_t) Pleaf) + ((cLevel) * pop1)); \ + JUDYLCODE(Pjv += pop1;) \ + } \ + assert(((((Word_t) Pleaf) - ((Word_t) Pjllnew)) / (cLevel)) == (MaxPop1)); \ + JUDYLCODE(assert((Pjv - ValueArea(Pjllnew, MaxPop1)) == (MaxPop1));) \ + DBGCODE(JudyCheckSorted(Pjllnew, MaxPop1, cLevel);) \ + \ + j__udyFreeJBL(PjblRaw, Pjpm); \ + \ + Pjp->jp_Type = (NewJPType); \ + Pjp->jp_Addr = (Word_t) PjllnewRaw; \ + goto ContinueDelWalk; /* delete from new leaf */ \ + } + +// Overall common code for initial BranchL deletion handling: +// +// Assert that Index is in the branch, then see if the BranchL should be kept +// or else compressed to a leaf. Variables Index, Pjp, and pop1 are in the +// context. + +#define JU_BRANCHL(cLevel,MaxPop1,LeafType,NewJPType, \ + LeafToLeaf,Alloc,ValueArea,CopyImmed,CopyIndex) \ + \ + assert(! JU_DCDNOTMATCHINDEX(Index, Pjp, cLevel)); \ + assert(ParentLevel > (cLevel)); \ + \ + pop1 = JU_JPBRANCH_POP0(Pjp, cLevel) + 1; \ + JU_BRANCH_KEEP(cLevel, MaxPop1, BranchLKeep); \ + assert(pop1 == (MaxPop1)); \ + \ + JU_BRANCHL_COMPRESS(cLevel, LeafType, MaxPop1, NewJPType, \ + LeafToLeaf, Alloc, ValueArea, CopyImmed, CopyIndex) + + +// END OF MACROS, START OF CASES: + + case cJU_JPBRANCH_L2: + + JU_BRANCHL(2, cJU_LEAF2_MAXPOP1, uint16_t *, cJU_JPLEAF2, + j__udyLeaf1ToLeaf2, j__udyAllocJLL2, JL_LEAF2VALUEAREA, + JU_BRANCH_COPY_IMMED_EVEN, ignore); + + case cJU_JPBRANCH_L3: + + JU_BRANCHL(3, cJU_LEAF3_MAXPOP1, uint8_t *, cJU_JPLEAF3, + j__udyLeaf2ToLeaf3, j__udyAllocJLL3, JL_LEAF3VALUEAREA, + JU_BRANCH_COPY_IMMED_ODD, JU_COPY3_LONG_TO_PINDEX); + +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: + + JU_BRANCHL(4, cJU_LEAF4_MAXPOP1, uint32_t *, cJU_JPLEAF4, + j__udyLeaf3ToLeaf4, j__udyAllocJLL4, JL_LEAF4VALUEAREA, + JU_BRANCH_COPY_IMMED_EVEN, ignore); + + case cJU_JPBRANCH_L5: + + JU_BRANCHL(5, cJU_LEAF5_MAXPOP1, uint8_t *, cJU_JPLEAF5, + j__udyLeaf4ToLeaf5, j__udyAllocJLL5, JL_LEAF5VALUEAREA, + JU_BRANCH_COPY_IMMED_ODD, JU_COPY5_LONG_TO_PINDEX); + + case cJU_JPBRANCH_L6: + + JU_BRANCHL(6, cJU_LEAF6_MAXPOP1, uint8_t *, cJU_JPLEAF6, + j__udyLeaf5ToLeaf6, j__udyAllocJLL6, JL_LEAF6VALUEAREA, + JU_BRANCH_COPY_IMMED_ODD, JU_COPY6_LONG_TO_PINDEX); + + case cJU_JPBRANCH_L7: + + JU_BRANCHL(7, cJU_LEAF7_MAXPOP1, uint8_t *, cJU_JPLEAF7, + j__udyLeaf6ToLeaf7, j__udyAllocJLL7, JL_LEAF7VALUEAREA, + JU_BRANCH_COPY_IMMED_ODD, JU_COPY7_LONG_TO_PINDEX); +#endif // JU_64BIT + +// A top-level BranchL is different and cannot use JU_BRANCHL(): Dont try to +// compress to a (LEAFW) leaf yet, but leave this for a later deletion +// (hysteresis > 0); and the next JP type depends on the system word size; so +// dont use JU_BRANCH_KEEP(): + + case cJU_JPBRANCH_L: + { + Pjbl_t Pjbl; + Word_t numJPs; + + level = cJU_ROOTSTATE; + digit = JU_DIGITATSTATE(Index, cJU_ROOTSTATE); + + // fall through: + + +// COMMON CODE FOR KEEPING AND DESCENDING THROUGH A BRANCHL: +// +// Come here with level and digit set. + +BranchLKeep: + Pjbl = P_JBL(Pjp->jp_Addr); + numJPs = Pjbl->jbl_NumJPs; + assert(numJPs > 0); + DBGCODE(parentJPtype = JU_JPTYPE(Pjp);) + +// Search for a match to the digit (valid Index => must find digit): + + for (offset = 0; (Pjbl->jbl_Expanse[offset]) != digit; ++offset) + assert(offset < numJPs - 1); + + Pjp = (Pjbl->jbl_jp) + offset; + +// If not at a (deletable) JPIMMED_*_01, continue the walk (to descend through +// the BranchL): + + assert(level >= 2); + if ((JU_JPTYPE(Pjp)) != cJU_JPIMMED_1_01 + level - 2) break; + +// At JPIMMED_*_01: Ensure the index is in the right expanse, then delete the +// Immed from the BranchL: +// +// Note: A BranchL has a fixed size and format regardless of numJPs. + + assert(JU_JPDCDPOP0(Pjp) == JU_TRIMTODCDSIZE(Index)); + + JU_DELETEINPLACE(Pjbl->jbl_Expanse, numJPs, offset, ignore); + JU_DELETEINPLACE(Pjbl->jbl_jp, numJPs, offset, ignore); + + DBGCODE(JudyCheckSorted((Pjll_t) (Pjbl->jbl_Expanse), + numJPs - 1, 1);) + +// If only one index left in the BranchL, indicate this to the caller: + + return ((--(Pjbl->jbl_NumJPs) <= 1) ? 2 : 1); + + } // case cJU_JPBRANCH_L. + + +// **************************************************************************** +// BITMAP BRANCH: +// +// MACROS FOR COMMON CODE: +// +// Note the reuse of common macros here, defined earlier: JU_BRANCH_KEEP(), +// JU_PVALUE*. +// +// Compress a BranchB into a leaf one index size larger: +// +// Allocate a new leaf, walk the JPs in the old BranchB (one bitmap subexpanse +// at a time) and pack their contents into the new leaf (of type NewJPType), +// free the old BranchB, and finally restart the switch to delete Index from +// the new leaf. Variables Pjp, Pjpm, Pleaf, digit, and pop1 are in the +// context. +// +// Note: Its no accident that the interface to JU_BRANCHB_COMPRESS() is +// identical to JU_BRANCHL_COMPRESS(). Only the details differ in how to +// traverse the branchs JPs. + +#define JU_BRANCHB_COMPRESS(cLevel,LeafType,MaxPop1,NewJPType, \ + LeafToLeaf,Alloc,ValueArea, \ + CopyImmed,CopyIndex) \ + { \ + LeafType Pleaf; \ + Pjbb_t PjbbRaw; /* BranchB to compress */ \ + Pjbb_t Pjbb; \ + Word_t subexp; /* current subexpanse number */ \ + BITMAPB_t bitmap; /* portion for this subexpanse */ \ + Pjp_t Pjp2Raw; /* one subexpanses subarray */ \ + Pjp_t Pjp2; \ + \ + if ((PjllnewRaw = Alloc(MaxPop1, Pjpm)) == 0) return(-1); \ + Pjllnew = P_JLL(PjllnewRaw); \ + Pleaf = (LeafType) Pjllnew; \ + JUDYLCODE(Pjv = ValueArea(Pleaf, MaxPop1);) \ + \ + PjbbRaw = (Pjbb_t) (Pjp->jp_Addr); \ + Pjbb = P_JBB(PjbbRaw); \ + \ + for (subexp = 0; subexp < cJU_NUMSUBEXPB; ++subexp) \ + { \ + if ((bitmap = JU_JBB_BITMAP(Pjbb, subexp)) == 0) \ + continue; /* empty subexpanse */ \ + \ + digit = subexp * cJU_BITSPERSUBEXPB; \ + Pjp2Raw = JU_JBB_PJP(Pjbb, subexp); \ + Pjp2 = P_JP(Pjp2Raw); \ + assert(Pjp2 != (Pjp_t) NULL); \ + \ + for (offset = 0; bitmap != 0; bitmap >>= 1, ++digit) \ + { \ + if (! (bitmap & 1)) \ + continue; /* empty sub-subexpanse */ \ + \ + ++offset; /* before any continue */ \ + \ + CopyImmed(cLevel, Pjp2 + offset - 1, CopyIndex); \ + \ + pop1 = LeafToLeaf(Pleaf, JU_PVALUEPASS \ + Pjp2 + offset - 1, \ + JU_DIGITTOSTATE(digit, cLevel), \ + (Pvoid_t) Pjpm); \ + Pleaf = (LeafType) (((Word_t) Pleaf) + ((cLevel) * pop1)); \ + JUDYLCODE(Pjv += pop1;) \ + } \ + j__udyFreeJBBJP(Pjp2Raw, /* pop1 = */ offset, Pjpm); \ + } \ + assert(((((Word_t) Pleaf) - ((Word_t) Pjllnew)) / (cLevel)) == (MaxPop1)); \ + JUDYLCODE(assert((Pjv - ValueArea(Pjllnew, MaxPop1)) == (MaxPop1));) \ + DBGCODE(JudyCheckSorted(Pjllnew, MaxPop1, cLevel);) \ + \ + j__udyFreeJBB(PjbbRaw, Pjpm); \ + \ + Pjp->jp_Type = (NewJPType); \ + Pjp->jp_Addr = (Word_t) PjllnewRaw; \ + goto ContinueDelWalk; /* delete from new leaf */ \ + } + +// Overall common code for initial BranchB deletion handling: +// +// Assert that Index is in the branch, then see if the BranchB should be kept +// or else compressed to a leaf. Variables Index, Pjp, and pop1 are in the +// context. + +#define JU_BRANCHB(cLevel,MaxPop1,LeafType,NewJPType, \ + LeafToLeaf,Alloc,ValueArea,CopyImmed,CopyIndex) \ + \ + assert(! JU_DCDNOTMATCHINDEX(Index, Pjp, cLevel)); \ + assert(ParentLevel > (cLevel)); \ + \ + pop1 = JU_JPBRANCH_POP0(Pjp, cLevel) + 1; \ + JU_BRANCH_KEEP(cLevel, MaxPop1, BranchBKeep); \ + assert(pop1 == (MaxPop1)); \ + \ + JU_BRANCHB_COMPRESS(cLevel, LeafType, MaxPop1, NewJPType, \ + LeafToLeaf, Alloc, ValueArea, CopyImmed, CopyIndex) + + +// END OF MACROS, START OF CASES: +// +// Note: Its no accident that the macro calls for these cases is nearly +// identical to the code for BranchLs. + + case cJU_JPBRANCH_B2: + + JU_BRANCHB(2, cJU_LEAF2_MAXPOP1, uint16_t *, cJU_JPLEAF2, + j__udyLeaf1ToLeaf2, j__udyAllocJLL2, JL_LEAF2VALUEAREA, + JU_BRANCH_COPY_IMMED_EVEN, ignore); + + case cJU_JPBRANCH_B3: + + JU_BRANCHB(3, cJU_LEAF3_MAXPOP1, uint8_t *, cJU_JPLEAF3, + j__udyLeaf2ToLeaf3, j__udyAllocJLL3, JL_LEAF3VALUEAREA, + JU_BRANCH_COPY_IMMED_ODD, JU_COPY3_LONG_TO_PINDEX); + +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: + + JU_BRANCHB(4, cJU_LEAF4_MAXPOP1, uint32_t *, cJU_JPLEAF4, + j__udyLeaf3ToLeaf4, j__udyAllocJLL4, JL_LEAF4VALUEAREA, + JU_BRANCH_COPY_IMMED_EVEN, ignore); + + case cJU_JPBRANCH_B5: + + JU_BRANCHB(5, cJU_LEAF5_MAXPOP1, uint8_t *, cJU_JPLEAF5, + j__udyLeaf4ToLeaf5, j__udyAllocJLL5, JL_LEAF5VALUEAREA, + JU_BRANCH_COPY_IMMED_ODD, JU_COPY5_LONG_TO_PINDEX); + + case cJU_JPBRANCH_B6: + + JU_BRANCHB(6, cJU_LEAF6_MAXPOP1, uint8_t *, cJU_JPLEAF6, + j__udyLeaf5ToLeaf6, j__udyAllocJLL6, JL_LEAF6VALUEAREA, + JU_BRANCH_COPY_IMMED_ODD, JU_COPY6_LONG_TO_PINDEX); + + case cJU_JPBRANCH_B7: + + JU_BRANCHB(7, cJU_LEAF7_MAXPOP1, uint8_t *, cJU_JPLEAF7, + j__udyLeaf6ToLeaf7, j__udyAllocJLL7, JL_LEAF7VALUEAREA, + JU_BRANCH_COPY_IMMED_ODD, JU_COPY7_LONG_TO_PINDEX); +#endif // JU_64BIT + +// A top-level BranchB is different and cannot use JU_BRANCHB(): Dont try to +// compress to a (LEAFW) leaf yet, but leave this for a later deletion +// (hysteresis > 0); and the next JP type depends on the system word size; so +// dont use JU_BRANCH_KEEP(): + + case cJU_JPBRANCH_B: + { + Pjbb_t Pjbb; // BranchB to modify. + Word_t subexp; // current subexpanse number. + Word_t subexp2; // in second-level loop. + BITMAPB_t bitmap; // portion for this subexpanse. + BITMAPB_t bitmask; // with digits bit set. + Pjp_t Pjp2Raw; // one subexpanses subarray. + Pjp_t Pjp2; + Word_t numJPs; // in one subexpanse. + + level = cJU_ROOTSTATE; + digit = JU_DIGITATSTATE(Index, cJU_ROOTSTATE); + + // fall through: + + +// COMMON CODE FOR KEEPING AND DESCENDING THROUGH A BRANCHB: +// +// Come here with level and digit set. + +BranchBKeep: + Pjbb = P_JBB(Pjp->jp_Addr); + subexp = digit / cJU_BITSPERSUBEXPB; + bitmap = JU_JBB_BITMAP(Pjbb, subexp); + bitmask = JU_BITPOSMASKB(digit); + assert(bitmap & bitmask); // Index valid => digits bit is set. + DBGCODE(parentJPtype = JU_JPTYPE(Pjp);) + +// Compute digits offset into the bitmap, with a fast method if all bits are +// set: + + offset = ((bitmap == (cJU_FULLBITMAPB)) ? + digit % cJU_BITSPERSUBEXPB : + j__udyCountBitsB(bitmap & JU_MASKLOWEREXC(bitmask))); + + Pjp2Raw = JU_JBB_PJP(Pjbb, subexp); + Pjp2 = P_JP(Pjp2Raw); + assert(Pjp2 != (Pjp_t) NULL); // valid subexpanse pointer. + +// If not at a (deletable) JPIMMED_*_01, continue the walk (to descend through +// the BranchB): + + if (JU_JPTYPE(Pjp2 + offset) != cJU_JPIMMED_1_01 + level - 2) + { + Pjp = Pjp2 + offset; + break; + } + +// At JPIMMED_*_01: Ensure the index is in the right expanse, then delete the +// Immed from the BranchB: + + assert(JU_JPDCDPOP0(Pjp2 + offset) + == JU_TRIMTODCDSIZE(Index)); + +// If only one index is left in the subexpanse, free the JP array: + + if ((numJPs = j__udyCountBitsB(bitmap)) == 1) + { + j__udyFreeJBBJP(Pjp2Raw, /* pop1 = */ 1, Pjpm); + JU_JBB_PJP(Pjbb, subexp) = (Pjp_t) NULL; + } + +// Shrink JP array in-place: + + else if (JU_BRANCHBJPGROWINPLACE(numJPs - 1)) + { + assert(numJPs > 0); + JU_DELETEINPLACE(Pjp2, numJPs, offset, ignore); + } + +// JP array would end up too large; compress it to a smaller one: + + else + { + Pjp_t PjpnewRaw; + Pjp_t Pjpnew; + + if ((PjpnewRaw = j__udyAllocJBBJP(numJPs - 1, Pjpm)) + == (Pjp_t) NULL) return(-1); + Pjpnew = P_JP(PjpnewRaw); + + JU_DELETECOPY(Pjpnew, Pjp2, numJPs, offset, ignore); + j__udyFreeJBBJP(Pjp2Raw, numJPs, Pjpm); // old. + + JU_JBB_PJP(Pjbb, subexp) = PjpnewRaw; + } + +// Clear digits bit in the bitmap: + + JU_JBB_BITMAP(Pjbb, subexp) ^= bitmask; + +// If the current subexpanse alone is still too large for a BranchL (with +// hysteresis = 1), the delete is all done: + + if (numJPs > cJU_BRANCHLMAXJPS) return(1); + +// Consider shrinking the current BranchB to a BranchL: +// +// Check the numbers of JPs in other subexpanses in the BranchL. Upon reaching +// the critical number of numJPs (which could be right at the start; again, +// with hysteresis = 1), its faster to just watch for any non-empty subexpanse +// than to count bits in each subexpanse. Upon finding too many JPs, give up +// on shrinking the BranchB. + + for (subexp2 = 0; subexp2 < cJU_NUMSUBEXPB; ++subexp2) + { + if (subexp2 == subexp) continue; // skip current subexpanse. + + if ((numJPs == cJU_BRANCHLMAXJPS) ? + JU_JBB_BITMAP(Pjbb, subexp2) : + ((numJPs += j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp2))) + > cJU_BRANCHLMAXJPS)) + { + return(1); // too many JPs, cannot shrink. + } + } + +// Shrink current BranchB to a BranchL: +// +// Note: In this rare case, ignore the return value, do not pass it to the +// caller, because the deletion is already successfully completed and the +// caller(s) must decrement population counts. The only errors expected from +// this call are JU_ERRNO_NOMEM and JU_ERRNO_OVERRUN, neither of which is worth +// forwarding from this point. See also 4.1, 4.8, and 4.15 of this file. + + (void) j__udyBranchBToBranchL(Pjp, Pjpm); + return(1); + + } // case. + + +// **************************************************************************** +// UNCOMPRESSED BRANCH: +// +// MACROS FOR COMMON CODE: +// +// Note the reuse of common macros here, defined earlier: JU_PVALUE*. +// +// Compress a BranchU into a leaf one index size larger: +// +// Allocate a new leaf, walk the JPs in the old BranchU and pack their contents +// into the new leaf (of type NewJPType), free the old BranchU, and finally +// restart the switch to delete Index from the new leaf. Variables Pjp, Pjpm, +// digit, and pop1 are in the context. +// +// Note: Its no accident that the interface to JU_BRANCHU_COMPRESS() is +// nearly identical to JU_BRANCHL_COMPRESS(); just NullJPType is added. The +// details differ in how to traverse the branchs JPs -- +// +// -- and also, what to do upon encountering a cJU_JPIMMED_*_01 JP. In +// BranchLs and BranchBs the JP must be deleted, but in a BranchU its merely +// converted to a null JP, and this is done by other switch cases, so the "keep +// branch" situation is simpler here and JU_BRANCH_KEEP() is not used. Also, +// theres no code to convert a BranchU to a BranchB since counting the JPs in +// a BranchU is (at least presently) expensive, and besides, keeping around a +// BranchU is form of hysteresis. + +#define JU_BRANCHU_COMPRESS(cLevel,LeafType,MaxPop1,NullJPType,NewJPType, \ + LeafToLeaf,Alloc,ValueArea,CopyImmed,CopyIndex) \ + { \ + LeafType Pleaf; \ + Pjbu_t PjbuRaw = (Pjbu_t) (Pjp->jp_Addr); \ + Pjp_t Pjp2 = JU_JBU_PJP0(Pjp); \ + Word_t ldigit; /* larger than uint8_t */ \ + \ + if ((PjllnewRaw = Alloc(MaxPop1, Pjpm)) == 0) return(-1); \ + Pjllnew = P_JLL(PjllnewRaw); \ + Pleaf = (LeafType) Pjllnew; \ + JUDYLCODE(Pjv = ValueArea(Pleaf, MaxPop1);) \ + \ + for (ldigit = 0; ldigit < cJU_BRANCHUNUMJPS; ++ldigit, ++Pjp2) \ + { \ + /* fast-process common types: */ \ + if (JU_JPTYPE(Pjp2) == (NullJPType)) continue; \ + CopyImmed(cLevel, Pjp2, CopyIndex); \ + \ + pop1 = LeafToLeaf(Pleaf, JU_PVALUEPASS Pjp2, \ + JU_DIGITTOSTATE(ldigit, cLevel), \ + (Pvoid_t) Pjpm); \ + Pleaf = (LeafType) (((Word_t) Pleaf) + ((cLevel) * pop1)); \ + JUDYLCODE(Pjv += pop1;) \ + } \ + assert(((((Word_t) Pleaf) - ((Word_t) Pjllnew)) / (cLevel)) == (MaxPop1)); \ + JUDYLCODE(assert((Pjv - ValueArea(Pjllnew, MaxPop1)) == (MaxPop1));) \ + DBGCODE(JudyCheckSorted(Pjllnew, MaxPop1, cLevel);) \ + \ + j__udyFreeJBU(PjbuRaw, Pjpm); \ + \ + Pjp->jp_Type = (NewJPType); \ + Pjp->jp_Addr = (Word_t) PjllnewRaw; \ + goto ContinueDelWalk; /* delete from new leaf */ \ + } + +// Overall common code for initial BranchU deletion handling: +// +// Assert that Index is in the branch, then see if a BranchU should be kept or +// else compressed to a leaf. Variables level, Index, Pjp, and pop1 are in the +// context. +// +// Note: BranchU handling differs from BranchL and BranchB as described above. + +#define JU_BRANCHU(cLevel,MaxPop1,LeafType,NullJPType,NewJPType, \ + LeafToLeaf,Alloc,ValueArea,CopyImmed,CopyIndex) \ + \ + assert(! JU_DCDNOTMATCHINDEX(Index, Pjp, cLevel)); \ + assert(ParentLevel > (cLevel)); \ + DBGCODE(parentJPtype = JU_JPTYPE(Pjp);) \ + \ + pop1 = JU_JPBRANCH_POP0(Pjp, cLevel) + 1; \ + \ + if (pop1 > (MaxPop1)) /* hysteresis = 1 */ \ + { \ + level = (cLevel); \ + Pjp = P_JP(Pjp->jp_Addr) + JU_DIGITATSTATE(Index, cLevel);\ + break; /* descend to next level */ \ + } \ + assert(pop1 == (MaxPop1)); \ + \ + JU_BRANCHU_COMPRESS(cLevel, LeafType, MaxPop1, NullJPType, NewJPType, \ + LeafToLeaf, Alloc, ValueArea, CopyImmed, CopyIndex) + + +// END OF MACROS, START OF CASES: +// +// Note: Its no accident that the macro calls for these cases is nearly +// identical to the code for BranchLs, with the addition of cJU_JPNULL* +// parameters only needed for BranchUs. + + case cJU_JPBRANCH_U2: + + JU_BRANCHU(2, cJU_LEAF2_MAXPOP1, uint16_t *, + cJU_JPNULL1, cJU_JPLEAF2, + j__udyLeaf1ToLeaf2, j__udyAllocJLL2, JL_LEAF2VALUEAREA, + JU_BRANCH_COPY_IMMED_EVEN, ignore); + + case cJU_JPBRANCH_U3: + + JU_BRANCHU(3, cJU_LEAF3_MAXPOP1, uint8_t *, + cJU_JPNULL2, cJU_JPLEAF3, + j__udyLeaf2ToLeaf3, j__udyAllocJLL3, JL_LEAF3VALUEAREA, + JU_BRANCH_COPY_IMMED_ODD, JU_COPY3_LONG_TO_PINDEX); + +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: + + JU_BRANCHU(4, cJU_LEAF4_MAXPOP1, uint32_t *, + cJU_JPNULL3, cJU_JPLEAF4, + j__udyLeaf3ToLeaf4, j__udyAllocJLL4, JL_LEAF4VALUEAREA, + JU_BRANCH_COPY_IMMED_EVEN, ignore); + + case cJU_JPBRANCH_U5: + + JU_BRANCHU(5, cJU_LEAF5_MAXPOP1, uint8_t *, + cJU_JPNULL4, cJU_JPLEAF5, + j__udyLeaf4ToLeaf5, j__udyAllocJLL5, JL_LEAF5VALUEAREA, + JU_BRANCH_COPY_IMMED_ODD, JU_COPY5_LONG_TO_PINDEX); + + case cJU_JPBRANCH_U6: + + JU_BRANCHU(6, cJU_LEAF6_MAXPOP1, uint8_t *, + cJU_JPNULL5, cJU_JPLEAF6, + j__udyLeaf5ToLeaf6, j__udyAllocJLL6, JL_LEAF6VALUEAREA, + JU_BRANCH_COPY_IMMED_ODD, JU_COPY6_LONG_TO_PINDEX); + + case cJU_JPBRANCH_U7: + + JU_BRANCHU(7, cJU_LEAF7_MAXPOP1, uint8_t *, + cJU_JPNULL6, cJU_JPLEAF7, + j__udyLeaf6ToLeaf7, j__udyAllocJLL7, JL_LEAF7VALUEAREA, + JU_BRANCH_COPY_IMMED_ODD, JU_COPY7_LONG_TO_PINDEX); +#endif // JU_64BIT + +// A top-level BranchU is different and cannot use JU_BRANCHU(): Dont try to +// compress to a (LEAFW) leaf yet, but leave this for a later deletion +// (hysteresis > 0); just descend through the BranchU: + + case cJU_JPBRANCH_U: + + DBGCODE(parentJPtype = JU_JPTYPE(Pjp);) + + level = cJU_ROOTSTATE; + Pjp = P_JP(Pjp->jp_Addr) + JU_DIGITATSTATE(Index, cJU_ROOTSTATE); + break; + + +// **************************************************************************** +// LINEAR LEAF: +// +// State transitions while deleting an Index, the inverse of the similar table +// that appears in JudyIns.c: +// +// Note: In JudyIns.c this table is not needed and does not appear until the +// Immed handling code; because once a Leaf is reached upon growing the tree, +// the situation remains simpler, but for deleting indexes, the complexity +// arises when leaves must compress to Immeds. +// +// Note: There are other transitions possible too, not shown here, such as to +// a leaf one level higher. +// +// (Yes, this is very terse... Study it and it will make sense.) +// (Note, parts of this diagram are repeated below for quick reference.) +// +// reformat JP here for Judy1 only, from word-1 to word-2 +// | +// JUDY1 && JU_64BIT JUDY1 || JU_64BIT | +// V +// (*) Leaf1 [[ => 1_15..08 ] => 1_07 => ... => 1_04 ] => 1_03 => 1_02 => 1_01 +// Leaf2 [[ => 2_07..04 ] => 2_03 => 2_02 ] => 2_01 +// Leaf3 [[ => 3_05..03 ] => 3_02 ] => 3_01 +// JU_64BIT only: +// Leaf4 [[ => 4_03..02 ]] => 4_01 +// Leaf5 [[ => 5_03..02 ]] => 5_01 +// Leaf6 [[ => 6_02 ]] => 6_01 +// Leaf7 [[ => 7_02 ]] => 7_01 +// +// (*) For Judy1 & 64-bit, go directly from a LeafB1 to cJU_JPIMMED_1_15; skip +// Leaf1, as described in Judy1.h regarding cJ1_JPLEAF1. +// +// MACROS FOR COMMON CODE: +// +// (De)compress a LeafX into a LeafY one index size (cIS) larger (X+1 = Y): +// +// This is only possible when the current leaf is under a narrow pointer +// ((ParentLevel - 1) > cIS) and its population fits in a higher-level leaf. +// Variables ParentLevel, pop1, PjllnewRaw, Pjllnew, Pjpm, and Index are in the +// context. +// +// Note: Doing an "uplevel" doesnt occur until the old leaf can be compressed +// up one level BEFORE deleting an index; that is, hysteresis = 1. +// +// Note: LeafType, MaxPop1, NewJPType, and Alloc refer to the up-level leaf, +// not the current leaf. +// +// Note: 010327: Fixed bug where the jp_DcdPopO next-uplevel digit (byte) +// above the current Pop0 value was not being cleared. When upleveling, one +// digit in jp_DcdPopO "moves" from being part of the Dcd subfield to the Pop0 +// subfield, but since a leaf maxpop1 is known to be <= 1 byte in size, the new +// Pop0 byte should always be zero. This is easy to overlook because +// JU_JPLEAF_POP0() "knows" to only use the LSB of Pop0 (for efficiency) and +// ignore the other bytes... Until someone uses cJU_POP0MASK() instead of +// JU_JPLEAF_POP0(), such as in JudyInsertBranch.c. +// +// TBD: Should JudyInsertBranch.c use JU_JPLEAF_POP0() rather than +// cJU_POP0MASK(), for efficiency? Does it know for sure its a narrow pointer +// under the leaf? Not necessarily. + +#define JU_LEAF_UPLEVEL(cIS,LeafType,MaxPop1,NewJPType,LeafToLeaf, \ + Alloc,ValueArea) \ + \ + assert(((ParentLevel - 1) == (cIS)) || (pop1 >= (MaxPop1))); \ + \ + if (((ParentLevel - 1) > (cIS)) /* under narrow pointer */ \ + && (pop1 == (MaxPop1))) /* hysteresis = 1 */ \ + { \ + Word_t D_cdP0; \ + if ((PjllnewRaw = Alloc(MaxPop1, Pjpm)) == 0) return(-1); \ + Pjllnew = P_JLL(PjllnewRaw); \ + JUDYLCODE(Pjv = ValueArea((LeafType) Pjllnew, MaxPop1);) \ + \ + (void) LeafToLeaf((LeafType) Pjllnew, JU_PVALUEPASS Pjp, \ + Index & cJU_DCDMASK(cIS), /* TBD, Doug says */ \ + (Pvoid_t) Pjpm); \ + DBGCODE(JudyCheckSorted(Pjllnew, MaxPop1, cIS + 1);) \ + \ + D_cdP0 = (~cJU_MASKATSTATE((cIS) + 1)) & JU_JPDCDPOP0(Pjp); \ + JU_JPSETADT(Pjp, (Word_t)PjllnewRaw, D_cdP0, NewJPType); \ + goto ContinueDelWalk; /* delete from new leaf */ \ + } + + +// For Leaf3, only support JU_LEAF_UPLEVEL on a 64-bit system, and for Leaf7, +// there is no JU_LEAF_UPLEVEL: +// +// Note: Theres no way here to go from Leaf3 [Leaf7] to LEAFW on a 32-bit +// [64-bit] system. Thats handled in the main code, because its different in +// that a JPM is involved. + +#ifndef JU_64BIT // 32-bit. +#define JU_LEAF_UPLEVEL64(cIS,LeafType,MaxPop1,NewJPType,LeafToLeaf, \ + Alloc,ValueArea) // null. +#else +#define JU_LEAF_UPLEVEL64(cIS,LeafType,MaxPop1,NewJPType,LeafToLeaf, \ + Alloc,ValueArea) \ + JU_LEAF_UPLEVEL (cIS,LeafType,MaxPop1,NewJPType,LeafToLeaf, \ + Alloc,ValueArea) +#define JU_LEAF_UPLEVEL_NONE(cIS,LeafType,MaxPop1,NewJPType,LeafToLeaf, \ + Alloc,ValueArea) // null. +#endif + +// Compress a Leaf* with pop1 = 2, or a JPIMMED_*_02, into a JPIMMED_*_01: +// +// Copy whichever Index is NOT being deleted (and assert that the other one is +// found; Index must be valid). This requires special handling of the Index +// bytes (and value area). Variables Pjp, Index, offset, and Pleaf are in the +// context, offset is modified to the undeleted Index, and Pjp is modified +// including jp_Addr. + + +#define JU_TOIMMED_01_EVEN(cIS,ignore1,ignore2) \ +{ \ + Word_t D_cdP0; \ + Word_t A_ddr = 0; \ + uint8_t T_ype = JU_JPTYPE(Pjp); \ + offset = (Pleaf[0] == JU_LEASTBYTES(Index, cIS)); /* undeleted Ind */ \ + assert(Pleaf[offset ? 0 : 1] == JU_LEASTBYTES(Index, cIS)); \ + D_cdP0 = (Index & cJU_DCDMASK(cIS)) | Pleaf[offset]; \ +JUDYLCODE(A_ddr = Pjv[offset];) \ + JU_JPSETADT(Pjp, A_ddr, D_cdP0, T_ype); \ +} + +#define JU_TOIMMED_01_ODD(cIS,SearchLeaf,CopyPIndex) \ + { \ + Word_t D_cdP0; \ + Word_t A_ddr = 0; \ + uint8_t T_ype = JU_JPTYPE(Pjp); \ + \ + offset = SearchLeaf(Pleaf, 2, Index); \ + assert(offset >= 0); /* Index must be valid */ \ + CopyPIndex(D_cdP0, & (Pleaf[offset ? 0 : cIS])); \ + D_cdP0 |= Index & cJU_DCDMASK(cIS); \ + JUDYLCODE(A_ddr = Pjv[offset ? 0 : 1];) \ + JU_JPSETADT(Pjp, A_ddr, D_cdP0, T_ype); \ + } + + +// Compress a Leaf* into a JPIMMED_*_0[2+]: +// +// This occurs as soon as its possible, with hysteresis = 0. Variables pop1, +// Pleaf, offset, and Pjpm are in the context. +// +// TBD: Explain why hysteresis = 0 here, rather than > 0. Probably because +// the insert code assumes if the population is small enough, an Immed is used, +// not a leaf. +// +// The differences between Judy1 and JudyL with respect to value area handling +// are just too large for completely common code between them... Oh well, some +// big ifdefs follow. + +#ifdef JUDY1 + +#define JU_LEAF_TOIMMED(cIS,LeafType,MaxPop1,BaseJPType,ignore1,\ + ignore2,ignore3,ignore4, \ + DeleteCopy,FreeLeaf) \ + \ + assert(pop1 > (MaxPop1)); \ + \ + if ((pop1 - 1) == (MaxPop1)) /* hysteresis = 0 */ \ + { \ + Pjll_t PjllRaw = (Pjll_t) (Pjp->jp_Addr); \ + DeleteCopy((LeafType) (Pjp->jp_1Index), Pleaf, pop1, offset, cIS); \ + DBGCODE(JudyCheckSorted((Pjll_t) (Pjp->jp_1Index), pop1-1, cIS);) \ + Pjp->jp_Type = (BaseJPType) - 1 + (MaxPop1) - 1; \ + FreeLeaf(PjllRaw, pop1, Pjpm); \ + return(1); \ + } + +#else // JUDYL + +// Pjv is also in the context. + +#define JU_LEAF_TOIMMED(cIS,LeafType,MaxPop1,BaseJPType,ignore1,\ + ignore2,ignore3,ignore4, \ + DeleteCopy,FreeLeaf) \ + \ + assert(pop1 > (MaxPop1)); \ + \ + if ((pop1 - 1) == (MaxPop1)) /* hysteresis = 0 */ \ + { \ + Pjll_t PjllRaw = (Pjll_t) (Pjp->jp_Addr); \ + Pjv_t PjvnewRaw; \ + Pjv_t Pjvnew; \ + \ + if ((PjvnewRaw = j__udyLAllocJV(pop1 - 1, Pjpm)) \ + == (Pjv_t) NULL) return(-1); \ + JUDYLCODE(Pjvnew = P_JV(PjvnewRaw);) \ + \ + DeleteCopy((LeafType) (Pjp->jp_LIndex), Pleaf, pop1, offset, cIS); \ + JU_DELETECOPY(Pjvnew, Pjv, pop1, offset, cIS); \ + DBGCODE(JudyCheckSorted((Pjll_t) (Pjp->jp_LIndex), pop1-1, cIS);) \ + FreeLeaf(PjllRaw, pop1, Pjpm); \ + Pjp->jp_Addr = (Word_t) PjvnewRaw; \ + Pjp->jp_Type = (BaseJPType) - 2 + (MaxPop1); \ + return(1); \ + } + +// A complicating factor for JudyL & 32-bit is that Leaf2..3, and for JudyL & +// 64-bit Leaf 4..7, go directly to an Immed*_01, where the value is stored in +// jp_Addr and not in a separate LeafV. For efficiency, use the following +// macro in cases where it can apply; it is rigged to do the right thing. +// Unfortunately, this requires the calling code to "know" the transition table +// and call the right macro. +// +// This variant compresses a Leaf* with pop1 = 2 into a JPIMMED_*_01: + +#define JU_LEAF_TOIMMED_01(cIS,LeafType,MaxPop1,ignore,Immed01JPType, \ + ToImmed,SearchLeaf,CopyPIndex, \ + DeleteCopy,FreeLeaf) \ + \ + assert(pop1 > (MaxPop1)); \ + \ + if ((pop1 - 1) == (MaxPop1)) /* hysteresis = 0 */ \ + { \ + Pjll_t PjllRaw = (Pjll_t) (Pjp->jp_Addr); \ + ToImmed(cIS, SearchLeaf, CopyPIndex); \ + FreeLeaf(PjllRaw, pop1, Pjpm); \ + Pjp->jp_Type = (Immed01JPType); \ + return(1); \ + } +#endif // JUDYL + +// See comments above about these: +// +// Note: Here "23" means index size 2 or 3, and "47" means 4..7. + +#if (defined(JUDY1) || defined(JU_64BIT)) +#define JU_LEAF_TOIMMED_23(cIS,LeafType,MaxPop1,BaseJPType,Immed01JPType, \ + ToImmed,SearchLeaf,CopyPIndex, \ + DeleteCopy,FreeLeaf) \ + JU_LEAF_TOIMMED( cIS,LeafType,MaxPop1,BaseJPType,ignore1, \ + ignore2,ignore3,ignore4, \ + DeleteCopy,FreeLeaf) +#else // JUDYL && 32-bit +#define JU_LEAF_TOIMMED_23(cIS,LeafType,MaxPop1,BaseJPType,Immed01JPType, \ + ToImmed,SearchLeaf,CopyPIndex, \ + DeleteCopy,FreeLeaf) \ + JU_LEAF_TOIMMED_01(cIS,LeafType,MaxPop1,ignore,Immed01JPType, \ + ToImmed,SearchLeaf,CopyPIndex, \ + DeleteCopy,FreeLeaf) +#endif + +#ifdef JU_64BIT +#ifdef JUDY1 +#define JU_LEAF_TOIMMED_47(cIS,LeafType,MaxPop1,BaseJPType,Immed01JPType, \ + ToImmed,SearchLeaf,CopyPIndex, \ + DeleteCopy,FreeLeaf) \ + JU_LEAF_TOIMMED( cIS,LeafType,MaxPop1,BaseJPType,ignore1, \ + ignore2,ignore3,ignore4, \ + DeleteCopy,FreeLeaf) +#else // JUDYL && 64-bit +#define JU_LEAF_TOIMMED_47(cIS,LeafType,MaxPop1,BaseJPType,Immed01JPType, \ + ToImmed,SearchLeaf,CopyPIndex, \ + DeleteCopy,FreeLeaf) \ + JU_LEAF_TOIMMED_01(cIS,LeafType,MaxPop1,ignore,Immed01JPType, \ + ToImmed,SearchLeaf,CopyPIndex, \ + DeleteCopy,FreeLeaf) +#endif // JUDYL +#endif // JU_64BIT + +// Compress a Leaf* in place: +// +// Here hysteresis = 0 (no memory is wasted). Variables pop1, Pleaf, and +// offset, and for JudyL, Pjv, are in the context. + +#ifdef JUDY1 +#define JU_LEAF_INPLACE(cIS,GrowInPlace,DeleteInPlace) \ + if (GrowInPlace(pop1 - 1)) /* hysteresis = 0 */ \ + { \ + DeleteInPlace(Pleaf, pop1, offset, cIS); \ + DBGCODE(JudyCheckSorted(Pleaf, pop1 - 1, cIS);) \ + return(1); \ + } +#else +#define JU_LEAF_INPLACE(cIS,GrowInPlace,DeleteInPlace) \ + if (GrowInPlace(pop1 - 1)) /* hysteresis = 0 */ \ + { \ + DeleteInPlace(Pleaf, pop1, offset, cIS); \ +/**/ JU_DELETEINPLACE(Pjv, pop1, offset, ignore); \ + DBGCODE(JudyCheckSorted(Pleaf, pop1 - 1, cIS);) \ + return(1); \ + } +#endif + +// Compress a Leaf* into a smaller memory object of the same JP type: +// +// Variables PjllnewRaw, Pjllnew, Pleafpop1, Pjpm, PleafRaw, Pleaf, and offset +// are in the context. + +#ifdef JUDY1 + +#define JU_LEAF_SHRINK(cIS,LeafType,DeleteCopy,Alloc,FreeLeaf,ValueArea) \ + if ((PjllnewRaw = Alloc(pop1 - 1, Pjpm)) == 0) return(-1); \ + Pjllnew = P_JLL(PjllnewRaw); \ + DeleteCopy((LeafType) Pjllnew, Pleaf, pop1, offset, cIS); \ + DBGCODE(JudyCheckSorted(Pjllnew, pop1 - 1, cIS);) \ + FreeLeaf(PleafRaw, pop1, Pjpm); \ + Pjp->jp_Addr = (Word_t) PjllnewRaw; \ + return(1) + +#else // JUDYL + +#define JU_LEAF_SHRINK(cIS,LeafType,DeleteCopy,Alloc,FreeLeaf,ValueArea) \ + { \ +/**/ Pjv_t Pjvnew; \ + \ + if ((PjllnewRaw = Alloc(pop1 - 1, Pjpm)) == 0) return(-1); \ + Pjllnew = P_JLL(PjllnewRaw); \ +/**/ Pjvnew = ValueArea(Pjllnew, pop1 - 1); \ + DeleteCopy((LeafType) Pjllnew, Pleaf, pop1, offset, cIS); \ +/**/ JU_DELETECOPY(Pjvnew, Pjv, pop1, offset, cIS); \ + DBGCODE(JudyCheckSorted(Pjllnew, pop1 - 1, cIS);) \ + FreeLeaf(PleafRaw, pop1, Pjpm); \ + Pjp->jp_Addr = (Word_t) PjllnewRaw; \ + return(1); \ + } +#endif // JUDYL + +// Overall common code for Leaf* deletion handling: +// +// See if the leaf can be: +// - (de)compressed to one a level higher (JU_LEAF_UPLEVEL()), or if not, +// - compressed to an Immediate JP (JU_LEAF_TOIMMED()), or if not, +// - shrunk in place (JU_LEAF_INPLACE()), or if none of those, then +// - shrink the leaf to a smaller chunk of memory (JU_LEAF_SHRINK()). +// +// Variables Pjp, pop1, Index, and offset are in the context. +// The *Up parameters refer to a leaf one level up, if there is any. + +#define JU_LEAF(cIS, \ + UpLevel, \ + LeafTypeUp,MaxPop1Up,LeafJPTypeUp,LeafToLeaf, \ + AllocUp,ValueAreaUp, \ + LeafToImmed,ToImmed,CopyPIndex, \ + LeafType,ImmedMaxPop1,ImmedBaseJPType,Immed01JPType, \ + SearchLeaf,GrowInPlace,DeleteInPlace,DeleteCopy, \ + Alloc,FreeLeaf,ValueArea) \ + { \ + Pjll_t PleafRaw; \ + LeafType Pleaf; \ + \ + assert(! JU_DCDNOTMATCHINDEX(Index, Pjp, cIS)); \ + assert(ParentLevel > (cIS)); \ + \ + PleafRaw = (Pjll_t) (Pjp->jp_Addr); \ + Pleaf = (LeafType) P_JLL(PleafRaw); \ + pop1 = JU_JPLEAF_POP0(Pjp) + 1; \ + \ + UpLevel(cIS, LeafTypeUp, MaxPop1Up, LeafJPTypeUp, \ + LeafToLeaf, AllocUp, ValueAreaUp); \ + \ + offset = SearchLeaf(Pleaf, pop1, Index); \ + assert(offset >= 0); /* Index must be valid */ \ + JUDYLCODE(Pjv = ValueArea(Pleaf, pop1);) \ + \ + LeafToImmed(cIS, LeafType, ImmedMaxPop1, \ + ImmedBaseJPType, Immed01JPType, \ + ToImmed, SearchLeaf, CopyPIndex, \ + DeleteCopy, FreeLeaf); \ + \ + JU_LEAF_INPLACE(cIS, GrowInPlace, DeleteInPlace); \ + \ + JU_LEAF_SHRINK(cIS, LeafType, DeleteCopy, Alloc, FreeLeaf, \ + ValueArea); \ + } + +// END OF MACROS, START OF CASES: +// +// (*) Leaf1 [[ => 1_15..08 ] => 1_07 => ... => 1_04 ] => 1_03 => 1_02 => 1_01 + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: + + JU_LEAF(1, + JU_LEAF_UPLEVEL, uint16_t *, cJU_LEAF2_MAXPOP1, cJU_JPLEAF2, + j__udyLeaf1ToLeaf2, j__udyAllocJLL2, JL_LEAF2VALUEAREA, + JU_LEAF_TOIMMED, ignore, ignore, + uint8_t *, cJU_IMMED1_MAXPOP1, + cJU_JPIMMED_1_02, cJU_JPIMMED_1_01, j__udySearchLeaf1, + JU_LEAF1GROWINPLACE, JU_DELETEINPLACE, JU_DELETECOPY, + j__udyAllocJLL1, j__udyFreeJLL1, JL_LEAF1VALUEAREA); +#endif + +// A complicating factor is that for JudyL & 32-bit, a Leaf2 must go directly +// to an Immed 2_01 and a Leaf3 must go directly to an Immed 3_01: +// +// Leaf2 [[ => 2_07..04 ] => 2_03 => 2_02 ] => 2_01 +// Leaf3 [[ => 3_05..03 ] => 3_02 ] => 3_01 +// +// Hence use JU_LEAF_TOIMMED_23 instead of JU_LEAF_TOIMMED in the cases below, +// and also the parameters ToImmed and, for odd index sizes, CopyPIndex, are +// required. + + case cJU_JPLEAF2: + + JU_LEAF(2, + JU_LEAF_UPLEVEL, uint8_t *, cJU_LEAF3_MAXPOP1, cJU_JPLEAF3, + j__udyLeaf2ToLeaf3, j__udyAllocJLL3, JL_LEAF3VALUEAREA, + JU_LEAF_TOIMMED_23, JU_TOIMMED_01_EVEN, ignore, + uint16_t *, cJU_IMMED2_MAXPOP1, + cJU_JPIMMED_2_02, cJU_JPIMMED_2_01, j__udySearchLeaf2, + JU_LEAF2GROWINPLACE, JU_DELETEINPLACE, JU_DELETECOPY, + j__udyAllocJLL2, j__udyFreeJLL2, JL_LEAF2VALUEAREA); + +// On 32-bit there is no transition to "uplevel" for a Leaf3, so use +// JU_LEAF_UPLEVEL64 instead of JU_LEAF_UPLEVEL: + + case cJU_JPLEAF3: + + JU_LEAF(3, + JU_LEAF_UPLEVEL64, uint32_t *, cJU_LEAF4_MAXPOP1, + cJU_JPLEAF4, + j__udyLeaf3ToLeaf4, j__udyAllocJLL4, JL_LEAF4VALUEAREA, + JU_LEAF_TOIMMED_23, + JU_TOIMMED_01_ODD, JU_COPY3_PINDEX_TO_LONG, + uint8_t *, cJU_IMMED3_MAXPOP1, + cJU_JPIMMED_3_02, cJU_JPIMMED_3_01, j__udySearchLeaf3, + JU_LEAF3GROWINPLACE, JU_DELETEINPLACE_ODD, + JU_DELETECOPY_ODD, + j__udyAllocJLL3, j__udyFreeJLL3, JL_LEAF3VALUEAREA); + +#ifdef JU_64BIT + +// A complicating factor is that for JudyL & 64-bit, a Leaf[4-7] must go +// directly to an Immed [4-7]_01: +// +// Leaf4 [[ => 4_03..02 ]] => 4_01 +// Leaf5 [[ => 5_03..02 ]] => 5_01 +// Leaf6 [[ => 6_02 ]] => 6_01 +// Leaf7 [[ => 7_02 ]] => 7_01 +// +// Hence use JU_LEAF_TOIMMED_47 instead of JU_LEAF_TOIMMED in the cases below. + + case cJU_JPLEAF4: + + JU_LEAF(4, + JU_LEAF_UPLEVEL, uint8_t *, cJU_LEAF5_MAXPOP1, cJU_JPLEAF5, + j__udyLeaf4ToLeaf5, j__udyAllocJLL5, JL_LEAF5VALUEAREA, + JU_LEAF_TOIMMED_47, JU_TOIMMED_01_EVEN, ignore, + uint32_t *, cJU_IMMED4_MAXPOP1, + cJ1_JPIMMED_4_02, cJU_JPIMMED_4_01, j__udySearchLeaf4, + JU_LEAF4GROWINPLACE, JU_DELETEINPLACE, JU_DELETECOPY, + j__udyAllocJLL4, j__udyFreeJLL4, JL_LEAF4VALUEAREA); + + case cJU_JPLEAF5: + + JU_LEAF(5, + JU_LEAF_UPLEVEL, uint8_t *, cJU_LEAF6_MAXPOP1, cJU_JPLEAF6, + j__udyLeaf5ToLeaf6, j__udyAllocJLL6, JL_LEAF6VALUEAREA, + JU_LEAF_TOIMMED_47, + JU_TOIMMED_01_ODD, JU_COPY5_PINDEX_TO_LONG, + uint8_t *, cJU_IMMED5_MAXPOP1, + cJ1_JPIMMED_5_02, cJU_JPIMMED_5_01, j__udySearchLeaf5, + JU_LEAF5GROWINPLACE, JU_DELETEINPLACE_ODD, + JU_DELETECOPY_ODD, + j__udyAllocJLL5, j__udyFreeJLL5, JL_LEAF5VALUEAREA); + + case cJU_JPLEAF6: + + JU_LEAF(6, + JU_LEAF_UPLEVEL, uint8_t *, cJU_LEAF7_MAXPOP1, cJU_JPLEAF7, + j__udyLeaf6ToLeaf7, j__udyAllocJLL7, JL_LEAF7VALUEAREA, + JU_LEAF_TOIMMED_47, + JU_TOIMMED_01_ODD, JU_COPY6_PINDEX_TO_LONG, + uint8_t *, cJU_IMMED6_MAXPOP1, + cJ1_JPIMMED_6_02, cJU_JPIMMED_6_01, j__udySearchLeaf6, + JU_LEAF6GROWINPLACE, JU_DELETEINPLACE_ODD, + JU_DELETECOPY_ODD, + j__udyAllocJLL6, j__udyFreeJLL6, JL_LEAF6VALUEAREA); + +// There is no transition to "uplevel" for a Leaf7, so use JU_LEAF_UPLEVEL_NONE +// instead of JU_LEAF_UPLEVEL, and ignore all of the parameters to that macro: + + case cJU_JPLEAF7: + + JU_LEAF(7, + JU_LEAF_UPLEVEL_NONE, ignore1, ignore2, ignore3, ignore4, + ignore5, ignore6, + JU_LEAF_TOIMMED_47, + JU_TOIMMED_01_ODD, JU_COPY7_PINDEX_TO_LONG, + uint8_t *, cJU_IMMED7_MAXPOP1, + cJ1_JPIMMED_7_02, cJU_JPIMMED_7_01, j__udySearchLeaf7, + JU_LEAF7GROWINPLACE, JU_DELETEINPLACE_ODD, + JU_DELETECOPY_ODD, + j__udyAllocJLL7, j__udyFreeJLL7, JL_LEAF7VALUEAREA); +#endif // JU_64BIT + + +// **************************************************************************** +// BITMAP LEAF: + + case cJU_JPLEAF_B1: + { +#ifdef JUDYL + Pjv_t PjvnewRaw; // new value area. + Pjv_t Pjvnew; + Word_t subexp; // 1 of 8 subexpanses in bitmap. + Pjlb_t Pjlb; // pointer to bitmap part of the leaf. + BITMAPL_t bitmap; // for one subexpanse. + BITMAPL_t bitmask; // bit set for Indexs digit. +#endif + assert(! JU_DCDNOTMATCHINDEX(Index, Pjp, 1)); + assert(ParentLevel > 1); + // valid Index: + assert(JU_BITMAPTESTL(P_JLB(Pjp->jp_Addr), Index)); + + pop1 = JU_JPLEAF_POP0(Pjp) + 1; + +// Like a Leaf1, see if its under a narrow pointer and can become a Leaf2 +// (hysteresis = 1): + + JU_LEAF_UPLEVEL(1, uint16_t *, cJU_LEAF2_MAXPOP1, cJU_JPLEAF2, + j__udyLeaf1ToLeaf2, j__udyAllocJLL2, + JL_LEAF2VALUEAREA); + +#if (defined(JUDY1) && defined(JU_64BIT)) + +// Handle the unusual special case, on Judy1 64-bit only, where a LeafB1 goes +// directly to a JPIMMED_1_15; as described in comments in Judy1.h and +// JudyIns.c. Copy 1-byte indexes from old LeafB1 to the Immed: + + if ((pop1 - 1) == cJU_IMMED1_MAXPOP1) // hysteresis = 0. + { + Pjlb_t PjlbRaw; // bitmap in old leaf. + Pjlb_t Pjlb; + uint8_t * Pleafnew; // JPIMMED as a pointer. + Word_t ldigit; // larger than uint8_t. + + PjlbRaw = (Pjlb_t) (Pjp->jp_Addr); + Pjlb = P_JLB(PjlbRaw); + Pleafnew = Pjp->jp_1Index; + + JU_BITMAPCLEARL(Pjlb, Index); // unset Indexs bit. + +// TBD: This is very slow, there must be a better way: + + for (ldigit = 0; ldigit < cJU_BRANCHUNUMJPS; ++ldigit) + { + if (JU_BITMAPTESTL(Pjlb, ldigit)) + { + *Pleafnew++ = ldigit; + assert(Pleafnew - (Pjp->jp_1Index) + <= cJU_IMMED1_MAXPOP1); + } + } + + DBGCODE(JudyCheckSorted((Pjll_t) (Pjp->jp_1Index), + cJU_IMMED1_MAXPOP1, 1);) + j__udyFreeJLB1(PjlbRaw, Pjpm); + + Pjp->jp_Type = cJ1_JPIMMED_1_15; + return(1); + } + +#else // (JUDYL || (! JU_64BIT)) + +// Compress LeafB1 to a Leaf1: +// +// Note: 4.37 of this file contained alternate code for Judy1 only that simply +// cleared the bit and allowed the LeafB1 to go below cJU_LEAF1_MAXPOP1. This +// was the ONLY case where a malloc failure was not fatal; however, it violated +// the critical assumption that the tree is always kept in least-compressed +// form. + + if (pop1 == cJU_LEAF1_MAXPOP1) // hysteresis = 1. + { + if (j__udyLeafB1ToLeaf1(Pjp, Pjpm) == -1) return(-1); + goto ContinueDelWalk; // delete Index in new Leaf1. + } +#endif // (JUDYL || (! JU_64BIT)) + +#ifdef JUDY1 + // unset Indexs bit: + + JU_BITMAPCLEARL(P_JLB(Pjp->jp_Addr), Index); +#else // JUDYL + +// This is very different from Judy1 because of the need to manage the value +// area: +// +// Get last byte to decode from Index, and pointer to bitmap leaf: + + digit = JU_DIGITATSTATE(Index, 1); + Pjlb = P_JLB(Pjp->jp_Addr); + +// Prepare additional values: + + subexp = digit / cJU_BITSPERSUBEXPL; // which subexpanse. + bitmap = JU_JLB_BITMAP(Pjlb, subexp); // subexps 32-bit map. + PjvRaw = JL_JLB_PVALUE(Pjlb, subexp); // corresponding values. + Pjv = P_JV(PjvRaw); + bitmask = JU_BITPOSMASKL(digit); // mask for Index. + + assert(bitmap & bitmask); // Index must be valid. + + if (bitmap == cJU_FULLBITMAPL) // full bitmap, take shortcut: + { + pop1 = cJU_BITSPERSUBEXPL; + offset = digit % cJU_BITSPERSUBEXPL; + } + else // compute subexpanse pop1 and value area offset: + { + pop1 = j__udyCountBitsL(bitmap); + offset = j__udyCountBitsL(bitmap & (bitmask - 1)); + } + +// Handle solitary Index remaining in subexpanse: + + if (pop1 == 1) + { + j__udyLFreeJV(PjvRaw, 1, Pjpm); + + JL_JLB_PVALUE(Pjlb, subexp) = (Pjv_t) NULL; + JU_JLB_BITMAP(Pjlb, subexp) = 0; + + return(1); + } + +// Shrink value area in place or move to a smaller value area: + + if (JL_LEAFVGROWINPLACE(pop1 - 1)) // hysteresis = 0. + { + JU_DELETEINPLACE(Pjv, pop1, offset, ignore); + } + else + { + if ((PjvnewRaw = j__udyLAllocJV(pop1 - 1, Pjpm)) + == (Pjv_t) NULL) return(-1); + Pjvnew = P_JV(PjvnewRaw); + + JU_DELETECOPY(Pjvnew, Pjv, pop1, offset, ignore); + j__udyLFreeJV(PjvRaw, pop1, Pjpm); + JL_JLB_PVALUE(Pjlb, subexp) = (Pjv_t) PjvnewRaw; + } + + JU_JLB_BITMAP(Pjlb, subexp) ^= bitmask; // clear Indexs bit. + +#endif // JUDYL + + return(1); + + } // case. + + +#ifdef JUDY1 + +// **************************************************************************** +// FULL POPULATION LEAF: +// +// Convert to a LeafB1 and delete the index. Hysteresis = 0; none is possible. +// +// Note: Earlier the second assertion below said, "== 2", but in fact the +// parent could be at a higher level if a fullpop is under a narrow pointer. + + case cJ1_JPFULLPOPU1: + { + Pjlb_t PjlbRaw; + Pjlb_t Pjlb; + Word_t subexp; + + assert(! JU_DCDNOTMATCHINDEX(Index, Pjp, 2)); + assert(ParentLevel > 1); // see above. + + if ((PjlbRaw = j__udyAllocJLB1(Pjpm)) == (Pjlb_t) NULL) + return(-1); + Pjlb = P_JLB(PjlbRaw); + +// Fully populate the leaf, then unset Indexs bit: + + for (subexp = 0; subexp < cJU_NUMSUBEXPL; ++subexp) + JU_JLB_BITMAP(Pjlb, subexp) = cJU_FULLBITMAPL; + + JU_BITMAPCLEARL(Pjlb, Index); + + Pjp->jp_Addr = (Word_t) PjlbRaw; + Pjp->jp_Type = cJU_JPLEAF_B1; + + return(1); + } +#endif // JUDY1 + + +// **************************************************************************** +// IMMEDIATE JP: +// +// If theres just the one Index in the Immed, convert the JP to a JPNULL* +// (should only happen in a BranchU); otherwise delete the Index from the +// Immed. See the state transitions table elsewhere in this file for a summary +// of which Immed types must be handled. Hysteresis = 0; none is possible with +// Immeds. +// +// MACROS FOR COMMON CODE: +// +// Single Index remains in cJU_JPIMMED_*_01; convert JP to null: +// +// Variables Pjp and parentJPtype are in the context. +// +// Note: cJU_JPIMMED_*_01 should only be encountered in BranchUs, not in +// BranchLs or BranchBs (where its improper to merely modify the JP to be a +// null JP); that is, BranchL and BranchB code should have already handled +// any cJU_JPIMMED_*_01 by different means. + +#define JU_IMMED_01(NewJPType,ParentJPType) \ + \ + assert(parentJPtype == (ParentJPType)); \ + assert(JU_JPDCDPOP0(Pjp) == JU_TRIMTODCDSIZE(Index)); \ + JU_JPSETADT(Pjp, 0, 0, NewJPType); \ + return(1) + +// Convert cJ*_JPIMMED_*_02 to cJU_JPIMMED_*_01: +// +// Move the undeleted Index, whichever does not match the least bytes of Index, +// from undecoded-bytes-only (in jp_1Index or jp_LIndex as appropriate) to +// jp_DcdPopO (full-field). Pjp, Index, and offset are in the context. + +#define JU_IMMED_02(cIS,LeafType,NewJPType) \ + { \ + LeafType Pleaf; \ + \ + assert((ParentLevel - 1) == (cIS)); \ + JUDY1CODE(Pleaf = (LeafType) (Pjp->jp_1Index);) \ + JUDYLCODE(Pleaf = (LeafType) (Pjp->jp_LIndex);) \ + JUDYLCODE(PjvRaw = (Pjv_t) (Pjp->jp_Addr);) \ + JUDYLCODE(Pjv = P_JV(PjvRaw);) \ + JU_TOIMMED_01_EVEN(cIS, ignore, ignore); \ + JUDYLCODE(j__udyLFreeJV(PjvRaw, 2, Pjpm);) \ + Pjp->jp_Type = (NewJPType); \ + return(1); \ + } + +#if (defined(JUDY1) || defined(JU_64BIT)) + +// Variation for "odd" cJ*_JPIMMED_*_02 JP types, which are very different from +// "even" types because they use leaf search code and odd-copy macros: +// +// Note: JudyL 32-bit has no "odd" JPIMMED_*_02 types. + +#define JU_IMMED_02_ODD(cIS,NewJPType,SearchLeaf,CopyPIndex) \ + { \ + uint8_t * Pleaf; \ + \ + assert((ParentLevel - 1) == (cIS)); \ + JUDY1CODE(Pleaf = (uint8_t *) (Pjp->jp_1Index);) \ + JUDYLCODE(Pleaf = (uint8_t *) (Pjp->jp_LIndex);) \ + JUDYLCODE(PjvRaw = (Pjv_t) (Pjp->jp_Addr);) \ + JUDYLCODE(Pjv = P_JV(PjvRaw);) \ + JU_TOIMMED_01_ODD(cIS, SearchLeaf, CopyPIndex); \ + JUDYLCODE(j__udyLFreeJV(PjvRaw, 2, Pjpm);) \ + Pjp->jp_Type = (NewJPType); \ + return(1); \ + } +#endif // (JUDY1 || JU_64BIT) + +// Core code for deleting one Index (and for JudyL, its value area) from a +// larger Immed: +// +// Variables Pleaf, pop1, and offset are in the context. + +#ifdef JUDY1 +#define JU_IMMED_DEL(cIS,DeleteInPlace) \ + DeleteInPlace(Pleaf, pop1, offset, cIS); \ + DBGCODE(JudyCheckSorted(Pleaf, pop1 - 1, cIS);) + +#else // JUDYL + +// For JudyL the value area might need to be shrunk: + +#define JU_IMMED_DEL(cIS,DeleteInPlace) \ + \ + if (JL_LEAFVGROWINPLACE(pop1 - 1)) /* hysteresis = 0 */ \ + { \ + DeleteInPlace( Pleaf, pop1, offset, cIS); \ + JU_DELETEINPLACE(Pjv, pop1, offset, ignore); \ + DBGCODE(JudyCheckSorted(Pleaf, pop1 - 1, cIS);) \ + } \ + else \ + { \ + Pjv_t PjvnewRaw; \ + Pjv_t Pjvnew; \ + \ + if ((PjvnewRaw = j__udyLAllocJV(pop1 - 1, Pjpm)) \ + == (Pjv_t) NULL) return(-1); \ + Pjvnew = P_JV(PjvnewRaw); \ + \ + DeleteInPlace(Pleaf, pop1, offset, cIS); \ + JU_DELETECOPY(Pjvnew, Pjv, pop1, offset, ignore); \ + DBGCODE(JudyCheckSorted(Pleaf, pop1 - 1, cIS);) \ + j__udyLFreeJV(PjvRaw, pop1, Pjpm); \ + \ + (Pjp->jp_Addr) = (Word_t) PjvnewRaw; \ + } +#endif // JUDYL + +// Delete one Index from a larger Immed where no restructuring is required: +// +// Variables pop1, Pjp, offset, and Index are in the context. + +#define JU_IMMED(cIS,LeafType,BaseJPType,SearchLeaf,DeleteInPlace) \ + { \ + LeafType Pleaf; \ + \ + assert((ParentLevel - 1) == (cIS)); \ + JUDY1CODE(Pleaf = (LeafType) (Pjp->jp_1Index);) \ + JUDYLCODE(Pleaf = (LeafType) (Pjp->jp_LIndex);) \ + JUDYLCODE(PjvRaw = (Pjv_t) (Pjp->jp_Addr);) \ + JUDYLCODE(Pjv = P_JV(PjvRaw);) \ + pop1 = (JU_JPTYPE(Pjp)) - (BaseJPType) + 2; \ + offset = SearchLeaf(Pleaf, pop1, Index); \ + assert(offset >= 0); /* Index must be valid */ \ + \ + JU_IMMED_DEL(cIS, DeleteInPlace); \ + --(Pjp->jp_Type); \ + return(1); \ + } + + +// END OF MACROS, START OF CASES: + +// Single Index remains in Immed; convert JP to null: + + case cJU_JPIMMED_1_01: JU_IMMED_01(cJU_JPNULL1, cJU_JPBRANCH_U2); + case cJU_JPIMMED_2_01: JU_IMMED_01(cJU_JPNULL2, cJU_JPBRANCH_U3); +#ifndef JU_64BIT + case cJU_JPIMMED_3_01: JU_IMMED_01(cJU_JPNULL3, cJU_JPBRANCH_U); +#else + case cJU_JPIMMED_3_01: JU_IMMED_01(cJU_JPNULL3, cJU_JPBRANCH_U4); + case cJU_JPIMMED_4_01: JU_IMMED_01(cJU_JPNULL4, cJU_JPBRANCH_U5); + case cJU_JPIMMED_5_01: JU_IMMED_01(cJU_JPNULL5, cJU_JPBRANCH_U6); + case cJU_JPIMMED_6_01: JU_IMMED_01(cJU_JPNULL6, cJU_JPBRANCH_U7); + case cJU_JPIMMED_7_01: JU_IMMED_01(cJU_JPNULL7, cJU_JPBRANCH_U); +#endif + +// Multiple Indexes remain in the Immed JP; delete the specified Index: + + case cJU_JPIMMED_1_02: + + JU_IMMED_02(1, uint8_t *, cJU_JPIMMED_1_01); + + case cJU_JPIMMED_1_03: +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: + case cJU_JPIMMED_1_05: + case cJU_JPIMMED_1_06: + case cJU_JPIMMED_1_07: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: + case cJ1_JPIMMED_1_09: + case cJ1_JPIMMED_1_10: + case cJ1_JPIMMED_1_11: + case cJ1_JPIMMED_1_12: + case cJ1_JPIMMED_1_13: + case cJ1_JPIMMED_1_14: + case cJ1_JPIMMED_1_15: +#endif + JU_IMMED(1, uint8_t *, cJU_JPIMMED_1_02, + j__udySearchLeaf1, JU_DELETEINPLACE); + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: + + JU_IMMED_02(2, uint16_t *, cJU_JPIMMED_2_01); + + case cJU_JPIMMED_2_03: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: + case cJ1_JPIMMED_2_05: + case cJ1_JPIMMED_2_06: + case cJ1_JPIMMED_2_07: +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + JU_IMMED(2, uint16_t *, cJU_JPIMMED_2_02, + j__udySearchLeaf2, JU_DELETEINPLACE); + + case cJU_JPIMMED_3_02: + + JU_IMMED_02_ODD(3, cJU_JPIMMED_3_01, + j__udySearchLeaf3, JU_COPY3_PINDEX_TO_LONG); + +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: + case cJ1_JPIMMED_3_04: + case cJ1_JPIMMED_3_05: + + JU_IMMED(3, uint8_t *, cJU_JPIMMED_3_02, + j__udySearchLeaf3, JU_DELETEINPLACE_ODD); + + case cJ1_JPIMMED_4_02: + + JU_IMMED_02(4, uint32_t *, cJU_JPIMMED_4_01); + + case cJ1_JPIMMED_4_03: + + JU_IMMED(4, uint32_t *, cJ1_JPIMMED_4_02, + j__udySearchLeaf4, JU_DELETEINPLACE); + + case cJ1_JPIMMED_5_02: + + JU_IMMED_02_ODD(5, cJU_JPIMMED_5_01, + j__udySearchLeaf5, JU_COPY5_PINDEX_TO_LONG); + + case cJ1_JPIMMED_5_03: + + JU_IMMED(5, uint8_t *, cJ1_JPIMMED_5_02, + j__udySearchLeaf5, JU_DELETEINPLACE_ODD); + + case cJ1_JPIMMED_6_02: + + JU_IMMED_02_ODD(6, cJU_JPIMMED_6_01, + j__udySearchLeaf6, JU_COPY6_PINDEX_TO_LONG); + + case cJ1_JPIMMED_7_02: + + JU_IMMED_02_ODD(7, cJU_JPIMMED_7_01, + j__udySearchLeaf7, JU_COPY7_PINDEX_TO_LONG); + +#endif // (JUDY1 && JU_64BIT) + + +// **************************************************************************** +// INVALID JP TYPE: + + default: JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); return(-1); + + } // switch + + +// PROCESS JP -- RECURSIVELY: +// +// For non-Immed JP types, if successful, post-decrement the population count +// at this level, or collapse a BranchL if necessary by copying the remaining +// JP in the BranchL to the parent (hysteresis = 0), which implicitly creates a +// narrow pointer if there was not already one in the hierarchy. + + assert(level); + retcode = j__udyDelWalk(Pjp, Index, level, Pjpm); + assert(retcode != 0); // should never happen. + + if ((JU_JPTYPE(Pjp)) < cJU_JPIMMED_1_01) // not an Immed. + { + switch (retcode) + { + case 1: + { + jp_t JP = *Pjp; + Word_t DcdP0; + + DcdP0 = JU_JPDCDPOP0(Pjp) - 1; // decrement count. + JU_JPSETADT(Pjp, JP.jp_Addr, DcdP0, JU_JPTYPE(&JP)); + break; + } + case 2: // collapse BranchL to single JP; see above: + { + Pjbl_t PjblRaw = (Pjbl_t) (Pjp->jp_Addr); + Pjbl_t Pjbl = P_JBL(PjblRaw); + + *Pjp = Pjbl->jbl_jp[0]; + j__udyFreeJBL(PjblRaw, Pjpm); + retcode = 1; + } + } + } + + return(retcode); + +} // j__udyDelWalk() + + +// **************************************************************************** +// J U D Y 1 U N S E T +// J U D Y L D E L +// +// Main entry point. See the manual entry for details. + +#ifdef JUDY1 +FUNCTION int Judy1Unset +#else +FUNCTION int JudyLDel +#endif + ( + PPvoid_t PPArray, // in which to delete. + Word_t Index, // to delete. + PJError_t PJError // optional, for returning error info. + ) +{ + Word_t pop1; // population of leaf. + int offset; // at which to delete Index. + JUDY1CODE(int retcode;) // return code from Judy1Test(). +JUDYLCODE(PPvoid_t PPvalue;) // pointer from JudyLGet(). + + +// CHECK FOR NULL ARRAY POINTER (error by caller): + + if (PPArray == (PPvoid_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPPARRAY); + return(JERRI); + } + + +// CHECK IF INDEX IS INVALID: +// +// If so, theres nothing to do. This saves a lot of time. Pass through +// PJError, if any, from the "get" function. + +#ifdef JUDY1 + if ((retcode = Judy1Test(*PPArray, Index, PJError)) == JERRI) + return (JERRI); + + if (retcode == 0) return(0); +#else + if ((PPvalue = JudyLGet(*PPArray, Index, PJError)) == PPJERR) + return (JERRI); + + if (PPvalue == (PPvoid_t) NULL) return(0); +#endif + + +// **************************************************************************** +// PROCESS TOP LEVEL (LEAFW) BRANCHES AND LEAVES: + +// **************************************************************************** +// LEAFW LEAF, OTHER SIZE: +// +// Shrink or convert the leaf as necessary. Hysteresis = 0; none is possible. + + if (JU_LEAFW_POP0(*PPArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + JUDYLCODE(Pjv_t Pjv;) // current value area. + JUDYLCODE(Pjv_t Pjvnew;) // value area in new leaf. + Pjlw_t Pjlw = P_JLW(*PPArray); // first word of leaf. + Pjlw_t Pjlwnew; // replacement leaf. + pop1 = Pjlw[0] + 1; // first word of leaf is pop0. + +// Delete single (last) Index from array: + + if (pop1 == 1) + { + j__udyFreeJLW(Pjlw, /* pop1 = */ 1, (Pjpm_t) NULL); + *PPArray = (Pvoid_t) NULL; + return(1); + } + +// Locate Index in compressible leaf: + + offset = j__udySearchLeafW(Pjlw + 1, pop1, Index); + assert(offset >= 0); // Index must be valid. + + JUDYLCODE(Pjv = JL_LEAFWVALUEAREA(Pjlw, pop1);) + +// Delete Index in-place: +// +// Note: "Grow in place from pop1 - 1" is the logical inverse of, "shrink in +// place from pop1." Also, Pjlw points to the count word, so skip that for +// doing the deletion. + + if (JU_LEAFWGROWINPLACE(pop1 - 1)) + { + JU_DELETEINPLACE(Pjlw + 1, pop1, offset, ignore); +#ifdef JUDYL // also delete from value area: + JU_DELETEINPLACE(Pjv, pop1, offset, ignore); +#endif + DBGCODE(JudyCheckSorted((Pjll_t) (Pjlw + 1), pop1 - 1, + cJU_ROOTSTATE);) + --(Pjlw[0]); // decrement population. + DBGCODE(JudyCheckPop(*PPArray);) + return(1); + } + +// Allocate new leaf for use in either case below: + + Pjlwnew = j__udyAllocJLW(pop1 - 1); + JU_CHECKALLOC(Pjlw_t, Pjlwnew, JERRI); + +// Shrink to smaller LEAFW: +// +// Note: Skip the first word = pop0 in each leaf. + + Pjlwnew[0] = (pop1 - 1) - 1; + JU_DELETECOPY(Pjlwnew + 1, Pjlw + 1, pop1, offset, ignore); + +#ifdef JUDYL // also delete from value area: + Pjvnew = JL_LEAFWVALUEAREA(Pjlwnew, pop1 - 1); + JU_DELETECOPY(Pjvnew, Pjv, pop1, offset, ignore); +#endif + DBGCODE(JudyCheckSorted(Pjlwnew + 1, pop1 - 1, cJU_ROOTSTATE);) + + j__udyFreeJLW(Pjlw, pop1, (Pjpm_t) NULL); + +//// *PPArray = (Pvoid_t) Pjlwnew | cJU_LEAFW); + *PPArray = (Pvoid_t) Pjlwnew; + DBGCODE(JudyCheckPop(*PPArray);) + return(1); + + } + else + + +// **************************************************************************** +// JRP BRANCH: +// +// Traverse through the JPM to do the deletion unless the population is small +// enough to convert immediately to a LEAFW. + + { + Pjpm_t Pjpm; + Pjp_t Pjp; // top-level JP to process. + Word_t digit; // in a branch. + JUDYLCODE(Pjv_t Pjv;) // to value area. + Pjlw_t Pjlwnew; // replacement leaf. + DBGCODE(Pjlw_t Pjlwnew_orig;) + + Pjpm = P_JPM(*PPArray); // top object in array (tree). + Pjp = &(Pjpm->jpm_JP); // next object (first branch or leaf). + + assert(((Pjpm->jpm_JP.jp_Type) == cJU_JPBRANCH_L) + || ((Pjpm->jpm_JP.jp_Type) == cJU_JPBRANCH_B) + || ((Pjpm->jpm_JP.jp_Type) == cJU_JPBRANCH_U)); + +// WALK THE TREE +// +// Note: Recursive code in j__udyDelWalk() knows how to collapse a lower-level +// BranchL containing a single JP into the parent JP as a narrow pointer, but +// the code here cant do that for a top-level BranchL. The result can be +// PArray -> JPM -> BranchL containing a single JP. This situation is +// unavoidable because a JPM cannot contain a narrow pointer; the BranchL is +// required in order to hold the top digit decoded, and it does not collapse to +// a LEAFW until the population is low enough. +// +// TBD: Should we add a topdigit field to JPMs so they can hold narrow +// pointers? + + if (j__udyDelWalk(Pjp, Index, cJU_ROOTSTATE, Pjpm) == -1) + { + JU_COPY_ERRNO(PJError, Pjpm); + return(JERRI); + } + + --(Pjpm->jpm_Pop0); // success; decrement total population. + + if ((Pjpm->jpm_Pop0 + 1) != cJU_LEAFW_MAXPOP1) + { + DBGCODE(JudyCheckPop(*PPArray);) + return(1); + } + +// COMPRESS A BRANCH[LBU] TO A LEAFW: +// + Pjlwnew = j__udyAllocJLW(cJU_LEAFW_MAXPOP1); + JU_CHECKALLOC(Pjlw_t, Pjlwnew, JERRI); + +// Plug leaf into root pointer and set population count: + +//// *PPArray = (Pvoid_t) ((Word_t) Pjlwnew | cJU_LEAFW); + *PPArray = (Pvoid_t) Pjlwnew; +#ifdef JUDYL // prepare value area: + Pjv = JL_LEAFWVALUEAREA(Pjlwnew, cJU_LEAFW_MAXPOP1); +#endif + *Pjlwnew++ = cJU_LEAFW_MAXPOP1 - 1; // set pop0. + DBGCODE(Pjlwnew_orig = Pjlwnew;) + + switch (JU_JPTYPE(Pjp)) + { + +// JPBRANCH_L: Copy each JPs indexes to the new LEAFW and free the old +// branch: + + case cJU_JPBRANCH_L: + { + Pjbl_t PjblRaw = (Pjbl_t) (Pjp->jp_Addr); + Pjbl_t Pjbl = P_JBL(PjblRaw); + + for (offset = 0; offset < Pjbl->jbl_NumJPs; ++offset) + { + pop1 = j__udyLeafM1ToLeafW(Pjlwnew, JU_PVALUEPASS + (Pjbl->jbl_jp) + offset, + JU_DIGITTOSTATE(Pjbl->jbl_Expanse[offset], + cJU_BYTESPERWORD), + (Pvoid_t) Pjpm); + Pjlwnew += pop1; // advance through indexes. + JUDYLCODE(Pjv += pop1;) // advance through values. + } + j__udyFreeJBL(PjblRaw, Pjpm); + + assert(Pjlwnew == Pjlwnew_orig + cJU_LEAFW_MAXPOP1); + break; // delete Index from new LEAFW. + } + +// JPBRANCH_B: Copy each JPs indexes to the new LEAFW and free the old +// branch, including each JP subarray: + + case cJU_JPBRANCH_B: + { + Pjbb_t PjbbRaw = (Pjbb_t) (Pjp->jp_Addr); + Pjbb_t Pjbb = P_JBB(PjbbRaw); + Word_t subexp; // current subexpanse number. + BITMAPB_t bitmap; // portion for this subexpanse. + Pjp_t Pjp2Raw; // one subexpanses subarray. + Pjp_t Pjp2; + + for (subexp = 0; subexp < cJU_NUMSUBEXPB; ++subexp) + { + if ((bitmap = JU_JBB_BITMAP(Pjbb, subexp)) == 0) + continue; // skip empty subexpanse. + + digit = subexp * cJU_BITSPERSUBEXPB; + Pjp2Raw = JU_JBB_PJP(Pjbb, subexp); + Pjp2 = P_JP(Pjp2Raw); + assert(Pjp2 != (Pjp_t) NULL); + +// Walk through bits for all possible sub-subexpanses (digits); increment +// offset for each populated subexpanse; until no more set bits: + + for (offset = 0; bitmap != 0; bitmap >>= 1, ++digit) + { + if (! (bitmap & 1)) // skip empty sub-subexpanse. + continue; + + pop1 = j__udyLeafM1ToLeafW(Pjlwnew, JU_PVALUEPASS + Pjp2 + offset, + JU_DIGITTOSTATE(digit, cJU_BYTESPERWORD), + (Pvoid_t) Pjpm); + Pjlwnew += pop1; // advance through indexes. + JUDYLCODE(Pjv += pop1;) // advance through values. + ++offset; + } + j__udyFreeJBBJP(Pjp2Raw, /* pop1 = */ offset, Pjpm); + } + j__udyFreeJBB(PjbbRaw, Pjpm); + + assert(Pjlwnew == Pjlwnew_orig + cJU_LEAFW_MAXPOP1); + break; // delete Index from new LEAFW. + + } // case cJU_JPBRANCH_B. + + +// JPBRANCH_U: Copy each JPs indexes to the new LEAFW and free the old +// branch: + + case cJU_JPBRANCH_U: + { + Pjbu_t PjbuRaw = (Pjbu_t) (Pjp->jp_Addr); + Pjbu_t Pjbu = P_JBU(PjbuRaw); + Word_t ldigit; // larger than uint8_t. + + for (Pjp = Pjbu->jbu_jp, ldigit = 0; + ldigit < cJU_BRANCHUNUMJPS; + ++Pjp, ++ldigit) + { + +// Shortcuts, to save a little time for possibly big branches: + + if ((JU_JPTYPE(Pjp)) == cJU_JPNULLMAX) // skip null JP. + continue; + +// TBD: Should the following shortcut also be used in BranchL and BranchB +// code? + +#ifndef JU_64BIT + if ((JU_JPTYPE(Pjp)) == cJU_JPIMMED_3_01) +#else + if ((JU_JPTYPE(Pjp)) == cJU_JPIMMED_7_01) +#endif + { // single Immed: + *Pjlwnew++ = JU_DIGITTOSTATE(ldigit, cJU_BYTESPERWORD) + | JU_JPDCDPOP0(Pjp); // rebuild Index. +#ifdef JUDYL + *Pjv++ = Pjp->jp_Addr; // copy value area. +#endif + continue; + } + + pop1 = j__udyLeafM1ToLeafW(Pjlwnew, JU_PVALUEPASS + Pjp, JU_DIGITTOSTATE(ldigit, cJU_BYTESPERWORD), + (Pvoid_t) Pjpm); + Pjlwnew += pop1; // advance through indexes. + JUDYLCODE(Pjv += pop1;) // advance through values. + } + j__udyFreeJBU(PjbuRaw, Pjpm); + + assert(Pjlwnew == Pjlwnew_orig + cJU_LEAFW_MAXPOP1); + break; // delete Index from new LEAFW. + + } // case cJU_JPBRANCH_U. + + +// INVALID JP TYPE in jpm_t struct + + default: JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); + return(JERRI); + + } // end switch on sub-JP type. + + DBGCODE(JudyCheckSorted((Pjll_t) Pjlwnew_orig, cJU_LEAFW_MAXPOP1, + cJU_ROOTSTATE);) + +// FREE JPM (no longer needed): + + j__udyFreeJPM(Pjpm, (Pjpm_t) NULL); + DBGCODE(JudyCheckPop(*PPArray);) + return(1); + + } + /*NOTREACHED*/ + +} // Judy1Unset() / JudyLDel() diff --git a/libnetdata/libjudy/src/JudyL/JudyLFirst.c b/libnetdata/libjudy/src/JudyL/JudyLFirst.c new file mode 100644 index 0000000..aaf6639 --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLFirst.c @@ -0,0 +1,213 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.12 $ $Source: /judy/src/JudyCommon/JudyFirst.c $ +// +// Judy*First[Empty]() and Judy*Last[Empty]() routines for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. +// +// These are inclusive versions of Judy*Next[Empty]() and Judy*Prev[Empty](). + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + + +// **************************************************************************** +// J U D Y 1 F I R S T +// J U D Y L F I R S T +// +// See the manual entry for details. + +#ifdef JUDY1 +FUNCTION int Judy1First +#else +FUNCTION PPvoid_t JudyLFirst +#endif + ( + Pcvoid_t PArray, // Judy array to search. + Word_t * PIndex, // starting point and result. + PJError_t PJError // optional, for returning error info. + ) +{ + if (PIndex == (PWord_t) NULL) // caller error: + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + +#ifdef JUDY1 + switch (Judy1Test(PArray, *PIndex, PJError)) + { + case 1: return(1); // found *PIndex itself. + case 0: return(Judy1Next(PArray, PIndex, PJError)); + default: return(JERRI); + } +#else + { + PPvoid_t PValue; + + if ((PValue = JudyLGet(PArray, *PIndex, PJError)) == PPJERR) + return(PPJERR); + + if (PValue != (PPvoid_t) NULL) return(PValue); // found *PIndex. + + return(JudyLNext(PArray, PIndex, PJError)); + } +#endif + +} // Judy1First() / JudyLFirst() + + +// **************************************************************************** +// J U D Y 1 L A S T +// J U D Y L L A S T +// +// See the manual entry for details. + +#ifdef JUDY1 +FUNCTION int Judy1Last( +#else +FUNCTION PPvoid_t JudyLLast( +#endif + Pcvoid_t PArray, // Judy array to search. + Word_t * PIndex, // starting point and result. + PJError_t PJError) // optional, for returning error info. +{ + if (PIndex == (PWord_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX); // caller error. + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + +#ifdef JUDY1 + switch (Judy1Test(PArray, *PIndex, PJError)) + { + case 1: return(1); // found *PIndex itself. + case 0: return(Judy1Prev(PArray, PIndex, PJError)); + default: return(JERRI); + } +#else + { + PPvoid_t PValue; + + if ((PValue = JudyLGet(PArray, *PIndex, PJError)) == PPJERR) + return(PPJERR); + + if (PValue != (PPvoid_t) NULL) return(PValue); // found *PIndex. + + return(JudyLPrev(PArray, PIndex, PJError)); + } +#endif + +} // Judy1Last() / JudyLLast() + + +// **************************************************************************** +// J U D Y 1 F I R S T E M P T Y +// J U D Y L F I R S T E M P T Y +// +// See the manual entry for details. + +#ifdef JUDY1 +FUNCTION int Judy1FirstEmpty( +#else +FUNCTION int JudyLFirstEmpty( +#endif + Pcvoid_t PArray, // Judy array to search. + Word_t * PIndex, // starting point and result. + PJError_t PJError) // optional, for returning error info. +{ + if (PIndex == (PWord_t) NULL) // caller error: + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX); + return(JERRI); + } + +#ifdef JUDY1 + switch (Judy1Test(PArray, *PIndex, PJError)) + { + case 0: return(1); // found *PIndex itself. + case 1: return(Judy1NextEmpty(PArray, PIndex, PJError)); + default: return(JERRI); + } +#else + { + PPvoid_t PValue; + + if ((PValue = JudyLGet(PArray, *PIndex, PJError)) == PPJERR) + return(JERRI); + + if (PValue == (PPvoid_t) NULL) return(1); // found *PIndex. + + return(JudyLNextEmpty(PArray, PIndex, PJError)); + } +#endif + +} // Judy1FirstEmpty() / JudyLFirstEmpty() + + +// **************************************************************************** +// J U D Y 1 L A S T E M P T Y +// J U D Y L L A S T E M P T Y +// +// See the manual entry for details. + +#ifdef JUDY1 +FUNCTION int Judy1LastEmpty( +#else +FUNCTION int JudyLLastEmpty( +#endif + Pcvoid_t PArray, // Judy array to search. + Word_t * PIndex, // starting point and result. + PJError_t PJError) // optional, for returning error info. +{ + if (PIndex == (PWord_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX); // caller error. + return(JERRI); + } + +#ifdef JUDY1 + switch (Judy1Test(PArray, *PIndex, PJError)) + { + case 0: return(1); // found *PIndex itself. + case 1: return(Judy1PrevEmpty(PArray, PIndex, PJError)); + default: return(JERRI); + } +#else + { + PPvoid_t PValue; + + if ((PValue = JudyLGet(PArray, *PIndex, PJError)) == PPJERR) + return(JERRI); + + if (PValue == (PPvoid_t) NULL) return(1); // found *PIndex. + + return(JudyLPrevEmpty(PArray, PIndex, PJError)); + } +#endif + +} // Judy1LastEmpty() / JudyLLastEmpty() diff --git a/libnetdata/libjudy/src/JudyL/JudyLFreeArray.c b/libnetdata/libjudy/src/JudyL/JudyLFreeArray.c new file mode 100644 index 0000000..34fac50 --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLFreeArray.c @@ -0,0 +1,363 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.51 $ $Source: /judy/src/JudyCommon/JudyFreeArray.c $ +// +// Judy1FreeArray() and JudyLFreeArray() functions for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. +// Return the number of bytes freed from the array. + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +DBGCODE(extern void JudyCheckPop(Pvoid_t PArray);) + + +// **************************************************************************** +// J U D Y 1 F R E E A R R A Y +// J U D Y L F R E E A R R A Y +// +// See the Judy*(3C) manual entry for details. +// +// This code is written recursively, at least at first, because thats much +// simpler. Hope its fast enough. + +#ifdef JUDY1 +FUNCTION Word_t Judy1FreeArray +#else +FUNCTION Word_t JudyLFreeArray +#endif + ( + PPvoid_t PPArray, // array to free. + PJError_t PJError // optional, for returning error info. + ) +{ + jpm_t jpm; // local to accumulate free statistics. + +// CHECK FOR NULL POINTER (error by caller): + + if (PPArray == (PPvoid_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPPARRAY); + return(JERR); + } + + DBGCODE(JudyCheckPop(*PPArray);) + +// Zero jpm.jpm_Pop0 (meaning the array will be empty in a moment) for accurate +// logging in TRACEMI2. + + jpm.jpm_Pop0 = 0; // see above. + jpm.jpm_TotalMemWords = 0; // initialize memory freed. + +// Empty array: + + if (P_JLW(*PPArray) == (Pjlw_t) NULL) return(0); + +// PROCESS TOP LEVEL "JRP" BRANCHES AND LEAF: + + if (JU_LEAFW_POP0(*PPArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlw = P_JLW(*PPArray); // first word of leaf. + + j__udyFreeJLW(Pjlw, Pjlw[0] + 1, &jpm); + *PPArray = (Pvoid_t) NULL; // make an empty array. + return (-(jpm.jpm_TotalMemWords * cJU_BYTESPERWORD)); // see above. + } + else + +// Rootstate leaves: just free the leaf: + +// Common code for returning the amount of memory freed. +// +// Note: In a an ordinary LEAFW, pop0 = *PPArray[0]. +// +// Accumulate (negative) words freed, while freeing objects. +// Return the positive bytes freed. + + { + Pjpm_t Pjpm = P_JPM(*PPArray); + Word_t TotalMem = Pjpm->jpm_TotalMemWords; + + j__udyFreeSM(&(Pjpm->jpm_JP), &jpm); // recurse through tree. + j__udyFreeJPM(Pjpm, &jpm); + +// Verify the array was not corrupt. This means that amount of memory freed +// (which is negative) is equal to the initial amount: + + if (TotalMem + jpm.jpm_TotalMemWords) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + return(JERR); + } + + *PPArray = (Pvoid_t) NULL; // make an empty array. + return (TotalMem * cJU_BYTESPERWORD); + } + +} // Judy1FreeArray() / JudyLFreeArray() + + +// **************************************************************************** +// __ J U D Y F R E E S M +// +// Given a pointer to a JP, recursively visit and free (depth first) all nodes +// in a Judy array BELOW the JP, but not the JP itself. Accumulate in *Pjpm +// the total words freed (as a negative value). "SM" = State Machine. +// +// Note: Corruption is not detected at this level because during a FreeArray, +// if the code hasnt already core dumped, its better to remain silent, even +// if some memory has not been freed, than to bother the caller about the +// corruption. TBD: Is this true? If not, must list all legitimate JPNULL +// and JPIMMED above first, and revert to returning bool_t (see 4.34). + +FUNCTION void j__udyFreeSM( + Pjp_t Pjp, // top of Judy (top-state). + Pjpm_t Pjpm) // to return words freed. +{ + Word_t Pop1; + + switch (JU_JPTYPE(Pjp)) + { + +#ifdef JUDY1 + +// FULL EXPANSE -- nothing to free for this jp_Type. + + case cJ1_JPFULLPOPU1: + break; +#endif + +// JUDY BRANCH -- free the sub-tree depth first: + +// LINEAR BRANCH -- visit each JP in the JBLs list, then free the JBL: +// +// Note: There are no null JPs in a JBL. + + case cJU_JPBRANCH_L: + case cJU_JPBRANCH_L2: + case cJU_JPBRANCH_L3: +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: + case cJU_JPBRANCH_L5: + case cJU_JPBRANCH_L6: + case cJU_JPBRANCH_L7: +#endif // JU_64BIT + { + Pjbl_t Pjbl = P_JBL(Pjp->jp_Addr); + Word_t offset; + + for (offset = 0; offset < Pjbl->jbl_NumJPs; ++offset) + j__udyFreeSM((Pjbl->jbl_jp) + offset, Pjpm); + + j__udyFreeJBL((Pjbl_t) (Pjp->jp_Addr), Pjpm); + break; + } + + +// BITMAP BRANCH -- visit each JP in the JBBs list based on the bitmap, also +// +// Note: There are no null JPs in a JBB. + + case cJU_JPBRANCH_B: + case cJU_JPBRANCH_B2: + case cJU_JPBRANCH_B3: +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: + case cJU_JPBRANCH_B5: + case cJU_JPBRANCH_B6: + case cJU_JPBRANCH_B7: +#endif // JU_64BIT + { + Word_t subexp; + Word_t offset; + Word_t jpcount; + + Pjbb_t Pjbb = P_JBB(Pjp->jp_Addr); + + for (subexp = 0; subexp < cJU_NUMSUBEXPB; ++subexp) + { + jpcount = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp)); + + if (jpcount) + { + for (offset = 0; offset < jpcount; ++offset) + { + j__udyFreeSM(P_JP(JU_JBB_PJP(Pjbb, subexp)) + offset, + Pjpm); + } + j__udyFreeJBBJP(JU_JBB_PJP(Pjbb, subexp), jpcount, Pjpm); + } + } + j__udyFreeJBB((Pjbb_t) (Pjp->jp_Addr), Pjpm); + + break; + } + + +// UNCOMPRESSED BRANCH -- visit each JP in the JBU array, then free the JBU +// itself: +// +// Note: Null JPs are handled during recursion at a lower state. + + case cJU_JPBRANCH_U: + case cJU_JPBRANCH_U2: + case cJU_JPBRANCH_U3: +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: + case cJU_JPBRANCH_U5: + case cJU_JPBRANCH_U6: + case cJU_JPBRANCH_U7: +#endif // JU_64BIT + { + Word_t offset; + Pjbu_t Pjbu = P_JBU(Pjp->jp_Addr); + + for (offset = 0; offset < cJU_BRANCHUNUMJPS; ++offset) + j__udyFreeSM((Pjbu->jbu_jp) + offset, Pjpm); + + j__udyFreeJBU((Pjbu_t) (Pjp->jp_Addr), Pjpm); + break; + } + + +// -- Cases below here terminate and do not recurse. -- + + +// LINEAR LEAF -- just free the leaf; size is computed from jp_Type: +// +// Note: cJU_JPLEAF1 is a special case, see discussion in ../Judy1/Judy1.h + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + j__udyFreeJLL1((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + break; +#endif + + case cJU_JPLEAF2: + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + j__udyFreeJLL2((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + break; + + case cJU_JPLEAF3: + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + j__udyFreeJLL3((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + break; + +#ifdef JU_64BIT + case cJU_JPLEAF4: + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + j__udyFreeJLL4((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + break; + + case cJU_JPLEAF5: + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + j__udyFreeJLL5((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + break; + + case cJU_JPLEAF6: + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + j__udyFreeJLL6((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + break; + + case cJU_JPLEAF7: + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + j__udyFreeJLL7((Pjll_t) (Pjp->jp_Addr), Pop1, Pjpm); + break; +#endif // JU_64BIT + + +// BITMAP LEAF -- free sub-expanse arrays of JPs, then free the JBB. + + case cJU_JPLEAF_B1: + { +#ifdef JUDYL + Word_t subexp; + Word_t jpcount; + Pjlb_t Pjlb = P_JLB(Pjp->jp_Addr); + +// Free the value areas in the bitmap leaf: + + for (subexp = 0; subexp < cJU_NUMSUBEXPL; ++subexp) + { + jpcount = j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, subexp)); + + if (jpcount) + j__udyLFreeJV(JL_JLB_PVALUE(Pjlb, subexp), jpcount, Pjpm); + } +#endif // JUDYL + + j__udyFreeJLB1((Pjlb_t) (Pjp->jp_Addr), Pjpm); + break; + + } // case cJU_JPLEAF_B1 + +#ifdef JUDYL + + +// IMMED*: +// +// For JUDYL, all non JPIMMED_*_01s have a LeafV which must be freed: + + case cJU_JPIMMED_1_02: + case cJU_JPIMMED_1_03: +#ifdef JU_64BIT + case cJU_JPIMMED_1_04: + case cJU_JPIMMED_1_05: + case cJU_JPIMMED_1_06: + case cJU_JPIMMED_1_07: +#endif + Pop1 = JU_JPTYPE(Pjp) - cJU_JPIMMED_1_02 + 2; + j__udyLFreeJV((Pjv_t) (Pjp->jp_Addr), Pop1, Pjpm); + break; + +#ifdef JU_64BIT + case cJU_JPIMMED_2_02: + case cJU_JPIMMED_2_03: + + Pop1 = JU_JPTYPE(Pjp) - cJU_JPIMMED_2_02 + 2; + j__udyLFreeJV((Pjv_t) (Pjp->jp_Addr), Pop1, Pjpm); + break; + + case cJU_JPIMMED_3_02: + j__udyLFreeJV((Pjv_t) (Pjp->jp_Addr), 2, Pjpm); + break; + +#endif // JU_64BIT +#endif // JUDYL + + +// OTHER JPNULL, JPIMMED, OR UNEXPECTED TYPE -- nothing to free for this type: +// +// Note: Lump together no-op and invalid JP types; see function header +// comments. + + default: break; + + } // switch (JU_JPTYPE(Pjp)) + +} // j__udyFreeSM() diff --git a/libnetdata/libjudy/src/JudyL/JudyLGet.c b/libnetdata/libjudy/src/JudyL/JudyLGet.c new file mode 100644 index 0000000..0bb9971 --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLGet.c @@ -0,0 +1,1094 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.43 $ $Source: /judy/src/JudyCommon/JudyGet.c $ +// +// Judy1Test() and JudyLGet() functions for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +#ifdef TRACEJPR // different macro name, for "retrieval" only. +#include "JudyPrintJP.c" +#endif + + +// **************************************************************************** +// J U D Y 1 T E S T +// J U D Y L G E T +// +// See the manual entry for details. Note support for "shortcut" entries to +// trees known to start with a JPM. + +#ifdef JUDY1 + +#ifdef JUDYGETINLINE +FUNCTION int j__udy1Test +#else +FUNCTION int Judy1Test +#endif + +#else // JUDYL + +#ifdef JUDYGETINLINE +FUNCTION PPvoid_t j__udyLGet +#else +FUNCTION PPvoid_t JudyLGet +#endif + +#endif // JUDYL + ( +#ifdef JUDYGETINLINE + Pvoid_t PArray, // from which to retrieve. + Word_t Index // to retrieve. +#else + Pcvoid_t PArray, // from which to retrieve. + Word_t Index, // to retrieve. + PJError_t PJError // optional, for returning error info. +#endif + ) +{ + Pjp_t Pjp; // current JP while walking the tree. + Pjpm_t Pjpm; // for global accounting. + uint8_t Digit; // byte just decoded from Index. + Word_t Pop1; // leaf population (number of indexes). + Pjll_t Pjll; // pointer to LeafL. + DBGCODE(uint8_t ParentJPType;) + +#ifndef JUDYGETINLINE + + if (PArray == (Pcvoid_t) NULL) // empty array. + { + JUDY1CODE(return(0);) + JUDYLCODE(return((PPvoid_t) NULL);) + } + +// **************************************************************************** +// PROCESS TOP LEVEL BRANCHES AND LEAF: + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlw = P_JLW(PArray); // first word of leaf. + int posidx; // signed offset in leaf. + + Pop1 = Pjlw[0] + 1; + posidx = j__udySearchLeafW(Pjlw + 1, Pop1, Index); + + if (posidx >= 0) + { + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAFWVALUEAREA(Pjlw, Pop1) + posidx));) + } + JUDY1CODE(return(0);) + JUDYLCODE(return((PPvoid_t) NULL);) + } + +#endif // ! JUDYGETINLINE + + Pjpm = P_JPM(PArray); + Pjp = &(Pjpm->jpm_JP); // top branch is below JPM. + +// **************************************************************************** +// WALK THE JUDY TREE USING A STATE MACHINE: + +ContinueWalk: // for going down one level; come here with Pjp set. + +#ifdef TRACEJPR + JudyPrintJP(Pjp, "g", __LINE__); +#endif + switch (JU_JPTYPE(Pjp)) + { + +// Ensure the switch table starts at 0 for speed; otherwise more code is +// executed: + + case 0: goto ReturnCorrupt; // save a little code. + + +// **************************************************************************** +// JPNULL*: +// +// Note: These are legitimate in a BranchU (only) and do not constitute a +// fault. + + case cJU_JPNULL1: + case cJU_JPNULL2: + case cJU_JPNULL3: +#ifdef JU_64BIT + case cJU_JPNULL4: + case cJU_JPNULL5: + case cJU_JPNULL6: + case cJU_JPNULL7: +#endif + assert(ParentJPType >= cJU_JPBRANCH_U2); + assert(ParentJPType <= cJU_JPBRANCH_U); + JUDY1CODE(return(0);) + JUDYLCODE(return((PPvoid_t) NULL);) + + +// **************************************************************************** +// JPBRANCH_L*: +// +// Note: The use of JU_DCDNOTMATCHINDEX() in branches is not strictly +// required,since this can be done at leaf level, but it costs nothing to do it +// sooner, and it aborts an unnecessary traversal sooner. + + case cJU_JPBRANCH_L2: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 2)) break; + Digit = JU_DIGITATSTATE(Index, 2); + goto JudyBranchL; + + case cJU_JPBRANCH_L3: + +#ifdef JU_64BIT // otherwise its a no-op: + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 3)) break; +#endif + Digit = JU_DIGITATSTATE(Index, 3); + goto JudyBranchL; + +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 4)) break; + Digit = JU_DIGITATSTATE(Index, 4); + goto JudyBranchL; + + case cJU_JPBRANCH_L5: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 5)) break; + Digit = JU_DIGITATSTATE(Index, 5); + goto JudyBranchL; + + case cJU_JPBRANCH_L6: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 6)) break; + Digit = JU_DIGITATSTATE(Index, 6); + goto JudyBranchL; + + case cJU_JPBRANCH_L7: + + // JU_DCDNOTMATCHINDEX() would be a no-op. + Digit = JU_DIGITATSTATE(Index, 7); + goto JudyBranchL; + +#endif // JU_64BIT + + case cJU_JPBRANCH_L: + { + Pjbl_t Pjbl; + int posidx; + + Digit = JU_DIGITATSTATE(Index, cJU_ROOTSTATE); + +// Common code for all BranchLs; come here with Digit set: + +JudyBranchL: + Pjbl = P_JBL(Pjp->jp_Addr); + + posidx = 0; + + do { + if (Pjbl->jbl_Expanse[posidx] == Digit) + { // found Digit; continue traversal: + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = Pjbl->jbl_jp + posidx; + goto ContinueWalk; + } + } while (++posidx != Pjbl->jbl_NumJPs); + + break; + } + + +// **************************************************************************** +// JPBRANCH_B*: + + case cJU_JPBRANCH_B2: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 2)) break; + Digit = JU_DIGITATSTATE(Index, 2); + goto JudyBranchB; + + case cJU_JPBRANCH_B3: + +#ifdef JU_64BIT // otherwise its a no-op: + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 3)) break; +#endif + Digit = JU_DIGITATSTATE(Index, 3); + goto JudyBranchB; + + +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 4)) break; + Digit = JU_DIGITATSTATE(Index, 4); + goto JudyBranchB; + + case cJU_JPBRANCH_B5: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 5)) break; + Digit = JU_DIGITATSTATE(Index, 5); + goto JudyBranchB; + + case cJU_JPBRANCH_B6: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 6)) break; + Digit = JU_DIGITATSTATE(Index, 6); + goto JudyBranchB; + + case cJU_JPBRANCH_B7: + + // JU_DCDNOTMATCHINDEX() would be a no-op. + Digit = JU_DIGITATSTATE(Index, 7); + goto JudyBranchB; + +#endif // JU_64BIT + + case cJU_JPBRANCH_B: + { + Pjbb_t Pjbb; + Word_t subexp; // in bitmap, 0..7. + BITMAPB_t BitMap; // for one subexpanse. + BITMAPB_t BitMask; // bit in BitMap for Indexs Digit. + + Digit = JU_DIGITATSTATE(Index, cJU_ROOTSTATE); + +// Common code for all BranchBs; come here with Digit set: + +JudyBranchB: + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjbb = P_JBB(Pjp->jp_Addr); + subexp = Digit / cJU_BITSPERSUBEXPB; + + BitMap = JU_JBB_BITMAP(Pjbb, subexp); + Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp)); + + BitMask = JU_BITPOSMASKB(Digit); + +// No JP in subexpanse for Index => Index not found: + + if (! (BitMap & BitMask)) break; + +// Count JPs in the subexpanse below the one for Index: + + Pjp += j__udyCountBitsB(BitMap & (BitMask - 1)); + + goto ContinueWalk; + + } // case cJU_JPBRANCH_B* + + +// **************************************************************************** +// JPBRANCH_U*: +// +// Notice the reverse order of the cases, and falling through to the next case, +// for performance. + + case cJU_JPBRANCH_U: + + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, cJU_ROOTSTATE); + +// If not a BranchU, traverse; otherwise fall into the next case, which makes +// this very fast code for a large Judy array (mainly BranchUs), especially +// when branches are already in the cache, such as for prev/next: + +#ifndef JU_64BIT + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U3) goto ContinueWalk; +#else + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U7) goto ContinueWalk; +#endif + +#ifdef JU_64BIT + case cJU_JPBRANCH_U7: + + // JU_DCDNOTMATCHINDEX() would be a no-op. + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, 7); + + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U6) goto ContinueWalk; + // and fall through. + + case cJU_JPBRANCH_U6: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 6)) break; + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, 6); + + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U5) goto ContinueWalk; + // and fall through. + + case cJU_JPBRANCH_U5: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 5)) break; + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, 5); + + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U4) goto ContinueWalk; + // and fall through. + + case cJU_JPBRANCH_U4: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 4)) break; + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, 4); + + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U3) goto ContinueWalk; + // and fall through. + +#endif // JU_64BIT + + case cJU_JPBRANCH_U3: + +#ifdef JU_64BIT // otherwise its a no-op: + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 3)) break; +#endif + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, 3); + + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U2) goto ContinueWalk; + // and fall through. + + case cJU_JPBRANCH_U2: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 2)) break; + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, 2); + +// Note: BranchU2 is a special case that must continue traversal to a leaf, +// immed, full, or null type: + + goto ContinueWalk; + + +// **************************************************************************** +// JPLEAF*: +// +// Note: Here the calls of JU_DCDNOTMATCHINDEX() are necessary and check +// whether Index is out of the expanse of a narrow pointer. + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + + case cJU_JPLEAF1: + { + int posidx; // signed offset in leaf. + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 1)) break; + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf1(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF1VALUEAREA(Pjll, Pop1) + posidx));) + } + +#endif // (JUDYL || (! JU_64BIT)) + + case cJU_JPLEAF2: + { + int posidx; // signed offset in leaf. + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 2)) break; + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf2(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF2VALUEAREA(Pjll, Pop1) + posidx));) + } + case cJU_JPLEAF3: + { + int posidx; // signed offset in leaf. + +#ifdef JU_64BIT // otherwise its a no-op: + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 3)) break; +#endif + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf3(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF3VALUEAREA(Pjll, Pop1) + posidx));) + } +#ifdef JU_64BIT + case cJU_JPLEAF4: + { + int posidx; // signed offset in leaf. + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 4)) break; + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf4(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF4VALUEAREA(Pjll, Pop1) + posidx));) + } + case cJU_JPLEAF5: + { + int posidx; // signed offset in leaf. + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 5)) break; + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf5(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF5VALUEAREA(Pjll, Pop1) + posidx));) + } + + case cJU_JPLEAF6: + { + int posidx; // signed offset in leaf. + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 6)) break; + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf6(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF6VALUEAREA(Pjll, Pop1) + posidx));) + } + case cJU_JPLEAF7: + { + int posidx; // signed offset in leaf. + + // JU_DCDNOTMATCHINDEX() would be a no-op. + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf7(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF7VALUEAREA(Pjll, Pop1) + posidx));) + } +#endif // JU_64BIT + + +// **************************************************************************** +// JPLEAF_B1: + + case cJU_JPLEAF_B1: + { + Pjlb_t Pjlb; +#ifdef JUDYL + int posidx; + Word_t subexp; // in bitmap, 0..7. + BITMAPL_t BitMap; // for one subexpanse. + BITMAPL_t BitMask; // bit in BitMap for Indexs Digit. + Pjv_t Pjv; +#endif + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 1)) break; + + Pjlb = P_JLB(Pjp->jp_Addr); + +#ifdef JUDY1 + +// Simply check if Indexs bit is set in the bitmap: + + if (JU_BITMAPTESTL(Pjlb, Index)) return(1); + break; + +#else // JUDYL + +// JudyL is much more complicated because of value area subarrays: + + Digit = JU_DIGITATSTATE(Index, 1); + subexp = Digit / cJU_BITSPERSUBEXPL; + BitMap = JU_JLB_BITMAP(Pjlb, subexp); + BitMask = JU_BITPOSMASKL(Digit); + +// No value in subexpanse for Index => Index not found: + + if (! (BitMap & BitMask)) break; + +// Count value areas in the subexpanse below the one for Index: + + Pjv = P_JV(JL_JLB_PVALUE(Pjlb, subexp)); + assert(Pjv != (Pjv_t) NULL); + posidx = j__udyCountBitsL(BitMap & (BitMask - 1)); + + return((PPvoid_t) (Pjv + posidx)); + +#endif // JUDYL + + } // case cJU_JPLEAF_B1 + +#ifdef JUDY1 + +// **************************************************************************** +// JPFULLPOPU1: +// +// If the Index is in the expanse, it is necessarily valid (found). + + case cJ1_JPFULLPOPU1: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 1)) break; + return(1); + +#ifdef notdef // for future enhancements +#ifdef JU_64BIT + +// Note: Need ? if (JU_DCDNOTMATCHINDEX(Index, Pjp, 1)) break; + + case cJ1_JPFULLPOPU1m15: + if (Pjp->jp_1Index[14] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m14: + if (Pjp->jp_1Index[13] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m13: + if (Pjp->jp_1Index[12] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m12: + if (Pjp->jp_1Index[11] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m11: + if (Pjp->jp_1Index[10] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m10: + if (Pjp->jp_1Index[9] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m9: + if (Pjp->jp_1Index[8] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m8: + if (Pjp->jp_1Index[7] == (uint8_t)Index) break; +#endif + case cJ1_JPFULLPOPU1m7: + if (Pjp->jp_1Index[6] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m6: + if (Pjp->jp_1Index[5] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m5: + if (Pjp->jp_1Index[4] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m4: + if (Pjp->jp_1Index[3] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m3: + if (Pjp->jp_1Index[2] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m2: + if (Pjp->jp_1Index[1] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m1: + if (Pjp->jp_1Index[0] == (uint8_t)Index) break; + + return(1); // found, not in exclusion list + +#endif // JUDY1 +#endif // notdef + +// **************************************************************************** +// JPIMMED*: +// +// Note that the contents of jp_DcdPopO are different for cJU_JPIMMED_*_01: + + case cJU_JPIMMED_1_01: + case cJU_JPIMMED_2_01: + case cJU_JPIMMED_3_01: +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: + case cJU_JPIMMED_5_01: + case cJU_JPIMMED_6_01: + case cJU_JPIMMED_7_01: +#endif + if (JU_JPDCDPOP0(Pjp) != JU_TRIMTODCDSIZE(Index)) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) &(Pjp->jp_Addr));) // immediate value area. + + +// Macros to make code more readable and avoid dup errors + +#ifdef JUDY1 + +#define CHECKINDEXNATIVE(LEAF_T, PJP, IDX, INDEX) \ +if (((LEAF_T *)((PJP)->jp_1Index))[(IDX) - 1] == (LEAF_T)(INDEX)) \ + return(1) + +#define CHECKLEAFNONNAT(LFBTS, PJP, INDEX, IDX, COPY) \ +{ \ + Word_t i_ndex; \ + uint8_t *a_ddr; \ + a_ddr = (PJP)->jp_1Index + (((IDX) - 1) * (LFBTS)); \ + COPY(i_ndex, a_ddr); \ + if (i_ndex == JU_LEASTBYTES((INDEX), (LFBTS))) \ + return(1); \ +} +#endif + +#ifdef JUDYL + +#define CHECKINDEXNATIVE(LEAF_T, PJP, IDX, INDEX) \ +if (((LEAF_T *)((PJP)->jp_LIndex))[(IDX) - 1] == (LEAF_T)(INDEX)) \ + return((PPvoid_t)(P_JV((PJP)->jp_Addr) + (IDX) - 1)) + +#define CHECKLEAFNONNAT(LFBTS, PJP, INDEX, IDX, COPY) \ +{ \ + Word_t i_ndex; \ + uint8_t *a_ddr; \ + a_ddr = (PJP)->jp_LIndex + (((IDX) - 1) * (LFBTS)); \ + COPY(i_ndex, a_ddr); \ + if (i_ndex == JU_LEASTBYTES((INDEX), (LFBTS))) \ + return((PPvoid_t)(P_JV((PJP)->jp_Addr) + (IDX) - 1)); \ +} +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_15: CHECKINDEXNATIVE(uint8_t, Pjp, 15, Index); + case cJ1_JPIMMED_1_14: CHECKINDEXNATIVE(uint8_t, Pjp, 14, Index); + case cJ1_JPIMMED_1_13: CHECKINDEXNATIVE(uint8_t, Pjp, 13, Index); + case cJ1_JPIMMED_1_12: CHECKINDEXNATIVE(uint8_t, Pjp, 12, Index); + case cJ1_JPIMMED_1_11: CHECKINDEXNATIVE(uint8_t, Pjp, 11, Index); + case cJ1_JPIMMED_1_10: CHECKINDEXNATIVE(uint8_t, Pjp, 10, Index); + case cJ1_JPIMMED_1_09: CHECKINDEXNATIVE(uint8_t, Pjp, 9, Index); + case cJ1_JPIMMED_1_08: CHECKINDEXNATIVE(uint8_t, Pjp, 8, Index); +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_07: CHECKINDEXNATIVE(uint8_t, Pjp, 7, Index); + case cJU_JPIMMED_1_06: CHECKINDEXNATIVE(uint8_t, Pjp, 6, Index); + case cJU_JPIMMED_1_05: CHECKINDEXNATIVE(uint8_t, Pjp, 5, Index); + case cJU_JPIMMED_1_04: CHECKINDEXNATIVE(uint8_t, Pjp, 4, Index); +#endif + case cJU_JPIMMED_1_03: CHECKINDEXNATIVE(uint8_t, Pjp, 3, Index); + case cJU_JPIMMED_1_02: CHECKINDEXNATIVE(uint8_t, Pjp, 2, Index); + CHECKINDEXNATIVE(uint8_t, Pjp, 1, Index); + break; + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_07: CHECKINDEXNATIVE(uint16_t, Pjp, 7, Index); + case cJ1_JPIMMED_2_06: CHECKINDEXNATIVE(uint16_t, Pjp, 6, Index); + case cJ1_JPIMMED_2_05: CHECKINDEXNATIVE(uint16_t, Pjp, 5, Index); + case cJ1_JPIMMED_2_04: CHECKINDEXNATIVE(uint16_t, Pjp, 4, Index); +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_03: CHECKINDEXNATIVE(uint16_t, Pjp, 3, Index); + case cJU_JPIMMED_2_02: CHECKINDEXNATIVE(uint16_t, Pjp, 2, Index); + CHECKINDEXNATIVE(uint16_t, Pjp, 1, Index); + break; +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_05: + CHECKLEAFNONNAT(3, Pjp, Index, 5, JU_COPY3_PINDEX_TO_LONG); + case cJ1_JPIMMED_3_04: + CHECKLEAFNONNAT(3, Pjp, Index, 4, JU_COPY3_PINDEX_TO_LONG); + case cJ1_JPIMMED_3_03: + CHECKLEAFNONNAT(3, Pjp, Index, 3, JU_COPY3_PINDEX_TO_LONG); +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: + CHECKLEAFNONNAT(3, Pjp, Index, 2, JU_COPY3_PINDEX_TO_LONG); + CHECKLEAFNONNAT(3, Pjp, Index, 1, JU_COPY3_PINDEX_TO_LONG); + break; +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + + case cJ1_JPIMMED_4_03: CHECKINDEXNATIVE(uint32_t, Pjp, 3, Index); + case cJ1_JPIMMED_4_02: CHECKINDEXNATIVE(uint32_t, Pjp, 2, Index); + CHECKINDEXNATIVE(uint32_t, Pjp, 1, Index); + break; + + case cJ1_JPIMMED_5_03: + CHECKLEAFNONNAT(5, Pjp, Index, 3, JU_COPY5_PINDEX_TO_LONG); + case cJ1_JPIMMED_5_02: + CHECKLEAFNONNAT(5, Pjp, Index, 2, JU_COPY5_PINDEX_TO_LONG); + CHECKLEAFNONNAT(5, Pjp, Index, 1, JU_COPY5_PINDEX_TO_LONG); + break; + + case cJ1_JPIMMED_6_02: + CHECKLEAFNONNAT(6, Pjp, Index, 2, JU_COPY6_PINDEX_TO_LONG); + CHECKLEAFNONNAT(6, Pjp, Index, 1, JU_COPY6_PINDEX_TO_LONG); + break; + + case cJ1_JPIMMED_7_02: + CHECKLEAFNONNAT(7, Pjp, Index, 2, JU_COPY7_PINDEX_TO_LONG); + CHECKLEAFNONNAT(7, Pjp, Index, 1, JU_COPY7_PINDEX_TO_LONG); + break; + +#endif // (JUDY1 && JU_64BIT) + + +// **************************************************************************** +// INVALID JP TYPE: + + default: + +ReturnCorrupt: + +#ifdef JUDYGETINLINE // Pjpm is known to be non-null: + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); +#else + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); +#endif + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // switch on JP type + +JUDY1CODE(return(0);) +JUDYLCODE(return((PPvoid_t) NULL);) + +} // Judy1Test() / JudyLGet() + + +#ifndef JUDYGETINLINE // only compile the following function once: +#ifdef DEBUG + +// **************************************************************************** +// J U D Y C H E C K P O P +// +// Given a pointer to a Judy array, traverse the entire array to ensure +// population counts add up correctly. This can catch various coding errors. +// +// Since walking the entire tree is probably time-consuming, enable this +// function by setting env parameter $CHECKPOP to first call at which to start +// checking. Note: This function is called both from insert and delete code. +// +// Note: Even though this function does nothing useful for LEAFW leaves, its +// good practice to call it anyway, and cheap too. +// +// TBD: This is a debug-only check function similar to JudyCheckSorted(), but +// since it walks the tree it is Judy1/JudyL-specific and must live in a source +// file that is built both ways. +// +// TBD: As feared, enabling this code for every insert/delete makes Judy +// deathly slow, even for a small tree (10K indexes). Its not so bad if +// present but disabled (<1% slowdown measured). Still, should it be ifdefd +// other than DEBUG and/or called less often? +// +// TBD: Should this "population checker" be expanded to a comprehensive tree +// checker? It currently detects invalid LEAFW/JP types as well as inconsistent +// pop1s. Other possible checks, all based on essentially redundant data in +// the Judy tree, include: +// +// - Zero LS bits in jp_Addr field. +// +// - Correct Dcd bits. +// +// - Consistent JP types (always descending down the tree). +// +// - Sorted linear lists in BranchLs and leaves (using JudyCheckSorted(), but +// ideally that function is already called wherever appropriate after any +// linear list is modified). +// +// - Any others possible? + +#include // for getenv() and atol(). + +static Word_t JudyCheckPopSM(Pjp_t Pjp, Word_t RootPop1); + +FUNCTION void JudyCheckPop( + Pvoid_t PArray) +{ +static bool_t checked = FALSE; // already checked env parameter. +static bool_t enabled = FALSE; // env parameter set. +static bool_t active = FALSE; // calls >= callsmin. +static Word_t callsmin; // start point from $CHECKPOP. +static Word_t calls = 0; // times called so far. + + +// CHECK FOR EXTERNAL ENABLING: + + if (! checked) // only check once. + { + char * value; // for getenv(). + + checked = TRUE; + + if ((value = getenv("CHECKPOP")) == (char *) NULL) + { +#ifdef notdef +// Take this out because nightly tests want to be flavor-independent; its not +// OK to emit special non-error output from the debug flavor: + + (void) puts("JudyCheckPop() present but not enabled by " + "$CHECKPOP env parameter; set it to the number of " + "calls at which to begin checking"); +#endif + return; + } + + callsmin = atol(value); // note: non-number evaluates to 0. + enabled = TRUE; + + (void) printf("JudyCheckPop() present and enabled; callsmin = " + "%lu\n", callsmin); + } + else if (! enabled) return; + +// Previously or just now enabled; check if non-active or newly active: + + if (! active) + { + if (++calls < callsmin) return; + + (void) printf("JudyCheckPop() activated at call %lu\n", calls); + active = TRUE; + } + +// IGNORE LEAFW AT TOP OF TREE: + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + return; + +// Check JPM pop0 against tree, recursively: +// +// Note: The traversal code in JudyCheckPopSM() is simplest when the case +// statement for each JP type compares the pop1 for that JP to its subtree (if +// any) after traversing the subtree (thats the hard part) and adding up +// actual pop1s. A top branchs JP in the JPM does not have room for a +// full-word pop1, so pass it in as a special case. + + { + Pjpm_t Pjpm = P_JPM(PArray); + (void) JudyCheckPopSM(&(Pjpm->jpm_JP), Pjpm->jpm_Pop0 + 1); + return; + } + +} // JudyCheckPop() + + +// **************************************************************************** +// J U D Y C H E C K P O P S M +// +// Recursive state machine (subroutine) for JudyCheckPop(): Given a Pjp (other +// than JPNULL*; caller should shortcut) and the root population for top-level +// branches, check the subtrees actual pop1 against its nominal value, and +// return the total pop1 for the subtree. +// +// Note: Expect RootPop1 to be ignored at lower levels, so pass down 0, which +// should pop an assertion if this expectation is violated. + +FUNCTION static Word_t JudyCheckPopSM( + Pjp_t Pjp, // top of subtree. + Word_t RootPop1) // whole array, for top-level branches only. +{ + Word_t pop1_jp; // nominal population from the JP. + Word_t pop1 = 0; // actual population at this level. + Word_t offset; // in a branch. + +#define PREPBRANCH(cPopBytes,Next) \ + pop1_jp = JU_JPBRANCH_POP0(Pjp, cPopBytes) + 1; goto Next + +assert((((Word_t) (Pjp->jp_Addr)) & 7) == 3); + switch (JU_JPTYPE(Pjp)) + { + + case cJU_JPBRANCH_L2: PREPBRANCH(2, BranchL); + case cJU_JPBRANCH_L3: PREPBRANCH(3, BranchL); +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: PREPBRANCH(4, BranchL); + case cJU_JPBRANCH_L5: PREPBRANCH(5, BranchL); + case cJU_JPBRANCH_L6: PREPBRANCH(6, BranchL); + case cJU_JPBRANCH_L7: PREPBRANCH(7, BranchL); +#endif + case cJU_JPBRANCH_L: pop1_jp = RootPop1; + { + Pjbl_t Pjbl; +BranchL: + Pjbl = P_JBL(Pjp->jp_Addr); + + for (offset = 0; offset < (Pjbl->jbl_NumJPs); ++offset) + pop1 += JudyCheckPopSM((Pjbl->jbl_jp) + offset, 0); + + assert(pop1_jp == pop1); + return(pop1); + } + + case cJU_JPBRANCH_B2: PREPBRANCH(2, BranchB); + case cJU_JPBRANCH_B3: PREPBRANCH(3, BranchB); +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: PREPBRANCH(4, BranchB); + case cJU_JPBRANCH_B5: PREPBRANCH(5, BranchB); + case cJU_JPBRANCH_B6: PREPBRANCH(6, BranchB); + case cJU_JPBRANCH_B7: PREPBRANCH(7, BranchB); +#endif + case cJU_JPBRANCH_B: pop1_jp = RootPop1; + { + Word_t subexp; + Word_t jpcount; + Pjbb_t Pjbb; +BranchB: + Pjbb = P_JBB(Pjp->jp_Addr); + + for (subexp = 0; subexp < cJU_NUMSUBEXPB; ++subexp) + { + jpcount = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp)); + + for (offset = 0; offset < jpcount; ++offset) + { + pop1 += JudyCheckPopSM(P_JP(JU_JBB_PJP(Pjbb, subexp)) + + offset, 0); + } + } + + assert(pop1_jp == pop1); + return(pop1); + } + + case cJU_JPBRANCH_U2: PREPBRANCH(2, BranchU); + case cJU_JPBRANCH_U3: PREPBRANCH(3, BranchU); +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: PREPBRANCH(4, BranchU); + case cJU_JPBRANCH_U5: PREPBRANCH(5, BranchU); + case cJU_JPBRANCH_U6: PREPBRANCH(6, BranchU); + case cJU_JPBRANCH_U7: PREPBRANCH(7, BranchU); +#endif + case cJU_JPBRANCH_U: pop1_jp = RootPop1; + { + Pjbu_t Pjbu; +BranchU: + Pjbu = P_JBU(Pjp->jp_Addr); + + for (offset = 0; offset < cJU_BRANCHUNUMJPS; ++offset) + { + if (((Pjbu->jbu_jp[offset].jp_Type) >= cJU_JPNULL1) + && ((Pjbu->jbu_jp[offset].jp_Type) <= cJU_JPNULLMAX)) + { + continue; // skip null JP to save time. + } + + pop1 += JudyCheckPopSM((Pjbu->jbu_jp) + offset, 0); + } + + assert(pop1_jp == pop1); + return(pop1); + } + + +// -- Cases below here terminate and do not recurse. -- +// +// For all of these cases except JPLEAF_B1, there is no way to check the JPs +// pop1 against the object itself; just return the pop1; but for linear leaves, +// a bounds check is possible. + +#define CHECKLEAF(MaxPop1) \ + pop1 = JU_JPLEAF_POP0(Pjp) + 1; \ + assert(pop1 >= 1); \ + assert(pop1 <= (MaxPop1)); \ + return(pop1) + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: CHECKLEAF(cJU_LEAF1_MAXPOP1); +#endif + case cJU_JPLEAF2: CHECKLEAF(cJU_LEAF2_MAXPOP1); + case cJU_JPLEAF3: CHECKLEAF(cJU_LEAF3_MAXPOP1); +#ifdef JU_64BIT + case cJU_JPLEAF4: CHECKLEAF(cJU_LEAF4_MAXPOP1); + case cJU_JPLEAF5: CHECKLEAF(cJU_LEAF5_MAXPOP1); + case cJU_JPLEAF6: CHECKLEAF(cJU_LEAF6_MAXPOP1); + case cJU_JPLEAF7: CHECKLEAF(cJU_LEAF7_MAXPOP1); +#endif + + case cJU_JPLEAF_B1: + { + Word_t subexp; + Pjlb_t Pjlb; + + pop1_jp = JU_JPLEAF_POP0(Pjp) + 1; + + Pjlb = P_JLB(Pjp->jp_Addr); + + for (subexp = 0; subexp < cJU_NUMSUBEXPL; ++subexp) + pop1 += j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, subexp)); + + assert(pop1_jp == pop1); + return(pop1); + } + + JUDY1CODE(case cJ1_JPFULLPOPU1: return(cJU_JPFULLPOPU1_POP0);) + + case cJU_JPIMMED_1_01: return(1); + case cJU_JPIMMED_2_01: return(1); + case cJU_JPIMMED_3_01: return(1); +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: return(1); + case cJU_JPIMMED_5_01: return(1); + case cJU_JPIMMED_6_01: return(1); + case cJU_JPIMMED_7_01: return(1); +#endif + + case cJU_JPIMMED_1_02: return(2); + case cJU_JPIMMED_1_03: return(3); +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: return(4); + case cJU_JPIMMED_1_05: return(5); + case cJU_JPIMMED_1_06: return(6); + case cJU_JPIMMED_1_07: return(7); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: return(8); + case cJ1_JPIMMED_1_09: return(9); + case cJ1_JPIMMED_1_10: return(10); + case cJ1_JPIMMED_1_11: return(11); + case cJ1_JPIMMED_1_12: return(12); + case cJ1_JPIMMED_1_13: return(13); + case cJ1_JPIMMED_1_14: return(14); + case cJ1_JPIMMED_1_15: return(15); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: return(2); + case cJU_JPIMMED_2_03: return(3); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: return(4); + case cJ1_JPIMMED_2_05: return(5); + case cJ1_JPIMMED_2_06: return(6); + case cJ1_JPIMMED_2_07: return(7); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: return(2); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: return(3); + case cJ1_JPIMMED_3_04: return(4); + case cJ1_JPIMMED_3_05: return(5); + + case cJ1_JPIMMED_4_02: return(2); + case cJ1_JPIMMED_4_03: return(3); + case cJ1_JPIMMED_5_02: return(2); + case cJ1_JPIMMED_5_03: return(3); + case cJ1_JPIMMED_6_02: return(2); + case cJ1_JPIMMED_7_02: return(2); +#endif + + } // switch (JU_JPTYPE(Pjp)) + + assert(FALSE); // unrecognized JP type => corruption. + return(0); // to make some compilers happy. + +} // JudyCheckPopSM() + +#endif // DEBUG +#endif // ! JUDYGETINLINE diff --git a/libnetdata/libjudy/src/JudyL/JudyLIns.c b/libnetdata/libjudy/src/JudyL/JudyLIns.c new file mode 100644 index 0000000..f96df41 --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLIns.c @@ -0,0 +1,1873 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.116 $ $Source: /judy/src/JudyCommon/JudyIns.c $ +// +// Judy1Set() and JudyLIns() functions for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. +// +// TBD: Should some of the assertions here be converted to product code that +// returns JU_ERRNO_CORRUPT? + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +// Note: Call JudyCheckPop() even before "already inserted" returns, to catch +// population errors; see fix in 4.84: + +DBGCODE(extern void JudyCheckPop(Pvoid_t PArray);) +DBGCODE(extern void JudyCheckSorted(Pjll_t Pjll, Word_t Pop1, long IndexSize);) + +#ifdef TRACEJP +#include "JudyPrintJP.c" +#endif + + +// These are defined to generic values in JudyCommon/JudyPrivateTypes.h: +// +// TBD: These should be exported from a header file, but perhaps not, as they +// are only used here, and exported from Judy*Decascade, which is a separate +// file for profiling reasons (to prevent inlining), but which potentially +// could be merged with this file, either in SoftCM or at compile-time. + +#ifdef JUDY1 +extern int j__udy1CreateBranchB(Pjp_t, Pjp_t, uint8_t *, Word_t, Pvoid_t); +extern int j__udy1CreateBranchU(Pjp_t, Pvoid_t); + +#ifndef JU_64BIT +extern int j__udy1Cascade1(Pjp_t, Pvoid_t); +#endif +extern int j__udy1Cascade2(Pjp_t, Pvoid_t); +extern int j__udy1Cascade3(Pjp_t, Pvoid_t); +#ifdef JU_64BIT +extern int j__udy1Cascade4(Pjp_t, Pvoid_t); +extern int j__udy1Cascade5(Pjp_t, Pvoid_t); +extern int j__udy1Cascade6(Pjp_t, Pvoid_t); +extern int j__udy1Cascade7(Pjp_t, Pvoid_t); +#endif +extern int j__udy1CascadeL(Pjp_t, Pvoid_t); + +extern int j__udy1InsertBranch(Pjp_t Pjp, Word_t Index, Word_t Btype, Pjpm_t); + +#else // JUDYL + +extern int j__udyLCreateBranchB(Pjp_t, Pjp_t, uint8_t *, Word_t, Pvoid_t); +extern int j__udyLCreateBranchU(Pjp_t, Pvoid_t); + +extern int j__udyLCascade1(Pjp_t, Pvoid_t); +extern int j__udyLCascade2(Pjp_t, Pvoid_t); +extern int j__udyLCascade3(Pjp_t, Pvoid_t); +#ifdef JU_64BIT +extern int j__udyLCascade4(Pjp_t, Pvoid_t); +extern int j__udyLCascade5(Pjp_t, Pvoid_t); +extern int j__udyLCascade6(Pjp_t, Pvoid_t); +extern int j__udyLCascade7(Pjp_t, Pvoid_t); +#endif +extern int j__udyLCascadeL(Pjp_t, Pvoid_t); + +extern int j__udyLInsertBranch(Pjp_t Pjp, Word_t Index, Word_t Btype, Pjpm_t); +#endif + + +// **************************************************************************** +// MACROS FOR COMMON CODE: +// +// Check if Index is an outlier to (that is, not a member of) this expanse: +// +// An outlier is an Index in-the-expanse of the slot containing the pointer, +// but not-in-the-expanse of the "narrow" pointer in that slot. (This means +// the Dcd part of the Index differs from the equivalent part of jp_DcdPopO.) +// Therefore, the remedy is to put a cJU_JPBRANCH_L* between the narrow pointer +// and the object to which it points, and add the outlier Index as an Immediate +// in the cJU_JPBRANCH_L*. The "trick" is placing the cJU_JPBRANCH_L* at a +// Level that is as low as possible. This is determined by counting the digits +// in the existing narrow pointer that are the same as the digits in the new +// Index (see j__udyInsertBranch()). +// +// Note: At some high Levels, cJU_DCDMASK() is all zeros => dead code; assume +// the compiler optimizes this out. + +#define JU_CHECK_IF_OUTLIER(Pjp, Index, cLevel, Pjpm) \ + if (JU_DCDNOTMATCHINDEX(Index, Pjp, cLevel)) \ + return(j__udyInsertBranch(Pjp, Index, cLevel, Pjpm)) + +// Check if an Index is already in a leaf or immediate, after calling +// j__udySearchLeaf*() to set Offset: +// +// A non-negative Offset means the Index already exists, so return 0; otherwise +// complement Offset to proceed. + +#ifdef JUDY1 +#define Pjv ignore // placeholder. +#define JU_CHECK_IF_EXISTS(Offset,ignore,Pjpm) \ + { \ + if ((Offset) >= 0) return(0); \ + (Offset) = ~(Offset); \ + } +#else +// For JudyL, also set the value area pointer in the Pjpm: + +#define JU_CHECK_IF_EXISTS(Offset,Pjv,Pjpm) \ + { \ + if ((Offset) >= 0) \ + { \ + (Pjpm)->jpm_PValue = (Pjv) + (Offset); \ + return(0); \ + } \ + (Offset) = ~(Offset); \ + } +#endif + + +// **************************************************************************** +// __ J U D Y I N S W A L K +// +// Walk the Judy tree to do a set/insert. This is only called internally, and +// recursively. Unlike Judy1Test() and JudyLGet(), the extra time required for +// recursion should be negligible compared with the total. +// +// Return -1 for error (details in JPM), 0 for Index already inserted, 1 for +// new Index inserted. + +FUNCTION static int j__udyInsWalk( + Pjp_t Pjp, // current JP to descend. + Word_t Index, // to insert. + Pjpm_t Pjpm) // for returning info to top Level. +{ + uint8_t digit; // from Index, current offset into a branch. + jp_t newJP; // for creating a new Immed JP. + Word_t exppop1; // expanse (leaf) population. + int retcode; // return codes: -1, 0, 1. + +#ifdef SUBEXPCOUNTS +// Pointer to BranchB/U subexpanse counter: +// +// Note: Very important for performance reasons (avoids cache fills). + + PWord_t PSubExp = (PWord_t) NULL; +#endif + +ContinueInsWalk: // for modifying state without recursing. + +#ifdef TRACEJP + JudyPrintJP(Pjp, "i", __LINE__); +#endif + + switch (JU_JPTYPE(Pjp)) // entry: Pjp, Index. + { + + +// **************************************************************************** +// JPNULL*: +// +// Convert JP in place from current null type to cJU_JPIMMED_*_01 by +// calculating new JP type. + + case cJU_JPNULL1: + case cJU_JPNULL2: + case cJU_JPNULL3: +#ifdef JU_64BIT + case cJU_JPNULL4: + case cJU_JPNULL5: + case cJU_JPNULL6: + case cJU_JPNULL7: +#endif + assert((Pjp->jp_Addr) == 0); + JU_JPSETADT(Pjp, 0, Index, JU_JPTYPE(Pjp) + cJU_JPIMMED_1_01 - cJU_JPNULL1); +#ifdef JUDYL + // value area is first word of new Immed_01 JP: + Pjpm->jpm_PValue = (Pjv_t) (&(Pjp->jp_Addr)); +#endif + return(1); + + +// **************************************************************************** +// JPBRANCH_L*: +// +// If the new Index is not an outlier to the branchs expanse, and the branch +// should not be converted to uncompressed, extract the digit and record the +// Immediate type to create for a new Immed JP, before going to common code. +// +// Note: JU_CHECK_IF_OUTLIER() is a no-op for BranchB3[7] on 32[64]-bit. + +#define JU_BRANCH_OUTLIER(DIGIT,POP1,cLEVEL,PJP,INDEX,PJPM) \ + JU_CHECK_IF_OUTLIER(PJP, INDEX, cLEVEL, PJPM); \ + (DIGIT) = JU_DIGITATSTATE(INDEX, cLEVEL); \ + (POP1) = JU_JPBRANCH_POP0(PJP, cLEVEL) + + case cJU_JPBRANCH_L2: + JU_BRANCH_OUTLIER(digit, exppop1, 2, Pjp, Index, Pjpm); + goto JudyBranchL; + + case cJU_JPBRANCH_L3: + JU_BRANCH_OUTLIER(digit, exppop1, 3, Pjp, Index, Pjpm); + goto JudyBranchL; + +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: + JU_BRANCH_OUTLIER(digit, exppop1, 4, Pjp, Index, Pjpm); + goto JudyBranchL; + + case cJU_JPBRANCH_L5: + JU_BRANCH_OUTLIER(digit, exppop1, 5, Pjp, Index, Pjpm); + goto JudyBranchL; + + case cJU_JPBRANCH_L6: + JU_BRANCH_OUTLIER(digit, exppop1, 6, Pjp, Index, Pjpm); + goto JudyBranchL; + + case cJU_JPBRANCH_L7: + JU_BRANCH_OUTLIER(digit, exppop1, 7, Pjp, Index, Pjpm); + goto JudyBranchL; +#endif + +// Similar to common code above, but no outlier check is needed, and the Immed +// type depends on the word size: + + case cJU_JPBRANCH_L: + { + Pjbl_t PjblRaw; // pointer to old linear branch. + Pjbl_t Pjbl; + Pjbu_t PjbuRaw; // pointer to new uncompressed branch. + Pjbu_t Pjbu; + Word_t numJPs; // number of JPs = populated expanses. + int offset; // in branch. + + digit = JU_DIGITATSTATE(Index, cJU_ROOTSTATE); + exppop1 = Pjpm->jpm_Pop0; + + // fall through: + +// COMMON CODE FOR LINEAR BRANCHES: +// +// Come here with digit and exppop1 already set. + +JudyBranchL: + PjblRaw = (Pjbl_t) (Pjp->jp_Addr); + Pjbl = P_JBL(PjblRaw); + +// If population under this branch greater than: + + if (exppop1 > JU_BRANCHL_MAX_POP) + goto ConvertBranchLtoU; + + numJPs = Pjbl->jbl_NumJPs; + + if ((numJPs == 0) || (numJPs > cJU_BRANCHLMAXJPS)) + { + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); + return(-1); + } + +// Search for a match to the digit: + + offset = j__udySearchLeaf1((Pjll_t) (Pjbl->jbl_Expanse), numJPs, + digit); + +// If Index is found, offset is into an array of 1..cJU_BRANCHLMAXJPS JPs: + + if (offset >= 0) + { + Pjp = (Pjbl->jbl_jp) + offset; // address of next JP. + break; // continue walk. + } + +// Expanse is missing (not populated) for the passed Index, so insert an Immed +// -- if theres room: + + if (numJPs < cJU_BRANCHLMAXJPS) + { + offset = ~offset; // insertion offset. + + JU_JPSETADT(&newJP, 0, Index, + JU_JPTYPE(Pjp) + cJU_JPIMMED_1_01-cJU_JPBRANCH_L2); + + JU_INSERTINPLACE(Pjbl->jbl_Expanse, numJPs, offset, digit); + JU_INSERTINPLACE(Pjbl->jbl_jp, numJPs, offset, newJP); + + DBGCODE(JudyCheckSorted((Pjll_t) (Pjbl->jbl_Expanse), + numJPs + 1, /* IndexSize = */ 1);) + ++(Pjbl->jbl_NumJPs); +#ifdef JUDYL + // value area is first word of new Immed 01 JP: + Pjpm->jpm_PValue = (Pjv_t) ((Pjbl->jbl_jp) + offset); +#endif + return(1); + } + + +// MAXED OUT LINEAR BRANCH, CONVERT TO A BITMAP BRANCH, THEN INSERT: +// +// Copy the linear branch to a bitmap branch. +// +// TBD: Consider renaming j__udyCreateBranchB() to j__udyConvertBranchLtoB(). + + assert((numJPs) <= cJU_BRANCHLMAXJPS); + + if (j__udyCreateBranchB(Pjp, Pjbl->jbl_jp, Pjbl->jbl_Expanse, + numJPs, Pjpm) == -1) + { + return(-1); + } + +// Convert jp_Type from linear branch to equivalent bitmap branch: + + Pjp->jp_Type += cJU_JPBRANCH_B - cJU_JPBRANCH_L; + + j__udyFreeJBL(PjblRaw, Pjpm); // free old BranchL. + +// Having changed branch types, now do the insert in the new branch type: + + goto ContinueInsWalk; + + +// OPPORTUNISTICALLY CONVERT FROM BRANCHL TO BRANCHU: +// +// Memory efficiency is no object because the branchs pop1 is large enough, so +// speed up array access. Come here with PjblRaw set. Note: This is goto +// code because the previous block used to fall through into it as well, but no +// longer. + +ConvertBranchLtoU: + +// Allocate memory for an uncompressed branch: + + if ((PjbuRaw = j__udyAllocJBU(Pjpm)) == (Pjbu_t) NULL) + return(-1); + Pjbu = P_JBU(PjbuRaw); + +// Set the proper NULL type for most of the uncompressed branchs JPs: + + JU_JPSETADT(&newJP, 0, 0, + JU_JPTYPE(Pjp) - cJU_JPBRANCH_L2 + cJU_JPNULL1); + +// Initialize: Pre-set uncompressed branch to mostly JPNULL*s: + + for (numJPs = 0; numJPs < cJU_BRANCHUNUMJPS; ++numJPs) + Pjbu->jbu_jp[numJPs] = newJP; + +// Copy JPs from linear branch to uncompressed branch: + + { +#ifdef SUBEXPCOUNTS + Word_t popmask = cJU_POP0MASK(JU_JPTYPE(Pjp)) + - cJU_JPBRANCH_L2 - 2; + + for (numJPs = 0; numJPs < cJU_NUMSUBEXPU; ++numJPs) + Pjbu->jbu_subPop1[numJPs] = 0; +#endif + for (numJPs = 0; numJPs < Pjbl->jbl_NumJPs; ++numJPs) + { + Pjp_t Pjp1 = &(Pjbl->jbl_jp[numJPs]); + offset = Pjbl->jbl_Expanse[numJPs]; + Pjbu->jbu_jp[offset] = *Pjp1; +#ifdef SUBEXPCOUNTS + Pjbu->jbu_subPop1[offset/cJU_NUMSUBEXPU] += + JU_JPDCDPOP0(Pjp1) & popmask + 1; +#endif + } + } + j__udyFreeJBL(PjblRaw, Pjpm); // free old BranchL. + +// Plug new values into parent JP: + + Pjp->jp_Addr = (Word_t) PjbuRaw; + Pjp->jp_Type += cJU_JPBRANCH_U - cJU_JPBRANCH_L; // to BranchU. + +// Save global population of last BranchU conversion: + + Pjpm->jpm_LastUPop0 = Pjpm->jpm_Pop0; + goto ContinueInsWalk; + + } // case cJU_JPBRANCH_L. + + +// **************************************************************************** +// JPBRANCH_B*: +// +// If the new Index is not an outlier to the branchs expanse, extract the +// digit and record the Immediate type to create for a new Immed JP, before +// going to common code. +// +// Note: JU_CHECK_IF_OUTLIER() is a no-op for BranchB3[7] on 32[64]-bit. + + case cJU_JPBRANCH_B2: + JU_BRANCH_OUTLIER(digit, exppop1, 2, Pjp, Index, Pjpm); + goto JudyBranchB; + + case cJU_JPBRANCH_B3: + JU_BRANCH_OUTLIER(digit, exppop1, 3, Pjp, Index, Pjpm); + goto JudyBranchB; + +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: + JU_BRANCH_OUTLIER(digit, exppop1, 4, Pjp, Index, Pjpm); + goto JudyBranchB; + + case cJU_JPBRANCH_B5: + JU_BRANCH_OUTLIER(digit, exppop1, 5, Pjp, Index, Pjpm); + goto JudyBranchB; + + case cJU_JPBRANCH_B6: + JU_BRANCH_OUTLIER(digit, exppop1, 6, Pjp, Index, Pjpm); + goto JudyBranchB; + + case cJU_JPBRANCH_B7: + JU_BRANCH_OUTLIER(digit, exppop1, 7, Pjp, Index, Pjpm); + goto JudyBranchB; +#endif + + case cJU_JPBRANCH_B: + { + Pjbb_t Pjbb; // pointer to bitmap branch. + Pjbb_t PjbbRaw; // pointer to bitmap branch. + Pjp_t Pjp2Raw; // 1 of N arrays of JPs. + Pjp_t Pjp2; // 1 of N arrays of JPs. + Word_t subexp; // 1 of N subexpanses in bitmap. + BITMAPB_t bitmap; // for one subexpanse. + BITMAPB_t bitmask; // bit set for Indexs digit. + Word_t numJPs; // number of JPs = populated expanses. + int offset; // in bitmap branch. + +// Similar to common code above, but no outlier check is needed, and the Immed +// type depends on the word size: + + digit = JU_DIGITATSTATE(Index, cJU_ROOTSTATE); + exppop1 = Pjpm->jpm_Pop0; + + // fall through: + + +// COMMON CODE FOR BITMAP BRANCHES: +// +// Come here with digit and exppop1 already set. + +JudyBranchB: + +// If population increment is greater than.. (300): + + if ((Pjpm->jpm_Pop0 - Pjpm->jpm_LastUPop0) > JU_BTOU_POP_INCREMENT) + { + +// If total population of array is greater than.. (750): + + if (Pjpm->jpm_Pop0 > JU_BRANCHB_MAX_POP) + { + +// If population under the branch is greater than.. (135): + + if (exppop1 > JU_BRANCHB_MIN_POP) + { + if (j__udyCreateBranchU(Pjp, Pjpm) == -1) return(-1); + +// Save global population of last BranchU conversion: + + Pjpm->jpm_LastUPop0 = Pjpm->jpm_Pop0; + + goto ContinueInsWalk; + } + } + } + +// CONTINUE TO USE BRANCHB: +// +// Get pointer to bitmap branch (JBB): + + PjbbRaw = (Pjbb_t) (Pjp->jp_Addr); + Pjbb = P_JBB(PjbbRaw); + +// Form the Int32 offset, and Bit offset values: +// +// 8 bit Decode | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | +// |SubExpanse | Bit offset | +// +// Get the 1 of 8 expanses from digit, Bits 5..7 = 1 of 8, and get the 32-bit +// word that may have a bit set: + + subexp = digit / cJU_BITSPERSUBEXPB; + bitmap = JU_JBB_BITMAP(Pjbb, subexp); + + Pjp2Raw = JU_JBB_PJP(Pjbb, subexp); + Pjp2 = P_JP(Pjp2Raw); + +// Get the bit position that represents the desired expanse, and get the offset +// into the array of JPs for the JP that matches the bit. + + bitmask = JU_BITPOSMASKB(digit); + offset = j__udyCountBitsB(bitmap & (bitmask - 1)); + +// If JP is already in this expanse, get Pjp and continue the walk: + + if (bitmap & bitmask) + { +#ifdef SUBEXPCOUNTS + PSubExp = &(Pjbb->jbb_Counts[subexp]); // ptr to subexp counts. +#endif + Pjp = Pjp2 + offset; + break; // continue walk. + } + + +// ADD NEW EXPANSE FOR NEW INDEX: +// +// The new expanse always an cJU_JPIMMED_*_01 containing just the new Index, so +// finish setting up an Immed JP. + + JU_JPSETADT(&newJP, 0, Index, + JU_JPTYPE(Pjp) + cJU_JPIMMED_1_01-cJU_JPBRANCH_B2); + +// Get 1 of the 8 JP arrays and calculate number of JPs in subexpanse array: + + Pjp2Raw = JU_JBB_PJP(Pjbb, subexp); + Pjp2 = P_JP(Pjp2Raw); + numJPs = j__udyCountBitsB(bitmap); + +// Expand branch JP subarray in-place: + + if (JU_BRANCHBJPGROWINPLACE(numJPs)) + { + assert(numJPs > 0); + JU_INSERTINPLACE(Pjp2, numJPs, offset, newJP); +#ifdef JUDYL + // value area is first word of new Immed 01 JP: + Pjpm->jpm_PValue = (Pjv_t) (Pjp2 + offset); +#endif + } + +// No room, allocate a bigger bitmap branch JP subarray: + + else + { + Pjp_t PjpnewRaw; + Pjp_t Pjpnew; + + if ((PjpnewRaw = j__udyAllocJBBJP(numJPs + 1, Pjpm)) == 0) + return(-1); + Pjpnew = P_JP(PjpnewRaw); + +// If there was an old JP array, then copy it, insert the new Immed JP, and +// free the old array: + + if (numJPs) + { + JU_INSERTCOPY(Pjpnew, Pjp2, numJPs, offset, newJP); + j__udyFreeJBBJP(Pjp2Raw, numJPs, Pjpm); +#ifdef JUDYL + // value area is first word of new Immed 01 JP: + Pjpm->jpm_PValue = (Pjv_t) (Pjpnew + offset); +#endif + } + +// New JP subarray; point to cJU_JPIMMED_*_01 and place it: + + else + { + assert(JU_JBB_PJP(Pjbb, subexp) == (Pjp_t) NULL); + Pjp = Pjpnew; + *Pjp = newJP; // copy to new memory. +#ifdef JUDYL + // value area is first word of new Immed 01 JP: + Pjpm->jpm_PValue = (Pjv_t) (&(Pjp->jp_Addr)); +#endif + } + +// Place new JP subarray in BranchB: + + JU_JBB_PJP(Pjbb, subexp) = PjpnewRaw; + + } // else + +// Set the new Indexs bit: + + JU_JBB_BITMAP(Pjbb, subexp) |= bitmask; + + return(1); + + } // case + + +// **************************************************************************** +// JPBRANCH_U*: +// +// Just drop through the JP for the correct digit. If the JP turns out to be a +// JPNULL*, thats OK, the memory is already allocated, and the next walk +// simply places an Immed in it. +// +#ifdef SUBEXPCOUNTS +#define JU_GETSUBEXP(PSubExp,Pjbu,Digit) \ + (PSubExp) = &((Pjbu)->jbu_subPop1[(Digit) / cJU_NUMSUBEXPU]) +#else +#define JU_GETSUBEXP(PSubExp,Pjbu,Digit) // null. +#endif + +#define JU_JBU_PJP_SUBEXP(Pjp,PSubExp,Index,Level) \ + { \ + uint8_t digit = JU_DIGITATSTATE(Index, Level); \ + Pjbu_t P_jbu = P_JBU((Pjp)->jp_Addr); \ + (Pjp) = &(P_jbu->jbu_jp[digit]); \ + JU_GETSUBEXP(PSubExp, P_jbu, digit); \ + } + + case cJU_JPBRANCH_U2: + JU_CHECK_IF_OUTLIER(Pjp, Index, 2, Pjpm); + JU_JBU_PJP_SUBEXP(Pjp, PSubExp, Index, 2); + break; + +#ifdef JU_64BIT + case cJU_JPBRANCH_U3: + JU_CHECK_IF_OUTLIER(Pjp, Index, 3, Pjpm); + JU_JBU_PJP_SUBEXP(Pjp, PSubExp, Index, 3); + break; + + case cJU_JPBRANCH_U4: + JU_CHECK_IF_OUTLIER(Pjp, Index, 4, Pjpm); + JU_JBU_PJP_SUBEXP(Pjp, PSubExp, Index, 4); + break; + + case cJU_JPBRANCH_U5: + JU_CHECK_IF_OUTLIER(Pjp, Index, 5, Pjpm); + JU_JBU_PJP_SUBEXP(Pjp, PSubExp, Index, 5); + break; + + case cJU_JPBRANCH_U6: + JU_CHECK_IF_OUTLIER(Pjp, Index, 6, Pjpm); + JU_JBU_PJP_SUBEXP(Pjp, PSubExp, Index, 6); + break; + + case cJU_JPBRANCH_U7: + JU_JBU_PJP_SUBEXP(Pjp, PSubExp, Index, 7); +#else + case cJU_JPBRANCH_U3: + JU_JBU_PJP_SUBEXP(Pjp, PSubExp, Index, 3); +#endif + break; + + case cJU_JPBRANCH_U: + JU_JBU_PJP_SUBEXP(Pjp, PSubExp, Index, cJU_ROOTSTATE); + break; + + +// **************************************************************************** +// JPLEAF*: +// +// COMMON CODE FRAGMENTS TO MINIMIZE REDUNDANCY BELOW: +// +// These are necessary to support performance by function and loop unrolling +// while avoiding huge amounts of nearly identical code. +// +// Prepare to handle a linear leaf: Check for an outlier; set pop1 and pointer +// to leaf: + +#ifdef JUDY1 +#define JU_LEAFVALUE(Pjv) // null. +#define JU_LEAFPREPVALUE(Pjv, ValueArea) // null. +#else +#define JU_LEAFVALUE(Pjv) Pjv_t Pjv +#define JU_LEAFPREPVALUE(Pjv, ValueArea) (Pjv) = ValueArea(Pleaf, exppop1) +#endif + +#define JU_LEAFPREP(cIS,Type,MaxPop1,ValueArea) \ + Pjll_t PjllRaw; \ + Type Pleaf; /* specific type */ \ + int offset; \ + JU_LEAFVALUE(Pjv); \ + \ + JU_CHECK_IF_OUTLIER(Pjp, Index, cIS, Pjpm); \ + \ + exppop1 = JU_JPLEAF_POP0(Pjp) + 1; \ + assert(exppop1 <= (MaxPop1)); \ + PjllRaw = (Pjll_t) (Pjp->jp_Addr); \ + Pleaf = (Type) P_JLL(PjllRaw); \ + JU_LEAFPREPVALUE(Pjv, ValueArea) + +// Add to, or grow, a linear leaf: Find Index position; if the Index is +// absent, if theres room in the leaf, insert the Index [and value of 0] in +// place, otherwise grow the leaf: +// +// Note: These insertions always take place with whole words, using +// JU_INSERTINPLACE() or JU_INSERTCOPY(). + +#ifdef JUDY1 +#define JU_LEAFGROWVALUEADD(Pjv,ExpPop1,Offset) // null. +#else +#define JU_LEAFGROWVALUEADD(Pjv,ExpPop1,Offset) \ + JU_INSERTINPLACE(Pjv, ExpPop1, Offset, 0); \ + Pjpm->jpm_PValue = (Pjv) + (Offset) +#endif + +#ifdef JUDY1 +#define JU_LEAFGROWVALUENEW(ValueArea,Pjv,ExpPop1,Offset) // null. +#else +#define JU_LEAFGROWVALUENEW(ValueArea,Pjv,ExpPop1,Offset) \ + { \ + Pjv_t Pjvnew = ValueArea(Pleafnew, (ExpPop1) + 1); \ + JU_INSERTCOPY(Pjvnew, Pjv, ExpPop1, Offset, 0); \ + Pjpm->jpm_PValue = (Pjvnew) + (Offset); \ + } +#endif + +#define JU_LEAFGROW(cIS,Type,MaxPop1,Search,ValueArea,GrowInPlace, \ + InsertInPlace,InsertCopy,Alloc,Free) \ + \ + offset = Search(Pleaf, exppop1, Index); \ + JU_CHECK_IF_EXISTS(offset, Pjv, Pjpm); \ + \ + if (GrowInPlace(exppop1)) /* add to current leaf */ \ + { \ + InsertInPlace(Pleaf, exppop1, offset, Index); \ + JU_LEAFGROWVALUEADD(Pjv, exppop1, offset); \ + DBGCODE(JudyCheckSorted((Pjll_t) Pleaf, exppop1 + 1, cIS);) \ + return(1); \ + } \ + \ + if (exppop1 < (MaxPop1)) /* grow to new leaf */ \ + { \ + Pjll_t PjllnewRaw; \ + Type Pleafnew; \ + if ((PjllnewRaw = Alloc(exppop1 + 1, Pjpm)) == 0) return(-1); \ + Pleafnew = (Type) P_JLL(PjllnewRaw); \ + InsertCopy(Pleafnew, Pleaf, exppop1, offset, Index); \ + JU_LEAFGROWVALUENEW(ValueArea, Pjv, exppop1, offset); \ + DBGCODE(JudyCheckSorted((Pjll_t) Pleafnew, exppop1 + 1, cIS);) \ + Free(PjllRaw, exppop1, Pjpm); \ + (Pjp->jp_Addr) = (Word_t) PjllnewRaw; \ + return(1); \ + } \ + assert(exppop1 == (MaxPop1)) + +// Handle linear leaf overflow (cascade): Splay or compress into smaller +// leaves: + +#define JU_LEAFCASCADE(MaxPop1,Cascade,Free) \ + if (Cascade(Pjp, Pjpm) == -1) return(-1); \ + Free(PjllRaw, MaxPop1, Pjpm); \ + goto ContinueInsWalk + +// Wrapper around all of the above: + +#define JU_LEAFSET(cIS,Type,MaxPop1,Search,GrowInPlace,InsertInPlace, \ + InsertCopy,Cascade,Alloc,Free,ValueArea) \ + { \ + JU_LEAFPREP(cIS,Type,MaxPop1,ValueArea); \ + JU_LEAFGROW(cIS,Type,MaxPop1,Search,ValueArea,GrowInPlace, \ + InsertInPlace,InsertCopy,Alloc,Free); \ + JU_LEAFCASCADE(MaxPop1,Cascade,Free); \ + } + +// END OF MACROS; LEAFL CASES START HERE: +// +// 64-bit Judy1 does not have 1-byte leaves: + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + + case cJU_JPLEAF1: + + JU_LEAFSET(1, uint8_t *, cJU_LEAF1_MAXPOP1, j__udySearchLeaf1, + JU_LEAF1GROWINPLACE, JU_INSERTINPLACE, JU_INSERTCOPY, + j__udyCascade1, j__udyAllocJLL1, j__udyFreeJLL1, + JL_LEAF1VALUEAREA); + +#endif // (JUDYL || ! JU_64BIT) + + case cJU_JPLEAF2: + + JU_LEAFSET(2, uint16_t *, cJU_LEAF2_MAXPOP1, j__udySearchLeaf2, + JU_LEAF2GROWINPLACE, JU_INSERTINPLACE, JU_INSERTCOPY, + j__udyCascade2, j__udyAllocJLL2, j__udyFreeJLL2, + JL_LEAF2VALUEAREA); + + case cJU_JPLEAF3: + + JU_LEAFSET(3, uint8_t *, cJU_LEAF3_MAXPOP1, j__udySearchLeaf3, + JU_LEAF3GROWINPLACE, JU_INSERTINPLACE3, JU_INSERTCOPY3, + j__udyCascade3, j__udyAllocJLL3, j__udyFreeJLL3, + JL_LEAF3VALUEAREA); + +#ifdef JU_64BIT + case cJU_JPLEAF4: + + JU_LEAFSET(4, uint32_t *, cJU_LEAF4_MAXPOP1, j__udySearchLeaf4, + JU_LEAF4GROWINPLACE, JU_INSERTINPLACE, JU_INSERTCOPY, + j__udyCascade4, j__udyAllocJLL4, j__udyFreeJLL4, + JL_LEAF4VALUEAREA); + + case cJU_JPLEAF5: + + JU_LEAFSET(5, uint8_t *, cJU_LEAF5_MAXPOP1, j__udySearchLeaf5, + JU_LEAF5GROWINPLACE, JU_INSERTINPLACE5, JU_INSERTCOPY5, + j__udyCascade5, j__udyAllocJLL5, j__udyFreeJLL5, + JL_LEAF5VALUEAREA); + + case cJU_JPLEAF6: + + JU_LEAFSET(6, uint8_t *, cJU_LEAF6_MAXPOP1, j__udySearchLeaf6, + JU_LEAF6GROWINPLACE, JU_INSERTINPLACE6, JU_INSERTCOPY6, + j__udyCascade6, j__udyAllocJLL6, j__udyFreeJLL6, + JL_LEAF6VALUEAREA); + + case cJU_JPLEAF7: + + JU_LEAFSET(7, uint8_t *, cJU_LEAF7_MAXPOP1, j__udySearchLeaf7, + JU_LEAF7GROWINPLACE, JU_INSERTINPLACE7, JU_INSERTCOPY7, + j__udyCascade7, j__udyAllocJLL7, j__udyFreeJLL7, + JL_LEAF7VALUEAREA); +#endif // JU_64BIT + + +// **************************************************************************** +// JPLEAF_B1: +// +// 8 bit Decode | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | +// |SubExpanse | Bit offset | +// +// Note: For JudyL, values are stored in 8 subexpanses, each a linear word +// array of up to 32 values each. + + case cJU_JPLEAF_B1: + { +#ifdef JUDYL + Pjv_t PjvRaw; // pointer to value part of the leaf. + Pjv_t Pjv; // pointer to value part of the leaf. + Pjv_t PjvnewRaw; // new value area. + Pjv_t Pjvnew; // new value area. + Word_t subexp; // 1 of 8 subexpanses in bitmap. + Pjlb_t Pjlb; // pointer to bitmap part of the leaf. + BITMAPL_t bitmap; // for one subexpanse. + BITMAPL_t bitmask; // bit set for Indexs digit. + int offset; // of index in value area. +#endif + + JU_CHECK_IF_OUTLIER(Pjp, Index, 1, Pjpm); + +#ifdef JUDY1 + +// If Index (bit) is already set, return now: + + if (JU_BITMAPTESTL(P_JLB(Pjp->jp_Addr), Index)) return(0); + +// If bitmap is not full, set the new Indexs bit; otherwise convert to a Full: + + if ((exppop1 = JU_JPLEAF_POP0(Pjp) + 1) + < cJU_JPFULLPOPU1_POP0) + { + JU_BITMAPSETL(P_JLB(Pjp->jp_Addr), Index); + } + else + { + j__udyFreeJLB1((Pjlb_t) (Pjp->jp_Addr), Pjpm); // free LeafB1. + Pjp->jp_Type = cJ1_JPFULLPOPU1; + Pjp->jp_Addr = 0; + } + +#else // JUDYL + +// This is very different from Judy1 because of the need to return a value area +// even for an existing Index, or manage the value area for a new Index, and +// because JudyL has no Full type: + +// Get last byte to decode from Index, and pointer to bitmap leaf: + + digit = JU_DIGITATSTATE(Index, 1); + Pjlb = P_JLB(Pjp->jp_Addr); + +// Prepare additional values: + + subexp = digit / cJU_BITSPERSUBEXPL; // which subexpanse. + bitmap = JU_JLB_BITMAP(Pjlb, subexp); // subexps 32-bit map. + PjvRaw = JL_JLB_PVALUE(Pjlb, subexp); // corresponding values. + Pjv = P_JV(PjvRaw); // corresponding values. + bitmask = JU_BITPOSMASKL(digit); // mask for Index. + offset = j__udyCountBitsL(bitmap & (bitmask - 1)); // of Index. + +// If Index already exists, get value pointer and exit: + + if (bitmap & bitmask) + { + assert(Pjv); + Pjpm->jpm_PValue = Pjv + offset; // existing value. + return(0); + } + +// Get the total bits set = expanse population of Value area: + + exppop1 = j__udyCountBitsL(bitmap); + +// If the value area can grow in place, do it: + + if (JL_LEAFVGROWINPLACE(exppop1)) + { + JU_INSERTINPLACE(Pjv, exppop1, offset, 0); + JU_JLB_BITMAP(Pjlb, subexp) |= bitmask; // set Indexs bit. + Pjpm->jpm_PValue = Pjv + offset; // new value area. + return(1); + } + +// Increase size of value area: + + if ((PjvnewRaw = j__udyLAllocJV(exppop1 + 1, Pjpm)) + == (Pjv_t) NULL) return(-1); + Pjvnew = P_JV(PjvnewRaw); + + if (exppop1) // have existing value area. + { + assert(Pjv); + JU_INSERTCOPY(Pjvnew, Pjv, exppop1, offset, 0); + Pjpm->jpm_PValue = Pjvnew + offset; + j__udyLFreeJV(PjvRaw, exppop1, Pjpm); // free old values. + } + else // first index, new value area: + { + Pjpm->jpm_PValue = Pjvnew; + *(Pjpm->jpm_PValue) = 0; + } + +// Set bit for new Index and place new leaf value area in bitmap: + + JU_JLB_BITMAP(Pjlb, subexp) |= bitmask; + JL_JLB_PVALUE(Pjlb, subexp) = PjvnewRaw; + +#endif // JUDYL + + return(1); + + } // case + + +#ifdef JUDY1 +// **************************************************************************** +// JPFULLPOPU1: +// +// If Index is not an outlier, then by definition its already set. + + case cJ1_JPFULLPOPU1: + + JU_CHECK_IF_OUTLIER(Pjp, Index, 1, Pjpm); + return(0); +#endif + + +// **************************************************************************** +// JPIMMED*: +// +// This is some of the most complex code in Judy considering Judy1 versus JudyL +// and 32-bit versus 64-bit variations. The following comments attempt to make +// this clearer. +// +// Of the 2 words in a JP, for immediate indexes Judy1 can use 2 words - 1 byte +// = 7 [15] bytes, but JudyL can only use 1 word - 1 byte = 3 [7] bytes because +// the other word is needed for a value area or a pointer to a value area. +// +// For both Judy1 and JudyL, cJU_JPIMMED_*_01 indexes are in word 2; otherwise +// for Judy1 only, a list of 2 or more indexes starts in word 1. JudyL keeps +// the list in word 2 because word 1 is a pointer (to a LeafV, that is, a leaf +// containing only values). Furthermore, cJU_JPIMMED_*_01 indexes are stored +// all-but-first-byte in jp_DcdPopO, not just the Index Sizes bytes. +// +// TBD: This can be confusing because Doug didnt use data structures for it. +// Instead he often directly accesses Pjp for the first word and jp_DcdPopO for +// the second word. It would be nice to use data structs, starting with +// jp_1Index and jp_LIndex where possible. +// +// Maximum Immed JP types for Judy1/JudyL, depending on Index Size (cIS): +// +// 32-bit 64-bit +// +// bytes: 7/ 3 15/ 7 (Judy1/JudyL) +// +// cIS +// 1_ 07/03 15/07 (as in: cJ1_JPIMMED_1_07) +// 2_ 03/01 07/03 +// 3_ 02/01 05/02 +// 4_ 03/01 +// 5_ 03/01 +// 6_ 02/01 +// 7_ 02/01 +// +// State transitions while inserting an Index, matching the above table: +// (Yes, this is very terse... Study it and it will make sense.) +// (Note, parts of this diagram are repeated below for quick reference.) +// +// +-- reformat JP here for Judy1 only, from word-2 to word-1 +// | +// | JUDY1 || JU_64BIT JUDY1 && JU_64BIT +// V +// 1_01 => 1_02 => 1_03 => [ 1_04 => ... => 1_07 => [ 1_08..15 => ]] Leaf1 (*) +// 2_01 => [ 2_02 => 2_03 => [ 2_04..07 => ]] Leaf2 +// 3_01 => [ 3_02 => [ 3_03..05 => ]] Leaf3 +// JU_64BIT only: +// 4_01 => [[ 4_02..03 => ]] Leaf4 +// 5_01 => [[ 5_02..03 => ]] Leaf5 +// 6_01 => [[ 6_02 => ]] Leaf6 +// 7_01 => [[ 7_02 => ]] Leaf7 +// +// (*) For Judy1 & 64-bit, go directly from cJU_JPIMMED_1_15 to a LeafB1; skip +// Leaf1, as described in Judy1.h regarding cJ1_JPLEAF1. + + +// COMMON CODE FRAGMENTS TO MINIMIZE REDUNDANCY BELOW: +// +// These are necessary to support performance by function and loop unrolling +// while avoiding huge amounts of nearly identical code. +// +// The differences between Judy1 and JudyL with respect to value area handling +// are just too large for completely common code between them... Oh well, some +// big ifdefs follow. However, even in the following ifdefd code, use cJU_*, +// JU_*, and Judy*() instead of cJ1_* / cJL_*, J1_* / JL_*, and +// Judy1*()/JudyL*(), for minimum diffs. +// +// Handle growth of cJU_JPIMMED_*_01 to cJU_JPIMMED_*_02, for an even or odd +// Index Size (cIS), given oldIndex, Index, and Pjll in the context: +// +// Put oldIndex and Index in their proper order. For odd indexes, must copy +// bytes. + +#ifdef JUDY1 + +#define JU_IMMSET_01_COPY_EVEN(ignore1,ignore2) \ + if (oldIndex < Index) { Pjll[0] = oldIndex; Pjll[1] = Index; } \ + else { Pjll[0] = Index; Pjll[1] = oldIndex; } + +#define JU_IMMSET_01_COPY_ODD(cIS,CopyWord) \ + if (oldIndex < Index) \ + { \ + CopyWord(Pjll + 0, oldIndex); \ + CopyWord(Pjll + (cIS), Index); \ + } \ + else \ + { \ + CopyWord(Pjll + 0, Index); \ + CopyWord(Pjll + (cIS), oldIndex); \ + } + +// The "real" *_01 Copy macro: +// +// Trim the high byte off Index, look for a match with the old Index, and if +// none, insert the new Index in the leaf in the correct place, given Pjp and +// Index in the context. +// +// Note: A single immediate index lives in the jp_DcdPopO field, but two or +// more reside starting at Pjp->jp_1Index. + +#define JU_IMMSET_01_COPY(cIS,LeafType,NewJPType,Copy,CopyWord) \ + { \ + LeafType Pjll; \ + Word_t oldIndex = JU_JPDCDPOP0(Pjp); \ + \ + Index = JU_TRIMTODCDSIZE(Index); \ + if (oldIndex == Index) return(0); \ + \ + Pjll = (LeafType) (Pjp->jp_1Index); \ + Copy(cIS,CopyWord); \ + DBGCODE(JudyCheckSorted(Pjll, 2, cIS);) \ + \ + Pjp->jp_Type = (NewJPType); \ + return(1); \ + } + +#else // JUDYL + +// Variations to also handle value areas; see comments above: +// +// For JudyL, Pjv (start of value area) and oldValue are also in the context; +// leave Pjv set to the value area for Index. + +#define JU_IMMSET_01_COPY_EVEN(cIS,CopyWord) \ + if (oldIndex < Index) \ + { \ + Pjll[0] = oldIndex; \ + Pjv [0] = oldValue; \ + Pjll[1] = Index; \ + ++Pjv; \ + } \ + else \ + { \ + Pjll[0] = Index; \ + Pjll[1] = oldIndex; \ + Pjv [1] = oldValue; \ + } + +#define JU_IMMSET_01_COPY_ODD(cIS,CopyWord) \ + if (oldIndex < Index) \ + { \ + CopyWord(Pjll + 0, oldIndex); \ + CopyWord(Pjll + (cIS), Index); \ + Pjv[0] = oldValue; \ + ++Pjv; \ + } \ + else \ + { \ + CopyWord(Pjll + 0, Index); \ + CopyWord(Pjll + (cIS), oldIndex); \ + Pjv[1] = oldValue; \ + } + +// The old value area is in the first word (*Pjp), and Pjv and Pjpm are also in +// the context. Also, unlike Judy1, indexes remain in word 2 (jp_LIndex), +// meaning insert-in-place rather than copy. +// +// Return jpm_PValue pointing to Indexs value area. If Index is new, allocate +// a 2-value-leaf and attach it to the JP. + +#define JU_IMMSET_01_COPY(cIS,LeafType,NewJPType,Copy,CopyWord) \ + { \ + LeafType Pjll; \ + Word_t oldIndex = JU_JPDCDPOP0(Pjp); \ + Word_t oldValue; \ + Pjv_t PjvRaw; \ + Pjv_t Pjv; \ + \ + Index = JU_TRIMTODCDSIZE(Index); \ + \ + if (oldIndex == Index) \ + { \ + Pjpm->jpm_PValue = (Pjv_t) Pjp; \ + return(0); \ + } \ + \ + if ((PjvRaw = j__udyLAllocJV(2, Pjpm)) == (Pjv_t) NULL) \ + return(-1); \ + Pjv = P_JV(PjvRaw); \ + \ + oldValue = Pjp->jp_Addr; \ + (Pjp->jp_Addr) = (Word_t) PjvRaw; \ + Pjll = (LeafType) (Pjp->jp_LIndex); \ + \ + Copy(cIS,CopyWord); \ + DBGCODE(JudyCheckSorted(Pjll, 2, cIS);) \ + \ + Pjp->jp_Type = (NewJPType); \ + *Pjv = 0; \ + Pjpm->jpm_PValue = Pjv; \ + return(1); \ + } + +// The following is a unique mix of JU_IMMSET_01() and JU_IMMSETCASCADE() for +// going from cJU_JPIMMED_*_01 directly to a cJU_JPLEAF* for JudyL: +// +// If Index is not already set, allocate a leaf, copy the old and new indexes +// into it, clear and return the new value area, and modify the current JP. +// Note that jp_DcdPop is set to a pop0 of 0 for now, and incremented later. + + +#define JU_IMMSET_01_CASCADE(cIS,LeafType,NewJPType,ValueArea, \ + Copy,CopyWord,Alloc) \ + { \ + Word_t D_P0; \ + LeafType PjllRaw; \ + LeafType Pjll; \ + Word_t oldIndex = JU_JPDCDPOP0(Pjp); \ + Word_t oldValue; \ + Pjv_t Pjv; \ + \ + Index = JU_TRIMTODCDSIZE(Index); \ + \ + if (oldIndex == Index) \ + { \ + Pjpm->jpm_PValue = (Pjv_t) (&(Pjp->jp_Addr)); \ + return(0); \ + } \ + \ + if ((PjllRaw = (LeafType) Alloc(2, Pjpm)) == (LeafType) NULL) \ + return(-1); \ + Pjll = (LeafType) P_JLL(PjllRaw); \ + Pjv = ValueArea(Pjll, 2); \ + \ + oldValue = Pjp->jp_Addr; \ + \ + Copy(cIS,CopyWord); \ + DBGCODE(JudyCheckSorted(Pjll, 2, cIS);) \ + \ + *Pjv = 0; \ + Pjpm->jpm_PValue = Pjv; \ + D_P0 = Index & cJU_DCDMASK(cIS); /* pop0 = 0 */ \ + JU_JPSETADT(Pjp, (Word_t)PjllRaw, D_P0, NewJPType); \ + \ + return(1); \ + } + +#endif // JUDYL + +// Handle growth of cJU_JPIMMED_*_[02..15]: + +#ifdef JUDY1 + +// Insert an Index into an immediate JP that has room for more, if the Index is +// not already present; given Pjp, Index, exppop1, Pjv, and Pjpm in the +// context: +// +// Note: Use this only when the JP format doesnt change, that is, going from +// cJU_JPIMMED_X_0Y to cJU_JPIMMED_X_0Z, where X >= 2 and Y+1 = Z. +// +// Note: Incrementing jp_Type is how to increase the Index population. + +#define JU_IMMSETINPLACE(cIS,LeafType,BaseJPType_02,Search,InsertInPlace) \ + { \ + LeafType Pjll; \ + int offset; \ + \ + exppop1 = JU_JPTYPE(Pjp) - (BaseJPType_02) + 2; \ + offset = Search((Pjll_t) (Pjp->jp_1Index), exppop1, Index); \ + \ + JU_CHECK_IF_EXISTS(offset, ignore, Pjpm); \ + \ + Pjll = (LeafType) (Pjp->jp_1Index); \ + InsertInPlace(Pjll, exppop1, offset, Index); \ + DBGCODE(JudyCheckSorted(Pjll, exppop1 + 1, cIS);) \ + ++(Pjp->jp_Type); \ + return(1); \ + } + +// Insert an Index into an immediate JP that has no room for more: +// +// If the Index is not already present, do a cascade (to a leaf); given Pjp, +// Index, Pjv, and Pjpm in the context. + + +#define JU_IMMSETCASCADE(cIS,OldPop1,LeafType,NewJPType, \ + ignore,Search,InsertCopy,Alloc) \ + { \ + Word_t D_P0; \ + Pjll_t PjllRaw; \ + Pjll_t Pjll; \ + int offset; \ + \ + offset = Search((Pjll_t) (Pjp->jp_1Index), (OldPop1), Index); \ + JU_CHECK_IF_EXISTS(offset, ignore, Pjpm); \ + \ + if ((PjllRaw = Alloc((OldPop1) + 1, Pjpm)) == 0) return(-1); \ + Pjll = P_JLL(PjllRaw); \ + \ + InsertCopy((LeafType) Pjll, (LeafType) (Pjp->jp_1Index), \ + OldPop1, offset, Index); \ + DBGCODE(JudyCheckSorted(Pjll, (OldPop1) + 1, cIS);) \ + \ + D_P0 = (Index & cJU_DCDMASK(cIS)) + (OldPop1) - 1; \ + JU_JPSETADT(Pjp, (Word_t)PjllRaw, D_P0, NewJPType); \ + return(1); \ + } + +#else // JUDYL + +// Variations to also handle value areas; see comments above: +// +// For JudyL, Pjv (start of value area) is also in the context. +// +// TBD: This code makes a true but weak assumption that a JudyL 32-bit 2-index +// value area must be copied to a new 3-index value area. AND it doesnt know +// anything about JudyL 64-bit cases (cJU_JPIMMED_1_0[3-7] only) where the +// value area can grow in place! However, this should not break it, just slow +// it down. + +#define JU_IMMSETINPLACE(cIS,LeafType,BaseJPType_02,Search,InsertInPlace) \ + { \ + LeafType Pleaf; \ + int offset; \ + Pjv_t PjvRaw; \ + Pjv_t Pjv; \ + Pjv_t PjvnewRaw; \ + Pjv_t Pjvnew; \ + \ + exppop1 = JU_JPTYPE(Pjp) - (BaseJPType_02) + 2; \ + offset = Search((Pjll_t) (Pjp->jp_LIndex), exppop1, Index); \ + PjvRaw = (Pjv_t) (Pjp->jp_Addr); \ + Pjv = P_JV(PjvRaw); \ + \ + JU_CHECK_IF_EXISTS(offset, Pjv, Pjpm); \ + \ + if ((PjvnewRaw = j__udyLAllocJV(exppop1 + 1, Pjpm)) \ + == (Pjv_t) NULL) return(-1); \ + Pjvnew = P_JV(PjvnewRaw); \ + \ + Pleaf = (LeafType) (Pjp->jp_LIndex); \ + \ + InsertInPlace(Pleaf, exppop1, offset, Index); \ + /* see TBD above about this: */ \ + JU_INSERTCOPY(Pjvnew, Pjv, exppop1, offset, 0); \ + DBGCODE(JudyCheckSorted(Pleaf, exppop1 + 1, cIS);) \ + j__udyLFreeJV(PjvRaw, exppop1, Pjpm); \ + Pjp->jp_Addr = (Word_t) PjvnewRaw; \ + Pjpm->jpm_PValue = Pjvnew + offset; \ + \ + ++(Pjp->jp_Type); \ + return(1); \ + } + +#define JU_IMMSETCASCADE(cIS,OldPop1,LeafType,NewJPType, \ + ValueArea,Search,InsertCopy,Alloc) \ + { \ + Word_t D_P0; \ + Pjll_t PjllRaw; \ + Pjll_t Pjll; \ + int offset; \ + Pjv_t PjvRaw; \ + Pjv_t Pjv; \ + Pjv_t Pjvnew; \ + \ + PjvRaw = (Pjv_t) (Pjp->jp_Addr); \ + Pjv = P_JV(PjvRaw); \ + offset = Search((Pjll_t) (Pjp->jp_LIndex), (OldPop1), Index); \ + JU_CHECK_IF_EXISTS(offset, Pjv, Pjpm); \ + \ + if ((PjllRaw = Alloc((OldPop1) + 1, Pjpm)) == 0) \ + return(-1); \ + Pjll = P_JLL(PjllRaw); \ + InsertCopy((LeafType) Pjll, (LeafType) (Pjp->jp_LIndex), \ + OldPop1, offset, Index); \ + DBGCODE(JudyCheckSorted(Pjll, (OldPop1) + 1, cIS);) \ + \ + Pjvnew = ValueArea(Pjll, (OldPop1) + 1); \ + JU_INSERTCOPY(Pjvnew, Pjv, OldPop1, offset, 0); \ + j__udyLFreeJV(PjvRaw, (OldPop1), Pjpm); \ + Pjpm->jpm_PValue = Pjvnew + offset; \ + \ + D_P0 = (Index & cJU_DCDMASK(cIS)) + (OldPop1) - 1; \ + JU_JPSETADT(Pjp, (Word_t)PjllRaw, D_P0, NewJPType); \ + return(1); \ + } + +#endif // JUDYL + +// Common convenience/shorthand wrappers around JU_IMMSET_01_COPY() for +// even/odd index sizes: + +#define JU_IMMSET_01( cIS, LeafType, NewJPType) \ + JU_IMMSET_01_COPY(cIS, LeafType, NewJPType, JU_IMMSET_01_COPY_EVEN, \ + ignore) + +#define JU_IMMSET_01_ODD( cIS, NewJPType, CopyWord) \ + JU_IMMSET_01_COPY(cIS, uint8_t *, NewJPType, JU_IMMSET_01_COPY_ODD, \ + CopyWord) + + +// END OF MACROS; IMMED CASES START HERE: + +// cJU_JPIMMED_*_01 cases: +// +// 1_01 always leads to 1_02: +// +// (1_01 => 1_02 => 1_03 => [ 1_04 => ... => 1_07 => [ 1_08..15 => ]] LeafL) + + case cJU_JPIMMED_1_01: JU_IMMSET_01(1, uint8_t *, cJU_JPIMMED_1_02); + +// 2_01 leads to 2_02, and 3_01 leads to 3_02, except for JudyL 32-bit, where +// they lead to a leaf: +// +// (2_01 => [ 2_02 => 2_03 => [ 2_04..07 => ]] LeafL) +// (3_01 => [ 3_02 => [ 3_03..05 => ]] LeafL) + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_01: JU_IMMSET_01(2, uint16_t *, cJU_JPIMMED_2_02); + case cJU_JPIMMED_3_01: JU_IMMSET_01_ODD (3, cJU_JPIMMED_3_02, + JU_COPY3_LONG_TO_PINDEX); +#else + case cJU_JPIMMED_2_01: + JU_IMMSET_01_CASCADE(2, uint16_t *, cJU_JPLEAF2, JL_LEAF2VALUEAREA, + JU_IMMSET_01_COPY_EVEN, ignore, + j__udyAllocJLL2); + case cJU_JPIMMED_3_01: + JU_IMMSET_01_CASCADE(3, uint8_t *, cJU_JPLEAF3, JL_LEAF3VALUEAREA, + JU_IMMSET_01_COPY_ODD, + JU_COPY3_LONG_TO_PINDEX, j__udyAllocJLL3); +#endif + +#ifdef JU_64BIT + +// [4-7]_01 lead to [4-7]_02 for Judy1, and to leaves for JudyL: +// +// (4_01 => [[ 4_02..03 => ]] LeafL) +// (5_01 => [[ 5_02..03 => ]] LeafL) +// (6_01 => [[ 6_02 => ]] LeafL) +// (7_01 => [[ 7_02 => ]] LeafL) + +#ifdef JUDY1 + case cJU_JPIMMED_4_01: JU_IMMSET_01(4, uint32_t *, cJ1_JPIMMED_4_02); + case cJU_JPIMMED_5_01: JU_IMMSET_01_ODD(5, cJ1_JPIMMED_5_02, + JU_COPY5_LONG_TO_PINDEX); + case cJU_JPIMMED_6_01: JU_IMMSET_01_ODD(6, cJ1_JPIMMED_6_02, + JU_COPY6_LONG_TO_PINDEX); + case cJU_JPIMMED_7_01: JU_IMMSET_01_ODD(7, cJ1_JPIMMED_7_02, + JU_COPY7_LONG_TO_PINDEX); +#else // JUDYL + case cJU_JPIMMED_4_01: + JU_IMMSET_01_CASCADE(4, uint32_t *, cJU_JPLEAF4, JL_LEAF4VALUEAREA, + JU_IMMSET_01_COPY_EVEN, ignore, + j__udyAllocJLL4); + case cJU_JPIMMED_5_01: + JU_IMMSET_01_CASCADE(5, uint8_t *, cJU_JPLEAF5, JL_LEAF5VALUEAREA, + JU_IMMSET_01_COPY_ODD, + JU_COPY5_LONG_TO_PINDEX, j__udyAllocJLL5); + case cJU_JPIMMED_6_01: + JU_IMMSET_01_CASCADE(6, uint8_t *, cJU_JPLEAF6, JL_LEAF6VALUEAREA, + JU_IMMSET_01_COPY_ODD, + JU_COPY6_LONG_TO_PINDEX, j__udyAllocJLL6); + case cJU_JPIMMED_7_01: + JU_IMMSET_01_CASCADE(7, uint8_t *, cJU_JPLEAF7, JL_LEAF7VALUEAREA, + JU_IMMSET_01_COPY_ODD, + JU_COPY7_LONG_TO_PINDEX, j__udyAllocJLL7); +#endif // JUDYL +#endif // JU_64BIT + +// cJU_JPIMMED_1_* cases that can grow in place: +// +// (1_01 => 1_02 => 1_03 => [ 1_04 => ... => 1_07 => [ 1_08..15 => ]] LeafL) + + case cJU_JPIMMED_1_02: +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_03: + case cJU_JPIMMED_1_04: + case cJU_JPIMMED_1_05: + case cJU_JPIMMED_1_06: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJU_JPIMMED_1_07: + case cJ1_JPIMMED_1_08: + case cJ1_JPIMMED_1_09: + case cJ1_JPIMMED_1_10: + case cJ1_JPIMMED_1_11: + case cJ1_JPIMMED_1_12: + case cJ1_JPIMMED_1_13: + case cJ1_JPIMMED_1_14: +#endif + JU_IMMSETINPLACE(1, uint8_t *, cJU_JPIMMED_1_02, j__udySearchLeaf1, + JU_INSERTINPLACE); + +// cJU_JPIMMED_1_* cases that must cascade: +// +// (1_01 => 1_02 => 1_03 => [ 1_04 => ... => 1_07 => [ 1_08..15 => ]] LeafL) + +#if (defined(JUDYL) && (! defined(JU_64BIT))) + case cJU_JPIMMED_1_03: + JU_IMMSETCASCADE(1, 3, uint8_t *, cJU_JPLEAF1, JL_LEAF1VALUEAREA, + j__udySearchLeaf1, JU_INSERTCOPY, + j__udyAllocJLL1); +#endif +#if (defined(JUDY1) && (! defined(JU_64BIT))) + case cJU_JPIMMED_1_07: + JU_IMMSETCASCADE(1, 7, uint8_t *, cJU_JPLEAF1, ignore, + j__udySearchLeaf1, JU_INSERTCOPY, + j__udyAllocJLL1); + +#endif +#if (defined(JUDYL) && defined(JU_64BIT)) + case cJU_JPIMMED_1_07: + JU_IMMSETCASCADE(1, 7, uint8_t *, cJU_JPLEAF1, JL_LEAF1VALUEAREA, + j__udySearchLeaf1, JU_INSERTCOPY, + j__udyAllocJLL1); + +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) +// Special case, as described above, go directly from Immed to LeafB1: + + case cJ1_JPIMMED_1_15: + { + Word_t DcdP0; + int offset; + Pjlb_t PjlbRaw; + Pjlb_t Pjlb; + + offset = j__udySearchLeaf1((Pjll_t) Pjp->jp_1Index, 15, Index); + + JU_CHECK_IF_EXISTS(offset, ignore, Pjpm); + +// Create a bitmap leaf (special case for Judy1 64-bit only, see usage): Set +// new Index in bitmap, copy an Immed1_15 to the bitmap, and set the parent JP +// EXCEPT jp_DcdPopO, leaving any followup to the caller: + + if ((PjlbRaw = j__udyAllocJLB1(Pjpm)) == (Pjlb_t) NULL) + return(-1); + Pjlb = P_JLB(PjlbRaw); + + JU_BITMAPSETL(Pjlb, Index); + + for (offset = 0; offset < 15; ++offset) + JU_BITMAPSETL(Pjlb, Pjp->jp_1Index[offset]); + +// Set jp_DcdPopO including the current pop0; incremented later: + DcdP0 = (Index & cJU_DCDMASK(1)) + 15 - 1; + JU_JPSETADT(Pjp, (Word_t)PjlbRaw, DcdP0, cJU_JPLEAF_B1); + + return(1); + } +#endif + +// cJU_JPIMMED_[2..7]_[02..15] cases that grow in place or cascade: +// +// (2_01 => [ 2_02 => 2_03 => [ 2_04..07 => ]] LeafL) + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJU_JPIMMED_2_03: + case cJ1_JPIMMED_2_04: + case cJ1_JPIMMED_2_05: + case cJ1_JPIMMED_2_06: +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + JU_IMMSETINPLACE(2, uint16_t *, cJU_JPIMMED_2_02, j__udySearchLeaf2, + JU_INSERTINPLACE); +#endif + +#undef OLDPOP1 +#if ((defined(JUDY1) && (! defined(JU_64BIT))) || (defined(JUDYL) && defined(JU_64BIT))) + case cJU_JPIMMED_2_03: +#define OLDPOP1 3 +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_07: +#define OLDPOP1 7 +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + JU_IMMSETCASCADE(2, OLDPOP1, uint16_t *, cJU_JPLEAF2, + JL_LEAF2VALUEAREA, j__udySearchLeaf2, + JU_INSERTCOPY, j__udyAllocJLL2); +#endif + +// (3_01 => [ 3_02 => [ 3_03..05 => ]] LeafL) + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJU_JPIMMED_3_02: + case cJ1_JPIMMED_3_03: + case cJ1_JPIMMED_3_04: + + JU_IMMSETINPLACE(3, uint8_t *, cJU_JPIMMED_3_02, j__udySearchLeaf3, + JU_INSERTINPLACE3); +#endif + +#undef OLDPOP1 +#if ((defined(JUDY1) && (! defined(JU_64BIT))) || (defined(JUDYL) && defined(JU_64BIT))) + case cJU_JPIMMED_3_02: +#define OLDPOP1 2 +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_05: +#define OLDPOP1 5 +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + JU_IMMSETCASCADE(3, OLDPOP1, uint8_t *, cJU_JPLEAF3, + JL_LEAF3VALUEAREA, j__udySearchLeaf3, + JU_INSERTCOPY3, j__udyAllocJLL3); +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + +// (4_01 => [[ 4_02..03 => ]] LeafL) + + case cJ1_JPIMMED_4_02: + + JU_IMMSETINPLACE(4, uint32_t *, cJ1_JPIMMED_4_02, j__udySearchLeaf4, + JU_INSERTINPLACE); + + case cJ1_JPIMMED_4_03: + + JU_IMMSETCASCADE(4, 3, uint32_t *, cJU_JPLEAF4, ignore, + j__udySearchLeaf4, JU_INSERTCOPY, + j__udyAllocJLL4); + +// (5_01 => [[ 5_02..03 => ]] LeafL) + + case cJ1_JPIMMED_5_02: + + JU_IMMSETINPLACE(5, uint8_t *, cJ1_JPIMMED_5_02, j__udySearchLeaf5, + JU_INSERTINPLACE5); + + case cJ1_JPIMMED_5_03: + + JU_IMMSETCASCADE(5, 3, uint8_t *, cJU_JPLEAF5, ignore, + j__udySearchLeaf5, JU_INSERTCOPY5, + j__udyAllocJLL5); + +// (6_01 => [[ 6_02 => ]] LeafL) + + case cJ1_JPIMMED_6_02: + + JU_IMMSETCASCADE(6, 2, uint8_t *, cJU_JPLEAF6, ignore, + j__udySearchLeaf6, JU_INSERTCOPY6, + j__udyAllocJLL6); + +// (7_01 => [[ 7_02 => ]] LeafL) + + case cJ1_JPIMMED_7_02: + + JU_IMMSETCASCADE(7, 2, uint8_t *, cJU_JPLEAF7, ignore, + j__udySearchLeaf7, JU_INSERTCOPY7, + j__udyAllocJLL7); + +#endif // (JUDY1 && JU_64BIT) + + +// **************************************************************************** +// INVALID JP TYPE: + + default: JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); return(-1); + + } // switch on JP type + + { + +#ifdef SUBEXPCOUNTS + +// This code might seem strange here. However it saves some memory read time +// during insert (~70nS) because a pipelined processor does not need to "stall" +// waiting for the memory read to complete. Hope the compiler is not too smart +// or dumb and moves the code down to where it looks like it belongs (below a +// few lines). + + Word_t SubExpCount = 0; // current subexpanse counter. + + if (PSubExp != (PWord_t) NULL) // only if BranchB/U. + SubExpCount = PSubExp[0]; +#endif + +// PROCESS JP -- RECURSIVELY: +// +// For non-Immed JP types, if successful, post-increment the population count +// at this Level. + + retcode = j__udyInsWalk(Pjp, Index, Pjpm); + +// Successful insert, increment JP and subexpanse count: + + if ((JU_JPTYPE(Pjp) < cJU_JPIMMED_1_01) && (retcode == 1)) + { + jp_t JP; + Word_t DcdP0; +#ifdef SUBEXPCOUNTS + +// Note: Pjp must be a pointer to a BranchB/U: + + if (PSubExp != (PWord_t) NULL) PSubExp[0] = SubExpCount + 1; +#endif + + JP = *Pjp; + DcdP0 = JU_JPDCDPOP0(Pjp) + 1; + JU_JPSETADT(Pjp, JP.jp_Addr, DcdP0, JU_JPTYPE(&JP)); + } + } + return(retcode); + +} // j__udyInsWalk() + + +// **************************************************************************** +// J U D Y 1 S E T +// J U D Y L I N S +// +// Main entry point. See the manual entry for details. + +#ifdef JUDY1 +FUNCTION int Judy1Set +#else +FUNCTION PPvoid_t JudyLIns +#endif + ( + PPvoid_t PPArray, // in which to insert. + Word_t Index, // to insert. + PJError_t PJError // optional, for returning error info. + ) +{ +#ifdef JUDY1 +#define Pjv ignore // placeholders for macros. +#define Pjvnew ignore +#else + Pjv_t Pjv; // value area in old leaf. + Pjv_t Pjvnew; // value area in new leaf. +#endif + Pjpm_t Pjpm; // array-global info. + int offset; // position in which to store new Index. + Pjlw_t Pjlw; + + +// CHECK FOR NULL POINTER (error by caller): + + if (PPArray == (PPvoid_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPPARRAY); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + + Pjlw = P_JLW(*PPArray); // first word of leaf. + +// **************************************************************************** +// PROCESS TOP LEVEL "JRP" BRANCHES AND LEAVES: + +// **************************************************************************** +// JRPNULL (EMPTY ARRAY): BUILD A LEAFW WITH ONE INDEX: + +// if a valid empty array (null pointer), so create an array of population == 1: + + if (Pjlw == (Pjlw_t)NULL) + { + Pjlw_t Pjlwnew; + + Pjlwnew = j__udyAllocJLW(1); + JUDY1CODE(JU_CHECKALLOC(Pjlw_t, Pjlwnew, JERRI );) + JUDYLCODE(JU_CHECKALLOC(Pjlw_t, Pjlwnew, PPJERR);) + + Pjlwnew[0] = 1 - 1; // pop0 = 0. + Pjlwnew[1] = Index; + + *PPArray = (Pvoid_t) Pjlwnew; + DBGCODE(JudyCheckPop(*PPArray);) + + JUDY1CODE(return(1); ) + JUDYLCODE(Pjlwnew[2] = 0; ) // value area. + JUDYLCODE(return((PPvoid_t) (Pjlwnew + 2)); ) + + } // NULL JRP + +// **************************************************************************** +// LEAFW, OTHER SIZE: + + if (JU_LEAFW_POP0(*PPArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlwnew; + Word_t pop1; + + Pjlw = P_JLW(*PPArray); // first word of leaf. + pop1 = Pjlw[0] + 1; + +#ifdef JUDYL + Pjv = JL_LEAFWVALUEAREA(Pjlw, pop1); +#endif + offset = j__udySearchLeafW(Pjlw + 1, pop1, Index); + + if (offset >= 0) // index is already valid: + { + DBGCODE(JudyCheckPop(*PPArray);) + JUDY1CODE(return(0); ) + JUDYLCODE(return((PPvoid_t) (Pjv + offset)); ) + } + + offset = ~offset; + +// Insert index in cases where no new memory is needed: + + if (JU_LEAFWGROWINPLACE(pop1)) + { + ++Pjlw[0]; // increase population. + + JU_INSERTINPLACE(Pjlw + 1, pop1, offset, Index); +#ifdef JUDYL + JU_INSERTINPLACE(Pjv, pop1, offset, 0); +#endif + DBGCODE(JudyCheckPop(*PPArray);) + DBGCODE(JudyCheckSorted(Pjlw + 1, pop1 + 1, cJU_ROOTSTATE);) + + JUDY1CODE(return(1); ) + JUDYLCODE(return((PPvoid_t) (Pjv + offset)); ) + } + +// Insert index into a new, larger leaf: + + if (pop1 < cJU_LEAFW_MAXPOP1) // can grow to a larger leaf. + { + Pjlwnew = j__udyAllocJLW(pop1 + 1); + JUDY1CODE(JU_CHECKALLOC(Pjlw_t, Pjlwnew, JERRI );) + JUDYLCODE(JU_CHECKALLOC(Pjlw_t, Pjlwnew, PPJERR);) + + Pjlwnew[0] = pop1; // set pop0 in new leaf. + + JU_INSERTCOPY(Pjlwnew + 1, Pjlw + 1, pop1, offset, Index); +#ifdef JUDYL + Pjvnew = JL_LEAFWVALUEAREA(Pjlwnew, pop1 + 1); + JU_INSERTCOPY(Pjvnew, Pjv, pop1, offset, 0); +#endif + DBGCODE(JudyCheckSorted(Pjlwnew + 1, pop1 + 1, cJU_ROOTSTATE);) + + j__udyFreeJLW(Pjlw, pop1, NULL); + + *PPArray = (Pvoid_t) Pjlwnew; + DBGCODE(JudyCheckPop(*PPArray);) + + JUDY1CODE(return(1); ) + JUDYLCODE(return((PPvoid_t) (Pjvnew + offset)); ) + } + + assert(pop1 == cJU_LEAFW_MAXPOP1); + +// Leaf at max size => cannot insert new index, so cascade instead: +// +// Upon cascading from a LEAFW leaf to the first branch, must allocate and +// initialize a JPM. + + Pjpm = j__udyAllocJPM(); + JUDY1CODE(JU_CHECKALLOC(Pjpm_t, Pjpm, JERRI );) + JUDYLCODE(JU_CHECKALLOC(Pjpm_t, Pjpm, PPJERR);) + + (Pjpm->jpm_Pop0) = cJU_LEAFW_MAXPOP1 - 1; + (Pjpm->jpm_JP.jp_Addr) = (Word_t) Pjlw; + + if (j__udyCascadeL(&(Pjpm->jpm_JP), Pjpm) == -1) + { + JU_COPY_ERRNO(PJError, Pjpm); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + +// Note: No need to pass Pjpm for memory decrement; LEAFW memory is never +// counted in a JPM at all: + + j__udyFreeJLW(Pjlw, cJU_LEAFW_MAXPOP1, NULL); + *PPArray = (Pvoid_t) Pjpm; + + } // JU_LEAFW + +// **************************************************************************** +// BRANCH: + + { + int retcode; // really only needed for Judy1, but free for JudyL. + + Pjpm = P_JPM(*PPArray); + retcode = j__udyInsWalk(&(Pjpm->jpm_JP), Index, Pjpm); + + if (retcode == -1) + { + JU_COPY_ERRNO(PJError, Pjpm); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + + if (retcode == 1) ++(Pjpm->jpm_Pop0); // incr total array popu. + + assert(((Pjpm->jpm_JP.jp_Type) == cJU_JPBRANCH_L) + || ((Pjpm->jpm_JP.jp_Type) == cJU_JPBRANCH_B) + || ((Pjpm->jpm_JP.jp_Type) == cJU_JPBRANCH_U)); + DBGCODE(JudyCheckPop(*PPArray);) + +#ifdef JUDY1 + assert((retcode == 0) || (retcode == 1)); + return(retcode); // == JU_RET_*_JPM(). +#else + assert(Pjpm->jpm_PValue != (Pjv_t) NULL); + return((PPvoid_t) Pjpm->jpm_PValue); +#endif + } + /*NOTREACHED*/ + +} // Judy1Set() / JudyLIns() diff --git a/libnetdata/libjudy/src/JudyL/JudyLInsArray.c b/libnetdata/libjudy/src/JudyL/JudyLInsArray.c new file mode 100644 index 0000000..f8e361f --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLInsArray.c @@ -0,0 +1,1178 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// TBD: It would probably be faster for the caller if the JudyL version took +// PIndex as an interleaved array of indexes and values rather than just +// indexes with a separate values array (PValue), especially considering +// indexes and values are copied here with for-loops anyway and not the +// equivalent of memcpy(). All code could be revised to simply count by two +// words for JudyL? Supports "streaming" the data to/from disk better later? +// In which case get rid of JU_ERRNO_NULLPVALUE, no longer needed, and simplify +// the API to this code. +// _________________ + +// @(#) $Revision: 4.21 $ $Source: /judy/src/JudyCommon/JudyInsArray.c $ +// +// Judy1SetArray() and JudyLInsArray() functions for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +DBGCODE(extern void JudyCheckPop(Pvoid_t PArray);) + + +// IMMED AND LEAF SIZE AND BRANCH TYPE ARRAYS: +// +// These support fast and easy lookup by level. + +static uint8_t immed_maxpop1[] = { + 0, + cJU_IMMED1_MAXPOP1, + cJU_IMMED2_MAXPOP1, + cJU_IMMED3_MAXPOP1, +#ifdef JU_64BIT + cJU_IMMED4_MAXPOP1, + cJU_IMMED5_MAXPOP1, + cJU_IMMED6_MAXPOP1, + cJU_IMMED7_MAXPOP1, +#endif + // note: There are no IMMEDs for whole words. +}; + +static uint8_t leaf_maxpop1[] = { + 0, +#if (defined(JUDYL) || (! defined(JU_64BIT))) + cJU_LEAF1_MAXPOP1, +#else + 0, // 64-bit Judy1 has no Leaf1. +#endif + cJU_LEAF2_MAXPOP1, + cJU_LEAF3_MAXPOP1, +#ifdef JU_64BIT + cJU_LEAF4_MAXPOP1, + cJU_LEAF5_MAXPOP1, + cJU_LEAF6_MAXPOP1, + cJU_LEAF7_MAXPOP1, +#endif + // note: Root-level leaves are handled differently. +}; + +static uint8_t branchL_JPtype[] = { + 0, + 0, + cJU_JPBRANCH_L2, + cJU_JPBRANCH_L3, +#ifdef JU_64BIT + cJU_JPBRANCH_L4, + cJU_JPBRANCH_L5, + cJU_JPBRANCH_L6, + cJU_JPBRANCH_L7, +#endif + cJU_JPBRANCH_L, +}; + +static uint8_t branchB_JPtype[] = { + 0, + 0, + cJU_JPBRANCH_B2, + cJU_JPBRANCH_B3, +#ifdef JU_64BIT + cJU_JPBRANCH_B4, + cJU_JPBRANCH_B5, + cJU_JPBRANCH_B6, + cJU_JPBRANCH_B7, +#endif + cJU_JPBRANCH_B, +}; + +static uint8_t branchU_JPtype[] = { + 0, + 0, + cJU_JPBRANCH_U2, + cJU_JPBRANCH_U3, +#ifdef JU_64BIT + cJU_JPBRANCH_U4, + cJU_JPBRANCH_U5, + cJU_JPBRANCH_U6, + cJU_JPBRANCH_U7, +#endif + cJU_JPBRANCH_U, +}; + +// Subexpanse masks are similer to JU_DCDMASK() but without the need to clear +// the first digits bits. Avoid doing variable shifts by precomputing a +// lookup array. + +static Word_t subexp_mask[] = { + 0, + ~cJU_POP0MASK(1), + ~cJU_POP0MASK(2), + ~cJU_POP0MASK(3), +#ifdef JU_64BIT + ~cJU_POP0MASK(4), + ~cJU_POP0MASK(5), + ~cJU_POP0MASK(6), + ~cJU_POP0MASK(7), +#endif +}; + + +// FUNCTION PROTOTYPES: + +static bool_t j__udyInsArray(Pjp_t PjpParent, int Level, PWord_t PPop1, + PWord_t PIndex, +#ifdef JUDYL + Pjv_t PValue, +#endif + Pjpm_t Pjpm); + + +// **************************************************************************** +// J U D Y 1 S E T A R R A Y +// J U D Y L I N S A R R A Y +// +// Main entry point. See the manual entry for external overview. +// +// TBD: Until thats written, note that the function returns 1 for success or +// JERRI for serious error, including insufficient memory to build whole array; +// use Judy*Count() to see how many were stored, the first N of the total +// Count. Also, since it takes Count == Pop1, it cannot handle a full array. +// Also, "sorted" means ascending without duplicates, otherwise you get the +// "unsorted" error. +// +// The purpose of these functions is to allow rapid construction of a large +// Judy array given a sorted list of indexes (and for JudyL, corresponding +// values). At least one customer saw this as useful, and probably it would +// also be useful as a sufficient workaround for fast(er) unload/reload to/from +// disk. +// +// This code is written recursively for simplicity, until/unless someone +// decides to make it faster and more complex. Hopefully recursion is fast +// enough simply because the function is so much faster than a series of +// Set/Ins calls. + +#ifdef JUDY1 +FUNCTION int Judy1SetArray +#else +FUNCTION int JudyLInsArray +#endif + ( + PPvoid_t PPArray, // in which to insert, initially empty. + Word_t Count, // number of indexes (and values) to insert. +const Word_t * const PIndex, // list of indexes to insert. +#ifdef JUDYL +const Word_t * const PValue, // list of corresponding values. +#endif + PJError_t PJError // optional, for returning error info. + ) +{ + Pjlw_t Pjlw; // new root-level leaf. + Pjlw_t Pjlwindex; // first index in root-level leaf. + int offset; // in PIndex. + + +// CHECK FOR NULL OR NON-NULL POINTER (error by caller): + + if (PPArray == (PPvoid_t) NULL) + { JU_SET_ERRNO(PJError, JU_ERRNO_NULLPPARRAY); return(JERRI); } + + if (*PPArray != (Pvoid_t) NULL) + { JU_SET_ERRNO(PJError, JU_ERRNO_NONNULLPARRAY); return(JERRI); } + + if (PIndex == (PWord_t) NULL) + { JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX); return(JERRI); } + +#ifdef JUDYL + if (PValue == (PWord_t) NULL) + { JU_SET_ERRNO(PJError, JU_ERRNO_NULLPVALUE); return(JERRI); } +#endif + + +// HANDLE LARGE COUNT (= POP1) (typical case): +// +// Allocate and initialize a JPM, set the root pointer to point to it, and then +// build the tree underneath it. + +// Common code for unusual error handling when no JPM available: + + if (Count > cJU_LEAFW_MAXPOP1) // too big for root-level leaf. + { + Pjpm_t Pjpm; // new, to allocate. + +// Allocate JPM: + + Pjpm = j__udyAllocJPM(); + JU_CHECKALLOC(Pjpm_t, Pjpm, JERRI); + *PPArray = (Pvoid_t) Pjpm; + +// Set some JPM fields: + + (Pjpm->jpm_Pop0) = Count - 1; + // note: (Pjpm->jpm_TotalMemWords) is now initialized. + +// Build Judy tree: +// +// In case of error save the final Count, possibly modified, unless modified to +// 0, in which case free the JPM itself: + + if (! j__udyInsArray(&(Pjpm->jpm_JP), cJU_ROOTSTATE, &Count, + (PWord_t) PIndex, +#ifdef JUDYL + (Pjv_t) PValue, +#endif + Pjpm)) + { + JU_COPY_ERRNO(PJError, Pjpm); + + if (Count) // partial success, adjust pop0: + { + (Pjpm->jpm_Pop0) = Count - 1; + } + else // total failure, free JPM: + { + j__udyFreeJPM(Pjpm, (Pjpm_t) NULL); + *PPArray = (Pvoid_t) NULL; + } + + DBGCODE(JudyCheckPop(*PPArray);) + return(JERRI); + } + + DBGCODE(JudyCheckPop(*PPArray);) + return(1); + + } // large count + + +// HANDLE SMALL COUNT (= POP1): +// +// First ensure indexes are in sorted order: + + for (offset = 1; offset < Count; ++offset) + { + if (PIndex[offset - 1] >= PIndex[offset]) + { JU_SET_ERRNO(PJError, JU_ERRNO_UNSORTED); return(JERRI); } + } + + if (Count == 0) return(1); // *PPArray remains null. + + { + Pjlw = j__udyAllocJLW(Count + 1); + JU_CHECKALLOC(Pjlw_t, Pjlw, JERRI); + *PPArray = (Pvoid_t) Pjlw; + Pjlw[0] = Count - 1; // set pop0. + Pjlwindex = Pjlw + 1; + } + +// Copy whole-word indexes (and values) to the root-level leaf: + + JU_COPYMEM(Pjlwindex, PIndex, Count); +JUDYLCODE(JU_COPYMEM(JL_LEAFWVALUEAREA(Pjlw, Count), PValue, Count)); + + DBGCODE(JudyCheckPop(*PPArray);) + return(1); + +} // Judy1SetArray() / JudyLInsArray() + + +// **************************************************************************** +// __ J U D Y I N S A R R A Y +// +// Given: +// +// - a pointer to a JP +// +// - the JPs level in the tree, that is, the number of digits left to decode +// in the indexes under the JP (one less than the level of the JPM or branch +// in which the JP resides); cJU_ROOTSTATE on first entry (when JP is the one +// in the JPM), down to 1 for a Leaf1, LeafB1, or FullPop +// +// - a pointer to the number of indexes (and corresponding values) to store in +// this subtree, to modify in case of partial success +// +// - a list of indexes (and for JudyL, corresponding values) to store in this +// subtree +// +// - a JPM for tracking memory usage and returning errors +// +// Recursively build a subtree (immediate indexes, leaf, or branch with +// subtrees) and modify the JP accordingly. On the way down, build a BranchU +// (only) for any expanse with *PPop1 too high for a leaf; on the way out, +// convert the BranchU to a BranchL or BranchB if appropriate. Keep memory +// statistics in the JPM. +// +// Return TRUE for success, or FALSE with error information set in the JPM in +// case of error, in which case leave a partially constructed but healthy tree, +// and modify parent population counts on the way out. +// +// Note: Each call of this function makes all modifications to the PjpParent +// it receives; neither the parent nor child calls do this. + +FUNCTION static bool_t j__udyInsArray( + Pjp_t PjpParent, // parent JP in/under which to store. + int Level, // initial digits remaining to decode. + PWord_t PPop1, // number of indexes to store. + PWord_t PIndex, // list of indexes to store. +#ifdef JUDYL + Pjv_t PValue, // list of corresponding values. +#endif + Pjpm_t Pjpm) // for memory and errors. +{ + Pjp_t Pjp; // lower-level JP. + Word_t Pjbany; // any type of branch. + int levelsub; // actual, of Pjps node, <= Level. + Word_t pop1 = *PPop1; // fast local value. + Word_t pop1sub; // population of one subexpanse. + uint8_t JPtype; // current JP type. + uint8_t JPtype_null; // precomputed value for new branch. + jp_t JPnull; // precomputed for speed. + Pjbu_t PjbuRaw; // constructed BranchU. + Pjbu_t Pjbu; + int digit; // in BranchU. + Word_t digitmask; // for a digit in a BranchU. + Word_t digitshifted; // shifted to correct offset. + Word_t digitshincr; // increment for digitshifted. + int offset; // in PIndex, or a bitmap subexpanse. + int numJPs; // number non-null in a BranchU. + bool_t retval; // to return from this func. +JUDYLCODE(Pjv_t PjvRaw); // destination value area. +JUDYLCODE(Pjv_t Pjv); + + +// MACROS FOR COMMON CODE: +// +// Note: These use function and local parameters from the context. +// Note: Assume newly allocated memory is zeroed. + +// Indicate whether a sorted list of indexes in PIndex, based on the first and +// last indexes in the list using pop1, are in the same subexpanse between +// Level and L_evel: +// +// This can be confusing! Note that SAMESUBEXP(L) == TRUE means the indexes +// are the same through level L + 1, and it says nothing about level L and +// lower; they might be the same or they might differ. +// +// Note: In principle SAMESUBEXP needs a mask for the digits from Level, +// inclusive, to L_evel, exclusive. But in practice, since the indexes are all +// known to be identical above Level, it just uses a mask for the digits +// through L_evel + 1; see subexp_mask[]. + +#define SAMESUBEXP(L_evel) \ + (! ((PIndex[0] ^ PIndex[pop1 - 1]) & subexp_mask[L_evel])) + +// Set PjpParent to a null JP appropriate for the level of the node to which it +// points, which is 1 less than the level of the node in which the JP resides, +// which is by definition Level: +// +// Note: This can set the JPMs JP to an invalid jp_Type, but it doesnt +// matter because the JPM is deleted by the caller. + +#define SETJPNULL_PARENT \ + JU_JPSETADT(PjpParent, 0, 0, cJU_JPNULL1 + Level - 1); + +// Variation to set a specified JP (in a branch being built) to a precomputed +// null JP: + +#define SETJPNULL(Pjp) *(Pjp) = JPnull + +// Handle complete (as opposed to partial) memory allocation failure: Set the +// parent JP to an appropriate null type (to leave a consistent tree), zero the +// callers population count, and return FALSE: +// +// Note: At Level == cJU_ROOTSTATE this sets the JPMs JPs jp_Type to a bogus +// value, but it doesnt matter because the JPM should be deleted by the +// caller. + +#define NOMEM { SETJPNULL_PARENT; *PPop1 = 0; return(FALSE); } + +// Allocate a Leaf1-N and save the address in Pjll; in case of failure, NOMEM: + +#define ALLOCLEAF(AllocLeaf) \ + if ((PjllRaw = AllocLeaf(pop1, Pjpm)) == (Pjll_t) NULL) NOMEM; \ + Pjll = P_JLL(PjllRaw); + +// Copy indexes smaller than words (and values which are whole words) from +// given arrays to immediate indexes or a leaf: +// +// TBD: These macros overlap with some of the code in JudyCascade.c; do some +// merging? That file has functions while these are macros. + +#define COPYTOLEAF_EVEN_SUB(Pjll,LeafType) \ + { \ + LeafType * P_leaf = (LeafType *) (Pjll); \ + Word_t p_op1 = pop1; \ + PWord_t P_Index = PIndex; \ + \ + assert(pop1 > 0); \ + \ + do { *P_leaf++ = *P_Index++; /* truncates */\ + } while (--(p_op1)); \ + } + +#define COPYTOLEAF_ODD_SUB(cLevel,Pjll,Copy) \ + { \ + uint8_t * P_leaf = (uint8_t *) (Pjll); \ + Word_t p_op1 = pop1; \ + PWord_t P_Index = PIndex; \ + \ + assert(pop1 > 0); \ + \ + do { \ + Copy(P_leaf, *P_Index); \ + P_leaf += (cLevel); ++P_Index; \ + } while (--(p_op1)); \ + } + +#ifdef JUDY1 + +#define COPYTOLEAF_EVEN(Pjll,LeafType) COPYTOLEAF_EVEN_SUB(Pjll,LeafType) +#define COPYTOLEAF_ODD(cLevel,Pjll,Copy) COPYTOLEAF_ODD_SUB(cLevel,Pjll,Copy) + +#else // JUDYL adds copying of values: + +#define COPYTOLEAF_EVEN(Pjll,LeafType) \ + { \ + COPYTOLEAF_EVEN_SUB(Pjll,LeafType) \ + JU_COPYMEM(Pjv, PValue, pop1); \ + } + +#define COPYTOLEAF_ODD(cLevel,Pjll,Copy) \ + { \ + COPYTOLEAF_ODD_SUB( cLevel,Pjll,Copy) \ + JU_COPYMEM(Pjv, PValue, pop1); \ + } + +#endif + +// Set the JP type for an immediate index, where BaseJPType is JPIMMED_*_02: + +#define SETIMMTYPE(BaseJPType) (PjpParent->jp_Type) = (BaseJPType) + pop1 - 2 + +// Allocate and populate a Leaf1-N: +// +// Build MAKELEAF_EVEN() and MAKELEAF_ODD() using macros for common code. + +#define MAKELEAF_SUB1(AllocLeaf,ValueArea,LeafType) \ + ALLOCLEAF(AllocLeaf); \ + JUDYLCODE(Pjv = ValueArea(Pjll, pop1)) + + +#define MAKELEAF_SUB2(cLevel,JPType) \ +{ \ + Word_t D_cdP0; \ + assert(pop1 - 1 <= cJU_POP0MASK(cLevel)); \ + D_cdP0 = (*PIndex & cJU_DCDMASK(cLevel)) | (pop1 - 1); \ + JU_JPSETADT(PjpParent, (Word_t)PjllRaw, D_cdP0, JPType); \ +} + + +#define MAKELEAF_EVEN(cLevel,JPType,AllocLeaf,ValueArea,LeafType) \ + MAKELEAF_SUB1(AllocLeaf,ValueArea,LeafType); \ + COPYTOLEAF_EVEN(Pjll, LeafType); \ + MAKELEAF_SUB2(cLevel, JPType) + +#define MAKELEAF_ODD(cLevel,JPType,AllocLeaf,ValueArea,Copy) \ + MAKELEAF_SUB1(AllocLeaf,ValueArea,LeafType); \ + COPYTOLEAF_ODD(cLevel, Pjll, Copy); \ + MAKELEAF_SUB2(cLevel, JPType) + +// Ensure that the indexes to be stored in immediate indexes or a leaf are +// sorted: +// +// This check is pure overhead, but required in order to protect the Judy array +// against caller error, to avoid a later corruption or core dump from a +// seemingly valid Judy array. Do this check piecemeal at the leaf level while +// the indexes are already in the cache. Higher-level order-checking occurs +// while building branches. +// +// Note: Any sorting error in the expanse of a single immediate indexes JP or +// a leaf => save no indexes in that expanse. + +#define CHECKLEAFORDER \ + { \ + for (offset = 1; offset < pop1; ++offset) \ + { \ + if (PIndex[offset - 1] >= PIndex[offset]) \ + { \ + SETJPNULL_PARENT; \ + *PPop1 = 0; \ + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_UNSORTED); \ + return(FALSE); \ + } \ + } \ + } + + +// ------ START OF CODE ------ + + assert( Level >= 1); + assert( Level <= cJU_ROOTSTATE); + assert((Level < cJU_ROOTSTATE) || (pop1 > cJU_LEAFW_MAXPOP1)); + + +// CHECK FOR TOP LEVEL: +// +// Special case: If at the top level (PjpParent is in the JPM), a top-level +// branch must be created, even if its a BranchL with just one JP. (The JPM +// cannot point to a leaf because the leaf would have to be a lower-level, +// higher-capacity leaf under a narrow pointer (otherwise a root-level leaf +// would suffice), and the JPMs JP cant handle a narrow pointer because the +// jp_DcdPopO field isnt big enough.) Otherwise continue to check for a pop1 +// small enough to support immediate indexes or a leaf before giving up and +// making a lower-level branch. + + if (Level == cJU_ROOTSTATE) + { + levelsub = cJU_ROOTSTATE; + goto BuildBranch2; + } + assert(Level < cJU_ROOTSTATE); + + +// SKIP JPIMMED_*_01: +// +// Immeds with pop1 == 1 should be handled in-line during branch construction. + + assert(pop1 > 1); + + +// BUILD JPIMMED_*_02+: +// +// The starting address of the indexes depends on Judy1 or JudyL; also, JudyL +// includes a pointer to a values-only leaf. + + if (pop1 <= immed_maxpop1[Level]) // note: always < root level. + { + JUDY1CODE(uint8_t * Pjll = (uint8_t *) (PjpParent->jp_1Index);) + JUDYLCODE(uint8_t * Pjll = (uint8_t *) (PjpParent->jp_LIndex);) + + CHECKLEAFORDER; // indexes to be stored are sorted. + +#ifdef JUDYL + if ((PjvRaw = j__udyLAllocJV(pop1, Pjpm)) == (Pjv_t) NULL) + NOMEM; + (PjpParent->jp_Addr) = (Word_t) PjvRaw; + Pjv = P_JV(PjvRaw); +#endif + + switch (Level) + { + case 1: COPYTOLEAF_EVEN(Pjll, uint8_t); + SETIMMTYPE(cJU_JPIMMED_1_02); + break; +#if (defined(JUDY1) || defined(JU_64BIT)) + case 2: COPYTOLEAF_EVEN(Pjll, uint16_t); + SETIMMTYPE(cJU_JPIMMED_2_02); + break; + case 3: COPYTOLEAF_ODD(3, Pjll, JU_COPY3_LONG_TO_PINDEX); + SETIMMTYPE(cJU_JPIMMED_3_02); + break; +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case 4: COPYTOLEAF_EVEN(Pjll, uint32_t); + SETIMMTYPE(cJ1_JPIMMED_4_02); + break; + case 5: COPYTOLEAF_ODD(5, Pjll, JU_COPY5_LONG_TO_PINDEX); + SETIMMTYPE(cJ1_JPIMMED_5_02); + break; + case 6: COPYTOLEAF_ODD(6, Pjll, JU_COPY6_LONG_TO_PINDEX); + SETIMMTYPE(cJ1_JPIMMED_6_02); + break; + case 7: COPYTOLEAF_ODD(7, Pjll, JU_COPY7_LONG_TO_PINDEX); + SETIMMTYPE(cJ1_JPIMMED_7_02); + break; +#endif + default: assert(FALSE); // should be impossible. + } + + return(TRUE); // note: no children => no *PPop1 mods. + + } // JPIMMED_*_02+ + + +// BUILD JPLEAF*: +// +// This code is a little tricky. The method is: For each level starting at +// the present Level down through levelsub = 1, and then as a special case for +// LeafB1 and FullPop (which are also at levelsub = 1 but have different +// capacity, see later), check if pop1 fits in a leaf (using leaf_maxpop1[]) +// at that level. If so, except for Level == levelsub, check if all of the +// current indexes to be stored are in the same (narrow) subexpanse, that is, +// the digits from Level to levelsub + 1, inclusive, are identical between the +// first and last index in the (sorted) list (in PIndex). If this condition is +// satisfied at any level, build a leaf at that level (under a narrow pointer +// if Level > levelsub). +// +// Note: Doing the search in this order results in storing the indexes in +// "least compressed form." + + for (levelsub = Level; levelsub >= 1; --levelsub) + { + Pjll_t PjllRaw; + Pjll_t Pjll; + +// Check if pop1 is too large to fit in a leaf at levelsub; if so, try the next +// lower level: + + if (pop1 > leaf_maxpop1[levelsub]) continue; + +// If pop1 fits in a leaf at levelsub, but levelsub is lower than Level, must +// also check whether all the indexes in the expanse to store can in fact be +// placed under a narrow pointer; if not, a leaf cannot be used, at this or any +// lower level (levelsub): + + if ((levelsub < Level) && (! SAMESUBEXP(levelsub))) + goto BuildBranch; // cant use a narrow, need a branch. + +// Ensure valid pop1 and all indexes are in fact common through Level: + + assert(pop1 <= cJU_POP0MASK(Level) + 1); + assert(! ((PIndex[0] ^ PIndex[pop1 - 1]) & cJU_DCDMASK(Level))); + + CHECKLEAFORDER; // indexes to be stored are sorted. + +// Build correct type of leaf: +// +// Note: The jp_DcdPopO and jp_Type assignments in MAKELEAF_* happen correctly +// for the levelsub (not Level) of the new leaf, even if its under a narrow +// pointer. + + switch (levelsub) + { +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case 1: MAKELEAF_EVEN(1, cJU_JPLEAF1, j__udyAllocJLL1, + JL_LEAF1VALUEAREA, uint8_t); + break; +#endif + case 2: MAKELEAF_EVEN(2, cJU_JPLEAF2, j__udyAllocJLL2, + JL_LEAF2VALUEAREA, uint16_t); + break; + case 3: MAKELEAF_ODD( 3, cJU_JPLEAF3, j__udyAllocJLL3, + JL_LEAF3VALUEAREA, JU_COPY3_LONG_TO_PINDEX); + break; +#ifdef JU_64BIT + case 4: MAKELEAF_EVEN(4, cJU_JPLEAF4, j__udyAllocJLL4, + JL_LEAF4VALUEAREA, uint32_t); + break; + case 5: MAKELEAF_ODD( 5, cJU_JPLEAF5, j__udyAllocJLL5, + JL_LEAF5VALUEAREA, JU_COPY5_LONG_TO_PINDEX); + break; + case 6: MAKELEAF_ODD( 6, cJU_JPLEAF6, j__udyAllocJLL6, + JL_LEAF6VALUEAREA, JU_COPY6_LONG_TO_PINDEX); + break; + case 7: MAKELEAF_ODD( 7, cJU_JPLEAF7, j__udyAllocJLL7, + JL_LEAF7VALUEAREA, JU_COPY7_LONG_TO_PINDEX); + break; +#endif + default: assert(FALSE); // should be impossible. + } + + return(TRUE); // note: no children => no *PPop1 mods. + + } // JPLEAF* + + +// BUILD JPLEAF_B1 OR JPFULLPOPU1: +// +// See above about JPLEAF*. If pop1 doesnt fit in any level of linear leaf, +// it might still fit in a LeafB1 or FullPop, perhaps under a narrow pointer. + + if ((Level == 1) || SAMESUBEXP(1)) // same until last digit. + { + Pjlb_t PjlbRaw; // for bitmap leaf. + Pjlb_t Pjlb; + + assert(pop1 <= cJU_JPFULLPOPU1_POP0 + 1); + CHECKLEAFORDER; // indexes to be stored are sorted. + +#ifdef JUDY1 + +// JPFULLPOPU1: + + if (pop1 == cJU_JPFULLPOPU1_POP0 + 1) + { + Word_t Addr = PjpParent->jp_Addr; + Word_t DcdP0 = (*PIndex & cJU_DCDMASK(1)) + | cJU_JPFULLPOPU1_POP0; + JU_JPSETADT(PjpParent, Addr, DcdP0, cJ1_JPFULLPOPU1); + + return(TRUE); + } +#endif + +// JPLEAF_B1: + + if ((PjlbRaw = j__udyAllocJLB1(Pjpm)) == (Pjlb_t) NULL) + NOMEM; + Pjlb = P_JLB(PjlbRaw); + + for (offset = 0; offset < pop1; ++offset) + JU_BITMAPSETL(Pjlb, PIndex[offset]); + + retval = TRUE; // default. + +#ifdef JUDYL + +// Build subexpanse values-only leaves (LeafVs) under LeafB1: + + for (offset = 0; offset < cJU_NUMSUBEXPL; ++offset) + { + if (! (pop1sub = j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, offset)))) + continue; // skip empty subexpanse. + +// Allocate one LeafV = JP subarray; if out of memory, clear bitmaps for higher +// subexpanses and adjust *PPop1: + + if ((PjvRaw = j__udyLAllocJV(pop1sub, Pjpm)) + == (Pjv_t) NULL) + { + for (/* null */; offset < cJU_NUMSUBEXPL; ++offset) + { + *PPop1 -= j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, offset)); + JU_JLB_BITMAP(Pjlb, offset) = 0; + } + + retval = FALSE; + break; + } + +// Populate values-only leaf and save the pointer to it: + + Pjv = P_JV(PjvRaw); + JU_COPYMEM(Pjv, PValue, pop1sub); + JL_JLB_PVALUE(Pjlb, offset) = PjvRaw; // first-tier pointer. + PValue += pop1sub; + + } // for each subexpanse + +#endif // JUDYL + +// Attach new LeafB1 to parent JP; note use of *PPop1 possibly < pop1: + + JU_JPSETADT(PjpParent, (Word_t) PjlbRaw, + (*PIndex & cJU_DCDMASK(1)) | (*PPop1 - 1), cJU_JPLEAF_B1); + + return(retval); + + } // JPLEAF_B1 or JPFULLPOPU1 + + +// BUILD JPBRANCH_U*: +// +// Arriving at BuildBranch means Level < top level but the pop1 is too large +// for immediate indexes or a leaf, even under a narrow pointer, including a +// LeafB1 or FullPop at level 1. This implies SAMESUBEXP(1) == FALSE, that is, +// the indexes to be stored "branch" at level 2 or higher. + +BuildBranch: // come here directly if a leaf wont work. + + assert(Level >= 2); + assert(Level < cJU_ROOTSTATE); + assert(! SAMESUBEXP(1)); // sanity check, see above. + +// Determine the appropriate level for a new branch node; see if a narrow +// pointer can be used: +// +// This can be confusing. The branch is required at the lowest level L where +// the indexes to store are not in the same subexpanse at level L-1. Work down +// from Level to tree level 3, which is 1 above the lowest tree level = 2 at +// which a branch can be used. Theres no need to check SAMESUBEXP at level 2 +// because its known to be false at level 2-1 = 1. +// +// Note: Unlike for a leaf node, a narrow pointer is always used for a branch +// if possible, that is, maximum compression is always used, except at the top +// level of the tree, where a JPM cannot support a narrow pointer, meaning a +// top BranchL can have a single JP (fanout = 1); but that case jumps directly +// to BuildBranch2. +// +// Note: For 32-bit systems the only usable values for a narrow pointer are +// Level = 3 and levelsub = 2; 64-bit systems have many more choices; but +// hopefully this for-loop is fast enough even on a 32-bit system. +// +// TBD: If not fast enough, #ifdef JU_64BIT and handle the 32-bit case faster. + + for (levelsub = Level; levelsub >= 3; --levelsub) // see above. + if (! SAMESUBEXP(levelsub - 1)) // at limit of narrow pointer. + break; // put branch at levelsub. + +BuildBranch2: // come here directly for Level = levelsub = cJU_ROOTSTATE. + + assert(levelsub >= 2); + assert(levelsub <= Level); + +// Initially build a BranchU: +// +// Always start with a BranchU because the number of populated subexpanses is +// not yet known. Use digitmask, digitshifted, and digitshincr to avoid +// expensive variable shifts within JU_DIGITATSTATE within the loop. +// +// TBD: The use of digitmask, etc. results in more increment operations per +// loop, is there an even faster way? +// +// TBD: Would it pay to pre-count the populated JPs (subexpanses) and +// pre-compress the branch, that is, build a BranchL or BranchB immediately, +// also taking account of opportunistic uncompression rules? Probably not +// because at high levels of the tree there might be huge numbers of indexes +// (hence cache lines) to scan in the PIndex array to determine the fanout +// (number of JPs) needed. + + if ((PjbuRaw = j__udyAllocJBU(Pjpm)) == (Pjbu_t) NULL) NOMEM; + Pjbu = P_JBU(PjbuRaw); + + JPtype_null = cJU_JPNULL1 + levelsub - 2; // in new BranchU. + JU_JPSETADT(&JPnull, 0, 0, JPtype_null); + + Pjp = Pjbu->jbu_jp; // for convenience in loop. + numJPs = 0; // non-null in the BranchU. + digitmask = cJU_MASKATSTATE(levelsub); // see above. + digitshincr = 1UL << (cJU_BITSPERBYTE * (levelsub - 1)); + retval = TRUE; + +// Scan and populate JPs (subexpanses): +// +// Look for all indexes matching each digit in the BranchU (at the correct +// levelsub), and meanwhile notice any sorting error. Increment PIndex (and +// PValue) and reduce pop1 for each subexpanse handled successfully. + + for (digit = digitshifted = 0; + digit < cJU_BRANCHUNUMJPS; + ++digit, digitshifted += digitshincr, ++Pjp) + { + DBGCODE(Word_t pop1subprev;) + assert(pop1 != 0); // end of indexes is handled elsewhere. + +// Count indexes in digits subexpanse: + + for (pop1sub = 0; pop1sub < pop1; ++pop1sub) + if (digitshifted != (PIndex[pop1sub] & digitmask)) break; + +// Empty subexpanse (typical, performance path) or sorting error (rare): + + if (pop1sub == 0) + { + if (digitshifted < (PIndex[0] & digitmask)) + { SETJPNULL(Pjp); continue; } // empty subexpanse. + + assert(pop1 < *PPop1); // did save >= 1 index and decr pop1. + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_UNSORTED); + goto AbandonBranch; + } + +// Non-empty subexpanse: +// +// First shortcut by handling pop1sub == 1 (JPIMMED_*_01) inline locally. + + if (pop1sub == 1) // note: can be at root level. + { + Word_t Addr = 0; + JUDYLCODE(Addr = (Word_t) (*PValue++);) + JU_JPSETADT(Pjp, Addr, *PIndex, cJU_JPIMMED_1_01 + levelsub -2); + + ++numJPs; + + if (--pop1) { ++PIndex; continue; } // more indexes to store. + + ++digit; ++Pjp; // skip JP just saved. + goto ClearBranch; // save time. + } + +// Recurse to populate one digits (subexpanses) JP; if successful, skip +// indexes (and values) just stored (performance path), except when expanse is +// completely stored: + + DBGCODE(pop1subprev = pop1sub;) + + if (j__udyInsArray(Pjp, levelsub - 1, &pop1sub, (PWord_t) PIndex, +#ifdef JUDYL + (Pjv_t) PValue, +#endif + Pjpm)) + { // complete success. + ++numJPs; + assert(pop1subprev == pop1sub); + assert(pop1 >= pop1sub); + + if ((pop1 -= pop1sub) != 0) // more indexes to store: + { + PIndex += pop1sub; // skip indexes just stored. + JUDYLCODE(PValue += pop1sub;) + continue; + } + // else leave PIndex in BranchUs expanse. + +// No more indexes to store in BranchUs expanse: + + ++digit; ++Pjp; // skip JP just saved. + goto ClearBranch; // save time. + } + +// Handle any error at a lower level of recursion: +// +// In case of partial success, pop1sub != 0, but it was reduced from the value +// passed to j__udyInsArray(); skip this JP later during ClearBranch. + + assert(pop1subprev > pop1sub); // check j__udyInsArray(). + assert(pop1 > pop1sub); // check j__udyInsArray(). + + if (pop1sub) // partial success. + { ++digit; ++Pjp; ++numJPs; } // skip JP just saved. + + pop1 -= pop1sub; // deduct saved indexes if any. + +// Same-level sorting error, or any lower-level error; abandon the rest of the +// branch: +// +// Arrive here with pop1 = remaining unsaved indexes (always non-zero). Adjust +// the *PPop1 value to record and return, modify retval, and use ClearBranch to +// finish up. + +AbandonBranch: + assert(pop1 != 0); // more to store, see above. + assert(pop1 <= *PPop1); // sanity check. + + *PPop1 -= pop1; // deduct unsaved indexes. + pop1 = 0; // to avoid error later. + retval = FALSE; + +// Error (rare), or end of indexes while traversing new BranchU (performance +// path); either way, mark the remaining JPs, if any, in the BranchU as nulls +// and exit the loop: +// +// Arrive here with digit and Pjp set to the first JP to set to null. + +ClearBranch: + for (/* null */; digit < cJU_BRANCHUNUMJPS; ++digit, ++Pjp) + SETJPNULL(Pjp); + break; // saves one more compare. + + } // for each digit + + +// FINISH JPBRANCH_U*: +// +// Arrive here with a BranchU built under Pjbu, numJPs set, and either: retval +// == TRUE and *PPop1 unmodified, or else retval == FALSE, *PPop1 set to the +// actual number of indexes saved (possibly 0 for complete failure at a lower +// level upon the first call of j__udyInsArray()), and the Judy error set in +// Pjpm. Either way, PIndex points to an index within the expanse just +// handled. + + Pjbany = (Word_t) PjbuRaw; // default = use this BranchU. + JPtype = branchU_JPtype[levelsub]; + +// Check for complete failure above: + + assert((! retval) || *PPop1); // sanity check. + + if ((! retval) && (*PPop1 == 0)) // nothing stored, full failure. + { + j__udyFreeJBU(PjbuRaw, Pjpm); + SETJPNULL_PARENT; + return(FALSE); + } + +// Complete or partial success so far; watch for sorting error after the +// maximum digit (255) in the BranchU, which is indicated by having more +// indexes to store in the BranchUs expanse: +// +// For example, if an index to store has a digit of 255 at levelsub, followed +// by an index with a digit of 254, the for-loop above runs out of digits +// without reducing pop1 to 0. + + if (pop1 != 0) + { + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_UNSORTED); + *PPop1 -= pop1; // deduct unsaved indexes. + retval = FALSE; + } + assert(*PPop1 != 0); // branch (still) cannot be empty. + + +// OPTIONALLY COMPRESS JPBRANCH_U*: +// +// See if the BranchU should be compressed to a BranchL or BranchB; if so, do +// that and free the BranchU; otherwise just use the existing BranchU. Follow +// the same rules as in JudyIns.c (version 4.95): Only check local population +// (cJU_OPP_UNCOMP_POP0) for BranchL, and only check global memory efficiency +// (JU_OPP_UNCOMPRESS) for BranchB. TBD: Have the rules changed? +// +// Note: Because of differing order of operations, the latter compression +// might not result in the same set of branch nodes as a series of sequential +// insertions. +// +// Note: Allocating a BranchU only to sometimes convert it to a BranchL or +// BranchB is unfortunate, but attempting to work with a temporary BranchU on +// the stack and then allocate and keep it as a BranchU in many cases is worse +// in terms of error handling. + + +// COMPRESS JPBRANCH_U* TO JPBRANCH_L*: + + if (numJPs <= cJU_BRANCHLMAXJPS) // JPs fit in a BranchL. + { + Pjbl_t PjblRaw = (Pjbl_t) NULL; // new BranchL; init for cc. + Pjbl_t Pjbl; + + if ((*PPop1 > JU_BRANCHL_MAX_POP) // pop too high. + || ((PjblRaw = j__udyAllocJBL(Pjpm)) == (Pjbl_t) NULL)) + { // cant alloc BranchL. + goto SetParent; // just keep BranchU. + } + + Pjbl = P_JBL(PjblRaw); + +// Copy BranchU JPs to BranchL: + + (Pjbl->jbl_NumJPs) = numJPs; + offset = 0; + + for (digit = 0; digit < cJU_BRANCHUNUMJPS; ++digit) + { + if ((((Pjbu->jbu_jp) + digit)->jp_Type) == JPtype_null) + continue; + + (Pjbl->jbl_Expanse[offset ]) = digit; + (Pjbl->jbl_jp [offset++]) = Pjbu->jbu_jp[digit]; + } + assert(offset == numJPs); // found same number. + +// Free the BranchU and prepare to use the new BranchL instead: + + j__udyFreeJBU(PjbuRaw, Pjpm); + + Pjbany = (Word_t) PjblRaw; + JPtype = branchL_JPtype[levelsub]; + + } // compress to BranchL + + +// COMPRESS JPBRANCH_U* TO JPBRANCH_B*: +// +// If unable to allocate the BranchB or any JP subarray, free all related +// memory and just keep the BranchU. +// +// Note: This use of JU_OPP_UNCOMPRESS is a bit conservative because the +// BranchU is already allocated while the (presumably smaller) BranchB is not, +// the opposite of how its used in single-insert code. + + else + { + Pjbb_t PjbbRaw = (Pjbb_t) NULL; // new BranchB; init for cc. + Pjbb_t Pjbb; + Pjp_t Pjp2; // in BranchU. + + if ((*PPop1 > JU_BRANCHB_MAX_POP) // pop too high. + || ((PjbbRaw = j__udyAllocJBB(Pjpm)) == (Pjbb_t) NULL)) + { // cant alloc BranchB. + goto SetParent; // just keep BranchU. + } + + Pjbb = P_JBB(PjbbRaw); + +// Set bits in bitmap for populated subexpanses: + + Pjp2 = Pjbu->jbu_jp; + + for (digit = 0; digit < cJU_BRANCHUNUMJPS; ++digit) + if ((((Pjbu->jbu_jp) + digit)->jp_Type) != JPtype_null) + JU_BITMAPSETB(Pjbb, digit); + +// Copy non-null JPs to BranchB JP subarrays: + + for (offset = 0; offset < cJU_NUMSUBEXPB; ++offset) + { + Pjp_t PjparrayRaw; + Pjp_t Pjparray; + + if (! (numJPs = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, offset)))) + continue; // skip empty subexpanse. + +// If unable to allocate a JP subarray, free all BranchB memory so far and +// continue to use the BranchU: + + if ((PjparrayRaw = j__udyAllocJBBJP(numJPs, Pjpm)) + == (Pjp_t) NULL) + { + while (offset-- > 0) + { + if (JU_JBB_PJP(Pjbb, offset) == (Pjp_t) NULL) continue; + + j__udyFreeJBBJP(JU_JBB_PJP(Pjbb, offset), + j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, offset)), + Pjpm); + } + j__udyFreeJBB(PjbbRaw, Pjpm); + goto SetParent; // keep BranchU. + } + +// Set one JP subarray pointer and copy the subexpanses JPs to the subarray: +// +// Scan the BranchU for non-null JPs until numJPs JPs are copied. + + JU_JBB_PJP(Pjbb, offset) = PjparrayRaw; + Pjparray = P_JP(PjparrayRaw); + + while (numJPs-- > 0) + { + while ((Pjp2->jp_Type) == JPtype_null) + { + ++Pjp2; + assert(Pjp2 < (Pjbu->jbu_jp) + cJU_BRANCHUNUMJPS); + } + *Pjparray++ = *Pjp2++; + } + } // for each subexpanse + +// Free the BranchU and prepare to use the new BranchB instead: + + j__udyFreeJBU(PjbuRaw, Pjpm); + + Pjbany = (Word_t) PjbbRaw; + JPtype = branchB_JPtype[levelsub]; + + } // compress to BranchB + + +// COMPLETE OR PARTIAL SUCCESS: +// +// Attach new branch (under Pjp, with JPtype) to parent JP; note use of *PPop1, +// possibly reduced due to partial failure. + +SetParent: + (PjpParent->jp_Addr) = Pjbany; + (PjpParent->jp_Type) = JPtype; + + if (Level < cJU_ROOTSTATE) // PjpParent not in JPM: + { + Word_t DcdP0 = (*PIndex & cJU_DCDMASK(levelsub)) | (*PPop1 - 1); + + JU_JPSETADT(PjpParent ,Pjbany, DcdP0, JPtype); + } + + return(retval); + +} // j__udyInsArray() diff --git a/libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c b/libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c new file mode 100644 index 0000000..cfa16bd --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c @@ -0,0 +1,135 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.17 $ $Source: /judy/src/JudyCommon/JudyInsertBranch.c $ + +// BranchL insertion functions for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +extern int j__udyCreateBranchL(Pjp_t, Pjp_t, uint8_t *, Word_t, Pvoid_t); + + +// **************************************************************************** +// __ J U D Y I N S E R T B R A N C H +// +// Insert 2-element BranchL in between Pjp and Pjp->jp_Addr. +// +// Return -1 if out of memory, otherwise return 1. + +FUNCTION int j__udyInsertBranch( + Pjp_t Pjp, // JP containing narrow pointer. + Word_t Index, // outlier to Pjp. + Word_t BranchLevel, // of what JP points to, mapped from JP type. + Pjpm_t Pjpm) // for global accounting. +{ + jp_t JP2 [2]; + jp_t JP; + Pjp_t PjpNull; + Word_t XorExp; + Word_t Inew, Iold; + Word_t DCDMask; // initially for original BranchLevel. + int Ret; + uint8_t Exp2[2]; + uint8_t DecodeByteN, DecodeByteO; + +// Get the current mask for the DCD digits: + + DCDMask = cJU_DCDMASK(BranchLevel); + +// Obtain Dcd bits that differ between Index and JP, shifted so the +// digit for BranchLevel is the LSB: + + XorExp = ((Index ^ JU_JPDCDPOP0(Pjp)) & (cJU_ALLONES >> cJU_BITSPERBYTE)) + >> (BranchLevel * cJU_BITSPERBYTE); + assert(XorExp); // Index must be an outlier. + +// Count levels between object under narrow pointer and the level at which +// the outlier diverges from it, which is always at least initial +// BranchLevel + 1, to end up with the level (JP type) at which to insert +// the new intervening BranchL: + + do { ++BranchLevel; } while ((XorExp >>= cJU_BITSPERBYTE)); + assert((BranchLevel > 1) && (BranchLevel < cJU_ROOTSTATE)); + +// Get the MSB (highest digit) that differs between the old expanse and +// the new Index to insert: + + DecodeByteO = JU_DIGITATSTATE(JU_JPDCDPOP0(Pjp), BranchLevel); + DecodeByteN = JU_DIGITATSTATE(Index, BranchLevel); + + assert(DecodeByteO != DecodeByteN); + +// Determine sorted order for old expanse and new Index digits: + + if (DecodeByteN > DecodeByteO) { Iold = 0; Inew = 1; } + else { Iold = 1; Inew = 0; } + +// Copy old JP into staging area for new Branch + JP2 [Iold] = *Pjp; + Exp2[Iold] = DecodeByteO; + Exp2[Inew] = DecodeByteN; + +// Create a 2 Expanse Linear branch +// +// Note: Pjp->jp_Addr is set by j__udyCreateBranchL() + + Ret = j__udyCreateBranchL(Pjp, JP2, Exp2, 2, Pjpm); + if (Ret == -1) return(-1); + +// Get Pjp to the NULL of where to do insert + PjpNull = ((P_JBL(Pjp->jp_Addr))->jbl_jp) + Inew; + +// Convert to a cJU_JPIMMED_*_01 at the correct level: +// Build JP and set type below to: cJU_JPIMMED_X_01 + JU_JPSETADT(PjpNull, 0, Index, cJU_JPIMMED_1_01 - 2 + BranchLevel); + +// Return pointer to Value area in cJU_JPIMMED_X_01 + JUDYLCODE(Pjpm->jpm_PValue = (Pjv_t) PjpNull;) + +// The old JP now points to a BranchL that is at higher level. Therefore +// it contains excess DCD bits (in the least significant position) that +// must be removed (zeroed); that is, they become part of the Pop0 +// subfield. Note that the remaining (lower) bytes in the Pop0 field do +// not change. +// +// Take from the old DCDMask, which went "down" to a lower BranchLevel, +// and zero any high bits that are still in the mask at the new, higher +// BranchLevel; then use this mask to zero the bits in jp_DcdPopO: + +// Set old JP to a BranchL at correct level + + Pjp->jp_Type = cJU_JPBRANCH_L2 - 2 + BranchLevel; + DCDMask ^= cJU_DCDMASK(BranchLevel); + DCDMask = ~DCDMask & JU_JPDCDPOP0(Pjp); + JP = *Pjp; + JU_JPSETADT(Pjp, JP.jp_Addr, DCDMask, JP.jp_Type); + + return(1); + +} // j__udyInsertBranch() diff --git a/libnetdata/libjudy/src/JudyL/JudyLMallocIF.c b/libnetdata/libjudy/src/JudyL/JudyLMallocIF.c new file mode 100644 index 0000000..9a7d02f --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLMallocIF.c @@ -0,0 +1,782 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.45 $ $Source: /judy/src/JudyCommon/JudyMallocIF.c $ +// +// Judy malloc/free interface functions for Judy1 and JudyL. +// +// Compile with one of -DJUDY1 or -DJUDYL. +// +// Compile with -DTRACEMI (Malloc Interface) to turn on tracing of malloc/free +// calls at the interface level. (See also TRACEMF in lower-level code.) +// Use -DTRACEMI2 for a terser format suitable for trace analysis. +// +// There can be malloc namespace bits in the LSBs of "raw" addresses from most, +// but not all, of the j__udy*Alloc*() functions; see also JudyPrivate.h. To +// test the Judy code, compile this file with -DMALLOCBITS and use debug flavor +// only (for assertions). This test ensures that (a) all callers properly mask +// the namespace bits out before dereferencing a pointer (or else a core dump +// occurs), and (b) all callers send "raw" (unmasked) addresses to +// j__udy*Free*() calls. +// +// Note: Currently -DDEBUG turns on MALLOCBITS automatically. + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +// Set "hidden" global j__uMaxWords to the maximum number of words to allocate +// to any one array (large enough to have a JPM, otherwise j__uMaxWords is +// ignored), to trigger a fake malloc error when the number is exceeded. Note, +// this code is always executed, not #ifdefd, because its virtually free. +// +// Note: To keep the MALLOC macro faster and simpler, set j__uMaxWords to +// MAXINT, not zero, by default. + +Word_t j__uMaxWords = ~0UL; + +// This macro hides the faking of a malloc failure: +// +// Note: To keep this fast, just compare WordsPrev to j__uMaxWords without the +// complexity of first adding WordsNow, meaning the trigger point is not +// exactly where you might assume, but it shouldnt matter. + +#define MALLOC(MallocFunc,WordsPrev,WordsNow) \ + (((WordsPrev) > j__uMaxWords) ? 0UL : MallocFunc(WordsNow)) + +// Clear words starting at address: +// +// Note: Only use this for objects that care; in other cases, it doesnt +// matter if the objects memory is pre-zeroed. + +#define ZEROWORDS(Addr,Words) \ + { \ + Word_t Words__ = (Words); \ + PWord_t Addr__ = (PWord_t) (Addr); \ + while (Words__--) *Addr__++ = 0UL; \ + } + +#ifdef TRACEMI + +// TRACING SUPPORT: +// +// Note: For TRACEMI, use a format for address printing compatible with other +// tracing facilities; in particular, %x not %lx, to truncate the "noisy" high +// part on 64-bit systems. +// +// TBD: The trace macros need fixing for alternate address types. +// +// Note: TRACEMI2 supports trace analysis no matter the underlying malloc/free +// engine used. + +#include + +static Word_t j__udyMemSequence = 0L; // event sequence number. + +#define TRACE_ALLOC5(a,b,c,d,e) (void) printf(a, (b), c, d) +#define TRACE_FREE5( a,b,c,d,e) (void) printf(a, (b), c, d) +#define TRACE_ALLOC6(a,b,c,d,e,f) (void) printf(a, (b), c, d, e) +#define TRACE_FREE6( a,b,c,d,e,f) (void) printf(a, (b), c, d, e) + +#else + +#ifdef TRACEMI2 + +#include + +#define b_pw cJU_BYTESPERWORD + +#define TRACE_ALLOC5(a,b,c,d,e) \ + (void) printf("a %lx %lx %lx\n", (b), (d) * b_pw, e) +#define TRACE_FREE5( a,b,c,d,e) \ + (void) printf("f %lx %lx %lx\n", (b), (d) * b_pw, e) +#define TRACE_ALLOC6(a,b,c,d,e,f) \ + (void) printf("a %lx %lx %lx\n", (b), (e) * b_pw, f) +#define TRACE_FREE6( a,b,c,d,e,f) \ + (void) printf("f %lx %lx %lx\n", (b), (e) * b_pw, f) + +static Word_t j__udyMemSequence = 0L; // event sequence number. + +#else + +#define TRACE_ALLOC5(a,b,c,d,e) // null. +#define TRACE_FREE5( a,b,c,d,e) // null. +#define TRACE_ALLOC6(a,b,c,d,e,f) // null. +#define TRACE_FREE6( a,b,c,d,e,f) // null. + +#endif // ! TRACEMI2 +#endif // ! TRACEMI + + +// MALLOC NAMESPACE SUPPORT: + +#if (defined(DEBUG) && (! defined(MALLOCBITS))) // for now, DEBUG => MALLOCBITS: +#define MALLOCBITS 1 +#endif + +#ifdef MALLOCBITS +#define MALLOCBITS_VALUE 0x3 // bit pattern to use. +#define MALLOCBITS_MASK 0x7 // note: matches mask__ in JudyPrivate.h. + +#define MALLOCBITS_SET( Type,Addr) \ + ((Addr) = (Type) ((Word_t) (Addr) | MALLOCBITS_VALUE)) +#define MALLOCBITS_TEST(Type,Addr) \ + assert((((Word_t) (Addr)) & MALLOCBITS_MASK) == MALLOCBITS_VALUE); \ + ((Addr) = (Type) ((Word_t) (Addr) & ~MALLOCBITS_VALUE)) +#else +#define MALLOCBITS_SET( Type,Addr) // null. +#define MALLOCBITS_TEST(Type,Addr) // null. +#endif + + +// SAVE ERROR INFORMATION IN A Pjpm: +// +// "Small" (invalid) Addr values are used to distinguish overrun and no-mem +// errors. (TBD, non-zero invalid values are no longer returned from +// lower-level functions, that is, JU_ERRNO_OVERRUN is no longer detected.) + +#define J__UDYSETALLOCERROR(Addr) \ + { \ + JU_ERRID(Pjpm) = __LINE__; \ + if ((Word_t) (Addr) > 0) JU_ERRNO(Pjpm) = JU_ERRNO_OVERRUN; \ + else JU_ERRNO(Pjpm) = JU_ERRNO_NOMEM; \ + return(0); \ + } + + +// **************************************************************************** +// ALLOCATION FUNCTIONS: +// +// To help the compiler catch coding errors, each function returns a specific +// object type. +// +// Note: Only j__udyAllocJPM() and j__udyAllocJLW() return multiple values <= +// sizeof(Word_t) to indicate the type of memory allocation failure. Other +// allocation functions convert this failure to a JU_ERRNO. + + +// Note: Unlike other j__udyAlloc*() functions, Pjpms are returned non-raw, +// that is, without malloc namespace or root pointer type bits: + +FUNCTION Pjpm_t j__udyAllocJPM(void) +{ + Word_t Words = (sizeof(jpm_t) + cJU_BYTESPERWORD - 1) / cJU_BYTESPERWORD; + Pjpm_t Pjpm = (Pjpm_t) MALLOC(JudyMalloc, Words, Words); + + assert((Words * cJU_BYTESPERWORD) == sizeof(jpm_t)); + + if ((Word_t) Pjpm > sizeof(Word_t)) + { + ZEROWORDS(Pjpm, Words); + Pjpm->jpm_TotalMemWords = Words; + } + + TRACE_ALLOC5("0x%x %8lu = j__udyAllocJPM(), Words = %lu\n", + Pjpm, j__udyMemSequence++, Words, cJU_LEAFW_MAXPOP1 + 1); + // MALLOCBITS_SET(Pjpm_t, Pjpm); // see above. + return(Pjpm); + +} // j__udyAllocJPM() + + +FUNCTION Pjbl_t j__udyAllocJBL(Pjpm_t Pjpm) +{ + Word_t Words = sizeof(jbl_t) / cJU_BYTESPERWORD; + Pjbl_t PjblRaw = (Pjbl_t) MALLOC(JudyMallocVirtual, + Pjpm->jpm_TotalMemWords, Words); + + assert((Words * cJU_BYTESPERWORD) == sizeof(jbl_t)); + + if ((Word_t) PjblRaw > sizeof(Word_t)) + { + ZEROWORDS(P_JBL(PjblRaw), Words); + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjblRaw); } + + TRACE_ALLOC5("0x%x %8lu = j__udyAllocJBL(), Words = %lu\n", PjblRaw, + j__udyMemSequence++, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjbl_t, PjblRaw); + return(PjblRaw); + +} // j__udyAllocJBL() + + +FUNCTION Pjbb_t j__udyAllocJBB(Pjpm_t Pjpm) +{ + Word_t Words = sizeof(jbb_t) / cJU_BYTESPERWORD; + Pjbb_t PjbbRaw = (Pjbb_t) MALLOC(JudyMallocVirtual, + Pjpm->jpm_TotalMemWords, Words); + + assert((Words * cJU_BYTESPERWORD) == sizeof(jbb_t)); + + if ((Word_t) PjbbRaw > sizeof(Word_t)) + { + ZEROWORDS(P_JBB(PjbbRaw), Words); + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjbbRaw); } + + TRACE_ALLOC5("0x%x %8lu = j__udyAllocJBB(), Words = %lu\n", PjbbRaw, + j__udyMemSequence++, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjbb_t, PjbbRaw); + return(PjbbRaw); + +} // j__udyAllocJBB() + + +FUNCTION Pjp_t j__udyAllocJBBJP(Word_t NumJPs, Pjpm_t Pjpm) +{ + Word_t Words = JU_BRANCHJP_NUMJPSTOWORDS(NumJPs); + Pjp_t PjpRaw; + + PjpRaw = (Pjp_t) MALLOC(JudyMalloc, Pjpm->jpm_TotalMemWords, Words); + + if ((Word_t) PjpRaw > sizeof(Word_t)) + { + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjpRaw); } + + TRACE_ALLOC6("0x%x %8lu = j__udyAllocJBBJP(%lu), Words = %lu\n", PjpRaw, + j__udyMemSequence++, NumJPs, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjp_t, PjpRaw); + return(PjpRaw); + +} // j__udyAllocJBBJP() + + +FUNCTION Pjbu_t j__udyAllocJBU(Pjpm_t Pjpm) +{ + Word_t Words = sizeof(jbu_t) / cJU_BYTESPERWORD; + Pjbu_t PjbuRaw = (Pjbu_t) MALLOC(JudyMallocVirtual, + Pjpm->jpm_TotalMemWords, Words); + + assert((Words * cJU_BYTESPERWORD) == sizeof(jbu_t)); + + if ((Word_t) PjbuRaw > sizeof(Word_t)) + { + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjbuRaw); } + + TRACE_ALLOC5("0x%x %8lu = j__udyAllocJBU(), Words = %lu\n", PjbuRaw, + j__udyMemSequence++, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjbu_t, PjbuRaw); + return(PjbuRaw); + +} // j__udyAllocJBU() + + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + +FUNCTION Pjll_t j__udyAllocJLL1(Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF1POPTOWORDS(Pop1); + Pjll_t PjllRaw; + + PjllRaw = (Pjll_t) MALLOC(JudyMalloc, Pjpm->jpm_TotalMemWords, Words); + + if ((Word_t) PjllRaw > sizeof(Word_t)) + { + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjllRaw); } + + TRACE_ALLOC6("0x%x %8lu = j__udyAllocJLL1(%lu), Words = %lu\n", PjllRaw, + j__udyMemSequence++, Pop1, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjll_t, PjllRaw); + return(PjllRaw); + +} // j__udyAllocJLL1() + +#endif // (JUDYL || (! JU_64BIT)) + + +FUNCTION Pjll_t j__udyAllocJLL2(Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF2POPTOWORDS(Pop1); + Pjll_t PjllRaw; + + PjllRaw = (Pjll_t) MALLOC(JudyMalloc, Pjpm->jpm_TotalMemWords, Words); + + if ((Word_t) PjllRaw > sizeof(Word_t)) + { + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjllRaw); } + + TRACE_ALLOC6("0x%x %8lu = j__udyAllocJLL2(%lu), Words = %lu\n", PjllRaw, + j__udyMemSequence++, Pop1, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjll_t, PjllRaw); + return(PjllRaw); + +} // j__udyAllocJLL2() + + +FUNCTION Pjll_t j__udyAllocJLL3(Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF3POPTOWORDS(Pop1); + Pjll_t PjllRaw; + + PjllRaw = (Pjll_t) MALLOC(JudyMalloc, Pjpm->jpm_TotalMemWords, Words); + + if ((Word_t) PjllRaw > sizeof(Word_t)) + { + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjllRaw); } + + TRACE_ALLOC6("0x%x %8lu = j__udyAllocJLL3(%lu), Words = %lu\n", PjllRaw, + j__udyMemSequence++, Pop1, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjll_t, PjllRaw); + return(PjllRaw); + +} // j__udyAllocJLL3() + + +#ifdef JU_64BIT + +FUNCTION Pjll_t j__udyAllocJLL4(Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF4POPTOWORDS(Pop1); + Pjll_t PjllRaw; + + PjllRaw = (Pjll_t) MALLOC(JudyMalloc, Pjpm->jpm_TotalMemWords, Words); + + if ((Word_t) PjllRaw > sizeof(Word_t)) + { + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjllRaw); } + + TRACE_ALLOC6("0x%x %8lu = j__udyAllocJLL4(%lu), Words = %lu\n", PjllRaw, + j__udyMemSequence++, Pop1, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjll_t, PjllRaw); + return(PjllRaw); + +} // j__udyAllocJLL4() + + +FUNCTION Pjll_t j__udyAllocJLL5(Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF5POPTOWORDS(Pop1); + Pjll_t PjllRaw; + + PjllRaw = (Pjll_t) MALLOC(JudyMalloc, Pjpm->jpm_TotalMemWords, Words); + + if ((Word_t) PjllRaw > sizeof(Word_t)) + { + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjllRaw); } + + TRACE_ALLOC6("0x%x %8lu = j__udyAllocJLL5(%lu), Words = %lu\n", PjllRaw, + j__udyMemSequence++, Pop1, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjll_t, PjllRaw); + return(PjllRaw); + +} // j__udyAllocJLL5() + + +FUNCTION Pjll_t j__udyAllocJLL6(Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF6POPTOWORDS(Pop1); + Pjll_t PjllRaw; + + PjllRaw = (Pjll_t) MALLOC(JudyMalloc, Pjpm->jpm_TotalMemWords, Words); + + if ((Word_t) PjllRaw > sizeof(Word_t)) + { + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjllRaw); } + + TRACE_ALLOC6("0x%x %8lu = j__udyAllocJLL6(%lu), Words = %lu\n", PjllRaw, + j__udyMemSequence++, Pop1, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjll_t, PjllRaw); + return(PjllRaw); + +} // j__udyAllocJLL6() + + +FUNCTION Pjll_t j__udyAllocJLL7(Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF7POPTOWORDS(Pop1); + Pjll_t PjllRaw; + + PjllRaw = (Pjll_t) MALLOC(JudyMalloc, Pjpm->jpm_TotalMemWords, Words); + + if ((Word_t) PjllRaw > sizeof(Word_t)) + { + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjllRaw); } + + TRACE_ALLOC6("0x%x %8lu = j__udyAllocJLL7(%lu), Words = %lu\n", PjllRaw, + j__udyMemSequence++, Pop1, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjll_t, PjllRaw); + return(PjllRaw); + +} // j__udyAllocJLL7() + +#endif // JU_64BIT + + +// Note: Root-level leaf addresses are always whole words (Pjlw_t), and unlike +// other j__udyAlloc*() functions, they are returned non-raw, that is, without +// malloc namespace or root pointer type bits (the latter are added later by +// the caller): + +FUNCTION Pjlw_t j__udyAllocJLW(Word_t Pop1) +{ + Word_t Words = JU_LEAFWPOPTOWORDS(Pop1); + Pjlw_t Pjlw = (Pjlw_t) MALLOC(JudyMalloc, Words, Words); + + TRACE_ALLOC6("0x%x %8lu = j__udyAllocJLW(%lu), Words = %lu\n", Pjlw, + j__udyMemSequence++, Pop1, Words, Pop1); + // MALLOCBITS_SET(Pjlw_t, Pjlw); // see above. + return(Pjlw); + +} // j__udyAllocJLW() + + +FUNCTION Pjlb_t j__udyAllocJLB1(Pjpm_t Pjpm) +{ + Word_t Words = sizeof(jlb_t) / cJU_BYTESPERWORD; + Pjlb_t PjlbRaw; + + PjlbRaw = (Pjlb_t) MALLOC(JudyMalloc, Pjpm->jpm_TotalMemWords, Words); + + assert((Words * cJU_BYTESPERWORD) == sizeof(jlb_t)); + + if ((Word_t) PjlbRaw > sizeof(Word_t)) + { + ZEROWORDS(P_JLB(PjlbRaw), Words); + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjlbRaw); } + + TRACE_ALLOC5("0x%x %8lu = j__udyAllocJLB1(), Words = %lu\n", PjlbRaw, + j__udyMemSequence++, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjlb_t, PjlbRaw); + return(PjlbRaw); + +} // j__udyAllocJLB1() + + +#ifdef JUDYL + +FUNCTION Pjv_t j__udyLAllocJV(Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JL_LEAFVPOPTOWORDS(Pop1); + Pjv_t PjvRaw; + + PjvRaw = (Pjv_t) MALLOC(JudyMalloc, Pjpm->jpm_TotalMemWords, Words); + + if ((Word_t) PjvRaw > sizeof(Word_t)) + { + Pjpm->jpm_TotalMemWords += Words; + } + else { J__UDYSETALLOCERROR(PjvRaw); } + + TRACE_ALLOC6("0x%x %8lu = j__udyLAllocJV(%lu), Words = %lu\n", PjvRaw, + j__udyMemSequence++, Pop1, Words, (Pjpm->jpm_Pop0) + 2); + MALLOCBITS_SET(Pjv_t, PjvRaw); + return(PjvRaw); + +} // j__udyLAllocJV() + +#endif // JUDYL + + +// **************************************************************************** +// FREE FUNCTIONS: +// +// To help the compiler catch coding errors, each function takes a specific +// object type to free. + + +// Note: j__udyFreeJPM() receives a root pointer with NO root pointer type +// bits present, that is, they must be stripped by the caller using P_JPM(): + +FUNCTION void j__udyFreeJPM(Pjpm_t PjpmFree, Pjpm_t PjpmStats) +{ + Word_t Words = (sizeof(jpm_t) + cJU_BYTESPERWORD - 1) / cJU_BYTESPERWORD; + + // MALLOCBITS_TEST(Pjpm_t, PjpmFree); // see above. + JudyFree((Pvoid_t) PjpmFree, Words); + + if (PjpmStats != (Pjpm_t) NULL) PjpmStats->jpm_TotalMemWords -= Words; + +// Note: Log PjpmFree->jpm_Pop0, similar to other j__udyFree*() functions, not +// an assumed value of cJU_LEAFW_MAXPOP1, for when the caller is +// Judy*FreeArray(), jpm_Pop0 is set to 0, and the population after the free +// really will be 0, not cJU_LEAFW_MAXPOP1. + + TRACE_FREE6("0x%x %8lu = j__udyFreeJPM(%lu), Words = %lu\n", PjpmFree, + j__udyMemSequence++, Words, Words, PjpmFree->jpm_Pop0); + + +} // j__udyFreeJPM() + + +FUNCTION void j__udyFreeJBL(Pjbl_t Pjbl, Pjpm_t Pjpm) +{ + Word_t Words = sizeof(jbl_t) / cJU_BYTESPERWORD; + + MALLOCBITS_TEST(Pjbl_t, Pjbl); + JudyFreeVirtual((Pvoid_t) Pjbl, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE5("0x%x %8lu = j__udyFreeJBL(), Words = %lu\n", Pjbl, + j__udyMemSequence++, Words, Pjpm->jpm_Pop0); + + +} // j__udyFreeJBL() + + +FUNCTION void j__udyFreeJBB(Pjbb_t Pjbb, Pjpm_t Pjpm) +{ + Word_t Words = sizeof(jbb_t) / cJU_BYTESPERWORD; + + MALLOCBITS_TEST(Pjbb_t, Pjbb); + JudyFreeVirtual((Pvoid_t) Pjbb, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE5("0x%x %8lu = j__udyFreeJBB(), Words = %lu\n", Pjbb, + j__udyMemSequence++, Words, Pjpm->jpm_Pop0); + + +} // j__udyFreeJBB() + + +FUNCTION void j__udyFreeJBBJP(Pjp_t Pjp, Word_t NumJPs, Pjpm_t Pjpm) +{ + Word_t Words = JU_BRANCHJP_NUMJPSTOWORDS(NumJPs); + + MALLOCBITS_TEST(Pjp_t, Pjp); + JudyFree((Pvoid_t) Pjp, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE6("0x%x %8lu = j__udyFreeJBBJP(%lu), Words = %lu\n", Pjp, + j__udyMemSequence++, NumJPs, Words, Pjpm->jpm_Pop0); + + +} // j__udyFreeJBBJP() + + +FUNCTION void j__udyFreeJBU(Pjbu_t Pjbu, Pjpm_t Pjpm) +{ + Word_t Words = sizeof(jbu_t) / cJU_BYTESPERWORD; + + MALLOCBITS_TEST(Pjbu_t, Pjbu); + JudyFreeVirtual((Pvoid_t) Pjbu, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE5("0x%x %8lu = j__udyFreeJBU(), Words = %lu\n", Pjbu, + j__udyMemSequence++, Words, Pjpm->jpm_Pop0); + + +} // j__udyFreeJBU() + + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + +FUNCTION void j__udyFreeJLL1(Pjll_t Pjll, Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF1POPTOWORDS(Pop1); + + MALLOCBITS_TEST(Pjll_t, Pjll); + JudyFree((Pvoid_t) Pjll, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE6("0x%x %8lu = j__udyFreeJLL1(%lu), Words = %lu\n", Pjll, + j__udyMemSequence++, Pop1, Words, Pjpm->jpm_Pop0); + + +} // j__udyFreeJLL1() + +#endif // (JUDYL || (! JU_64BIT)) + + +FUNCTION void j__udyFreeJLL2(Pjll_t Pjll, Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF2POPTOWORDS(Pop1); + + MALLOCBITS_TEST(Pjll_t, Pjll); + JudyFree((Pvoid_t) Pjll, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE6("0x%x %8lu = j__udyFreeJLL2(%lu), Words = %lu\n", Pjll, + j__udyMemSequence++, Pop1, Words, Pjpm->jpm_Pop0); + + +} // j__udyFreeJLL2() + + +FUNCTION void j__udyFreeJLL3(Pjll_t Pjll, Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF3POPTOWORDS(Pop1); + + MALLOCBITS_TEST(Pjll_t, Pjll); + JudyFree((Pvoid_t) Pjll, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE6("0x%x %8lu = j__udyFreeJLL3(%lu), Words = %lu\n", Pjll, + j__udyMemSequence++, Pop1, Words, Pjpm->jpm_Pop0); + + +} // j__udyFreeJLL3() + + +#ifdef JU_64BIT + +FUNCTION void j__udyFreeJLL4(Pjll_t Pjll, Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF4POPTOWORDS(Pop1); + + MALLOCBITS_TEST(Pjll_t, Pjll); + JudyFree((Pvoid_t) Pjll, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE6("0x%x %8lu = j__udyFreeJLL4(%lu), Words = %lu\n", Pjll, + j__udyMemSequence++, Pop1, Words, Pjpm->jpm_Pop0); + + +} // j__udyFreeJLL4() + + +FUNCTION void j__udyFreeJLL5(Pjll_t Pjll, Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF5POPTOWORDS(Pop1); + + MALLOCBITS_TEST(Pjll_t, Pjll); + JudyFree((Pvoid_t) Pjll, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE6("0x%x %8lu = j__udyFreeJLL5(%lu), Words = %lu\n", Pjll, + j__udyMemSequence++, Pop1, Words, Pjpm->jpm_Pop0); + + +} // j__udyFreeJLL5() + + +FUNCTION void j__udyFreeJLL6(Pjll_t Pjll, Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF6POPTOWORDS(Pop1); + + MALLOCBITS_TEST(Pjll_t, Pjll); + JudyFree((Pvoid_t) Pjll, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE6("0x%x %8lu = j__udyFreeJLL6(%lu), Words = %lu\n", Pjll, + j__udyMemSequence++, Pop1, Words, Pjpm->jpm_Pop0); + + +} // j__udyFreeJLL6() + + +FUNCTION void j__udyFreeJLL7(Pjll_t Pjll, Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAF7POPTOWORDS(Pop1); + + MALLOCBITS_TEST(Pjll_t, Pjll); + JudyFree((Pvoid_t) Pjll, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE6("0x%x %8lu = j__udyFreeJLL7(%lu), Words = %lu\n", Pjll, + j__udyMemSequence++, Pop1, Words, Pjpm->jpm_Pop0); + + +} // j__udyFreeJLL7() + +#endif // JU_64BIT + + +// Note: j__udyFreeJLW() receives a root pointer with NO root pointer type +// bits present, that is, they are stripped by P_JLW(): + +FUNCTION void j__udyFreeJLW(Pjlw_t Pjlw, Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JU_LEAFWPOPTOWORDS(Pop1); + + // MALLOCBITS_TEST(Pjlw_t, Pjlw); // see above. + JudyFree((Pvoid_t) Pjlw, Words); + + if (Pjpm) Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE6("0x%x %8lu = j__udyFreeJLW(%lu), Words = %lu\n", Pjlw, + j__udyMemSequence++, Pop1, Words, Pop1 - 1); + + +} // j__udyFreeJLW() + + +FUNCTION void j__udyFreeJLB1(Pjlb_t Pjlb, Pjpm_t Pjpm) +{ + Word_t Words = sizeof(jlb_t) / cJU_BYTESPERWORD; + + MALLOCBITS_TEST(Pjlb_t, Pjlb); + JudyFree((Pvoid_t) Pjlb, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE5("0x%x %8lu = j__udyFreeJLB1(), Words = %lu\n", Pjlb, + j__udyMemSequence++, Words, Pjpm->jpm_Pop0); + + +} // j__udyFreeJLB1() + + +#ifdef JUDYL + +FUNCTION void j__udyLFreeJV(Pjv_t Pjv, Word_t Pop1, Pjpm_t Pjpm) +{ + Word_t Words = JL_LEAFVPOPTOWORDS(Pop1); + + MALLOCBITS_TEST(Pjv_t, Pjv); + JudyFree((Pvoid_t) Pjv, Words); + + Pjpm->jpm_TotalMemWords -= Words; + + TRACE_FREE6("0x%x %8lu = j__udyLFreeJV(%lu), Words = %lu\n", Pjv, + j__udyMemSequence++, Pop1, Words, Pjpm->jpm_Pop0); + + +} // j__udyLFreeJV() + +#endif // JUDYL diff --git a/libnetdata/libjudy/src/JudyL/JudyLMemActive.c b/libnetdata/libjudy/src/JudyL/JudyLMemActive.c new file mode 100644 index 0000000..fb58d0e --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLMemActive.c @@ -0,0 +1,259 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.7 $ $Source: /judy/src/JudyCommon/JudyMemActive.c $ +// +// Return number of bytes of memory used to support a Judy1/L array. +// Compile with one of -DJUDY1 or -DJUDYL. + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +FUNCTION static Word_t j__udyGetMemActive(Pjp_t); + + +// **************************************************************************** +// J U D Y 1 M E M A C T I V E +// J U D Y L M E M A C T I V E + +#ifdef JUDY1 +FUNCTION Word_t Judy1MemActive +#else +FUNCTION Word_t JudyLMemActive +#endif + ( + Pcvoid_t PArray // from which to retrieve. + ) +{ + if (PArray == (Pcvoid_t)NULL) return(0); + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlw = P_JLW(PArray); // first word of leaf. + Word_t Words = Pjlw[0] + 1; // population. +#ifdef JUDY1 + return((Words + 1) * sizeof(Word_t)); +#else + return(((Words * 2) + 1) * sizeof(Word_t)); +#endif + } + else + { + Pjpm_t Pjpm = P_JPM(PArray); + return(j__udyGetMemActive(&Pjpm->jpm_JP) + sizeof(jpm_t)); + } + +} // JudyMemActive() + + +// **************************************************************************** +// __ J U D Y G E T M E M A C T I V E + +FUNCTION static Word_t j__udyGetMemActive( + Pjp_t Pjp) // top of subtree. +{ + Word_t offset; // in a branch. + Word_t Bytes = 0; // actual bytes used at this level. + Word_t IdxSz; // bytes per index in leaves + + switch (JU_JPTYPE(Pjp)) + { + + case cJU_JPBRANCH_L2: + case cJU_JPBRANCH_L3: +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: + case cJU_JPBRANCH_L5: + case cJU_JPBRANCH_L6: + case cJU_JPBRANCH_L7: +#endif + case cJU_JPBRANCH_L: + { + Pjbl_t Pjbl = P_JBL(Pjp->jp_Addr); + + for (offset = 0; offset < (Pjbl->jbl_NumJPs); ++offset) + Bytes += j__udyGetMemActive((Pjbl->jbl_jp) + offset); + + return(Bytes + sizeof(jbl_t)); + } + + case cJU_JPBRANCH_B2: + case cJU_JPBRANCH_B3: +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: + case cJU_JPBRANCH_B5: + case cJU_JPBRANCH_B6: + case cJU_JPBRANCH_B7: +#endif + case cJU_JPBRANCH_B: + { + Word_t subexp; + Word_t jpcount; + Pjbb_t Pjbb = P_JBB(Pjp->jp_Addr); + + for (subexp = 0; subexp < cJU_NUMSUBEXPB; ++subexp) + { + jpcount = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp)); + Bytes += jpcount * sizeof(jp_t); + + for (offset = 0; offset < jpcount; ++offset) + { + Bytes += j__udyGetMemActive(P_JP(JU_JBB_PJP(Pjbb, subexp)) + + offset); + } + } + + return(Bytes + sizeof(jbb_t)); + } + + case cJU_JPBRANCH_U2: + case cJU_JPBRANCH_U3: +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: + case cJU_JPBRANCH_U5: + case cJU_JPBRANCH_U6: + case cJU_JPBRANCH_U7: +#endif + case cJU_JPBRANCH_U: + { + Pjbu_t Pjbu = P_JBU(Pjp->jp_Addr); + + for (offset = 0; offset < cJU_BRANCHUNUMJPS; ++offset) + { + if (((Pjbu->jbu_jp[offset].jp_Type) >= cJU_JPNULL1) + && ((Pjbu->jbu_jp[offset].jp_Type) <= cJU_JPNULLMAX)) + { + continue; // skip null JP to save time. + } + + Bytes += j__udyGetMemActive(Pjbu->jbu_jp + offset); + } + + return(Bytes + sizeof(jbu_t)); + } + + +// -- Cases below here terminate and do not recurse. -- + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: IdxSz = 1; goto LeafWords; +#endif + case cJU_JPLEAF2: IdxSz = 2; goto LeafWords; + case cJU_JPLEAF3: IdxSz = 3; goto LeafWords; +#ifdef JU_64BIT + case cJU_JPLEAF4: IdxSz = 4; goto LeafWords; + case cJU_JPLEAF5: IdxSz = 5; goto LeafWords; + case cJU_JPLEAF6: IdxSz = 6; goto LeafWords; + case cJU_JPLEAF7: IdxSz = 7; goto LeafWords; +#endif +LeafWords: + +#ifdef JUDY1 + return(IdxSz * (JU_JPLEAF_POP0(Pjp) + 1)); +#else + return((IdxSz + sizeof(Word_t)) + * (JU_JPLEAF_POP0(Pjp) + 1)); +#endif + case cJU_JPLEAF_B1: + { +#ifdef JUDY1 + return(sizeof(jlb_t)); +#else + Bytes = (JU_JPLEAF_POP0(Pjp) + 1) * sizeof(Word_t); + + return(Bytes + sizeof(jlb_t)); +#endif + } + + JUDY1CODE(case cJ1_JPFULLPOPU1: return(0);) + +#ifdef JUDY1 +#define J__Mpy 0 +#else +#define J__Mpy sizeof(Word_t) +#endif + + case cJU_JPIMMED_1_01: return(0); + case cJU_JPIMMED_2_01: return(0); + case cJU_JPIMMED_3_01: return(0); +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: return(0); + case cJU_JPIMMED_5_01: return(0); + case cJU_JPIMMED_6_01: return(0); + case cJU_JPIMMED_7_01: return(0); +#endif + + case cJU_JPIMMED_1_02: return(J__Mpy * 2); + case cJU_JPIMMED_1_03: return(J__Mpy * 3); +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: return(J__Mpy * 4); + case cJU_JPIMMED_1_05: return(J__Mpy * 5); + case cJU_JPIMMED_1_06: return(J__Mpy * 6); + case cJU_JPIMMED_1_07: return(J__Mpy * 7); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: return(0); + case cJ1_JPIMMED_1_09: return(0); + case cJ1_JPIMMED_1_10: return(0); + case cJ1_JPIMMED_1_11: return(0); + case cJ1_JPIMMED_1_12: return(0); + case cJ1_JPIMMED_1_13: return(0); + case cJ1_JPIMMED_1_14: return(0); + case cJ1_JPIMMED_1_15: return(0); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: return(J__Mpy * 2); + case cJU_JPIMMED_2_03: return(J__Mpy * 3); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: return(0); + case cJ1_JPIMMED_2_05: return(0); + case cJ1_JPIMMED_2_06: return(0); + case cJ1_JPIMMED_2_07: return(0); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: return(J__Mpy * 2); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: return(0); + case cJ1_JPIMMED_3_04: return(0); + case cJ1_JPIMMED_3_05: return(0); + + case cJ1_JPIMMED_4_02: return(0); + case cJ1_JPIMMED_4_03: return(0); + case cJ1_JPIMMED_5_02: return(0); + case cJ1_JPIMMED_5_03: return(0); + case cJ1_JPIMMED_6_02: return(0); + case cJ1_JPIMMED_7_02: return(0); +#endif + + } // switch (JU_JPTYPE(Pjp)) + + return(0); // to make some compilers happy. + +} // j__udyGetMemActive() diff --git a/libnetdata/libjudy/src/JudyL/JudyLMemUsed.c b/libnetdata/libjudy/src/JudyL/JudyLMemUsed.c new file mode 100644 index 0000000..81e3a79 --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLMemUsed.c @@ -0,0 +1,61 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.5 $ $Source: /judy/src/JudyCommon/JudyMemUsed.c $ +// +// Return number of bytes of memory used to support a Judy1/L array. +// Compile with one of -DJUDY1 or -DJUDYL. + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +#ifdef JUDY1 +FUNCTION Word_t Judy1MemUsed +#else // JUDYL +FUNCTION Word_t JudyLMemUsed +#endif + ( + Pcvoid_t PArray // from which to retrieve. + ) +{ + Word_t Words = 0; + + if (PArray == (Pcvoid_t) NULL) return(0); + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlw = P_JLW(PArray); // first word of leaf. + Words = JU_LEAFWPOPTOWORDS(Pjlw[0] + 1); // based on pop1. + } + else + { + Pjpm_t Pjpm = P_JPM(PArray); + Words = Pjpm->jpm_TotalMemWords; + } + + return(Words * sizeof(Word_t)); // convert to bytes. + +} // Judy1MemUsed() / JudyLMemUsed() diff --git a/libnetdata/libjudy/src/JudyL/JudyLNext.c b/libnetdata/libjudy/src/JudyL/JudyLNext.c new file mode 100644 index 0000000..4bcdccf --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLNext.c @@ -0,0 +1,1890 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.54 $ $Source: /judy/src/JudyCommon/JudyPrevNext.c $ +// +// Judy*Prev() and Judy*Next() functions for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. +// +// Compile with -DJUDYNEXT for the Judy*Next() function; otherwise defaults to +// Judy*Prev(). + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifndef JUDYNEXT +#ifndef JUDYPREV +#define JUDYPREV 1 // neither set => use default. +#endif +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + + +// **************************************************************************** +// J U D Y 1 P R E V +// J U D Y 1 N E X T +// J U D Y L P R E V +// J U D Y L N E X T +// +// See the manual entry for the API. +// +// OVERVIEW OF Judy*Prev(): +// +// Use a reentrant switch statement (state machine, SM1 = "get") to decode the +// callers *PIndex-1, starting with the (PArray), through branches, if +// any, down to an immediate or a leaf. Look for *PIndex-1 in that leaf, and +// if found, return it. +// +// A dead end is either a branch that does not contain a JP for the appropriate +// digit in *PIndex-1, or a leaf that does not contain the undecoded digits of +// *PIndex-1. Upon reaching a dead end, backtrack through the leaf/branches +// that were just traversed, using a list (history) of parent JPs that is built +// while going forward in SM1Get. Start with the current leaf or branch. In a +// backtracked leaf, look for an Index less than *PIndex-1. In each +// backtracked branch, look "sideways" for the next JP, if any, lower than the +// one for the digit (from *PIndex-1) that was previously decoded. While +// backtracking, if a leaf has no previous Index or a branch has no lower JP, +// go to its parent branch in turn. Upon reaching the JRP, return failure, "no +// previous Index". The backtrack process is sufficiently different from +// SM1Get to merit its own separate reentrant switch statement (SM2 = +// "backtrack"). +// +// While backtracking, upon finding a lower JP in a branch, there is certain to +// be a "prev" Index under that JP (unless the Judy array is corrupt). +// Traverse forward again, this time taking the last (highest, right-most) JP +// in each branch, and the last (highest) Index upon reaching an immediate or a +// leaf. This traversal is sufficiently different from SM1Get and SM2Backtrack +// to merit its own separate reentrant switch statement (SM3 = "findlimit"). +// +// "Decode" bytes in JPs complicate this process a little. In SM1Get, when a +// JP is a narrow pointer, that is, when states are skipped (so the skipped +// digits are stored in jp_DcdPopO), compare the relevant digits to the same +// digits in *PIndex-1. If they are EQUAL, proceed in SM1Get as before. If +// jp_DcdPopOs digits are GREATER, treat the JP as a dead end and proceed in +// SM2Backtrack. If jp_DcdPopOs digits are LESS, treat the JP as if it had +// just been found during a backtrack and proceed directly in SM3Findlimit. +// +// Note that Decode bytes can be ignored in SM3Findlimit; they dont matter. +// Also note that in practice the Decode bytes are routinely compared with +// *PIndex-1 because thats simpler and no slower than first testing for +// narrowness. +// +// Decode bytes also make it unnecessary to construct the Index to return (the +// revised *PIndex) during the search. This step is deferred until finding an +// Index during backtrack or findlimit, before returning it. The first digit +// of *PIndex is derived (saved) based on which JP is used in a JRP branch. +// The remaining digits are obtained from the jp_DcdPopO field in the JP (if +// any) above the immediate or leaf containing the found (prev) Index, plus the +// remaining digit(s) in the immediate or leaf itself. In the case of a LEAFW, +// the Index to return is found directly in the leaf. +// +// Note: Theoretically, as described above, upon reaching a dead end, SM1Get +// passes control to SM2Backtrack to look sideways, even in a leaf. Actually +// its a little more efficient for the SM1Get leaf cases to shortcut this and +// take care of the sideways searches themselves. Hence the history list only +// contains branch JPs, and SM2Backtrack only handles branches. In fact, even +// the branch handling cases in SM1Get do some shortcutting (sideways +// searching) to avoid pushing history and calling SM2Backtrack unnecessarily. +// +// Upon reaching an Index to return after backtracking, *PIndex must be +// modified to the found Index. In principle this could be done by building +// the Index from a saved rootdigit (in the top branch) plus the Dcd bytes from +// the parent JP plus the appropriate Index bytes from the leaf. However, +// Immediates are difficult because their parent JPs lack one (last) digit. So +// instead just build the *PIndex to return "top down" while backtracking and +// findlimiting. +// +// This function is written iteratively for speed, rather than recursively. +// +// CAVEATS: +// +// Why use a backtrack list (history stack), since it has finite size? The +// size is small for Judy on both 32-bit and 64-bit systems, and a list (really +// just an array) is fast to maintain and use. Other alternatives include +// doing a lookahead (lookaside) in each branch while traversing forward +// (decoding), and restarting from the top upon a dead end. +// +// A lookahead means noting the last branch traversed which contained a +// non-null JP lower than the one specified by a digit in *PIndex-1, and +// returning to that point for SM3Findlimit. This seems like a good idea, and +// should be pretty cheap for linear and bitmap branches, but it could result +// in up to 31 unnecessary additional cache line fills (in extreme cases) for +// every uncompressed branch traversed. We have considered means of attaching +// to or hiding within an uncompressed branch (in null JPs) a "cache line map" +// or other structure, such as an offset to the next non-null JP, that would +// speed this up, but it seems unnecessary merely to avoid having a +// finite-length list (array). (If JudySL is ever made "native", the finite +// list length will be an issue.) +// +// Restarting at the top of the Judy array after a dead end requires a careful +// modification of *PIndex-1 to decrement the digit for the parent branch and +// set the remaining lower digits to all 1s. This must be repeated each time a +// parent branch contains another dead end, so even though it should all happen +// in cache, the CPU time can be excessive. (For JudySL or an equivalent +// "infinitely deep" Judy array, consider a hybrid of a large, finite, +// "circular" list and a restart-at-top when the list is backtracked to +// exhaustion.) +// +// Why search for *PIndex-1 instead of *PIndex during SM1Get? In rare +// instances this prevents an unnecessary decode down the wrong path followed +// by a backtrack; its pretty cheap to set up initially; and it means the +// SM1Get machine can simply return if/when it finds that Index. +// +// TBD: Wed like to enhance this function to make successive searches faster. +// This would require saving some previous state, including the previous Index +// returned, and in which leaf it was found. If the next call is for the same +// Index and the array has not been modified, start at the same leaf. This +// should be much easier to implement since this is iterative rather than +// recursive code. +// +// VARIATIONS FOR Judy*Next(): +// +// The Judy*Next() code is nearly a perfect mirror of the Judy*Prev() code. +// See the Judy*Prev() overview comments, and mentally switch the following: +// +// - "*PIndex-1" => "*PIndex+1" +// - "less than" => "greater than" +// - "lower" => "higher" +// - "lowest" => "highest" +// - "next-left" => "next-right" +// - "right-most" => "left-most" +// +// Note: SM3Findlimit could be called SM3Findmax/SM3Findmin, but a common name +// for both Prev and Next means many fewer ifdefs in this code. +// +// TBD: Currently this code traverses a JP whether its expanse is partially or +// completely full (populated). For Judy1 (only), since there is no value area +// needed, consider shortcutting to a "success" return upon encountering a full +// JP in SM1Get (or even SM3Findlimit?) A full JP looks like this: +// +// (((JU_JPDCDPOP0(Pjp) ^ cJU_ALLONES) & cJU_POP0MASK(cLevel)) == 0) + +#ifdef JUDY1 +#ifdef JUDYPREV +FUNCTION int Judy1Prev +#else +FUNCTION int Judy1Next +#endif +#else +#ifdef JUDYPREV +FUNCTION PPvoid_t JudyLPrev +#else +FUNCTION PPvoid_t JudyLNext +#endif +#endif + ( + Pcvoid_t PArray, // Judy array to search. + Word_t * PIndex, // starting point and result. + PJError_t PJError // optional, for returning error info. + ) +{ + Pjp_t Pjp, Pjp2; // current JPs. + Pjbl_t Pjbl; // Pjp->jp_Addr masked and cast to types: + Pjbb_t Pjbb; + Pjbu_t Pjbu; + +// Note: The following initialization is not strictly required but it makes +// gcc -Wall happy because there is an "impossible" path from Immed handling to +// SM1LeafLImm code that looks like Pjll might be used before set: + + Pjll_t Pjll = (Pjll_t) NULL; + Word_t state; // current state in SM. + Word_t digit; // next digit to decode from Index. + +// Note: The following initialization is not strictly required but it makes +// gcc -Wall happy because there is an "impossible" path from Immed handling to +// SM1LeafLImm code (for JudyL & JudyPrev only) that looks like pop1 might be +// used before set: + +#if (defined(JUDYL) && defined(JUDYPREV)) + Word_t pop1 = 0; // in a leaf. +#else + Word_t pop1; // in a leaf. +#endif + int offset; // linear branch/leaf, from j__udySearchLeaf*(). + int subexp; // subexpanse in a bitmap branch. + Word_t bitposmask; // bit in bitmap for Index. + +// History for SM2Backtrack: +// +// For a given histnum, APjphist[histnum] is a parent JP that points to a +// branch, and Aoffhist[histnum] is the offset of the NEXT JP in the branch to +// which the parent JP points. The meaning of Aoffhist[histnum] depends on the +// type of branch to which the parent JP points: +// +// Linear: Offset of the next JP in the JP list. +// +// Bitmap: Which subexpanse, plus the offset of the next JP in the +// subexpanses JP list (to avoid bit-counting again), plus for Judy*Next(), +// hidden one byte to the left, which digit, because Judy*Next() also needs +// this. +// +// Uncompressed: Digit, which is actually the offset of the JP in the branch. +// +// Note: Only branch JPs are stored in APjphist[] because, as explained +// earlier, SM1Get shortcuts sideways searches in leaves (and even in branches +// in some cases), so SM2Backtrack only handles branches. + +#define HISTNUMMAX cJU_ROOTSTATE // maximum branches traversable. + Pjp_t APjphist[HISTNUMMAX]; // list of branch JPs traversed. + int Aoffhist[HISTNUMMAX]; // list of next JP offsets; see above. + int histnum = 0; // number of JPs now in list. + + +// ---------------------------------------------------------------------------- +// M A C R O S +// +// These are intended to make the code a bit more readable and less redundant. + + +// "PUSH" AND "POP" Pjp AND offset ON HISTORY STACKS: +// +// Note: Ensure a corrupt Judy array does not overflow *hist[]. Meanwhile, +// underflowing *hist[] simply means theres no more room to backtrack => +// "no previous/next Index". + +#define HISTPUSH(Pjp,Offset) \ + APjphist[histnum] = (Pjp); \ + Aoffhist[histnum] = (Offset); \ + \ + if (++histnum >= HISTNUMMAX) \ + { \ + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT) \ + JUDY1CODE(return(JERRI );) \ + JUDYLCODE(return(PPJERR);) \ + } + +#define HISTPOP(Pjp,Offset) \ + if ((histnum--) < 1) JU_RET_NOTFOUND; \ + (Pjp) = APjphist[histnum]; \ + (Offset) = Aoffhist[histnum] + +// How to pack/unpack Aoffhist[] values for bitmap branches: + +#ifdef JUDYPREV + +#define HISTPUSHBOFF(Subexp,Offset,Digit) \ + (((Subexp) * cJU_BITSPERSUBEXPB) | (Offset)) + +#define HISTPOPBOFF(Subexp,Offset,Digit) \ + (Subexp) = (Offset) / cJU_BITSPERSUBEXPB; \ + (Offset) %= cJU_BITSPERSUBEXPB +#else + +#define HISTPUSHBOFF(Subexp,Offset,Digit) \ + (((Digit) << cJU_BITSPERBYTE) \ + | ((Subexp) * cJU_BITSPERSUBEXPB) | (Offset)) + +#define HISTPOPBOFF(Subexp,Offset,Digit) \ + (Digit) = (Offset) >> cJU_BITSPERBYTE; \ + (Subexp) = ((Offset) & JU_LEASTBYTESMASK(1)) / cJU_BITSPERSUBEXPB; \ + (Offset) %= cJU_BITSPERSUBEXPB +#endif + + +// CHECK FOR NULL JP: + +#define JPNULL(Type) (((Type) >= cJU_JPNULL1) && ((Type) <= cJU_JPNULLMAX)) + + +// SEARCH A BITMAP: +// +// This is a weak analog of j__udySearchLeaf*() for bitmaps. Return the actual +// or next-left position, base 0, of Digit in the single uint32_t bitmap, also +// given a Bitposmask for Digit. +// +// Unlike j__udySearchLeaf*(), the offset is not returned bit-complemented if +// Digits bit is unset, because the caller can check the bitmap themselves to +// determine that. Also, if Digits bit is unset, the returned offset is to +// the next-left JP (including -1), not to the "ideal" position for the Index = +// next-right JP. +// +// Shortcut and skip calling j__udyCountBits*() if the bitmap is full, in which +// case (Digit % cJU_BITSPERSUBEXP*) itself is the base-0 offset. +// +// TBD for Judy*Next(): Should this return next-right instead of next-left? +// That is, +1 from current value? Maybe not, if Digits bit IS set, +1 would +// be wrong. + +#define SEARCHBITMAPB(Bitmap,Digit,Bitposmask) \ + (((Bitmap) == cJU_FULLBITMAPB) ? (Digit % cJU_BITSPERSUBEXPB) : \ + j__udyCountBitsB((Bitmap) & JU_MASKLOWERINC(Bitposmask)) - 1) + +#define SEARCHBITMAPL(Bitmap,Digit,Bitposmask) \ + (((Bitmap) == cJU_FULLBITMAPL) ? (Digit % cJU_BITSPERSUBEXPL) : \ + j__udyCountBitsL((Bitmap) & JU_MASKLOWERINC(Bitposmask)) - 1) + +#ifdef JUDYPREV +// Equivalent to search for the highest offset in Bitmap: + +#define SEARCHBITMAPMAXB(Bitmap) \ + (((Bitmap) == cJU_FULLBITMAPB) ? cJU_BITSPERSUBEXPB - 1 : \ + j__udyCountBitsB(Bitmap) - 1) + +#define SEARCHBITMAPMAXL(Bitmap) \ + (((Bitmap) == cJU_FULLBITMAPL) ? cJU_BITSPERSUBEXPL - 1 : \ + j__udyCountBitsL(Bitmap) - 1) +#endif + + +// CHECK DECODE BYTES: +// +// Check Decode bytes in a JP against the equivalent portion of *PIndex. If +// *PIndex is lower (for Judy*Prev()) or higher (for Judy*Next()), this JP is a +// dead end (the same as if it had been absent in a linear or bitmap branch or +// null in an uncompressed branch), enter SM2Backtrack; otherwise enter +// SM3Findlimit to find the highest/lowest Index under this JP, as if the code +// had already backtracked to this JP. + +#ifdef JUDYPREV +#define CDcmp__ < +#else +#define CDcmp__ > +#endif + +#define CHECKDCD(cState) \ + if (JU_DCDNOTMATCHINDEX(*PIndex, Pjp, cState)) \ + { \ + if ((*PIndex & cJU_DCDMASK(cState)) \ + CDcmp__(JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(cState))) \ + { \ + goto SM2Backtrack; \ + } \ + goto SM3Findlimit; \ + } + + +// PREPARE TO HANDLE A LEAFW OR JRP BRANCH IN SM1: +// +// Extract a state-dependent digit from Index in a "constant" way, then jump to +// common code for multiple cases. + +#define SM1PREPB(cState,Next) \ + state = (cState); \ + digit = JU_DIGITATSTATE(*PIndex, cState); \ + goto Next + + +// PREPARE TO HANDLE A LEAFW OR JRP BRANCH IN SM3: +// +// Optionally save Dcd bytes into *PIndex, then save state and jump to common +// code for multiple cases. + +#define SM3PREPB_DCD(cState,Next) \ + JU_SETDCD(*PIndex, Pjp, cState); \ + SM3PREPB(cState,Next) + +#define SM3PREPB(cState,Next) state = (cState); goto Next + + +// ---------------------------------------------------------------------------- +// CHECK FOR SHORTCUTS: +// +// Error out if PIndex is null. Execute JU_RET_NOTFOUND if the Judy array is +// empty or *PIndex is already the minimum/maximum Index possible. +// +// Note: As documented, in case of failure *PIndex may be modified. + + if (PIndex == (PWord_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + +#ifdef JUDYPREV + if ((PArray == (Pvoid_t) NULL) || ((*PIndex)-- == 0)) +#else + if ((PArray == (Pvoid_t) NULL) || ((*PIndex)++ == cJU_ALLONES)) +#endif + JU_RET_NOTFOUND; + + +// HANDLE JRP: +// +// Before even entering SM1Get, check the JRP type. For JRP branches, traverse +// the JPM; handle LEAFW leaves directly; but look for the most common cases +// first. + +// ROOT-STATE LEAF that starts with a Pop0 word; just look within the leaf: +// +// If *PIndex is in the leaf, return it; otherwise return the Index, if any, +// below where it would belong. + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlw = P_JLW(PArray); // first word of leaf. + pop1 = Pjlw[0] + 1; + + if ((offset = j__udySearchLeafW(Pjlw + 1, pop1, *PIndex)) + >= 0) // Index is present. + { + assert(offset < pop1); // in expected range. + JU_RET_FOUND_LEAFW(Pjlw, pop1, offset); // *PIndex is set. + } + +#ifdef JUDYPREV + if ((offset = ~offset) == 0) // no next-left Index. +#else + if ((offset = ~offset) >= pop1) // no next-right Index. +#endif + JU_RET_NOTFOUND; + + assert(offset <= pop1); // valid result. + +#ifdef JUDYPREV + *PIndex = Pjlw[offset--]; // next-left Index, base 1. +#else + *PIndex = Pjlw[offset + 1]; // next-right Index, base 1. +#endif + JU_RET_FOUND_LEAFW(Pjlw, pop1, offset); // base 0. + + } + else // JRP BRANCH + { + Pjpm_t Pjpm = P_JPM(PArray); + Pjp = &(Pjpm->jpm_JP); + +// goto SM1Get; + } + +// ============================================================================ +// STATE MACHINE 1 -- GET INDEX: +// +// Search for *PIndex (already decremented/incremented so as to be inclusive). +// If found, return it. Otherwise in theory hand off to SM2Backtrack or +// SM3Findlimit, but in practice "shortcut" by first sideways searching the +// current branch or leaf upon hitting a dead end. During sideways search, +// modify *PIndex to a new path taken. +// +// ENTRY: Pjp points to next JP to interpret, whose Decode bytes have not yet +// been checked. This JP is not yet listed in history. +// +// Note: Check Decode bytes at the start of each loop, not after looking up a +// new JP, so its easy to do constant shifts/masks, although this requires +// cautious handling of Pjp, offset, and *hist[] for correct entry to +// SM2Backtrack. +// +// EXIT: Return, or branch to SM2Backtrack or SM3Findlimit with correct +// interface, as described elsewhere. +// +// WARNING: For run-time efficiency the following cases replicate code with +// varying constants, rather than using common code with variable values! + +SM1Get: // return here for next branch/leaf. + + switch (JU_JPTYPE(Pjp)) + { + + +// ---------------------------------------------------------------------------- +// LINEAR BRANCH: +// +// Check Decode bytes, if any, in the current JP, then search for a JP for the +// next digit in *PIndex. + + case cJU_JPBRANCH_L2: CHECKDCD(2); SM1PREPB(2, SM1BranchL); + case cJU_JPBRANCH_L3: CHECKDCD(3); SM1PREPB(3, SM1BranchL); +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: CHECKDCD(4); SM1PREPB(4, SM1BranchL); + case cJU_JPBRANCH_L5: CHECKDCD(5); SM1PREPB(5, SM1BranchL); + case cJU_JPBRANCH_L6: CHECKDCD(6); SM1PREPB(6, SM1BranchL); + case cJU_JPBRANCH_L7: CHECKDCD(7); SM1PREPB(7, SM1BranchL); +#endif + case cJU_JPBRANCH_L: SM1PREPB(cJU_ROOTSTATE, SM1BranchL); + +// Common code (state-independent) for all cases of linear branches: + +SM1BranchL: + Pjbl = P_JBL(Pjp->jp_Addr); + +// Found JP matching current digit in *PIndex; record parent JP and the next +// JPs offset, and iterate to the next JP: + + if ((offset = j__udySearchLeaf1((Pjll_t) (Pjbl->jbl_Expanse), + Pjbl->jbl_NumJPs, digit)) >= 0) + { + HISTPUSH(Pjp, offset); + Pjp = (Pjbl->jbl_jp) + offset; + goto SM1Get; + } + +// Dead end, no JP in BranchL for next digit in *PIndex: +// +// Get the ideal location of digits JP, and if theres no next-left/right JP +// in the BranchL, shortcut and start backtracking one level up; ignore the +// current Pjp because it points to a BranchL with no next-left/right JP. + +#ifdef JUDYPREV + if ((offset = (~offset) - 1) < 0) // no next-left JP in BranchL. +#else + if ((offset = (~offset)) >= Pjbl->jbl_NumJPs) // no next-right. +#endif + goto SM2Backtrack; + +// Theres a next-left/right JP in the current BranchL; save its digit in +// *PIndex and shortcut to SM3Findlimit: + + JU_SETDIGIT(*PIndex, Pjbl->jbl_Expanse[offset], state); + Pjp = (Pjbl->jbl_jp) + offset; + goto SM3Findlimit; + + +// ---------------------------------------------------------------------------- +// BITMAP BRANCH: +// +// Check Decode bytes, if any, in the current JP, then look for a JP for the +// next digit in *PIndex. + + case cJU_JPBRANCH_B2: CHECKDCD(2); SM1PREPB(2, SM1BranchB); + case cJU_JPBRANCH_B3: CHECKDCD(3); SM1PREPB(3, SM1BranchB); +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: CHECKDCD(4); SM1PREPB(4, SM1BranchB); + case cJU_JPBRANCH_B5: CHECKDCD(5); SM1PREPB(5, SM1BranchB); + case cJU_JPBRANCH_B6: CHECKDCD(6); SM1PREPB(6, SM1BranchB); + case cJU_JPBRANCH_B7: CHECKDCD(7); SM1PREPB(7, SM1BranchB); +#endif + case cJU_JPBRANCH_B: SM1PREPB(cJU_ROOTSTATE, SM1BranchB); + +// Common code (state-independent) for all cases of bitmap branches: + +SM1BranchB: + Pjbb = P_JBB(Pjp->jp_Addr); + +// Locate the digits JP in the subexpanse list, if present, otherwise the +// offset of the next-left JP, if any: + + subexp = digit / cJU_BITSPERSUBEXPB; + assert(subexp < cJU_NUMSUBEXPB); // falls in expected range. + bitposmask = JU_BITPOSMASKB(digit); + offset = SEARCHBITMAPB(JU_JBB_BITMAP(Pjbb, subexp), digit, + bitposmask); + // right range: + assert((offset >= -1) && (offset < (int) cJU_BITSPERSUBEXPB)); + +// Found JP matching current digit in *PIndex: +// +// Record the parent JP and the next JPs offset; and iterate to the next JP. + +// if (JU_BITMAPTESTB(Pjbb, digit)) // slower. + if (JU_JBB_BITMAP(Pjbb, subexp) & bitposmask) // faster. + { + // not negative since at least one bit is set: + assert(offset >= 0); + + HISTPUSH(Pjp, HISTPUSHBOFF(subexp, offset, digit)); + + if ((Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp))) == (Pjp_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + + Pjp += offset; + goto SM1Get; // iterate to next JP. + } + +// Dead end, no JP in BranchB for next digit in *PIndex: +// +// If theres a next-left/right JP in the current BranchB, shortcut to +// SM3Findlimit. Note: offset is already set to the correct value for the +// next-left/right JP. + +#ifdef JUDYPREV + if (offset >= 0) // next-left JP is in this subexpanse. + goto SM1BranchBFindlimit; + + while (--subexp >= 0) // search next-left subexpanses. +#else + if (JU_JBB_BITMAP(Pjbb, subexp) & JU_MASKHIGHEREXC(bitposmask)) + { + ++offset; // next-left => next-right. + goto SM1BranchBFindlimit; + } + + while (++subexp < cJU_NUMSUBEXPB) // search next-right subexps. +#endif + { + if (! JU_JBB_PJP(Pjbb, subexp)) continue; // empty subexpanse. + +#ifdef JUDYPREV + offset = SEARCHBITMAPMAXB(JU_JBB_BITMAP(Pjbb, subexp)); + // expected range: + assert((offset >= 0) && (offset < cJU_BITSPERSUBEXPB)); +#else + offset = 0; +#endif + +// Save the next-left/right JPs digit in *PIndex: + +SM1BranchBFindlimit: + JU_BITMAPDIGITB(digit, subexp, JU_JBB_BITMAP(Pjbb, subexp), + offset); + JU_SETDIGIT(*PIndex, digit, state); + + if ((Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp))) == (Pjp_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + + Pjp += offset; + goto SM3Findlimit; + } + +// Theres no next-left/right JP in the BranchB: +// +// Shortcut and start backtracking one level up; ignore the current Pjp because +// it points to a BranchB with no next-left/right JP. + + goto SM2Backtrack; + + +// ---------------------------------------------------------------------------- +// UNCOMPRESSED BRANCH: +// +// Check Decode bytes, if any, in the current JP, then look for a JP for the +// next digit in *PIndex. + + case cJU_JPBRANCH_U2: CHECKDCD(2); SM1PREPB(2, SM1BranchU); + case cJU_JPBRANCH_U3: CHECKDCD(3); SM1PREPB(3, SM1BranchU); +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: CHECKDCD(4); SM1PREPB(4, SM1BranchU); + case cJU_JPBRANCH_U5: CHECKDCD(5); SM1PREPB(5, SM1BranchU); + case cJU_JPBRANCH_U6: CHECKDCD(6); SM1PREPB(6, SM1BranchU); + case cJU_JPBRANCH_U7: CHECKDCD(7); SM1PREPB(7, SM1BranchU); +#endif + case cJU_JPBRANCH_U: SM1PREPB(cJU_ROOTSTATE, SM1BranchU); + +// Common code (state-independent) for all cases of uncompressed branches: + +SM1BranchU: + Pjbu = P_JBU(Pjp->jp_Addr); + Pjp2 = (Pjbu->jbu_jp) + digit; + +// Found JP matching current digit in *PIndex: +// +// Record the parent JP and the next JPs digit, and iterate to the next JP. +// +// TBD: Instead of this, just goto SM1Get, and add cJU_JPNULL* cases to the +// SM1Get state machine? Then backtrack? However, it means you cant detect +// an inappropriate cJU_JPNULL*, when it occurs in other than a BranchU, and +// return JU_RET_CORRUPT. + + if (! JPNULL(JU_JPTYPE(Pjp2))) // digit has a JP. + { + HISTPUSH(Pjp, digit); + Pjp = Pjp2; + goto SM1Get; + } + +// Dead end, no JP in BranchU for next digit in *PIndex: +// +// Search for a next-left/right JP in the current BranchU, and if one is found, +// save its digit in *PIndex and shortcut to SM3Findlimit: + +#ifdef JUDYPREV + while (digit >= 1) + { + Pjp = (Pjbu->jbu_jp) + (--digit); +#else + while (digit < cJU_BRANCHUNUMJPS - 1) + { + Pjp = (Pjbu->jbu_jp) + (++digit); +#endif + if (JPNULL(JU_JPTYPE(Pjp))) continue; + + JU_SETDIGIT(*PIndex, digit, state); + goto SM3Findlimit; + } + +// Theres no next-left/right JP in the BranchU: +// +// Shortcut and start backtracking one level up; ignore the current Pjp because +// it points to a BranchU with no next-left/right JP. + + goto SM2Backtrack; + + +// ---------------------------------------------------------------------------- +// LINEAR LEAF: +// +// Check Decode bytes, if any, in the current JP, then search the leaf for +// *PIndex. + +#define SM1LEAFL(Func) \ + Pjll = P_JLL(Pjp->jp_Addr); \ + pop1 = JU_JPLEAF_POP0(Pjp) + 1; \ + offset = Func(Pjll, pop1, *PIndex); \ + goto SM1LeafLImm + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: CHECKDCD(1); SM1LEAFL(j__udySearchLeaf1); +#endif + case cJU_JPLEAF2: CHECKDCD(2); SM1LEAFL(j__udySearchLeaf2); + case cJU_JPLEAF3: CHECKDCD(3); SM1LEAFL(j__udySearchLeaf3); + +#ifdef JU_64BIT + case cJU_JPLEAF4: CHECKDCD(4); SM1LEAFL(j__udySearchLeaf4); + case cJU_JPLEAF5: CHECKDCD(5); SM1LEAFL(j__udySearchLeaf5); + case cJU_JPLEAF6: CHECKDCD(6); SM1LEAFL(j__udySearchLeaf6); + case cJU_JPLEAF7: CHECKDCD(7); SM1LEAFL(j__udySearchLeaf7); +#endif + +// Common code (state-independent) for all cases of linear leaves and +// immediates: + +SM1LeafLImm: + if (offset >= 0) // *PIndex is in LeafL / Immed. +#ifdef JUDY1 + JU_RET_FOUND; +#else + { // JudyL is trickier... + switch (JU_JPTYPE(Pjp)) + { +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: JU_RET_FOUND_LEAF1(Pjll, pop1, offset); +#endif + case cJU_JPLEAF2: JU_RET_FOUND_LEAF2(Pjll, pop1, offset); + case cJU_JPLEAF3: JU_RET_FOUND_LEAF3(Pjll, pop1, offset); +#ifdef JU_64BIT + case cJU_JPLEAF4: JU_RET_FOUND_LEAF4(Pjll, pop1, offset); + case cJU_JPLEAF5: JU_RET_FOUND_LEAF5(Pjll, pop1, offset); + case cJU_JPLEAF6: JU_RET_FOUND_LEAF6(Pjll, pop1, offset); + case cJU_JPLEAF7: JU_RET_FOUND_LEAF7(Pjll, pop1, offset); +#endif + + case cJU_JPIMMED_1_01: + case cJU_JPIMMED_2_01: + case cJU_JPIMMED_3_01: +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: + case cJU_JPIMMED_5_01: + case cJU_JPIMMED_6_01: + case cJU_JPIMMED_7_01: +#endif + JU_RET_FOUND_IMM_01(Pjp); + + case cJU_JPIMMED_1_02: + case cJU_JPIMMED_1_03: +#ifdef JU_64BIT + case cJU_JPIMMED_1_04: + case cJU_JPIMMED_1_05: + case cJU_JPIMMED_1_06: + case cJU_JPIMMED_1_07: + case cJU_JPIMMED_2_02: + case cJU_JPIMMED_2_03: + case cJU_JPIMMED_3_02: +#endif + JU_RET_FOUND_IMM(Pjp, offset); + } + + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); // impossible? + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // found *PIndex + +#endif // JUDYL + +// Dead end, no Index in LeafL / Immed for remaining digit(s) in *PIndex: +// +// Get the ideal location of Index, and if theres no next-left/right Index in +// the LeafL / Immed, shortcut and start backtracking one level up; ignore the +// current Pjp because it points to a LeafL / Immed with no next-left/right +// Index. + +#ifdef JUDYPREV + if ((offset = (~offset) - 1) < 0) // no next-left Index. +#else + if ((offset = (~offset)) >= pop1) // no next-right Index. +#endif + goto SM2Backtrack; + +// Theres a next-left/right Index in the current LeafL / Immed; shortcut by +// copying its digit(s) to *PIndex and returning it. +// +// Unfortunately this is pretty hairy, especially avoiding endian issues. +// +// The cJU_JPLEAF* cases are very similar to same-index-size cJU_JPIMMED* cases +// for *_02 and above, but must return differently, at least for JudyL, so +// spell them out separately here at the cost of a little redundant code for +// Judy1. + + switch (JU_JPTYPE(Pjp)) + { +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: + + JU_SETDIGIT1(*PIndex, ((uint8_t *) Pjll)[offset]); + JU_RET_FOUND_LEAF1(Pjll, pop1, offset); +#endif + + case cJU_JPLEAF2: + + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(2))) + | ((uint16_t *) Pjll)[offset]; + JU_RET_FOUND_LEAF2(Pjll, pop1, offset); + + case cJU_JPLEAF3: + { + Word_t lsb; + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_LEAF3(Pjll, pop1, offset); + } + +#ifdef JU_64BIT + case cJU_JPLEAF4: + + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(4))) + | ((uint32_t *) Pjll)[offset]; + JU_RET_FOUND_LEAF4(Pjll, pop1, offset); + + case cJU_JPLEAF5: + { + Word_t lsb; + JU_COPY5_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (5 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(5))) | lsb; + JU_RET_FOUND_LEAF5(Pjll, pop1, offset); + } + + case cJU_JPLEAF6: + { + Word_t lsb; + JU_COPY6_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (6 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(6))) | lsb; + JU_RET_FOUND_LEAF6(Pjll, pop1, offset); + } + + case cJU_JPLEAF7: + { + Word_t lsb; + JU_COPY7_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (7 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(7))) | lsb; + JU_RET_FOUND_LEAF7(Pjll, pop1, offset); + } + +#endif // JU_64BIT + +#define SET_01(cState) JU_SETDIGITS(*PIndex, JU_JPDCDPOP0(Pjp), cState) + + case cJU_JPIMMED_1_01: SET_01(1); goto SM1Imm_01; + case cJU_JPIMMED_2_01: SET_01(2); goto SM1Imm_01; + case cJU_JPIMMED_3_01: SET_01(3); goto SM1Imm_01; +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: SET_01(4); goto SM1Imm_01; + case cJU_JPIMMED_5_01: SET_01(5); goto SM1Imm_01; + case cJU_JPIMMED_6_01: SET_01(6); goto SM1Imm_01; + case cJU_JPIMMED_7_01: SET_01(7); goto SM1Imm_01; +#endif +SM1Imm_01: JU_RET_FOUND_IMM_01(Pjp); + +// Shorthand for where to find start of Index bytes array: + +#ifdef JUDY1 +#define PJI (Pjp->jp_1Index) +#else +#define PJI (Pjp->jp_LIndex) +#endif + + case cJU_JPIMMED_1_02: + case cJU_JPIMMED_1_03: +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: + case cJU_JPIMMED_1_05: + case cJU_JPIMMED_1_06: + case cJU_JPIMMED_1_07: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: + case cJ1_JPIMMED_1_09: + case cJ1_JPIMMED_1_10: + case cJ1_JPIMMED_1_11: + case cJ1_JPIMMED_1_12: + case cJ1_JPIMMED_1_13: + case cJ1_JPIMMED_1_14: + case cJ1_JPIMMED_1_15: +#endif + JU_SETDIGIT1(*PIndex, ((uint8_t *) PJI)[offset]); + JU_RET_FOUND_IMM(Pjp, offset); + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: + case cJU_JPIMMED_2_03: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: + case cJ1_JPIMMED_2_05: + case cJ1_JPIMMED_2_06: + case cJ1_JPIMMED_2_07: +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(2))) + | ((uint16_t *) PJI)[offset]; + JU_RET_FOUND_IMM(Pjp, offset); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: + case cJ1_JPIMMED_3_04: + case cJ1_JPIMMED_3_05: +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + { + Word_t lsb; + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_4_02: + case cJ1_JPIMMED_4_03: + + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(4))) + | ((uint32_t *) PJI)[offset]; + JU_RET_FOUND_IMM(Pjp, offset); + + case cJ1_JPIMMED_5_02: + case cJ1_JPIMMED_5_03: + { + Word_t lsb; + JU_COPY5_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (5 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(5))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } + + case cJ1_JPIMMED_6_02: + { + Word_t lsb; + JU_COPY6_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (6 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(6))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } + + case cJ1_JPIMMED_7_02: + { + Word_t lsb; + JU_COPY7_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (7 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(7))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } + +#endif // (JUDY1 && JU_64BIT) + + } // switch for not-found *PIndex + + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); // impossible? + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + +// ---------------------------------------------------------------------------- +// BITMAP LEAF: +// +// Check Decode bytes, if any, in the current JP, then look in the leaf for +// *PIndex. + + case cJU_JPLEAF_B1: + { + Pjlb_t Pjlb; + CHECKDCD(1); + + Pjlb = P_JLB(Pjp->jp_Addr); + digit = JU_DIGITATSTATE(*PIndex, 1); + subexp = JU_SUBEXPL(digit); + bitposmask = JU_BITPOSMASKL(digit); + assert(subexp < cJU_NUMSUBEXPL); // falls in expected range. + +// *PIndex exists in LeafB1: + +// if (JU_BITMAPTESTL(Pjlb, digit)) // slower. + if (JU_JLB_BITMAP(Pjlb, subexp) & bitposmask) // faster. + { +#ifdef JUDYL // needs offset at this point: + offset = SEARCHBITMAPL(JU_JLB_BITMAP(Pjlb, subexp), digit, bitposmask); +#endif + JU_RET_FOUND_LEAF_B1(Pjlb, subexp, offset); +// == return((PPvoid_t) (P_JV(JL_JLB_PVALUE(Pjlb, subexp)) + (offset))); + } + +// Dead end, no Index in LeafB1 for remaining digit in *PIndex: +// +// If theres a next-left/right Index in the current LeafB1, which for +// Judy*Next() is true if any bits are set for higher Indexes, shortcut by +// returning it. Note: For Judy*Prev(), offset is set here to the correct +// value for the next-left JP. + + offset = SEARCHBITMAPL(JU_JLB_BITMAP(Pjlb, subexp), digit, + bitposmask); + // right range: + assert((offset >= -1) && (offset < (int) cJU_BITSPERSUBEXPL)); + +#ifdef JUDYPREV + if (offset >= 0) // next-left JP is in this subexpanse. + goto SM1LeafB1Findlimit; + + while (--subexp >= 0) // search next-left subexpanses. +#else + if (JU_JLB_BITMAP(Pjlb, subexp) & JU_MASKHIGHEREXC(bitposmask)) + { + ++offset; // next-left => next-right. + goto SM1LeafB1Findlimit; + } + + while (++subexp < cJU_NUMSUBEXPL) // search next-right subexps. +#endif + { + if (! JU_JLB_BITMAP(Pjlb, subexp)) continue; // empty subexp. + +#ifdef JUDYPREV + offset = SEARCHBITMAPMAXL(JU_JLB_BITMAP(Pjlb, subexp)); + // expected range: + assert((offset >= 0) && (offset < (int) cJU_BITSPERSUBEXPL)); +#else + offset = 0; +#endif + +// Save the next-left/right Indexess digit in *PIndex: + +SM1LeafB1Findlimit: + JU_BITMAPDIGITL(digit, subexp, JU_JLB_BITMAP(Pjlb, subexp), offset); + JU_SETDIGIT1(*PIndex, digit); + JU_RET_FOUND_LEAF_B1(Pjlb, subexp, offset); +// == return((PPvoid_t) (P_JV(JL_JLB_PVALUE(Pjlb, subexp)) + (offset))); + } + +// Theres no next-left/right Index in the LeafB1: +// +// Shortcut and start backtracking one level up; ignore the current Pjp because +// it points to a LeafB1 with no next-left/right Index. + + goto SM2Backtrack; + + } // case cJU_JPLEAF_B1 + +#ifdef JUDY1 +// ---------------------------------------------------------------------------- +// FULL POPULATION: +// +// If the Decode bytes match, *PIndex is found (without modification). + + case cJ1_JPFULLPOPU1: + + CHECKDCD(1); + JU_RET_FOUND_FULLPOPU1; +#endif + + +// ---------------------------------------------------------------------------- +// IMMEDIATE: + +#ifdef JUDYPREV +#define SM1IMM_SETPOP1(cPop1) +#else +#define SM1IMM_SETPOP1(cPop1) pop1 = (cPop1) +#endif + +#define SM1IMM(Func,cPop1) \ + SM1IMM_SETPOP1(cPop1); \ + offset = Func((Pjll_t) (PJI), cPop1, *PIndex); \ + goto SM1LeafLImm + +// Special case for Pop1 = 1 Immediate JPs: +// +// If *PIndex is in the immediate, offset is 0, otherwise the binary NOT of the +// offset where it belongs, 0 or 1, same as from the search functions. + +#ifdef JUDYPREV +#define SM1IMM_01_SETPOP1 +#else +#define SM1IMM_01_SETPOP1 pop1 = 1 +#endif + +#define SM1IMM_01 \ + SM1IMM_01_SETPOP1; \ + offset = ((JU_JPDCDPOP0(Pjp) < JU_TRIMTODCDSIZE(*PIndex)) ? ~1 : \ + (JU_JPDCDPOP0(Pjp) == JU_TRIMTODCDSIZE(*PIndex)) ? 0 : \ + ~0); \ + goto SM1LeafLImm + + case cJU_JPIMMED_1_01: + case cJU_JPIMMED_2_01: + case cJU_JPIMMED_3_01: +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: + case cJU_JPIMMED_5_01: + case cJU_JPIMMED_6_01: + case cJU_JPIMMED_7_01: +#endif + SM1IMM_01; + +// TBD: Doug says it would be OK to have fewer calls and calculate arg 2, here +// and in Judy*Count() also. + + case cJU_JPIMMED_1_02: SM1IMM(j__udySearchLeaf1, 2); + case cJU_JPIMMED_1_03: SM1IMM(j__udySearchLeaf1, 3); +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: SM1IMM(j__udySearchLeaf1, 4); + case cJU_JPIMMED_1_05: SM1IMM(j__udySearchLeaf1, 5); + case cJU_JPIMMED_1_06: SM1IMM(j__udySearchLeaf1, 6); + case cJU_JPIMMED_1_07: SM1IMM(j__udySearchLeaf1, 7); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: SM1IMM(j__udySearchLeaf1, 8); + case cJ1_JPIMMED_1_09: SM1IMM(j__udySearchLeaf1, 9); + case cJ1_JPIMMED_1_10: SM1IMM(j__udySearchLeaf1, 10); + case cJ1_JPIMMED_1_11: SM1IMM(j__udySearchLeaf1, 11); + case cJ1_JPIMMED_1_12: SM1IMM(j__udySearchLeaf1, 12); + case cJ1_JPIMMED_1_13: SM1IMM(j__udySearchLeaf1, 13); + case cJ1_JPIMMED_1_14: SM1IMM(j__udySearchLeaf1, 14); + case cJ1_JPIMMED_1_15: SM1IMM(j__udySearchLeaf1, 15); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: SM1IMM(j__udySearchLeaf2, 2); + case cJU_JPIMMED_2_03: SM1IMM(j__udySearchLeaf2, 3); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: SM1IMM(j__udySearchLeaf2, 4); + case cJ1_JPIMMED_2_05: SM1IMM(j__udySearchLeaf2, 5); + case cJ1_JPIMMED_2_06: SM1IMM(j__udySearchLeaf2, 6); + case cJ1_JPIMMED_2_07: SM1IMM(j__udySearchLeaf2, 7); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: SM1IMM(j__udySearchLeaf3, 2); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: SM1IMM(j__udySearchLeaf3, 3); + case cJ1_JPIMMED_3_04: SM1IMM(j__udySearchLeaf3, 4); + case cJ1_JPIMMED_3_05: SM1IMM(j__udySearchLeaf3, 5); + + case cJ1_JPIMMED_4_02: SM1IMM(j__udySearchLeaf4, 2); + case cJ1_JPIMMED_4_03: SM1IMM(j__udySearchLeaf4, 3); + + case cJ1_JPIMMED_5_02: SM1IMM(j__udySearchLeaf5, 2); + case cJ1_JPIMMED_5_03: SM1IMM(j__udySearchLeaf5, 3); + + case cJ1_JPIMMED_6_02: SM1IMM(j__udySearchLeaf6, 2); + + case cJ1_JPIMMED_7_02: SM1IMM(j__udySearchLeaf7, 2); +#endif + + +// ---------------------------------------------------------------------------- +// INVALID JP TYPE: + + default: JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // SM1Get switch. + + /*NOTREACHED*/ + + +// ============================================================================ +// STATE MACHINE 2 -- BACKTRACK BRANCH TO PREVIOUS JP: +// +// Look for the next-left/right JP in a branch, backing up the history list as +// necessary. Upon finding a next-left/right JP, modify the corresponding +// digit in *PIndex before passing control to SM3Findlimit. +// +// Note: As described earlier, only branch JPs are expected here; other types +// fall into the default case. +// +// Note: If a found JP contains needed Dcd bytes, thats OK, theyre copied to +// *PIndex in SM3Findlimit. +// +// TBD: This code has a lot in common with similar code in the shortcut cases +// in SM1Get. Can combine this code somehow? +// +// ENTRY: List, possibly empty, of JPs and offsets in APjphist[] and +// Aoffhist[]; see earlier comments. +// +// EXIT: Execute JU_RET_NOTFOUND if no previous/next JP; otherwise jump to +// SM3Findlimit to resume a new but different downward search. + +SM2Backtrack: // come or return here for first/next sideways search. + + HISTPOP(Pjp, offset); + + switch (JU_JPTYPE(Pjp)) + { + + +// ---------------------------------------------------------------------------- +// LINEAR BRANCH: + + case cJU_JPBRANCH_L2: state = 2; goto SM2BranchL; + case cJU_JPBRANCH_L3: state = 3; goto SM2BranchL; +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: state = 4; goto SM2BranchL; + case cJU_JPBRANCH_L5: state = 5; goto SM2BranchL; + case cJU_JPBRANCH_L6: state = 6; goto SM2BranchL; + case cJU_JPBRANCH_L7: state = 7; goto SM2BranchL; +#endif + case cJU_JPBRANCH_L: state = cJU_ROOTSTATE; goto SM2BranchL; + +SM2BranchL: +#ifdef JUDYPREV + if (--offset < 0) goto SM2Backtrack; // no next-left JP in BranchL. +#endif + Pjbl = P_JBL(Pjp->jp_Addr); +#ifdef JUDYNEXT + if (++offset >= (Pjbl->jbl_NumJPs)) goto SM2Backtrack; + // no next-right JP in BranchL. +#endif + +// Theres a next-left/right JP in the current BranchL; save its digit in +// *PIndex and continue with SM3Findlimit: + + JU_SETDIGIT(*PIndex, Pjbl->jbl_Expanse[offset], state); + Pjp = (Pjbl->jbl_jp) + offset; + goto SM3Findlimit; + + +// ---------------------------------------------------------------------------- +// BITMAP BRANCH: + + case cJU_JPBRANCH_B2: state = 2; goto SM2BranchB; + case cJU_JPBRANCH_B3: state = 3; goto SM2BranchB; +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: state = 4; goto SM2BranchB; + case cJU_JPBRANCH_B5: state = 5; goto SM2BranchB; + case cJU_JPBRANCH_B6: state = 6; goto SM2BranchB; + case cJU_JPBRANCH_B7: state = 7; goto SM2BranchB; +#endif + case cJU_JPBRANCH_B: state = cJU_ROOTSTATE; goto SM2BranchB; + +SM2BranchB: + Pjbb = P_JBB(Pjp->jp_Addr); + HISTPOPBOFF(subexp, offset, digit); // unpack values. + +// If theres a next-left/right JP in the current BranchB, which for +// Judy*Next() is true if any bits are set for higher Indexes, continue to +// SM3Findlimit: +// +// Note: offset is set to the JP previously traversed; go one to the +// left/right. + +#ifdef JUDYPREV + if (offset > 0) // next-left JP is in this subexpanse. + { + --offset; + goto SM2BranchBFindlimit; + } + + while (--subexp >= 0) // search next-left subexpanses. +#else + if (JU_JBB_BITMAP(Pjbb, subexp) + & JU_MASKHIGHEREXC(JU_BITPOSMASKB(digit))) + { + ++offset; // next-left => next-right. + goto SM2BranchBFindlimit; + } + + while (++subexp < cJU_NUMSUBEXPB) // search next-right subexps. +#endif + { + if (! JU_JBB_PJP(Pjbb, subexp)) continue; // empty subexpanse. + +#ifdef JUDYPREV + offset = SEARCHBITMAPMAXB(JU_JBB_BITMAP(Pjbb, subexp)); + // expected range: + assert((offset >= 0) && (offset < cJU_BITSPERSUBEXPB)); +#else + offset = 0; +#endif + +// Save the next-left/right JPs digit in *PIndex: + +SM2BranchBFindlimit: + JU_BITMAPDIGITB(digit, subexp, JU_JBB_BITMAP(Pjbb, subexp), + offset); + JU_SETDIGIT(*PIndex, digit, state); + + if ((Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp))) == (Pjp_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + + Pjp += offset; + goto SM3Findlimit; + } + +// Theres no next-left/right JP in the BranchB: + + goto SM2Backtrack; + + +// ---------------------------------------------------------------------------- +// UNCOMPRESSED BRANCH: + + case cJU_JPBRANCH_U2: state = 2; goto SM2BranchU; + case cJU_JPBRANCH_U3: state = 3; goto SM2BranchU; +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: state = 4; goto SM2BranchU; + case cJU_JPBRANCH_U5: state = 5; goto SM2BranchU; + case cJU_JPBRANCH_U6: state = 6; goto SM2BranchU; + case cJU_JPBRANCH_U7: state = 7; goto SM2BranchU; +#endif + case cJU_JPBRANCH_U: state = cJU_ROOTSTATE; goto SM2BranchU; + +SM2BranchU: + +// Search for a next-left/right JP in the current BranchU, and if one is found, +// save its digit in *PIndex and continue to SM3Findlimit: + + Pjbu = P_JBU(Pjp->jp_Addr); + digit = offset; + +#ifdef JUDYPREV + while (digit >= 1) + { + Pjp = (Pjbu->jbu_jp) + (--digit); +#else + while (digit < cJU_BRANCHUNUMJPS - 1) + { + Pjp = (Pjbu->jbu_jp) + (++digit); +#endif + if (JPNULL(JU_JPTYPE(Pjp))) continue; + + JU_SETDIGIT(*PIndex, digit, state); + goto SM3Findlimit; + } + +// Theres no next-left/right JP in the BranchU: + + goto SM2Backtrack; + + +// ---------------------------------------------------------------------------- +// INVALID JP TYPE: + + default: JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // SM2Backtrack switch. + + /*NOTREACHED*/ + + +// ============================================================================ +// STATE MACHINE 3 -- FIND LIMIT JP/INDEX: +// +// Look for the highest/lowest (right/left-most) JP in each branch and the +// highest/lowest Index in a leaf or immediate, and return it. While +// traversing, modify appropriate digit(s) in *PIndex to reflect the path +// taken, including Dcd bytes in each JP (which could hold critical missing +// digits for skipped branches). +// +// ENTRY: Pjp set to a JP under which to find max/min JPs (if a branch JP) or +// a max/min Index and return (if a leaf or immediate JP). +// +// EXIT: Execute JU_RET_FOUND* upon reaching a leaf or immediate. Should be +// impossible to fail, unless the Judy array is corrupt. + +SM3Findlimit: // come or return here for first/next branch/leaf. + + switch (JU_JPTYPE(Pjp)) + { +// ---------------------------------------------------------------------------- +// LINEAR BRANCH: +// +// Simply use the highest/lowest (right/left-most) JP in the BranchL, but first +// copy the Dcd bytes to *PIndex if there are any (only if state < +// cJU_ROOTSTATE - 1). + + case cJU_JPBRANCH_L2: SM3PREPB_DCD(2, SM3BranchL); +#ifndef JU_64BIT + case cJU_JPBRANCH_L3: SM3PREPB( 3, SM3BranchL); +#else + case cJU_JPBRANCH_L3: SM3PREPB_DCD(3, SM3BranchL); + case cJU_JPBRANCH_L4: SM3PREPB_DCD(4, SM3BranchL); + case cJU_JPBRANCH_L5: SM3PREPB_DCD(5, SM3BranchL); + case cJU_JPBRANCH_L6: SM3PREPB_DCD(6, SM3BranchL); + case cJU_JPBRANCH_L7: SM3PREPB( 7, SM3BranchL); +#endif + case cJU_JPBRANCH_L: SM3PREPB( cJU_ROOTSTATE, SM3BranchL); + +SM3BranchL: + Pjbl = P_JBL(Pjp->jp_Addr); + +#ifdef JUDYPREV + if ((offset = (Pjbl->jbl_NumJPs) - 1) < 0) +#else + offset = 0; if ((Pjbl->jbl_NumJPs) == 0) +#endif + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + + JU_SETDIGIT(*PIndex, Pjbl->jbl_Expanse[offset], state); + Pjp = (Pjbl->jbl_jp) + offset; + goto SM3Findlimit; + + +// ---------------------------------------------------------------------------- +// BITMAP BRANCH: +// +// Look for the highest/lowest (right/left-most) non-null subexpanse, then use +// the highest/lowest JP in that subexpanse, but first copy Dcd bytes, if there +// are any (only if state < cJU_ROOTSTATE - 1), to *PIndex. + + case cJU_JPBRANCH_B2: SM3PREPB_DCD(2, SM3BranchB); +#ifndef JU_64BIT + case cJU_JPBRANCH_B3: SM3PREPB( 3, SM3BranchB); +#else + case cJU_JPBRANCH_B3: SM3PREPB_DCD(3, SM3BranchB); + case cJU_JPBRANCH_B4: SM3PREPB_DCD(4, SM3BranchB); + case cJU_JPBRANCH_B5: SM3PREPB_DCD(5, SM3BranchB); + case cJU_JPBRANCH_B6: SM3PREPB_DCD(6, SM3BranchB); + case cJU_JPBRANCH_B7: SM3PREPB( 7, SM3BranchB); +#endif + case cJU_JPBRANCH_B: SM3PREPB( cJU_ROOTSTATE, SM3BranchB); + +SM3BranchB: + Pjbb = P_JBB(Pjp->jp_Addr); +#ifdef JUDYPREV + subexp = cJU_NUMSUBEXPB; + + while (! (JU_JBB_BITMAP(Pjbb, --subexp))) // find non-empty subexp. + { + if (subexp <= 0) // wholly empty bitmap. + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + } + + offset = SEARCHBITMAPMAXB(JU_JBB_BITMAP(Pjbb, subexp)); + // expected range: + assert((offset >= 0) && (offset < cJU_BITSPERSUBEXPB)); +#else + subexp = -1; + + while (! (JU_JBB_BITMAP(Pjbb, ++subexp))) // find non-empty subexp. + { + if (subexp >= cJU_NUMSUBEXPB - 1) // didnt find one. + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + } + + offset = 0; +#endif + + JU_BITMAPDIGITB(digit, subexp, JU_JBB_BITMAP(Pjbb, subexp), offset); + JU_SETDIGIT(*PIndex, digit, state); + + if ((Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp))) == (Pjp_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + + Pjp += offset; + goto SM3Findlimit; + + +// ---------------------------------------------------------------------------- +// UNCOMPRESSED BRANCH: +// +// Look for the highest/lowest (right/left-most) non-null JP, and use it, but +// first copy Dcd bytes to *PIndex if there are any (only if state < +// cJU_ROOTSTATE - 1). + + case cJU_JPBRANCH_U2: SM3PREPB_DCD(2, SM3BranchU); +#ifndef JU_64BIT + case cJU_JPBRANCH_U3: SM3PREPB( 3, SM3BranchU); +#else + case cJU_JPBRANCH_U3: SM3PREPB_DCD(3, SM3BranchU); + case cJU_JPBRANCH_U4: SM3PREPB_DCD(4, SM3BranchU); + case cJU_JPBRANCH_U5: SM3PREPB_DCD(5, SM3BranchU); + case cJU_JPBRANCH_U6: SM3PREPB_DCD(6, SM3BranchU); + case cJU_JPBRANCH_U7: SM3PREPB( 7, SM3BranchU); +#endif + case cJU_JPBRANCH_U: SM3PREPB( cJU_ROOTSTATE, SM3BranchU); + +SM3BranchU: + Pjbu = P_JBU(Pjp->jp_Addr); +#ifdef JUDYPREV + digit = cJU_BRANCHUNUMJPS; + + while (digit >= 1) + { + Pjp = (Pjbu->jbu_jp) + (--digit); +#else + + for (digit = 0; digit < cJU_BRANCHUNUMJPS; ++digit) + { + Pjp = (Pjbu->jbu_jp) + digit; +#endif + if (JPNULL(JU_JPTYPE(Pjp))) continue; + + JU_SETDIGIT(*PIndex, digit, state); + goto SM3Findlimit; + } + +// No non-null JPs in BranchU: + + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + +// ---------------------------------------------------------------------------- +// LINEAR LEAF: +// +// Simply use the highest/lowest (right/left-most) Index in the LeafL, but the +// details vary depending on leaf Index Size. First copy Dcd bytes, if there +// are any (only if state < cJU_ROOTSTATE - 1), to *PIndex. + +#define SM3LEAFLDCD(cState) \ + JU_SETDCD(*PIndex, Pjp, cState); \ + SM3LEAFLNODCD + +#ifdef JUDY1 +#define SM3LEAFL_SETPOP1 // not needed in any cases. +#else +#define SM3LEAFL_SETPOP1 pop1 = JU_JPLEAF_POP0(Pjp) + 1 +#endif + +#ifdef JUDYPREV +#define SM3LEAFLNODCD \ + Pjll = P_JLL(Pjp->jp_Addr); \ + SM3LEAFL_SETPOP1; \ + offset = JU_JPLEAF_POP0(Pjp); assert(offset >= 0) +#else +#define SM3LEAFLNODCD \ + Pjll = P_JLL(Pjp->jp_Addr); \ + SM3LEAFL_SETPOP1; \ + offset = 0; assert(JU_JPLEAF_POP0(Pjp) >= 0); +#endif + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: + + SM3LEAFLDCD(1); + JU_SETDIGIT1(*PIndex, ((uint8_t *) Pjll)[offset]); + JU_RET_FOUND_LEAF1(Pjll, pop1, offset); +#endif + + case cJU_JPLEAF2: + + SM3LEAFLDCD(2); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(2))) + | ((uint16_t *) Pjll)[offset]; + JU_RET_FOUND_LEAF2(Pjll, pop1, offset); + +#ifndef JU_64BIT + case cJU_JPLEAF3: + { + Word_t lsb; + SM3LEAFLNODCD; + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_LEAF3(Pjll, pop1, offset); + } + +#else + case cJU_JPLEAF3: + { + Word_t lsb; + SM3LEAFLDCD(3); + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_LEAF3(Pjll, pop1, offset); + } + + case cJU_JPLEAF4: + + SM3LEAFLDCD(4); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(4))) + | ((uint32_t *) Pjll)[offset]; + JU_RET_FOUND_LEAF4(Pjll, pop1, offset); + + case cJU_JPLEAF5: + { + Word_t lsb; + SM3LEAFLDCD(5); + JU_COPY5_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (5 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(5))) | lsb; + JU_RET_FOUND_LEAF5(Pjll, pop1, offset); + } + + case cJU_JPLEAF6: + { + Word_t lsb; + SM3LEAFLDCD(6); + JU_COPY6_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (6 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(6))) | lsb; + JU_RET_FOUND_LEAF6(Pjll, pop1, offset); + } + + case cJU_JPLEAF7: + { + Word_t lsb; + SM3LEAFLNODCD; + JU_COPY7_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (7 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(7))) | lsb; + JU_RET_FOUND_LEAF7(Pjll, pop1, offset); + } +#endif + + +// ---------------------------------------------------------------------------- +// BITMAP LEAF: +// +// Look for the highest/lowest (right/left-most) non-null subexpanse, then use +// the highest/lowest Index in that subexpanse, but first copy Dcd bytes +// (always present since state 1 < cJU_ROOTSTATE) to *PIndex. + + case cJU_JPLEAF_B1: + { + Pjlb_t Pjlb; + + JU_SETDCD(*PIndex, Pjp, 1); + + Pjlb = P_JLB(Pjp->jp_Addr); +#ifdef JUDYPREV + subexp = cJU_NUMSUBEXPL; + + while (! JU_JLB_BITMAP(Pjlb, --subexp)) // find non-empty subexp. + { + if (subexp <= 0) // wholly empty bitmap. + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + } + +// TBD: Might it be faster to just use a variant of BITMAPDIGIT*() that yields +// the digit for the right-most Index with a bit set? + + offset = SEARCHBITMAPMAXL(JU_JLB_BITMAP(Pjlb, subexp)); + // expected range: + assert((offset >= 0) && (offset < cJU_BITSPERSUBEXPL)); +#else + subexp = -1; + + while (! JU_JLB_BITMAP(Pjlb, ++subexp)) // find non-empty subexp. + { + if (subexp >= cJU_NUMSUBEXPL - 1) // didnt find one. + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + } + + offset = 0; +#endif + + JU_BITMAPDIGITL(digit, subexp, JU_JLB_BITMAP(Pjlb, subexp), offset); + JU_SETDIGIT1(*PIndex, digit); + JU_RET_FOUND_LEAF_B1(Pjlb, subexp, offset); +// == return((PPvoid_t) (P_JV(JL_JLB_PVALUE(Pjlb, subexp)) + (offset))); + + } // case cJU_JPLEAF_B1 + +#ifdef JUDY1 +// ---------------------------------------------------------------------------- +// FULL POPULATION: +// +// Copy Dcd bytes to *PIndex (always present since state 1 < cJU_ROOTSTATE), +// then set the highest/lowest possible digit as the LSB in *PIndex. + + case cJ1_JPFULLPOPU1: + + JU_SETDCD( *PIndex, Pjp, 1); +#ifdef JUDYPREV + JU_SETDIGIT1(*PIndex, cJU_BITSPERBITMAP - 1); +#else + JU_SETDIGIT1(*PIndex, 0); +#endif + JU_RET_FOUND_FULLPOPU1; +#endif // JUDY1 + + +// ---------------------------------------------------------------------------- +// IMMEDIATE: +// +// Simply use the highest/lowest (right/left-most) Index in the Imm, but the +// details vary depending on leaf Index Size and pop1. Note: There are no Dcd +// bytes in an Immediate JP, but in a cJU_JPIMMED_*_01 JP, the field holds the +// least bytes of the immediate Index. + + case cJU_JPIMMED_1_01: SET_01(1); goto SM3Imm_01; + case cJU_JPIMMED_2_01: SET_01(2); goto SM3Imm_01; + case cJU_JPIMMED_3_01: SET_01(3); goto SM3Imm_01; +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: SET_01(4); goto SM3Imm_01; + case cJU_JPIMMED_5_01: SET_01(5); goto SM3Imm_01; + case cJU_JPIMMED_6_01: SET_01(6); goto SM3Imm_01; + case cJU_JPIMMED_7_01: SET_01(7); goto SM3Imm_01; +#endif +SM3Imm_01: JU_RET_FOUND_IMM_01(Pjp); + +#ifdef JUDYPREV +#define SM3IMM_OFFSET(cPop1) (cPop1) - 1 // highest. +#else +#define SM3IMM_OFFSET(cPop1) 0 // lowest. +#endif + +#define SM3IMM(cPop1,Next) \ + offset = SM3IMM_OFFSET(cPop1); \ + goto Next + + case cJU_JPIMMED_1_02: SM3IMM( 2, SM3Imm1); + case cJU_JPIMMED_1_03: SM3IMM( 3, SM3Imm1); +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: SM3IMM( 4, SM3Imm1); + case cJU_JPIMMED_1_05: SM3IMM( 5, SM3Imm1); + case cJU_JPIMMED_1_06: SM3IMM( 6, SM3Imm1); + case cJU_JPIMMED_1_07: SM3IMM( 7, SM3Imm1); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: SM3IMM( 8, SM3Imm1); + case cJ1_JPIMMED_1_09: SM3IMM( 9, SM3Imm1); + case cJ1_JPIMMED_1_10: SM3IMM(10, SM3Imm1); + case cJ1_JPIMMED_1_11: SM3IMM(11, SM3Imm1); + case cJ1_JPIMMED_1_12: SM3IMM(12, SM3Imm1); + case cJ1_JPIMMED_1_13: SM3IMM(13, SM3Imm1); + case cJ1_JPIMMED_1_14: SM3IMM(14, SM3Imm1); + case cJ1_JPIMMED_1_15: SM3IMM(15, SM3Imm1); +#endif + +SM3Imm1: JU_SETDIGIT1(*PIndex, ((uint8_t *) PJI)[offset]); + JU_RET_FOUND_IMM(Pjp, offset); + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: SM3IMM(2, SM3Imm2); + case cJU_JPIMMED_2_03: SM3IMM(3, SM3Imm2); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: SM3IMM(4, SM3Imm2); + case cJ1_JPIMMED_2_05: SM3IMM(5, SM3Imm2); + case cJ1_JPIMMED_2_06: SM3IMM(6, SM3Imm2); + case cJ1_JPIMMED_2_07: SM3IMM(7, SM3Imm2); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) +SM3Imm2: *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(2))) + | ((uint16_t *) PJI)[offset]; + JU_RET_FOUND_IMM(Pjp, offset); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: SM3IMM(2, SM3Imm3); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: SM3IMM(3, SM3Imm3); + case cJ1_JPIMMED_3_04: SM3IMM(4, SM3Imm3); + case cJ1_JPIMMED_3_05: SM3IMM(5, SM3Imm3); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) +SM3Imm3: + { + Word_t lsb; + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_4_02: SM3IMM(2, SM3Imm4); + case cJ1_JPIMMED_4_03: SM3IMM(3, SM3Imm4); + +SM3Imm4: *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(4))) + | ((uint32_t *) PJI)[offset]; + JU_RET_FOUND_IMM(Pjp, offset); + + case cJ1_JPIMMED_5_02: SM3IMM(2, SM3Imm5); + case cJ1_JPIMMED_5_03: SM3IMM(3, SM3Imm5); + +SM3Imm5: + { + Word_t lsb; + JU_COPY5_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (5 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(5))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } + + case cJ1_JPIMMED_6_02: SM3IMM(2, SM3Imm6); + +SM3Imm6: + { + Word_t lsb; + JU_COPY6_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (6 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(6))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } + + case cJ1_JPIMMED_7_02: SM3IMM(2, SM3Imm7); + +SM3Imm7: + { + Word_t lsb; + JU_COPY7_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (7 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(7))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } +#endif // (JUDY1 && JU_64BIT) + + +// ---------------------------------------------------------------------------- +// OTHER CASES: + + default: JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // SM3Findlimit switch. + + /*NOTREACHED*/ + +} // Judy1Prev() / Judy1Next() / JudyLPrev() / JudyLNext() diff --git a/libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c b/libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c new file mode 100644 index 0000000..4da4356 --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c @@ -0,0 +1,1390 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.32 $ $Source: /judy/src/JudyCommon/JudyPrevNextEmpty.c $ +// +// Judy*PrevEmpty() and Judy*NextEmpty() functions for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. +// +// Compile with -DJUDYNEXT for the Judy*NextEmpty() function; otherwise +// defaults to Judy*PrevEmpty(). +// +// Compile with -DTRACEJPSE to trace JP traversals. +// +// This file is separate from JudyPrevNext.c because it differs too greatly for +// ifdefs. This might be a bit surprising, but there are two reasons: +// +// - First, down in the details, searching for an empty index (SearchEmpty) is +// remarkably asymmetric with searching for a valid index (SearchValid), +// mainly with respect to: No return of a value area for JudyL; partially- +// full versus totally-full JPs; and handling of narrow pointers. +// +// - Second, we chose to implement SearchEmpty without a backtrack stack or +// backtrack engine, partly as an experiment, and partly because we think +// restarting from the top of the tree is less likely for SearchEmpty than +// for SearchValid, because empty indexes are more likely than valid indexes. +// +// A word about naming: A prior version of this feature (see 4.13) was named +// Judy*Free(), but there were concerns about that being read as a verb rather +// than an adjective. After prolonged debate and based on user input, we +// changed "Free" to "Empty". + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifndef JUDYNEXT +#ifndef JUDYPREV +#define JUDYPREV 1 // neither set => use default. +#endif +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +#ifdef TRACEJPSE +#include "JudyPrintJP.c" +#endif + + +// **************************************************************************** +// J U D Y 1 P R E V E M P T Y +// J U D Y 1 N E X T E M P T Y +// J U D Y L P R E V E M P T Y +// J U D Y L N E X T E M P T Y +// +// See the manual entry for the API. +// +// OVERVIEW OF Judy*PrevEmpty() / Judy*NextEmpty(): +// +// See also for comparison the equivalent comments in JudyPrevNext.c. +// +// Take the callers *PIndex and subtract/add 1, but watch out for +// underflow/overflow, which means "no previous/next empty index found." Use a +// reentrant switch statement (state machine, see SMGetRestart and +// SMGetContinue) to decode Index, starting with the JRP (PArray), through a +// JPM and branches, if any, down to an immediate or a leaf. Look for Index in +// that immediate or leaf, and if not found (invalid index), return success +// (Index is empty). +// +// This search can result in a dead end where taking a different path is +// required. There are four kinds of dead ends: +// +// BRANCH PRIMARY dead end: Encountering a fully-populated JP for the +// appropriate digit in Index. Search sideways in the branch for the +// previous/next absent/null/non-full JP, and if one is found, set Index to the +// highest/lowest index possible in that JPs expanse. Then if the JP is an +// absent or null JP, return success; otherwise for a non-full JP, traverse +// through the partially populated JP. +// +// BRANCH SECONDARY dead end: Reaching the end of a branch during a sideways +// search after a branch primary dead end. Set Index to the lowest/highest +// index possible in the whole branchs expanse (one higher/lower than the +// previous/next branchs expanse), then restart at the top of the tree, which +// includes pre-decrementing/incrementing Index (again) and watching for +// underflow/overflow (again). +// +// LEAF PRIMARY dead end: Finding a valid (non-empty) index in an immediate or +// leaf matching Index. Search sideways in the immediate/leaf for the +// previous/next empty index; if found, set *PIndex to match and return success. +// +// LEAF SECONDARY dead end: Reaching the end of an immediate or leaf during a +// sideways search after a leaf primary dead end. Just as for a branch +// secondary dead end, restart at the top of the tree with Index set to the +// lowest/highest index possible in the whole immediate/leafs expanse. +// TBD: If leaf secondary dead end occurs, could shortcut and treat it as a +// branch primary dead end; but this would require remembering the parent +// branchs type and offset (a "one-deep stack"), and also wrestling with +// narrow pointers, at least for leaves (but not for immediates). +// +// Note some ASYMMETRIES between SearchValid and SearchEmpty: +// +// - The SearchValid code, upon descending through a narrow pointer, if Index +// is outside the expanse of the subsidiary node (effectively a secondary +// dead end), must decide whether to backtrack or findlimit. But the +// SearchEmpty code simply returns success (Index is empty). +// +// - Similarly, the SearchValid code, upon finding no previous/next index in +// the expanse of a narrow pointer (again, a secondary dead end), can simply +// start to backtrack at the parent JP. But the SearchEmpty code would have +// to first determine whether or not the parent JPs narrow expanse contains +// a previous/next empty index outside the subexpanse. Rather than keeping a +// parent state stack and backtracking this way, upon a secondary dead end, +// the SearchEmpty code simply restarts at the top of the tree, whether or +// not a narrow pointer is involved. Again, see the equivalent comments in +// JudyPrevNext.c for comparison. +// +// This function is written iteratively for speed, rather than recursively. +// +// TBD: Wed like to enhance this function to make successive searches faster. +// This would require saving some previous state, including the previous Index +// returned, and in which leaf it was found. If the next call is for the same +// Index and the array has not been modified, start at the same leaf. This +// should be much easier to implement since this is iterative rather than +// recursive code. + +#ifdef JUDY1 +#ifdef JUDYPREV +FUNCTION int Judy1PrevEmpty +#else +FUNCTION int Judy1NextEmpty +#endif +#else +#ifdef JUDYPREV +FUNCTION int JudyLPrevEmpty +#else +FUNCTION int JudyLNextEmpty +#endif +#endif + ( + Pcvoid_t PArray, // Judy array to search. + Word_t * PIndex, // starting point and result. + PJError_t PJError // optional, for returning error info. + ) +{ + Word_t Index; // fast copy, in a register. + Pjp_t Pjp; // current JP. + Pjbl_t Pjbl; // Pjp->jp_Addr masked and cast to types: + Pjbb_t Pjbb; + Pjbu_t Pjbu; + Pjlb_t Pjlb; + PWord_t Pword; // alternate name for use by GET* macros. + + Word_t digit; // next digit to decode from Index. + Word_t digits; // current state in SM = digits left to decode. + Word_t pop0; // in a leaf. + Word_t pop0mask; // precalculated to avoid variable shifts. + long offset; // within a branch or leaf (can be large). + int subexp; // subexpanse in a bitmap branch. + BITMAPB_t bitposmaskB; // bit in bitmap for bitmap branch. + BITMAPL_t bitposmaskL; // bit in bitmap for bitmap leaf. + Word_t possfullJP1; // JP types for possibly full subexpanses: + Word_t possfullJP2; + Word_t possfullJP3; + + +// ---------------------------------------------------------------------------- +// M A C R O S +// +// These are intended to make the code a bit more readable and less redundant. + + +// CHECK FOR NULL JP: +// +// TBD: In principle this can be reduced (here and in other *.c files) to just +// the latter clause since no Type should ever be below cJU_JPNULL1, but in +// fact some root pointer types can be lower, so for safety do both checks. + +#define JPNULL(Type) (((Type) >= cJU_JPNULL1) && ((Type) <= cJU_JPNULLMAX)) + + +// CHECK FOR A FULL JP: +// +// Given a JP, indicate if it is fully populated. Use digits, pop0mask, and +// possfullJP1..3 in the context. +// +// This is a difficult problem because it requires checking the Pop0 bits for +// all-ones, but the number of bytes depends on the JP type, which is not +// directly related to the parent branchs type or level -- the JPs child +// could be under a narrow pointer (hence not full). The simple answer +// requires switching on or otherwise calculating the JP type, which could be +// slow. Instead, in SMPREPB* precalculate pop0mask and also record in +// possfullJP1..3 the child JP (branch) types that could possibly be full (one +// level down), and use them here. For level-2 branches (with digits == 2), +// the test for a full child depends on Judy1/JudyL. +// +// Note: This cannot be applied to the JP in a JPM because it doesnt have +// enough pop0 digits. +// +// TBD: JPFULL_BRANCH diligently checks for BranchL or BranchB, where neither +// of those can ever be full as it turns out. Could just check for a BranchU +// at the right level. Also, pop0mask might be overkill, its not used much, +// so perhaps just call cJU_POP0MASK(digits - 1) here? +// +// First, JPFULL_BRANCH checks for a full expanse for a JP whose child can be a +// branch, that is, a JP in a branch at level 3 or higher: + +#define JPFULL_BRANCH(Pjp) \ + ((((JU_JPDCDPOP0(Pjp) ^ cJU_ALLONES) & pop0mask) == 0) \ + && ((JU_JPTYPE(Pjp) == possfullJP1) \ + || (JU_JPTYPE(Pjp) == possfullJP2) \ + || (JU_JPTYPE(Pjp) == possfullJP3))) + +#ifdef JUDY1 +#define JPFULL(Pjp) \ + ((digits == 2) ? \ + (JU_JPTYPE(Pjp) == cJ1_JPFULLPOPU1) : JPFULL_BRANCH(Pjp)) +#else +#define JPFULL(Pjp) \ + ((digits == 2) ? \ + (JU_JPTYPE(Pjp) == cJU_JPLEAF_B1) \ + && (((JU_JPDCDPOP0(Pjp) & cJU_POP0MASK(1)) == cJU_POP0MASK(1))) : \ + JPFULL_BRANCH(Pjp)) +#endif + + +// RETURN SUCCESS: +// +// This hides the need to set *PIndex back to the local value of Index -- use a +// local value for faster operation. Note that the callers *PIndex is ALWAYS +// modified upon success, at least decremented/incremented. + +#define RET_SUCCESS { *PIndex = Index; return(1); } + + +// RETURN A CORRUPTION: + +#define RET_CORRUPT { JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); return(JERRI); } + + +// SEARCH A BITMAP BRANCH: +// +// This is a weak analog of j__udySearchLeaf*() for bitmap branches. Return +// the actual or next-left position, base 0, of Digit in a BITMAPB_t bitmap +// (subexpanse of a full bitmap), also given a Bitposmask for Digit. The +// position is the offset within the set bits. +// +// Unlike j__udySearchLeaf*(), the offset is not returned bit-complemented if +// Digits bit is unset, because the caller can check the bitmap themselves to +// determine that. Also, if Digits bit is unset, the returned offset is to +// the next-left JP or index (including -1), not to the "ideal" position for +// the index = next-right JP or index. +// +// Shortcut and skip calling j__udyCountBitsB() if the bitmap is full, in which +// case (Digit % cJU_BITSPERSUBEXPB) itself is the base-0 offset. + +#define SEARCHBITMAPB(Bitmap,Digit,Bitposmask) \ + (((Bitmap) == cJU_FULLBITMAPB) ? (Digit % cJU_BITSPERSUBEXPB) : \ + j__udyCountBitsB((Bitmap) & JU_MASKLOWERINC(Bitposmask)) - 1) + +#ifdef JUDYPREV +// Equivalent to search for the highest offset in Bitmap, that is, one less +// than the number of bits set: + +#define SEARCHBITMAPMAXB(Bitmap) \ + (((Bitmap) == cJU_FULLBITMAPB) ? cJU_BITSPERSUBEXPB - 1 : \ + j__udyCountBitsB(Bitmap) - 1) +#endif + + +// CHECK DECODE BYTES: +// +// Check Decode bytes in a JP against the equivalent portion of Index. If they +// dont match, Index is outside the subexpanse of a narrow pointer, hence is +// empty. + +#define CHECKDCD(cDigits) \ + if (JU_DCDNOTMATCHINDEX(Index, Pjp, cDigits)) RET_SUCCESS + + +// REVISE REMAINDER OF INDEX: +// +// Put one digit in place in Index and clear/set the lower digits, if any, so +// the resulting Index is at the start/end of an expanse, or just clear/set the +// least digits. +// +// Actually, to make simple use of JU_LEASTBYTESMASK, first clear/set all least +// digits of Index including the digit to be overridden, then set the value of +// that one digit. If Digits == 1 the first operation is redundant, but either +// very fast or even removed by the optimizer. + +#define CLEARLEASTDIGITS(Digits) Index &= ~JU_LEASTBYTESMASK(Digits) +#define SETLEASTDIGITS( Digits) Index |= JU_LEASTBYTESMASK(Digits) + +#define CLEARLEASTDIGITS_D(Digit,Digits) \ + { \ + CLEARLEASTDIGITS(Digits); \ + JU_SETDIGIT(Index, Digit, Digits); \ + } + +#define SETLEASTDIGITS_D(Digit,Digits) \ + { \ + SETLEASTDIGITS(Digits); \ + JU_SETDIGIT(Index, Digit, Digits); \ + } + + +// SET REMAINDER OF INDEX AND THEN RETURN OR CONTINUE: + +#define SET_AND_RETURN(OpLeastDigits,Digit,Digits) \ + { \ + OpLeastDigits(Digit, Digits); \ + RET_SUCCESS; \ + } + +#define SET_AND_CONTINUE(OpLeastDigits,Digit,Digits) \ + { \ + OpLeastDigits(Digit, Digits); \ + goto SMGetContinue; \ + } + + +// PREPARE TO HANDLE A LEAFW OR JP BRANCH IN THE STATE MACHINE: +// +// Extract a state-dependent digit from Index in a "constant" way, then jump to +// common code for multiple cases. +// +// TBD: Should this macro do more, such as preparing variable-shift masks for +// use in CLEARLEASTDIGITS and SETLEASTDIGITS? + +#define SMPREPB(cDigits,Next,PossFullJP1,PossFullJP2,PossFullJP3) \ + digits = (cDigits); \ + digit = JU_DIGITATSTATE(Index, cDigits); \ + pop0mask = cJU_POP0MASK((cDigits) - 1); /* for branchs JPs */ \ + possfullJP1 = (PossFullJP1); \ + possfullJP2 = (PossFullJP2); \ + possfullJP3 = (PossFullJP3); \ + goto Next + +// Variations for specific-level branches and for shorthands: +// +// Note: SMPREPB2 need not initialize possfullJP* because JPFULL does not use +// them for digits == 2, but gcc -Wall isnt quite smart enough to see this, so +// waste a bit of time and space to get rid of the warning: + +#define SMPREPB2(Next) \ + digits = 2; \ + digit = JU_DIGITATSTATE(Index, 2); \ + pop0mask = cJU_POP0MASK(1); /* for branchs JPs */ \ + possfullJP1 = possfullJP2 = possfullJP3 = 0; \ + goto Next + +#define SMPREPB3(Next) SMPREPB(3, Next, cJU_JPBRANCH_L2, \ + cJU_JPBRANCH_B2, \ + cJU_JPBRANCH_U2) +#ifndef JU_64BIT +#define SMPREPBL(Next) SMPREPB(cJU_ROOTSTATE, Next, cJU_JPBRANCH_L3, \ + cJU_JPBRANCH_B3, \ + cJU_JPBRANCH_U3) +#else +#define SMPREPB4(Next) SMPREPB(4, Next, cJU_JPBRANCH_L3, \ + cJU_JPBRANCH_B3, \ + cJU_JPBRANCH_U3) +#define SMPREPB5(Next) SMPREPB(5, Next, cJU_JPBRANCH_L4, \ + cJU_JPBRANCH_B4, \ + cJU_JPBRANCH_U4) +#define SMPREPB6(Next) SMPREPB(6, Next, cJU_JPBRANCH_L5, \ + cJU_JPBRANCH_B5, \ + cJU_JPBRANCH_U5) +#define SMPREPB7(Next) SMPREPB(7, Next, cJU_JPBRANCH_L6, \ + cJU_JPBRANCH_B6, \ + cJU_JPBRANCH_U6) +#define SMPREPBL(Next) SMPREPB(cJU_ROOTSTATE, Next, cJU_JPBRANCH_L7, \ + cJU_JPBRANCH_B7, \ + cJU_JPBRANCH_U7) +#endif + + +// RESTART AFTER SECONDARY DEAD END: +// +// Set Index to the first/last index in the branch or leaf subexpanse and start +// over at the top of the tree. + +#ifdef JUDYPREV +#define SMRESTART(Digits) { CLEARLEASTDIGITS(Digits); goto SMGetRestart; } +#else +#define SMRESTART(Digits) { SETLEASTDIGITS( Digits); goto SMGetRestart; } +#endif + + +// CHECK EDGE OF LEAFS EXPANSE: +// +// Given the LSBs of the lowest/highest valid index in a leaf (or equivalently +// in an immediate JP), the level (index size) of the leaf, and the full index +// to return (as Index in the context) already set to the full index matching +// the lowest/highest one, determine if there is an empty index in the leafs +// expanse below/above the lowest/highest index, which is true if the +// lowest/highest index is not at the "edge" of the leafs expanse based on its +// LSBs. If so, return Index decremented/incremented; otherwise restart at the +// top of the tree. +// +// Note: In many cases Index is already at the right spot and calling +// SMRESTART instead of just going directly to SMGetRestart is a bit of +// overkill. +// +// Note: Variable shift occurs if Digits is not a constant. + +#ifdef JUDYPREV +#define LEAF_EDGE(MinIndex,Digits) \ + { \ + if (MinIndex) { --Index; RET_SUCCESS; } \ + SMRESTART(Digits); \ + } +#else +#define LEAF_EDGE(MaxIndex,Digits) \ + { \ + if ((MaxIndex) != JU_LEASTBYTES(cJU_ALLONES, Digits)) \ + { ++Index; RET_SUCCESS; } \ + SMRESTART(Digits); \ + } +#endif + +// Same as above except Index is not already set to match the lowest/highest +// index, so do that before decrementing/incrementing it: + +#ifdef JUDYPREV +#define LEAF_EDGE_SET(MinIndex,Digits) \ + { \ + if (MinIndex) \ + { JU_SETDIGITS(Index, MinIndex, Digits); --Index; RET_SUCCESS; } \ + SMRESTART(Digits); \ + } +#else +#define LEAF_EDGE_SET(MaxIndex,Digits) \ + { \ + if ((MaxIndex) != JU_LEASTBYTES(cJU_ALLONES, Digits)) \ + { JU_SETDIGITS(Index, MaxIndex, Digits); ++Index; RET_SUCCESS; } \ + SMRESTART(Digits); \ + } +#endif + + +// FIND A HOLE (EMPTY INDEX) IN AN IMMEDIATE OR LEAF: +// +// Given an index location in a leaf (or equivalently an immediate JP) known to +// contain a usable hole (an empty index less/greater than Index), and the LSBs +// of a minimum/maximum index to locate, find the previous/next empty index and +// return it. +// +// Note: "Even" index sizes (1,2,4[,8] bytes) have corresponding native C +// types; "odd" index sizes dont, but they are not represented here because +// they are handled completely differently; see elsewhere. + +#ifdef JUDYPREV + +#define LEAF_HOLE_EVEN(cDigits,Pjll,IndexLSB) \ + { \ + while (*(Pjll) > (IndexLSB)) --(Pjll); /* too high */ \ + if (*(Pjll) < (IndexLSB)) RET_SUCCESS /* Index is empty */ \ + while (*(--(Pjll)) == --(IndexLSB)) /* null, find a hole */;\ + JU_SETDIGITS(Index, IndexLSB, cDigits); \ + RET_SUCCESS; \ + } +#else +#define LEAF_HOLE_EVEN(cDigits,Pjll,IndexLSB) \ + { \ + while (*(Pjll) < (IndexLSB)) ++(Pjll); /* too low */ \ + if (*(Pjll) > (IndexLSB)) RET_SUCCESS /* Index is empty */ \ + while (*(++(Pjll)) == ++(IndexLSB)) /* null, find a hole */;\ + JU_SETDIGITS(Index, IndexLSB, cDigits); \ + RET_SUCCESS; \ + } +#endif + + +// SEARCH FOR AN EMPTY INDEX IN AN IMMEDIATE OR LEAF: +// +// Given a pointer to the first index in a leaf (or equivalently an immediate +// JP), the population of the leaf, and a first empty Index to find (inclusive, +// as Index in the context), where Index is known to fall within the expanse of +// the leaf to search, efficiently find the previous/next empty index in the +// leaf, if any. For simplicity the following overview is stated in terms of +// Judy*NextEmpty() only, but the same concepts apply symmetrically for +// Judy*PrevEmpty(). Also, in each case the comparisons are for the LSBs of +// Index and leaf indexes, according to the leafs level. +// +// 1. If Index is GREATER than the last (highest) index in the leaf +// (maxindex), return success, Index is empty. (Remember, Index is known +// to be in the leafs expanse.) +// +// 2. If Index is EQUAL to maxindex: If maxindex is not at the edge of the +// leafs expanse, increment Index and return success, there is an empty +// Index one higher than any in the leaf; otherwise restart with Index +// reset to the upper edge of the leafs expanse. Note: This might cause +// an extra cache line fill, but this is OK for repeatedly-called search +// code, and it saves CPU time. +// +// 3. If Index is LESS than maxindex, check for "dense to end of leaf": +// Subtract Index from maxindex, and back up that many slots in the leaf. +// If the resulting offset is not before the start of the leaf then compare +// the index at this offset (baseindex) with Index: +// +// 3a. If GREATER, the leaf must be corrupt, since indexes are sorted and +// there are no duplicates. +// +// 3b. If EQUAL, the leaf is "dense" from Index to maxindex, meaning there is +// no reason to search it. "Slide right" to the high end of the leaf +// (modify Index to maxindex) and continue with step 2 above. +// +// 3c. If LESS, continue with step 4. +// +// 4. If the offset based on maxindex minus Index falls BEFORE the start of +// the leaf, or if, per 3c above, baseindex is LESS than Index, the leaf is +// guaranteed "not dense to the end" and a usable empty Index must exist. +// This supports a more efficient search loop. Start at the FIRST index in +// the leaf, or one BEYOND baseindex, respectively, and search the leaf as +// follows, comparing each current index (currindex) with Index: +// +// 4a. If LESS, keep going to next index. Note: This is certain to terminate +// because maxindex is known to be greater than Index, hence the loop can +// be small and fast. +// +// 4b. If EQUAL, loop and increment Index until finding currindex greater than +// Index, and return success with the modified Index. +// +// 4c. If GREATER, return success, Index (unmodified) is empty. +// +// Note: These are macros rather than functions for speed. + +#ifdef JUDYPREV + +#define JSLE_EVEN(Addr,Pop0,cDigits,LeafType) \ + { \ + LeafType * PjllLSB = (LeafType *) (Addr); \ + LeafType IndexLSB = Index; /* auto-masking */ \ + \ + /* Index before or at start of leaf: */ \ + \ + if (*PjllLSB >= IndexLSB) /* no need to search */ \ + { \ + if (*PjllLSB > IndexLSB) RET_SUCCESS; /* Index empty */ \ + LEAF_EDGE(*PjllLSB, cDigits); \ + } \ + \ + /* Index in or after leaf: */ \ + \ + offset = IndexLSB - *PjllLSB; /* tentative offset */ \ + if (offset <= (Pop0)) /* can check density */ \ + { \ + PjllLSB += offset; /* move to slot */ \ + \ + if (*PjllLSB <= IndexLSB) /* dense or corrupt */ \ + { \ + if (*PjllLSB == IndexLSB) /* dense, check edge */ \ + LEAF_EDGE_SET(PjllLSB[-offset], cDigits); \ + RET_CORRUPT; \ + } \ + --PjllLSB; /* not dense, start at previous */ \ + } \ + else PjllLSB = ((LeafType *) (Addr)) + (Pop0); /* start at max */ \ + \ + LEAF_HOLE_EVEN(cDigits, PjllLSB, IndexLSB); \ + } + +// JSLE_ODD is completely different from JSLE_EVEN because its important to +// minimize copying odd indexes to compare them (see 4.14). Furthermore, a +// very complex version (4.17, but abandoned before fully debugged) that +// avoided calling j__udySearchLeaf*() ran twice as fast as 4.14, but still +// half as fast as SearchValid. Doug suggested that to minimize complexity and +// share common code we should use j__udySearchLeaf*() for the initial search +// to establish if Index is empty, which should be common. If Index is valid +// in a leaf or immediate indexes, odds are good that an empty Index is nearby, +// so for simplicity just use a *COPY* function to linearly search the +// remainder. +// +// TBD: Pathological case? Average performance should be good, but worst-case +// might suffer. When Search says the initial Index is valid, so a linear +// copy-and-compare is begun, if the caller builds fairly large leaves with +// dense clusters AND frequently does a SearchEmpty at one end of such a +// cluster, performance wont be very good. Might a dense-check help? This +// means checking offset against the index at offset, and then against the +// first/last index in the leaf. We doubt the pathological case will appear +// much in real applications because they will probably alternate SearchValid +// and SearchEmpty calls. + +#define JSLE_ODD(cDigits,Pjll,Pop0,Search,Copy) \ + { \ + Word_t IndexLSB; /* least bytes only */ \ + Word_t IndexFound; /* in leaf */ \ + \ + if ((offset = Search(Pjll, (Pop0) + 1, Index)) < 0) \ + RET_SUCCESS; /* Index is empty */ \ + \ + IndexLSB = JU_LEASTBYTES(Index, cDigits); \ + offset *= (cDigits); \ + \ + while ((offset -= (cDigits)) >= 0) \ + { /* skip until empty or start */ \ + Copy(IndexFound, ((uint8_t *) (Pjll)) + offset); \ + if (IndexFound != (--IndexLSB)) /* found an empty */ \ + { JU_SETDIGITS(Index, IndexLSB, cDigits); RET_SUCCESS; }\ + } \ + LEAF_EDGE_SET(IndexLSB, cDigits); \ + } + +#else // JUDYNEXT + +#define JSLE_EVEN(Addr,Pop0,cDigits,LeafType) \ + { \ + LeafType * PjllLSB = ((LeafType *) (Addr)) + (Pop0); \ + LeafType IndexLSB = Index; /* auto-masking */ \ + \ + /* Index at or after end of leaf: */ \ + \ + if (*PjllLSB <= IndexLSB) /* no need to search */ \ + { \ + if (*PjllLSB < IndexLSB) RET_SUCCESS; /* Index empty */\ + LEAF_EDGE(*PjllLSB, cDigits); \ + } \ + \ + /* Index before or in leaf: */ \ + \ + offset = *PjllLSB - IndexLSB; /* tentative offset */ \ + if (offset <= (Pop0)) /* can check density */ \ + { \ + PjllLSB -= offset; /* move to slot */ \ + \ + if (*PjllLSB >= IndexLSB) /* dense or corrupt */ \ + { \ + if (*PjllLSB == IndexLSB) /* dense, check edge */ \ + LEAF_EDGE_SET(PjllLSB[offset], cDigits); \ + RET_CORRUPT; \ + } \ + ++PjllLSB; /* not dense, start at next */ \ + } \ + else PjllLSB = (LeafType *) (Addr); /* start at minimum */ \ + \ + LEAF_HOLE_EVEN(cDigits, PjllLSB, IndexLSB); \ + } + +#define JSLE_ODD(cDigits,Pjll,Pop0,Search,Copy) \ + { \ + Word_t IndexLSB; /* least bytes only */ \ + Word_t IndexFound; /* in leaf */ \ + int offsetmax; /* in bytes */ \ + \ + if ((offset = Search(Pjll, (Pop0) + 1, Index)) < 0) \ + RET_SUCCESS; /* Index is empty */ \ + \ + IndexLSB = JU_LEASTBYTES(Index, cDigits); \ + offset *= (cDigits); \ + offsetmax = (Pop0) * (cDigits); /* single multiply */ \ + \ + while ((offset += (cDigits)) <= offsetmax) \ + { /* skip until empty or end */ \ + Copy(IndexFound, ((uint8_t *) (Pjll)) + offset); \ + if (IndexFound != (++IndexLSB)) /* found an empty */ \ + { JU_SETDIGITS(Index, IndexLSB, cDigits); RET_SUCCESS; } \ + } \ + LEAF_EDGE_SET(IndexLSB, cDigits); \ + } + +#endif // JUDYNEXT + +// Note: Immediate indexes never fill a single index group, so for odd index +// sizes, save time by calling JSLE_ODD_IMM instead of JSLE_ODD. + +#define j__udySearchLeafEmpty1(Addr,Pop0) \ + JSLE_EVEN(Addr, Pop0, 1, uint8_t) + +#define j__udySearchLeafEmpty2(Addr,Pop0) \ + JSLE_EVEN(Addr, Pop0, 2, uint16_t) + +#define j__udySearchLeafEmpty3(Addr,Pop0) \ + JSLE_ODD(3, Addr, Pop0, j__udySearchLeaf3, JU_COPY3_PINDEX_TO_LONG) + +#ifndef JU_64BIT + +#define j__udySearchLeafEmptyL(Addr,Pop0) \ + JSLE_EVEN(Addr, Pop0, 4, Word_t) + +#else + +#define j__udySearchLeafEmpty4(Addr,Pop0) \ + JSLE_EVEN(Addr, Pop0, 4, uint32_t) + +#define j__udySearchLeafEmpty5(Addr,Pop0) \ + JSLE_ODD(5, Addr, Pop0, j__udySearchLeaf5, JU_COPY5_PINDEX_TO_LONG) + +#define j__udySearchLeafEmpty6(Addr,Pop0) \ + JSLE_ODD(6, Addr, Pop0, j__udySearchLeaf6, JU_COPY6_PINDEX_TO_LONG) + +#define j__udySearchLeafEmpty7(Addr,Pop0) \ + JSLE_ODD(7, Addr, Pop0, j__udySearchLeaf7, JU_COPY7_PINDEX_TO_LONG) + +#define j__udySearchLeafEmptyL(Addr,Pop0) \ + JSLE_EVEN(Addr, Pop0, 8, Word_t) + +#endif // JU_64BIT + + +// ---------------------------------------------------------------------------- +// START OF CODE: +// +// CHECK FOR SHORTCUTS: +// +// Error out if PIndex is null. + + if (PIndex == (PWord_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX); + return(JERRI); + } + + Index = *PIndex; // fast local copy. + +// Set and pre-decrement/increment Index, watching for underflow/overflow: +// +// An out-of-bounds Index means failure: No previous/next empty index. + +SMGetRestart: // return here with revised Index. + +#ifdef JUDYPREV + if (Index-- == 0) return(0); +#else + if (++Index == 0) return(0); +#endif + +// An empty array with an in-bounds (not underflowed/overflowed) Index means +// success: +// +// Note: This check is redundant after restarting at SMGetRestart, but should +// take insignificant time. + + if (PArray == (Pvoid_t) NULL) RET_SUCCESS; + +// ---------------------------------------------------------------------------- +// ROOT-LEVEL LEAF that starts with a Pop0 word; just look within the leaf: +// +// If Index is not in the leaf, return success; otherwise return the first +// empty Index, if any, below/above where it would belong. + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlw = P_JLW(PArray); // first word of leaf. + pop0 = Pjlw[0]; + +#ifdef JUDY1 + if (pop0 == 0) // special case. + { +#ifdef JUDYPREV + if ((Index != Pjlw[1]) || (Index-- != 0)) RET_SUCCESS; +#else + if ((Index != Pjlw[1]) || (++Index != 0)) RET_SUCCESS; +#endif + return(0); // no previous/next empty index. + } +#endif // JUDY1 + + j__udySearchLeafEmptyL(Pjlw + 1, pop0); + +// No return -- thanks ALAN + + } + else + +// ---------------------------------------------------------------------------- +// HANDLE JRP Branch: +// +// For JRP branches, traverse the JPM; handle LEAFW +// directly; but look for the most common cases first. + + { + Pjpm_t Pjpm = P_JPM(PArray); + Pjp = &(Pjpm->jpm_JP); + +// goto SMGetContinue; + } + + +// ============================================================================ +// STATE MACHINE -- GET INDEX: +// +// Search for Index (already decremented/incremented so as to be an inclusive +// search). If not found (empty index), return success. Otherwise do a +// previous/next search, and if successful modify Index to the empty index +// found. See function header comments. +// +// ENTRY: Pjp points to next JP to interpret, whose Decode bytes have not yet +// been checked. +// +// Note: Check Decode bytes at the start of each loop, not after looking up a +// new JP, so its easy to do constant shifts/masks. +// +// EXIT: Return, or branch to SMGetRestart with modified Index, or branch to +// SMGetContinue with a modified Pjp, as described elsewhere. +// +// WARNING: For run-time efficiency the following cases replicate code with +// varying constants, rather than using common code with variable values! + +SMGetContinue: // return here for next branch/leaf. + +#ifdef TRACEJPSE + JudyPrintJP(Pjp, "sf", __LINE__); +#endif + + switch (JU_JPTYPE(Pjp)) + { + + +// ---------------------------------------------------------------------------- +// LINEAR BRANCH: +// +// Check Decode bytes, if any, in the current JP, then search for a JP for the +// next digit in Index. + + case cJU_JPBRANCH_L2: CHECKDCD(2); SMPREPB2(SMBranchL); + case cJU_JPBRANCH_L3: CHECKDCD(3); SMPREPB3(SMBranchL); +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: CHECKDCD(4); SMPREPB4(SMBranchL); + case cJU_JPBRANCH_L5: CHECKDCD(5); SMPREPB5(SMBranchL); + case cJU_JPBRANCH_L6: CHECKDCD(6); SMPREPB6(SMBranchL); + case cJU_JPBRANCH_L7: CHECKDCD(7); SMPREPB7(SMBranchL); +#endif + case cJU_JPBRANCH_L: SMPREPBL(SMBranchL); + +// Common code (state-independent) for all cases of linear branches: + +SMBranchL: + Pjbl = P_JBL(Pjp->jp_Addr); + +// First, check if Indexs expanse (digit) is below/above the first/last +// populated expanse in the BranchL, in which case Index is empty; otherwise +// find the offset of the lowest/highest populated expanse at or above/below +// digit, if any: +// +// Note: The for-loop is guaranteed to exit eventually because the first/last +// expanse is known to be a terminator. +// +// Note: Cannot use j__udySearchLeaf*Empty1() here because it only applies to +// leaves and does not know about partial versus full JPs, unlike the use of +// j__udySearchLeaf1() for BranchLs in SearchValid code. Also, since linear +// leaf expanse lists are small, dont waste time calling j__udySearchLeaf1(), +// just scan the expanse list. + +#ifdef JUDYPREV + if ((Pjbl->jbl_Expanse[0]) > digit) RET_SUCCESS; + + for (offset = (Pjbl->jbl_NumJPs) - 1; /* null */; --offset) +#else + if ((Pjbl->jbl_Expanse[(Pjbl->jbl_NumJPs) - 1]) < digit) + RET_SUCCESS; + + for (offset = 0; /* null */; ++offset) +#endif + { + +// Too low/high, keep going; or too high/low, meaning the loop passed a hole +// and the initial Index is empty: + +#ifdef JUDYPREV + if ((Pjbl->jbl_Expanse[offset]) > digit) continue; + if ((Pjbl->jbl_Expanse[offset]) < digit) RET_SUCCESS; +#else + if ((Pjbl->jbl_Expanse[offset]) < digit) continue; + if ((Pjbl->jbl_Expanse[offset]) > digit) RET_SUCCESS; +#endif + +// Found expanse matching digit; if its not full, traverse through it: + + if (! JPFULL((Pjbl->jbl_jp) + offset)) + { + Pjp = (Pjbl->jbl_jp) + offset; + goto SMGetContinue; + } + +// Common code: While searching for a lower/higher hole or a non-full JP, upon +// finding a lower/higher hole, adjust Index using the revised digit and +// return; or upon finding a consecutive lower/higher expanse, if the expanses +// JP is non-full, modify Index and traverse through the JP: + +#define BRANCHL_CHECK(OpIncDec,OpLeastDigits,Digit,Digits) \ + { \ + if ((Pjbl->jbl_Expanse[offset]) != OpIncDec digit) \ + SET_AND_RETURN(OpLeastDigits, Digit, Digits); \ + \ + if (! JPFULL((Pjbl->jbl_jp) + offset)) \ + { \ + Pjp = (Pjbl->jbl_jp) + offset; \ + SET_AND_CONTINUE(OpLeastDigits, Digit, Digits); \ + } \ + } + +// BranchL primary dead end: Expanse matching Index/digit is full (rare except +// for dense/sequential indexes): +// +// Search for a lower/higher hole, a non-full JP, or the end of the expanse +// list, while decrementing/incrementing digit. + +#ifdef JUDYPREV + while (--offset >= 0) + BRANCHL_CHECK(--, SETLEASTDIGITS_D, digit, digits) +#else + while (++offset < Pjbl->jbl_NumJPs) + BRANCHL_CHECK(++, CLEARLEASTDIGITS_D, digit, digits) +#endif + +// Passed end of BranchL expanse list after finding a matching but full +// expanse: +// +// Digit now matches the lowest/highest expanse, which is a full expanse; if +// digit is at the end of BranchLs expanse (no hole before/after), break out +// of the loop; otherwise modify Index to the next lower/higher digit and +// return success: + +#ifdef JUDYPREV + if (digit == 0) break; + --digit; SET_AND_RETURN(SETLEASTDIGITS_D, digit, digits); +#else + if (digit == JU_LEASTBYTES(cJU_ALLONES, 1)) break; + ++digit; SET_AND_RETURN(CLEARLEASTDIGITS_D, digit, digits); +#endif + } // for-loop + +// BranchL secondary dead end, no non-full previous/next JP: + + SMRESTART(digits); + + +// ---------------------------------------------------------------------------- +// BITMAP BRANCH: +// +// Check Decode bytes, if any, in the current JP, then search for a JP for the +// next digit in Index. + + case cJU_JPBRANCH_B2: CHECKDCD(2); SMPREPB2(SMBranchB); + case cJU_JPBRANCH_B3: CHECKDCD(3); SMPREPB3(SMBranchB); +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: CHECKDCD(4); SMPREPB4(SMBranchB); + case cJU_JPBRANCH_B5: CHECKDCD(5); SMPREPB5(SMBranchB); + case cJU_JPBRANCH_B6: CHECKDCD(6); SMPREPB6(SMBranchB); + case cJU_JPBRANCH_B7: CHECKDCD(7); SMPREPB7(SMBranchB); +#endif + case cJU_JPBRANCH_B: SMPREPBL(SMBranchB); + +// Common code (state-independent) for all cases of bitmap branches: + +SMBranchB: + Pjbb = P_JBB(Pjp->jp_Addr); + +// Locate the digits JP in the subexpanse list, if present: + + subexp = digit / cJU_BITSPERSUBEXPB; + assert(subexp < cJU_NUMSUBEXPB); // falls in expected range. + bitposmaskB = JU_BITPOSMASKB(digit); + +// Absent JP = no JP matches current digit in Index: + +// if (! JU_BITMAPTESTB(Pjbb, digit)) // slower. + if (! (JU_JBB_BITMAP(Pjbb, subexp) & bitposmaskB)) // faster. + RET_SUCCESS; + +// Non-full JP matches current digit in Index: +// +// Iterate to the subsidiary non-full JP. + + offset = SEARCHBITMAPB(JU_JBB_BITMAP(Pjbb, subexp), digit, + bitposmaskB); + // not negative since at least one bit is set: + assert(offset >= 0); + assert(offset < (int) cJU_BITSPERSUBEXPB); + +// Watch for null JP subarray pointer with non-null bitmap (a corruption): + + if ((Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp))) + == (Pjp_t) NULL) RET_CORRUPT; + + Pjp += offset; + if (! JPFULL(Pjp)) goto SMGetContinue; + +// BranchB primary dead end: +// +// Upon hitting a full JP in a BranchB for the next digit in Index, search +// sideways for a previous/next absent JP (unset bit) or non-full JP (set bit +// with non-full JP); first in the current bitmap subexpanse, then in +// lower/higher subexpanses. Upon entry, Pjp points to a known-unusable JP, +// ready to decrement/increment. +// +// Note: The preceding code is separate from this loop because Index does not +// need revising (see SET_AND_*()) if the initial index is an empty index. +// +// TBD: For speed, shift bitposmaskB instead of using JU_BITMAPTESTB or +// JU_BITPOSMASKB, but this shift has knowledge of bit order that really should +// be encapsulated in a header file. + +#define BRANCHB_CHECKBIT(OpLeastDigits) \ + if (! (JU_JBB_BITMAP(Pjbb, subexp) & bitposmaskB)) /* absent JP */ \ + SET_AND_RETURN(OpLeastDigits, digit, digits) + +#define BRANCHB_CHECKJPFULL(OpLeastDigits) \ + if (! JPFULL(Pjp)) \ + SET_AND_CONTINUE(OpLeastDigits, digit, digits) + +#define BRANCHB_STARTSUBEXP(OpLeastDigits) \ + if (! JU_JBB_BITMAP(Pjbb, subexp)) /* empty subexpanse, shortcut */ \ + SET_AND_RETURN(OpLeastDigits, digit, digits) \ + if ((Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp))) == (Pjp_t) NULL) RET_CORRUPT + +#ifdef JUDYPREV + + --digit; // skip initial digit. + bitposmaskB >>= 1; // see TBD above. + +BranchBNextSubexp: // return here to check next bitmap subexpanse. + + while (bitposmaskB) // more bits to check in subexp. + { + BRANCHB_CHECKBIT(SETLEASTDIGITS_D); + --Pjp; // previous in subarray. + BRANCHB_CHECKJPFULL(SETLEASTDIGITS_D); + assert(digit >= 0); + --digit; + bitposmaskB >>= 1; + } + + if (subexp-- > 0) // more subexpanses. + { + BRANCHB_STARTSUBEXP(SETLEASTDIGITS_D); + Pjp += SEARCHBITMAPMAXB(JU_JBB_BITMAP(Pjbb, subexp)) + 1; + bitposmaskB = (1U << (cJU_BITSPERSUBEXPB - 1)); + goto BranchBNextSubexp; + } + +#else // JUDYNEXT + + ++digit; // skip initial digit. + bitposmaskB <<= 1; // note: BITMAPB_t. + +BranchBNextSubexp: // return here to check next bitmap subexpanse. + + while (bitposmaskB) // more bits to check in subexp. + { + BRANCHB_CHECKBIT(CLEARLEASTDIGITS_D); + ++Pjp; // previous in subarray. + BRANCHB_CHECKJPFULL(CLEARLEASTDIGITS_D); + assert(digit < cJU_SUBEXPPERSTATE); + ++digit; + bitposmaskB <<= 1; // note: BITMAPB_t. + } + + if (++subexp < cJU_NUMSUBEXPB) // more subexpanses. + { + BRANCHB_STARTSUBEXP(CLEARLEASTDIGITS_D); + --Pjp; // pre-decrement. + bitposmaskB = 1; + goto BranchBNextSubexp; + } + +#endif // JUDYNEXT + +// BranchB secondary dead end, no non-full previous/next JP: + + SMRESTART(digits); + + +// ---------------------------------------------------------------------------- +// UNCOMPRESSED BRANCH: +// +// Check Decode bytes, if any, in the current JP, then search for a JP for the +// next digit in Index. + + case cJU_JPBRANCH_U2: CHECKDCD(2); SMPREPB2(SMBranchU); + case cJU_JPBRANCH_U3: CHECKDCD(3); SMPREPB3(SMBranchU); +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: CHECKDCD(4); SMPREPB4(SMBranchU); + case cJU_JPBRANCH_U5: CHECKDCD(5); SMPREPB5(SMBranchU); + case cJU_JPBRANCH_U6: CHECKDCD(6); SMPREPB6(SMBranchU); + case cJU_JPBRANCH_U7: CHECKDCD(7); SMPREPB7(SMBranchU); +#endif + case cJU_JPBRANCH_U: SMPREPBL(SMBranchU); + +// Common code (state-independent) for all cases of uncompressed branches: + +SMBranchU: + Pjbu = P_JBU(Pjp->jp_Addr); + Pjp = (Pjbu->jbu_jp) + digit; + +// Absent JP = null JP for current digit in Index: + + if (JPNULL(JU_JPTYPE(Pjp))) RET_SUCCESS; + +// Non-full JP matches current digit in Index: +// +// Iterate to the subsidiary JP. + + if (! JPFULL(Pjp)) goto SMGetContinue; + +// BranchU primary dead end: +// +// Upon hitting a full JP in a BranchU for the next digit in Index, search +// sideways for a previous/next null or non-full JP. BRANCHU_CHECKJP() is +// shorthand for common code. +// +// Note: The preceding code is separate from this loop because Index does not +// need revising (see SET_AND_*()) if the initial index is an empty index. + +#define BRANCHU_CHECKJP(OpIncDec,OpLeastDigits) \ + { \ + OpIncDec Pjp; \ + \ + if (JPNULL(JU_JPTYPE(Pjp))) \ + SET_AND_RETURN(OpLeastDigits, digit, digits) \ + \ + if (! JPFULL(Pjp)) \ + SET_AND_CONTINUE(OpLeastDigits, digit, digits) \ + } + +#ifdef JUDYPREV + while (digit-- > 0) + BRANCHU_CHECKJP(--, SETLEASTDIGITS_D); +#else + while (++digit < cJU_BRANCHUNUMJPS) + BRANCHU_CHECKJP(++, CLEARLEASTDIGITS_D); +#endif + +// BranchU secondary dead end, no non-full previous/next JP: + + SMRESTART(digits); + + +// ---------------------------------------------------------------------------- +// LINEAR LEAF: +// +// Check Decode bytes, if any, in the current JP, then search the leaf for the +// previous/next empty index starting at Index. Primary leaf dead end is +// hidden within j__udySearchLeaf*Empty*(). In case of secondary leaf dead +// end, restart at the top of the tree. +// +// Note: Pword is the name known to GET*; think of it as Pjlw. + +#define SMLEAFL(cDigits,Func) \ + Pword = (PWord_t) P_JLW(Pjp->jp_Addr); \ + pop0 = JU_JPLEAF_POP0(Pjp); \ + Func(Pword, pop0) + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: CHECKDCD(1); SMLEAFL(1, j__udySearchLeafEmpty1); +#endif + case cJU_JPLEAF2: CHECKDCD(2); SMLEAFL(2, j__udySearchLeafEmpty2); + case cJU_JPLEAF3: CHECKDCD(3); SMLEAFL(3, j__udySearchLeafEmpty3); + +#ifdef JU_64BIT + case cJU_JPLEAF4: CHECKDCD(4); SMLEAFL(4, j__udySearchLeafEmpty4); + case cJU_JPLEAF5: CHECKDCD(5); SMLEAFL(5, j__udySearchLeafEmpty5); + case cJU_JPLEAF6: CHECKDCD(6); SMLEAFL(6, j__udySearchLeafEmpty6); + case cJU_JPLEAF7: CHECKDCD(7); SMLEAFL(7, j__udySearchLeafEmpty7); +#endif + + +// ---------------------------------------------------------------------------- +// BITMAP LEAF: +// +// Check Decode bytes, if any, in the current JP, then search the leaf for the +// previous/next empty index starting at Index. + + case cJU_JPLEAF_B1: + + CHECKDCD(1); + + Pjlb = P_JLB(Pjp->jp_Addr); + digit = JU_DIGITATSTATE(Index, 1); + subexp = digit / cJU_BITSPERSUBEXPL; + bitposmaskL = JU_BITPOSMASKL(digit); + assert(subexp < cJU_NUMSUBEXPL); // falls in expected range. + +// Absent index = no index matches current digit in Index: + +// if (! JU_BITMAPTESTL(Pjlb, digit)) // slower. + if (! (JU_JLB_BITMAP(Pjlb, subexp) & bitposmaskL)) // faster. + RET_SUCCESS; + +// LeafB1 primary dead end: +// +// Upon hitting a valid (non-empty) index in a LeafB1 for the last digit in +// Index, search sideways for a previous/next absent index, first in the +// current bitmap subexpanse, then in lower/higher subexpanses. +// LEAFB1_CHECKBIT() is shorthand for common code to handle one bit in one +// bitmap subexpanse. +// +// Note: The preceding code is separate from this loop because Index does not +// need revising (see SET_AND_*()) if the initial index is an empty index. +// +// TBD: For speed, shift bitposmaskL instead of using JU_BITMAPTESTL or +// JU_BITPOSMASKL, but this shift has knowledge of bit order that really should +// be encapsulated in a header file. + +#define LEAFB1_CHECKBIT(OpLeastDigits) \ + if (! (JU_JLB_BITMAP(Pjlb, subexp) & bitposmaskL)) \ + SET_AND_RETURN(OpLeastDigits, digit, 1) + +#define LEAFB1_STARTSUBEXP(OpLeastDigits) \ + if (! JU_JLB_BITMAP(Pjlb, subexp)) /* empty subexp */ \ + SET_AND_RETURN(OpLeastDigits, digit, 1) + +#ifdef JUDYPREV + + --digit; // skip initial digit. + bitposmaskL >>= 1; // see TBD above. + +LeafB1NextSubexp: // return here to check next bitmap subexpanse. + + while (bitposmaskL) // more bits to check in subexp. + { + LEAFB1_CHECKBIT(SETLEASTDIGITS_D); + assert(digit >= 0); + --digit; + bitposmaskL >>= 1; + } + + if (subexp-- > 0) // more subexpanses. + { + LEAFB1_STARTSUBEXP(SETLEASTDIGITS_D); + bitposmaskL = (1UL << (cJU_BITSPERSUBEXPL - 1)); + goto LeafB1NextSubexp; + } + +#else // JUDYNEXT + + ++digit; // skip initial digit. + bitposmaskL <<= 1; // note: BITMAPL_t. + +LeafB1NextSubexp: // return here to check next bitmap subexpanse. + + while (bitposmaskL) // more bits to check in subexp. + { + LEAFB1_CHECKBIT(CLEARLEASTDIGITS_D); + assert(digit < cJU_SUBEXPPERSTATE); + ++digit; + bitposmaskL <<= 1; // note: BITMAPL_t. + } + + if (++subexp < cJU_NUMSUBEXPL) // more subexpanses. + { + LEAFB1_STARTSUBEXP(CLEARLEASTDIGITS_D); + bitposmaskL = 1; + goto LeafB1NextSubexp; + } + +#endif // JUDYNEXT + +// LeafB1 secondary dead end, no empty index: + + SMRESTART(1); + + +#ifdef JUDY1 +// ---------------------------------------------------------------------------- +// FULL POPULATION: +// +// If the Decode bytes do not match, Index is empty (without modification); +// otherwise restart. + + case cJ1_JPFULLPOPU1: + + CHECKDCD(1); + SMRESTART(1); +#endif + + +// ---------------------------------------------------------------------------- +// IMMEDIATE: +// +// Pop1 = 1 Immediate JPs: +// +// If Index is not in the immediate JP, return success; otherwise check if +// there is an empty index below/above the immediate JPs index, and if so, +// return success with modified Index, else restart. +// +// Note: Doug says its fast enough to calculate the index size (digits) in +// the following; no need to set it separately for each case. + + case cJU_JPIMMED_1_01: + case cJU_JPIMMED_2_01: + case cJU_JPIMMED_3_01: +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: + case cJU_JPIMMED_5_01: + case cJU_JPIMMED_6_01: + case cJU_JPIMMED_7_01: +#endif + if (JU_JPDCDPOP0(Pjp) != JU_TRIMTODCDSIZE(Index)) RET_SUCCESS; + digits = JU_JPTYPE(Pjp) - cJU_JPIMMED_1_01 + 1; + LEAF_EDGE(JU_LEASTBYTES(JU_JPDCDPOP0(Pjp), digits), digits); + +// Immediate JPs with Pop1 > 1: + +#define IMM_MULTI(Func,BaseJPType) \ + JUDY1CODE(Pword = (PWord_t) (Pjp->jp_1Index);) \ + JUDYLCODE(Pword = (PWord_t) (Pjp->jp_LIndex);) \ + Func(Pword, JU_JPTYPE(Pjp) - (BaseJPType) + 1) + + case cJU_JPIMMED_1_02: + case cJU_JPIMMED_1_03: +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: + case cJU_JPIMMED_1_05: + case cJU_JPIMMED_1_06: + case cJU_JPIMMED_1_07: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: + case cJ1_JPIMMED_1_09: + case cJ1_JPIMMED_1_10: + case cJ1_JPIMMED_1_11: + case cJ1_JPIMMED_1_12: + case cJ1_JPIMMED_1_13: + case cJ1_JPIMMED_1_14: + case cJ1_JPIMMED_1_15: +#endif + IMM_MULTI(j__udySearchLeafEmpty1, cJU_JPIMMED_1_02); + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: + case cJU_JPIMMED_2_03: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: + case cJ1_JPIMMED_2_05: + case cJ1_JPIMMED_2_06: + case cJ1_JPIMMED_2_07: +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + IMM_MULTI(j__udySearchLeafEmpty2, cJU_JPIMMED_2_02); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: + case cJ1_JPIMMED_3_04: + case cJ1_JPIMMED_3_05: +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + IMM_MULTI(j__udySearchLeafEmpty3, cJU_JPIMMED_3_02); +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_4_02: + case cJ1_JPIMMED_4_03: + IMM_MULTI(j__udySearchLeafEmpty4, cJ1_JPIMMED_4_02); + + case cJ1_JPIMMED_5_02: + case cJ1_JPIMMED_5_03: + IMM_MULTI(j__udySearchLeafEmpty5, cJ1_JPIMMED_5_02); + + case cJ1_JPIMMED_6_02: + IMM_MULTI(j__udySearchLeafEmpty6, cJ1_JPIMMED_6_02); + + case cJ1_JPIMMED_7_02: + IMM_MULTI(j__udySearchLeafEmpty7, cJ1_JPIMMED_7_02); +#endif + + +// ---------------------------------------------------------------------------- +// INVALID JP TYPE: + + default: RET_CORRUPT; + + } // SMGet switch. + +} // Judy1PrevEmpty() / Judy1NextEmpty() / JudyLPrevEmpty() / JudyLNextEmpty() diff --git a/libnetdata/libjudy/src/JudyL/JudyLPrev.c b/libnetdata/libjudy/src/JudyL/JudyLPrev.c new file mode 100644 index 0000000..4bcdccf --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLPrev.c @@ -0,0 +1,1890 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.54 $ $Source: /judy/src/JudyCommon/JudyPrevNext.c $ +// +// Judy*Prev() and Judy*Next() functions for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. +// +// Compile with -DJUDYNEXT for the Judy*Next() function; otherwise defaults to +// Judy*Prev(). + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifndef JUDYNEXT +#ifndef JUDYPREV +#define JUDYPREV 1 // neither set => use default. +#endif +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + + +// **************************************************************************** +// J U D Y 1 P R E V +// J U D Y 1 N E X T +// J U D Y L P R E V +// J U D Y L N E X T +// +// See the manual entry for the API. +// +// OVERVIEW OF Judy*Prev(): +// +// Use a reentrant switch statement (state machine, SM1 = "get") to decode the +// callers *PIndex-1, starting with the (PArray), through branches, if +// any, down to an immediate or a leaf. Look for *PIndex-1 in that leaf, and +// if found, return it. +// +// A dead end is either a branch that does not contain a JP for the appropriate +// digit in *PIndex-1, or a leaf that does not contain the undecoded digits of +// *PIndex-1. Upon reaching a dead end, backtrack through the leaf/branches +// that were just traversed, using a list (history) of parent JPs that is built +// while going forward in SM1Get. Start with the current leaf or branch. In a +// backtracked leaf, look for an Index less than *PIndex-1. In each +// backtracked branch, look "sideways" for the next JP, if any, lower than the +// one for the digit (from *PIndex-1) that was previously decoded. While +// backtracking, if a leaf has no previous Index or a branch has no lower JP, +// go to its parent branch in turn. Upon reaching the JRP, return failure, "no +// previous Index". The backtrack process is sufficiently different from +// SM1Get to merit its own separate reentrant switch statement (SM2 = +// "backtrack"). +// +// While backtracking, upon finding a lower JP in a branch, there is certain to +// be a "prev" Index under that JP (unless the Judy array is corrupt). +// Traverse forward again, this time taking the last (highest, right-most) JP +// in each branch, and the last (highest) Index upon reaching an immediate or a +// leaf. This traversal is sufficiently different from SM1Get and SM2Backtrack +// to merit its own separate reentrant switch statement (SM3 = "findlimit"). +// +// "Decode" bytes in JPs complicate this process a little. In SM1Get, when a +// JP is a narrow pointer, that is, when states are skipped (so the skipped +// digits are stored in jp_DcdPopO), compare the relevant digits to the same +// digits in *PIndex-1. If they are EQUAL, proceed in SM1Get as before. If +// jp_DcdPopOs digits are GREATER, treat the JP as a dead end and proceed in +// SM2Backtrack. If jp_DcdPopOs digits are LESS, treat the JP as if it had +// just been found during a backtrack and proceed directly in SM3Findlimit. +// +// Note that Decode bytes can be ignored in SM3Findlimit; they dont matter. +// Also note that in practice the Decode bytes are routinely compared with +// *PIndex-1 because thats simpler and no slower than first testing for +// narrowness. +// +// Decode bytes also make it unnecessary to construct the Index to return (the +// revised *PIndex) during the search. This step is deferred until finding an +// Index during backtrack or findlimit, before returning it. The first digit +// of *PIndex is derived (saved) based on which JP is used in a JRP branch. +// The remaining digits are obtained from the jp_DcdPopO field in the JP (if +// any) above the immediate or leaf containing the found (prev) Index, plus the +// remaining digit(s) in the immediate or leaf itself. In the case of a LEAFW, +// the Index to return is found directly in the leaf. +// +// Note: Theoretically, as described above, upon reaching a dead end, SM1Get +// passes control to SM2Backtrack to look sideways, even in a leaf. Actually +// its a little more efficient for the SM1Get leaf cases to shortcut this and +// take care of the sideways searches themselves. Hence the history list only +// contains branch JPs, and SM2Backtrack only handles branches. In fact, even +// the branch handling cases in SM1Get do some shortcutting (sideways +// searching) to avoid pushing history and calling SM2Backtrack unnecessarily. +// +// Upon reaching an Index to return after backtracking, *PIndex must be +// modified to the found Index. In principle this could be done by building +// the Index from a saved rootdigit (in the top branch) plus the Dcd bytes from +// the parent JP plus the appropriate Index bytes from the leaf. However, +// Immediates are difficult because their parent JPs lack one (last) digit. So +// instead just build the *PIndex to return "top down" while backtracking and +// findlimiting. +// +// This function is written iteratively for speed, rather than recursively. +// +// CAVEATS: +// +// Why use a backtrack list (history stack), since it has finite size? The +// size is small for Judy on both 32-bit and 64-bit systems, and a list (really +// just an array) is fast to maintain and use. Other alternatives include +// doing a lookahead (lookaside) in each branch while traversing forward +// (decoding), and restarting from the top upon a dead end. +// +// A lookahead means noting the last branch traversed which contained a +// non-null JP lower than the one specified by a digit in *PIndex-1, and +// returning to that point for SM3Findlimit. This seems like a good idea, and +// should be pretty cheap for linear and bitmap branches, but it could result +// in up to 31 unnecessary additional cache line fills (in extreme cases) for +// every uncompressed branch traversed. We have considered means of attaching +// to or hiding within an uncompressed branch (in null JPs) a "cache line map" +// or other structure, such as an offset to the next non-null JP, that would +// speed this up, but it seems unnecessary merely to avoid having a +// finite-length list (array). (If JudySL is ever made "native", the finite +// list length will be an issue.) +// +// Restarting at the top of the Judy array after a dead end requires a careful +// modification of *PIndex-1 to decrement the digit for the parent branch and +// set the remaining lower digits to all 1s. This must be repeated each time a +// parent branch contains another dead end, so even though it should all happen +// in cache, the CPU time can be excessive. (For JudySL or an equivalent +// "infinitely deep" Judy array, consider a hybrid of a large, finite, +// "circular" list and a restart-at-top when the list is backtracked to +// exhaustion.) +// +// Why search for *PIndex-1 instead of *PIndex during SM1Get? In rare +// instances this prevents an unnecessary decode down the wrong path followed +// by a backtrack; its pretty cheap to set up initially; and it means the +// SM1Get machine can simply return if/when it finds that Index. +// +// TBD: Wed like to enhance this function to make successive searches faster. +// This would require saving some previous state, including the previous Index +// returned, and in which leaf it was found. If the next call is for the same +// Index and the array has not been modified, start at the same leaf. This +// should be much easier to implement since this is iterative rather than +// recursive code. +// +// VARIATIONS FOR Judy*Next(): +// +// The Judy*Next() code is nearly a perfect mirror of the Judy*Prev() code. +// See the Judy*Prev() overview comments, and mentally switch the following: +// +// - "*PIndex-1" => "*PIndex+1" +// - "less than" => "greater than" +// - "lower" => "higher" +// - "lowest" => "highest" +// - "next-left" => "next-right" +// - "right-most" => "left-most" +// +// Note: SM3Findlimit could be called SM3Findmax/SM3Findmin, but a common name +// for both Prev and Next means many fewer ifdefs in this code. +// +// TBD: Currently this code traverses a JP whether its expanse is partially or +// completely full (populated). For Judy1 (only), since there is no value area +// needed, consider shortcutting to a "success" return upon encountering a full +// JP in SM1Get (or even SM3Findlimit?) A full JP looks like this: +// +// (((JU_JPDCDPOP0(Pjp) ^ cJU_ALLONES) & cJU_POP0MASK(cLevel)) == 0) + +#ifdef JUDY1 +#ifdef JUDYPREV +FUNCTION int Judy1Prev +#else +FUNCTION int Judy1Next +#endif +#else +#ifdef JUDYPREV +FUNCTION PPvoid_t JudyLPrev +#else +FUNCTION PPvoid_t JudyLNext +#endif +#endif + ( + Pcvoid_t PArray, // Judy array to search. + Word_t * PIndex, // starting point and result. + PJError_t PJError // optional, for returning error info. + ) +{ + Pjp_t Pjp, Pjp2; // current JPs. + Pjbl_t Pjbl; // Pjp->jp_Addr masked and cast to types: + Pjbb_t Pjbb; + Pjbu_t Pjbu; + +// Note: The following initialization is not strictly required but it makes +// gcc -Wall happy because there is an "impossible" path from Immed handling to +// SM1LeafLImm code that looks like Pjll might be used before set: + + Pjll_t Pjll = (Pjll_t) NULL; + Word_t state; // current state in SM. + Word_t digit; // next digit to decode from Index. + +// Note: The following initialization is not strictly required but it makes +// gcc -Wall happy because there is an "impossible" path from Immed handling to +// SM1LeafLImm code (for JudyL & JudyPrev only) that looks like pop1 might be +// used before set: + +#if (defined(JUDYL) && defined(JUDYPREV)) + Word_t pop1 = 0; // in a leaf. +#else + Word_t pop1; // in a leaf. +#endif + int offset; // linear branch/leaf, from j__udySearchLeaf*(). + int subexp; // subexpanse in a bitmap branch. + Word_t bitposmask; // bit in bitmap for Index. + +// History for SM2Backtrack: +// +// For a given histnum, APjphist[histnum] is a parent JP that points to a +// branch, and Aoffhist[histnum] is the offset of the NEXT JP in the branch to +// which the parent JP points. The meaning of Aoffhist[histnum] depends on the +// type of branch to which the parent JP points: +// +// Linear: Offset of the next JP in the JP list. +// +// Bitmap: Which subexpanse, plus the offset of the next JP in the +// subexpanses JP list (to avoid bit-counting again), plus for Judy*Next(), +// hidden one byte to the left, which digit, because Judy*Next() also needs +// this. +// +// Uncompressed: Digit, which is actually the offset of the JP in the branch. +// +// Note: Only branch JPs are stored in APjphist[] because, as explained +// earlier, SM1Get shortcuts sideways searches in leaves (and even in branches +// in some cases), so SM2Backtrack only handles branches. + +#define HISTNUMMAX cJU_ROOTSTATE // maximum branches traversable. + Pjp_t APjphist[HISTNUMMAX]; // list of branch JPs traversed. + int Aoffhist[HISTNUMMAX]; // list of next JP offsets; see above. + int histnum = 0; // number of JPs now in list. + + +// ---------------------------------------------------------------------------- +// M A C R O S +// +// These are intended to make the code a bit more readable and less redundant. + + +// "PUSH" AND "POP" Pjp AND offset ON HISTORY STACKS: +// +// Note: Ensure a corrupt Judy array does not overflow *hist[]. Meanwhile, +// underflowing *hist[] simply means theres no more room to backtrack => +// "no previous/next Index". + +#define HISTPUSH(Pjp,Offset) \ + APjphist[histnum] = (Pjp); \ + Aoffhist[histnum] = (Offset); \ + \ + if (++histnum >= HISTNUMMAX) \ + { \ + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT) \ + JUDY1CODE(return(JERRI );) \ + JUDYLCODE(return(PPJERR);) \ + } + +#define HISTPOP(Pjp,Offset) \ + if ((histnum--) < 1) JU_RET_NOTFOUND; \ + (Pjp) = APjphist[histnum]; \ + (Offset) = Aoffhist[histnum] + +// How to pack/unpack Aoffhist[] values for bitmap branches: + +#ifdef JUDYPREV + +#define HISTPUSHBOFF(Subexp,Offset,Digit) \ + (((Subexp) * cJU_BITSPERSUBEXPB) | (Offset)) + +#define HISTPOPBOFF(Subexp,Offset,Digit) \ + (Subexp) = (Offset) / cJU_BITSPERSUBEXPB; \ + (Offset) %= cJU_BITSPERSUBEXPB +#else + +#define HISTPUSHBOFF(Subexp,Offset,Digit) \ + (((Digit) << cJU_BITSPERBYTE) \ + | ((Subexp) * cJU_BITSPERSUBEXPB) | (Offset)) + +#define HISTPOPBOFF(Subexp,Offset,Digit) \ + (Digit) = (Offset) >> cJU_BITSPERBYTE; \ + (Subexp) = ((Offset) & JU_LEASTBYTESMASK(1)) / cJU_BITSPERSUBEXPB; \ + (Offset) %= cJU_BITSPERSUBEXPB +#endif + + +// CHECK FOR NULL JP: + +#define JPNULL(Type) (((Type) >= cJU_JPNULL1) && ((Type) <= cJU_JPNULLMAX)) + + +// SEARCH A BITMAP: +// +// This is a weak analog of j__udySearchLeaf*() for bitmaps. Return the actual +// or next-left position, base 0, of Digit in the single uint32_t bitmap, also +// given a Bitposmask for Digit. +// +// Unlike j__udySearchLeaf*(), the offset is not returned bit-complemented if +// Digits bit is unset, because the caller can check the bitmap themselves to +// determine that. Also, if Digits bit is unset, the returned offset is to +// the next-left JP (including -1), not to the "ideal" position for the Index = +// next-right JP. +// +// Shortcut and skip calling j__udyCountBits*() if the bitmap is full, in which +// case (Digit % cJU_BITSPERSUBEXP*) itself is the base-0 offset. +// +// TBD for Judy*Next(): Should this return next-right instead of next-left? +// That is, +1 from current value? Maybe not, if Digits bit IS set, +1 would +// be wrong. + +#define SEARCHBITMAPB(Bitmap,Digit,Bitposmask) \ + (((Bitmap) == cJU_FULLBITMAPB) ? (Digit % cJU_BITSPERSUBEXPB) : \ + j__udyCountBitsB((Bitmap) & JU_MASKLOWERINC(Bitposmask)) - 1) + +#define SEARCHBITMAPL(Bitmap,Digit,Bitposmask) \ + (((Bitmap) == cJU_FULLBITMAPL) ? (Digit % cJU_BITSPERSUBEXPL) : \ + j__udyCountBitsL((Bitmap) & JU_MASKLOWERINC(Bitposmask)) - 1) + +#ifdef JUDYPREV +// Equivalent to search for the highest offset in Bitmap: + +#define SEARCHBITMAPMAXB(Bitmap) \ + (((Bitmap) == cJU_FULLBITMAPB) ? cJU_BITSPERSUBEXPB - 1 : \ + j__udyCountBitsB(Bitmap) - 1) + +#define SEARCHBITMAPMAXL(Bitmap) \ + (((Bitmap) == cJU_FULLBITMAPL) ? cJU_BITSPERSUBEXPL - 1 : \ + j__udyCountBitsL(Bitmap) - 1) +#endif + + +// CHECK DECODE BYTES: +// +// Check Decode bytes in a JP against the equivalent portion of *PIndex. If +// *PIndex is lower (for Judy*Prev()) or higher (for Judy*Next()), this JP is a +// dead end (the same as if it had been absent in a linear or bitmap branch or +// null in an uncompressed branch), enter SM2Backtrack; otherwise enter +// SM3Findlimit to find the highest/lowest Index under this JP, as if the code +// had already backtracked to this JP. + +#ifdef JUDYPREV +#define CDcmp__ < +#else +#define CDcmp__ > +#endif + +#define CHECKDCD(cState) \ + if (JU_DCDNOTMATCHINDEX(*PIndex, Pjp, cState)) \ + { \ + if ((*PIndex & cJU_DCDMASK(cState)) \ + CDcmp__(JU_JPDCDPOP0(Pjp) & cJU_DCDMASK(cState))) \ + { \ + goto SM2Backtrack; \ + } \ + goto SM3Findlimit; \ + } + + +// PREPARE TO HANDLE A LEAFW OR JRP BRANCH IN SM1: +// +// Extract a state-dependent digit from Index in a "constant" way, then jump to +// common code for multiple cases. + +#define SM1PREPB(cState,Next) \ + state = (cState); \ + digit = JU_DIGITATSTATE(*PIndex, cState); \ + goto Next + + +// PREPARE TO HANDLE A LEAFW OR JRP BRANCH IN SM3: +// +// Optionally save Dcd bytes into *PIndex, then save state and jump to common +// code for multiple cases. + +#define SM3PREPB_DCD(cState,Next) \ + JU_SETDCD(*PIndex, Pjp, cState); \ + SM3PREPB(cState,Next) + +#define SM3PREPB(cState,Next) state = (cState); goto Next + + +// ---------------------------------------------------------------------------- +// CHECK FOR SHORTCUTS: +// +// Error out if PIndex is null. Execute JU_RET_NOTFOUND if the Judy array is +// empty or *PIndex is already the minimum/maximum Index possible. +// +// Note: As documented, in case of failure *PIndex may be modified. + + if (PIndex == (PWord_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + +#ifdef JUDYPREV + if ((PArray == (Pvoid_t) NULL) || ((*PIndex)-- == 0)) +#else + if ((PArray == (Pvoid_t) NULL) || ((*PIndex)++ == cJU_ALLONES)) +#endif + JU_RET_NOTFOUND; + + +// HANDLE JRP: +// +// Before even entering SM1Get, check the JRP type. For JRP branches, traverse +// the JPM; handle LEAFW leaves directly; but look for the most common cases +// first. + +// ROOT-STATE LEAF that starts with a Pop0 word; just look within the leaf: +// +// If *PIndex is in the leaf, return it; otherwise return the Index, if any, +// below where it would belong. + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlw = P_JLW(PArray); // first word of leaf. + pop1 = Pjlw[0] + 1; + + if ((offset = j__udySearchLeafW(Pjlw + 1, pop1, *PIndex)) + >= 0) // Index is present. + { + assert(offset < pop1); // in expected range. + JU_RET_FOUND_LEAFW(Pjlw, pop1, offset); // *PIndex is set. + } + +#ifdef JUDYPREV + if ((offset = ~offset) == 0) // no next-left Index. +#else + if ((offset = ~offset) >= pop1) // no next-right Index. +#endif + JU_RET_NOTFOUND; + + assert(offset <= pop1); // valid result. + +#ifdef JUDYPREV + *PIndex = Pjlw[offset--]; // next-left Index, base 1. +#else + *PIndex = Pjlw[offset + 1]; // next-right Index, base 1. +#endif + JU_RET_FOUND_LEAFW(Pjlw, pop1, offset); // base 0. + + } + else // JRP BRANCH + { + Pjpm_t Pjpm = P_JPM(PArray); + Pjp = &(Pjpm->jpm_JP); + +// goto SM1Get; + } + +// ============================================================================ +// STATE MACHINE 1 -- GET INDEX: +// +// Search for *PIndex (already decremented/incremented so as to be inclusive). +// If found, return it. Otherwise in theory hand off to SM2Backtrack or +// SM3Findlimit, but in practice "shortcut" by first sideways searching the +// current branch or leaf upon hitting a dead end. During sideways search, +// modify *PIndex to a new path taken. +// +// ENTRY: Pjp points to next JP to interpret, whose Decode bytes have not yet +// been checked. This JP is not yet listed in history. +// +// Note: Check Decode bytes at the start of each loop, not after looking up a +// new JP, so its easy to do constant shifts/masks, although this requires +// cautious handling of Pjp, offset, and *hist[] for correct entry to +// SM2Backtrack. +// +// EXIT: Return, or branch to SM2Backtrack or SM3Findlimit with correct +// interface, as described elsewhere. +// +// WARNING: For run-time efficiency the following cases replicate code with +// varying constants, rather than using common code with variable values! + +SM1Get: // return here for next branch/leaf. + + switch (JU_JPTYPE(Pjp)) + { + + +// ---------------------------------------------------------------------------- +// LINEAR BRANCH: +// +// Check Decode bytes, if any, in the current JP, then search for a JP for the +// next digit in *PIndex. + + case cJU_JPBRANCH_L2: CHECKDCD(2); SM1PREPB(2, SM1BranchL); + case cJU_JPBRANCH_L3: CHECKDCD(3); SM1PREPB(3, SM1BranchL); +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: CHECKDCD(4); SM1PREPB(4, SM1BranchL); + case cJU_JPBRANCH_L5: CHECKDCD(5); SM1PREPB(5, SM1BranchL); + case cJU_JPBRANCH_L6: CHECKDCD(6); SM1PREPB(6, SM1BranchL); + case cJU_JPBRANCH_L7: CHECKDCD(7); SM1PREPB(7, SM1BranchL); +#endif + case cJU_JPBRANCH_L: SM1PREPB(cJU_ROOTSTATE, SM1BranchL); + +// Common code (state-independent) for all cases of linear branches: + +SM1BranchL: + Pjbl = P_JBL(Pjp->jp_Addr); + +// Found JP matching current digit in *PIndex; record parent JP and the next +// JPs offset, and iterate to the next JP: + + if ((offset = j__udySearchLeaf1((Pjll_t) (Pjbl->jbl_Expanse), + Pjbl->jbl_NumJPs, digit)) >= 0) + { + HISTPUSH(Pjp, offset); + Pjp = (Pjbl->jbl_jp) + offset; + goto SM1Get; + } + +// Dead end, no JP in BranchL for next digit in *PIndex: +// +// Get the ideal location of digits JP, and if theres no next-left/right JP +// in the BranchL, shortcut and start backtracking one level up; ignore the +// current Pjp because it points to a BranchL with no next-left/right JP. + +#ifdef JUDYPREV + if ((offset = (~offset) - 1) < 0) // no next-left JP in BranchL. +#else + if ((offset = (~offset)) >= Pjbl->jbl_NumJPs) // no next-right. +#endif + goto SM2Backtrack; + +// Theres a next-left/right JP in the current BranchL; save its digit in +// *PIndex and shortcut to SM3Findlimit: + + JU_SETDIGIT(*PIndex, Pjbl->jbl_Expanse[offset], state); + Pjp = (Pjbl->jbl_jp) + offset; + goto SM3Findlimit; + + +// ---------------------------------------------------------------------------- +// BITMAP BRANCH: +// +// Check Decode bytes, if any, in the current JP, then look for a JP for the +// next digit in *PIndex. + + case cJU_JPBRANCH_B2: CHECKDCD(2); SM1PREPB(2, SM1BranchB); + case cJU_JPBRANCH_B3: CHECKDCD(3); SM1PREPB(3, SM1BranchB); +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: CHECKDCD(4); SM1PREPB(4, SM1BranchB); + case cJU_JPBRANCH_B5: CHECKDCD(5); SM1PREPB(5, SM1BranchB); + case cJU_JPBRANCH_B6: CHECKDCD(6); SM1PREPB(6, SM1BranchB); + case cJU_JPBRANCH_B7: CHECKDCD(7); SM1PREPB(7, SM1BranchB); +#endif + case cJU_JPBRANCH_B: SM1PREPB(cJU_ROOTSTATE, SM1BranchB); + +// Common code (state-independent) for all cases of bitmap branches: + +SM1BranchB: + Pjbb = P_JBB(Pjp->jp_Addr); + +// Locate the digits JP in the subexpanse list, if present, otherwise the +// offset of the next-left JP, if any: + + subexp = digit / cJU_BITSPERSUBEXPB; + assert(subexp < cJU_NUMSUBEXPB); // falls in expected range. + bitposmask = JU_BITPOSMASKB(digit); + offset = SEARCHBITMAPB(JU_JBB_BITMAP(Pjbb, subexp), digit, + bitposmask); + // right range: + assert((offset >= -1) && (offset < (int) cJU_BITSPERSUBEXPB)); + +// Found JP matching current digit in *PIndex: +// +// Record the parent JP and the next JPs offset; and iterate to the next JP. + +// if (JU_BITMAPTESTB(Pjbb, digit)) // slower. + if (JU_JBB_BITMAP(Pjbb, subexp) & bitposmask) // faster. + { + // not negative since at least one bit is set: + assert(offset >= 0); + + HISTPUSH(Pjp, HISTPUSHBOFF(subexp, offset, digit)); + + if ((Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp))) == (Pjp_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + + Pjp += offset; + goto SM1Get; // iterate to next JP. + } + +// Dead end, no JP in BranchB for next digit in *PIndex: +// +// If theres a next-left/right JP in the current BranchB, shortcut to +// SM3Findlimit. Note: offset is already set to the correct value for the +// next-left/right JP. + +#ifdef JUDYPREV + if (offset >= 0) // next-left JP is in this subexpanse. + goto SM1BranchBFindlimit; + + while (--subexp >= 0) // search next-left subexpanses. +#else + if (JU_JBB_BITMAP(Pjbb, subexp) & JU_MASKHIGHEREXC(bitposmask)) + { + ++offset; // next-left => next-right. + goto SM1BranchBFindlimit; + } + + while (++subexp < cJU_NUMSUBEXPB) // search next-right subexps. +#endif + { + if (! JU_JBB_PJP(Pjbb, subexp)) continue; // empty subexpanse. + +#ifdef JUDYPREV + offset = SEARCHBITMAPMAXB(JU_JBB_BITMAP(Pjbb, subexp)); + // expected range: + assert((offset >= 0) && (offset < cJU_BITSPERSUBEXPB)); +#else + offset = 0; +#endif + +// Save the next-left/right JPs digit in *PIndex: + +SM1BranchBFindlimit: + JU_BITMAPDIGITB(digit, subexp, JU_JBB_BITMAP(Pjbb, subexp), + offset); + JU_SETDIGIT(*PIndex, digit, state); + + if ((Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp))) == (Pjp_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + + Pjp += offset; + goto SM3Findlimit; + } + +// Theres no next-left/right JP in the BranchB: +// +// Shortcut and start backtracking one level up; ignore the current Pjp because +// it points to a BranchB with no next-left/right JP. + + goto SM2Backtrack; + + +// ---------------------------------------------------------------------------- +// UNCOMPRESSED BRANCH: +// +// Check Decode bytes, if any, in the current JP, then look for a JP for the +// next digit in *PIndex. + + case cJU_JPBRANCH_U2: CHECKDCD(2); SM1PREPB(2, SM1BranchU); + case cJU_JPBRANCH_U3: CHECKDCD(3); SM1PREPB(3, SM1BranchU); +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: CHECKDCD(4); SM1PREPB(4, SM1BranchU); + case cJU_JPBRANCH_U5: CHECKDCD(5); SM1PREPB(5, SM1BranchU); + case cJU_JPBRANCH_U6: CHECKDCD(6); SM1PREPB(6, SM1BranchU); + case cJU_JPBRANCH_U7: CHECKDCD(7); SM1PREPB(7, SM1BranchU); +#endif + case cJU_JPBRANCH_U: SM1PREPB(cJU_ROOTSTATE, SM1BranchU); + +// Common code (state-independent) for all cases of uncompressed branches: + +SM1BranchU: + Pjbu = P_JBU(Pjp->jp_Addr); + Pjp2 = (Pjbu->jbu_jp) + digit; + +// Found JP matching current digit in *PIndex: +// +// Record the parent JP and the next JPs digit, and iterate to the next JP. +// +// TBD: Instead of this, just goto SM1Get, and add cJU_JPNULL* cases to the +// SM1Get state machine? Then backtrack? However, it means you cant detect +// an inappropriate cJU_JPNULL*, when it occurs in other than a BranchU, and +// return JU_RET_CORRUPT. + + if (! JPNULL(JU_JPTYPE(Pjp2))) // digit has a JP. + { + HISTPUSH(Pjp, digit); + Pjp = Pjp2; + goto SM1Get; + } + +// Dead end, no JP in BranchU for next digit in *PIndex: +// +// Search for a next-left/right JP in the current BranchU, and if one is found, +// save its digit in *PIndex and shortcut to SM3Findlimit: + +#ifdef JUDYPREV + while (digit >= 1) + { + Pjp = (Pjbu->jbu_jp) + (--digit); +#else + while (digit < cJU_BRANCHUNUMJPS - 1) + { + Pjp = (Pjbu->jbu_jp) + (++digit); +#endif + if (JPNULL(JU_JPTYPE(Pjp))) continue; + + JU_SETDIGIT(*PIndex, digit, state); + goto SM3Findlimit; + } + +// Theres no next-left/right JP in the BranchU: +// +// Shortcut and start backtracking one level up; ignore the current Pjp because +// it points to a BranchU with no next-left/right JP. + + goto SM2Backtrack; + + +// ---------------------------------------------------------------------------- +// LINEAR LEAF: +// +// Check Decode bytes, if any, in the current JP, then search the leaf for +// *PIndex. + +#define SM1LEAFL(Func) \ + Pjll = P_JLL(Pjp->jp_Addr); \ + pop1 = JU_JPLEAF_POP0(Pjp) + 1; \ + offset = Func(Pjll, pop1, *PIndex); \ + goto SM1LeafLImm + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: CHECKDCD(1); SM1LEAFL(j__udySearchLeaf1); +#endif + case cJU_JPLEAF2: CHECKDCD(2); SM1LEAFL(j__udySearchLeaf2); + case cJU_JPLEAF3: CHECKDCD(3); SM1LEAFL(j__udySearchLeaf3); + +#ifdef JU_64BIT + case cJU_JPLEAF4: CHECKDCD(4); SM1LEAFL(j__udySearchLeaf4); + case cJU_JPLEAF5: CHECKDCD(5); SM1LEAFL(j__udySearchLeaf5); + case cJU_JPLEAF6: CHECKDCD(6); SM1LEAFL(j__udySearchLeaf6); + case cJU_JPLEAF7: CHECKDCD(7); SM1LEAFL(j__udySearchLeaf7); +#endif + +// Common code (state-independent) for all cases of linear leaves and +// immediates: + +SM1LeafLImm: + if (offset >= 0) // *PIndex is in LeafL / Immed. +#ifdef JUDY1 + JU_RET_FOUND; +#else + { // JudyL is trickier... + switch (JU_JPTYPE(Pjp)) + { +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: JU_RET_FOUND_LEAF1(Pjll, pop1, offset); +#endif + case cJU_JPLEAF2: JU_RET_FOUND_LEAF2(Pjll, pop1, offset); + case cJU_JPLEAF3: JU_RET_FOUND_LEAF3(Pjll, pop1, offset); +#ifdef JU_64BIT + case cJU_JPLEAF4: JU_RET_FOUND_LEAF4(Pjll, pop1, offset); + case cJU_JPLEAF5: JU_RET_FOUND_LEAF5(Pjll, pop1, offset); + case cJU_JPLEAF6: JU_RET_FOUND_LEAF6(Pjll, pop1, offset); + case cJU_JPLEAF7: JU_RET_FOUND_LEAF7(Pjll, pop1, offset); +#endif + + case cJU_JPIMMED_1_01: + case cJU_JPIMMED_2_01: + case cJU_JPIMMED_3_01: +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: + case cJU_JPIMMED_5_01: + case cJU_JPIMMED_6_01: + case cJU_JPIMMED_7_01: +#endif + JU_RET_FOUND_IMM_01(Pjp); + + case cJU_JPIMMED_1_02: + case cJU_JPIMMED_1_03: +#ifdef JU_64BIT + case cJU_JPIMMED_1_04: + case cJU_JPIMMED_1_05: + case cJU_JPIMMED_1_06: + case cJU_JPIMMED_1_07: + case cJU_JPIMMED_2_02: + case cJU_JPIMMED_2_03: + case cJU_JPIMMED_3_02: +#endif + JU_RET_FOUND_IMM(Pjp, offset); + } + + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); // impossible? + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // found *PIndex + +#endif // JUDYL + +// Dead end, no Index in LeafL / Immed for remaining digit(s) in *PIndex: +// +// Get the ideal location of Index, and if theres no next-left/right Index in +// the LeafL / Immed, shortcut and start backtracking one level up; ignore the +// current Pjp because it points to a LeafL / Immed with no next-left/right +// Index. + +#ifdef JUDYPREV + if ((offset = (~offset) - 1) < 0) // no next-left Index. +#else + if ((offset = (~offset)) >= pop1) // no next-right Index. +#endif + goto SM2Backtrack; + +// Theres a next-left/right Index in the current LeafL / Immed; shortcut by +// copying its digit(s) to *PIndex and returning it. +// +// Unfortunately this is pretty hairy, especially avoiding endian issues. +// +// The cJU_JPLEAF* cases are very similar to same-index-size cJU_JPIMMED* cases +// for *_02 and above, but must return differently, at least for JudyL, so +// spell them out separately here at the cost of a little redundant code for +// Judy1. + + switch (JU_JPTYPE(Pjp)) + { +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: + + JU_SETDIGIT1(*PIndex, ((uint8_t *) Pjll)[offset]); + JU_RET_FOUND_LEAF1(Pjll, pop1, offset); +#endif + + case cJU_JPLEAF2: + + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(2))) + | ((uint16_t *) Pjll)[offset]; + JU_RET_FOUND_LEAF2(Pjll, pop1, offset); + + case cJU_JPLEAF3: + { + Word_t lsb; + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_LEAF3(Pjll, pop1, offset); + } + +#ifdef JU_64BIT + case cJU_JPLEAF4: + + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(4))) + | ((uint32_t *) Pjll)[offset]; + JU_RET_FOUND_LEAF4(Pjll, pop1, offset); + + case cJU_JPLEAF5: + { + Word_t lsb; + JU_COPY5_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (5 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(5))) | lsb; + JU_RET_FOUND_LEAF5(Pjll, pop1, offset); + } + + case cJU_JPLEAF6: + { + Word_t lsb; + JU_COPY6_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (6 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(6))) | lsb; + JU_RET_FOUND_LEAF6(Pjll, pop1, offset); + } + + case cJU_JPLEAF7: + { + Word_t lsb; + JU_COPY7_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (7 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(7))) | lsb; + JU_RET_FOUND_LEAF7(Pjll, pop1, offset); + } + +#endif // JU_64BIT + +#define SET_01(cState) JU_SETDIGITS(*PIndex, JU_JPDCDPOP0(Pjp), cState) + + case cJU_JPIMMED_1_01: SET_01(1); goto SM1Imm_01; + case cJU_JPIMMED_2_01: SET_01(2); goto SM1Imm_01; + case cJU_JPIMMED_3_01: SET_01(3); goto SM1Imm_01; +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: SET_01(4); goto SM1Imm_01; + case cJU_JPIMMED_5_01: SET_01(5); goto SM1Imm_01; + case cJU_JPIMMED_6_01: SET_01(6); goto SM1Imm_01; + case cJU_JPIMMED_7_01: SET_01(7); goto SM1Imm_01; +#endif +SM1Imm_01: JU_RET_FOUND_IMM_01(Pjp); + +// Shorthand for where to find start of Index bytes array: + +#ifdef JUDY1 +#define PJI (Pjp->jp_1Index) +#else +#define PJI (Pjp->jp_LIndex) +#endif + + case cJU_JPIMMED_1_02: + case cJU_JPIMMED_1_03: +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: + case cJU_JPIMMED_1_05: + case cJU_JPIMMED_1_06: + case cJU_JPIMMED_1_07: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: + case cJ1_JPIMMED_1_09: + case cJ1_JPIMMED_1_10: + case cJ1_JPIMMED_1_11: + case cJ1_JPIMMED_1_12: + case cJ1_JPIMMED_1_13: + case cJ1_JPIMMED_1_14: + case cJ1_JPIMMED_1_15: +#endif + JU_SETDIGIT1(*PIndex, ((uint8_t *) PJI)[offset]); + JU_RET_FOUND_IMM(Pjp, offset); + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: + case cJU_JPIMMED_2_03: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: + case cJ1_JPIMMED_2_05: + case cJ1_JPIMMED_2_06: + case cJ1_JPIMMED_2_07: +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(2))) + | ((uint16_t *) PJI)[offset]; + JU_RET_FOUND_IMM(Pjp, offset); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: + case cJ1_JPIMMED_3_04: + case cJ1_JPIMMED_3_05: +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + { + Word_t lsb; + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_4_02: + case cJ1_JPIMMED_4_03: + + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(4))) + | ((uint32_t *) PJI)[offset]; + JU_RET_FOUND_IMM(Pjp, offset); + + case cJ1_JPIMMED_5_02: + case cJ1_JPIMMED_5_03: + { + Word_t lsb; + JU_COPY5_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (5 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(5))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } + + case cJ1_JPIMMED_6_02: + { + Word_t lsb; + JU_COPY6_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (6 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(6))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } + + case cJ1_JPIMMED_7_02: + { + Word_t lsb; + JU_COPY7_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (7 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(7))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } + +#endif // (JUDY1 && JU_64BIT) + + } // switch for not-found *PIndex + + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); // impossible? + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + +// ---------------------------------------------------------------------------- +// BITMAP LEAF: +// +// Check Decode bytes, if any, in the current JP, then look in the leaf for +// *PIndex. + + case cJU_JPLEAF_B1: + { + Pjlb_t Pjlb; + CHECKDCD(1); + + Pjlb = P_JLB(Pjp->jp_Addr); + digit = JU_DIGITATSTATE(*PIndex, 1); + subexp = JU_SUBEXPL(digit); + bitposmask = JU_BITPOSMASKL(digit); + assert(subexp < cJU_NUMSUBEXPL); // falls in expected range. + +// *PIndex exists in LeafB1: + +// if (JU_BITMAPTESTL(Pjlb, digit)) // slower. + if (JU_JLB_BITMAP(Pjlb, subexp) & bitposmask) // faster. + { +#ifdef JUDYL // needs offset at this point: + offset = SEARCHBITMAPL(JU_JLB_BITMAP(Pjlb, subexp), digit, bitposmask); +#endif + JU_RET_FOUND_LEAF_B1(Pjlb, subexp, offset); +// == return((PPvoid_t) (P_JV(JL_JLB_PVALUE(Pjlb, subexp)) + (offset))); + } + +// Dead end, no Index in LeafB1 for remaining digit in *PIndex: +// +// If theres a next-left/right Index in the current LeafB1, which for +// Judy*Next() is true if any bits are set for higher Indexes, shortcut by +// returning it. Note: For Judy*Prev(), offset is set here to the correct +// value for the next-left JP. + + offset = SEARCHBITMAPL(JU_JLB_BITMAP(Pjlb, subexp), digit, + bitposmask); + // right range: + assert((offset >= -1) && (offset < (int) cJU_BITSPERSUBEXPL)); + +#ifdef JUDYPREV + if (offset >= 0) // next-left JP is in this subexpanse. + goto SM1LeafB1Findlimit; + + while (--subexp >= 0) // search next-left subexpanses. +#else + if (JU_JLB_BITMAP(Pjlb, subexp) & JU_MASKHIGHEREXC(bitposmask)) + { + ++offset; // next-left => next-right. + goto SM1LeafB1Findlimit; + } + + while (++subexp < cJU_NUMSUBEXPL) // search next-right subexps. +#endif + { + if (! JU_JLB_BITMAP(Pjlb, subexp)) continue; // empty subexp. + +#ifdef JUDYPREV + offset = SEARCHBITMAPMAXL(JU_JLB_BITMAP(Pjlb, subexp)); + // expected range: + assert((offset >= 0) && (offset < (int) cJU_BITSPERSUBEXPL)); +#else + offset = 0; +#endif + +// Save the next-left/right Indexess digit in *PIndex: + +SM1LeafB1Findlimit: + JU_BITMAPDIGITL(digit, subexp, JU_JLB_BITMAP(Pjlb, subexp), offset); + JU_SETDIGIT1(*PIndex, digit); + JU_RET_FOUND_LEAF_B1(Pjlb, subexp, offset); +// == return((PPvoid_t) (P_JV(JL_JLB_PVALUE(Pjlb, subexp)) + (offset))); + } + +// Theres no next-left/right Index in the LeafB1: +// +// Shortcut and start backtracking one level up; ignore the current Pjp because +// it points to a LeafB1 with no next-left/right Index. + + goto SM2Backtrack; + + } // case cJU_JPLEAF_B1 + +#ifdef JUDY1 +// ---------------------------------------------------------------------------- +// FULL POPULATION: +// +// If the Decode bytes match, *PIndex is found (without modification). + + case cJ1_JPFULLPOPU1: + + CHECKDCD(1); + JU_RET_FOUND_FULLPOPU1; +#endif + + +// ---------------------------------------------------------------------------- +// IMMEDIATE: + +#ifdef JUDYPREV +#define SM1IMM_SETPOP1(cPop1) +#else +#define SM1IMM_SETPOP1(cPop1) pop1 = (cPop1) +#endif + +#define SM1IMM(Func,cPop1) \ + SM1IMM_SETPOP1(cPop1); \ + offset = Func((Pjll_t) (PJI), cPop1, *PIndex); \ + goto SM1LeafLImm + +// Special case for Pop1 = 1 Immediate JPs: +// +// If *PIndex is in the immediate, offset is 0, otherwise the binary NOT of the +// offset where it belongs, 0 or 1, same as from the search functions. + +#ifdef JUDYPREV +#define SM1IMM_01_SETPOP1 +#else +#define SM1IMM_01_SETPOP1 pop1 = 1 +#endif + +#define SM1IMM_01 \ + SM1IMM_01_SETPOP1; \ + offset = ((JU_JPDCDPOP0(Pjp) < JU_TRIMTODCDSIZE(*PIndex)) ? ~1 : \ + (JU_JPDCDPOP0(Pjp) == JU_TRIMTODCDSIZE(*PIndex)) ? 0 : \ + ~0); \ + goto SM1LeafLImm + + case cJU_JPIMMED_1_01: + case cJU_JPIMMED_2_01: + case cJU_JPIMMED_3_01: +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: + case cJU_JPIMMED_5_01: + case cJU_JPIMMED_6_01: + case cJU_JPIMMED_7_01: +#endif + SM1IMM_01; + +// TBD: Doug says it would be OK to have fewer calls and calculate arg 2, here +// and in Judy*Count() also. + + case cJU_JPIMMED_1_02: SM1IMM(j__udySearchLeaf1, 2); + case cJU_JPIMMED_1_03: SM1IMM(j__udySearchLeaf1, 3); +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: SM1IMM(j__udySearchLeaf1, 4); + case cJU_JPIMMED_1_05: SM1IMM(j__udySearchLeaf1, 5); + case cJU_JPIMMED_1_06: SM1IMM(j__udySearchLeaf1, 6); + case cJU_JPIMMED_1_07: SM1IMM(j__udySearchLeaf1, 7); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: SM1IMM(j__udySearchLeaf1, 8); + case cJ1_JPIMMED_1_09: SM1IMM(j__udySearchLeaf1, 9); + case cJ1_JPIMMED_1_10: SM1IMM(j__udySearchLeaf1, 10); + case cJ1_JPIMMED_1_11: SM1IMM(j__udySearchLeaf1, 11); + case cJ1_JPIMMED_1_12: SM1IMM(j__udySearchLeaf1, 12); + case cJ1_JPIMMED_1_13: SM1IMM(j__udySearchLeaf1, 13); + case cJ1_JPIMMED_1_14: SM1IMM(j__udySearchLeaf1, 14); + case cJ1_JPIMMED_1_15: SM1IMM(j__udySearchLeaf1, 15); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: SM1IMM(j__udySearchLeaf2, 2); + case cJU_JPIMMED_2_03: SM1IMM(j__udySearchLeaf2, 3); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: SM1IMM(j__udySearchLeaf2, 4); + case cJ1_JPIMMED_2_05: SM1IMM(j__udySearchLeaf2, 5); + case cJ1_JPIMMED_2_06: SM1IMM(j__udySearchLeaf2, 6); + case cJ1_JPIMMED_2_07: SM1IMM(j__udySearchLeaf2, 7); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: SM1IMM(j__udySearchLeaf3, 2); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: SM1IMM(j__udySearchLeaf3, 3); + case cJ1_JPIMMED_3_04: SM1IMM(j__udySearchLeaf3, 4); + case cJ1_JPIMMED_3_05: SM1IMM(j__udySearchLeaf3, 5); + + case cJ1_JPIMMED_4_02: SM1IMM(j__udySearchLeaf4, 2); + case cJ1_JPIMMED_4_03: SM1IMM(j__udySearchLeaf4, 3); + + case cJ1_JPIMMED_5_02: SM1IMM(j__udySearchLeaf5, 2); + case cJ1_JPIMMED_5_03: SM1IMM(j__udySearchLeaf5, 3); + + case cJ1_JPIMMED_6_02: SM1IMM(j__udySearchLeaf6, 2); + + case cJ1_JPIMMED_7_02: SM1IMM(j__udySearchLeaf7, 2); +#endif + + +// ---------------------------------------------------------------------------- +// INVALID JP TYPE: + + default: JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // SM1Get switch. + + /*NOTREACHED*/ + + +// ============================================================================ +// STATE MACHINE 2 -- BACKTRACK BRANCH TO PREVIOUS JP: +// +// Look for the next-left/right JP in a branch, backing up the history list as +// necessary. Upon finding a next-left/right JP, modify the corresponding +// digit in *PIndex before passing control to SM3Findlimit. +// +// Note: As described earlier, only branch JPs are expected here; other types +// fall into the default case. +// +// Note: If a found JP contains needed Dcd bytes, thats OK, theyre copied to +// *PIndex in SM3Findlimit. +// +// TBD: This code has a lot in common with similar code in the shortcut cases +// in SM1Get. Can combine this code somehow? +// +// ENTRY: List, possibly empty, of JPs and offsets in APjphist[] and +// Aoffhist[]; see earlier comments. +// +// EXIT: Execute JU_RET_NOTFOUND if no previous/next JP; otherwise jump to +// SM3Findlimit to resume a new but different downward search. + +SM2Backtrack: // come or return here for first/next sideways search. + + HISTPOP(Pjp, offset); + + switch (JU_JPTYPE(Pjp)) + { + + +// ---------------------------------------------------------------------------- +// LINEAR BRANCH: + + case cJU_JPBRANCH_L2: state = 2; goto SM2BranchL; + case cJU_JPBRANCH_L3: state = 3; goto SM2BranchL; +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: state = 4; goto SM2BranchL; + case cJU_JPBRANCH_L5: state = 5; goto SM2BranchL; + case cJU_JPBRANCH_L6: state = 6; goto SM2BranchL; + case cJU_JPBRANCH_L7: state = 7; goto SM2BranchL; +#endif + case cJU_JPBRANCH_L: state = cJU_ROOTSTATE; goto SM2BranchL; + +SM2BranchL: +#ifdef JUDYPREV + if (--offset < 0) goto SM2Backtrack; // no next-left JP in BranchL. +#endif + Pjbl = P_JBL(Pjp->jp_Addr); +#ifdef JUDYNEXT + if (++offset >= (Pjbl->jbl_NumJPs)) goto SM2Backtrack; + // no next-right JP in BranchL. +#endif + +// Theres a next-left/right JP in the current BranchL; save its digit in +// *PIndex and continue with SM3Findlimit: + + JU_SETDIGIT(*PIndex, Pjbl->jbl_Expanse[offset], state); + Pjp = (Pjbl->jbl_jp) + offset; + goto SM3Findlimit; + + +// ---------------------------------------------------------------------------- +// BITMAP BRANCH: + + case cJU_JPBRANCH_B2: state = 2; goto SM2BranchB; + case cJU_JPBRANCH_B3: state = 3; goto SM2BranchB; +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: state = 4; goto SM2BranchB; + case cJU_JPBRANCH_B5: state = 5; goto SM2BranchB; + case cJU_JPBRANCH_B6: state = 6; goto SM2BranchB; + case cJU_JPBRANCH_B7: state = 7; goto SM2BranchB; +#endif + case cJU_JPBRANCH_B: state = cJU_ROOTSTATE; goto SM2BranchB; + +SM2BranchB: + Pjbb = P_JBB(Pjp->jp_Addr); + HISTPOPBOFF(subexp, offset, digit); // unpack values. + +// If theres a next-left/right JP in the current BranchB, which for +// Judy*Next() is true if any bits are set for higher Indexes, continue to +// SM3Findlimit: +// +// Note: offset is set to the JP previously traversed; go one to the +// left/right. + +#ifdef JUDYPREV + if (offset > 0) // next-left JP is in this subexpanse. + { + --offset; + goto SM2BranchBFindlimit; + } + + while (--subexp >= 0) // search next-left subexpanses. +#else + if (JU_JBB_BITMAP(Pjbb, subexp) + & JU_MASKHIGHEREXC(JU_BITPOSMASKB(digit))) + { + ++offset; // next-left => next-right. + goto SM2BranchBFindlimit; + } + + while (++subexp < cJU_NUMSUBEXPB) // search next-right subexps. +#endif + { + if (! JU_JBB_PJP(Pjbb, subexp)) continue; // empty subexpanse. + +#ifdef JUDYPREV + offset = SEARCHBITMAPMAXB(JU_JBB_BITMAP(Pjbb, subexp)); + // expected range: + assert((offset >= 0) && (offset < cJU_BITSPERSUBEXPB)); +#else + offset = 0; +#endif + +// Save the next-left/right JPs digit in *PIndex: + +SM2BranchBFindlimit: + JU_BITMAPDIGITB(digit, subexp, JU_JBB_BITMAP(Pjbb, subexp), + offset); + JU_SETDIGIT(*PIndex, digit, state); + + if ((Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp))) == (Pjp_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + + Pjp += offset; + goto SM3Findlimit; + } + +// Theres no next-left/right JP in the BranchB: + + goto SM2Backtrack; + + +// ---------------------------------------------------------------------------- +// UNCOMPRESSED BRANCH: + + case cJU_JPBRANCH_U2: state = 2; goto SM2BranchU; + case cJU_JPBRANCH_U3: state = 3; goto SM2BranchU; +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: state = 4; goto SM2BranchU; + case cJU_JPBRANCH_U5: state = 5; goto SM2BranchU; + case cJU_JPBRANCH_U6: state = 6; goto SM2BranchU; + case cJU_JPBRANCH_U7: state = 7; goto SM2BranchU; +#endif + case cJU_JPBRANCH_U: state = cJU_ROOTSTATE; goto SM2BranchU; + +SM2BranchU: + +// Search for a next-left/right JP in the current BranchU, and if one is found, +// save its digit in *PIndex and continue to SM3Findlimit: + + Pjbu = P_JBU(Pjp->jp_Addr); + digit = offset; + +#ifdef JUDYPREV + while (digit >= 1) + { + Pjp = (Pjbu->jbu_jp) + (--digit); +#else + while (digit < cJU_BRANCHUNUMJPS - 1) + { + Pjp = (Pjbu->jbu_jp) + (++digit); +#endif + if (JPNULL(JU_JPTYPE(Pjp))) continue; + + JU_SETDIGIT(*PIndex, digit, state); + goto SM3Findlimit; + } + +// Theres no next-left/right JP in the BranchU: + + goto SM2Backtrack; + + +// ---------------------------------------------------------------------------- +// INVALID JP TYPE: + + default: JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // SM2Backtrack switch. + + /*NOTREACHED*/ + + +// ============================================================================ +// STATE MACHINE 3 -- FIND LIMIT JP/INDEX: +// +// Look for the highest/lowest (right/left-most) JP in each branch and the +// highest/lowest Index in a leaf or immediate, and return it. While +// traversing, modify appropriate digit(s) in *PIndex to reflect the path +// taken, including Dcd bytes in each JP (which could hold critical missing +// digits for skipped branches). +// +// ENTRY: Pjp set to a JP under which to find max/min JPs (if a branch JP) or +// a max/min Index and return (if a leaf or immediate JP). +// +// EXIT: Execute JU_RET_FOUND* upon reaching a leaf or immediate. Should be +// impossible to fail, unless the Judy array is corrupt. + +SM3Findlimit: // come or return here for first/next branch/leaf. + + switch (JU_JPTYPE(Pjp)) + { +// ---------------------------------------------------------------------------- +// LINEAR BRANCH: +// +// Simply use the highest/lowest (right/left-most) JP in the BranchL, but first +// copy the Dcd bytes to *PIndex if there are any (only if state < +// cJU_ROOTSTATE - 1). + + case cJU_JPBRANCH_L2: SM3PREPB_DCD(2, SM3BranchL); +#ifndef JU_64BIT + case cJU_JPBRANCH_L3: SM3PREPB( 3, SM3BranchL); +#else + case cJU_JPBRANCH_L3: SM3PREPB_DCD(3, SM3BranchL); + case cJU_JPBRANCH_L4: SM3PREPB_DCD(4, SM3BranchL); + case cJU_JPBRANCH_L5: SM3PREPB_DCD(5, SM3BranchL); + case cJU_JPBRANCH_L6: SM3PREPB_DCD(6, SM3BranchL); + case cJU_JPBRANCH_L7: SM3PREPB( 7, SM3BranchL); +#endif + case cJU_JPBRANCH_L: SM3PREPB( cJU_ROOTSTATE, SM3BranchL); + +SM3BranchL: + Pjbl = P_JBL(Pjp->jp_Addr); + +#ifdef JUDYPREV + if ((offset = (Pjbl->jbl_NumJPs) - 1) < 0) +#else + offset = 0; if ((Pjbl->jbl_NumJPs) == 0) +#endif + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + + JU_SETDIGIT(*PIndex, Pjbl->jbl_Expanse[offset], state); + Pjp = (Pjbl->jbl_jp) + offset; + goto SM3Findlimit; + + +// ---------------------------------------------------------------------------- +// BITMAP BRANCH: +// +// Look for the highest/lowest (right/left-most) non-null subexpanse, then use +// the highest/lowest JP in that subexpanse, but first copy Dcd bytes, if there +// are any (only if state < cJU_ROOTSTATE - 1), to *PIndex. + + case cJU_JPBRANCH_B2: SM3PREPB_DCD(2, SM3BranchB); +#ifndef JU_64BIT + case cJU_JPBRANCH_B3: SM3PREPB( 3, SM3BranchB); +#else + case cJU_JPBRANCH_B3: SM3PREPB_DCD(3, SM3BranchB); + case cJU_JPBRANCH_B4: SM3PREPB_DCD(4, SM3BranchB); + case cJU_JPBRANCH_B5: SM3PREPB_DCD(5, SM3BranchB); + case cJU_JPBRANCH_B6: SM3PREPB_DCD(6, SM3BranchB); + case cJU_JPBRANCH_B7: SM3PREPB( 7, SM3BranchB); +#endif + case cJU_JPBRANCH_B: SM3PREPB( cJU_ROOTSTATE, SM3BranchB); + +SM3BranchB: + Pjbb = P_JBB(Pjp->jp_Addr); +#ifdef JUDYPREV + subexp = cJU_NUMSUBEXPB; + + while (! (JU_JBB_BITMAP(Pjbb, --subexp))) // find non-empty subexp. + { + if (subexp <= 0) // wholly empty bitmap. + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + } + + offset = SEARCHBITMAPMAXB(JU_JBB_BITMAP(Pjbb, subexp)); + // expected range: + assert((offset >= 0) && (offset < cJU_BITSPERSUBEXPB)); +#else + subexp = -1; + + while (! (JU_JBB_BITMAP(Pjbb, ++subexp))) // find non-empty subexp. + { + if (subexp >= cJU_NUMSUBEXPB - 1) // didnt find one. + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + } + + offset = 0; +#endif + + JU_BITMAPDIGITB(digit, subexp, JU_JBB_BITMAP(Pjbb, subexp), offset); + JU_SETDIGIT(*PIndex, digit, state); + + if ((Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp))) == (Pjp_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + + Pjp += offset; + goto SM3Findlimit; + + +// ---------------------------------------------------------------------------- +// UNCOMPRESSED BRANCH: +// +// Look for the highest/lowest (right/left-most) non-null JP, and use it, but +// first copy Dcd bytes to *PIndex if there are any (only if state < +// cJU_ROOTSTATE - 1). + + case cJU_JPBRANCH_U2: SM3PREPB_DCD(2, SM3BranchU); +#ifndef JU_64BIT + case cJU_JPBRANCH_U3: SM3PREPB( 3, SM3BranchU); +#else + case cJU_JPBRANCH_U3: SM3PREPB_DCD(3, SM3BranchU); + case cJU_JPBRANCH_U4: SM3PREPB_DCD(4, SM3BranchU); + case cJU_JPBRANCH_U5: SM3PREPB_DCD(5, SM3BranchU); + case cJU_JPBRANCH_U6: SM3PREPB_DCD(6, SM3BranchU); + case cJU_JPBRANCH_U7: SM3PREPB( 7, SM3BranchU); +#endif + case cJU_JPBRANCH_U: SM3PREPB( cJU_ROOTSTATE, SM3BranchU); + +SM3BranchU: + Pjbu = P_JBU(Pjp->jp_Addr); +#ifdef JUDYPREV + digit = cJU_BRANCHUNUMJPS; + + while (digit >= 1) + { + Pjp = (Pjbu->jbu_jp) + (--digit); +#else + + for (digit = 0; digit < cJU_BRANCHUNUMJPS; ++digit) + { + Pjp = (Pjbu->jbu_jp) + digit; +#endif + if (JPNULL(JU_JPTYPE(Pjp))) continue; + + JU_SETDIGIT(*PIndex, digit, state); + goto SM3Findlimit; + } + +// No non-null JPs in BranchU: + + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + +// ---------------------------------------------------------------------------- +// LINEAR LEAF: +// +// Simply use the highest/lowest (right/left-most) Index in the LeafL, but the +// details vary depending on leaf Index Size. First copy Dcd bytes, if there +// are any (only if state < cJU_ROOTSTATE - 1), to *PIndex. + +#define SM3LEAFLDCD(cState) \ + JU_SETDCD(*PIndex, Pjp, cState); \ + SM3LEAFLNODCD + +#ifdef JUDY1 +#define SM3LEAFL_SETPOP1 // not needed in any cases. +#else +#define SM3LEAFL_SETPOP1 pop1 = JU_JPLEAF_POP0(Pjp) + 1 +#endif + +#ifdef JUDYPREV +#define SM3LEAFLNODCD \ + Pjll = P_JLL(Pjp->jp_Addr); \ + SM3LEAFL_SETPOP1; \ + offset = JU_JPLEAF_POP0(Pjp); assert(offset >= 0) +#else +#define SM3LEAFLNODCD \ + Pjll = P_JLL(Pjp->jp_Addr); \ + SM3LEAFL_SETPOP1; \ + offset = 0; assert(JU_JPLEAF_POP0(Pjp) >= 0); +#endif + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: + + SM3LEAFLDCD(1); + JU_SETDIGIT1(*PIndex, ((uint8_t *) Pjll)[offset]); + JU_RET_FOUND_LEAF1(Pjll, pop1, offset); +#endif + + case cJU_JPLEAF2: + + SM3LEAFLDCD(2); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(2))) + | ((uint16_t *) Pjll)[offset]; + JU_RET_FOUND_LEAF2(Pjll, pop1, offset); + +#ifndef JU_64BIT + case cJU_JPLEAF3: + { + Word_t lsb; + SM3LEAFLNODCD; + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_LEAF3(Pjll, pop1, offset); + } + +#else + case cJU_JPLEAF3: + { + Word_t lsb; + SM3LEAFLDCD(3); + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_LEAF3(Pjll, pop1, offset); + } + + case cJU_JPLEAF4: + + SM3LEAFLDCD(4); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(4))) + | ((uint32_t *) Pjll)[offset]; + JU_RET_FOUND_LEAF4(Pjll, pop1, offset); + + case cJU_JPLEAF5: + { + Word_t lsb; + SM3LEAFLDCD(5); + JU_COPY5_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (5 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(5))) | lsb; + JU_RET_FOUND_LEAF5(Pjll, pop1, offset); + } + + case cJU_JPLEAF6: + { + Word_t lsb; + SM3LEAFLDCD(6); + JU_COPY6_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (6 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(6))) | lsb; + JU_RET_FOUND_LEAF6(Pjll, pop1, offset); + } + + case cJU_JPLEAF7: + { + Word_t lsb; + SM3LEAFLNODCD; + JU_COPY7_PINDEX_TO_LONG(lsb, ((uint8_t *) Pjll) + (7 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(7))) | lsb; + JU_RET_FOUND_LEAF7(Pjll, pop1, offset); + } +#endif + + +// ---------------------------------------------------------------------------- +// BITMAP LEAF: +// +// Look for the highest/lowest (right/left-most) non-null subexpanse, then use +// the highest/lowest Index in that subexpanse, but first copy Dcd bytes +// (always present since state 1 < cJU_ROOTSTATE) to *PIndex. + + case cJU_JPLEAF_B1: + { + Pjlb_t Pjlb; + + JU_SETDCD(*PIndex, Pjp, 1); + + Pjlb = P_JLB(Pjp->jp_Addr); +#ifdef JUDYPREV + subexp = cJU_NUMSUBEXPL; + + while (! JU_JLB_BITMAP(Pjlb, --subexp)) // find non-empty subexp. + { + if (subexp <= 0) // wholly empty bitmap. + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + } + +// TBD: Might it be faster to just use a variant of BITMAPDIGIT*() that yields +// the digit for the right-most Index with a bit set? + + offset = SEARCHBITMAPMAXL(JU_JLB_BITMAP(Pjlb, subexp)); + // expected range: + assert((offset >= 0) && (offset < cJU_BITSPERSUBEXPL)); +#else + subexp = -1; + + while (! JU_JLB_BITMAP(Pjlb, ++subexp)) // find non-empty subexp. + { + if (subexp >= cJU_NUMSUBEXPL - 1) // didnt find one. + { + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + } + } + + offset = 0; +#endif + + JU_BITMAPDIGITL(digit, subexp, JU_JLB_BITMAP(Pjlb, subexp), offset); + JU_SETDIGIT1(*PIndex, digit); + JU_RET_FOUND_LEAF_B1(Pjlb, subexp, offset); +// == return((PPvoid_t) (P_JV(JL_JLB_PVALUE(Pjlb, subexp)) + (offset))); + + } // case cJU_JPLEAF_B1 + +#ifdef JUDY1 +// ---------------------------------------------------------------------------- +// FULL POPULATION: +// +// Copy Dcd bytes to *PIndex (always present since state 1 < cJU_ROOTSTATE), +// then set the highest/lowest possible digit as the LSB in *PIndex. + + case cJ1_JPFULLPOPU1: + + JU_SETDCD( *PIndex, Pjp, 1); +#ifdef JUDYPREV + JU_SETDIGIT1(*PIndex, cJU_BITSPERBITMAP - 1); +#else + JU_SETDIGIT1(*PIndex, 0); +#endif + JU_RET_FOUND_FULLPOPU1; +#endif // JUDY1 + + +// ---------------------------------------------------------------------------- +// IMMEDIATE: +// +// Simply use the highest/lowest (right/left-most) Index in the Imm, but the +// details vary depending on leaf Index Size and pop1. Note: There are no Dcd +// bytes in an Immediate JP, but in a cJU_JPIMMED_*_01 JP, the field holds the +// least bytes of the immediate Index. + + case cJU_JPIMMED_1_01: SET_01(1); goto SM3Imm_01; + case cJU_JPIMMED_2_01: SET_01(2); goto SM3Imm_01; + case cJU_JPIMMED_3_01: SET_01(3); goto SM3Imm_01; +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: SET_01(4); goto SM3Imm_01; + case cJU_JPIMMED_5_01: SET_01(5); goto SM3Imm_01; + case cJU_JPIMMED_6_01: SET_01(6); goto SM3Imm_01; + case cJU_JPIMMED_7_01: SET_01(7); goto SM3Imm_01; +#endif +SM3Imm_01: JU_RET_FOUND_IMM_01(Pjp); + +#ifdef JUDYPREV +#define SM3IMM_OFFSET(cPop1) (cPop1) - 1 // highest. +#else +#define SM3IMM_OFFSET(cPop1) 0 // lowest. +#endif + +#define SM3IMM(cPop1,Next) \ + offset = SM3IMM_OFFSET(cPop1); \ + goto Next + + case cJU_JPIMMED_1_02: SM3IMM( 2, SM3Imm1); + case cJU_JPIMMED_1_03: SM3IMM( 3, SM3Imm1); +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: SM3IMM( 4, SM3Imm1); + case cJU_JPIMMED_1_05: SM3IMM( 5, SM3Imm1); + case cJU_JPIMMED_1_06: SM3IMM( 6, SM3Imm1); + case cJU_JPIMMED_1_07: SM3IMM( 7, SM3Imm1); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: SM3IMM( 8, SM3Imm1); + case cJ1_JPIMMED_1_09: SM3IMM( 9, SM3Imm1); + case cJ1_JPIMMED_1_10: SM3IMM(10, SM3Imm1); + case cJ1_JPIMMED_1_11: SM3IMM(11, SM3Imm1); + case cJ1_JPIMMED_1_12: SM3IMM(12, SM3Imm1); + case cJ1_JPIMMED_1_13: SM3IMM(13, SM3Imm1); + case cJ1_JPIMMED_1_14: SM3IMM(14, SM3Imm1); + case cJ1_JPIMMED_1_15: SM3IMM(15, SM3Imm1); +#endif + +SM3Imm1: JU_SETDIGIT1(*PIndex, ((uint8_t *) PJI)[offset]); + JU_RET_FOUND_IMM(Pjp, offset); + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: SM3IMM(2, SM3Imm2); + case cJU_JPIMMED_2_03: SM3IMM(3, SM3Imm2); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: SM3IMM(4, SM3Imm2); + case cJ1_JPIMMED_2_05: SM3IMM(5, SM3Imm2); + case cJ1_JPIMMED_2_06: SM3IMM(6, SM3Imm2); + case cJ1_JPIMMED_2_07: SM3IMM(7, SM3Imm2); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) +SM3Imm2: *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(2))) + | ((uint16_t *) PJI)[offset]; + JU_RET_FOUND_IMM(Pjp, offset); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: SM3IMM(2, SM3Imm3); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: SM3IMM(3, SM3Imm3); + case cJ1_JPIMMED_3_04: SM3IMM(4, SM3Imm3); + case cJ1_JPIMMED_3_05: SM3IMM(5, SM3Imm3); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) +SM3Imm3: + { + Word_t lsb; + JU_COPY3_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (3 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(3))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_4_02: SM3IMM(2, SM3Imm4); + case cJ1_JPIMMED_4_03: SM3IMM(3, SM3Imm4); + +SM3Imm4: *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(4))) + | ((uint32_t *) PJI)[offset]; + JU_RET_FOUND_IMM(Pjp, offset); + + case cJ1_JPIMMED_5_02: SM3IMM(2, SM3Imm5); + case cJ1_JPIMMED_5_03: SM3IMM(3, SM3Imm5); + +SM3Imm5: + { + Word_t lsb; + JU_COPY5_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (5 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(5))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } + + case cJ1_JPIMMED_6_02: SM3IMM(2, SM3Imm6); + +SM3Imm6: + { + Word_t lsb; + JU_COPY6_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (6 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(6))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } + + case cJ1_JPIMMED_7_02: SM3IMM(2, SM3Imm7); + +SM3Imm7: + { + Word_t lsb; + JU_COPY7_PINDEX_TO_LONG(lsb, ((uint8_t *) PJI) + (7 * offset)); + *PIndex = (*PIndex & (~JU_LEASTBYTESMASK(7))) | lsb; + JU_RET_FOUND_IMM(Pjp, offset); + } +#endif // (JUDY1 && JU_64BIT) + + +// ---------------------------------------------------------------------------- +// OTHER CASES: + + default: JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // SM3Findlimit switch. + + /*NOTREACHED*/ + +} // Judy1Prev() / Judy1Next() / JudyLPrev() / JudyLNext() diff --git a/libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c b/libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c new file mode 100644 index 0000000..4da4356 --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c @@ -0,0 +1,1390 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.32 $ $Source: /judy/src/JudyCommon/JudyPrevNextEmpty.c $ +// +// Judy*PrevEmpty() and Judy*NextEmpty() functions for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. +// +// Compile with -DJUDYNEXT for the Judy*NextEmpty() function; otherwise +// defaults to Judy*PrevEmpty(). +// +// Compile with -DTRACEJPSE to trace JP traversals. +// +// This file is separate from JudyPrevNext.c because it differs too greatly for +// ifdefs. This might be a bit surprising, but there are two reasons: +// +// - First, down in the details, searching for an empty index (SearchEmpty) is +// remarkably asymmetric with searching for a valid index (SearchValid), +// mainly with respect to: No return of a value area for JudyL; partially- +// full versus totally-full JPs; and handling of narrow pointers. +// +// - Second, we chose to implement SearchEmpty without a backtrack stack or +// backtrack engine, partly as an experiment, and partly because we think +// restarting from the top of the tree is less likely for SearchEmpty than +// for SearchValid, because empty indexes are more likely than valid indexes. +// +// A word about naming: A prior version of this feature (see 4.13) was named +// Judy*Free(), but there were concerns about that being read as a verb rather +// than an adjective. After prolonged debate and based on user input, we +// changed "Free" to "Empty". + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifndef JUDYNEXT +#ifndef JUDYPREV +#define JUDYPREV 1 // neither set => use default. +#endif +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +#ifdef TRACEJPSE +#include "JudyPrintJP.c" +#endif + + +// **************************************************************************** +// J U D Y 1 P R E V E M P T Y +// J U D Y 1 N E X T E M P T Y +// J U D Y L P R E V E M P T Y +// J U D Y L N E X T E M P T Y +// +// See the manual entry for the API. +// +// OVERVIEW OF Judy*PrevEmpty() / Judy*NextEmpty(): +// +// See also for comparison the equivalent comments in JudyPrevNext.c. +// +// Take the callers *PIndex and subtract/add 1, but watch out for +// underflow/overflow, which means "no previous/next empty index found." Use a +// reentrant switch statement (state machine, see SMGetRestart and +// SMGetContinue) to decode Index, starting with the JRP (PArray), through a +// JPM and branches, if any, down to an immediate or a leaf. Look for Index in +// that immediate or leaf, and if not found (invalid index), return success +// (Index is empty). +// +// This search can result in a dead end where taking a different path is +// required. There are four kinds of dead ends: +// +// BRANCH PRIMARY dead end: Encountering a fully-populated JP for the +// appropriate digit in Index. Search sideways in the branch for the +// previous/next absent/null/non-full JP, and if one is found, set Index to the +// highest/lowest index possible in that JPs expanse. Then if the JP is an +// absent or null JP, return success; otherwise for a non-full JP, traverse +// through the partially populated JP. +// +// BRANCH SECONDARY dead end: Reaching the end of a branch during a sideways +// search after a branch primary dead end. Set Index to the lowest/highest +// index possible in the whole branchs expanse (one higher/lower than the +// previous/next branchs expanse), then restart at the top of the tree, which +// includes pre-decrementing/incrementing Index (again) and watching for +// underflow/overflow (again). +// +// LEAF PRIMARY dead end: Finding a valid (non-empty) index in an immediate or +// leaf matching Index. Search sideways in the immediate/leaf for the +// previous/next empty index; if found, set *PIndex to match and return success. +// +// LEAF SECONDARY dead end: Reaching the end of an immediate or leaf during a +// sideways search after a leaf primary dead end. Just as for a branch +// secondary dead end, restart at the top of the tree with Index set to the +// lowest/highest index possible in the whole immediate/leafs expanse. +// TBD: If leaf secondary dead end occurs, could shortcut and treat it as a +// branch primary dead end; but this would require remembering the parent +// branchs type and offset (a "one-deep stack"), and also wrestling with +// narrow pointers, at least for leaves (but not for immediates). +// +// Note some ASYMMETRIES between SearchValid and SearchEmpty: +// +// - The SearchValid code, upon descending through a narrow pointer, if Index +// is outside the expanse of the subsidiary node (effectively a secondary +// dead end), must decide whether to backtrack or findlimit. But the +// SearchEmpty code simply returns success (Index is empty). +// +// - Similarly, the SearchValid code, upon finding no previous/next index in +// the expanse of a narrow pointer (again, a secondary dead end), can simply +// start to backtrack at the parent JP. But the SearchEmpty code would have +// to first determine whether or not the parent JPs narrow expanse contains +// a previous/next empty index outside the subexpanse. Rather than keeping a +// parent state stack and backtracking this way, upon a secondary dead end, +// the SearchEmpty code simply restarts at the top of the tree, whether or +// not a narrow pointer is involved. Again, see the equivalent comments in +// JudyPrevNext.c for comparison. +// +// This function is written iteratively for speed, rather than recursively. +// +// TBD: Wed like to enhance this function to make successive searches faster. +// This would require saving some previous state, including the previous Index +// returned, and in which leaf it was found. If the next call is for the same +// Index and the array has not been modified, start at the same leaf. This +// should be much easier to implement since this is iterative rather than +// recursive code. + +#ifdef JUDY1 +#ifdef JUDYPREV +FUNCTION int Judy1PrevEmpty +#else +FUNCTION int Judy1NextEmpty +#endif +#else +#ifdef JUDYPREV +FUNCTION int JudyLPrevEmpty +#else +FUNCTION int JudyLNextEmpty +#endif +#endif + ( + Pcvoid_t PArray, // Judy array to search. + Word_t * PIndex, // starting point and result. + PJError_t PJError // optional, for returning error info. + ) +{ + Word_t Index; // fast copy, in a register. + Pjp_t Pjp; // current JP. + Pjbl_t Pjbl; // Pjp->jp_Addr masked and cast to types: + Pjbb_t Pjbb; + Pjbu_t Pjbu; + Pjlb_t Pjlb; + PWord_t Pword; // alternate name for use by GET* macros. + + Word_t digit; // next digit to decode from Index. + Word_t digits; // current state in SM = digits left to decode. + Word_t pop0; // in a leaf. + Word_t pop0mask; // precalculated to avoid variable shifts. + long offset; // within a branch or leaf (can be large). + int subexp; // subexpanse in a bitmap branch. + BITMAPB_t bitposmaskB; // bit in bitmap for bitmap branch. + BITMAPL_t bitposmaskL; // bit in bitmap for bitmap leaf. + Word_t possfullJP1; // JP types for possibly full subexpanses: + Word_t possfullJP2; + Word_t possfullJP3; + + +// ---------------------------------------------------------------------------- +// M A C R O S +// +// These are intended to make the code a bit more readable and less redundant. + + +// CHECK FOR NULL JP: +// +// TBD: In principle this can be reduced (here and in other *.c files) to just +// the latter clause since no Type should ever be below cJU_JPNULL1, but in +// fact some root pointer types can be lower, so for safety do both checks. + +#define JPNULL(Type) (((Type) >= cJU_JPNULL1) && ((Type) <= cJU_JPNULLMAX)) + + +// CHECK FOR A FULL JP: +// +// Given a JP, indicate if it is fully populated. Use digits, pop0mask, and +// possfullJP1..3 in the context. +// +// This is a difficult problem because it requires checking the Pop0 bits for +// all-ones, but the number of bytes depends on the JP type, which is not +// directly related to the parent branchs type or level -- the JPs child +// could be under a narrow pointer (hence not full). The simple answer +// requires switching on or otherwise calculating the JP type, which could be +// slow. Instead, in SMPREPB* precalculate pop0mask and also record in +// possfullJP1..3 the child JP (branch) types that could possibly be full (one +// level down), and use them here. For level-2 branches (with digits == 2), +// the test for a full child depends on Judy1/JudyL. +// +// Note: This cannot be applied to the JP in a JPM because it doesnt have +// enough pop0 digits. +// +// TBD: JPFULL_BRANCH diligently checks for BranchL or BranchB, where neither +// of those can ever be full as it turns out. Could just check for a BranchU +// at the right level. Also, pop0mask might be overkill, its not used much, +// so perhaps just call cJU_POP0MASK(digits - 1) here? +// +// First, JPFULL_BRANCH checks for a full expanse for a JP whose child can be a +// branch, that is, a JP in a branch at level 3 or higher: + +#define JPFULL_BRANCH(Pjp) \ + ((((JU_JPDCDPOP0(Pjp) ^ cJU_ALLONES) & pop0mask) == 0) \ + && ((JU_JPTYPE(Pjp) == possfullJP1) \ + || (JU_JPTYPE(Pjp) == possfullJP2) \ + || (JU_JPTYPE(Pjp) == possfullJP3))) + +#ifdef JUDY1 +#define JPFULL(Pjp) \ + ((digits == 2) ? \ + (JU_JPTYPE(Pjp) == cJ1_JPFULLPOPU1) : JPFULL_BRANCH(Pjp)) +#else +#define JPFULL(Pjp) \ + ((digits == 2) ? \ + (JU_JPTYPE(Pjp) == cJU_JPLEAF_B1) \ + && (((JU_JPDCDPOP0(Pjp) & cJU_POP0MASK(1)) == cJU_POP0MASK(1))) : \ + JPFULL_BRANCH(Pjp)) +#endif + + +// RETURN SUCCESS: +// +// This hides the need to set *PIndex back to the local value of Index -- use a +// local value for faster operation. Note that the callers *PIndex is ALWAYS +// modified upon success, at least decremented/incremented. + +#define RET_SUCCESS { *PIndex = Index; return(1); } + + +// RETURN A CORRUPTION: + +#define RET_CORRUPT { JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); return(JERRI); } + + +// SEARCH A BITMAP BRANCH: +// +// This is a weak analog of j__udySearchLeaf*() for bitmap branches. Return +// the actual or next-left position, base 0, of Digit in a BITMAPB_t bitmap +// (subexpanse of a full bitmap), also given a Bitposmask for Digit. The +// position is the offset within the set bits. +// +// Unlike j__udySearchLeaf*(), the offset is not returned bit-complemented if +// Digits bit is unset, because the caller can check the bitmap themselves to +// determine that. Also, if Digits bit is unset, the returned offset is to +// the next-left JP or index (including -1), not to the "ideal" position for +// the index = next-right JP or index. +// +// Shortcut and skip calling j__udyCountBitsB() if the bitmap is full, in which +// case (Digit % cJU_BITSPERSUBEXPB) itself is the base-0 offset. + +#define SEARCHBITMAPB(Bitmap,Digit,Bitposmask) \ + (((Bitmap) == cJU_FULLBITMAPB) ? (Digit % cJU_BITSPERSUBEXPB) : \ + j__udyCountBitsB((Bitmap) & JU_MASKLOWERINC(Bitposmask)) - 1) + +#ifdef JUDYPREV +// Equivalent to search for the highest offset in Bitmap, that is, one less +// than the number of bits set: + +#define SEARCHBITMAPMAXB(Bitmap) \ + (((Bitmap) == cJU_FULLBITMAPB) ? cJU_BITSPERSUBEXPB - 1 : \ + j__udyCountBitsB(Bitmap) - 1) +#endif + + +// CHECK DECODE BYTES: +// +// Check Decode bytes in a JP against the equivalent portion of Index. If they +// dont match, Index is outside the subexpanse of a narrow pointer, hence is +// empty. + +#define CHECKDCD(cDigits) \ + if (JU_DCDNOTMATCHINDEX(Index, Pjp, cDigits)) RET_SUCCESS + + +// REVISE REMAINDER OF INDEX: +// +// Put one digit in place in Index and clear/set the lower digits, if any, so +// the resulting Index is at the start/end of an expanse, or just clear/set the +// least digits. +// +// Actually, to make simple use of JU_LEASTBYTESMASK, first clear/set all least +// digits of Index including the digit to be overridden, then set the value of +// that one digit. If Digits == 1 the first operation is redundant, but either +// very fast or even removed by the optimizer. + +#define CLEARLEASTDIGITS(Digits) Index &= ~JU_LEASTBYTESMASK(Digits) +#define SETLEASTDIGITS( Digits) Index |= JU_LEASTBYTESMASK(Digits) + +#define CLEARLEASTDIGITS_D(Digit,Digits) \ + { \ + CLEARLEASTDIGITS(Digits); \ + JU_SETDIGIT(Index, Digit, Digits); \ + } + +#define SETLEASTDIGITS_D(Digit,Digits) \ + { \ + SETLEASTDIGITS(Digits); \ + JU_SETDIGIT(Index, Digit, Digits); \ + } + + +// SET REMAINDER OF INDEX AND THEN RETURN OR CONTINUE: + +#define SET_AND_RETURN(OpLeastDigits,Digit,Digits) \ + { \ + OpLeastDigits(Digit, Digits); \ + RET_SUCCESS; \ + } + +#define SET_AND_CONTINUE(OpLeastDigits,Digit,Digits) \ + { \ + OpLeastDigits(Digit, Digits); \ + goto SMGetContinue; \ + } + + +// PREPARE TO HANDLE A LEAFW OR JP BRANCH IN THE STATE MACHINE: +// +// Extract a state-dependent digit from Index in a "constant" way, then jump to +// common code for multiple cases. +// +// TBD: Should this macro do more, such as preparing variable-shift masks for +// use in CLEARLEASTDIGITS and SETLEASTDIGITS? + +#define SMPREPB(cDigits,Next,PossFullJP1,PossFullJP2,PossFullJP3) \ + digits = (cDigits); \ + digit = JU_DIGITATSTATE(Index, cDigits); \ + pop0mask = cJU_POP0MASK((cDigits) - 1); /* for branchs JPs */ \ + possfullJP1 = (PossFullJP1); \ + possfullJP2 = (PossFullJP2); \ + possfullJP3 = (PossFullJP3); \ + goto Next + +// Variations for specific-level branches and for shorthands: +// +// Note: SMPREPB2 need not initialize possfullJP* because JPFULL does not use +// them for digits == 2, but gcc -Wall isnt quite smart enough to see this, so +// waste a bit of time and space to get rid of the warning: + +#define SMPREPB2(Next) \ + digits = 2; \ + digit = JU_DIGITATSTATE(Index, 2); \ + pop0mask = cJU_POP0MASK(1); /* for branchs JPs */ \ + possfullJP1 = possfullJP2 = possfullJP3 = 0; \ + goto Next + +#define SMPREPB3(Next) SMPREPB(3, Next, cJU_JPBRANCH_L2, \ + cJU_JPBRANCH_B2, \ + cJU_JPBRANCH_U2) +#ifndef JU_64BIT +#define SMPREPBL(Next) SMPREPB(cJU_ROOTSTATE, Next, cJU_JPBRANCH_L3, \ + cJU_JPBRANCH_B3, \ + cJU_JPBRANCH_U3) +#else +#define SMPREPB4(Next) SMPREPB(4, Next, cJU_JPBRANCH_L3, \ + cJU_JPBRANCH_B3, \ + cJU_JPBRANCH_U3) +#define SMPREPB5(Next) SMPREPB(5, Next, cJU_JPBRANCH_L4, \ + cJU_JPBRANCH_B4, \ + cJU_JPBRANCH_U4) +#define SMPREPB6(Next) SMPREPB(6, Next, cJU_JPBRANCH_L5, \ + cJU_JPBRANCH_B5, \ + cJU_JPBRANCH_U5) +#define SMPREPB7(Next) SMPREPB(7, Next, cJU_JPBRANCH_L6, \ + cJU_JPBRANCH_B6, \ + cJU_JPBRANCH_U6) +#define SMPREPBL(Next) SMPREPB(cJU_ROOTSTATE, Next, cJU_JPBRANCH_L7, \ + cJU_JPBRANCH_B7, \ + cJU_JPBRANCH_U7) +#endif + + +// RESTART AFTER SECONDARY DEAD END: +// +// Set Index to the first/last index in the branch or leaf subexpanse and start +// over at the top of the tree. + +#ifdef JUDYPREV +#define SMRESTART(Digits) { CLEARLEASTDIGITS(Digits); goto SMGetRestart; } +#else +#define SMRESTART(Digits) { SETLEASTDIGITS( Digits); goto SMGetRestart; } +#endif + + +// CHECK EDGE OF LEAFS EXPANSE: +// +// Given the LSBs of the lowest/highest valid index in a leaf (or equivalently +// in an immediate JP), the level (index size) of the leaf, and the full index +// to return (as Index in the context) already set to the full index matching +// the lowest/highest one, determine if there is an empty index in the leafs +// expanse below/above the lowest/highest index, which is true if the +// lowest/highest index is not at the "edge" of the leafs expanse based on its +// LSBs. If so, return Index decremented/incremented; otherwise restart at the +// top of the tree. +// +// Note: In many cases Index is already at the right spot and calling +// SMRESTART instead of just going directly to SMGetRestart is a bit of +// overkill. +// +// Note: Variable shift occurs if Digits is not a constant. + +#ifdef JUDYPREV +#define LEAF_EDGE(MinIndex,Digits) \ + { \ + if (MinIndex) { --Index; RET_SUCCESS; } \ + SMRESTART(Digits); \ + } +#else +#define LEAF_EDGE(MaxIndex,Digits) \ + { \ + if ((MaxIndex) != JU_LEASTBYTES(cJU_ALLONES, Digits)) \ + { ++Index; RET_SUCCESS; } \ + SMRESTART(Digits); \ + } +#endif + +// Same as above except Index is not already set to match the lowest/highest +// index, so do that before decrementing/incrementing it: + +#ifdef JUDYPREV +#define LEAF_EDGE_SET(MinIndex,Digits) \ + { \ + if (MinIndex) \ + { JU_SETDIGITS(Index, MinIndex, Digits); --Index; RET_SUCCESS; } \ + SMRESTART(Digits); \ + } +#else +#define LEAF_EDGE_SET(MaxIndex,Digits) \ + { \ + if ((MaxIndex) != JU_LEASTBYTES(cJU_ALLONES, Digits)) \ + { JU_SETDIGITS(Index, MaxIndex, Digits); ++Index; RET_SUCCESS; } \ + SMRESTART(Digits); \ + } +#endif + + +// FIND A HOLE (EMPTY INDEX) IN AN IMMEDIATE OR LEAF: +// +// Given an index location in a leaf (or equivalently an immediate JP) known to +// contain a usable hole (an empty index less/greater than Index), and the LSBs +// of a minimum/maximum index to locate, find the previous/next empty index and +// return it. +// +// Note: "Even" index sizes (1,2,4[,8] bytes) have corresponding native C +// types; "odd" index sizes dont, but they are not represented here because +// they are handled completely differently; see elsewhere. + +#ifdef JUDYPREV + +#define LEAF_HOLE_EVEN(cDigits,Pjll,IndexLSB) \ + { \ + while (*(Pjll) > (IndexLSB)) --(Pjll); /* too high */ \ + if (*(Pjll) < (IndexLSB)) RET_SUCCESS /* Index is empty */ \ + while (*(--(Pjll)) == --(IndexLSB)) /* null, find a hole */;\ + JU_SETDIGITS(Index, IndexLSB, cDigits); \ + RET_SUCCESS; \ + } +#else +#define LEAF_HOLE_EVEN(cDigits,Pjll,IndexLSB) \ + { \ + while (*(Pjll) < (IndexLSB)) ++(Pjll); /* too low */ \ + if (*(Pjll) > (IndexLSB)) RET_SUCCESS /* Index is empty */ \ + while (*(++(Pjll)) == ++(IndexLSB)) /* null, find a hole */;\ + JU_SETDIGITS(Index, IndexLSB, cDigits); \ + RET_SUCCESS; \ + } +#endif + + +// SEARCH FOR AN EMPTY INDEX IN AN IMMEDIATE OR LEAF: +// +// Given a pointer to the first index in a leaf (or equivalently an immediate +// JP), the population of the leaf, and a first empty Index to find (inclusive, +// as Index in the context), where Index is known to fall within the expanse of +// the leaf to search, efficiently find the previous/next empty index in the +// leaf, if any. For simplicity the following overview is stated in terms of +// Judy*NextEmpty() only, but the same concepts apply symmetrically for +// Judy*PrevEmpty(). Also, in each case the comparisons are for the LSBs of +// Index and leaf indexes, according to the leafs level. +// +// 1. If Index is GREATER than the last (highest) index in the leaf +// (maxindex), return success, Index is empty. (Remember, Index is known +// to be in the leafs expanse.) +// +// 2. If Index is EQUAL to maxindex: If maxindex is not at the edge of the +// leafs expanse, increment Index and return success, there is an empty +// Index one higher than any in the leaf; otherwise restart with Index +// reset to the upper edge of the leafs expanse. Note: This might cause +// an extra cache line fill, but this is OK for repeatedly-called search +// code, and it saves CPU time. +// +// 3. If Index is LESS than maxindex, check for "dense to end of leaf": +// Subtract Index from maxindex, and back up that many slots in the leaf. +// If the resulting offset is not before the start of the leaf then compare +// the index at this offset (baseindex) with Index: +// +// 3a. If GREATER, the leaf must be corrupt, since indexes are sorted and +// there are no duplicates. +// +// 3b. If EQUAL, the leaf is "dense" from Index to maxindex, meaning there is +// no reason to search it. "Slide right" to the high end of the leaf +// (modify Index to maxindex) and continue with step 2 above. +// +// 3c. If LESS, continue with step 4. +// +// 4. If the offset based on maxindex minus Index falls BEFORE the start of +// the leaf, or if, per 3c above, baseindex is LESS than Index, the leaf is +// guaranteed "not dense to the end" and a usable empty Index must exist. +// This supports a more efficient search loop. Start at the FIRST index in +// the leaf, or one BEYOND baseindex, respectively, and search the leaf as +// follows, comparing each current index (currindex) with Index: +// +// 4a. If LESS, keep going to next index. Note: This is certain to terminate +// because maxindex is known to be greater than Index, hence the loop can +// be small and fast. +// +// 4b. If EQUAL, loop and increment Index until finding currindex greater than +// Index, and return success with the modified Index. +// +// 4c. If GREATER, return success, Index (unmodified) is empty. +// +// Note: These are macros rather than functions for speed. + +#ifdef JUDYPREV + +#define JSLE_EVEN(Addr,Pop0,cDigits,LeafType) \ + { \ + LeafType * PjllLSB = (LeafType *) (Addr); \ + LeafType IndexLSB = Index; /* auto-masking */ \ + \ + /* Index before or at start of leaf: */ \ + \ + if (*PjllLSB >= IndexLSB) /* no need to search */ \ + { \ + if (*PjllLSB > IndexLSB) RET_SUCCESS; /* Index empty */ \ + LEAF_EDGE(*PjllLSB, cDigits); \ + } \ + \ + /* Index in or after leaf: */ \ + \ + offset = IndexLSB - *PjllLSB; /* tentative offset */ \ + if (offset <= (Pop0)) /* can check density */ \ + { \ + PjllLSB += offset; /* move to slot */ \ + \ + if (*PjllLSB <= IndexLSB) /* dense or corrupt */ \ + { \ + if (*PjllLSB == IndexLSB) /* dense, check edge */ \ + LEAF_EDGE_SET(PjllLSB[-offset], cDigits); \ + RET_CORRUPT; \ + } \ + --PjllLSB; /* not dense, start at previous */ \ + } \ + else PjllLSB = ((LeafType *) (Addr)) + (Pop0); /* start at max */ \ + \ + LEAF_HOLE_EVEN(cDigits, PjllLSB, IndexLSB); \ + } + +// JSLE_ODD is completely different from JSLE_EVEN because its important to +// minimize copying odd indexes to compare them (see 4.14). Furthermore, a +// very complex version (4.17, but abandoned before fully debugged) that +// avoided calling j__udySearchLeaf*() ran twice as fast as 4.14, but still +// half as fast as SearchValid. Doug suggested that to minimize complexity and +// share common code we should use j__udySearchLeaf*() for the initial search +// to establish if Index is empty, which should be common. If Index is valid +// in a leaf or immediate indexes, odds are good that an empty Index is nearby, +// so for simplicity just use a *COPY* function to linearly search the +// remainder. +// +// TBD: Pathological case? Average performance should be good, but worst-case +// might suffer. When Search says the initial Index is valid, so a linear +// copy-and-compare is begun, if the caller builds fairly large leaves with +// dense clusters AND frequently does a SearchEmpty at one end of such a +// cluster, performance wont be very good. Might a dense-check help? This +// means checking offset against the index at offset, and then against the +// first/last index in the leaf. We doubt the pathological case will appear +// much in real applications because they will probably alternate SearchValid +// and SearchEmpty calls. + +#define JSLE_ODD(cDigits,Pjll,Pop0,Search,Copy) \ + { \ + Word_t IndexLSB; /* least bytes only */ \ + Word_t IndexFound; /* in leaf */ \ + \ + if ((offset = Search(Pjll, (Pop0) + 1, Index)) < 0) \ + RET_SUCCESS; /* Index is empty */ \ + \ + IndexLSB = JU_LEASTBYTES(Index, cDigits); \ + offset *= (cDigits); \ + \ + while ((offset -= (cDigits)) >= 0) \ + { /* skip until empty or start */ \ + Copy(IndexFound, ((uint8_t *) (Pjll)) + offset); \ + if (IndexFound != (--IndexLSB)) /* found an empty */ \ + { JU_SETDIGITS(Index, IndexLSB, cDigits); RET_SUCCESS; }\ + } \ + LEAF_EDGE_SET(IndexLSB, cDigits); \ + } + +#else // JUDYNEXT + +#define JSLE_EVEN(Addr,Pop0,cDigits,LeafType) \ + { \ + LeafType * PjllLSB = ((LeafType *) (Addr)) + (Pop0); \ + LeafType IndexLSB = Index; /* auto-masking */ \ + \ + /* Index at or after end of leaf: */ \ + \ + if (*PjllLSB <= IndexLSB) /* no need to search */ \ + { \ + if (*PjllLSB < IndexLSB) RET_SUCCESS; /* Index empty */\ + LEAF_EDGE(*PjllLSB, cDigits); \ + } \ + \ + /* Index before or in leaf: */ \ + \ + offset = *PjllLSB - IndexLSB; /* tentative offset */ \ + if (offset <= (Pop0)) /* can check density */ \ + { \ + PjllLSB -= offset; /* move to slot */ \ + \ + if (*PjllLSB >= IndexLSB) /* dense or corrupt */ \ + { \ + if (*PjllLSB == IndexLSB) /* dense, check edge */ \ + LEAF_EDGE_SET(PjllLSB[offset], cDigits); \ + RET_CORRUPT; \ + } \ + ++PjllLSB; /* not dense, start at next */ \ + } \ + else PjllLSB = (LeafType *) (Addr); /* start at minimum */ \ + \ + LEAF_HOLE_EVEN(cDigits, PjllLSB, IndexLSB); \ + } + +#define JSLE_ODD(cDigits,Pjll,Pop0,Search,Copy) \ + { \ + Word_t IndexLSB; /* least bytes only */ \ + Word_t IndexFound; /* in leaf */ \ + int offsetmax; /* in bytes */ \ + \ + if ((offset = Search(Pjll, (Pop0) + 1, Index)) < 0) \ + RET_SUCCESS; /* Index is empty */ \ + \ + IndexLSB = JU_LEASTBYTES(Index, cDigits); \ + offset *= (cDigits); \ + offsetmax = (Pop0) * (cDigits); /* single multiply */ \ + \ + while ((offset += (cDigits)) <= offsetmax) \ + { /* skip until empty or end */ \ + Copy(IndexFound, ((uint8_t *) (Pjll)) + offset); \ + if (IndexFound != (++IndexLSB)) /* found an empty */ \ + { JU_SETDIGITS(Index, IndexLSB, cDigits); RET_SUCCESS; } \ + } \ + LEAF_EDGE_SET(IndexLSB, cDigits); \ + } + +#endif // JUDYNEXT + +// Note: Immediate indexes never fill a single index group, so for odd index +// sizes, save time by calling JSLE_ODD_IMM instead of JSLE_ODD. + +#define j__udySearchLeafEmpty1(Addr,Pop0) \ + JSLE_EVEN(Addr, Pop0, 1, uint8_t) + +#define j__udySearchLeafEmpty2(Addr,Pop0) \ + JSLE_EVEN(Addr, Pop0, 2, uint16_t) + +#define j__udySearchLeafEmpty3(Addr,Pop0) \ + JSLE_ODD(3, Addr, Pop0, j__udySearchLeaf3, JU_COPY3_PINDEX_TO_LONG) + +#ifndef JU_64BIT + +#define j__udySearchLeafEmptyL(Addr,Pop0) \ + JSLE_EVEN(Addr, Pop0, 4, Word_t) + +#else + +#define j__udySearchLeafEmpty4(Addr,Pop0) \ + JSLE_EVEN(Addr, Pop0, 4, uint32_t) + +#define j__udySearchLeafEmpty5(Addr,Pop0) \ + JSLE_ODD(5, Addr, Pop0, j__udySearchLeaf5, JU_COPY5_PINDEX_TO_LONG) + +#define j__udySearchLeafEmpty6(Addr,Pop0) \ + JSLE_ODD(6, Addr, Pop0, j__udySearchLeaf6, JU_COPY6_PINDEX_TO_LONG) + +#define j__udySearchLeafEmpty7(Addr,Pop0) \ + JSLE_ODD(7, Addr, Pop0, j__udySearchLeaf7, JU_COPY7_PINDEX_TO_LONG) + +#define j__udySearchLeafEmptyL(Addr,Pop0) \ + JSLE_EVEN(Addr, Pop0, 8, Word_t) + +#endif // JU_64BIT + + +// ---------------------------------------------------------------------------- +// START OF CODE: +// +// CHECK FOR SHORTCUTS: +// +// Error out if PIndex is null. + + if (PIndex == (PWord_t) NULL) + { + JU_SET_ERRNO(PJError, JU_ERRNO_NULLPINDEX); + return(JERRI); + } + + Index = *PIndex; // fast local copy. + +// Set and pre-decrement/increment Index, watching for underflow/overflow: +// +// An out-of-bounds Index means failure: No previous/next empty index. + +SMGetRestart: // return here with revised Index. + +#ifdef JUDYPREV + if (Index-- == 0) return(0); +#else + if (++Index == 0) return(0); +#endif + +// An empty array with an in-bounds (not underflowed/overflowed) Index means +// success: +// +// Note: This check is redundant after restarting at SMGetRestart, but should +// take insignificant time. + + if (PArray == (Pvoid_t) NULL) RET_SUCCESS; + +// ---------------------------------------------------------------------------- +// ROOT-LEVEL LEAF that starts with a Pop0 word; just look within the leaf: +// +// If Index is not in the leaf, return success; otherwise return the first +// empty Index, if any, below/above where it would belong. + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlw = P_JLW(PArray); // first word of leaf. + pop0 = Pjlw[0]; + +#ifdef JUDY1 + if (pop0 == 0) // special case. + { +#ifdef JUDYPREV + if ((Index != Pjlw[1]) || (Index-- != 0)) RET_SUCCESS; +#else + if ((Index != Pjlw[1]) || (++Index != 0)) RET_SUCCESS; +#endif + return(0); // no previous/next empty index. + } +#endif // JUDY1 + + j__udySearchLeafEmptyL(Pjlw + 1, pop0); + +// No return -- thanks ALAN + + } + else + +// ---------------------------------------------------------------------------- +// HANDLE JRP Branch: +// +// For JRP branches, traverse the JPM; handle LEAFW +// directly; but look for the most common cases first. + + { + Pjpm_t Pjpm = P_JPM(PArray); + Pjp = &(Pjpm->jpm_JP); + +// goto SMGetContinue; + } + + +// ============================================================================ +// STATE MACHINE -- GET INDEX: +// +// Search for Index (already decremented/incremented so as to be an inclusive +// search). If not found (empty index), return success. Otherwise do a +// previous/next search, and if successful modify Index to the empty index +// found. See function header comments. +// +// ENTRY: Pjp points to next JP to interpret, whose Decode bytes have not yet +// been checked. +// +// Note: Check Decode bytes at the start of each loop, not after looking up a +// new JP, so its easy to do constant shifts/masks. +// +// EXIT: Return, or branch to SMGetRestart with modified Index, or branch to +// SMGetContinue with a modified Pjp, as described elsewhere. +// +// WARNING: For run-time efficiency the following cases replicate code with +// varying constants, rather than using common code with variable values! + +SMGetContinue: // return here for next branch/leaf. + +#ifdef TRACEJPSE + JudyPrintJP(Pjp, "sf", __LINE__); +#endif + + switch (JU_JPTYPE(Pjp)) + { + + +// ---------------------------------------------------------------------------- +// LINEAR BRANCH: +// +// Check Decode bytes, if any, in the current JP, then search for a JP for the +// next digit in Index. + + case cJU_JPBRANCH_L2: CHECKDCD(2); SMPREPB2(SMBranchL); + case cJU_JPBRANCH_L3: CHECKDCD(3); SMPREPB3(SMBranchL); +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: CHECKDCD(4); SMPREPB4(SMBranchL); + case cJU_JPBRANCH_L5: CHECKDCD(5); SMPREPB5(SMBranchL); + case cJU_JPBRANCH_L6: CHECKDCD(6); SMPREPB6(SMBranchL); + case cJU_JPBRANCH_L7: CHECKDCD(7); SMPREPB7(SMBranchL); +#endif + case cJU_JPBRANCH_L: SMPREPBL(SMBranchL); + +// Common code (state-independent) for all cases of linear branches: + +SMBranchL: + Pjbl = P_JBL(Pjp->jp_Addr); + +// First, check if Indexs expanse (digit) is below/above the first/last +// populated expanse in the BranchL, in which case Index is empty; otherwise +// find the offset of the lowest/highest populated expanse at or above/below +// digit, if any: +// +// Note: The for-loop is guaranteed to exit eventually because the first/last +// expanse is known to be a terminator. +// +// Note: Cannot use j__udySearchLeaf*Empty1() here because it only applies to +// leaves and does not know about partial versus full JPs, unlike the use of +// j__udySearchLeaf1() for BranchLs in SearchValid code. Also, since linear +// leaf expanse lists are small, dont waste time calling j__udySearchLeaf1(), +// just scan the expanse list. + +#ifdef JUDYPREV + if ((Pjbl->jbl_Expanse[0]) > digit) RET_SUCCESS; + + for (offset = (Pjbl->jbl_NumJPs) - 1; /* null */; --offset) +#else + if ((Pjbl->jbl_Expanse[(Pjbl->jbl_NumJPs) - 1]) < digit) + RET_SUCCESS; + + for (offset = 0; /* null */; ++offset) +#endif + { + +// Too low/high, keep going; or too high/low, meaning the loop passed a hole +// and the initial Index is empty: + +#ifdef JUDYPREV + if ((Pjbl->jbl_Expanse[offset]) > digit) continue; + if ((Pjbl->jbl_Expanse[offset]) < digit) RET_SUCCESS; +#else + if ((Pjbl->jbl_Expanse[offset]) < digit) continue; + if ((Pjbl->jbl_Expanse[offset]) > digit) RET_SUCCESS; +#endif + +// Found expanse matching digit; if its not full, traverse through it: + + if (! JPFULL((Pjbl->jbl_jp) + offset)) + { + Pjp = (Pjbl->jbl_jp) + offset; + goto SMGetContinue; + } + +// Common code: While searching for a lower/higher hole or a non-full JP, upon +// finding a lower/higher hole, adjust Index using the revised digit and +// return; or upon finding a consecutive lower/higher expanse, if the expanses +// JP is non-full, modify Index and traverse through the JP: + +#define BRANCHL_CHECK(OpIncDec,OpLeastDigits,Digit,Digits) \ + { \ + if ((Pjbl->jbl_Expanse[offset]) != OpIncDec digit) \ + SET_AND_RETURN(OpLeastDigits, Digit, Digits); \ + \ + if (! JPFULL((Pjbl->jbl_jp) + offset)) \ + { \ + Pjp = (Pjbl->jbl_jp) + offset; \ + SET_AND_CONTINUE(OpLeastDigits, Digit, Digits); \ + } \ + } + +// BranchL primary dead end: Expanse matching Index/digit is full (rare except +// for dense/sequential indexes): +// +// Search for a lower/higher hole, a non-full JP, or the end of the expanse +// list, while decrementing/incrementing digit. + +#ifdef JUDYPREV + while (--offset >= 0) + BRANCHL_CHECK(--, SETLEASTDIGITS_D, digit, digits) +#else + while (++offset < Pjbl->jbl_NumJPs) + BRANCHL_CHECK(++, CLEARLEASTDIGITS_D, digit, digits) +#endif + +// Passed end of BranchL expanse list after finding a matching but full +// expanse: +// +// Digit now matches the lowest/highest expanse, which is a full expanse; if +// digit is at the end of BranchLs expanse (no hole before/after), break out +// of the loop; otherwise modify Index to the next lower/higher digit and +// return success: + +#ifdef JUDYPREV + if (digit == 0) break; + --digit; SET_AND_RETURN(SETLEASTDIGITS_D, digit, digits); +#else + if (digit == JU_LEASTBYTES(cJU_ALLONES, 1)) break; + ++digit; SET_AND_RETURN(CLEARLEASTDIGITS_D, digit, digits); +#endif + } // for-loop + +// BranchL secondary dead end, no non-full previous/next JP: + + SMRESTART(digits); + + +// ---------------------------------------------------------------------------- +// BITMAP BRANCH: +// +// Check Decode bytes, if any, in the current JP, then search for a JP for the +// next digit in Index. + + case cJU_JPBRANCH_B2: CHECKDCD(2); SMPREPB2(SMBranchB); + case cJU_JPBRANCH_B3: CHECKDCD(3); SMPREPB3(SMBranchB); +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: CHECKDCD(4); SMPREPB4(SMBranchB); + case cJU_JPBRANCH_B5: CHECKDCD(5); SMPREPB5(SMBranchB); + case cJU_JPBRANCH_B6: CHECKDCD(6); SMPREPB6(SMBranchB); + case cJU_JPBRANCH_B7: CHECKDCD(7); SMPREPB7(SMBranchB); +#endif + case cJU_JPBRANCH_B: SMPREPBL(SMBranchB); + +// Common code (state-independent) for all cases of bitmap branches: + +SMBranchB: + Pjbb = P_JBB(Pjp->jp_Addr); + +// Locate the digits JP in the subexpanse list, if present: + + subexp = digit / cJU_BITSPERSUBEXPB; + assert(subexp < cJU_NUMSUBEXPB); // falls in expected range. + bitposmaskB = JU_BITPOSMASKB(digit); + +// Absent JP = no JP matches current digit in Index: + +// if (! JU_BITMAPTESTB(Pjbb, digit)) // slower. + if (! (JU_JBB_BITMAP(Pjbb, subexp) & bitposmaskB)) // faster. + RET_SUCCESS; + +// Non-full JP matches current digit in Index: +// +// Iterate to the subsidiary non-full JP. + + offset = SEARCHBITMAPB(JU_JBB_BITMAP(Pjbb, subexp), digit, + bitposmaskB); + // not negative since at least one bit is set: + assert(offset >= 0); + assert(offset < (int) cJU_BITSPERSUBEXPB); + +// Watch for null JP subarray pointer with non-null bitmap (a corruption): + + if ((Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp))) + == (Pjp_t) NULL) RET_CORRUPT; + + Pjp += offset; + if (! JPFULL(Pjp)) goto SMGetContinue; + +// BranchB primary dead end: +// +// Upon hitting a full JP in a BranchB for the next digit in Index, search +// sideways for a previous/next absent JP (unset bit) or non-full JP (set bit +// with non-full JP); first in the current bitmap subexpanse, then in +// lower/higher subexpanses. Upon entry, Pjp points to a known-unusable JP, +// ready to decrement/increment. +// +// Note: The preceding code is separate from this loop because Index does not +// need revising (see SET_AND_*()) if the initial index is an empty index. +// +// TBD: For speed, shift bitposmaskB instead of using JU_BITMAPTESTB or +// JU_BITPOSMASKB, but this shift has knowledge of bit order that really should +// be encapsulated in a header file. + +#define BRANCHB_CHECKBIT(OpLeastDigits) \ + if (! (JU_JBB_BITMAP(Pjbb, subexp) & bitposmaskB)) /* absent JP */ \ + SET_AND_RETURN(OpLeastDigits, digit, digits) + +#define BRANCHB_CHECKJPFULL(OpLeastDigits) \ + if (! JPFULL(Pjp)) \ + SET_AND_CONTINUE(OpLeastDigits, digit, digits) + +#define BRANCHB_STARTSUBEXP(OpLeastDigits) \ + if (! JU_JBB_BITMAP(Pjbb, subexp)) /* empty subexpanse, shortcut */ \ + SET_AND_RETURN(OpLeastDigits, digit, digits) \ + if ((Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp))) == (Pjp_t) NULL) RET_CORRUPT + +#ifdef JUDYPREV + + --digit; // skip initial digit. + bitposmaskB >>= 1; // see TBD above. + +BranchBNextSubexp: // return here to check next bitmap subexpanse. + + while (bitposmaskB) // more bits to check in subexp. + { + BRANCHB_CHECKBIT(SETLEASTDIGITS_D); + --Pjp; // previous in subarray. + BRANCHB_CHECKJPFULL(SETLEASTDIGITS_D); + assert(digit >= 0); + --digit; + bitposmaskB >>= 1; + } + + if (subexp-- > 0) // more subexpanses. + { + BRANCHB_STARTSUBEXP(SETLEASTDIGITS_D); + Pjp += SEARCHBITMAPMAXB(JU_JBB_BITMAP(Pjbb, subexp)) + 1; + bitposmaskB = (1U << (cJU_BITSPERSUBEXPB - 1)); + goto BranchBNextSubexp; + } + +#else // JUDYNEXT + + ++digit; // skip initial digit. + bitposmaskB <<= 1; // note: BITMAPB_t. + +BranchBNextSubexp: // return here to check next bitmap subexpanse. + + while (bitposmaskB) // more bits to check in subexp. + { + BRANCHB_CHECKBIT(CLEARLEASTDIGITS_D); + ++Pjp; // previous in subarray. + BRANCHB_CHECKJPFULL(CLEARLEASTDIGITS_D); + assert(digit < cJU_SUBEXPPERSTATE); + ++digit; + bitposmaskB <<= 1; // note: BITMAPB_t. + } + + if (++subexp < cJU_NUMSUBEXPB) // more subexpanses. + { + BRANCHB_STARTSUBEXP(CLEARLEASTDIGITS_D); + --Pjp; // pre-decrement. + bitposmaskB = 1; + goto BranchBNextSubexp; + } + +#endif // JUDYNEXT + +// BranchB secondary dead end, no non-full previous/next JP: + + SMRESTART(digits); + + +// ---------------------------------------------------------------------------- +// UNCOMPRESSED BRANCH: +// +// Check Decode bytes, if any, in the current JP, then search for a JP for the +// next digit in Index. + + case cJU_JPBRANCH_U2: CHECKDCD(2); SMPREPB2(SMBranchU); + case cJU_JPBRANCH_U3: CHECKDCD(3); SMPREPB3(SMBranchU); +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: CHECKDCD(4); SMPREPB4(SMBranchU); + case cJU_JPBRANCH_U5: CHECKDCD(5); SMPREPB5(SMBranchU); + case cJU_JPBRANCH_U6: CHECKDCD(6); SMPREPB6(SMBranchU); + case cJU_JPBRANCH_U7: CHECKDCD(7); SMPREPB7(SMBranchU); +#endif + case cJU_JPBRANCH_U: SMPREPBL(SMBranchU); + +// Common code (state-independent) for all cases of uncompressed branches: + +SMBranchU: + Pjbu = P_JBU(Pjp->jp_Addr); + Pjp = (Pjbu->jbu_jp) + digit; + +// Absent JP = null JP for current digit in Index: + + if (JPNULL(JU_JPTYPE(Pjp))) RET_SUCCESS; + +// Non-full JP matches current digit in Index: +// +// Iterate to the subsidiary JP. + + if (! JPFULL(Pjp)) goto SMGetContinue; + +// BranchU primary dead end: +// +// Upon hitting a full JP in a BranchU for the next digit in Index, search +// sideways for a previous/next null or non-full JP. BRANCHU_CHECKJP() is +// shorthand for common code. +// +// Note: The preceding code is separate from this loop because Index does not +// need revising (see SET_AND_*()) if the initial index is an empty index. + +#define BRANCHU_CHECKJP(OpIncDec,OpLeastDigits) \ + { \ + OpIncDec Pjp; \ + \ + if (JPNULL(JU_JPTYPE(Pjp))) \ + SET_AND_RETURN(OpLeastDigits, digit, digits) \ + \ + if (! JPFULL(Pjp)) \ + SET_AND_CONTINUE(OpLeastDigits, digit, digits) \ + } + +#ifdef JUDYPREV + while (digit-- > 0) + BRANCHU_CHECKJP(--, SETLEASTDIGITS_D); +#else + while (++digit < cJU_BRANCHUNUMJPS) + BRANCHU_CHECKJP(++, CLEARLEASTDIGITS_D); +#endif + +// BranchU secondary dead end, no non-full previous/next JP: + + SMRESTART(digits); + + +// ---------------------------------------------------------------------------- +// LINEAR LEAF: +// +// Check Decode bytes, if any, in the current JP, then search the leaf for the +// previous/next empty index starting at Index. Primary leaf dead end is +// hidden within j__udySearchLeaf*Empty*(). In case of secondary leaf dead +// end, restart at the top of the tree. +// +// Note: Pword is the name known to GET*; think of it as Pjlw. + +#define SMLEAFL(cDigits,Func) \ + Pword = (PWord_t) P_JLW(Pjp->jp_Addr); \ + pop0 = JU_JPLEAF_POP0(Pjp); \ + Func(Pword, pop0) + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: CHECKDCD(1); SMLEAFL(1, j__udySearchLeafEmpty1); +#endif + case cJU_JPLEAF2: CHECKDCD(2); SMLEAFL(2, j__udySearchLeafEmpty2); + case cJU_JPLEAF3: CHECKDCD(3); SMLEAFL(3, j__udySearchLeafEmpty3); + +#ifdef JU_64BIT + case cJU_JPLEAF4: CHECKDCD(4); SMLEAFL(4, j__udySearchLeafEmpty4); + case cJU_JPLEAF5: CHECKDCD(5); SMLEAFL(5, j__udySearchLeafEmpty5); + case cJU_JPLEAF6: CHECKDCD(6); SMLEAFL(6, j__udySearchLeafEmpty6); + case cJU_JPLEAF7: CHECKDCD(7); SMLEAFL(7, j__udySearchLeafEmpty7); +#endif + + +// ---------------------------------------------------------------------------- +// BITMAP LEAF: +// +// Check Decode bytes, if any, in the current JP, then search the leaf for the +// previous/next empty index starting at Index. + + case cJU_JPLEAF_B1: + + CHECKDCD(1); + + Pjlb = P_JLB(Pjp->jp_Addr); + digit = JU_DIGITATSTATE(Index, 1); + subexp = digit / cJU_BITSPERSUBEXPL; + bitposmaskL = JU_BITPOSMASKL(digit); + assert(subexp < cJU_NUMSUBEXPL); // falls in expected range. + +// Absent index = no index matches current digit in Index: + +// if (! JU_BITMAPTESTL(Pjlb, digit)) // slower. + if (! (JU_JLB_BITMAP(Pjlb, subexp) & bitposmaskL)) // faster. + RET_SUCCESS; + +// LeafB1 primary dead end: +// +// Upon hitting a valid (non-empty) index in a LeafB1 for the last digit in +// Index, search sideways for a previous/next absent index, first in the +// current bitmap subexpanse, then in lower/higher subexpanses. +// LEAFB1_CHECKBIT() is shorthand for common code to handle one bit in one +// bitmap subexpanse. +// +// Note: The preceding code is separate from this loop because Index does not +// need revising (see SET_AND_*()) if the initial index is an empty index. +// +// TBD: For speed, shift bitposmaskL instead of using JU_BITMAPTESTL or +// JU_BITPOSMASKL, but this shift has knowledge of bit order that really should +// be encapsulated in a header file. + +#define LEAFB1_CHECKBIT(OpLeastDigits) \ + if (! (JU_JLB_BITMAP(Pjlb, subexp) & bitposmaskL)) \ + SET_AND_RETURN(OpLeastDigits, digit, 1) + +#define LEAFB1_STARTSUBEXP(OpLeastDigits) \ + if (! JU_JLB_BITMAP(Pjlb, subexp)) /* empty subexp */ \ + SET_AND_RETURN(OpLeastDigits, digit, 1) + +#ifdef JUDYPREV + + --digit; // skip initial digit. + bitposmaskL >>= 1; // see TBD above. + +LeafB1NextSubexp: // return here to check next bitmap subexpanse. + + while (bitposmaskL) // more bits to check in subexp. + { + LEAFB1_CHECKBIT(SETLEASTDIGITS_D); + assert(digit >= 0); + --digit; + bitposmaskL >>= 1; + } + + if (subexp-- > 0) // more subexpanses. + { + LEAFB1_STARTSUBEXP(SETLEASTDIGITS_D); + bitposmaskL = (1UL << (cJU_BITSPERSUBEXPL - 1)); + goto LeafB1NextSubexp; + } + +#else // JUDYNEXT + + ++digit; // skip initial digit. + bitposmaskL <<= 1; // note: BITMAPL_t. + +LeafB1NextSubexp: // return here to check next bitmap subexpanse. + + while (bitposmaskL) // more bits to check in subexp. + { + LEAFB1_CHECKBIT(CLEARLEASTDIGITS_D); + assert(digit < cJU_SUBEXPPERSTATE); + ++digit; + bitposmaskL <<= 1; // note: BITMAPL_t. + } + + if (++subexp < cJU_NUMSUBEXPL) // more subexpanses. + { + LEAFB1_STARTSUBEXP(CLEARLEASTDIGITS_D); + bitposmaskL = 1; + goto LeafB1NextSubexp; + } + +#endif // JUDYNEXT + +// LeafB1 secondary dead end, no empty index: + + SMRESTART(1); + + +#ifdef JUDY1 +// ---------------------------------------------------------------------------- +// FULL POPULATION: +// +// If the Decode bytes do not match, Index is empty (without modification); +// otherwise restart. + + case cJ1_JPFULLPOPU1: + + CHECKDCD(1); + SMRESTART(1); +#endif + + +// ---------------------------------------------------------------------------- +// IMMEDIATE: +// +// Pop1 = 1 Immediate JPs: +// +// If Index is not in the immediate JP, return success; otherwise check if +// there is an empty index below/above the immediate JPs index, and if so, +// return success with modified Index, else restart. +// +// Note: Doug says its fast enough to calculate the index size (digits) in +// the following; no need to set it separately for each case. + + case cJU_JPIMMED_1_01: + case cJU_JPIMMED_2_01: + case cJU_JPIMMED_3_01: +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: + case cJU_JPIMMED_5_01: + case cJU_JPIMMED_6_01: + case cJU_JPIMMED_7_01: +#endif + if (JU_JPDCDPOP0(Pjp) != JU_TRIMTODCDSIZE(Index)) RET_SUCCESS; + digits = JU_JPTYPE(Pjp) - cJU_JPIMMED_1_01 + 1; + LEAF_EDGE(JU_LEASTBYTES(JU_JPDCDPOP0(Pjp), digits), digits); + +// Immediate JPs with Pop1 > 1: + +#define IMM_MULTI(Func,BaseJPType) \ + JUDY1CODE(Pword = (PWord_t) (Pjp->jp_1Index);) \ + JUDYLCODE(Pword = (PWord_t) (Pjp->jp_LIndex);) \ + Func(Pword, JU_JPTYPE(Pjp) - (BaseJPType) + 1) + + case cJU_JPIMMED_1_02: + case cJU_JPIMMED_1_03: +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: + case cJU_JPIMMED_1_05: + case cJU_JPIMMED_1_06: + case cJU_JPIMMED_1_07: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: + case cJ1_JPIMMED_1_09: + case cJ1_JPIMMED_1_10: + case cJ1_JPIMMED_1_11: + case cJ1_JPIMMED_1_12: + case cJ1_JPIMMED_1_13: + case cJ1_JPIMMED_1_14: + case cJ1_JPIMMED_1_15: +#endif + IMM_MULTI(j__udySearchLeafEmpty1, cJU_JPIMMED_1_02); + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: + case cJU_JPIMMED_2_03: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: + case cJ1_JPIMMED_2_05: + case cJ1_JPIMMED_2_06: + case cJ1_JPIMMED_2_07: +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + IMM_MULTI(j__udySearchLeafEmpty2, cJU_JPIMMED_2_02); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: + case cJ1_JPIMMED_3_04: + case cJ1_JPIMMED_3_05: +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + IMM_MULTI(j__udySearchLeafEmpty3, cJU_JPIMMED_3_02); +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_4_02: + case cJ1_JPIMMED_4_03: + IMM_MULTI(j__udySearchLeafEmpty4, cJ1_JPIMMED_4_02); + + case cJ1_JPIMMED_5_02: + case cJ1_JPIMMED_5_03: + IMM_MULTI(j__udySearchLeafEmpty5, cJ1_JPIMMED_5_02); + + case cJ1_JPIMMED_6_02: + IMM_MULTI(j__udySearchLeafEmpty6, cJ1_JPIMMED_6_02); + + case cJ1_JPIMMED_7_02: + IMM_MULTI(j__udySearchLeafEmpty7, cJ1_JPIMMED_7_02); +#endif + + +// ---------------------------------------------------------------------------- +// INVALID JP TYPE: + + default: RET_CORRUPT; + + } // SMGet switch. + +} // Judy1PrevEmpty() / Judy1NextEmpty() / JudyLPrevEmpty() / JudyLNextEmpty() diff --git a/libnetdata/libjudy/src/JudyL/JudyLTablesGen.c b/libnetdata/libjudy/src/JudyL/JudyLTablesGen.c new file mode 100644 index 0000000..cb8b13f --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/JudyLTablesGen.c @@ -0,0 +1,296 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.37 $ $Source: /judy/src/JudyCommon/JudyTables.c $ + +#ifndef JU_WIN +#include // unavailable on win_*. +#endif + +#include +#include + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#define TERMINATOR 999 // terminator for Alloc tables + +#define BPW sizeof(Word_t) // define bytes per word + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +FILE *fd; + +// Definitions come from header files Judy1.h and JudyL.h: + +int AllocSizes[] = ALLOCSIZES; + +#define ROUNDUP(BYTES,BPW,OFFSETW) \ + ((((BYTES) + (BPW) - 1) / (BPW)) + (OFFSETW)) + + +// **************************************************************************** +// G E N T A B L E +// +// Note: "const" is required for newer compilers. + +FUNCTION void GenTable( + const char * TableName, // name of table string + const char * TableSize, // dimentioned size string + int IndexBytes, // bytes per Index + int LeafSize, // number elements in object + int ValueBytes, // bytes per Value + int OffsetWords) // 1 for LEAFW +{ + int * PAllocSizes = AllocSizes; + int OWord; + int CurWord; + int IWord; + int ii; + int BytesOfIndex; + int BytesOfObject; + int Index; + int LastWords; + int Words [1000] = { 0 }; + int Offset[1000] = { 0 }; + int MaxWords; + + MaxWords = ROUNDUP((IndexBytes + ValueBytes) * LeafSize, BPW, OffsetWords); + Words[0] = 0; + Offset[0] = 0; + CurWord = TERMINATOR; + +// Walk through all number of Indexes in table: + + for (Index = 1; /* null */; ++Index) + { + +// Calculate byte required for next size: + + BytesOfIndex = IndexBytes * Index; + BytesOfObject = (IndexBytes + ValueBytes) * Index; + +// Round up and calculate words required for next size: + + OWord = ROUNDUP(BytesOfObject, BPW, OffsetWords); + IWord = ROUNDUP(BytesOfIndex, BPW, OffsetWords); + +// Root-level leaves of population of 1 and 2 do not have the 1 word offset: + +// Save minimum value of offset: + + Offset[Index] = IWord; + +// Round up to next available size of words: + + while (OWord > *PAllocSizes) PAllocSizes++; + + if (Index == LeafSize) + { + CurWord = Words[Index] = OWord; + break; + } +// end of available sizes ? + + if (*PAllocSizes == TERMINATOR) + { + fprintf(stderr, "BUG, in %sPopToWords, sizes not big enough for object\n", TableName); + exit(1); + } + +// Save words required and last word: + + if (*PAllocSizes < MaxWords) { CurWord = Words[Index] = *PAllocSizes; } + else { CurWord = Words[Index] = MaxWords; } + + } // for each index + + LastWords = TERMINATOR; + +// Round up to largest size in each group of malloc sizes: + + for (ii = LeafSize; ii > 0; ii--) + { + if (LastWords > (Words[ii] - ii)) LastWords = Offset[ii]; + else Offset[ii] = LastWords; + } + +// Print the PopToWords[] table: + + fprintf(fd,"\n//\tobject uses %d words\n", CurWord); + fprintf(fd,"//\t%s = %d\n", TableSize, LeafSize); + + fprintf(fd,"const uint8_t\n"); + fprintf(fd,"%sPopToWords[%s + 1] =\n", TableName, TableSize); + fprintf(fd,"{\n\t 0,"); + + for (ii = 1; ii <= LeafSize; ii++) + { + +// 8 columns per line, starting with 1: + + if ((ii % 8) == 1) fprintf(fd,"\n\t"); + + fprintf(fd,"%2d", Words[ii]); + +// If not last number place comma: + + if (ii != LeafSize) fprintf(fd,", "); + } + fprintf(fd,"\n};\n"); + +// Print the Offset table if needed: + + if (! ValueBytes) return; + + fprintf(fd,"const uint8_t\n"); + fprintf(fd,"%sOffset[%s + 1] =\n", TableName, TableSize); + fprintf(fd,"{\n"); + fprintf(fd,"\t 0,"); + + for (ii = 1; ii <= LeafSize; ii++) + { + if ((ii % 8) == 1) fprintf(fd,"\n\t"); + + fprintf(fd,"%2d", Offset[ii]); + + if (ii != LeafSize) fprintf(fd,", "); + } + fprintf(fd,"\n};\n"); + +} // GenTable() + + +// **************************************************************************** +// M A I N + +FUNCTION int main() +{ + int ii; + +#ifdef JUDY1 + char *fname = "Judy1Tables.c"; +#else + char *fname = "JudyLTables.c"; +#endif + + if ((fd = fopen(fname, "w")) == NULL){ + perror("FATAL ERROR: could not write to Judy[1L]Tables.c file\n"); + return (-1); + } + + + fprintf(fd,"// @(#) From generation tool: $Revision: 4.37 $ $Source: /judy/src/JudyCommon/JudyTables.c $\n"); + fprintf(fd,"//\n\n"); + + +// ================================ Judy1 ================================= +#ifdef JUDY1 + + fprintf(fd,"#include \"Judy1.h\"\n"); + + fprintf(fd,"// Leave the malloc() sizes readable in the binary (via " + "strings(1)):\n"); + fprintf(fd,"const char * Judy1MallocSizes = \"Judy1MallocSizes ="); + + for (ii = 0; AllocSizes[ii] != TERMINATOR; ii++) + fprintf(fd," %d,", AllocSizes[ii]); + +#ifndef JU_64BIT + fprintf(fd," Leaf1 = %d\";\n\n", cJ1_LEAF1_MAXPOP1); +#else + fprintf(fd,"\";\n\n"); // no Leaf1 in this case. +#endif + +// ================================ 32 bit ================================ +#ifndef JU_64BIT + + GenTable("j__1_BranchBJP","cJU_BITSPERSUBEXPB", 8, cJU_BITSPERSUBEXPB,0,0); + + GenTable("j__1_Leaf1", "cJ1_LEAF1_MAXPOP1", 1, cJ1_LEAF1_MAXPOP1, 0, 0); + GenTable("j__1_Leaf2", "cJ1_LEAF2_MAXPOP1", 2, cJ1_LEAF2_MAXPOP1, 0, 0); + GenTable("j__1_Leaf3", "cJ1_LEAF3_MAXPOP1", 3, cJ1_LEAF3_MAXPOP1, 0, 0); + GenTable("j__1_LeafW", "cJ1_LEAFW_MAXPOP1", 4, cJ1_LEAFW_MAXPOP1, 0, 1); + +#endif + +// ================================ 64 bit ================================ +#ifdef JU_64BIT + GenTable("j__1_BranchBJP","cJU_BITSPERSUBEXPB",16, cJU_BITSPERSUBEXPB,0,0); + + GenTable("j__1_Leaf2", "cJ1_LEAF2_MAXPOP1", 2, cJ1_LEAF2_MAXPOP1, 0, 0); + GenTable("j__1_Leaf3", "cJ1_LEAF3_MAXPOP1", 3, cJ1_LEAF3_MAXPOP1, 0, 0); + GenTable("j__1_Leaf4", "cJ1_LEAF4_MAXPOP1", 4, cJ1_LEAF4_MAXPOP1, 0, 0); + GenTable("j__1_Leaf5", "cJ1_LEAF5_MAXPOP1", 5, cJ1_LEAF5_MAXPOP1, 0, 0); + GenTable("j__1_Leaf6", "cJ1_LEAF6_MAXPOP1", 6, cJ1_LEAF6_MAXPOP1, 0, 0); + GenTable("j__1_Leaf7", "cJ1_LEAF7_MAXPOP1", 7, cJ1_LEAF7_MAXPOP1, 0, 0); + GenTable("j__1_LeafW", "cJ1_LEAFW_MAXPOP1", 8, cJ1_LEAFW_MAXPOP1, 0, 1); +#endif +#endif // JUDY1 + + +// ================================ JudyL ================================= +#ifdef JUDYL + + fprintf(fd,"#include \"JudyL.h\"\n"); + + fprintf(fd,"// Leave the malloc() sizes readable in the binary (via " + "strings(1)):\n"); + fprintf(fd,"const char * JudyLMallocSizes = \"JudyLMallocSizes ="); + + for (ii = 0; AllocSizes[ii] != TERMINATOR; ii++) + fprintf(fd," %d,", AllocSizes[ii]); + + fprintf(fd," Leaf1 = %ld\";\n\n", (Word_t)cJL_LEAF1_MAXPOP1); + +#ifndef JU_64BIT +// ================================ 32 bit ================================ + GenTable("j__L_BranchBJP","cJU_BITSPERSUBEXPB", 8, cJU_BITSPERSUBEXPB, 0,0); + + GenTable("j__L_Leaf1", "cJL_LEAF1_MAXPOP1", 1, cJL_LEAF1_MAXPOP1, BPW,0); + GenTable("j__L_Leaf2", "cJL_LEAF2_MAXPOP1", 2, cJL_LEAF2_MAXPOP1, BPW,0); + GenTable("j__L_Leaf3", "cJL_LEAF3_MAXPOP1", 3, cJL_LEAF3_MAXPOP1, BPW,0); + GenTable("j__L_LeafW", "cJL_LEAFW_MAXPOP1", 4, cJL_LEAFW_MAXPOP1, BPW,1); + GenTable("j__L_LeafV", "cJU_BITSPERSUBEXPL", 4, cJU_BITSPERSUBEXPL, 0,0); +#endif // 32 BIT + +#ifdef JU_64BIT +// ================================ 64 bit ================================ + GenTable("j__L_BranchBJP","cJU_BITSPERSUBEXPB",16, cJU_BITSPERSUBEXPB, 0,0); + + GenTable("j__L_Leaf1", "cJL_LEAF1_MAXPOP1", 1, cJL_LEAF1_MAXPOP1, BPW,0); + GenTable("j__L_Leaf2", "cJL_LEAF2_MAXPOP1", 2, cJL_LEAF2_MAXPOP1, BPW,0); + GenTable("j__L_Leaf3", "cJL_LEAF3_MAXPOP1", 3, cJL_LEAF3_MAXPOP1, BPW,0); + GenTable("j__L_Leaf4", "cJL_LEAF4_MAXPOP1", 4, cJL_LEAF4_MAXPOP1, BPW,0); + GenTable("j__L_Leaf5", "cJL_LEAF5_MAXPOP1", 5, cJL_LEAF5_MAXPOP1, BPW,0); + GenTable("j__L_Leaf6", "cJL_LEAF6_MAXPOP1", 6, cJL_LEAF6_MAXPOP1, BPW,0); + GenTable("j__L_Leaf7", "cJL_LEAF7_MAXPOP1", 7, cJL_LEAF7_MAXPOP1, BPW,0); + GenTable("j__L_LeafW", "cJL_LEAFW_MAXPOP1", 8, cJL_LEAFW_MAXPOP1, BPW,1); + GenTable("j__L_LeafV", "cJU_BITSPERSUBEXPL", 8, cJU_BITSPERSUBEXPL, 0,0); +#endif // 64 BIT + +#endif // JUDYL + fclose(fd); + + return(0); + +} // main() diff --git a/libnetdata/libjudy/src/JudyL/j__udyLGet.c b/libnetdata/libjudy/src/JudyL/j__udyLGet.c new file mode 100644 index 0000000..0bb9971 --- /dev/null +++ b/libnetdata/libjudy/src/JudyL/j__udyLGet.c @@ -0,0 +1,1094 @@ +// Copyright (C) 2000 - 2002 Hewlett-Packard Company +// +// This program is free software; you can redistribute it and/or modify it +// under the term of the GNU Lesser General Public License as published by the +// Free Software Foundation; either version 2 of the License, or (at your +// option) any later version. +// +// This program is distributed in the hope that it will be useful, but WITHOUT +// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +// FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License +// for more details. +// +// You should have received a copy of the GNU Lesser General Public License +// along with this program; if not, write to the Free Software Foundation, +// Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +// _________________ + +// @(#) $Revision: 4.43 $ $Source: /judy/src/JudyCommon/JudyGet.c $ +// +// Judy1Test() and JudyLGet() functions for Judy1 and JudyL. +// Compile with one of -DJUDY1 or -DJUDYL. + +#if (! (defined(JUDY1) || defined(JUDYL))) +#error: One of -DJUDY1 or -DJUDYL must be specified. +#endif + +#ifdef JUDY1 +#include "Judy1.h" +#else +#include "JudyL.h" +#endif + +#include "JudyPrivate1L.h" + +#ifdef TRACEJPR // different macro name, for "retrieval" only. +#include "JudyPrintJP.c" +#endif + + +// **************************************************************************** +// J U D Y 1 T E S T +// J U D Y L G E T +// +// See the manual entry for details. Note support for "shortcut" entries to +// trees known to start with a JPM. + +#ifdef JUDY1 + +#ifdef JUDYGETINLINE +FUNCTION int j__udy1Test +#else +FUNCTION int Judy1Test +#endif + +#else // JUDYL + +#ifdef JUDYGETINLINE +FUNCTION PPvoid_t j__udyLGet +#else +FUNCTION PPvoid_t JudyLGet +#endif + +#endif // JUDYL + ( +#ifdef JUDYGETINLINE + Pvoid_t PArray, // from which to retrieve. + Word_t Index // to retrieve. +#else + Pcvoid_t PArray, // from which to retrieve. + Word_t Index, // to retrieve. + PJError_t PJError // optional, for returning error info. +#endif + ) +{ + Pjp_t Pjp; // current JP while walking the tree. + Pjpm_t Pjpm; // for global accounting. + uint8_t Digit; // byte just decoded from Index. + Word_t Pop1; // leaf population (number of indexes). + Pjll_t Pjll; // pointer to LeafL. + DBGCODE(uint8_t ParentJPType;) + +#ifndef JUDYGETINLINE + + if (PArray == (Pcvoid_t) NULL) // empty array. + { + JUDY1CODE(return(0);) + JUDYLCODE(return((PPvoid_t) NULL);) + } + +// **************************************************************************** +// PROCESS TOP LEVEL BRANCHES AND LEAF: + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + { + Pjlw_t Pjlw = P_JLW(PArray); // first word of leaf. + int posidx; // signed offset in leaf. + + Pop1 = Pjlw[0] + 1; + posidx = j__udySearchLeafW(Pjlw + 1, Pop1, Index); + + if (posidx >= 0) + { + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAFWVALUEAREA(Pjlw, Pop1) + posidx));) + } + JUDY1CODE(return(0);) + JUDYLCODE(return((PPvoid_t) NULL);) + } + +#endif // ! JUDYGETINLINE + + Pjpm = P_JPM(PArray); + Pjp = &(Pjpm->jpm_JP); // top branch is below JPM. + +// **************************************************************************** +// WALK THE JUDY TREE USING A STATE MACHINE: + +ContinueWalk: // for going down one level; come here with Pjp set. + +#ifdef TRACEJPR + JudyPrintJP(Pjp, "g", __LINE__); +#endif + switch (JU_JPTYPE(Pjp)) + { + +// Ensure the switch table starts at 0 for speed; otherwise more code is +// executed: + + case 0: goto ReturnCorrupt; // save a little code. + + +// **************************************************************************** +// JPNULL*: +// +// Note: These are legitimate in a BranchU (only) and do not constitute a +// fault. + + case cJU_JPNULL1: + case cJU_JPNULL2: + case cJU_JPNULL3: +#ifdef JU_64BIT + case cJU_JPNULL4: + case cJU_JPNULL5: + case cJU_JPNULL6: + case cJU_JPNULL7: +#endif + assert(ParentJPType >= cJU_JPBRANCH_U2); + assert(ParentJPType <= cJU_JPBRANCH_U); + JUDY1CODE(return(0);) + JUDYLCODE(return((PPvoid_t) NULL);) + + +// **************************************************************************** +// JPBRANCH_L*: +// +// Note: The use of JU_DCDNOTMATCHINDEX() in branches is not strictly +// required,since this can be done at leaf level, but it costs nothing to do it +// sooner, and it aborts an unnecessary traversal sooner. + + case cJU_JPBRANCH_L2: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 2)) break; + Digit = JU_DIGITATSTATE(Index, 2); + goto JudyBranchL; + + case cJU_JPBRANCH_L3: + +#ifdef JU_64BIT // otherwise its a no-op: + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 3)) break; +#endif + Digit = JU_DIGITATSTATE(Index, 3); + goto JudyBranchL; + +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 4)) break; + Digit = JU_DIGITATSTATE(Index, 4); + goto JudyBranchL; + + case cJU_JPBRANCH_L5: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 5)) break; + Digit = JU_DIGITATSTATE(Index, 5); + goto JudyBranchL; + + case cJU_JPBRANCH_L6: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 6)) break; + Digit = JU_DIGITATSTATE(Index, 6); + goto JudyBranchL; + + case cJU_JPBRANCH_L7: + + // JU_DCDNOTMATCHINDEX() would be a no-op. + Digit = JU_DIGITATSTATE(Index, 7); + goto JudyBranchL; + +#endif // JU_64BIT + + case cJU_JPBRANCH_L: + { + Pjbl_t Pjbl; + int posidx; + + Digit = JU_DIGITATSTATE(Index, cJU_ROOTSTATE); + +// Common code for all BranchLs; come here with Digit set: + +JudyBranchL: + Pjbl = P_JBL(Pjp->jp_Addr); + + posidx = 0; + + do { + if (Pjbl->jbl_Expanse[posidx] == Digit) + { // found Digit; continue traversal: + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = Pjbl->jbl_jp + posidx; + goto ContinueWalk; + } + } while (++posidx != Pjbl->jbl_NumJPs); + + break; + } + + +// **************************************************************************** +// JPBRANCH_B*: + + case cJU_JPBRANCH_B2: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 2)) break; + Digit = JU_DIGITATSTATE(Index, 2); + goto JudyBranchB; + + case cJU_JPBRANCH_B3: + +#ifdef JU_64BIT // otherwise its a no-op: + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 3)) break; +#endif + Digit = JU_DIGITATSTATE(Index, 3); + goto JudyBranchB; + + +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 4)) break; + Digit = JU_DIGITATSTATE(Index, 4); + goto JudyBranchB; + + case cJU_JPBRANCH_B5: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 5)) break; + Digit = JU_DIGITATSTATE(Index, 5); + goto JudyBranchB; + + case cJU_JPBRANCH_B6: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 6)) break; + Digit = JU_DIGITATSTATE(Index, 6); + goto JudyBranchB; + + case cJU_JPBRANCH_B7: + + // JU_DCDNOTMATCHINDEX() would be a no-op. + Digit = JU_DIGITATSTATE(Index, 7); + goto JudyBranchB; + +#endif // JU_64BIT + + case cJU_JPBRANCH_B: + { + Pjbb_t Pjbb; + Word_t subexp; // in bitmap, 0..7. + BITMAPB_t BitMap; // for one subexpanse. + BITMAPB_t BitMask; // bit in BitMap for Indexs Digit. + + Digit = JU_DIGITATSTATE(Index, cJU_ROOTSTATE); + +// Common code for all BranchBs; come here with Digit set: + +JudyBranchB: + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjbb = P_JBB(Pjp->jp_Addr); + subexp = Digit / cJU_BITSPERSUBEXPB; + + BitMap = JU_JBB_BITMAP(Pjbb, subexp); + Pjp = P_JP(JU_JBB_PJP(Pjbb, subexp)); + + BitMask = JU_BITPOSMASKB(Digit); + +// No JP in subexpanse for Index => Index not found: + + if (! (BitMap & BitMask)) break; + +// Count JPs in the subexpanse below the one for Index: + + Pjp += j__udyCountBitsB(BitMap & (BitMask - 1)); + + goto ContinueWalk; + + } // case cJU_JPBRANCH_B* + + +// **************************************************************************** +// JPBRANCH_U*: +// +// Notice the reverse order of the cases, and falling through to the next case, +// for performance. + + case cJU_JPBRANCH_U: + + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, cJU_ROOTSTATE); + +// If not a BranchU, traverse; otherwise fall into the next case, which makes +// this very fast code for a large Judy array (mainly BranchUs), especially +// when branches are already in the cache, such as for prev/next: + +#ifndef JU_64BIT + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U3) goto ContinueWalk; +#else + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U7) goto ContinueWalk; +#endif + +#ifdef JU_64BIT + case cJU_JPBRANCH_U7: + + // JU_DCDNOTMATCHINDEX() would be a no-op. + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, 7); + + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U6) goto ContinueWalk; + // and fall through. + + case cJU_JPBRANCH_U6: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 6)) break; + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, 6); + + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U5) goto ContinueWalk; + // and fall through. + + case cJU_JPBRANCH_U5: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 5)) break; + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, 5); + + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U4) goto ContinueWalk; + // and fall through. + + case cJU_JPBRANCH_U4: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 4)) break; + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, 4); + + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U3) goto ContinueWalk; + // and fall through. + +#endif // JU_64BIT + + case cJU_JPBRANCH_U3: + +#ifdef JU_64BIT // otherwise its a no-op: + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 3)) break; +#endif + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, 3); + + if (JU_JPTYPE(Pjp) != cJU_JPBRANCH_U2) goto ContinueWalk; + // and fall through. + + case cJU_JPBRANCH_U2: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 2)) break; + DBGCODE(ParentJPType = JU_JPTYPE(Pjp);) + Pjp = JU_JBU_PJP(Pjp, Index, 2); + +// Note: BranchU2 is a special case that must continue traversal to a leaf, +// immed, full, or null type: + + goto ContinueWalk; + + +// **************************************************************************** +// JPLEAF*: +// +// Note: Here the calls of JU_DCDNOTMATCHINDEX() are necessary and check +// whether Index is out of the expanse of a narrow pointer. + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + + case cJU_JPLEAF1: + { + int posidx; // signed offset in leaf. + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 1)) break; + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf1(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF1VALUEAREA(Pjll, Pop1) + posidx));) + } + +#endif // (JUDYL || (! JU_64BIT)) + + case cJU_JPLEAF2: + { + int posidx; // signed offset in leaf. + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 2)) break; + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf2(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF2VALUEAREA(Pjll, Pop1) + posidx));) + } + case cJU_JPLEAF3: + { + int posidx; // signed offset in leaf. + +#ifdef JU_64BIT // otherwise its a no-op: + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 3)) break; +#endif + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf3(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF3VALUEAREA(Pjll, Pop1) + posidx));) + } +#ifdef JU_64BIT + case cJU_JPLEAF4: + { + int posidx; // signed offset in leaf. + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 4)) break; + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf4(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF4VALUEAREA(Pjll, Pop1) + posidx));) + } + case cJU_JPLEAF5: + { + int posidx; // signed offset in leaf. + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 5)) break; + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf5(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF5VALUEAREA(Pjll, Pop1) + posidx));) + } + + case cJU_JPLEAF6: + { + int posidx; // signed offset in leaf. + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 6)) break; + + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf6(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF6VALUEAREA(Pjll, Pop1) + posidx));) + } + case cJU_JPLEAF7: + { + int posidx; // signed offset in leaf. + + // JU_DCDNOTMATCHINDEX() would be a no-op. + Pop1 = JU_JPLEAF_POP0(Pjp) + 1; + Pjll = P_JLL(Pjp->jp_Addr); + + if ((posidx = j__udySearchLeaf7(Pjll, Pop1, Index)) < 0) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) (JL_LEAF7VALUEAREA(Pjll, Pop1) + posidx));) + } +#endif // JU_64BIT + + +// **************************************************************************** +// JPLEAF_B1: + + case cJU_JPLEAF_B1: + { + Pjlb_t Pjlb; +#ifdef JUDYL + int posidx; + Word_t subexp; // in bitmap, 0..7. + BITMAPL_t BitMap; // for one subexpanse. + BITMAPL_t BitMask; // bit in BitMap for Indexs Digit. + Pjv_t Pjv; +#endif + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 1)) break; + + Pjlb = P_JLB(Pjp->jp_Addr); + +#ifdef JUDY1 + +// Simply check if Indexs bit is set in the bitmap: + + if (JU_BITMAPTESTL(Pjlb, Index)) return(1); + break; + +#else // JUDYL + +// JudyL is much more complicated because of value area subarrays: + + Digit = JU_DIGITATSTATE(Index, 1); + subexp = Digit / cJU_BITSPERSUBEXPL; + BitMap = JU_JLB_BITMAP(Pjlb, subexp); + BitMask = JU_BITPOSMASKL(Digit); + +// No value in subexpanse for Index => Index not found: + + if (! (BitMap & BitMask)) break; + +// Count value areas in the subexpanse below the one for Index: + + Pjv = P_JV(JL_JLB_PVALUE(Pjlb, subexp)); + assert(Pjv != (Pjv_t) NULL); + posidx = j__udyCountBitsL(BitMap & (BitMask - 1)); + + return((PPvoid_t) (Pjv + posidx)); + +#endif // JUDYL + + } // case cJU_JPLEAF_B1 + +#ifdef JUDY1 + +// **************************************************************************** +// JPFULLPOPU1: +// +// If the Index is in the expanse, it is necessarily valid (found). + + case cJ1_JPFULLPOPU1: + + if (JU_DCDNOTMATCHINDEX(Index, Pjp, 1)) break; + return(1); + +#ifdef notdef // for future enhancements +#ifdef JU_64BIT + +// Note: Need ? if (JU_DCDNOTMATCHINDEX(Index, Pjp, 1)) break; + + case cJ1_JPFULLPOPU1m15: + if (Pjp->jp_1Index[14] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m14: + if (Pjp->jp_1Index[13] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m13: + if (Pjp->jp_1Index[12] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m12: + if (Pjp->jp_1Index[11] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m11: + if (Pjp->jp_1Index[10] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m10: + if (Pjp->jp_1Index[9] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m9: + if (Pjp->jp_1Index[8] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m8: + if (Pjp->jp_1Index[7] == (uint8_t)Index) break; +#endif + case cJ1_JPFULLPOPU1m7: + if (Pjp->jp_1Index[6] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m6: + if (Pjp->jp_1Index[5] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m5: + if (Pjp->jp_1Index[4] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m4: + if (Pjp->jp_1Index[3] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m3: + if (Pjp->jp_1Index[2] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m2: + if (Pjp->jp_1Index[1] == (uint8_t)Index) break; + case cJ1_JPFULLPOPU1m1: + if (Pjp->jp_1Index[0] == (uint8_t)Index) break; + + return(1); // found, not in exclusion list + +#endif // JUDY1 +#endif // notdef + +// **************************************************************************** +// JPIMMED*: +// +// Note that the contents of jp_DcdPopO are different for cJU_JPIMMED_*_01: + + case cJU_JPIMMED_1_01: + case cJU_JPIMMED_2_01: + case cJU_JPIMMED_3_01: +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: + case cJU_JPIMMED_5_01: + case cJU_JPIMMED_6_01: + case cJU_JPIMMED_7_01: +#endif + if (JU_JPDCDPOP0(Pjp) != JU_TRIMTODCDSIZE(Index)) break; + + JUDY1CODE(return(1);) + JUDYLCODE(return((PPvoid_t) &(Pjp->jp_Addr));) // immediate value area. + + +// Macros to make code more readable and avoid dup errors + +#ifdef JUDY1 + +#define CHECKINDEXNATIVE(LEAF_T, PJP, IDX, INDEX) \ +if (((LEAF_T *)((PJP)->jp_1Index))[(IDX) - 1] == (LEAF_T)(INDEX)) \ + return(1) + +#define CHECKLEAFNONNAT(LFBTS, PJP, INDEX, IDX, COPY) \ +{ \ + Word_t i_ndex; \ + uint8_t *a_ddr; \ + a_ddr = (PJP)->jp_1Index + (((IDX) - 1) * (LFBTS)); \ + COPY(i_ndex, a_ddr); \ + if (i_ndex == JU_LEASTBYTES((INDEX), (LFBTS))) \ + return(1); \ +} +#endif + +#ifdef JUDYL + +#define CHECKINDEXNATIVE(LEAF_T, PJP, IDX, INDEX) \ +if (((LEAF_T *)((PJP)->jp_LIndex))[(IDX) - 1] == (LEAF_T)(INDEX)) \ + return((PPvoid_t)(P_JV((PJP)->jp_Addr) + (IDX) - 1)) + +#define CHECKLEAFNONNAT(LFBTS, PJP, INDEX, IDX, COPY) \ +{ \ + Word_t i_ndex; \ + uint8_t *a_ddr; \ + a_ddr = (PJP)->jp_LIndex + (((IDX) - 1) * (LFBTS)); \ + COPY(i_ndex, a_ddr); \ + if (i_ndex == JU_LEASTBYTES((INDEX), (LFBTS))) \ + return((PPvoid_t)(P_JV((PJP)->jp_Addr) + (IDX) - 1)); \ +} +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_15: CHECKINDEXNATIVE(uint8_t, Pjp, 15, Index); + case cJ1_JPIMMED_1_14: CHECKINDEXNATIVE(uint8_t, Pjp, 14, Index); + case cJ1_JPIMMED_1_13: CHECKINDEXNATIVE(uint8_t, Pjp, 13, Index); + case cJ1_JPIMMED_1_12: CHECKINDEXNATIVE(uint8_t, Pjp, 12, Index); + case cJ1_JPIMMED_1_11: CHECKINDEXNATIVE(uint8_t, Pjp, 11, Index); + case cJ1_JPIMMED_1_10: CHECKINDEXNATIVE(uint8_t, Pjp, 10, Index); + case cJ1_JPIMMED_1_09: CHECKINDEXNATIVE(uint8_t, Pjp, 9, Index); + case cJ1_JPIMMED_1_08: CHECKINDEXNATIVE(uint8_t, Pjp, 8, Index); +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_07: CHECKINDEXNATIVE(uint8_t, Pjp, 7, Index); + case cJU_JPIMMED_1_06: CHECKINDEXNATIVE(uint8_t, Pjp, 6, Index); + case cJU_JPIMMED_1_05: CHECKINDEXNATIVE(uint8_t, Pjp, 5, Index); + case cJU_JPIMMED_1_04: CHECKINDEXNATIVE(uint8_t, Pjp, 4, Index); +#endif + case cJU_JPIMMED_1_03: CHECKINDEXNATIVE(uint8_t, Pjp, 3, Index); + case cJU_JPIMMED_1_02: CHECKINDEXNATIVE(uint8_t, Pjp, 2, Index); + CHECKINDEXNATIVE(uint8_t, Pjp, 1, Index); + break; + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_07: CHECKINDEXNATIVE(uint16_t, Pjp, 7, Index); + case cJ1_JPIMMED_2_06: CHECKINDEXNATIVE(uint16_t, Pjp, 6, Index); + case cJ1_JPIMMED_2_05: CHECKINDEXNATIVE(uint16_t, Pjp, 5, Index); + case cJ1_JPIMMED_2_04: CHECKINDEXNATIVE(uint16_t, Pjp, 4, Index); +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_03: CHECKINDEXNATIVE(uint16_t, Pjp, 3, Index); + case cJU_JPIMMED_2_02: CHECKINDEXNATIVE(uint16_t, Pjp, 2, Index); + CHECKINDEXNATIVE(uint16_t, Pjp, 1, Index); + break; +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_05: + CHECKLEAFNONNAT(3, Pjp, Index, 5, JU_COPY3_PINDEX_TO_LONG); + case cJ1_JPIMMED_3_04: + CHECKLEAFNONNAT(3, Pjp, Index, 4, JU_COPY3_PINDEX_TO_LONG); + case cJ1_JPIMMED_3_03: + CHECKLEAFNONNAT(3, Pjp, Index, 3, JU_COPY3_PINDEX_TO_LONG); +#endif +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: + CHECKLEAFNONNAT(3, Pjp, Index, 2, JU_COPY3_PINDEX_TO_LONG); + CHECKLEAFNONNAT(3, Pjp, Index, 1, JU_COPY3_PINDEX_TO_LONG); + break; +#endif + +#if (defined(JUDY1) && defined(JU_64BIT)) + + case cJ1_JPIMMED_4_03: CHECKINDEXNATIVE(uint32_t, Pjp, 3, Index); + case cJ1_JPIMMED_4_02: CHECKINDEXNATIVE(uint32_t, Pjp, 2, Index); + CHECKINDEXNATIVE(uint32_t, Pjp, 1, Index); + break; + + case cJ1_JPIMMED_5_03: + CHECKLEAFNONNAT(5, Pjp, Index, 3, JU_COPY5_PINDEX_TO_LONG); + case cJ1_JPIMMED_5_02: + CHECKLEAFNONNAT(5, Pjp, Index, 2, JU_COPY5_PINDEX_TO_LONG); + CHECKLEAFNONNAT(5, Pjp, Index, 1, JU_COPY5_PINDEX_TO_LONG); + break; + + case cJ1_JPIMMED_6_02: + CHECKLEAFNONNAT(6, Pjp, Index, 2, JU_COPY6_PINDEX_TO_LONG); + CHECKLEAFNONNAT(6, Pjp, Index, 1, JU_COPY6_PINDEX_TO_LONG); + break; + + case cJ1_JPIMMED_7_02: + CHECKLEAFNONNAT(7, Pjp, Index, 2, JU_COPY7_PINDEX_TO_LONG); + CHECKLEAFNONNAT(7, Pjp, Index, 1, JU_COPY7_PINDEX_TO_LONG); + break; + +#endif // (JUDY1 && JU_64BIT) + + +// **************************************************************************** +// INVALID JP TYPE: + + default: + +ReturnCorrupt: + +#ifdef JUDYGETINLINE // Pjpm is known to be non-null: + JU_SET_ERRNO_NONNULL(Pjpm, JU_ERRNO_CORRUPT); +#else + JU_SET_ERRNO(PJError, JU_ERRNO_CORRUPT); +#endif + JUDY1CODE(return(JERRI );) + JUDYLCODE(return(PPJERR);) + + } // switch on JP type + +JUDY1CODE(return(0);) +JUDYLCODE(return((PPvoid_t) NULL);) + +} // Judy1Test() / JudyLGet() + + +#ifndef JUDYGETINLINE // only compile the following function once: +#ifdef DEBUG + +// **************************************************************************** +// J U D Y C H E C K P O P +// +// Given a pointer to a Judy array, traverse the entire array to ensure +// population counts add up correctly. This can catch various coding errors. +// +// Since walking the entire tree is probably time-consuming, enable this +// function by setting env parameter $CHECKPOP to first call at which to start +// checking. Note: This function is called both from insert and delete code. +// +// Note: Even though this function does nothing useful for LEAFW leaves, its +// good practice to call it anyway, and cheap too. +// +// TBD: This is a debug-only check function similar to JudyCheckSorted(), but +// since it walks the tree it is Judy1/JudyL-specific and must live in a source +// file that is built both ways. +// +// TBD: As feared, enabling this code for every insert/delete makes Judy +// deathly slow, even for a small tree (10K indexes). Its not so bad if +// present but disabled (<1% slowdown measured). Still, should it be ifdefd +// other than DEBUG and/or called less often? +// +// TBD: Should this "population checker" be expanded to a comprehensive tree +// checker? It currently detects invalid LEAFW/JP types as well as inconsistent +// pop1s. Other possible checks, all based on essentially redundant data in +// the Judy tree, include: +// +// - Zero LS bits in jp_Addr field. +// +// - Correct Dcd bits. +// +// - Consistent JP types (always descending down the tree). +// +// - Sorted linear lists in BranchLs and leaves (using JudyCheckSorted(), but +// ideally that function is already called wherever appropriate after any +// linear list is modified). +// +// - Any others possible? + +#include // for getenv() and atol(). + +static Word_t JudyCheckPopSM(Pjp_t Pjp, Word_t RootPop1); + +FUNCTION void JudyCheckPop( + Pvoid_t PArray) +{ +static bool_t checked = FALSE; // already checked env parameter. +static bool_t enabled = FALSE; // env parameter set. +static bool_t active = FALSE; // calls >= callsmin. +static Word_t callsmin; // start point from $CHECKPOP. +static Word_t calls = 0; // times called so far. + + +// CHECK FOR EXTERNAL ENABLING: + + if (! checked) // only check once. + { + char * value; // for getenv(). + + checked = TRUE; + + if ((value = getenv("CHECKPOP")) == (char *) NULL) + { +#ifdef notdef +// Take this out because nightly tests want to be flavor-independent; its not +// OK to emit special non-error output from the debug flavor: + + (void) puts("JudyCheckPop() present but not enabled by " + "$CHECKPOP env parameter; set it to the number of " + "calls at which to begin checking"); +#endif + return; + } + + callsmin = atol(value); // note: non-number evaluates to 0. + enabled = TRUE; + + (void) printf("JudyCheckPop() present and enabled; callsmin = " + "%lu\n", callsmin); + } + else if (! enabled) return; + +// Previously or just now enabled; check if non-active or newly active: + + if (! active) + { + if (++calls < callsmin) return; + + (void) printf("JudyCheckPop() activated at call %lu\n", calls); + active = TRUE; + } + +// IGNORE LEAFW AT TOP OF TREE: + + if (JU_LEAFW_POP0(PArray) < cJU_LEAFW_MAXPOP1) // must be a LEAFW + return; + +// Check JPM pop0 against tree, recursively: +// +// Note: The traversal code in JudyCheckPopSM() is simplest when the case +// statement for each JP type compares the pop1 for that JP to its subtree (if +// any) after traversing the subtree (thats the hard part) and adding up +// actual pop1s. A top branchs JP in the JPM does not have room for a +// full-word pop1, so pass it in as a special case. + + { + Pjpm_t Pjpm = P_JPM(PArray); + (void) JudyCheckPopSM(&(Pjpm->jpm_JP), Pjpm->jpm_Pop0 + 1); + return; + } + +} // JudyCheckPop() + + +// **************************************************************************** +// J U D Y C H E C K P O P S M +// +// Recursive state machine (subroutine) for JudyCheckPop(): Given a Pjp (other +// than JPNULL*; caller should shortcut) and the root population for top-level +// branches, check the subtrees actual pop1 against its nominal value, and +// return the total pop1 for the subtree. +// +// Note: Expect RootPop1 to be ignored at lower levels, so pass down 0, which +// should pop an assertion if this expectation is violated. + +FUNCTION static Word_t JudyCheckPopSM( + Pjp_t Pjp, // top of subtree. + Word_t RootPop1) // whole array, for top-level branches only. +{ + Word_t pop1_jp; // nominal population from the JP. + Word_t pop1 = 0; // actual population at this level. + Word_t offset; // in a branch. + +#define PREPBRANCH(cPopBytes,Next) \ + pop1_jp = JU_JPBRANCH_POP0(Pjp, cPopBytes) + 1; goto Next + +assert((((Word_t) (Pjp->jp_Addr)) & 7) == 3); + switch (JU_JPTYPE(Pjp)) + { + + case cJU_JPBRANCH_L2: PREPBRANCH(2, BranchL); + case cJU_JPBRANCH_L3: PREPBRANCH(3, BranchL); +#ifdef JU_64BIT + case cJU_JPBRANCH_L4: PREPBRANCH(4, BranchL); + case cJU_JPBRANCH_L5: PREPBRANCH(5, BranchL); + case cJU_JPBRANCH_L6: PREPBRANCH(6, BranchL); + case cJU_JPBRANCH_L7: PREPBRANCH(7, BranchL); +#endif + case cJU_JPBRANCH_L: pop1_jp = RootPop1; + { + Pjbl_t Pjbl; +BranchL: + Pjbl = P_JBL(Pjp->jp_Addr); + + for (offset = 0; offset < (Pjbl->jbl_NumJPs); ++offset) + pop1 += JudyCheckPopSM((Pjbl->jbl_jp) + offset, 0); + + assert(pop1_jp == pop1); + return(pop1); + } + + case cJU_JPBRANCH_B2: PREPBRANCH(2, BranchB); + case cJU_JPBRANCH_B3: PREPBRANCH(3, BranchB); +#ifdef JU_64BIT + case cJU_JPBRANCH_B4: PREPBRANCH(4, BranchB); + case cJU_JPBRANCH_B5: PREPBRANCH(5, BranchB); + case cJU_JPBRANCH_B6: PREPBRANCH(6, BranchB); + case cJU_JPBRANCH_B7: PREPBRANCH(7, BranchB); +#endif + case cJU_JPBRANCH_B: pop1_jp = RootPop1; + { + Word_t subexp; + Word_t jpcount; + Pjbb_t Pjbb; +BranchB: + Pjbb = P_JBB(Pjp->jp_Addr); + + for (subexp = 0; subexp < cJU_NUMSUBEXPB; ++subexp) + { + jpcount = j__udyCountBitsB(JU_JBB_BITMAP(Pjbb, subexp)); + + for (offset = 0; offset < jpcount; ++offset) + { + pop1 += JudyCheckPopSM(P_JP(JU_JBB_PJP(Pjbb, subexp)) + + offset, 0); + } + } + + assert(pop1_jp == pop1); + return(pop1); + } + + case cJU_JPBRANCH_U2: PREPBRANCH(2, BranchU); + case cJU_JPBRANCH_U3: PREPBRANCH(3, BranchU); +#ifdef JU_64BIT + case cJU_JPBRANCH_U4: PREPBRANCH(4, BranchU); + case cJU_JPBRANCH_U5: PREPBRANCH(5, BranchU); + case cJU_JPBRANCH_U6: PREPBRANCH(6, BranchU); + case cJU_JPBRANCH_U7: PREPBRANCH(7, BranchU); +#endif + case cJU_JPBRANCH_U: pop1_jp = RootPop1; + { + Pjbu_t Pjbu; +BranchU: + Pjbu = P_JBU(Pjp->jp_Addr); + + for (offset = 0; offset < cJU_BRANCHUNUMJPS; ++offset) + { + if (((Pjbu->jbu_jp[offset].jp_Type) >= cJU_JPNULL1) + && ((Pjbu->jbu_jp[offset].jp_Type) <= cJU_JPNULLMAX)) + { + continue; // skip null JP to save time. + } + + pop1 += JudyCheckPopSM((Pjbu->jbu_jp) + offset, 0); + } + + assert(pop1_jp == pop1); + return(pop1); + } + + +// -- Cases below here terminate and do not recurse. -- +// +// For all of these cases except JPLEAF_B1, there is no way to check the JPs +// pop1 against the object itself; just return the pop1; but for linear leaves, +// a bounds check is possible. + +#define CHECKLEAF(MaxPop1) \ + pop1 = JU_JPLEAF_POP0(Pjp) + 1; \ + assert(pop1 >= 1); \ + assert(pop1 <= (MaxPop1)); \ + return(pop1) + +#if (defined(JUDYL) || (! defined(JU_64BIT))) + case cJU_JPLEAF1: CHECKLEAF(cJU_LEAF1_MAXPOP1); +#endif + case cJU_JPLEAF2: CHECKLEAF(cJU_LEAF2_MAXPOP1); + case cJU_JPLEAF3: CHECKLEAF(cJU_LEAF3_MAXPOP1); +#ifdef JU_64BIT + case cJU_JPLEAF4: CHECKLEAF(cJU_LEAF4_MAXPOP1); + case cJU_JPLEAF5: CHECKLEAF(cJU_LEAF5_MAXPOP1); + case cJU_JPLEAF6: CHECKLEAF(cJU_LEAF6_MAXPOP1); + case cJU_JPLEAF7: CHECKLEAF(cJU_LEAF7_MAXPOP1); +#endif + + case cJU_JPLEAF_B1: + { + Word_t subexp; + Pjlb_t Pjlb; + + pop1_jp = JU_JPLEAF_POP0(Pjp) + 1; + + Pjlb = P_JLB(Pjp->jp_Addr); + + for (subexp = 0; subexp < cJU_NUMSUBEXPL; ++subexp) + pop1 += j__udyCountBitsL(JU_JLB_BITMAP(Pjlb, subexp)); + + assert(pop1_jp == pop1); + return(pop1); + } + + JUDY1CODE(case cJ1_JPFULLPOPU1: return(cJU_JPFULLPOPU1_POP0);) + + case cJU_JPIMMED_1_01: return(1); + case cJU_JPIMMED_2_01: return(1); + case cJU_JPIMMED_3_01: return(1); +#ifdef JU_64BIT + case cJU_JPIMMED_4_01: return(1); + case cJU_JPIMMED_5_01: return(1); + case cJU_JPIMMED_6_01: return(1); + case cJU_JPIMMED_7_01: return(1); +#endif + + case cJU_JPIMMED_1_02: return(2); + case cJU_JPIMMED_1_03: return(3); +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_1_04: return(4); + case cJU_JPIMMED_1_05: return(5); + case cJU_JPIMMED_1_06: return(6); + case cJU_JPIMMED_1_07: return(7); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_1_08: return(8); + case cJ1_JPIMMED_1_09: return(9); + case cJ1_JPIMMED_1_10: return(10); + case cJ1_JPIMMED_1_11: return(11); + case cJ1_JPIMMED_1_12: return(12); + case cJ1_JPIMMED_1_13: return(13); + case cJ1_JPIMMED_1_14: return(14); + case cJ1_JPIMMED_1_15: return(15); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_2_02: return(2); + case cJU_JPIMMED_2_03: return(3); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_2_04: return(4); + case cJ1_JPIMMED_2_05: return(5); + case cJ1_JPIMMED_2_06: return(6); + case cJ1_JPIMMED_2_07: return(7); +#endif + +#if (defined(JUDY1) || defined(JU_64BIT)) + case cJU_JPIMMED_3_02: return(2); +#endif +#if (defined(JUDY1) && defined(JU_64BIT)) + case cJ1_JPIMMED_3_03: return(3); + case cJ1_JPIMMED_3_04: return(4); + case cJ1_JPIMMED_3_05: return(5); + + case cJ1_JPIMMED_4_02: return(2); + case cJ1_JPIMMED_4_03: return(3); + case cJ1_JPIMMED_5_02: return(2); + case cJ1_JPIMMED_5_03: return(3); + case cJ1_JPIMMED_6_02: return(2); + case cJ1_JPIMMED_7_02: return(2); +#endif + + } // switch (JU_JPTYPE(Pjp)) + + assert(FALSE); // unrecognized JP type => corruption. + return(0); // to make some compilers happy. + +} // JudyCheckPopSM() + +#endif // DEBUG +#endif // ! JUDYGETINLINE diff --git a/libnetdata/libnetdata.c b/libnetdata/libnetdata.c new file mode 100644 index 0000000..cc04a97 --- /dev/null +++ b/libnetdata/libnetdata.c @@ -0,0 +1,1936 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "libnetdata.h" + +#ifdef __APPLE__ +#define INHERIT_NONE 0 +#endif /* __APPLE__ */ +#if defined(__FreeBSD__) || defined(__APPLE__) +# define O_NOATIME 0 +# define MADV_DONTFORK INHERIT_NONE +#endif /* __FreeBSD__ || __APPLE__*/ + +struct rlimit rlimit_nofile = { .rlim_cur = 1024, .rlim_max = 1024 }; + +#ifdef MADV_MERGEABLE +int enable_ksm = 1; +#else +int enable_ksm = 0; +#endif + +volatile sig_atomic_t netdata_exit = 0; +const char *program_version = VERSION; + +// ---------------------------------------------------------------------------- +// memory allocation functions that handle failures + +// although netdata does not use memory allocations too often (netdata tries to +// maintain its memory footprint stable during runtime, i.e. all buffers are +// allocated during initialization and are adapted to current use throughout +// its lifetime), these can be used to override the default system allocation +// routines. + +#ifdef NETDATA_TRACE_ALLOCATIONS +#warning NETDATA_TRACE_ALLOCATIONS ENABLED +#include "Judy.h" + +#if defined(HAVE_DLSYM) && defined(ENABLE_DLSYM) +#include + +typedef void (*libc_function_t)(void); + +static void *malloc_first_run(size_t size); +static void *(*libc_malloc)(size_t) = malloc_first_run; + +static void *calloc_first_run(size_t n, size_t size); +static void *(*libc_calloc)(size_t, size_t) = calloc_first_run; + +static void *realloc_first_run(void *ptr, size_t size); +static void *(*libc_realloc)(void *, size_t) = realloc_first_run; + +static void free_first_run(void *ptr); +static void (*libc_free)(void *) = free_first_run; + +static char *strdup_first_run(const char *s); +static char *(*libc_strdup)(const char *) = strdup_first_run; + +static size_t malloc_usable_size_first_run(void *ptr); +#ifdef HAVE_MALLOC_USABLE_SIZE +static size_t (*libc_malloc_usable_size)(void *) = malloc_usable_size_first_run; +#else +static size_t (*libc_malloc_usable_size)(void *) = NULL; +#endif + +static void link_system_library_function(libc_function_t *func_pptr, const char *name, bool required) { + *func_pptr = dlsym(RTLD_NEXT, name); + if(!*func_pptr && required) { + fprintf(stderr, "FATAL: Cannot find system's %s() function.\n", name); + abort(); + } +} + +static void *malloc_first_run(size_t size) { + link_system_library_function((libc_function_t *) &libc_malloc, "malloc", true); + return libc_malloc(size); +} + +static void *calloc_first_run(size_t n, size_t size) { + link_system_library_function((libc_function_t *) &libc_calloc, "calloc", true); + return libc_calloc(n, size); +} + +static void *realloc_first_run(void *ptr, size_t size) { + link_system_library_function((libc_function_t *) &libc_realloc, "realloc", true); + return libc_realloc(ptr, size); +} + +static void free_first_run(void *ptr) { + link_system_library_function((libc_function_t *) &libc_free, "free", true); + libc_free(ptr); +} + +static char *strdup_first_run(const char *s) { + link_system_library_function((libc_function_t *) &libc_strdup, "strdup", true); + return libc_strdup(s); +} + +static size_t malloc_usable_size_first_run(void *ptr) { + link_system_library_function((libc_function_t *) &libc_malloc_usable_size, "malloc_usable_size", false); + + if(libc_malloc_usable_size) + return libc_malloc_usable_size(ptr); + else + return 0; +} + +void *malloc(size_t size) { + return mallocz(size); +} + +void *calloc(size_t n, size_t size) { + return callocz(n, size); +} + +void *realloc(void *ptr, size_t size) { + return reallocz(ptr, size); +} + +void *reallocarray(void *ptr, size_t n, size_t size) { + return reallocz(ptr, n * size); +} + +void free(void *ptr) { + freez(ptr); +} + +char *strdup(const char *s) { + return strdupz(s); +} + +size_t malloc_usable_size(void *ptr) { + return mallocz_usable_size(ptr); +} +#else // !HAVE_DLSYM + +static void *(*libc_malloc)(size_t) = malloc; +static void *(*libc_calloc)(size_t, size_t) = calloc; +static void *(*libc_realloc)(void *, size_t) = realloc; +static void (*libc_free)(void *) = free; + +#ifdef HAVE_MALLOC_USABLE_SIZE +static size_t (*libc_malloc_usable_size)(void *) = malloc_usable_size; +#else +static size_t (*libc_malloc_usable_size)(void *) = NULL; +#endif + +#endif // HAVE_DLSYM + + +void posix_memfree(void *ptr) { + libc_free(ptr); +} + +Word_t JudyMalloc(Word_t Words) { + Word_t Addr; + + Addr = (Word_t) mallocz(Words * sizeof(Word_t)); + return(Addr); +} +void JudyFree(void * PWord, Word_t Words) { + (void)Words; + freez(PWord); +} +Word_t JudyMallocVirtual(Word_t Words) { + Word_t Addr; + + Addr = (Word_t) mallocz(Words * sizeof(Word_t)); + return(Addr); +} +void JudyFreeVirtual(void * PWord, Word_t Words) { + (void)Words; + freez(PWord); +} + +#define MALLOC_ALIGNMENT (sizeof(uintptr_t) * 2) +#define size_t_atomic_count(op, var, size) __atomic_## op ##_fetch(&(var), size, __ATOMIC_RELAXED) +#define size_t_atomic_bytes(op, var, size) __atomic_## op ##_fetch(&(var), ((size) % MALLOC_ALIGNMENT)?((size) + MALLOC_ALIGNMENT - ((size) % MALLOC_ALIGNMENT)):(size), __ATOMIC_RELAXED) + +struct malloc_header_signature { + uint32_t magic; + uint32_t size; + struct malloc_trace *trace; +}; + +struct malloc_header { + struct malloc_header_signature signature; + uint8_t padding[(sizeof(struct malloc_header_signature) % MALLOC_ALIGNMENT) ? MALLOC_ALIGNMENT - (sizeof(struct malloc_header_signature) % MALLOC_ALIGNMENT) : 0]; + uint8_t data[]; +}; + +static size_t malloc_header_size = sizeof(struct malloc_header); + +int malloc_trace_compare(void *A, void *B) { + struct malloc_trace *a = A; + struct malloc_trace *b = B; + return strcmp(a->function, b->function); +} + +static avl_tree_lock malloc_trace_index = { + .avl_tree = { + .root = NULL, + .compar = malloc_trace_compare}, + .rwlock = NETDATA_RWLOCK_INITIALIZER +}; + +int malloc_trace_walkthrough(int (*callback)(void *item, void *data), void *data) { + return avl_traverse_lock(&malloc_trace_index, callback, data); +} + +NEVERNULL WARNUNUSED +static struct malloc_trace *malloc_trace_find_or_create(const char *file, const char *function, size_t line) { + struct malloc_trace tmp = { + .line = line, + .function = function, + .file = file, + }; + + struct malloc_trace *t = (struct malloc_trace *)avl_search_lock(&malloc_trace_index, (avl_t *)&tmp); + if(!t) { + t = libc_calloc(1, sizeof(struct malloc_trace)); + if(!t) fatal("No memory"); + t->line = line; + t->function = function; + t->file = file; + + struct malloc_trace *t2 = (struct malloc_trace *)avl_insert_lock(&malloc_trace_index, (avl_t *)t); + if(t2 != t) + free(t); + + t = t2; + } + + if(!t) + fatal("Cannot insert to AVL"); + + return t; +} + +void malloc_trace_mmap(size_t size) { + struct malloc_trace *p = malloc_trace_find_or_create("unknown", "netdata_mmap", 1); + size_t_atomic_count(add, p->mmap_calls, 1); + size_t_atomic_count(add, p->allocations, 1); + size_t_atomic_bytes(add, p->bytes, size); +} + +void malloc_trace_munmap(size_t size) { + struct malloc_trace *p = malloc_trace_find_or_create("unknown", "netdata_mmap", 1); + size_t_atomic_count(add, p->munmap_calls, 1); + size_t_atomic_count(sub, p->allocations, 1); + size_t_atomic_bytes(sub, p->bytes, size); +} + +void *mallocz_int(size_t size, const char *file, const char *function, size_t line) { + struct malloc_trace *p = malloc_trace_find_or_create(file, function, line); + + size_t_atomic_count(add, p->malloc_calls, 1); + size_t_atomic_count(add, p->allocations, 1); + size_t_atomic_bytes(add, p->bytes, size); + + struct malloc_header *t = (struct malloc_header *)libc_malloc(malloc_header_size + size); + if (unlikely(!t)) fatal("mallocz() cannot allocate %zu bytes of memory (%zu with header).", size, malloc_header_size + size); + t->signature.magic = 0x0BADCAFE; + t->signature.trace = p; + t->signature.size = size; + +#ifdef NETDATA_INTERNAL_CHECKS + for(ssize_t i = 0; i < (ssize_t)sizeof(t->padding) ;i++) // signed to avoid compiler warning when zero-padded + t->padding[i] = 0xFF; +#endif + + return (void *)&t->data; +} + +void *callocz_int(size_t nmemb, size_t size, const char *file, const char *function, size_t line) { + struct malloc_trace *p = malloc_trace_find_or_create(file, function, line); + size = nmemb * size; + + size_t_atomic_count(add, p->calloc_calls, 1); + size_t_atomic_count(add, p->allocations, 1); + size_t_atomic_bytes(add, p->bytes, size); + + struct malloc_header *t = (struct malloc_header *)libc_calloc(1, malloc_header_size + size); + if (unlikely(!t)) fatal("mallocz() cannot allocate %zu bytes of memory (%zu with header).", size, malloc_header_size + size); + t->signature.magic = 0x0BADCAFE; + t->signature.trace = p; + t->signature.size = size; + +#ifdef NETDATA_INTERNAL_CHECKS + for(ssize_t i = 0; i < (ssize_t)sizeof(t->padding) ;i++) // signed to avoid compiler warning when zero-padded + t->padding[i] = 0xFF; +#endif + + return &t->data; +} + +char *strdupz_int(const char *s, const char *file, const char *function, size_t line) { + struct malloc_trace *p = malloc_trace_find_or_create(file, function, line); + size_t size = strlen(s) + 1; + + size_t_atomic_count(add, p->strdup_calls, 1); + size_t_atomic_count(add, p->allocations, 1); + size_t_atomic_bytes(add, p->bytes, size); + + struct malloc_header *t = (struct malloc_header *)libc_malloc(malloc_header_size + size); + if (unlikely(!t)) fatal("strdupz() cannot allocate %zu bytes of memory (%zu with header).", size, malloc_header_size + size); + t->signature.magic = 0x0BADCAFE; + t->signature.trace = p; + t->signature.size = size; + +#ifdef NETDATA_INTERNAL_CHECKS + for(ssize_t i = 0; i < (ssize_t)sizeof(t->padding) ;i++) // signed to avoid compiler warning when zero-padded + t->padding[i] = 0xFF; +#endif + + memcpy(&t->data, s, size); + return (char *)&t->data; +} + +static struct malloc_header *malloc_get_header(void *ptr, const char *caller, const char *file, const char *function, size_t line) { + uint8_t *ret = (uint8_t *)ptr - malloc_header_size; + struct malloc_header *t = (struct malloc_header *)ret; + + if(t->signature.magic != 0x0BADCAFE) { + error("pointer %p is not our pointer (called %s() from %zu@%s, %s()).", ptr, caller, line, file, function); + return NULL; + } + + return t; +} + +void *reallocz_int(void *ptr, size_t size, const char *file, const char *function, size_t line) { + if(!ptr) return mallocz_int(size, file, function, line); + + struct malloc_header *t = malloc_get_header(ptr, __FUNCTION__, file, function, line); + if(!t) + return libc_realloc(ptr, size); + + if(t->signature.size == size) return ptr; + size_t_atomic_count(add, t->signature.trace->free_calls, 1); + size_t_atomic_count(sub, t->signature.trace->allocations, 1); + size_t_atomic_bytes(sub, t->signature.trace->bytes, t->signature.size); + + struct malloc_trace *p = malloc_trace_find_or_create(file, function, line); + size_t_atomic_count(add, p->realloc_calls, 1); + size_t_atomic_count(add, p->allocations, 1); + size_t_atomic_bytes(add, p->bytes, size); + + t = (struct malloc_header *)libc_realloc(t, malloc_header_size + size); + if (unlikely(!t)) fatal("reallocz() cannot allocate %zu bytes of memory (%zu with header).", size, malloc_header_size + size); + t->signature.magic = 0x0BADCAFE; + t->signature.trace = p; + t->signature.size = size; + +#ifdef NETDATA_INTERNAL_CHECKS + for(ssize_t i = 0; i < (ssize_t)sizeof(t->padding) ;i++) // signed to avoid compiler warning when zero-padded + t->padding[i] = 0xFF; +#endif + + return (void *)&t->data; +} + +size_t mallocz_usable_size_int(void *ptr, const char *file, const char *function, size_t line) { + if(unlikely(!ptr)) return 0; + + struct malloc_header *t = malloc_get_header(ptr, __FUNCTION__, file, function, line); + if(!t) { + if(libc_malloc_usable_size) + return libc_malloc_usable_size(ptr); + else + return 0; + } + + return t->signature.size; +} + +void freez_int(void *ptr, const char *file, const char *function, size_t line) { + if(unlikely(!ptr)) return; + + struct malloc_header *t = malloc_get_header(ptr, __FUNCTION__, file, function, line); + if(!t) { + libc_free(ptr); + return; + } + + size_t_atomic_count(add, t->signature.trace->free_calls, 1); + size_t_atomic_count(sub, t->signature.trace->allocations, 1); + size_t_atomic_bytes(sub, t->signature.trace->bytes, t->signature.size); + +#ifdef NETDATA_INTERNAL_CHECKS + // it should crash if it is used after freeing it + memset(t, 0, malloc_header_size + t->signature.size); +#endif + + libc_free(t); +} +#else + +char *strdupz(const char *s) { + char *t = strdup(s); + if (unlikely(!t)) fatal("Cannot strdup() string '%s'", s); + return t; +} + +// If ptr is NULL, no operation is performed. +void freez(void *ptr) { + free(ptr); +} + +void *mallocz(size_t size) { + void *p = malloc(size); + if (unlikely(!p)) fatal("Cannot allocate %zu bytes of memory.", size); + return p; +} + +void *callocz(size_t nmemb, size_t size) { + void *p = calloc(nmemb, size); + if (unlikely(!p)) fatal("Cannot allocate %zu bytes of memory.", nmemb * size); + return p; +} + +void *reallocz(void *ptr, size_t size) { + void *p = realloc(ptr, size); + if (unlikely(!p)) fatal("Cannot re-allocate memory to %zu bytes.", size); + return p; +} + +void posix_memfree(void *ptr) { + free(ptr); +} + +#endif + +// -------------------------------------------------------------------------------------------------------------------- + +void json_escape_string(char *dst, const char *src, size_t size) { + const char *t; + char *d = dst, *e = &dst[size - 1]; + + for(t = src; *t && d < e ;t++) { + if(unlikely(*t == '\\' || *t == '"')) { + if(unlikely(d + 1 >= e)) break; + *d++ = '\\'; + } + *d++ = *t; + } + + *d = '\0'; +} + +void json_fix_string(char *s) { + unsigned char c; + while((c = (unsigned char)*s)) { + if(unlikely(c == '\\')) + *s++ = '/'; + else if(unlikely(c == '"')) + *s++ = '\''; + else if(unlikely(isspace(c) || iscntrl(c))) + *s++ = ' '; + else if(unlikely(!isprint(c) || c > 127)) + *s++ = '_'; + else + s++; + } +} + +unsigned char netdata_map_chart_names[256] = { + [0] = '\0', // + [1] = '_', // + [2] = '_', // + [3] = '_', // + [4] = '_', // + [5] = '_', // + [6] = '_', // + [7] = '_', // + [8] = '_', // + [9] = '_', // + [10] = '_', // + [11] = '_', // + [12] = '_', // + [13] = '_', // + [14] = '_', // + [15] = '_', // + [16] = '_', // + [17] = '_', // + [18] = '_', // + [19] = '_', // + [20] = '_', // + [21] = '_', // + [22] = '_', // + [23] = '_', // + [24] = '_', // + [25] = '_', // + [26] = '_', // + [27] = '_', // + [28] = '_', // + [29] = '_', // + [30] = '_', // + [31] = '_', // + [32] = '_', // + [33] = '_', // ! + [34] = '_', // " + [35] = '_', // # + [36] = '_', // $ + [37] = '_', // % + [38] = '_', // & + [39] = '_', // ' + [40] = '_', // ( + [41] = '_', // ) + [42] = '_', // * + [43] = '_', // + + [44] = '.', // , + [45] = '-', // - + [46] = '.', // . + [47] = '/', // / + [48] = '0', // 0 + [49] = '1', // 1 + [50] = '2', // 2 + [51] = '3', // 3 + [52] = '4', // 4 + [53] = '5', // 5 + [54] = '6', // 6 + [55] = '7', // 7 + [56] = '8', // 8 + [57] = '9', // 9 + [58] = '_', // : + [59] = '_', // ; + [60] = '_', // < + [61] = '_', // = + [62] = '_', // > + [63] = '_', // ? + [64] = '_', // @ + [65] = 'a', // A + [66] = 'b', // B + [67] = 'c', // C + [68] = 'd', // D + [69] = 'e', // E + [70] = 'f', // F + [71] = 'g', // G + [72] = 'h', // H + [73] = 'i', // I + [74] = 'j', // J + [75] = 'k', // K + [76] = 'l', // L + [77] = 'm', // M + [78] = 'n', // N + [79] = 'o', // O + [80] = 'p', // P + [81] = 'q', // Q + [82] = 'r', // R + [83] = 's', // S + [84] = 't', // T + [85] = 'u', // U + [86] = 'v', // V + [87] = 'w', // W + [88] = 'x', // X + [89] = 'y', // Y + [90] = 'z', // Z + [91] = '_', // [ + [92] = '/', // backslash + [93] = '_', // ] + [94] = '_', // ^ + [95] = '_', // _ + [96] = '_', // ` + [97] = 'a', // a + [98] = 'b', // b + [99] = 'c', // c + [100] = 'd', // d + [101] = 'e', // e + [102] = 'f', // f + [103] = 'g', // g + [104] = 'h', // h + [105] = 'i', // i + [106] = 'j', // j + [107] = 'k', // k + [108] = 'l', // l + [109] = 'm', // m + [110] = 'n', // n + [111] = 'o', // o + [112] = 'p', // p + [113] = 'q', // q + [114] = 'r', // r + [115] = 's', // s + [116] = 't', // t + [117] = 'u', // u + [118] = 'v', // v + [119] = 'w', // w + [120] = 'x', // x + [121] = 'y', // y + [122] = 'z', // z + [123] = '_', // { + [124] = '_', // | + [125] = '_', // } + [126] = '_', // ~ + [127] = '_', // + [128] = '_', // + [129] = '_', // + [130] = '_', // + [131] = '_', // + [132] = '_', // + [133] = '_', // + [134] = '_', // + [135] = '_', // + [136] = '_', // + [137] = '_', // + [138] = '_', // + [139] = '_', // + [140] = '_', // + [141] = '_', // + [142] = '_', // + [143] = '_', // + [144] = '_', // + [145] = '_', // + [146] = '_', // + [147] = '_', // + [148] = '_', // + [149] = '_', // + [150] = '_', // + [151] = '_', // + [152] = '_', // + [153] = '_', // + [154] = '_', // + [155] = '_', // + [156] = '_', // + [157] = '_', // + [158] = '_', // + [159] = '_', // + [160] = '_', // + [161] = '_', // + [162] = '_', // + [163] = '_', // + [164] = '_', // + [165] = '_', // + [166] = '_', // + [167] = '_', // + [168] = '_', // + [169] = '_', // + [170] = '_', // + [171] = '_', // + [172] = '_', // + [173] = '_', // + [174] = '_', // + [175] = '_', // + [176] = '_', // + [177] = '_', // + [178] = '_', // + [179] = '_', // + [180] = '_', // + [181] = '_', // + [182] = '_', // + [183] = '_', // + [184] = '_', // + [185] = '_', // + [186] = '_', // + [187] = '_', // + [188] = '_', // + [189] = '_', // + [190] = '_', // + [191] = '_', // + [192] = '_', // + [193] = '_', // + [194] = '_', // + [195] = '_', // + [196] = '_', // + [197] = '_', // + [198] = '_', // + [199] = '_', // + [200] = '_', // + [201] = '_', // + [202] = '_', // + [203] = '_', // + [204] = '_', // + [205] = '_', // + [206] = '_', // + [207] = '_', // + [208] = '_', // + [209] = '_', // + [210] = '_', // + [211] = '_', // + [212] = '_', // + [213] = '_', // + [214] = '_', // + [215] = '_', // + [216] = '_', // + [217] = '_', // + [218] = '_', // + [219] = '_', // + [220] = '_', // + [221] = '_', // + [222] = '_', // + [223] = '_', // + [224] = '_', // + [225] = '_', // + [226] = '_', // + [227] = '_', // + [228] = '_', // + [229] = '_', // + [230] = '_', // + [231] = '_', // + [232] = '_', // + [233] = '_', // + [234] = '_', // + [235] = '_', // + [236] = '_', // + [237] = '_', // + [238] = '_', // + [239] = '_', // + [240] = '_', // + [241] = '_', // + [242] = '_', // + [243] = '_', // + [244] = '_', // + [245] = '_', // + [246] = '_', // + [247] = '_', // + [248] = '_', // + [249] = '_', // + [250] = '_', // + [251] = '_', // + [252] = '_', // + [253] = '_', // + [254] = '_', // + [255] = '_' // +}; + +// make sure the supplied string +// is good for a netdata chart/dimension ID/NAME +void netdata_fix_chart_name(char *s) { + while ((*s = netdata_map_chart_names[(unsigned char) *s])) s++; +} + +unsigned char netdata_map_chart_ids[256] = { + [0] = '\0', // + [1] = '_', // + [2] = '_', // + [3] = '_', // + [4] = '_', // + [5] = '_', // + [6] = '_', // + [7] = '_', // + [8] = '_', // + [9] = '_', // + [10] = '_', // + [11] = '_', // + [12] = '_', // + [13] = '_', // + [14] = '_', // + [15] = '_', // + [16] = '_', // + [17] = '_', // + [18] = '_', // + [19] = '_', // + [20] = '_', // + [21] = '_', // + [22] = '_', // + [23] = '_', // + [24] = '_', // + [25] = '_', // + [26] = '_', // + [27] = '_', // + [28] = '_', // + [29] = '_', // + [30] = '_', // + [31] = '_', // + [32] = '_', // + [33] = '_', // ! + [34] = '_', // " + [35] = '_', // # + [36] = '_', // $ + [37] = '_', // % + [38] = '_', // & + [39] = '_', // ' + [40] = '_', // ( + [41] = '_', // ) + [42] = '_', // * + [43] = '_', // + + [44] = '.', // , + [45] = '-', // - + [46] = '.', // . + [47] = '_', // / + [48] = '0', // 0 + [49] = '1', // 1 + [50] = '2', // 2 + [51] = '3', // 3 + [52] = '4', // 4 + [53] = '5', // 5 + [54] = '6', // 6 + [55] = '7', // 7 + [56] = '8', // 8 + [57] = '9', // 9 + [58] = '_', // : + [59] = '_', // ; + [60] = '_', // < + [61] = '_', // = + [62] = '_', // > + [63] = '_', // ? + [64] = '_', // @ + [65] = 'a', // A + [66] = 'b', // B + [67] = 'c', // C + [68] = 'd', // D + [69] = 'e', // E + [70] = 'f', // F + [71] = 'g', // G + [72] = 'h', // H + [73] = 'i', // I + [74] = 'j', // J + [75] = 'k', // K + [76] = 'l', // L + [77] = 'm', // M + [78] = 'n', // N + [79] = 'o', // O + [80] = 'p', // P + [81] = 'q', // Q + [82] = 'r', // R + [83] = 's', // S + [84] = 't', // T + [85] = 'u', // U + [86] = 'v', // V + [87] = 'w', // W + [88] = 'x', // X + [89] = 'y', // Y + [90] = 'z', // Z + [91] = '_', // [ + [92] = '_', // backslash + [93] = '_', // ] + [94] = '_', // ^ + [95] = '_', // _ + [96] = '_', // ` + [97] = 'a', // a + [98] = 'b', // b + [99] = 'c', // c + [100] = 'd', // d + [101] = 'e', // e + [102] = 'f', // f + [103] = 'g', // g + [104] = 'h', // h + [105] = 'i', // i + [106] = 'j', // j + [107] = 'k', // k + [108] = 'l', // l + [109] = 'm', // m + [110] = 'n', // n + [111] = 'o', // o + [112] = 'p', // p + [113] = 'q', // q + [114] = 'r', // r + [115] = 's', // s + [116] = 't', // t + [117] = 'u', // u + [118] = 'v', // v + [119] = 'w', // w + [120] = 'x', // x + [121] = 'y', // y + [122] = 'z', // z + [123] = '_', // { + [124] = '_', // | + [125] = '_', // } + [126] = '_', // ~ + [127] = '_', // + [128] = '_', // + [129] = '_', // + [130] = '_', // + [131] = '_', // + [132] = '_', // + [133] = '_', // + [134] = '_', // + [135] = '_', // + [136] = '_', // + [137] = '_', // + [138] = '_', // + [139] = '_', // + [140] = '_', // + [141] = '_', // + [142] = '_', // + [143] = '_', // + [144] = '_', // + [145] = '_', // + [146] = '_', // + [147] = '_', // + [148] = '_', // + [149] = '_', // + [150] = '_', // + [151] = '_', // + [152] = '_', // + [153] = '_', // + [154] = '_', // + [155] = '_', // + [156] = '_', // + [157] = '_', // + [158] = '_', // + [159] = '_', // + [160] = '_', // + [161] = '_', // + [162] = '_', // + [163] = '_', // + [164] = '_', // + [165] = '_', // + [166] = '_', // + [167] = '_', // + [168] = '_', // + [169] = '_', // + [170] = '_', // + [171] = '_', // + [172] = '_', // + [173] = '_', // + [174] = '_', // + [175] = '_', // + [176] = '_', // + [177] = '_', // + [178] = '_', // + [179] = '_', // + [180] = '_', // + [181] = '_', // + [182] = '_', // + [183] = '_', // + [184] = '_', // + [185] = '_', // + [186] = '_', // + [187] = '_', // + [188] = '_', // + [189] = '_', // + [190] = '_', // + [191] = '_', // + [192] = '_', // + [193] = '_', // + [194] = '_', // + [195] = '_', // + [196] = '_', // + [197] = '_', // + [198] = '_', // + [199] = '_', // + [200] = '_', // + [201] = '_', // + [202] = '_', // + [203] = '_', // + [204] = '_', // + [205] = '_', // + [206] = '_', // + [207] = '_', // + [208] = '_', // + [209] = '_', // + [210] = '_', // + [211] = '_', // + [212] = '_', // + [213] = '_', // + [214] = '_', // + [215] = '_', // + [216] = '_', // + [217] = '_', // + [218] = '_', // + [219] = '_', // + [220] = '_', // + [221] = '_', // + [222] = '_', // + [223] = '_', // + [224] = '_', // + [225] = '_', // + [226] = '_', // + [227] = '_', // + [228] = '_', // + [229] = '_', // + [230] = '_', // + [231] = '_', // + [232] = '_', // + [233] = '_', // + [234] = '_', // + [235] = '_', // + [236] = '_', // + [237] = '_', // + [238] = '_', // + [239] = '_', // + [240] = '_', // + [241] = '_', // + [242] = '_', // + [243] = '_', // + [244] = '_', // + [245] = '_', // + [246] = '_', // + [247] = '_', // + [248] = '_', // + [249] = '_', // + [250] = '_', // + [251] = '_', // + [252] = '_', // + [253] = '_', // + [254] = '_', // + [255] = '_' // +}; + +// make sure the supplied string +// is good for a netdata chart/dimension ID/NAME +void netdata_fix_chart_id(char *s) { + while ((*s = netdata_map_chart_ids[(unsigned char) *s])) s++; +} + +/* +// http://stackoverflow.com/questions/7666509/hash-function-for-string +uint32_t simple_hash(const char *name) +{ + const char *s = name; + uint32_t hash = 5381; + int i; + + while((i = *s++)) hash = ((hash << 5) + hash) + i; + + // fprintf(stderr, "HASH: %lu %s\n", hash, name); + + return hash; +} +*/ + +/* +// http://isthe.com/chongo/tech/comp/fnv/#FNV-1a +uint32_t simple_hash(const char *name) { + unsigned char *s = (unsigned char *) name; + uint32_t hval = 0x811c9dc5; + + // FNV-1a algorithm + while (*s) { + // multiply by the 32 bit FNV magic prime mod 2^32 + // NOTE: No need to optimize with left shifts. + // GCC will use imul instruction anyway. + // Tested with 'gcc -O3 -S' + //hval += (hval<<1) + (hval<<4) + (hval<<7) + (hval<<8) + (hval<<24); + hval *= 16777619; + + // xor the bottom with the current octet + hval ^= (uint32_t) *s++; + } + + // fprintf(stderr, "HASH: %u = %s\n", hval, name); + return hval; +} + +uint32_t simple_uhash(const char *name) { + unsigned char *s = (unsigned char *) name; + uint32_t hval = 0x811c9dc5, c; + + // FNV-1a algorithm + while ((c = *s++)) { + if (unlikely(c >= 'A' && c <= 'Z')) c += 'a' - 'A'; + hval *= 16777619; + hval ^= c; + } + return hval; +} +*/ + +/* +// http://eternallyconfuzzled.com/tuts/algorithms/jsw_tut_hashing.aspx +// one at a time hash +uint32_t simple_hash(const char *name) { + unsigned char *s = (unsigned char *)name; + uint32_t h = 0; + + while(*s) { + h += *s++; + h += (h << 10); + h ^= (h >> 6); + } + + h += (h << 3); + h ^= (h >> 11); + h += (h << 15); + + // fprintf(stderr, "HASH: %u = %s\n", h, name); + + return h; +} +*/ + +void strreverse(char *begin, char *end) { + while (end > begin) { + // clearer code. + char aux = *end; + *end-- = *begin; + *begin++ = aux; + } +} + +char *strsep_on_1char(char **ptr, char c) { + if(unlikely(!ptr || !*ptr)) + return NULL; + + // remember the position we started + char *s = *ptr; + + // skip separators in front + while(*s == c) s++; + char *ret = s; + + // find the next separator + while(*s++) { + if(unlikely(*s == c)) { + *s++ = '\0'; + *ptr = s; + return ret; + } + } + + *ptr = NULL; + return ret; +} + +char *mystrsep(char **ptr, char *s) { + char *p = ""; + while (p && !p[0] && *ptr) p = strsep(ptr, s); + return (p); +} + +char *trim(char *s) { + // skip leading spaces + while (*s && isspace(*s)) s++; + if (!*s) return NULL; + + // skip tailing spaces + // this way is way faster. Writes only one NUL char. + ssize_t l = strlen(s); + if (--l >= 0) { + char *p = s + l; + while (p > s && isspace(*p)) p--; + *++p = '\0'; + } + + if (!*s) return NULL; + + return s; +} + +inline char *trim_all(char *buffer) { + char *d = buffer, *s = buffer; + + // skip spaces + while(isspace(*s)) s++; + + while(*s) { + // copy the non-space part + while(*s && !isspace(*s)) *d++ = *s++; + + // add a space if we have to + if(*s && isspace(*s)) { + *d++ = ' '; + s++; + } + + // skip spaces + while(isspace(*s)) s++; + } + + *d = '\0'; + + if(d > buffer) { + d--; + if(isspace(*d)) *d = '\0'; + } + + if(!buffer[0]) return NULL; + return buffer; +} + +static int memory_file_open(const char *filename, size_t size) { + // info("memory_file_open('%s', %zu", filename, size); + + int fd = open(filename, O_RDWR | O_CREAT | O_NOATIME, 0664); + if (fd != -1) { + if (lseek(fd, size, SEEK_SET) == (off_t) size) { + if (write(fd, "", 1) == 1) { + if (ftruncate(fd, size)) + error("Cannot truncate file '%s' to size %zu. Will use the larger file.", filename, size); + } + else error("Cannot write to file '%s' at position %zu.", filename, size); + } + else error("Cannot seek file '%s' to size %zu.", filename, size); + } + else error("Cannot create/open file '%s'.", filename); + + return fd; +} + +static inline int madvise_sequential(void *mem, size_t len) { + static int logger = 1; + int ret = madvise(mem, len, MADV_SEQUENTIAL); + + if (ret != 0 && logger-- > 0) error("madvise(MADV_SEQUENTIAL) failed."); + return ret; +} + +static inline int madvise_dontfork(void *mem, size_t len) { + static int logger = 1; + int ret = madvise(mem, len, MADV_DONTFORK); + + if (ret != 0 && logger-- > 0) error("madvise(MADV_DONTFORK) failed."); + return ret; +} + +static inline int madvise_willneed(void *mem, size_t len) { + static int logger = 1; + int ret = madvise(mem, len, MADV_WILLNEED); + + if (ret != 0 && logger-- > 0) error("madvise(MADV_WILLNEED) failed."); + return ret; +} + +#if __linux__ +static inline int madvise_dontdump(void *mem, size_t len) { + static int logger = 1; + int ret = madvise(mem, len, MADV_DONTDUMP); + + if (ret != 0 && logger-- > 0) error("madvise(MADV_DONTDUMP) failed."); + return ret; +} +#else +static inline int madvise_dontdump(void *mem, size_t len) { + UNUSED(mem); + UNUSED(len); + + return 0; +} +#endif + +static inline int madvise_mergeable(void *mem, size_t len) { +#ifdef MADV_MERGEABLE + static int logger = 1; + int ret = madvise(mem, len, MADV_MERGEABLE); + + if (ret != 0 && logger-- > 0) error("madvise(MADV_MERGEABLE) failed."); + return ret; +#else + UNUSED(mem); + UNUSED(len); + + return 0; +#endif +} + +void *netdata_mmap(const char *filename, size_t size, int flags, int ksm) { + // info("netdata_mmap('%s', %zu", filename, size); + + // MAP_SHARED is used in memory mode map + // MAP_PRIVATE is used in memory mode ram and save + + if(unlikely(!(flags & MAP_SHARED) && !(flags & MAP_PRIVATE))) + fatal("Neither MAP_SHARED or MAP_PRIVATE were given to netdata_mmap()"); + + if(unlikely((flags & MAP_SHARED) && (flags & MAP_PRIVATE))) + fatal("Both MAP_SHARED and MAP_PRIVATE were given to netdata_mmap()"); + + if(unlikely((flags & MAP_SHARED) && (!filename || !*filename))) + fatal("MAP_SHARED requested, without a filename to netdata_mmap()"); + + // don't enable ksm is the global setting is disabled + if(unlikely(!enable_ksm)) ksm = 0; + + // KSM only merges anonymous (private) pages, never pagecache (file) pages + // but MAP_PRIVATE without MAP_ANONYMOUS it fails too, so we need it always + if((flags & MAP_PRIVATE)) flags |= MAP_ANONYMOUS; + + int fd = -1; + void *mem = MAP_FAILED; + + if(filename && *filename) { + // open/create the file to be used + fd = memory_file_open(filename, size); + if(fd == -1) goto cleanup; + } + + int fd_for_mmap = fd; + if(fd != -1 && (flags & MAP_PRIVATE)) { + // this is MAP_PRIVATE allocation + // no need for mmap() to use our fd + // we will copy the file into the memory allocated + fd_for_mmap = -1; + } + + mem = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, fd_for_mmap, 0); + if (mem != MAP_FAILED) { + +#ifdef NETDATA_TRACE_ALLOCATIONS + malloc_trace_mmap(size); +#endif + + // if we have a file open, but we didn't give it to mmap(), + // we have to read the file into the memory block we allocated + if(fd != -1 && fd_for_mmap == -1) { + if (lseek(fd, 0, SEEK_SET) == 0) { + if (read(fd, mem, size) != (ssize_t) size) + info("Cannot read from file '%s'", filename); + } + else info("Cannot seek to beginning of file '%s'.", filename); + } + + madvise_sequential(mem, size); + madvise_dontfork(mem, size); + madvise_dontdump(mem, size); + if(flags & MAP_SHARED) madvise_willneed(mem, size); + if(ksm) madvise_mergeable(mem, size); + } + +cleanup: + if(fd != -1) close(fd); + if(mem == MAP_FAILED) return NULL; + errno = 0; + return mem; +} + +int netdata_munmap(void *ptr, size_t size) { +#ifdef NETDATA_TRACE_ALLOCATIONS + malloc_trace_munmap(size); +#endif + return munmap(ptr, size); +} + +int memory_file_save(const char *filename, void *mem, size_t size) { + char tmpfilename[FILENAME_MAX + 1]; + + snprintfz(tmpfilename, FILENAME_MAX, "%s.%ld.tmp", filename, (long) getpid()); + + int fd = open(tmpfilename, O_RDWR | O_CREAT | O_NOATIME, 0664); + if (fd < 0) { + error("Cannot create/open file '%s'.", filename); + return -1; + } + + if (write(fd, mem, size) != (ssize_t) size) { + error("Cannot write to file '%s' %ld bytes.", filename, (long) size); + close(fd); + return -1; + } + + close(fd); + + if (rename(tmpfilename, filename)) { + error("Cannot rename '%s' to '%s'", tmpfilename, filename); + return -1; + } + + return 0; +} + +int fd_is_valid(int fd) { + return fcntl(fd, F_GETFD) != -1 || errno != EBADF; +} + +char *fgets_trim_len(char *buf, size_t buf_size, FILE *fp, size_t *len) { + char *s = fgets(buf, (int)buf_size, fp); + if (!s) return NULL; + + char *t = s; + if (*t != '\0') { + // find the string end + while (*++t != '\0'); + + // trim trailing spaces/newlines/tabs + while (--t > s && *t == '\n') + *t = '\0'; + } + + if (len) + *len = t - s + 1; + + return s; +} + +int vsnprintfz(char *dst, size_t n, const char *fmt, va_list args) { + if(unlikely(!n)) return 0; + + int size = vsnprintf(dst, n, fmt, args); + dst[n - 1] = '\0'; + + if (unlikely((size_t) size > n)) size = (int)n; + + return size; +} + +int snprintfz(char *dst, size_t n, const char *fmt, ...) { + va_list args; + + va_start(args, fmt); + int ret = vsnprintfz(dst, n, fmt, args); + va_end(args); + + return ret; +} + +/* +// poor man cycle counting +static unsigned long tsc; +void begin_tsc(void) { + unsigned long a, d; + asm volatile ("cpuid\nrdtsc" : "=a" (a), "=d" (d) : "0" (0) : "ebx", "ecx"); + tsc = ((unsigned long)d << 32) | (unsigned long)a; +} +unsigned long end_tsc(void) { + unsigned long a, d; + asm volatile ("rdtscp" : "=a" (a), "=d" (d) : : "ecx"); + return (((unsigned long)d << 32) | (unsigned long)a) - tsc; +} +*/ + +int recursively_delete_dir(const char *path, const char *reason) { + DIR *dir = opendir(path); + if(!dir) { + error("Cannot read %s directory to be deleted '%s'", reason?reason:"", path); + return -1; + } + + int ret = 0; + struct dirent *de = NULL; + while((de = readdir(dir))) { + if(de->d_type == DT_DIR + && ( + (de->d_name[0] == '.' && de->d_name[1] == '\0') + || (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0') + )) + continue; + + char fullpath[FILENAME_MAX + 1]; + snprintfz(fullpath, FILENAME_MAX, "%s/%s", path, de->d_name); + + if(de->d_type == DT_DIR) { + int r = recursively_delete_dir(fullpath, reason); + if(r > 0) ret += r; + continue; + } + + info("Deleting %s file '%s'", reason?reason:"", fullpath); + if(unlikely(unlink(fullpath) == -1)) + error("Cannot delete %s file '%s'", reason?reason:"", fullpath); + else + ret++; + } + + info("Deleting empty directory '%s'", path); + if(unlikely(rmdir(path) == -1)) + error("Cannot delete empty directory '%s'", path); + else + ret++; + + closedir(dir); + + return ret; +} + +static int is_virtual_filesystem(const char *path, char **reason) { + +#if defined(__APPLE__) || defined(__FreeBSD__) + (void)path; + (void)reason; +#else + struct statfs stat; + // stat.f_fsid.__val[0] is a file system id + // stat.f_fsid.__val[1] is the inode + // so their combination uniquely identifies the file/dir + + if (statfs(path, &stat) == -1) { + if(reason) *reason = "failed to statfs()"; + return -1; + } + + if(stat.f_fsid.__val[0] != 0 || stat.f_fsid.__val[1] != 0) { + errno = EINVAL; + if(reason) *reason = "is not a virtual file system"; + return -1; + } +#endif + + return 0; +} + +int verify_netdata_host_prefix() { + if(!netdata_configured_host_prefix) + netdata_configured_host_prefix = ""; + + if(!*netdata_configured_host_prefix) + return 0; + + char buffer[FILENAME_MAX + 1]; + char *path = netdata_configured_host_prefix; + char *reason = "unknown reason"; + errno = 0; + + struct stat sb; + if (stat(path, &sb) == -1) { + reason = "failed to stat()"; + goto failed; + } + + if((sb.st_mode & S_IFMT) != S_IFDIR) { + errno = EINVAL; + reason = "is not a directory"; + goto failed; + } + + path = buffer; + snprintfz(path, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix); + if(is_virtual_filesystem(path, &reason) == -1) + goto failed; + + snprintfz(path, FILENAME_MAX, "%s/sys", netdata_configured_host_prefix); + if(is_virtual_filesystem(path, &reason) == -1) + goto failed; + + if(netdata_configured_host_prefix && *netdata_configured_host_prefix) + info("Using host prefix directory '%s'", netdata_configured_host_prefix); + + return 0; + +failed: + error("Ignoring host prefix '%s': path '%s' %s", netdata_configured_host_prefix, path, reason); + netdata_configured_host_prefix = ""; + return -1; +} + +char *strdupz_path_subpath(const char *path, const char *subpath) { + if(unlikely(!path || !*path)) path = "."; + if(unlikely(!subpath)) subpath = ""; + + // skip trailing slashes in path + size_t len = strlen(path); + while(len > 0 && path[len - 1] == '/') len--; + + // skip leading slashes in subpath + while(subpath[0] == '/') subpath++; + + // if the last character in path is / and (there is a subpath or path is now empty) + // keep the trailing slash in path and remove the additional slash + char *slash = "/"; + if(path[len] == '/' && (*subpath || len == 0)) { + slash = ""; + len++; + } + else if(!*subpath) { + // there is no subpath + // no need for trailing slash + slash = ""; + } + + char buffer[FILENAME_MAX + 1]; + snprintfz(buffer, FILENAME_MAX, "%.*s%s%s", (int)len, path, slash, subpath); + return strdupz(buffer); +} + +int path_is_dir(const char *path, const char *subpath) { + char *s = strdupz_path_subpath(path, subpath); + + size_t max_links = 100; + + int is_dir = 0; + struct stat statbuf; + while(max_links-- && stat(s, &statbuf) == 0) { + if((statbuf.st_mode & S_IFMT) == S_IFDIR) { + is_dir = 1; + break; + } + else if((statbuf.st_mode & S_IFMT) == S_IFLNK) { + char buffer[FILENAME_MAX + 1]; + ssize_t l = readlink(s, buffer, FILENAME_MAX); + if(l > 0) { + buffer[l] = '\0'; + freez(s); + s = strdupz(buffer); + continue; + } + else { + is_dir = 0; + break; + } + } + else { + is_dir = 0; + break; + } + } + + freez(s); + return is_dir; +} + +int path_is_file(const char *path, const char *subpath) { + char *s = strdupz_path_subpath(path, subpath); + + size_t max_links = 100; + + int is_file = 0; + struct stat statbuf; + while(max_links-- && stat(s, &statbuf) == 0) { + if((statbuf.st_mode & S_IFMT) == S_IFREG) { + is_file = 1; + break; + } + else if((statbuf.st_mode & S_IFMT) == S_IFLNK) { + char buffer[FILENAME_MAX + 1]; + ssize_t l = readlink(s, buffer, FILENAME_MAX); + if(l > 0) { + buffer[l] = '\0'; + freez(s); + s = strdupz(buffer); + continue; + } + else { + is_file = 0; + break; + } + } + else { + is_file = 0; + break; + } + } + + freez(s); + return is_file; +} + +void recursive_config_double_dir_load(const char *user_path, const char *stock_path, const char *subpath, int (*callback)(const char *filename, void *data), void *data, size_t depth) { + if(depth > 3) { + error("CONFIG: Max directory depth reached while reading user path '%s', stock path '%s', subpath '%s'", user_path, stock_path, subpath); + return; + } + + char *udir = strdupz_path_subpath(user_path, subpath); + char *sdir = strdupz_path_subpath(stock_path, subpath); + + debug(D_HEALTH, "CONFIG traversing user-config directory '%s', stock config directory '%s'", udir, sdir); + + DIR *dir = opendir(udir); + if (!dir) { + error("CONFIG cannot open user-config directory '%s'.", udir); + } + else { + struct dirent *de = NULL; + while((de = readdir(dir))) { + if(de->d_type == DT_DIR || de->d_type == DT_LNK) { + if( !de->d_name[0] || + (de->d_name[0] == '.' && de->d_name[1] == '\0') || + (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0') + ) { + debug(D_HEALTH, "CONFIG ignoring user-config directory '%s/%s'", udir, de->d_name); + continue; + } + + if(path_is_dir(udir, de->d_name)) { + recursive_config_double_dir_load(udir, sdir, de->d_name, callback, data, depth + 1); + continue; + } + } + + if(de->d_type == DT_UNKNOWN || de->d_type == DT_REG || de->d_type == DT_LNK) { + size_t len = strlen(de->d_name); + if(path_is_file(udir, de->d_name) && + len > 5 && !strcmp(&de->d_name[len - 5], ".conf")) { + char *filename = strdupz_path_subpath(udir, de->d_name); + debug(D_HEALTH, "CONFIG calling callback for user file '%s'", filename); + callback(filename, data); + freez(filename); + continue; + } + } + + debug(D_HEALTH, "CONFIG ignoring user-config file '%s/%s' of type %d", udir, de->d_name, (int)de->d_type); + } + + closedir(dir); + } + + debug(D_HEALTH, "CONFIG traversing stock config directory '%s', user config directory '%s'", sdir, udir); + + dir = opendir(sdir); + if (!dir) { + error("CONFIG cannot open stock config directory '%s'.", sdir); + } + else { + if (strcmp(udir, sdir)) { + struct dirent *de = NULL; + while((de = readdir(dir))) { + if(de->d_type == DT_DIR || de->d_type == DT_LNK) { + if( !de->d_name[0] || + (de->d_name[0] == '.' && de->d_name[1] == '\0') || + (de->d_name[0] == '.' && de->d_name[1] == '.' && de->d_name[2] == '\0') + ) { + debug(D_HEALTH, "CONFIG ignoring stock config directory '%s/%s'", sdir, de->d_name); + continue; + } + + if(path_is_dir(sdir, de->d_name)) { + // we recurse in stock subdirectory, only when there is no corresponding + // user subdirectory - to avoid reading the files twice + + if(!path_is_dir(udir, de->d_name)) + recursive_config_double_dir_load(udir, sdir, de->d_name, callback, data, depth + 1); + + continue; + } + } + + if(de->d_type == DT_UNKNOWN || de->d_type == DT_REG || de->d_type == DT_LNK) { + size_t len = strlen(de->d_name); + if(path_is_file(sdir, de->d_name) && !path_is_file(udir, de->d_name) && + len > 5 && !strcmp(&de->d_name[len - 5], ".conf")) { + char *filename = strdupz_path_subpath(sdir, de->d_name); + debug(D_HEALTH, "CONFIG calling callback for stock file '%s'", filename); + callback(filename, data); + freez(filename); + continue; + } + + } + + debug(D_HEALTH, "CONFIG ignoring stock-config file '%s/%s' of type %d", udir, de->d_name, (int)de->d_type); + } + } + closedir(dir); + } + + debug(D_HEALTH, "CONFIG done traversing user-config directory '%s', stock config directory '%s'", udir, sdir); + + freez(udir); + freez(sdir); +} + +// Returns the number of bytes read from the file if file_size is not NULL. +// The actual buffer has an extra byte set to zero (not included in the count). +char *read_by_filename(char *filename, long *file_size) +{ + FILE *f = fopen(filename, "r"); + if (!f) + return NULL; + if (fseek(f, 0, SEEK_END) < 0) { + fclose(f); + return NULL; + } + long size = ftell(f); + if (size <= 0 || fseek(f, 0, SEEK_END) < 0) { + fclose(f); + return NULL; + } + char *contents = callocz(size + 1, 1); + if (!contents) { + fclose(f); + return NULL; + } + if (fseek(f, 0, SEEK_SET) < 0) { + fclose(f); + freez(contents); + return NULL; + } + size_t res = fread(contents, 1, size, f); + if ( res != (size_t)size) { + freez(contents); + fclose(f); + return NULL; + } + fclose(f); + if (file_size) + *file_size = size; + return contents; +} + +char *find_and_replace(const char *src, const char *find, const char *replace, const char *where) +{ + size_t size = strlen(src) + 1; + size_t find_len = strlen(find); + size_t repl_len = strlen(replace); + char *value, *dst; + + if (likely(where)) + size += (repl_len - find_len); + + value = mallocz(size); + dst = value; + + if (likely(where)) { + size_t count = where - src; + + memmove(dst, src, count); + src += count; + dst += count; + + memmove(dst, replace, repl_len); + src += find_len; + dst += repl_len; + } + + strcpy(dst, src); + + return value; +} + +inline int pluginsd_space(char c) { + switch(c) { + case ' ': + case '\t': + case '\r': + case '\n': + case '=': + return 1; + + default: + return 0; + } +} + +inline int config_isspace(char c) +{ + switch (c) { + case ' ': + case '\t': + case '\r': + case '\n': + case ',': + return 1; + + default: + return 0; + } +} + +// split a text into words, respecting quotes +inline size_t quoted_strings_splitter(char *str, char **words, size_t max_words, int (*custom_isspace)(char), char *recover_input, char **recover_location, int max_recover) +{ + char *s = str, quote = 0; + size_t i = 0; + int rec = 0; + char *recover = recover_input; + + // skip all white space + while (unlikely(custom_isspace(*s))) + s++; + + // check for quote + if (unlikely(*s == '\'' || *s == '"')) { + quote = *s; // remember the quote + s++; // skip the quote + } + + // store the first word + words[i++] = s; + + // while we have something + while (likely(*s)) { + // if it is escape + if (unlikely(*s == '\\' && s[1])) { + s += 2; + continue; + } + + // if it is quote + else if (unlikely(*s == quote)) { + quote = 0; + if (recover && rec < max_recover) { + recover_location[rec++] = s; + *recover++ = *s; + } + *s = ' '; + continue; + } + + // if it is a space + else if (unlikely(quote == 0 && custom_isspace(*s))) { + // terminate the word + if (recover && rec < max_recover) { + if (!rec || recover_location[rec-1] != s) { + recover_location[rec++] = s; + *recover++ = *s; + } + } + *s++ = '\0'; + + // skip all white space + while (likely(custom_isspace(*s))) + s++; + + // check for quote + if (unlikely(*s == '\'' || *s == '"')) { + quote = *s; // remember the quote + s++; // skip the quote + } + + // if we reached the end, stop + if (unlikely(!*s)) + break; + + // store the next word + if (likely(i < max_words)) + words[i++] = s; + else + break; + } + + // anything else + else + s++; + } + + if (i < max_words) + words[i] = NULL; + + return i; +} + +inline size_t pluginsd_split_words(char *str, char **words, size_t max_words, char *recover_input, char **recover_location, int max_recover) +{ + return quoted_strings_splitter(str, words, max_words, pluginsd_space, recover_input, recover_location, max_recover); +} + +bool bitmap256_get_bit(BITMAP256 *ptr, uint8_t idx) { + if (unlikely(!ptr)) + return false; + return (ptr->data[idx / 64] & (1ULL << (idx % 64))); +} + +void bitmap256_set_bit(BITMAP256 *ptr, uint8_t idx, bool value) { + if (unlikely(!ptr)) + return; + if (likely(value)) + ptr->data[idx / 64] |= (1ULL << (idx % 64)); + else + ptr->data[idx / 64] &= ~(1ULL << (idx % 64)); +} + +bool run_command_and_copy_output_to_stdout(const char *command, int max_line_length) { + pid_t pid; + FILE *fp = netdata_popen(command, &pid, NULL); + + if(fp) { + char buffer[max_line_length + 1]; + while (fgets(buffer, max_line_length, fp)) + fprintf(stdout, "%s", buffer); + } + else { + error("Failed to execute command '%s'.", command); + return false; + } + + netdata_pclose(NULL, fp, pid); + return true; +} diff --git a/libnetdata/libnetdata.h b/libnetdata/libnetdata.h new file mode 100644 index 0000000..58eaa9d --- /dev/null +++ b/libnetdata/libnetdata.h @@ -0,0 +1,507 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_LIB_H +#define NETDATA_LIB_H 1 + +# ifdef __cplusplus +extern "C" { +# endif + +#ifdef HAVE_CONFIG_H +#include +#endif + +#if defined(NETDATA_DEV_MODE) && !defined(NETDATA_INTERNAL_CHECKS) +#define NETDATA_INTERNAL_CHECKS 1 +#endif + +// NETDATA_TRACE_ALLOCATIONS does not work under musl libc, so don't enable it +//#if defined(NETDATA_INTERNAL_CHECKS) && !defined(NETDATA_TRACE_ALLOCATIONS) +//#define NETDATA_TRACE_ALLOCATIONS 1 +//#endif + +#define OS_LINUX 1 +#define OS_FREEBSD 2 +#define OS_MACOS 3 + + +// ---------------------------------------------------------------------------- +// system include files for all netdata C programs + +/* select the memory allocator, based on autoconf findings */ +#if defined(ENABLE_JEMALLOC) + +#if defined(HAVE_JEMALLOC_JEMALLOC_H) +#include +#else // !defined(HAVE_JEMALLOC_JEMALLOC_H) +#include +#endif // !defined(HAVE_JEMALLOC_JEMALLOC_H) + +#elif defined(ENABLE_TCMALLOC) + +#include + +#else /* !defined(ENABLE_JEMALLOC) && !defined(ENABLE_TCMALLOC) */ + +#if !(defined(__FreeBSD__) || defined(__APPLE__)) +#include +#endif /* __FreeBSD__ || __APPLE__ */ + +#endif /* !defined(ENABLE_JEMALLOC) && !defined(ENABLE_TCMALLOC) */ + +// ---------------------------------------------------------------------------- + +#if defined(__FreeBSD__) +#include +#define NETDATA_OS_TYPE "freebsd" +#elif defined(__APPLE__) +#define NETDATA_OS_TYPE "macos" +#else +#define NETDATA_OS_TYPE "linux" +#endif /* __FreeBSD__, __APPLE__*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// CentOS 7 has older version that doesn't define this +// same goes for MacOS +#ifndef UUID_STR_LEN +#define UUID_STR_LEN (37) +#endif + +#ifdef HAVE_NETINET_IN_H +#include +#endif + +#ifdef HAVE_RESOLV_H +#include +#endif + +#ifdef HAVE_NETDB_H +#include +#endif + +#ifdef HAVE_SYS_PRCTL_H +#include +#endif + +#ifdef HAVE_SYS_STAT_H +#include +#endif + +#ifdef HAVE_SYS_VFS_H +#include +#endif + +#ifdef HAVE_SYS_STATFS_H +#include +#endif + +#ifdef HAVE_LINUX_MAGIC_H +#include +#endif + +#ifdef HAVE_SYS_MOUNT_H +#include +#endif + +#ifdef HAVE_SYS_STATVFS_H +#include +#endif + +// #1408 +#ifdef MAJOR_IN_MKDEV +#include +#endif +#ifdef MAJOR_IN_SYSMACROS +#include +#endif + +#ifdef STORAGE_WITH_MATH +#include +#include +#endif + +#if defined(HAVE_INTTYPES_H) +#include +#elif defined(HAVE_STDINT_H) +#include +#endif + +#ifdef NETDATA_WITH_ZLIB +#include +#endif + +#ifdef HAVE_CAPABILITY +#include +#endif + + +// ---------------------------------------------------------------------------- +// netdata common definitions + +#ifdef __GNUC__ +#define GCC_VERSION (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#endif // __GNUC__ + +#ifdef HAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL +#define NEVERNULL __attribute__((returns_nonnull)) +#else +#define NEVERNULL +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_NOINLINE +#define NOINLINE __attribute__((noinline)) +#else +#define NOINLINE +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_MALLOC +#define MALLOCLIKE __attribute__((malloc)) +#else +#define MALLOCLIKE +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT +#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a))) +#else +#define PRINTFLIKE(f, a) +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_NORETURN +#define NORETURN __attribute__ ((noreturn)) +#else +#define NORETURN +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT +#define WARNUNUSED __attribute__ ((warn_unused_result)) +#else +#define WARNUNUSED +#endif + +#define ABS(x) (((x) < 0)? (-(x)) : (x)) +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) + +#define GUID_LEN 36 + +// --------------------------------------------------------------------------------------------- +// double linked list management + +#define DOUBLE_LINKED_LIST_PREPEND_UNSAFE(head, item, prev, next) \ + do { \ + (item)->next = (head); \ + \ + if(likely(head)) { \ + (item)->prev = (head)->prev; \ + (head)->prev = (item); \ + } \ + else \ + (item)->prev = (item); \ + \ + (head) = (item); \ + } while (0) + +#define DOUBLE_LINKED_LIST_APPEND_UNSAFE(head, item, prev, next) \ + do { \ + if(likely(head)) { \ + (item)->prev = (head)->prev; \ + (head)->prev->next = (item); \ + (head)->prev = (item); \ + (item)->next = NULL; \ + } \ + else { \ + (head) = (item); \ + (head)->prev = (head); \ + (head)->next = NULL; \ + } \ + \ + } while (0) + +#define DOUBLE_LINKED_LIST_REMOVE_UNSAFE(head, item, prev, next) \ + do { \ + fatal_assert((head) != NULL); \ + fatal_assert((item)->prev != NULL); \ + \ + if((item)->prev == (item)) { \ + /* it is the only item in the list */ \ + (head) = NULL; \ + } \ + else if((item) == (head)) { \ + /* it is the first item */ \ + (item)->next->prev = (item)->prev; \ + (head) = (item)->next; \ + } \ + else { \ + (item)->prev->next = (item)->next; \ + if ((item)->next) { \ + (item)->next->prev = (item)->prev; \ + } \ + else { \ + (head)->prev = (item)->prev; \ + } \ + } \ + \ + (item)->next = NULL; \ + (item)->prev = NULL; \ + } while (0) + +#define DOUBLE_LINKED_LIST_FOREACH_FORWARD(head, var, prev, next) \ + for ((var) = (head); (var) ; (var) = (var)->next) + +#define DOUBLE_LINKED_LIST_FOREACH_BACKWARD(head, var, prev, next) \ + for ((var) = (head)?(head)->prev:NULL; (var) && (var) != (head)->prev ; (var) = (var)->prev) + +// --------------------------------------------------------------------------------------------- + + +void netdata_fix_chart_id(char *s); +void netdata_fix_chart_name(char *s); + +void strreverse(char* begin, char* end); +char *mystrsep(char **ptr, char *s); +char *trim(char *s); // remove leading and trailing spaces; may return NULL +char *trim_all(char *buffer); // like trim(), but also remove duplicate spaces inside the string; may return NULL + +int vsnprintfz(char *dst, size_t n, const char *fmt, va_list args); +int snprintfz(char *dst, size_t n, const char *fmt, ...) PRINTFLIKE(3, 4); + +// memory allocation functions that handle failures +#ifdef NETDATA_TRACE_ALLOCATIONS +int malloc_trace_walkthrough(int (*callback)(void *item, void *data), void *data); + +#define strdupz(s) strdupz_int(s, __FILE__, __FUNCTION__, __LINE__) +#define callocz(nmemb, size) callocz_int(nmemb, size, __FILE__, __FUNCTION__, __LINE__) +#define mallocz(size) mallocz_int(size, __FILE__, __FUNCTION__, __LINE__) +#define reallocz(ptr, size) reallocz_int(ptr, size, __FILE__, __FUNCTION__, __LINE__) +#define freez(ptr) freez_int(ptr, __FILE__, __FUNCTION__, __LINE__) +#define mallocz_usable_size(ptr) mallocz_usable_size_int(ptr, __FILE__, __FUNCTION__, __LINE__) + +char *strdupz_int(const char *s, const char *file, const char *function, size_t line); +void *callocz_int(size_t nmemb, size_t size, const char *file, const char *function, size_t line); +void *mallocz_int(size_t size, const char *file, const char *function, size_t line); +void *reallocz_int(void *ptr, size_t size, const char *file, const char *function, size_t line); +void freez_int(void *ptr, const char *file, const char *function, size_t line); +size_t mallocz_usable_size_int(void *ptr, const char *file, const char *function, size_t line); + +#else // NETDATA_TRACE_ALLOCATIONS +char *strdupz(const char *s) MALLOCLIKE NEVERNULL; +void *callocz(size_t nmemb, size_t size) MALLOCLIKE NEVERNULL; +void *mallocz(size_t size) MALLOCLIKE NEVERNULL; +void *reallocz(void *ptr, size_t size) MALLOCLIKE NEVERNULL; +void freez(void *ptr); +#endif // NETDATA_TRACE_ALLOCATIONS + +void posix_memfree(void *ptr); + +void json_escape_string(char *dst, const char *src, size_t size); +void json_fix_string(char *s); + +void *netdata_mmap(const char *filename, size_t size, int flags, int ksm); +int netdata_munmap(void *ptr, size_t size); +int memory_file_save(const char *filename, void *mem, size_t size); + +int fd_is_valid(int fd); + +extern struct rlimit rlimit_nofile; + +extern int enable_ksm; + +char *fgets_trim_len(char *buf, size_t buf_size, FILE *fp, size_t *len); + +int verify_netdata_host_prefix(); + +int recursively_delete_dir(const char *path, const char *reason); + +extern volatile sig_atomic_t netdata_exit; + +extern const char *program_version; + +char *strdupz_path_subpath(const char *path, const char *subpath); +int path_is_dir(const char *path, const char *subpath); +int path_is_file(const char *path, const char *subpath); +void recursive_config_double_dir_load( + const char *user_path + , const char *stock_path + , const char *subpath + , int (*callback)(const char *filename, void *data) + , void *data + , size_t depth +); +char *read_by_filename(char *filename, long *file_size); +char *find_and_replace(const char *src, const char *find, const char *replace, const char *where); + +/* fix for alpine linux */ +#ifndef RUSAGE_THREAD +#ifdef RUSAGE_CHILDREN +#define RUSAGE_THREAD RUSAGE_CHILDREN +#endif +#endif + +#define BITS_IN_A_KILOBIT 1000 +#define KILOBITS_IN_A_MEGABIT 1000 + +/* misc. */ + +#define UNUSED(x) (void)(x) + +#ifdef __GNUC__ +#define UNUSED_FUNCTION(x) __attribute__((unused)) UNUSED_##x +#else +#define UNUSED_FUNCTION(x) UNUSED_##x +#endif + +#define error_report(x, args...) do { errno = 0; error(x, ##args); } while(0) + +// Taken from linux kernel +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +typedef struct bitmap256 { + uint64_t data[4]; +} BITMAP256; + +bool bitmap256_get_bit(BITMAP256 *ptr, uint8_t idx); +void bitmap256_set_bit(BITMAP256 *ptr, uint8_t idx, bool value); + +#define COMPRESSION_MAX_MSG_SIZE 0x4000 +#define PLUGINSD_LINE_MAX (COMPRESSION_MAX_MSG_SIZE - 1024) +int config_isspace(char c); +int pluginsd_space(char c); + +size_t quoted_strings_splitter(char *str, char **words, size_t max_words, int (*custom_isspace)(char), char *recover_input, char **recover_location, int max_recover); +size_t pluginsd_split_words(char *str, char **words, size_t max_words, char *recover_string, char **recover_location, int max_recover); + +static inline char *get_word(char **words, size_t num_words, size_t index) { + if (index >= num_words) + return NULL; + + return words[index]; +} + +bool run_command_and_copy_output_to_stdout(const char *command, int max_line_length); + +void netdata_cleanup_and_exit(int ret) NORETURN; +void send_statistics(const char *action, const char *action_result, const char *action_data); +extern char *netdata_configured_host_prefix; +#include "libjudy/src/Judy.h" +#include "os.h" +#include "storage_number/storage_number.h" +#include "threads/threads.h" +#include "buffer/buffer.h" +#include "locks/locks.h" +#include "circular_buffer/circular_buffer.h" +#include "avl/avl.h" +#include "inlined.h" +#include "clocks/clocks.h" +#include "completion/completion.h" +#include "popen/popen.h" +#include "simple_pattern/simple_pattern.h" +#ifdef ENABLE_HTTPS +# include "socket/security.h" +#endif +#include "socket/socket.h" +#include "config/appconfig.h" +#include "log/log.h" +#include "procfile/procfile.h" +#include "string/string.h" +#include "dictionary/dictionary.h" +#if defined(HAVE_LIBBPF) && !defined(__cplusplus) +#include "ebpf/ebpf.h" +#endif +#include "eval/eval.h" +#include "statistical/statistical.h" +#include "adaptive_resortable_list/adaptive_resortable_list.h" +#include "url/url.h" +#include "json/json.h" +#include "health/health.h" +#include "string/utf8.h" +#include "arrayalloc/arrayalloc.h" +#include "onewayalloc/onewayalloc.h" +#include "worker_utilization/worker_utilization.h" + +// BEWARE: Outside of the C code this also exists in alarm-notify.sh +#define DEFAULT_CLOUD_BASE_URL "https://api.netdata.cloud" +#define DEFAULT_CLOUD_UI_URL "https://app.netdata.cloud" + +#define RRD_STORAGE_TIERS 5 + +static inline size_t struct_natural_alignment(size_t size) __attribute__((const)); + +#define STRUCT_NATURAL_ALIGNMENT (sizeof(uintptr_t) * 2) +static inline size_t struct_natural_alignment(size_t size) { + if(unlikely(size % STRUCT_NATURAL_ALIGNMENT)) + size = size + STRUCT_NATURAL_ALIGNMENT - (size % STRUCT_NATURAL_ALIGNMENT); + + return size; +} + +#ifdef NETDATA_TRACE_ALLOCATIONS +struct malloc_trace { + avl_t avl; + + const char *function; + const char *file; + size_t line; + + size_t malloc_calls; + size_t calloc_calls; + size_t realloc_calls; + size_t strdup_calls; + size_t free_calls; + + size_t mmap_calls; + size_t munmap_calls; + + size_t allocations; + size_t bytes; + + struct rrddim *rd_bytes; + struct rrddim *rd_allocations; + struct rrddim *rd_avg_alloc; + struct rrddim *rd_ops; +}; +#endif // NETDATA_TRACE_ALLOCATIONS + +# ifdef __cplusplus +} +# endif + +#endif // NETDATA_LIB_H diff --git a/libnetdata/locks/Makefile.am b/libnetdata/locks/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/locks/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/locks/README.md b/libnetdata/locks/README.md new file mode 100644 index 0000000..9132edc --- /dev/null +++ b/libnetdata/locks/README.md @@ -0,0 +1,100 @@ + + +## How to trace netdata locks + +To enable tracing rwlocks in netdata, compile netdata by setting `CFLAGS="-DNETDATA_TRACE_RWLOCKS=1"`, like this: + +``` +CFLAGS="-O1 -ggdb -DNETDATA_TRACE_RWLOCKS=1" ./netdata-installer.sh +``` + +During compilation, the compiler will log: + +``` +libnetdata/locks/locks.c:105:2: warning: #warning NETDATA_TRACE_RWLOCKS ENABLED - EXPECT A LOT OF OUTPUT [-Wcpp] + 105 | #warning NETDATA_TRACE_RWLOCKS ENABLED - EXPECT A LOT OF OUTPUT + | ^~~~~~~ +``` + +Once compiled, netdata will do the following: + +Every call to `netdata_rwlock_*()` is now measured in time. + +### logging of slow locks/unlocks + +If any call takes more than 10 usec, it will be logged like this: + +``` +RW_LOCK ON LOCK 0x0x7fbe1f2e5190: 4157038, 'ACLK_Query_2' (function build_context_param_list() 99@web/api/formatters/rrd2json.c) WAITED to UNLOCK for 29 usec. +``` + +The time can be changed by setting this `-DNETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC=20` (or whatever number) to the CFLAGS. + +### logging of long hold times + +If any lock is holded for more than 10000 usec, it will be logged like this: + +``` +RW_LOCK ON LOCK 0x0x55a20afc1b20: 4187198, 'ANALYTICS' (function analytics_gather_mutable_meta_data() 532@daemon/analytics.c) holded a 'R' for 13232 usec. +``` + +The time can be changed by setting this `-DNETDATA_TRACE_RWLOCKS_HOLD_TIME_TO_IGNORE_USEC=20000` (or whatever number) to the CFLAGS. + +### logging for probable pauses (predictive) + +The library maintains a linked-list of all the lock holders (one entry per thread). For this linked-list a mutex is used. So every call to the r/w locks now also has a mutex lock. + +If any call is expected to pause the caller (ie the caller is attempting a read lock while there is a write lock in place and vice versa), the library will log something like this: + +``` +RW_LOCK ON LOCK 0x0x5651c9fcce20: 4190039 'HEALTH' (function health_execute_pending_updates() 661@health/health.c) WANTS a 'W' lock (while holding 1 rwlocks and 1 mutexes). +There are 7 readers and 0 writers are holding the lock: + => 1: RW_LOCK: process 4190091 'WEB_SERVER[static14]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 709847 usec. + => 2: RW_LOCK: process 4190079 'WEB_SERVER[static6]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 709869 usec. + => 3: RW_LOCK: process 4190084 'WEB_SERVER[static10]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 709948 usec. + => 4: RW_LOCK: process 4190076 'WEB_SERVER[static3]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710190 usec. + => 5: RW_LOCK: process 4190092 'WEB_SERVER[static15]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710195 usec. + => 6: RW_LOCK: process 4190077 'WEB_SERVER[static4]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710208 usec. + => 7: RW_LOCK: process 4190044 'WEB_SERVER[static1]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710221 usec. +``` + +And each of the above is paired with a `GOT` log, like this: + +``` +RW_LOCK ON LOCK 0x0x5651c9fcce20: 4190039 'HEALTH' (function health_execute_pending_updates() 661@health/health.c) GOT a 'W' lock (while holding 2 rwlocks and 1 mutexes). +There are 0 readers and 1 writers are holding the lock: + => 1: RW_LOCK: process 4190039 'HEALTH' (function health_execute_pending_updates() 661@health/health.c) is having 1 'W' lock for 36 usec. +``` + +Keep in mind that the lock and log are not atomic. The list of callers is indicative (and sometimes just empty because the original holders of the lock, unlocked it until we had the chance to print their names). + +### POSIX compliance check + +The library may also log messages about POSIX unsupported cases, like this: + +``` +RW_LOCK FATAL ON LOCK 0x0x622000109290: 3609368 'PLUGIN[proc]' (function __rrdset_check_rdlock() 10@database/rrdset.c) attempts to acquire a 'W' lock. +But it is not supported by POSIX because: ALREADY HAS THIS LOCK +At this attempt, the task is holding 1 rwlocks and 1 mutexes. +There are 1 readers and 0 writers are holding the lock requested now: + => 1: RW_LOCK: process 3609368 'PLUGIN[proc]' (function rrdset_done() 1398@database/rrdset.c) is having 1 'R' lock for 0 usec. +``` + +### nested read locks + +When compiled with `-DNETDATA_TRACE_RWLOCKS_LOG_NESTED=1` the library will also detect nested read locks and print them like this: + +``` +RW_LOCK ON LOCK 0x0x7ff6ea46d190: 4140225 'WEB_SERVER[static14]' (function rrdr_json_wrapper_begin() 34@web/api/formatters/json_wrapper.c) NESTED READ LOCK REQUEST a 'R' lock (while holding 1 rwlocks and 1 mutexes). +There are 5 readers and 0 writers are holding the lock: + => 1: RW_LOCK: process 4140225 'WEB_SERVER[static14]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 216667 usec. + => 2: RW_LOCK: process 4140211 'WEB_SERVER[static6]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec. + => 3: RW_LOCK: process 4140218 'WEB_SERVER[static8]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec. + => 4: RW_LOCK: process 4140224 'WEB_SERVER[static13]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec. + => 5: RW_LOCK: process 4140227 'WEB_SERVER[static16]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec. +``` + + + diff --git a/libnetdata/locks/locks.c b/libnetdata/locks/locks.c new file mode 100644 index 0000000..f7191be --- /dev/null +++ b/libnetdata/locks/locks.c @@ -0,0 +1,757 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +#ifdef NETDATA_TRACE_RWLOCKS + +#ifndef NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC +#define NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC 10 +#endif + +#ifndef NETDATA_TRACE_RWLOCKS_HOLD_TIME_TO_IGNORE_USEC +#define NETDATA_TRACE_RWLOCKS_HOLD_TIME_TO_IGNORE_USEC 10000 +#endif + +#ifndef NETDATA_THREAD_LOCKS_ARRAY_SIZE +#define NETDATA_THREAD_LOCKS_ARRAY_SIZE 10 +#endif +static __thread netdata_rwlock_t *netdata_thread_locks[NETDATA_THREAD_LOCKS_ARRAY_SIZE]; + + +#endif // NETDATA_TRACE_RWLOCKS + +// ---------------------------------------------------------------------------- +// automatic thread cancelability management, based on locks + +static __thread int netdata_thread_first_cancelability = 0; +static __thread int netdata_thread_nested_disables = 0; + +static __thread size_t netdata_locks_acquired_rwlocks = 0; +static __thread size_t netdata_locks_acquired_mutexes = 0; + +inline void netdata_thread_disable_cancelability(void) { + int old; + int ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old); + if(ret != 0) + error("THREAD_CANCELABILITY: pthread_setcancelstate() on thread %s returned error %d", netdata_thread_tag(), ret); + else { + if(!netdata_thread_nested_disables) + netdata_thread_first_cancelability = old; + + netdata_thread_nested_disables++; + } +} + +inline void netdata_thread_enable_cancelability(void) { + if(netdata_thread_nested_disables < 1) { + error("THREAD_CANCELABILITY: netdata_thread_enable_cancelability(): invalid thread cancelability count %d on thread %s - results will be undefined - please report this!", + netdata_thread_nested_disables, netdata_thread_tag()); + } + else if(netdata_thread_nested_disables == 1) { + int old = 1; + int ret = pthread_setcancelstate(netdata_thread_first_cancelability, &old); + if(ret != 0) + error("THREAD_CANCELABILITY: pthread_setcancelstate() on thread %s returned error %d", netdata_thread_tag(), ret); + else { + if(old != PTHREAD_CANCEL_DISABLE) + error("THREAD_CANCELABILITY: netdata_thread_enable_cancelability(): old thread cancelability on thread %s was changed, expected DISABLED (%d), found %s (%d) - please report this!", netdata_thread_tag(), PTHREAD_CANCEL_DISABLE, (old == PTHREAD_CANCEL_ENABLE)?"ENABLED":"UNKNOWN", old); + } + + netdata_thread_nested_disables = 0; + } + else + netdata_thread_nested_disables--; +} + +// ---------------------------------------------------------------------------- +// mutex + +int __netdata_mutex_init(netdata_mutex_t *mutex) { + int ret = pthread_mutex_init(mutex, NULL); + if(unlikely(ret != 0)) + error("MUTEX_LOCK: failed to initialize (code %d).", ret); + return ret; +} + +int __netdata_mutex_destroy(netdata_mutex_t *mutex) { + int ret = pthread_mutex_destroy(mutex); + if(unlikely(ret != 0)) + error("MUTEX_LOCK: failed to destroy (code %d).", ret); + return ret; +} + +int __netdata_mutex_lock(netdata_mutex_t *mutex) { + netdata_thread_disable_cancelability(); + + int ret = pthread_mutex_lock(mutex); + if(unlikely(ret != 0)) { + netdata_thread_enable_cancelability(); + error("MUTEX_LOCK: failed to get lock (code %d)", ret); + } + else + netdata_locks_acquired_mutexes++; + + return ret; +} + +int __netdata_mutex_trylock(netdata_mutex_t *mutex) { + netdata_thread_disable_cancelability(); + + int ret = pthread_mutex_trylock(mutex); + if(ret != 0) + netdata_thread_enable_cancelability(); + else + netdata_locks_acquired_mutexes++; + + return ret; +} + +int __netdata_mutex_unlock(netdata_mutex_t *mutex) { + int ret = pthread_mutex_unlock(mutex); + if(unlikely(ret != 0)) + error("MUTEX_LOCK: failed to unlock (code %d).", ret); + else { + netdata_locks_acquired_mutexes--; + netdata_thread_enable_cancelability(); + } + + return ret; +} + +#ifdef NETDATA_TRACE_RWLOCKS + +#warning NETDATA_TRACE_RWLOCKS ENABLED - EXPECT A LOT OF OUTPUT + +int netdata_mutex_init_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_mutex_t *mutex) { + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_init(%p) from %lu@%s, %s()", mutex, line, file, function); + + int ret = __netdata_mutex_init(mutex); + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_init(%p) = %d, from %lu@%s, %s()", mutex, ret, line, file, function); + + return ret; +} + +int netdata_mutex_destroy_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_mutex_t *mutex) { + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_destroy(%p) from %lu@%s, %s()", mutex, line, file, function); + + int ret = __netdata_mutex_destroy(mutex); + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_destroy(%p) = %d, from %lu@%s, %s()", mutex, ret, line, file, function); + + return ret; +} + +int netdata_mutex_lock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_mutex_t *mutex) { + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_lock(%p) from %lu@%s, %s()", mutex, line, file, function); + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_mutex_lock(mutex); + usec_t end_s = now_monotonic_high_precision_usec(); + + // remove compiler unused variables warning + (void)start_s; + (void)end_s; + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_lock(%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, end_s - start_s, line, file, function); + + return ret; +} + +int netdata_mutex_trylock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_mutex_t *mutex) { + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_trylock(%p) from %lu@%s, %s()", mutex, line, file, function); + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_mutex_trylock(mutex); + usec_t end_s = now_monotonic_high_precision_usec(); + + // remove compiler unused variables warning + (void)start_s; + (void)end_s; + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_trylock(%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, end_s - start_s, line, file, function); + + return ret; +} + +int netdata_mutex_unlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_mutex_t *mutex) { + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_unlock(%p) from %lu@%s, %s()", mutex, line, file, function); + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_mutex_unlock(mutex); + usec_t end_s = now_monotonic_high_precision_usec(); + + // remove compiler unused variables warning + (void)start_s; + (void)end_s; + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_unlock(%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, end_s - start_s, line, file, function); + + return ret; +} + +#endif // NETDATA_TRACE_RWLOCKS + +// ---------------------------------------------------------------------------- +// rwlock + +int __netdata_rwlock_destroy(netdata_rwlock_t *rwlock) { + int ret = pthread_rwlock_destroy(&rwlock->rwlock_t); + if(unlikely(ret != 0)) + error("RW_LOCK: failed to destroy lock (code %d)", ret); + return ret; +} + +int __netdata_rwlock_init(netdata_rwlock_t *rwlock) { + int ret = pthread_rwlock_init(&rwlock->rwlock_t, NULL); + if(unlikely(ret != 0)) + error("RW_LOCK: failed to initialize lock (code %d)", ret); + return ret; +} + +int __netdata_rwlock_rdlock(netdata_rwlock_t *rwlock) { + netdata_thread_disable_cancelability(); + + int ret = pthread_rwlock_rdlock(&rwlock->rwlock_t); + if(unlikely(ret != 0)) { + netdata_thread_enable_cancelability(); + error("RW_LOCK: failed to obtain read lock (code %d)", ret); + } + else + netdata_locks_acquired_rwlocks++; + + return ret; +} + +int __netdata_rwlock_wrlock(netdata_rwlock_t *rwlock) { + netdata_thread_disable_cancelability(); + + int ret = pthread_rwlock_wrlock(&rwlock->rwlock_t); + if(unlikely(ret != 0)) { + error("RW_LOCK: failed to obtain write lock (code %d)", ret); + netdata_thread_enable_cancelability(); + } + else + netdata_locks_acquired_rwlocks++; + + return ret; +} + +int __netdata_rwlock_unlock(netdata_rwlock_t *rwlock) { + int ret = pthread_rwlock_unlock(&rwlock->rwlock_t); + if(unlikely(ret != 0)) + error("RW_LOCK: failed to release lock (code %d)", ret); + else { + netdata_thread_enable_cancelability(); + netdata_locks_acquired_rwlocks--; + } + + return ret; +} + +int __netdata_rwlock_tryrdlock(netdata_rwlock_t *rwlock) { + netdata_thread_disable_cancelability(); + + int ret = pthread_rwlock_tryrdlock(&rwlock->rwlock_t); + if(ret != 0) + netdata_thread_enable_cancelability(); + else + netdata_locks_acquired_rwlocks++; + + return ret; +} + +int __netdata_rwlock_trywrlock(netdata_rwlock_t *rwlock) { + netdata_thread_disable_cancelability(); + + int ret = pthread_rwlock_trywrlock(&rwlock->rwlock_t); + if(ret != 0) + netdata_thread_enable_cancelability(); + else + netdata_locks_acquired_rwlocks++; + + return ret; +} + +// ---------------------------------------------------------------------------- +// spinlock implementation +// https://www.youtube.com/watch?v=rmGJc9PXpuE&t=41s + +void netdata_spinlock_init(SPINLOCK *spinlock) { + *spinlock = NETDATA_SPINLOCK_INITIALIZER; +} + +void netdata_spinlock_lock(SPINLOCK *spinlock) { + static const struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 }; + + netdata_thread_disable_cancelability(); + + for(int i = 1; + __atomic_load_n(&spinlock->locked, __ATOMIC_RELAXED) || + __atomic_test_and_set(&spinlock->locked, __ATOMIC_ACQUIRE) + ; i++ + ) { + if(unlikely(i == 8)) { + i = 0; + nanosleep(&ns, NULL); + } + } + // we have the lock +} + +void netdata_spinlock_unlock(SPINLOCK *spinlock) { + __atomic_clear(&spinlock->locked, __ATOMIC_RELEASE); + netdata_thread_enable_cancelability(); +} + +#ifdef NETDATA_TRACE_RWLOCKS + +// ---------------------------------------------------------------------------- +// lockers list + +void not_supported_by_posix_rwlocks(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, char locktype, const char *reason) { + __netdata_mutex_lock(&rwlock->lockers_mutex); + fprintf(stderr, + "RW_LOCK FATAL ON LOCK %p: %d '%s' (function %s() %lu@%s) attempts to acquire a '%c' lock, but it is not supported by POSIX because: %s. At this attempt, the task is holding %zu rwlocks and %zu mutexes. There are %zu readers and %zu writers holding this lock:\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + locktype, + reason, + netdata_locks_acquired_rwlocks, netdata_locks_acquired_mutexes, + rwlock->readers, rwlock->writers); + + int i; + usec_t now = now_monotonic_high_precision_usec(); + netdata_rwlock_locker *p; + for(i = 1, p = rwlock->lockers; p ;p = p->next, i++) { + fprintf(stderr, + " => %i: RW_LOCK %p: process %d '%s' (function %s() %lu@%s) is having %zu '%c' lock for %llu usec.\n", + i, rwlock, + p->pid, p->tag, + p->function, p->line, p->file, + p->callers, p->lock, + (now - p->start_s)); + } + __netdata_mutex_unlock(&rwlock->lockers_mutex); +} + +static void log_rwlock_lockers(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, const char *reason, char locktype) { + + // this function can only be used by one thread at a time + // because otherwise, the threads may deadlock waiting for each other + static netdata_mutex_t log_lockers_mutex = NETDATA_MUTEX_INITIALIZER; + __netdata_mutex_lock(&log_lockers_mutex); + + // now work on this locker + __netdata_mutex_lock(&rwlock->lockers_mutex); + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d '%s' (function %s() %lu@%s) %s a '%c' lock (while holding %zu rwlocks and %zu mutexes). There are %zu readers and %zu writers holding this lock:\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + reason, locktype, + netdata_locks_acquired_rwlocks, netdata_locks_acquired_mutexes, + rwlock->readers, rwlock->writers); + + int i; + usec_t now = now_monotonic_high_precision_usec(); + netdata_rwlock_locker *p; + for(i = 1, p = rwlock->lockers; p ;p = p->next, i++) { + fprintf(stderr, + " => %i: RW_LOCK %p: process %d '%s' (function %s() %lu@%s) is having %zu '%c' lock for %llu usec.\n", + i, rwlock, + p->pid, p->tag, + p->function, p->line, p->file, + p->callers, p->lock, + (now - p->start_s)); + + if(p->all_caller_locks) { + // find the lock in the netdata_thread_locks[] + // and remove it + int k; + for(k = 0; k < NETDATA_THREAD_LOCKS_ARRAY_SIZE ;k++) { + if (p->all_caller_locks[k] && p->all_caller_locks[k] != rwlock) { + + // lock the other lock lockers list + __netdata_mutex_lock(&p->all_caller_locks[k]->lockers_mutex); + + // print the list of lockers of the other lock + netdata_rwlock_locker *r; + int j; + for(j = 1, r = p->all_caller_locks[k]->lockers; r ;r = r->next, j++) { + fprintf( + stderr, + " ~~~> %i: RW_LOCK %p: process %d '%s' (function %s() %lu@%s) is having %zu '%c' lock for %llu usec.\n", + j, + p->all_caller_locks[k], + r->pid, + r->tag, + r->function, + r->line, + r->file, + r->callers, + r->lock, + (now - r->start_s)); + } + + // unlock the other lock lockers list + __netdata_mutex_unlock(&p->all_caller_locks[k]->lockers_mutex); + } + } + } + + } + __netdata_mutex_unlock(&rwlock->lockers_mutex); + + // unlock this function for other threads + __netdata_mutex_unlock(&log_lockers_mutex); +} + +static netdata_rwlock_locker *add_rwlock_locker(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, char lock_type) { + netdata_rwlock_locker *p = mallocz(sizeof(netdata_rwlock_locker)); + p->pid = gettid(); + p->tag = netdata_thread_tag(); + p->lock = lock_type; + p->file = file; + p->function = function; + p->line = line; + p->callers = 1; + p->all_caller_locks = netdata_thread_locks; + p->start_s = now_monotonic_high_precision_usec(); + + // find a slot in the netdata_thread_locks[] + int i; + for(i = 0; i < NETDATA_THREAD_LOCKS_ARRAY_SIZE ;i++) { + if (!netdata_thread_locks[i]) { + netdata_thread_locks[i] = rwlock; + break; + } + } + + __netdata_mutex_lock(&rwlock->lockers_mutex); + p->next = rwlock->lockers; + rwlock->lockers = p; + if(lock_type == 'R') rwlock->readers++; + if(lock_type == 'W') rwlock->writers++; + __netdata_mutex_unlock(&rwlock->lockers_mutex); + + return p; +} + +static void remove_rwlock_locker(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock, netdata_rwlock_locker *locker) { + usec_t end_s = now_monotonic_high_precision_usec(); + + if(locker->callers == 0) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) callers should be positive but it is zero\n", + rwlock, + locker->pid, locker->tag, + locker->function, locker->line, locker->file); + + if(locker->callers > 1 && locker->lock != 'R') + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) only 'R' locks support multiple holders, but here we have %zu callers holding a '%c' lock.\n", + rwlock, + locker->pid, locker->tag, + locker->function, locker->line, locker->file, + locker->callers, locker->lock); + + __netdata_mutex_lock(&rwlock->lockers_mutex); + locker->callers--; + + if(!locker->callers) { + int doit = 0; + + if (rwlock->lockers == locker) { + rwlock->lockers = locker->next; + doit = 1; + } else { + netdata_rwlock_locker *p; + for (p = rwlock->lockers; p && p->next != locker; p = p->next) + ; + if (p && p->next == locker) { + p->next = locker->next; + doit = 1; + } + } + if(doit) { + if(locker->lock == 'R') rwlock->readers--; + if(locker->lock == 'W') rwlock->writers--; + } + + if(!doit) { + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) with %zu x '%c' lock is not found.\n", + rwlock, + locker->pid, locker->tag, + locker->function, locker->line, locker->file, + locker->callers, locker->lock); + } + else { + // find the lock in the netdata_thread_locks[] + // and remove it + int i; + for(i = 0; i < NETDATA_THREAD_LOCKS_ARRAY_SIZE ;i++) { + if (netdata_thread_locks[i] == rwlock) + netdata_thread_locks[i] = NULL; + } + + if(end_s - locker->start_s >= NETDATA_TRACE_RWLOCKS_HOLD_TIME_TO_IGNORE_USEC) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) holded a '%c' for %llu usec.\n", + rwlock, + locker->pid, locker->tag, + locker->function, locker->line, locker->file, + locker->lock, end_s - locker->start_s); + + freez(locker); + } + } + + __netdata_mutex_unlock(&rwlock->lockers_mutex); +} + +static netdata_rwlock_locker *find_rwlock_locker(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + pid_t pid = gettid(); + netdata_rwlock_locker *p; + + __netdata_mutex_lock(&rwlock->lockers_mutex); + for(p = rwlock->lockers; p ;p = p->next) { + if(p->pid == pid) break; + } + __netdata_mutex_unlock(&rwlock->lockers_mutex); + + return p; +} + +static netdata_rwlock_locker *update_or_add_rwlock_locker(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, netdata_rwlock_locker *locker, char locktype) { + if(!locker) { + return add_rwlock_locker(file, function, line, rwlock, locktype); + } + else if(locker->lock == 'R' && locktype == 'R') { + __netdata_mutex_lock(&rwlock->lockers_mutex); + locker->callers++; + __netdata_mutex_unlock(&rwlock->lockers_mutex); + return locker; + } + else { + not_supported_by_posix_rwlocks(file, function, line, rwlock, locktype, "DEADLOCK - WANTS TO CHANGE LOCK TYPE BUT ALREADY HAS THIS LOCKED"); + return locker; + } +} + +// ---------------------------------------------------------------------------- +// debug versions of rwlock + +int netdata_rwlock_destroy_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_destroy(%p) from %lu@%s, %s()", rwlock, line, file, function); + + if(rwlock->readers) + error("RW_LOCK: destroying a rwlock with %zu readers in it", rwlock->readers); + if(rwlock->writers) + error("RW_LOCK: destroying a rwlock with %zu writers in it", rwlock->writers); + + int ret = __netdata_rwlock_destroy(rwlock); + if(!ret) { + while (rwlock->lockers) + remove_rwlock_locker(file, function, line, rwlock, rwlock->lockers); + + if (rwlock->readers) + error("RW_LOCK: internal error - empty rwlock with %zu readers in it", rwlock->readers); + if (rwlock->writers) + error("RW_LOCK: internal error - empty rwlock with %zu writers in it", rwlock->writers); + } + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_destroy(%p) = %d, from %lu@%s, %s()", rwlock, ret, line, file, function); + + return ret; +} + +int netdata_rwlock_init_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_init(%p) from %lu@%s, %s()", rwlock, line, file, function); + + int ret = __netdata_rwlock_init(rwlock); + if(!ret) { + __netdata_mutex_init(&rwlock->lockers_mutex); + rwlock->lockers = NULL; + rwlock->readers = 0; + rwlock->writers = 0; + } + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_init(%p) = %d, from %lu@%s, %s()", rwlock, ret, line, file, function); + + return ret; +} + +int netdata_rwlock_rdlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_rdlock(%p) from %lu@%s, %s()", rwlock, line, file, function); + + netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); + +#ifdef NETDATA_TRACE_RWLOCKS_LOG_NESTED + if(locker && locker->lock == 'R') { + log_rwlock_lockers(file, function, line, rwlock, "NESTED READ LOCK REQUEST", 'R'); + } +#endif // NETDATA_TRACE_RWLOCKS_LOG_NESTED + + int log = 0; + if(rwlock->writers) { + log_rwlock_lockers(file, function, line, rwlock, "WANTS", 'R'); + log = 1; + } + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_rwlock_rdlock(rwlock); + usec_t end_s = now_monotonic_high_precision_usec(); + + if(!ret) { + locker = update_or_add_rwlock_locker(file, function, line, rwlock, locker, 'R'); + if(log) log_rwlock_lockers(file, function, line, rwlock, "GOT", 'R'); + + } + + if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED for a READ lock for %llu usec.\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + end_s - start_s); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_rdlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + + return ret; +} + +int netdata_rwlock_wrlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_wrlock(%p) from %lu@%s, %s()", rwlock, line, file, function); + + netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); + if(locker) + not_supported_by_posix_rwlocks(file, function, line, rwlock, 'W', "DEADLOCK - WANTS A WRITE LOCK BUT ALREADY HAVE THIS LOCKED"); + + int log = 0; + if(rwlock->readers) { + log_rwlock_lockers(file, function, line, rwlock, "WANTS", 'W'); + log = 1; + } + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_rwlock_wrlock(rwlock); + usec_t end_s = now_monotonic_high_precision_usec(); + + if(!ret){ + locker = update_or_add_rwlock_locker(file, function, line, rwlock, locker, 'W'); + if(log) log_rwlock_lockers(file, function, line, rwlock, "GOT", 'W'); + } + + if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED for a WRITE lock for %llu usec.\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + end_s - start_s); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_wrlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + + return ret; +} + +int netdata_rwlock_unlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_unlock(%p) from %lu@%s, %s()", rwlock, line, file, function); + + netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); + if(unlikely(!locker)) + not_supported_by_posix_rwlocks(file, function, line, rwlock, 'U', "UNLOCK WITHOUT LOCK"); + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_rwlock_unlock(rwlock); + usec_t end_s = now_monotonic_high_precision_usec(); + + if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED to UNLOCK for %llu usec.\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + end_s - start_s); + + if(likely(!ret && locker)) remove_rwlock_locker(file, function, line, rwlock, locker); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_unlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + + return ret; +} + +int netdata_rwlock_tryrdlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_tryrdlock(%p) from %lu@%s, %s()", rwlock, line, file, function); + + netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); + if(locker && locker->lock == 'W') + not_supported_by_posix_rwlocks(file, function, line, rwlock, 'R', "DEADLOCK - WANTS A READ LOCK BUT IT HAS A WRITE LOCK ALREADY"); + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_rwlock_tryrdlock(rwlock); + usec_t end_s = now_monotonic_high_precision_usec(); + + if(!ret) + locker = update_or_add_rwlock_locker(file, function, line, rwlock, locker, 'R'); + + if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED to TRYREAD for %llu usec.\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + end_s - start_s); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_tryrdlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + + return ret; +} + +int netdata_rwlock_trywrlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_trywrlock(%p) from %lu@%s, %s()", rwlock, line, file, function); + + netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); + if(locker) + not_supported_by_posix_rwlocks(file, function, line, rwlock, 'W', "ALREADY HAS THIS LOCK"); + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_rwlock_trywrlock(rwlock); + usec_t end_s = now_monotonic_high_precision_usec(); + + if(!ret) + locker = update_or_add_rwlock_locker(file, function, line, rwlock, locker, 'W'); + + if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED to TRYWRITE for %llu usec.\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + end_s - start_s); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_trywrlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + + return ret; +} + +#endif // NETDATA_TRACE_RWLOCKS diff --git a/libnetdata/locks/locks.h b/libnetdata/locks/locks.h new file mode 100644 index 0000000..4d2d165 --- /dev/null +++ b/libnetdata/locks/locks.h @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_LOCKS_H +#define NETDATA_LOCKS_H 1 + +#include "../libnetdata.h" +#include "../clocks/clocks.h" + +typedef pthread_mutex_t netdata_mutex_t; +#define NETDATA_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER + +typedef struct netdata_spinlock { + bool locked; +} SPINLOCK; +#define NETDATA_SPINLOCK_INITIALIZER (SPINLOCK){ .locked = false } +void netdata_spinlock_init(SPINLOCK *spinlock); +void netdata_spinlock_lock(SPINLOCK *spinlock); +void netdata_spinlock_unlock(SPINLOCK *spinlock); + +#ifdef NETDATA_TRACE_RWLOCKS +typedef struct netdata_rwlock_locker { + pid_t pid; + const char *tag; + char lock; // 'R', 'W' + const char *file; + const char *function; + unsigned long line; + size_t callers; + usec_t start_s; + struct netdata_rwlock_t **all_caller_locks; + struct netdata_rwlock_locker *next; +} netdata_rwlock_locker; + +typedef struct netdata_rwlock_t { + pthread_rwlock_t rwlock_t; // the lock + size_t readers; // the number of reader on the lock + size_t writers; // the number of writers on the lock + netdata_mutex_t lockers_mutex; // a mutex to protect the linked list of the lock holding threads + netdata_rwlock_locker *lockers; // the linked list of the lock holding threads +} netdata_rwlock_t; + +#define NETDATA_RWLOCK_INITIALIZER { \ + .rwlock_t = PTHREAD_RWLOCK_INITIALIZER, \ + .readers = 0, \ + .writers = 0, \ + .lockers_mutex = NETDATA_MUTEX_INITIALIZER, \ + .lockers = NULL \ + } + +#else // NETDATA_TRACE_RWLOCKS + +typedef struct netdata_rwlock_t { + pthread_rwlock_t rwlock_t; +} netdata_rwlock_t; + +#define NETDATA_RWLOCK_INITIALIZER { \ + .rwlock_t = PTHREAD_RWLOCK_INITIALIZER \ + } + +#endif // NETDATA_TRACE_RWLOCKS + +int __netdata_mutex_init(netdata_mutex_t *mutex); +int __netdata_mutex_destroy(netdata_mutex_t *mutex); +int __netdata_mutex_lock(netdata_mutex_t *mutex); +int __netdata_mutex_trylock(netdata_mutex_t *mutex); +int __netdata_mutex_unlock(netdata_mutex_t *mutex); + +int __netdata_rwlock_destroy(netdata_rwlock_t *rwlock); +int __netdata_rwlock_init(netdata_rwlock_t *rwlock); +int __netdata_rwlock_rdlock(netdata_rwlock_t *rwlock); +int __netdata_rwlock_wrlock(netdata_rwlock_t *rwlock); +int __netdata_rwlock_unlock(netdata_rwlock_t *rwlock); +int __netdata_rwlock_tryrdlock(netdata_rwlock_t *rwlock); +int __netdata_rwlock_trywrlock(netdata_rwlock_t *rwlock); + +void netdata_thread_disable_cancelability(void); +void netdata_thread_enable_cancelability(void); + +#ifdef NETDATA_TRACE_RWLOCKS + +int netdata_mutex_init_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); +int netdata_mutex_destroy_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); +int netdata_mutex_lock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); +int netdata_mutex_trylock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); +int netdata_mutex_unlock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); + +int netdata_rwlock_destroy_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +int netdata_rwlock_init_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +int netdata_rwlock_rdlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +int netdata_rwlock_wrlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +int netdata_rwlock_unlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +int netdata_rwlock_tryrdlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +int netdata_rwlock_trywrlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); + +#define netdata_mutex_init(mutex) netdata_mutex_init_debug(__FILE__, __FUNCTION__, __LINE__, mutex) +#define netdata_mutex_destroy(mutex) netdata_mutex_init_debug(__FILE__, __FUNCTION__, __LINE__, mutex) +#define netdata_mutex_lock(mutex) netdata_mutex_lock_debug(__FILE__, __FUNCTION__, __LINE__, mutex) +#define netdata_mutex_trylock(mutex) netdata_mutex_trylock_debug(__FILE__, __FUNCTION__, __LINE__, mutex) +#define netdata_mutex_unlock(mutex) netdata_mutex_unlock_debug(__FILE__, __FUNCTION__, __LINE__, mutex) + +#define netdata_rwlock_destroy(rwlock) netdata_rwlock_destroy_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) +#define netdata_rwlock_init(rwlock) netdata_rwlock_init_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) +#define netdata_rwlock_rdlock(rwlock) netdata_rwlock_rdlock_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) +#define netdata_rwlock_wrlock(rwlock) netdata_rwlock_wrlock_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) +#define netdata_rwlock_unlock(rwlock) netdata_rwlock_unlock_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) +#define netdata_rwlock_tryrdlock(rwlock) netdata_rwlock_tryrdlock_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) +#define netdata_rwlock_trywrlock(rwlock) netdata_rwlock_trywrlock_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) + +#else // !NETDATA_TRACE_RWLOCKS + +#define netdata_mutex_init(mutex) __netdata_mutex_init(mutex) +#define netdata_mutex_destroy(mutex) __netdata_mutex_destroy(mutex) +#define netdata_mutex_lock(mutex) __netdata_mutex_lock(mutex) +#define netdata_mutex_trylock(mutex) __netdata_mutex_trylock(mutex) +#define netdata_mutex_unlock(mutex) __netdata_mutex_unlock(mutex) + +#define netdata_rwlock_destroy(rwlock) __netdata_rwlock_destroy(rwlock) +#define netdata_rwlock_init(rwlock) __netdata_rwlock_init(rwlock) +#define netdata_rwlock_rdlock(rwlock) __netdata_rwlock_rdlock(rwlock) +#define netdata_rwlock_wrlock(rwlock) __netdata_rwlock_wrlock(rwlock) +#define netdata_rwlock_unlock(rwlock) __netdata_rwlock_unlock(rwlock) +#define netdata_rwlock_tryrdlock(rwlock) __netdata_rwlock_tryrdlock(rwlock) +#define netdata_rwlock_trywrlock(rwlock) __netdata_rwlock_trywrlock(rwlock) + +#endif // NETDATA_TRACE_RWLOCKS + +#endif //NETDATA_LOCKS_H diff --git a/libnetdata/log/Makefile.am b/libnetdata/log/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/log/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/log/README.md b/libnetdata/log/README.md new file mode 100644 index 0000000..a767dd4 --- /dev/null +++ b/libnetdata/log/README.md @@ -0,0 +1,5 @@ + + + diff --git a/libnetdata/log/log.c b/libnetdata/log/log.c new file mode 100644 index 0000000..fb3b2d0 --- /dev/null +++ b/libnetdata/log/log.c @@ -0,0 +1,1084 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include +#include "../libnetdata.h" + +#ifdef HAVE_BACKTRACE +#include +#endif + +int web_server_is_multithreaded = 1; + +const char *program_name = ""; +uint64_t debug_flags = 0; + +int access_log_syslog = 1; +int error_log_syslog = 1; +int output_log_syslog = 1; // debug log +int health_log_syslog = 1; + +int stdaccess_fd = -1; +FILE *stdaccess = NULL; + +int stdhealth_fd = -1; +FILE *stdhealth = NULL; + +const char *stdaccess_filename = NULL; +const char *stderr_filename = NULL; +const char *stdout_filename = NULL; +const char *facility_log = NULL; +const char *stdhealth_filename = NULL; + +#ifdef ENABLE_ACLK +const char *aclklog_filename = NULL; +int aclklog_fd = -1; +FILE *aclklog = NULL; +int aclklog_syslog = 1; +int aclklog_enabled = 0; +#endif + +// ---------------------------------------------------------------------------- +// Log facility(https://tools.ietf.org/html/rfc5424) +// +// The facilities accepted in the Netdata are in according with the following +// header files for their respective operating system: +// sys/syslog.h (Linux ) +// sys/sys/syslog.h (FreeBSD) +// bsd/sys/syslog.h (darwin-xnu) + +#define LOG_AUTH_KEY "auth" +#define LOG_AUTHPRIV_KEY "authpriv" +#ifdef __FreeBSD__ +# define LOG_CONSOLE_KEY "console" +#endif +#define LOG_CRON_KEY "cron" +#define LOG_DAEMON_KEY "daemon" +#define LOG_FTP_KEY "ftp" +#ifdef __APPLE__ +# define LOG_INSTALL_KEY "install" +#endif +#define LOG_KERN_KEY "kern" +#define LOG_LPR_KEY "lpr" +#define LOG_MAIL_KEY "mail" +//#define LOG_INTERNAL_MARK_KEY "mark" +#ifdef __APPLE__ +# define LOG_NETINFO_KEY "netinfo" +# define LOG_RAS_KEY "ras" +# define LOG_REMOTEAUTH_KEY "remoteauth" +#endif +#define LOG_NEWS_KEY "news" +#ifdef __FreeBSD__ +# define LOG_NTP_KEY "ntp" +#endif +#define LOG_SECURITY_KEY "security" +#define LOG_SYSLOG_KEY "syslog" +#define LOG_USER_KEY "user" +#define LOG_UUCP_KEY "uucp" +#ifdef __APPLE__ +# define LOG_LAUNCHD_KEY "launchd" +#endif +#define LOG_LOCAL0_KEY "local0" +#define LOG_LOCAL1_KEY "local1" +#define LOG_LOCAL2_KEY "local2" +#define LOG_LOCAL3_KEY "local3" +#define LOG_LOCAL4_KEY "local4" +#define LOG_LOCAL5_KEY "local5" +#define LOG_LOCAL6_KEY "local6" +#define LOG_LOCAL7_KEY "local7" + +static int log_facility_id(const char *facility_name) +{ + static int + hash_auth = 0, + hash_authpriv = 0, +#ifdef __FreeBSD__ + hash_console = 0, +#endif + hash_cron = 0, + hash_daemon = 0, + hash_ftp = 0, +#ifdef __APPLE__ + hash_install = 0, +#endif + hash_kern = 0, + hash_lpr = 0, + hash_mail = 0, +// hash_mark = 0, +#ifdef __APPLE__ + hash_netinfo = 0, + hash_ras = 0, + hash_remoteauth = 0, +#endif + hash_news = 0, +#ifdef __FreeBSD__ + hash_ntp = 0, +#endif + hash_security = 0, + hash_syslog = 0, + hash_user = 0, + hash_uucp = 0, +#ifdef __APPLE__ + hash_launchd = 0, +#endif + hash_local0 = 0, + hash_local1 = 0, + hash_local2 = 0, + hash_local3 = 0, + hash_local4 = 0, + hash_local5 = 0, + hash_local6 = 0, + hash_local7 = 0; + + if(unlikely(!hash_auth)) + { + hash_auth = simple_hash(LOG_AUTH_KEY); + hash_authpriv = simple_hash(LOG_AUTHPRIV_KEY); +#ifdef __FreeBSD__ + hash_console = simple_hash(LOG_CONSOLE_KEY); +#endif + hash_cron = simple_hash(LOG_CRON_KEY); + hash_daemon = simple_hash(LOG_DAEMON_KEY); + hash_ftp = simple_hash(LOG_FTP_KEY); +#ifdef __APPLE__ + hash_install = simple_hash(LOG_INSTALL_KEY); +#endif + hash_kern = simple_hash(LOG_KERN_KEY); + hash_lpr = simple_hash(LOG_LPR_KEY); + hash_mail = simple_hash(LOG_MAIL_KEY); +// hash_mark = simple_uhash(); +#ifdef __APPLE__ + hash_netinfo = simple_hash(LOG_NETINFO_KEY); + hash_ras = simple_hash(LOG_RAS_KEY); + hash_remoteauth = simple_hash(LOG_REMOTEAUTH_KEY); +#endif + hash_news = simple_hash(LOG_NEWS_KEY); +#ifdef __FreeBSD__ + hash_ntp = simple_hash(LOG_NTP_KEY); +#endif + hash_security = simple_hash(LOG_SECURITY_KEY); + hash_syslog = simple_hash(LOG_SYSLOG_KEY); + hash_user = simple_hash(LOG_USER_KEY); + hash_uucp = simple_hash(LOG_UUCP_KEY); +#ifdef __APPLE__ + hash_launchd = simple_hash(LOG_LAUNCHD_KEY); +#endif + hash_local0 = simple_hash(LOG_LOCAL0_KEY); + hash_local1 = simple_hash(LOG_LOCAL1_KEY); + hash_local2 = simple_hash(LOG_LOCAL2_KEY); + hash_local3 = simple_hash(LOG_LOCAL3_KEY); + hash_local4 = simple_hash(LOG_LOCAL4_KEY); + hash_local5 = simple_hash(LOG_LOCAL5_KEY); + hash_local6 = simple_hash(LOG_LOCAL6_KEY); + hash_local7 = simple_hash(LOG_LOCAL7_KEY); + } + + int hash = simple_hash(facility_name); + if ( hash == hash_auth ) + { + return LOG_AUTH; + } + else if ( hash == hash_authpriv ) + { + return LOG_AUTHPRIV; + } +#ifdef __FreeBSD__ + else if ( hash == hash_console ) + { + return LOG_CONSOLE; + } +#endif + else if ( hash == hash_cron ) + { + return LOG_CRON; + } + else if ( hash == hash_daemon ) + { + return LOG_DAEMON; + } + else if ( hash == hash_ftp ) + { + return LOG_FTP; + } +#ifdef __APPLE__ + else if ( hash == hash_install ) + { + return LOG_INSTALL; + } +#endif + else if ( hash == hash_kern ) + { + return LOG_KERN; + } + else if ( hash == hash_lpr ) + { + return LOG_LPR; + } + else if ( hash == hash_mail ) + { + return LOG_MAIL; + } + /* + else if ( hash == hash_mark ) + { + //this is internal for all OS + return INTERNAL_MARK; + } + */ +#ifdef __APPLE__ + else if ( hash == hash_netinfo ) + { + return LOG_NETINFO; + } + else if ( hash == hash_ras ) + { + return LOG_RAS; + } + else if ( hash == hash_remoteauth ) + { + return LOG_REMOTEAUTH; + } +#endif + else if ( hash == hash_news ) + { + return LOG_NEWS; + } +#ifdef __FreeBSD__ + else if ( hash == hash_ntp ) + { + return LOG_NTP; + } +#endif + else if ( hash == hash_security ) + { + //FreeBSD is the unique that does not consider + //this facility deprecated. We are keeping + //it for other OS while they are kept in their headers. +#ifdef __FreeBSD__ + return LOG_SECURITY; +#else + return LOG_AUTH; +#endif + } + else if ( hash == hash_syslog ) + { + return LOG_SYSLOG; + } + else if ( hash == hash_user ) + { + return LOG_USER; + } + else if ( hash == hash_uucp ) + { + return LOG_UUCP; + } + else if ( hash == hash_local0 ) + { + return LOG_LOCAL0; + } + else if ( hash == hash_local1 ) + { + return LOG_LOCAL1; + } + else if ( hash == hash_local2 ) + { + return LOG_LOCAL2; + } + else if ( hash == hash_local3 ) + { + return LOG_LOCAL3; + } + else if ( hash == hash_local4 ) + { + return LOG_LOCAL4; + } + else if ( hash == hash_local5 ) + { + return LOG_LOCAL5; + } + else if ( hash == hash_local6 ) + { + return LOG_LOCAL6; + } + else if ( hash == hash_local7 ) + { + return LOG_LOCAL7; + } +#ifdef __APPLE__ + else if ( hash == hash_launchd ) + { + return LOG_LAUNCHD; + } +#endif + + return LOG_DAEMON; +} + +//we do not need to use this now, but I already created this function to be +//used case necessary. +/* +char *log_facility_name(int code) +{ + char *defvalue = { "daemon" }; + switch(code) + { + case LOG_AUTH: + { + return "auth"; + } + case LOG_AUTHPRIV: + { + return "authpriv"; + } +#ifdef __FreeBSD__ + case LOG_CONSOLE: + { + return "console"; + } +#endif + case LOG_CRON: + { + return "cron"; + } + case LOG_DAEMON: + { + return defvalue; + } + case LOG_FTP: + { + return "ftp"; + } +#ifdef __APPLE__ + case LOG_INSTALL: + { + return "install"; + } +#endif + case LOG_KERN: + { + return "kern"; + } + case LOG_LPR: + { + return "lpr"; + } + case LOG_MAIL: + { + return "mail"; + } +#ifdef __APPLE__ + case LOG_NETINFO: + { + return "netinfo" ; + } + case LOG_RAS: + { + return "ras"; + } + case LOG_REMOTEAUTH: + { + return "remoteauth"; + } +#endif + case LOG_NEWS: + { + return "news"; + } +#ifdef __FreeBSD__ + case LOG_NTP: + { + return "ntp" ; + } + case LOG_SECURITY: + { + return "security"; + } +#endif + case LOG_SYSLOG: + { + return "syslog"; + } + case LOG_USER: + { + return "user"; + } + case LOG_UUCP: + { + return "uucp"; + } + case LOG_LOCAL0: + { + return "local0"; + } + case LOG_LOCAL1: + { + return "local1"; + } + case LOG_LOCAL2: + { + return "local2"; + } + case LOG_LOCAL3: + { + return "local3"; + } + case LOG_LOCAL4: + { + return "local4" ; + } + case LOG_LOCAL5: + { + return "local5"; + } + case LOG_LOCAL6: + { + return "local6"; + } + case LOG_LOCAL7: + { + return "local7" ; + } +#ifdef __APPLE__ + case LOG_LAUNCHD: + { + return "launchd"; + } +#endif + } + + return defvalue; +} +*/ + +// ---------------------------------------------------------------------------- + +void syslog_init() { + static int i = 0; + + if(!i) { + openlog(program_name, LOG_PID,log_facility_id(facility_log)); + i = 1; + } +} + +void log_date(char *buffer, size_t len, time_t now) { + if(unlikely(!buffer || !len)) + return; + + time_t t = now; + struct tm *tmp, tmbuf; + + tmp = localtime_r(&t, &tmbuf); + + if (tmp == NULL) { + buffer[0] = '\0'; + return; + } + + if (unlikely(strftime(buffer, len, "%Y-%m-%d %H:%M:%S", tmp) == 0)) + buffer[0] = '\0'; + + buffer[len - 1] = '\0'; +} + +static netdata_mutex_t log_mutex = NETDATA_MUTEX_INITIALIZER; +static inline void log_lock() { + netdata_mutex_lock(&log_mutex); +} +static inline void log_unlock() { + netdata_mutex_unlock(&log_mutex); +} + +static FILE *open_log_file(int fd, FILE *fp, const char *filename, int *enabled_syslog, int is_stdaccess, int *fd_ptr) { + int f, devnull = 0; + + if(!filename || !*filename || !strcmp(filename, "none") || !strcmp(filename, "/dev/null")) { + filename = "/dev/null"; + devnull = 1; + } + + if(!strcmp(filename, "syslog")) { + filename = "/dev/null"; + devnull = 1; + + syslog_init(); + if(enabled_syslog) *enabled_syslog = 1; + } + else if(enabled_syslog) *enabled_syslog = 0; + + // don't do anything if the user is willing + // to have the standard one + if(!strcmp(filename, "system")) { + if(fd != -1 && !is_stdaccess) { + if(fd_ptr) *fd_ptr = fd; + return fp; + } + + filename = "stderr"; + } + + if(!strcmp(filename, "stdout")) + f = STDOUT_FILENO; + + else if(!strcmp(filename, "stderr")) + f = STDERR_FILENO; + + else { + f = open(filename, O_WRONLY | O_APPEND | O_CREAT, 0664); + if(f == -1) { + error("Cannot open file '%s'. Leaving %d to its default.", filename, fd); + if(fd_ptr) *fd_ptr = fd; + return fp; + } + } + + // if there is a level-2 file pointer + // flush it before switching the level-1 fds + if(fp) + fflush(fp); + + if(devnull && is_stdaccess) { + fd = -1; + fp = NULL; + } + + if(fd != f && fd != -1) { + // it automatically closes + int t = dup2(f, fd); + if (t == -1) { + error("Cannot dup2() new fd %d to old fd %d for '%s'", f, fd, filename); + close(f); + if(fd_ptr) *fd_ptr = fd; + return fp; + } + // info("dup2() new fd %d to old fd %d for '%s'", f, fd, filename); + close(f); + } + else fd = f; + + if(!fp) { + fp = fdopen(fd, "a"); + if (!fp) + error("Cannot fdopen() fd %d ('%s')", fd, filename); + else { + if (setvbuf(fp, NULL, _IOLBF, 0) != 0) + error("Cannot set line buffering on fd %d ('%s')", fd, filename); + } + } + + if(fd_ptr) *fd_ptr = fd; + return fp; +} + +void reopen_all_log_files() { + if(stdout_filename) + open_log_file(STDOUT_FILENO, stdout, stdout_filename, &output_log_syslog, 0, NULL); + + if(stderr_filename) + open_log_file(STDERR_FILENO, stderr, stderr_filename, &error_log_syslog, 0, NULL); + +#ifdef ENABLE_ACLK + if (aclklog_enabled) + aclklog = open_log_file(aclklog_fd, aclklog, aclklog_filename, NULL, 0, &aclklog_fd); +#endif + + if(stdaccess_filename) + stdaccess = open_log_file(stdaccess_fd, stdaccess, stdaccess_filename, &access_log_syslog, 1, &stdaccess_fd); + + if(stdhealth_filename) + stdhealth = open_log_file(stdhealth_fd, stdhealth, stdhealth_filename, &health_log_syslog, 1, &stdhealth_fd); +} + +void open_all_log_files() { + // disable stdin + open_log_file(STDIN_FILENO, stdin, "/dev/null", NULL, 0, NULL); + + open_log_file(STDOUT_FILENO, stdout, stdout_filename, &output_log_syslog, 0, NULL); + open_log_file(STDERR_FILENO, stderr, stderr_filename, &error_log_syslog, 0, NULL); + +#ifdef ENABLE_ACLK + if(aclklog_enabled) + aclklog = open_log_file(aclklog_fd, aclklog, aclklog_filename, NULL, 0, &aclklog_fd); +#endif + + stdaccess = open_log_file(stdaccess_fd, stdaccess, stdaccess_filename, &access_log_syslog, 1, &stdaccess_fd); + + stdhealth = open_log_file(stdhealth_fd, stdhealth, stdhealth_filename, &health_log_syslog, 1, &stdhealth_fd); +} + +// ---------------------------------------------------------------------------- +// error log throttling + +time_t error_log_throttle_period = 1200; +unsigned long error_log_errors_per_period = 200; +unsigned long error_log_errors_per_period_backup = 0; + +int error_log_limit(int reset) { + static time_t start = 0; + static unsigned long counter = 0, prevented = 0; + + // fprintf(stderr, "FLOOD: counter=%lu, allowed=%lu, backup=%lu, period=%llu\n", counter, error_log_errors_per_period, error_log_errors_per_period_backup, (unsigned long long)error_log_throttle_period); + + // do not throttle if the period is 0 + if(error_log_throttle_period == 0) + return 0; + + // prevent all logs if the errors per period is 0 + if(error_log_errors_per_period == 0) +#ifdef NETDATA_INTERNAL_CHECKS + return 0; +#else + return 1; +#endif + + time_t now = now_monotonic_sec(); + if(!start) start = now; + + if(reset) { + if(prevented) { + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + fprintf( + stderr, + "%s: %s LOG FLOOD PROTECTION reset for process '%s' " + "(prevented %lu logs in the last %"PRId64" seconds).\n", + date, + program_name, + program_name, + prevented, + (int64_t)(now - start)); + } + + start = now; + counter = 0; + prevented = 0; + } + + // detect if we log too much + counter++; + + if(now - start > error_log_throttle_period) { + if(prevented) { + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + fprintf( + stderr, + "%s: %s LOG FLOOD PROTECTION resuming logging from process '%s' " + "(prevented %lu logs in the last %"PRId64" seconds).\n", + date, + program_name, + program_name, + prevented, + (int64_t)error_log_throttle_period); + } + + // restart the period accounting + start = now; + counter = 1; + prevented = 0; + + // log this error + return 0; + } + + if(counter > error_log_errors_per_period) { + if(!prevented) { + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + fprintf( + stderr, + "%s: %s LOG FLOOD PROTECTION too many logs (%lu logs in %"PRId64" seconds, threshold is set to %lu logs " + "in %"PRId64" seconds). Preventing more logs from process '%s' for %"PRId64" seconds.\n", + date, + program_name, + counter, + (int64_t)(now - start), + error_log_errors_per_period, + (int64_t)error_log_throttle_period, + program_name, + (int64_t)(start + error_log_throttle_period - now)); + } + + prevented++; + + // prevent logging this error +#ifdef NETDATA_INTERNAL_CHECKS + return 0; +#else + return 1; +#endif + } + + return 0; +} + +void error_log_limit_reset(void) { + log_lock(); + + error_log_errors_per_period = error_log_errors_per_period_backup; + error_log_limit(1); + + log_unlock(); +} + +void error_log_limit_unlimited(void) { + log_lock(); + + error_log_errors_per_period = error_log_errors_per_period_backup; + error_log_limit(1); + + error_log_errors_per_period = ((error_log_errors_per_period_backup * 10) < 10000) ? 10000 : (error_log_errors_per_period_backup * 10); + + log_unlock(); +} + +// ---------------------------------------------------------------------------- +// debug log + +void debug_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) { + va_list args; + + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + + va_start( args, fmt ); + printf("%s: %s DEBUG : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, netdata_thread_tag(), line, file, function); + vprintf(fmt, args); + va_end( args ); + putchar('\n'); + + if(output_log_syslog) { + va_start( args, fmt ); + vsyslog(LOG_ERR, fmt, args ); + va_end( args ); + } + + fflush(stdout); +} + +// ---------------------------------------------------------------------------- +// info log + +void info_int( const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) +{ + va_list args; + + log_lock(); + + // prevent logging too much + if (error_log_limit(0)) { + log_unlock(); + return; + } + + if(error_log_syslog) { + va_start( args, fmt ); + vsyslog(LOG_INFO, fmt, args ); + va_end( args ); + } + + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + + va_start( args, fmt ); +#ifdef NETDATA_INTERNAL_CHECKS + fprintf(stderr, "%s: %s INFO : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, netdata_thread_tag(), line, file, function); +#else + fprintf(stderr, "%s: %s INFO : %s : ", date, program_name, netdata_thread_tag()); +#endif + vfprintf( stderr, fmt, args ); + va_end( args ); + + fputc('\n', stderr); + + log_unlock(); +} + +// ---------------------------------------------------------------------------- +// error log + +#if defined(STRERROR_R_CHAR_P) +// GLIBC version of strerror_r +static const char *strerror_result(const char *a, const char *b) { (void)b; return a; } +#elif defined(HAVE_STRERROR_R) +// POSIX version of strerror_r +static const char *strerror_result(int a, const char *b) { (void)a; return b; } +#elif defined(HAVE_C__GENERIC) + +// what a trick! +// http://stackoverflow.com/questions/479207/function-overloading-in-c +static const char *strerror_result_int(int a, const char *b) { (void)a; return b; } +static const char *strerror_result_string(const char *a, const char *b) { (void)b; return a; } + +#define strerror_result(a, b) _Generic((a), \ + int: strerror_result_int, \ + char *: strerror_result_string \ + )(a, b) + +#else +#error "cannot detect the format of function strerror_r()" +#endif + +void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { + if(erl->sleep_ut) + sleep_usec(erl->sleep_ut); + + // save a copy of errno - just in case this function generates a new error + int __errno = errno; + + va_list args; + + log_lock(); + + erl->count++; + time_t now = now_boottime_sec(); + if(now - erl->last_logged < erl->log_every) { + log_unlock(); + return; + } + + // prevent logging too much + if (error_log_limit(0)) { + log_unlock(); + return; + } + + if(error_log_syslog) { + va_start( args, fmt ); + vsyslog(LOG_ERR, fmt, args ); + va_end( args ); + } + + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + + va_start( args, fmt ); +#ifdef NETDATA_INTERNAL_CHECKS + fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function); +#else + fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); +#endif + vfprintf( stderr, fmt, args ); + va_end( args ); + + if(erl->count > 1) + fprintf(stderr, " (similar messages repeated %zu times in the last %llu secs)", erl->count, (unsigned long long)(erl->last_logged ? now - erl->last_logged : 0)); + + if(erl->sleep_ut) + fprintf(stderr, " (sleeping for %llu microseconds every time this happens)", erl->sleep_ut); + + if(__errno) { + char buf[1024]; + fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); + errno = 0; + } + else + fputc('\n', stderr); + + erl->last_logged = now; + erl->count = 0; + + log_unlock(); +} + +void error_int(const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { + // save a copy of errno - just in case this function generates a new error + int __errno = errno; + + va_list args; + + log_lock(); + + // prevent logging too much + if (error_log_limit(0)) { + log_unlock(); + return; + } + + if(error_log_syslog) { + va_start( args, fmt ); + vsyslog(LOG_ERR, fmt, args ); + va_end( args ); + } + + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + + va_start( args, fmt ); +#ifdef NETDATA_INTERNAL_CHECKS + fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function); +#else + fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); +#endif + vfprintf( stderr, fmt, args ); + va_end( args ); + + if(__errno) { + char buf[1024]; + fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); + errno = 0; + } + else + fputc('\n', stderr); + + log_unlock(); +} + +#ifdef NETDATA_INTERNAL_CHECKS +static void crash_netdata(void) { + // make Netdata core dump + abort(); +} +#endif + +#ifdef HAVE_BACKTRACE +#define BT_BUF_SIZE 100 +static void print_call_stack(void) { + int nptrs; + void *buffer[BT_BUF_SIZE]; + + nptrs = backtrace(buffer, BT_BUF_SIZE); + if(nptrs) + backtrace_symbols_fd(buffer, nptrs, fileno(stderr)); +} +#endif + +void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) { + // save a copy of errno - just in case this function generates a new error + int __errno = errno; + va_list args; + const char *thread_tag; + char os_threadname[NETDATA_THREAD_NAME_MAX + 1]; + + if(error_log_syslog) { + va_start( args, fmt ); + vsyslog(LOG_CRIT, fmt, args ); + va_end( args ); + } + + thread_tag = netdata_thread_tag(); + if (!netdata_thread_tag_exists()) { + os_thread_get_current_name_np(os_threadname); + if ('\0' != os_threadname[0]) { /* If it is not an empty string replace "MAIN" thread_tag */ + thread_tag = os_threadname; + } + } + + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + + log_lock(); + + va_start( args, fmt ); +#ifdef NETDATA_INTERNAL_CHECKS + fprintf(stderr, "%s: %s FATAL : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, thread_tag, line, file, function); +#else + fprintf(stderr, "%s: %s FATAL : %s : ", date, program_name, thread_tag); +#endif + vfprintf( stderr, fmt, args ); + va_end( args ); + + perror(" # "); + fputc('\n', stderr); + + log_unlock(); + + char action_data[70+1]; + snprintfz(action_data, 70, "%04lu@%-10.10s:%-15.15s/%d", line, file, function, __errno); + char action_result[60+1]; + + snprintfz(action_result, 60, "%s:%s", program_name, strncmp(thread_tag, "STREAM_RECEIVER", strlen("STREAM_RECEIVER")) ? thread_tag : "[x]"); + send_statistics("FATAL", action_result, action_data); + +#ifdef HAVE_BACKTRACE + print_call_stack(); +#endif + +#ifdef NETDATA_INTERNAL_CHECKS + crash_netdata(); +#endif + + netdata_cleanup_and_exit(1); +} + +// ---------------------------------------------------------------------------- +// access log + +void log_access( const char *fmt, ... ) { + va_list args; + + if(access_log_syslog) { + va_start( args, fmt ); + vsyslog(LOG_INFO, fmt, args ); + va_end( args ); + } + + if(stdaccess) { + static netdata_mutex_t access_mutex = NETDATA_MUTEX_INITIALIZER; + + if(web_server_is_multithreaded) + netdata_mutex_lock(&access_mutex); + + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + fprintf(stdaccess, "%s: ", date); + + va_start( args, fmt ); + vfprintf( stdaccess, fmt, args ); + va_end( args ); + fputc('\n', stdaccess); + + if(web_server_is_multithreaded) + netdata_mutex_unlock(&access_mutex); + } +} + +// ---------------------------------------------------------------------------- +// health log + +void log_health( const char *fmt, ... ) { + va_list args; + + if(health_log_syslog) { + va_start( args, fmt ); + vsyslog(LOG_INFO, fmt, args ); + va_end( args ); + } + + if(stdhealth) { + static netdata_mutex_t health_mutex = NETDATA_MUTEX_INITIALIZER; + + if(web_server_is_multithreaded) + netdata_mutex_lock(&health_mutex); + + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + fprintf(stdhealth, "%s: ", date); + + va_start( args, fmt ); + vfprintf( stdhealth, fmt, args ); + va_end( args ); + fputc('\n', stdhealth); + + if(web_server_is_multithreaded) + netdata_mutex_unlock(&health_mutex); + } +} + +#ifdef ENABLE_ACLK +void log_aclk_message_bin( const char *data, const size_t data_len, int tx, const char *mqtt_topic, const char *message_name) { + if (aclklog) { + static netdata_mutex_t aclklog_mutex = NETDATA_MUTEX_INITIALIZER; + netdata_mutex_lock(&aclklog_mutex); + + char date[LOG_DATE_LENGTH]; + log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); + fprintf(aclklog, "%s: %s Msg:\"%s\", MQTT-topic:\"%s\": ", date, tx ? "OUTGOING" : "INCOMING", message_name, mqtt_topic); + + fwrite(data, data_len, 1, aclklog); + + fputc('\n', aclklog); + + netdata_mutex_unlock(&aclklog_mutex); + } +} +#endif diff --git a/libnetdata/log/log.h b/libnetdata/log/log.h new file mode 100644 index 0000000..11dab4c --- /dev/null +++ b/libnetdata/log/log.h @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_LOG_H +#define NETDATA_LOG_H 1 + +# ifdef __cplusplus +extern "C" { +# endif + +#include "../libnetdata.h" + +#define D_WEB_BUFFER 0x0000000000000001 +#define D_WEB_CLIENT 0x0000000000000002 +#define D_LISTENER 0x0000000000000004 +#define D_WEB_DATA 0x0000000000000008 +#define D_OPTIONS 0x0000000000000010 +#define D_PROCNETDEV_LOOP 0x0000000000000020 +#define D_RRD_STATS 0x0000000000000040 +#define D_WEB_CLIENT_ACCESS 0x0000000000000080 +#define D_TC_LOOP 0x0000000000000100 +#define D_DEFLATE 0x0000000000000200 +#define D_CONFIG 0x0000000000000400 +#define D_PLUGINSD 0x0000000000000800 +#define D_CHILDS 0x0000000000001000 +#define D_EXIT 0x0000000000002000 +#define D_CHECKS 0x0000000000004000 +#define D_NFACCT_LOOP 0x0000000000008000 +#define D_PROCFILE 0x0000000000010000 +#define D_RRD_CALLS 0x0000000000020000 +#define D_DICTIONARY 0x0000000000040000 +#define D_MEMORY 0x0000000000080000 +#define D_CGROUP 0x0000000000100000 +#define D_REGISTRY 0x0000000000200000 +#define D_VARIABLES 0x0000000000400000 +#define D_HEALTH 0x0000000000800000 +#define D_CONNECT_TO 0x0000000001000000 +#define D_RRDHOST 0x0000000002000000 +#define D_LOCKS 0x0000000004000000 +#define D_EXPORTING 0x0000000008000000 +#define D_STATSD 0x0000000010000000 +#define D_POLLFD 0x0000000020000000 +#define D_STREAM 0x0000000040000000 +#define D_ANALYTICS 0x0000000080000000 +#define D_RRDENGINE 0x0000000100000000 +#define D_ACLK 0x0000000200000000 +#define D_METADATALOG 0x0000000400000000 +#define D_ACLK_SYNC 0x0000000800000000 +#define D_META_SYNC 0x0000001000000000 +#define D_REPLICATION 0x0000002000000000 +#define D_SYSTEM 0x8000000000000000 + +extern int web_server_is_multithreaded; + +extern uint64_t debug_flags; + +extern const char *program_name; + +extern int stdaccess_fd; +extern FILE *stdaccess; + +extern int stdhealth_fd; +extern FILE *stdhealth; + +extern const char *stdaccess_filename; +extern const char *stderr_filename; +extern const char *stdout_filename; +extern const char *stdhealth_filename; +extern const char *facility_log; + +#ifdef ENABLE_ACLK +extern const char *aclklog_filename; +extern int aclklog_fd; +extern FILE *aclklog; +extern int aclklog_enabled; +#endif + +extern int access_log_syslog; +extern int error_log_syslog; +extern int output_log_syslog; +extern int health_log_syslog; + +extern time_t error_log_throttle_period; +extern unsigned long error_log_errors_per_period, error_log_errors_per_period_backup; +int error_log_limit(int reset); + +void open_all_log_files(); +void reopen_all_log_files(); + +#define LOG_DATE_LENGTH 26 +void log_date(char *buffer, size_t len, time_t now); + +static inline void debug_dummy(void) {} + +void error_log_limit_reset(void); +void error_log_limit_unlimited(void); + +typedef struct error_with_limit { + time_t log_every; + size_t count; + time_t last_logged; + usec_t sleep_ut; +} ERROR_LIMIT; + +#define error_limit_static_global_var(var, log_every_secs, sleep_usecs) static ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) } +#define error_limit_static_thread_var(var, log_every_secs, sleep_usecs) static __thread ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) } + +#ifdef NETDATA_INTERNAL_CHECKS +#define debug(type, args...) do { if(unlikely(debug_flags & type)) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) +#define internal_error(condition, args...) do { if(unlikely(condition)) error_int("IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0) +#define internal_fatal(condition, args...) do { if(unlikely(condition)) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) +#else +#define debug(type, args...) debug_dummy() +#define internal_error(args...) debug_dummy() +#define internal_fatal(args...) debug_dummy() +#endif + +#define info(args...) info_int(__FILE__, __FUNCTION__, __LINE__, ##args) +#define infoerr(args...) error_int("INFO", __FILE__, __FUNCTION__, __LINE__, ##args) +#define error(args...) error_int("ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) +#define error_limit(erl, args...) error_limit_int(erl, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) +#define fatal(args...) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args) +#define fatal_assert(expr) ((expr) ? (void)(0) : fatal_int(__FILE__, __FUNCTION__, __LINE__, "Assertion `%s' failed", #expr)) + +void send_statistics(const char *action, const char *action_result, const char *action_data); +void debug_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); +void info_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); +void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6); +void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, unsigned long line __maybe_unused, const char *fmt, ... ) PRINTFLIKE(6, 7);; +void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) NORETURN PRINTFLIKE(4, 5); +void log_access( const char *fmt, ... ) PRINTFLIKE(1, 2); +void log_health( const char *fmt, ... ) PRINTFLIKE(1, 2); + +#ifdef ENABLE_ACLK +void log_aclk_message_bin( const char *data, const size_t data_len, int tx, const char *mqtt_topic, const char *message_name); +#endif + +# ifdef __cplusplus +} +# endif + +#endif /* NETDATA_LOG_H */ diff --git a/libnetdata/onewayalloc/Makefile.am b/libnetdata/onewayalloc/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/onewayalloc/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/onewayalloc/README.md b/libnetdata/onewayalloc/README.md new file mode 100644 index 0000000..1f459c2 --- /dev/null +++ b/libnetdata/onewayalloc/README.md @@ -0,0 +1,71 @@ + + +# One Way Allocator + +This is a very fast single-threaded-only memory allocator, that minimized system calls +when a lot of memory allocations needs to be made to perform a task, which all of them +can be freed together when the task finishes. + +It has been designed to be used for netdata context queries. + +For netdata to perform a context query, it builds a virtual chart, a chart that contains +all the dimensions of the charts having the same context. This process requires allocating +several structures for each of the dimensions to attach them to the virtual chart. All +these data can be freed immediately after the query finishes. + +## How it works + +1. The caller calls `ONEWAYALLOC *owa = onewayalloc_create(sizehint)` to create an OWA. + Internally this allocates the first memory buffer with size >= `sizehint`. + If `sizehint` is zero, it will allocate 1 hardware page (usually 4kb). + No need to check for success or failure. As with `mallocz()` in netdata, a `fatal()` + will be called if the allocation fails - although this will never fail, since Linux + does not really check if there is memory available for `mmap()` calls. + +2. The caller can then perform any number of the following calls to acquire memory: + - `onewayalloc_mallocz(owa, size)`, similar to `mallocz()` + - `onewayalloc_callocz(owa, nmemb, size)`, similar to `callocz()` + - `onewayalloc_strdupz(owa, string)`, similar to `strdupz()` + - `onewayalloc_memdupz(owa, ptr, size)`, similar to `mallocz()` and then `memcpy()` + +3. Once the caller has done all the work with the allocated buffers, all memory allocated + can be freed with `onewayalloc_destroy(owa)`. + +## How faster it is? + +On modern hardware, for any single query the performance improvement is marginal and not +noticeable at all. + +We performed the following tests using the same huge context query (1000 charts, +100 dimensions each = 100k dimensions) + +1. using `mallocz()`, 1 caller, 256 queries (sequential) +2. using `mallocz()`, 256 callers, 1 query each (parallel) +3. using `OWA`, 1 caller, 256 queries (sequential) +4. using `OWA`, 256 callers, 1 query each (parallel) + +Netdata was configured to use 24 web threads on the 24 core server we used. + +The results are as follows: + +### sequential test + +branch|transactions|time to complete|transaction rate|average response time|min response time|max response time +:---:|:---:|:---:|:---:|:---:|:---:|:---:| +`malloc()`|256|322.35s|0.79/sec|1.26s|1.01s|1.87s +`OWA`|256|310.19s|0.83/sec|1.21s|1.04s|1.63s + +For a single query, the improvement is just marginal and not noticeable at all. + +### parallel test + +branch|transactions|time to complete|transaction rate|average response time|min response time|max response time +:---:|:---:|:---:|:---:|:---:|:---:|:---:| +`malloc()`|256|84.72s|3.02/sec|68.43s|50.20s|84.71s +`OWA`|256|39.35s|6.51/sec|34.48s|20.55s|39.34s + +For parallel workload, like the one executed by netdata.cloud, `OWA` provides a 54% overall speed improvement (more than double the overall +user-experienced speed, including the data query itself). diff --git a/libnetdata/onewayalloc/onewayalloc.c b/libnetdata/onewayalloc/onewayalloc.c new file mode 100644 index 0000000..59c3b68 --- /dev/null +++ b/libnetdata/onewayalloc/onewayalloc.c @@ -0,0 +1,193 @@ +#include "onewayalloc.h" + +// https://www.gnu.org/software/libc/manual/html_node/Aligned-Memory-Blocks.html +#define OWA_NATURAL_ALIGNMENT (sizeof(uintptr_t) * 2) + +typedef struct owa_page { + size_t stats_pages; + size_t stats_pages_size; + size_t stats_mallocs_made; + size_t stats_mallocs_size; + size_t size; // the total size of the page + size_t offset; // the first free byte of the page + struct owa_page *next; // the next page on the list + struct owa_page *last; // the last page on the list - we currently allocate on this +} OWA_PAGE; + +// allocations need to be aligned to CPU register width +// https://en.wikipedia.org/wiki/Data_structure_alignment +static inline size_t natural_alignment(size_t size) { + if(unlikely(size % OWA_NATURAL_ALIGNMENT)) + size = size + OWA_NATURAL_ALIGNMENT - (size % OWA_NATURAL_ALIGNMENT); + + return size; +} + +// Create an OWA +// Once it is created, the called may call the onewayalloc_mallocz() +// any number of times, for any amount of memory. + +static OWA_PAGE *onewayalloc_create_internal(OWA_PAGE *head, size_t size_hint) { + static size_t OWA_NATURAL_PAGE_SIZE = 0; + + if(unlikely(!OWA_NATURAL_PAGE_SIZE)) { + long int page_size = sysconf(_SC_PAGE_SIZE); + if (unlikely(page_size == -1)) + OWA_NATURAL_PAGE_SIZE = 4096; + else + OWA_NATURAL_PAGE_SIZE = page_size; + } + + // our default page size + size_t size = OWA_NATURAL_PAGE_SIZE; + + // make sure the new page will fit both the requested size + // and the OWA_PAGE structure at its beginning + size_hint += natural_alignment(sizeof(OWA_PAGE)); + + // prefer the user size if it is bigger than our size + if(size_hint > size) size = size_hint; + + // try to allocate half of the total we have allocated already + if(likely(head)) { + size_t optimal_size = head->stats_pages_size / 2; + if(optimal_size > size) size = optimal_size; + } + + // Make sure our allocations are always a multiple of the hardware page size + if(size % OWA_NATURAL_PAGE_SIZE) size = size + OWA_NATURAL_PAGE_SIZE - (size % OWA_NATURAL_PAGE_SIZE); + + // OWA_PAGE *page = (OWA_PAGE *)netdata_mmap(NULL, size, MAP_ANONYMOUS|MAP_PRIVATE, 0); + // if(unlikely(!page)) fatal("Cannot allocate onewayalloc buffer of size %zu", size); + OWA_PAGE *page = (OWA_PAGE *)mallocz(size); + + page->size = size; + page->offset = natural_alignment(sizeof(OWA_PAGE)); + page->next = page->last = NULL; + + if(unlikely(!head)) { + // this is the first time we are called + head = page; + head->stats_pages = 0; + head->stats_pages_size = 0; + head->stats_mallocs_made = 0; + head->stats_mallocs_size = 0; + } + else { + // link this page into our existing linked list + head->last->next = page; + } + + head->last = page; + head->stats_pages++; + head->stats_pages_size += size; + + return page; +} + +ONEWAYALLOC *onewayalloc_create(size_t size_hint) { + return (ONEWAYALLOC *)onewayalloc_create_internal(NULL, size_hint); +} + +void *onewayalloc_mallocz(ONEWAYALLOC *owa, size_t size) { + OWA_PAGE *head = (OWA_PAGE *)owa; + OWA_PAGE *page = head->last; + + // update stats + head->stats_mallocs_made++; + head->stats_mallocs_size += size; + + // make sure the size is aligned + size = natural_alignment(size); + + if(unlikely(page->size - page->offset < size)) { + // we don't have enough space to fit the data + // let's get another page + page = onewayalloc_create_internal(head, (size > page->size)?size:page->size); + } + + char *mem = (char *)page; + mem = &mem[page->offset]; + page->offset += size; + + return (void *)mem; +} + +void *onewayalloc_callocz(ONEWAYALLOC *owa, size_t nmemb, size_t size) { + size_t total = nmemb * size; + void *mem = onewayalloc_mallocz(owa, total); + memset(mem, 0, total); + return mem; +} + +char *onewayalloc_strdupz(ONEWAYALLOC *owa, const char *s) { + size_t size = strlen(s) + 1; + char *d = onewayalloc_mallocz((OWA_PAGE *)owa, size); + memcpy(d, s, size); + return d; +} + +void *onewayalloc_memdupz(ONEWAYALLOC *owa, const void *src, size_t size) { + void *mem = onewayalloc_mallocz((OWA_PAGE *)owa, size); + // memcpy() is way faster than strcpy() since it does not check for '\0' + memcpy(mem, src, size); + return mem; +} + +void onewayalloc_freez(ONEWAYALLOC *owa __maybe_unused, const void *ptr __maybe_unused) { +#ifdef NETDATA_INTERNAL_CHECKS + // allow the caller to call us for a mallocz() allocation + // so try to find it in our memory and if it is not there + // log an error + + if (unlikely(!ptr)) + return; + + OWA_PAGE *head = (OWA_PAGE *)owa; + OWA_PAGE *page; + uintptr_t seeking = (uintptr_t)ptr; + + for(page = head; page ;page = page->next) { + uintptr_t start = (uintptr_t)page; + uintptr_t end = start + page->size; + + if(seeking >= start && seeking <= end) { + // found it - it is ours + // just return to let the caller think we actually did something + return; + } + } + + // not found - it is not ours + // let's free it with the system allocator + error("ONEWAYALLOC: request to free address 0x%p that is not allocated by this OWA", ptr); +#endif + + return; +} + +void *onewayalloc_doublesize(ONEWAYALLOC *owa, const void *src, size_t oldsize) { + size_t newsize = oldsize * 2; + void *dst = onewayalloc_mallocz(owa, newsize); + memcpy(dst, src, oldsize); + onewayalloc_freez(owa, src); + return dst; +} + +void onewayalloc_destroy(ONEWAYALLOC *owa) { + if(!owa) return; + + OWA_PAGE *head = (OWA_PAGE *)owa; + + //info("OWA: %zu allocations of %zu total bytes, in %zu pages of %zu total bytes", + // head->stats_mallocs_made, head->stats_mallocs_size, + // head->stats_pages, head->stats_pages_size); + + OWA_PAGE *page = head; + while(page) { + OWA_PAGE *p = page; + page = page->next; + // munmap(p, p->size); + freez(p); + } +} diff --git a/libnetdata/onewayalloc/onewayalloc.h b/libnetdata/onewayalloc/onewayalloc.h new file mode 100644 index 0000000..e536e05 --- /dev/null +++ b/libnetdata/onewayalloc/onewayalloc.h @@ -0,0 +1,19 @@ +#ifndef ONEWAYALLOC_H +#define ONEWAYALLOC_H 1 + +#include "../libnetdata.h" + +typedef void ONEWAYALLOC; + +ONEWAYALLOC *onewayalloc_create(size_t size_hint); +void onewayalloc_destroy(ONEWAYALLOC *owa); + +void *onewayalloc_mallocz(ONEWAYALLOC *owa, size_t size); +void *onewayalloc_callocz(ONEWAYALLOC *owa, size_t nmemb, size_t size); +char *onewayalloc_strdupz(ONEWAYALLOC *owa, const char *s); +void *onewayalloc_memdupz(ONEWAYALLOC *owa, const void *src, size_t size); +void onewayalloc_freez(ONEWAYALLOC *owa, const void *ptr); + +void *onewayalloc_doublesize(ONEWAYALLOC *owa, const void *src, size_t oldsize); + +#endif // ONEWAYALLOC_H diff --git a/libnetdata/os.c b/libnetdata/os.c new file mode 100644 index 0000000..196288a --- /dev/null +++ b/libnetdata/os.c @@ -0,0 +1,230 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "os.h" + +// ---------------------------------------------------------------------------- +// system functions +// to retrieve settings of the system + +int processors = 1; +long get_system_cpus(void) { + processors = 1; + +#ifdef __APPLE__ + int32_t tmp_processors; + + if (unlikely(GETSYSCTL_BY_NAME("hw.logicalcpu", tmp_processors))) { + error("Assuming system has %d processors.", processors); + } else { + processors = tmp_processors; + } + + return processors; +#elif __FreeBSD__ + int32_t tmp_processors; + + if (unlikely(GETSYSCTL_BY_NAME("hw.ncpu", tmp_processors))) { + error("Assuming system has %d processors.", processors); + } else { + processors = tmp_processors; + } + + return processors; +#else + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/stat", netdata_configured_host_prefix); + + procfile *ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT); + if(!ff) { + error("Cannot open file '%s'. Assuming system has %d processors.", filename, processors); + return processors; + } + + ff = procfile_readall(ff); + if(!ff) { + error("Cannot open file '%s'. Assuming system has %d processors.", filename, processors); + return processors; + } + + processors = 0; + unsigned int i; + for(i = 0; i < procfile_lines(ff); i++) { + if(!procfile_linewords(ff, i)) continue; + + if(strncmp(procfile_lineword(ff, i, 0), "cpu", 3) == 0) processors++; + } + processors--; + if(processors < 1) processors = 1; + + procfile_close(ff); + + debug(D_SYSTEM, "System has %d processors.", processors); + return processors; + +#endif /* __APPLE__, __FreeBSD__ */ +} + +pid_t pid_max = 32768; +pid_t get_system_pid_max(void) { +#ifdef __APPLE__ + // As we currently do not know a solution to query pid_max from the os + // we use the number defined in bsd/sys/proc_internal.h in XNU sources + pid_max = 99999; + return pid_max; +#elif __FreeBSD__ + int32_t tmp_pid_max; + + if (unlikely(GETSYSCTL_BY_NAME("kern.pid_max", tmp_pid_max))) { + pid_max = 99999; + error("Assuming system's maximum pid is %d.", pid_max); + } else { + pid_max = tmp_pid_max; + } + + return pid_max; +#else + + static char read = 0; + if(unlikely(read)) return pid_max; + read = 1; + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", netdata_configured_host_prefix); + + unsigned long long max = 0; + if(read_single_number_file(filename, &max) != 0) { + error("Cannot open file '%s'. Assuming system supports %d pids.", filename, pid_max); + return pid_max; + } + + if(!max) { + error("Cannot parse file '%s'. Assuming system supports %d pids.", filename, pid_max); + return pid_max; + } + + pid_max = (pid_t) max; + return pid_max; + +#endif /* __APPLE__, __FreeBSD__ */ +} + +unsigned int system_hz; +void get_system_HZ(void) { + long ticks; + + if ((ticks = sysconf(_SC_CLK_TCK)) == -1) { + error("Cannot get system clock ticks"); + } + + system_hz = (unsigned int) ticks; +} + +// ===================================================================================================================== +// FreeBSD + +#if __FreeBSD__ + +const char *os_type = "freebsd"; + +int getsysctl_by_name(const char *name, void *ptr, size_t len) { + size_t nlen = len; + + if (unlikely(sysctlbyname(name, ptr, &nlen, NULL, 0) == -1)) { + error("FREEBSD: sysctl(%s...) failed: %s", name, strerror(errno)); + return 1; + } + if (unlikely(nlen != len)) { + error("FREEBSD: sysctl(%s...) expected %lu, got %lu", name, (unsigned long)len, (unsigned long)nlen); + return 1; + } + return 0; +} + +int getsysctl_simple(const char *name, int *mib, size_t miblen, void *ptr, size_t len) { + size_t nlen = len; + + if (unlikely(!mib[0])) + if (unlikely(getsysctl_mib(name, mib, miblen))) + return 1; + + if (unlikely(sysctl(mib, miblen, ptr, &nlen, NULL, 0) == -1)) { + error("FREEBSD: sysctl(%s...) failed: %s", name, strerror(errno)); + return 1; + } + if (unlikely(nlen != len)) { + error("FREEBSD: sysctl(%s...) expected %lu, got %lu", name, (unsigned long)len, (unsigned long)nlen); + return 1; + } + + return 0; +} + +int getsysctl(const char *name, int *mib, size_t miblen, void *ptr, size_t *len) { + size_t nlen = *len; + + if (unlikely(!mib[0])) + if (unlikely(getsysctl_mib(name, mib, miblen))) + return 1; + + if (unlikely(sysctl(mib, miblen, ptr, len, NULL, 0) == -1)) { + error("FREEBSD: sysctl(%s...) failed: %s", name, strerror(errno)); + return 1; + } + if (unlikely(ptr != NULL && nlen != *len)) { + error("FREEBSD: sysctl(%s...) expected %lu, got %lu", name, (unsigned long)*len, (unsigned long)nlen); + return 1; + } + + return 0; +} + +int getsysctl_mib(const char *name, int *mib, size_t len) { + size_t nlen = len; + + if (unlikely(sysctlnametomib(name, mib, &nlen) == -1)) { + error("FREEBSD: sysctl(%s...) failed: %s", name, strerror(errno)); + return 1; + } + if (unlikely(nlen != len)) { + error("FREEBSD: sysctl(%s...) expected %lu, got %lu", name, (unsigned long)len, (unsigned long)nlen); + return 1; + } + return 0; +} + + +#endif + + +// ===================================================================================================================== +// MacOS + +#if __APPLE__ + +const char *os_type = "macos"; + +int getsysctl_by_name(const char *name, void *ptr, size_t len) { + size_t nlen = len; + + if (unlikely(sysctlbyname(name, ptr, &nlen, NULL, 0) == -1)) { + error("MACOS: sysctl(%s...) failed: %s", name, strerror(errno)); + return 1; + } + if (unlikely(nlen != len)) { + error("MACOS: sysctl(%s...) expected %lu, got %lu", name, (unsigned long)len, (unsigned long)nlen); + return 1; + } + return 0; +} + +#endif + +// ===================================================================================================================== +// Linux + +#if __linux__ + +const char *os_type = "linux"; + +#endif diff --git a/libnetdata/os.h b/libnetdata/os.h new file mode 100644 index 0000000..67abf0b --- /dev/null +++ b/libnetdata/os.h @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_OS_H +#define NETDATA_OS_H + +#include "libnetdata.h" + +// ===================================================================================================================== +// FreeBSD + +#if __FreeBSD__ + +#include + +#define GETSYSCTL_BY_NAME(name, var) getsysctl_by_name(name, &(var), sizeof(var)) +int getsysctl_by_name(const char *name, void *ptr, size_t len); + +#define GETSYSCTL_MIB(name, mib) getsysctl_mib(name, mib, sizeof(mib)/sizeof(int)) + +int getsysctl_mib(const char *name, int *mib, size_t len); + +#define GETSYSCTL_SIMPLE(name, mib, var) getsysctl_simple(name, mib, sizeof(mib)/sizeof(int), &(var), sizeof(var)) +#define GETSYSCTL_WSIZE(name, mib, var, size) getsysctl_simple(name, mib, sizeof(mib)/sizeof(int), var, size) + +int getsysctl_simple(const char *name, int *mib, size_t miblen, void *ptr, size_t len); + +#define GETSYSCTL_SIZE(name, mib, size) getsysctl(name, mib, sizeof(mib)/sizeof(int), NULL, &(size)) +#define GETSYSCTL(name, mib, var, size) getsysctl(name, mib, sizeof(mib)/sizeof(int), &(var), &(size)) + +int getsysctl(const char *name, int *mib, size_t miblen, void *ptr, size_t *len); + +#endif + +// ===================================================================================================================== +// MacOS + +#if __APPLE__ + +#include + +#define GETSYSCTL_BY_NAME(name, var) getsysctl_by_name(name, &(var), sizeof(var)) +int getsysctl_by_name(const char *name, void *ptr, size_t len); + +#endif + +// ===================================================================================================================== +// common defs for Apple/FreeBSD/Linux + +extern const char *os_type; + +extern int processors; +long get_system_cpus(void); + +extern pid_t pid_max; +pid_t get_system_pid_max(void); + +extern unsigned int system_hz; +void get_system_HZ(void); + +#include +#if defined(__FreeBSD__) || defined(__APPLE__) +#define ADJUST_TIMEX(x) ntp_adjtime(x) +#else +#define ADJUST_TIMEX(x) adjtimex(x) +#endif + +#endif //NETDATA_OS_H diff --git a/libnetdata/popen/Makefile.am b/libnetdata/popen/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/popen/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/popen/README.md b/libnetdata/popen/README.md new file mode 100644 index 0000000..db4aefa --- /dev/null +++ b/libnetdata/popen/README.md @@ -0,0 +1,5 @@ + + + diff --git a/libnetdata/popen/popen.c b/libnetdata/popen/popen.c new file mode 100644 index 0000000..57f957f --- /dev/null +++ b/libnetdata/popen/popen.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +// ---------------------------------------------------------------------------- +// popen with tracking + +static pthread_mutex_t netdata_popen_tracking_mutex; +static bool netdata_popen_tracking_enabled = false; + +struct netdata_popen { + pid_t pid; + struct netdata_popen *next; + struct netdata_popen *prev; +}; + +static struct netdata_popen *netdata_popen_root = NULL; + +// myp_add_lock takes the lock if we're tracking. +static void netdata_popen_tracking_lock(void) { + if(!netdata_popen_tracking_enabled) + return; + + netdata_mutex_lock(&netdata_popen_tracking_mutex); +} + +// myp_add_unlock release the lock if we're tracking. +static void netdata_popen_tracking_unlock(void) { + if(!netdata_popen_tracking_enabled) + return; + + netdata_mutex_unlock(&netdata_popen_tracking_mutex); +} + +// myp_add_locked adds pid if we're tracking. +// myp_add_lock must have been called previously. +static void netdata_popen_tracking_add_pid_unsafe(pid_t pid) { + if(!netdata_popen_tracking_enabled) + return; + + struct netdata_popen *mp; + + mp = mallocz(sizeof(struct netdata_popen)); + mp->pid = pid; + + DOUBLE_LINKED_LIST_PREPEND_UNSAFE(netdata_popen_root, mp, prev, next); +} + +// myp_del deletes pid if we're tracking. +static void netdata_popen_tracking_del_pid(pid_t pid) { + if(!netdata_popen_tracking_enabled) + return; + + struct netdata_popen *mp; + + netdata_mutex_lock(&netdata_popen_tracking_mutex); + + DOUBLE_LINKED_LIST_FOREACH_FORWARD(netdata_popen_root, mp, prev, next) { + if(unlikely(mp->pid == pid)) + break; + } + + if(mp) { + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(netdata_popen_root, mp, prev, next); + freez(mp); + } + else + error("Cannot find pid %d.", pid); + + netdata_mutex_unlock(&netdata_popen_tracking_mutex); +} + +// netdata_popen_tracking_init() should be called by apps which act as init +// (pid 1) so that processes created by mypopen and mypopene +// are tracked. This enables the reaper to ignore processes +// which will be handled internally, by calling myp_reap, to +// avoid issues with already reaped processes during wait calls. +// +// Callers should call myp_free() to clean up resources. +void netdata_popen_tracking_init(void) { + info("process tracking enabled."); + netdata_popen_tracking_enabled = true; + + if (netdata_mutex_init(&netdata_popen_tracking_mutex) != 0) + fatal("netdata_popen_tracking_init() mutex init failed."); +} + +// myp_free cleans up any resources allocated for process +// tracking. +void netdata_popen_tracking_cleanup(void) { + if(!netdata_popen_tracking_enabled) + return; + + netdata_mutex_lock(&netdata_popen_tracking_mutex); + netdata_popen_tracking_enabled = false; + + while(netdata_popen_root) { + struct netdata_popen *mp = netdata_popen_root; + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(netdata_popen_root, mp, prev, next); + freez(mp); + } + + netdata_mutex_unlock(&netdata_popen_tracking_mutex); +} + +// myp_reap returns 1 if pid should be reaped, 0 otherwise. +int netdata_popen_tracking_pid_shoud_be_reaped(pid_t pid) { + if(!netdata_popen_tracking_enabled) + return 0; + + netdata_mutex_lock(&netdata_popen_tracking_mutex); + + int ret = 1; + struct netdata_popen *mp; + DOUBLE_LINKED_LIST_FOREACH_FORWARD(netdata_popen_root, mp, prev, next) { + if(unlikely(mp->pid == pid)) { + ret = 0; + break; + } + } + + netdata_mutex_unlock(&netdata_popen_tracking_mutex); + return ret; +} + +// ---------------------------------------------------------------------------- +// helpers + +static inline void convert_argv_to_string(char *dst, size_t size, const char *spawn_argv[]) { + int i; + for(i = 0; spawn_argv[i] ;i++) { + if(i == 0) snprintfz(dst, size, "%s", spawn_argv[i]); + else { + size_t len = strlen(dst); + snprintfz(&dst[len], size - len, " '%s'", spawn_argv[i]); + } + } +} + +// ---------------------------------------------------------------------------- +// the core of netdata popen + +/* + * Returns -1 on failure, 0 on success. When POPEN_FLAG_CREATE_PIPE is set, on success set the FILE *fp pointer. + */ +#define PIPE_READ 0 +#define PIPE_WRITE 1 + +static int popene_internal(volatile pid_t *pidptr, char **env, uint8_t flags, FILE **fpp_child_stdin, FILE **fpp_child_stdout, const char *command, const char *spawn_argv[]) { + // create a string to be logged about the command we are running + char command_to_be_logged[2048]; + convert_argv_to_string(command_to_be_logged, sizeof(command_to_be_logged), spawn_argv); + // info("custom_popene() running command: %s", command_to_be_logged); + + int ret = 0; // success by default + int attr_rc = 1; // failure by default + + FILE *fp_child_stdin = NULL, *fp_child_stdout = NULL; + int pipefd_stdin[2] = { -1, -1 }; + int pipefd_stdout[2] = { -1, -1 }; + + pid_t pid; + posix_spawnattr_t attr; + posix_spawn_file_actions_t fa; + + int stdin_fd_to_exclude_from_closing = -1; + int stdout_fd_to_exclude_from_closing = -1; + + if(posix_spawn_file_actions_init(&fa)) { + error("POPEN: posix_spawn_file_actions_init() failed."); + ret = -1; + goto set_return_values_and_return; + } + + if(fpp_child_stdin) { + if (pipe(pipefd_stdin) == -1) { + error("POPEN: stdin pipe() failed"); + ret = -1; + goto cleanup_and_return; + } + + if ((fp_child_stdin = fdopen(pipefd_stdin[PIPE_WRITE], "w")) == NULL) { + error("POPEN: fdopen() stdin failed"); + ret = -1; + goto cleanup_and_return; + } + + if(posix_spawn_file_actions_adddup2(&fa, pipefd_stdin[PIPE_READ], STDIN_FILENO)) { + error("POPEN: posix_spawn_file_actions_adddup2() on stdin failed."); + ret = -1; + goto cleanup_and_return; + } + } + else { + if (posix_spawn_file_actions_addopen(&fa, STDIN_FILENO, "/dev/null", O_RDONLY, 0)) { + error("POPEN: posix_spawn_file_actions_addopen() on stdin to /dev/null failed."); + // this is not a fatal error + stdin_fd_to_exclude_from_closing = STDIN_FILENO; + } + } + + if (fpp_child_stdout) { + if (pipe(pipefd_stdout) == -1) { + error("POPEN: stdout pipe() failed"); + ret = -1; + goto cleanup_and_return; + } + + if ((fp_child_stdout = fdopen(pipefd_stdout[PIPE_READ], "r")) == NULL) { + error("POPEN: fdopen() stdout failed"); + ret = -1; + goto cleanup_and_return; + } + + if(posix_spawn_file_actions_adddup2(&fa, pipefd_stdout[PIPE_WRITE], STDOUT_FILENO)) { + error("POPEN: posix_spawn_file_actions_adddup2() on stdout failed."); + ret = -1; + goto cleanup_and_return; + } + } + else { + if (posix_spawn_file_actions_addopen(&fa, STDOUT_FILENO, "/dev/null", O_WRONLY, 0)) { + error("POPEN: posix_spawn_file_actions_addopen() on stdout to /dev/null failed."); + // this is not a fatal error + stdout_fd_to_exclude_from_closing = STDOUT_FILENO; + } + } + + if(flags & POPEN_FLAG_CLOSE_FD) { + // Mark all files to be closed by the exec() stage of posix_spawn() + for(int i = (int)(sysconf(_SC_OPEN_MAX) - 1); i >= 0; i--) { + if(likely(i != STDERR_FILENO && i != stdin_fd_to_exclude_from_closing && i != stdout_fd_to_exclude_from_closing)) + (void)fcntl(i, F_SETFD, FD_CLOEXEC); + } + } + + attr_rc = posix_spawnattr_init(&attr); + if(attr_rc) { + // failed + error("POPEN: posix_spawnattr_init() failed."); + } + else { + // success + // reset all signals in the child + + if (posix_spawnattr_setflags(&attr, POSIX_SPAWN_SETSIGMASK | POSIX_SPAWN_SETSIGDEF)) + error("POPEN: posix_spawnattr_setflags() failed."); + + sigset_t mask; + sigemptyset(&mask); + + if (posix_spawnattr_setsigmask(&attr, &mask)) + error("POPEN: posix_spawnattr_setsigmask() failed."); + } + + // Take the lock while we fork to ensure we don't race with SIGCHLD + // delivery on a process which exits quickly. + netdata_popen_tracking_lock(); + if (!posix_spawn(&pid, command, &fa, &attr, (char * const*)spawn_argv, env)) { + // success + *pidptr = pid; + netdata_popen_tracking_add_pid_unsafe(pid); + netdata_popen_tracking_unlock(); + } + else { + // failure + netdata_popen_tracking_unlock(); + error("POPEN: failed to spawn command: \"%s\" from parent pid %d.", command_to_be_logged, getpid()); + ret = -1; + goto cleanup_and_return; + } + + // the normal cleanup will run + // but ret == 0 at this point + +cleanup_and_return: + if(!attr_rc) { + // posix_spawnattr_init() succeeded + if (posix_spawnattr_destroy(&attr)) + error("POPEN: posix_spawnattr_destroy() failed"); + } + + if (posix_spawn_file_actions_destroy(&fa)) + error("POPEN: posix_spawn_file_actions_destroy() failed"); + + // the child end - close it + if(pipefd_stdin[PIPE_READ] != -1) + close(pipefd_stdin[PIPE_READ]); + + // our end + if(ret == -1 || !fpp_child_stdin) { + if (fp_child_stdin) + fclose(fp_child_stdin); + else if (pipefd_stdin[PIPE_WRITE] != -1) + close(pipefd_stdin[PIPE_WRITE]); + + fp_child_stdin = NULL; + } + + // the child end - close it + if (pipefd_stdout[PIPE_WRITE] != -1) + close(pipefd_stdout[PIPE_WRITE]); + + // our end + if (ret == -1 || !fpp_child_stdout) { + if (fp_child_stdout) + fclose(fp_child_stdout); + else if (pipefd_stdout[PIPE_READ] != -1) + close(pipefd_stdout[PIPE_READ]); + + fp_child_stdout = NULL; + } + +set_return_values_and_return: + if(fpp_child_stdin) + *fpp_child_stdin = fp_child_stdin; + + if(fpp_child_stdout) + *fpp_child_stdout = fp_child_stdout; + + return ret; +} + +int netdata_popene_variadic_internal_dont_use_directly(volatile pid_t *pidptr, char **env, uint8_t flags, FILE **fpp_child_input, FILE **fpp_child_output, const char *command, ...) { + // convert the variable list arguments into what posix_spawn() needs + // all arguments are expected strings + va_list args; + int args_count; + + // count the number variable parameters + // the variable parameters are expected NULL terminated + { + const char *s; + + va_start(args, command); + args_count = 0; + while ((s = va_arg(args, const char *))) args_count++; + va_end(args); + } + + // create a string pointer array as needed by posix_spawn() + // variable array in the stack + const char *spawn_argv[args_count + 1]; + { + const char *s; + va_start(args, command); + int i; + for (i = 0; i < args_count; i++) { + s = va_arg(args, const char *); + spawn_argv[i] = s; + } + spawn_argv[args_count] = NULL; + va_end(args); + } + + return popene_internal(pidptr, env, flags, fpp_child_input, fpp_child_output, command, spawn_argv); +} + +// See man environ +extern char **environ; + +FILE *netdata_popen(const char *command, volatile pid_t *pidptr, FILE **fpp_child_input) { + FILE *fp_child_output = NULL; + const char *spawn_argv[] = { + "sh", + "-c", + command, + NULL + }; + (void)popene_internal(pidptr, environ, POPEN_FLAG_CLOSE_FD, fpp_child_input, &fp_child_output, "/bin/sh", spawn_argv); + return fp_child_output; +} + +FILE *netdata_popene(const char *command, volatile pid_t *pidptr, char **env, FILE **fpp_child_input) { + FILE *fp_child_output = NULL; + const char *spawn_argv[] = { + "sh", + "-c", + command, + NULL + }; + (void)popene_internal(pidptr, env, POPEN_FLAG_CLOSE_FD, fpp_child_input, &fp_child_output, "/bin/sh", spawn_argv); + return fp_child_output; +} + +// returns 0 on success, -1 on failure +int netdata_spawn(const char *command, volatile pid_t *pidptr) { + const char *spawn_argv[] = { + "sh", + "-c", + command, + NULL + }; + return popene_internal(pidptr, environ, POPEN_FLAG_NONE, NULL, NULL, "/bin/sh", spawn_argv); +} + +int netdata_pclose(FILE *fp_child_input, FILE *fp_child_output, pid_t pid) { + int ret; + siginfo_t info; + + debug(D_EXIT, "Request to netdata_pclose() on pid %d", pid); + + if (fp_child_input) + fclose(fp_child_input); + + if (fp_child_output) + fclose(fp_child_output); + + errno = 0; + + ret = waitid(P_PID, (id_t) pid, &info, WEXITED); + netdata_popen_tracking_del_pid(pid); + + if (ret != -1) { + switch (info.si_code) { + case CLD_EXITED: + if(info.si_status) + error("child pid %d exited with code %d.", info.si_pid, info.si_status); + return(info.si_status); + + case CLD_KILLED: + if(info.si_status == 15) { + info("child pid %d killed by signal %d.", info.si_pid, info.si_status); + return(0); + } + else { + error("child pid %d killed by signal %d.", info.si_pid, info.si_status); + return(-1); + } + + case CLD_DUMPED: + error("child pid %d core dumped by signal %d.", info.si_pid, info.si_status); + return(-2); + + case CLD_STOPPED: + error("child pid %d stopped by signal %d.", info.si_pid, info.si_status); + return(0); + + case CLD_TRAPPED: + error("child pid %d trapped by signal %d.", info.si_pid, info.si_status); + return(-4); + + case CLD_CONTINUED: + error("child pid %d continued by signal %d.", info.si_pid, info.si_status); + return(0); + + default: + error("child pid %d gave us a SIGCHLD with code %d and status %d.", info.si_pid, info.si_code, info.si_status); + return(-5); + } + } + else + error("Cannot waitid() for pid %d", pid); + + return 0; +} + +int netdata_spawn_waitpid(pid_t pid) { + return netdata_pclose(NULL, NULL, pid); +} diff --git a/libnetdata/popen/popen.h b/libnetdata/popen/popen.h new file mode 100644 index 0000000..c57a35a --- /dev/null +++ b/libnetdata/popen/popen.h @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_POPEN_H +#define NETDATA_POPEN_H 1 + +#include "../libnetdata.h" + +#define PIPE_READ 0 +#define PIPE_WRITE 1 + +/* custom_popene_variadic_internal_dont_use_directly flag definitions */ +#define POPEN_FLAG_NONE 0 +#define POPEN_FLAG_CLOSE_FD (1 << 0) // Close all file descriptors other than STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO + +// the flags to be used by default +#define POPEN_FLAGS_DEFAULT (POPEN_FLAG_CLOSE_FD) + +// mypopen_raw is the interface to use instead of custom_popene_variadic_internal_dont_use_directly() +// mypopen_raw will add the terminating NULL at the arguments list +// we append the parameter 'command' twice - this is because the underlying call needs the command to execute and the argv[0] to pass to it +#define netdata_popen_raw_default_flags_and_environment(pidptr, fpp_child_input, fpp_child_output, command, args...) netdata_popene_variadic_internal_dont_use_directly(pidptr, environ, POPEN_FLAGS_DEFAULT, fpp_child_input, fpp_child_output, command, command, ##args, NULL) +#define netdata_popen_raw_default_flags(pidptr, env, fpp_child_input, fpp_child_output, command, args...) netdata_popene_variadic_internal_dont_use_directly(pidptr, env, POPEN_FLAGS_DEFAULT, fpp_child_input, fpp_child_output, command, command, ##args, NULL) +#define netdata_popen_raw(pidptr, env, flags, fpp_child_input, fpp_child_output, command, args...) netdata_popene_variadic_internal_dont_use_directly(pidptr, env, flags, fpp_child_input, fpp_child_output, command, command, ##args, NULL) + +FILE *netdata_popen(const char *command, volatile pid_t *pidptr, FILE **fp_child_input); +FILE *netdata_popene(const char *command, volatile pid_t *pidptr, char **env, FILE **fp_child_input); +int netdata_popene_variadic_internal_dont_use_directly(volatile pid_t *pidptr, char **env, uint8_t flags, FILE **fpp_child_input, FILE **fpp_child_output, const char *command, ...); +int netdata_pclose(FILE *fp_child_input, FILE *fp_child_output, pid_t pid); + +int netdata_spawn(const char *command, volatile pid_t *pidptr); +int netdata_spawn_waitpid(pid_t pid); + +void netdata_popen_tracking_init(void); +void netdata_popen_tracking_cleanup(void); +int netdata_popen_tracking_pid_shoud_be_reaped(pid_t pid); + +void signals_unblock(void); +void signals_reset(void); + +#endif /* NETDATA_POPEN_H */ diff --git a/libnetdata/procfile/Makefile.am b/libnetdata/procfile/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/procfile/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/procfile/README.md b/libnetdata/procfile/README.md new file mode 100644 index 0000000..6563803 --- /dev/null +++ b/libnetdata/procfile/README.md @@ -0,0 +1,67 @@ + + +# PROCFILE + +procfile is a library for reading text data files (i.e `/proc` files) in the fastest possible way. + +## How it works + +The library automatically adapts (through the iterations) its memory so that each file +is read with single `read()` call. + +Then the library splits the file into words, using the supplied separators. +The library also supported quoted words (i.e. strings within of which the separators are ignored). + +### Initialization + +Initially the caller: + +- calls `procfile_open()` to open the file and allocate the structures needed. + +### Iterations + +For each iteration, the caller: + +- calls `procfile_readall()` to read updated contents. + This call also rewinds (`lseek()` to 0) before reading it. + + For every file, a [BUFFER](/libnetdata/buffer/README.md) is used that is automatically adjusted to fit the entire + file contents of the file. So the file is read with a single `read()` call (providing atomicity / consistency when + the data are read from the kernel). + + Once the data are read, 2 arrays of pointers are updated: + + - a `words` array, pointing to each word in the data read + - a `lines` array, pointing to the first word for each line + + This is highly optimized. Both arrays are automatically adjusted to + fit all contents and are updated in a single pass on the data. + + The library provides a number of macros: + + - `procfile_lines()` returns the # of lines read + - `procfile_linewords()` returns the # of words in the given line + - `procfile_word()` returns a pointer the given word # + - `procfile_line()` returns a pointer to the first word of the given line # + - `procfile_lineword()` returns a pointer to the given word # of the given line # + +### Cleanup + +When the caller exits: + +- calls `procfile_free()` to close the file and free all memory used. + +### Performance + +- a **raspberry Pi 1** (the oldest single core one) can process 5.000+ `/proc` files per second. +- a **J1900 Celeron** processor can process 23.000+ `/proc` files per second per core. + +To achieve this kind of performance, the library tries to work in batches so that the code +and the data are inside the processor's caches. + +This library is extensively used in Netdata and its plugins. + + diff --git a/libnetdata/procfile/procfile.c b/libnetdata/procfile/procfile.c new file mode 100644 index 0000000..eb04316 --- /dev/null +++ b/libnetdata/procfile/procfile.c @@ -0,0 +1,479 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +#define PF_PREFIX "PROCFILE" + +#define PFWORDS_INCREASE_STEP 2000 +#define PFLINES_INCREASE_STEP 200 +#define PROCFILE_INCREMENT_BUFFER 4096 + +int procfile_open_flags = O_RDONLY; + +int procfile_adaptive_initial_allocation = 0; + +// if adaptive allocation is set, these store the +// max values we have seen so far +size_t procfile_max_lines = PFLINES_INCREASE_STEP; +size_t procfile_max_words = PFWORDS_INCREASE_STEP; +size_t procfile_max_allocation = PROCFILE_INCREMENT_BUFFER; + + +// ---------------------------------------------------------------------------- + +char *procfile_filename(procfile *ff) { + if(ff->filename) + return ff->filename; + + char filename[FILENAME_MAX + 1]; + char buffer[FILENAME_MAX + 1]; + snprintfz(buffer, FILENAME_MAX, "/proc/self/fd/%d", ff->fd); + + ssize_t l = readlink(buffer, filename, FILENAME_MAX); + if(unlikely(l == -1)) + snprintfz(filename, FILENAME_MAX, "unknown filename for fd %d", ff->fd); + else + filename[l] = '\0'; + + + ff->filename = strdupz(filename); + + // on non-linux systems, something like this will be needed + // fcntl(ff->fd, F_GETPATH, ff->filename) + + return ff->filename; +} + +// ---------------------------------------------------------------------------- +// An array of words + +static inline void procfile_words_add(procfile *ff, char *str) { + // debug(D_PROCFILE, PF_PREFIX ": adding word No %d: '%s'", fw->len, str); + + pfwords *fw = ff->words; + if(unlikely(fw->len == fw->size)) { + // debug(D_PROCFILE, PF_PREFIX ": expanding words"); + size_t minimum = PFWORDS_INCREASE_STEP; + size_t optimal = fw->size / 2; + size_t wanted = (optimal > minimum)?optimal:minimum; + + ff->words = fw = reallocz(fw, sizeof(pfwords) + (fw->size + wanted) * sizeof(char *)); + fw->size += wanted; + } + + fw->words[fw->len++] = str; +} + +NEVERNULL +static inline pfwords *procfile_words_create(void) { + // debug(D_PROCFILE, PF_PREFIX ": initializing words"); + + size_t size = (procfile_adaptive_initial_allocation) ? procfile_max_words : PFWORDS_INCREASE_STEP; + + pfwords *new = mallocz(sizeof(pfwords) + size * sizeof(char *)); + new->len = 0; + new->size = size; + return new; +} + +static inline void procfile_words_reset(pfwords *fw) { + // debug(D_PROCFILE, PF_PREFIX ": resetting words"); + fw->len = 0; +} + +static inline void procfile_words_free(pfwords *fw) { + // debug(D_PROCFILE, PF_PREFIX ": freeing words"); + + freez(fw); +} + + +// ---------------------------------------------------------------------------- +// An array of lines + +NEVERNULL +static inline size_t *procfile_lines_add(procfile *ff) { + // debug(D_PROCFILE, PF_PREFIX ": adding line %d at word %d", fl->len, first_word); + + pflines *fl = ff->lines; + if(unlikely(fl->len == fl->size)) { + // debug(D_PROCFILE, PF_PREFIX ": expanding lines"); + size_t minimum = PFLINES_INCREASE_STEP; + size_t optimal = fl->size / 2; + size_t wanted = (optimal > minimum)?optimal:minimum; + + ff->lines = fl = reallocz(fl, sizeof(pflines) + (fl->size + wanted) * sizeof(ffline)); + fl->size += wanted; + } + + ffline *ffl = &fl->lines[fl->len++]; + ffl->words = 0; + ffl->first = ff->words->len; + + return &ffl->words; +} + +NEVERNULL +static inline pflines *procfile_lines_create(void) { + // debug(D_PROCFILE, PF_PREFIX ": initializing lines"); + + size_t size = (unlikely(procfile_adaptive_initial_allocation)) ? procfile_max_words : PFLINES_INCREASE_STEP; + + pflines *new = mallocz(sizeof(pflines) + size * sizeof(ffline)); + new->len = 0; + new->size = size; + return new; +} + +static inline void procfile_lines_reset(pflines *fl) { + // debug(D_PROCFILE, PF_PREFIX ": resetting lines"); + + fl->len = 0; +} + +static inline void procfile_lines_free(pflines *fl) { + // debug(D_PROCFILE, PF_PREFIX ": freeing lines"); + + freez(fl); +} + + +// ---------------------------------------------------------------------------- +// The procfile + +void procfile_close(procfile *ff) { + if(unlikely(!ff)) return; + + debug(D_PROCFILE, PF_PREFIX ": Closing file '%s'", procfile_filename(ff)); + + freez(ff->filename); + procfile_lines_free(ff->lines); + procfile_words_free(ff->words); + + if(likely(ff->fd != -1)) close(ff->fd); + freez(ff); +} + +NOINLINE +static void procfile_parser(procfile *ff) { + // debug(D_PROCFILE, PF_PREFIX ": Parsing file '%s'", ff->filename); + + char *s = ff->data // our current position + , *e = &ff->data[ff->len] // the terminating null + , *t = ff->data; // the first character of a word (or quoted / parenthesized string) + + // the look up array to find our type of character + PF_CHAR_TYPE *separators = ff->separators; + + char quote = 0; // the quote character - only when in quoted string + size_t opened = 0; // counts the number of open parenthesis + + size_t *line_words = procfile_lines_add(ff); + + while(s < e) { + PF_CHAR_TYPE ct = separators[(unsigned char)(*s)]; + + // this is faster than a switch() + // read more here: http://lazarenko.me/switch/ + if(likely(ct == PF_CHAR_IS_WORD)) { + s++; + } + else if(likely(ct == PF_CHAR_IS_SEPARATOR)) { + if(!quote && !opened) { + if (s != t) { + // separator, but we have word before it + *s = '\0'; + procfile_words_add(ff, t); + (*line_words)++; + t = ++s; + } + else { + // separator at the beginning + // skip it + t = ++s; + } + } + else { + // we are inside a quote or parenthesized string + s++; + } + } + else if(likely(ct == PF_CHAR_IS_NEWLINE)) { + // end of line + + *s = '\0'; + procfile_words_add(ff, t); + (*line_words)++; + t = ++s; + + // debug(D_PROCFILE, PF_PREFIX ": ended line %d with %d words", l, ff->lines->lines[l].words); + + line_words = procfile_lines_add(ff); + } + else if(likely(ct == PF_CHAR_IS_QUOTE)) { + if(unlikely(!quote && s == t)) { + // quote opened at the beginning + quote = *s; + t = ++s; + } + else if(unlikely(quote && quote == *s)) { + // quote closed + quote = 0; + + *s = '\0'; + procfile_words_add(ff, t); + (*line_words)++; + t = ++s; + } + else + s++; + } + else if(likely(ct == PF_CHAR_IS_OPEN)) { + if(s == t) { + opened++; + t = ++s; + } + else if(opened) { + opened++; + s++; + } + else + s++; + } + else if(likely(ct == PF_CHAR_IS_CLOSE)) { + if(opened) { + opened--; + + if(!opened) { + *s = '\0'; + procfile_words_add(ff, t); + (*line_words)++; + t = ++s; + } + else + s++; + } + else + s++; + } + else + fatal("Internal Error: procfile_readall() does not handle all the cases."); + } + + if(likely(s > t && t < e)) { + // the last word + if(unlikely(ff->len >= ff->size)) { + // we are going to loose the last byte + s = &ff->data[ff->size - 1]; + } + + *s = '\0'; + procfile_words_add(ff, t); + (*line_words)++; + // t = ++s; + } +} + +procfile *procfile_readall(procfile *ff) { + // debug(D_PROCFILE, PF_PREFIX ": Reading file '%s'.", ff->filename); + + ff->len = 0; // zero the used size + ssize_t r = 1; // read at least once + while(r > 0) { + ssize_t s = ff->len; + ssize_t x = ff->size - s; + + if(unlikely(!x)) { + size_t minimum = PROCFILE_INCREMENT_BUFFER; + size_t optimal = ff->size / 2; + size_t wanted = (optimal > minimum)?optimal:minimum; + + debug(D_PROCFILE, PF_PREFIX ": Expanding data buffer for file '%s' by %zu bytes.", procfile_filename(ff), wanted); + ff = reallocz(ff, sizeof(procfile) + ff->size + wanted); + ff->size += wanted; + } + + debug(D_PROCFILE, "Reading file '%s', from position %zd with length %zd", procfile_filename(ff), s, (ssize_t)(ff->size - s)); + r = read(ff->fd, &ff->data[s], ff->size - s); + if(unlikely(r == -1)) { + if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot read from file '%s' on fd %d", procfile_filename(ff), ff->fd); + procfile_close(ff); + return NULL; + } + + ff->len += r; + } + + // debug(D_PROCFILE, "Rewinding file '%s'", ff->filename); + if(unlikely(lseek(ff->fd, 0, SEEK_SET) == -1)) { + if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot rewind on file '%s'.", procfile_filename(ff)); + procfile_close(ff); + return NULL; + } + + procfile_lines_reset(ff->lines); + procfile_words_reset(ff->words); + procfile_parser(ff); + + if(unlikely(procfile_adaptive_initial_allocation)) { + if(unlikely(ff->len > procfile_max_allocation)) procfile_max_allocation = ff->len; + if(unlikely(ff->lines->len > procfile_max_lines)) procfile_max_lines = ff->lines->len; + if(unlikely(ff->words->len > procfile_max_words)) procfile_max_words = ff->words->len; + } + + // debug(D_PROCFILE, "File '%s' updated.", ff->filename); + return ff; +} + +static PF_CHAR_TYPE procfile_default_separators[256]; +__attribute__((constructor)) void procfile_initialize_default_separators(void) { + int i = 256; + while(i--) { + if(unlikely(i == '\n' || i == '\r')) + procfile_default_separators[i] = PF_CHAR_IS_NEWLINE; + + else if(unlikely(isspace(i) || !isprint(i))) + procfile_default_separators[i] = PF_CHAR_IS_SEPARATOR; + + else + procfile_default_separators[i] = PF_CHAR_IS_WORD; + } +} + +NOINLINE +static void procfile_set_separators(procfile *ff, const char *separators) { + // set the separators + if(unlikely(!separators)) + separators = " \t=|"; + + // copy the default + memcpy(ff->separators, procfile_default_separators, 256 * sizeof(PF_CHAR_TYPE)); + + PF_CHAR_TYPE *ffs = ff->separators; + const char *s = separators; + while(*s) + ffs[(int)*s++] = PF_CHAR_IS_SEPARATOR; +} + +void procfile_set_quotes(procfile *ff, const char *quotes) { + PF_CHAR_TYPE *ffs = ff->separators; + + // remove all quotes + int i = 256; + while(i--) + if(unlikely(ffs[i] == PF_CHAR_IS_QUOTE)) + ffs[i] = PF_CHAR_IS_WORD; + + // if nothing given, return + if(unlikely(!quotes || !*quotes)) + return; + + // set the quotes + const char *s = quotes; + while(*s) + ffs[(int)*s++] = PF_CHAR_IS_QUOTE; +} + +void procfile_set_open_close(procfile *ff, const char *open, const char *close) { + PF_CHAR_TYPE *ffs = ff->separators; + + // remove all open/close + int i = 256; + while(i--) + if(unlikely(ffs[i] == PF_CHAR_IS_OPEN || ffs[i] == PF_CHAR_IS_CLOSE)) + ffs[i] = PF_CHAR_IS_WORD; + + // if nothing given, return + if(unlikely(!open || !*open || !close || !*close)) + return; + + // set the openings + const char *s = open; + while(*s) + ffs[(int)*s++] = PF_CHAR_IS_OPEN; + + // set the closings + s = close; + while(*s) + ffs[(int)*s++] = PF_CHAR_IS_CLOSE; +} + +procfile *procfile_open(const char *filename, const char *separators, uint32_t flags) { + debug(D_PROCFILE, PF_PREFIX ": Opening file '%s'", filename); + + int fd = open(filename, procfile_open_flags, 0666); + if(unlikely(fd == -1)) { + if(unlikely(!(flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot open file '%s'", filename); + return NULL; + } + + // info("PROCFILE: opened '%s' on fd %d", filename, fd); + + size_t size = (unlikely(procfile_adaptive_initial_allocation)) ? procfile_max_allocation : PROCFILE_INCREMENT_BUFFER; + procfile *ff = mallocz(sizeof(procfile) + size); + + //strncpyz(ff->filename, filename, FILENAME_MAX); + ff->filename = NULL; + ff->fd = fd; + ff->size = size; + ff->len = 0; + ff->flags = flags; + + ff->lines = procfile_lines_create(); + ff->words = procfile_words_create(); + + procfile_set_separators(ff, separators); + + debug(D_PROCFILE, "File '%s' opened.", filename); + return ff; +} + +procfile *procfile_reopen(procfile *ff, const char *filename, const char *separators, uint32_t flags) { + if(unlikely(!ff)) return procfile_open(filename, separators, flags); + + if(likely(ff->fd != -1)) { + // info("PROCFILE: closing fd %d", ff->fd); + close(ff->fd); + } + + ff->fd = open(filename, procfile_open_flags, 0666); + if(unlikely(ff->fd == -1)) { + procfile_close(ff); + return NULL; + } + + // info("PROCFILE: opened '%s' on fd %d", filename, ff->fd); + + //strncpyz(ff->filename, filename, FILENAME_MAX); + freez(ff->filename); + ff->filename = NULL; + ff->flags = flags; + + // do not do the separators again if NULL is given + if(likely(separators)) procfile_set_separators(ff, separators); + + return ff; +} + +// ---------------------------------------------------------------------------- +// example parsing of procfile data + +void procfile_print(procfile *ff) { + size_t lines = procfile_lines(ff), l; + char *s; + (void)s; + + debug(D_PROCFILE, "File '%s' with %zu lines and %zu words", procfile_filename(ff), ff->lines->len, ff->words->len); + + for(l = 0; likely(l < lines) ;l++) { + size_t words = procfile_linewords(ff, l); + + debug(D_PROCFILE, " line %zu starts at word %zu and has %zu words", l, ff->lines->lines[l].first, ff->lines->lines[l].words); + + size_t w; + for(w = 0; likely(w < words) ;w++) { + s = procfile_lineword(ff, l, w); + debug(D_PROCFILE, " [%zu.%zu] '%s'", l, w, s); + } + } +} diff --git a/libnetdata/procfile/procfile.h b/libnetdata/procfile/procfile.h new file mode 100644 index 0000000..cae4ad4 --- /dev/null +++ b/libnetdata/procfile/procfile.h @@ -0,0 +1,108 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_PROCFILE_H +#define NETDATA_PROCFILE_H 1 + +#include "../libnetdata.h" + +// ---------------------------------------------------------------------------- +// An array of words + +typedef struct { + size_t len; // used entries + size_t size; // capacity + char *words[]; // array of pointers +} pfwords; + + +// ---------------------------------------------------------------------------- +// An array of lines + +typedef struct { + size_t words; // how many words this line has + size_t first; // the id of the first word of this line + // in the words array +} ffline; + +typedef struct { + size_t len; // used entries + size_t size; // capacity + ffline lines[]; // array of lines +} pflines; + + +// ---------------------------------------------------------------------------- +// The procfile + +#define PROCFILE_FLAG_DEFAULT 0x00000000 +#define PROCFILE_FLAG_NO_ERROR_ON_FILE_IO 0x00000001 + +typedef enum __attribute__ ((__packed__)) procfile_separator { + PF_CHAR_IS_SEPARATOR, + PF_CHAR_IS_NEWLINE, + PF_CHAR_IS_WORD, + PF_CHAR_IS_QUOTE, + PF_CHAR_IS_OPEN, + PF_CHAR_IS_CLOSE +} PF_CHAR_TYPE; + +typedef struct procfile { + char *filename; // not populated until procfile_filename() is called + uint32_t flags; + int fd; // the file descriptor + size_t len; // the bytes we have placed into data + size_t size; // the bytes we have allocated for data + pflines *lines; + pfwords *words; + PF_CHAR_TYPE separators[256]; + char data[]; // allocated buffer to keep file contents +} procfile; + +// close the proc file and free all related memory +void procfile_close(procfile *ff); + +// (re)read and parse the proc file +procfile *procfile_readall(procfile *ff); + +// open a /proc or /sys file +procfile *procfile_open(const char *filename, const char *separators, uint32_t flags); + +// re-open a file +// if separators == NULL, the last separators are used +procfile *procfile_reopen(procfile *ff, const char *filename, const char *separators, uint32_t flags); + +// example walk-through a procfile parsed file +void procfile_print(procfile *ff); + +void procfile_set_quotes(procfile *ff, const char *quotes); +void procfile_set_open_close(procfile *ff, const char *open, const char *close); + +char *procfile_filename(procfile *ff); + +// ---------------------------------------------------------------------------- + +// set to the O_XXXX flags, to have procfile_open and procfile_reopen use them when opening proc files +extern int procfile_open_flags; + +// set this to 1, to have procfile adapt its initial buffer allocation to the max allocation used so far +extern int procfile_adaptive_initial_allocation; + +// return the number of lines present +#define procfile_lines(ff) ((ff)->lines->len) + +// return the number of words of the Nth line +#define procfile_linewords(ff, line) (((line) < procfile_lines(ff)) ? (ff)->lines->lines[(line)].words : 0) + +// return the Nth word of the file, or empty string +#define procfile_word(ff, word) (((word) < (ff)->words->len) ? (ff)->words->words[(word)] : "") + +// return the first word of the Nth line, or empty string +#define procfile_line(ff, line) (((line) < procfile_lines(ff)) ? procfile_word((ff), (ff)->lines->lines[(line)].first) : "") + +// return the Nth word of the current line +#define procfile_lineword(ff, line, word) (((line) < procfile_lines(ff) && (word) < procfile_linewords((ff), (line))) ? procfile_word((ff), (ff)->lines->lines[(line)].first + (word)) : "") + +// Open file without logging file IO error if any +#define procfile_open_no_log(filename, separators, flags) procfile_open(filename, separators, flags | PROCFILE_FLAG_NO_ERROR_ON_FILE_IO) + +#endif /* NETDATA_PROCFILE_H */ diff --git a/libnetdata/required_dummies.h b/libnetdata/required_dummies.h new file mode 100644 index 0000000..ad1e8fb --- /dev/null +++ b/libnetdata/required_dummies.h @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_LIB_DUMMIES_H +#define NETDATA_LIB_DUMMIES_H 1 + +// callback required by fatal() +void netdata_cleanup_and_exit(int ret) +{ + exit(ret); +} + +void send_statistics(const char *action, const char *action_result, const char *action_data) +{ + (void)action; + (void)action_result; + (void)action_data; + return; +} + +// callbacks required by popen() +void signals_block(void){}; +void signals_unblock(void){}; +void signals_reset(void){}; + +#ifndef UNIT_TESTING +// callback required by eval() +int health_variable_lookup(STRING *variable, struct rrdcalc *rc, NETDATA_DOUBLE *result) +{ + (void)variable; + (void)rc; + (void)result; + return 0; +}; +#endif + +void rrdset_thread_rda_free(void){}; +void sender_thread_buffer_free(void){}; +void query_target_free(void){}; + +// required by get_system_cpus() +char *netdata_configured_host_prefix = ""; + +#endif // NETDATA_LIB_DUMMIES_H diff --git a/libnetdata/simple_pattern/Makefile.am b/libnetdata/simple_pattern/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/simple_pattern/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/simple_pattern/README.md b/libnetdata/simple_pattern/README.md new file mode 100644 index 0000000..cb377f8 --- /dev/null +++ b/libnetdata/simple_pattern/README.md @@ -0,0 +1,43 @@ + + +# Netdata simple patterns + +Unix prefers regular expressions. But they are just too hard, too cryptic +to use, write and understand. + +So, Netdata supports **simple patterns**. + +Simple patterns are a space separated list of words, that can have `*` +as a wildcard. Each world may use any number of `*`. Simple patterns +allow **negative** matches by prefixing a word with `!`. + +So, `pattern = !*bad* *` will match anything, except all those that +contain the word `bad`. + +Simple patterns are quite powerful: `pattern = *foobar* !foo* !*bar *` +matches everything containing `foobar`, except strings that start +with `foo` or end with `bar`. + +You can use the Netdata command line to check simple patterns, +like this: + +```sh +# netdata -W simple-pattern '*foobar* !foo* !*bar *' 'hello world' +RESULT: MATCHED - pattern '*foobar* !foo* !*bar *' matches 'hello world' + +# netdata -W simple-pattern '*foobar* !foo* !*bar *' 'hello world bar' +RESULT: NOT MATCHED - pattern '*foobar* !foo* !*bar *' does not match 'hello world bar' + +# netdata -W simple-pattern '*foobar* !foo* !*bar *' 'hello world foobar' +RESULT: MATCHED - pattern '*foobar* !foo* !*bar *' matches 'hello world foobar' +``` + +Netdata stops processing to the first positive or negative match +(left to right). If it is not matched by either positive or negative +patterns, it is denied at the end. + + diff --git a/libnetdata/simple_pattern/simple_pattern.c b/libnetdata/simple_pattern/simple_pattern.c new file mode 100644 index 0000000..81c2ed0 --- /dev/null +++ b/libnetdata/simple_pattern/simple_pattern.c @@ -0,0 +1,363 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +struct simple_pattern { + const char *match; + size_t len; + + SIMPLE_PREFIX_MODE mode; + char negative; + + struct simple_pattern *child; + + struct simple_pattern *next; +}; + +static inline struct simple_pattern *parse_pattern(char *str, SIMPLE_PREFIX_MODE default_mode) { + // fprintf(stderr, "PARSING PATTERN: '%s'\n", str); + + SIMPLE_PREFIX_MODE mode; + struct simple_pattern *child = NULL; + + char *s = str, *c = str; + + // skip asterisks in front + while(*c == '*') c++; + + // find the next asterisk + while(*c && *c != '*') c++; + + // do we have an asterisk in the middle? + if(*c == '*' && c[1] != '\0') { + // yes, we have + child = parse_pattern(c, default_mode); + c[1] = '\0'; + } + + // check what this one matches + + size_t len = strlen(s); + if(len >= 2 && *s == '*' && s[len - 1] == '*') { + s[len - 1] = '\0'; + s++; + mode = SIMPLE_PATTERN_SUBSTRING; + } + else if(len >= 1 && *s == '*') { + s++; + mode = SIMPLE_PATTERN_SUFFIX; + } + else if(len >= 1 && s[len - 1] == '*') { + s[len - 1] = '\0'; + mode = SIMPLE_PATTERN_PREFIX; + } + else + mode = default_mode; + + // allocate the structure + struct simple_pattern *m = callocz(1, sizeof(struct simple_pattern)); + if(*s) { + m->match = strdupz(s); + m->len = strlen(m->match); + m->mode = mode; + } + else { + m->mode = SIMPLE_PATTERN_SUBSTRING; + } + + m->child = child; + + return m; +} + +SIMPLE_PATTERN *simple_pattern_create(const char *list, const char *separators, SIMPLE_PREFIX_MODE default_mode) { + struct simple_pattern *root = NULL, *last = NULL; + + if(unlikely(!list || !*list)) return root; + + int isseparator[256] = { + [' '] = 1 // space + , ['\t'] = 1 // tab + , ['\r'] = 1 // carriage return + , ['\n'] = 1 // new line + , ['\f'] = 1 // form feed + , ['\v'] = 1 // vertical tab + }; + + if (unlikely(separators && *separators)) { + memset(&isseparator[0], 0, sizeof(isseparator)); + while(*separators) isseparator[(unsigned char)*separators++] = 1; + } + + char *buf = mallocz(strlen(list) + 1); + const char *s = list; + + while(s && *s) { + buf[0] = '\0'; + char *c = buf; + + char negative = 0; + + // skip all spaces + while(isseparator[(unsigned char)*s]) + s++; + + if(*s == '!') { + negative = 1; + s++; + } + + // empty string + if(unlikely(!*s)) + break; + + // find the next space + char escape = 0; + while(*s) { + if(*s == '\\' && !escape) { + escape = 1; + s++; + } + else { + if (isseparator[(unsigned char)*s] && !escape) { + s++; + break; + } + + *c++ = *s++; + escape = 0; + } + } + + // terminate our string + *c = '\0'; + + // if we matched the empty string, skip it + if(unlikely(!*buf)) + continue; + + // fprintf(stderr, "FOUND PATTERN: '%s'\n", buf); + struct simple_pattern *m = parse_pattern(buf, default_mode); + m->negative = negative; + + // link it at the end + if(unlikely(!root)) + root = last = m; + else { + last->next = m; + last = m; + } + } + + freez(buf); + return (SIMPLE_PATTERN *)root; +} + +static inline char *add_wildcarded(const char *matched, size_t matched_size, char *wildcarded, size_t *wildcarded_size) { + //if(matched_size) { + // char buf[matched_size + 1]; + // strncpyz(buf, matched, matched_size); + // fprintf(stderr, "ADD WILDCARDED '%s' of length %zu\n", buf, matched_size); + //} + + if(unlikely(wildcarded && *wildcarded_size && matched && *matched && matched_size)) { + size_t wss = *wildcarded_size - 1; + size_t len = (matched_size < wss)?matched_size:wss; + if(likely(len)) { + strncpyz(wildcarded, matched, len); + + *wildcarded_size -= len; + return &wildcarded[len]; + } + } + + return wildcarded; +} + +static inline int match_pattern(struct simple_pattern *m, const char *str, size_t len, char *wildcarded, size_t *wildcarded_size) { + char *s; + + if(m->len <= len) { + switch(m->mode) { + case SIMPLE_PATTERN_SUBSTRING: + if(!m->len) return 1; + if((s = strstr(str, m->match))) { + wildcarded = add_wildcarded(str, s - str, wildcarded, wildcarded_size); + if(!m->child) { + wildcarded = add_wildcarded(&s[m->len], len - (&s[m->len] - str), wildcarded, wildcarded_size); + return 1; + } + return match_pattern(m->child, &s[m->len], len - (s - str) - m->len, wildcarded, wildcarded_size); + } + break; + + case SIMPLE_PATTERN_PREFIX: + if(unlikely(strncmp(str, m->match, m->len) == 0)) { + if(!m->child) { + wildcarded = add_wildcarded(&str[m->len], len - m->len, wildcarded, wildcarded_size); + return 1; + } + return match_pattern(m->child, &str[m->len], len - m->len, wildcarded, wildcarded_size); + } + break; + + case SIMPLE_PATTERN_SUFFIX: + if(unlikely(strcmp(&str[len - m->len], m->match) == 0)) { + wildcarded = add_wildcarded(str, len - m->len, wildcarded, wildcarded_size); + if(!m->child) return 1; + return 0; + } + break; + + case SIMPLE_PATTERN_EXACT: + default: + if(unlikely(strcmp(str, m->match) == 0)) { + if(!m->child) return 1; + return 0; + } + break; + } + } + + return 0; +} + +int simple_pattern_matches_extract(SIMPLE_PATTERN *list, const char *str, char *wildcarded, size_t wildcarded_size) { + struct simple_pattern *m, *root = (struct simple_pattern *)list; + + if(unlikely(!root || !str || !*str)) return 0; + + size_t len = strlen(str); + for(m = root; m ; m = m->next) { + char *ws = wildcarded; + size_t wss = wildcarded_size; + if(unlikely(ws)) *ws = '\0'; + + if (match_pattern(m, str, len, ws, &wss)) { + + //if(ws && wss) + // fprintf(stderr, "FINAL WILDCARDED '%s' of length %zu\n", ws, strlen(ws)); + + if (m->negative) return 0; + return 1; + } + } + + return 0; +} + +static inline void free_pattern(struct simple_pattern *m) { + if(!m) return; + + free_pattern(m->child); + free_pattern(m->next); + freez((void *)m->match); + freez(m); +} + +void simple_pattern_free(SIMPLE_PATTERN *list) { + if(!list) return; + + free_pattern(((struct simple_pattern *)list)); +} + +/* Debugging patterns + + This code should be dead - it is useful for debugging but should not be called by production code. + Feel free to comment it out, but please leave it in the file. +*/ +extern void simple_pattern_dump(uint64_t debug_type, SIMPLE_PATTERN *p) +{ + struct simple_pattern *root = (struct simple_pattern *)p; + if(root==NULL) { + debug(debug_type,"dump_pattern(NULL)"); + return; + } + debug(debug_type,"dump_pattern(%p) child=%p next=%p mode=%u match=%s", root, root->child, root->next, root->mode, + root->match); + if(root->child!=NULL) + simple_pattern_dump(debug_type, (SIMPLE_PATTERN*)root->child); + if(root->next!=NULL) + simple_pattern_dump(debug_type, (SIMPLE_PATTERN*)root->next); +} + +/* Heuristic: decide if the pattern could match a DNS name. + + Although this functionality is used directly by socket.c:connection_allowed() it must be in this file + because of the SIMPLE_PATTERN/simple_pattern structure hiding. + Based on RFC952 / RFC1123. We need to decide if the pattern may match a DNS name, or not. For the negative + cases we need to be sure that it can only match an ipv4 or ipv6 address: + * IPv6 addresses contain ':', which are illegal characters in DNS. + * IPv4 addresses cannot contain alpha- characters. + * DNS TLDs must be alphanumeric to distinguish from IPv4. + Some patterns (e.g. "*a*" ) could match multiple cases (i.e. DNS or IPv6). + Some patterns will be awkward (e.g. "192.168.*") as they look like they are intended to match IPv4-only + but could match DNS (i.e. "192.168.com" is a valid name). +*/ +static void scan_is_potential_name(struct simple_pattern *p, int *alpha, int *colon, int *wildcards) +{ + while (p) { + if (p->match) { + if(p->mode == SIMPLE_PATTERN_EXACT && !strcmp("localhost", p->match)) { + p = p->child; + continue; + } + char const *scan = p->match; + while (*scan != 0) { + if ((*scan >= 'a' && *scan <= 'z') || (*scan >= 'A' && *scan <= 'Z')) + *alpha = 1; + if (*scan == ':') + *colon = 1; + scan++; + } + if (p->mode != SIMPLE_PATTERN_EXACT) + *wildcards = 1; + p = p->child; + } + } +} + +extern int simple_pattern_is_potential_name(SIMPLE_PATTERN *p) +{ + int alpha=0, colon=0, wildcards=0; + struct simple_pattern *root = (struct simple_pattern*)p; + while (root != NULL) { + if (root->match != NULL) { + scan_is_potential_name(root, &alpha, &colon, &wildcards); + } + if (root->mode != SIMPLE_PATTERN_EXACT) + wildcards = 1; + root = root->next; + } + return (alpha || wildcards) && !colon; +} + +char *simple_pattern_trim_around_equal(char *src) { + char *store = mallocz(strlen(src) + 1); + + char *dst = store; + while (*src) { + if (*src == '=') { + if (*(dst -1) == ' ') + dst--; + + *dst++ = *src++; + if (*src == ' ') + src++; + } + + *dst++ = *src++; + } + *dst = 0x00; + + return store; +} + +char *simple_pattern_iterate(SIMPLE_PATTERN **p) +{ + struct simple_pattern *root = (struct simple_pattern *) *p; + struct simple_pattern **Proot = (struct simple_pattern **)p; + + (*Proot) = (*Proot)->next; + return (char *) root->match; +} diff --git a/libnetdata/simple_pattern/simple_pattern.h b/libnetdata/simple_pattern/simple_pattern.h new file mode 100644 index 0000000..7282053 --- /dev/null +++ b/libnetdata/simple_pattern/simple_pattern.h @@ -0,0 +1,42 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SIMPLE_PATTERN_H +#define NETDATA_SIMPLE_PATTERN_H + +#include "../libnetdata.h" + + +typedef enum { + SIMPLE_PATTERN_EXACT, + SIMPLE_PATTERN_PREFIX, + SIMPLE_PATTERN_SUFFIX, + SIMPLE_PATTERN_SUBSTRING +} SIMPLE_PREFIX_MODE; + +typedef void SIMPLE_PATTERN; + +// create a simple_pattern from the string given +// default_mode is used in cases where EXACT matches, without an asterisk, +// should be considered PREFIX matches. +SIMPLE_PATTERN *simple_pattern_create(const char *list, const char *separators, SIMPLE_PREFIX_MODE default_mode); + +// test if string str is matched from the pattern and fill 'wildcarded' with the parts matched by '*' +int simple_pattern_matches_extract(SIMPLE_PATTERN *list, const char *str, char *wildcarded, size_t wildcarded_size); + +// test if string str is matched from the pattern +#define simple_pattern_matches(list, str) simple_pattern_matches_extract(list, str, NULL, 0) + +// free a simple_pattern that was created with simple_pattern_create() +// list can be NULL, in which case, this does nothing. +void simple_pattern_free(SIMPLE_PATTERN *list); + +void simple_pattern_dump(uint64_t debug_type, SIMPLE_PATTERN *p) ; +int simple_pattern_is_potential_name(SIMPLE_PATTERN *p) ; +char *simple_pattern_iterate(SIMPLE_PATTERN **p); + +// Auxiliary function to create a pattern +char *simple_pattern_trim_around_equal(char *src); + +#define is_valid_sp(x) ((x) && *(x) && !((x)[0] == '*' && (x)[1] == '\0')) + +#endif //NETDATA_SIMPLE_PATTERN_H diff --git a/libnetdata/socket/Makefile.am b/libnetdata/socket/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/socket/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/socket/README.md b/libnetdata/socket/README.md new file mode 100644 index 0000000..70bfd34 --- /dev/null +++ b/libnetdata/socket/README.md @@ -0,0 +1,5 @@ + + + diff --git a/libnetdata/socket/security.c b/libnetdata/socket/security.c new file mode 100644 index 0000000..88b3f6d --- /dev/null +++ b/libnetdata/socket/security.c @@ -0,0 +1,390 @@ +#include "../libnetdata.h" + +#ifdef ENABLE_HTTPS + +SSL_CTX *netdata_ssl_exporting_ctx =NULL; +SSL_CTX *netdata_ssl_client_ctx =NULL; +SSL_CTX *netdata_ssl_srv_ctx =NULL; +const char *netdata_ssl_security_key =NULL; +const char *netdata_ssl_security_cert =NULL; +const char *tls_version=NULL; +const char *tls_ciphers=NULL; +int netdata_ssl_validate_server = NETDATA_SSL_VALID_CERTIFICATE; + +/** + * Info Callback + * + * Function used as callback for the OpenSSL Library + * + * @param ssl a pointer to the SSL structure of the client + * @param where the variable with the flags set. + * @param ret the return of the caller + */ +static void security_info_callback(const SSL *ssl, int where, int ret __maybe_unused) { + (void)ssl; + if (where & SSL_CB_ALERT) { + debug(D_WEB_CLIENT,"SSL INFO CALLBACK %s %s", SSL_alert_type_string(ret), SSL_alert_desc_string_long(ret)); + } +} + +/** + * OpenSSL Library + * + * Starts the openssl library for the Netdata. + */ +void security_openssl_library() +{ +#if OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 +# if (SSLEAY_VERSION_NUMBER >= OPENSSL_VERSION_097) + OPENSSL_config(NULL); +# endif + + SSL_load_error_strings(); + + SSL_library_init(); +#else + if (OPENSSL_init_ssl(OPENSSL_INIT_LOAD_CONFIG, NULL) != 1) { + error("SSL library cannot be initialized."); + } +#endif +} + +#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_110 +/** + * TLS version + * + * Returns the TLS version depending of the user input. + * + * @param lversion is the user input. + * + * @return it returns the version number. + */ +int tls_select_version(const char *lversion) { + if (!strcmp(lversion, "1") || !strcmp(lversion, "1.0")) + return TLS1_VERSION; + else if (!strcmp(lversion, "1.1")) + return TLS1_1_VERSION; + else if (!strcmp(lversion, "1.2")) + return TLS1_2_VERSION; +#if defined(TLS1_3_VERSION) + else if (!strcmp(lversion, "1.3")) + return TLS1_3_VERSION; +#endif + +#if defined(TLS_MAX_VERSION) + return TLS_MAX_VERSION; +#else + return TLS1_2_VERSION; +#endif +} +#endif + +/** + * OpenSSL common options + * + * Clients and SERVER have common options, this function is responsible to set them in the context. + * + * @param ctx the initialized SSL context. + * @param side 0 means server, and 1 client. + */ +void security_openssl_common_options(SSL_CTX *ctx, int side) { +#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_110 + if (!side) { + int version = tls_select_version(tls_version) ; +#endif +#if OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 + SSL_CTX_set_options (ctx,SSL_OP_NO_SSLv2|SSL_OP_NO_SSLv3|SSL_OP_NO_COMPRESSION); +#else + SSL_CTX_set_min_proto_version(ctx, TLS1_VERSION); + SSL_CTX_set_max_proto_version(ctx, version); + + if(tls_ciphers && strcmp(tls_ciphers, "none") != 0) { + if (!SSL_CTX_set_cipher_list(ctx, tls_ciphers)) { + error("SSL error. cannot set the cipher list"); + } + } + } +#endif + + SSL_CTX_set_mode(ctx, SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER); +} + +/** + * Initialize Openssl Client + * + * Starts the client context with TLS 1.2. + * + * @return It returns the context on success or NULL otherwise + */ +SSL_CTX * security_initialize_openssl_client() { + SSL_CTX *ctx; +#if OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 + ctx = SSL_CTX_new(SSLv23_client_method()); +#else + ctx = SSL_CTX_new(TLS_client_method()); +#endif + if(ctx) { +#if OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 + SSL_CTX_set_options (ctx,SSL_OP_NO_SSLv2|SSL_OP_NO_SSLv3|SSL_OP_NO_COMPRESSION); +#else + SSL_CTX_set_min_proto_version(ctx, TLS1_VERSION); +# if defined(TLS_MAX_VERSION) + SSL_CTX_set_max_proto_version(ctx, TLS_MAX_VERSION); +# elif defined(TLS1_3_VERSION) + SSL_CTX_set_max_proto_version(ctx, TLS1_3_VERSION); +# elif defined(TLS1_2_VERSION) + SSL_CTX_set_max_proto_version(ctx, TLS1_2_VERSION); +# endif +#endif + } + + return ctx; +} + +/** + * Initialize OpenSSL server + * + * Starts the server context with TLS 1.2 and load the certificate. + * + * @return It returns the context on success or NULL otherwise + */ +static SSL_CTX * security_initialize_openssl_server() { + SSL_CTX *ctx; + char lerror[512]; + static int netdata_id_context = 1; + + //TO DO: Confirm the necessity to check return for other OPENSSL function +#if OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 + ctx = SSL_CTX_new(SSLv23_server_method()); + if (!ctx) { + error("Cannot create a new SSL context, netdata won't encrypt communication"); + return NULL; + } + + SSL_CTX_use_certificate_file(ctx, netdata_ssl_security_cert, SSL_FILETYPE_PEM); +#else + ctx = SSL_CTX_new(TLS_server_method()); + if (!ctx) { + error("Cannot create a new SSL context, netdata won't encrypt communication"); + return NULL; + } + + SSL_CTX_use_certificate_chain_file(ctx, netdata_ssl_security_cert); +#endif + security_openssl_common_options(ctx, 0); + + SSL_CTX_use_PrivateKey_file(ctx, netdata_ssl_security_key,SSL_FILETYPE_PEM); + + if (!SSL_CTX_check_private_key(ctx)) { + ERR_error_string_n(ERR_get_error(),lerror,sizeof(lerror)); + error("SSL cannot check the private key: %s",lerror); + SSL_CTX_free(ctx); + return NULL; + } + + SSL_CTX_set_session_id_context(ctx,(void*)&netdata_id_context,(unsigned int)sizeof(netdata_id_context)); + SSL_CTX_set_info_callback(ctx,security_info_callback); + +#if (OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_095) + SSL_CTX_set_verify_depth(ctx,1); +#endif + debug(D_WEB_CLIENT,"SSL GLOBAL CONTEXT STARTED\n"); + + return ctx; +} + +/** + * Start SSL + * + * Call the correct function to start the SSL context. + * + * @param selector informs the context that must be initialized, the following list has the valid values: + * NETDATA_SSL_CONTEXT_SERVER - the server context + * NETDATA_SSL_CONTEXT_STREAMING - Starts the streaming context. + * NETDATA_SSL_CONTEXT_EXPORTING - Starts the OpenTSDB context + */ +void security_start_ssl(int selector) { + static SPINLOCK sp = NETDATA_SPINLOCK_INITIALIZER; + netdata_spinlock_lock(&sp); + + switch (selector) { + case NETDATA_SSL_CONTEXT_SERVER: { + if(!netdata_ssl_srv_ctx) { + struct stat statbuf; + if (stat(netdata_ssl_security_key, &statbuf) || stat(netdata_ssl_security_cert, &statbuf)) + info("To use encryption it is necessary to set \"ssl certificate\" and \"ssl key\" in [web] !\n"); + else { + netdata_ssl_srv_ctx = security_initialize_openssl_server(); + SSL_CTX_set_mode(netdata_ssl_srv_ctx, SSL_MODE_ENABLE_PARTIAL_WRITE); + } + } + break; + } + + case NETDATA_SSL_CONTEXT_STREAMING: { + if(!netdata_ssl_client_ctx) { + netdata_ssl_client_ctx = security_initialize_openssl_client(); + //This is necessary for the stream, because it is working sometimes with nonblock socket. + //It returns the bitmask after to change, there is not any description of errors in the documentation + SSL_CTX_set_mode(netdata_ssl_client_ctx, + SSL_MODE_ENABLE_PARTIAL_WRITE | SSL_MODE_ACCEPT_MOVING_WRITE_BUFFER | + SSL_MODE_AUTO_RETRY); + } + break; + } + + case NETDATA_SSL_CONTEXT_EXPORTING: { + if(!netdata_ssl_exporting_ctx) + netdata_ssl_exporting_ctx = security_initialize_openssl_client(); + break; + } + } + + netdata_spinlock_unlock(&sp); +} + +/** + * Clean Open SSL + * + * Clean all the allocated contexts from netdata. + */ +void security_clean_openssl() +{ + if (netdata_ssl_srv_ctx) { + SSL_CTX_free(netdata_ssl_srv_ctx); + } + + if (netdata_ssl_client_ctx) { + SSL_CTX_free(netdata_ssl_client_ctx); + } + + if (netdata_ssl_exporting_ctx) { + SSL_CTX_free(netdata_ssl_exporting_ctx); + } + +#if OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110 + ERR_free_strings(); +#endif +} + +/** + * Process accept + * + * Process the SSL handshake with the client case it is necessary. + * + * @param ssl is a pointer for the SSL structure + * @param msg is a copy of the first 8 bytes of the initial message received + * + * @return it returns 0 case it performs the handshake, 8 case it is clean connection + * and another integer power of 2 otherwise. + */ +int security_process_accept(SSL *ssl,int msg) { + int sock = SSL_get_fd(ssl); + int test; + if (msg > 0x17) + { + return NETDATA_SSL_NO_HANDSHAKE; + } + + ERR_clear_error(); + if ((test = SSL_accept(ssl)) <= 0) { + int sslerrno = SSL_get_error(ssl, test); + switch(sslerrno) { + case SSL_ERROR_WANT_READ: + { + error("SSL handshake did not finish and it wanna read on socket %d!", sock); + return NETDATA_SSL_WANT_READ; + } + case SSL_ERROR_WANT_WRITE: + { + error("SSL handshake did not finish and it wanna read on socket %d!", sock); + return NETDATA_SSL_WANT_WRITE; + } + case SSL_ERROR_NONE: + case SSL_ERROR_SSL: + case SSL_ERROR_SYSCALL: + default: + { + u_long err; + char buf[256]; + int counter = 0; + while ((err = ERR_get_error()) != 0) { + ERR_error_string_n(err, buf, sizeof(buf)); + info("%d SSL Handshake error (%s) on socket %d ", counter++, ERR_error_string((long)SSL_get_error(ssl, test), NULL), sock); + } + return NETDATA_SSL_NO_HANDSHAKE; + } + } + } + + if (SSL_is_init_finished(ssl)) + { + debug(D_WEB_CLIENT_ACCESS,"SSL Handshake finished %s errno %d on socket fd %d", ERR_error_string((long)SSL_get_error(ssl, test), NULL), errno, sock); + } + + return NETDATA_SSL_HANDSHAKE_COMPLETE; +} + +/** + * Test Certificate + * + * Check the certificate of Netdata parent + * + * @param ssl is the connection structure + * + * @return It returns 0 on success and -1 otherwise + */ +int security_test_certificate(SSL *ssl) { + X509* cert = SSL_get_peer_certificate(ssl); + int ret; + long status; + if (!cert) { + return -1; + } + + status = SSL_get_verify_result(ssl); + if((X509_V_OK != status)) + { + char error[512]; + ERR_error_string_n(ERR_get_error(), error, sizeof(error)); + error("SSL RFC4158 check: We have a invalid certificate, the tests result with %ld and message %s", status, error); + ret = -1; + } else { + ret = 0; + } + + return ret; +} + +/** + * Location for context + * + * Case the user give us a directory with the certificates available and + * the Netdata parent certificate, we use this function to validate the certificate. + * + * @param ctx the context where the path will be set. + * @param file the file with Netdata parent certificate. + * @param path the directory where the certificates are stored. + * + * @return It returns 0 on success and -1 otherwise. + */ +int ssl_security_location_for_context(SSL_CTX *ctx, char *file, char *path) { + int load_custom = 1, load_default = 1; + if (file || path) { + if(!SSL_CTX_load_verify_locations(ctx, file, path)) { + info("Netdata can not verify custom CAfile or CApath for parent's SSL certificate, so it will use the default OpenSSL configuration to validate certificates!"); + load_custom = 0; + } + } + + if(!SSL_CTX_set_default_verify_paths(ctx)) { + info("Can not verify default OpenSSL configuration to validate certificates!"); + load_default = 0; + } + + if (load_custom == 0 && load_default == 0) + return -1; + + return 0; +} +#endif diff --git a/libnetdata/socket/security.h b/libnetdata/socket/security.h new file mode 100644 index 0000000..ae7c595 --- /dev/null +++ b/libnetdata/socket/security.h @@ -0,0 +1,63 @@ +#ifndef NETDATA_SECURITY_H +# define NETDATA_SECURITY_H + +# define NETDATA_SSL_HANDSHAKE_COMPLETE 0 //All the steps were successful +# define NETDATA_SSL_START 1 //Starting handshake, conn variable is NULL +# define NETDATA_SSL_WANT_READ 2 //The connection wanna read from socket +# define NETDATA_SSL_WANT_WRITE 4 //The connection wanna write on socket +# define NETDATA_SSL_NO_HANDSHAKE 8 //Continue without encrypt connection. +# define NETDATA_SSL_OPTIONAL 16 //Flag to define the HTTP request +# define NETDATA_SSL_FORCE 32 //We only accepts HTTPS request +# define NETDATA_SSL_INVALID_CERTIFICATE 64 //Accepts invalid certificate +# define NETDATA_SSL_VALID_CERTIFICATE 128 //Accepts invalid certificate +# define NETDATA_SSL_PROXY_HTTPS 256 //Proxy is using HTTPS + +#define NETDATA_SSL_CONTEXT_SERVER 0 +#define NETDATA_SSL_CONTEXT_STREAMING 1 +#define NETDATA_SSL_CONTEXT_EXPORTING 2 + +# ifdef ENABLE_HTTPS + +#define OPENSSL_VERSION_095 0x00905100L +#define OPENSSL_VERSION_097 0x0907000L +#define OPENSSL_VERSION_110 0x10100000L +#define OPENSSL_VERSION_111 0x10101000L +#define OPENSSL_VERSION_300 0x30000000L + +# include +# include +# include +# include +# if (SSLEAY_VERSION_NUMBER >= OPENSSL_VERSION_097) && (OPENSSL_VERSION_NUMBER < OPENSSL_VERSION_110) +# include +# endif + +#if OPENSSL_VERSION_NUMBER >= OPENSSL_VERSION_300 +#include +#include +#endif + +struct netdata_ssl { + SSL *conn; //SSL connection + uint32_t flags; //The flags for SSL connection +}; + +extern SSL_CTX *netdata_ssl_exporting_ctx; +extern SSL_CTX *netdata_ssl_client_ctx; +extern SSL_CTX *netdata_ssl_srv_ctx; +extern const char *netdata_ssl_security_key; +extern const char *netdata_ssl_security_cert; +extern const char *tls_version; +extern const char *tls_ciphers; +extern int netdata_ssl_validate_server; +int ssl_security_location_for_context(SSL_CTX *ctx,char *file,char *path); + +void security_openssl_library(); +void security_clean_openssl(); +void security_start_ssl(int selector); +int security_process_accept(SSL *ssl,int msg); +int security_test_certificate(SSL *ssl); +SSL_CTX * security_initialize_openssl_client(); + +# endif //ENABLE_HTTPS +#endif //NETDATA_SECURITY_H diff --git a/libnetdata/socket/socket.c b/libnetdata/socket/socket.c new file mode 100644 index 0000000..40271b6 --- /dev/null +++ b/libnetdata/socket/socket.c @@ -0,0 +1,1914 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +// -------------------------------------------------------------------------------------------------------------------- +// various library calls + +#ifdef __gnu_linux__ +#define LARGE_SOCK_SIZE 33554431 // don't ask why - I found it at brubeck source - I guess it is just a large number +#else +#define LARGE_SOCK_SIZE 4096 +#endif + +int sock_setnonblock(int fd) { + int flags; + + flags = fcntl(fd, F_GETFL); + flags |= O_NONBLOCK; + + int ret = fcntl(fd, F_SETFL, flags); + if(ret < 0) + error("Failed to set O_NONBLOCK on socket %d", fd); + + return ret; +} + +int sock_delnonblock(int fd) { + int flags; + + flags = fcntl(fd, F_GETFL); + flags &= ~O_NONBLOCK; + + int ret = fcntl(fd, F_SETFL, flags); + if(ret < 0) + error("Failed to remove O_NONBLOCK on socket %d", fd); + + return ret; +} + +int sock_setreuse(int fd, int reuse) { + int ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuse, sizeof(reuse)); + + if(ret == -1) + error("Failed to set SO_REUSEADDR on socket %d", fd); + + return ret; +} + +int sock_setreuse_port(int fd, int reuse) { + int ret; + +#ifdef SO_REUSEPORT + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &reuse, sizeof(reuse)); + if(ret == -1 && errno != ENOPROTOOPT) + error("failed to set SO_REUSEPORT on socket %d", fd); +#else + ret = -1; +#endif + + return ret; +} + +int sock_enlarge_in(int fd) { + int ret, bs = LARGE_SOCK_SIZE; + + ret = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &bs, sizeof(bs)); + + if(ret == -1) + error("Failed to set SO_RCVBUF on socket %d", fd); + + return ret; +} + +int sock_enlarge_out(int fd) { + int ret, bs = LARGE_SOCK_SIZE; + ret = setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &bs, sizeof(bs)); + + if(ret == -1) + error("Failed to set SO_SNDBUF on socket %d", fd); + + return ret; +} + + +// -------------------------------------------------------------------------------------------------------------------- + +char *strdup_client_description(int family, const char *protocol, const char *ip, uint16_t port) { + char buffer[100 + 1]; + + switch(family) { + case AF_INET: + snprintfz(buffer, 100, "%s:%s:%d", protocol, ip, port); + break; + + case AF_INET6: + default: + snprintfz(buffer, 100, "%s:[%s]:%d", protocol, ip, port); + break; + + case AF_UNIX: + snprintfz(buffer, 100, "%s:%s", protocol, ip); + break; + } + + return strdupz(buffer); +} + +// -------------------------------------------------------------------------------------------------------------------- +// listening sockets + +int create_listen_socket_unix(const char *path, int listen_backlog) { + int sock; + + debug(D_LISTENER, "LISTENER: UNIX creating new listening socket on path '%s'", path); + + sock = socket(AF_UNIX, SOCK_STREAM, 0); + if(sock < 0) { + error("LISTENER: UNIX socket() on path '%s' failed.", path); + return -1; + } + + sock_setnonblock(sock); + sock_enlarge_in(sock); + + struct sockaddr_un name; + memset(&name, 0, sizeof(struct sockaddr_un)); + name.sun_family = AF_UNIX; + strncpy(name.sun_path, path, sizeof(name.sun_path)-1); + + errno = 0; + if (unlink(path) == -1 && errno != ENOENT) + error("LISTENER: failed to remove existing (probably obsolete or left-over) file on UNIX socket path '%s'.", path); + + if(bind (sock, (struct sockaddr *) &name, sizeof (name)) < 0) { + close(sock); + error("LISTENER: UNIX bind() on path '%s' failed.", path); + return -1; + } + + // we have to chmod this to 0777 so that the client will be able + // to read from and write to this socket. + if(chmod(path, 0777) == -1) + error("LISTENER: failed to chmod() socket file '%s'.", path); + + if(listen(sock, listen_backlog) < 0) { + close(sock); + error("LISTENER: UNIX listen() on path '%s' failed.", path); + return -1; + } + + debug(D_LISTENER, "LISTENER: Listening on UNIX path '%s'", path); + return sock; +} + +int create_listen_socket4(int socktype, const char *ip, uint16_t port, int listen_backlog) { + int sock; + + debug(D_LISTENER, "LISTENER: IPv4 creating new listening socket on ip '%s' port %d, socktype %d", ip, port, socktype); + + sock = socket(AF_INET, socktype, 0); + if(sock < 0) { + error("LISTENER: IPv4 socket() on ip '%s' port %d, socktype %d failed.", ip, port, socktype); + return -1; + } + + sock_setreuse(sock, 1); + sock_setreuse_port(sock, 1); + sock_setnonblock(sock); + sock_enlarge_in(sock); + + struct sockaddr_in name; + memset(&name, 0, sizeof(struct sockaddr_in)); + name.sin_family = AF_INET; + name.sin_port = htons (port); + + int ret = inet_pton(AF_INET, ip, (void *)&name.sin_addr.s_addr); + if(ret != 1) { + error("LISTENER: Failed to convert IP '%s' to a valid IPv4 address.", ip); + close(sock); + return -1; + } + + if(bind (sock, (struct sockaddr *) &name, sizeof (name)) < 0) { + close(sock); + error("LISTENER: IPv4 bind() on ip '%s' port %d, socktype %d failed.", ip, port, socktype); + return -1; + } + + if(socktype == SOCK_STREAM && listen(sock, listen_backlog) < 0) { + close(sock); + error("LISTENER: IPv4 listen() on ip '%s' port %d, socktype %d failed.", ip, port, socktype); + return -1; + } + + debug(D_LISTENER, "LISTENER: Listening on IPv4 ip '%s' port %d, socktype %d", ip, port, socktype); + return sock; +} + +int create_listen_socket6(int socktype, uint32_t scope_id, const char *ip, int port, int listen_backlog) { + int sock; + int ipv6only = 1; + + debug(D_LISTENER, "LISTENER: IPv6 creating new listening socket on ip '%s' port %d, socktype %d", ip, port, socktype); + + sock = socket(AF_INET6, socktype, 0); + if (sock < 0) { + error("LISTENER: IPv6 socket() on ip '%s' port %d, socktype %d, failed.", ip, port, socktype); + return -1; + } + + sock_setreuse(sock, 1); + sock_setreuse_port(sock, 1); + sock_setnonblock(sock); + sock_enlarge_in(sock); + + /* IPv6 only */ + if(setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY, (void*)&ipv6only, sizeof(ipv6only)) != 0) + error("LISTENER: Cannot set IPV6_V6ONLY on ip '%s' port %d, socktype %d.", ip, port, socktype); + + struct sockaddr_in6 name; + memset(&name, 0, sizeof(struct sockaddr_in6)); + name.sin6_family = AF_INET6; + name.sin6_port = htons ((uint16_t) port); + name.sin6_scope_id = scope_id; + + int ret = inet_pton(AF_INET6, ip, (void *)&name.sin6_addr.s6_addr); + if(ret != 1) { + error("LISTENER: Failed to convert IP '%s' to a valid IPv6 address.", ip); + close(sock); + return -1; + } + + name.sin6_scope_id = scope_id; + + if (bind (sock, (struct sockaddr *) &name, sizeof (name)) < 0) { + close(sock); + error("LISTENER: IPv6 bind() on ip '%s' port %d, socktype %d failed.", ip, port, socktype); + return -1; + } + + if (socktype == SOCK_STREAM && listen(sock, listen_backlog) < 0) { + close(sock); + error("LISTENER: IPv6 listen() on ip '%s' port %d, socktype %d failed.", ip, port, socktype); + return -1; + } + + debug(D_LISTENER, "LISTENER: Listening on IPv6 ip '%s' port %d, socktype %d", ip, port, socktype); + return sock; +} + +static inline int listen_sockets_add(LISTEN_SOCKETS *sockets, int fd, int family, int socktype, const char *protocol, const char *ip, uint16_t port, int acl_flags) { + if(sockets->opened >= MAX_LISTEN_FDS) { + error("LISTENER: Too many listening sockets. Failed to add listening %s socket at ip '%s' port %d, protocol %s, socktype %d", protocol, ip, port, protocol, socktype); + close(fd); + return -1; + } + + sockets->fds[sockets->opened] = fd; + sockets->fds_types[sockets->opened] = socktype; + sockets->fds_families[sockets->opened] = family; + sockets->fds_names[sockets->opened] = strdup_client_description(family, protocol, ip, port); + sockets->fds_acl_flags[sockets->opened] = acl_flags; + + sockets->opened++; + return 0; +} + +int listen_sockets_check_is_member(LISTEN_SOCKETS *sockets, int fd) { + size_t i; + for(i = 0; i < sockets->opened ;i++) + if(sockets->fds[i] == fd) return 1; + + return 0; +} + +static inline void listen_sockets_init(LISTEN_SOCKETS *sockets) { + size_t i; + for(i = 0; i < MAX_LISTEN_FDS ;i++) { + sockets->fds[i] = -1; + sockets->fds_names[i] = NULL; + sockets->fds_types[i] = -1; + } + + sockets->opened = 0; + sockets->failed = 0; +} + +void listen_sockets_close(LISTEN_SOCKETS *sockets) { + size_t i; + for(i = 0; i < sockets->opened ;i++) { + close(sockets->fds[i]); + sockets->fds[i] = -1; + + freez(sockets->fds_names[i]); + sockets->fds_names[i] = NULL; + + sockets->fds_types[i] = -1; + } + + sockets->opened = 0; + sockets->failed = 0; +} + +/* + * SSL ACL + * + * Search the SSL acl and apply it case it is set. + * + * @param acl is the acl given by the user. + */ +WEB_CLIENT_ACL socket_ssl_acl(char *acl) { + char *ssl = strchr(acl,'^'); + if(ssl) { + //Due the format of the SSL command it is always the last command, + //we finish it here to avoid problems with the ACLs + *ssl = '\0'; +#ifdef ENABLE_HTTPS + ssl++; + if (!strncmp("SSL=",ssl,4)) { + ssl += 4; + if (!strcmp(ssl,"optional")) { + return WEB_CLIENT_ACL_SSL_OPTIONAL; + } + else if (!strcmp(ssl,"force")) { + return WEB_CLIENT_ACL_SSL_FORCE; + } + } +#endif + } + + return WEB_CLIENT_ACL_NONE; +} + +WEB_CLIENT_ACL read_acl(char *st) { + WEB_CLIENT_ACL ret = socket_ssl_acl(st); + + if (!strcmp(st,"dashboard")) ret |= WEB_CLIENT_ACL_DASHBOARD; + if (!strcmp(st,"registry")) ret |= WEB_CLIENT_ACL_REGISTRY; + if (!strcmp(st,"badges")) ret |= WEB_CLIENT_ACL_BADGE; + if (!strcmp(st,"management")) ret |= WEB_CLIENT_ACL_MGMT; + if (!strcmp(st,"streaming")) ret |= WEB_CLIENT_ACL_STREAMING; + if (!strcmp(st,"netdata.conf")) ret |= WEB_CLIENT_ACL_NETDATACONF; + + return ret; +} + +static inline int bind_to_this(LISTEN_SOCKETS *sockets, const char *definition, uint16_t default_port, int listen_backlog) { + int added = 0; + WEB_CLIENT_ACL acl_flags = WEB_CLIENT_ACL_NONE; + + struct addrinfo hints; + struct addrinfo *result = NULL, *rp = NULL; + + char buffer[strlen(definition) + 1]; + strcpy(buffer, definition); + + char buffer2[10 + 1]; + snprintfz(buffer2, 10, "%d", default_port); + + char *ip = buffer, *port = buffer2, *interface = "", *portconfig;; + + int protocol = IPPROTO_TCP, socktype = SOCK_STREAM; + const char *protocol_str = "tcp"; + + if(strncmp(ip, "tcp:", 4) == 0) { + ip += 4; + protocol = IPPROTO_TCP; + socktype = SOCK_STREAM; + protocol_str = "tcp"; + } + else if(strncmp(ip, "udp:", 4) == 0) { + ip += 4; + protocol = IPPROTO_UDP; + socktype = SOCK_DGRAM; + protocol_str = "udp"; + } + else if(strncmp(ip, "unix:", 5) == 0) { + char *path = ip + 5; + socktype = SOCK_STREAM; + protocol_str = "unix"; + int fd = create_listen_socket_unix(path, listen_backlog); + if (fd == -1) { + error("LISTENER: Cannot create unix socket '%s'", path); + sockets->failed++; + } else { + acl_flags = WEB_CLIENT_ACL_DASHBOARD | WEB_CLIENT_ACL_REGISTRY | WEB_CLIENT_ACL_BADGE | WEB_CLIENT_ACL_MGMT | WEB_CLIENT_ACL_NETDATACONF | WEB_CLIENT_ACL_STREAMING | WEB_CLIENT_ACL_SSL_DEFAULT; + listen_sockets_add(sockets, fd, AF_UNIX, socktype, protocol_str, path, 0, acl_flags); + added++; + } + return added; + } + + char *e = ip; + if(*e == '[') { + e = ++ip; + while(*e && *e != ']') e++; + if(*e == ']') { + *e = '\0'; + e++; + } + } + else { + while(*e && *e != ':' && *e != '%' && *e != '=') e++; + } + + if(*e == '%') { + *e = '\0'; + e++; + interface = e; + while(*e && *e != ':' && *e != '=') e++; + } + + if(*e == ':') { + port = e + 1; + *e = '\0'; + e++; + while(*e && *e != '=') e++; + } + + if(*e == '=') { + *e='\0'; + e++; + portconfig = e; + while (*e != '\0') { + if (*e == '|') { + *e = '\0'; + acl_flags |= read_acl(portconfig); + e++; + portconfig = e; + continue; + } + e++; + } + acl_flags |= read_acl(portconfig); + } else { + acl_flags = WEB_CLIENT_ACL_DASHBOARD | WEB_CLIENT_ACL_REGISTRY | WEB_CLIENT_ACL_BADGE | WEB_CLIENT_ACL_MGMT | WEB_CLIENT_ACL_NETDATACONF | WEB_CLIENT_ACL_STREAMING | WEB_CLIENT_ACL_SSL_DEFAULT; + } + + //Case the user does not set the option SSL in the "bind to", but he has + //the certificates, I must redirect, so I am assuming here the default option + if(!(acl_flags & WEB_CLIENT_ACL_SSL_OPTIONAL) && !(acl_flags & WEB_CLIENT_ACL_SSL_FORCE)) { + acl_flags |= WEB_CLIENT_ACL_SSL_DEFAULT; + } + + uint32_t scope_id = 0; + if(*interface) { + scope_id = if_nametoindex(interface); + if(!scope_id) + error("LISTENER: Cannot find a network interface named '%s'. Continuing with limiting the network interface", interface); + } + + if(!*ip || *ip == '*' || !strcmp(ip, "any") || !strcmp(ip, "all")) + ip = NULL; + + if(!*port) + port = buffer2; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_UNSPEC; /* Allow IPv4 or IPv6 */ + hints.ai_socktype = socktype; + hints.ai_flags = AI_PASSIVE; /* For wildcard IP address */ + hints.ai_protocol = protocol; + hints.ai_canonname = NULL; + hints.ai_addr = NULL; + hints.ai_next = NULL; + + int r = getaddrinfo(ip, port, &hints, &result); + if (r != 0) { + error("LISTENER: getaddrinfo('%s', '%s'): %s\n", ip, port, gai_strerror(r)); + return -1; + } + + for (rp = result; rp != NULL; rp = rp->ai_next) { + int fd = -1; + int family; + + char rip[INET_ADDRSTRLEN + INET6_ADDRSTRLEN] = "INVALID"; + uint16_t rport = default_port; + + family = rp->ai_addr->sa_family; + switch (family) { + case AF_INET: { + struct sockaddr_in *sin = (struct sockaddr_in *) rp->ai_addr; + inet_ntop(AF_INET, &sin->sin_addr, rip, INET_ADDRSTRLEN); + rport = ntohs(sin->sin_port); + // info("Attempting to listen on IPv4 '%s' ('%s'), port %d ('%s'), socktype %d", rip, ip, rport, port, socktype); + fd = create_listen_socket4(socktype, rip, rport, listen_backlog); + break; + } + + case AF_INET6: { + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) rp->ai_addr; + inet_ntop(AF_INET6, &sin6->sin6_addr, rip, INET6_ADDRSTRLEN); + rport = ntohs(sin6->sin6_port); + // info("Attempting to listen on IPv6 '%s' ('%s'), port %d ('%s'), socktype %d", rip, ip, rport, port, socktype); + fd = create_listen_socket6(socktype, scope_id, rip, rport, listen_backlog); + break; + } + + default: + debug(D_LISTENER, "LISTENER: Unknown socket family %d", family); + break; + } + + if (fd == -1) { + error("LISTENER: Cannot bind to ip '%s', port %d", rip, rport); + sockets->failed++; + } + else { + listen_sockets_add(sockets, fd, family, socktype, protocol_str, rip, rport, acl_flags); + added++; + } + } + + freeaddrinfo(result); + + return added; +} + +int listen_sockets_setup(LISTEN_SOCKETS *sockets) { + listen_sockets_init(sockets); + + sockets->backlog = (int) appconfig_get_number(sockets->config, sockets->config_section, "listen backlog", sockets->backlog); + + long long int old_port = sockets->default_port; + long long int new_port = appconfig_get_number(sockets->config, sockets->config_section, "default port", sockets->default_port); + if(new_port < 1 || new_port > 65535) { + error("LISTENER: Invalid listen port %lld given. Defaulting to %lld.", new_port, old_port); + sockets->default_port = (uint16_t) appconfig_set_number(sockets->config, sockets->config_section, "default port", old_port); + } + else sockets->default_port = (uint16_t)new_port; + + debug(D_OPTIONS, "LISTENER: Default listen port set to %d.", sockets->default_port); + + char *s = appconfig_get(sockets->config, sockets->config_section, "bind to", sockets->default_bind_to); + while(*s) { + char *e = s; + + // skip separators, moving both s(tart) and e(nd) + while(isspace(*e) || *e == ',') s = ++e; + + // move e(nd) to the first separator + while(*e && !isspace(*e) && *e != ',') e++; + + // is there anything? + if(!*s || s == e) break; + + char buf[e - s + 1]; + strncpyz(buf, s, e - s); + bind_to_this(sockets, buf, sockets->default_port, sockets->backlog); + + s = e; + } + + if(sockets->failed) { + size_t i; + for(i = 0; i < sockets->opened ;i++) + info("LISTENER: Listen socket %s opened successfully.", sockets->fds_names[i]); + } + + return (int)sockets->opened; +} + + +// -------------------------------------------------------------------------------------------------------------------- +// connect to another host/port + +// connect_to_this_unix() +// path the path of the unix socket +// timeout the timeout for establishing a connection + +static inline int connect_to_unix(const char *path, struct timeval *timeout) { + int fd = socket(AF_UNIX, SOCK_STREAM, 0); + if(fd == -1) { + error("Failed to create UNIX socket() for '%s'", path); + return -1; + } + + if(timeout) { + if(setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (char *) timeout, sizeof(struct timeval)) < 0) + error("Failed to set timeout on UNIX socket '%s'", path); + } + + struct sockaddr_un addr; + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + strncpy(addr.sun_path, path, sizeof(addr.sun_path)-1); + + if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == -1) { + error("Cannot connect to UNIX socket on path '%s'.", path); + close(fd); + return -1; + } + + debug(D_CONNECT_TO, "Connected to UNIX socket on path '%s'.", path); + + return fd; +} + +// connect_to_this_ip46() +// protocol IPPROTO_TCP, IPPROTO_UDP +// socktype SOCK_STREAM, SOCK_DGRAM +// host the destination hostname or IP address (IPv4 or IPv6) to connect to +// if it resolves to many IPs, all are tried (IPv4 and IPv6) +// scope_id the if_index id of the interface to use for connecting (0 = any) +// (used only under IPv6) +// service the service name or port to connect to +// timeout the timeout for establishing a connection + +int connect_to_this_ip46(int protocol, int socktype, const char *host, uint32_t scope_id, const char *service, struct timeval *timeout) { + struct addrinfo hints; + struct addrinfo *ai_head = NULL, *ai = NULL; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = PF_UNSPEC; /* Allow IPv4 or IPv6 */ + hints.ai_socktype = socktype; + hints.ai_protocol = protocol; + + int ai_err = getaddrinfo(host, service, &hints, &ai_head); + if (ai_err != 0) { + error("Cannot resolve host '%s', port '%s': %s", host, service, gai_strerror(ai_err)); + return -1; + } + + int fd = -1; + for (ai = ai_head; ai != NULL && fd == -1; ai = ai->ai_next) { + + if (ai->ai_family == PF_INET6) { + struct sockaddr_in6 *pSadrIn6 = (struct sockaddr_in6 *) ai->ai_addr; + if(pSadrIn6->sin6_scope_id == 0) { + pSadrIn6->sin6_scope_id = scope_id; + } + } + + char hostBfr[NI_MAXHOST + 1]; + char servBfr[NI_MAXSERV + 1]; + + getnameinfo(ai->ai_addr, + ai->ai_addrlen, + hostBfr, + sizeof(hostBfr), + servBfr, + sizeof(servBfr), + NI_NUMERICHOST | NI_NUMERICSERV); + + debug(D_CONNECT_TO, "Address info: host = '%s', service = '%s', ai_flags = 0x%02X, ai_family = %d (PF_INET = %d, PF_INET6 = %d), ai_socktype = %d (SOCK_STREAM = %d, SOCK_DGRAM = %d), ai_protocol = %d (IPPROTO_TCP = %d, IPPROTO_UDP = %d), ai_addrlen = %lu (sockaddr_in = %lu, sockaddr_in6 = %lu)", + hostBfr, + servBfr, + (unsigned int)ai->ai_flags, + ai->ai_family, + PF_INET, + PF_INET6, + ai->ai_socktype, + SOCK_STREAM, + SOCK_DGRAM, + ai->ai_protocol, + IPPROTO_TCP, + IPPROTO_UDP, + (unsigned long)ai->ai_addrlen, + (unsigned long)sizeof(struct sockaddr_in), + (unsigned long)sizeof(struct sockaddr_in6)); + + switch (ai->ai_addr->sa_family) { + case PF_INET: { + struct sockaddr_in *pSadrIn = (struct sockaddr_in *)ai->ai_addr; + (void)pSadrIn; + + debug(D_CONNECT_TO, "ai_addr = sin_family: %d (AF_INET = %d, AF_INET6 = %d), sin_addr: '%s', sin_port: '%s'", + pSadrIn->sin_family, + AF_INET, + AF_INET6, + hostBfr, + servBfr); + break; + } + + case PF_INET6: { + struct sockaddr_in6 *pSadrIn6 = (struct sockaddr_in6 *) ai->ai_addr; + (void)pSadrIn6; + + debug(D_CONNECT_TO,"ai_addr = sin6_family: %d (AF_INET = %d, AF_INET6 = %d), sin6_addr: '%s', sin6_port: '%s', sin6_flowinfo: %u, sin6_scope_id: %u", + pSadrIn6->sin6_family, + AF_INET, + AF_INET6, + hostBfr, + servBfr, + pSadrIn6->sin6_flowinfo, + pSadrIn6->sin6_scope_id); + break; + } + + default: { + debug(D_CONNECT_TO, "Unknown protocol family %d.", ai->ai_family); + continue; + } + } + + fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); + if(fd != -1) { + if(timeout) { + if(setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (char *) timeout, sizeof(struct timeval)) < 0) + error("Failed to set timeout on the socket to ip '%s' port '%s'", hostBfr, servBfr); + } + + errno = 0; + if(connect(fd, ai->ai_addr, ai->ai_addrlen) < 0) { + if(errno == EALREADY || errno == EINPROGRESS) { + info("Waiting for connection to ip %s port %s to be established", hostBfr, servBfr); + + fd_set fds; + FD_ZERO(&fds); + FD_SET(0, &fds); + int rc = select (1, NULL, &fds, NULL, timeout); + + if(rc > 0 && FD_ISSET(fd, &fds)) { + info("connect() to ip %s port %s completed successfully", hostBfr, servBfr); + } + else if(rc == -1) { + error("Failed to connect to '%s', port '%s'. select() returned %d", hostBfr, servBfr, rc); + close(fd); + fd = -1; + } + else { + error("Timed out while connecting to '%s', port '%s'. select() returned %d", hostBfr, servBfr, rc); + close(fd); + fd = -1; + } + } + else { + error("Failed to connect to '%s', port '%s'", hostBfr, servBfr); + close(fd); + fd = -1; + } + } + + if(fd != -1) + debug(D_CONNECT_TO, "Connected to '%s' on port '%s'.", hostBfr, servBfr); + } + } + + freeaddrinfo(ai_head); + + return fd; +} + +// connect_to_this() +// +// definition format: +// +// [PROTOCOL:]IP[%INTERFACE][:PORT] +// +// PROTOCOL = tcp or udp +// IP = IPv4 or IPv6 IP or hostname, optionally enclosed in [] (required for IPv6) +// INTERFACE = for IPv6 only, the network interface to use +// PORT = port number or service name + +int connect_to_this(const char *definition, int default_port, struct timeval *timeout) { + char buffer[strlen(definition) + 1]; + strcpy(buffer, definition); + + char default_service[10 + 1]; + snprintfz(default_service, 10, "%d", default_port); + + char *host = buffer, *service = default_service, *interface = ""; + int protocol = IPPROTO_TCP, socktype = SOCK_STREAM; + uint32_t scope_id = 0; + + if(strncmp(host, "tcp:", 4) == 0) { + host += 4; + protocol = IPPROTO_TCP; + socktype = SOCK_STREAM; + } + else if(strncmp(host, "udp:", 4) == 0) { + host += 4; + protocol = IPPROTO_UDP; + socktype = SOCK_DGRAM; + } + else if(strncmp(host, "unix:", 5) == 0) { + char *path = host + 5; + return connect_to_unix(path, timeout); + } + else if(*host == '/') { + char *path = host; + return connect_to_unix(path, timeout); + } + + char *e = host; + if(*e == '[') { + e = ++host; + while(*e && *e != ']') e++; + if(*e == ']') { + *e = '\0'; + e++; + } + } + else { + while(*e && *e != ':' && *e != '%') e++; + } + + if(*e == '%') { + *e = '\0'; + e++; + interface = e; + while(*e && *e != ':') e++; + } + + if(*e == ':') { + *e = '\0'; + e++; + service = e; + } + + debug(D_CONNECT_TO, "Attempting connection to host = '%s', service = '%s', interface = '%s', protocol = %d (tcp = %d, udp = %d)", host, service, interface, protocol, IPPROTO_TCP, IPPROTO_UDP); + + if(!*host) { + error("Definition '%s' does not specify a host.", definition); + return -1; + } + + if(*interface) { + scope_id = if_nametoindex(interface); + if(!scope_id) + error("Cannot find a network interface named '%s'. Continuing with limiting the network interface", interface); + } + + if(!*service) + service = default_service; + + + return connect_to_this_ip46(protocol, socktype, host, scope_id, service, timeout); +} + +void foreach_entry_in_connection_string(const char *destination, bool (*callback)(char *entry, void *data), void *data) { + const char *s = destination; + while(*s) { + const char *e = s; + + // skip separators, moving both s(tart) and e(nd) + while(isspace(*e) || *e == ',') s = ++e; + + // move e(nd) to the first separator + while(*e && !isspace(*e) && *e != ',') e++; + + // is there anything? + if(!*s || s == e) break; + + char buf[e - s + 1]; + strncpyz(buf, s, e - s); + + if(callback(buf, data)) break; + + s = e; + } +} + +struct connect_to_one_of_data { + int default_port; + struct timeval *timeout; + size_t *reconnects_counter; + char *connected_to; + size_t connected_to_size; + int sock; +}; + +static bool connect_to_one_of_callback(char *entry, void *data) { + struct connect_to_one_of_data *t = data; + + if(t->reconnects_counter) + t->reconnects_counter++; + + t->sock = connect_to_this(entry, t->default_port, t->timeout); + if(t->sock != -1) { + if(t->connected_to && t->connected_to_size) { + strncpyz(t->connected_to, entry, t->connected_to_size); + t->connected_to[t->connected_to_size - 1] = '\0'; + } + + return true; + } + + return false; +} + +int connect_to_one_of(const char *destination, int default_port, struct timeval *timeout, size_t *reconnects_counter, char *connected_to, size_t connected_to_size) { + struct connect_to_one_of_data t = { + .default_port = default_port, + .timeout = timeout, + .reconnects_counter = reconnects_counter, + .connected_to = connected_to, + .connected_to_size = connected_to_size, + .sock = -1, + }; + + foreach_entry_in_connection_string(destination, connect_to_one_of_callback, &t); + + return t.sock; +} + +static bool connect_to_one_of_urls_callback(char *entry, void *data) { + char *s = strchr(entry, '/'); + if(s) *s = '\0'; + + return connect_to_one_of_callback(entry, data); +} + +int connect_to_one_of_urls(const char *destination, int default_port, struct timeval *timeout, size_t *reconnects_counter, char *connected_to, size_t connected_to_size) { + struct connect_to_one_of_data t = { + .default_port = default_port, + .timeout = timeout, + .reconnects_counter = reconnects_counter, + .connected_to = connected_to, + .connected_to_size = connected_to_size, + .sock = -1, + }; + + foreach_entry_in_connection_string(destination, connect_to_one_of_urls_callback, &t); + + return t.sock; +} + + +#ifdef ENABLE_HTTPS +ssize_t netdata_ssl_read(SSL *ssl, void *buf, size_t num) { + error_limit_static_thread_var(erl, 1, 0); + + int bytes, err, retries = 0; + + //do { + bytes = SSL_read(ssl, buf, (int)num); + err = SSL_get_error(ssl, bytes); + retries++; + //} while (bytes <= 0 && (err == SSL_ERROR_WANT_READ)); + + if(unlikely(bytes <= 0)) + error("SSL_read() returned %d bytes, SSL error %d", bytes, err); + + if(retries > 1) + error_limit(&erl, "SSL_read() retried %d times", retries); + + return bytes; +} + +ssize_t netdata_ssl_write(SSL *ssl, const void *buf, size_t num) { + error_limit_static_thread_var(erl, 1, 0); + + int bytes, err, retries = 0; + size_t total = 0; + + //do { + bytes = SSL_write(ssl, (uint8_t *)buf + total, (int)(num - total)); + err = SSL_get_error(ssl, bytes); + retries++; + + if(bytes > 0) + total += bytes; + + //} while ((bytes <= 0 && (err == SSL_ERROR_WANT_WRITE)) || (bytes > 0 && total < num)); + + if(unlikely(bytes <= 0)) + error("SSL_write() returned %d bytes, SSL error %d", bytes, err); + + if(retries > 1) + error_limit(&erl, "SSL_write() retried %d times", retries); + + return bytes; +} +#endif + +// -------------------------------------------------------------------------------------------------------------------- +// helpers to send/receive data in one call, in blocking mode, with a timeout + +#ifdef ENABLE_HTTPS +ssize_t recv_timeout(struct netdata_ssl *ssl,int sockfd, void *buf, size_t len, int flags, int timeout) { +#else +ssize_t recv_timeout(int sockfd, void *buf, size_t len, int flags, int timeout) { +#endif + + for(;;) { + struct pollfd fd = { + .fd = sockfd, + .events = POLLIN, + .revents = 0 + }; + + errno = 0; + int retval = poll(&fd, 1, timeout * 1000); + + if(retval == -1) { + // failed + + if(errno == EINTR || errno == EAGAIN) + continue; + + return -1; + } + + if(!retval) { + // timeout + return 0; + } + + if(fd.events & POLLIN) break; + } + +#ifdef ENABLE_HTTPS + if (ssl->conn && ssl->flags == NETDATA_SSL_HANDSHAKE_COMPLETE) + return netdata_ssl_read(ssl->conn, buf, len); +#endif + + return recv(sockfd, buf, len, flags); +} + +#ifdef ENABLE_HTTPS +ssize_t send_timeout(struct netdata_ssl *ssl,int sockfd, void *buf, size_t len, int flags, int timeout) { +#else +ssize_t send_timeout(int sockfd, void *buf, size_t len, int flags, int timeout) { +#endif + + for(;;) { + struct pollfd fd = { + .fd = sockfd, + .events = POLLOUT, + .revents = 0 + }; + + errno = 0; + int retval = poll(&fd, 1, timeout * 1000); + + if(retval == -1) { + // failed + + if(errno == EINTR || errno == EAGAIN) + continue; + + return -1; + } + + if(!retval) { + // timeout + return 0; + } + + if(fd.events & POLLOUT) break; + } + +#ifdef ENABLE_HTTPS + if(ssl->conn) { + if (ssl->flags == NETDATA_SSL_HANDSHAKE_COMPLETE) { + return netdata_ssl_write(ssl->conn, buf, len); + } + else { + error("cannot write to SSL connection - connection is not ready."); + return -1; + } + } +#endif + return send(sockfd, buf, len, flags); +} + + +// -------------------------------------------------------------------------------------------------------------------- +// accept4() replacement for systems that do not have one + +#ifndef HAVE_ACCEPT4 +int accept4(int sock, struct sockaddr *addr, socklen_t *addrlen, int flags) { + int fd = accept(sock, addr, addrlen); + int newflags = 0; + + if (fd < 0) return fd; + + if (flags & SOCK_NONBLOCK) { + newflags |= O_NONBLOCK; + flags &= ~SOCK_NONBLOCK; + } + +#ifdef SOCK_CLOEXEC +#ifdef O_CLOEXEC + if (flags & SOCK_CLOEXEC) { + newflags |= O_CLOEXEC; + flags &= ~SOCK_CLOEXEC; + } +#endif +#endif + + if (flags) { + close(fd); + errno = EINVAL; + return -1; + } + + if (fcntl(fd, F_SETFL, newflags) < 0) { + int saved_errno = errno; + close(fd); + errno = saved_errno; + return -1; + } + + return fd; +} +#endif + +/* + * --------------------------------------------------------------------------------------------------------------------- + * connection_allowed() - if there is an access list then check the connection matches a pattern. + * Numeric patterns are checked against the IP address first, only if they + * do not match is the hostname resolved (reverse-DNS) and checked. If the + * hostname matches then we perform forward DNS resolution to check the IP + * is really associated with the DNS record. This call is repeatable: the + * web server may check more refined matches against the connection. Will + * update the client_host if uninitialized - ensure the hostsize is the number + * of *writable* bytes (i.e. be aware of the strdup used to compact the pollinfo). + */ +extern int connection_allowed(int fd, char *client_ip, char *client_host, size_t hostsize, SIMPLE_PATTERN *access_list, + const char *patname, int allow_dns) +{ + debug(D_LISTENER,"checking %s... (allow_dns=%d)", patname, allow_dns); + if (!access_list) + return 1; + if (simple_pattern_matches(access_list, client_ip)) + return 1; + // If the hostname is unresolved (and needed) then attempt the DNS lookups. + //if (client_host[0]==0 && simple_pattern_is_potential_name(access_list)) + if (client_host[0]==0 && allow_dns) + { + struct sockaddr_storage sadr; + socklen_t addrlen = sizeof(sadr); + int err = getpeername(fd, (struct sockaddr*)&sadr, &addrlen); + if (err != 0 || + (err = getnameinfo((struct sockaddr *)&sadr, addrlen, client_host, (socklen_t)hostsize, + NULL, 0, NI_NAMEREQD)) != 0) { + error("Incoming %s on '%s' does not match a numeric pattern, and host could not be resolved (err=%s)", + patname, client_ip, gai_strerror(err)); + if (hostsize >= 8) + strcpy(client_host,"UNKNOWN"); + return 0; + } + struct addrinfo *addr_infos = NULL; + if (getaddrinfo(client_host, NULL, NULL, &addr_infos) !=0 ) { + error("LISTENER: cannot validate hostname '%s' from '%s' by resolving it", + client_host, client_ip); + if (hostsize >= 8) + strcpy(client_host,"UNKNOWN"); + return 0; + } + struct addrinfo *scan = addr_infos; + int validated = 0; + while (scan) { + char address[INET6_ADDRSTRLEN]; + address[0] = 0; + switch (scan->ai_addr->sa_family) { + case AF_INET: + inet_ntop(AF_INET, &((struct sockaddr_in*)(scan->ai_addr))->sin_addr, address, INET6_ADDRSTRLEN); + break; + case AF_INET6: + inet_ntop(AF_INET6, &((struct sockaddr_in6*)(scan->ai_addr))->sin6_addr, address, INET6_ADDRSTRLEN); + break; + } + debug(D_LISTENER, "Incoming ip %s rev-resolved onto %s, validating against forward-resolution %s", + client_ip, client_host, address); + if (!strcmp(client_ip, address)) { + validated = 1; + break; + } + scan = scan->ai_next; + } + if (!validated) { + error("LISTENER: Cannot validate '%s' as ip of '%s', not listed in DNS", client_ip, client_host); + if (hostsize >= 8) + strcpy(client_host,"UNKNOWN"); + } + if (addr_infos!=NULL) + freeaddrinfo(addr_infos); + } + if (!simple_pattern_matches(access_list, client_host)) { + debug(D_LISTENER, "Incoming connection on '%s' (%s) does not match allowed pattern for %s", + client_ip, client_host, patname); + return 0; + } + return 1; +} + +// -------------------------------------------------------------------------------------------------------------------- +// accept_socket() - accept a socket and store client IP and port +int accept_socket(int fd, int flags, char *client_ip, size_t ipsize, char *client_port, size_t portsize, + char *client_host, size_t hostsize, SIMPLE_PATTERN *access_list, int allow_dns) { + struct sockaddr_storage sadr; + socklen_t addrlen = sizeof(sadr); + + int nfd = accept4(fd, (struct sockaddr *)&sadr, &addrlen, flags); + if (likely(nfd >= 0)) { + if (getnameinfo((struct sockaddr *)&sadr, addrlen, client_ip, (socklen_t)ipsize, + client_port, (socklen_t)portsize, NI_NUMERICHOST | NI_NUMERICSERV) != 0) { + error("LISTENER: cannot getnameinfo() on received client connection."); + strncpyz(client_ip, "UNKNOWN", ipsize); + strncpyz(client_port, "UNKNOWN", portsize); + } + if (!strcmp(client_ip, "127.0.0.1") || !strcmp(client_ip, "::1")) { + strncpyz(client_ip, "localhost", ipsize); + } + +#ifdef __FreeBSD__ + if(((struct sockaddr *)&sadr)->sa_family == AF_LOCAL) + strncpyz(client_ip, "localhost", ipsize); +#endif + + client_ip[ipsize - 1] = '\0'; + client_port[portsize - 1] = '\0'; + + switch (((struct sockaddr *)&sadr)->sa_family) { + case AF_UNIX: + debug(D_LISTENER, "New UNIX domain web client from %s on socket %d.", client_ip, fd); + // set the port - certain versions of libc return garbage on unix sockets + strncpyz(client_port, "UNIX", portsize); + break; + + case AF_INET: + debug(D_LISTENER, "New IPv4 web client from %s port %s on socket %d.", client_ip, client_port, fd); + break; + + case AF_INET6: + if (strncmp(client_ip, "::ffff:", 7) == 0) { + memmove(client_ip, &client_ip[7], strlen(&client_ip[7]) + 1); + debug(D_LISTENER, "New IPv4 web client from %s port %s on socket %d.", client_ip, client_port, fd); + } + else + debug(D_LISTENER, "New IPv6 web client from %s port %s on socket %d.", client_ip, client_port, fd); + break; + + default: + debug(D_LISTENER, "New UNKNOWN web client from %s port %s on socket %d.", client_ip, client_port, fd); + break; + } + if (!connection_allowed(nfd, client_ip, client_host, hostsize, access_list, "connection", allow_dns)) { + errno = 0; + error("Permission denied for client '%s', port '%s'", client_ip, client_port); + close(nfd); + nfd = -1; + errno = EPERM; + } + } +#ifdef HAVE_ACCEPT4 + else if (errno == ENOSYS) + error("netdata has been compiled with the assumption that the system has the accept4() call, but it is not here. Recompile netdata like this: ./configure --disable-accept4 ..."); +#endif + + return nfd; +} + + +// -------------------------------------------------------------------------------------------------------------------- +// poll() based listener +// this should be the fastest possible listener for up to 100 sockets +// above 100, an epoll() interface is needed on Linux + +#define POLL_FDS_INCREASE_STEP 10 + +inline POLLINFO *poll_add_fd(POLLJOB *p + , int fd + , int socktype + , WEB_CLIENT_ACL port_acl + , uint32_t flags + , const char *client_ip + , const char *client_port + , const char *client_host + , void *(*add_callback)(POLLINFO * /*pi*/, short int * /*events*/, void * /*data*/) + , void (*del_callback)(POLLINFO * /*pi*/) + , int (*rcv_callback)(POLLINFO * /*pi*/, short int * /*events*/) + , int (*snd_callback)(POLLINFO * /*pi*/, short int * /*events*/) + , void *data +) { + debug(D_POLLFD, "POLLFD: ADD: request to add fd %d, slots = %zu, used = %zu, min = %zu, max = %zu, next free = %zd", fd, p->slots, p->used, p->min, p->max, p->first_free?(ssize_t)p->first_free->slot:(ssize_t)-1); + + if(unlikely(fd < 0)) return NULL; + + //if(p->limit && p->used >= p->limit) { + // info("Max sockets limit reached (%zu sockets), dropping connection", p->used); + // close(fd); + // return NULL; + //} + + if(unlikely(!p->first_free)) { + size_t new_slots = p->slots + POLL_FDS_INCREASE_STEP; + debug(D_POLLFD, "POLLFD: ADD: increasing size (current = %zu, new = %zu, used = %zu, min = %zu, max = %zu)", p->slots, new_slots, p->used, p->min, p->max); + + p->fds = reallocz(p->fds, sizeof(struct pollfd) * new_slots); + p->inf = reallocz(p->inf, sizeof(POLLINFO) * new_slots); + + // reset all the newly added slots + ssize_t i; + for(i = new_slots - 1; i >= (ssize_t)p->slots ; i--) { + debug(D_POLLFD, "POLLFD: ADD: resetting new slot %zd", i); + p->fds[i].fd = -1; + p->fds[i].events = 0; + p->fds[i].revents = 0; + + p->inf[i].p = p; + p->inf[i].slot = (size_t)i; + p->inf[i].flags = 0; + p->inf[i].socktype = -1; + p->inf[i].port_acl = -1; + + p->inf[i].client_ip = NULL; + p->inf[i].client_port = NULL; + p->inf[i].client_host = NULL; + p->inf[i].del_callback = p->del_callback; + p->inf[i].rcv_callback = p->rcv_callback; + p->inf[i].snd_callback = p->snd_callback; + p->inf[i].data = NULL; + + // link them so that the first free will be earlier in the array + // (we loop decrementing i) + p->inf[i].next = p->first_free; + p->first_free = &p->inf[i]; + } + + p->slots = new_slots; + } + + POLLINFO *pi = p->first_free; + p->first_free = p->first_free->next; + + debug(D_POLLFD, "POLLFD: ADD: selected slot %zu, next free is %zd", pi->slot, p->first_free?(ssize_t)p->first_free->slot:(ssize_t)-1); + + struct pollfd *pf = &p->fds[pi->slot]; + pf->fd = fd; + pf->events = POLLIN; + pf->revents = 0; + + pi->fd = fd; + pi->p = p; + pi->socktype = socktype; + pi->port_acl = port_acl; + pi->flags = flags; + pi->next = NULL; + pi->client_ip = strdupz(client_ip); + pi->client_port = strdupz(client_port); + pi->client_host = strdupz(client_host); + + pi->del_callback = del_callback; + pi->rcv_callback = rcv_callback; + pi->snd_callback = snd_callback; + + pi->connected_t = now_boottime_sec(); + pi->last_received_t = 0; + pi->last_sent_t = 0; + pi->last_sent_t = 0; + pi->recv_count = 0; + pi->send_count = 0; + + netdata_thread_disable_cancelability(); + p->used++; + if(unlikely(pi->slot > p->max)) + p->max = pi->slot; + + if(pi->flags & POLLINFO_FLAG_CLIENT_SOCKET) { + pi->data = add_callback(pi, &pf->events, data); + } + + if(pi->flags & POLLINFO_FLAG_SERVER_SOCKET) { + p->min = pi->slot; + } + netdata_thread_enable_cancelability(); + + debug(D_POLLFD, "POLLFD: ADD: completed, slots = %zu, used = %zu, min = %zu, max = %zu, next free = %zd", p->slots, p->used, p->min, p->max, p->first_free?(ssize_t)p->first_free->slot:(ssize_t)-1); + + return pi; +} + +inline void poll_close_fd(POLLINFO *pi) { + POLLJOB *p = pi->p; + + struct pollfd *pf = &p->fds[pi->slot]; + debug(D_POLLFD, "POLLFD: DEL: request to clear slot %zu (fd %d), old next free was %zd", pi->slot, pf->fd, p->first_free?(ssize_t)p->first_free->slot:(ssize_t)-1); + + if(unlikely(pf->fd == -1)) return; + + netdata_thread_disable_cancelability(); + + if(pi->flags & POLLINFO_FLAG_CLIENT_SOCKET) { + pi->del_callback(pi); + + if(likely(!(pi->flags & POLLINFO_FLAG_DONT_CLOSE))) { + if(close(pf->fd) == -1) + error("Failed to close() poll_events() socket %d", pf->fd); + } + } + + pf->fd = -1; + pf->events = 0; + pf->revents = 0; + + pi->fd = -1; + pi->socktype = -1; + pi->flags = 0; + pi->data = NULL; + + pi->del_callback = NULL; + pi->rcv_callback = NULL; + pi->snd_callback = NULL; + + freez(pi->client_ip); + pi->client_ip = NULL; + + freez(pi->client_port); + pi->client_port = NULL; + + freez(pi->client_host); + pi->client_host = NULL; + + pi->next = p->first_free; + p->first_free = pi; + + p->used--; + if(unlikely(p->max == pi->slot)) { + p->max = p->min; + ssize_t i; + for(i = (ssize_t)pi->slot; i > (ssize_t)p->min ;i--) { + if (unlikely(p->fds[i].fd != -1)) { + p->max = (size_t)i; + break; + } + } + } + netdata_thread_enable_cancelability(); + + debug(D_POLLFD, "POLLFD: DEL: completed, slots = %zu, used = %zu, min = %zu, max = %zu, next free = %zd", p->slots, p->used, p->min, p->max, p->first_free?(ssize_t)p->first_free->slot:(ssize_t)-1); +} + +void *poll_default_add_callback(POLLINFO *pi, short int *events, void *data) { + (void)pi; + (void)events; + (void)data; + + // error("POLLFD: internal error: poll_default_add_callback() called"); + + return NULL; +} + +void poll_default_del_callback(POLLINFO *pi) { + if(pi->data) + error("POLLFD: internal error: del_callback_default() called with data pointer - possible memory leak"); +} + +int poll_default_rcv_callback(POLLINFO *pi, short int *events) { + *events |= POLLIN; + + char buffer[1024 + 1]; + + ssize_t rc; + do { + rc = recv(pi->fd, buffer, 1024, MSG_DONTWAIT); + if (rc < 0) { + // read failed + if (errno != EWOULDBLOCK && errno != EAGAIN) { + error("POLLFD: poll_default_rcv_callback(): recv() failed with %zd.", rc); + return -1; + } + } else if (rc) { + // data received + info("POLLFD: internal error: poll_default_rcv_callback() is discarding %zd bytes received on socket %d", rc, pi->fd); + } + } while (rc != -1); + + return 0; +} + +int poll_default_snd_callback(POLLINFO *pi, short int *events) { + *events &= ~POLLOUT; + + info("POLLFD: internal error: poll_default_snd_callback(): nothing to send on socket %d", pi->fd); + return 0; +} + +void poll_default_tmr_callback(void *timer_data) { + (void)timer_data; +} + +static void poll_events_cleanup(void *data) { + POLLJOB *p = (POLLJOB *)data; + + size_t i; + for(i = 0 ; i <= p->max ; i++) { + POLLINFO *pi = &p->inf[i]; + poll_close_fd(pi); + } + + freez(p->fds); + freez(p->inf); +} + +static int poll_process_error(POLLINFO *pi, struct pollfd *pf, short int revents) { + error("POLLFD: LISTENER: received %s %s %s on socket at slot %zu (fd %d) client '%s' port '%s' expecting %s %s %s, having %s %s %s" + , revents & POLLERR ? "POLLERR" : "" + , revents & POLLHUP ? "POLLHUP" : "" + , revents & POLLNVAL ? "POLLNVAL" : "" + , pi->slot + , pi->fd + , pi->client_ip ? pi->client_ip : "" + , pi->client_port ? pi->client_port : "" + , pf->events & POLLIN ? "POLLIN" : "", pf->events & POLLOUT ? "POLLOUT" : "", pf->events & POLLPRI ? "POLLPRI" : "" + , revents & POLLIN ? "POLLIN" : "", revents & POLLOUT ? "POLLOUT" : "", revents & POLLPRI ? "POLLPRI" : "" + ); + + pf->events = 0; + poll_close_fd(pi); + return 1; +} + +static inline int poll_process_send(POLLJOB *p, POLLINFO *pi, struct pollfd *pf, time_t now) { + pi->last_sent_t = now; + pi->send_count++; + + debug(D_POLLFD, "POLLFD: LISTENER: sending data to socket on slot %zu (fd %d)", pi->slot, pf->fd); + + pf->events = 0; + + // remember the slot, in case we need to close it later + // the callback may manipulate the socket list and our pf and pi pointers may be invalid after that call + size_t slot = pi->slot; + + if (unlikely(pi->snd_callback(pi, &pf->events) == -1)) + poll_close_fd(&p->inf[slot]); + + // IMPORTANT: + // pf and pi may be invalid below this point, they may have been reallocated. + + return 1; +} + +static inline int poll_process_tcp_read(POLLJOB *p, POLLINFO *pi, struct pollfd *pf, time_t now) { + pi->last_received_t = now; + pi->recv_count++; + + debug(D_POLLFD, "POLLFD: LISTENER: reading data from TCP client slot %zu (fd %d)", pi->slot, pf->fd); + + pf->events = 0; + + // remember the slot, in case we need to close it later + // the callback may manipulate the socket list and our pf and pi pointers may be invalid after that call + size_t slot = pi->slot; + + if (pi->rcv_callback(pi, &pf->events) == -1) + poll_close_fd(&p->inf[slot]); + + // IMPORTANT: + // pf and pi may be invalid below this point, they may have been reallocated. + + return 1; +} + +static inline int poll_process_udp_read(POLLINFO *pi, struct pollfd *pf, time_t now __maybe_unused) { + pi->last_received_t = now; + pi->recv_count++; + + debug(D_POLLFD, "POLLFD: LISTENER: reading data from UDP slot %zu (fd %d)", pi->slot, pf->fd); + + // TODO: access_list is not applied to UDP + // but checking the access list on every UDP packet will destroy + // performance, especially for statsd. + + pf->events = 0; + if(pi->rcv_callback(pi, &pf->events) == -1) + return 0; + + // IMPORTANT: + // pf and pi may be invalid below this point, they may have been reallocated. + + return 1; +} + +static int poll_process_new_tcp_connection(POLLJOB *p, POLLINFO *pi, struct pollfd *pf, time_t now) { + pi->last_received_t = now; + pi->recv_count++; + + debug(D_POLLFD, "POLLFD: LISTENER: accepting connections from slot %zu (fd %d)", pi->slot, pf->fd); + + char client_ip[INET6_ADDRSTRLEN] = ""; + char client_port[NI_MAXSERV] = ""; + char client_host[NI_MAXHOST] = ""; + + debug(D_POLLFD, "POLLFD: LISTENER: calling accept4() slot %zu (fd %d)", pi->slot, pf->fd); + + int nfd = accept_socket( + pf->fd,SOCK_NONBLOCK, + client_ip, INET6_ADDRSTRLEN, client_port,NI_MAXSERV, client_host, NI_MAXHOST, + p->access_list, p->allow_dns + ); + + if (unlikely(nfd < 0)) { + // accept failed + + debug(D_POLLFD, "POLLFD: LISTENER: accept4() slot %zu (fd %d) failed.", pi->slot, pf->fd); + + if(unlikely(errno == EMFILE)) { + error_limit_static_global_var(erl, 10, 1000); + error_limit(&erl, "POLLFD: LISTENER: too many open files - used by this thread %zu, max for this thread %zu", + p->used, p->limit); + } + else if(unlikely(errno != EWOULDBLOCK && errno != EAGAIN)) + error("POLLFD: LISTENER: accept() failed."); + + } + else { + // accept ok + + poll_add_fd(p + , nfd + , SOCK_STREAM + , pi->port_acl + , POLLINFO_FLAG_CLIENT_SOCKET + , client_ip + , client_port + , client_host + , p->add_callback + , p->del_callback + , p->rcv_callback + , p->snd_callback + , NULL + ); + + // IMPORTANT: + // pf and pi may be invalid below this point, they may have been reallocated. + + return 1; + } + + return 0; +} + +void poll_events(LISTEN_SOCKETS *sockets + , void *(*add_callback)(POLLINFO * /*pi*/, short int * /*events*/, void * /*data*/) + , void (*del_callback)(POLLINFO * /*pi*/) + , int (*rcv_callback)(POLLINFO * /*pi*/, short int * /*events*/) + , int (*snd_callback)(POLLINFO * /*pi*/, short int * /*events*/) + , void (*tmr_callback)(void * /*timer_data*/) + , SIMPLE_PATTERN *access_list + , int allow_dns + , void *data + , time_t tcp_request_timeout_seconds + , time_t tcp_idle_timeout_seconds + , time_t timer_milliseconds + , void *timer_data + , size_t max_tcp_sockets +) { + if(!sockets || !sockets->opened) { + error("POLLFD: internal error: no listening sockets are opened"); + return; + } + + if(timer_milliseconds <= 0) timer_milliseconds = 0; + + int retval; + + POLLJOB p = { + .slots = 0, + .used = 0, + .max = 0, + .limit = max_tcp_sockets, + .fds = NULL, + .inf = NULL, + .first_free = NULL, + + .complete_request_timeout = tcp_request_timeout_seconds, + .idle_timeout = tcp_idle_timeout_seconds, + .checks_every = (tcp_idle_timeout_seconds / 3) + 1, + + .access_list = access_list, + .allow_dns = allow_dns, + + .timer_milliseconds = timer_milliseconds, + .timer_data = timer_data, + + .add_callback = add_callback?add_callback:poll_default_add_callback, + .del_callback = del_callback?del_callback:poll_default_del_callback, + .rcv_callback = rcv_callback?rcv_callback:poll_default_rcv_callback, + .snd_callback = snd_callback?snd_callback:poll_default_snd_callback, + .tmr_callback = tmr_callback?tmr_callback:poll_default_tmr_callback + }; + + size_t i; + for(i = 0; i < sockets->opened ;i++) { + + POLLINFO *pi = poll_add_fd(&p + , sockets->fds[i] + , sockets->fds_types[i] + , sockets->fds_acl_flags[i] + , POLLINFO_FLAG_SERVER_SOCKET + , (sockets->fds_names[i])?sockets->fds_names[i]:"UNKNOWN" + , "" + , "" + , p.add_callback + , p.del_callback + , p.rcv_callback + , p.snd_callback + , NULL + ); + + pi->data = data; + info("POLLFD: LISTENER: listening on '%s'", (sockets->fds_names[i])?sockets->fds_names[i]:"UNKNOWN"); + } + + int listen_sockets_active = 1; + + int timeout_ms = 1000; // in milliseconds + time_t last_check = now_boottime_sec(); + + usec_t timer_usec = timer_milliseconds * USEC_PER_MS; + usec_t now_usec = 0, next_timer_usec = 0, last_timer_usec = 0; + (void)last_timer_usec; + + if(unlikely(timer_usec)) { + now_usec = now_boottime_usec(); + next_timer_usec = now_usec - (now_usec % timer_usec) + timer_usec; + } + + netdata_thread_cleanup_push(poll_events_cleanup, &p); + + while(!netdata_exit) { + if(unlikely(timer_usec)) { + now_usec = now_boottime_usec(); + + if(unlikely(timer_usec && now_usec >= next_timer_usec)) { + debug(D_POLLFD, "Calling timer callback after %zu usec", (size_t)(now_usec - last_timer_usec)); + last_timer_usec = now_usec; + p.tmr_callback(p.timer_data); + now_usec = now_boottime_usec(); + next_timer_usec = now_usec - (now_usec % timer_usec) + timer_usec; + } + + usec_t dt_usec = next_timer_usec - now_usec; + if(dt_usec < 1000 * USEC_PER_MS) + timeout_ms = 1000; + else + timeout_ms = (int)(dt_usec / USEC_PER_MS); + } + + // enable or disable the TCP listening sockets, based on the current number of sockets used and the limit set + if((listen_sockets_active && (p.limit && p.used >= p.limit)) || (!listen_sockets_active && (!p.limit || p.used < p.limit))) { + listen_sockets_active = !listen_sockets_active; + info("%s listening sockets (used TCP sockets %zu, max allowed for this worker %zu)", (listen_sockets_active)?"ENABLING":"DISABLING", p.used, p.limit); + for (i = 0; i <= p.max; i++) { + if(p.inf[i].flags & POLLINFO_FLAG_SERVER_SOCKET && p.inf[i].socktype == SOCK_STREAM) { + p.fds[i].events = (short int) ((listen_sockets_active) ? POLLIN : 0); + } + } + } + + debug(D_POLLFD, "POLLFD: LISTENER: Waiting on %zu sockets for %zu ms...", p.max + 1, (size_t)timeout_ms); + retval = poll(p.fds, p.max + 1, timeout_ms); + time_t now = now_boottime_sec(); + + if(unlikely(retval == -1)) { + error("POLLFD: LISTENER: poll() failed while waiting on %zu sockets.", p.max + 1); + break; + } + else if(unlikely(!retval)) { + debug(D_POLLFD, "POLLFD: LISTENER: poll() timeout."); + } + else { + POLLINFO *pi; + struct pollfd *pf; + size_t idx, processed = 0; + short int revents; + + // keep fast lookup arrays per function + // to avoid looping through the entire list every time + size_t sends[p.max + 1], sends_max = 0; + size_t reads[p.max + 1], reads_max = 0; + size_t conns[p.max + 1], conns_max = 0; + size_t udprd[p.max + 1], udprd_max = 0; + + for (i = 0; i <= p.max; i++) { + pi = &p.inf[i]; + pf = &p.fds[i]; + revents = pf->revents; + + if(unlikely(revents == 0 || pf->fd == -1)) + continue; + + if (unlikely(revents & (POLLERR|POLLHUP|POLLNVAL))) { + // something is wrong to one of our sockets + + pf->revents = 0; + processed += poll_process_error(pi, pf, revents); + } + else if (likely(revents & POLLOUT)) { + // a client is ready to receive data + + sends[sends_max++] = i; + } + else if (likely(revents & (POLLIN|POLLPRI))) { + if (pi->flags & POLLINFO_FLAG_CLIENT_SOCKET) { + // a client sent data to us + + reads[reads_max++] = i; + } + else if (pi->flags & POLLINFO_FLAG_SERVER_SOCKET) { + // something is coming to our server sockets + + if(pi->socktype == SOCK_DGRAM) { + // UDP receive, directly on our listening socket + + udprd[udprd_max++] = i; + } + else if(pi->socktype == SOCK_STREAM) { + // new TCP connection + + conns[conns_max++] = i; + } + else + error("POLLFD: LISTENER: server slot %zu (fd %d) connection from %s port %s using unhandled socket type %d." + , i + , pi->fd + , pi->client_ip ? pi->client_ip : "" + , pi->client_port ? pi->client_port : "" + , pi->socktype + ); + } + else + error("POLLFD: LISTENER: client slot %zu (fd %d) data from %s port %s using flags %08X is neither client nor server." + , i + , pi->fd + , pi->client_ip ? pi->client_ip : "" + , pi->client_port ? pi->client_port : "" + , pi->flags + ); + } + else + error("POLLFD: LISTENER: socket slot %zu (fd %d) client %s port %s unhandled event id %d." + , i + , pi->fd + , pi->client_ip ? pi->client_ip : "" + , pi->client_port ? pi->client_port : "" + , revents + ); + } + + // process sends + for (idx = 0; idx < sends_max; idx++) { + i = sends[idx]; + pi = &p.inf[i]; + pf = &p.fds[i]; + pf->revents = 0; + processed += poll_process_send(&p, pi, pf, now); + } + + // process UDP reads + for (idx = 0; idx < udprd_max; idx++) { + i = udprd[idx]; + pi = &p.inf[i]; + pf = &p.fds[i]; + pf->revents = 0; + processed += poll_process_udp_read(pi, pf, now); + } + + // process TCP reads + for (idx = 0; idx < reads_max; idx++) { + i = reads[idx]; + pi = &p.inf[i]; + pf = &p.fds[i]; + pf->revents = 0; + processed += poll_process_tcp_read(&p, pi, pf, now); + } + + if(!processed && (!p.limit || p.used < p.limit)) { + // nothing processed above (rcv, snd) and we have room for another TCP connection + // so, accept one TCP connection + for (idx = 0; idx < conns_max; idx++) { + i = conns[idx]; + pi = &p.inf[i]; + pf = &p.fds[i]; + pf->revents = 0; + if (poll_process_new_tcp_connection(&p, pi, pf, now)) + break; + } + } + } + + if(unlikely(p.checks_every > 0 && now - last_check > p.checks_every)) { + last_check = now; + + // cleanup old sockets + for(i = 0; i <= p.max; i++) { + POLLINFO *pi = &p.inf[i]; + + if(likely(pi->flags & POLLINFO_FLAG_CLIENT_SOCKET)) { + if (unlikely(pi->send_count == 0 && p.complete_request_timeout > 0 && (now - pi->connected_t) >= p.complete_request_timeout)) { + info("POLLFD: LISTENER: client slot %zu (fd %d) from %s port %s has not sent a complete request in %zu seconds - closing it. " + , i + , pi->fd + , pi->client_ip ? pi->client_ip : "" + , pi->client_port ? pi->client_port : "" + , (size_t) p.complete_request_timeout + ); + poll_close_fd(pi); + } + else if(unlikely(pi->recv_count && p.idle_timeout > 0 && now - ((pi->last_received_t > pi->last_sent_t) ? pi->last_received_t : pi->last_sent_t) >= p.idle_timeout )) { + info("POLLFD: LISTENER: client slot %zu (fd %d) from %s port %s is idle for more than %zu seconds - closing it. " + , i + , pi->fd + , pi->client_ip ? pi->client_ip : "" + , pi->client_port ? pi->client_port : "" + , (size_t) p.idle_timeout + ); + poll_close_fd(pi); + } + } + } + } + } + + netdata_thread_cleanup_pop(1); + debug(D_POLLFD, "POLLFD: LISTENER: cleanup completed"); +} diff --git a/libnetdata/socket/socket.h b/libnetdata/socket/socket.h new file mode 100644 index 0000000..2823242 --- /dev/null +++ b/libnetdata/socket/socket.h @@ -0,0 +1,215 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SOCKET_H +#define NETDATA_SOCKET_H + +#include "../libnetdata.h" + +#ifndef MAX_LISTEN_FDS +#define MAX_LISTEN_FDS 50 +#endif + +typedef enum web_client_acl { + WEB_CLIENT_ACL_NONE = 0, + WEB_CLIENT_ACL_NOCHECK = 0, + WEB_CLIENT_ACL_DASHBOARD = 1 << 0, + WEB_CLIENT_ACL_REGISTRY = 1 << 1, + WEB_CLIENT_ACL_BADGE = 1 << 2, + WEB_CLIENT_ACL_MGMT = 1 << 3, + WEB_CLIENT_ACL_STREAMING = 1 << 4, + WEB_CLIENT_ACL_NETDATACONF = 1 << 5, + WEB_CLIENT_ACL_SSL_OPTIONAL = 1 << 6, + WEB_CLIENT_ACL_SSL_FORCE = 1 << 7, + WEB_CLIENT_ACL_SSL_DEFAULT = 1 << 8, + WEB_CLIENT_ACL_ACLK = 1 << 9, +} WEB_CLIENT_ACL; + +#define WEB_CLIENT_ACL_ALL 0xFFFF + +#define web_client_can_access_dashboard(w) ((w)->acl & WEB_CLIENT_ACL_DASHBOARD) +#define web_client_can_access_registry(w) ((w)->acl & WEB_CLIENT_ACL_REGISTRY) +#define web_client_can_access_badges(w) ((w)->acl & WEB_CLIENT_ACL_BADGE) +#define web_client_can_access_mgmt(w) ((w)->acl & WEB_CLIENT_ACL_MGMT) +#define web_client_can_access_stream(w) ((w)->acl & WEB_CLIENT_ACL_STREAMING) +#define web_client_can_access_netdataconf(w) ((w)->acl & WEB_CLIENT_ACL_NETDATACONF) +#define web_client_is_using_ssl_optional(w) ((w)->port_acl & WEB_CLIENT_ACL_SSL_OPTIONAL) +#define web_client_is_using_ssl_force(w) ((w)->port_acl & WEB_CLIENT_ACL_SSL_FORCE) +#define web_client_is_using_ssl_default(w) ((w)->port_acl & WEB_CLIENT_ACL_SSL_DEFAULT) + +typedef struct listen_sockets { + struct config *config; // the config file to use + const char *config_section; // the netdata configuration section to read settings from + const char *default_bind_to; // the default bind to configuration string + uint16_t default_port; // the default port to use + int backlog; // the default listen backlog to use + + size_t opened; // the number of sockets opened + size_t failed; // the number of sockets attempted to open, but failed + int fds[MAX_LISTEN_FDS]; // the open sockets + char *fds_names[MAX_LISTEN_FDS]; // descriptions for the open sockets + int fds_types[MAX_LISTEN_FDS]; // the socktype for the open sockets (SOCK_STREAM, SOCK_DGRAM) + int fds_families[MAX_LISTEN_FDS]; // the family of the open sockets (AF_UNIX, AF_INET, AF_INET6) + WEB_CLIENT_ACL fds_acl_flags[MAX_LISTEN_FDS]; // the acl to apply to the open sockets (dashboard, badges, streaming, netdata.conf, management) +} LISTEN_SOCKETS; + +char *strdup_client_description(int family, const char *protocol, const char *ip, uint16_t port); + +int listen_sockets_setup(LISTEN_SOCKETS *sockets); +void listen_sockets_close(LISTEN_SOCKETS *sockets); + +void foreach_entry_in_connection_string(const char *destination, bool (*callback)(char *entry, void *data), void *data); +int connect_to_this_ip46(int protocol, int socktype, const char *host, uint32_t scope_id, const char *service, struct timeval *timeout); +int connect_to_this(const char *definition, int default_port, struct timeval *timeout); +int connect_to_one_of(const char *destination, int default_port, struct timeval *timeout, size_t *reconnects_counter, char *connected_to, size_t connected_to_size); +int connect_to_one_of_urls(const char *destination, int default_port, struct timeval *timeout, size_t *reconnects_counter, char *connected_to, size_t connected_to_size); + + +#ifdef ENABLE_HTTPS +ssize_t recv_timeout(struct netdata_ssl *ssl,int sockfd, void *buf, size_t len, int flags, int timeout); +ssize_t send_timeout(struct netdata_ssl *ssl,int sockfd, void *buf, size_t len, int flags, int timeout); +ssize_t netdata_ssl_read(SSL *ssl, void *buf, size_t num); +ssize_t netdata_ssl_write(SSL *ssl, const void *buf, size_t num); +#else +ssize_t recv_timeout(int sockfd, void *buf, size_t len, int flags, int timeout); +ssize_t send_timeout(int sockfd, void *buf, size_t len, int flags, int timeout); +#endif + +int sock_setnonblock(int fd); +int sock_delnonblock(int fd); +int sock_setreuse(int fd, int reuse); +int sock_setreuse_port(int fd, int reuse); +int sock_enlarge_in(int fd); +int sock_enlarge_out(int fd); + +int connection_allowed(int fd, char *client_ip, char *client_host, size_t hostsize, + SIMPLE_PATTERN *access_list, const char *patname, int allow_dns); +int accept_socket(int fd, int flags, char *client_ip, size_t ipsize, char *client_port, size_t portsize, + char *client_host, size_t hostsize, SIMPLE_PATTERN *access_list, int allow_dns); + +#ifndef HAVE_ACCEPT4 +int accept4(int sock, struct sockaddr *addr, socklen_t *addrlen, int flags); + +#ifndef SOCK_NONBLOCK +#define SOCK_NONBLOCK 00004000 +#endif /* #ifndef SOCK_NONBLOCK */ + +#ifndef SOCK_CLOEXEC +#define SOCK_CLOEXEC 02000000 +#endif /* #ifndef SOCK_CLOEXEC */ + +#endif /* #ifndef HAVE_ACCEPT4 */ + + +// ---------------------------------------------------------------------------- +// poll() based listener + +#define POLLINFO_FLAG_SERVER_SOCKET 0x00000001 +#define POLLINFO_FLAG_CLIENT_SOCKET 0x00000002 +#define POLLINFO_FLAG_DONT_CLOSE 0x00000004 + +typedef struct poll POLLJOB; + +typedef struct pollinfo { + POLLJOB *p; // the parent + size_t slot; // the slot id + + int fd; // the file descriptor + int socktype; // the client socket type + WEB_CLIENT_ACL port_acl; // the access lists permitted on this web server port (it's -1 for client sockets) + char *client_ip; // Max INET6_ADDRSTRLEN bytes + char *client_port; // Max NI_MAXSERV bytes + char *client_host; // Max NI_MAXHOST bytes + + time_t connected_t; // the time the socket connected + time_t last_received_t; // the time the socket last received data + time_t last_sent_t; // the time the socket last sent data + + size_t recv_count; // the number of times the socket was ready for inbound traffic + size_t send_count; // the number of times the socket was ready for outbound traffic + + uint32_t flags; // internal flags + + // callbacks for this socket + void (*del_callback)(struct pollinfo *pi); + int (*rcv_callback)(struct pollinfo *pi, short int *events); + int (*snd_callback)(struct pollinfo *pi, short int *events); + + // the user data + void *data; + + // linking of free pollinfo structures + // for quickly finding the next available + // this is like a stack, it grows and shrinks + // (with gaps - lower empty slots are preferred) + struct pollinfo *next; +} POLLINFO; + +struct poll { + size_t slots; + size_t used; + size_t min; + size_t max; + + size_t limit; + + time_t complete_request_timeout; + time_t idle_timeout; + time_t checks_every; + + time_t timer_milliseconds; + void *timer_data; + + struct pollfd *fds; + struct pollinfo *inf; + struct pollinfo *first_free; + + SIMPLE_PATTERN *access_list; + int allow_dns; + + void *(*add_callback)(POLLINFO *pi, short int *events, void *data); + void (*del_callback)(POLLINFO *pi); + int (*rcv_callback)(POLLINFO *pi, short int *events); + int (*snd_callback)(POLLINFO *pi, short int *events); + void (*tmr_callback)(void *timer_data); +}; + +#define pollinfo_from_slot(p, slot) (&((p)->inf[(slot)])) + +int poll_default_snd_callback(POLLINFO *pi, short int *events); +int poll_default_rcv_callback(POLLINFO *pi, short int *events); +void poll_default_del_callback(POLLINFO *pi); +void *poll_default_add_callback(POLLINFO *pi, short int *events, void *data); + +POLLINFO *poll_add_fd(POLLJOB *p + , int fd + , int socktype + , WEB_CLIENT_ACL port_acl + , uint32_t flags + , const char *client_ip + , const char *client_port + , const char *client_host + , void *(*add_callback)(POLLINFO *pi, short int *events, void *data) + , void (*del_callback)(POLLINFO *pi) + , int (*rcv_callback)(POLLINFO *pi, short int *events) + , int (*snd_callback)(POLLINFO *pi, short int *events) + , void *data +); +void poll_close_fd(POLLINFO *pi); + +void poll_events(LISTEN_SOCKETS *sockets + , void *(*add_callback)(POLLINFO *pi, short int *events, void *data) + , void (*del_callback)(POLLINFO *pi) + , int (*rcv_callback)(POLLINFO *pi, short int *events) + , int (*snd_callback)(POLLINFO *pi, short int *events) + , void (*tmr_callback)(void *timer_data) + , SIMPLE_PATTERN *access_list + , int allow_dns + , void *data + , time_t tcp_request_timeout_seconds + , time_t tcp_idle_timeout_seconds + , time_t timer_milliseconds + , void *timer_data + , size_t max_tcp_sockets +); + +#endif //NETDATA_SOCKET_H diff --git a/libnetdata/statistical/Makefile.am b/libnetdata/statistical/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/statistical/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/statistical/README.md b/libnetdata/statistical/README.md new file mode 100644 index 0000000..f254081 --- /dev/null +++ b/libnetdata/statistical/README.md @@ -0,0 +1,5 @@ + + + diff --git a/libnetdata/statistical/statistical.c b/libnetdata/statistical/statistical.c new file mode 100644 index 0000000..ef9fe4e --- /dev/null +++ b/libnetdata/statistical/statistical.c @@ -0,0 +1,460 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +NETDATA_DOUBLE default_single_exponential_smoothing_alpha = 0.1; + +void log_series_to_stderr(NETDATA_DOUBLE *series, size_t entries, NETDATA_DOUBLE result, const char *msg) { + const NETDATA_DOUBLE *value, *end = &series[entries]; + + fprintf(stderr, "%s of %zu entries [ ", msg, entries); + for(value = series; value < end ;value++) { + if(value != series) fprintf(stderr, ", "); + fprintf(stderr, "%" NETDATA_DOUBLE_MODIFIER, *value); + } + fprintf(stderr, " ] results in " NETDATA_DOUBLE_FORMAT "\n", result); +} + +// -------------------------------------------------------------------------------------------------------------------- + +inline NETDATA_DOUBLE sum_and_count(const NETDATA_DOUBLE *series, size_t entries, size_t *count) { + const NETDATA_DOUBLE *value, *end = &series[entries]; + NETDATA_DOUBLE sum = 0; + size_t c = 0; + + for(value = series; value < end ; value++) { + if(netdata_double_isnumber(*value)) { + sum += *value; + c++; + } + } + + if(unlikely(!c)) sum = NAN; + if(likely(count)) *count = c; + + return sum; +} + +inline NETDATA_DOUBLE sum(const NETDATA_DOUBLE *series, size_t entries) { + return sum_and_count(series, entries, NULL); +} + +inline NETDATA_DOUBLE average(const NETDATA_DOUBLE *series, size_t entries) { + size_t count = 0; + NETDATA_DOUBLE sum = sum_and_count(series, entries, &count); + + if(unlikely(!count)) return NAN; + return sum / (NETDATA_DOUBLE)count; +} + +// -------------------------------------------------------------------------------------------------------------------- + +NETDATA_DOUBLE moving_average(const NETDATA_DOUBLE *series, size_t entries, size_t period) { + if(unlikely(period <= 0)) + return 0.0; + + size_t i, count; + NETDATA_DOUBLE sum = 0, avg = 0; + NETDATA_DOUBLE p[period]; + + for(count = 0; count < period ; count++) + p[count] = 0.0; + + for(i = 0, count = 0; i < entries; i++) { + NETDATA_DOUBLE value = series[i]; + if(unlikely(!netdata_double_isnumber(value))) continue; + + if(unlikely(count < period)) { + sum += value; + avg = (count == period - 1) ? sum / (NETDATA_DOUBLE)period : 0; + } + else { + sum = sum - p[count % period] + value; + avg = sum / (NETDATA_DOUBLE)period; + } + + p[count % period] = value; + count++; + } + + return avg; +} + +// -------------------------------------------------------------------------------------------------------------------- + +static int qsort_compare(const void *a, const void *b) { + NETDATA_DOUBLE *p1 = (NETDATA_DOUBLE *)a, *p2 = (NETDATA_DOUBLE *)b; + NETDATA_DOUBLE n1 = *p1, n2 = *p2; + + if(unlikely(isnan(n1) || isnan(n2))) { + if(isnan(n1) && !isnan(n2)) return -1; + if(!isnan(n1) && isnan(n2)) return 1; + return 0; + } + if(unlikely(isinf(n1) || isinf(n2))) { + if(!isinf(n1) && isinf(n2)) return -1; + if(isinf(n1) && !isinf(n2)) return 1; + return 0; + } + + if(unlikely(n1 < n2)) return -1; + if(unlikely(n1 > n2)) return 1; + return 0; +} + +inline void sort_series(NETDATA_DOUBLE *series, size_t entries) { + qsort(series, entries, sizeof(NETDATA_DOUBLE), qsort_compare); +} + +inline NETDATA_DOUBLE *copy_series(const NETDATA_DOUBLE *series, size_t entries) { + NETDATA_DOUBLE *copy = mallocz(sizeof(NETDATA_DOUBLE) * entries); + memcpy(copy, series, sizeof(NETDATA_DOUBLE) * entries); + return copy; +} + +NETDATA_DOUBLE median_on_sorted_series(const NETDATA_DOUBLE *series, size_t entries) { + if(unlikely(entries == 0)) return NAN; + if(unlikely(entries == 1)) return series[0]; + if(unlikely(entries == 2)) return (series[0] + series[1]) / 2; + + NETDATA_DOUBLE average; + if(entries % 2 == 0) { + size_t m = entries / 2; + average = (series[m] + series[m + 1]) / 2; + } + else { + average = series[entries / 2]; + } + + return average; +} + +NETDATA_DOUBLE median(const NETDATA_DOUBLE *series, size_t entries) { + if(unlikely(entries == 0)) return NAN; + if(unlikely(entries == 1)) return series[0]; + + if(unlikely(entries == 2)) + return (series[0] + series[1]) / 2; + + NETDATA_DOUBLE *copy = copy_series(series, entries); + sort_series(copy, entries); + + NETDATA_DOUBLE avg = median_on_sorted_series(copy, entries); + + freez(copy); + return avg; +} + +// -------------------------------------------------------------------------------------------------------------------- + +NETDATA_DOUBLE moving_median(const NETDATA_DOUBLE *series, size_t entries, size_t period) { + if(entries <= period) + return median(series, entries); + + NETDATA_DOUBLE *data = copy_series(series, entries); + + size_t i; + for(i = period; i < entries; i++) { + data[i - period] = median(&series[i - period], period); + } + + NETDATA_DOUBLE avg = median(data, entries - period); + freez(data); + return avg; +} + +// -------------------------------------------------------------------------------------------------------------------- + +// http://stackoverflow.com/a/15150143/4525767 +NETDATA_DOUBLE running_median_estimate(const NETDATA_DOUBLE *series, size_t entries) { + NETDATA_DOUBLE median = 0.0f; + NETDATA_DOUBLE average = 0.0f; + size_t i; + + for(i = 0; i < entries ; i++) { + NETDATA_DOUBLE value = series[i]; + if(unlikely(!netdata_double_isnumber(value))) continue; + + average += ( value - average ) * 0.1f; // rough running average. + median += copysignndd( average * 0.01, value - median ); + } + + return median; +} + +// -------------------------------------------------------------------------------------------------------------------- + +NETDATA_DOUBLE standard_deviation(const NETDATA_DOUBLE *series, size_t entries) { + if(unlikely(entries == 0)) return NAN; + if(unlikely(entries == 1)) return series[0]; + + const NETDATA_DOUBLE *value, *end = &series[entries]; + size_t count; + NETDATA_DOUBLE sum; + + for(count = 0, sum = 0, value = series ; value < end ;value++) { + if(likely(netdata_double_isnumber(*value))) { + count++; + sum += *value; + } + } + + if(unlikely(count == 0)) return NAN; + if(unlikely(count == 1)) return sum; + + NETDATA_DOUBLE average = sum / (NETDATA_DOUBLE)count; + + for(count = 0, sum = 0, value = series ; value < end ;value++) { + if(netdata_double_isnumber(*value)) { + count++; + sum += powndd(*value - average, 2); + } + } + + if(unlikely(count == 0)) return NAN; + if(unlikely(count == 1)) return average; + + NETDATA_DOUBLE variance = sum / (NETDATA_DOUBLE)(count); // remove -1 from count to have a population stddev + NETDATA_DOUBLE stddev = sqrtndd(variance); + return stddev; +} + +// -------------------------------------------------------------------------------------------------------------------- + +NETDATA_DOUBLE single_exponential_smoothing(const NETDATA_DOUBLE *series, size_t entries, NETDATA_DOUBLE alpha) { + if(unlikely(entries == 0)) + return NAN; + + if(unlikely(isnan(alpha))) + alpha = default_single_exponential_smoothing_alpha; + + const NETDATA_DOUBLE *value = series, *end = &series[entries]; + NETDATA_DOUBLE level = (1.0 - alpha) * (*value); + + for(value++ ; value < end; value++) { + if(likely(netdata_double_isnumber(*value))) + level = alpha * (*value) + (1.0 - alpha) * level; + } + + return level; +} + +NETDATA_DOUBLE single_exponential_smoothing_reverse(const NETDATA_DOUBLE *series, size_t entries, NETDATA_DOUBLE alpha) { + if(unlikely(entries == 0)) + return NAN; + + if(unlikely(isnan(alpha))) + alpha = default_single_exponential_smoothing_alpha; + + const NETDATA_DOUBLE *value = &series[entries -1]; + NETDATA_DOUBLE level = (1.0 - alpha) * (*value); + + for(value++ ; value >= series; value--) { + if(likely(netdata_double_isnumber(*value))) + level = alpha * (*value) + (1.0 - alpha) * level; + } + + return level; +} + +// -------------------------------------------------------------------------------------------------------------------- + +// http://grisha.org/blog/2016/02/16/triple-exponential-smoothing-forecasting-part-ii/ +NETDATA_DOUBLE double_exponential_smoothing(const NETDATA_DOUBLE *series, size_t entries, + NETDATA_DOUBLE alpha, + NETDATA_DOUBLE beta, + NETDATA_DOUBLE *forecast) { + if(unlikely(entries == 0)) + return NAN; + + NETDATA_DOUBLE level, trend; + + if(unlikely(isnan(alpha))) + alpha = 0.3; + + if(unlikely(isnan(beta))) + beta = 0.05; + + level = series[0]; + + if(likely(entries > 1)) + trend = series[1] - series[0]; + else + trend = 0; + + const NETDATA_DOUBLE *value = series; + for(value++ ; value >= series; value--) { + if(likely(netdata_double_isnumber(*value))) { + NETDATA_DOUBLE last_level = level; + level = alpha * *value + (1.0 - alpha) * (level + trend); + trend = beta * (level - last_level) + (1.0 - beta) * trend; + + } + } + + if(forecast) + *forecast = level + trend; + + return level; +} + +// -------------------------------------------------------------------------------------------------------------------- + +/* + * Based on th R implementation + * + * a: level component + * b: trend component + * s: seasonal component + * + * Additive: + * + * Yhat[t+h] = a[t] + h * b[t] + s[t + 1 + (h - 1) mod p], + * a[t] = α (Y[t] - s[t-p]) + (1-α) (a[t-1] + b[t-1]) + * b[t] = β (a[t] - a[t-1]) + (1-β) b[t-1] + * s[t] = γ (Y[t] - a[t]) + (1-γ) s[t-p] + * + * Multiplicative: + * + * Yhat[t+h] = (a[t] + h * b[t]) * s[t + 1 + (h - 1) mod p], + * a[t] = α (Y[t] / s[t-p]) + (1-α) (a[t-1] + b[t-1]) + * b[t] = β (a[t] - a[t-1]) + (1-β) b[t-1] + * s[t] = γ (Y[t] / a[t]) + (1-γ) s[t-p] + */ +static int __HoltWinters( + const NETDATA_DOUBLE *series, + int entries, // start_time + h + + NETDATA_DOUBLE alpha, // alpha parameter of Holt-Winters Filter. + NETDATA_DOUBLE + beta, // beta parameter of Holt-Winters Filter. If set to 0, the function will do exponential smoothing. + NETDATA_DOUBLE + gamma, // gamma parameter used for the seasonal component. If set to 0, an non-seasonal model is fitted. + + const int *seasonal, + const int *period, + const NETDATA_DOUBLE *a, // Start value for level (a[0]). + const NETDATA_DOUBLE *b, // Start value for trend (b[0]). + NETDATA_DOUBLE *s, // Vector of start values for the seasonal component (s_1[0] ... s_p[0]) + + /* return values */ + NETDATA_DOUBLE *SSE, // The final sum of squared errors achieved in optimizing + NETDATA_DOUBLE *level, // Estimated values for the level component (size entries - t + 2) + NETDATA_DOUBLE *trend, // Estimated values for the trend component (size entries - t + 2) + NETDATA_DOUBLE *season // Estimated values for the seasonal component (size entries - t + 2) +) +{ + if(unlikely(entries < 4)) + return 0; + + int start_time = 2; + + NETDATA_DOUBLE res = 0, xhat = 0, stmp = 0; + int i, i0, s0; + + /* copy start values to the beginning of the vectors */ + level[0] = *a; + if(beta > 0) trend[0] = *b; + if(gamma > 0) memcpy(season, s, *period * sizeof(NETDATA_DOUBLE)); + + for(i = start_time - 1; i < entries; i++) { + /* indices for period i */ + i0 = i - start_time + 2; + s0 = i0 + *period - 1; + + /* forecast *for* period i */ + xhat = level[i0 - 1] + (beta > 0 ? trend[i0 - 1] : 0); + stmp = gamma > 0 ? season[s0 - *period] : (*seasonal != 1); + if (*seasonal == 1) + xhat += stmp; + else + xhat *= stmp; + + /* Sum of Squared Errors */ + res = series[i] - xhat; + *SSE += res * res; + + /* estimate of level *in* period i */ + if (*seasonal == 1) + level[i0] = alpha * (series[i] - stmp) + + (1 - alpha) * (level[i0 - 1] + trend[i0 - 1]); + else + level[i0] = alpha * (series[i] / stmp) + + (1 - alpha) * (level[i0 - 1] + trend[i0 - 1]); + + /* estimate of trend *in* period i */ + if (beta > 0) + trend[i0] = beta * (level[i0] - level[i0 - 1]) + + (1 - beta) * trend[i0 - 1]; + + /* estimate of seasonal component *in* period i */ + if (gamma > 0) { + if (*seasonal == 1) + season[s0] = gamma * (series[i] - level[i0]) + + (1 - gamma) * stmp; + else + season[s0] = gamma * (series[i] / level[i0]) + + (1 - gamma) * stmp; + } + } + + return 1; +} + +NETDATA_DOUBLE holtwinters(const NETDATA_DOUBLE *series, size_t entries, + NETDATA_DOUBLE alpha, + NETDATA_DOUBLE beta, + NETDATA_DOUBLE gamma, + NETDATA_DOUBLE *forecast) { + if(unlikely(isnan(alpha))) + alpha = 0.3; + + if(unlikely(isnan(beta))) + beta = 0.05; + + if(unlikely(isnan(gamma))) + gamma = 0; + + int seasonal = 0; + int period = 0; + NETDATA_DOUBLE a0 = series[0]; + NETDATA_DOUBLE b0 = 0; + NETDATA_DOUBLE s[] = {}; + + NETDATA_DOUBLE errors = 0.0; + size_t nb_computations = entries; + NETDATA_DOUBLE *estimated_level = callocz(nb_computations, sizeof(NETDATA_DOUBLE)); + NETDATA_DOUBLE *estimated_trend = callocz(nb_computations, sizeof(NETDATA_DOUBLE)); + NETDATA_DOUBLE *estimated_season = callocz(nb_computations, sizeof(NETDATA_DOUBLE)); + + int ret = __HoltWinters( + series, + (int)entries, + alpha, + beta, + gamma, + &seasonal, + &period, + &a0, + &b0, + s, + &errors, + estimated_level, + estimated_trend, + estimated_season + ); + + NETDATA_DOUBLE value = estimated_level[nb_computations - 1]; + + if(forecast) + *forecast = 0.0; + + freez(estimated_level); + freez(estimated_trend); + freez(estimated_season); + + if(!ret) + return 0.0; + + return value; +} diff --git a/libnetdata/statistical/statistical.h b/libnetdata/statistical/statistical.h new file mode 100644 index 0000000..f3ecfad --- /dev/null +++ b/libnetdata/statistical/statistical.h @@ -0,0 +1,34 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STATISTICAL_H +#define NETDATA_STATISTICAL_H 1 + +#include "../libnetdata.h" + +void log_series_to_stderr(NETDATA_DOUBLE *series, size_t entries, NETDATA_DOUBLE result, const char *msg); + +NETDATA_DOUBLE average(const NETDATA_DOUBLE *series, size_t entries); +NETDATA_DOUBLE moving_average(const NETDATA_DOUBLE *series, size_t entries, size_t period); +NETDATA_DOUBLE median(const NETDATA_DOUBLE *series, size_t entries); +NETDATA_DOUBLE moving_median(const NETDATA_DOUBLE *series, size_t entries, size_t period); +NETDATA_DOUBLE running_median_estimate(const NETDATA_DOUBLE *series, size_t entries); +NETDATA_DOUBLE standard_deviation(const NETDATA_DOUBLE *series, size_t entries); +NETDATA_DOUBLE single_exponential_smoothing(const NETDATA_DOUBLE *series, size_t entries, NETDATA_DOUBLE alpha); +extern NETDATA_DOUBLE +single_exponential_smoothing_reverse(const NETDATA_DOUBLE *series, size_t entries, NETDATA_DOUBLE alpha); +NETDATA_DOUBLE double_exponential_smoothing(const NETDATA_DOUBLE *series, size_t entries, + NETDATA_DOUBLE alpha, + NETDATA_DOUBLE beta, + NETDATA_DOUBLE *forecast); +NETDATA_DOUBLE holtwinters(const NETDATA_DOUBLE *series, size_t entries, + NETDATA_DOUBLE alpha, + NETDATA_DOUBLE beta, + NETDATA_DOUBLE gamma, + NETDATA_DOUBLE *forecast); +NETDATA_DOUBLE sum_and_count(const NETDATA_DOUBLE *series, size_t entries, size_t *count); +NETDATA_DOUBLE sum(const NETDATA_DOUBLE *series, size_t entries); +NETDATA_DOUBLE median_on_sorted_series(const NETDATA_DOUBLE *series, size_t entries); +NETDATA_DOUBLE *copy_series(const NETDATA_DOUBLE *series, size_t entries); +void sort_series(NETDATA_DOUBLE *series, size_t entries); + +#endif //NETDATA_STATISTICAL_H diff --git a/libnetdata/storage_number/Makefile.am b/libnetdata/storage_number/Makefile.am new file mode 100644 index 0000000..c5f8450 --- /dev/null +++ b/libnetdata/storage_number/Makefile.am @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +SUBDIRS = \ + tests \ + $(NULL) + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/storage_number/README.md b/libnetdata/storage_number/README.md new file mode 100644 index 0000000..4cd19a9 --- /dev/null +++ b/libnetdata/storage_number/README.md @@ -0,0 +1,17 @@ + + +# Netdata storage number + +Although `netdata` does all its calculations using `long double`, it stores all values using +a **custom-made 32-bit number**. + +This custom-made number can store in 29 bits values from `-167772150000000.0` to `167772150000000.0` +with a precision of 0.00001 (yes, it's a floating point number, meaning that higher integer values +have less decimal precision) and 3 bits for flags. + +This provides an extremely optimized memory footprint with just 0.0001% max accuracy loss. + + diff --git a/libnetdata/storage_number/storage_number.c b/libnetdata/storage_number/storage_number.c new file mode 100644 index 0000000..7511f3a --- /dev/null +++ b/libnetdata/storage_number/storage_number.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +storage_number pack_storage_number(NETDATA_DOUBLE value, SN_FLAGS flags) { + // bit 32 = sign 0:positive, 1:negative + // bit 31 = 0:divide, 1:multiply + // bit 30, 29, 28 = (multiplier or divider) 0-7 (8 total) + // bit 27 SN_EXISTS_100 + // bit 26 SN_EXISTS_RESET + // bit 25 SN_ANOMALY_BIT = 0: anomalous, 1: not anomalous + // bit 24 to bit 1 = the value + + if(unlikely(fpclassify(value) == FP_NAN || fpclassify(value) == FP_INFINITE)) + return SN_EMPTY_SLOT; + + storage_number r = flags & SN_USER_FLAGS; + + if(unlikely(fpclassify(value) == FP_ZERO || fpclassify(value) == FP_SUBNORMAL)) + return r; + + int m = 0; + NETDATA_DOUBLE n = value, factor = 10; + + // if the value is negative + // add the sign bit and make it positive + if(n < 0) { + r += SN_FLAG_NEGATIVE; // the sign bit 32 + n = -n; + } + + if(n / 10000000.0 > 0x00ffffff) { + factor = 100; + r |= SN_FLAG_NOT_EXISTS_MUL100; + } + + // make its integer part fit in 0x00ffffff + // by dividing it by 10 up to 7 times + // and increasing the multiplier + while(m < 7 && n > (NETDATA_DOUBLE)0x00ffffff) { + n /= factor; + m++; + } + + if(m) { + // the value was too big, and we divided it + // so, we add a multiplier to unpack it + r += SN_FLAG_MULTIPLY + (m << 27); // the multiplier m + + if(n > (NETDATA_DOUBLE)0x00ffffff) { + #ifdef NETDATA_INTERNAL_CHECKS + error("Number " NETDATA_DOUBLE_FORMAT " is too big.", value); + #endif + r += 0x00ffffff; + return r; + } + } + else { + // 0x0019999e is the number that can be multiplied + // by 10 to give 0x00ffffff + // while the value is below 0x0019999e we can + // multiply it by 10, up to 7 times, increasing + // the multiplier + while(m < 7 && n < (NETDATA_DOUBLE)0x0019999e) { + n *= 10; + m++; + } + + if (unlikely(n > (NETDATA_DOUBLE)0x00ffffff)) { + n /= 10; + m--; + } + // the value was small enough, and we multiplied it + // so, we add a divider to unpack it + r += (m << 27); // the divider m + } + +#ifdef STORAGE_WITH_MATH + // without this there are rounding problems + // example: 0.9 becomes 0.89 + r += lrint((double) n); +#else + r += (storage_number)n; +#endif + + return r; +} + +// Lookup table to make storage number unpacking efficient. +NETDATA_DOUBLE unpack_storage_number_lut10x[4 * 8]; + +__attribute__((constructor)) void initialize_lut(void) { + // The lookup table is partitioned in 4 subtables based on the + // values of the factor and exp bits. + for (int i = 0; i < 8; i++) { + // factor = 0 + unpack_storage_number_lut10x[0 * 8 + i] = 1 / pow(10, i); // exp = 0 + unpack_storage_number_lut10x[1 * 8 + i] = pow(10, i); // exp = 1 + + // factor = 1 + unpack_storage_number_lut10x[2 * 8 + i] = 1 / pow(100, i); // exp = 0 + unpack_storage_number_lut10x[3 * 8 + i] = pow(100, i); // exp = 1 + } +} + +/* +int print_netdata_double(char *str, NETDATA_DOUBLE value) +{ + char *wstr = str; + + int sign = (value < 0) ? 1 : 0; + if(sign) value = -value; + +#ifdef STORAGE_WITH_MATH + // without llrintl() there are rounding problems + // for example 0.9 becomes 0.89 + unsigned long long uvalue = (unsigned long long int) llrintl(value * (NETDATA_DOUBLE)100000); +#else + unsigned long long uvalue = value * (NETDATA_DOUBLE)100000; +#endif + + wstr = print_number_llu_r_smart(str, uvalue); + + // make sure we have 6 bytes at least + while((wstr - str) < 6) *wstr++ = '0'; + + // put the sign back + if(sign) *wstr++ = '-'; + + // reverse it + char *begin = str, *end = --wstr, aux; + while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux; + // wstr--; + // strreverse(str, wstr); + + // remove trailing zeros + int decimal = 5; + while(decimal > 0 && *wstr == '0') { + *wstr-- = '\0'; + decimal--; + } + + // terminate it, one position to the right + // to let space for a dot + wstr[2] = '\0'; + + // make space for the dot + int i; + for(i = 0; i < decimal ;i++) { + wstr[1] = wstr[0]; + wstr--; + } + + // put the dot + if(wstr[2] == '\0') { wstr[1] = '\0'; decimal--; } + else wstr[1] = '.'; + + // return the buffer length + return (int) ((wstr - str) + 2 + decimal ); +} +*/ + +int print_netdata_double(char *str, NETDATA_DOUBLE value) { + // info("printing number " NETDATA_DOUBLE_FORMAT, value); + char integral_str[50], fractional_str[50]; + + char *wstr = str; + + if(unlikely(value < 0)) { + *wstr++ = '-'; + value = -value; + } + + NETDATA_DOUBLE integral, fractional; + +#ifdef STORAGE_WITH_MATH + fractional = modfndd(value, &integral) * 10000000.0; +#else + integral = (NETDATA_DOUBLE)((unsigned long long)(value * 10000000ULL) / 10000000ULL); + fractional = (NETDATA_DOUBLE)((unsigned long long)(value * 10000000ULL) % 10000000ULL); +#endif + + unsigned long long integral_int = (unsigned long long)integral; + unsigned long long fractional_int = (unsigned long long)llrintndd(fractional); + if(unlikely(fractional_int >= 10000000)) { + integral_int += 1; + fractional_int -= 10000000; + } + + // info("integral " NETDATA_DOUBLE_FORMAT " (%llu), fractional " NETDATA_DOUBLE_FORMAT " (%llu)", integral, integral_int, fractional, fractional_int); + + char *istre; + if(unlikely(integral_int == 0)) { + integral_str[0] = '0'; + istre = &integral_str[1]; + } + else + // convert the integral part to string (reversed) + istre = print_number_llu_r_smart(integral_str, integral_int); + + // copy reversed the integral string + istre--; + while( istre >= integral_str ) *wstr++ = *istre--; + + if(likely(fractional_int != 0)) { + // add a dot + *wstr++ = '.'; + + // convert the fractional part to string (reversed) + char *fstre = print_number_llu_r_smart(fractional_str, fractional_int); + + // prepend zeros to reach 7 digits length + int decimal = 7; + int len = (int)(fstre - fractional_str); + while(len < decimal) { + *wstr++ = '0'; + len++; + } + + char *begin = fractional_str; + while(begin < fstre && *begin == '0') begin++; + + // copy reversed the fractional string + fstre--; + while( fstre >= begin ) *wstr++ = *fstre--; + } + + *wstr = '\0'; + // info("printed number '%s'", str); + return (int)(wstr - str); +} diff --git a/libnetdata/storage_number/storage_number.h b/libnetdata/storage_number/storage_number.h new file mode 100644 index 0000000..faea477 --- /dev/null +++ b/libnetdata/storage_number/storage_number.h @@ -0,0 +1,232 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STORAGE_NUMBER_H +#define NETDATA_STORAGE_NUMBER_H 1 + +#include +#include "../libnetdata.h" + +#ifdef NETDATA_WITH_LONG_DOUBLE + +typedef long double NETDATA_DOUBLE; +#define NETDATA_DOUBLE_FORMAT "%0.7Lf" +#define NETDATA_DOUBLE_FORMAT_ZERO "%0.0Lf" +#define NETDATA_DOUBLE_FORMAT_AUTO "%Lf" +#define NETDATA_DOUBLE_MODIFIER "Lf" + +#define NETDATA_DOUBLE_MAX LDBL_MAX + +#define strtondd(s, endptr) strtold(s, endptr) +#define powndd(x, y) powl(x, y) +#define llrintndd(x) llrintl(x) +#define roundndd(x) roundl(x) +#define sqrtndd(x) sqrtl(x) +#define copysignndd(x, y) copysignl(x, y) +#define modfndd(x, y) modfl(x, y) +#define fabsndd(x) fabsl(x) + +#else // NETDATA_WITH_LONG_DOUBLE + +typedef double NETDATA_DOUBLE; +#define NETDATA_DOUBLE_FORMAT "%0.7f" +#define NETDATA_DOUBLE_FORMAT_ZERO "%0.0f" +#define NETDATA_DOUBLE_FORMAT_AUTO "%f" +#define NETDATA_DOUBLE_MODIFIER "f" + +#define NETDATA_DOUBLE_MAX DBL_MAX + +#define strtondd(s, endptr) strtod(s, endptr) +#define powndd(x, y) pow(x, y) +#define llrintndd(x) llrint(x) +#define roundndd(x) round(x) +#define sqrtndd(x) sqrt(x) +#define copysignndd(x, y) copysign(x, y) +#define modfndd(x, y) modf(x, y) +#define fabsndd(x) fabs(x) + +#endif // NETDATA_WITH_LONG_DOUBLE + +typedef long long collected_number; +#define COLLECTED_NUMBER_FORMAT "%lld" + +#define epsilonndd (NETDATA_DOUBLE)0.0000001 +#define considered_equal_ndd(a, b) (fabsndd((a) - (b)) < epsilonndd) + +#if defined(HAVE_ISFINITE) || defined(isfinite) +// The isfinite() macro shall determine whether its argument has a +// finite value (zero, subnormal, or normal, and not infinite or NaN). +#define netdata_double_isnumber(a) (isfinite(a)) +#elif defined(HAVE_FINITE) || defined(finite) +#define netdata_double_isnumber(a) (finite(a)) +#else +#define netdata_double_isnumber(a) (fpclassify(a) != FP_NAN && fpclassify(a) != FP_INFINITE) +#endif + +typedef uint32_t storage_number; + +typedef struct storage_number_tier1 { + float sum_value; + float min_value; + float max_value; + uint16_t count; + uint16_t anomaly_count; +} storage_number_tier1_t; + +#define STORAGE_NUMBER_FORMAT "%u" + +typedef enum { + SN_FLAG_NONE = 0, + SN_FLAG_NOT_ANOMALOUS = (1 << 24), // the anomaly bit of the value (0:anomalous, 1:not anomalous) + SN_FLAG_RESET = (1 << 25), // the value has been overflown + SN_FLAG_NOT_EXISTS_MUL100 = (1 << 26), // very large value (multiplier is 100 instead of 10) + SN_FLAG_MULTIPLY = (1 << 30), // multiply, else divide + SN_FLAG_NEGATIVE = (1 << 31), // negative, else positive +} SN_FLAGS; + +#define SN_USER_FLAGS (SN_FLAG_NOT_ANOMALOUS | SN_FLAG_RESET) + +// default flags for all storage numbers +// anomaly bit is reversed, so we set it by default +#define SN_DEFAULT_FLAGS SN_FLAG_NOT_ANOMALOUS + +// When the calculated number is zero and the value is anomalous (ie. it's bit +// is zero) we want to return a storage_number representation that is +// different from the empty slot. We achieve this by mapping zero to +// SN_EXISTS_100. Unpacking the SN_EXISTS_100 value will return zero because +// its fraction field (as well as its exponent factor field) will be zero. +#define SN_EMPTY_SLOT SN_FLAG_NOT_EXISTS_MUL100 + +// checks +#define does_storage_number_exist(value) (((storage_number)(value)) != SN_EMPTY_SLOT) +#define did_storage_number_reset(value) ((((storage_number)(value)) & SN_FLAG_RESET)) +#define is_storage_number_anomalous(value) (does_storage_number_exist(value) && !(((storage_number)(value)) & SN_FLAG_NOT_ANOMALOUS)) + +storage_number pack_storage_number(NETDATA_DOUBLE value, SN_FLAGS flags) __attribute__((const)); +static inline NETDATA_DOUBLE unpack_storage_number(storage_number value) __attribute__((const)); + +int print_netdata_double(char *str, NETDATA_DOUBLE value); + +// sign div/mul <--- multiplier / divider ---> 10/100 RESET EXISTS VALUE +#define STORAGE_NUMBER_POSITIVE_MAX_RAW (storage_number)( (0 << 31) | (1 << 30) | (1 << 29) | (1 << 28) | (1 << 27) | (1 << 26) | (0 << 25) | (1 << 24) | 0x00ffffff ) +#define STORAGE_NUMBER_POSITIVE_MIN_RAW (storage_number)( (0 << 31) | (0 << 30) | (1 << 29) | (1 << 28) | (1 << 27) | (0 << 26) | (0 << 25) | (1 << 24) | 0x00000001 ) +#define STORAGE_NUMBER_NEGATIVE_MAX_RAW (storage_number)( (1 << 31) | (0 << 30) | (1 << 29) | (1 << 28) | (1 << 27) | (0 << 26) | (0 << 25) | (1 << 24) | 0x00000001 ) +#define STORAGE_NUMBER_NEGATIVE_MIN_RAW (storage_number)( (1 << 31) | (1 << 30) | (1 << 29) | (1 << 28) | (1 << 27) | (1 << 26) | (0 << 25) | (1 << 24) | 0x00ffffff ) + +// accepted accuracy loss +#define ACCURACY_LOSS_ACCEPTED_PERCENT 0.0001 +#define accuracy_loss(t1, t2) (((t1) == (t2) || (t1) == 0.0 || (t2) == 0.0) ? 0.0 : (100.0 - (((t1) > (t2)) ? ((t2) * 100.0 / (t1) ) : ((t1) * 100.0 / (t2))))) + +// Maximum acceptable rate of increase for counters. With a rate of 10% netdata can safely detect overflows with a +// period of at least every other 10 samples. +#define MAX_INCREMENTAL_PERCENT_RATE 10 + + +static inline NETDATA_DOUBLE unpack_storage_number(storage_number value) { + extern NETDATA_DOUBLE unpack_storage_number_lut10x[4 * 8]; + + if(unlikely(value == SN_EMPTY_SLOT)) + return NAN; + + int sign = 1, exp = 0; + int factor = 0; + + // bit 32 = 0:positive, 1:negative + if(unlikely(value & SN_FLAG_NEGATIVE)) + sign = -1; + + // bit 31 = 0:divide, 1:multiply + if(unlikely(value & SN_FLAG_MULTIPLY)) + exp = 1; + + // bit 27 SN_FLAG_NOT_EXISTS_MUL100 + if(unlikely(value & SN_FLAG_NOT_EXISTS_MUL100)) + factor = 1; + + // bit 26 SN_FLAG_RESET + // bit 25 SN_FLAG_NOT_ANOMALOUS + + // bit 30, 29, 28 = (multiplier or divider) 0-7 (8 total) + int mul = (int)((value & ((1<<29)|(1<<28)|(1<<27))) >> 27); + + // bit 24 to bit 1 = the value, so remove all other bits + value ^= value & ((1<<31)|(1<<30)|(1<<29)|(1<<28)|(1<<27)|(1<<26)|(1<<25)|(1<<24)); + + NETDATA_DOUBLE n = value; + + // fprintf(stderr, "UNPACK: %08X, sign = %d, exp = %d, mul = %d, factor = %d, n = " CALCULATED_NUMBER_FORMAT "\n", value, sign, exp, mul, factor, n); + + return sign * unpack_storage_number_lut10x[(factor * 16) + (exp * 8) + mul] * n; +} + +static inline NETDATA_DOUBLE str2ndd(const char *s, char **endptr) { + int negative = 0; + const char *start = s; + unsigned long long integer_part = 0; + unsigned long decimal_part = 0; + size_t decimal_digits = 0; + + switch(*s) { + case '-': + s++; + negative = 1; + break; + + case '+': + s++; + break; + + case 'n': + if(s[1] == 'a' && s[2] == 'n') { + if(endptr) *endptr = (char *)&s[3]; + return NAN; + } + break; + + case 'i': + if(s[1] == 'n' && s[2] == 'f') { + if(endptr) *endptr = (char *)&s[3]; + return INFINITY; + } + break; + + default: + break; + } + + while (*s >= '0' && *s <= '9') { + integer_part = (integer_part * 10) + (*s - '0'); + s++; + } + + if(unlikely(*s == '.')) { + decimal_part = 0; + s++; + + while (*s >= '0' && *s <= '9') { + decimal_part = (decimal_part * 10) + (*s - '0'); + s++; + decimal_digits++; + } + } + + if(unlikely(*s == 'e' || *s == 'E')) + return strtondd(start, endptr); + + if(unlikely(endptr)) + *endptr = (char *)s; + + if(unlikely(negative)) { + if(unlikely(decimal_digits)) + return -((NETDATA_DOUBLE)integer_part + (NETDATA_DOUBLE)decimal_part / powndd(10.0, decimal_digits)); + else + return -((NETDATA_DOUBLE)integer_part); + } + else { + if(unlikely(decimal_digits)) + return (NETDATA_DOUBLE)integer_part + (NETDATA_DOUBLE)decimal_part / powndd(10.0, decimal_digits); + else + return (NETDATA_DOUBLE)integer_part; + } +} + +#endif /* NETDATA_STORAGE_NUMBER_H */ diff --git a/libnetdata/storage_number/tests/Makefile.am b/libnetdata/storage_number/tests/Makefile.am new file mode 100644 index 0000000..babdcf0 --- /dev/null +++ b/libnetdata/storage_number/tests/Makefile.am @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in diff --git a/libnetdata/storage_number/tests/test_storage_number.c b/libnetdata/storage_number/tests/test_storage_number.c new file mode 100644 index 0000000..19309e5 --- /dev/null +++ b/libnetdata/storage_number/tests/test_storage_number.c @@ -0,0 +1,52 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../../libnetdata.h" +#include "../../required_dummies.h" +#include +#include + +static void test_number_printing(void **state) +{ + (void)state; + + char value[50]; + + print_netdata_double(value, 0); + assert_string_equal(value, "0"); + + print_netdata_double(value, 0.0000001); + assert_string_equal(value, "0.0000001"); + + print_netdata_double(value, 0.00000009); + assert_string_equal(value, "0.0000001"); + + print_netdata_double(value, 0.000000001); + assert_string_equal(value, "0"); + + print_netdata_double(value, 99.99999999999999999); + assert_string_equal(value, "100"); + + print_netdata_double(value, -99.99999999999999999); + assert_string_equal(value, "-100"); + + print_netdata_double(value, 123.4567890123456789); + assert_string_equal(value, "123.456789"); + + print_netdata_double(value, 9999.9999999); + assert_string_equal(value, "9999.9999999"); + + print_netdata_double(value, -9999.9999999); + assert_string_equal(value, "-9999.9999999"); + + print_netdata_double(value, unpack_storage_number(pack_storage_number(16.777218L, SN_DEFAULT_FLAGS))); + assert_string_equal(value, "16.77722"); +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_number_printing) + }; + + return cmocka_run_group_tests_name("storage_number", tests, NULL, NULL); +} diff --git a/libnetdata/string/Makefile.am b/libnetdata/string/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/string/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/string/README.md b/libnetdata/string/README.md new file mode 100644 index 0000000..e73ab26 --- /dev/null +++ b/libnetdata/string/README.md @@ -0,0 +1,20 @@ + + +# STRING + +STRING provides a way to allocate and free text strings, while de-duplicating them. + +It can be used similarly to libc string functions: + + - `strdup()` and `strdupz()` become `string_strdupz()`. + - `strlen()` becomes `string_strlen()` (and it does not walkthrough the bytes of the string). + - `free()` and `freez()` become `string_freez()`. + +There is also a special `string_dup()` function that increases the reference counter of a STRING, avoiding the +index lookup to find it. + +Once there is a `STRING *`, the actual `const char *` can be accessed with `string2str()`. + +All STRING should be constant. Changing the contents of a `const char *` that has been acquired by `string2str()` should never happen. \ No newline at end of file diff --git a/libnetdata/string/string.c b/libnetdata/string/string.c new file mode 100644 index 0000000..d2db8aa --- /dev/null +++ b/libnetdata/string/string.c @@ -0,0 +1,596 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" +#include + +typedef int32_t REFCOUNT; + +// ---------------------------------------------------------------------------- +// STRING implementation - dedup all STRING + +struct netdata_string { + uint32_t length; // the string length including the terminating '\0' + + REFCOUNT refcount; // how many times this string is used + // We use a signed number to be able to detect duplicate frees of a string. + // If at any point this goes below zero, we have a duplicate free. + + const char str[]; // the string itself, is appended to this structure +}; + +static struct string_hashtable { + Pvoid_t JudyHSArray; // the Judy array - hashtable + netdata_rwlock_t rwlock; // the R/W lock to protect the Judy array + + long int entries; // the number of entries in the index + long int active_references; // the number of active references alive + long int memory; // the memory used, without the JudyHS index + + size_t inserts; // the number of successful inserts to the index + size_t deletes; // the number of successful deleted from the index + size_t searches; // the number of successful searches in the index + size_t duplications; // when a string is referenced + size_t releases; // when a string is unreferenced + +#ifdef NETDATA_INTERNAL_CHECKS + // internal statistics + size_t found_deleted_on_search; + size_t found_available_on_search; + size_t found_deleted_on_insert; + size_t found_available_on_insert; + size_t spins; +#endif + +} string_base = { + .JudyHSArray = NULL, + .rwlock = NETDATA_RWLOCK_INITIALIZER, +}; + +#ifdef NETDATA_INTERNAL_CHECKS +#define string_internal_stats_add(var, val) __atomic_add_fetch(&string_base.var, val, __ATOMIC_RELAXED) +#else +#define string_internal_stats_add(var, val) do {;} while(0) +#endif + +#define string_stats_atomic_increment(var) __atomic_add_fetch(&string_base.var, 1, __ATOMIC_RELAXED) +#define string_stats_atomic_decrement(var) __atomic_sub_fetch(&string_base.var, 1, __ATOMIC_RELAXED) + +void string_statistics(size_t *inserts, size_t *deletes, size_t *searches, size_t *entries, size_t *references, size_t *memory, size_t *duplications, size_t *releases) { + *inserts = string_base.inserts; + *deletes = string_base.deletes; + *searches = string_base.searches; + *entries = (size_t)string_base.entries; + *references = (size_t)string_base.active_references; + *memory = (size_t)string_base.memory; + *duplications = string_base.duplications; + *releases = string_base.releases; +} + +#define string_entry_acquire(se) __atomic_add_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST); +#define string_entry_release(se) __atomic_sub_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST); + +static inline bool string_entry_check_and_acquire(STRING *se) { + REFCOUNT expected, desired, count = 0; + + expected = __atomic_load_n(&se->refcount, __ATOMIC_SEQ_CST); + + do { + count++; + + if(expected <= 0) { + // We cannot use this. + // The reference counter reached value zero, + // so another thread is deleting this. + string_internal_stats_add(spins, count - 1); + return false; + } + + desired = expected + 1; + + } while(!__atomic_compare_exchange_n(&se->refcount, &expected, desired, false, __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST)); + + string_internal_stats_add(spins, count - 1); + + // statistics + // string_base.active_references is altered at the in string_strdupz() and string_freez() + string_stats_atomic_increment(duplications); + + return true; +} + +STRING *string_dup(STRING *string) { + if(unlikely(!string)) return NULL; + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(__atomic_load_n(&string->refcount, __ATOMIC_SEQ_CST) <= 0)) + fatal("STRING: tried to %s() a string that is freed (it has %d references).", __FUNCTION__, string->refcount); +#endif + + string_entry_acquire(string); + + // statistics + string_stats_atomic_increment(active_references); + string_stats_atomic_increment(duplications); + + return string; +} + +// Search the index and return an ACQUIRED string entry, or NULL +static inline STRING *string_index_search(const char *str, size_t length) { + STRING *string; + + // Find the string in the index + // With a read-lock so that multiple readers can use the index concurrently. + + netdata_rwlock_rdlock(&string_base.rwlock); + + Pvoid_t *Rc; + Rc = JudyHSGet(string_base.JudyHSArray, (void *)str, length); + if(likely(Rc)) { + // found in the hash table + string = *Rc; + + if(string_entry_check_and_acquire(string)) { + // we can use this entry + string_internal_stats_add(found_available_on_search, 1); + } + else { + // this entry is about to be deleted by another thread + // do not touch it, let it go... + string = NULL; + string_internal_stats_add(found_deleted_on_search, 1); + } + } + else { + // not found in the hash table + string = NULL; + } + + string_stats_atomic_increment(searches); + netdata_rwlock_unlock(&string_base.rwlock); + + return string; +} + +// Insert a string to the index and return an ACQUIRED string entry, +// or NULL if the call needs to be retried (a deleted entry with the same key is still in the index) +// The returned entry is ACQUIRED, and it can either be: +// 1. a new item inserted, or +// 2. an item found in the index that is not currently deleted +static inline STRING *string_index_insert(const char *str, size_t length) { + STRING *string; + + netdata_rwlock_wrlock(&string_base.rwlock); + + STRING **ptr; + { + JError_t J_Error; + Pvoid_t *Rc = JudyHSIns(&string_base.JudyHSArray, (void *)str, length, &J_Error); + if (unlikely(Rc == PJERR)) { + fatal( + "STRING: Cannot insert entry with name '%s' to JudyHS, JU_ERRNO_* == %u, ID == %d", + str, + JU_ERRNO(&J_Error), + JU_ERRID(&J_Error)); + } + ptr = (STRING **)Rc; + } + + if (likely(*ptr == 0)) { + // a new item added to the index + size_t mem_size = sizeof(STRING) + length; + string = mallocz(mem_size); + strcpy((char *)string->str, str); + string->length = length; + string->refcount = 1; + *ptr = string; + string_base.inserts++; + string_base.entries++; + string_base.memory += (long)mem_size; + } + else { + // the item is already in the index + string = *ptr; + + if(string_entry_check_and_acquire(string)) { + // we can use this entry + string_internal_stats_add(found_available_on_insert, 1); + } + else { + // this entry is about to be deleted by another thread + // do not touch it, let it go... + string = NULL; + string_internal_stats_add(found_deleted_on_insert, 1); + } + + string_stats_atomic_increment(searches); + } + + netdata_rwlock_unlock(&string_base.rwlock); + return string; +} + +// delete an entry from the index +static inline void string_index_delete(STRING *string) { + netdata_rwlock_wrlock(&string_base.rwlock); + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(__atomic_load_n(&string->refcount, __ATOMIC_SEQ_CST) != 0)) + fatal("STRING: tried to delete a string at %s() that is already freed (it has %d references).", __FUNCTION__, string->refcount); +#endif + + bool deleted = false; + + if (likely(string_base.JudyHSArray)) { + JError_t J_Error; + int ret = JudyHSDel(&string_base.JudyHSArray, (void *)string->str, string->length, &J_Error); + if (unlikely(ret == JERR)) { + error( + "STRING: Cannot delete entry with name '%s' from JudyHS, JU_ERRNO_* == %u, ID == %d", + string->str, + JU_ERRNO(&J_Error), + JU_ERRID(&J_Error)); + } else + deleted = true; + } + + if (unlikely(!deleted)) + error("STRING: tried to delete '%s' that is not in the index. Ignoring it.", string->str); + else { + size_t mem_size = sizeof(STRING) + string->length; + string_base.deletes++; + string_base.entries--; + string_base.memory -= (long)mem_size; + freez(string); + } + + netdata_rwlock_unlock(&string_base.rwlock); +} + +STRING *string_strdupz(const char *str) { + if(unlikely(!str || !*str)) return NULL; + + size_t length = strlen(str) + 1; + STRING *string = string_index_search(str, length); + + while(!string) { + // The search above did not find anything, + // We loop here, because during insert we may find an entry that is being deleted by another thread. + // So, we have to let it go and retry to insert it again. + + string = string_index_insert(str, length); + } + + // statistics + string_stats_atomic_increment(active_references); + + return string; +} + +void string_freez(STRING *string) { + if(unlikely(!string)) return; + + REFCOUNT refcount = string_entry_release(string); + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(refcount < 0)) + fatal("STRING: tried to %s() a string that is already freed (it has %d references).", __FUNCTION__, string->refcount); +#endif + + if(unlikely(refcount == 0)) + string_index_delete(string); + + // statistics + string_stats_atomic_decrement(active_references); + string_stats_atomic_increment(releases); +} + +size_t string_strlen(STRING *string) { + if(unlikely(!string)) return 0; + return string->length - 1; +} + +const char *string2str(STRING *string) { + if(unlikely(!string)) return ""; + return string->str; +} + +STRING *string_2way_merge(STRING *a, STRING *b) { + static STRING *X = NULL; + + if(unlikely(!X)) { + X = string_strdupz("[x]"); + } + + if(unlikely(a == b)) return string_dup(a); + if(unlikely(a == X)) return string_dup(a); + if(unlikely(b == X)) return string_dup(b); + if(unlikely(!a)) return string_dup(X); + if(unlikely(!b)) return string_dup(X); + + size_t alen = string_strlen(a); + size_t blen = string_strlen(b); + size_t length = alen + blen + string_strlen(X) + 1; + char buf1[length + 1], buf2[length + 1], *dst1; + const char *s1, *s2; + + s1 = string2str(a); + s2 = string2str(b); + dst1 = buf1; + for( ; *s1 && *s2 && *s1 == *s2 ;s1++, s2++) + *dst1++ = *s1; + + *dst1 = '\0'; + + if(*s1 != '\0' || *s2 != '\0') { + *dst1++ = '['; + *dst1++ = 'x'; + *dst1++ = ']'; + + s1 = &(string2str(a))[alen - 1]; + s2 = &(string2str(b))[blen - 1]; + char *dst2 = &buf2[length]; + *dst2 = '\0'; + for (; *s1 && *s2 && *s1 == *s2; s1--, s2--) + *(--dst2) = *s1; + + strcpy(dst1, dst2); + } + + return string_strdupz(buf1); +} + +// ---------------------------------------------------------------------------- +// STRING unit test + +struct thread_unittest { + int join; + int dups; +}; + +static void *string_thread(void *arg) { + struct thread_unittest *tu = arg; + + for(; 1 ;) { + if(__atomic_load_n(&tu->join, __ATOMIC_RELAXED)) + break; + + STRING *s = string_strdupz("string thread checking 1234567890"); + + for(int i = 0; i < tu->dups ; i++) + string_dup(s); + + for(int i = 0; i < tu->dups ; i++) + string_freez(s); + + string_freez(s); + } + + return arg; +} + +static char **string_unittest_generate_names(size_t entries) { + char **names = mallocz(sizeof(char *) * entries); + for(size_t i = 0; i < entries ;i++) { + char buf[25 + 1] = ""; + snprintfz(buf, 25, "name.%zu.0123456789.%zu \t !@#$%%^&*(),./[]{}\\|~`", i, entries / 2 + i); + names[i] = strdupz(buf); + } + return names; +} + +static void string_unittest_free_char_pp(char **pp, size_t entries) { + for(size_t i = 0; i < entries ;i++) + freez(pp[i]); + + freez(pp); +} + +int string_unittest(size_t entries) { + size_t errors = 0; + + fprintf(stderr, "Generating %zu names and values...\n", entries); + char **names = string_unittest_generate_names(entries); + + // check string + { + long int string_entries_starting = string_base.entries; + + fprintf(stderr, "\nChecking strings...\n"); + + STRING *s1 = string_strdupz("hello unittest"); + STRING *s2 = string_strdupz("hello unittest"); + if(s1 != s2) { + errors++; + fprintf(stderr, "ERROR: duplicating strings are not deduplicated\n"); + } + else + fprintf(stderr, "OK: duplicating string are deduplicated\n"); + + STRING *s3 = string_dup(s1); + if(s3 != s1) { + errors++; + fprintf(stderr, "ERROR: cloning strings are not deduplicated\n"); + } + else + fprintf(stderr, "OK: cloning string are deduplicated\n"); + + if(s1->refcount != 3) { + errors++; + fprintf(stderr, "ERROR: string refcount is not 3\n"); + } + else + fprintf(stderr, "OK: string refcount is 3\n"); + + STRING *s4 = string_strdupz("world unittest"); + if(s4 == s1) { + errors++; + fprintf(stderr, "ERROR: string is sharing pointers on different strings\n"); + } + else + fprintf(stderr, "OK: string is properly handling different strings\n"); + + usec_t start_ut, end_ut; + STRING **strings = mallocz(entries * sizeof(STRING *)); + + start_ut = now_realtime_usec(); + for(size_t i = 0; i < entries ;i++) { + strings[i] = string_strdupz(names[i]); + } + end_ut = now_realtime_usec(); + fprintf(stderr, "Created %zu strings in %llu usecs\n", entries, end_ut - start_ut); + + start_ut = now_realtime_usec(); + for(size_t i = 0; i < entries ;i++) { + strings[i] = string_dup(strings[i]); + } + end_ut = now_realtime_usec(); + fprintf(stderr, "Cloned %zu strings in %llu usecs\n", entries, end_ut - start_ut); + + start_ut = now_realtime_usec(); + for(size_t i = 0; i < entries ;i++) { + strings[i] = string_strdupz(string2str(strings[i])); + } + end_ut = now_realtime_usec(); + fprintf(stderr, "Found %zu existing strings in %llu usecs\n", entries, end_ut - start_ut); + + start_ut = now_realtime_usec(); + for(size_t i = 0; i < entries ;i++) { + string_freez(strings[i]); + } + end_ut = now_realtime_usec(); + fprintf(stderr, "Released %zu referenced strings in %llu usecs\n", entries, end_ut - start_ut); + + start_ut = now_realtime_usec(); + for(size_t i = 0; i < entries ;i++) { + string_freez(strings[i]); + } + end_ut = now_realtime_usec(); + fprintf(stderr, "Released (again) %zu referenced strings in %llu usecs\n", entries, end_ut - start_ut); + + start_ut = now_realtime_usec(); + for(size_t i = 0; i < entries ;i++) { + string_freez(strings[i]); + } + end_ut = now_realtime_usec(); + fprintf(stderr, "Freed %zu strings in %llu usecs\n", entries, end_ut - start_ut); + + freez(strings); + + if(string_base.entries != string_entries_starting + 2) { + errors++; + fprintf(stderr, "ERROR: strings dictionary should have %ld items but it has %ld\n", string_entries_starting + 2, string_base.entries); + } + else + fprintf(stderr, "OK: strings dictionary has 2 items\n"); + } + + // check 2-way merge + { + struct testcase { + char *src1; char *src2; char *expected; + } tests[] = { + { "", "", ""}, + { "a", "", "[x]"}, + { "", "a", "[x]"}, + { "a", "a", "a"}, + { "abcd", "abcd", "abcd"}, + { "foo_cs", "bar_cs", "[x]_cs"}, + { "cp_UNIQUE_INFIX_cs", "cp_unique_infix_cs", "cp_[x]_cs"}, + { "cp_UNIQUE_INFIX_ci_unique_infix_cs", "cp_unique_infix_ci_UNIQUE_INFIX_cs", "cp_[x]_cs"}, + { "foo[1234]", "foo[4321]", "foo[[x]]"}, + { NULL, NULL, NULL }, + }; + + for (struct testcase *tc = &tests[0]; tc->expected != NULL; tc++) { + STRING *src1 = string_strdupz(tc->src1); + STRING *src2 = string_strdupz(tc->src2); + STRING *expected = string_strdupz(tc->expected); + + STRING *result = string_2way_merge(src1, src2); + if (string_cmp(result, expected) != 0) { + fprintf(stderr, "string_2way_merge(\"%s\", \"%s\") -> \"%s\" (expected=\"%s\")\n", + string2str(src1), + string2str(src2), + string2str(result), + string2str(expected)); + errors++; + } + + string_freez(src1); + string_freez(src2); + string_freez(expected); + string_freez(result); + } + } + + // threads testing of string + { + struct thread_unittest tu = { + .dups = 1, + .join = 0, + }; + +#ifdef NETDATA_INTERNAL_CHECKS + size_t ofound_deleted_on_search = string_base.found_deleted_on_search, + ofound_available_on_search = string_base.found_available_on_search, + ofound_deleted_on_insert = string_base.found_deleted_on_insert, + ofound_available_on_insert = string_base.found_available_on_insert, + ospins = string_base.spins; +#endif + + size_t oinserts, odeletes, osearches, oentries, oreferences, omemory, oduplications, oreleases; + string_statistics(&oinserts, &odeletes, &osearches, &oentries, &oreferences, &omemory, &oduplications, &oreleases); + + time_t seconds_to_run = 5; + int threads_to_create = 2; + fprintf( + stderr, + "Checking string concurrency with %d threads for %lld seconds...\n", + threads_to_create, + (long long)seconds_to_run); + // check string concurrency + netdata_thread_t threads[threads_to_create]; + tu.join = 0; + for (int i = 0; i < threads_to_create; i++) { + char buf[100 + 1]; + snprintf(buf, 100, "string%d", i); + netdata_thread_create( + &threads[i], buf, NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE, string_thread, &tu); + } + sleep_usec(seconds_to_run * USEC_PER_SEC); + + __atomic_store_n(&tu.join, 1, __ATOMIC_RELAXED); + for (int i = 0; i < threads_to_create; i++) { + void *retval; + netdata_thread_join(threads[i], &retval); + } + + size_t inserts, deletes, searches, sentries, references, memory, duplications, releases; + string_statistics(&inserts, &deletes, &searches, &sentries, &references, &memory, &duplications, &releases); + + fprintf(stderr, "inserts %zu, deletes %zu, searches %zu, entries %zu, references %zu, memory %zu, duplications %zu, releases %zu\n", + inserts - oinserts, deletes - odeletes, searches - osearches, sentries - oentries, references - oreferences, memory - omemory, duplications - oduplications, releases - oreleases); + +#ifdef NETDATA_INTERNAL_CHECKS + size_t found_deleted_on_search = string_base.found_deleted_on_search, + found_available_on_search = string_base.found_available_on_search, + found_deleted_on_insert = string_base.found_deleted_on_insert, + found_available_on_insert = string_base.found_available_on_insert, + spins = string_base.spins; + + fprintf(stderr, "on insert: %zu ok + %zu deleted\non search: %zu ok + %zu deleted\nspins: %zu\n", + found_available_on_insert - ofound_available_on_insert, + found_deleted_on_insert - ofound_deleted_on_insert, + found_available_on_search - ofound_available_on_search, + found_deleted_on_search - ofound_deleted_on_search, + spins - ospins + ); +#endif + } + + string_unittest_free_char_pp(names, entries); + + fprintf(stderr, "\n%zu errors found\n", errors); + return errors ? 1 : 0; +} diff --git a/libnetdata/string/string.h b/libnetdata/string/string.h new file mode 100644 index 0000000..cec44eb --- /dev/null +++ b/libnetdata/string/string.h @@ -0,0 +1,30 @@ + +#ifndef NETDATA_STRING_H +#define NETDATA_STRING_H 1 + +#include "../libnetdata.h" + +// ---------------------------------------------------------------------------- +// STRING implementation + +typedef struct netdata_string STRING; +STRING *string_strdupz(const char *str); +STRING *string_dup(STRING *string); +void string_freez(STRING *string); +size_t string_strlen(STRING *string); +const char *string2str(STRING *string) NEVERNULL; + +// keep common prefix/suffix and replace everything else with [x] +STRING *string_2way_merge(STRING *a, STRING *b); + +static inline int string_cmp(STRING *s1, STRING *s2) { + // STRINGs are deduplicated, so the same strings have the same pointer + // when they differ, we do the typical strcmp() comparison + return (s1 == s2)?0:strcmp(string2str(s1), string2str(s2)); +} + +void string_statistics(size_t *inserts, size_t *deletes, size_t *searches, size_t *entries, size_t *references, size_t *memory, size_t *duplications, size_t *releases); + +int string_unittest(size_t entries); + +#endif diff --git a/libnetdata/string/utf8.h b/libnetdata/string/utf8.h new file mode 100644 index 0000000..133ec71 --- /dev/null +++ b/libnetdata/string/utf8.h @@ -0,0 +1,9 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STRING_UTF8_H +#define NETDATA_STRING_UTF8_H 1 + +#define IS_UTF8_BYTE(x) (x & 0x80) +#define IS_UTF8_STARTBYTE(x) (IS_UTF8_BYTE(x)&&(x & 0x40)) + +#endif /* NETDATA_STRING_UTF8_H */ diff --git a/libnetdata/tests/Makefile.am b/libnetdata/tests/Makefile.am new file mode 100644 index 0000000..babdcf0 --- /dev/null +++ b/libnetdata/tests/Makefile.am @@ -0,0 +1,4 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in diff --git a/libnetdata/tests/test_str2ld.c b/libnetdata/tests/test_str2ld.c new file mode 100644 index 0000000..8b97a70 --- /dev/null +++ b/libnetdata/tests/test_str2ld.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" +#include "../required_dummies.h" +#include +#include + +static void test_str2ld(void **state) +{ + (void)state; + char *values[] = { + "1.2345678", + "-35.6", + "0.00123", + "23842384234234.2", + ".1", + "1.2e-10", + "hello", + "1wrong", + "nan", + "inf", + NULL + }; + + for (int i = 0; values[i]; i++) { + char *e_mine = "hello", *e_sys = "world"; + NETDATA_DOUBLE mine = str2ndd(values[i], &e_mine); + NETDATA_DOUBLE sys = strtondd(values[i], &e_sys); + + if (isnan(mine)) + assert_true(isnan(sys)); + else if (isinf(mine)) + assert_true(isinf(sys)); + else if (mine != sys) + assert_false(ABS(mine - sys) > 0.000001); + + assert_ptr_equal(e_mine, e_sys); + } +} + +int main(void) +{ + const struct CMUnitTest tests[] = { + cmocka_unit_test(test_str2ld) + }; + + return cmocka_run_group_tests_name("str2ld", tests, NULL, NULL); +} diff --git a/libnetdata/threads/Makefile.am b/libnetdata/threads/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/threads/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/threads/README.md b/libnetdata/threads/README.md new file mode 100644 index 0000000..75ab11b --- /dev/null +++ b/libnetdata/threads/README.md @@ -0,0 +1,5 @@ + + + diff --git a/libnetdata/threads/threads.c b/libnetdata/threads/threads.c new file mode 100644 index 0000000..5c3d267 --- /dev/null +++ b/libnetdata/threads/threads.c @@ -0,0 +1,267 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +static size_t default_stacksize = 0, wanted_stacksize = 0; +static pthread_attr_t *attr = NULL; + +// ---------------------------------------------------------------------------- +// per thread data + +typedef struct { + void *arg; + pthread_t *thread; + const char *tag; + void *(*start_routine) (void *); + NETDATA_THREAD_OPTIONS options; +} NETDATA_THREAD; + +static __thread NETDATA_THREAD *netdata_thread = NULL; + +inline int netdata_thread_tag_exists(void) { + return (netdata_thread && netdata_thread->tag && *netdata_thread->tag); +} + +const char *netdata_thread_tag(void) { + return (netdata_thread_tag_exists() ? netdata_thread->tag : "MAIN"); +} + +// ---------------------------------------------------------------------------- +// compatibility library functions + +static __thread pid_t gettid_cached_tid = 0; +pid_t gettid(void) { + pid_t tid = 0; + + if(likely(gettid_cached_tid > 0)) + return gettid_cached_tid; + +#ifdef __FreeBSD__ + + tid = (pid_t)pthread_getthreadid_np(); + +#elif defined(__APPLE__) + + #if (defined __MAC_OS_X_VERSION_MIN_REQUIRED && __MAC_OS_X_VERSION_MIN_REQUIRED >= 1060) + uint64_t curthreadid; + pthread_threadid_np(NULL, &curthreadid); + tid = (pid_t)curthreadid; + #else /* __MAC_OS_X_VERSION_MIN_REQUIRED */ + tid = (pid_t)pthread_self; + #endif /* __MAC_OS_X_VERSION_MIN_REQUIRED */ + +#else /* __APPLE__*/ + + tid = (pid_t)syscall(SYS_gettid); + +#endif /* __FreeBSD__, __APPLE__*/ + + gettid_cached_tid = tid; + return tid; +} + +// ---------------------------------------------------------------------------- +// early initialization + +size_t netdata_threads_init(void) { + int i; + + // -------------------------------------------------------------------- + // get the required stack size of the threads of netdata + + attr = callocz(1, sizeof(pthread_attr_t)); + i = pthread_attr_init(attr); + if(i != 0) + fatal("pthread_attr_init() failed with code %d.", i); + + i = pthread_attr_getstacksize(attr, &default_stacksize); + if(i != 0) + fatal("pthread_attr_getstacksize() failed with code %d.", i); + else + debug(D_OPTIONS, "initial pthread stack size is %zu bytes", default_stacksize); + + return default_stacksize; +} + +// ---------------------------------------------------------------------------- +// late initialization + +void netdata_threads_init_after_fork(size_t stacksize) { + wanted_stacksize = stacksize; + int i; + + // ------------------------------------------------------------------------ + // set default pthread stack size + + if(attr && default_stacksize < wanted_stacksize && wanted_stacksize > 0) { + i = pthread_attr_setstacksize(attr, wanted_stacksize); + if(i != 0) + fatal("pthread_attr_setstacksize() to %zu bytes, failed with code %d.", wanted_stacksize, i); + else + debug(D_SYSTEM, "Successfully set pthread stacksize to %zu bytes", wanted_stacksize); + } +} + + +// ---------------------------------------------------------------------------- +// netdata_thread_create + +extern void rrdset_thread_rda_free(void); +extern void sender_thread_buffer_free(void); +extern void query_target_free(void); + +static void thread_cleanup(void *ptr) { + if(netdata_thread != ptr) { + NETDATA_THREAD *info = (NETDATA_THREAD *)ptr; + error("THREADS: internal error - thread local variable does not match the one passed to this function. Expected thread '%s', passed thread '%s'", netdata_thread->tag, info->tag); + } + + if(!(netdata_thread->options & NETDATA_THREAD_OPTION_DONT_LOG_CLEANUP)) + info("thread with task id %d finished", gettid()); + + sender_thread_buffer_free(); + rrdset_thread_rda_free(); + query_target_free(); + thread_cache_destroy(); + + freez((void *)netdata_thread->tag); + netdata_thread->tag = NULL; + + freez(netdata_thread); + netdata_thread = NULL; +} + +static void thread_set_name_np(NETDATA_THREAD *nt) { + + if (nt->tag) { + int ret = 0; + + char threadname[NETDATA_THREAD_NAME_MAX+1]; + strncpyz(threadname, nt->tag, NETDATA_THREAD_NAME_MAX); + +#if defined(__FreeBSD__) + pthread_set_name_np(pthread_self(), threadname); +#elif defined(__APPLE__) + ret = pthread_setname_np(threadname); +#else + ret = pthread_setname_np(pthread_self(), threadname); +#endif + + if (ret != 0) + error("cannot set pthread name of %d to %s. ErrCode: %d", gettid(), threadname, ret); + else + info("set name of thread %d to %s", gettid(), threadname); + + } +} + +void uv_thread_set_name_np(uv_thread_t ut, const char* name) { + int ret = 0; + + char threadname[NETDATA_THREAD_NAME_MAX+1]; + strncpyz(threadname, name, NETDATA_THREAD_NAME_MAX); + +#if defined(__FreeBSD__) + pthread_set_name_np(ut, threadname); +#elif defined(__APPLE__) + // Apple can only set its own name + UNUSED(ut); +#else + ret = pthread_setname_np(ut, threadname); +#endif + + if (ret) + info("cannot set libuv thread name to %s. Err: %d", threadname, ret); +} + +void os_thread_get_current_name_np(char threadname[NETDATA_THREAD_NAME_MAX + 1]) +{ + threadname[0] = '\0'; +#if defined(__FreeBSD__) + pthread_get_name_np(pthread_self(), threadname, NETDATA_THREAD_NAME_MAX + 1); +#elif defined(HAVE_PTHREAD_GETNAME_NP) /* Linux & macOS */ + (void)pthread_getname_np(pthread_self(), threadname, NETDATA_THREAD_NAME_MAX + 1); +#endif +} + +static void *thread_start(void *ptr) { + netdata_thread = (NETDATA_THREAD *)ptr; + + if(!(netdata_thread->options & NETDATA_THREAD_OPTION_DONT_LOG_STARTUP)) + info("thread created with task id %d", gettid()); + + if(pthread_setcanceltype(PTHREAD_CANCEL_DEFERRED, NULL) != 0) + error("cannot set pthread cancel type to DEFERRED."); + + if(pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, NULL) != 0) + error("cannot set pthread cancel state to ENABLE."); + + thread_set_name_np(ptr); + + void *ret = NULL; + pthread_cleanup_push(thread_cleanup, ptr); + ret = netdata_thread->start_routine(netdata_thread->arg); + pthread_cleanup_pop(1); + + return ret; +} + +int netdata_thread_create(netdata_thread_t *thread, const char *tag, NETDATA_THREAD_OPTIONS options, void *(*start_routine) (void *), void *arg) { + NETDATA_THREAD *info = mallocz(sizeof(NETDATA_THREAD)); + info->arg = arg; + info->thread = thread; + info->tag = strdupz(tag); + info->start_routine = start_routine; + info->options = options; + + int ret = pthread_create(thread, attr, thread_start, info); + if(ret != 0) + error("failed to create new thread for %s. pthread_create() failed with code %d", tag, ret); + + else { + if (!(options & NETDATA_THREAD_OPTION_JOINABLE)) { + int ret2 = pthread_detach(*thread); + if (ret2 != 0) + error("cannot request detach of newly created %s thread. pthread_detach() failed with code %d", tag, ret2); + } + } + + return ret; +} + +// ---------------------------------------------------------------------------- +// netdata_thread_cancel +#ifdef NETDATA_INTERNAL_CHECKS +int netdata_thread_cancel_with_trace(netdata_thread_t thread, int line, const char *file, const char *function) { +#else +int netdata_thread_cancel(netdata_thread_t thread) { +#endif + int ret = pthread_cancel(thread); + if(ret != 0) +#ifdef NETDATA_INTERNAL_CHECKS + error("cannot cancel thread. pthread_cancel() failed with code %d at %d@%s, function %s()", ret, line, file, function); +#else + error("cannot cancel thread. pthread_cancel() failed with code %d.", ret); +#endif + + return ret; +} + +// ---------------------------------------------------------------------------- +// netdata_thread_join + +int netdata_thread_join(netdata_thread_t thread, void **retval) { + int ret = pthread_join(thread, retval); + if(ret != 0) + error("cannot join thread. pthread_join() failed with code %d.", ret); + + return ret; +} + +int netdata_thread_detach(pthread_t thread) { + int ret = pthread_detach(thread); + if(ret != 0) + error("cannot detach thread. pthread_detach() failed with code %d.", ret); + + return ret; +} diff --git a/libnetdata/threads/threads.h b/libnetdata/threads/threads.h new file mode 100644 index 0000000..ccc18af --- /dev/null +++ b/libnetdata/threads/threads.h @@ -0,0 +1,49 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_THREADS_H +#define NETDATA_THREADS_H 1 + +#include "../libnetdata.h" + +pid_t gettid(void); + +typedef enum { + NETDATA_THREAD_OPTION_DEFAULT = 0 << 0, + NETDATA_THREAD_OPTION_JOINABLE = 1 << 0, + NETDATA_THREAD_OPTION_DONT_LOG_STARTUP = 1 << 1, + NETDATA_THREAD_OPTION_DONT_LOG_CLEANUP = 1 << 2, + NETDATA_THREAD_OPTION_DONT_LOG = NETDATA_THREAD_OPTION_DONT_LOG_STARTUP|NETDATA_THREAD_OPTION_DONT_LOG_CLEANUP, +} NETDATA_THREAD_OPTIONS; + +#define netdata_thread_cleanup_push(func, arg) pthread_cleanup_push(func, arg) +#define netdata_thread_cleanup_pop(execute) pthread_cleanup_pop(execute) + +typedef pthread_t netdata_thread_t; + +#define NETDATA_THREAD_TAG_MAX 100 +const char *netdata_thread_tag(void); +int netdata_thread_tag_exists(void); + +size_t netdata_threads_init(void); +void netdata_threads_init_after_fork(size_t stacksize); + +int netdata_thread_create(netdata_thread_t *thread, const char *tag, NETDATA_THREAD_OPTIONS options, void *(*start_routine) (void *), void *arg); + +#ifdef NETDATA_INTERNAL_CHECKS +#define netdata_thread_cancel(thread) netdata_thread_cancel_with_trace(thread, __LINE__, __FILE__, __FUNCTION__) +int netdata_thread_cancel_with_trace(netdata_thread_t thread, int line, const char *file, const char *function); +#else +int netdata_thread_cancel(netdata_thread_t thread); +#endif + +int netdata_thread_join(netdata_thread_t thread, void **retval); +int netdata_thread_detach(pthread_t thread); + +#define NETDATA_THREAD_NAME_MAX 15 +void uv_thread_set_name_np(uv_thread_t ut, const char* name); +void os_thread_get_current_name_np(char threadname[NETDATA_THREAD_NAME_MAX + 1]); + +#define netdata_thread_self pthread_self +#define netdata_thread_testcancel pthread_testcancel + +#endif //NETDATA_THREADS_H diff --git a/libnetdata/url/Makefile.am b/libnetdata/url/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/url/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/url/README.md b/libnetdata/url/README.md new file mode 100644 index 0000000..bd289d9 --- /dev/null +++ b/libnetdata/url/README.md @@ -0,0 +1,5 @@ + + + diff --git a/libnetdata/url/url.c b/libnetdata/url/url.c new file mode 100644 index 0000000..f90b3d5 --- /dev/null +++ b/libnetdata/url/url.c @@ -0,0 +1,391 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +// ---------------------------------------------------------------------------- +// URL encode / decode +// code from: http://www.geekhideout.com/urlcode.shtml + +/* Converts a hex character to its integer value */ +char from_hex(char ch) { + return (char)(isdigit(ch) ? ch - '0' : tolower(ch) - 'a' + 10); +} + +/* Converts an integer value to its hex character*/ +char to_hex(char code) { + static char hex[] = "0123456789abcdef"; + return hex[code & 15]; +} + +/* Returns a url-encoded version of str */ +/* IMPORTANT: be sure to free() the returned string after use */ +char *url_encode(char *str) { + char *buf, *pbuf; + + pbuf = buf = mallocz(strlen(str) * 3 + 1); + + while (*str) { + if (isalnum(*str) || *str == '-' || *str == '_' || *str == '.' || *str == '~') + *pbuf++ = *str; + + else if (*str == ' ') + *pbuf++ = '+'; + + else{ + *pbuf++ = '%'; + *pbuf++ = to_hex(*str >> 4); + *pbuf++ = to_hex(*str & 15); + } + + str++; + } + *pbuf = '\0'; + + pbuf = strdupz(buf); + freez(buf); + return pbuf; +} + +/** + * Percentage escape decode + * + * Decode %XX character or return 0 if cannot + * + * @param s the string to decode + * + * @return The character decoded on success and 0 otherwise + */ +char url_percent_escape_decode(char *s) { + if(likely(s[1] && s[2])) + return from_hex(s[1]) << 4 | from_hex(s[2]); + return 0; +} + +/** + * Get byte length + * + * This (utf8 string related) should be moved in separate file in future + * + * @param c is the utf8 character + * * + * @return It returns the length of the specific character. + */ +char url_utf8_get_byte_length(char c) { + if(!IS_UTF8_BYTE(c)) + return 1; + + char length = 0; + while(likely(c & 0x80)) { + length++; + c <<= 1; + } + //4 byte is max size for UTF-8 char + //10XX XXXX is not valid character -> check length == 1 + if(length > 4 || length == 1) + return -1; + + return length; +} + +/** + * Decode Multibyte UTF8 + * + * Decode % encoded UTF-8 characters and copy them to *d + * + * @param s first address + * @param d + * @param d_end last address + * + * @return count of bytes written to *d + */ +char url_decode_multibyte_utf8(char *s, char *d, char *d_end) { + char first_byte = url_percent_escape_decode(s); + + if(unlikely(!first_byte || !IS_UTF8_STARTBYTE(first_byte))) + return 0; + + char byte_length = url_utf8_get_byte_length(first_byte); + + if(unlikely(byte_length <= 0 || d+byte_length >= d_end)) + return 0; + + char to_read = byte_length; + while(to_read > 0) { + char c = url_percent_escape_decode(s); + + if(unlikely( !IS_UTF8_BYTE(c) )) + return 0; + if((to_read != byte_length) && IS_UTF8_STARTBYTE(c)) + return 0; + + *d++ = c; + s+=3; + to_read--; + } + + return byte_length; +} + +/* + * The utf8_check() function scans the '\0'-terminated string starting + * at s. It returns a pointer to the first byte of the first malformed + * or overlong UTF-8 sequence found, or NULL if the string contains + * only correct UTF-8. It also spots UTF-8 sequences that could cause + * trouble if converted to UTF-16, namely surrogate characters + * (U+D800..U+DFFF) and non-Unicode positions (U+FFFE..U+FFFF). This + * routine is very likely to find a malformed sequence if the input + * uses any other encoding than UTF-8. It therefore can be used as a + * very effective heuristic for distinguishing between UTF-8 and other + * encodings. + * + * Markus Kuhn -- 2005-03-30 + * License: http://www.cl.cam.ac.uk/~mgk25/short-license.html + */ +unsigned char *utf8_check(unsigned char *s) +{ + while (*s) + { + if (*s < 0x80) + /* 0xxxxxxx */ + s++; + else if ((s[0] & 0xe0) == 0xc0) + { + /* 110XXXXx 10xxxxxx */ + if ((s[1] & 0xc0) != 0x80 || + (s[0] & 0xfe) == 0xc0) /* overlong? */ + return s; + else + s += 2; + } + else if ((s[0] & 0xf0) == 0xe0) + { + /* 1110XXXX 10Xxxxxx 10xxxxxx */ + if ((s[1] & 0xc0) != 0x80 || + (s[2] & 0xc0) != 0x80 || + (s[0] == 0xe0 && (s[1] & 0xe0) == 0x80) || /* overlong? */ + (s[0] == 0xed && (s[1] & 0xe0) == 0xa0) || /* surrogate? */ + (s[0] == 0xef && s[1] == 0xbf && + (s[2] & 0xfe) == 0xbe)) /* U+FFFE or U+FFFF? */ + return s; + else + s += 3; + } + else if ((s[0] & 0xf8) == 0xf0) + { + /* 11110XXX 10XXxxxx 10xxxxxx 10xxxxxx */ + if ((s[1] & 0xc0) != 0x80 || + (s[2] & 0xc0) != 0x80 || + (s[3] & 0xc0) != 0x80 || + (s[0] == 0xf0 && (s[1] & 0xf0) == 0x80) || /* overlong? */ + (s[0] == 0xf4 && s[1] > 0x8f) || s[0] > 0xf4) /* > U+10FFFF? */ + return s; + else + s += 4; + } + else + return s; + } + + return NULL; +} + +char *url_decode_r(char *to, char *url, size_t size) { + char *s = url, // source + *d = to, // destination + *e = &to[size - 1]; // destination end + + while(*s && d < e) { + if(unlikely(*s == '%')) { + char t = url_percent_escape_decode(s); + if(IS_UTF8_BYTE(t)) { + char bytes_written = url_decode_multibyte_utf8(s, d, e); + if(likely(bytes_written)){ + d += bytes_written; + s += (bytes_written * 3)-1; + } + else { + goto fail_cleanup; + } + } + else if(likely(t) && isprint(t)) { + // avoid HTTP header injection + *d++ = t; + s += 2; + } + else + goto fail_cleanup; + } + else if(unlikely(*s == '+')) + *d++ = ' '; + + else + *d++ = *s; + + s++; + } + + *d = '\0'; + + if(unlikely( utf8_check((unsigned char *)to) )) //NULL means success here + return NULL; + + return to; + +fail_cleanup: + *d = '\0'; + return NULL; +} + +/** + * Is request complete? + * + * Check whether the request is complete. + * This function cannot check all the requests METHODS, for example, case you are working with POST, it will fail. + * + * @param begin is the first character of the sequence to analyse. + * @param end is the last character of the sequence + * @param length is the length of the total of bytes read, it is not the difference between end and begin. + * + * @return It returns 1 when the request is complete and 0 otherwise. + */ +inline int url_is_request_complete(char *begin, char *end, size_t length) { + + if ( begin == end) { + //Message cannot be complete when first and last address are the same + return 0; + } + + //This math to verify the last is valid, because we are discarding the POST + if (length > 4) { + begin = end - 4; + } + + return (strstr(begin, "\r\n\r\n"))?1:0; +} + +/** + * Find protocol + * + * Search for the string ' HTTP/' in the message given. + * + * @param s is the start of the user request. + * @return + */ +inline char *url_find_protocol(char *s) { + while(*s) { + // find the next space + while (*s && *s != ' ') s++; + + // is it SPACE + "HTTP/" ? + if(*s && !strncmp(s, " HTTP/", 6)) break; + else s++; + } + + return s; +} + +/** + * Map query string + * + * Map the query string fields that will be decoded. + * This functions must be called after to check the presence of query strings, + * here we are assuming that you already tested this. + * + * @param out the pointer to pointers that will be used to map + * @param url the input url that we are decoding. + * + * @return It returns the number of total variables in the query string. + */ +int url_map_query_string(char **out, char *url) { + (void)out; + (void)url; + int count = 0; + + //First we try to parse considering that there was not URL encode process + char *moveme = url; + char *ptr; + + //We always we have at least one here, so I can set this. + out[count++] = moveme; + while(moveme) { + ptr = strchr((moveme+1), '&'); + if(ptr) { + out[count++] = ptr; + } + + moveme = ptr; + } + + //I could not find any '&', so I am assuming now it is like '%26' + if (count == 1) { + moveme = url; + while(moveme) { + ptr = strchr((moveme+1), '%'); + if(ptr) { + char *test = (ptr+1); + if (!strncmp(test, "3f", 2) || !strncmp(test, "3F", 2)) { + out[count++] = ptr; + } + } + moveme = ptr; + } + } + + return count; +} + +/** + * Parse query string + * + * Parse the query string mapped and store it inside output. + * + * @param output is a vector where I will store the string. + * @param max is the maximum length of the output + * @param map the map done by the function url_map_query_string. + * @param total the total number of variables inside map + * + * @return It returns 0 on success and -1 otherwise + */ +int url_parse_query_string(char *output, size_t max, char **map, int total) { + if(!total) { + return 0; + } + + int counter, next; + size_t length; + char *end; + char *begin = map[0]; + char save; + size_t copied = 0; + for(counter = 0, next=1 ; next <= total ; ++counter, ++next) { + if (next != total) { + end = map[next]; + length = (size_t) (end - begin); + save = *end; + *end = 0x00; + } else { + length = strlen(begin); + end = NULL; + } + length++; + + if (length > (max - copied)) { + error("Parsing query string: we cannot parse a query string so big"); + break; + } + + if(!url_decode_r(output, begin, length)) { + return -1; + } + length = strlen(output); + copied += length; + output += length; + + begin = end; + if (begin) { + *begin = save; + } + } + + return 0; +} diff --git a/libnetdata/url/url.h b/libnetdata/url/url.h new file mode 100644 index 0000000..da0f69a --- /dev/null +++ b/libnetdata/url/url.h @@ -0,0 +1,35 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_URL_H +#define NETDATA_URL_H 1 + +#include "../libnetdata.h" + +// ---------------------------------------------------------------------------- +// URL encode / decode +// code from: http://www.geekhideout.com/urlcode.shtml + +/* Converts a hex character to its integer value */ +char from_hex(char ch); + +/* Converts an integer value to its hex character*/ +char to_hex(char code); + +/* Returns a url-encoded version of str */ +/* IMPORTANT: be sure to free() the returned string after use */ +char *url_encode(char *str); + +/* Returns a url-decoded version of str */ +/* IMPORTANT: be sure to free() the returned string after use */ +char *url_decode(char *str); + +char *url_decode_r(char *to, char *url, size_t size); + +#define WEB_FIELDS_MAX 400 +int url_map_query_string(char **out, char *url); +int url_parse_query_string(char *output, size_t max, char **map, int total); + +int url_is_request_complete(char *begin,char *end,size_t length); +char *url_find_protocol(char *s); + +#endif /* NETDATA_URL_H */ diff --git a/libnetdata/worker_utilization/Makefile.am b/libnetdata/worker_utilization/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/worker_utilization/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/worker_utilization/README.md b/libnetdata/worker_utilization/README.md new file mode 100644 index 0000000..35f30b4 --- /dev/null +++ b/libnetdata/worker_utilization/README.md @@ -0,0 +1,90 @@ + + +# Worker Utilization + +This library is to be used when there are 1 or more worker threads accepting requests +of some kind and servicing them. The goal is to provide a very simple way to monitor +worker threads utilization, as a percentage of the time they are busy and the amount +of requests served. + +## Design goals + +1. Minimal, if any, impact on the performance of the workers +2. Easy to be integrated into any kind of worker +3. No state of any kind at the worker side + +## How to use + +When a working thread starts, call: + +```c +void worker_register(const char *name); +``` + +This will create the necessary structures for the library to work. +No need to keep a pointer to them. They are allocated as `__thread` variables. + +Then job types need to be defined. Job types are anything a worker does that can be +counted and their execution time needs to be reported. The library is fast enough to +be integrated even on workers that perform hundreds of thousands of actions per second. + +Job types are defined like this: + +```c +void worker_register_job_type(size_t id, const char *name); +``` + +`id` is a number starting from zero. The library is compiled with a fixed size of 50 +ids (0 to 49). More can be allocated by setting `WORKER_UTILIZATION_MAX_JOB_TYPES` in +`worker_utilization.h`. `name` can be any string up to 22 characters. This can be +changed by setting `WORKER_UTILIZATION_MAX_JOB_NAME_LENGTH` in `worker_utilization.h`. + +Each thread that calls `worker_register(name)` will allocate about 3kB for maintaining +the information required. + +When the thread stops, call: + +```c +void worker_unregister(void); +``` + +Again, no parameters, or return values. + +> IMPORTANT: cancellable threads need to add a call to `worker_unregister()` to the +> `pop` function that cleans up the thread. Failure to do so, will result in about +> 3kB of memory leak for every thread that is stopped. + +When you are about to do some work in the working thread, call: + +```c +void worker_is_busy(size_t id); +``` + +When you finish doing the job, call: + +```c +void worker_is_idle(void); +``` + +Calls to `worker_is_busy(id)` can be made one after another (without calling +`worker_is_idle()` between them) to switch jobs without losing any time between +them and eliminating one of the 2 clock calls involved. + +## Implementation details + +Totally lockless, extremely fast, it should not introduce any kind of problems to the +workers. Every time `worker_is_busy(id)` or `worker_is_idle()` are called, a call to +`now_realtime_usec()` is done and a couple of variables are updated. That's it! + +The worker does not need to update the variables regularly. Based on the last status +of the worker, the statistics collector of netdata will calculate if the thread is +busy or idle all the time or part of the time. Works well for both thousands of jobs +per second and unlimited working time (being totally busy with a single request for +ages). + +The statistics collector is called by the global statistics thread of netdata. So, +even if the workers are extremely busy with their jobs, netdata will be able to know +how busy they are. diff --git a/libnetdata/worker_utilization/worker_utilization.c b/libnetdata/worker_utilization/worker_utilization.c new file mode 100644 index 0000000..afaff20 --- /dev/null +++ b/libnetdata/worker_utilization/worker_utilization.c @@ -0,0 +1,362 @@ +#include "worker_utilization.h" + +#define WORKER_IDLE 'I' +#define WORKER_BUSY 'B' + +struct worker_job_type { + STRING *name; + STRING *units; + + // statistics controlled variables + size_t statistics_last_jobs_started; + usec_t statistics_last_busy_time; + NETDATA_DOUBLE statistics_last_custom_value; + + // worker controlled variables + volatile size_t worker_jobs_started; + volatile usec_t worker_busy_time; + + WORKER_METRIC_TYPE type; + NETDATA_DOUBLE custom_value; +}; + +struct worker { + pid_t pid; + const char *tag; + const char *workname; + + // statistics controlled variables + volatile usec_t statistics_last_checkpoint; + size_t statistics_last_jobs_started; + usec_t statistics_last_busy_time; + + // the worker controlled variables + size_t worker_max_job_id; + volatile size_t job_id; + volatile size_t jobs_started; + volatile usec_t busy_time; + volatile usec_t last_action_timestamp; + volatile char last_action; + + struct worker_job_type per_job_type[WORKER_UTILIZATION_MAX_JOB_TYPES]; + + struct worker *next; + struct worker *prev; +}; + +struct workers_workname { // this is what we add to JudyHS + SPINLOCK spinlock; + struct worker *base; +}; + +static struct workers_globals { + SPINLOCK spinlock; + Pvoid_t worknames_JudyHS; + +} workers_globals = { // workers globals, the base of all worknames + .spinlock = NETDATA_SPINLOCK_INITIALIZER, // a lock for the worknames index + .worknames_JudyHS = NULL, // the worknames index +}; + +static __thread struct worker *worker = NULL; // the current thread worker + +void worker_register(const char *name) { + if(unlikely(worker)) return; + + worker = callocz(1, sizeof(struct worker)); + worker->pid = gettid(); + worker->tag = strdupz(netdata_thread_tag()); + worker->workname = strdupz(name); + + usec_t now = now_monotonic_usec(); + worker->statistics_last_checkpoint = now; + worker->last_action_timestamp = now; + worker->last_action = WORKER_IDLE; + + size_t name_size = strlen(name) + 1; + netdata_spinlock_lock(&workers_globals.spinlock); + + Pvoid_t *PValue = JudyHSGet(workers_globals.worknames_JudyHS, (void *)name, name_size); + if(!PValue) + PValue = JudyHSIns(&workers_globals.worknames_JudyHS, (void *)name, name_size, PJE0); + + struct workers_workname *workname = *PValue; + if(!workname) { + workname = mallocz(sizeof(struct workers_workname)); + workname->spinlock = NETDATA_SPINLOCK_INITIALIZER; + workname->base = NULL; + *PValue = workname; + } + + netdata_spinlock_lock(&workname->spinlock); + DOUBLE_LINKED_LIST_APPEND_UNSAFE(workname->base, worker, prev, next); + netdata_spinlock_unlock(&workname->spinlock); + + netdata_spinlock_unlock(&workers_globals.spinlock); +} + +void worker_register_job_custom_metric(size_t job_id, const char *name, const char *units, WORKER_METRIC_TYPE type) { + if(unlikely(!worker)) return; + + if(unlikely(job_id >= WORKER_UTILIZATION_MAX_JOB_TYPES)) { + error("WORKER_UTILIZATION: job_id %zu is too big. Max is %zu", job_id, (size_t)(WORKER_UTILIZATION_MAX_JOB_TYPES - 1)); + return; + } + + if(job_id > worker->worker_max_job_id) + worker->worker_max_job_id = job_id; + + if(worker->per_job_type[job_id].name) { + if(strcmp(string2str(worker->per_job_type[job_id].name), name) != 0 || worker->per_job_type[job_id].type != type || strcmp(string2str(worker->per_job_type[job_id].units), units) != 0) + error("WORKER_UTILIZATION: duplicate job registration: worker '%s' job id %zu is '%s', ignoring the later '%s'", worker->workname, job_id, string2str(worker->per_job_type[job_id].name), name); + return; + } + + worker->per_job_type[job_id].name = string_strdupz(name); + worker->per_job_type[job_id].units = string_strdupz(units); + worker->per_job_type[job_id].type = type; +} + +void worker_register_job_name(size_t job_id, const char *name) { + worker_register_job_custom_metric(job_id, name, "", WORKER_METRIC_IDLE_BUSY); +} + +void worker_unregister(void) { + if(unlikely(!worker)) return; + + size_t workname_size = strlen(worker->workname) + 1; + netdata_spinlock_lock(&workers_globals.spinlock); + Pvoid_t *PValue = JudyHSGet(workers_globals.worknames_JudyHS, (void *)worker->workname, workname_size); + if(PValue) { + struct workers_workname *workname = *PValue; + netdata_spinlock_lock(&workname->spinlock); + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(workname->base, worker, prev, next); + netdata_spinlock_unlock(&workname->spinlock); + + if(!workname->base) { + JudyHSDel(&workers_globals.worknames_JudyHS, (void *) worker->workname, workname_size, PJE0); + freez(workname); + } + } + netdata_spinlock_unlock(&workers_globals.spinlock); + + for(int i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { + string_freez(worker->per_job_type[i].name); + string_freez(worker->per_job_type[i].units); + } + + freez((void *)worker->tag); + freez((void *)worker->workname); + freez(worker); + + worker = NULL; +} + +static inline void worker_is_idle_with_time(usec_t now) { + usec_t delta = now - worker->last_action_timestamp; + worker->busy_time += delta; + worker->per_job_type[worker->job_id].worker_busy_time += delta; + + // the worker was busy + // set it to idle before we set the timestamp + + worker->last_action = WORKER_IDLE; + if(likely(worker->last_action_timestamp < now)) + worker->last_action_timestamp = now; +} + +void worker_is_idle(void) { + if(unlikely(!worker || worker->last_action != WORKER_BUSY)) return; + + worker_is_idle_with_time(now_monotonic_usec()); +} + +void worker_is_busy(size_t job_id) { + if(unlikely(!worker || job_id >= WORKER_UTILIZATION_MAX_JOB_TYPES)) + return; + + usec_t now = now_monotonic_usec(); + + if(worker->last_action == WORKER_BUSY) + worker_is_idle_with_time(now); + + // the worker was idle + // set the timestamp and then set it to busy + + worker->job_id = job_id; + worker->per_job_type[job_id].worker_jobs_started++; + worker->jobs_started++; + worker->last_action_timestamp = now; + worker->last_action = WORKER_BUSY; +} + +void worker_set_metric(size_t job_id, NETDATA_DOUBLE value) { + if(unlikely(!worker)) return; + if(unlikely(job_id >= WORKER_UTILIZATION_MAX_JOB_TYPES)) + return; + + switch(worker->per_job_type[job_id].type) { + case WORKER_METRIC_INCREMENT: + worker->per_job_type[job_id].custom_value += value; + break; + + case WORKER_METRIC_INCREMENTAL_TOTAL: + case WORKER_METRIC_ABSOLUTE: + default: + worker->per_job_type[job_id].custom_value = value; + break; + } +} + +// statistics interface + +void workers_foreach(const char *name, void (*callback)( + void *data + , pid_t pid + , const char *thread_tag + , size_t max_job_id + , size_t utilization_usec + , size_t duration_usec + , size_t jobs_started, size_t is_running + , STRING **job_types_names + , STRING **job_types_units + , WORKER_METRIC_TYPE *job_metric_types + , size_t *job_types_jobs_started + , usec_t *job_types_busy_time + , NETDATA_DOUBLE *job_custom_values + ) + , void *data) { + netdata_spinlock_lock(&workers_globals.spinlock); + usec_t busy_time, delta; + size_t i, jobs_started, jobs_running; + + size_t workname_size = strlen(name) + 1; + struct workers_workname *workname; + Pvoid_t *PValue = JudyHSGet(workers_globals.worknames_JudyHS, (void *)name, workname_size); + if(PValue) { + workname = *PValue; + netdata_spinlock_lock(&workname->spinlock); + } + else + workname = NULL; + + netdata_spinlock_unlock(&workers_globals.spinlock); + + if(!workname) + return; + + struct worker *p; + DOUBLE_LINKED_LIST_FOREACH_FORWARD(workname->base, p, prev, next) { + usec_t now = now_monotonic_usec(); + + // find per job type statistics + STRING *per_job_type_name[WORKER_UTILIZATION_MAX_JOB_TYPES]; + STRING *per_job_type_units[WORKER_UTILIZATION_MAX_JOB_TYPES]; + WORKER_METRIC_TYPE per_job_metric_type[WORKER_UTILIZATION_MAX_JOB_TYPES]; + size_t per_job_type_jobs_started[WORKER_UTILIZATION_MAX_JOB_TYPES]; + usec_t per_job_type_busy_time[WORKER_UTILIZATION_MAX_JOB_TYPES]; + NETDATA_DOUBLE per_job_custom_values[WORKER_UTILIZATION_MAX_JOB_TYPES]; + + size_t max_job_id = p->worker_max_job_id; + for(i = 0; i <= max_job_id ;i++) { + per_job_type_name[i] = p->per_job_type[i].name; + per_job_type_units[i] = p->per_job_type[i].units; + per_job_metric_type[i] = p->per_job_type[i].type; + + switch(p->per_job_type[i].type) { + default: + case WORKER_METRIC_EMPTY: { + per_job_type_jobs_started[i] = 0; + per_job_type_busy_time[i] = 0; + per_job_custom_values[i] = NAN; + break; + } + + case WORKER_METRIC_IDLE_BUSY: { + size_t tmp_jobs_started = p->per_job_type[i].worker_jobs_started; + per_job_type_jobs_started[i] = tmp_jobs_started - p->per_job_type[i].statistics_last_jobs_started; + p->per_job_type[i].statistics_last_jobs_started = tmp_jobs_started; + + usec_t tmp_busy_time = p->per_job_type[i].worker_busy_time; + per_job_type_busy_time[i] = tmp_busy_time - p->per_job_type[i].statistics_last_busy_time; + p->per_job_type[i].statistics_last_busy_time = tmp_busy_time; + + per_job_custom_values[i] = NAN; + break; + } + + case WORKER_METRIC_ABSOLUTE: { + per_job_type_jobs_started[i] = 0; + per_job_type_busy_time[i] = 0; + + per_job_custom_values[i] = p->per_job_type[i].custom_value; + break; + } + + case WORKER_METRIC_INCREMENTAL_TOTAL: + case WORKER_METRIC_INCREMENT: { + per_job_type_jobs_started[i] = 0; + per_job_type_busy_time[i] = 0; + + NETDATA_DOUBLE tmp_custom_value = p->per_job_type[i].custom_value; + per_job_custom_values[i] = tmp_custom_value - p->per_job_type[i].statistics_last_custom_value; + p->per_job_type[i].statistics_last_custom_value = tmp_custom_value; + + break; + } + } + } + + // get a copy of the worker variables + size_t worker_job_id = p->job_id; + usec_t worker_busy_time = p->busy_time; + size_t worker_jobs_started = p->jobs_started; + char worker_last_action = p->last_action; + usec_t worker_last_action_timestamp = p->last_action_timestamp; + + delta = now - p->statistics_last_checkpoint; + p->statistics_last_checkpoint = now; + + // this is the only variable both the worker thread and the statistics thread are writing + // we set this only when the worker is busy, so that the worker will not + // accumulate all the busy time, but only the time after the point we collected statistics + if(worker_last_action == WORKER_BUSY && p->last_action_timestamp == worker_last_action_timestamp && p->last_action == WORKER_BUSY) + p->last_action_timestamp = now; + + // calculate delta busy time + busy_time = worker_busy_time - p->statistics_last_busy_time; + p->statistics_last_busy_time = worker_busy_time; + + // calculate delta jobs done + jobs_started = worker_jobs_started - p->statistics_last_jobs_started; + p->statistics_last_jobs_started = worker_jobs_started; + + jobs_running = 0; + if(worker_last_action == WORKER_BUSY) { + // the worker is still busy with something + // let's add that busy time to the reported one + usec_t dt = now - worker_last_action_timestamp; + busy_time += dt; + per_job_type_busy_time[worker_job_id] += dt; + jobs_running = 1; + } + + callback(data + , p->pid + , p->tag + , max_job_id + , busy_time + , delta + , jobs_started + , jobs_running + , per_job_type_name + , per_job_type_units + , per_job_metric_type + , per_job_type_jobs_started + , per_job_type_busy_time + , per_job_custom_values + ); + } + + netdata_spinlock_unlock(&workname->spinlock); +} diff --git a/libnetdata/worker_utilization/worker_utilization.h b/libnetdata/worker_utilization/worker_utilization.h new file mode 100644 index 0000000..f1412e6 --- /dev/null +++ b/libnetdata/worker_utilization/worker_utilization.h @@ -0,0 +1,47 @@ +#ifndef WORKER_UTILIZATION_H +#define WORKER_UTILIZATION_H 1 + +#include "../libnetdata.h" + +// workers interfaces + +#define WORKER_UTILIZATION_MAX_JOB_TYPES 50 + +typedef enum { + WORKER_METRIC_EMPTY = 0, + WORKER_METRIC_IDLE_BUSY = 1, + WORKER_METRIC_ABSOLUTE = 2, + WORKER_METRIC_INCREMENT = 3, + WORKER_METRIC_INCREMENTAL_TOTAL = 4, +} WORKER_METRIC_TYPE; + +void worker_register(const char *name); +void worker_register_job_name(size_t job_id, const char *name); +void worker_register_job_custom_metric(size_t job_id, const char *name, const char *units, WORKER_METRIC_TYPE type); +void worker_unregister(void); + +void worker_is_idle(void); +void worker_is_busy(size_t job_id); +void worker_set_metric(size_t job_id, NETDATA_DOUBLE value); + +// statistics interface + +void workers_foreach(const char *name, void (*callback)( + void *data + , pid_t pid + , const char *thread_tag + , size_t max_job_id + , size_t utilization_usec + , size_t duration_usec + , size_t jobs_started + , size_t is_running + , STRING **job_types_names + , STRING **job_types_units + , WORKER_METRIC_TYPE *job_metric_types + , size_t *job_types_jobs_started + , usec_t *job_types_busy_time + , NETDATA_DOUBLE *job_custom_values + ) + , void *data); + +#endif // WORKER_UTILIZATION_H -- cgit v1.2.3