diff options
Diffstat (limited to '')
-rw-r--r-- | src/libnetdata/README.md | 14 | ||||
-rw-r--r-- | src/libnetdata/adaptive_resortable_list/README.md | 103 | ||||
-rw-r--r-- | src/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c (renamed from libnetdata/adaptive_resortable_list/adaptive_resortable_list.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h (renamed from libnetdata/adaptive_resortable_list/adaptive_resortable_list.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/aral/README.md | 173 | ||||
-rw-r--r-- | src/libnetdata/aral/aral.c (renamed from libnetdata/aral/aral.c) | 6 | ||||
-rw-r--r-- | src/libnetdata/aral/aral.h (renamed from libnetdata/aral/aral.h) | 4 | ||||
-rw-r--r-- | src/libnetdata/avl/README.md | 21 | ||||
-rw-r--r-- | src/libnetdata/avl/avl.c (renamed from libnetdata/avl/avl.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/avl/avl.h (renamed from libnetdata/avl/avl.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/bitmap.h | 81 | ||||
-rw-r--r-- | src/libnetdata/buffer/README.md | 20 | ||||
-rw-r--r-- | src/libnetdata/buffer/buffer.c (renamed from libnetdata/buffer/buffer.c) | 35 | ||||
-rw-r--r-- | src/libnetdata/buffer/buffer.h (renamed from libnetdata/buffer/buffer.h) | 91 | ||||
-rw-r--r-- | src/libnetdata/buffered_reader/README.md (renamed from mqtt_websockets/c_rhash/README.md) | 0 | ||||
-rw-r--r-- | src/libnetdata/buffered_reader/buffered_reader.c (renamed from libnetdata/buffered_reader/buffered_reader.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/buffered_reader/buffered_reader.h (renamed from libnetdata/buffered_reader/buffered_reader.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/byteorder.h | 32 | ||||
-rw-r--r-- | src/libnetdata/circular_buffer/README.md | 14 | ||||
-rw-r--r-- | src/libnetdata/circular_buffer/circular_buffer.c (renamed from libnetdata/circular_buffer/circular_buffer.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/circular_buffer/circular_buffer.h (renamed from libnetdata/circular_buffer/circular_buffer.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/clocks/README.md | 10 | ||||
-rw-r--r-- | src/libnetdata/clocks/clocks.c (renamed from libnetdata/clocks/clocks.c) | 4 | ||||
-rw-r--r-- | src/libnetdata/clocks/clocks.h (renamed from libnetdata/clocks/clocks.h) | 7 | ||||
-rw-r--r-- | src/libnetdata/completion/completion.c | 99 | ||||
-rw-r--r-- | src/libnetdata/completion/completion.h (renamed from libnetdata/completion/completion.h) | 4 | ||||
-rw-r--r-- | src/libnetdata/config/README.md | 58 | ||||
-rw-r--r-- | src/libnetdata/config/appconfig.c (renamed from libnetdata/config/appconfig.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/config/appconfig.h (renamed from libnetdata/config/appconfig.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/config/dyncfg.c | 298 | ||||
-rw-r--r-- | src/libnetdata/config/dyncfg.h | 88 | ||||
-rw-r--r-- | src/libnetdata/datetime/README.md | 11 | ||||
-rw-r--r-- | src/libnetdata/datetime/iso8601.c (renamed from libnetdata/datetime/iso8601.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/datetime/iso8601.h (renamed from libnetdata/datetime/iso8601.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/datetime/rfc3339.c (renamed from libnetdata/datetime/rfc3339.c) | 2 | ||||
-rw-r--r-- | src/libnetdata/datetime/rfc3339.h (renamed from libnetdata/datetime/rfc3339.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/datetime/rfc7231.c (renamed from libnetdata/datetime/rfc7231.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/datetime/rfc7231.h (renamed from libnetdata/datetime/rfc7231.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/dictionary/README.md | 235 | ||||
-rw-r--r-- | src/libnetdata/dictionary/dictionary-callbacks.h | 93 | ||||
-rw-r--r-- | src/libnetdata/dictionary/dictionary-hashtable.h | 263 | ||||
-rw-r--r-- | src/libnetdata/dictionary/dictionary-internals.h | 259 | ||||
-rw-r--r-- | src/libnetdata/dictionary/dictionary-item.h | 555 | ||||
-rw-r--r-- | src/libnetdata/dictionary/dictionary-locks.h | 112 | ||||
-rw-r--r-- | src/libnetdata/dictionary/dictionary-refcount.h | 247 | ||||
-rw-r--r-- | src/libnetdata/dictionary/dictionary-statistics.h | 246 | ||||
-rw-r--r-- | src/libnetdata/dictionary/dictionary-traversal.c | 268 | ||||
-rw-r--r-- | src/libnetdata/dictionary/dictionary-unittest.c | 1195 | ||||
-rw-r--r-- | src/libnetdata/dictionary/dictionary.c | 767 | ||||
-rw-r--r-- | src/libnetdata/dictionary/dictionary.h (renamed from libnetdata/dictionary/dictionary.h) | 36 | ||||
-rw-r--r-- | src/libnetdata/dictionary/thread-cache.c | 47 | ||||
-rw-r--r-- | src/libnetdata/dictionary/thread-cache.h | 15 | ||||
-rw-r--r-- | src/libnetdata/ebpf/README.md | 13 | ||||
-rw-r--r-- | src/libnetdata/ebpf/ebpf.c | 1683 | ||||
-rw-r--r-- | src/libnetdata/ebpf/ebpf.h | 491 | ||||
-rw-r--r-- | src/libnetdata/eval/README.md (renamed from libnetdata/eval/README.md) | 0 | ||||
-rw-r--r-- | src/libnetdata/eval/eval.c (renamed from libnetdata/eval/eval.c) | 298 | ||||
-rw-r--r-- | src/libnetdata/eval/eval.h | 56 | ||||
-rw-r--r-- | src/libnetdata/facets/README.md (renamed from spawn/README.md) | 0 | ||||
-rw-r--r-- | src/libnetdata/facets/facets.c (renamed from libnetdata/facets/facets.c) | 18 | ||||
-rw-r--r-- | src/libnetdata/facets/facets.h (renamed from libnetdata/facets/facets.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/functions_evloop/README.md (renamed from web/api/ilove/README.md) | 0 | ||||
-rw-r--r-- | src/libnetdata/functions_evloop/functions_evloop.c | 440 | ||||
-rw-r--r-- | src/libnetdata/functions_evloop/functions_evloop.h | 156 | ||||
-rw-r--r-- | src/libnetdata/gorilla/README.md (renamed from libnetdata/gorilla/README.md) | 0 | ||||
-rwxr-xr-x | src/libnetdata/gorilla/benchmark.sh (renamed from libnetdata/gorilla/benchmark.sh) | 0 | ||||
-rwxr-xr-x | src/libnetdata/gorilla/fuzzer.sh (renamed from libnetdata/gorilla/fuzzer.sh) | 0 | ||||
-rw-r--r-- | src/libnetdata/gorilla/gorilla.cc (renamed from libnetdata/gorilla/gorilla.cc) | 4 | ||||
-rw-r--r-- | src/libnetdata/gorilla/gorilla.h (renamed from libnetdata/gorilla/gorilla.h) | 4 | ||||
-rw-r--r-- | src/libnetdata/http/content_type.c | 96 | ||||
-rw-r--r-- | src/libnetdata/http/content_type.h | 45 | ||||
-rw-r--r-- | src/libnetdata/http/http_access.c | 186 | ||||
-rw-r--r-- | src/libnetdata/http/http_access.h | 148 | ||||
-rw-r--r-- | src/libnetdata/http/http_defs.c | 245 | ||||
-rw-r--r-- | src/libnetdata/http/http_defs.h | 61 | ||||
-rw-r--r-- | src/libnetdata/inlined.h (renamed from libnetdata/inlined.h) | 109 | ||||
-rw-r--r-- | src/libnetdata/json/README.md | 14 | ||||
-rw-r--r-- | src/libnetdata/json/jsmn.c (renamed from libnetdata/json/jsmn.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/json/jsmn.h (renamed from libnetdata/json/jsmn.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/json/json-c-parser-inline.h | 177 | ||||
-rw-r--r-- | src/libnetdata/json/json.c | 557 | ||||
-rw-r--r-- | src/libnetdata/json/json.h (renamed from libnetdata/json/json.h) | 9 | ||||
-rw-r--r-- | src/libnetdata/july/README.md | 14 | ||||
-rw-r--r-- | src/libnetdata/july/july.c (renamed from libnetdata/july/july.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/july/july.h (renamed from libnetdata/july/july.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/Judy.h (renamed from libnetdata/libjudy/src/Judy.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyCommon/JudyMalloc.c (renamed from libnetdata/libjudy/src/JudyCommon/JudyMalloc.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyCommon/JudyPrivate.h (renamed from libnetdata/libjudy/src/JudyCommon/JudyPrivate.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h (renamed from libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h (renamed from libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyHS/JudyHS.c (renamed from libnetdata/libjudy/src/JudyHS/JudyHS.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyL.h (renamed from libnetdata/libjudy/src/JudyL/JudyL.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLByCount.c (renamed from libnetdata/libjudy/src/JudyL/JudyLByCount.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLCascade.c (renamed from libnetdata/libjudy/src/JudyL/JudyLCascade.c) | 1 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLCount.c (renamed from libnetdata/libjudy/src/JudyL/JudyLCount.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c (renamed from libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLDecascade.c (renamed from libnetdata/libjudy/src/JudyL/JudyLDecascade.c) | 2 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLDel.c (renamed from libnetdata/libjudy/src/JudyL/JudyLDel.c) | 1 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLFirst.c (renamed from libnetdata/libjudy/src/JudyL/JudyLFirst.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLFreeArray.c (renamed from libnetdata/libjudy/src/JudyL/JudyLFreeArray.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLGet.c (renamed from libnetdata/libjudy/src/JudyL/JudyLGet.c) | 2 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLIns.c (renamed from libnetdata/libjudy/src/JudyL/JudyLIns.c) | 1 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLInsArray.c (renamed from libnetdata/libjudy/src/JudyL/JudyLInsArray.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c (renamed from libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLMallocIF.c (renamed from libnetdata/libjudy/src/JudyL/JudyLMallocIF.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLMemActive.c (renamed from libnetdata/libjudy/src/JudyL/JudyLMemActive.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLMemUsed.c (renamed from libnetdata/libjudy/src/JudyL/JudyLMemUsed.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLNext.c (renamed from libnetdata/libjudy/src/JudyL/JudyLNext.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c (renamed from libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLPrev.c (renamed from libnetdata/libjudy/src/JudyL/JudyLPrev.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c (renamed from libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/JudyLTables.c (renamed from libnetdata/libjudy/src/JudyL/JudyLTables.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libjudy/src/JudyL/j__udyLGet.c (renamed from libnetdata/libjudy/src/JudyL/j__udyLGet.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/libnetdata.c (renamed from libnetdata/libnetdata.c) | 188 | ||||
-rw-r--r-- | src/libnetdata/libnetdata.h | 601 | ||||
-rw-r--r-- | src/libnetdata/line_splitter/README.md | 14 | ||||
-rw-r--r-- | src/libnetdata/line_splitter/line_splitter.c (renamed from libnetdata/line_splitter/line_splitter.c) | 9 | ||||
-rw-r--r-- | src/libnetdata/line_splitter/line_splitter.h (renamed from libnetdata/line_splitter/line_splitter.h) | 5 | ||||
-rw-r--r-- | src/libnetdata/linked-lists.h | 133 | ||||
-rw-r--r-- | src/libnetdata/locks/README.md | 107 | ||||
-rw-r--r-- | src/libnetdata/locks/locks.c (renamed from libnetdata/locks/locks.c) | 49 | ||||
-rw-r--r-- | src/libnetdata/locks/locks.h (renamed from libnetdata/locks/locks.h) | 4 | ||||
-rw-r--r-- | src/libnetdata/log/README.md | 223 | ||||
-rw-r--r-- | src/libnetdata/log/journal.c (renamed from libnetdata/log/journal.c) | 14 | ||||
-rw-r--r-- | src/libnetdata/log/journal.h (renamed from libnetdata/log/journal.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/log/log.c (renamed from libnetdata/log/log.c) | 178 | ||||
-rw-r--r-- | src/libnetdata/log/log.h (renamed from libnetdata/log/log.h) | 11 | ||||
-rw-r--r-- | src/libnetdata/log/systemd-cat-native.c (renamed from libnetdata/log/systemd-cat-native.c) | 10 | ||||
-rw-r--r-- | src/libnetdata/log/systemd-cat-native.h (renamed from libnetdata/log/systemd-cat-native.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/log/systemd-cat-native.md (renamed from libnetdata/log/systemd-cat-native.md) | 0 | ||||
-rw-r--r-- | src/libnetdata/maps/local-sockets.h | 1283 | ||||
-rw-r--r-- | src/libnetdata/maps/system-groups.h | 67 | ||||
-rw-r--r-- | src/libnetdata/maps/system-users.h | 67 | ||||
-rw-r--r-- | src/libnetdata/onewayalloc/README.md | 75 | ||||
-rw-r--r-- | src/libnetdata/onewayalloc/onewayalloc.c (renamed from libnetdata/onewayalloc/onewayalloc.c) | 2 | ||||
-rw-r--r-- | src/libnetdata/onewayalloc/onewayalloc.h (renamed from libnetdata/onewayalloc/onewayalloc.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/os.c (renamed from libnetdata/os.c) | 6 | ||||
-rw-r--r-- | src/libnetdata/os.h (renamed from libnetdata/os.h) | 2 | ||||
-rw-r--r-- | src/libnetdata/popen/README.md | 15 | ||||
-rw-r--r-- | src/libnetdata/popen/popen.c (renamed from libnetdata/popen/popen.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/popen/popen.h (renamed from libnetdata/popen/popen.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/procfile/README.md | 71 | ||||
-rw-r--r-- | src/libnetdata/procfile/procfile.c (renamed from libnetdata/procfile/procfile.c) | 2 | ||||
-rw-r--r-- | src/libnetdata/procfile/procfile.h (renamed from libnetdata/procfile/procfile.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/query_progress/README.md (renamed from web/rtc/README.md) | 0 | ||||
-rw-r--r-- | src/libnetdata/query_progress/progress.c | 660 | ||||
-rw-r--r-- | src/libnetdata/query_progress/progress.h | 19 | ||||
-rw-r--r-- | src/libnetdata/required_dummies.h | 30 | ||||
-rw-r--r-- | src/libnetdata/simple_hashtable.h | 544 | ||||
-rw-r--r-- | src/libnetdata/simple_hashtable_undef.h | 35 | ||||
-rw-r--r-- | src/libnetdata/simple_pattern/README.md | 47 | ||||
-rw-r--r-- | src/libnetdata/simple_pattern/simple_pattern.c (renamed from libnetdata/simple_pattern/simple_pattern.c) | 23 | ||||
-rw-r--r-- | src/libnetdata/simple_pattern/simple_pattern.h (renamed from libnetdata/simple_pattern/simple_pattern.h) | 7 | ||||
-rw-r--r-- | src/libnetdata/socket/README.md | 8 | ||||
-rw-r--r-- | src/libnetdata/socket/security.c (renamed from libnetdata/socket/security.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/socket/security.h (renamed from libnetdata/socket/security.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/socket/socket.c (renamed from libnetdata/socket/socket.c) | 92 | ||||
-rw-r--r-- | src/libnetdata/socket/socket.h | 207 | ||||
-rw-r--r-- | src/libnetdata/statistical/README.md | 12 | ||||
-rw-r--r-- | src/libnetdata/statistical/statistical.c (renamed from libnetdata/statistical/statistical.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/statistical/statistical.h (renamed from libnetdata/statistical/statistical.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/storage-point.h | 127 | ||||
-rw-r--r-- | src/libnetdata/storage_number/README.md | 21 | ||||
-rw-r--r-- | src/libnetdata/storage_number/storage_number.c (renamed from libnetdata/storage_number/storage_number.c) | 63 | ||||
-rw-r--r-- | src/libnetdata/storage_number/storage_number.h (renamed from libnetdata/storage_number/storage_number.h) | 12 | ||||
-rw-r--r-- | src/libnetdata/storage_number/tests/test_storage_number.c (renamed from libnetdata/storage_number/tests/test_storage_number.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/string/README.md | 25 | ||||
-rw-r--r-- | src/libnetdata/string/string.c (renamed from libnetdata/string/string.c) | 23 | ||||
-rw-r--r-- | src/libnetdata/string/string.h (renamed from libnetdata/string/string.h) | 3 | ||||
-rw-r--r-- | src/libnetdata/string/utf8.h (renamed from libnetdata/string/utf8.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/template-enum.h | 40 | ||||
-rw-r--r-- | src/libnetdata/tests/test_str2ld.c (renamed from libnetdata/tests/test_str2ld.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/threads/README.md | 12 | ||||
-rw-r--r-- | src/libnetdata/threads/threads.c (renamed from libnetdata/threads/threads.c) | 5 | ||||
-rw-r--r-- | src/libnetdata/threads/threads.h (renamed from libnetdata/threads/threads.h) | 4 | ||||
-rw-r--r-- | src/libnetdata/url/README.md | 14 | ||||
-rw-r--r-- | src/libnetdata/url/url.c (renamed from libnetdata/url/url.c) | 34 | ||||
-rw-r--r-- | src/libnetdata/url/url.h (renamed from libnetdata/url/url.h) | 2 | ||||
-rw-r--r-- | src/libnetdata/uuid/README.md | 13 | ||||
-rw-r--r-- | src/libnetdata/uuid/uuid.c (renamed from libnetdata/uuid/uuid.c) | 20 | ||||
-rw-r--r-- | src/libnetdata/uuid/uuid.h (renamed from libnetdata/uuid/uuid.h) | 18 | ||||
-rw-r--r-- | src/libnetdata/worker_utilization/README.md | 94 | ||||
-rw-r--r-- | src/libnetdata/worker_utilization/worker_utilization.c (renamed from libnetdata/worker_utilization/worker_utilization.c) | 0 | ||||
-rw-r--r-- | src/libnetdata/worker_utilization/worker_utilization.h (renamed from libnetdata/worker_utilization/worker_utilization.h) | 0 | ||||
-rw-r--r-- | src/libnetdata/xxhash.h (renamed from libnetdata/xxhash.h) | 0 |
185 files changed, 15106 insertions, 588 deletions
diff --git a/src/libnetdata/README.md b/src/libnetdata/README.md new file mode 100644 index 000000000..fd2c79730 --- /dev/null +++ b/src/libnetdata/README.md @@ -0,0 +1,14 @@ +<!-- +title: "libnetdata" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/README.md +sidebar_label: "libnetdata" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# libnetdata + +`libnetdata` is a collection of library code that is used by all Netdata `C` programs. + + diff --git a/src/libnetdata/adaptive_resortable_list/README.md b/src/libnetdata/adaptive_resortable_list/README.md new file mode 100644 index 000000000..9aa864c9e --- /dev/null +++ b/src/libnetdata/adaptive_resortable_list/README.md @@ -0,0 +1,103 @@ +<!-- +title: "Adaptive Re-sortable List (ARL)" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/adaptive_resortable_list/README.md +sidebar_label: "Adaptive Re-sortable List (ARL)" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# Adaptive Re-sortable List (ARL) + +This library allows Netdata to read a series of `name - value` pairs +in the **fastest possible way**. + +ARLs are used all over Netdata, as they are the most +CPU utilization efficient way to process `/proc` files. They are used to +process both vertical (csv like) and horizontal (one pair per line) `name - value` pairs. + +## How ARL works + +It maintains a linked list of all `NAME` (keywords), sorted in the +order found in the data source. The linked list is kept +sorted at all times - the data source may change at any time, the +linked list will adapt at the next iteration. 
+ +### Initialization + +During initialization (just once), the caller: + +- calls `arl_create()` to create the ARL + +- calls `arl_expect()` multiple times to register the expected keywords + +The library will call the `processor()` function (given to +`arl_create()`), for each expected keyword found. +The default `processor()` expects `dst` to be an `unsigned long long *`. + +Each `name` keyword may have a different `processor()` (by calling +`arl_expect_custom()` instead of `arl_expect()`). + +### Data collection iterations + +For each iteration through the data source, the caller: + +- calls `arl_begin()` to initiate a data collection iteration. + This is to be called just ONCE every time the source is re-evaluated. + +- calls `arl_check()` for each entry read from the file. + +### Cleanup + +When the caller exits: + +- calls `arl_free()` to destroy this and free all memory. + +### Performance + +ARL maintains a list of `name` keywords found in the data source (even the ones +that are not useful for data collection). + +If the data source maintains the same order on the `name-value` pairs, for each +call to `arl_check()` only an `strcmp()` is executed to verify the +expected order has not changed, a counter is incremented and a pointer is changed. +So, if the data source has 100 `name-value` pairs, and their order remains constant +over time, 100 successful `strcmp()` are executed. + +In the unlikely event that an iteration sees the data source with a different order, +for each out-of-order keyword, a full search of the remaining keywords is made. But +this search uses 32bit hashes, not string comparisons, so it should also be fast. + +When all expectations are satisfied (even in the middle of an iteration), +the call to `arl_check()` will return 1, to signal the caller to stop the loop, +saving valuable CPU resources for the rest of the data source. 
+ +In the following test we used alternative methods to process, **1M times**, +a data source like `/proc/meminfo`, already tokenized, in memory, +to extract the same number of expected metrics: + +|test|code|string comparison|number parsing|duration| +|:--:|:--:|:---------------:|:------------:|:------:| +|1|if-else-if-else-if|`strcmp()`|`strtoull()`|4630.337 ms| +|2|nested loops|inline `simple_hash()` and `strcmp()`|`strtoull()`|1597.481 ms| +|3|nested loops|inline `simple_hash()` and `strcmp()`|`str2ull()`|923.523 ms| +|4|if-else-if-else-if|inline `simple_hash()` and `strcmp()`|`strtoull()`|854.574 ms| +|5|if-else-if-else-if|statement expression `simple_hash()` and `strcmp()`|`strtoull()`|912.013 ms| +|6|if-continue|inline `simple_hash()` and `strcmp()`|`strtoull()`|842.279 ms| +|7|if-else-if-else-if|inline `simple_hash()` and `strcmp()`|`str2ull()`|602.837 ms| +|8|ARL|ARL|`strtoull()`|350.360 ms| +|9|ARL|ARL|`str2ull()`|157.237 ms| + +Compared to unoptimized code (test No 1: 4.6sec): + +- before ARL Netdata was using test No **7** with hashing and a custom `str2ull()` to achieve 602ms. +- the current ARL implementation is test No **9** that needs only 157ms (29 times faster vs unoptimized code, about 4 times faster vs optimized code). + +[Check the source code of this test](https://raw.githubusercontent.com/netdata/netdata/master/tests/profile/benchmark-value-pairs.c). + +## Limitations + +Do not use ARL if a name/keyword may appear more than once in the +source data. 
+ + diff --git a/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c b/src/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c index b645927d4..b645927d4 100644 --- a/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c +++ b/src/libnetdata/adaptive_resortable_list/adaptive_resortable_list.c diff --git a/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h b/src/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h index bca0ff272..bca0ff272 100644 --- a/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h +++ b/src/libnetdata/adaptive_resortable_list/adaptive_resortable_list.h diff --git a/src/libnetdata/aral/README.md b/src/libnetdata/aral/README.md new file mode 100644 index 000000000..d999e820a --- /dev/null +++ b/src/libnetdata/aral/README.md @@ -0,0 +1,173 @@ +<!-- +title: "Array Allocator" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/aral/README.md +sidebar_label: "Array allocator" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# Array Allocator + +Come on! Array allocators are embedded in libc! Why do we need such a thing in Netdata? + +Well, we have a couple of problems to solve: + +1. **Fragmentation** - It is important for Netdata to keep its overall memory footprint as low as possible. libc does an amazing job when the same thread allocates and frees some memory. But it simply cannot do better without knowing the specifics of the application when memory is allocated and freed randomly between threads. +2. **Speed** - Especially when allocations and de-allocations happen across threads, the speed penalty is tremendous. + +In Netdata we have a few moments that are very tough. Imagine collecting 1 million metrics per second. You have a buffer for each metric and append new points there. This works beautifully, of course! But then, when the buffers get full, imagine the situation. 
You suddenly need 1 million buffers, at once! + +To solve this problem we first spread out the buffers. So, the first time each metric asks for a buffer, it gets a smaller one. We added logic there to spread them as evenly as possible across time. Solved? Not exactly! + +We have 3 tiers for each metric. For the metrics of tier 0 (per second resolution) we have a max buffer for 1024 points and every new metric gets a random size between 3 points and 1024. So they are distributed across time. For 1 million metrics, we have about 1000 buffers being created every second. + +But at some point, the end of the minute will come, and suddenly all the metrics will need a new buffer for tier 1 (per minute). Oops! We will spread tier 1 buffers across time too, but the first minute is a tough one. We really need 1 million buffers instantly. + +And if that minute happens to also be the beginning of an hour... tier 2 (per hour) kicks in. For that instant we are going to need 2 million buffers instantly. + +The problem becomes even bigger when we collect 2, or even 10 million metrics... + +To solve it, Netdata uses a special implementation of an array allocator that is tightly integrated with the structures we need. + +## Features + +1. Malloc, or MMAP modes. File based MMAP is also supported to put the data in file backed up shared memory. +2. Fully asynchronous operations. There are just a couple of points where spin-locks protect a few counters and pointers. +3. Optional defragmenter, that once enabled it will make free operation slower while trying to maintain a sorted list of fragments to offer first during allocations. The defragmenter can be enabled / disabled at run time. The defragmenter can hurt performance on application with intense turn-around of allocation, like Netdata dbengine caches. So, it is disabled by default. +4. 
Without the defragmenter enabled, ARAL still tries to keep pages full, but the depth of the search is limited to 3 pages (so, a page with a free slot will either become 1st, 2nd, or 3rd). At the same time, during allocations, ARAL will evaluate the first 2 pages to find the one that is more full than the other, to use it for the new allocation. + +## How it works + +Allocations are organized in pages. Pages have a minimum size (a system page, usually 4KB) and a maximum defined for each different kind of object. + +Initially every page is free. When an allocation request is made, the free space is split, and the first element is reserved. The rest is now considered free space. + +This continues until the page gets full, where a new page is allocated and the process is repeated. + +Each allocation returned has a pointer appended to it. The pointer points to the page the allocation belongs to. + +When a pointer is freed, the page it belongs to is identified, its space is marked free, and it is prepended in a single linked list that resides in the page itself. So, each page has its own list of free slots to use. + +Pages are then on another linked list. This is a double linked list and at its beginning has the pages with free space and at the end the pages that are full. + +When the defragmenter is enabled the pages double linked list is also sorted, like this: the fewer the free slots on a page, the earlier in the linked list the page will be, except if it does not have any free slot, in which case it will be at the end. So, the defragmenter tries to have pages full. + +When a page is entirely free, it is given back to the system immediately. There is no caching of free pages. + + +Parallelism is achieved like this: + +When some threads are waiting for a page to be allocated, free operations are allowed. If a free operation happens before a new page is allocated, any waiting thread will get the slot that is freed on another page. 
+ +Free operations happen in parallel, even for the same page. There is a spin-lock on each page to protect the base pointer of the page's free slots single linked list. But, this is instant. All preparative work happens lockless, then to add the free slot to the page, the page spinlock is acquired, the free slot is prepended to the linked list on the page, the spinlock is released. Such free operations on different pages are totally parallel. + +Once the free operation on a page has finished, the pages double linked list spinlock is acquired to put the page first on that linked list. If the defragmenter is enabled, the spinlock is retained for a little longer, to find the exact position of the page in the linked list. + +During allocations, the reverse order is used. First get the pages double linked list spinlock, get the first page and decrement its free slots counter, then release the spinlock. If the first page does not have any free slots, a page allocation is spawned, without any locks acquired. All threads are spinning waiting for a page with free slots, either from the newly allocated one or from a free operation that may happen in parallel. + +Once a page is acquired, each thread locks its own page to get the first free slot and releases the lock immediately. This is guaranteed to succeed, because when the page was given to that thread its free slots counter was decremented. So, there is a free slot for every thread that got that page. All preparative work to return a pointer to the caller is done lock free. Allocations on different pages are done in parallel, without any intervention between them. + + +## What to expect + +Systems not designed for parallelism achieve their top performance single threaded. The single threaded speed is the baseline. Adding more threads makes them slower. 
+ +The baseline for ARAL is the following, the included stress test when running single threaded: + +``` +Running stress test of 1 threads, with 10000 elements each, for 5 seconds... +2023-01-29 17:04:50: netdata INFO : TH[0] : set name of thread 1314983 to TH[0] +ARAL executes 12.27 M malloc and 12.26 M free operations/s +ARAL executes 12.29 M malloc and 12.29 M free operations/s +ARAL executes 12.30 M malloc and 12.30 M free operations/s +ARAL executes 12.30 M malloc and 12.29 M free operations/s +ARAL executes 12.29 M malloc and 12.29 M free operations/s +Waiting the threads to finish... +2023-01-29 17:04:55: netdata INFO : MAIN : ARAL: did 61487356 malloc, 61487356 free, using 1 threads, in 5003808 usecs +``` + +The same test with 2 threads, both threads on the same ARAL of course. As you see performance improved: + +``` +Running stress test of 2 threads, with 10000 elements each, for 5 seconds... +2023-01-29 17:05:25: netdata INFO : TH[0] : set name of thread 1315537 to TH[0] +2023-01-29 17:05:25: netdata INFO : TH[1] : set name of thread 1315538 to TH[1] +ARAL executes 17.75 M malloc and 17.73 M free operations/s +ARAL executes 17.93 M malloc and 17.93 M free operations/s +ARAL executes 18.17 M malloc and 18.18 M free operations/s +ARAL executes 18.33 M malloc and 18.32 M free operations/s +ARAL executes 18.36 M malloc and 18.36 M free operations/s +Waiting the threads to finish... +2023-01-29 17:05:30: netdata INFO : MAIN : ARAL: did 90976190 malloc, 90976190 free, using 2 threads, in 5029462 usecs +``` + +The same test with 4 threads: + +``` +Running stress test of 4 threads, with 10000 elements each, for 5 seconds... 
+2023-01-29 17:10:12: netdata INFO : TH[0] : set name of thread 1319552 to TH[0] +2023-01-29 17:10:12: netdata INFO : TH[1] : set name of thread 1319553 to TH[1] +2023-01-29 17:10:12: netdata INFO : TH[2] : set name of thread 1319554 to TH[2] +2023-01-29 17:10:12: netdata INFO : TH[3] : set name of thread 1319555 to TH[3] +ARAL executes 19.95 M malloc and 19.91 M free operations/s +ARAL executes 20.08 M malloc and 20.08 M free operations/s +ARAL executes 20.85 M malloc and 20.85 M free operations/s +ARAL executes 20.84 M malloc and 20.84 M free operations/s +ARAL executes 21.37 M malloc and 21.37 M free operations/s +Waiting the threads to finish... +2023-01-29 17:10:17: netdata INFO : MAIN : ARAL: did 103549747 malloc, 103549747 free, using 4 threads, in 5023325 usecs +``` + +The same with 8 threads: + +``` +Running stress test of 8 threads, with 10000 elements each, for 5 seconds... +2023-01-29 17:07:06: netdata INFO : TH[0] : set name of thread 1317608 to TH[0] +2023-01-29 17:07:06: netdata INFO : TH[1] : set name of thread 1317609 to TH[1] +2023-01-29 17:07:06: netdata INFO : TH[2] : set name of thread 1317610 to TH[2] +2023-01-29 17:07:06: netdata INFO : TH[3] : set name of thread 1317611 to TH[3] +2023-01-29 17:07:06: netdata INFO : TH[4] : set name of thread 1317612 to TH[4] +2023-01-29 17:07:06: netdata INFO : TH[5] : set name of thread 1317613 to TH[5] +2023-01-29 17:07:06: netdata INFO : TH[6] : set name of thread 1317614 to TH[6] +2023-01-29 17:07:06: netdata INFO : TH[7] : set name of thread 1317615 to TH[7] +ARAL executes 15.73 M malloc and 15.66 M free operations/s +ARAL executes 13.95 M malloc and 13.94 M free operations/s +ARAL executes 15.59 M malloc and 15.58 M free operations/s +ARAL executes 15.49 M malloc and 15.49 M free operations/s +ARAL executes 16.16 M malloc and 16.16 M free operations/s +Waiting the threads to finish... 
+2023-01-29 17:07:11: netdata INFO : MAIN : ARAL: did 78427750 malloc, 78427750 free, using 8 threads, in 5088591 usecs +``` + +The same with 16 threads: + +``` +Running stress test of 16 threads, with 10000 elements each, for 5 seconds... +2023-01-29 17:08:04: netdata INFO : TH[0] : set name of thread 1318663 to TH[0] +2023-01-29 17:08:04: netdata INFO : TH[1] : set name of thread 1318664 to TH[1] +2023-01-29 17:08:04: netdata INFO : TH[2] : set name of thread 1318665 to TH[2] +2023-01-29 17:08:04: netdata INFO : TH[3] : set name of thread 1318666 to TH[3] +2023-01-29 17:08:04: netdata INFO : TH[4] : set name of thread 1318667 to TH[4] +2023-01-29 17:08:04: netdata INFO : TH[5] : set name of thread 1318668 to TH[5] +2023-01-29 17:08:04: netdata INFO : TH[6] : set name of thread 1318669 to TH[6] +2023-01-29 17:08:04: netdata INFO : TH[7] : set name of thread 1318670 to TH[7] +2023-01-29 17:08:04: netdata INFO : TH[8] : set name of thread 1318671 to TH[8] +2023-01-29 17:08:04: netdata INFO : TH[9] : set name of thread 1318672 to TH[9] +2023-01-29 17:08:04: netdata INFO : TH[10] : set name of thread 1318673 to TH[10] +2023-01-29 17:08:04: netdata INFO : TH[11] : set name of thread 1318674 to TH[11] +2023-01-29 17:08:04: netdata INFO : TH[12] : set name of thread 1318675 to TH[12] +2023-01-29 17:08:04: netdata INFO : TH[13] : set name of thread 1318676 to TH[13] +2023-01-29 17:08:04: netdata INFO : TH[14] : set name of thread 1318677 to TH[14] +2023-01-29 17:08:04: netdata INFO : TH[15] : set name of thread 1318678 to TH[15] +ARAL executes 11.77 M malloc and 11.62 M free operations/s +ARAL executes 12.80 M malloc and 12.81 M free operations/s +ARAL executes 13.26 M malloc and 13.25 M free operations/s +ARAL executes 13.30 M malloc and 13.29 M free operations/s +ARAL executes 13.23 M malloc and 13.25 M free operations/s +Waiting the threads to finish... 
+2023-01-29 17:08:09: netdata INFO : MAIN : ARAL: did 65302122 malloc, 65302122 free, using 16 threads, in 5066009 usecs +``` + +As you can see, the top performance is with 4 threads, almost double the single thread speed. +16 threads performance is still better than single threaded, despite the intense concurrency. diff --git a/libnetdata/aral/aral.c b/src/libnetdata/aral/aral.c index 7223ee359..b8ed47f04 100644 --- a/libnetdata/aral/aral.c +++ b/src/libnetdata/aral/aral.c @@ -464,6 +464,12 @@ static inline ARAL_PAGE *aral_acquire_a_free_slot(ARAL *ar TRACE_ALLOCATIONS_FUN return page; } +void *aral_callocz_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) { + void *r = aral_mallocz_internal(ar TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS); + memset(r, 0, ar->config.requested_element_size); + return r; +} + void *aral_mallocz_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) { #ifdef FSANITIZE_ADDRESS return mallocz(ar->config.requested_element_size); diff --git a/libnetdata/aral/aral.h b/src/libnetdata/aral/aral.h index 96f5a9c44..2e749bc4c 100644 --- a/libnetdata/aral/aral.h +++ b/src/libnetdata/aral/aral.h @@ -46,10 +46,12 @@ int aral_unittest(size_t elements); #ifdef NETDATA_TRACE_ALLOCATIONS +#define aral_callocz(ar) aral_callocz_internal(ar, __FILE__, __FUNCTION__, __LINE__) #define aral_mallocz(ar) aral_mallocz_internal(ar, __FILE__, __FUNCTION__, __LINE__) #define aral_freez(ar, ptr) aral_freez_internal(ar, ptr, __FILE__, __FUNCTION__, __LINE__) #define aral_destroy(ar) aral_destroy_internal(ar, __FILE__, __FUNCTION__, __LINE__) +void *aral_callocz_internal(ARAL *ar, const char *file, const char *function, size_t line); void *aral_mallocz_internal(ARAL *ar, const char *file, const char *function, size_t line); void aral_freez_internal(ARAL *ar, void *ptr, const char *file, const char *function, size_t line); void aral_destroy_internal(ARAL *ar, const char *file, const char *function, size_t line); @@ -57,9 +59,11 @@ void 
aral_destroy_internal(ARAL *ar, const char *file, const char *function, siz #else // NETDATA_TRACE_ALLOCATIONS #define aral_mallocz(ar) aral_mallocz_internal(ar) +#define aral_callocz(ar) aral_callocz_internal(ar) #define aral_freez(ar, ptr) aral_freez_internal(ar, ptr) #define aral_destroy(ar) aral_destroy_internal(ar) +void *aral_callocz_internal(ARAL *ar); void *aral_mallocz_internal(ARAL *ar); void aral_freez_internal(ARAL *ar, void *ptr); void aral_destroy_internal(ARAL *ar); diff --git a/src/libnetdata/avl/README.md b/src/libnetdata/avl/README.md new file mode 100644 index 000000000..eb85f884e --- /dev/null +++ b/src/libnetdata/avl/README.md @@ -0,0 +1,21 @@ +<!-- +title: "AVL" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/avl/README.md +sidebar_label: "AVL" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# AVL + +AVL is a library indexing objects in AVL trees (self-balancing binary search trees). + +`avl_insert()`, `avl_remove()` and `avl_search()` are adaptations +of the AVL algorithm found in `libavl` v2.0.3, so that they do not +use any memory allocations and their memory footprint is optimized +(by eliminating unnecessary data members). + +In addition to the above, this version of AVL provides versions using locks +and traversal functions. 
+ diff --git a/libnetdata/avl/avl.c b/src/libnetdata/avl/avl.c index eef4c3116..eef4c3116 100644 --- a/libnetdata/avl/avl.c +++ b/src/libnetdata/avl/avl.c diff --git a/libnetdata/avl/avl.h b/src/libnetdata/avl/avl.h index 595d6ec6c..595d6ec6c 100644 --- a/libnetdata/avl/avl.h +++ b/src/libnetdata/avl/avl.h diff --git a/src/libnetdata/bitmap.h b/src/libnetdata/bitmap.h new file mode 100644 index 000000000..184f94cb6 --- /dev/null +++ b/src/libnetdata/bitmap.h @@ -0,0 +1,81 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_BITMAP_H +#define NETDATA_BITMAP_H + +#ifdef ENV32BIT + +typedef struct bitmapX { + uint32_t bits; + uint32_t data[]; +} BITMAPX; + +typedef struct bitmap256 { + uint32_t bits; + uint32_t data[256 / 32]; +} BITMAP256; + +typedef struct bitmap1024 { + uint32_t bits; + uint32_t data[1024 / 32]; +} BITMAP1024; + +static inline BITMAPX *bitmapX_create(uint32_t bits) { + BITMAPX *bmp = (BITMAPX *)callocz(1, sizeof(BITMAPX) + sizeof(uint32_t) * ((bits + 31) / 32)); + uint32_t *p = (uint32_t *)&bmp->bits; + *p = bits; + return bmp; +} + +#define bitmapX_get_bit(ptr, idx) ((ptr)->data[(idx) >> 5] & (1U << ((idx) & 31))) +#define bitmapX_set_bit(ptr, idx, value) do { \ + register uint32_t _bitmask = 1U << ((idx) & 31); \ + if (value) \ + (ptr)->data[(idx) >> 5] |= _bitmask; \ + else \ + (ptr)->data[(idx) >> 5] &= ~_bitmask; \ +} while(0) + +#else // 64bit version of bitmaps + +typedef struct bitmapX { + uint32_t bits; + uint64_t data[]; +} BITMAPX; + +typedef struct bitmap256 { + uint32_t bits; + uint64_t data[256 / 64]; +} BITMAP256; + +typedef struct bitmap1024 { + uint32_t bits; + uint64_t data[1024 / 64]; +} BITMAP1024; + +static inline BITMAPX *bitmapX_create(uint32_t bits) { + BITMAPX *bmp = (BITMAPX *)callocz(1, sizeof(BITMAPX) + sizeof(uint64_t) * ((bits + 63) / 64)); + bmp->bits = bits; + return bmp; +} + +#define bitmapX_get_bit(ptr, idx) ((ptr)->data[(idx) >> 6] & (1ULL << ((idx) & 63))) +#define bitmapX_set_bit(ptr, idx, value) 
do { \ + register uint64_t _bitmask = 1ULL << ((idx) & 63); \ + if (value) \ + (ptr)->data[(idx) >> 6] |= _bitmask; \ + else \ + (ptr)->data[(idx) >> 6] &= ~_bitmask; \ +} while(0) + +#endif // 64bit version of bitmaps + +#define BITMAPX_INITIALIZER(wanted_bits) { .bits = (wanted_bits), .data = {0} } +#define BITMAP256_INITIALIZER (BITMAP256)BITMAPX_INITIALIZER(256) +#define BITMAP1024_INITIALIZER (BITMAP1024)BITMAPX_INITIALIZER(1024) +#define bitmap256_get_bit(ptr, idx) bitmapX_get_bit((BITMAPX *)ptr, idx) +#define bitmap256_set_bit(ptr, idx, value) bitmapX_set_bit((BITMAPX *)ptr, idx, value) +#define bitmap1024_get_bit(ptr, idx) bitmapX_get_bit((BITMAPX *)ptr, idx) +#define bitmap1024_set_bit(ptr, idx, value) bitmapX_set_bit((BITMAPX *)ptr, idx, value) + +#endif //NETDATA_BITMAP_H diff --git a/src/libnetdata/buffer/README.md b/src/libnetdata/buffer/README.md new file mode 100644 index 000000000..a7850df72 --- /dev/null +++ b/src/libnetdata/buffer/README.md @@ -0,0 +1,20 @@ +<!-- +title: "BUFFER" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/buffer/README.md +sidebar_label: "BUFFER library" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# BUFFER + +`BUFFER` is a convenience library for working with strings in `C`. +Mainly, `BUFFER`s eliminate the need for tracking the string length, thus providing +a safe alternative for string operations. + +Also, they are super fast in printing and appending data to the string and its `buffer_strlen()` +is just a lookup (it does not traverse the string). + +Netdata uses `BUFFER`s for preparing web responses and buffering data to be sent upstream or +to external databases. 
diff --git a/libnetdata/buffer/buffer.c b/src/libnetdata/buffer/buffer.c index 64f9cce47..119216dd9 100644 --- a/libnetdata/buffer/buffer.c +++ b/src/libnetdata/buffer/buffer.c @@ -259,23 +259,23 @@ void buffer_free(BUFFER *b) { void buffer_increase(BUFFER *b, size_t free_size_required) { buffer_overflow_check(b); - size_t left = b->size - b->len; - if(left >= free_size_required) return; + size_t remaining = b->size - b->len; + if(remaining >= free_size_required) return; - size_t wanted = free_size_required - left; - size_t minimum = WEB_DATA_LENGTH_INCREASE_STEP; - if(minimum > wanted) wanted = minimum; + size_t increase = free_size_required - remaining; + size_t minimum = 128; + if(minimum > increase) increase = minimum; size_t optimal = (b->size > 5*1024*1024) ? b->size / 2 : b->size; - if(optimal > wanted) wanted = optimal; + if(optimal > increase) increase = optimal; - netdata_log_debug(D_WEB_BUFFER, "Increasing data buffer from size %zu to %zu.", b->size, b->size + wanted); + netdata_log_debug(D_WEB_BUFFER, "Increasing data buffer from size %zu to %zu.", b->size, b->size + increase); - b->buffer = reallocz(b->buffer, b->size + wanted + sizeof(BUFFER_OVERFLOW_EOF) + 2); - b->size += wanted; + b->buffer = reallocz(b->buffer, b->size + increase + sizeof(BUFFER_OVERFLOW_EOF) + 2); + b->size += increase; if(b->statistics) - __atomic_add_fetch(b->statistics, wanted, __ATOMIC_RELAXED); + __atomic_add_fetch(b->statistics, increase, __ATOMIC_RELAXED); buffer_overflow_init(b); buffer_overflow_check(b); @@ -341,8 +341,10 @@ __attribute__((constructor)) void initialize_ascii_maps(void) { base64_value_from_ascii[i] = 255; } - for(size_t i = 0; i < 16 ; i++) - hex_value_from_ascii[(int)hex_digits[i]] = i; + for(size_t i = 0; i < 16 ; i++) { + hex_value_from_ascii[(int)toupper(hex_digits[i])] = i; + hex_value_from_ascii[(int)tolower(hex_digits[i])] = i; + } for(size_t i = 0; i < 64 ; i++) base64_value_from_ascii[(int)base64_digits[i]] = i; @@ -492,12 +494,3 @@ int 
buffer_unittest(void) { buffer_free(wb); return errors; } - -#ifdef ENABLE_H2O -h2o_iovec_t buffer_to_h2o_iovec(BUFFER *wb) { - h2o_iovec_t ret; - ret.base = wb->buffer; - ret.len = wb->len; - return ret; -} -#endif diff --git a/libnetdata/buffer/buffer.h b/src/libnetdata/buffer/buffer.h index 88d3f0282..900907d49 100644 --- a/libnetdata/buffer/buffer.h +++ b/src/libnetdata/buffer/buffer.h @@ -6,12 +6,6 @@ #include "../string/utf8.h" #include "../libnetdata.h" -#ifdef ENABLE_H2O -#include "h2o/memory.h" -#endif - -#define WEB_DATA_LENGTH_INCREASE_STEP 1024 - #define BUFFER_JSON_MAX_DEPTH 32 // max is 255 extern const char hex_digits[16]; @@ -38,37 +32,6 @@ typedef enum __attribute__ ((__packed__)) { } BUFFER_OPTIONS; typedef enum __attribute__ ((__packed__)) { - CT_NONE = 0, - CT_APPLICATION_JSON, - CT_TEXT_PLAIN, - CT_TEXT_HTML, - CT_APPLICATION_X_JAVASCRIPT, - CT_TEXT_CSS, - CT_TEXT_XML, - CT_APPLICATION_XML, - CT_TEXT_XSL, - CT_APPLICATION_OCTET_STREAM, - CT_APPLICATION_X_FONT_TRUETYPE, - CT_APPLICATION_X_FONT_OPENTYPE, - CT_APPLICATION_FONT_WOFF, - CT_APPLICATION_FONT_WOFF2, - CT_APPLICATION_VND_MS_FONTOBJ, - CT_IMAGE_SVG_XML, - CT_IMAGE_PNG, - CT_IMAGE_JPG, - CT_IMAGE_GIF, - CT_IMAGE_XICON, - CT_IMAGE_ICNS, - CT_IMAGE_BMP, - CT_PROMETHEUS, - CT_AUDIO_MPEG, - CT_AUDIO_OGG, - CT_VIDEO_MP4, - CT_APPLICATION_PDF, - CT_APPLICATION_ZIP, -} HTTP_CONTENT_TYPE; - -typedef enum __attribute__ ((__packed__)) { BUFFER_JSON_OPTIONS_DEFAULT = 0, BUFFER_JSON_OPTIONS_MINIFY = (1 << 0), BUFFER_JSON_OPTIONS_NEWLINE_ON_ARRAY_ITEMS = (1 << 1), @@ -109,7 +72,7 @@ typedef struct web_buffer { #define buffer_overflow_check(b) #endif -static inline void _buffer_overflow_check(BUFFER *b) { +static inline void _buffer_overflow_check(BUFFER *b __maybe_unused) { assert(b->len <= b->size && "BUFFER: length is above buffer size."); @@ -150,10 +113,6 @@ void buffer_char_replace(BUFFER *wb, char from, char to); void buffer_print_sn_flags(BUFFER *wb, SN_FLAGS flags, bool send_anomaly_bit); 
-#ifdef ENABLE_H2O -h2o_iovec_t buffer_to_h2o_iovec(BUFFER *wb); -#endif - static inline void buffer_need_bytes(BUFFER *buffer, size_t needed_free_size) { if(unlikely(buffer->len + needed_free_size >= buffer->size)) buffer_increase(buffer, needed_free_size + 1); @@ -166,6 +125,9 @@ void buffer_json_finalize(BUFFER *wb); static const char *buffer_tostring(BUFFER *wb) { + if(unlikely(!wb)) + return NULL; + buffer_need_bytes(wb, 1); wb->buffer[wb->len] = '\0'; @@ -309,7 +271,8 @@ static inline void buffer_memcat(BUFFER *wb, const void *mem, size_t bytes) { buffer_overflow_check(wb); } -static inline void buffer_json_strcat(BUFFER *wb, const char *txt) { +static inline void buffer_json_strcat(BUFFER *wb, const char *txt) +{ if(unlikely(!txt || !*txt)) return; const unsigned char *t = (const unsigned char *)txt; @@ -871,6 +834,26 @@ static inline void buffer_json_add_array_item_string(BUFFER *wb, const char *val wb->json.stack[wb->json.depth].count++; } +static inline void buffer_json_add_array_item_uuid(BUFFER *wb, uuid_t *value) { + if(value && !uuid_is_null(*value)) { + char uuid[GUID_LEN + 1]; + uuid_unparse_lower(*value, uuid); + buffer_json_add_array_item_string(wb, uuid); + } + else + buffer_json_add_array_item_string(wb, NULL); +} + +static inline void buffer_json_add_array_item_uuid_compact(BUFFER *wb, uuid_t *value) { + if(value && !uuid_is_null(*value)) { + char uuid[GUID_LEN + 1]; + uuid_unparse_lower_compact(*value, uuid); + buffer_json_add_array_item_string(wb, uuid); + } + else + buffer_json_add_array_item_string(wb, NULL); +} + static inline void buffer_json_add_array_item_double(BUFFER *wb, NETDATA_DOUBLE value) { buffer_print_json_comma_newline_spacing(wb); @@ -1225,4 +1208,26 @@ buffer_rrdf_table_add_field(BUFFER *wb, size_t field_id, const char *key, const buffer_json_object_close(wb); } +static inline void buffer_copy(BUFFER *dst, BUFFER *src) { + if(!src || !dst) + return; + + buffer_contents_replace(dst, buffer_tostring(src), buffer_strlen(src)); 
+ + dst->content_type = src->content_type; + dst->options = src->options; + dst->date = src->date; + dst->expires = src->expires; + dst->json = src->json; +} + +static inline BUFFER *buffer_dup(BUFFER *src) { + if(!src) + return NULL; + + BUFFER *dst = buffer_create(buffer_strlen(src) + 1, src->statistics); + buffer_copy(dst, src); + return dst; +} + #endif /* NETDATA_WEB_BUFFER_H */ diff --git a/mqtt_websockets/c_rhash/README.md b/src/libnetdata/buffered_reader/README.md index e69de29bb..e69de29bb 100644 --- a/mqtt_websockets/c_rhash/README.md +++ b/src/libnetdata/buffered_reader/README.md diff --git a/libnetdata/buffered_reader/buffered_reader.c b/src/libnetdata/buffered_reader/buffered_reader.c index 7cd17abfe..7cd17abfe 100644 --- a/libnetdata/buffered_reader/buffered_reader.c +++ b/src/libnetdata/buffered_reader/buffered_reader.c diff --git a/libnetdata/buffered_reader/buffered_reader.h b/src/libnetdata/buffered_reader/buffered_reader.h index 4db57cd29..4db57cd29 100644 --- a/libnetdata/buffered_reader/buffered_reader.h +++ b/src/libnetdata/buffered_reader/buffered_reader.h diff --git a/src/libnetdata/byteorder.h b/src/libnetdata/byteorder.h new file mode 100644 index 000000000..28fc9e663 --- /dev/null +++ b/src/libnetdata/byteorder.h @@ -0,0 +1,32 @@ +#ifndef LIBNETDATA_BYTE_ORDER_H +#define LIBNETDATA_BYTE_ORDER_H + +/** compatibility header for endian.h + * This is a simple compatibility shim to convert + * BSD/Linux endian macros to the Mac OS X equivalents. + * It is public domain. 
+ * */ + +#ifndef __APPLE__ +#error "This header file (endian.h) is MacOS X specific.\n" +#endif /* __APPLE__ */ + + +#include <libkern/OSByteOrder.h> + +#define htobe16(x) OSSwapHostToBigInt16(x) +#define htole16(x) OSSwapHostToLittleInt16(x) +#define be16toh(x) OSSwapBigToHostInt16(x) +#define le16toh(x) OSSwapLittleToHostInt16(x) + +#define htobe32(x) OSSwapHostToBigInt32(x) +#define htole32(x) OSSwapHostToLittleInt32(x) +#define be32toh(x) OSSwapBigToHostInt32(x) +#define le32toh(x) OSSwapLittleToHostInt32(x) + +#define htobe64(x) OSSwapHostToBigInt64(x) +#define htole64(x) OSSwapHostToLittleInt64(x) +#define be64toh(x) OSSwapBigToHostInt64(x) +#define le64toh(x) OSSwapLittleToHostInt64(x) + +#endif /* LIBNETDATA_BYTE_ORDER_H */ diff --git a/src/libnetdata/circular_buffer/README.md b/src/libnetdata/circular_buffer/README.md new file mode 100644 index 000000000..b2d580cb9 --- /dev/null +++ b/src/libnetdata/circular_buffer/README.md @@ -0,0 +1,14 @@ +<!-- +title: "Circular Buffer" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/circular_buffer/README.md +sidebar_label: "Circular Buffer" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# Circular Buffer + +`struct circular_buffer` is an adaptive circular buffer. It will start at an initial size +and grow up to a maximum size as it fills. Two indices within the structure track the current +`read` and `write` position for data. 
diff --git a/libnetdata/circular_buffer/circular_buffer.c b/src/libnetdata/circular_buffer/circular_buffer.c index 7ffe6b8bc..7ffe6b8bc 100644 --- a/libnetdata/circular_buffer/circular_buffer.c +++ b/src/libnetdata/circular_buffer/circular_buffer.c diff --git a/libnetdata/circular_buffer/circular_buffer.h b/src/libnetdata/circular_buffer/circular_buffer.h index 9d29a84d7..9d29a84d7 100644 --- a/libnetdata/circular_buffer/circular_buffer.h +++ b/src/libnetdata/circular_buffer/circular_buffer.h diff --git a/src/libnetdata/clocks/README.md b/src/libnetdata/clocks/README.md new file mode 100644 index 000000000..0ede05e83 --- /dev/null +++ b/src/libnetdata/clocks/README.md @@ -0,0 +1,10 @@ +<!-- +custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/libnetdata/clocks/README.md" +title: "Clocks" +sidebar_label: "Clocks" +learn_status: "Published" +learn_topic_type: "References" +learn_rel_path: "Developers/libnetdata" +--> + +# Clocks
\ No newline at end of file diff --git a/libnetdata/clocks/clocks.c b/src/libnetdata/clocks/clocks.c index adbad045f..12f8f72e6 100644 --- a/libnetdata/clocks/clocks.c +++ b/src/libnetdata/clocks/clocks.c @@ -60,7 +60,7 @@ static usec_t get_clock_resolution(clockid_t clock) { if(clock_getres(clock, &ts) == 0) { usec_t ret = (usec_t)ts.tv_sec * USEC_PER_SEC + (usec_t)ts.tv_nsec / NSEC_PER_USEC; - if(!ret && ts.tv_nsec > 0 && ts.tv_nsec < NSEC_PER_USEC) + if(!ret && ts.tv_nsec > 0 && ts.tv_nsec < (long int)NSEC_PER_USEC) return (usec_t)1; else if(ret > MAX_CLOCK_RESOLUTION_UT) { @@ -243,7 +243,7 @@ void sleep_to_absolute_time(usec_t usec) { sleep_usec(usec); } } -}; +} #endif #define HEARTBEAT_ALIGNMENT_STATISTICS_SIZE 10 diff --git a/libnetdata/clocks/clocks.h b/src/libnetdata/clocks/clocks.h index 2beb14ed9..f989fd6b8 100644 --- a/libnetdata/clocks/clocks.h +++ b/src/libnetdata/clocks/clocks.h @@ -5,15 +5,14 @@ #include "../libnetdata.h" -#ifndef HAVE_STRUCT_TIMESPEC +#ifndef HAVE_CLOCK_GETTIME struct timespec { time_t tv_sec; /* seconds */ long tv_nsec; /* nanoseconds */ }; #endif -#ifndef HAVE_CLOCKID_T -typedef int clockid_t; +#ifndef HAVE_CLOCK_GETTIME #endif typedef uint64_t nsec_t; @@ -143,7 +142,7 @@ usec_t heartbeat_next(heartbeat_t *hb, usec_t tick); void heartbeat_statistics(usec_t *min_ptr, usec_t *max_ptr, usec_t *average_ptr, size_t *count_ptr); void sleep_usec_with_now(usec_t usec, usec_t started_ut); -#define sleep_usec(usec) sleep_usec_with_now(usec, 0); +#define sleep_usec(usec) sleep_usec_with_now(usec, 0) void clocks_init(void); diff --git a/src/libnetdata/completion/completion.c b/src/libnetdata/completion/completion.c new file mode 100644 index 000000000..113423835 --- /dev/null +++ b/src/libnetdata/completion/completion.c @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "completion.h" + +void completion_init(struct completion *p) +{ + p->completed = 0; + p->completed_jobs = 0; + fatal_assert(0 == 
uv_cond_init(&p->cond)); + fatal_assert(0 == uv_mutex_init(&p->mutex)); +} + +void completion_destroy(struct completion *p) +{ + uv_cond_destroy(&p->cond); + uv_mutex_destroy(&p->mutex); +} + +void completion_wait_for(struct completion *p) +{ + uv_mutex_lock(&p->mutex); + while (0 == p->completed) { + uv_cond_wait(&p->cond, &p->mutex); + } + fatal_assert(1 == p->completed); + uv_mutex_unlock(&p->mutex); +} + +bool completion_timedwait_for(struct completion *p, uint64_t timeout) +{ + timeout *= NSEC_PER_SEC; + + uint64_t start_time = uv_hrtime(); + bool result = true; + + uv_mutex_lock(&p->mutex); + while (!p->completed) { + int rc = uv_cond_timedwait(&p->cond, &p->mutex, timeout); + + if (rc == 0) { + result = true; + break; + } else if (rc == UV_ETIMEDOUT) { + result = false; + break; + } + + /* + * handle spurious wakeups + */ + + uint64_t elapsed = uv_hrtime() - start_time; + if (elapsed >= timeout) { + result = false; + break; + } + timeout -= elapsed; + } + uv_mutex_unlock(&p->mutex); + + return result; +} + +void completion_mark_complete(struct completion *p) +{ + uv_mutex_lock(&p->mutex); + p->completed = 1; + uv_cond_broadcast(&p->cond); + uv_mutex_unlock(&p->mutex); +} + +unsigned completion_wait_for_a_job(struct completion *p, unsigned completed_jobs) +{ + uv_mutex_lock(&p->mutex); + while (0 == p->completed && p->completed_jobs <= completed_jobs) { + uv_cond_wait(&p->cond, &p->mutex); + } + completed_jobs = p->completed_jobs; + uv_mutex_unlock(&p->mutex); + + return completed_jobs; +} + +void completion_mark_complete_a_job(struct completion *p) +{ + uv_mutex_lock(&p->mutex); + p->completed_jobs++; + uv_cond_broadcast(&p->cond); + uv_mutex_unlock(&p->mutex); +} + +bool completion_is_done(struct completion *p) +{ + bool ret; + uv_mutex_lock(&p->mutex); + ret = p->completed; + uv_mutex_unlock(&p->mutex); + return ret; +} diff --git a/libnetdata/completion/completion.h b/src/libnetdata/completion/completion.h index 723f73688..908ccfaf6 100644 --- 
a/libnetdata/completion/completion.h +++ b/src/libnetdata/completion/completion.h @@ -18,6 +18,10 @@ void completion_destroy(struct completion *p); void completion_wait_for(struct completion *p); +// Wait for at most `timeout` seconds. Return true on success, false on +// error or timeout. +bool completion_timedwait_for(struct completion *p, uint64_t timeout); + void completion_mark_complete(struct completion *p); unsigned completion_wait_for_a_job(struct completion *p, unsigned completed_jobs); diff --git a/src/libnetdata/config/README.md b/src/libnetdata/config/README.md new file mode 100644 index 000000000..665a7196c --- /dev/null +++ b/src/libnetdata/config/README.md @@ -0,0 +1,58 @@ +<!-- +title: "Netdata ini config files" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/config/README.md +sidebar_label: "Netdata ini config files" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# Netdata ini config files + +Configuration files `netdata.conf` and `stream.conf` are Netdata ini files. + +## Motivation + +The whole idea came up when we were evaluating the documentation involved +in maintaining a complex configuration system. Our intention was to give +configuration options for everything imaginable. But then, documenting all +these options would require a tremendous amount of time, users would have +to search through endless pages for the option they need, etc. + +We concluded then that **configuring software like that is a waste of time +and effort**. Of course there must be plenty of configuration options, but +the implementation itself should require a lot less effort for both the +developers and the users. + +So, we did this: + +1. No configuration is required to run Netdata +2. There are plenty of options to tweak +3. There is minimal documentation (or none at all) + +## Why this works? + +The configuration file is a `name = value` dictionary with `[sections]`. 
+Write whatever you like there as long as it follows this simple format. + +Netdata loads this dictionary and then, when the code needs a value from +it, it just looks up the `name` in the dictionary at the proper `section`. +Everywhere in the code, both the `names` and their +`default values` are present, so if something is not found in the configuration +file, the default is used. The lookup is made using B-Trees and hashes +(no string comparisons), so it is super fast. Also the `names` of the +settings can be `my super duper setting that once set to yes, will turn the world upside down = no` + +— so goodbye to most of the documentation involved. + +Next, Netdata can generate a valid configuration for the user to edit. +No need to remember anything or copy and paste settings. Just get the +configuration from the server (`/netdata.conf` on your Netdata server), +edit it and save it. + +Last, what about options you believe you have set, but misspelled? +When you get the configuration file from the server, there will be a +comment above every `name = value` pair the server does not use. +So you know that whatever you wrote there is not used. 
+ + diff --git a/libnetdata/config/appconfig.c b/src/libnetdata/config/appconfig.c index fe4c1222d..fe4c1222d 100644 --- a/libnetdata/config/appconfig.c +++ b/src/libnetdata/config/appconfig.c diff --git a/libnetdata/config/appconfig.h b/src/libnetdata/config/appconfig.h index 214a15edd..214a15edd 100644 --- a/libnetdata/config/appconfig.h +++ b/src/libnetdata/config/appconfig.h diff --git a/src/libnetdata/config/dyncfg.c b/src/libnetdata/config/dyncfg.c new file mode 100644 index 000000000..0d6d5058d --- /dev/null +++ b/src/libnetdata/config/dyncfg.c @@ -0,0 +1,298 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +// ---------------------------------------------------------------------------- + +static struct { + DYNCFG_TYPE type; + const char *name; +} dyncfg_types[] = { + { .type = DYNCFG_TYPE_SINGLE, .name = "single" }, + { .type = DYNCFG_TYPE_TEMPLATE, .name = "template" }, + { .type = DYNCFG_TYPE_JOB, .name = "job" }, +}; + +DYNCFG_TYPE dyncfg_type2id(const char *type) { + if(!type || !*type) + return DYNCFG_TYPE_SINGLE; + + size_t entries = sizeof(dyncfg_types) / sizeof(dyncfg_types[0]); + for(size_t i = 0; i < entries ;i++) { + if(strcmp(dyncfg_types[i].name, type) == 0) + return dyncfg_types[i].type; + } + + return DYNCFG_TYPE_SINGLE; +} + +const char *dyncfg_id2type(DYNCFG_TYPE type) { + size_t entries = sizeof(dyncfg_types) / sizeof(dyncfg_types[0]); + for(size_t i = 0; i < entries ;i++) { + if(type == dyncfg_types[i].type) + return dyncfg_types[i].name; + } + + return "single"; +} + +// ---------------------------------------------------------------------------- + +static struct { + DYNCFG_SOURCE_TYPE source_type; + const char *name; +} dyncfg_source_types[] = { + { .source_type = DYNCFG_SOURCE_TYPE_INTERNAL, .name = "internal" }, + { .source_type = DYNCFG_SOURCE_TYPE_STOCK, .name = "stock" }, + { .source_type = DYNCFG_SOURCE_TYPE_USER, .name = "user" }, + { .source_type = DYNCFG_SOURCE_TYPE_DYNCFG, .name = "dyncfg" }, + 
{ .source_type = DYNCFG_SOURCE_TYPE_DISCOVERED, .name = "discovered" }, +}; + +DYNCFG_SOURCE_TYPE dyncfg_source_type2id(const char *source_type) { + if(!source_type || !*source_type) + return DYNCFG_SOURCE_TYPE_INTERNAL; + + size_t entries = sizeof(dyncfg_source_types) / sizeof(dyncfg_source_types[0]); + for(size_t i = 0; i < entries ;i++) { + if(strcmp(dyncfg_source_types[i].name, source_type) == 0) + return dyncfg_source_types[i].source_type; + } + + return DYNCFG_SOURCE_TYPE_INTERNAL; +} + +const char *dyncfg_id2source_type(DYNCFG_SOURCE_TYPE source_type) { + size_t entries = sizeof(dyncfg_source_types) / sizeof(dyncfg_source_types[0]); + for(size_t i = 0; i < entries ;i++) { + if(source_type == dyncfg_source_types[i].source_type) + return dyncfg_source_types[i].name; + } + + return "internal"; +} + +// ---------------------------------------------------------------------------- + +static struct { + DYNCFG_STATUS status; + const char *name; +} dyncfg_statuses[] = { + { .status = DYNCFG_STATUS_NONE, .name = "none" }, + { .status = DYNCFG_STATUS_ACCEPTED, .name = "accepted" }, + { .status = DYNCFG_STATUS_RUNNING, .name = "running" }, + { .status = DYNCFG_STATUS_FAILED, .name = "failed" }, + { .status = DYNCFG_STATUS_DISABLED, .name = "disabled" }, + { .status = DYNCFG_STATUS_ORPHAN, .name = "orphan" }, + { .status = DYNCFG_STATUS_INCOMPLETE, .name = "incomplete" }, +}; + +DYNCFG_STATUS dyncfg_status2id(const char *status) { + if(!status || !*status) + return DYNCFG_STATUS_NONE; + + size_t entries = sizeof(dyncfg_statuses) / sizeof(dyncfg_statuses[0]); + for(size_t i = 0; i < entries ;i++) { + if(strcmp(dyncfg_statuses[i].name, status) == 0) + return dyncfg_statuses[i].status; + } + + return DYNCFG_STATUS_NONE; +} + +const char *dyncfg_id2status(DYNCFG_STATUS status) { + size_t entries = sizeof(dyncfg_statuses) / sizeof(dyncfg_statuses[0]); + for(size_t i = 0; i < entries ;i++) { + if(status == dyncfg_statuses[i].status) + return dyncfg_statuses[i].name; + } + + 
return "none"; +} + +// ---------------------------------------------------------------------------- + +static struct { + DYNCFG_CMDS cmd; + const char *name; +} cmd_map[] = { + { .cmd = DYNCFG_CMD_GET, .name = "get" }, + { .cmd = DYNCFG_CMD_SCHEMA, .name = "schema" }, + { .cmd = DYNCFG_CMD_UPDATE, .name = "update" }, + { .cmd = DYNCFG_CMD_ADD, .name = "add" }, + { .cmd = DYNCFG_CMD_TEST, .name = "test" }, + { .cmd = DYNCFG_CMD_REMOVE, .name = "remove" }, + { .cmd = DYNCFG_CMD_ENABLE, .name = "enable" }, + { .cmd = DYNCFG_CMD_DISABLE, .name = "disable" }, + { .cmd = DYNCFG_CMD_RESTART, .name = "restart" } +}; + +const char *dyncfg_id2cmd_one(DYNCFG_CMDS cmd) { + for (size_t i = 0; i < sizeof(cmd_map) / sizeof(cmd_map[0]); i++) { + if(cmd == cmd_map[i].cmd) + return cmd_map[i].name; + } + + return NULL; +} + +DYNCFG_CMDS dyncfg_cmds2id(const char *cmds) { + if(!cmds || !*cmds) + return DYNCFG_CMD_NONE; + + DYNCFG_CMDS result = DYNCFG_CMD_NONE; + const char *p = cmds; + size_t len, i; + + while (*p) { + // Skip any leading spaces + while (*p == ' ') p++; + + // Find the end of the current word + const char *end = p; + while (*end && *end != ' ') end++; + len = end - p; + + // Compare with known commands + for (i = 0; i < sizeof(cmd_map) / sizeof(cmd_map[0]); i++) { + if (strncmp(p, cmd_map[i].name, len) == 0 && cmd_map[i].name[len] == '\0') { + result |= cmd_map[i].cmd; + break; + } + } + + // Move to the next word + p = end; + } + + return result; +} + +void dyncfg_cmds2fp(DYNCFG_CMDS cmds, FILE *fp) { + for (size_t i = 0; i < sizeof(cmd_map) / sizeof(cmd_map[0]); i++) { + if(cmds & cmd_map[i].cmd) + fprintf(fp, "%s ", cmd_map[i].name); + } +} + +void dyncfg_cmds2json_array(DYNCFG_CMDS cmds, const char *key, BUFFER *wb) { + buffer_json_member_add_array(wb, key); + for (size_t i = 0; i < sizeof(cmd_map) / sizeof(cmd_map[0]); i++) { + if(cmds & cmd_map[i].cmd) + buffer_json_add_array_item_string(wb, cmd_map[i].name); + } + buffer_json_array_close(wb); +} + +void 
dyncfg_cmds2buffer(DYNCFG_CMDS cmds, BUFFER *wb) { + size_t added = 0; + for (size_t i = 0; i < sizeof(cmd_map) / sizeof(cmd_map[0]); i++) { + if(cmds & cmd_map[i].cmd) { + if(added) + buffer_fast_strcat(wb, " ", 1); + + buffer_strcat(wb, cmd_map[i].name); + added++; + } + } +} + +// ---------------------------------------------------------------------------- + +bool dyncfg_is_valid_id(const char *id) { + const char *s = id; + + while(*s) { + if(isspace(*s) || *s == '\'') return false; + s++; + } + + return true; +} + +char *dyncfg_escape_id_for_filename(const char *id) { + if (id == NULL) return NULL; + + // Allocate memory for the worst case, where every character is escaped. + char *escaped = mallocz(strlen(id) * 3 + 1); // Each char can become '%XX', plus '\0' + if (!escaped) return NULL; + + const char *src = id; + char *dest = escaped; + + while (*src) { + if (*src == '/' || isspace(*src) || !isprint(*src)) { + sprintf(dest, "%%%02X", (unsigned char)*src); + dest += 3; + } else { + *dest++ = *src; + } + src++; + } + + *dest = '\0'; + return escaped; +} + +// ---------------------------------------------------------------------------- + +int dyncfg_default_response(BUFFER *wb, int code, const char *msg) { + buffer_flush(wb); + wb->content_type = CT_APPLICATION_JSON; + wb->expires = now_realtime_sec(); + + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); + buffer_json_member_add_uint64(wb, "status", code); + buffer_json_member_add_string(wb, "message", msg); + buffer_json_finalize(wb); + + return code; +} + +int dyncfg_node_find_and_call(DICTIONARY *dyncfg_nodes, const char *transaction, const char *function, + usec_t *stop_monotonic_ut, bool *cancelled, + BUFFER *payload, HTTP_ACCESS access, const char *source, BUFFER *result) { + if(!function || !*function) + return dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, "command received is empty"); + + char buf[strlen(function) + 1]; + memcpy(buf, function, sizeof(buf)); + + char 
*words[MAX_FUNCTION_PARAMETERS]; // an array of pointers for the words in this line + size_t num_words = quoted_strings_splitter_pluginsd(buf, words, MAX_FUNCTION_PARAMETERS); + + const char *id = get_word(words, num_words, 1); + const char *action = get_word(words, num_words, 2); + const char *add_name = get_word(words, num_words, 3); + + if(!id || !*id) + return dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, "dyncfg node: id is missing from the request"); + + if(!action || !*action) + return dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, "dyncfg node: action is missing from the request"); + + DYNCFG_CMDS cmd = dyncfg_cmds2id(action); + if(cmd == DYNCFG_CMD_NONE) + return dyncfg_default_response(result, HTTP_RESP_BAD_REQUEST, "dyncfg node: action given in request is unknown"); + + const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dyncfg_nodes, id); + if(!item) + return dyncfg_default_response(result, HTTP_RESP_NOT_FOUND, "dyncfg node: id is not found"); + + struct dyncfg_node *df = dictionary_acquired_item_value(item); + + buffer_flush(result); + result->content_type = CT_APPLICATION_JSON; + + int code = df->cb(transaction, id, cmd, add_name, payload, stop_monotonic_ut, cancelled, result, access, source, df->data); + + if(!result->expires) + result->expires = now_realtime_sec(); + + if(!buffer_tostring(result)) + dyncfg_default_response(result, code, ""); + + dictionary_acquired_item_release(dyncfg_nodes, item); + + return code; +} diff --git a/src/libnetdata/config/dyncfg.h b/src/libnetdata/config/dyncfg.h new file mode 100644 index 000000000..eb31f8c25 --- /dev/null +++ b/src/libnetdata/config/dyncfg.h @@ -0,0 +1,88 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef LIBNETDATA_DYNCFG_H +#define LIBNETDATA_DYNCFG_H + +#define DYNCFG_VERSION (size_t)1 + +#define DYNCFG_RESP_SUCCESS(code) (code >= 200 && code <= 299) +#define DYNCFG_RESP_RUNNING 200 // accepted and running +#define DYNCFG_RESP_ACCEPTED 202 // accepted, but not 
running yet +#define DYNCFG_RESP_ACCEPTED_DISABLED 298 // accepted, but is disabled +#define DYNCFG_RESP_ACCEPTED_RESTART_REQUIRED 299 // accepted, but restart is required to apply it + +typedef enum __attribute__((packed)) { + DYNCFG_TYPE_SINGLE = 0, + DYNCFG_TYPE_TEMPLATE, + DYNCFG_TYPE_JOB, +} DYNCFG_TYPE; +DYNCFG_TYPE dyncfg_type2id(const char *type); +const char *dyncfg_id2type(DYNCFG_TYPE type); + +typedef enum __attribute__((packed)) { + DYNCFG_SOURCE_TYPE_INTERNAL = 0, + DYNCFG_SOURCE_TYPE_STOCK, + DYNCFG_SOURCE_TYPE_USER, + DYNCFG_SOURCE_TYPE_DYNCFG, + DYNCFG_SOURCE_TYPE_DISCOVERED, +} DYNCFG_SOURCE_TYPE; +DYNCFG_SOURCE_TYPE dyncfg_source_type2id(const char *source_type); +const char *dyncfg_id2source_type(DYNCFG_SOURCE_TYPE source_type); + +typedef enum __attribute__((packed)) { + DYNCFG_STATUS_NONE = 0, + DYNCFG_STATUS_ACCEPTED, // the plugin has accepted the configuration + DYNCFG_STATUS_RUNNING, // the plugin runs the accepted configuration + DYNCFG_STATUS_FAILED, // the plugin fails to run the accepted configuration + DYNCFG_STATUS_DISABLED, // the configuration is disabled by a user + DYNCFG_STATUS_ORPHAN, // no plugin has claimed this configurations + DYNCFG_STATUS_INCOMPLETE, // a special kind of failed configuration +} DYNCFG_STATUS; +DYNCFG_STATUS dyncfg_status2id(const char *status); +const char *dyncfg_id2status(DYNCFG_STATUS status); + +typedef enum __attribute__((packed)) { + DYNCFG_CMD_NONE = 0, + DYNCFG_CMD_GET = (1 << 0), + DYNCFG_CMD_SCHEMA = (1 << 1), + DYNCFG_CMD_UPDATE = (1 << 2), + DYNCFG_CMD_ADD = (1 << 3), + DYNCFG_CMD_TEST = (1 << 4), + DYNCFG_CMD_REMOVE = (1 << 5), + DYNCFG_CMD_ENABLE = (1 << 6), + DYNCFG_CMD_DISABLE = (1 << 7), + DYNCFG_CMD_RESTART = (1 << 8), +} DYNCFG_CMDS; +DYNCFG_CMDS dyncfg_cmds2id(const char *cmds); +void dyncfg_cmds2buffer(DYNCFG_CMDS cmds, struct web_buffer *wb); +void dyncfg_cmds2json_array(DYNCFG_CMDS cmds, const char *key, struct web_buffer *wb); +void dyncfg_cmds2fp(DYNCFG_CMDS cmds, FILE *fp); +const 
char *dyncfg_id2cmd_one(DYNCFG_CMDS cmd); + +bool dyncfg_is_valid_id(const char *id); +char *dyncfg_escape_id_for_filename(const char *id); + +#include "../clocks/clocks.h" +#include "../buffer/buffer.h" +#include "../dictionary/dictionary.h" + +typedef int (*dyncfg_cb_t)(const char *transaction, const char *id, DYNCFG_CMDS cmd, const char *add_name, + BUFFER *payload, usec_t *stop_monotonic_ut, bool *cancelled, BUFFER *result, + HTTP_ACCESS access, const char *source, void *data); + +struct dyncfg_node { + DYNCFG_TYPE type; + DYNCFG_CMDS cmds; + dyncfg_cb_t cb; + void *data; +}; + +#define dyncfg_nodes_dictionary_create() dictionary_create_advanced(DICT_OPTION_FIXED_SIZE, NULL, sizeof(struct dyncfg_node)) + +int dyncfg_default_response(BUFFER *wb, int code, const char *msg); + +int dyncfg_node_find_and_call(DICTIONARY *dyncfg_nodes, const char *transaction, const char *function, + usec_t *stop_monotonic_ut, bool *cancelled, + BUFFER *payload, HTTP_ACCESS access, const char *source, BUFFER *result); + +#endif //LIBNETDATA_DYNCFG_H diff --git a/src/libnetdata/datetime/README.md b/src/libnetdata/datetime/README.md new file mode 100644 index 000000000..303ba8bf2 --- /dev/null +++ b/src/libnetdata/datetime/README.md @@ -0,0 +1,11 @@ +<!-- +title: "Datetime" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/datetime/README.md +sidebar_label: "Datetime" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# Datetime + +Formatting dates and timestamps. 
diff --git a/libnetdata/datetime/iso8601.c b/src/libnetdata/datetime/iso8601.c index 8e3f4e027..8e3f4e027 100644 --- a/libnetdata/datetime/iso8601.c +++ b/src/libnetdata/datetime/iso8601.c diff --git a/libnetdata/datetime/iso8601.h b/src/libnetdata/datetime/iso8601.h index ce4800963..ce4800963 100644 --- a/libnetdata/datetime/iso8601.h +++ b/src/libnetdata/datetime/iso8601.h diff --git a/libnetdata/datetime/rfc3339.c b/src/libnetdata/datetime/rfc3339.c index 157e340d3..ac9f59597 100644 --- a/libnetdata/datetime/rfc3339.c +++ b/src/libnetdata/datetime/rfc3339.c @@ -27,7 +27,7 @@ size_t rfc3339_datetime_ut(char *buffer, size_t len, usec_t now_ut, size_t fract return 0; } - if (fractional_digits >= 0 && fractional_digits <= 9) { + if (fractional_digits >= 1 && fractional_digits <= 9) { int fractional_part = (int)(now_ut % USEC_PER_SEC); if (fractional_part && len - used_length > fractional_digits + 1) { char format[] = ".%01d"; diff --git a/libnetdata/datetime/rfc3339.h b/src/libnetdata/datetime/rfc3339.h index 88ebb3ec8..88ebb3ec8 100644 --- a/libnetdata/datetime/rfc3339.h +++ b/src/libnetdata/datetime/rfc3339.h diff --git a/libnetdata/datetime/rfc7231.c b/src/libnetdata/datetime/rfc7231.c index 4925ed2c9..4925ed2c9 100644 --- a/libnetdata/datetime/rfc7231.c +++ b/src/libnetdata/datetime/rfc7231.c diff --git a/libnetdata/datetime/rfc7231.h b/src/libnetdata/datetime/rfc7231.h index 5ba93053f..5ba93053f 100644 --- a/libnetdata/datetime/rfc7231.h +++ b/src/libnetdata/datetime/rfc7231.h diff --git a/src/libnetdata/dictionary/README.md b/src/libnetdata/dictionary/README.md new file mode 100644 index 000000000..59a8f6b92 --- /dev/null +++ b/src/libnetdata/dictionary/README.md @@ -0,0 +1,235 @@ +<!-- +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/dictionary/README.md +sidebar_label: "Dictionaries" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# Dictionaries + +Netdata dictionaries 
associate a `name` with a `value`: + +- A `name` can be any string. +- A `value` can be anything. + +Such a pair of a `name` and a `value` constitutes an `item` or an `entry` in the dictionary. + +Dictionaries provide an interface to: + +- **Add** an item to the dictionary +- **Get** an item from the dictionary (provided its `name`) +- **Delete** an item from the dictionary (provided its `name`) +- **Traverse** the list of items in the dictionary + +Dictionaries are **ordered**, meaning that the order in which items have been added is preserved while traversing them. The caller may reverse this order by passing the flag `DICT_OPTION_ADD_IN_FRONT` when creating the dictionary. + +Dictionaries guarantee **uniqueness** of all items added to them, meaning that only one item with a given `name` can exist in the dictionary at any given time. + +Dictionaries are extremely fast in all operations. They are indexing the keys with `JudyHS` and they utilize a double-linked-list for the traversal operations. Deletion is the most expensive operation, usually somewhat slower than insertion. + +## Memory management + +Dictionaries come with 2 memory management options: + +- **Clone** (copy) the `name` and/or the `value` to memory allocated by the dictionary. +- **Link** the `name` and/or the `value`, without allocating any memory for them. + +In **clone** mode, the dictionary guarantees that all operations on the dictionary items, will automatically take care of the memory used by the `name` and/or the `value`. In case the `value` is an object that needs to have user allocated memory, the following callback functions can be registered: + +1. `dictionary_register_insert_callback()` that can be called just after the insertion of an item to the dictionary, or after the replacement of the value of a dictionary item. +2. 
`dictionary_register_delete_callback()` that will be called just prior to the deletion of an item from the dictionary, or prior to the replacement of the value of a dictionary item. +3. `dictionary_register_conflict_callback()` that will be called when `DICT_OPTION_DONT_OVERWRITE_VALUE` is set, and another `value` is attempted to be inserted for the same key. +4. `dictionary_register_react_callback()` that will be called after the `insert` and the `conflict` callbacks. The `conflict` callback is called while the dictionary hash table is available for other threads. + +In **link** mode, the `name` and/or the `value` are just linked to the dictionary item, and it is the user's responsibility to free the memory they use after an item is deleted from the dictionary or when the dictionary is destroyed. + +By default, **clone** mode is used for both the name and the value. + +To use **link** mode for names, add `DICT_OPTION_NAME_LINK_DONT_CLONE` to the flags when creating the dictionary. + +To use **link** mode for values, add `DICT_OPTION_VALUE_LINK_DONT_CLONE` to the flags when creating the dictionary. + +## Locks + +The dictionary allows both **single-threaded** operation (no locks - faster) and **multi-threaded** operation utilizing a read-write lock. + +The default is **multi-threaded**. To enable **single-threaded** add `DICT_OPTION_SINGLE_THREADED` to the flags when creating the dictionary. + +When in **multi-threaded** mode, the dictionaries have 2 independent R/W locks. One for the linked list and one for the hash table (index). An insertion and a deletion will acquire both independently (one after another) for as long as they are needed, but a traversal may hold the linked list lock for longer durations. The hash table (index) lock may be acquired while the linked list is acquired, but not the other way around (and the way the code is structured, it is not technically possible to hold an index lock and then lock the linked list one). 
+ +These locks are R/W locks. They allow multiple readers, but only one writer. + +Unlike POSIX standards, the linked-list lock, allows one writer to lock it multiple times. This has been implemented in such a way, so that a traversal to the items of the dictionary in write-lock mode, allows the writing thread to call `dictionary_set()` or `dictionary_del()`, which alter the dictionary index and the linked list. Especially for the deletion of the currently working item, the dictionary supports delayed removal, so it will remove it from the index immediately and mark it as deleted, so that it can be added to the dictionary again with a different value and the traversal will still proceed from the point it was. + +## Hash table operations + +The dictionary supports the following operations supported by the hash table: + +- `dictionary_set()` to add an item to the dictionary, or change its value. +- `dictionary_get()` and `dictionary_get_and_acquire_item()` to get an item from the dictionary. +- `dictionary_del()` to delete an item from the dictionary. + +For all the calls, there are also `*_advanced()` versions of them, that support more parameters. Check the header file for more information about them. + +## Creation and destruction + +Use `dictionary_create()` to create a dictionary. + +Use `dictionary_destroy()` to destroy a dictionary. When destroyed, a dictionary frees all the memory it has allocated on its own. This can be complemented by the registration of a deletion callback function that can be called upon deletion of each item in the dictionary, which may free additional resources linked to it. + +### dictionary_set() + +This call is used to: + +- **add** an item to the dictionary. +- **reset** the value of an existing item in the dictionary. + +If **resetting** is not desired, add `DICT_OPTION_DONT_OVERWRITE_VALUE` to the flags when creating the dictionary. 
In this case, `dictionary_set()` will return the value of the original item found in the dictionary instead of resetting it and the value passed to the call will be ignored. Optionally a conflict callback function can be registered, to manipulate (probably merge or extend) the original value, based on the new value attempted to be added to the dictionary. + +The format is: + +```c +value = dictionary_set(dict, name, value, value_len); +``` + +Where: + +* `dict` is a pointer to the dictionary previously created. +* `name` is a pointer to a string to be used as the key of this item. The name must not be `NULL` and must not be an empty string `""`. +* `value` is a pointer to the value associated with this item. In **clone** mode, if `value` is `NULL`, a new memory allocation will be made of `value_len` size and will be initialized to zero. +* `value_len` is the size of the `value` data in bytes. If `value_len` is zero, no allocation will be done and the dictionary item will permanently have the `NULL` value. + +### dictionary_get() + +This call is used to get the `value` of an item, given its `name`. It utilizes the hash table (index) for making the lookup. + +For **multi-threaded** operation, the `dictionary_get()` call gets a shared read lock on the index lock (multiple readers are allowed). The linked-list lock is not used. + +In clone mode, the value returned is not guaranteed to be valid, as any other thread may delete the item from the dictionary at any time. To ensure the value will be available, use `dictionary_get_and_acquire_item()`, which uses a reference counter to defer deletes until the item is released with `dictionary_acquired_item_release()`. + +The format is: + +```c +value = dictionary_get(dict, name); +``` + +Where: + +* `dict` is a pointer to the dictionary previously created. +* `name` is a pointer to a string to be used as the key of this item. The name must not be `NULL` and must not be an empty string `""`. 
+ +### dictionary_del() + +This call is used to delete an item from the dictionary, given its name. + +If there is a deletion callback registered to the dictionary (`dictionary_register_delete_callback()`), it is called prior to the actual deletion of the item. + +The format is: + +```c +value = dictionary_del(dict, name); +``` + +Where: + +* `dict` is a pointer to the dictionary previously created. +* `name` is a pointer to a string to be used as the key of this item. The name must not be `NULL` and must not be an empty string `""`. + +### dictionary_get_and_acquire_item() + +This call can be used to search and acquire a dictionary item, while ensuring that it will be available for use, until `dictionary_acquired_item_release()` is called. + +This call **does not return the value** of the dictionary item. It returns an internal pointer to a structure that maintains the reference counter used to protect the actual value. To get the value of the item (the same value as returned by `dictionary_get()`), the function `dictionary_acquired_item_value()` has to be called. + +Example: + +```c +// create the dictionary +DICTIONARY *dict = dictionary_create(DICT_OPTION_NONE); + +// add an item to it +dictionary_set(dict, "name", "value", 6); + +// find the item we added and acquire it +const DICTIONARY_ITEM *item = dictionary_get_and_acquire_item(dict, "name"); + +// extract its value +char *value = (char *)dictionary_acquired_item_value(item); + +// now value points to the string "value" +printf("I got value = '%s'\n", value); + +// release the item, so that it can be deleted +dictionary_acquired_item_release(dict, item); + +// destroy the dictionary +dictionary_destroy(dict); +``` + +When items are acquired, a reference counter is maintained to keep track of how many users exist for it. 
If an item with a non-zero number of users is deleted, it is removed from the index, it can be added again to the index (without conflict), and although it exists in the linked-list, it is not offered during traversal. Garbage collection to actually delete the item happens every time another item is added or removed from the linked-list and items are deleted only if no users are using them. + +If any item is still acquired when the dictionary is destroyed, the destruction of the dictionary is also deferred until all the acquired items are released. When the dictionary is destroyed like that, all operations on the dictionary fail (traversals do not traverse, insertions do not insert, deletions do not delete, searches do not find any items, etc). Once the last item in the dictionary is released, the dictionary is automatically destroyed too. + +## Traversal + +Dictionaries offer 3 ways to traverse the entire dictionary: + +- **walkthrough**, implemented by setting a callback function to be called for every item. +- **sorted walkthrough**, which first sorts the dictionary and then calls a callback function for every item. +- **foreach**, a way to traverse the dictionary with a for-next loop. + +All these methods are available in **read**, **write**, or **reentrant** mode. In **read** mode only lookups are allowed to the dictionary. In **write** mode, lookups but also insertions and deletions are allowed, and in **reentrant** mode the dictionary is unlocked outside dictionary code. + +### walkthrough (callback) + +There are 4 calls: + +- `dictionary_walkthrough_read()` and `dictionary_sorted_walkthrough_read()` acquire a shared read lock on the linked-list, and they call a callback function for every item of the dictionary. +- `dictionary_walkthrough_write()` and `dictionary_sorted_walkthrough_write()` acquire a write lock on the linked-list, and they call a callback function for every item of the dictionary. 
This is to be used when items need to be added to or removed from the dictionary. The `write` versions can be used to delete any or all the items from the dictionary, including the currently working one. For the `sorted` version, all items in the dictionary maintain a reference counter, so all deletions are deferred until the sorted walkthrough finishes. + +The non sorted versions traverse the items in the same order they have been added to the dictionary (or the reverse order if the flag `DICT_OPTION_ADD_IN_FRONT` is set during dictionary creation). The sorted versions sort alphabetically the items based on their name, and then they traverse them in the sorted order. + +The callback function returns an `int`. If this value is negative, traversal of the dictionary is stopped immediately and the negative value is returned to the caller. If the returned value of all callback calls is zero or positive, the walkthrough functions return the sum of the return values of all callbacks. So, if you are just interested to know how many items fall into some condition, write a callback function that returns 1 when the item satisfies that condition and 0 when it does not and the walkthrough function will return how many tested positive. + +### foreach (for-next loop) + +The following is a snippet of such a loop: + +```c +MY_STRUCTURE *x; +dfe_start_read(dict, x) { + printf("hey, I got an item named '%s' with value ptr %08X", x_dfe.name, x); +} +dfe_done(x); +``` + +The `x` parameter gives the name of the pointer to be used while iterating the items. Any name is accepted. `x` points to the `value` of the item in the dictionary. + +The `x_dfe.name` is a variable that is automatically created, by concatenating whatever is given as `x` and `_dfe`. It is an object and it has a few members, including `x_dfe.counter` that counts the iterations made so far, `x_dfe.item` that provides the acquired item from the dictionary and which can be used to pass it over for further processing, etc. 
Check the header file for more info. So, if you call `dfe_start_read(dict, myvar)`, the name will be `myvar_dfe`. + +Both `dfe_start_read(dict, item)` and `dfe_done(item)` are together inside a `do { ... } while(0)` loop, so that the following will work: + +```c +MY_ITEM *item; + +if(a == 1) + // do { + dfe_start_read(dict, x) + printf("hey, I got an item named '%s' with value ptr %08X", x_dfe.name, x); + dfe_done(x); + // } while(0); +else + something else; +``` + +In the above, the `if(a == 1)` condition will work as expected. It will do the foreach loop when a is 1, otherwise it will run `something else`. + +There are 2 versions of `dfe_start`: + +- `dfe_start_read()` that acquires a shared read linked-list lock to the dictionary. +- `dfe_start_write()` that acquires an exclusive write linked-list lock to the dictionary. + +While in the loop, depending on the read or write versions of `dfe_start`, the caller may lookup or manipulate the dictionary. The rules are the same with the unsorted walkthrough callback functions. + +PS: DFE is Dictionary For Each. 
diff --git a/src/libnetdata/dictionary/dictionary-callbacks.h b/src/libnetdata/dictionary/dictionary-callbacks.h new file mode 100644 index 000000000..38da3df09 --- /dev/null +++ b/src/libnetdata/dictionary/dictionary-callbacks.h @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DICTIONARY_CALLBACKS_H +#define NETDATA_DICTIONARY_CALLBACKS_H + +#include "dictionary-internals.h" + +// ---------------------------------------------------------------------------- +// callbacks execution + +static inline void dictionary_execute_insert_callback(DICTIONARY *dict, DICTIONARY_ITEM *item, void *constructor_data) { + if(likely(!dict->hooks || !dict->hooks->insert_callback)) + return; + + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + internal_error(false, + "DICTIONARY: Running insert callback on item '%s' of dictionary created from %s() %zu@%s.", + item_get_name(item), + dict->creation_function, + dict->creation_line, + dict->creation_file); + + dict->hooks->insert_callback(item, item->shared->value, constructor_data?constructor_data:dict->hooks->insert_callback_data); + DICTIONARY_STATS_CALLBACK_INSERTS_PLUS1(dict); +} + +static inline bool dictionary_execute_conflict_callback(DICTIONARY *dict, DICTIONARY_ITEM *item, void *new_value, void *constructor_data) { + if(likely(!dict->hooks || !dict->hooks->conflict_callback)) + return false; + + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + internal_error(false, + "DICTIONARY: Running conflict callback on item '%s' of dictionary created from %s() %zu@%s.", + item_get_name(item), + dict->creation_function, + dict->creation_line, + dict->creation_file); + + bool ret = dict->hooks->conflict_callback( + item, item->shared->value, new_value, + constructor_data ? 
constructor_data : dict->hooks->conflict_callback_data); + + DICTIONARY_STATS_CALLBACK_CONFLICTS_PLUS1(dict); + + return ret; +} + +static inline void dictionary_execute_react_callback(DICTIONARY *dict, DICTIONARY_ITEM *item, void *constructor_data) { + if(likely(!dict->hooks || !dict->hooks->react_callback)) + return; + + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + internal_error(false, + "DICTIONARY: Running react callback on item '%s' of dictionary created from %s() %zu@%s.", + item_get_name(item), + dict->creation_function, + dict->creation_line, + dict->creation_file); + + dict->hooks->react_callback(item, item->shared->value, + constructor_data?constructor_data:dict->hooks->react_callback_data); + + DICTIONARY_STATS_CALLBACK_REACTS_PLUS1(dict); +} + +static inline void dictionary_execute_delete_callback(DICTIONARY *dict, DICTIONARY_ITEM *item) { + if(likely(!dict->hooks || !dict->hooks->delete_callback)) + return; + + // We may execute delete callback on items deleted from a view, + // because we may have references to it, after the master is gone + // so, the shared structure will remain until the last reference is released. 
+ + internal_error(false, + "DICTIONARY: Running delete callback on item '%s' of dictionary created from %s() %zu@%s.", + item_get_name(item), + dict->creation_function, + dict->creation_line, + dict->creation_file); + + dict->hooks->delete_callback(item, item->shared->value, dict->hooks->delelte_callback_data); + + DICTIONARY_STATS_CALLBACK_DELETES_PLUS1(dict); +} + + +#endif //NETDATA_DICTIONARY_CALLBACKS_H diff --git a/src/libnetdata/dictionary/dictionary-hashtable.h b/src/libnetdata/dictionary/dictionary-hashtable.h new file mode 100644 index 000000000..ace22d91d --- /dev/null +++ b/src/libnetdata/dictionary/dictionary-hashtable.h @@ -0,0 +1,263 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DICTIONARY_HASHTABLE_H +#define NETDATA_DICTIONARY_HASHTABLE_H + +#include "dictionary-internals.h" + +// ---------------------------------------------------------------------------- +// hashtable operations with simple hashtable + +static inline bool compare_keys(void *key1, void *key2) { + const char *k1 = key1; + const char *k2 = key2; + return strcmp(k1, k2) == 0; +} + +static inline void *item_to_key(DICTIONARY_ITEM *item) { + return (void *)item_get_name(item); +} + +#define SIMPLE_HASHTABLE_VALUE_TYPE DICTIONARY_ITEM +#define SIMPLE_HASHTABLE_NAME _DICTIONARY +#define SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION item_to_key +#define SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION compare_keys +#include "..//simple_hashtable.h" + +static inline size_t hashtable_init_hashtable(DICTIONARY *dict) { + SIMPLE_HASHTABLE_DICTIONARY *ht = callocz(1, sizeof(*ht)); + simple_hashtable_init_DICTIONARY(ht, 4); + dict->index.JudyHSArray = ht; + return 0; +} + +static inline size_t hashtable_destroy_hashtable(DICTIONARY *dict) { + SIMPLE_HASHTABLE_DICTIONARY *ht = dict->index.JudyHSArray; + if(unlikely(!ht)) return 0; + + size_t mem = sizeof(*ht) + ht->size * sizeof(SIMPLE_HASHTABLE_SLOT_DICTIONARY); + simple_hashtable_destroy_DICTIONARY(ht); + freez(ht); + 
dict->index.JudyHSArray = NULL; + + return mem; +} + +static inline void *hashtable_insert_hashtable(DICTIONARY *dict, const char *name, size_t name_len) { + SIMPLE_HASHTABLE_DICTIONARY *ht = dict->index.JudyHSArray; + + char key[name_len+1]; + memcpy(key, name, name_len); + key[name_len] = '\0'; + + XXH64_hash_t hash = XXH3_64bits(name, name_len); + SIMPLE_HASHTABLE_SLOT_DICTIONARY *sl = simple_hashtable_get_slot_DICTIONARY(ht, hash, key, true); + sl->hash = hash; // we will need it in insert later - it is ok to overwrite - it is the same already + return sl; +} + +static inline DICTIONARY_ITEM *hashtable_insert_handle_to_item_hashtable(DICTIONARY *dict, void *handle) { + (void)dict; + SIMPLE_HASHTABLE_SLOT_DICTIONARY *sl = handle; + DICTIONARY_ITEM *item = SIMPLE_HASHTABLE_SLOT_DATA(sl); + return item; +} + +static inline void hashtable_set_item_hashtable(DICTIONARY *dict, void *handle, DICTIONARY_ITEM *item) { + SIMPLE_HASHTABLE_DICTIONARY *ht = dict->index.JudyHSArray; + SIMPLE_HASHTABLE_SLOT_DICTIONARY *sl = handle; + simple_hashtable_set_slot_DICTIONARY(ht, sl, sl->hash, item); +} + +static inline int hashtable_delete_hashtable(DICTIONARY *dict, const char *name, size_t name_len, DICTIONARY_ITEM *item_to_delete) { + (void)item_to_delete; + SIMPLE_HASHTABLE_DICTIONARY *ht = dict->index.JudyHSArray; + + char key[name_len+1]; + memcpy(key, name, name_len); + key[name_len] = '\0'; + + XXH64_hash_t hash = XXH3_64bits(name, name_len); + SIMPLE_HASHTABLE_SLOT_DICTIONARY *sl = simple_hashtable_get_slot_DICTIONARY(ht, hash, key, false); + DICTIONARY_ITEM *item = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if(!item) return 0; // return not-found + + simple_hashtable_del_slot_DICTIONARY(ht, sl); + return 1; // return deleted +} + +static inline DICTIONARY_ITEM *hashtable_get_hashtable(DICTIONARY *dict, const char *name, size_t name_len) { + SIMPLE_HASHTABLE_DICTIONARY *ht = dict->index.JudyHSArray; + if(unlikely(!ht)) return NULL; + + char key[name_len+1]; + memcpy(key, name, 
name_len); + key[name_len] = '\0'; + + XXH64_hash_t hash = XXH3_64bits(name, name_len); + SIMPLE_HASHTABLE_SLOT_DICTIONARY *sl = simple_hashtable_get_slot_DICTIONARY(ht, hash, key, false); + return SIMPLE_HASHTABLE_SLOT_DATA(sl); +} + +// ---------------------------------------------------------------------------- +// hashtable operations with Judy + +static inline size_t hashtable_init_judy(DICTIONARY *dict) { + dict->index.JudyHSArray = NULL; + return 0; +} + +static inline size_t hashtable_destroy_judy(DICTIONARY *dict) { + if(unlikely(!dict->index.JudyHSArray)) return 0; + + pointer_destroy_index(dict); + + JError_t J_Error; + Word_t ret = JudyHSFreeArray(&dict->index.JudyHSArray, &J_Error); + if(unlikely(ret == (Word_t) JERR)) { + netdata_log_error("DICTIONARY: Cannot destroy JudyHS, JU_ERRNO_* == %u, ID == %d", + JU_ERRNO(&J_Error), JU_ERRID(&J_Error)); + } + + netdata_log_debug(D_DICTIONARY, "Dictionary: hash table freed %lu bytes", ret); + + dict->index.JudyHSArray = NULL; + return (size_t)ret; +} + +static inline void *hashtable_insert_judy(DICTIONARY *dict, const char *name, size_t name_len) { + JError_t J_Error; + Pvoid_t *Rc = JudyHSIns(&dict->index.JudyHSArray, (void *)name, name_len, &J_Error); + if (unlikely(Rc == PJERR)) { + netdata_log_error("DICTIONARY: Cannot insert entry with name '%s' to JudyHS, JU_ERRNO_* == %u, ID == %d", + name, JU_ERRNO(&J_Error), JU_ERRID(&J_Error)); + } + + // if *Rc == 0, new item added to the array + // otherwise the existing item value is returned in *Rc + + // we return a pointer to a pointer, so that the caller can + // put anything needed at the value of the index. + // The pointer to pointer we return has to be used before + // any other operation that may change the index (insert/delete). 
+ return (void *)Rc; +} + +static inline DICTIONARY_ITEM *hashtable_insert_handle_to_item_judy(DICTIONARY *dict, void *handle) { + (void)dict; + DICTIONARY_ITEM **item_pptr = handle; + return *item_pptr; +} + +static inline void hashtable_set_item_judy(DICTIONARY *dict, void *handle, DICTIONARY_ITEM *item) { + (void)dict; + DICTIONARY_ITEM **item_pptr = handle; + *item_pptr = item; +} + +static inline int hashtable_delete_judy(DICTIONARY *dict, const char *name, size_t name_len, DICTIONARY_ITEM *item) { + (void)item; + if(unlikely(!dict->index.JudyHSArray)) return 0; + + JError_t J_Error; + int ret = JudyHSDel(&dict->index.JudyHSArray, (void *)name, name_len, &J_Error); + if(unlikely(ret == JERR)) { + netdata_log_error("DICTIONARY: Cannot delete entry with name '%s' from JudyHS, JU_ERRNO_* == %u, ID == %d", + name, + JU_ERRNO(&J_Error), JU_ERRID(&J_Error)); + return 0; + } + + // Hey, this is problematic! We need the value back, not just an int with a status! + // https://sourceforge.net/p/judy/feature-requests/23/ + + if(unlikely(ret == 0)) { + // not found in the dictionary + return 0; + } + else { + // found and deleted from the dictionary + return 1; + } +} + +static inline DICTIONARY_ITEM *hashtable_get_judy(DICTIONARY *dict, const char *name, size_t name_len) { + if(unlikely(!dict->index.JudyHSArray)) return NULL; + + Pvoid_t *Rc; + Rc = JudyHSGet(dict->index.JudyHSArray, (void *)name, name_len); + if(likely(Rc)) { + // found in the hash table + pointer_check(dict, (DICTIONARY_ITEM *)*Rc); + return (DICTIONARY_ITEM *)*Rc; + } + else { + // not found in the hash table + return NULL; + } +} + +// -------------------------------------------------------------------------------------------------------------------- +// select the right hashtable + +static inline size_t hashtable_init_unsafe(DICTIONARY *dict) { + if(dict->options & DICT_OPTION_INDEX_JUDY) + return hashtable_init_judy(dict); + else + return hashtable_init_hashtable(dict); +} + +static inline size_t 
hashtable_destroy_unsafe(DICTIONARY *dict) { + pointer_destroy_index(dict); + + if(dict->options & DICT_OPTION_INDEX_JUDY) + return hashtable_destroy_judy(dict); + else + return hashtable_destroy_hashtable(dict); +} + +static inline void *hashtable_insert_unsafe(DICTIONARY *dict, const char *name, size_t name_len) { + if(dict->options & DICT_OPTION_INDEX_JUDY) + return hashtable_insert_judy(dict, name, name_len); + else + return hashtable_insert_hashtable(dict, name, name_len); +} + +static inline DICTIONARY_ITEM *hashtable_insert_handle_to_item_unsafe(DICTIONARY *dict, void *handle) { + if(dict->options & DICT_OPTION_INDEX_JUDY) + return hashtable_insert_handle_to_item_judy(dict, handle); + else + return hashtable_insert_handle_to_item_hashtable(dict, handle); +} + +static inline int hashtable_delete_unsafe(DICTIONARY *dict, const char *name, size_t name_len, DICTIONARY_ITEM *item) { + if(dict->options & DICT_OPTION_INDEX_JUDY) + return hashtable_delete_judy(dict, name, name_len, item); + else + return hashtable_delete_hashtable(dict, name, name_len, item); +} + +static inline DICTIONARY_ITEM *hashtable_get_unsafe(DICTIONARY *dict, const char *name, size_t name_len) { + DICTIONARY_STATS_SEARCHES_PLUS1(dict); + + DICTIONARY_ITEM *item; + + if(dict->options & DICT_OPTION_INDEX_JUDY) + item = hashtable_get_judy(dict, name, name_len); + else + item = hashtable_get_hashtable(dict, name, name_len); + + if(item) + pointer_check(dict, item); + + return item; +} + +static inline void hashtable_set_item_unsafe(DICTIONARY *dict, void *handle, DICTIONARY_ITEM *item) { + if(dict->options & DICT_OPTION_INDEX_JUDY) + hashtable_set_item_judy(dict, handle, item); + else + hashtable_set_item_hashtable(dict, handle, item); +} + +#endif //NETDATA_DICTIONARY_HASHTABLE_H diff --git a/src/libnetdata/dictionary/dictionary-internals.h b/src/libnetdata/dictionary/dictionary-internals.h new file mode 100644 index 000000000..54e59564f --- /dev/null +++ 
b/src/libnetdata/dictionary/dictionary-internals.h @@ -0,0 +1,259 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DICTIONARY_INTERNALS_H +#define NETDATA_DICTIONARY_INTERNALS_H + +#define DICTIONARY_INTERNALS +#include "../libnetdata.h" + +// runtime flags of the dictionary - must be checked with atomics +typedef enum __attribute__ ((__packed__)) { + DICT_FLAG_NONE = 0, + DICT_FLAG_DESTROYED = (1 << 0), // this dictionary has been destroyed +} DICT_FLAGS; + +#define dict_flag_check(dict, flag) (__atomic_load_n(&((dict)->flags), __ATOMIC_RELAXED) & (flag)) +#define dict_flag_set(dict, flag) __atomic_or_fetch(&((dict)->flags), flag, __ATOMIC_RELAXED) +#define dict_flag_clear(dict, flag) __atomic_and_fetch(&((dict)->flags), ~(flag), __ATOMIC_RELAXED) + +// flags macros +#define is_dictionary_destroyed(dict) dict_flag_check(dict, DICT_FLAG_DESTROYED) + +// configuration options macros +#define is_dictionary_single_threaded(dict) ((dict)->options & DICT_OPTION_SINGLE_THREADED) +#define is_view_dictionary(dict) ((dict)->master) +#define is_master_dictionary(dict) (!is_view_dictionary(dict)) + +typedef enum __attribute__ ((__packed__)) item_options { + ITEM_OPTION_NONE = 0, + ITEM_OPTION_ALLOCATED_NAME = (1 << 0), // the name pointer is a STRING + + // IMPORTANT: This is 1-bit - to add more change ITEM_OPTIONS_BITS +} ITEM_OPTIONS; + +typedef enum __attribute__ ((__packed__)) item_flags { + ITEM_FLAG_NONE = 0, + ITEM_FLAG_DELETED = (1 << 0), // this item is marked deleted, so it is not available for traversal (deleted from the index too) + ITEM_FLAG_BEING_CREATED = (1 << 1), // this item is currently being created - this flag is removed when construction finishes + + // IMPORTANT: This is 8-bit +} ITEM_FLAGS; + +#define item_flag_check(item, flag) (__atomic_load_n(&((item)->flags), __ATOMIC_RELAXED) & (flag)) +#define item_flag_set(item, flag) __atomic_or_fetch(&((item)->flags), flag, __ATOMIC_RELAXED) +#define item_flag_clear(item, flag) 
__atomic_and_fetch(&((item)->flags), ~(flag), __ATOMIC_RELAXED) + +#define item_shared_flag_check(item, flag) (__atomic_load_n(&((item)->shared->flags), __ATOMIC_RELAXED) & (flag)) +#define item_shared_flag_set(item, flag) __atomic_or_fetch(&((item)->shared->flags), flag, __ATOMIC_RELAXED) +#define item_shared_flag_clear(item, flag) __atomic_and_fetch(&((item)->shared->flags), ~(flag), __ATOMIC_RELAXED) + +#define REFCOUNT_DELETING (-100) + +#define ITEM_FLAGS_TYPE uint8_t +#define KEY_LEN_TYPE uint32_t +#define VALUE_LEN_TYPE uint32_t + +#define ITEM_OPTIONS_BITS 1 +#define KEY_LEN_BITS ((sizeof(KEY_LEN_TYPE) * 8) - (sizeof(ITEM_FLAGS_TYPE) * 8) - ITEM_OPTIONS_BITS) +#define KEY_LEN_MAX ((1 << KEY_LEN_BITS) - 1) + +#define VALUE_LEN_BITS ((sizeof(VALUE_LEN_TYPE) * 8) - (sizeof(ITEM_FLAGS_TYPE) * 8)) +#define VALUE_LEN_MAX ((1 << VALUE_LEN_BITS) - 1) + + +/* + * Every item in the dictionary has the following structure. + */ + +typedef int32_t REFCOUNT; + +typedef struct dictionary_item_shared { + void *value; // the value of the dictionary item + + // the order of the following items is important! + // The total of their storage should be 64-bits + + REFCOUNT links; // how many links this item has + VALUE_LEN_TYPE value_len:VALUE_LEN_BITS; // the size of the value + ITEM_FLAGS_TYPE flags; // shared flags +} DICTIONARY_ITEM_SHARED; + +struct dictionary_item { +#ifdef NETDATA_INTERNAL_CHECKS + DICTIONARY *dict; + pid_t creator_pid; + pid_t deleter_pid; + pid_t ll_adder_pid; + pid_t ll_remover_pid; +#endif + + DICTIONARY_ITEM_SHARED *shared; + + struct dictionary_item *next; // a double linked list to allow fast insertions and deletions + struct dictionary_item *prev; + + union { + STRING *string_name; // the name of the dictionary item + char *caller_name; // the user supplied string pointer + // void *key_ptr; // binary key pointer + }; + + // the order of the following items is important! 
+ // The total of their storage should be 64-bits + + REFCOUNT refcount; // the private reference counter + + KEY_LEN_TYPE key_len:KEY_LEN_BITS; // the size of key indexed (for strings, including the null terminator) + // this is (2^23 - 1) = 8.388.607 bytes max key length. + + ITEM_OPTIONS options:ITEM_OPTIONS_BITS; // permanent configuration options + // (no atomic operations on this - they never change) + + ITEM_FLAGS_TYPE flags; // runtime changing flags for this item (atomic operations on this) + // cannot be a bit field because of atomics. +}; + +struct dictionary_hooks { + REFCOUNT links; + usec_t last_master_deletion_us; + + dict_cb_insert_t insert_callback; + void *insert_callback_data; + + dict_cb_conflict_t conflict_callback; + void *conflict_callback_data; + + dict_cb_react_t react_callback; + void *react_callback_data; + + dict_cb_delete_t delete_callback; + void *delelte_callback_data; +}; + +struct dictionary { +#ifdef NETDATA_INTERNAL_CHECKS + const char *creation_function; + const char *creation_file; + size_t creation_line; + pid_t creation_tid; +#endif + + usec_t last_gc_run_us; + DICT_OPTIONS options; // the configuration flags of the dictionary (they never change - no atomics) + DICT_FLAGS flags; // run time flags for the dictionary (they change all the time - atomics needed) + + ARAL *value_aral; + + struct { // support for multiple indexing engines + Pvoid_t JudyHSArray; // the hash table + RW_SPINLOCK rw_spinlock; // protect the index + } index; + + struct { + DICTIONARY_ITEM *list; // the double linked list of all items in the dictionary + RW_SPINLOCK rw_spinlock; // protect the linked-list + pid_t writer_pid; // the gettid() of the writer + uint32_t writer_depth; // nesting of write locks + } items; + + struct dictionary_hooks *hooks; // pointer to external function callbacks to be called at certain points + struct dictionary_stats *stats; // statistics data, when DICT_OPTION_STATS is set + + DICTIONARY *master; // the master dictionary + 
DICTIONARY *next; // linked list for delayed destruction (garbage collection of whole dictionaries) + + uint32_t version; // the current version of the dictionary + // it is incremented when: + // - item added + // - item removed + // - item value reset + // - conflict callback returns true + // - function dictionary_version_increment() is called + + int32_t entries; // how many items are currently in the index (the linked list may have more) + int32_t referenced_items; // how many items of the dictionary are currently being used by 3rd parties + int32_t pending_deletion_items; // how many items of the dictionary have been deleted, but have not been removed yet + +#ifdef NETDATA_DICTIONARY_VALIDATE_POINTERS + netdata_mutex_t global_pointer_registry_mutex; + Pvoid_t global_pointer_registry; +#endif +}; + +// ---------------------------------------------------------------------------- +// forward definitions of functions used in reverse order in the code + +void garbage_collect_pending_deletes(DICTIONARY *dict); +static inline void item_linked_list_remove(DICTIONARY *dict, DICTIONARY_ITEM *item); +static size_t dict_item_free_with_hooks(DICTIONARY *dict, DICTIONARY_ITEM *item); +static inline const char *item_get_name(const DICTIONARY_ITEM *item); +static inline int hashtable_delete_unsafe(DICTIONARY *dict, const char *name, size_t name_len, DICTIONARY_ITEM *item); +static void item_release(DICTIONARY *dict, DICTIONARY_ITEM *item); +static bool dict_item_set_deleted(DICTIONARY *dict, DICTIONARY_ITEM *item); + +#define RC_ITEM_OK ( 0) +#define RC_ITEM_MARKED_FOR_DELETION (-1) // the item is marked for deletion +#define RC_ITEM_IS_CURRENTLY_BEING_DELETED (-2) // the item is currently being deleted +#define RC_ITEM_IS_CURRENTLY_BEING_CREATED (-3) // the item is currently being created +#define RC_ITEM_IS_REFERENCED (-4) // the item is currently referenced +#define item_check_and_acquire(dict, item) (item_check_and_acquire_advanced(dict, item, false) == RC_ITEM_OK) 
+static int item_check_and_acquire_advanced(DICTIONARY *dict, DICTIONARY_ITEM *item, bool having_index_lock); +#define item_is_not_referenced_and_can_be_removed(dict, item) (item_is_not_referenced_and_can_be_removed_advanced(dict, item) == RC_ITEM_OK) +static inline int item_is_not_referenced_and_can_be_removed_advanced(DICTIONARY *dict, DICTIONARY_ITEM *item); + +// ---------------------------------------------------------------------------- +// validate each pointer is indexed once - internal checks only + +#ifdef NETDATA_DICTIONARY_VALIDATE_POINTERS +static inline void pointer_index_init(DICTIONARY *dict __maybe_unused) { + netdata_mutex_init(&dict->global_pointer_registry_mutex); +} + +static inline void pointer_destroy_index(DICTIONARY *dict __maybe_unused) { + netdata_mutex_lock(&dict->global_pointer_registry_mutex); + JudyHSFreeArray(&dict->global_pointer_registry, PJE0); + netdata_mutex_unlock(&dict->global_pointer_registry_mutex); +} +static inline void pointer_add(DICTIONARY *dict __maybe_unused, DICTIONARY_ITEM *item __maybe_unused) { + netdata_mutex_lock(&dict->global_pointer_registry_mutex); + Pvoid_t *PValue = JudyHSIns(&dict->global_pointer_registry, &item, sizeof(void *), PJE0); + if(*PValue != NULL) + fatal("pointer already exists in registry"); + *PValue = item; + netdata_mutex_unlock(&dict->global_pointer_registry_mutex); +} + +static inline void pointer_check(DICTIONARY *dict __maybe_unused, DICTIONARY_ITEM *item __maybe_unused) { + netdata_mutex_lock(&dict->global_pointer_registry_mutex); + Pvoid_t *PValue = JudyHSGet(dict->global_pointer_registry, &item, sizeof(void *)); + if(PValue == NULL) + fatal("pointer is not found in registry"); + netdata_mutex_unlock(&dict->global_pointer_registry_mutex); +} + +static inline void pointer_del(DICTIONARY *dict __maybe_unused, DICTIONARY_ITEM *item __maybe_unused) { + netdata_mutex_lock(&dict->global_pointer_registry_mutex); + int ret = JudyHSDel(&dict->global_pointer_registry, &item, sizeof(void *), 
PJE0); + if(!ret) + fatal("pointer to be deleted does not exist in registry"); + netdata_mutex_unlock(&dict->global_pointer_registry_mutex); +} +#else // !NETDATA_DICTIONARY_VALIDATE_POINTERS +#define pointer_index_init(dict) debug_dummy() +#define pointer_destroy_index(dict) debug_dummy() +#define pointer_add(dict, item) debug_dummy() +#define pointer_check(dict, item) debug_dummy() +#define pointer_del(dict, item) debug_dummy() +#endif // !NETDATA_DICTIONARY_VALIDATE_POINTERS + +extern ARAL *dict_items_aral; +extern ARAL *dict_shared_items_aral; + +#include "dictionary-statistics.h" +#include "dictionary-locks.h" +#include "dictionary-refcount.h" +#include "dictionary-hashtable.h" +#include "dictionary-callbacks.h" +#include "dictionary-item.h" + +#endif //NETDATA_DICTIONARY_INTERNALS_H diff --git a/src/libnetdata/dictionary/dictionary-item.h b/src/libnetdata/dictionary/dictionary-item.h new file mode 100644 index 000000000..d9c67bcb5 --- /dev/null +++ b/src/libnetdata/dictionary/dictionary-item.h @@ -0,0 +1,555 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DICTIONARY_ITEM_H +#define NETDATA_DICTIONARY_ITEM_H + +#include "dictionary-internals.h" + +// ---------------------------------------------------------------------------- +// ITEM initialization and updates + +static inline size_t item_set_name(DICTIONARY *dict, DICTIONARY_ITEM *item, const char *name, size_t name_len) { + if(likely(dict->options & DICT_OPTION_NAME_LINK_DONT_CLONE)) { + item->caller_name = (char *)name; + item->key_len = name_len; + } + else { + item->string_name = string_strdupz(name); + item->key_len = string_strlen(item->string_name); + item->options |= ITEM_OPTION_ALLOCATED_NAME; + } + + return item->key_len; +} + +static inline size_t item_free_name(DICTIONARY *dict, DICTIONARY_ITEM *item) { + if(likely(!(dict->options & DICT_OPTION_NAME_LINK_DONT_CLONE))) + string_freez(item->string_name); + + return item->key_len; +} + +static inline const char 
*item_get_name(const DICTIONARY_ITEM *item) { + if(item->options & ITEM_OPTION_ALLOCATED_NAME) + return string2str(item->string_name); + else + return item->caller_name; +} + +static inline size_t item_get_name_len(const DICTIONARY_ITEM *item) { + if(item->options & ITEM_OPTION_ALLOCATED_NAME) + return string_strlen(item->string_name); + else + return strlen(item->caller_name); +} + +// ---------------------------------------------------------------------------- + +static inline DICTIONARY_ITEM *dict_item_create(DICTIONARY *dict __maybe_unused, size_t *allocated_bytes, DICTIONARY_ITEM *master_item) { + DICTIONARY_ITEM *item; + + size_t size = sizeof(DICTIONARY_ITEM); + item = aral_mallocz(dict_items_aral); + memset(item, 0, sizeof(DICTIONARY_ITEM)); + +#ifdef NETDATA_INTERNAL_CHECKS + item->creator_pid = gettid(); +#endif + + item->refcount = 1; + item->flags = ITEM_FLAG_BEING_CREATED; + + *allocated_bytes += size; + + if(master_item) { + item->shared = master_item->shared; + + if(unlikely(__atomic_add_fetch(&item->shared->links, 1, __ATOMIC_ACQUIRE) <= 1)) + fatal("DICTIONARY: attempted to link to a shared item structure that had zero references"); + } + else { + size = sizeof(DICTIONARY_ITEM_SHARED); + item->shared = aral_mallocz(dict_shared_items_aral); + memset(item->shared, 0, sizeof(DICTIONARY_ITEM_SHARED)); + + item->shared->links = 1; + *allocated_bytes += size; + } + +#ifdef NETDATA_INTERNAL_CHECKS + item->dict = dict; +#endif + return item; +} + +static inline void *dict_item_value_mallocz(DICTIONARY *dict, size_t value_len) { + if(dict->value_aral) { + internal_fatal(aral_element_size(dict->value_aral) != value_len, + "DICTIONARY: item value size %zu does not match the configured fixed one %zu", + value_len, aral_element_size(dict->value_aral)); + return aral_mallocz(dict->value_aral); + } + else + return mallocz(value_len); +} + +static inline void dict_item_value_freez(DICTIONARY *dict, void *ptr) { + if(dict->value_aral) + aral_freez(dict->value_aral, 
ptr); + else + freez(ptr); +} + +static inline void *dict_item_value_create(DICTIONARY *dict, void *value, size_t value_len) { + void *ptr = NULL; + + if(likely(value_len)) { + if (likely(value)) { + // a value has been supplied + // copy it + ptr = dict_item_value_mallocz(dict, value_len); + memcpy(ptr, value, value_len); + } + else { + // no value has been supplied + // allocate a clear memory block + ptr = dict_item_value_mallocz(dict, value_len); + memset(ptr, 0, value_len); + } + } + // else + // the caller wants an item without any value + + return ptr; +} + +static inline DICTIONARY_ITEM *dict_item_create_with_hooks(DICTIONARY *dict, const char *name, size_t name_len, void *value, size_t value_len, void *constructor_data, DICTIONARY_ITEM *master_item) { +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(name_len > KEY_LEN_MAX)) + fatal("DICTIONARY: tried to index a key of size %zu, but the maximum acceptable is %zu", name_len, (size_t)KEY_LEN_MAX); + + if(unlikely(value_len > VALUE_LEN_MAX)) + fatal("DICTIONARY: tried to add an item of size %zu, but the maximum acceptable is %zu", value_len, (size_t)VALUE_LEN_MAX); +#endif + + size_t item_size = 0, key_size = 0, value_size = 0; + + DICTIONARY_ITEM *item = dict_item_create(dict, &item_size, master_item); + key_size += item_set_name(dict, item, name, name_len); + + if(unlikely(is_view_dictionary(dict))) { + // we are on a view dictionary + // do not touch the value + ; + +#ifdef NETDATA_INTERNAL_CHECKS + if(unlikely(!master_item)) + fatal("DICTIONARY: cannot add an item to a view without a master item."); +#endif + } + else { + // we are on the master dictionary + + if(unlikely(dict->options & DICT_OPTION_VALUE_LINK_DONT_CLONE)) + item->shared->value = value; + else + item->shared->value = dict_item_value_create(dict, value, value_len); + + item->shared->value_len = value_len; + value_size += value_len; + + dictionary_execute_insert_callback(dict, item, constructor_data); + } + + DICTIONARY_ENTRIES_PLUS1(dict); + 
DICTIONARY_STATS_PLUS_MEMORY(dict, key_size, item_size, value_size); + + return item; +} + +static inline void dict_item_reset_value_with_hooks(DICTIONARY *dict, DICTIONARY_ITEM *item, void *value, size_t value_len, void *constructor_data) { + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: %s() should never be called on views.", __FUNCTION__ ); + + netdata_log_debug(D_DICTIONARY, "Dictionary entry with name '%s' found. Changing its value.", item_get_name(item)); + + DICTIONARY_VALUE_RESETS_PLUS1(dict); + + if(item->shared->value_len != value_len) { + DICTIONARY_STATS_PLUS_MEMORY(dict, 0, 0, value_len); + DICTIONARY_STATS_MINUS_MEMORY(dict, 0, 0, item->shared->value_len); + } + + dictionary_execute_delete_callback(dict, item); + + if(likely(dict->options & DICT_OPTION_VALUE_LINK_DONT_CLONE)) { + netdata_log_debug(D_DICTIONARY, "Dictionary: linking value to '%s'", item_get_name(item)); + item->shared->value = value; + item->shared->value_len = value_len; + } + else { + netdata_log_debug(D_DICTIONARY, "Dictionary: cloning value to '%s'", item_get_name(item)); + + void *old_value = item->shared->value; + void *new_value = NULL; + if(value_len) { + new_value = dict_item_value_mallocz(dict, value_len); + if(value) memcpy(new_value, value, value_len); + else memset(new_value, 0, value_len); + } + item->shared->value = new_value; + item->shared->value_len = value_len; + + netdata_log_debug(D_DICTIONARY, "Dictionary: freeing old value of '%s'", item_get_name(item)); + dict_item_value_freez(dict, old_value); + } + + dictionary_execute_insert_callback(dict, item, constructor_data); +} + +static inline size_t dict_item_free_with_hooks(DICTIONARY *dict, DICTIONARY_ITEM *item) { + netdata_log_debug(D_DICTIONARY, "Destroying name value entry for name '%s'.", item_get_name(item)); + + if(!item_flag_check(item, ITEM_FLAG_DELETED)) + DICTIONARY_ENTRIES_MINUS1(dict); + + size_t item_size = 0, key_size = 0, value_size = 0; + + key_size += item->key_len; + 
if(unlikely(!(dict->options & DICT_OPTION_NAME_LINK_DONT_CLONE))) + item_free_name(dict, item); + + if(item_shared_release_and_check_if_it_can_be_freed(dict, item)) { + dictionary_execute_delete_callback(dict, item); + + if(unlikely(!(dict->options & DICT_OPTION_VALUE_LINK_DONT_CLONE))) { + netdata_log_debug(D_DICTIONARY, "Dictionary freeing value of '%s'", item_get_name(item)); + dict_item_value_freez(dict, item->shared->value); + item->shared->value = NULL; + } + value_size += item->shared->value_len; + + aral_freez(dict_shared_items_aral, item->shared); + item->shared = NULL; + item_size += sizeof(DICTIONARY_ITEM_SHARED); + } + + aral_freez(dict_items_aral, item); + + item_size += sizeof(DICTIONARY_ITEM); + + DICTIONARY_STATS_MINUS_MEMORY(dict, key_size, item_size, value_size); + + // we return the memory we actually freed + return item_size + ((dict->options & DICT_OPTION_VALUE_LINK_DONT_CLONE) ? 0 : value_size); +} + +// ---------------------------------------------------------------------------- +// linked list management + +static inline void item_linked_list_add(DICTIONARY *dict, DICTIONARY_ITEM *item) { + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); + + if(dict->options & DICT_OPTION_ADD_IN_FRONT) + DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(dict->items.list, item, prev, next); + else + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(dict->items.list, item, prev, next); + +#ifdef NETDATA_INTERNAL_CHECKS + item->ll_adder_pid = gettid(); +#endif + + // clear the BEING created flag, + // after it has been inserted into the linked list + item_flag_clear(item, ITEM_FLAG_BEING_CREATED); + + garbage_collect_pending_deletes(dict); + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); +} + +static inline void item_linked_list_remove(DICTIONARY *dict, DICTIONARY_ITEM *item) { + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); + + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(dict->items.list, item, prev, next); + +#ifdef NETDATA_INTERNAL_CHECKS + item->ll_remover_pid = gettid(); +#endif 
+ + garbage_collect_pending_deletes(dict); + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); +} + +// ---------------------------------------------------------------------------- +// item operations + +static inline void dict_item_shared_set_deleted(DICTIONARY *dict, DICTIONARY_ITEM *item) { + if(is_master_dictionary(dict)) { + item_shared_flag_set(item, ITEM_FLAG_DELETED); + + if(dict->hooks) + __atomic_store_n(&dict->hooks->last_master_deletion_us, now_realtime_usec(), __ATOMIC_RELAXED); + } +} + +// returns true if we set the deleted flag on this item +static inline bool dict_item_set_deleted(DICTIONARY *dict, DICTIONARY_ITEM *item) { + ITEM_FLAGS expected, desired; + + expected = __atomic_load_n(&item->flags, __ATOMIC_RELAXED); + + do { + + if (expected & ITEM_FLAG_DELETED) + return false; + + desired = expected | ITEM_FLAG_DELETED; + + } while(!__atomic_compare_exchange_n(&item->flags, &expected, desired, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)); + + DICTIONARY_ENTRIES_MINUS1(dict); + return true; +} + +static inline void dict_item_free_or_mark_deleted(DICTIONARY *dict, DICTIONARY_ITEM *item) { + int rc = item_is_not_referenced_and_can_be_removed_advanced(dict, item); + switch(rc) { + case RC_ITEM_OK: + // the item is ours, refcount set to -100 + dict_item_shared_set_deleted(dict, item); + item_linked_list_remove(dict, item); + dict_item_free_with_hooks(dict, item); + break; + + case RC_ITEM_IS_REFERENCED: + case RC_ITEM_IS_CURRENTLY_BEING_CREATED: + // the item is currently referenced by others + dict_item_shared_set_deleted(dict, item); + dict_item_set_deleted(dict, item); + // after this point do not touch the item + break; + + case RC_ITEM_IS_CURRENTLY_BEING_DELETED: + // an item that is currently being deleted by someone else - don't touch it + break; + + default: + internal_error(true, "Hey dev! 
You forgot to add the new condition here!"); + break; + } +} + +// this is used by traversal functions to remove the current item +// if it is deleted, and it has zero references. This will eliminate +// the need for the garbage collector to kick-in later. +// Most deletions happen during traversal, so this is a nice hack +// to speed up everything! +static inline void dict_item_release_and_check_if_it_is_deleted_and_can_be_removed_under_this_lock_mode(DICTIONARY *dict, DICTIONARY_ITEM *item, char rw) { + if(rw == DICTIONARY_LOCK_WRITE) { + bool should_be_deleted = item_flag_check(item, ITEM_FLAG_DELETED); + + item_release(dict, item); + + if(should_be_deleted && item_is_not_referenced_and_can_be_removed(dict, item)) { + // this has to be before removing from the linked list, + // otherwise the garbage collector will also kick in! + DICTIONARY_PENDING_DELETES_MINUS1(dict); + + item_linked_list_remove(dict, item); + dict_item_free_with_hooks(dict, item); + } + } + else { + // we can't do anything under this mode + item_release(dict, item); + } +} + +static inline bool dict_item_del(DICTIONARY *dict, const char *name, ssize_t name_len) { + if(name_len == -1) + name_len = (ssize_t)strlen(name); + + netdata_log_debug(D_DICTIONARY, "DEL dictionary entry with name '%s'.", name); + + // Unfortunately, the JudyHSDel() does not return the value of the + // item that was deleted, so we have to find it before we delete it, + // since we need to release our structures too. 
+ + dictionary_index_lock_wrlock(dict); + + int ret; + DICTIONARY_ITEM *item = hashtable_get_unsafe(dict, name, name_len); + if(unlikely(!item)) { + dictionary_index_wrlock_unlock(dict); + ret = false; + } + else { + if(hashtable_delete_unsafe(dict, name, name_len, item) == 0) + netdata_log_error("DICTIONARY: INTERNAL ERROR: tried to delete item with name '%s', " + "name_len %zd that is not in the index", + name, name_len); + else + pointer_del(dict, item); + + dictionary_index_wrlock_unlock(dict); + + dict_item_free_or_mark_deleted(dict, item); + ret = true; + } + + return ret; +} + +static inline DICTIONARY_ITEM *dict_item_add_or_reset_value_and_acquire(DICTIONARY *dict, const char *name, ssize_t name_len, void *value, size_t value_len, void *constructor_data, DICTIONARY_ITEM *master_item) { + if(unlikely(!name || !*name)) { + internal_error( + true, + "DICTIONARY: attempted to %s() without a name on a dictionary created from %s() %zu@%s.", + __FUNCTION__, + dict->creation_function, + dict->creation_line, + dict->creation_file); + return NULL; + } + + if(unlikely(is_dictionary_destroyed(dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_set() on a destroyed dictionary"); + return NULL; + } + + if(name_len == -1) + name_len = (ssize_t)strlen(name); + + netdata_log_debug(D_DICTIONARY, "SET dictionary entry with name '%s'.", name); + + // DISCUSSION: + // Is it better to gain a read-lock and do a hashtable_get_unsafe() + // before we write lock to do hashtable_insert_unsafe()? + // + // Probably this depends on the use case. + // For statsd for example that does dictionary_set() to update received values, + // it could be beneficial to do a get() before we insert(). + // + // But the caller has the option to do this on his/her own. + // So, let's do the fastest here and let the caller decide the flow of calls. 
+ + dictionary_index_lock_wrlock(dict); + + bool added_or_updated = false; + size_t spins = 0; + DICTIONARY_ITEM *item = NULL; + do { + void *handle = hashtable_insert_unsafe(dict, name, name_len); + item = hashtable_insert_handle_to_item_unsafe(dict, handle); + if (likely(item == NULL)) { + // a new item added to the index + + // create the dictionary item + item = dict_item_create_with_hooks(dict, name, name_len, value, value_len, constructor_data, master_item); + + pointer_add(dict, item); + + hashtable_set_item_unsafe(dict, handle, item); + + // unlock the index lock, before we add it to the linked list + // DON'T DO IT THE OTHER WAY AROUND - DO NOT CROSS THE LOCKS! + dictionary_index_wrlock_unlock(dict); + + item_linked_list_add(dict, item); + + added_or_updated = true; + } + else { + pointer_check(dict, item); + + if(item_check_and_acquire_advanced(dict, item, true) != RC_ITEM_OK) { + spins++; + continue; + } + + // the item is already in the index + // so, either we will return the old one + // or overwrite the value, depending on dictionary flags + + // We should not compare the values here! + // even if they are the same, we have to do the whole job + // so that the callbacks will be called. + + if(is_view_dictionary(dict)) { + // view dictionary + // the item is already there and can be used + if(item->shared != master_item->shared) + netdata_log_error("DICTIONARY: changing the master item on a view is not supported. The previous item will remain. 
To change the key of an item in a view, delete it and add it again."); + } + else { + // master dictionary + // the user wants to reset its value + + if (!(dict->options & DICT_OPTION_DONT_OVERWRITE_VALUE)) { + dict_item_reset_value_with_hooks(dict, item, value, value_len, constructor_data); + added_or_updated = true; + } + + else if (dictionary_execute_conflict_callback(dict, item, value, constructor_data)) { + dictionary_version_increment(dict); + added_or_updated = true; + } + + else { + // conflict callback returned false + // we did really nothing! + ; + } + } + + dictionary_index_wrlock_unlock(dict); + } + } while(!item); + + + if(unlikely(spins > 0)) + DICTIONARY_STATS_INSERT_SPINS_PLUS(dict, spins); + + if(is_master_dictionary(dict) && added_or_updated) + dictionary_execute_react_callback(dict, item, constructor_data); + + return item; +} + +static inline DICTIONARY_ITEM *dict_item_find_and_acquire(DICTIONARY *dict, const char *name, ssize_t name_len) { + if(unlikely(!name || !*name)) { + internal_error( + true, + "DICTIONARY: attempted to %s() without a name on a dictionary created from %s() %zu@%s.", + __FUNCTION__, + dict->creation_function, + dict->creation_line, + dict->creation_file); + return NULL; + } + + if(unlikely(is_dictionary_destroyed(dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_get() on a destroyed dictionary"); + return NULL; + } + + if(name_len == -1) + name_len = (ssize_t)strlen(name); + + netdata_log_debug(D_DICTIONARY, "GET dictionary entry with name '%s'.", name); + + dictionary_index_lock_rdlock(dict); + + DICTIONARY_ITEM *item = hashtable_get_unsafe(dict, name, name_len); + if(unlikely(item && !item_check_and_acquire(dict, item))) { + item = NULL; + DICTIONARY_STATS_SEARCH_IGNORES_PLUS1(dict); + } + + dictionary_index_rdlock_unlock(dict); + + return item; +} + + +#endif //NETDATA_DICTIONARY_ITEM_H diff --git a/src/libnetdata/dictionary/dictionary-locks.h b/src/libnetdata/dictionary/dictionary-locks.h new file 
mode 100644 index 000000000..01eca1546 --- /dev/null +++ b/src/libnetdata/dictionary/dictionary-locks.h @@ -0,0 +1,112 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DICTIONARY_LOCKS_H +#define NETDATA_DICTIONARY_LOCKS_H + +#include "dictionary-internals.h" + +// ---------------------------------------------------------------------------- +// dictionary locks + +static inline size_t dictionary_locks_init(DICTIONARY *dict) { + if(likely(!is_dictionary_single_threaded(dict))) { + rw_spinlock_init(&dict->index.rw_spinlock); + rw_spinlock_init(&dict->items.rw_spinlock); + } + + return 0; +} + +static inline size_t dictionary_locks_destroy(DICTIONARY *dict __maybe_unused) { + return 0; +} + +static inline void ll_recursive_lock_set_thread_as_writer(DICTIONARY *dict) { + pid_t expected = 0, desired = gettid(); + if(!__atomic_compare_exchange_n(&dict->items.writer_pid, &expected, desired, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + fatal("DICTIONARY: Cannot set thread %d as exclusive writer, expected %d, desired %d, found %d.", gettid(), expected, desired, __atomic_load_n(&dict->items.writer_pid, __ATOMIC_RELAXED)); +} + +static inline void ll_recursive_unlock_unset_thread_writer(DICTIONARY *dict) { + pid_t expected = gettid(), desired = 0; + if(!__atomic_compare_exchange_n(&dict->items.writer_pid, &expected, desired, false, __ATOMIC_RELAXED, __ATOMIC_RELAXED)) + fatal("DICTIONARY: Cannot unset thread %d as exclusive writer, expected %d, desired %d, found %d.", gettid(), expected, desired, __atomic_load_n(&dict->items.writer_pid, __ATOMIC_RELAXED)); +} + +static inline bool ll_recursive_lock_is_thread_the_writer(DICTIONARY *dict) { + pid_t tid = gettid(); + return tid > 0 && tid == __atomic_load_n(&dict->items.writer_pid, __ATOMIC_RELAXED); +} + +static inline void ll_recursive_lock(DICTIONARY *dict, char rw) { + if(unlikely(is_dictionary_single_threaded(dict))) + return; + + if(ll_recursive_lock_is_thread_the_writer(dict)) { + 
dict->items.writer_depth++; + return; + } + + if(rw == DICTIONARY_LOCK_READ || rw == DICTIONARY_LOCK_REENTRANT || rw == 'R') { + // read lock + rw_spinlock_read_lock(&dict->items.rw_spinlock); + } + else { + // write lock + rw_spinlock_write_lock(&dict->items.rw_spinlock); + ll_recursive_lock_set_thread_as_writer(dict); + } +} + +static inline void ll_recursive_unlock(DICTIONARY *dict, char rw) { + if(unlikely(is_dictionary_single_threaded(dict))) + return; + + if(ll_recursive_lock_is_thread_the_writer(dict) && dict->items.writer_depth > 0) { + dict->items.writer_depth--; + return; + } + + if(rw == DICTIONARY_LOCK_READ || rw == DICTIONARY_LOCK_REENTRANT || rw == 'R') { + // read unlock + + rw_spinlock_read_unlock(&dict->items.rw_spinlock); + } + else { + // write unlock + + ll_recursive_unlock_unset_thread_writer(dict); + + rw_spinlock_write_unlock(&dict->items.rw_spinlock); + } +} + +static inline void dictionary_index_lock_rdlock(DICTIONARY *dict) { + if(unlikely(is_dictionary_single_threaded(dict))) + return; + + rw_spinlock_read_lock(&dict->index.rw_spinlock); +} + +static inline void dictionary_index_rdlock_unlock(DICTIONARY *dict) { + if(unlikely(is_dictionary_single_threaded(dict))) + return; + + rw_spinlock_read_unlock(&dict->index.rw_spinlock); +} + +static inline void dictionary_index_lock_wrlock(DICTIONARY *dict) { + if(unlikely(is_dictionary_single_threaded(dict))) + return; + + rw_spinlock_write_lock(&dict->index.rw_spinlock); +} +static inline void dictionary_index_wrlock_unlock(DICTIONARY *dict) { + if(unlikely(is_dictionary_single_threaded(dict))) + return; + + rw_spinlock_write_unlock(&dict->index.rw_spinlock); +} + + +#endif //NETDATA_DICTIONARY_LOCKS_H diff --git a/src/libnetdata/dictionary/dictionary-refcount.h b/src/libnetdata/dictionary/dictionary-refcount.h new file mode 100644 index 000000000..8c7627f52 --- /dev/null +++ b/src/libnetdata/dictionary/dictionary-refcount.h @@ -0,0 +1,247 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + 
+#ifndef NETDATA_DICTIONARY_REFCOUNT_H +#define NETDATA_DICTIONARY_REFCOUNT_H + +#include "dictionary-internals.h" + +// ---------------------------------------------------------------------------- +// reference counters + +static inline size_t reference_counter_init(DICTIONARY *dict __maybe_unused) { + // allocate memory required for reference counters + // return number of bytes + return 0; +} + +static inline size_t reference_counter_free(DICTIONARY *dict __maybe_unused) { + // free memory required for reference counters + // return number of bytes + return 0; +} + +static inline void item_acquire(DICTIONARY *dict, DICTIONARY_ITEM *item) { + REFCOUNT refcount; + + if(unlikely(is_dictionary_single_threaded(dict))) + refcount = ++item->refcount; + + else + // increment the refcount + refcount = __atomic_add_fetch(&item->refcount, 1, __ATOMIC_SEQ_CST); + + + if(refcount <= 0) { + internal_error( + true, + "DICTIONARY: attempted to acquire item which is deleted (refcount = %d): " + "'%s' on dictionary created by %s() (%zu@%s)", + refcount - 1, + item_get_name(item), + dict->creation_function, + dict->creation_line, + dict->creation_file); + + fatal( + "DICTIONARY: request to acquire item '%s', which is deleted (refcount = %d)!", + item_get_name(item), + refcount - 1); + } + + if(refcount == 1) { + // referenced items counts number of unique items referenced + // so, we increase it only when refcount == 1 + DICTIONARY_REFERENCED_ITEMS_PLUS1(dict); + + // if this is a deleted item, but the counter increased to 1 + // we need to remove it from the pending items to delete + if(item_flag_check(item, ITEM_FLAG_DELETED)) + DICTIONARY_PENDING_DELETES_MINUS1(dict); + } +} + +static inline void item_release(DICTIONARY *dict, DICTIONARY_ITEM *item) { + // this function may be called without any lock on the dictionary + // or even when someone else has 'write' lock on the dictionary + + bool is_deleted; + REFCOUNT refcount; + + if(unlikely(is_dictionary_single_threaded(dict))) 
{ + is_deleted = item->flags & ITEM_FLAG_DELETED; + refcount = --item->refcount; + } + else { + // get the flags before decrementing any reference counters + // (the other way around may lead to use-after-free) + is_deleted = item_flag_check(item, ITEM_FLAG_DELETED); + + // decrement the refcount + refcount = __atomic_sub_fetch(&item->refcount, 1, __ATOMIC_RELEASE); + } + + if(refcount < 0) { + internal_error( + true, + "DICTIONARY: attempted to release item without references (refcount = %d): " + "'%s' on dictionary created by %s() (%zu@%s)", + refcount + 1, + item_get_name(item), + dict->creation_function, + dict->creation_line, + dict->creation_file); + + fatal( + "DICTIONARY: attempted to release item '%s' without references (refcount = %d)", + item_get_name(item), + refcount + 1); + } + + if(refcount == 0) { + + if(is_deleted) + DICTIONARY_PENDING_DELETES_PLUS1(dict); + + // referenced items counts number of unique items referenced + // so, we decrease it only when refcount == 0 + DICTIONARY_REFERENCED_ITEMS_MINUS1(dict); + } +} + +static inline int item_check_and_acquire_advanced(DICTIONARY *dict, DICTIONARY_ITEM *item, bool having_index_lock) { + size_t spins = 0; + REFCOUNT refcount, desired; + + int ret = RC_ITEM_OK; + + refcount = DICTIONARY_ITEM_REFCOUNT_GET(dict, item); + + do { + spins++; + + if(refcount < 0) { + // we can't use this item + ret = RC_ITEM_IS_CURRENTLY_BEING_DELETED; + break; + } + + if(item_flag_check(item, ITEM_FLAG_DELETED)) { + // we can't use this item + ret = RC_ITEM_MARKED_FOR_DELETION; + break; + } + + desired = refcount + 1; + + } while(!__atomic_compare_exchange_n(&item->refcount, &refcount, desired, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)); + + // if ret == ITEM_OK, we acquired the item + + if(ret == RC_ITEM_OK) { + if (unlikely(is_view_dictionary(dict) && + item_shared_flag_check(item, ITEM_FLAG_DELETED) && + !item_flag_check(item, ITEM_FLAG_DELETED))) { + // but, we can't use this item + + if (having_index_lock) { + // 
delete it from the hashtable + if(hashtable_delete_unsafe(dict, item_get_name(item), item->key_len, item) == 0) + netdata_log_error("DICTIONARY: INTERNAL ERROR VIEW: tried to delete item with name '%s', " + "name_len %u that is not in the index", + item_get_name(item), (KEY_LEN_TYPE)(item->key_len)); + else + pointer_del(dict, item); + + // mark it in our dictionary as deleted too, + // this is safe to be done here, because we have got + // a reference counter on item + dict_item_set_deleted(dict, item); + + // decrement the refcount we incremented above + if (__atomic_sub_fetch(&item->refcount, 1, __ATOMIC_RELEASE) == 0) { + // this is a deleted item, and we are the last one + DICTIONARY_PENDING_DELETES_PLUS1(dict); + } + + // do not touch the item below this point + } else { + // this is traversal / walkthrough + // decrement the refcount we incremented above + __atomic_sub_fetch(&item->refcount, 1, __ATOMIC_RELEASE); + } + + return RC_ITEM_MARKED_FOR_DELETION; + } + + if(desired == 1) + DICTIONARY_REFERENCED_ITEMS_PLUS1(dict); + } + + if(unlikely(spins > 1)) + DICTIONARY_STATS_CHECK_SPINS_PLUS(dict, spins - 1); + + return ret; +} + +// if a dictionary item can be deleted, return true, otherwise return false +// we use the private reference counter +static inline int item_is_not_referenced_and_can_be_removed_advanced(DICTIONARY *dict, DICTIONARY_ITEM *item) { + // if we can set refcount to REFCOUNT_DELETING, we can delete this item + + size_t spins = 0; + REFCOUNT refcount, desired = REFCOUNT_DELETING; + + int ret = RC_ITEM_OK; + + refcount = DICTIONARY_ITEM_REFCOUNT_GET(dict, item); + + do { + spins++; + + if(refcount < 0) { + // we can't use this item + ret = RC_ITEM_IS_CURRENTLY_BEING_DELETED; + break; + } + + if(refcount > 0) { + // we can't delete this + ret = RC_ITEM_IS_REFERENCED; + break; + } + + if(item_flag_check(item, ITEM_FLAG_BEING_CREATED)) { + // we can't use this item + ret = RC_ITEM_IS_CURRENTLY_BEING_CREATED; + break; + } + } 
while(!__atomic_compare_exchange_n(&item->refcount, &refcount, desired, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)); + +#ifdef NETDATA_INTERNAL_CHECKS + if(ret == RC_ITEM_OK) + item->deleter_pid = gettid(); +#endif + + if(unlikely(spins > 1)) + DICTIONARY_STATS_DELETE_SPINS_PLUS(dict, spins - 1); + + return ret; +} + +// if a dictionary item can be freed, return true, otherwise return false +// we use the shared reference counter +static inline bool item_shared_release_and_check_if_it_can_be_freed(DICTIONARY *dict __maybe_unused, DICTIONARY_ITEM *item) { + // if we can set refcount to REFCOUNT_DELETING, we can delete this item + + REFCOUNT links = __atomic_sub_fetch(&item->shared->links, 1, __ATOMIC_RELEASE); + if(links == 0 && __atomic_compare_exchange_n(&item->shared->links, &links, REFCOUNT_DELETING, false, __ATOMIC_ACQUIRE, __ATOMIC_RELAXED)) { + + // we can delete it + return true; + } + + // we can't delete it + return false; +} + +#endif //NETDATA_DICTIONARY_REFCOUNT_H diff --git a/src/libnetdata/dictionary/dictionary-statistics.h b/src/libnetdata/dictionary/dictionary-statistics.h new file mode 100644 index 000000000..20eb81599 --- /dev/null +++ b/src/libnetdata/dictionary/dictionary-statistics.h @@ -0,0 +1,246 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_DICTIONARY_STATISTICS_H +#define NETDATA_DICTIONARY_STATISTICS_H + +#include "dictionary-internals.h" + +// ---------------------------------------------------------------------------- +// memory statistics + +#ifdef DICT_WITH_STATS +static inline void DICTIONARY_STATS_PLUS_MEMORY(DICTIONARY *dict, size_t key_size, size_t item_size, size_t value_size) { + if(key_size) + __atomic_fetch_add(&dict->stats->memory.index, (long)JUDYHS_INDEX_SIZE_ESTIMATE(key_size), __ATOMIC_RELAXED); + + if(item_size) + __atomic_fetch_add(&dict->stats->memory.dict, (long)item_size, __ATOMIC_RELAXED); + + if(value_size) + __atomic_fetch_add(&dict->stats->memory.values, (long)value_size, __ATOMIC_RELAXED); 
+} + +static inline void DICTIONARY_STATS_MINUS_MEMORY(DICTIONARY *dict, size_t key_size, size_t item_size, size_t value_size) { + if(key_size) + __atomic_fetch_sub(&dict->stats->memory.index, (long)JUDYHS_INDEX_SIZE_ESTIMATE(key_size), __ATOMIC_RELAXED); + + if(item_size) + __atomic_fetch_sub(&dict->stats->memory.dict, (long)item_size, __ATOMIC_RELAXED); + + if(value_size) + __atomic_fetch_sub(&dict->stats->memory.values, (long)value_size, __ATOMIC_RELAXED); +} +#else +#define DICTIONARY_STATS_PLUS_MEMORY(dict, key_size, item_size, value_size) do {(void)item_size;} while(0) +#define DICTIONARY_STATS_MINUS_MEMORY(dict, key_size, item_size, value_size) do {;} while(0) +#endif + +// ---------------------------------------------------------------------------- +// internal statistics API + +#ifdef DICT_WITH_STATS +static inline void DICTIONARY_STATS_SEARCHES_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->ops.searches, 1, __ATOMIC_RELAXED); +} +#else +#define DICTIONARY_STATS_SEARCHES_PLUS1(dict) do {;} while(0) +#endif + +static inline void DICTIONARY_ENTRIES_PLUS1(DICTIONARY *dict) { +#ifdef DICT_WITH_STATS + // statistics + __atomic_fetch_add(&dict->stats->items.entries, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&dict->stats->items.referenced, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&dict->stats->ops.inserts, 1, __ATOMIC_RELAXED); +#endif + + if(unlikely(is_dictionary_single_threaded(dict))) { + dict->version++; + dict->entries++; + dict->referenced_items++; + + } + else { + __atomic_fetch_add(&dict->version, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&dict->entries, 1, __ATOMIC_RELAXED); + __atomic_fetch_add(&dict->referenced_items, 1, __ATOMIC_RELAXED); + } +} + +static inline void DICTIONARY_ENTRIES_MINUS1(DICTIONARY *dict) { +#ifdef DICT_WITH_STATS + // statistics + __atomic_fetch_add(&dict->stats->ops.deletes, 1, __ATOMIC_RELAXED); + __atomic_fetch_sub(&dict->stats->items.entries, 1, __ATOMIC_RELAXED); +#endif + + size_t entries; (void)entries; 
+ if(unlikely(is_dictionary_single_threaded(dict))) { + dict->version++; + entries = dict->entries--; + } + else { + __atomic_fetch_add(&dict->version, 1, __ATOMIC_RELAXED); + entries = __atomic_fetch_sub(&dict->entries, 1, __ATOMIC_RELAXED); + } + + internal_fatal(entries == 0, + "DICT: negative number of entries in dictionary created from %s() (%zu@%s)", + dict->creation_function, + dict->creation_line, + dict->creation_file); +} + +static inline void DICTIONARY_VALUE_RESETS_PLUS1(DICTIONARY *dict) { +#ifdef DICT_WITH_STATS + __atomic_fetch_add(&dict->stats->ops.resets, 1, __ATOMIC_RELAXED); +#endif + + if(unlikely(is_dictionary_single_threaded(dict))) + dict->version++; + else + __atomic_fetch_add(&dict->version, 1, __ATOMIC_RELAXED); +} + +#ifdef DICT_WITH_STATS +static inline void DICTIONARY_STATS_TRAVERSALS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->ops.traversals, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_WALKTHROUGHS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->ops.walkthroughs, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_CHECK_SPINS_PLUS(DICTIONARY *dict, size_t count) { + __atomic_fetch_add(&dict->stats->spin_locks.use_spins, count, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_INSERT_SPINS_PLUS(DICTIONARY *dict, size_t count) { + __atomic_fetch_add(&dict->stats->spin_locks.insert_spins, count, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_DELETE_SPINS_PLUS(DICTIONARY *dict, size_t count) { + __atomic_fetch_add(&dict->stats->spin_locks.delete_spins, count, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_SEARCH_IGNORES_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->spin_locks.search_spins, 1, __ATOMIC_RELAXED); +} +static inline void DICTIONARY_STATS_CALLBACK_INSERTS_PLUS1(DICTIONARY *dict) { + __atomic_fetch_add(&dict->stats->callbacks.inserts, 1, __ATOMIC_RELEASE); +} +static inline void 
#ifdef DICT_WITH_STATS

// --- operations ---

static inline void DICTIONARY_STATS_TRAVERSALS_PLUS1(DICTIONARY *dict) {
    __atomic_fetch_add(&dict->stats->ops.traversals, 1, __ATOMIC_RELAXED);
}

static inline void DICTIONARY_STATS_WALKTHROUGHS_PLUS1(DICTIONARY *dict) {
    __atomic_fetch_add(&dict->stats->ops.walkthroughs, 1, __ATOMIC_RELAXED);
}

// --- spins (retries); 'count' is the number of spins to add ---

static inline void DICTIONARY_STATS_CHECK_SPINS_PLUS(DICTIONARY *dict, size_t count) {
    __atomic_fetch_add(&dict->stats->spin_locks.use_spins, count, __ATOMIC_RELAXED);
}

static inline void DICTIONARY_STATS_INSERT_SPINS_PLUS(DICTIONARY *dict, size_t count) {
    __atomic_fetch_add(&dict->stats->spin_locks.insert_spins, count, __ATOMIC_RELAXED);
}

static inline void DICTIONARY_STATS_DELETE_SPINS_PLUS(DICTIONARY *dict, size_t count) {
    __atomic_fetch_add(&dict->stats->spin_locks.delete_spins, count, __ATOMIC_RELAXED);
}

// ignored search results are accumulated in the search_spins counter
static inline void DICTIONARY_STATS_SEARCH_IGNORES_PLUS1(DICTIONARY *dict) {
    __atomic_fetch_add(&dict->stats->spin_locks.search_spins, 1, __ATOMIC_RELAXED);
}

// --- callbacks executed ---

static inline void DICTIONARY_STATS_CALLBACK_INSERTS_PLUS1(DICTIONARY *dict) {
    __atomic_fetch_add(&dict->stats->callbacks.inserts, 1, __ATOMIC_RELEASE);
}

static inline void DICTIONARY_STATS_CALLBACK_CONFLICTS_PLUS1(DICTIONARY *dict) {
    __atomic_fetch_add(&dict->stats->callbacks.conflicts, 1, __ATOMIC_RELEASE);
}

static inline void DICTIONARY_STATS_CALLBACK_REACTS_PLUS1(DICTIONARY *dict) {
    __atomic_fetch_add(&dict->stats->callbacks.reacts, 1, __ATOMIC_RELEASE);
}

static inline void DICTIONARY_STATS_CALLBACK_DELETES_PLUS1(DICTIONARY *dict) {
    __atomic_fetch_add(&dict->stats->callbacks.deletes, 1, __ATOMIC_RELEASE);
}

// --- dictionary lifecycle ---

static inline void DICTIONARY_STATS_GARBAGE_COLLECTIONS_PLUS1(DICTIONARY *dict) {
    __atomic_fetch_add(&dict->stats->ops.garbage_collections, 1, __ATOMIC_RELAXED);
}

// one more active dictionary, one more creation
static inline void DICTIONARY_STATS_DICT_CREATIONS_PLUS1(DICTIONARY *dict) {
    __atomic_fetch_add(&dict->stats->dictionaries.active, 1, __ATOMIC_RELAXED);
    __atomic_fetch_add(&dict->stats->ops.creations, 1, __ATOMIC_RELAXED);
}

// one less active dictionary, one more destruction
static inline void DICTIONARY_STATS_DICT_DESTRUCTIONS_PLUS1(DICTIONARY *dict) {
    __atomic_fetch_sub(&dict->stats->dictionaries.active, 1, __ATOMIC_RELAXED);
    __atomic_fetch_add(&dict->stats->ops.destructions, 1, __ATOMIC_RELAXED);
}

static inline void DICTIONARY_STATS_DICT_DESTROY_QUEUED_PLUS1(DICTIONARY *dict) {
    __atomic_fetch_add(&dict->stats->dictionaries.deleted, 1, __ATOMIC_RELAXED);
}

static inline void DICTIONARY_STATS_DICT_DESTROY_QUEUED_MINUS1(DICTIONARY *dict) {
    __atomic_fetch_sub(&dict->stats->dictionaries.deleted, 1, __ATOMIC_RELAXED);
}

static inline void DICTIONARY_STATS_DICT_FLUSHES_PLUS1(DICTIONARY *dict) {
    __atomic_fetch_add(&dict->stats->ops.flushes, 1, __ATOMIC_RELAXED);
}

#else

// without statistics, all the counters above compile to nothing
#define DICTIONARY_STATS_TRAVERSALS_PLUS1(dict) do {;} while(0)
#define DICTIONARY_STATS_WALKTHROUGHS_PLUS1(dict) do {;} while(0)
#define DICTIONARY_STATS_CHECK_SPINS_PLUS(dict, count) do {;} while(0)
#define DICTIONARY_STATS_INSERT_SPINS_PLUS(dict, count) do {;} while(0)
#define DICTIONARY_STATS_DELETE_SPINS_PLUS(dict, count) do {;} while(0)
#define DICTIONARY_STATS_SEARCH_IGNORES_PLUS1(dict) do {;} while(0)
#define DICTIONARY_STATS_CALLBACK_INSERTS_PLUS1(dict) do {;} while(0)
#define DICTIONARY_STATS_CALLBACK_CONFLICTS_PLUS1(dict) do {;} while(0)
#define DICTIONARY_STATS_CALLBACK_REACTS_PLUS1(dict) do {;} while(0)
#define DICTIONARY_STATS_CALLBACK_DELETES_PLUS1(dict) do {;} while(0)
#define DICTIONARY_STATS_GARBAGE_COLLECTIONS_PLUS1(dict) do {;} while(0)
#define DICTIONARY_STATS_DICT_CREATIONS_PLUS1(dict) do {;} while(0)
#define DICTIONARY_STATS_DICT_DESTRUCTIONS_PLUS1(dict) do {;} while(0)
#define DICTIONARY_STATS_DICT_DESTROY_QUEUED_PLUS1(dict) do {;} while(0)
#define DICTIONARY_STATS_DICT_DESTROY_QUEUED_MINUS1(dict) do {;} while(0)
#define DICTIONARY_STATS_DICT_FLUSHES_PLUS1(dict) do {;} while(0)

#endif

// Increase the number of referenced items of the dictionary by one.
static inline void DICTIONARY_REFERENCED_ITEMS_PLUS1(DICTIONARY *dict) {
#ifdef DICT_WITH_STATS
    __atomic_fetch_add(&dict->stats->items.referenced, 1, __ATOMIC_RELAXED);
#endif

    if(likely(!is_dictionary_single_threaded(dict)))
        __atomic_add_fetch(&dict->referenced_items, 1, __ATOMIC_RELAXED);
    else
        ++dict->referenced_items;
}

// Decrease the number of referenced items of the dictionary by one.
// With internal checks, it is fatal if the counter becomes negative.
static inline void DICTIONARY_REFERENCED_ITEMS_MINUS1(DICTIONARY *dict) {
#ifdef DICT_WITH_STATS
    __atomic_fetch_sub(&dict->stats->items.referenced, 1, __ATOMIC_RELAXED);
#endif

    // the number of referenced items AFTER the decrement
    long int remaining; (void)remaining;

    if(likely(!is_dictionary_single_threaded(dict)))
        remaining = __atomic_sub_fetch(&dict->referenced_items, 1, __ATOMIC_SEQ_CST);
    else
        remaining = --dict->referenced_items;

    internal_fatal(remaining < 0,
                   "DICT: negative number of referenced items (%ld) in dictionary created from %s() (%zu@%s)",
                   remaining,
                   dict->creation_function,
                   dict->creation_line,
                   dict->creation_file);
}
++dict->pending_deletion_items; + else + __atomic_add_fetch(&dict->pending_deletion_items, 1, __ATOMIC_RELEASE); +} + +static inline long int DICTIONARY_PENDING_DELETES_MINUS1(DICTIONARY *dict) { +#ifdef DICT_WITH_STATS + __atomic_fetch_sub(&dict->stats->items.pending_deletion, 1, __ATOMIC_RELEASE); +#endif + + if(unlikely(is_dictionary_single_threaded(dict))) + return --dict->pending_deletion_items; + else + return __atomic_sub_fetch(&dict->pending_deletion_items, 1, __ATOMIC_ACQUIRE); +} + +static inline long int DICTIONARY_PENDING_DELETES_GET(DICTIONARY *dict) { + if(unlikely(is_dictionary_single_threaded(dict))) + return dict->pending_deletion_items; + else + return __atomic_load_n(&dict->pending_deletion_items, __ATOMIC_SEQ_CST); +} + +static inline REFCOUNT DICTIONARY_ITEM_REFCOUNT_GET(DICTIONARY *dict, DICTIONARY_ITEM *item) { + if(unlikely(dict && is_dictionary_single_threaded(dict))) // this is an exception, dict can be null + return item->refcount; + else + return (REFCOUNT)__atomic_load_n(&item->refcount, __ATOMIC_ACQUIRE); +} + +static inline REFCOUNT DICTIONARY_ITEM_REFCOUNT_GET_SOLE(DICTIONARY_ITEM *item) { + return (REFCOUNT)__atomic_load_n(&item->refcount, __ATOMIC_ACQUIRE); +} + + +#endif //NETDATA_DICTIONARY_STATISTICS_H diff --git a/src/libnetdata/dictionary/dictionary-traversal.c b/src/libnetdata/dictionary/dictionary-traversal.c new file mode 100644 index 000000000..1e55dcbb7 --- /dev/null +++ b/src/libnetdata/dictionary/dictionary-traversal.c @@ -0,0 +1,268 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dictionary-internals.h" + + +// ---------------------------------------------------------------------------- +// traversal with loop + +void *dictionary_foreach_start_rw(DICTFE *dfe, DICTIONARY *dict, char rw) { + if(unlikely(!dfe || !dict)) return NULL; + + DICTIONARY_STATS_TRAVERSALS_PLUS1(dict); + + if(unlikely(is_dictionary_destroyed(dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_foreach_start_rw() on a 
destroyed dictionary"); + dfe->counter = 0; + dfe->item = NULL; + dfe->name = NULL; + dfe->value = NULL; + return NULL; + } + + dfe->counter = 0; + dfe->dict = dict; + dfe->rw = rw; + dfe->locked = true; + ll_recursive_lock(dict, dfe->rw); + + // get the first item from the list + DICTIONARY_ITEM *item = dict->items.list; + + // skip all the deleted items + while(item && !item_check_and_acquire(dict, item)) + item = item->next; + + if(likely(item)) { + dfe->item = item; + dfe->name = (char *)item_get_name(item); + dfe->value = item->shared->value; + } + else { + dfe->item = NULL; + dfe->name = NULL; + dfe->value = NULL; + } + + if(unlikely(dfe->rw == DICTIONARY_LOCK_REENTRANT)) { + ll_recursive_unlock(dfe->dict, dfe->rw); + dfe->locked = false; + } + + return dfe->value; +} + +void *dictionary_foreach_next(DICTFE *dfe) { + if(unlikely(!dfe || !dfe->dict)) return NULL; + + if(unlikely(is_dictionary_destroyed(dfe->dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_foreach_next() on a destroyed dictionary"); + dfe->item = NULL; + dfe->name = NULL; + dfe->value = NULL; + return NULL; + } + + if(unlikely(dfe->rw == DICTIONARY_LOCK_REENTRANT) || !dfe->locked) { + ll_recursive_lock(dfe->dict, dfe->rw); + dfe->locked = true; + } + + // the item we just did + DICTIONARY_ITEM *item = dfe->item; + + // get the next item from the list + DICTIONARY_ITEM *item_next = (item) ? 
item->next : NULL; + + // skip all the deleted items until one that can be acquired is found + while(item_next && !item_check_and_acquire(dfe->dict, item_next)) + item_next = item_next->next; + + if(likely(item)) { + dict_item_release_and_check_if_it_is_deleted_and_can_be_removed_under_this_lock_mode(dfe->dict, item, dfe->rw); + // item_release(dfe->dict, item); + } + + item = item_next; + if(likely(item)) { + dfe->item = item; + dfe->name = (char *)item_get_name(item); + dfe->value = item->shared->value; + dfe->counter++; + } + else { + dfe->item = NULL; + dfe->name = NULL; + dfe->value = NULL; + } + + if(unlikely(dfe->rw == DICTIONARY_LOCK_REENTRANT)) { + ll_recursive_unlock(dfe->dict, dfe->rw); + dfe->locked = false; + } + + return dfe->value; +} + +void dictionary_foreach_unlock(DICTFE *dfe) { + if(dfe->locked) { + ll_recursive_unlock(dfe->dict, dfe->rw); + dfe->locked = false; + } +} + +void dictionary_foreach_done(DICTFE *dfe) { + if(unlikely(!dfe || !dfe->dict)) return; + + if(unlikely(is_dictionary_destroyed(dfe->dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_foreach_next() on a destroyed dictionary"); + return; + } + + // the item we just did + DICTIONARY_ITEM *item = dfe->item; + + // release it, so that it can possibly be deleted + if(likely(item)) { + dict_item_release_and_check_if_it_is_deleted_and_can_be_removed_under_this_lock_mode(dfe->dict, item, dfe->rw); + // item_release(dfe->dict, item); + } + + if(likely(dfe->rw != DICTIONARY_LOCK_REENTRANT) && dfe->locked) { + ll_recursive_unlock(dfe->dict, dfe->rw); + dfe->locked = false; + } + + dfe->dict = NULL; + dfe->item = NULL; + dfe->name = NULL; + dfe->value = NULL; + dfe->counter = 0; +} + +// ---------------------------------------------------------------------------- +// API - walk through the dictionary. +// The dictionary is locked for reading while this happens +// do not use other dictionary calls while walking the dictionary - deadlock! 
+ +int dictionary_walkthrough_rw(DICTIONARY *dict, char rw, dict_walkthrough_callback_t walkthrough_callback, void *data) { + if(unlikely(!dict || !walkthrough_callback)) return 0; + + if(unlikely(is_dictionary_destroyed(dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_walkthrough_rw() on a destroyed dictionary"); + return 0; + } + + ll_recursive_lock(dict, rw); + + DICTIONARY_STATS_WALKTHROUGHS_PLUS1(dict); + + // written in such a way, that the callback can delete the active element + + int ret = 0; + DICTIONARY_ITEM *item = dict->items.list, *item_next; + while(item) { + + // skip the deleted items + if(unlikely(!item_check_and_acquire(dict, item))) { + item = item->next; + continue; + } + + if(unlikely(rw == DICTIONARY_LOCK_REENTRANT)) + ll_recursive_unlock(dict, rw); + + int r = walkthrough_callback(item, item->shared->value, data); + + if(unlikely(rw == DICTIONARY_LOCK_REENTRANT)) + ll_recursive_lock(dict, rw); + + // since we have a reference counter, this item cannot be deleted + // until we release the reference counter, so the pointers are there + item_next = item->next; + + dict_item_release_and_check_if_it_is_deleted_and_can_be_removed_under_this_lock_mode(dict, item, rw); + // item_release(dict, item); + + if(unlikely(r < 0)) { + ret = r; + break; + } + + ret += r; + + item = item_next; + } + + ll_recursive_unlock(dict, rw); + + return ret; +} + +// ---------------------------------------------------------------------------- +// sorted walkthrough + +typedef int (*qsort_compar)(const void *item1, const void *item2); + +static int dictionary_sort_compar(const void *item1, const void *item2) { + return strcmp(item_get_name((*(DICTIONARY_ITEM **)item1)), item_get_name((*(DICTIONARY_ITEM **)item2))); +} + +int dictionary_sorted_walkthrough_rw(DICTIONARY *dict, char rw, dict_walkthrough_callback_t walkthrough_callback, void *data, dict_item_comparator_t item_comparator) { + if(unlikely(!dict || !walkthrough_callback)) return 0; + + 
if(unlikely(is_dictionary_destroyed(dict))) { + internal_error(true, "DICTIONARY: attempted to dictionary_sorted_walkthrough_rw() on a destroyed dictionary"); + return 0; + } + + DICTIONARY_STATS_WALKTHROUGHS_PLUS1(dict); + + ll_recursive_lock(dict, rw); + size_t entries = __atomic_load_n(&dict->entries, __ATOMIC_RELAXED); + DICTIONARY_ITEM **array = mallocz(sizeof(DICTIONARY_ITEM *) * entries); + + size_t i; + DICTIONARY_ITEM *item; + for(item = dict->items.list, i = 0; item && i < entries; item = item->next) { + if(likely(item_check_and_acquire(dict, item))) + array[i++] = item; + } + ll_recursive_unlock(dict, rw); + + if(unlikely(i != entries)) + entries = i; + + if(item_comparator) + qsort(array, entries, sizeof(DICTIONARY_ITEM *), (qsort_compar) item_comparator); + else + qsort(array, entries, sizeof(DICTIONARY_ITEM *), dictionary_sort_compar); + + bool callit = true; + int ret = 0, r; + for(i = 0; i < entries ;i++) { + item = array[i]; + + if(callit) + r = walkthrough_callback(item, item->shared->value, data); + + dict_item_release_and_check_if_it_is_deleted_and_can_be_removed_under_this_lock_mode(dict, item, rw); + // item_release(dict, item); + + if(r < 0) { + ret = r; + r = 0; + + // stop calling the callback, + // but we have to continue, to release all the reference counters + callit = false; + } + else + ret += r; + } + + freez(array); + + return ret; +} + diff --git a/src/libnetdata/dictionary/dictionary-unittest.c b/src/libnetdata/dictionary/dictionary-unittest.c new file mode 100644 index 000000000..6db427690 --- /dev/null +++ b/src/libnetdata/dictionary/dictionary-unittest.c @@ -0,0 +1,1195 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dictionary-internals.h" + +// ---------------------------------------------------------------------------- +// unit test + +static void dictionary_unittest_free_char_pp(char **pp, size_t entries) { + for(size_t i = 0; i < entries ;i++) + freez(pp[i]); + + freez(pp); +} + +static char 
**dictionary_unittest_generate_names(size_t entries) { + char **names = mallocz(sizeof(char *) * entries); + for(size_t i = 0; i < entries ;i++) { + char buf[25 + 1] = ""; + snprintfz(buf, sizeof(buf), "name.%zu.0123456789.%zu!@#$%%^&*(),./[]{}\\|~`", i, entries / 2 + i); + names[i] = strdupz(buf); + } + return names; +} + +static char **dictionary_unittest_generate_values(size_t entries) { + char **values = mallocz(sizeof(char *) * entries); + for(size_t i = 0; i < entries ;i++) { + char buf[25 + 1] = ""; + snprintfz(buf, sizeof(buf), "value-%zu-0987654321.%zu%%^&*(),. \t !@#$/[]{}\\|~`", i, entries / 2 + i); + values[i] = strdupz(buf); + } + return values; +} + +static size_t dictionary_unittest_set_clone(DICTIONARY *dict, char **names, char **values, size_t entries) { + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + size_t vallen = strlen(values[i]); + char *val = (char *)dictionary_set(dict, names[i], values[i], vallen); + if(val == values[i]) { fprintf(stderr, ">>> %s() returns reference to value\n", __FUNCTION__); errors++; } + if(!val || memcmp(val, values[i], vallen) != 0) { fprintf(stderr, ">>> %s() returns invalid value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_set_null(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)values; + size_t errors = 0; + size_t i = 0; + for(; i < entries ;i++) { + void *val = dictionary_set(dict, names[i], NULL, 0); + if(val != NULL) { fprintf(stderr, ">>> %s() returns a non NULL value\n", __FUNCTION__); errors++; } + } + if(dictionary_entries(dict) != i) { + fprintf(stderr, ">>> %s() dictionary items do not match\n", __FUNCTION__); + errors++; + } + return errors; +} + + +static size_t dictionary_unittest_set_nonclone(DICTIONARY *dict, char **names, char **values, size_t entries) { + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + size_t vallen = strlen(values[i]); + char *val = (char *)dictionary_set(dict, names[i], values[i], 
vallen); + if(val != values[i]) { fprintf(stderr, ">>> %s() returns invalid pointer to value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_get_clone(DICTIONARY *dict, char **names, char **values, size_t entries) { + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + size_t vallen = strlen(values[i]); + char *val = (char *)dictionary_get(dict, names[i]); + if(val == values[i]) { fprintf(stderr, ">>> %s() returns reference to value\n", __FUNCTION__); errors++; } + if(!val || memcmp(val, values[i], vallen) != 0) { fprintf(stderr, ">>> %s() returns invalid value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_get_nonclone(DICTIONARY *dict, char **names, char **values, size_t entries) { + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + char *val = (char *)dictionary_get(dict, names[i]); + if(val != values[i]) { fprintf(stderr, ">>> %s() returns invalid pointer to value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_get_nonexisting(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + char *val = (char *)dictionary_get(dict, values[i]); + if(val) { fprintf(stderr, ">>> %s() returns non-existing item\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_del_nonexisting(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + bool ret = dictionary_del(dict, values[i]); + if(ret) { fprintf(stderr, ">>> %s() deleted non-existing item\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_del_existing(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)values; + size_t errors = 0; + + size_t forward_from = 0, forward_to = entries / 3; + size_t middle_from 
= forward_to, middle_to = entries * 2 / 3; + size_t backward_from = middle_to, backward_to = entries; + + for(size_t i = forward_from; i < forward_to ;i++) { + bool ret = dictionary_del(dict, names[i]); + if(!ret) { fprintf(stderr, ">>> %s() didn't delete (forward) existing item\n", __FUNCTION__); errors++; } + } + + for(size_t i = middle_to - 1; i >= middle_from ;i--) { + bool ret = dictionary_del(dict, names[i]); + if(!ret) { fprintf(stderr, ">>> %s() didn't delete (middle) existing item\n", __FUNCTION__); errors++; } + } + + for(size_t i = backward_to - 1; i >= backward_from ;i--) { + bool ret = dictionary_del(dict, names[i]); + if(!ret) { fprintf(stderr, ">>> %s() didn't delete (backward) existing item\n", __FUNCTION__); errors++; } + } + + return errors; +} + +static size_t dictionary_unittest_reset_clone(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)values; + // set the name as value too + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + size_t vallen = strlen(names[i]); + char *val = (char *)dictionary_set(dict, names[i], names[i], vallen); + if(val == names[i]) { fprintf(stderr, ">>> %s() returns reference to value\n", __FUNCTION__); errors++; } + if(!val || memcmp(val, names[i], vallen) != 0) { fprintf(stderr, ">>> %s() returns invalid value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t dictionary_unittest_reset_nonclone(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)values; + // set the name as value too + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + size_t vallen = strlen(names[i]); + char *val = (char *)dictionary_set(dict, names[i], names[i], vallen); + if(val != names[i]) { fprintf(stderr, ">>> %s() returns invalid pointer to value\n", __FUNCTION__); errors++; } + if(!val) { fprintf(stderr, ">>> %s() returns invalid value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static size_t 
dictionary_unittest_reset_dont_overwrite_nonclone(DICTIONARY *dict, char **names, char **values, size_t entries) { + // set the name as value too + size_t errors = 0; + for(size_t i = 0; i < entries ;i++) { + size_t vallen = strlen(names[i]); + char *val = (char *)dictionary_set(dict, names[i], names[i], vallen); + if(val != values[i]) { fprintf(stderr, ">>> %s() returns invalid pointer to value\n", __FUNCTION__); errors++; } + } + return errors; +} + +static int dictionary_unittest_walkthrough_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value __maybe_unused, void *data __maybe_unused) { + return 1; +} + +static size_t dictionary_unittest_walkthrough(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + int sum = dictionary_walkthrough_read(dict, dictionary_unittest_walkthrough_callback, NULL); + if(sum < (int)entries) return entries - sum; + else return sum - entries; +} + +static int dictionary_unittest_walkthrough_delete_this_callback(const DICTIONARY_ITEM *item, void *value __maybe_unused, void *data) { + const char *name = dictionary_acquired_item_name((DICTIONARY_ITEM *)item); + + if(!dictionary_del((DICTIONARY *)data, name)) + return 0; + + return 1; +} + +static size_t dictionary_unittest_walkthrough_delete_this(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + int sum = dictionary_walkthrough_write(dict, dictionary_unittest_walkthrough_delete_this_callback, dict); + if(sum < (int)entries) return entries - sum; + else return sum - entries; +} + +static int dictionary_unittest_walkthrough_stop_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value __maybe_unused, void *data __maybe_unused) { + return -1; +} + +static size_t dictionary_unittest_walkthrough_stop(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + (void)entries; + int sum = dictionary_walkthrough_read(dict, 
dictionary_unittest_walkthrough_stop_callback, NULL); + if(sum != -1) return 1; + return 0; +} + +static size_t dictionary_unittest_foreach(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + (void)entries; + size_t count = 0; + char *item; + dfe_start_read(dict, item) + count++; + dfe_done(item); + + if(count > entries) return count - entries; + return entries - count; +} + +static size_t dictionary_unittest_foreach_delete_this(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + (void)entries; + size_t count = 0; + char *item; + dfe_start_write(dict, item) + if(dictionary_del(dict, item_dfe.name)) count++; + dfe_done(item); + + if(count > entries) return count - entries; + return entries - count; +} + +static size_t dictionary_unittest_destroy(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + (void)entries; + size_t bytes = dictionary_destroy(dict); + fprintf(stderr, " %s() freed %zu bytes,", __FUNCTION__, bytes); + return 0; +} + +static usec_t dictionary_unittest_run_and_measure_time(DICTIONARY *dict, char *message, char **names, char **values, size_t entries, size_t *errors, size_t (*callback)(DICTIONARY *dict, char **names, char **values, size_t entries)) { + fprintf(stderr, "%40s ... 
", message); + + usec_t started = now_realtime_usec(); + size_t errs = callback(dict, names, values, entries); + usec_t ended = now_realtime_usec(); + usec_t dt = ended - started; + + if(callback == dictionary_unittest_destroy) dict = NULL; + + long int found_ok = 0, found_deleted = 0, found_referenced = 0; + if(dict) { + DICTIONARY_ITEM *item; + DOUBLE_LINKED_LIST_FOREACH_FORWARD(dict->items.list, item, prev, next) { + if(item->refcount >= 0 && !(item ->flags & ITEM_FLAG_DELETED)) + found_ok++; + else + found_deleted++; + + if(item->refcount > 0) + found_referenced++; + } + } + + fprintf(stderr, " %zu errors, %d (found %ld) items in dictionary, %d (found %ld) referenced, %d (found %ld) deleted, %"PRIu64" usec \n", + errs, dict?dict->entries:0, found_ok, dict?dict->referenced_items:0, found_referenced, dict?dict->pending_deletion_items:0, found_deleted, dt); + *errors += errs; + return dt; +} + +static void dictionary_unittest_clone(DICTIONARY *dict, char **names, char **values, size_t entries, size_t *errors) { + dictionary_unittest_run_and_measure_time(dict, "adding entries", names, values, entries, errors, dictionary_unittest_set_clone); + dictionary_unittest_run_and_measure_time(dict, "getting entries", names, values, entries, errors, dictionary_unittest_get_clone); + dictionary_unittest_run_and_measure_time(dict, "getting non-existing entries", names, values, entries, errors, dictionary_unittest_get_nonexisting); + dictionary_unittest_run_and_measure_time(dict, "resetting entries", names, values, entries, errors, dictionary_unittest_reset_clone); + dictionary_unittest_run_and_measure_time(dict, "deleting non-existing entries", names, values, entries, errors, dictionary_unittest_del_nonexisting); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach read loop", names, values, entries, errors, dictionary_unittest_foreach); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback", names, values, entries, errors, 
dictionary_unittest_walkthrough); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback stop", names, values, entries, errors, dictionary_unittest_walkthrough_stop); + dictionary_unittest_run_and_measure_time(dict, "deleting existing entries", names, values, entries, errors, dictionary_unittest_del_existing); + dictionary_unittest_run_and_measure_time(dict, "walking through empty", names, values, 0, errors, dictionary_unittest_walkthrough); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach empty", names, values, 0, errors, dictionary_unittest_foreach); + dictionary_unittest_run_and_measure_time(dict, "destroying empty dictionary", names, values, entries, errors, dictionary_unittest_destroy); +} + +static void dictionary_unittest_nonclone(DICTIONARY *dict, char **names, char **values, size_t entries, size_t *errors) { + dictionary_unittest_run_and_measure_time(dict, "adding entries", names, values, entries, errors, dictionary_unittest_set_nonclone); + dictionary_unittest_run_and_measure_time(dict, "getting entries", names, values, entries, errors, dictionary_unittest_get_nonclone); + dictionary_unittest_run_and_measure_time(dict, "getting non-existing entries", names, values, entries, errors, dictionary_unittest_get_nonexisting); + dictionary_unittest_run_and_measure_time(dict, "resetting entries", names, values, entries, errors, dictionary_unittest_reset_nonclone); + dictionary_unittest_run_and_measure_time(dict, "deleting non-existing entries", names, values, entries, errors, dictionary_unittest_del_nonexisting); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach read loop", names, values, entries, errors, dictionary_unittest_foreach); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback", names, values, entries, errors, dictionary_unittest_walkthrough); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback stop", names, values, entries, errors, 
dictionary_unittest_walkthrough_stop); + dictionary_unittest_run_and_measure_time(dict, "deleting existing entries", names, values, entries, errors, dictionary_unittest_del_existing); + dictionary_unittest_run_and_measure_time(dict, "walking through empty", names, values, 0, errors, dictionary_unittest_walkthrough); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach empty", names, values, 0, errors, dictionary_unittest_foreach); + dictionary_unittest_run_and_measure_time(dict, "destroying empty dictionary", names, values, entries, errors, dictionary_unittest_destroy); +} + +struct dictionary_unittest_sorting { + const char *old_name; + const char *old_value; + size_t count; +}; + +static int dictionary_unittest_sorting_callback(const DICTIONARY_ITEM *item, void *value, void *data) { + const char *name = dictionary_acquired_item_name((DICTIONARY_ITEM *)item); + struct dictionary_unittest_sorting *t = (struct dictionary_unittest_sorting *)data; + const char *v = (const char *)value; + + int ret = 0; + if(t->old_name && strcmp(t->old_name, name) > 0) { + fprintf(stderr, "name '%s' should be after '%s'\n", t->old_name, name); + ret = 1; + } + t->count++; + t->old_name = name; + t->old_value = v; + + return ret; +} + +static size_t dictionary_unittest_sorted_walkthrough(DICTIONARY *dict, char **names, char **values, size_t entries) { + (void)names; + (void)values; + struct dictionary_unittest_sorting tmp = { .old_name = NULL, .old_value = NULL, .count = 0 }; + size_t errors; + errors = dictionary_sorted_walkthrough_read(dict, dictionary_unittest_sorting_callback, &tmp); + + if(tmp.count != entries) { + fprintf(stderr, "Expected %zu entries, counted %zu\n", entries, tmp.count); + errors++; + } + return errors; +} + +static void dictionary_unittest_sorting(DICTIONARY *dict, char **names, char **values, size_t entries, size_t *errors) { + dictionary_unittest_run_and_measure_time(dict, "adding entries", names, values, entries, errors, 
dictionary_unittest_set_clone); + dictionary_unittest_run_and_measure_time(dict, "sorted walkthrough", names, values, entries, errors, dictionary_unittest_sorted_walkthrough); +} + +static void dictionary_unittest_null_dfe(DICTIONARY *dict, char **names, char **values, size_t entries, size_t *errors) { + dictionary_unittest_run_and_measure_time(dict, "adding null value entries", names, values, entries, errors, dictionary_unittest_set_null); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach read loop", names, values, entries, errors, dictionary_unittest_foreach); +} + + +static int unittest_check_dictionary_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value __maybe_unused, void *data __maybe_unused) { + return 1; +} + +static size_t unittest_check_dictionary(const char *label, DICTIONARY *dict, size_t traversable, size_t active_items, size_t deleted_items, size_t referenced_items, size_t pending_deletion) { + size_t errors = 0; + + size_t ll = 0; + void *t; + dfe_start_read(dict, t) + ll++; + dfe_done(t); + + fprintf(stderr, "DICT %-20s: dictionary foreach entries %zu, expected %zu...\t\t\t\t\t", + label, ll, traversable); + if(ll != traversable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + ll = dictionary_walkthrough_read(dict, unittest_check_dictionary_callback, NULL); + fprintf(stderr, "DICT %-20s: dictionary walkthrough entries %zu, expected %zu...\t\t\t\t", + label, ll, traversable); + if(ll != traversable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + ll = dictionary_sorted_walkthrough_read(dict, unittest_check_dictionary_callback, NULL); + fprintf(stderr, "DICT %-20s: dictionary sorted walkthrough entries %zu, expected %zu...\t\t\t", + label, ll, traversable); + if(ll != traversable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + DICTIONARY_ITEM *item; + size_t active = 0, deleted = 0, referenced = 0, 
pending = 0; + for(item = dict->items.list; item; item = item->next) { + if(!(item->flags & ITEM_FLAG_DELETED) && !(item->shared->flags & ITEM_FLAG_DELETED)) + active++; + else { + deleted++; + + if(item->refcount == 0) + pending++; + } + + if(item->refcount > 0) + referenced++; + } + + fprintf(stderr, "DICT %-20s: dictionary active items reported %d, counted %zu, expected %zu...\t\t\t", + label, dict->entries, active, active_items); + if(active != active_items || active != (size_t)dict->entries) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + fprintf(stderr, "DICT %-20s: dictionary deleted items counted %zu, expected %zu...\t\t\t\t", + label, deleted, deleted_items); + if(deleted != deleted_items) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + fprintf(stderr, "DICT %-20s: dictionary referenced items reported %d, counted %zu, expected %zu...\t\t", + label, dict->referenced_items, referenced, referenced_items); + if(referenced != referenced_items || dict->referenced_items != (long int)referenced) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + fprintf(stderr, "DICT %-20s: dictionary pending deletion items reported %d, counted %zu, expected %zu...\t", + label, dict->pending_deletion_items, pending, pending_deletion); + if(pending != pending_deletion || pending != (size_t)dict->pending_deletion_items) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + return errors; +} + +static int check_item_callback(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data) { + return value == data; +} + +static size_t unittest_check_item(const char *label, DICTIONARY *dict, + DICTIONARY_ITEM *item, const char *name, const char *value, int refcount, + ITEM_FLAGS deleted_flags, bool searchable, bool browsable, bool linked) { + size_t errors = 0; + + fprintf(stderr, "ITEM %-20s: name is '%s', expected '%s'...\t\t\t\t\t\t", 
label, item_get_name(item), name); + if(strcmp(item_get_name(item), name) != 0) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + fprintf(stderr, "ITEM %-20s: value is '%s', expected '%s'...\t\t\t\t\t", label, (const char *)item->shared->value, value); + if(strcmp((const char *)item->shared->value, value) != 0) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + fprintf(stderr, "ITEM %-20s: refcount is %d, expected %d...\t\t\t\t\t\t\t", label, item->refcount, refcount); + if (item->refcount != refcount) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + fprintf(stderr, "ITEM %-20s: deleted flag is %s, expected %s...\t\t\t\t\t", label, + (item->flags & ITEM_FLAG_DELETED || item->shared->flags & ITEM_FLAG_DELETED)?"true":"false", + (deleted_flags & ITEM_FLAG_DELETED)?"true":"false"); + + if ((item->flags & ITEM_FLAG_DELETED || item->shared->flags & ITEM_FLAG_DELETED) != (deleted_flags & ITEM_FLAG_DELETED)) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + void *v = dictionary_get(dict, name); + bool found = v == item->shared->value; + fprintf(stderr, "ITEM %-20s: searchable %5s, expected %5s...\t\t\t\t\t\t", label, + found?"true":"false", searchable?"true":"false"); + if(found != searchable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + found = false; + void *t; + dfe_start_read(dict, t) { + if(t == item->shared->value) found = true; + } + dfe_done(t); + + fprintf(stderr, "ITEM %-20s: dfe browsable %5s, expected %5s...\t\t\t\t\t", label, + found?"true":"false", browsable?"true":"false"); + if(found != browsable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + found = dictionary_walkthrough_read(dict, check_item_callback, item->shared->value); + fprintf(stderr, "ITEM %-20s: walkthrough browsable %5s, expected %5s...\t\t\t\t", label, + 
found?"true":"false", browsable?"true":"false"); + if(found != browsable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + found = dictionary_sorted_walkthrough_read(dict, check_item_callback, item->shared->value); + fprintf(stderr, "ITEM %-20s: sorted walkthrough browsable %5s, expected %5s...\t\t\t", label, + found?"true":"false", browsable?"true":"false"); + if(found != browsable) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + found = false; + DICTIONARY_ITEM *n; + for(n = dict->items.list; n ;n = n->next) + if(n == item) found = true; + + fprintf(stderr, "ITEM %-20s: linked %5s, expected %5s...\t\t\t\t\t\t", label, + found?"true":"false", linked?"true":"false"); + if(found != linked) { + fprintf(stderr, "FAILED\n"); + errors++; + } + else + fprintf(stderr, "OK\n"); + + return errors; +} + +struct thread_unittest { + int join; + DICTIONARY *dict; + int dups; + + netdata_thread_t thread; + struct dictionary_stats stats; +}; + +static void *unittest_dict_thread(void *arg) { + struct thread_unittest *tu = arg; + for(; 1 ;) { + if(__atomic_load_n(&tu->join, __ATOMIC_RELAXED)) + break; + + DICT_ITEM_CONST DICTIONARY_ITEM *item = + dictionary_set_and_acquire_item_advanced(tu->dict, "dict thread checking 1234567890", + -1, NULL, 0, NULL); + tu->stats.ops.inserts++; + + dictionary_get(tu->dict, dictionary_acquired_item_name(item)); + tu->stats.ops.searches++; + + void *t1; + dfe_start_write(tu->dict, t1) { + + // this should delete the referenced item + dictionary_del(tu->dict, t1_dfe.name); + tu->stats.ops.deletes++; + + void *t2; + dfe_start_write(tu->dict, t2) { + // this should add another + dictionary_set(tu->dict, t2_dfe.name, NULL, 0); + tu->stats.ops.inserts++; + + dictionary_get(tu->dict, dictionary_acquired_item_name(item)); + tu->stats.ops.searches++; + + // and this should delete it again + dictionary_del(tu->dict, t2_dfe.name); + tu->stats.ops.deletes++; + } + dfe_done(t2); + 
tu->stats.ops.traversals++; + + // this should fail to add it + dictionary_set(tu->dict, t1_dfe.name, NULL, 0); + tu->stats.ops.inserts++; + + dictionary_del(tu->dict, t1_dfe.name); + tu->stats.ops.deletes++; + } + dfe_done(t1); + tu->stats.ops.traversals++; + + for(int i = 0; i < tu->dups ; i++) { + dictionary_acquired_item_dup(tu->dict, item); + dictionary_get(tu->dict, dictionary_acquired_item_name(item)); + tu->stats.ops.searches++; + } + + for(int i = 0; i < tu->dups ; i++) { + dictionary_acquired_item_release(tu->dict, item); + dictionary_del(tu->dict, dictionary_acquired_item_name(item)); + tu->stats.ops.deletes++; + } + + dictionary_acquired_item_release(tu->dict, item); + dictionary_del(tu->dict, "dict thread checking 1234567890"); + tu->stats.ops.deletes++; + + // test concurrent deletions and flushes + { + if(gettid() % 2) { + char buf [256 + 1]; + + for (int i = 0; i < 1000; i++) { + snprintfz(buf, sizeof(buf), "del/flush test %d", i); + dictionary_set(tu->dict, buf, NULL, 0); + tu->stats.ops.inserts++; + } + + for (int i = 0; i < 1000; i++) { + snprintfz(buf, sizeof(buf), "del/flush test %d", i); + dictionary_del(tu->dict, buf); + tu->stats.ops.deletes++; + } + } + else { + for (int i = 0; i < 10; i++) { + dictionary_flush(tu->dict); + tu->stats.ops.flushes++; + } + } + } + } + + return arg; +} + +static int dictionary_unittest_threads() { + time_t seconds_to_run = 5; + int threads_to_create = 2; + + struct thread_unittest tu[threads_to_create]; + memset(tu, 0, sizeof(struct thread_unittest) * threads_to_create); + + fprintf( + stderr, + "\nChecking dictionary concurrency with %d threads for %lld seconds...\n", + threads_to_create, + (long long)seconds_to_run); + + // threads testing of dictionary + struct dictionary_stats stats = {}; + tu[0].join = 0; + tu[0].dups = 1; + tu[0].dict = dictionary_create_advanced(DICT_OPTION_DONT_OVERWRITE_VALUE, &stats, 0); + + for (int i = 0; i < threads_to_create; i++) { + if(i) + tu[i] = tu[0]; + + char buf[100 + 1]; 
+ snprintf(buf, 100, "dict%d", i); + netdata_thread_create( + &tu[i].thread, + buf, + NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE, + unittest_dict_thread, + &tu[i]); + } + + sleep_usec(seconds_to_run * USEC_PER_SEC); + + for (int i = 0; i < threads_to_create; i++) { + __atomic_store_n(&tu[i].join, 1, __ATOMIC_RELAXED); + + void *retval; + netdata_thread_join(tu[i].thread, &retval); + + if(i) { + tu[0].stats.ops.inserts += tu[i].stats.ops.inserts; + tu[0].stats.ops.deletes += tu[i].stats.ops.deletes; + tu[0].stats.ops.searches += tu[i].stats.ops.searches; + tu[0].stats.ops.flushes += tu[i].stats.ops.flushes; + tu[0].stats.ops.traversals += tu[i].stats.ops.traversals; + } + } + + fprintf(stderr, + "CALLS : inserts %zu" + ", deletes %zu" + ", searches %zu" + ", traversals %zu" + ", flushes %zu" + "\n", + tu[0].stats.ops.inserts, + tu[0].stats.ops.deletes, + tu[0].stats.ops.searches, + tu[0].stats.ops.traversals, + tu[0].stats.ops.flushes + ); + +#ifdef DICT_WITH_STATS + fprintf(stderr, + "ACTUAL: inserts %zu" + ", deletes %zu" + ", searches %zu" + ", traversals %zu" + ", resets %zu" + ", flushes %zu" + ", entries %d" + ", referenced_items %d" + ", pending deletions %d" + ", check spins %zu" + ", insert spins %zu" + ", delete spins %zu" + ", search ignores %zu" + "\n", + stats.ops.inserts, + stats.ops.deletes, + stats.ops.searches, + stats.ops.traversals, + stats.ops.resets, + stats.ops.flushes, + tu[0].dict->entries, + tu[0].dict->referenced_items, + tu[0].dict->pending_deletion_items, + stats.spin_locks.use_spins, + stats.spin_locks.insert_spins, + stats.spin_locks.delete_spins, + stats.spin_locks.search_spins + ); +#endif + + dictionary_destroy(tu[0].dict); + return 0; +} + +struct thread_view_unittest { + int join; + DICTIONARY *master; + DICTIONARY *view; + DICTIONARY_ITEM *item_master; + int dups; +}; + +static void *unittest_dict_master_thread(void *arg) { + struct thread_view_unittest *tv = arg; + + DICTIONARY_ITEM *item = NULL; + int loops 
= 0; + while(!__atomic_load_n(&tv->join, __ATOMIC_RELAXED)) { + + if(!item) + item = dictionary_set_and_acquire_item(tv->master, "ITEM1", "123", strlen("123")); + + if(__atomic_load_n(&tv->item_master, __ATOMIC_RELAXED) != NULL) { + dictionary_acquired_item_release(tv->master, item); + dictionary_del(tv->master, "ITEM1"); + item = NULL; + loops++; + continue; + } + + dictionary_acquired_item_dup(tv->master, item); // for the view thread + __atomic_store_n(&tv->item_master, item, __ATOMIC_RELAXED); + dictionary_del(tv->master, "ITEM1"); + + + for(int i = 0; i < tv->dups + loops ; i++) { + dictionary_acquired_item_dup(tv->master, item); + } + + for(int i = 0; i < tv->dups + loops ; i++) { + dictionary_acquired_item_release(tv->master, item); + } + + dictionary_acquired_item_release(tv->master, item); + + item = NULL; + loops = 0; + } + + return arg; +} + +static void *unittest_dict_view_thread(void *arg) { + struct thread_view_unittest *tv = arg; + + DICTIONARY_ITEM *m_item = NULL; + + while(!__atomic_load_n(&tv->join, __ATOMIC_RELAXED)) { + if(!(m_item = __atomic_load_n(&tv->item_master, __ATOMIC_RELAXED))) + continue; + + DICTIONARY_ITEM *v_item = dictionary_view_set_and_acquire_item(tv->view, "ITEM2", m_item); + dictionary_acquired_item_release(tv->master, m_item); + __atomic_store_n(&tv->item_master, NULL, __ATOMIC_RELAXED); + + for(int i = 0; i < tv->dups ; i++) { + dictionary_acquired_item_dup(tv->view, v_item); + } + + for(int i = 0; i < tv->dups ; i++) { + dictionary_acquired_item_release(tv->view, v_item); + } + + dictionary_del(tv->view, "ITEM2"); + + while(!__atomic_load_n(&tv->join, __ATOMIC_RELAXED) && !(m_item = __atomic_load_n(&tv->item_master, __ATOMIC_RELAXED))) { + dictionary_acquired_item_dup(tv->view, v_item); + dictionary_acquired_item_release(tv->view, v_item); + } + + dictionary_acquired_item_release(tv->view, v_item); + } + + return arg; +} + +static int dictionary_unittest_view_threads() { + + struct thread_view_unittest tv = { + .join = 0, + 
.master = NULL, + .view = NULL, + .item_master = NULL, + .dups = 1, + }; + + // threads testing of dictionary + struct dictionary_stats stats_master = {}; + struct dictionary_stats stats_view = {}; + tv.master = dictionary_create_advanced(DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE, &stats_master, 0); + tv.view = dictionary_create_view(tv.master); + tv.view->stats = &stats_view; + + time_t seconds_to_run = 5; + fprintf( + stderr, + "\nChecking dictionary concurrency with 1 master and 1 view threads for %lld seconds...\n", + (long long)seconds_to_run); + + netdata_thread_t master_thread, view_thread; + tv.join = 0; + + netdata_thread_create( + &master_thread, + "master", + NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE, + unittest_dict_master_thread, + &tv); + + netdata_thread_create( + &view_thread, + "view", + NETDATA_THREAD_OPTION_DONT_LOG | NETDATA_THREAD_OPTION_JOINABLE, + unittest_dict_view_thread, + &tv); + + sleep_usec(seconds_to_run * USEC_PER_SEC); + + __atomic_store_n(&tv.join, 1, __ATOMIC_RELAXED); + void *retval; + netdata_thread_join(view_thread, &retval); + netdata_thread_join(master_thread, &retval); + +#ifdef DICT_WITH_STATS + fprintf(stderr, + "MASTER: inserts %zu" + ", deletes %zu" + ", searches %zu" + ", resets %zu" + ", entries %d" + ", referenced_items %d" + ", pending deletions %d" + ", check spins %zu" + ", insert spins %zu" + ", delete spins %zu" + ", search ignores %zu" + "\n", + stats_master.ops.inserts, + stats_master.ops.deletes, + stats_master.ops.searches, + stats_master.ops.resets, + tv.master->entries, + tv.master->referenced_items, + tv.master->pending_deletion_items, + stats_master.spin_locks.use_spins, + stats_master.spin_locks.insert_spins, + stats_master.spin_locks.delete_spins, + stats_master.spin_locks.search_spins + ); + fprintf(stderr, + "VIEW : inserts %zu" + ", deletes %zu" + ", searches %zu" + ", resets %zu" + ", entries %d" + ", referenced_items %d" + ", pending deletions %d" + ", 
check spins %zu" + ", insert spins %zu" + ", delete spins %zu" + ", search ignores %zu" + "\n", + stats_view.ops.inserts, + stats_view.ops.deletes, + stats_view.ops.searches, + stats_view.ops.resets, + tv.view->entries, + tv.view->referenced_items, + tv.view->pending_deletion_items, + stats_view.spin_locks.use_spins, + stats_view.spin_locks.insert_spins, + stats_view.spin_locks.delete_spins, + stats_view.spin_locks.search_spins + ); +#endif + + dictionary_destroy(tv.master); + dictionary_destroy(tv.view); + + return 0; +} + +size_t dictionary_unittest_views(void) { + size_t errors = 0; + struct dictionary_stats stats = {}; + DICTIONARY *master = dictionary_create_advanced(DICT_OPTION_NONE, &stats, 0); + DICTIONARY *view = dictionary_create_view(master); + + fprintf(stderr, "\n\nChecking dictionary views...\n"); + + // Add an item to both master and view, then remove the view first and the master second + fprintf(stderr, "\nPASS 1: Adding 1 item to master:\n"); + DICTIONARY_ITEM *item1_on_master = dictionary_set_and_acquire_item(master, "KEY 1", "VALUE1", strlen("VALUE1") + 1); + errors += unittest_check_dictionary("master", master, 1, 1, 0, 1, 0); + errors += unittest_check_item("master", master, item1_on_master, "KEY 1", item1_on_master->shared->value, 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nPASS 1: Adding master item to view:\n"); + DICTIONARY_ITEM *item1_on_view = dictionary_view_set_and_acquire_item(view, "KEY 1 ON VIEW", item1_on_master); + errors += unittest_check_dictionary("view", view, 1, 1, 0, 1, 0); + errors += unittest_check_item("view", view, item1_on_view, "KEY 1 ON VIEW", item1_on_master->shared->value, 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nPASS 1: Deleting view item:\n"); + dictionary_del(view, "KEY 1 ON VIEW"); + errors += unittest_check_dictionary("master", master, 1, 1, 0, 1, 0); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 1, 0); + errors += unittest_check_item("master", master, 
item1_on_master, "KEY 1", item1_on_master->shared->value, 1, ITEM_FLAG_NONE, true, true, true); + errors += unittest_check_item("view", view, item1_on_view, "KEY 1 ON VIEW", item1_on_master->shared->value, 1, ITEM_FLAG_DELETED, false, false, true); + + fprintf(stderr, "\nPASS 1: Releasing the deleted view item:\n"); + dictionary_acquired_item_release(view, item1_on_view); + errors += unittest_check_dictionary("master", master, 1, 1, 0, 1, 0); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 0, 1); + errors += unittest_check_item("master", master, item1_on_master, "KEY 1", item1_on_master->shared->value, 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nPASS 1: Releasing the acquired master item:\n"); + dictionary_acquired_item_release(master, item1_on_master); + errors += unittest_check_dictionary("master", master, 1, 1, 0, 0, 0); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 0, 1); + errors += unittest_check_item("master", master, item1_on_master, "KEY 1", item1_on_master->shared->value, 0, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nPASS 1: Deleting the released master item:\n"); + dictionary_del(master, "KEY 1"); + errors += unittest_check_dictionary("master", master, 0, 0, 0, 0, 0); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 0, 1); + + // The other way now: + // Add an item to both master and view, then remove the master first and verify it is deleted on the view also + fprintf(stderr, "\nPASS 2: Adding 1 item to master:\n"); + item1_on_master = dictionary_set_and_acquire_item(master, "KEY 1", "VALUE1", strlen("VALUE1") + 1); + errors += unittest_check_dictionary("master", master, 1, 1, 0, 1, 0); + errors += unittest_check_item("master", master, item1_on_master, "KEY 1", item1_on_master->shared->value, 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nPASS 2: Adding master item to view:\n"); + item1_on_view = dictionary_view_set_and_acquire_item(view, "KEY 1 ON VIEW", 
item1_on_master); + errors += unittest_check_dictionary("view", view, 1, 1, 0, 1, 0); + errors += unittest_check_item("view", view, item1_on_view, "KEY 1 ON VIEW", item1_on_master->shared->value, 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nPASS 2: Deleting master item:\n"); + dictionary_del(master, "KEY 1"); + garbage_collect_pending_deletes(view); + errors += unittest_check_dictionary("master", master, 0, 0, 1, 1, 0); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 1, 0); + errors += unittest_check_item("master", master, item1_on_master, "KEY 1", item1_on_master->shared->value, 1, ITEM_FLAG_DELETED, false, false, true); + errors += unittest_check_item("view", view, item1_on_view, "KEY 1 ON VIEW", item1_on_master->shared->value, 1, ITEM_FLAG_DELETED, false, false, true); + + fprintf(stderr, "\nPASS 2: Releasing the acquired master item:\n"); + dictionary_acquired_item_release(master, item1_on_master); + errors += unittest_check_dictionary("master", master, 0, 0, 1, 0, 1); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 1, 0); + errors += unittest_check_item("view", view, item1_on_view, "KEY 1 ON VIEW", item1_on_master->shared->value, 1, ITEM_FLAG_DELETED, false, false, true); + + fprintf(stderr, "\nPASS 2: Releasing the deleted view item:\n"); + dictionary_acquired_item_release(view, item1_on_view); + errors += unittest_check_dictionary("master", master, 0, 0, 1, 0, 1); + errors += unittest_check_dictionary("view", view, 0, 0, 1, 0, 1); + + dictionary_destroy(master); + dictionary_destroy(view); + return errors; +} + +/* + * FIXME: a dictionary-related leak is reported when running the address + * sanitizer. Need to investigate if it's introduced by the unit-test itself, + * or the dictionary implementation. 
+*/ +int dictionary_unittest(size_t entries) { + if(entries < 10) entries = 10; + + DICTIONARY *dict; + size_t errors = 0; + + fprintf(stderr, "Generating %zu names and values...\n", entries); + char **names = dictionary_unittest_generate_names(entries); + char **values = dictionary_unittest_generate_values(entries); + + fprintf(stderr, "\nCreating dictionary single threaded, clone, %zu items\n", entries); + dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); + dictionary_unittest_clone(dict, names, values, entries, &errors); + + fprintf(stderr, "\nCreating dictionary multi threaded, clone, %zu items\n", entries); + dict = dictionary_create(DICT_OPTION_NONE); + dictionary_unittest_clone(dict, names, values, entries, &errors); + + fprintf(stderr, "\nCreating dictionary single threaded, non-clone, add-in-front options, %zu items\n", entries); + dict = dictionary_create( + DICT_OPTION_SINGLE_THREADED | DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | + DICT_OPTION_ADD_IN_FRONT); + dictionary_unittest_nonclone(dict, names, values, entries, &errors); + + fprintf(stderr, "\nCreating dictionary multi threaded, non-clone, add-in-front options, %zu items\n", entries); + dict = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_ADD_IN_FRONT); + dictionary_unittest_nonclone(dict, names, values, entries, &errors); + + fprintf(stderr, "\nCreating dictionary single-threaded, non-clone, don't overwrite options, %zu items\n", entries); + dict = dictionary_create( + DICT_OPTION_SINGLE_THREADED | DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | + DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_unittest_run_and_measure_time(dict, "adding entries", names, values, entries, &errors, dictionary_unittest_set_nonclone); + dictionary_unittest_run_and_measure_time(dict, "resetting non-overwrite entries", names, values, entries, &errors, dictionary_unittest_reset_dont_overwrite_nonclone); + 
dictionary_unittest_run_and_measure_time(dict, "traverse foreach read loop", names, values, entries, &errors, dictionary_unittest_foreach); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback", names, values, entries, &errors, dictionary_unittest_walkthrough); + dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback stop", names, values, entries, &errors, dictionary_unittest_walkthrough_stop); + dictionary_unittest_run_and_measure_time(dict, "destroying full dictionary", names, values, entries, &errors, dictionary_unittest_destroy); + + fprintf(stderr, "\nCreating dictionary multi-threaded, non-clone, don't overwrite options, %zu items\n", entries); + dict = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_unittest_run_and_measure_time(dict, "adding entries", names, values, entries, &errors, dictionary_unittest_set_nonclone); + dictionary_unittest_run_and_measure_time(dict, "walkthrough write delete this", names, values, entries, &errors, dictionary_unittest_walkthrough_delete_this); + dictionary_unittest_run_and_measure_time(dict, "destroying empty dictionary", names, values, entries, &errors, dictionary_unittest_destroy); + + fprintf(stderr, "\nCreating dictionary multi-threaded, non-clone, don't overwrite options, %zu items\n", entries); + dict = dictionary_create( + DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_VALUE_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_unittest_run_and_measure_time(dict, "adding entries", names, values, entries, &errors, dictionary_unittest_set_nonclone); + dictionary_unittest_run_and_measure_time(dict, "foreach write delete this", names, values, entries, &errors, dictionary_unittest_foreach_delete_this); + dictionary_unittest_run_and_measure_time(dict, "traverse foreach read loop empty", names, values, 0, &errors, dictionary_unittest_foreach); + 
dictionary_unittest_run_and_measure_time(dict, "walkthrough read callback empty", names, values, 0, &errors, dictionary_unittest_walkthrough); + dictionary_unittest_run_and_measure_time(dict, "destroying empty dictionary", names, values, entries, &errors, dictionary_unittest_destroy); + + fprintf(stderr, "\nCreating dictionary single threaded, clone, %zu items\n", entries); + dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); + dictionary_unittest_sorting(dict, names, values, entries, &errors); + dictionary_unittest_run_and_measure_time(dict, "destroying full dictionary", names, values, entries, &errors, dictionary_unittest_destroy); + + fprintf(stderr, "\nCreating dictionary single threaded, clone, %zu items\n", entries); + dict = dictionary_create(DICT_OPTION_SINGLE_THREADED); + dictionary_unittest_null_dfe(dict, names, values, entries, &errors); + dictionary_unittest_run_and_measure_time(dict, "destroying full dictionary", names, values, entries, &errors, dictionary_unittest_destroy); + + fprintf(stderr, "\nCreating dictionary single threaded, noclone, %zu items\n", entries); + dict = dictionary_create(DICT_OPTION_SINGLE_THREADED | DICT_OPTION_VALUE_LINK_DONT_CLONE); + dictionary_unittest_null_dfe(dict, names, values, entries, &errors); + dictionary_unittest_run_and_measure_time(dict, "destroying full dictionary", names, values, entries, &errors, dictionary_unittest_destroy); + + // check reference counters + { + fprintf(stderr, "\nTesting reference counters:\n"); + dict = dictionary_create(DICT_OPTION_NONE | DICT_OPTION_NAME_LINK_DONT_CLONE); + errors += unittest_check_dictionary("", dict, 0, 0, 0, 0, 0); + + fprintf(stderr, "\nAdding test item to dictionary and acquiring it\n"); + dictionary_set(dict, "test", "ITEM1", 6); + DICTIONARY_ITEM *item = (DICTIONARY_ITEM *)dictionary_get_and_acquire_item(dict, "test"); + + errors += unittest_check_dictionary("", dict, 1, 1, 0, 1, 0); + errors += unittest_check_item("ACQUIRED", dict, item, "test", "ITEM1", 1, 
ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nChecking that reference counters are increased:\n"); + void *t; + dfe_start_read(dict, t) { + errors += unittest_check_dictionary("", dict, 1, 1, 0, 1, 0); + errors += unittest_check_item("ACQUIRED TRAVERSAL", dict, item, "test", "ITEM1", 2, ITEM_FLAG_NONE, true, true, true); + } + dfe_done(t); + + fprintf(stderr, "\nChecking that reference counters are decreased:\n"); + errors += unittest_check_dictionary("", dict, 1, 1, 0, 1, 0); + errors += unittest_check_item("ACQUIRED TRAVERSAL 2", dict, item, "test", "ITEM1", 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nDeleting the item we have acquired:\n"); + dictionary_del(dict, "test"); + + errors += unittest_check_dictionary("", dict, 0, 0, 1, 1, 0); + errors += unittest_check_item("DELETED", dict, item, "test", "ITEM1", 1, ITEM_FLAG_DELETED, false, false, true); + + fprintf(stderr, "\nAdding another item with the same name of the item we deleted, while being acquired:\n"); + dictionary_set(dict, "test", "ITEM2", 6); + errors += unittest_check_dictionary("", dict, 1, 1, 1, 1, 0); + + fprintf(stderr, "\nAcquiring the second item:\n"); + DICTIONARY_ITEM *item2 = (DICTIONARY_ITEM *)dictionary_get_and_acquire_item(dict, "test"); + errors += unittest_check_item("FIRST", dict, item, "test", "ITEM1", 1, ITEM_FLAG_DELETED, false, false, true); + errors += unittest_check_item("SECOND", dict, item2, "test", "ITEM2", 1, ITEM_FLAG_NONE, true, true, true); + errors += unittest_check_dictionary("", dict, 1, 1, 1, 2, 0); + + fprintf(stderr, "\nReleasing the second item (the first is still acquired):\n"); + dictionary_acquired_item_release(dict, (DICTIONARY_ITEM *)item2); + errors += unittest_check_dictionary("", dict, 1, 1, 1, 1, 0); + errors += unittest_check_item("FIRST", dict, item, "test", "ITEM1", 1, ITEM_FLAG_DELETED, false, false, true); + errors += unittest_check_item("SECOND RELEASED", dict, item2, "test", "ITEM2", 0, ITEM_FLAG_NONE, true, true, true); 
+ + fprintf(stderr, "\nDeleting the second item (the first is still acquired):\n"); + dictionary_del(dict, "test"); + errors += unittest_check_dictionary("", dict, 0, 0, 1, 1, 0); + errors += unittest_check_item("ACQUIRED DELETED", dict, item, "test", "ITEM1", 1, ITEM_FLAG_DELETED, false, false, true); + + fprintf(stderr, "\nReleasing the first item (which we have already deleted):\n"); + dictionary_acquired_item_release(dict, (DICTIONARY_ITEM *)item); + dfe_start_write(dict, item) ; dfe_done(item); + errors += unittest_check_dictionary("", dict, 0, 0, 1, 0, 1); + + fprintf(stderr, "\nAdding again the test item to dictionary and acquiring it\n"); + dictionary_set(dict, "test", "ITEM1", 6); + item = (DICTIONARY_ITEM *)dictionary_get_and_acquire_item(dict, "test"); + + errors += unittest_check_dictionary("", dict, 1, 1, 0, 1, 0); + errors += unittest_check_item("RE-ADDITION", dict, item, "test", "ITEM1", 1, ITEM_FLAG_NONE, true, true, true); + + fprintf(stderr, "\nDestroying the dictionary while we have acquired an item\n"); + dictionary_destroy(dict); + + fprintf(stderr, "Releasing the item (on a destroyed dictionary)\n"); + dictionary_acquired_item_release(dict, (DICTIONARY_ITEM *)item); + item = NULL; + dict = NULL; + } + + dictionary_unittest_free_char_pp(names, entries); + dictionary_unittest_free_char_pp(values, entries); + + errors += dictionary_unittest_views(); + errors += dictionary_unittest_threads(); + errors += dictionary_unittest_view_threads(); + + cleanup_destroyed_dictionaries(); + + fprintf(stderr, "\n%zu errors found\n", errors); + return errors ? 
1 : 0; +} diff --git a/src/libnetdata/dictionary/dictionary.c b/src/libnetdata/dictionary/dictionary.c new file mode 100644 index 000000000..18d6596d7 --- /dev/null +++ b/src/libnetdata/dictionary/dictionary.c @@ -0,0 +1,767 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "dictionary-internals.h" + +ARAL *dict_items_aral = NULL; +ARAL *dict_shared_items_aral = NULL; + +struct dictionary_stats dictionary_stats_category_other = { + .name = "other", +}; + +// ---------------------------------------------------------------------------- +// public locks API + +inline void dictionary_write_lock(DICTIONARY *dict) { + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); +} + +inline void dictionary_write_unlock(DICTIONARY *dict) { + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); +} + +// ---------------------------------------------------------------------------- +// callbacks registration + +static inline void dictionary_hooks_allocate(DICTIONARY *dict) { + if(dict->hooks) return; + + dict->hooks = callocz(1, sizeof(struct dictionary_hooks)); + dict->hooks->links = 1; + + DICTIONARY_STATS_PLUS_MEMORY(dict, 0, sizeof(struct dictionary_hooks), 0); +} + +static inline size_t dictionary_hooks_free(DICTIONARY *dict) { + if(!dict->hooks) return 0; + + REFCOUNT links = __atomic_sub_fetch(&dict->hooks->links, 1, __ATOMIC_ACQUIRE); + if(links == 0) { + freez(dict->hooks); + dict->hooks = NULL; + + DICTIONARY_STATS_MINUS_MEMORY(dict, 0, sizeof(struct dictionary_hooks), 0); + return sizeof(struct dictionary_hooks); + } + + return 0; +} + +void dictionary_register_insert_callback(DICTIONARY *dict, dict_cb_insert_t insert_callback, void *data) { + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + dictionary_hooks_allocate(dict); + dict->hooks->insert_callback = insert_callback; + dict->hooks->insert_callback_data = data; +} + +void dictionary_register_conflict_callback(DICTIONARY *dict, dict_cb_conflict_t 
conflict_callback, void *data) { + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + internal_error(!(dict->options & DICT_OPTION_DONT_OVERWRITE_VALUE), "DICTIONARY: registering conflict callback without DICT_OPTION_DONT_OVERWRITE_VALUE"); + dict->options |= DICT_OPTION_DONT_OVERWRITE_VALUE; + + dictionary_hooks_allocate(dict); + dict->hooks->conflict_callback = conflict_callback; + dict->hooks->conflict_callback_data = data; +} + +void dictionary_register_react_callback(DICTIONARY *dict, dict_cb_react_t react_callback, void *data) { + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + dictionary_hooks_allocate(dict); + dict->hooks->react_callback = react_callback; + dict->hooks->react_callback_data = data; +} + +void dictionary_register_delete_callback(DICTIONARY *dict, dict_cb_delete_t delete_callback, void *data) { + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: called %s() on a view.", __FUNCTION__ ); + + dictionary_hooks_allocate(dict); + dict->hooks->delete_callback = delete_callback; + dict->hooks->delelte_callback_data = data; +} + +// ---------------------------------------------------------------------------- +// dictionary statistics API + +size_t dictionary_version(DICTIONARY *dict) { + if(unlikely(!dict)) return 0; + + // this is required for views to return the right number + // garbage_collect_pending_deletes(dict); + + return __atomic_load_n(&dict->version, __ATOMIC_RELAXED); +} +size_t dictionary_entries(DICTIONARY *dict) { + if(unlikely(!dict)) return 0; + + // this is required for views to return the right number + // garbage_collect_pending_deletes(dict); + + long int entries = __atomic_load_n(&dict->entries, __ATOMIC_RELAXED); + internal_fatal(entries < 0, "DICTIONARY: entries is negative: %ld", entries); + + return entries; +} +size_t dictionary_referenced_items(DICTIONARY *dict) { + if(unlikely(!dict)) return 0; + + long 
int referenced_items = __atomic_load_n(&dict->referenced_items, __ATOMIC_RELAXED); + if(referenced_items < 0) + fatal("DICTIONARY: referenced items is negative: %ld", referenced_items); + + return referenced_items; +} + +void dictionary_version_increment(DICTIONARY *dict) { + __atomic_fetch_add(&dict->version, 1, __ATOMIC_RELAXED); +} + +// ---------------------------------------------------------------------------- +// items garbage collector + +void garbage_collect_pending_deletes(DICTIONARY *dict) { + usec_t last_master_deletion_us = dict->hooks?__atomic_load_n(&dict->hooks->last_master_deletion_us, __ATOMIC_RELAXED):0; + usec_t last_gc_run_us = __atomic_load_n(&dict->last_gc_run_us, __ATOMIC_RELAXED); + + bool is_view = is_view_dictionary(dict); + + if(likely(!( + DICTIONARY_PENDING_DELETES_GET(dict) > 0 || + (is_view && last_master_deletion_us > last_gc_run_us) + ))) + return; + + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); + + __atomic_store_n(&dict->last_gc_run_us, now_realtime_usec(), __ATOMIC_RELAXED); + + if(is_view) + dictionary_index_lock_wrlock(dict); + + DICTIONARY_STATS_GARBAGE_COLLECTIONS_PLUS1(dict); + + size_t deleted = 0, pending = 0, examined = 0; + DICTIONARY_ITEM *item = dict->items.list, *item_next; + while(item) { + examined++; + + // this will clean up + item_next = item->next; + int rc = item_check_and_acquire_advanced(dict, item, is_view); + + if(rc == RC_ITEM_MARKED_FOR_DELETION) { + // we didn't get a reference + + if(item_is_not_referenced_and_can_be_removed(dict, item)) { + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(dict->items.list, item, prev, next); + dict_item_free_with_hooks(dict, item); + deleted++; + + pending = DICTIONARY_PENDING_DELETES_MINUS1(dict); + if (!pending) + break; + } + } + else if(rc == RC_ITEM_IS_CURRENTLY_BEING_DELETED) + ; // do not touch this item (we didn't get a reference) + + else if(rc == RC_ITEM_OK) + item_release(dict, item); + + item = item_next; + } + + if(is_view) + 
dictionary_index_wrlock_unlock(dict); + + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); + + (void)deleted; + (void)examined; + + internal_error(false, "DICTIONARY: garbage collected dictionary created by %s (%zu@%s), " + "examined %zu items, deleted %zu items, still pending %zu items", + dict->creation_function, dict->creation_line, dict->creation_file, + examined, deleted, pending); +} + +void dictionary_garbage_collect(DICTIONARY *dict) { + if(!dict) return; + garbage_collect_pending_deletes(dict); +} + +// ---------------------------------------------------------------------------- + +void dictionary_static_items_aral_init(void) { + static SPINLOCK spinlock; + + if(unlikely(!dict_items_aral || !dict_shared_items_aral)) { + spinlock_lock(&spinlock); + + // we have to check again + if(!dict_items_aral) + dict_items_aral = aral_create( + "dict-items", + sizeof(DICTIONARY_ITEM), + 0, + 65536, + aral_by_size_statistics(), + NULL, NULL, false, false); + + // we have to check again + if(!dict_shared_items_aral) + dict_shared_items_aral = aral_create( + "dict-shared-items", + sizeof(DICTIONARY_ITEM_SHARED), + 0, + 65536, + aral_by_size_statistics(), + NULL, NULL, false, false); + + spinlock_unlock(&spinlock); + } +} + +// ---------------------------------------------------------------------------- +// delayed destruction of dictionaries + +static bool dictionary_free_all_resources(DICTIONARY *dict, size_t *mem, bool force) { + if(mem) + *mem = 0; + + if(!force && dictionary_referenced_items(dict)) + return false; + + size_t dict_size = 0, counted_items = 0, item_size = 0, index_size = 0; + (void)counted_items; + +#ifdef NETDATA_INTERNAL_CHECKS + long int entries = dict->entries; + long int referenced_items = dict->referenced_items; + long int pending_deletion_items = dict->pending_deletion_items; + const char *creation_function = dict->creation_function; + const char *creation_file = dict->creation_file; + size_t creation_line = dict->creation_line; +#endif + + // 
destroy the index + dictionary_index_lock_wrlock(dict); + index_size += hashtable_destroy_unsafe(dict); + dictionary_index_wrlock_unlock(dict); + + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); + DICTIONARY_ITEM *item = dict->items.list; + while (item) { + // cache item->next + // because we are going to free item + DICTIONARY_ITEM *item_next = item->next; + + item_size += dict_item_free_with_hooks(dict, item); + item = item_next; + + // to speed up destruction, we don't unlink the item + // from the linked-list here + + counted_items++; + } + dict->items.list = NULL; + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); + + dict_size += dictionary_locks_destroy(dict); + dict_size += reference_counter_free(dict); + dict_size += dictionary_hooks_free(dict); + dict_size += sizeof(DICTIONARY); + DICTIONARY_STATS_MINUS_MEMORY(dict, 0, sizeof(DICTIONARY), 0); + + if(dict->value_aral) + aral_by_size_release(dict->value_aral); + + freez(dict); + + internal_error( + false, + "DICTIONARY: Freed dictionary created from %s() %zu@%s, having %ld (counted %zu) entries, %ld referenced, %ld pending deletion, total freed memory: %zu bytes (sizeof(dict) = %zu, sizeof(item) = %zu).", + creation_function, + creation_line, + creation_file, + entries, counted_items, referenced_items, pending_deletion_items, + dict_size + item_size, sizeof(DICTIONARY), sizeof(DICTIONARY_ITEM) + sizeof(DICTIONARY_ITEM_SHARED)); + + if(mem) + *mem = dict_size + item_size + index_size; + + return true; +} + +netdata_mutex_t dictionaries_waiting_to_be_destroyed_mutex = NETDATA_MUTEX_INITIALIZER; +static DICTIONARY *dictionaries_waiting_to_be_destroyed = NULL; + +static void dictionary_queue_for_destruction(DICTIONARY *dict) { + if(is_dictionary_destroyed(dict)) + return; + + DICTIONARY_STATS_DICT_DESTROY_QUEUED_PLUS1(dict); + dict_flag_set(dict, DICT_FLAG_DESTROYED); + + netdata_mutex_lock(&dictionaries_waiting_to_be_destroyed_mutex); + + dict->next = dictionaries_waiting_to_be_destroyed; + 
dictionaries_waiting_to_be_destroyed = dict; + + netdata_mutex_unlock(&dictionaries_waiting_to_be_destroyed_mutex); +} + +void cleanup_destroyed_dictionaries(void) { + if(!dictionaries_waiting_to_be_destroyed) + return; + + netdata_mutex_lock(&dictionaries_waiting_to_be_destroyed_mutex); + + DICTIONARY *dict, *last = NULL, *next = NULL; + for(dict = dictionaries_waiting_to_be_destroyed; dict ; dict = next) { + next = dict->next; + +#ifdef NETDATA_INTERNAL_CHECKS + size_t line = dict->creation_line; + const char *file = dict->creation_file; + const char *function = dict->creation_function; + pid_t pid = dict->creation_tid; +#endif + + DICTIONARY_STATS_DICT_DESTROY_QUEUED_MINUS1(dict); + if(dictionary_free_all_resources(dict, NULL, false)) { + + internal_error( + true, + "DICTIONARY: freed dictionary with delayed destruction, created from %s() %zu@%s pid %d.", + function, line, file, pid); + + if(last) last->next = next; + else dictionaries_waiting_to_be_destroyed = next; + } + else { + + internal_error( + true, + "DICTIONARY: cannot free dictionary with delayed destruction, created from %s() %zu@%s pid %d.", + function, line, file, pid); + + DICTIONARY_STATS_DICT_DESTROY_QUEUED_PLUS1(dict); + last = dict; + } + } + + netdata_mutex_unlock(&dictionaries_waiting_to_be_destroyed_mutex); +} + +// ---------------------------------------------------------------------------- +// API internal checks + +#ifdef NETDATA_INTERNAL_CHECKS +#define api_internal_check(dict, item, allow_null_dict, allow_null_item) api_internal_check_with_trace(dict, item, __FUNCTION__, allow_null_dict, allow_null_item) +static inline void api_internal_check_with_trace(DICTIONARY *dict, DICTIONARY_ITEM *item, const char *function, bool allow_null_dict, bool allow_null_item) { + if(!allow_null_dict && !dict) { + internal_error( + item, + "DICTIONARY: attempted to %s() with a NULL dictionary, passing an item created from %s() %zu@%s.", + function, + item->dict->creation_function, + 
item->dict->creation_line, + item->dict->creation_file); + fatal("DICTIONARY: attempted to %s() but dict is NULL", function); + } + + if(!allow_null_item && !item) { + internal_error( + true, + "DICTIONARY: attempted to %s() without an item on a dictionary created from %s() %zu@%s.", + function, + dict?dict->creation_function:"unknown", + dict?dict->creation_line:0, + dict?dict->creation_file:"unknown"); + fatal("DICTIONARY: attempted to %s() but item is NULL", function); + } + + if(dict && item && dict != item->dict) { + internal_error( + true, + "DICTIONARY: attempted to %s() an item on a dictionary created from %s() %zu@%s, but the item belongs to the dictionary created from %s() %zu@%s.", + function, + dict->creation_function, + dict->creation_line, + dict->creation_file, + item->dict->creation_function, + item->dict->creation_line, + item->dict->creation_file + ); + fatal("DICTIONARY: %s(): item does not belong to this dictionary.", function); + } + + if(item) { + REFCOUNT refcount = DICTIONARY_ITEM_REFCOUNT_GET(dict, item); + if (unlikely(refcount <= 0)) { + internal_error( + true, + "DICTIONARY: attempted to %s() of an item with reference counter = %d on a dictionary created from %s() %zu@%s", + function, + refcount, + item->dict->creation_function, + item->dict->creation_line, + item->dict->creation_file); + fatal("DICTIONARY: attempted to %s but item is having refcount = %d", function, refcount); + } + } +} +#else +#define api_internal_check(dict, item, allow_null_dict, allow_null_item) debug_dummy() +#endif + +#define api_is_name_good(dict, name, name_len) api_is_name_good_with_trace(dict, name, name_len, __FUNCTION__) +static bool api_is_name_good_with_trace(DICTIONARY *dict __maybe_unused, const char *name, ssize_t name_len __maybe_unused, const char *function __maybe_unused) { + if(unlikely(!name)) { + internal_error( + true, + "DICTIONARY: attempted to %s() with name = NULL on a dictionary created from %s() %zu@%s.", + function, + 
dict?dict->creation_function:"unknown", + dict?dict->creation_line:0, + dict?dict->creation_file:"unknown"); + return false; + } + + if(unlikely(!*name)) { + internal_error( + true, + "DICTIONARY: attempted to %s() with empty name on a dictionary created from %s() %zu@%s.", + function, + dict?dict->creation_function:"unknown", + dict?dict->creation_line:0, + dict?dict->creation_file:"unknown"); + return false; + } + + internal_error( + name_len > 0 && name_len != (ssize_t)strlen(name), + "DICTIONARY: attempted to %s() with a name of '%s', having length of %zu, " + "but the supplied name_len = %ld, on a dictionary created from %s() %zu@%s.", + function, + name, + strlen(name), + (long int) name_len, + dict?dict->creation_function:"unknown", + dict?dict->creation_line:0, + dict?dict->creation_file:"unknown"); + + internal_error( + name_len <= 0 && name_len != -1, + "DICTIONARY: attempted to %s() with a name of '%s', having length of %zu, " + "but the supplied name_len = %ld, on a dictionary created from %s() %zu@%s.", + function, + name, + strlen(name), + (long int) name_len, + dict?dict->creation_function:"unknown", + dict?dict->creation_line:0, + dict?dict->creation_file:"unknown"); + + return true; +} + +// ---------------------------------------------------------------------------- +// API - dictionary management + +static DICTIONARY *dictionary_create_internal(DICT_OPTIONS options, struct dictionary_stats *stats, size_t fixed_size) { + cleanup_destroyed_dictionaries(); + + DICTIONARY *dict = callocz(1, sizeof(DICTIONARY)); + dict->options = options; + dict->stats = stats; + + if((dict->options & DICT_OPTION_FIXED_SIZE) && !fixed_size) { + dict->options &= ~DICT_OPTION_FIXED_SIZE; + internal_fatal(true, "DICTIONARY: requested fixed size dictionary, without setting the size"); + } + if(!(dict->options & DICT_OPTION_FIXED_SIZE) && fixed_size) { + dict->options |= DICT_OPTION_FIXED_SIZE; + internal_fatal(true, "DICTIONARY: set a fixed size for the items, without 
setting DICT_OPTION_FIXED_SIZE flag"); + } + + if(dict->options & DICT_OPTION_FIXED_SIZE) + dict->value_aral = aral_by_size_acquire(fixed_size); + else + dict->value_aral = NULL; + + if(!(dict->options & (DICT_OPTION_INDEX_JUDY|DICT_OPTION_INDEX_HASHTABLE))) + dict->options |= DICT_OPTION_INDEX_JUDY; + + size_t dict_size = 0; + dict_size += sizeof(DICTIONARY); + dict_size += dictionary_locks_init(dict); + dict_size += reference_counter_init(dict); + dict_size += hashtable_init_unsafe(dict); + + dictionary_static_items_aral_init(); + pointer_index_init(dict); + + DICTIONARY_STATS_PLUS_MEMORY(dict, 0, dict_size, 0); + + return dict; +} + +#ifdef NETDATA_INTERNAL_CHECKS +DICTIONARY *dictionary_create_advanced_with_trace(DICT_OPTIONS options, struct dictionary_stats *stats, size_t fixed_size, const char *function, size_t line, const char *file) { +#else +DICTIONARY *dictionary_create_advanced(DICT_OPTIONS options, struct dictionary_stats *stats, size_t fixed_size) { +#endif + + DICTIONARY *dict = dictionary_create_internal(options, stats?stats:&dictionary_stats_category_other, fixed_size); + +#ifdef NETDATA_INTERNAL_CHECKS + dict->creation_function = function; + dict->creation_file = file; + dict->creation_line = line; +#endif + + DICTIONARY_STATS_DICT_CREATIONS_PLUS1(dict); + return dict; +} + +#ifdef NETDATA_INTERNAL_CHECKS +DICTIONARY *dictionary_create_view_with_trace(DICTIONARY *master, const char *function, size_t line, const char *file) { +#else +DICTIONARY *dictionary_create_view(DICTIONARY *master) { +#endif + + DICTIONARY *dict = dictionary_create_internal(master->options, master->stats, + master->value_aral ? 
aral_element_size(master->value_aral) : 0); + + dict->master = master; + + dictionary_hooks_allocate(master); + + if(unlikely(__atomic_load_n(&master->hooks->links, __ATOMIC_RELAXED)) < 1) + fatal("DICTIONARY: attempted to create a view that has %d links", master->hooks->links); + + dict->hooks = master->hooks; + __atomic_add_fetch(&master->hooks->links, 1, __ATOMIC_ACQUIRE); + +#ifdef NETDATA_INTERNAL_CHECKS + dict->creation_function = function; + dict->creation_file = file; + dict->creation_line = line; + dict->creation_tid = gettid(); +#endif + + DICTIONARY_STATS_DICT_CREATIONS_PLUS1(dict); + return dict; +} + +void dictionary_flush(DICTIONARY *dict) { + if(unlikely(!dict)) + return; + + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); + + DICTIONARY_ITEM *item, *next = NULL; + for(item = dict->items.list; item ;item = next) { + next = item->next; + dict_item_del(dict, item_get_name(item), (ssize_t)item_get_name_len(item)); + } + + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); + + DICTIONARY_STATS_DICT_FLUSHES_PLUS1(dict); +} + +size_t dictionary_destroy(DICTIONARY *dict) { + cleanup_destroyed_dictionaries(); + + if(!dict) return 0; + + ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); + + dict_flag_set(dict, DICT_FLAG_DESTROYED); + DICTIONARY_STATS_DICT_DESTRUCTIONS_PLUS1(dict); + + size_t referenced_items = dictionary_referenced_items(dict); + if(referenced_items) { + dictionary_flush(dict); + dictionary_queue_for_destruction(dict); + + internal_error( + true, + "DICTIONARY: delaying destruction of dictionary created from %s() %zu@%s, because it has %d referenced items in it (%d total).", + dict->creation_function, + dict->creation_line, + dict->creation_file, + dict->referenced_items, + dict->entries); + + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); + return 0; + } + + ll_recursive_unlock(dict, DICTIONARY_LOCK_WRITE); + + size_t freed; + dictionary_free_all_resources(dict, &freed, true); + + return freed; +} + +// 
---------------------------------------------------------------------------- +// SET an item to the dictionary + +DICT_ITEM_CONST DICTIONARY_ITEM *dictionary_set_and_acquire_item_advanced(DICTIONARY *dict, const char *name, ssize_t name_len, void *value, size_t value_len, void *constructor_data) { + if(unlikely(!api_is_name_good(dict, name, name_len))) + return NULL; + + api_internal_check(dict, NULL, false, true); + + if(unlikely(is_view_dictionary(dict))) + fatal("DICTIONARY: this dictionary is a view, you cannot add items other than the ones from the master dictionary."); + + DICTIONARY_ITEM *item = + dict_item_add_or_reset_value_and_acquire(dict, name, name_len, value, value_len, constructor_data, NULL); + api_internal_check(dict, item, false, false); + return item; +} + +void *dictionary_set_advanced(DICTIONARY *dict, const char *name, ssize_t name_len, void *value, size_t value_len, void *constructor_data) { + DICTIONARY_ITEM *item = dictionary_set_and_acquire_item_advanced(dict, name, name_len, value, value_len, constructor_data); + + if(likely(item)) { + void *v = item->shared->value; + item_release(dict, item); + return v; + } + + return NULL; +} + +DICT_ITEM_CONST DICTIONARY_ITEM *dictionary_view_set_and_acquire_item_advanced(DICTIONARY *dict, const char *name, ssize_t name_len, DICTIONARY_ITEM *master_item) { + if(unlikely(!api_is_name_good(dict, name, name_len))) + return NULL; + + api_internal_check(dict, NULL, false, true); + + if(unlikely(is_master_dictionary(dict))) + fatal("DICTIONARY: this dictionary is a master, you cannot add items from other dictionaries."); + + garbage_collect_pending_deletes(dict); + + dictionary_acquired_item_dup(dict->master, master_item); + DICTIONARY_ITEM *item = dict_item_add_or_reset_value_and_acquire(dict, name, name_len, NULL, 0, NULL, master_item); + dictionary_acquired_item_release(dict->master, master_item); + + api_internal_check(dict, item, false, false); + return item; +} + +void 
*dictionary_view_set_advanced(DICTIONARY *dict, const char *name, ssize_t name_len, DICTIONARY_ITEM *master_item) { + DICTIONARY_ITEM *item = dictionary_view_set_and_acquire_item_advanced(dict, name, name_len, master_item); + + if(likely(item)) { + void *v = item->shared->value; + item_release(dict, item); + return v; + } + + return NULL; +} + +// ---------------------------------------------------------------------------- +// GET an item from the dictionary + +DICT_ITEM_CONST DICTIONARY_ITEM *dictionary_get_and_acquire_item_advanced(DICTIONARY *dict, const char *name, ssize_t name_len) { + if(unlikely(!api_is_name_good(dict, name, name_len))) + return NULL; + + api_internal_check(dict, NULL, false, true); + DICTIONARY_ITEM *item = dict_item_find_and_acquire(dict, name, name_len); + api_internal_check(dict, item, false, true); + return item; +} + +void *dictionary_get_advanced(DICTIONARY *dict, const char *name, ssize_t name_len) { + DICTIONARY_ITEM *item = dictionary_get_and_acquire_item_advanced(dict, name, name_len); + + if(likely(item)) { + void *v = item->shared->value; + item_release(dict, item); + return v; + } + + return NULL; +} + +// ---------------------------------------------------------------------------- +// DUP/REL an item (increase/decrease its reference counter) + +DICT_ITEM_CONST DICTIONARY_ITEM *dictionary_acquired_item_dup(DICTIONARY *dict, DICT_ITEM_CONST DICTIONARY_ITEM *item) { + // we allow the item to be NULL here + api_internal_check(dict, item, false, true); + + if(likely(item)) { + item_acquire(dict, item); + api_internal_check(dict, item, false, false); + } + + return item; +} + +void dictionary_acquired_item_release(DICTIONARY *dict, DICT_ITEM_CONST DICTIONARY_ITEM *item) { + // we allow the item to be NULL here + api_internal_check(dict, item, false, true); + + // no need to get a lock here + // we pass the last parameter to reference_counter_release() as true + // so that the release may get a write-lock if required to clean up + + 
if(likely(item)) + item_release(dict, item); +} + +// ---------------------------------------------------------------------------- +// get the name/value of an item + +const char *dictionary_acquired_item_name(DICT_ITEM_CONST DICTIONARY_ITEM *item) { + return item_get_name(item); +} + +void *dictionary_acquired_item_value(DICT_ITEM_CONST DICTIONARY_ITEM *item) { + if(likely(item)) + return item->shared->value; + + return NULL; +} + +size_t dictionary_acquired_item_references(DICT_ITEM_CONST DICTIONARY_ITEM *item) { + if(likely(item)) + return DICTIONARY_ITEM_REFCOUNT_GET_SOLE(item); + + return 0; +} + +// ---------------------------------------------------------------------------- +// DEL an item + +bool dictionary_del_advanced(DICTIONARY *dict, const char *name, ssize_t name_len) { + if(unlikely(!api_is_name_good(dict, name, name_len))) + return false; + + api_internal_check(dict, NULL, false, true); + + if(unlikely(is_dictionary_destroyed(dict))) { + internal_error(true, "DICTIONARY: attempted to delete item on a destroyed dictionary"); + return false; + } + + return dict_item_del(dict, name, name_len); +} diff --git a/libnetdata/dictionary/dictionary.h b/src/libnetdata/dictionary/dictionary.h index 391be4ee5..231fbfebd 100644 --- a/libnetdata/dictionary/dictionary.h +++ b/src/libnetdata/dictionary/dictionary.h @@ -58,6 +58,8 @@ typedef enum __attribute__((packed)) dictionary_options { DICT_OPTION_DONT_OVERWRITE_VALUE = (1 << 3), // don't overwrite values of dictionary items (default: overwrite) DICT_OPTION_ADD_IN_FRONT = (1 << 4), // add dictionary items at the front of the linked list (default: at the end) DICT_OPTION_FIXED_SIZE = (1 << 5), // the items of the dictionary have a fixed size + DICT_OPTION_INDEX_JUDY = (1 << 6), // the default, if no other indexing is set + DICT_OPTION_INDEX_HASHTABLE = (1 << 7), // use SIMPLE_HASHTABLE for indexing } DICT_OPTIONS; struct dictionary_stats { @@ -130,22 +132,26 @@ DICTIONARY *dictionary_create_view(DICTIONARY 
*master); // an insert callback to be called just after an item is added to the dictionary // this callback is called while the dictionary is write locked! -void dictionary_register_insert_callback(DICTIONARY *dict, void (*ins_callback)(const DICTIONARY_ITEM *item, void *value, void *data), void *data); +typedef void (*dict_cb_insert_t)(const DICTIONARY_ITEM *item, void *value, void *data); +void dictionary_register_insert_callback(DICTIONARY *dict, dict_cb_insert_t insert_callback, void *data); // a delete callback to be called just before an item is deleted forever // this callback is called while the dictionary is write locked! -void dictionary_register_delete_callback(DICTIONARY *dict, void (*del_callback)(const DICTIONARY_ITEM *item, void *value, void *data), void *data); +typedef void (*dict_cb_delete_t)(const DICTIONARY_ITEM *item, void *value, void *data); +void dictionary_register_delete_callback(DICTIONARY *dict, dict_cb_delete_t delete_callback, void *data); // a merge callback to be called when DICT_OPTION_DONT_OVERWRITE_VALUE // and an item is already found in the dictionary - the dictionary does nothing else in this case // the old_value will remain in the dictionary - the new_value is ignored // The callback should return true if the value has been updated (it increases the dictionary version). 
-void dictionary_register_conflict_callback(DICTIONARY *dict, bool (*conflict_callback)(const DICTIONARY_ITEM *item, void *old_value, void *new_value, void *data), void *data); +typedef bool (*dict_cb_conflict_t)(const DICTIONARY_ITEM *item, void *old_value, void *new_value, void *data); +void dictionary_register_conflict_callback(DICTIONARY *dict, dict_cb_conflict_t conflict_callback, void *data); // a reaction callback to be called after every item insertion or conflict // after the constructors have finished and the items are fully available for use // and the dictionary is not write locked anymore -void dictionary_register_react_callback(DICTIONARY *dict, void (*react_callback)(const DICTIONARY_ITEM *item, void *value, void *data), void *data); +typedef void (*dict_cb_react_t)(const DICTIONARY_ITEM *item, void *value, void *data); +void dictionary_register_react_callback(DICTIONARY *dict, dict_cb_react_t react_callback, void *data); // Destroy a dictionary // Returns the number of bytes freed @@ -236,15 +242,17 @@ size_t dictionary_acquired_item_references(DICT_ITEM_CONST DICTIONARY_ITEM *item // You cannot alter the dictionary from inside a dictionary_walkthrough_read() - deadlock! // You can only delete the current item from inside a dictionary_walkthrough_write() - you can add as many as you want. 
// +typedef int (*dict_walkthrough_callback_t)(const DICTIONARY_ITEM *item, void *value, void *data); + #define dictionary_walkthrough_read(dict, callback, data) dictionary_walkthrough_rw(dict, 'r', callback, data) #define dictionary_walkthrough_write(dict, callback, data) dictionary_walkthrough_rw(dict, 'w', callback, data) -int dictionary_walkthrough_rw(DICTIONARY *dict, char rw, int (*callback)(const DICTIONARY_ITEM *item, void *value, void *data), void *data); +int dictionary_walkthrough_rw(DICTIONARY *dict, char rw, dict_walkthrough_callback_t walkthrough_callback, void *data); -typedef int (*dictionary_sorted_compar)(const DICTIONARY_ITEM **item1, const DICTIONARY_ITEM **item2); +typedef int (*dict_item_comparator_t)(const DICTIONARY_ITEM **item1, const DICTIONARY_ITEM **item2); #define dictionary_sorted_walkthrough_read(dict, callback, data) dictionary_sorted_walkthrough_rw(dict, 'r', callback, data, NULL) #define dictionary_sorted_walkthrough_write(dict, callback, data) dictionary_sorted_walkthrough_rw(dict, 'w', callback, data, NULL) -int dictionary_sorted_walkthrough_rw(DICTIONARY *dict, char rw, int (*callback)(const DICTIONARY_ITEM *item, void *entry, void *data), void *data, dictionary_sorted_compar compar); +int dictionary_sorted_walkthrough_rw(DICTIONARY *dict, char rw, dict_walkthrough_callback_t walkthrough_callback, void *data, dict_item_comparator_t item_comparator_callback); // ---------------------------------------------------------------------------- // Traverse with foreach @@ -294,7 +302,7 @@ typedef DICTFE_CONST struct dictionary_foreach { DICTFE value ## _dfe = {}; \ (void)(value); /* needed to avoid warning when looping without using this */ \ for((value) = dictionary_foreach_start_rw(&value ## _dfe, (dict), (mode)); \ - (value ## _dfe.item) ; \ + (value ## _dfe.item) || (value) ; \ (value) = dictionary_foreach_next(&value ## _dfe)) \ { @@ -303,7 +311,7 @@ typedef DICTFE_CONST struct dictionary_foreach { dictionary_foreach_done(&value ## 
_dfe); \ } while(0) -#define dfe_unlock(value) dictionary_foreach_unlock(&value ## _dfe); +#define dfe_unlock(value) dictionary_foreach_unlock(&value ## _dfe) void *dictionary_foreach_start_rw(DICTFE *dfe, DICTIONARY *dict, char rw); void *dictionary_foreach_next(DICTFE *dfe); @@ -322,14 +330,4 @@ extern struct dictionary_stats dictionary_stats_category_other; int dictionary_unittest(size_t entries); -// ---------------------------------------------------------------------------- -// THREAD CACHE - -void *thread_cache_entry_get_or_set(void *key, - ssize_t key_length, - void *value, - void *(*transform_the_value_before_insert)(void *key, size_t key_length, void *value)); - -void thread_cache_destroy(void); - #endif /* NETDATA_DICTIONARY_H */ diff --git a/src/libnetdata/dictionary/thread-cache.c b/src/libnetdata/dictionary/thread-cache.c new file mode 100644 index 000000000..9dc3de817 --- /dev/null +++ b/src/libnetdata/dictionary/thread-cache.c @@ -0,0 +1,47 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "thread-cache.h" + +static __thread Pvoid_t thread_cache_judy_array = NULL; + +void *thread_cache_entry_get_or_set(void *key, + ssize_t key_length, + void *value, + void *(*transform_the_value_before_insert)(void *key, size_t key_length, void *value) +) { + if(unlikely(!key || !key_length)) return NULL; + + if(key_length == -1) + key_length = (ssize_t)strlen((char *)key); + + JError_t J_Error; + Pvoid_t *Rc = JudyHSIns(&thread_cache_judy_array, key, key_length, &J_Error); + if (unlikely(Rc == PJERR)) { + fatal("THREAD_CACHE: Cannot insert entry to JudyHS, JU_ERRNO_* == %u, ID == %d", + JU_ERRNO(&J_Error), JU_ERRID(&J_Error)); + } + + if(*Rc == 0) { + // new item added + + *Rc = (transform_the_value_before_insert) ? 
transform_the_value_before_insert(key, key_length, value) : value; + } + + return *Rc; +} + +void thread_cache_destroy(void) { + if(unlikely(!thread_cache_judy_array)) return; + + JError_t J_Error; + Word_t ret = JudyHSFreeArray(&thread_cache_judy_array, &J_Error); + if(unlikely(ret == (Word_t) JERR)) { + netdata_log_error("THREAD_CACHE: Cannot destroy JudyHS, JU_ERRNO_* == %u, ID == %d", + JU_ERRNO(&J_Error), JU_ERRID(&J_Error)); + } + + internal_error(true, "THREAD_CACHE: hash table freed %lu bytes", ret); + + thread_cache_judy_array = NULL; +} + diff --git a/src/libnetdata/dictionary/thread-cache.h b/src/libnetdata/dictionary/thread-cache.h new file mode 100644 index 000000000..4495ad7d3 --- /dev/null +++ b/src/libnetdata/dictionary/thread-cache.h @@ -0,0 +1,15 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_THREAD_CACHE_H +#define NETDATA_THREAD_CACHE_H + +#include "../libnetdata.h" + +void *thread_cache_entry_get_or_set(void *key, + ssize_t key_length, + void *value, + void *(*transform_the_value_before_insert)(void *key, size_t key_length, void *value)); + +void thread_cache_destroy(void); + +#endif //NETDATA_THREAD_CACHE_H diff --git a/src/libnetdata/ebpf/README.md b/src/libnetdata/ebpf/README.md new file mode 100644 index 000000000..34b30a6a2 --- /dev/null +++ b/src/libnetdata/ebpf/README.md @@ -0,0 +1,13 @@ +<!-- +title: "eBPF" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/ebpf/README.md +sidebar_label: "eBPF" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# eBPF library + +Netdata's eBPF library supports the [eBPF collector](https://github.com/netdata/netdata/blob/master/src/collectors/ebpf.plugin/README.md). 
+ diff --git a/src/libnetdata/ebpf/ebpf.c b/src/libnetdata/ebpf/ebpf.c new file mode 100644 index 000000000..087454dba --- /dev/null +++ b/src/libnetdata/ebpf/ebpf.c @@ -0,0 +1,1683 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <dlfcn.h> +#include <sys/utsname.h> + +#include "ebpf.h" +#include "../libnetdata.h" + +char *ebpf_user_config_dir = CONFIG_DIR; +char *ebpf_stock_config_dir = LIBCONFIG_DIR; + +/* +static int clean_kprobe_event(FILE *out, char *filename, char *father_pid, netdata_ebpf_events_t *ptr) +{ + int fd = open(filename, O_WRONLY | O_APPEND, 0); + if (fd < 0) { + if (out) { + fprintf(out, "Cannot open %s : %s\n", filename, strerror(errno)); + } + return 1; + } + + char cmd[1024]; + int length = snprintf(cmd, 1023, "-:kprobes/%c_netdata_%s_%s", ptr->type, ptr->name, father_pid); + int ret = 0; + if (length > 0) { + ssize_t written = write(fd, cmd, strlen(cmd)); + if (written < 0) { + if (out) { + fprintf( + out, "Cannot remove the event (%d, %d) '%s' from %s : %s\n", getppid(), getpid(), cmd, filename, + strerror((int)errno)); + } + ret = 1; + } + } + + close(fd); + + return ret; +} + +int clean_kprobe_events(FILE *out, int pid, netdata_ebpf_events_t *ptr) +{ + char filename[FILENAME_MAX + 1]; + snprintf(filename, FILENAME_MAX, "%s%s", NETDATA_DEBUGFS, "kprobe_events"); + + char removeme[16]; + snprintf(removeme, 15, "%d", pid); + + int i; + for (i = 0; ptr[i].name; i++) { + if (clean_kprobe_event(out, filename, removeme, &ptr[i])) { + break; + } + } + + return 0; +} +*/ + +//---------------------------------------------------------------------------------------------------------------------- + +/** + * Get Kernel version + * + * Get the current kernel from /proc and returns an integer value representing it + * + * @return it returns a value representing the kernel version. 
+ */ +int ebpf_get_kernel_version() +{ + char major[16], minor[16], patch[16]; + char ver[VERSION_STRING_LEN]; + char *version = ver; + + int fd = open("/proc/sys/kernel/osrelease", O_RDONLY | O_CLOEXEC); + if (fd < 0) + return -1; + + ssize_t len = read(fd, ver, sizeof(ver)); + if (len < 0) { + close(fd); + return -1; + } + + close(fd); + + char *move = major; + while (*version && *version != '.') + *move++ = *version++; + *move = '\0'; + + version++; + move = minor; + while (*version && *version != '.') + *move++ = *version++; + *move = '\0'; + + if (*version) + version++; + else + return -1; + + move = patch; + while (*version && *version != '\n' && *version != '-') + *move++ = *version++; + *move = '\0'; + + // This new rule is fixing kernel version according the formula: + // KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c))) + // that was extracted from /usr/include/linux/version.h + int ipatch = (int)str2l(patch); + if (ipatch > 255) + ipatch = 255; + + return ((int)(str2l(major) * 65536) + (int)(str2l(minor) * 256) + ipatch); +} + +/** + * Get RH release + * + * Read Red Hat release from /etc/redhat-release + * + * @return It returns RH release on success and -1 otherwise + */ +int get_redhat_release() +{ + char buffer[VERSION_STRING_LEN + 1]; + int major, minor; + FILE *fp = fopen("/etc/redhat-release", "r"); + + if (fp) { + major = 0; + minor = -1; + size_t length = fread(buffer, sizeof(char), VERSION_STRING_LEN, fp); + if (length > 4) { + buffer[length] = '\0'; + char *end = strchr(buffer, '.'); + char *start; + if (end) { + *end = 0x0; + + if (end > buffer) { + start = end - 1; + + major = strtol(start, NULL, 10); + start = ++end; + + end++; + if (end) { + end = 0x00; + minor = strtol(start, NULL, 10); + } else { + minor = -1; + } + } + } + } + + fclose(fp); + return ((major * 256) + minor); + } else { + return -1; + } +} + +/** + * Check if the kernel is in a list of rejected ones + * + * @return Returns 1 if the kernel is 
rejected, 0 otherwise. + */ +static int kernel_is_rejected() +{ + // Get kernel version from system + char version_string[VERSION_STRING_LEN + 1]; + int version_string_len = 0; + + if (read_txt_file("/proc/version_signature", version_string, sizeof(version_string))) { + if (read_txt_file("/proc/version", version_string, sizeof(version_string))) { + struct utsname uname_buf; + if (!uname(&uname_buf)) { + netdata_log_info("Cannot check kernel version"); + return 0; + } + version_string_len = + snprintfz(version_string, VERSION_STRING_LEN, "%s %s", uname_buf.release, uname_buf.version); + } + } + + if (!version_string_len) + version_string_len = strlen(version_string); + + // Open a file with a list of rejected kernels + char *config_dir = getenv("NETDATA_USER_CONFIG_DIR"); + if (config_dir == NULL) { + config_dir = CONFIG_DIR; + } + + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/ebpf.d/%s", config_dir, EBPF_KERNEL_REJECT_LIST_FILE); + FILE *kernel_reject_list = fopen(filename, "r"); + + if (!kernel_reject_list) { + // Keep this to have compatibility with old versions + snprintfz(filename, FILENAME_MAX, "%s/%s", config_dir, EBPF_KERNEL_REJECT_LIST_FILE); + kernel_reject_list = fopen(filename, "r"); + + if (!kernel_reject_list) { + config_dir = getenv("NETDATA_STOCK_CONFIG_DIR"); + if (config_dir == NULL) { + config_dir = LIBCONFIG_DIR; + } + + snprintfz(filename, FILENAME_MAX, "%s/ebpf.d/%s", config_dir, EBPF_KERNEL_REJECT_LIST_FILE); + kernel_reject_list = fopen(filename, "r"); + + if (!kernel_reject_list) + return 0; + } + } + + // Find if the kernel is in the reject list + char *reject_string = NULL; + size_t buf_len = 0; + ssize_t reject_string_len; + while ((reject_string_len = getline(&reject_string, &buf_len, kernel_reject_list) - 1) > 0) { + if (version_string_len >= reject_string_len) { + if (!strncmp(version_string, reject_string, reject_string_len)) { + netdata_log_info("A buggy kernel is detected"); + fclose(kernel_reject_list); 
+ freez(reject_string); + return 1; + } + } + } + + fclose(kernel_reject_list); + free(reject_string); + + return 0; +} + +/** + * Check Kernel Version + * + * Test kernel version + * + * @param version current kernel version + * + * @return It returns 1 when kernel is supported and 0 otherwise + */ +int ebpf_check_kernel_version(int version) +{ + if (kernel_is_rejected()) + return 0; + + // Kernel 4.11.0 or RH > 7.5 + return (version >= NETDATA_MINIMUM_EBPF_KERNEL || get_redhat_release() >= NETDATA_MINIMUM_RH_VERSION); +} + +/** + * Am I running as Root + * + * Verify the user that is running the collector. + * + * @return It returns 1 for root and 0 otherwise. + */ +int is_ebpf_plugin_running_as_root() +{ + uid_t uid = getuid(), euid = geteuid(); + + if (uid == 0 || euid == 0) { + return 1; + } + + return 0; +} + +/** + * Can the plugin run eBPF code + * + * This function checks kernel version and permissions. + * + * @param kver the kernel version + * @param name the plugin name. + * + * @return It returns 0 on success and -1 otherwise + */ +int ebpf_can_plugin_load_code(int kver, char *plugin_name) +{ + if (!ebpf_check_kernel_version(kver)) { + netdata_log_error("The current collector cannot run on this kernel."); + return -1; + } + + if (!is_ebpf_plugin_running_as_root()) { + netdata_log_error( + "%s should either run as root (now running with uid %u, euid %u) or have special capabilities.", + plugin_name, (unsigned int)getuid(), (unsigned int)geteuid()); + return -1; + } + + return 0; +} + +/** + * Adjust memory + * + * Adjust memory values to load eBPF programs. 
+ * + * @return It returns 0 on success and -1 otherwise + */ +int ebpf_adjust_memory_limit() +{ + struct rlimit r = { RLIM_INFINITY, RLIM_INFINITY }; + if (setrlimit(RLIMIT_MEMLOCK, &r)) { + netdata_log_error("Setrlimit(RLIMIT_MEMLOCK)"); + return -1; + } + + return 0; +} + +//---------------------------------------------------------------------------------------------------------------------- + +/** + * Kernel Name + * + * Select kernel name used by eBPF programs + * + * Netdata delivers for users eBPF programs with specific suffixes that represent the kernels they were + * compiled, when we load the eBPF program, the suffix must be the nereast possible of the kernel running. + * + * @param selector select the kernel version. + * + * @return It returns the string to load kernel. + */ +static char *ebpf_select_kernel_name(uint32_t selector) +{ + static char *kernel_names[] = { NETDATA_IDX_STR_V3_10, NETDATA_IDX_STR_V4_14, NETDATA_IDX_STR_V4_16, + NETDATA_IDX_STR_V4_18, NETDATA_IDX_STR_V5_4, NETDATA_IDX_STR_V5_10, + NETDATA_IDX_STR_V5_11, NETDATA_IDX_STR_V5_14, NETDATA_IDX_STR_V5_15, + NETDATA_IDX_STR_V5_16 + }; + + return kernel_names[selector]; +} + +/** + * Select Max Index + * + * Select last index that will be tested on host. + * + * @param is_rhf is Red Hat fammily? + * @param kver the kernel version + * + * @return it returns the index to access kernel string. 
+ */ +static int ebpf_select_max_index(int is_rhf, uint32_t kver) +{ + if (is_rhf > 0) { // Is Red Hat family + if (kver >= NETDATA_EBPF_KERNEL_5_14) + return NETDATA_IDX_V5_14; + else if (kver >= NETDATA_EBPF_KERNEL_5_4 && kver < NETDATA_EBPF_KERNEL_5_5) // For Oracle Linux + return NETDATA_IDX_V5_4; + else if (kver >= NETDATA_EBPF_KERNEL_4_11) + return NETDATA_IDX_V4_18; + } else { // Kernels from kernel.org + if (kver >= NETDATA_EBPF_KERNEL_5_16) + return NETDATA_IDX_V5_16; + else if (kver >= NETDATA_EBPF_KERNEL_5_15) + return NETDATA_IDX_V5_15; + else if (kver >= NETDATA_EBPF_KERNEL_5_11) + return NETDATA_IDX_V5_11; + else if (kver >= NETDATA_EBPF_KERNEL_5_10) + return NETDATA_IDX_V5_10; + else if (kver >= NETDATA_EBPF_KERNEL_4_17) + return NETDATA_IDX_V5_4; + else if (kver >= NETDATA_EBPF_KERNEL_4_15) + return NETDATA_IDX_V4_16; + else if (kver >= NETDATA_EBPF_KERNEL_4_11) + return NETDATA_IDX_V4_14; + } + + return NETDATA_IDX_V3_10; +} + +/** + * Select Index + * + * Select index to load data. + * + * @param kernels is the variable with kernel versions. + * @param is_rhf is Red Hat fammily? + * param kver the kernel version + */ +static uint32_t ebpf_select_index(uint32_t kernels, int is_rhf, uint32_t kver) +{ + uint32_t start = ebpf_select_max_index(is_rhf, kver); + uint32_t idx; + + if (is_rhf == -1) + kernels &= ~NETDATA_V5_14; + + for (idx = start; idx; idx--) { + if (kernels & 1 << idx) + break; + } + + return idx; +} + +/** + * Mount Name + * + * Mount name of eBPF program to be loaded. + * + * Netdata eBPF programs has the following format: + * + * Tnetdata_ebpf_N.V.o + * + * where: + * T - Is the eBPF type. When starts with 'p', this means we are only adding probes, + * and when they start with 'r' we are using retprobes. + * N - The eBPF program name. + * V - The kernel version in string format. + * + * @param out the vector where the name will be stored + * @param len the size of the out vector. 
+ * @param path where the binaries are stored + * @param kver the kernel version + * @param name the eBPF program name. + * @param is_return is return or entry ? + */ +static void ebpf_mount_name(char *out, size_t len, char *path, uint32_t kver, const char *name, + int is_return, int is_rhf) +{ + char *version = ebpf_select_kernel_name(kver); + snprintfz(out, len, "%s/ebpf.d/%cnetdata_ebpf_%s.%s%s.o", + path, + (is_return) ? 'r' : 'p', + name, + version, + (is_rhf != -1) ? ".rhf" : ""); +} + +//---------------------------------------------------------------------------------------------------------------------- + +/** + * Statistics from targets + * + * Count the information from targets. + * + * @param report the output structure + * @param targets vector with information about the eBPF plugin. + * @param value factor used to update calculation + */ +static void ebpf_stats_targets(ebpf_plugin_stats_t *report, netdata_ebpf_targets_t *targets, int value) +{ + if (!targets) { + report->probes = report->tracepoints = report->trampolines = 0; + return; + } + + int i = 0; + while (targets[i].name) { + switch (targets[i].mode) { + case EBPF_LOAD_PROBE: { + report->probes += value; + break; + } + case EBPF_LOAD_RETPROBE: { + report->retprobes += value; + break; + } + case EBPF_LOAD_TRACEPOINT: { + report->tracepoints += value; + break; + } + case EBPF_LOAD_TRAMPOLINE: { + report->trampolines += value; + break; + } + } + + i++; + } +} + +/** + * Update General stats + * + * Update eBPF plugin statistics that has relationship with the thread. + * + * This function must be called with mutex associated to charts is locked. + * + * @param report the output structure + * @param em the structure with information about how the module/thread is working. + */ +void ebpf_update_stats(ebpf_plugin_stats_t *report, ebpf_module_t *em) +{ + int value; + + // It is not necessary to report more information. 
+ if (em->enabled > NETDATA_THREAD_EBPF_FUNCTION_RUNNING) + value = -1; + else + value = 1; + + report->threads += value; + report->running += value; + + // In theory the `else if` is useless, because when this function is called, the module should not stay in + // EBPF_LOAD_PLAY_DICE. We have this additional condition to detect errors from developers. + if (em->load & EBPF_LOAD_LEGACY) + report->legacy += value; + else if (em->load & EBPF_LOAD_CORE) + report->core += value; + + if (em->maps_per_core) + report->hash_percpu += value; + else + report->hash_unique += value; + + ebpf_stats_targets(report, em->targets, value); +} + +/** + * Update Kernel memory with memory + * + * This algorithm is an adaptation of https://elixir.bootlin.com/linux/v6.1.14/source/tools/bpf/bpftool/common.c#L402 + * to get 'memlock' data and update report. + * + * @param report the output structure + * @param map pointer to a map. + * @param action What action will be done with this map. + */ +void ebpf_update_kernel_memory(ebpf_plugin_stats_t *report, ebpf_local_maps_t *map, ebpf_stats_action_t action) +{ + char filename[FILENAME_MAX+1]; + snprintfz(filename, FILENAME_MAX, "/proc/self/fdinfo/%d", map->map_fd); + procfile *ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + netdata_log_error("Cannot open %s", filename); + return; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return; + + unsigned long j, lines = procfile_lines(ff); + char *memlock = { "memlock" }; + for (j = 0; j < lines ; j++) { + char *cmp = procfile_lineword(ff, j,0); + if (!strncmp(memlock, cmp, 7)) { + uint64_t memsize = (uint64_t) str2l(procfile_lineword(ff, j,1)); + switch (action) { + case EBPF_ACTION_STAT_ADD: { + report->memlock_kern += memsize; + report->hash_tables += 1; +#ifdef NETDATA_DEV_MODE + netdata_log_info("Hash table %u: %s (FD = %d) is consuming %lu bytes totalizing %lu bytes", + report->hash_tables, map->name, map->map_fd, memsize, report->memlock_kern); 
+#endif + break; + } + case EBPF_ACTION_STAT_REMOVE: { + report->memlock_kern -= memsize; + report->hash_tables -= 1; +#ifdef NETDATA_DEV_MODE + netdata_log_info("Hash table %s (FD = %d) was removed releasing %lu bytes, now we have %u tables loaded totalizing %lu bytes.", + map->name, map->map_fd, memsize, report->hash_tables, report->memlock_kern); +#endif + break; + } + default: { + break; + } + } + break; + } + } + + procfile_close(ff); +} + +/** + * Update Kernel memory with memory + * + * This algorithm is an adaptation of https://elixir.bootlin.com/linux/v6.1.14/source/tools/bpf/bpftool/common.c#L402 + * to get 'memlock' data and update report. + * + * @param report the output structure + * @param map pointer to a map. Last map must fish with name = NULL + * @param action should plugin add or remove values from amount. + */ +void ebpf_update_kernel_memory_with_vector(ebpf_plugin_stats_t *report, + ebpf_local_maps_t *maps, + ebpf_stats_action_t action) +{ + if (!maps) + return; + + ebpf_local_maps_t *map; + int i = 0; + for (map = &maps[i]; maps[i].name; i++, map = &maps[i]) { + int fd = map->map_fd; + if (fd == ND_EBPF_MAP_FD_NOT_INITIALIZED) + continue; + + ebpf_update_kernel_memory(report, map, action); + } +} + +//---------------------------------------------------------------------------------------------------------------------- + +void ebpf_update_pid_table(ebpf_local_maps_t *pid, ebpf_module_t *em) +{ + pid->user_input = em->pid_map_size; +} + +/** + * Update map size + * + * Update map size with information read from configuration files. + * + * @param map the structure with file descriptor to update. + * @param lmap the structure with information from configuration files. + * @param em the structure with information about how the module/thread is working. + * @param map_name the name of the file used to log. 
+ */ +void ebpf_update_map_size(struct bpf_map *map, ebpf_local_maps_t *lmap, ebpf_module_t *em, const char *map_name __maybe_unused) +{ + uint32_t define_size = 0; + uint32_t apps_type = NETDATA_EBPF_MAP_PID | NETDATA_EBPF_MAP_RESIZABLE; + if (lmap->user_input && lmap->user_input != lmap->internal_input) { + define_size = lmap->internal_input; +#ifdef NETDATA_INTERNAL_CHECKS + netdata_log_info("Changing map %s from size %u to %u ", map_name, lmap->internal_input, lmap->user_input); +#endif + } else if (((lmap->type & apps_type) == apps_type) && (!em->apps_charts) && (!em->cgroup_charts)) { + lmap->user_input = ND_EBPF_DEFAULT_MIN_PID; + } else if (((em->apps_charts) || (em->cgroup_charts)) && (em->apps_level != NETDATA_APPS_NOT_SET)) { + switch (em->apps_level) { + case NETDATA_APPS_LEVEL_ALL: { + define_size = lmap->user_input; + break; + } + case NETDATA_APPS_LEVEL_PARENT: { + define_size = ND_EBPF_DEFAULT_PID_SIZE / 2; + break; + } + case NETDATA_APPS_LEVEL_REAL_PARENT: + default: { + define_size = ND_EBPF_DEFAULT_PID_SIZE / 3; + } + } + } + + if (!define_size) + return; + +#ifdef LIBBPF_MAJOR_VERSION + bpf_map__set_max_entries(map, define_size); +#else + bpf_map__resize(map, define_size); +#endif +} + +#ifdef LIBBPF_MAJOR_VERSION +/** + * Update map type + * + * Update map type with information given. + * + * @param map the map we want to modify + * @param w a structure with user input + */ +void ebpf_update_map_type(struct bpf_map *map, ebpf_local_maps_t *w) +{ + if (bpf_map__set_type(map, w->map_type)) { + netdata_log_error("Cannot modify map type for %s", w->name); + } +} + +/** + * Define map type + * + * This PR defines the type used by hash tables according user input. + * + * @param maps the list of maps used with a hash table. + * @param maps_per_core define if map type according user specification. + * @param kver kernel version host is running. 
+ */ +void ebpf_define_map_type(ebpf_local_maps_t *maps, int maps_per_core, int kver) +{ + if (!maps) + return; + + // Before kernel 4.06 there was not percpu hash tables + if (kver < NETDATA_EBPF_KERNEL_4_06) + maps_per_core = CONFIG_BOOLEAN_NO; + + int i = 0; + while (maps[i].name) { + ebpf_local_maps_t *map = &maps[i]; + // maps_per_core is a boolean value in configuration files. + if (maps_per_core) { + if (map->map_type == BPF_MAP_TYPE_HASH) + map->map_type = BPF_MAP_TYPE_PERCPU_HASH; + else if (map->map_type == BPF_MAP_TYPE_ARRAY) + map->map_type = BPF_MAP_TYPE_PERCPU_ARRAY; + } else { + if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH) + map->map_type = BPF_MAP_TYPE_HASH; + else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) + map->map_type = BPF_MAP_TYPE_ARRAY; + } + + i++; + } +} +#endif + +/** + * Update Legacy map + * + * Update map for eBPF legacy code. + * + * @param program the structure with values read from binary. + * @param em the structure with information about how the module/thread is working. 
+ */ +static void ebpf_update_legacy_map(struct bpf_object *program, ebpf_module_t *em) +{ + struct bpf_map *map; + ebpf_local_maps_t *maps = em->maps; + if (!maps) + return; + + bpf_map__for_each(map, program) + { + const char *map_name = bpf_map__name(map); + int i = 0; + while (maps[i].name) { + ebpf_local_maps_t *w = &maps[i]; + + if (!strcmp(w->name, map_name)) { + // Modify size + if (w->type & NETDATA_EBPF_MAP_RESIZABLE) { + ebpf_update_map_size(map, w, em, map_name); + } + +#ifdef LIBBPF_MAJOR_VERSION + ebpf_update_map_type(map, w); +#endif + } + + i++; + } + } +} + +size_t ebpf_count_programs(struct bpf_object *obj) +{ + size_t tot = 0; + struct bpf_program *prog; + bpf_object__for_each_program(prog, obj) + { + tot++; + } + + return tot; +} + +static ebpf_specify_name_t *ebpf_find_names(ebpf_specify_name_t *names, const char *prog_name) +{ + size_t i = 0; + while (names[i].program_name) { + if (!strcmp(prog_name, names[i].program_name)) + return &names[i]; + + i++; + } + + return NULL; +} + +static struct bpf_link **ebpf_attach_programs(struct bpf_object *obj, size_t length, ebpf_specify_name_t *names) +{ + struct bpf_link **links = callocz(length , sizeof(struct bpf_link *)); + size_t i = 0; + struct bpf_program *prog; + ebpf_specify_name_t *w; + bpf_object__for_each_program(prog, obj) + { + if (names) { + const char *name = bpf_program__name(prog); + w = ebpf_find_names(names, name); + } else + w = NULL; + + if (w) { + enum bpf_prog_type type = bpf_program__get_type(prog); + if (type == BPF_PROG_TYPE_KPROBE) + links[i] = bpf_program__attach_kprobe(prog, w->retprobe, w->optional); + } else + links[i] = bpf_program__attach(prog); + + if (libbpf_get_error(links[i])) { + links[i] = NULL; + } + + i++; + } + + return links; +} + +static void ebpf_update_maps(ebpf_module_t *em, struct bpf_object *obj) +{ + if (!em->maps) + return; + + ebpf_local_maps_t *maps = em->maps; + struct bpf_map *map; + bpf_map__for_each(map, obj) + { + int fd = bpf_map__fd(map); + if 
(maps) { + const char *map_name = bpf_map__name(map); + int j = 0; + while (maps[j].name) { + ebpf_local_maps_t *w = &maps[j]; + if (w->map_fd == ND_EBPF_MAP_FD_NOT_INITIALIZED && !strcmp(map_name, w->name)) + w->map_fd = fd; + + j++; + } + } + } +} + +/** + * Update Controller + * + * Update controller value with user input. + * + * @param fd the table file descriptor + * @param em structure with information about eBPF program we will load. + */ +void ebpf_update_controller(int fd, ebpf_module_t *em) +{ + uint32_t values[NETDATA_CONTROLLER_END] = { + (em->apps_charts & NETDATA_EBPF_APPS_FLAG_YES) | em->cgroup_charts, + em->apps_level, 0, 0, 0, 0 + }; + uint32_t key; + uint32_t end = NETDATA_CONTROLLER_PID_TABLE_ADD; + + for (key = NETDATA_CONTROLLER_APPS_ENABLED; key < end; key++) { + int ret = bpf_map_update_elem(fd, &key, &values[key], BPF_ANY); + if (ret) + netdata_log_error("Add key(%u) for controller table failed.", key); + } +} + +/** + * Update Legacy controller + * + * Update legacy controller table when eBPF program has it. + * + * @param em structure with information about eBPF program we will load. + * @param obj bpf object with tables. + */ +static void ebpf_update_legacy_controller(ebpf_module_t *em, struct bpf_object *obj) +{ + ebpf_local_maps_t *maps = em->maps; + if (!maps) + return; + + struct bpf_map *map; + bpf_map__for_each(map, obj) + { + size_t i = 0; + while (maps[i].name) { + ebpf_local_maps_t *w = &maps[i]; + if (w->map_fd != ND_EBPF_MAP_FD_NOT_INITIALIZED && (w->type & NETDATA_EBPF_MAP_CONTROLLER)) { + w->type &= ~NETDATA_EBPF_MAP_CONTROLLER; + w->type |= NETDATA_EBPF_MAP_CONTROLLER_UPDATED; + + ebpf_update_controller(w->map_fd, em); + } + i++; + } + } +} + +/** + * Load Program + * + * Load eBPF program into kernel + * + * @param plugins_dir directory where binary are stored + * @param em structure with information about eBPF program we will load. 
+ * @param kver the kernel version according /usr/include/linux/version.h + * @param is_rhf is a kernel from Red Hat Family? + * @param obj structure where we will store object loaded. + * + * @return it returns a link for each target we associated an eBPF program. + */ +struct bpf_link **ebpf_load_program(char *plugins_dir, ebpf_module_t *em, int kver, int is_rhf, + struct bpf_object **obj) +{ + char lpath[4096]; + + uint32_t idx = ebpf_select_index(em->kernels, is_rhf, kver); + + ebpf_mount_name(lpath, 4095, plugins_dir, idx, em->info.thread_name, em->mode, is_rhf); + + // When this function is called ebpf.plugin is using legacy code, so we should reset the variable + em->load &= ~ NETDATA_EBPF_LOAD_METHODS; + em->load |= EBPF_LOAD_LEGACY; + + *obj = bpf_object__open_file(lpath, NULL); + if (!*obj) + return NULL; + + if (libbpf_get_error(obj)) { + bpf_object__close(*obj); + return NULL; + } + + ebpf_update_legacy_map(*obj, em); + + if (bpf_object__load(*obj)) { + netdata_log_error("ERROR: loading BPF object file failed %s\n", lpath); + bpf_object__close(*obj); + return NULL; + } + + ebpf_update_maps(em, *obj); + ebpf_update_legacy_controller(em, *obj); + + size_t count_programs = ebpf_count_programs(*obj); + +#ifdef NETDATA_INTERNAL_CHECKS + netdata_log_info("eBPF program %s loaded with success!", lpath); +#endif + + return ebpf_attach_programs(*obj, count_programs, em->names); +} + +char *ebpf_find_symbol(char *search) +{ + char filename[FILENAME_MAX + 1]; + char *ret = NULL; + snprintfz(filename, FILENAME_MAX, "%s%s", netdata_configured_host_prefix, NETDATA_KALLSYMS); + procfile *ff = procfile_open(filename, " \t", PROCFILE_FLAG_DEFAULT); + if(unlikely(!ff)) { + netdata_log_error("Cannot open %s%s", netdata_configured_host_prefix, NETDATA_KALLSYMS); + return ret; + } + + ff = procfile_readall(ff); + if(unlikely(!ff)) + return ret; + + unsigned long i, lines = procfile_lines(ff); + size_t length = strlen(search); + for(i = 0; i < lines ; i++) { + char *cmp = 
procfile_lineword(ff, i,2); + if (!strncmp(search, cmp, length)) { + ret = strdupz(cmp); + break; + } + } + + procfile_close(ff); + + return ret; +} + +void ebpf_update_names(ebpf_specify_name_t *opt, ebpf_module_t *em) +{ + int mode = em->mode; + em->names = opt; + + size_t i = 0; + while (opt[i].program_name) { + opt[i].retprobe = (mode == MODE_RETURN); + opt[i].optional = ebpf_find_symbol(opt[i].function_to_attach); + + i++; + } +} + +//---------------------------------------------------------------------------------------------------------------------- + +void ebpf_mount_config_name(char *filename, size_t length, char *path, const char *config) +{ + snprintf(filename, length, "%s/ebpf.d/%s", path, config); +} + +int ebpf_load_config(struct config *config, char *filename) +{ + return appconfig_load(config, filename, 0, NULL); +} + + +static netdata_run_mode_t ebpf_select_mode(char *mode) +{ + if (!strcasecmp(mode,EBPF_CFG_LOAD_MODE_RETURN )) + return MODE_RETURN; + else if (!strcasecmp(mode, "dev")) + return MODE_DEVMODE; + + return MODE_ENTRY; +} + +static void ebpf_select_mode_string(char *output, size_t len, netdata_run_mode_t sel) +{ + if (sel == MODE_RETURN) + strncpyz(output, EBPF_CFG_LOAD_MODE_RETURN, len); + else + strncpyz(output, EBPF_CFG_LOAD_MODE_DEFAULT, len); +} + +/** + * Convert string to load mode + * + * Convert the string given as argument to value present in enum. + * + * @param str value read from configuration file. + * + * @return It returns the value to be used. + */ +netdata_ebpf_load_mode_t epbf_convert_string_to_load_mode(char *str) +{ + if (!strcasecmp(str, EBPF_CFG_CORE_PROGRAM)) + return EBPF_LOAD_CORE; + else if (!strcasecmp(str, EBPF_CFG_LEGACY_PROGRAM)) + return EBPF_LOAD_LEGACY; + + return EBPF_LOAD_PLAY_DICE; +} + +/** + * Convert load mode to string + * + * @param mode value that will select the string + * + * @return It returns the string associated to mode. 
+ */ +static char *ebpf_convert_load_mode_to_string(netdata_ebpf_load_mode_t mode) +{ + if (mode & EBPF_LOAD_CORE) + return EBPF_CFG_CORE_PROGRAM; + else if (mode & EBPF_LOAD_LEGACY) + return EBPF_CFG_LEGACY_PROGRAM; + + return EBPF_CFG_DEFAULT_PROGRAM; +} + +/** + * Convert collect pid to string + * + * @param level value that will select the string + * + * @return It returns the string associated to level. + */ +static char *ebpf_convert_collect_pid_to_string(netdata_apps_level_t level) +{ + if (level == NETDATA_APPS_LEVEL_REAL_PARENT) + return EBPF_CFG_PID_REAL_PARENT; + else if (level == NETDATA_APPS_LEVEL_PARENT) + return EBPF_CFG_PID_PARENT; + else if (level == NETDATA_APPS_LEVEL_ALL) + return EBPF_CFG_PID_ALL; + + return EBPF_CFG_PID_INTERNAL_USAGE; +} + +/** + * Convert string to apps level + * + * @param str the argument read from config files + * + * @return it returns the level associated to the string or default when it is a wrong value + */ +netdata_apps_level_t ebpf_convert_string_to_apps_level(char *str) +{ + if (!strcasecmp(str, EBPF_CFG_PID_REAL_PARENT)) + return NETDATA_APPS_LEVEL_REAL_PARENT; + else if (!strcasecmp(str, EBPF_CFG_PID_PARENT)) + return NETDATA_APPS_LEVEL_PARENT; + else if (!strcasecmp(str, EBPF_CFG_PID_ALL)) + return NETDATA_APPS_LEVEL_ALL; + + return NETDATA_APPS_NOT_SET; +} + +/** + * CO-RE type + * + * Select the preferential type of CO-RE + * + * @param str value read from configuration file. + * @param lmode load mode used by collector. + */ +netdata_ebpf_program_loaded_t ebpf_convert_core_type(char *str, netdata_run_mode_t lmode) +{ + if (!strcasecmp(str, EBPF_CFG_ATTACH_TRACEPOINT)) + return EBPF_LOAD_TRACEPOINT; + else if (!strcasecmp(str, EBPF_CFG_ATTACH_PROBE)) { + return (lmode == MODE_ENTRY) ? EBPF_LOAD_PROBE : EBPF_LOAD_RETPROBE; + } + + return EBPF_LOAD_TRAMPOLINE; +} + +#ifdef LIBBPF_MAJOR_VERSION +/** + * Adjust Thread Load + * + * Adjust thread configuration according specified load. 
+ * + * @param mod the main structure that will be adjusted. + * @param file the btf file used with thread. + */ +void ebpf_adjust_thread_load(ebpf_module_t *mod, struct btf *file) +{ + if (!file) { + mod->load &= ~EBPF_LOAD_CORE; + mod->load |= EBPF_LOAD_LEGACY; + } else if (mod->load == EBPF_LOAD_PLAY_DICE && file) { + mod->load &= ~EBPF_LOAD_LEGACY; + mod->load |= EBPF_LOAD_CORE; + } +} + +/** + * Parse BTF file + * + * Parse a specific BTF file present on filesystem + * + * @param filename the file that will be parsed. + * + * @return It returns a pointer for the file on success and NULL otherwise. + */ +struct btf *ebpf_parse_btf_file(const char *filename) +{ + struct btf *bf = btf__parse(filename, NULL); + if (libbpf_get_error(bf)) { + fprintf(stderr, "Cannot parse btf file"); + btf__free(bf); + return NULL; + } + + return bf; +} + +/** + * Load default btf file + * + * Load the default BTF file on environment. + * + * @param path is the fullpath + * @param filename is the file inside BTF path. + */ +struct btf *ebpf_load_btf_file(char *path, char *filename) +{ + char fullpath[PATH_MAX + 1]; + snprintfz(fullpath, PATH_MAX, "%s/%s", path, filename); + struct btf *ret = ebpf_parse_btf_file(fullpath); + if (!ret) + netdata_log_info("Your environment does not have BTF file %s/%s. The plugin will work with 'legacy' code.", + path, filename); + + return ret; +} + +/** + * Find BTF attach type + * + * Search type fr current btf file. + * + * @param file is the structure for the btf file already parsed. + */ +static inline const struct btf_type *ebpf_find_btf_attach_type(struct btf *file) +{ + int id = btf__find_by_name_kind(file, "bpf_attach_type", BTF_KIND_ENUM); + if (id < 0) { + fprintf(stderr, "Cannot find 'bpf_attach_type'"); + + return NULL; + } + + return btf__type_by_id(file, id); +} + +/** + * Is function inside BTF + * + * Look for a specific function inside the given BTF file. + * + * @param file is the structure for the btf file already parsed. 
+ * @param function is the function that we want to find. + */ +int ebpf_is_function_inside_btf(struct btf *file, char *function) +{ + const struct btf_type *type = ebpf_find_btf_attach_type(file); + if (!type) + return -1; + + const struct btf_enum *e = btf_enum(type); + int i, id; + for (id = -1, i = 0; i < btf_vlen(type); i++, e++) { + if (!strcmp(btf__name_by_offset(file, e->name_off), "BPF_TRACE_FENTRY")) { + id = btf__find_by_name_kind(file, function, BTF_KIND_FUNC); + break; + } + } + + return (id > 0) ? 1 : 0; +} +#endif + +/** + * Update target with configuration + * + * Update target load mode with value. + * + * @param em the module structure + * @param value value used to update. + */ +static void ebpf_update_target_with_conf(ebpf_module_t *em, netdata_ebpf_program_loaded_t value) +{ + netdata_ebpf_targets_t *targets = em->targets; + if (!targets) { + return; + } + + int i = 0; + while (targets[i].name) { + targets[i].mode = value; + i++; + } +} + +/** + * Select Load Mode + * + * Select the load mode according the given inputs. + * + * @param btf_file a pointer to the loaded btf file. + * @parma load current value. + * @param btf_file a pointer to the loaded btf file. + * @param is_rhf is Red Hat family? + * + * @return it returns the new load mode. + */ +static netdata_ebpf_load_mode_t ebpf_select_load_mode(struct btf *btf_file, netdata_ebpf_load_mode_t load, + int kver, int is_rh) +{ +#ifdef LIBBPF_MAJOR_VERSION + if ((load & EBPF_LOAD_CORE) || (load & EBPF_LOAD_PLAY_DICE)) { + // Quick fix for Oracle linux 8.x + load = (!btf_file || (is_rh && (kver >= NETDATA_EBPF_KERNEL_5_4 && kver < NETDATA_EBPF_KERNEL_5_5))) ? + EBPF_LOAD_LEGACY : EBPF_LOAD_CORE; + } +#else + load = EBPF_LOAD_LEGACY; +#endif + + return load; +} + +/** + * Update Module using config + * + * Update configuration for a specific thread. 
+ * + * @param modules structure that will be updated + * @param origin specify the configuration file loaded + * @param btf_file a pointer to the loaded btf file. + * @param is_rhf is Red Hat family? + */ +void ebpf_update_module_using_config(ebpf_module_t *modules, netdata_ebpf_load_mode_t origin, struct btf *btf_file, + int kver, int is_rh) +{ + char default_value[EBPF_MAX_MODE_LENGTH + 1]; + ebpf_select_mode_string(default_value, EBPF_MAX_MODE_LENGTH, modules->mode); + char *load_mode = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_LOAD_MODE, default_value); + modules->mode = ebpf_select_mode(load_mode); + + modules->update_every = (int)appconfig_get_number(modules->cfg, EBPF_GLOBAL_SECTION, + EBPF_CFG_UPDATE_EVERY, modules->update_every); + + modules->apps_charts = appconfig_get_boolean(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_APPLICATION, + (int) (modules->apps_charts & NETDATA_EBPF_APPS_FLAG_YES)); + + modules->cgroup_charts = appconfig_get_boolean(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_CGROUP, + modules->cgroup_charts); + + modules->pid_map_size = (uint32_t)appconfig_get_number(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_PID_SIZE, + modules->pid_map_size); + + modules->lifetime = (uint32_t) appconfig_get_number(modules->cfg, EBPF_GLOBAL_SECTION, + EBPF_CFG_LIFETIME, EBPF_DEFAULT_LIFETIME); + + char *value = ebpf_convert_load_mode_to_string(modules->load & NETDATA_EBPF_LOAD_METHODS); + char *type_format = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_TYPE_FORMAT, value); + netdata_ebpf_load_mode_t load = epbf_convert_string_to_load_mode(type_format); + load = ebpf_select_load_mode(btf_file, load, kver, is_rh); + modules->load = origin | load; + + char *core_attach = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_CORE_ATTACH, EBPF_CFG_ATTACH_TRAMPOLINE); + netdata_ebpf_program_loaded_t fill_lm = ebpf_convert_core_type(core_attach, modules->mode); + ebpf_update_target_with_conf(modules, fill_lm); + + value = 
ebpf_convert_collect_pid_to_string(modules->apps_level); + char *collect_pid = appconfig_get(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_COLLECT_PID, value); + modules->apps_level = ebpf_convert_string_to_apps_level(collect_pid); + + modules->maps_per_core = appconfig_get_boolean(modules->cfg, EBPF_GLOBAL_SECTION, EBPF_CFG_MAPS_PER_CORE, + modules->maps_per_core); + if (kver < NETDATA_EBPF_KERNEL_4_06) + modules->maps_per_core = CONFIG_BOOLEAN_NO; + +#ifdef NETDATA_DEV_MODE + netdata_log_info("The thread %s was configured with: mode = %s; update every = %d; apps = %s; cgroup = %s; ebpf type format = %s; ebpf co-re tracing = %s; collect pid = %s; maps per core = %s, lifetime=%u", + modules->info.thread_name, + load_mode, + modules->update_every, + (modules->apps_charts)?"enabled":"disabled", + (modules->cgroup_charts)?"enabled":"disabled", + type_format, + core_attach, + collect_pid, + (modules->maps_per_core)?"enabled":"disabled", + modules->lifetime + ); +#endif +} + +/** + * Update module + * + * When this function is called, it will load the configuration file and after this + * it updates the global information of ebpf_module. + * If the module has specific configuration, this function will load it, but it will not + * update the variables. + * + * @param em the module structure + * @param btf_file a pointer to the loaded btf file. + * @param is_rhf is Red Hat family? 
+ * @param kver the kernel version + */ +void ebpf_update_module(ebpf_module_t *em, struct btf *btf_file, int kver, int is_rh) +{ + char filename[FILENAME_MAX+1]; + netdata_ebpf_load_mode_t origin; + + ebpf_mount_config_name(filename, FILENAME_MAX, ebpf_user_config_dir, em->config_file); + if (!ebpf_load_config(em->cfg, filename)) { + ebpf_mount_config_name(filename, FILENAME_MAX, ebpf_stock_config_dir, em->config_file); + if (!ebpf_load_config(em->cfg, filename)) { + netdata_log_error("Cannot load the ebpf configuration file %s", em->config_file); + return; + } + // If user defined data globally, we will have here EBPF_LOADED_FROM_USER, we need to consider this, to avoid + // forcing users to configure thread by thread. + origin = (!(em->load & NETDATA_EBPF_LOAD_SOURCE)) ? EBPF_LOADED_FROM_STOCK : em->load & NETDATA_EBPF_LOAD_SOURCE; + } else + origin = EBPF_LOADED_FROM_USER; + + ebpf_update_module_using_config(em, origin, btf_file, kver, is_rh); +} + +/** + * Adjust Apps Cgroup + * + * Apps and cgroup has internal cleanup that needs attaching tracers to release_task, to avoid overload the function + * we will enable this integration by default, if and only if, we are running with trampolines. + * + * @param em a pointer to the main thread structure. + * @param mode is the mode used with different + */ +void ebpf_adjust_apps_cgroup(ebpf_module_t *em, netdata_ebpf_program_loaded_t mode) +{ + if ((em->load & EBPF_LOADED_FROM_STOCK) && + (em->apps_charts || em->cgroup_charts) && + mode != EBPF_LOAD_TRAMPOLINE) { + em->apps_charts = NETDATA_EBPF_APPS_FLAG_NO; + em->cgroup_charts = 0; + } +} + +//---------------------------------------------------------------------------------------------------------------------- + +/** + * Load Address + * + * Helper used to get address from /proc/kallsym + * + * @param fa address structure + * @param fd file descriptor loaded inside kernel. 
If a negative value is given + * the function will load address and it won't update hash table. + */ +void ebpf_load_addresses(ebpf_addresses_t *fa, int fd) +{ + if (fa->addr) + return ; + + procfile *ff = procfile_open("/proc/kallsyms", " \t:", PROCFILE_FLAG_DEFAULT); + if (!ff) + return; + + ff = procfile_readall(ff); + if (!ff) + return; + + fa->hash = simple_hash(fa->function); + + size_t lines = procfile_lines(ff), l; + for(l = 0; l < lines ;l++) { + char *fcnt = procfile_lineword(ff, l, 2); + uint32_t hash = simple_hash(fcnt); + if (fa->hash == hash && !strcmp(fcnt, fa->function)) { + char *type = procfile_lineword(ff, l, 2); + fa->type = type[0]; + if (fd > 0) { + char addr[128]; + snprintf(addr, 127, "0x%s", procfile_lineword(ff, l, 0)); + fa->addr = (unsigned long) strtoul(addr, NULL, 16); + uint32_t key = 0; + bpf_map_update_elem(fd, &key, &fa->addr, BPF_ANY); + } else + fa->addr = 1; + break; + } + } + + procfile_close(ff); +} + +//---------------------------------------------------------------------------------------------------------------------- + +/** + * Fill Algorithms + * + * Set one unique dimension for all vector position. + * + * @param algorithms the output vector + * @param length number of elements of algorithms vector + * @param algorithm algorithm used on charts. 
+*/ +void ebpf_fill_algorithms(int *algorithms, size_t length, int algorithm) +{ + size_t i; + for (i = 0; i < length; i++) { + algorithms[i] = algorithm; + } +} + +/** + * Fill Histogram dimension + * + * Fill the histogram dimension with the specified ranges + */ +char **ebpf_fill_histogram_dimension(size_t maximum) +{ + char *dimensions[] = { "us", "ms", "s"}; + int previous_dim = 0, current_dim = 0; + uint32_t previous_level = 1000, current_level = 1000; + uint32_t previous_divisor = 1, current_divisor = 1; + uint32_t current = 1, previous = 0; + uint32_t selector; + char **out = callocz(maximum, sizeof(char *)); + char range[128]; + size_t end = maximum - 1; + for (selector = 0; selector < end; selector++) { + snprintf(range, 127, "%u%s->%u%s", previous/previous_divisor, dimensions[previous_dim], + current/current_divisor, dimensions[current_dim]); + out[selector] = strdupz(range); + previous = current; + current <<= 1; + + if (previous_dim != 2 && previous > previous_level) { + previous_dim++; + + previous_divisor *= 1000; + previous_level *= 1000; + } + + if (current_dim != 2 && current > current_level) { + current_dim++; + + current_divisor *= 1000; + current_level *= 1000; + } + } + snprintf(range, 127, "%u%s->+Inf", previous/previous_divisor, dimensions[previous_dim]); + out[selector] = strdupz(range); + + return out; +} + +/** + * Histogram dimension cleanup + * + * Cleanup dimensions allocated with function ebpf_fill_histogram_dimension + * + * @param ptr + * @param length + */ +void ebpf_histogram_dimension_cleanup(char **ptr, size_t length) +{ + size_t i; + for (i = 0; i < length; i++) { + freez(ptr[i]); + } + freez(ptr); +} + +//---------------------------------------------------------------------------------------------------------------------- + +/** + * Open tracepoint path + * + * @param filename pointer to store the path + * @param length file length + * @param subsys is the name of your subsystem. 
+ * @param eventname is the name of the event to trace. + * @param flags flags used with syscall open + * + * @return it returns a positive value on success and a negative otherwise. + */ +static inline int ebpf_open_tracepoint_path(char *filename, size_t length, char *subsys, char *eventname, int flags) +{ + snprintfz(filename, length, "%s/events/%s/%s/enable", NETDATA_DEBUGFS, subsys, eventname); + return open(filename, flags | O_CLOEXEC, 0); +} + +/** + * Is tracepoint enabled + * + * Check whether the tracepoint is enabled. + * + * @param subsys is the name of your subsystem. + * @param eventname is the name of the event to trace. + * + * @return it returns 1 when it is enabled, 0 when it is disabled and -1 on error. + */ +int ebpf_is_tracepoint_enabled(char *subsys, char *eventname) +{ + char text[FILENAME_MAX + 1]; + int fd = ebpf_open_tracepoint_path(text, FILENAME_MAX, subsys, eventname, O_RDONLY); + if (fd < 0) { + return -1; + } + + ssize_t length = read(fd, text, 1); + if (length != 1) { + close(fd); + return -1; + } + close(fd); + + return (text[0] == '1') ? CONFIG_BOOLEAN_YES : CONFIG_BOOLEAN_NO; +} + +/** + * Change Tracing values + * + * Change value for specific tracepoint enabling or disabling it according value given. + * + * @param subsys is the name of your subsystem. + * @param eventname is the name of the event to trace. + * @param value a value to enable (1) or disable (0) a tracepoint. 
+ * + * @return It returns 0 on success and -1 otherwise + */ +static int ebpf_change_tracing_values(char *subsys, char *eventname, char *value) +{ + if (strcmp("0", value) && strcmp("1", value)) { + netdata_log_error("Invalid value given to either enable or disable a tracepoint."); + return -1; + } + + char filename[1024]; + int fd = ebpf_open_tracepoint_path(filename, 1023, subsys, eventname, O_WRONLY); + if (fd < 0) { + return -1; + } + + ssize_t written = write(fd, value, strlen(value)); + if (written < 0) { + close(fd); + return -1; + } + + close(fd); + return 0; +} + +/** + * Enable tracing values + * + * Enable a tracepoint on a system + * + * @param subsys is the name of your subsystem. + * @param eventname is the name of the event to trace. + * + * @return It returns 0 on success and -1 otherwise + */ +int ebpf_enable_tracing_values(char *subsys, char *eventname) +{ + return ebpf_change_tracing_values(subsys, eventname, "1"); +} + +/** + * Disable tracing values + * + * Disable tracing points enabled by collector + * + * @param subsys is the name of your subsystem. + * @param eventname is the name of the event to trace. + * + * @return It returns 0 on success and -1 otherwise + */ +int ebpf_disable_tracing_values(char *subsys, char *eventname) +{ + return ebpf_change_tracing_values(subsys, eventname, "0"); +} + +/** + * Select PC prefix + * + * Identify the prefix to run on PC architecture. + * + * @return It returns 32 or 64 according to host arch. + */ +static uint32_t ebpf_select_pc_prefix() +{ + long counter = 1; + uint32_t i; + for (i = 0; i < 128; i++) { + counter <<= 1; + if (counter < 0) + break; + } + + return counter; +} + +/** + * Select Host Prefix + * + * Select prefix to syscall when host is running a kernel newer than 4.17.0 + * + * @param output the vector to store data. + * @param length length of output vector. 
+ * @param syscall the syscall that prefix will be attached; + * @param kver the current kernel version in format MAJOR*65536 + MINOR*256 + PATCH + */ +void ebpf_select_host_prefix(char *output, size_t length, char *syscall, int kver) +{ + if (kver < NETDATA_EBPF_KERNEL_4_17) + snprintfz(output, length, "sys_%s", syscall); + else { + uint32_t arch = ebpf_select_pc_prefix(); + // Prefix selected according https://www.kernel.org/doc/html/latest/process/adding-syscalls.html + char *prefix = (arch == 32) ? "__ia32" : "__x64"; + snprintfz(output, length, "%s_sys_%s", prefix, syscall); + } +} + diff --git a/src/libnetdata/ebpf/ebpf.h b/src/libnetdata/ebpf/ebpf.h new file mode 100644 index 000000000..50eb69630 --- /dev/null +++ b/src/libnetdata/ebpf/ebpf.h @@ -0,0 +1,491 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_EBPF_H +#define NETDATA_EBPF_H 1 + +#define NETDATA_EBPF_PLUGIN_NAME "ebpf.plugin" + +#include <bpf/bpf.h> +#include <bpf/libbpf.h> +#ifdef LIBBPF_DEPRECATED +#include <bpf/btf.h> +#include <linux/btf.h> +#endif +#include <stdlib.h> // Necessary for stdtoul +#include "libnetdata/aral/aral.h" + +#define NETDATA_DEBUGFS "/sys/kernel/debug/tracing/" +#define NETDATA_KALLSYMS "/proc/kallsyms" + +// Config files +#define EBPF_GLOBAL_SECTION "global" +#define EBPF_CFG_LOAD_MODE "ebpf load mode" +#define EBPF_CFG_LOAD_MODE_DEFAULT "entry" +#define EBPF_CFG_LOAD_MODE_RETURN "return" +#define EBPF_MAX_MODE_LENGTH 6 + +#define EBPF_CFG_TYPE_FORMAT "ebpf type format" +#define EBPF_CFG_DEFAULT_PROGRAM "auto" +#define EBPF_CFG_CORE_PROGRAM "CO-RE" +#define EBPF_CFG_LEGACY_PROGRAM "legacy" + +#define EBPF_CFG_COLLECT_PID "collect pid" +#define EBPF_CFG_PID_REAL_PARENT "real parent" +#define EBPF_CFG_PID_PARENT "parent" +#define EBPF_CFG_PID_ALL "all" +#define EBPF_CFG_PID_INTERNAL_USAGE "not used" + +#define EBPF_CFG_CORE_ATTACH "ebpf co-re tracing" +#define EBPF_CFG_ATTACH_TRAMPOLINE "trampoline" +#define EBPF_CFG_ATTACH_TRACEPOINT "tracepoint" 
+#define EBPF_CFG_ATTACH_PROBE "probe" + +#define EBPF_CFG_PROGRAM_PATH "btf path" + +#define EBPF_CFG_MAPS_PER_CORE "maps per core" + +#define EBPF_CFG_UPDATE_EVERY "update every" +#define EBPF_CFG_LIFETIME "lifetime" +#define EBPF_CFG_UPDATE_APPS_EVERY_DEFAULT 10 +#define EBPF_CFG_PID_SIZE "pid table size" +#define EBPF_CFG_APPLICATION "apps" +#define EBPF_CFG_CGROUP "cgroups" + +#define EBPF_COMMON_FNCT_CLEAN_UP "release_task" + +/** + * The RedHat magic number was got doing: + * + * 1797 = 7*256 + 5 + * + * For more details, please, read /usr/include/linux/version.h + * in any Red Hat installation. + */ +#define NETDATA_MINIMUM_RH_VERSION 1797 + +/** + * 2048 = 8*256 + 0 + */ +#define NETDATA_RH_8 2048 + +/** + * Kernel Version + * + * Kernel versions are calculated using the following formula: + * + * VERSION = LINUX_VERSION_MAJOR*65536 + LINUX_VERSION_PATCHLEVEL*256 + LINUX_VERSION_SUBLEVEL + * + * Where LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL, and LINUX_VERSION_SUBLEVEL are extracted + * from /usr/include/linux/version.h. + * + * LINUX_VERSION_SUBLEVEL has the maximum value 255, but linux can have more SUBLEVELS. 
 *
 */
enum netdata_ebpf_kernel_versions {
    NETDATA_EBPF_KERNEL_4_06 = 263680, // 263680 = 4 * 65536 + 6 * 256
    NETDATA_EBPF_KERNEL_4_11 = 264960, // 264960 = 4 * 65536 + 11 * 256
    NETDATA_EBPF_KERNEL_4_14 = 265728, // 265728 = 4 * 65536 + 14 * 256
    NETDATA_EBPF_KERNEL_4_15 = 265984, // 265984 = 4 * 65536 + 15 * 256
    NETDATA_EBPF_KERNEL_4_17 = 266496, // 266496 = 4 * 65536 + 17 * 256
    NETDATA_EBPF_KERNEL_5_0 = 327680, // 327680 = 5 * 65536 + 0 * 256
    NETDATA_EBPF_KERNEL_5_4 = 328704, // 328704 = 5 * 65536 + 4 * 256
    NETDATA_EBPF_KERNEL_5_5 = 328960, // 328960 = 5 * 65536 + 5 * 256
    NETDATA_EBPF_KERNEL_5_10 = 330240, // 330240 = 5 * 65536 + 10 * 256
    NETDATA_EBPF_KERNEL_5_11 = 330496, // 330496 = 5 * 65536 + 11 * 256
    NETDATA_EBPF_KERNEL_5_14 = 331264, // 331264 = 5 * 65536 + 14 * 256
    NETDATA_EBPF_KERNEL_5_15 = 331520, // 331520 = 5 * 65536 + 15 * 256
    NETDATA_EBPF_KERNEL_5_16 = 331776 // 331776 = 5 * 65536 + 16 * 256
};

// One bit per kernel version listed below, so several versions can be combined in one mask.
enum netdata_kernel_flag {
    NETDATA_V3_10 = 1 << 0,
    NETDATA_V4_14 = 1 << 1,
    NETDATA_V4_16 = 1 << 2,
    NETDATA_V4_18 = 1 << 3,
    NETDATA_V5_4 = 1 << 4,
    NETDATA_V5_10 = 1 << 5,
    NETDATA_V5_11 = 1 << 6,
    NETDATA_V5_14 = 1 << 7,
    NETDATA_V5_15 = 1 << 8,
    NETDATA_V5_16 = 1 << 9
};

// Sequential indexes in the same order as the bits of netdata_kernel_flag.
enum netdata_kernel_idx {
    NETDATA_IDX_V3_10,
    NETDATA_IDX_V4_14,
    NETDATA_IDX_V4_16,
    NETDATA_IDX_V4_18,
    NETDATA_IDX_V5_4 ,
    NETDATA_IDX_V5_10,
    NETDATA_IDX_V5_11,
    NETDATA_IDX_V5_14,
    NETDATA_IDX_V5_15,
    NETDATA_IDX_V5_16
};

// Version strings, one for each entry of netdata_kernel_idx.
#define NETDATA_IDX_STR_V3_10 "3.10"
#define NETDATA_IDX_STR_V4_14 "4.14"
#define NETDATA_IDX_STR_V4_16 "4.16"
#define NETDATA_IDX_STR_V4_18 "4.18"
#define NETDATA_IDX_STR_V5_4 "5.4"
#define NETDATA_IDX_STR_V5_10 "5.10"
#define NETDATA_IDX_STR_V5_11 "5.11"
#define NETDATA_IDX_STR_V5_14 "5.14"
#define NETDATA_IDX_STR_V5_15 "5.15"
#define NETDATA_IDX_STR_V5_16 "5.16"

/**
 * Minimum value has relationship with libbpf support.
+ */ +#define NETDATA_MINIMUM_EBPF_KERNEL NETDATA_EBPF_KERNEL_4_11 + +#define VERSION_STRING_LEN 256 +#define EBPF_KERNEL_REJECT_LIST_FILE "ebpf_kernel_reject_list.txt" + +#define ND_EBPF_DEFAULT_MIN_PID 1U +#define ND_EBPF_MAP_FD_NOT_INITIALIZED ((int)-1) + +typedef struct ebpf_addresses { + char *function; + uint32_t hash; + // We use long as address, because it matches system length + unsigned long addr; + uint32_t type; +} ebpf_addresses_t; + +extern char *ebpf_user_config_dir; +extern char *ebpf_stock_config_dir; + +typedef struct ebpf_data { + int *map_fd; + + char *kernel_string; + uint32_t running_on_kernel; + int isrh; +} ebpf_data_t; + +typedef enum { + MODE_RETURN = 0, // This attaches kprobe when the function returns + MODE_DEVMODE, // This stores log given description about the errors raised + MODE_ENTRY // This attaches kprobe when the function is called +} netdata_run_mode_t; + +#define ND_EBPF_DEFAULT_PID_SIZE 32768U + +enum netdata_ebpf_map_type { + NETDATA_EBPF_MAP_STATIC = 0, + NETDATA_EBPF_MAP_RESIZABLE = 1, + NETDATA_EBPF_MAP_CONTROLLER = 2, + NETDATA_EBPF_MAP_CONTROLLER_UPDATED = 4, + NETDATA_EBPF_MAP_PID = 8 +}; + +enum netdata_controller { + NETDATA_CONTROLLER_APPS_ENABLED, + NETDATA_CONTROLLER_APPS_LEVEL, + + // These index show the number of elements + // stored inside hash tables. + // + // We have indexes to count increase and + // decrease events, because __sync_fetch_and_sub + // generates compilation errors. 
+ NETDATA_CONTROLLER_PID_TABLE_ADD, + NETDATA_CONTROLLER_PID_TABLE_DEL, + NETDATA_CONTROLLER_TEMP_TABLE_ADD, + NETDATA_CONTROLLER_TEMP_TABLE_DEL, + + NETDATA_CONTROLLER_END +}; + +// Control how Netdata will monitor PIDs (apps and cgroups) +typedef enum netdata_apps_level { + NETDATA_APPS_LEVEL_REAL_PARENT, + NETDATA_APPS_LEVEL_PARENT, + NETDATA_APPS_LEVEL_ALL, + + // Present only in user ring + NETDATA_APPS_NOT_SET +} netdata_apps_level_t; + +typedef struct ebpf_local_maps { + char *name; + uint32_t internal_input; + uint32_t user_input; + uint32_t type; + int map_fd; +#ifdef LIBBPF_MAJOR_VERSION + enum bpf_map_type map_type; +#endif +} ebpf_local_maps_t; + +typedef struct ebpf_specify_name { + char *program_name; + char *function_to_attach; + char *optional; + bool retprobe; +} ebpf_specify_name_t; + +typedef enum netdata_ebpf_load_mode { + EBPF_LOAD_LEGACY = 1<<0, // Select legacy mode, this means we will load binaries + EBPF_LOAD_CORE = 1<<1, // When CO-RE is used, it is necessary to use the source code + EBPF_LOAD_PLAY_DICE = 1<<2, // Take a look on environment and choose the best option + EBPF_LOADED_FROM_STOCK = 1<<3, // Configuration loaded from Stock file + EBPF_LOADED_FROM_USER = 1<<4 // Configuration loaded from user +} netdata_ebpf_load_mode_t; +#define NETDATA_EBPF_LOAD_METHODS (EBPF_LOAD_LEGACY|EBPF_LOAD_CORE|EBPF_LOAD_PLAY_DICE) +#define NETDATA_EBPF_LOAD_SOURCE (EBPF_LOADED_FROM_STOCK|EBPF_LOADED_FROM_USER) + +typedef enum netdata_ebpf_program_loaded { + EBPF_LOAD_PROBE, // Attach probes on targets + EBPF_LOAD_RETPROBE, // Attach retprobes on targets + EBPF_LOAD_TRACEPOINT, // This stores log given description about the errors raised + EBPF_LOAD_TRAMPOLINE, // This attaches kprobe when the function is called +} netdata_ebpf_program_loaded_t; + +typedef struct netdata_ebpf_targets { + char *name; + netdata_ebpf_program_loaded_t mode; +} netdata_ebpf_targets_t; + +typedef struct ebpf_plugin_stats { + // Load options + uint32_t legacy; // Legacy codes 
+ uint32_t core; // CO-RE codes, this means we are using source code compiled. + + uint32_t threads; // Total number of threads + uint32_t running; // total number of threads running + + uint32_t probes; // Number of kprobes loaded + uint32_t retprobes; // Number of kretprobes loaded + uint32_t tracepoints; // Number of tracepoints used + uint32_t trampolines; // Number of trampolines used + + uint64_t memlock_kern; // The same information reported by bpftool, but it is not accurated + // https://lore.kernel.org/linux-mm/20230112155326.26902-5-laoar.shao@gmail.com/T/ + uint32_t hash_tables; // Number of hash tables used on the system. + + uint32_t hash_percpu; // Number of threads running per cpu maps + uint32_t hash_unique; // Number of threads running an unique map for all cores. +} ebpf_plugin_stats_t; + +typedef enum ebpf_stats_action { + EBPF_ACTION_STAT_ADD, + EBPF_ACTION_STAT_REMOVE, +} ebpf_stats_action_t; + +typedef enum netdata_apps_integration_flags { + NETDATA_EBPF_APPS_FLAG_NO, + NETDATA_EBPF_APPS_FLAG_YES, + NETDATA_EBPF_APPS_FLAG_CHART_CREATED +} netdata_apps_integration_flags_t; + +#define NETDATA_EBPF_CHART_MEM_LENGTH 48 +#define NETDATA_EBPF_STAT_DIMENSION_MEMORY "memory" +#define NETDATA_EBPF_STAT_DIMENSION_ARAL "aral" + +enum ebpf_threads_status { + NETDATA_THREAD_EBPF_RUNNING, // started by plugin + NETDATA_THREAD_EBPF_FUNCTION_RUNNING, // started by function + NETDATA_THREAD_EBPF_STOPPING, // stopping thread + NETDATA_THREAD_EBPF_STOPPED, // thread stopped + NETDATA_THREAD_EBPF_NOT_RUNNING // thread was never started +}; + +enum ebpf_global_table_values { + NETDATA_EBPF_GLOBAL_TABLE_PID_TABLE_ADD, // Count elements added inside PID table + NETDATA_EBPF_GLOBAL_TABLE_PID_TABLE_DEL, // Count elements removed from PID table + NETDATA_EBPF_GLOBAL_TABLE_TEMP_TABLE_ADD, // Count elements added inside TEMP table + NETDATA_EBPF_GLOBAL_TABLE_TEMP_TABLE_DEL, // Count elements removed from TEMP table + + NETDATA_EBPF_GLOBAL_TABLE_STATUS_END +}; + +typedef 
uint64_t netdata_idx_t; + +typedef struct ebpf_module { + // Constants used with module + struct { + const char *thread_name; + const char *config_name; + const char *thread_description; + } info; + + // Helpers used with plugin + struct { + void *(*start_routine)(void *); // the thread function + void (*apps_routine)(struct ebpf_module *em, void *ptr); // the apps charts + void (*fnct_routine)(BUFFER *bf, struct ebpf_module *em); // the function used for exteernal requests + const char *fcnt_name; // name given to cloud + const char *fcnt_desc; // description given about function + const char *fcnt_thread_chart_name; + int order_thread_chart; + const char *fcnt_thread_lifetime_name; + int order_thread_lifetime; + } functions; + + enum ebpf_threads_status enabled; + int update_every; + int global_charts; + netdata_apps_integration_flags_t apps_charts; + netdata_apps_level_t apps_level; + int cgroup_charts; + netdata_run_mode_t mode; + uint32_t thread_id; + int optional; + ebpf_local_maps_t *maps; + ebpf_specify_name_t *names; + uint32_t pid_map_size; + struct config *cfg; + const char *config_file; + uint64_t kernels; + netdata_ebpf_load_mode_t load; + netdata_ebpf_targets_t *targets; + struct bpf_link **probe_links; + struct bpf_object *objects; + struct netdata_static_thread *thread; + + // charts + char memory_usage[NETDATA_EBPF_CHART_MEM_LENGTH]; + char memory_allocations[NETDATA_EBPF_CHART_MEM_LENGTH]; + int maps_per_core; + + // period to run + uint32_t running_time; // internal usage, this is used to reset a value when a new request happens. + uint32_t lifetime; + + netdata_idx_t hash_table_stats[NETDATA_EBPF_GLOBAL_TABLE_STATUS_END]; +} ebpf_module_t; + +#define EBPF_DEFAULT_LIFETIME 300 +// This will be present until all functions are merged. 
The deadline is planned for 68 years since plugin start +#define EBPF_NON_FUNCTION_LIFE_TIME UINT_MAX + +int ebpf_get_kernel_version(); +int get_redhat_release(); +char *ebpf_kernel_suffix(int version, int isrh); +struct bpf_link **ebpf_load_program(char *plugins_dir, ebpf_module_t *em, int kver, int is_rhf, + struct bpf_object **obj); + +void ebpf_mount_config_name(char *filename, size_t length, char *path, const char *config); +int ebpf_load_config(struct config *config, char *filename); +void ebpf_update_module(ebpf_module_t *em, struct btf *btf_file, int kver, int is_rh); +void ebpf_update_names(ebpf_specify_name_t *opt, ebpf_module_t *em); +void ebpf_adjust_apps_cgroup(ebpf_module_t *em, netdata_ebpf_program_loaded_t mode); +char *ebpf_find_symbol(char *search); +void ebpf_load_addresses(ebpf_addresses_t *fa, int fd); +void ebpf_fill_algorithms(int *algorithms, size_t length, int algorithm); +char **ebpf_fill_histogram_dimension(size_t maximum); +void ebpf_update_stats(ebpf_plugin_stats_t *report, ebpf_module_t *em); +void ebpf_update_controller(int fd, ebpf_module_t *em); +void ebpf_update_map_size(struct bpf_map *map, ebpf_local_maps_t *lmap, ebpf_module_t *em, const char *map_name); + +// Histogram +#define NETDATA_EBPF_HIST_MAX_BINS 24UL +#define NETDATA_DISK_MAX 256U +#define NETDATA_DISK_HISTOGRAM_LENGTH (NETDATA_DISK_MAX * NETDATA_EBPF_HIST_MAX_BINS) + +typedef struct netdata_ebpf_histogram { + char *name; + char *title; + char *ctx; + int order; + uint64_t histogram[NETDATA_EBPF_HIST_MAX_BINS]; +} netdata_ebpf_histogram_t; + +enum fs_btf_counters { + NETDATA_KEY_BTF_READ, + NETDATA_KEY_BTF_WRITE, + NETDATA_KEY_BTF_OPEN, + NETDATA_KEY_BTF_SYNC_ATTR, + NETDATA_KEY_BTF_OPEN2, + + NETDATA_FS_BTF_END +}; + +typedef struct ebpf_filesystem_partitions { + char *filesystem; + char *optional_filesystem; + char *family; + char *family_name; + struct bpf_object *objects; + struct bpf_link **probe_links; + + netdata_ebpf_histogram_t hread; + 
netdata_ebpf_histogram_t hwrite; + netdata_ebpf_histogram_t hopen; + netdata_ebpf_histogram_t hadditional; + + uint32_t flags; + uint32_t enabled; + + ebpf_addresses_t addresses; + uint64_t kernels; + ebpf_local_maps_t *fs_maps; + + // BPF structure +#ifdef LIBBPF_MAJOR_VERSION + struct filesystem_bpf *fs_obj; +#else + void *fs_obj; +#endif + const char *functions[NETDATA_FS_BTF_END]; +} ebpf_filesystem_partitions_t; + +typedef struct ebpf_sync_syscalls { + char *syscall; + int enabled; + uint32_t flags; + + // BTF structure + struct bpf_object *objects; + struct bpf_link **probe_links; + + // BPF structure +#ifdef LIBBPF_MAJOR_VERSION + struct sync_bpf *sync_obj; +#else + void *sync_obj; +#endif + ebpf_local_maps_t *sync_maps; +} ebpf_sync_syscalls_t; + +void ebpf_histogram_dimension_cleanup(char **ptr, size_t length); + +// Tracepoint helpers +// For more information related to tracepoints read https://www.kernel.org/doc/html/latest/trace/tracepoints.html +int ebpf_is_tracepoint_enabled(char *subsys, char *eventname); +int ebpf_enable_tracing_values(char *subsys, char *eventname); +int ebpf_disable_tracing_values(char *subsys, char *eventname); + +// BTF Section +#define EBPF_DEFAULT_BTF_FILE "vmlinux" +#define EBPF_DEFAULT_BTF_PATH "/sys/kernel/btf" +#define EBPF_DEFAULT_ERROR_MSG "Cannot open or load BPF file for thread" + +// BTF helpers +#define NETDATA_EBPF_MAX_SYSCALL_LENGTH 255 + +netdata_ebpf_load_mode_t epbf_convert_string_to_load_mode(char *str); +netdata_ebpf_program_loaded_t ebpf_convert_core_type(char *str, netdata_run_mode_t lmode); +void ebpf_select_host_prefix(char *output, size_t length, char *syscall, int kver); +#ifdef LIBBPF_MAJOR_VERSION +void ebpf_adjust_thread_load(ebpf_module_t *mod, struct btf *file); +struct btf *ebpf_parse_btf_file(const char *filename); +struct btf *ebpf_load_btf_file(char *path, char *filename); +int ebpf_is_function_inside_btf(struct btf *file, char *function); +void ebpf_update_map_type(struct bpf_map *map, 
ebpf_local_maps_t *w); +void ebpf_define_map_type(ebpf_local_maps_t *maps, int maps_per_core, int kver); +#endif + +void ebpf_update_kernel_memory_with_vector(ebpf_plugin_stats_t *report, ebpf_local_maps_t *maps, + ebpf_stats_action_t action); +void ebpf_update_kernel_memory(ebpf_plugin_stats_t *report, ebpf_local_maps_t *map, ebpf_stats_action_t action); +int ebpf_statistic_create_aral_chart(char *name, ebpf_module_t *em); +void ebpf_statistic_obsolete_aral_chart(ebpf_module_t *em, int prio); +void ebpf_send_data_aral_chart(ARAL *memory, ebpf_module_t *em); + +int ebpf_can_plugin_load_code(int kver, char *plugin_name); +int ebpf_adjust_memory_limit(); + +#endif /* NETDATA_EBPF_H */ diff --git a/libnetdata/eval/README.md b/src/libnetdata/eval/README.md index 8b1378917..8b1378917 100644 --- a/libnetdata/eval/README.md +++ b/src/libnetdata/eval/README.md diff --git a/libnetdata/eval/eval.c b/src/libnetdata/eval/eval.c index a1ac4483c..7e968632a 100644 --- a/libnetdata/eval/eval.c +++ b/src/libnetdata/eval/eval.c @@ -2,11 +2,23 @@ #include "../libnetdata.h" +typedef enum __attribute__((packed)) { + EVAL_VALUE_INVALID = 0, + EVAL_VALUE_NUMBER, + EVAL_VALUE_VARIABLE, + EVAL_VALUE_EXPRESSION +} EVAL_VALUE_TYPE; + // ---------------------------------------------------------------------------- // data structures for storing the parsed expression in memory +typedef struct eval_variable { + STRING *name; + struct eval_variable *next; +} EVAL_VARIABLE; + typedef struct eval_value { - int type; + EVAL_VALUE_TYPE type; union { NETDATA_DOUBLE number; @@ -24,6 +36,21 @@ typedef struct eval_node { EVAL_VALUE ops[]; } EVAL_NODE; +struct eval_expression { + STRING *source; + STRING *parsed_as; + + NETDATA_DOUBLE result; + + int error; + BUFFER *error_msg; + + EVAL_NODE *nodes; + + void *variable_lookup_cb_data; + eval_expression_variable_lookup_t variable_lookup_cb; +}; + // these are used for EVAL_NODE.operator // they are used as internal IDs to identify an operator // THEY ARE 
NOT USED FOR PARSING OPERATORS LIKE THAT @@ -62,124 +89,9 @@ static inline void print_parsed_as_constant(BUFFER *out, NETDATA_DOUBLE n); // evaluation of expressions static inline NETDATA_DOUBLE eval_variable(EVAL_EXPRESSION *exp, EVAL_VARIABLE *v, int *error) { - static STRING - *this_string = NULL, - *now_string = NULL, - *after_string = NULL, - *before_string = NULL, - *status_string = NULL, - *removed_string = NULL, - *uninitialized_string = NULL, - *undefined_string = NULL, - *clear_string = NULL, - *warning_string = NULL, - *critical_string = NULL; - NETDATA_DOUBLE n; - if(unlikely(this_string == NULL)) { - this_string = string_strdupz("this"); - now_string = string_strdupz("now"); - after_string = string_strdupz("after"); - before_string = string_strdupz("before"); - status_string = string_strdupz("status"); - removed_string = string_strdupz("REMOVED"); - uninitialized_string = string_strdupz("UNINITIALIZED"); - undefined_string = string_strdupz("UNDEFINED"); - clear_string = string_strdupz("CLEAR"); - warning_string = string_strdupz("WARNING"); - critical_string = string_strdupz("CRITICAL"); - } - - if(unlikely(v->name == this_string)) { - n = (exp->myself)?*exp->myself:NAN; - buffer_strcat(exp->error_msg, "[ $this = "); - print_parsed_as_constant(exp->error_msg, n); - buffer_strcat(exp->error_msg, " ] "); - return n; - } - - if(unlikely(v->name == after_string)) { - n = (exp->after && *exp->after)?*exp->after:NAN; - buffer_strcat(exp->error_msg, "[ $after = "); - print_parsed_as_constant(exp->error_msg, n); - buffer_strcat(exp->error_msg, " ] "); - return n; - } - - if(unlikely(v->name == before_string)) { - n = (exp->before && *exp->before)?*exp->before:NAN; - buffer_strcat(exp->error_msg, "[ $before = "); - print_parsed_as_constant(exp->error_msg, n); - buffer_strcat(exp->error_msg, " ] "); - return n; - } - - if(unlikely(v->name == now_string)) { - n = (NETDATA_DOUBLE)now_realtime_sec(); - buffer_strcat(exp->error_msg, "[ $now = "); - 
print_parsed_as_constant(exp->error_msg, n); - buffer_strcat(exp->error_msg, " ] "); - return n; - } - - if(unlikely(v->name == status_string)) { - n = (exp->status)?*exp->status:RRDCALC_STATUS_UNINITIALIZED; - buffer_strcat(exp->error_msg, "[ $status = "); - print_parsed_as_constant(exp->error_msg, n); - buffer_strcat(exp->error_msg, " ] "); - return n; - } - - if(unlikely(v->name == removed_string)) { - n = RRDCALC_STATUS_REMOVED; - buffer_strcat(exp->error_msg, "[ $REMOVED = "); - print_parsed_as_constant(exp->error_msg, n); - buffer_strcat(exp->error_msg, " ] "); - return n; - } - - if(unlikely(v->name == uninitialized_string)) { - n = RRDCALC_STATUS_UNINITIALIZED; - buffer_strcat(exp->error_msg, "[ $UNINITIALIZED = "); - print_parsed_as_constant(exp->error_msg, n); - buffer_strcat(exp->error_msg, " ] "); - return n; - } - - if(unlikely(v->name == undefined_string)) { - n = RRDCALC_STATUS_UNDEFINED; - buffer_strcat(exp->error_msg, "[ $UNDEFINED = "); - print_parsed_as_constant(exp->error_msg, n); - buffer_strcat(exp->error_msg, " ] "); - return n; - } - - if(unlikely(v->name == clear_string)) { - n = RRDCALC_STATUS_CLEAR; - buffer_strcat(exp->error_msg, "[ $CLEAR = "); - print_parsed_as_constant(exp->error_msg, n); - buffer_strcat(exp->error_msg, " ] "); - return n; - } - - if(unlikely(v->name == warning_string)) { - n = RRDCALC_STATUS_WARNING; - buffer_strcat(exp->error_msg, "[ $WARNING = "); - print_parsed_as_constant(exp->error_msg, n); - buffer_strcat(exp->error_msg, " ] "); - return n; - } - - if(unlikely(v->name == critical_string)) { - n = RRDCALC_STATUS_CRITICAL; - buffer_strcat(exp->error_msg, "[ $CRITICAL = "); - print_parsed_as_constant(exp->error_msg, n); - buffer_strcat(exp->error_msg, " ] "); - return n; - } - - if(exp->rrdcalc && health_variable_lookup(v->name, exp->rrdcalc, &n)) { + if(exp->variable_lookup_cb && exp->variable_lookup_cb(v->name, exp->variable_lookup_cb_data, &n)) { buffer_sprintf(exp->error_msg, "[ ${%s} = ", 
string2str(v->name)); print_parsed_as_constant(exp->error_msg, n); buffer_strcat(exp->error_msg, " ] "); @@ -1074,7 +986,7 @@ int expression_evaluate(EVAL_EXPRESSION *expression) { expression->error = EVAL_ERROR_OK; buffer_reset(expression->error_msg); - expression->result = eval_node(expression, (EVAL_NODE *)expression->nodes, &expression->error); + expression->result = eval_node(expression, expression->nodes, &expression->error); if(unlikely(isnan(expression->result))) { if(expression->error == EVAL_ERROR_OK) @@ -1104,6 +1016,9 @@ int expression_evaluate(EVAL_EXPRESSION *expression) { } EVAL_EXPRESSION *expression_parse(const char *string, const char **failed_at, int *error) { + if(!string || !*string) + return NULL; + const char *s = string; int err = EVAL_ERROR_OK; @@ -1137,12 +1052,12 @@ EVAL_EXPRESSION *expression_parse(const char *string, const char **failed_at, in EVAL_EXPRESSION *exp = callocz(1, sizeof(EVAL_EXPRESSION)); - exp->source = strdupz(string); - exp->parsed_as = strdupz(buffer_tostring(out)); + exp->source = string_strdupz(string); + exp->parsed_as = string_strdupz(buffer_tostring(out)); buffer_free(out); exp->error_msg = buffer_create(100, NULL); - exp->nodes = (void *)op; + exp->nodes = op; return exp; } @@ -1150,9 +1065,9 @@ EVAL_EXPRESSION *expression_parse(const char *string, const char **failed_at, in void expression_free(EVAL_EXPRESSION *expression) { if(!expression) return; - if(expression->nodes) eval_node_free((EVAL_NODE *)expression->nodes); - freez((void *)expression->source); - freez((void *)expression->parsed_as); + if(expression->nodes) eval_node_free(expression->nodes); + string_freez((void *)expression->source); + string_freez((void *)expression->parsed_as); buffer_free(expression->error_msg); freez(expression); } @@ -1199,3 +1114,138 @@ const char *expression_strerror(int error) { return "unknown error"; } } + +const char *expression_source(EVAL_EXPRESSION *expression) { + if(!expression) + return string2str(NULL); + + return 
string2str(expression->source);
+}
+
+const char *expression_parsed_as(EVAL_EXPRESSION *expression) {
+    if(!expression)
+        return string2str(NULL);
+
+    return string2str(expression->parsed_as);
+}
+
+const char *expression_error_msg(EVAL_EXPRESSION *expression) {
+    if(!expression || !expression->error_msg)
+        return "";
+
+    return buffer_tostring(expression->error_msg);
+}
+
+NETDATA_DOUBLE expression_result(EVAL_EXPRESSION *expression) {
+    if(!expression)
+        return NAN;
+
+    return expression->result;
+}
+
+void expression_set_variable_lookup_callback(EVAL_EXPRESSION *expression, eval_expression_variable_lookup_t cb, void *data) {
+    if(!expression)
+        return;
+
+    expression->variable_lookup_cb = cb;
+    expression->variable_lookup_cb_data = data;
+}
+
+// Walk the node and its sub-expressions and turn every operand that references
+// `variable` (compared by STRING pointer) into the constant number `value`.
+// Returns the number of operands that were converted.
+static size_t expression_hardcode_node_variable(EVAL_NODE *node, STRING *variable, NETDATA_DOUBLE value) {
+    size_t matches = 0;
+
+    for(int i = 0; i < node->count; i++) {
+        switch(node->ops[i].type) {
+            case EVAL_VALUE_NUMBER:
+            case EVAL_VALUE_INVALID:
+                break;
+
+            case EVAL_VALUE_VARIABLE:
+                if(node->ops[i].variable->name == variable) {
+                    string_freez(node->ops[i].variable->name);
+                    freez(node->ops[i].variable);
+                    node->ops[i].type = EVAL_VALUE_NUMBER;
+                    node->ops[i].number = value;
+                    matches++;
+                }
+                break;
+
+            case EVAL_VALUE_EXPRESSION:
+                matches += expression_hardcode_node_variable(node->ops[i].expression, variable, value);
+                break;
+        }
+    }
+
+    return matches;
+}
+
+// Replace every reference to `variable` in the expression with the constant `value`.
+//
+// The parsed nodes are patched first. When at least one operand was converted,
+// the source text of the expression is rewritten too: "$name" and "${name}"
+// are replaced by the formatted number.
+//
+// Nothing is done when expression or variable is NULL, or when value is NAN.
+void expression_hardcode_variable(EVAL_EXPRESSION *expression, STRING *variable, NETDATA_DOUBLE value) {
+    if (!expression || !variable || isnan(value))
+        return;
+
+    size_t matches = expression_hardcode_node_variable(expression->nodes, variable, value);
+    if (matches) {
+        char replace[1024];
+        snprintfz(replace, sizeof(replace), NETDATA_DOUBLE_FORMAT_AUTO, value);
+        size_t replace_len = strlen(replace);
+
+        size_t source_len = string_strlen(expression->source);
+        const char *source_str = string2str(expression->source);
+
+        // Allocate enough space to accommodate all replacements.
+        char buf[source_len + 1 + matches * (replace_len + 1)];
+
+        char find1[string_strlen(variable) + 1 + 1];
+        snprintfz(find1, sizeof(find1), "$%s", string2str(variable));
+        size_t find1_len = strlen(find1);
+
+        char find2[string_strlen(variable) + 1 + 3];
+        snprintfz(find2, sizeof(find2), "${%s}", string2str(variable));
+        size_t find2_len = strlen(find2);
+
+        size_t found = 0;
+        char *buf_ptr = buf;
+        const char *source_ptr = source_str;
+
+        // Keep buf a valid string even if nothing gets copied into it.
+        *buf_ptr = '\0';
+
+        while (*source_ptr) {
+            // buf is sized for `matches` replacements, so stop searching once
+            // that many have been made; the remaining text is copied as is.
+            char *s1 = (found < matches) ? strstr(source_ptr, find1) : NULL;
+            char *s2 = (found < matches) ? strstr(source_ptr, find2) : NULL;
+
+            char *s = s1;
+            size_t len = find1_len;
+            if (s2 && (!s1 || s2 < s1)) {
+                s = s2;
+                len = find2_len;
+            }
+
+            if (!s) {
+                // Copy the rest of the string if no more variables are found.
+                strcpy(buf_ptr, source_ptr);
+                break;
+            }
+
+            if (s == s1 && (isalnum((unsigned char)s[len]) || s[len] == '_')) {
+                // "$name" is only the prefix of a longer word: keep everything
+                // up to and including it verbatim, then continue after it.
+                size_t skipped = (size_t)(s + len - source_ptr);
+                memcpy(buf_ptr, source_ptr, skipped);
+                buf_ptr += skipped;
+                *buf_ptr = '\0';
+
+                source_ptr = s + len;
+                continue;
+            }
+
+            // Copy the part before the variable.
+            memcpy(buf_ptr, source_ptr, s - source_ptr);
+            buf_ptr += (s - source_ptr);
+
+            // Copy the replacement.
+            memcpy(buf_ptr, replace, replace_len);
+            buf_ptr += replace_len;
+            *buf_ptr = '\0';
+
+            // Move the source pointer past the replaced variable.
+            source_ptr = s + len;
+            found++;
+        }
+
+        // Update the expression source with the new string.
+        string_freez(expression->source);
+        expression->source = string_strdupz(buf);
+    }
+}
diff --git a/src/libnetdata/eval/eval.h b/src/libnetdata/eval/eval.h
new file mode 100644
index 000000000..48a3b073f
--- /dev/null
+++ b/src/libnetdata/eval/eval.h
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-3.0-or-later
+
+#ifndef NETDATA_EVAL_H
+#define NETDATA_EVAL_H 1
+
+#include "../libnetdata.h"
+
+#define EVAL_MAX_VARIABLE_NAME_LENGTH 300
+
+struct eval_expression;
+typedef struct eval_expression EVAL_EXPRESSION;
+typedef bool (*eval_expression_variable_lookup_t)(STRING *variable, void *data, NETDATA_DOUBLE *result);
+
+// parsing and evaluation
+#define EVAL_ERROR_OK 0
+
+// parsing errors
+#define EVAL_ERROR_MISSING_CLOSE_SUBEXPRESSION 1
+#define EVAL_ERROR_UNKNOWN_OPERAND 2
+#define EVAL_ERROR_MISSING_OPERAND 3
+#define EVAL_ERROR_MISSING_OPERATOR 4
+#define EVAL_ERROR_REMAINING_GARBAGE 5
+#define EVAL_ERROR_IF_THEN_ELSE_MISSING_ELSE 6
+
+// evaluation errors
+#define EVAL_ERROR_INVALID_VALUE 101
+#define EVAL_ERROR_INVALID_NUMBER_OF_OPERANDS 102
+#define EVAL_ERROR_VALUE_IS_NAN 103
+#define EVAL_ERROR_VALUE_IS_INFINITE 104
+#define EVAL_ERROR_UNKNOWN_VARIABLE 105
+
+// parse the given string as an expression and return:
+// a pointer to an expression if it parsed OK
+// NULL in which case the pointer to error has the error code
+EVAL_EXPRESSION *expression_parse(const char *string, const char **failed_at, int *error);
+
+// free all resources allocated for an expression
+void expression_free(EVAL_EXPRESSION *expression);
+
+// convert an error code to a message
+const char *expression_strerror(int error);
+
+// evaluate an expression and return
+// 1 = OK, the result is in: expression->result
+// 2 = FAILED, the error message is in: buffer_tostring(expression->error_msg)
+int expression_evaluate(EVAL_EXPRESSION *expression);
+
+const char *expression_source(EVAL_EXPRESSION *expression);
+const char *expression_parsed_as(EVAL_EXPRESSION *expression);
+const char
*expression_error_msg(EVAL_EXPRESSION *expression); +NETDATA_DOUBLE expression_result(EVAL_EXPRESSION *expression); +void expression_set_variable_lookup_callback(EVAL_EXPRESSION *expression, eval_expression_variable_lookup_t cb, void *data); + +void expression_hardcode_variable(EVAL_EXPRESSION *expression, STRING *variable, NETDATA_DOUBLE value); + +#endif //NETDATA_EVAL_H diff --git a/spawn/README.md b/src/libnetdata/facets/README.md index e69de29bb..e69de29bb 100644 --- a/spawn/README.md +++ b/src/libnetdata/facets/README.md diff --git a/libnetdata/facets/facets.c b/src/libnetdata/facets/facets.c index 4a5f5442b..a5379e68b 100644 --- a/libnetdata/facets/facets.c +++ b/src/libnetdata/facets/facets.c @@ -102,10 +102,7 @@ static inline bool is_valid_string_hash(const char *s) { // hashtable for FACET_VALUE // cleanup hashtable defines -#undef SIMPLE_HASHTABLE_SORT_FUNCTION -#undef SIMPLE_HASHTABLE_VALUE_TYPE -#undef SIMPLE_HASHTABLE_NAME -#undef NETDATA_SIMPLE_HASHTABLE_H +#include "../../libnetdata/simple_hashtable_undef.h" struct facet_value; // #define SIMPLE_HASHTABLE_SORT_FUNCTION compare_facet_value @@ -117,10 +114,7 @@ struct facet_value; // hashtable for FACET_KEY // cleanup hashtable defines -#undef SIMPLE_HASHTABLE_SORT_FUNCTION -#undef SIMPLE_HASHTABLE_VALUE_TYPE -#undef SIMPLE_HASHTABLE_NAME -#undef NETDATA_SIMPLE_HASHTABLE_H +#include "../../libnetdata/simple_hashtable_undef.h" struct facet_key; // #define SIMPLE_HASHTABLE_SORT_FUNCTION compare_facet_key @@ -439,12 +433,12 @@ static inline void FACET_VALUE_ADD_CONFLICT(FACET_KEY *k, FACET_VALUE *v, const } static inline FACET_VALUE *FACET_VALUE_GET_FROM_INDEX(FACET_KEY *k, FACETS_HASH hash) { - SIMPLE_HASHTABLE_SLOT_VALUE *slot = simple_hashtable_get_slot_VALUE(&k->values.ht, hash, true); + SIMPLE_HASHTABLE_SLOT_VALUE *slot = simple_hashtable_get_slot_VALUE(&k->values.ht, hash, NULL, true); return SIMPLE_HASHTABLE_SLOT_DATA(slot); } static inline FACET_VALUE *FACET_VALUE_ADD_TO_INDEX(FACET_KEY *k, const 
FACET_VALUE * const tv) { - SIMPLE_HASHTABLE_SLOT_VALUE *slot = simple_hashtable_get_slot_VALUE(&k->values.ht, tv->hash, true); + SIMPLE_HASHTABLE_SLOT_VALUE *slot = simple_hashtable_get_slot_VALUE(&k->values.ht, tv->hash, NULL, true); if(SIMPLE_HASHTABLE_SLOT_DATA(slot)) { // already exists @@ -634,7 +628,7 @@ static inline void FACETS_KEYS_INDEX_DESTROY(FACETS *facets) { } static inline FACET_KEY *FACETS_KEY_GET_FROM_INDEX(FACETS *facets, FACETS_HASH hash) { - SIMPLE_HASHTABLE_SLOT_KEY *slot = simple_hashtable_get_slot_KEY(&facets->keys.ht, hash, true); + SIMPLE_HASHTABLE_SLOT_KEY *slot = simple_hashtable_get_slot_KEY(&facets->keys.ht, hash, NULL, true); return SIMPLE_HASHTABLE_SLOT_DATA(slot); } @@ -714,7 +708,7 @@ static inline FACET_KEY *FACETS_KEY_CREATE(FACETS *facets, FACETS_HASH hash, con static inline FACET_KEY *FACETS_KEY_ADD_TO_INDEX(FACETS *facets, FACETS_HASH hash, const char *name, size_t name_length, FACET_KEY_OPTIONS options) { facets->operations.keys.registered++; - SIMPLE_HASHTABLE_SLOT_KEY *slot = simple_hashtable_get_slot_KEY(&facets->keys.ht, hash, true); + SIMPLE_HASHTABLE_SLOT_KEY *slot = simple_hashtable_get_slot_KEY(&facets->keys.ht, hash, NULL, true); if(unlikely(!SIMPLE_HASHTABLE_SLOT_DATA(slot))) { // we have to add it diff --git a/libnetdata/facets/facets.h b/src/libnetdata/facets/facets.h index 8364d8612..8364d8612 100644 --- a/libnetdata/facets/facets.h +++ b/src/libnetdata/facets/facets.h diff --git a/web/api/ilove/README.md b/src/libnetdata/functions_evloop/README.md index e69de29bb..e69de29bb 100644 --- a/web/api/ilove/README.md +++ b/src/libnetdata/functions_evloop/README.md diff --git a/src/libnetdata/functions_evloop/functions_evloop.c b/src/libnetdata/functions_evloop/functions_evloop.c new file mode 100644 index 000000000..b21abe629 --- /dev/null +++ b/src/libnetdata/functions_evloop/functions_evloop.c @@ -0,0 +1,440 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "functions_evloop.h" + +static void 
functions_evloop_config_cb(const char *transaction, char *function, usec_t *stop_monotonic_ut, + bool *cancelled, BUFFER *payload, HTTP_ACCESS access, + const char *source, void *data); + +struct functions_evloop_worker_job { + bool used; + bool running; + bool cancelled; + usec_t stop_monotonic_ut; + char *cmd; + const char *transaction; + time_t timeout; + + BUFFER *payload; + HTTP_ACCESS access; + const char *source; + + functions_evloop_worker_execute_t cb; + void *cb_data; +}; + +static void worker_job_cleanup(struct functions_evloop_worker_job *j) { + freez((void *)j->cmd); + freez((void *)j->transaction); + freez((void *)j->source); + buffer_free(j->payload); +} + +struct rrd_functions_expectation { + const char *function; + size_t function_length; + functions_evloop_worker_execute_t cb; + void *cb_data; + time_t default_timeout; + struct rrd_functions_expectation *prev, *next; +}; + +struct functions_evloop_globals { + const char *tag; + + DICTIONARY *worker_queue; + pthread_mutex_t worker_mutex; + pthread_cond_t worker_cond_var; + size_t workers; + + netdata_mutex_t *stdout_mutex; + bool *plugin_should_exit; + + netdata_thread_t reader_thread; + netdata_thread_t *worker_threads; + + struct { + DICTIONARY *nodes; + } dyncfg; + + struct rrd_functions_expectation *expectations; +}; + +static void *rrd_functions_worker_globals_worker_main(void *arg) { + struct functions_evloop_globals *wg = arg; + + bool last_acquired = true; + while (true) { + pthread_mutex_lock(&wg->worker_mutex); + + if(dictionary_entries(wg->worker_queue) == 0 || !last_acquired) + pthread_cond_wait(&wg->worker_cond_var, &wg->worker_mutex); + + const DICTIONARY_ITEM *acquired = NULL; + struct functions_evloop_worker_job *j; + dfe_start_write(wg->worker_queue, j) { + if(j->running || j->cancelled) + continue; + + acquired = dictionary_acquired_item_dup(wg->worker_queue, j_dfe.item); + j->running = true; + break; + } + dfe_done(j); + + pthread_mutex_unlock(&wg->worker_mutex); + + if(acquired) 
{ + ND_LOG_STACK lgs[] = { + ND_LOG_FIELD_TXT(NDF_REQUEST, j->cmd), + ND_LOG_FIELD_END(), + }; + ND_LOG_STACK_PUSH(lgs); + + last_acquired = true; + j = dictionary_acquired_item_value(acquired); + j->cb(j->transaction, j->cmd, &j->stop_monotonic_ut, &j->cancelled, j->payload, j->access, j->source, j->cb_data); + dictionary_del(wg->worker_queue, j->transaction); + dictionary_acquired_item_release(wg->worker_queue, acquired); + dictionary_garbage_collect(wg->worker_queue); + } + else + last_acquired = false; + } + return NULL; +} + +static void worker_add_job(struct functions_evloop_globals *wg, const char *keyword, char *transaction, char *function, char *timeout_s, BUFFER *payload, const char *access, const char *source) { + if(!transaction || !*transaction || !timeout_s || !*timeout_s || !function || !*function) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "Received incomplete %s (transaction = '%s', timeout = '%s', function = '%s'). Ignoring it.", + keyword, + transaction?transaction:"(unset)", + timeout_s?timeout_s:"(unset)", + function?function:"(unset)"); + } + else { + int timeout = str2i(timeout_s); + + const char *msg = "No function with this name found"; + bool found = false; + struct rrd_functions_expectation *we; + for(we = wg->expectations; we ;we = we->next) { + if(strncmp(function, we->function, we->function_length) == 0) { + if(timeout <= 0) + timeout = (int)we->default_timeout; + + struct functions_evloop_worker_job t = { + .cmd = strdupz(function), + .transaction = strdupz(transaction), + .running = false, + .cancelled = false, + .timeout = timeout, + .stop_monotonic_ut = now_monotonic_usec() + (timeout * USEC_PER_SEC), + .used = false, + .payload = buffer_dup(payload), + .access = http_access_from_hex(access), + .source = source ? 
strdupz(source) : NULL, + .cb = we->cb, + .cb_data = we->cb_data, + }; + struct functions_evloop_worker_job *j = dictionary_set(wg->worker_queue, transaction, &t, sizeof(t)); + if(j->used) { + nd_log(NDLS_COLLECTORS, NDLP_WARNING, "Received duplicate function transaction '%s'. Ignoring it.", transaction); + worker_job_cleanup(&t); + msg = "Duplicate function transaction. Ignoring it."; + } + else { + found = true; + j->used = true; + pthread_cond_signal(&wg->worker_cond_var); + } + } + } + + if(!found) { + netdata_mutex_lock(wg->stdout_mutex); + pluginsd_function_json_error_to_stdout(transaction, HTTP_RESP_NOT_FOUND, msg); + netdata_mutex_unlock(wg->stdout_mutex); + } + } +} + +static void *rrd_functions_worker_globals_reader_main(void *arg) { + struct functions_evloop_globals *wg = arg; + + struct { + size_t last_len; // to remember the last pos - do not use a pointer, the buffer may realloc... + bool enabled; + char *transaction; + char *function; + char *timeout_s; + char *access; + char *source; + char *content_type; + } deferred = { 0 }; + + struct buffered_reader reader = { 0 }; + buffered_reader_init(&reader); + BUFFER *buffer = buffer_create(sizeof(reader.read_buffer) + 2, NULL); + + while(!(*wg->plugin_should_exit)) { + if(unlikely(!buffered_reader_next_line(&reader, buffer))) { + buffered_reader_ret_t ret = buffered_reader_read_timeout( + &reader, + fileno((FILE *)stdin), + 2 * 60 * MSEC_PER_SEC, + false + ); + + if(unlikely(ret != BUFFERED_READER_READ_OK && ret != BUFFERED_READER_READ_POLL_TIMEOUT)) + break; + + continue; + } + + if(deferred.enabled) { + char *s = (char *)buffer_tostring(buffer); + + if(strstr(&s[deferred.last_len], PLUGINSD_CALL_FUNCTION_PAYLOAD_END "\n") != NULL) { + if(deferred.last_len > 0) + // remove the trailing newline from the buffer + deferred.last_len--; + + s[deferred.last_len] = '\0'; + buffer->len = deferred.last_len; + buffer->content_type = content_type_string2id(deferred.content_type); + worker_add_job(wg, + 
PLUGINSD_CALL_FUNCTION_PAYLOAD_BEGIN, deferred.transaction, deferred.function, + deferred.timeout_s, buffer, deferred.access, deferred.source); + buffer_flush(buffer); + + freez(deferred.transaction); + freez(deferred.function); + freez(deferred.timeout_s); + freez(deferred.access); + freez(deferred.source); + freez(deferred.content_type); + memset(&deferred, 0, sizeof(deferred)); + } + else + deferred.last_len = buffer->len; + + continue; + } + + char *words[MAX_FUNCTION_PARAMETERS] = { NULL }; + size_t num_words = quoted_strings_splitter_pluginsd((char *)buffer_tostring(buffer), words, MAX_FUNCTION_PARAMETERS); + + const char *keyword = get_word(words, num_words, 0); + + if(keyword && (strcmp(keyword, PLUGINSD_CALL_FUNCTION) == 0)) { + char *transaction = get_word(words, num_words, 1); + char *timeout_s = get_word(words, num_words, 2); + char *function = get_word(words, num_words, 3); + char *access = get_word(words, num_words, 4); + char *source = get_word(words, num_words, 5); + worker_add_job(wg, keyword, transaction, function, timeout_s, NULL, access, source); + } + else if(keyword && (strcmp(keyword, PLUGINSD_CALL_FUNCTION_PAYLOAD_BEGIN) == 0)) { + char *transaction = get_word(words, num_words, 1); + char *timeout_s = get_word(words, num_words, 2); + char *function = get_word(words, num_words, 3); + char *access = get_word(words, num_words, 4); + char *source = get_word(words, num_words, 5); + char *content_type = get_word(words, num_words, 6); + + deferred.transaction = strdupz(transaction ? transaction : ""); + deferred.timeout_s = strdupz(timeout_s ? timeout_s : ""); + deferred.function = strdupz(function ? function : ""); + deferred.access = strdupz(access ? access : ""); + deferred.source = strdupz(source ? source : ""); + deferred.content_type = strdupz(content_type ? 
content_type : ""); + deferred.last_len = 0; + deferred.enabled = true; + } + else if(keyword && strcmp(keyword, PLUGINSD_CALL_FUNCTION_CANCEL) == 0) { + char *transaction = get_word(words, num_words, 1); + const DICTIONARY_ITEM *acquired = dictionary_get_and_acquire_item(wg->worker_queue, transaction); + if(acquired) { + struct functions_evloop_worker_job *j = dictionary_acquired_item_value(acquired); + __atomic_store_n(&j->cancelled, true, __ATOMIC_RELAXED); + dictionary_acquired_item_release(wg->worker_queue, acquired); + dictionary_del(wg->worker_queue, transaction); + dictionary_garbage_collect(wg->worker_queue); + } + else + nd_log(NDLS_COLLECTORS, NDLP_NOTICE, "Received CANCEL for transaction '%s', but it not available here", transaction); + } + else if(keyword && strcmp(keyword, PLUGINSD_CALL_FUNCTION_PROGRESS) == 0) { + char *transaction = get_word(words, num_words, 1); + const DICTIONARY_ITEM *acquired = dictionary_get_and_acquire_item(wg->worker_queue, transaction); + if(acquired) { + struct functions_evloop_worker_job *j = dictionary_acquired_item_value(acquired); + + functions_stop_monotonic_update_on_progress(&j->stop_monotonic_ut); + + dictionary_acquired_item_release(wg->worker_queue, acquired); + } + else + nd_log(NDLS_COLLECTORS, NDLP_NOTICE, "Received PROGRESS for transaction '%s', but it not available here", transaction); + } + else + nd_log(NDLS_COLLECTORS, NDLP_NOTICE, "Received unknown command: %s", keyword?keyword:"(unset)"); + + buffer_flush(buffer); + } + + if(!(*wg->plugin_should_exit)) + nd_log(NDLS_COLLECTORS, NDLP_ERR, "Read error on stdin"); + + *wg->plugin_should_exit = true; + exit(1); +} + +void worker_queue_delete_cb(const DICTIONARY_ITEM *item __maybe_unused, void *value, void *data __maybe_unused) { + struct functions_evloop_worker_job *j = value; + worker_job_cleanup(j); +} + +struct functions_evloop_globals *functions_evloop_init(size_t worker_threads, const char *tag, netdata_mutex_t *stdout_mutex, bool *plugin_should_exit) { 
+ struct functions_evloop_globals *wg = callocz(1, sizeof(struct functions_evloop_globals)); + + wg->worker_queue = dictionary_create(DICT_OPTION_DONT_OVERWRITE_VALUE); + dictionary_register_delete_callback(wg->worker_queue, worker_queue_delete_cb, NULL); + + wg->dyncfg.nodes = dyncfg_nodes_dictionary_create(); + + pthread_mutex_init(&wg->worker_mutex, NULL); + pthread_cond_init(&wg->worker_cond_var, NULL); + + wg->plugin_should_exit = plugin_should_exit; + wg->stdout_mutex = stdout_mutex; + wg->workers = worker_threads; + wg->worker_threads = callocz(wg->workers, sizeof(netdata_thread_t )); + wg->tag = tag; + + char tag_buffer[NETDATA_THREAD_TAG_MAX + 1]; + snprintfz(tag_buffer, NETDATA_THREAD_TAG_MAX, "%s_READER", wg->tag); + netdata_thread_create(&wg->reader_thread, tag_buffer, NETDATA_THREAD_OPTION_DONT_LOG, + rrd_functions_worker_globals_reader_main, wg); + + for(size_t i = 0; i < wg->workers ; i++) { + snprintfz(tag_buffer, NETDATA_THREAD_TAG_MAX, "%s_WORK[%zu]", wg->tag, i+1); + netdata_thread_create(&wg->worker_threads[i], tag_buffer, NETDATA_THREAD_OPTION_DONT_LOG, + rrd_functions_worker_globals_worker_main, wg); + } + + functions_evloop_add_function(wg, "config", functions_evloop_config_cb, 120, wg); + + return wg; +} + +void functions_evloop_add_function(struct functions_evloop_globals *wg, const char *function, functions_evloop_worker_execute_t cb, time_t default_timeout, void *data) { + struct rrd_functions_expectation *we = callocz(1, sizeof(*we)); + we->function = function; + we->function_length = strlen(we->function); + we->cb = cb; + we->cb_data = data; + we->default_timeout = default_timeout; + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(wg->expectations, we, prev, next); +} + +void functions_evloop_cancel_threads(struct functions_evloop_globals *wg){ + for(size_t i = 0; i < wg->workers ; i++) + netdata_thread_cancel(wg->worker_threads[i]); + + netdata_thread_cancel(wg->reader_thread); +} + +// 
---------------------------------------------------------------------------- + +static void functions_evloop_config_cb(const char *transaction, char *function, usec_t *stop_monotonic_ut, bool *cancelled, + BUFFER *payload, HTTP_ACCESS access, const char *source, void *data) { + struct functions_evloop_globals *wg = data; + + CLEAN_BUFFER *result = buffer_create(1024, NULL); + int code = dyncfg_node_find_and_call(wg->dyncfg.nodes, transaction, function, stop_monotonic_ut, + cancelled, payload, access, source, result); + + netdata_mutex_lock(wg->stdout_mutex); + pluginsd_function_result_begin_to_stdout(transaction, code, content_type_id2string(result->content_type), result->expires); + printf("%s", buffer_tostring(result)); + pluginsd_function_result_end_to_stdout(); + fflush(stdout); + netdata_mutex_unlock(wg->stdout_mutex); +} + +void functions_evloop_dyncfg_add(struct functions_evloop_globals *wg, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, + const char *source, DYNCFG_CMDS cmds, + HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + dyncfg_cb_t cb, void *data) { + + if(!dyncfg_is_valid_id(id)) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "DYNCFG: id '%s' is invalid. 
Ignoring dynamic configuration for it.", id); + return; + } + + struct dyncfg_node tmp = { + .cmds = cmds, + .type = type, + .cb = cb, + .data = data, + }; + dictionary_set(wg->dyncfg.nodes, id, &tmp, sizeof(tmp)); + + CLEAN_BUFFER *c = buffer_create(100, NULL); + dyncfg_cmds2buffer(cmds, c); + + netdata_mutex_lock(wg->stdout_mutex); + + fprintf(stdout, + PLUGINSD_KEYWORD_CONFIG " '%s' " PLUGINSD_KEYWORD_CONFIG_ACTION_CREATE " '%s' '%s' '%s' '%s' '%s' '%s' "HTTP_ACCESS_FORMAT" "HTTP_ACCESS_FORMAT"\n", + id, + dyncfg_id2status(status), + dyncfg_id2type(type), path, + dyncfg_id2source_type(source_type), + source, + buffer_tostring(c), + (HTTP_ACCESS_FORMAT_CAST)view_access, + (HTTP_ACCESS_FORMAT_CAST)edit_access + ); + fflush(stdout); + + netdata_mutex_unlock(wg->stdout_mutex); +} + +void functions_evloop_dyncfg_del(struct functions_evloop_globals *wg, const char *id) { + if(!dyncfg_is_valid_id(id)) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "DYNCFG: id '%s' is invalid. Ignoring dynamic configuration for it.", id); + return; + } + + dictionary_del(wg->dyncfg.nodes, id); + + netdata_mutex_lock(wg->stdout_mutex); + + fprintf(stdout, + PLUGINSD_KEYWORD_CONFIG " %s " PLUGINSD_KEYWORD_CONFIG_ACTION_DELETE "\n", + id); + fflush(stdout); + + netdata_mutex_unlock(wg->stdout_mutex); +} + +void functions_evloop_dyncfg_status(struct functions_evloop_globals *wg, const char *id, DYNCFG_STATUS status) { + if(!dyncfg_is_valid_id(id)) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "DYNCFG: id '%s' is invalid. 
Ignoring dynamic configuration for it.", id); + return; + } + + netdata_mutex_lock(wg->stdout_mutex); + + fprintf(stdout, + PLUGINSD_KEYWORD_CONFIG " %s " PLUGINSD_KEYWORD_CONFIG_ACTION_STATUS " %s\n", + id, dyncfg_id2status(status)); + + fflush(stdout); + + netdata_mutex_unlock(wg->stdout_mutex); +} diff --git a/src/libnetdata/functions_evloop/functions_evloop.h b/src/libnetdata/functions_evloop/functions_evloop.h new file mode 100644 index 000000000..5c575bd17 --- /dev/null +++ b/src/libnetdata/functions_evloop/functions_evloop.h @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_FUNCTIONS_EVLOOP_H +#define NETDATA_FUNCTIONS_EVLOOP_H + +#include "../libnetdata.h" + +#define MAX_FUNCTION_PARAMETERS 1024 +#define PLUGINS_FUNCTIONS_TIMEOUT_DEFAULT 10 // seconds + +// plugins.d 1st version of the external plugins and streaming protocol +#define PLUGINSD_KEYWORD_CHART "CHART" +#define PLUGINSD_KEYWORD_CHART_DEFINITION_END "CHART_DEFINITION_END" +#define PLUGINSD_KEYWORD_DIMENSION "DIMENSION" +#define PLUGINSD_KEYWORD_BEGIN "BEGIN" +#define PLUGINSD_KEYWORD_SET "SET" +#define PLUGINSD_KEYWORD_END "END" +#define PLUGINSD_KEYWORD_FLUSH "FLUSH" +#define PLUGINSD_KEYWORD_DISABLE "DISABLE" +#define PLUGINSD_KEYWORD_VARIABLE "VARIABLE" +#define PLUGINSD_KEYWORD_LABEL "LABEL" +#define PLUGINSD_KEYWORD_OVERWRITE "OVERWRITE" +#define PLUGINSD_KEYWORD_CLABEL "CLABEL" +#define PLUGINSD_KEYWORD_CLABEL_COMMIT "CLABEL_COMMIT" +#define PLUGINSD_KEYWORD_EXIT "EXIT" + +// high-speed versions of BEGIN, SET, END +#define PLUGINSD_KEYWORD_BEGIN_V2 "BEGIN2" +#define PLUGINSD_KEYWORD_SET_V2 "SET2" +#define PLUGINSD_KEYWORD_END_V2 "END2" + +// super high-speed versions of BEGIN, SET, END have this as first parameter +// enabled with the streaming capability STREAM_CAP_SLOTS +#define PLUGINSD_KEYWORD_SLOT "SLOT" // to change the length of this, update pluginsd_extract_chart_slot() too + +// virtual hosts (only for external plugins - for streaming virtual hosts 
are like all other hosts) +#define PLUGINSD_KEYWORD_HOST_DEFINE "HOST_DEFINE" +#define PLUGINSD_KEYWORD_HOST_DEFINE_END "HOST_DEFINE_END" +#define PLUGINSD_KEYWORD_HOST_LABEL "HOST_LABEL" +#define PLUGINSD_KEYWORD_HOST "HOST" + +// replication +// enabled with STREAM_CAP_REPLICATION +#define PLUGINSD_KEYWORD_REPLAY_CHART "REPLAY_CHART" +#define PLUGINSD_KEYWORD_REPLAY_BEGIN "RBEGIN" +#define PLUGINSD_KEYWORD_REPLAY_SET "RSET" +#define PLUGINSD_KEYWORD_REPLAY_RRDDIM_STATE "RDSTATE" +#define PLUGINSD_KEYWORD_REPLAY_RRDSET_STATE "RSSTATE" +#define PLUGINSD_KEYWORD_REPLAY_END "REND" + +// plugins.d accepts these for functions (from external plugins or streaming children) +// related to STREAM_CAP_FUNCTIONS, STREAM_CAP_PROGRESS +#define PLUGINSD_KEYWORD_FUNCTION "FUNCTION" // define a function +#define PLUGINSD_KEYWORD_FUNCTION_PROGRESS "FUNCTION_PROGRESS" // send updates about function progress +#define PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN "FUNCTION_RESULT_BEGIN" // the result of a function transaction +#define PLUGINSD_KEYWORD_FUNCTION_RESULT_END "FUNCTION_RESULT_END" // the end of the result of a func. trans. 
+ +// plugins.d sends these for functions (to external plugins or streaming children) +// related to STREAM_CAP_FUNCTIONS, STREAM_CAP_PROGRESS +#define PLUGINSD_CALL_FUNCTION "FUNCTION" // call a function to a plugin or remote host +#define PLUGINSD_CALL_FUNCTION_PAYLOAD_BEGIN "FUNCTION_PAYLOAD" // call a function with a payload +#define PLUGINSD_CALL_FUNCTION_PAYLOAD_END "FUNCTION_PAYLOAD_END" // function payload ends +#define PLUGINSD_CALL_FUNCTION_CANCEL "FUNCTION_CANCEL" // cancel a running function transaction +#define PLUGINSD_CALL_FUNCTION_PROGRESS "FUNCTION_PROGRESS" // let the function know the user is waiting + +// dyncfg +// enabled with STREAM_CAP_DYNCFG +#define PLUGINSD_KEYWORD_CONFIG "CONFIG" +#define PLUGINSD_KEYWORD_CONFIG_ACTION_CREATE "create" +#define PLUGINSD_KEYWORD_CONFIG_ACTION_DELETE "delete" +#define PLUGINSD_KEYWORD_CONFIG_ACTION_STATUS "status" +#define PLUGINSD_FUNCTION_CONFIG "config" + +typedef void (*functions_evloop_worker_execute_t)(const char *transaction, char *function, usec_t *stop_monotonic_ut, + bool *cancelled, BUFFER *payload, HTTP_ACCESS access, + const char *source, void *data); + +struct functions_evloop_worker_job; +struct functions_evloop_globals *functions_evloop_init(size_t worker_threads, const char *tag, netdata_mutex_t *stdout_mutex, bool *plugin_should_exit); +void functions_evloop_add_function(struct functions_evloop_globals *wg, const char *function, functions_evloop_worker_execute_t cb, time_t default_timeout, void *data); +void functions_evloop_cancel_threads(struct functions_evloop_globals *wg); + +#define FUNCTIONS_EXTENDED_TIME_ON_PROGRESS_UT (10 * USEC_PER_SEC) +static inline void functions_stop_monotonic_update_on_progress(usec_t *stop_monotonic_ut) { + usec_t now_ut = now_monotonic_usec(); + if(now_ut + FUNCTIONS_EXTENDED_TIME_ON_PROGRESS_UT > *stop_monotonic_ut) { + nd_log(NDLS_DAEMON, NDLP_DEBUG, "Extending function timeout due to PROGRESS update..."); + __atomic_store_n(stop_monotonic_ut, now_ut + 
FUNCTIONS_EXTENDED_TIME_ON_PROGRESS_UT, __ATOMIC_RELAXED); + } + else + nd_log(NDLS_DAEMON, NDLP_DEBUG, "Received PROGRESS update..."); +} + +#define pluginsd_function_result_begin_to_buffer(wb, transaction, code, content_type, expires) \ + buffer_sprintf(wb \ + , PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " \"%s\" %d \"%s\" %ld\n" \ + , (transaction) ? (transaction) : "" \ + , (int)(code) \ + , (content_type) ? (content_type) : "" \ + , (long int)(expires) \ + ) + +#define pluginsd_function_result_end_to_buffer(wb) \ + buffer_strcat(wb, "\n" PLUGINSD_KEYWORD_FUNCTION_RESULT_END "\n") + +#define pluginsd_function_result_begin_to_stdout(transaction, code, content_type, expires) \ + fprintf(stdout \ + , PLUGINSD_KEYWORD_FUNCTION_RESULT_BEGIN " \"%s\" %d \"%s\" %ld\n" \ + , (transaction) ? (transaction) : "" \ + , (int)(code) \ + , (content_type) ? (content_type) : "" \ + , (long int)(expires) \ + ) + +#define pluginsd_function_result_end_to_stdout() \ + fprintf(stdout, "\n" PLUGINSD_KEYWORD_FUNCTION_RESULT_END "\n") + +static inline void pluginsd_function_json_error_to_stdout(const char *transaction, int code, const char *msg) { + char buffer[PLUGINSD_LINE_MAX + 1]; + json_escape_string(buffer, msg, PLUGINSD_LINE_MAX); + + pluginsd_function_result_begin_to_stdout(transaction, code, "application/json", now_realtime_sec()); + fprintf(stdout, "{\"status\":%d,\"error_message\":\"%s\"}", code, buffer); + pluginsd_function_result_end_to_stdout(); + fflush(stdout); +} + +static inline void pluginsd_function_result_to_stdout(const char *transaction, int code, const char *content_type, time_t expires, BUFFER *result) { + pluginsd_function_result_begin_to_stdout(transaction, code, content_type, expires); + fwrite(buffer_tostring(result), buffer_strlen(result), 1, stdout); + pluginsd_function_result_end_to_stdout(); + fflush(stdout); +} + +static inline void pluginsd_function_progress_to_stdout(const char *transaction, size_t done, size_t all) { + fprintf(stdout, 
PLUGINSD_KEYWORD_FUNCTION_PROGRESS " '%s' %zu %zu\n", + transaction, done, all); + fflush(stdout); +} + +static inline void send_newline_and_flush(pthread_mutex_t *mutex) { + netdata_mutex_lock(mutex); + fprintf(stdout, "\n"); + fflush(stdout); + netdata_mutex_unlock(mutex); +} + +void functions_evloop_dyncfg_add(struct functions_evloop_globals *wg, const char *id, const char *path, + DYNCFG_STATUS status, DYNCFG_TYPE type, DYNCFG_SOURCE_TYPE source_type, const char *source, DYNCFG_CMDS cmds, + HTTP_ACCESS view_access, HTTP_ACCESS edit_access, + dyncfg_cb_t cb, void *data); + +void functions_evloop_dyncfg_del(struct functions_evloop_globals *wg, const char *id); +void functions_evloop_dyncfg_status(struct functions_evloop_globals *wg, const char *id, DYNCFG_STATUS status); + +#endif //NETDATA_FUNCTIONS_EVLOOP_H diff --git a/libnetdata/gorilla/README.md b/src/libnetdata/gorilla/README.md index dc3718d13..dc3718d13 100644 --- a/libnetdata/gorilla/README.md +++ b/src/libnetdata/gorilla/README.md diff --git a/libnetdata/gorilla/benchmark.sh b/src/libnetdata/gorilla/benchmark.sh index a5d111435..a5d111435 100755 --- a/libnetdata/gorilla/benchmark.sh +++ b/src/libnetdata/gorilla/benchmark.sh diff --git a/libnetdata/gorilla/fuzzer.sh b/src/libnetdata/gorilla/fuzzer.sh index 19098a615..19098a615 100755 --- a/libnetdata/gorilla/fuzzer.sh +++ b/src/libnetdata/gorilla/fuzzer.sh diff --git a/libnetdata/gorilla/gorilla.cc b/src/libnetdata/gorilla/gorilla.cc index e6138ce38..c76018365 100644 --- a/libnetdata/gorilla/gorilla.cc +++ b/src/libnetdata/gorilla/gorilla.cc @@ -212,7 +212,7 @@ bool gorilla_writer_serialize(const gorilla_writer_t *gw, uint8_t *dst, uint32_t do { const gorilla_buffer_t *next_gbuf = curr_gbuf->header.next; - size_t bytes = GORILLA_BUFFER_SIZE; + size_t bytes = RRDENG_GORILLA_32BIT_BUFFER_SIZE; if (bytes > dst_size) return false; @@ -232,7 +232,7 @@ uint32_t gorilla_buffer_patch(gorilla_buffer_t *gbuf) { while (curr_gbuf->header.next) { uint32_t *buf = 
reinterpret_cast<uint32_t *>(gbuf); - gbuf = reinterpret_cast<gorilla_buffer_t *>(&buf[GORILLA_BUFFER_SLOTS]); + gbuf = reinterpret_cast<gorilla_buffer_t *>(&buf[RRDENG_GORILLA_32BIT_BUFFER_SLOTS]); assert(((uintptr_t) (gbuf) % sizeof(uintptr_t)) == 0 && "Gorilla buffer not aligned to uintptr_t"); diff --git a/libnetdata/gorilla/gorilla.h b/src/libnetdata/gorilla/gorilla.h index d57c07cf0..7975d85ee 100644 --- a/libnetdata/gorilla/gorilla.h +++ b/src/libnetdata/gorilla/gorilla.h @@ -67,8 +67,8 @@ uint32_t gorilla_buffer_patch(gorilla_buffer_t *buf); gorilla_reader_t gorilla_reader_init(gorilla_buffer_t *buf); bool gorilla_reader_read(gorilla_reader_t *gr, uint32_t *number); -#define GORILLA_BUFFER_SLOTS 128 -#define GORILLA_BUFFER_SIZE (GORILLA_BUFFER_SLOTS * sizeof(uint32_t)) +#define RRDENG_GORILLA_32BIT_BUFFER_SLOTS 128 +#define RRDENG_GORILLA_32BIT_BUFFER_SIZE (RRDENG_GORILLA_32BIT_BUFFER_SLOTS * sizeof(uint32_t)) #ifdef __cplusplus } diff --git a/src/libnetdata/http/content_type.c b/src/libnetdata/http/content_type.c new file mode 100644 index 000000000..05bede17f --- /dev/null +++ b/src/libnetdata/http/content_type.c @@ -0,0 +1,96 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "content_type.h" + + +static struct { + const char *format; + HTTP_CONTENT_TYPE content_type; + bool needs_charset; + const char *options; +} content_types[] = { + // primary - preferred during id-to-string conversions + { .format = "text/html", CT_TEXT_HTML, true }, + { .format = "text/plain", CT_TEXT_PLAIN, true }, + { .format = "text/css", CT_TEXT_CSS, true }, + { .format = "text/yaml", CT_TEXT_YAML, true }, + { .format = "text/xml", CT_TEXT_XML, true }, + { .format = "text/xsl", CT_TEXT_XSL, true }, + { .format = "application/json", CT_APPLICATION_JSON, true }, + { .format = "application/xml", CT_APPLICATION_XML, true }, + { .format = "application/javascript", CT_APPLICATION_X_JAVASCRIPT, true }, + { .format = "application/octet-stream", CT_APPLICATION_OCTET_STREAM, 
false }, + { .format = "image/svg+xml", CT_IMAGE_SVG_XML, false }, + { .format = "application/x-font-truetype", CT_APPLICATION_X_FONT_TRUETYPE, false }, + { .format = "application/x-font-opentype", CT_APPLICATION_X_FONT_OPENTYPE, false }, + { .format = "application/font-woff", CT_APPLICATION_FONT_WOFF, false }, + { .format = "application/font-woff2", CT_APPLICATION_FONT_WOFF2, false }, + { .format = "application/vnd.ms-fontobject",CT_APPLICATION_VND_MS_FONTOBJ, false }, + { .format = "image/png", CT_IMAGE_PNG, false }, + { .format = "image/jpeg", CT_IMAGE_JPG, false }, + { .format = "image/gif", CT_IMAGE_GIF, false }, + { .format = "image/x-icon", CT_IMAGE_XICON, false }, + { .format = "image/bmp", CT_IMAGE_BMP, false }, + { .format = "image/icns", CT_IMAGE_ICNS, false }, + { .format = "audio/mpeg", CT_AUDIO_MPEG, false }, + { .format = "audio/ogg", CT_AUDIO_OGG, false }, + { .format = "video/mp4", CT_VIDEO_MP4, false }, + { .format = "application/pdf", CT_APPLICATION_PDF, false }, + { .format = "application/zip", CT_APPLICATION_ZIP, false }, + { .format = "image/png", CT_IMAGE_PNG, false }, + + // secondary - overlapping with primary + + { .format = "text/plain", CT_PROMETHEUS, false, "version=0.0.4" }, + { .format = "prometheus", CT_PROMETHEUS }, + { .format = "text", CT_TEXT_PLAIN }, + { .format = "txt", CT_TEXT_PLAIN }, + { .format = "json", CT_APPLICATION_JSON }, + { .format = "html", CT_TEXT_HTML }, + { .format = "xml", CT_APPLICATION_XML }, + + // terminator + { .format = NULL, CT_TEXT_PLAIN }, +}; + +HTTP_CONTENT_TYPE content_type_string2id(const char *format) { + if(format && *format) { + for (int i = 0; content_types[i].format; i++) + if (strcmp(content_types[i].format, format) == 0) + return content_types[i].content_type; + } + + return CT_TEXT_PLAIN; +} + +const char *content_type_id2string(HTTP_CONTENT_TYPE content_type) { + for (int i = 0; content_types[i].format; i++) + if (content_types[i].content_type == content_type) + return 
content_types[i].format; + + return "text/plain"; +} + +void http_header_content_type(BUFFER *wb, HTTP_CONTENT_TYPE content_type) { + buffer_strcat(wb, "Content-Type: "); + + for (int i = 0; content_types[i].format; i++) { + if (content_types[i].content_type == content_type) { + buffer_strcat(wb, content_types[i].format); + + if(content_types[i].needs_charset) { + buffer_strcat(wb, "; charset=utf-8"); + } + if(content_types[i].options) { + buffer_strcat(wb, "; "); + buffer_strcat(wb, content_types[i].options); + } + + buffer_strcat(wb, "\r\n"); + + return; + } + } + + buffer_strcat(wb, "text/plain; charset=utf-8\r\n"); +} diff --git a/src/libnetdata/http/content_type.h b/src/libnetdata/http/content_type.h new file mode 100644 index 000000000..66fba95a8 --- /dev/null +++ b/src/libnetdata/http/content_type.h @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_CONTENT_TYPE_H +#define NETDATA_CONTENT_TYPE_H + +typedef enum __attribute__ ((__packed__)) { + CT_NONE = 0, + CT_APPLICATION_JSON, + CT_TEXT_PLAIN, + CT_TEXT_HTML, + CT_APPLICATION_X_JAVASCRIPT, + CT_TEXT_CSS, + CT_TEXT_XML, + CT_APPLICATION_XML, + CT_TEXT_XSL, + CT_APPLICATION_OCTET_STREAM, + CT_APPLICATION_X_FONT_TRUETYPE, + CT_APPLICATION_X_FONT_OPENTYPE, + CT_APPLICATION_FONT_WOFF, + CT_APPLICATION_FONT_WOFF2, + CT_APPLICATION_VND_MS_FONTOBJ, + CT_IMAGE_SVG_XML, + CT_IMAGE_PNG, + CT_IMAGE_JPG, + CT_IMAGE_GIF, + CT_IMAGE_XICON, + CT_IMAGE_ICNS, + CT_IMAGE_BMP, + CT_PROMETHEUS, + CT_AUDIO_MPEG, + CT_AUDIO_OGG, + CT_VIDEO_MP4, + CT_APPLICATION_PDF, + CT_APPLICATION_ZIP, + CT_TEXT_YAML, +} HTTP_CONTENT_TYPE; + +HTTP_CONTENT_TYPE content_type_string2id(const char *format); +const char *content_type_id2string(HTTP_CONTENT_TYPE content_type); + +#include "../libnetdata.h" + +void http_header_content_type(struct web_buffer *wb, HTTP_CONTENT_TYPE type); + +#endif //NETDATA_CONTENT_TYPE_H diff --git a/src/libnetdata/http/http_access.c b/src/libnetdata/http/http_access.c new file mode 
100644 index 000000000..5be63bb19 --- /dev/null +++ b/src/libnetdata/http/http_access.c @@ -0,0 +1,186 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +static struct { + HTTP_USER_ROLE access; + const char *name; +} user_roles[] = { + { .access = HTTP_USER_ROLE_NONE, .name = "none" }, + { .access = HTTP_USER_ROLE_ADMIN, .name = "admin" }, + { .access = HTTP_USER_ROLE_MANAGER, .name = "manager" }, + { .access = HTTP_USER_ROLE_TROUBLESHOOTER, .name = "troubleshooter" }, + { .access = HTTP_USER_ROLE_OBSERVER, .name = "observer" }, + { .access = HTTP_USER_ROLE_MEMBER, .name = "member" }, + { .access = HTTP_USER_ROLE_BILLING, .name = "billing" }, + { .access = HTTP_USER_ROLE_ANY, .name = "any" }, + + { .access = HTTP_USER_ROLE_MEMBER, .name = "members" }, + { .access = HTTP_USER_ROLE_ADMIN, .name = "admins" }, + { .access = HTTP_USER_ROLE_ANY, .name = "all" }, + + // terminator + { .access = 0, .name = NULL }, +}; + +HTTP_USER_ROLE http_user_role2id(const char *role) { + if(!role || !*role) + return HTTP_USER_ROLE_MEMBER; + + for(size_t i = 0; user_roles[i].name ;i++) { + if(strcmp(user_roles[i].name, role) == 0) + return user_roles[i].access; + } + + nd_log(NDLS_DAEMON, NDLP_WARNING, "HTTP user role '%s' is not valid", role); + return HTTP_USER_ROLE_NONE; +} + +const char *http_id2user_role(HTTP_USER_ROLE role) { + for(size_t i = 0; user_roles[i].name ;i++) { + if(role == user_roles[i].access) + return user_roles[i].name; + } + + nd_log(NDLS_DAEMON, NDLP_WARNING, "HTTP user role %d is not valid", role); + return "none"; +} + +// -------------------------------------------------------------------------------------------------------------------- + +static struct { + const char *name; + uint32_t hash; + HTTP_ACCESS value; +} http_accesses[] = { + {"none" , 0 , HTTP_ACCESS_NONE} + , {"signed-in" , 0 , HTTP_ACCESS_SIGNED_ID} + , {"same-space" , 0 , HTTP_ACCESS_SAME_SPACE} + , {"commercial" , 0 , HTTP_ACCESS_COMMERCIAL_SPACE} + , 
{"anonymous-data" , 0 , HTTP_ACCESS_ANONYMOUS_DATA} + , {"sensitive-data" , 0 , HTTP_ACCESS_SENSITIVE_DATA} + , {"view-config" , 0 , HTTP_ACCESS_VIEW_AGENT_CONFIG} + , {"edit-config" , 0 , HTTP_ACCESS_EDIT_AGENT_CONFIG} + , {"view-notifications-config" , 0 , HTTP_ACCESS_VIEW_NOTIFICATIONS_CONFIG} + , {"edit-notifications-config" , 0 , HTTP_ACCESS_EDIT_NOTIFICATIONS_CONFIG} + , {"view-alerts-silencing" , 0 , HTTP_ACCESS_VIEW_ALERTS_SILENCING} + , {"edit-alerts-silencing" , 0 , HTTP_ACCESS_EDIT_ALERTS_SILENCING} + + , {NULL , 0 , 0} +}; + +inline HTTP_ACCESS http_access2id_one(const char *str) { + HTTP_ACCESS ret = 0; + + if(!str || !*str) return ret; + + uint32_t hash = simple_hash(str); + int i; + for(i = 0; http_accesses[i].name ; i++) { + if(unlikely(!http_accesses[i].hash)) + http_accesses[i].hash = simple_hash(http_accesses[i].name); + + if (unlikely(hash == http_accesses[i].hash && !strcmp(str, http_accesses[i].name))) { + ret |= http_accesses[i].value; + break; + } + } + + return ret; +} + +inline HTTP_ACCESS http_access2id(char *str) { + HTTP_ACCESS ret = 0; + char *tok; + + while(str && *str && (tok = strsep_skip_consecutive_separators(&str, ", |"))) { + if(!*tok) continue; + ret |= http_access2id_one(tok); + } + + return ret; +} + +void http_access2buffer_json_array(BUFFER *wb, const char *key, HTTP_ACCESS access) { + buffer_json_member_add_array(wb, key); + + HTTP_ACCESS used = 0; // to prevent adding duplicates + for(int i = 0; http_accesses[i].name ; i++) { + if (unlikely((http_accesses[i].value & access) && !(http_accesses[i].value & used))) { + const char *name = http_accesses[i].name; + used |= http_accesses[i].value; + + buffer_json_add_array_item_string(wb, name); + } + } + + buffer_json_array_close(wb); +} + +void http_access2txt(char *buf, size_t size, const char *separator, HTTP_ACCESS access) { + char *write = buf; + char *end = &buf[size - 1]; + + HTTP_ACCESS used = 0; // to prevent adding duplicates + int added = 0; + for(int i = 0; 
http_accesses[i].name ; i++) { + if (unlikely((http_accesses[i].value & access) && !(http_accesses[i].value & used))) { + const char *name = http_accesses[i].name; + used |= http_accesses[i].value; + + if(added && write < end) { + const char *s = separator; + while(*s && write < end) + *write++ = *s++; + } + + while(*name && write < end) + *write++ = *name++; + + added++; + } + } + *write = *end = '\0'; +} + +HTTP_ACCESS http_access_from_hex_mapping_old_roles(const char *str) { + if(!str || !*str) + return HTTP_ACCESS_NONE; + + if(strcmp(str, "any") == 0 || strcmp(str, "all") == 0) + return HTTP_ACCESS_MAP_OLD_ANY; + + if(strcmp(str, "member") == 0 || strcmp(str, "members") == 0) + return HTTP_ACCESS_MAP_OLD_MEMBER; + + else if(strcmp(str, "admin") == 0 || strcmp(str, "admins") == 0) + return HTTP_ACCESS_MAP_OLD_ADMIN; + + return (HTTP_ACCESS)strtoull(str, NULL, 16) & HTTP_ACCESS_ALL; +} + +HTTP_ACCESS http_access_from_hex(const char *str) { + if(!str || !*str) + return HTTP_ACCESS_NONE; + + return (HTTP_ACCESS)strtoull(str, NULL, 16) & HTTP_ACCESS_ALL; +} + +HTTP_ACCESS http_access_from_source(const char *str) { + if(!str || !*str) + return HTTP_ACCESS_NONE; + + HTTP_ACCESS access = HTTP_ACCESS_NONE; + + const char *permissions = strstr(str, "permissions="); + if(permissions) + access = (HTTP_ACCESS)strtoull(permissions + 12, NULL, 16) & HTTP_ACCESS_ALL; + + return access; +} + +bool log_cb_http_access_to_hex(BUFFER *wb, void *data) { + HTTP_ACCESS access = *((HTTP_ACCESS *)data); + buffer_sprintf(wb, HTTP_ACCESS_FORMAT, (HTTP_ACCESS_FORMAT_CAST)access); + return true; +} diff --git a/src/libnetdata/http/http_access.h b/src/libnetdata/http/http_access.h new file mode 100644 index 000000000..afc2e1dc7 --- /dev/null +++ b/src/libnetdata/http/http_access.h @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_HTTP_ACCESS_H +#define NETDATA_HTTP_ACCESS_H + +typedef enum __attribute__((packed)) { + HTTP_USER_ROLE_NONE = 0, + 
HTTP_USER_ROLE_ADMIN = 1, + HTTP_USER_ROLE_MANAGER = 2, + HTTP_USER_ROLE_TROUBLESHOOTER = 3, + HTTP_USER_ROLE_OBSERVER = 4, + HTTP_USER_ROLE_MEMBER = 5, + HTTP_USER_ROLE_BILLING = 6, + HTTP_USER_ROLE_ANY = 7, + + // keep this list so that lower numbers are more strict access levels +} HTTP_USER_ROLE; +const char *http_id2user_role(HTTP_USER_ROLE role); +HTTP_USER_ROLE http_user_role2id(const char *role); + +typedef enum __attribute__((packed)) { + HTTP_ACCESS_NONE = 0, // adm man trb obs mem bil + HTTP_ACCESS_SIGNED_ID = (1 << 0), // User is authenticated A A A A A A + HTTP_ACCESS_SAME_SPACE = (1 << 1), // NC user+agent = same space A A A A A A + HTTP_ACCESS_COMMERCIAL_SPACE = (1 << 2), // NC P P P P P P + HTTP_ACCESS_ANONYMOUS_DATA = (1 << 3), // NC room:Read A A A SR SR - + HTTP_ACCESS_SENSITIVE_DATA = (1 << 4), // NC agent:ViewSensitiveData A A A - SR - + HTTP_ACCESS_VIEW_AGENT_CONFIG = (1 << 5), // NC agent:ReadDynCfg P P - - - - + HTTP_ACCESS_EDIT_AGENT_CONFIG = (1 << 6), // NC agent:EditDynCfg P P - - - - + HTTP_ACCESS_VIEW_NOTIFICATIONS_CONFIG = (1 << 7), // NC agent:ViewNotificationsConfig P - - - - - + HTTP_ACCESS_EDIT_NOTIFICATIONS_CONFIG = (1 << 8), // NC agent:EditNotificationsConfig P - - - - - + HTTP_ACCESS_VIEW_ALERTS_SILENCING = (1 << 9), // NC space:GetSystemSilencingRules A A A - A - + HTTP_ACCESS_EDIT_ALERTS_SILENCING = (1 << 10), // NC space:CreateSystemSilencingRule P P - - P - +} HTTP_ACCESS; // --------------------- + // A = always + // P = commercial plan + // SR = same room (Us+Ag) + +#define HTTP_ACCESS_FORMAT "0x%" PRIx32 +#define HTTP_ACCESS_FORMAT_CAST uint32_t + +#define HTTP_ACCESS_ALL (HTTP_ACCESS)( \ + HTTP_ACCESS_SIGNED_ID \ + | HTTP_ACCESS_SAME_SPACE \ + | HTTP_ACCESS_COMMERCIAL_SPACE \ + | HTTP_ACCESS_ANONYMOUS_DATA \ + | HTTP_ACCESS_SENSITIVE_DATA \ + | HTTP_ACCESS_VIEW_AGENT_CONFIG \ + | HTTP_ACCESS_EDIT_AGENT_CONFIG \ + | HTTP_ACCESS_VIEW_NOTIFICATIONS_CONFIG \ + | HTTP_ACCESS_EDIT_NOTIFICATIONS_CONFIG \ + | 
HTTP_ACCESS_VIEW_ALERTS_SILENCING \ + | HTTP_ACCESS_EDIT_ALERTS_SILENCING \ +) + +#define HTTP_ACCESS_MAP_OLD_ANY (HTTP_ACCESS)(HTTP_ACCESS_ANONYMOUS_DATA) + +#define HTTP_ACCESS_MAP_OLD_MEMBER (HTTP_ACCESS)( \ + HTTP_ACCESS_SIGNED_ID \ + | HTTP_ACCESS_SAME_SPACE \ + | HTTP_ACCESS_ANONYMOUS_DATA | HTTP_ACCESS_SENSITIVE_DATA) + +#define HTTP_ACCESS_MAP_OLD_ADMIN (HTTP_ACCESS)( \ + HTTP_ACCESS_SIGNED_ID \ + | HTTP_ACCESS_SAME_SPACE \ + | HTTP_ACCESS_ANONYMOUS_DATA | HTTP_ACCESS_SENSITIVE_DATA | HTTP_ACCESS_VIEW_AGENT_CONFIG \ + | HTTP_ACCESS_EDIT_AGENT_CONFIG \ +) + +HTTP_ACCESS http_access2id_one(const char *str); +HTTP_ACCESS http_access2id(char *str); +struct web_buffer; +void http_access2buffer_json_array(struct web_buffer *wb, const char *key, HTTP_ACCESS access); +void http_access2txt(char *buf, size_t size, const char *separator, HTTP_ACCESS access); +HTTP_ACCESS http_access_from_hex(const char *str); +HTTP_ACCESS http_access_from_hex_mapping_old_roles(const char *str); +HTTP_ACCESS http_access_from_source(const char *str); +bool log_cb_http_access_to_hex(struct web_buffer *wb, void *data); + +#define HTTP_ACCESS_PERMISSION_DENIED_HTTP_CODE(access) ((access & HTTP_ACCESS_SIGNED_ID) ? 
HTTP_RESP_FORBIDDEN : HTTP_RESP_PRECOND_FAIL) + +typedef enum __attribute__((packed)) { + HTTP_ACL_NONE = (0), + + HTTP_ACL_NOCHECK = (1 << 0), // Don't check anything - adding this to an endpoint, disables ACL checking + + // transports + HTTP_ACL_API = (1 << 1), // from the internal web server (TCP port) + HTTP_ACL_API_UDP = (1 << 2), // from the internal web server (UDP port) + HTTP_ACL_API_UNIX = (1 << 3), // from the internal web server (UNIX socket) + HTTP_ACL_H2O = (1 << 4), // from the h2o web server + HTTP_ACL_ACLK = (1 << 5), // from ACLK + HTTP_ACL_WEBRTC = (1 << 6), // from WebRTC + + // HTTP_ACL_API takes the following additional ACLs, based on pattern matching of the client IP + HTTP_ACL_DASHBOARD = (1 << 10), + HTTP_ACL_REGISTRY = (1 << 11), + HTTP_ACL_BADGES = (1 << 12), + HTTP_ACL_MANAGEMENT = (1 << 13), + HTTP_ACL_STREAMING = (1 << 14), + HTTP_ACL_NETDATACONF = (1 << 15), + + // SSL related + HTTP_ACL_SSL_OPTIONAL = (1 << 28), + HTTP_ACL_SSL_FORCE = (1 << 29), + HTTP_ACL_SSL_DEFAULT = (1 << 30), +} HTTP_ACL; + +#define HTTP_ACL_TRANSPORTS (HTTP_ACL)( \ + HTTP_ACL_API \ + | HTTP_ACL_API_UDP \ + | HTTP_ACL_API_UNIX \ + | HTTP_ACL_H2O \ + | HTTP_ACL_ACLK \ + | HTTP_ACL_WEBRTC \ +) + +#define HTTP_ACL_TRANSPORTS_WITHOUT_CLIENT_IP_VALIDATION (HTTP_ACL)( \ + HTTP_ACL_ACLK \ + | HTTP_ACL_WEBRTC \ +) + +#define HTTP_ACL_ALL_FEATURES (HTTP_ACL)( \ + HTTP_ACL_DASHBOARD \ + | HTTP_ACL_REGISTRY \ + | HTTP_ACL_BADGES \ + | HTTP_ACL_MANAGEMENT \ + | HTTP_ACL_STREAMING \ + | HTTP_ACL_NETDATACONF \ +) + +#ifdef NETDATA_DEV_MODE +#define ACL_DEV_OPEN_ACCESS HTTP_ACL_NOCHECK +#else +#define ACL_DEV_OPEN_ACCESS 0 +#endif + +#define http_can_access_dashboard(w) ((w)->acl & HTTP_ACL_DASHBOARD) +#define http_can_access_registry(w) ((w)->acl & HTTP_ACL_REGISTRY) +#define http_can_access_badges(w) ((w)->acl & HTTP_ACL_BADGES) +#define http_can_access_mgmt(w) ((w)->acl & HTTP_ACL_MANAGEMENT) +#define http_can_access_stream(w) ((w)->acl & HTTP_ACL_STREAMING) +#define 
http_can_access_netdataconf(w) ((w)->acl & HTTP_ACL_NETDATACONF) +#define http_is_using_ssl_optional(w) ((w)->port_acl & HTTP_ACL_SSL_OPTIONAL) +#define http_is_using_ssl_force(w) ((w)->port_acl & HTTP_ACL_SSL_FORCE) +#define http_is_using_ssl_default(w) ((w)->port_acl & HTTP_ACL_SSL_DEFAULT) + +#endif //NETDATA_HTTP_ACCESS_H diff --git a/src/libnetdata/http/http_defs.c b/src/libnetdata/http/http_defs.c new file mode 100644 index 000000000..ef7621a65 --- /dev/null +++ b/src/libnetdata/http/http_defs.c @@ -0,0 +1,245 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +ENUM_STR_MAP_DEFINE(HTTP_REQUEST_MODE) = +{ + { .name = "OPTIONS", .id = HTTP_REQUEST_MODE_OPTIONS }, + { .name = "GET", .id = HTTP_REQUEST_MODE_GET }, + { .name = "FILECOPY", .id = HTTP_REQUEST_MODE_FILECOPY }, + { .name = "POST", .id = HTTP_REQUEST_MODE_POST }, + { .name = "PUT", .id = HTTP_REQUEST_MODE_PUT }, + { .name = "DELETE", .id = HTTP_REQUEST_MODE_DELETE }, + { .name = "STREAM", .id = HTTP_REQUEST_MODE_STREAM }, + + // terminator + { .name = NULL, .id = 0 } +}; + +ENUM_STR_DEFINE_FUNCTIONS(HTTP_REQUEST_MODE, 0, "UNKNOWN"); + +const char *http_response_code2string(int code) { + switch(code) { + case 100: + return "Continue"; + case 101: + return "Switching Protocols"; + case 102: + return "Processing"; + case 103: + return "Early Hints"; + + case 200: + return "OK"; + case 201: + return "Created"; + case 202: + return "Accepted"; + case 203: + return "Non-Authoritative Information"; + case 204: + return "No Content"; + case 205: + return "Reset Content"; + case 206: + return "Partial Content"; + case 207: + return "Multi-Status"; + case 208: + return "Already Reported"; + case 226: + return "IM Used"; + + case 300: + return "Multiple Choices"; + case 301: + return "Moved Permanently"; + case 302: + return "Found"; + case 303: + return "See Other"; + case 304: + return "Not Modified"; + case 305: + return "Use Proxy"; + case 306: + return "Switch Proxy"; + case 
307: + return "Temporary Redirect"; + case 308: + return "Permanent Redirect"; + + case 400: + return "Bad Request"; + case 401: + return "Unauthorized"; + case 402: + return "Payment Required"; + case 403: + return "Forbidden"; + case 404: + return "Not Found"; + case 405: + return "Method Not Allowed"; + case 406: + return "Not Acceptable"; + case 407: + return "Proxy Authentication Required"; + case 408: + return "Request Timeout"; + case 409: + return "Conflict"; + case 410: + return "Gone"; + case 411: + return "Length Required"; + case 412: + return "Precondition Failed"; + case 413: + return "Payload Too Large"; + case 414: + return "URI Too Long"; + case 415: + return "Unsupported Media Type"; + case 416: + return "Range Not Satisfiable"; + case 417: + return "Expectation Failed"; + case 418: + return "I'm a teapot"; + case 421: + return "Misdirected Request"; + case 422: + return "Unprocessable Entity"; + case 423: + return "Locked"; + case 424: + return "Failed Dependency"; + case 425: + return "Too Early"; + case 426: + return "Upgrade Required"; + case 428: + return "Precondition Required"; + case 429: + return "Too Many Requests"; + case 431: + return "Request Header Fields Too Large"; + case 451: + return "Unavailable For Legal Reasons"; + case 499: // nginx's extension to the standard + return "Client Closed Request"; + + case 500: + return "Internal Server Error"; + case 501: + return "Not Implemented"; + case 502: + return "Bad Gateway"; + case 503: + return "Service Unavailable"; + case 504: + return "Gateway Timeout"; + case 505: + return "HTTP Version Not Supported"; + case 506: + return "Variant Also Negotiates"; + case 507: + return "Insufficient Storage"; + case 508: + return "Loop Detected"; + case 510: + return "Not Extended"; + case 511: + return "Network Authentication Required"; + + default: + if(code >= 100 && code < 200) + return "Informational"; + + if(code >= 200 && code < 300) + return "Successful"; + + if(code >= 300 && code < 400) 
+ return "Redirection"; + + if(code >= 400 && code < 500) + return "Client Error"; + + if(code >= 500 && code < 600) + return "Server Error"; + + return "Undefined Error"; + } +} + + +static struct { + const char *extension; + uint32_t hash; + HTTP_CONTENT_TYPE contenttype; +} mime_types[] = { + { "html" , 0 , CT_TEXT_HTML } + , { "js" , 0 , CT_APPLICATION_X_JAVASCRIPT } + , { "css" , 0 , CT_TEXT_CSS } + , { "xml" , 0 , CT_TEXT_XML } + , { "xsl" , 0 , CT_TEXT_XSL } + , { "txt" , 0 , CT_TEXT_PLAIN } + , { "svg" , 0 , CT_IMAGE_SVG_XML } + , { "ttf" , 0 , CT_APPLICATION_X_FONT_TRUETYPE } + , { "otf" , 0 , CT_APPLICATION_X_FONT_OPENTYPE } + , { "woff2", 0 , CT_APPLICATION_FONT_WOFF2 } + , { "woff" , 0 , CT_APPLICATION_FONT_WOFF } + , { "eot" , 0 , CT_APPLICATION_VND_MS_FONTOBJ } + , { "png" , 0 , CT_IMAGE_PNG } + , { "jpg" , 0 , CT_IMAGE_JPG } + , { "jpeg" , 0 , CT_IMAGE_JPG } + , { "gif" , 0 , CT_IMAGE_GIF } + , { "bmp" , 0 , CT_IMAGE_BMP } + , { "ico" , 0 , CT_IMAGE_XICON } + , { "icns" , 0 , CT_IMAGE_ICNS } + + // terminator + , { NULL , 0 , 0 } +}; + +HTTP_CONTENT_TYPE contenttype_for_filename(const char *filename) { + // netdata_log_info("checking filename '%s'", filename); + + static int initialized = 0; + int i; + + if(unlikely(!initialized)) { + for (i = 0; mime_types[i].extension; i++) + mime_types[i].hash = simple_hash(mime_types[i].extension); + + initialized = 1; + } + + const char *s = filename, *last_dot = NULL; + + // find the last dot + while(*s) { + if(unlikely(*s == '.')) last_dot = s; + s++; + } + + if(unlikely(!last_dot || !*last_dot || !last_dot[1])) { + // netdata_log_info("no extension for filename '%s'", filename); + return CT_APPLICATION_OCTET_STREAM; + } + last_dot++; + + // netdata_log_info("extension for filename '%s' is '%s'", filename, last_dot); + + uint32_t hash = simple_hash(last_dot); + for(i = 0; mime_types[i].extension ; i++) { + if(unlikely(hash == mime_types[i].hash && !strcmp(last_dot, mime_types[i].extension))) { + // 
netdata_log_info("matched extension for filename '%s': '%s'", filename, last_dot); + return mime_types[i].contenttype; + } + } + + // netdata_log_info("not matched extension for filename '%s': '%s'", filename, last_dot); + return CT_APPLICATION_OCTET_STREAM; +} diff --git a/src/libnetdata/http/http_defs.h b/src/libnetdata/http/http_defs.h new file mode 100644 index 000000000..e1e26863e --- /dev/null +++ b/src/libnetdata/http/http_defs.h @@ -0,0 +1,61 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_HTTP_DEFS_H +#define NETDATA_HTTP_DEFS_H + +#define HTTP_1_1 " HTTP/1.1" +#define HTTP_HDR_END "\r\n\r\n" +#define HTTP_ENDL "\r\n" + +// HTTP_CODES 1XX +#define HTTP_RESP_SWITCH_PROTO 101 + +// HTTP_CODES 2XX Success +#define HTTP_RESP_OK 200 +#define HTTP_RESP_ACCEPTED 202 + +// HTTP_CODES 3XX Redirections +#define HTTP_RESP_MOVED_PERM 301 +#define HTTP_RESP_NOT_MODIFIED 304 +#define HTTP_RESP_REDIR_TEMP 307 +#define HTTP_RESP_REDIR_PERM 308 +#define HTTP_RESP_HTTPS_UPGRADE 399 + +// HTTP_CODES 4XX Client Errors +#define HTTP_RESP_BAD_REQUEST 400 +#define HTTP_RESP_NOT_FOUND 404 +#define HTTP_RESP_METHOD_NOT_ALLOWED 405 +#define HTTP_RESP_CONFLICT 409 +#define HTTP_RESP_CONTENT_TOO_LONG 413 + +#define HTTP_RESP_UNAUTHORIZED 401 // do not use 401 when responding to users - it is used by authenticating proxies +#define HTTP_RESP_FORBIDDEN 403 // not enough permissions to access this resource +#define HTTP_RESP_PRECOND_FAIL 412 // An authorization bearer is required by it was not found in the request +#define HTTP_RESP_UNAVAILABLE_FOR_LEGAL_REASONS 451 // Unavailable For Legal Reasons, we use it instead of 403 when access is forbidden due to an ACL. 
+ +#define HTTP_RESP_CLIENT_CLOSED_REQUEST 499 // nginx's enxtension to the standard + +// HTTP_CODES 5XX Server Errors +#define HTTP_RESP_INTERNAL_SERVER_ERROR 500 +#define HTTP_RESP_NOT_IMPLEMENTED 501 +#define HTTP_RESP_SERVICE_UNAVAILABLE 503 +#define HTTP_RESP_GATEWAY_TIMEOUT 504 +#define HTTP_RESP_BACKEND_RESPONSE_INVALID 591 + +typedef enum __attribute__((__packed__)) { + HTTP_REQUEST_MODE_NONE = 0, + HTTP_REQUEST_MODE_GET = 1, + HTTP_REQUEST_MODE_POST = 2, + HTTP_REQUEST_MODE_PUT = 3, + HTTP_REQUEST_MODE_DELETE = 4, + HTTP_REQUEST_MODE_FILECOPY = 5, + HTTP_REQUEST_MODE_OPTIONS = 6, + HTTP_REQUEST_MODE_STREAM = 7, +} HTTP_REQUEST_MODE; + +ENUM_STR_DEFINE_FUNCTIONS_EXTERN(HTTP_REQUEST_MODE); + +const char *http_response_code2string(int code); +HTTP_CONTENT_TYPE contenttype_for_filename(const char *filename); + +#endif /* NETDATA_HTTP_DEFS_H */ diff --git a/libnetdata/inlined.h b/src/libnetdata/inlined.h index 535b791e3..9c0d2dd0b 100644 --- a/libnetdata/inlined.h +++ b/src/libnetdata/inlined.h @@ -204,12 +204,28 @@ static inline long long str2ll(const char *s, char **endptr) { } } +static inline uint32_t str2uint32_hex(const char *src, char **endptr) { + uint32_t num = 0; + const unsigned char *s = (const unsigned char *)src; + unsigned char c; + + while ((c = hex_value_from_ascii[(uint8_t)*s]) != 255) { + num = (num << 4) | c; + s++; + } + + if(endptr) + *endptr = (char *)s; + + return num; +} + static inline uint64_t str2uint64_hex(const char *src, char **endptr) { uint64_t num = 0; const unsigned char *s = (const unsigned char *)src; unsigned char c; - while ((c = hex_value_from_ascii[toupper(*s)]) != 255) { + while ((c = hex_value_from_ascii[(uint8_t)*s]) != 255) { num = (num << 4) | c; s++; } @@ -469,16 +485,16 @@ static inline bool sanitize_command_argument_string(char *dst, const char *src, return true; } -static inline int read_file(const char *filename, char *buffer, size_t size) { +static inline int read_txt_file(const char *filename, char *buffer, 
size_t size) { if(unlikely(!size)) return 3; - int fd = open(filename, O_RDONLY, 0666); + int fd = open(filename, O_RDONLY | O_CLOEXEC, 0666); if(unlikely(fd == -1)) { buffer[0] = '\0'; return 1; } - ssize_t r = read(fd, buffer, size); + ssize_t r = read(fd, buffer, size - 1); // leave space of the final zero if(unlikely(r == -1)) { buffer[0] = '\0'; close(fd); @@ -490,10 +506,43 @@ static inline int read_file(const char *filename, char *buffer, size_t size) { return 0; } +static inline int read_proc_cmdline(const char *filename, char *buffer, size_t size) { + if (unlikely(!size)) return 3; + + int fd = open(filename, O_RDONLY | O_CLOEXEC, 0666); + if (unlikely(fd == -1)) { + buffer[0] = '\0'; + return 1; + } + + ssize_t r = read(fd, buffer, size - 1); // Leave space for final null character + if (unlikely(r == -1)) { + buffer[0] = '\0'; + close(fd); + return 2; + } + + if (r > 0) { + // Replace null characters with spaces, except for the last one + for (ssize_t i = 0; i < r - 1; i++) { + if (buffer[i] == '\0') { + buffer[i] = ' '; + } + } + buffer[r] = '\0'; // Null-terminate the string + } + else { + buffer[0] = '\0'; // Empty cmdline + } + + close(fd); + return 0; +} + static inline int read_single_number_file(const char *filename, unsigned long long *result) { char buffer[30 + 1]; - int ret = read_file(filename, buffer, 30); + int ret = read_txt_file(filename, buffer, sizeof(buffer)); if(unlikely(ret)) { *result = 0; return ret; @@ -507,7 +556,7 @@ static inline int read_single_number_file(const char *filename, unsigned long lo static inline int read_single_signed_number_file(const char *filename, long long *result) { char buffer[30 + 1]; - int ret = read_file(filename, buffer, 30); + int ret = read_txt_file(filename, buffer, sizeof(buffer)); if(unlikely(ret)) { *result = 0; return ret; @@ -521,7 +570,7 @@ static inline int read_single_signed_number_file(const char *filename, long long static inline int read_single_base64_or_hex_number_file(const char 
*filename, unsigned long long *result) { char buffer[30 + 1]; - int ret = read_file(filename, buffer, 30); + int ret = read_txt_file(filename, buffer, sizeof(buffer)); if(unlikely(ret)) { *result = 0; return ret; @@ -597,4 +646,50 @@ static inline char *trim_all(char *buffer) { return buffer; } +static inline bool streq(const char *a, const char *b) { + if (a == b) + return true; + + if (a == NULL || b == NULL) + return false; + + return strcmp(a, b) == 0; +} + +static inline bool strstartswith(const char *string, const char *prefix) { + if (string == NULL || prefix == NULL) + return false; + + size_t string_len = strlen(string); + size_t prefix_len = strlen(prefix); + + if (prefix_len > string_len) + return false; + + return strncmp(string, prefix, prefix_len) == 0; +} + +static inline bool strendswith(const char *string, const char *suffix) { + if (string == NULL || suffix == NULL) + return false; + + size_t string_len = strlen(string); + size_t suffix_len = strlen(suffix); + + if (suffix_len > string_len) + return false; + + return strcmp(string + string_len - suffix_len, suffix) == 0; +} + +static inline bool strendswith_lengths(const char *string, size_t string_len, const char *suffix, size_t suffix_len) { + if (string == NULL || suffix == NULL) + return false; + + if (suffix_len > string_len) + return false; + + return strcmp(string + string_len - suffix_len, suffix) == 0; +} + #endif //NETDATA_INLINED_H diff --git a/src/libnetdata/json/README.md b/src/libnetdata/json/README.md new file mode 100644 index 000000000..9ae5ff382 --- /dev/null +++ b/src/libnetdata/json/README.md @@ -0,0 +1,14 @@ +<!-- +title: "json" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/json/README.md +sidebar_label: "json" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# json + +`json` contains a parser for json strings, based on `jsmn` (<https://github.com/zserge/jsmn>), but case you have installed 
the JSON-C library, the installation script will prefer it, you can also force its use with `--enable-jsonc` in the compilation time. + + diff --git a/libnetdata/json/jsmn.c b/src/libnetdata/json/jsmn.c index 2f48bd65a..2f48bd65a 100644 --- a/libnetdata/json/jsmn.c +++ b/src/libnetdata/json/jsmn.c diff --git a/libnetdata/json/jsmn.h b/src/libnetdata/json/jsmn.h index beff586c6..beff586c6 100644 --- a/libnetdata/json/jsmn.h +++ b/src/libnetdata/json/jsmn.h diff --git a/src/libnetdata/json/json-c-parser-inline.h b/src/libnetdata/json/json-c-parser-inline.h new file mode 100644 index 000000000..be8ae4bfb --- /dev/null +++ b/src/libnetdata/json/json-c-parser-inline.h @@ -0,0 +1,177 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_JSON_C_PARSER_INLINE_H +#define NETDATA_JSON_C_PARSER_INLINE_H + +#define JSONC_PARSE_BOOL_OR_ERROR_AND_RETURN(jobj, path, member, dst, error) do { \ + json_object *_j; \ + if (json_object_object_get_ex(jobj, member, &_j) && json_object_is_type(_j, json_type_boolean)) \ + dst = json_object_get_boolean(_j); \ + else { \ + buffer_sprintf(error, "missing or invalid type for '%s.%s' boolean", path, member); \ + return false; \ + } \ +} while(0) + +#define JSONC_PARSE_TXT2STRING_OR_ERROR_AND_RETURN(jobj, path, member, dst, error, required) do { \ + json_object *_j; \ + if (json_object_object_get_ex(jobj, member, &_j) && json_object_is_type(_j, json_type_string)) { \ + string_freez(dst); \ + dst = string_strdupz(json_object_get_string(_j)); \ + } \ + else if(required) { \ + buffer_sprintf(error, "missing or invalid type for '%s.%s' string", path, member); \ + return false; \ + } \ +} while(0) + +#define JSONC_PARSE_TXT2BUFFER_OR_ERROR_AND_RETURN(jobj, path, member, dst, error, required) do { \ + json_object *_j; \ + if (json_object_object_get_ex(jobj, member, &_j) && json_object_is_type(_j, json_type_string)) { \ + const char *_s = json_object_get_string(_j); \ + if(!_s || !*_s) { \ + buffer_free(dst); \ + dst = NULL; \ + } \ + 
else { \ + if (dst) \ + buffer_flush(dst); \ + else \ + dst = buffer_create(0, NULL); \ + if (_s && *_s) \ + buffer_strcat(dst, _s); \ + } \ + } \ + else if(required) { \ + buffer_sprintf(error, "missing or invalid type for '%s.%s' string", path, member); \ + return false; \ + } \ +} while(0) + +#define JSONC_PARSE_TXT2PATTERN_OR_ERROR_AND_RETURN(jobj, path, member, dst, error) do { \ + json_object *_j; \ + if (json_object_object_get_ex(jobj, member, &_j) && json_object_is_type(_j, json_type_string)) { \ + string_freez(dst); \ + const char *_v = json_object_get_string(_j); \ + if(strcmp(_v, "*") == 0) \ + dst = NULL; \ + else \ + dst = string_strdupz(_v); \ + } \ + else { \ + buffer_sprintf(error, "missing or invalid type for '%s.%s' string", path, member); \ + return false; \ + } \ +} while(0) + +#define JSONC_PARSE_TXT2EXPRESSION_OR_ERROR_AND_RETURN(jobj, path, member, dst, error) do { \ + json_object *_j; \ + if (json_object_object_get_ex(jobj, member, &_j) && json_object_is_type(_j, json_type_string)) { \ + const char *_t = json_object_get_string(_j); \ + if(_t && *_t && strcmp(_t, "*") != 0) { \ + const char *_failed_at = NULL; \ + int _err = 0; \ + expression_free(dst); \ + dst = expression_parse(_t, &_failed_at, &_err); \ + if(!dst) { \ + buffer_sprintf(error, "expression '%s.%s' has a non-parseable expression '%s': %s at '%s'", \ + path, member, _t, expression_strerror(_err), _failed_at); \ + return false; \ + } \ + } \ + } \ + else { \ + buffer_sprintf(error, "missing or invalid type for '%s.%s' expression", path, member); \ + return false; \ + } \ +} while(0) + +#define JSONC_PARSE_ARRAY_OF_TXT2BITMAP_OR_ERROR_AND_RETURN(jobj, path, member, converter, dst, error) do { \ + json_object *_jarray; \ + if (json_object_object_get_ex(jobj, member, &_jarray) && json_object_is_type(_jarray, json_type_array)) { \ + size_t _num_options = json_object_array_length(_jarray); \ + dst = 0; \ + for (size_t _i = 0; _i < _num_options; ++_i) { \ + json_object *_joption = 
json_object_array_get_idx(_jarray, _i); \ + if (!json_object_is_type(_joption, json_type_string)) { \ + buffer_sprintf(error, "invalid type for '%s.%s' at index %zu", path, member, _i); \ + return false; \ + } \ + const char *_option_str = json_object_get_string(_joption); \ + typeof(dst) _bit = converter(_option_str); \ + if (_bit == 0) { \ + buffer_sprintf(error, "unknown option '%s' in '%s.%s' at index %zu", _option_str, path, member, _i); \ + return false; \ + } \ + dst |= _bit; \ + } \ + } else { \ + buffer_sprintf(error, "missing or invalid type for '%s.%s' array", path, member); \ + return false; \ + } \ +} while(0) + + +#define JSONC_PARSE_TXT2ENUM_OR_ERROR_AND_RETURN(jobj, path, member, converter, dst, error) do { \ + json_object *_j; \ + if (json_object_object_get_ex(jobj, member, &_j) && json_object_is_type(_j, json_type_string)) \ + dst = converter(json_object_get_string(_j)); \ + else { \ + buffer_sprintf(error, "missing or invalid type (expected text value) for '%s.%s' enum", path, member); \ + return false; \ + } \ +} while(0) + +#define JSONC_PARSE_INT_OR_ERROR_AND_RETURN(jobj, path, member, dst, error) do { \ + json_object *_j; \ + if (json_object_object_get_ex(jobj, member, &_j)) { \ + if (_j != NULL && json_object_is_type(_j, json_type_int)) \ + dst = json_object_get_int(_j); \ + else if (_j != NULL && json_object_is_type(_j, json_type_double)) \ + dst = (typeof(dst))json_object_get_double(_j); \ + else if (_j == NULL) \ + dst = 0; \ + else { \ + buffer_sprintf(error, "not supported type (expected int) for '%s.%s'", path, member); \ + return false; \ + } \ + } else { \ + buffer_sprintf(error, "missing or invalid type (expected double value or null) for '%s.%s'", path, member); \ + return false; \ + } \ +} while(0) + +#define JSONC_PARSE_DOUBLE_OR_ERROR_AND_RETURN(jobj, path, member, dst, error) do { \ + json_object *_j; \ + if (json_object_object_get_ex(jobj, member, &_j)) { \ + if (_j != NULL && json_object_is_type(_j, json_type_double)) \ + dst 
= json_object_get_double(_j); \ + else if (_j != NULL && json_object_is_type(_j, json_type_int)) \ + dst = (typeof(dst))json_object_get_int(_j); \ + else if (_j == NULL) \ + dst = NAN; \ + else { \ + buffer_sprintf(error, "not supported type (expected double) for '%s.%s'", path, member); \ + return false; \ + } \ + } else { \ + buffer_sprintf(error, "missing or invalid type (expected double value or null) for '%s.%s'", path, member); \ + return false; \ + } \ +} while(0) + +#define JSONC_PARSE_SUBOBJECT(jobj, path, member, dst, callback, error) do { \ + json_object *_j; \ + if (json_object_object_get_ex(jobj, member, &_j)) { \ + char _new_path[strlen(path) + strlen(member) + 2]; \ + snprintfz(_new_path, sizeof(_new_path), "%s%s%s", path, *path?".":"", member); \ + if (!callback(_j, _new_path, dst, error)) { \ + return false; \ + } \ + } else { \ + buffer_sprintf(error, "missing '%s.%s' object", path, member); \ + return false; \ + } \ +} while(0) + +#endif //NETDATA_JSON_C_PARSER_INLINE_H diff --git a/src/libnetdata/json/json.c b/src/libnetdata/json/json.c new file mode 100644 index 000000000..a50f6b542 --- /dev/null +++ b/src/libnetdata/json/json.c @@ -0,0 +1,557 @@ +#include "jsmn.h" +#include "../libnetdata.h" +#include "json.h" +#include "libnetdata/libnetdata.h" +#include "health/health.h" + +#define JSON_TOKENS 1024 + +int json_tokens = JSON_TOKENS; + +/** + * Json Tokenise + * + * Map the string given inside tokens. 
+ * + * @param js is the string used to create the tokens + * @param len is the string length + * @param count the number of tokens present in the string + * + * @return it returns the json parsed in tokens + */ +#ifdef ENABLE_JSONC +json_object *json_tokenise(char *js) { + if(!js) { + netdata_log_error("JSON: json string is empty."); + return NULL; + } + + json_object *token = json_tokener_parse(js); + if(!token) { + netdata_log_error("JSON: Invalid json string."); + return NULL; + } + + return token; +} +#else +jsmntok_t *json_tokenise(char *js, size_t len, size_t *count) +{ + int n = json_tokens; + if(!js || !len) { + netdata_log_error("JSON: json string is empty."); + return NULL; + } + + jsmn_parser parser; + jsmn_init(&parser); + + jsmntok_t *tokens = mallocz(sizeof(jsmntok_t) * n); + if(!tokens) return NULL; + + int ret = jsmn_parse(&parser, js, len, tokens, n); + while (ret == JSMN_ERROR_NOMEM) { + n *= 2; + jsmntok_t *new = reallocz(tokens, sizeof(jsmntok_t) * n); + if(!new) { + freez(tokens); + return NULL; + } + tokens = new; + ret = jsmn_parse(&parser, js, len, tokens, n); + } + + if (ret == JSMN_ERROR_INVAL) { + netdata_log_error("JSON: Invalid json string."); + freez(tokens); + return NULL; + } + else if (ret == JSMN_ERROR_PART) { + netdata_log_error("JSON: Truncated JSON string."); + freez(tokens); + return NULL; + } + + if(count) *count = (size_t)ret; + + if(json_tokens < n) json_tokens = n; + return tokens; +} +#endif + +/** + * Callback Print + * + * Set callback print case necessary and wrinte an information inside a buffer to write in the log. + * + * @param e a pointer for a structure that has the complete information about json structure. 
+ * + * @return It always return 0 + */ +int json_callback_print(JSON_ENTRY *e) +{ + BUFFER *wb=buffer_create(300, NULL); + + buffer_sprintf(wb,"%s = ", e->name); + char txt[50]; + switch(e->type) { + case JSON_OBJECT: + e->callback_function = json_callback_print; + buffer_strcat(wb,"OBJECT"); + break; + + case JSON_ARRAY: + e->callback_function = json_callback_print; + sprintf(txt,"ARRAY[%lu]", (long unsigned int) e->data.items); + buffer_strcat(wb, txt); + break; + + case JSON_STRING: + buffer_strcat(wb, e->data.string); + break; + + case JSON_NUMBER: + sprintf(txt, NETDATA_DOUBLE_FORMAT_AUTO, e->data.number); + buffer_strcat(wb,txt); + + break; + + case JSON_BOOLEAN: + buffer_strcat(wb, e->data.boolean?"TRUE":"FALSE"); + break; + + case JSON_NULL: + buffer_strcat(wb,"NULL"); + break; + } + netdata_log_info("JSON: %s", buffer_tostring(wb)); + buffer_free(wb); + return 0; +} + +/** + * JSONC Set String + * + * Set the string value of the structure JSON_ENTRY. + * + * @param e the output structure + */ +static inline void json_jsonc_set_string(JSON_ENTRY *e,char *key,const char *value) { + size_t len = strlen(key); + if(len > JSON_NAME_LEN) + len = JSON_NAME_LEN; + e->type = JSON_STRING; + memcpy(e->name,key,len); + e->name[len] = 0x00; + e->data.string = (char *) value; +} + + +#ifdef ENABLE_JSONC +/** + * JSONC set Boolean + * + * Set the boolean value of the structure JSON_ENTRY + * + * @param e the output structure + * @param value the input value + */ +static inline void json_jsonc_set_boolean(JSON_ENTRY *e,int value) { + e->type = JSON_BOOLEAN; + e->data.boolean = value; +} + +static inline void json_jsonc_set_integer(JSON_ENTRY *e, char *key, int64_t value) { + size_t len = strlen(key); + if(len > JSON_NAME_LEN) + len = JSON_NAME_LEN; + e->type = JSON_NUMBER; + memcpy(e->name, key, len); + e->name[len] = 0; + e->data.number = (NETDATA_DOUBLE)value; +} + +/** + * Parse Array + * + * Parse the array object. 
+ * + * @param ptr the pointer for the object that we will parse. + * @param callback_data additional data to be used together the callback function + * @param callback_function function used to create a silencer. + */ +static inline void json_jsonc_parse_array(json_object *ptr, void *callback_data,int (*callback_function)(struct json_entry *)) { + int end = json_object_array_length(ptr); + JSON_ENTRY e; + + if(end) { + int i; + i = 0; + + enum json_type type; + do { + json_object *jvalue = json_object_array_get_idx(ptr, i); + if(jvalue) { + e.callback_data = callback_data; + e.type = JSON_OBJECT; + callback_function(&e); + json_object_object_foreach(jvalue, key, val) { + type = json_object_get_type(val); + if (type == json_type_array) { + e.type = JSON_ARRAY; + json_jsonc_parse_array(val, callback_data, callback_function); + } else if (type == json_type_object) { + json_walk(val,callback_data,callback_function); + } else if (type == json_type_string) { + json_jsonc_set_string(&e,key,json_object_get_string(val)); + callback_function(&e); + } else if (type == json_type_boolean) { + json_jsonc_set_boolean(&e,json_object_get_boolean(val)); + callback_function(&e); + } + } + } + + } while (++i < end); + } +} +#else + +/** + * Walk string + * + * Set JSON_ENTRY to string and map the values from jsmntok_t. + * + * @param js the original string + * @param t the tokens + * @param start the first position + * @param e the output structure. 
+ * + * @return It always return 1 + */ +size_t json_walk_string(char *js, jsmntok_t *t, size_t start, JSON_ENTRY *e) +{ + char old = js[t[start].end]; + js[t[start].end] = '\0'; + e->original_string = &js[t[start].start]; + + e->type = JSON_STRING; + e->data.string = e->original_string; + if(e->callback_function) e->callback_function(e); + js[t[start].end] = old; + return 1; +} + +/** + * Walk Primitive + * + * Define the data type of the string + * + * @param js the original string + * @param t the tokens + * @param start the first position + * @param e the output structure. + * + * @return It always return 1 + */ +size_t json_walk_primitive(char *js, jsmntok_t *t, size_t start, JSON_ENTRY *e) +{ + char old = js[t[start].end]; + js[t[start].end] = '\0'; + e->original_string = &js[t[start].start]; + + switch(e->original_string[0]) { + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': + case '8': case '9': case '-': case '.': + e->type = JSON_NUMBER; + e->data.number = strtold(e->original_string, NULL); + break; + + case 't': case 'T': + e->type = JSON_BOOLEAN; + e->data.boolean = 1; + break; + + case 'f': case 'F': + e->type = JSON_BOOLEAN; + e->data.boolean = 0; + break; + + case 'n': case 'N': + default: + e->type = JSON_NULL; + break; + } + if(e->callback_function) e->callback_function(e); + js[t[start].end] = old; + return 1; +} + +/** + * Array + * + * Measure the array length + * + * @param js the original string + * @param t the tokens + * @param nest the length of structure t + * @param start the first position + * @param e the structure with values and callback to be used inside the function. 
+ * + * @return It returns the array length + */ +size_t json_walk_array(char *js, jsmntok_t *t, size_t nest, size_t start, JSON_ENTRY *e) +{ + JSON_ENTRY ne; + + char old = js[t[start].end]; + js[t[start].end] = '\0'; + ne.original_string = &js[t[start].start]; + + memcpy(&ne, e, sizeof(JSON_ENTRY)); + ne.type = JSON_ARRAY; + ne.data.items = t[start].size; + ne.callback_function = e->callback_function; + ne.name[0]='\0'; + ne.fullname[0]='\0'; + if(e->callback_function) e->callback_function(&ne); + js[t[start].end] = old; + + size_t i, init = start, size = t[start].size; + + start++; + for(i = 0; i < size ; i++) { + ne.pos = i; + if (strlen(e->name) > JSON_NAME_LEN - 24 || strlen(e->fullname) > JSON_FULLNAME_LEN -24) { + netdata_log_info("JSON: JSON walk_array ignoring element with name:%s fullname:%s",e->name, e->fullname); + continue; + } + snprintfz(ne.name, JSON_NAME_LEN, "%s[%lu]", e->name, i); + snprintfz(ne.fullname, JSON_FULLNAME_LEN, "%s[%lu]", e->fullname, i); + + switch(t[start].type) { + case JSMN_PRIMITIVE: + start += json_walk_primitive(js, t, start, &ne); + break; + + case JSMN_OBJECT: + start += json_walk_object(js, t, nest + 1, start, &ne); + break; + + case JSMN_ARRAY: + start += json_walk_array(js, t, nest + 1, start, &ne); + break; + + case JSMN_STRING: + start += json_walk_string(js, t, start, &ne); + break; + } + } + return start - init; +} + +/** + * Object + * + * Measure the Object length + * + * @param js the original string + * @param t the tokens + * @param nest the length of structure t + * @param start the first position + * @param e the output structure. 
+ * + * @return It returns the Object length + */ +size_t json_walk_object(char *js, jsmntok_t *t, size_t nest, size_t start, JSON_ENTRY *e) +{ + JSON_ENTRY ne = { + .name = "", + .fullname = "", + .callback_data = NULL, + .callback_function = NULL + }; + + char old = js[t[start].end]; + js[t[start].end] = '\0'; + ne.original_string = &js[t[start].start]; + memcpy(&ne, e, sizeof(JSON_ENTRY)); + ne.type = JSON_OBJECT; + ne.callback_function = e->callback_function; + if(e->callback_function) e->callback_function(&ne); + js[t[start].end] = old; + + int key = 1; + size_t i, init = start, size = t[start].size; + + start++; + for(i = 0; i < size ; i++) { + switch(t[start].type) { + case JSMN_PRIMITIVE: + start += json_walk_primitive(js, t, start, &ne); + key = 1; + break; + + case JSMN_OBJECT: + start += json_walk_object(js, t, nest + 1, start, &ne); + key = 1; + break; + + case JSMN_ARRAY: + start += json_walk_array(js, t, nest + 1, start, &ne); + key = 1; + break; + + case JSMN_STRING: + default: + if(key) { + int len = t[start].end - t[start].start; + if (unlikely(len>JSON_NAME_LEN)) len=JSON_NAME_LEN; + strncpy(ne.name, &js[t[start].start], len); + ne.name[len] = '\0'; + len=strlen(e->fullname) + strlen(e->fullname[0]?".":"") + strlen(ne.name); + char *c = mallocz((len+1)*sizeof(char)); + sprintf(c,"%s%s%s", e->fullname, e->fullname[0]?".":"", ne.name); + if (unlikely(len>JSON_FULLNAME_LEN)) len=JSON_FULLNAME_LEN; + strncpy(ne.fullname, c, len); + freez(c); + start++; + key = 0; + } + else { + start += json_walk_string(js, t, start, &ne); + key = 1; + } + break; + } + } + return start - init; +} +#endif + +/** + * Tree + * + * Call the correct walk function according its type. + * + * @param t the json object to work + * @param callback_data additional data to be used together the callback function + * @param callback_function function used to create a silencer. 
+ * + * @return It always return 1 + */ +#ifdef ENABLE_JSONC +size_t json_walk(json_object *t, void *callback_data, int (*callback_function)(struct json_entry *)) { + JSON_ENTRY e; + + e.callback_data = callback_data; + enum json_type type; + json_object_object_foreach(t, key, val) { + type = json_object_get_type(val); + if (type == json_type_array) { + e.type = JSON_ARRAY; + json_jsonc_parse_array(val,NULL,health_silencers_json_read_callback); + } else if (type == json_type_object) { + e.type = JSON_OBJECT; + } else if (type == json_type_string) { + json_jsonc_set_string(&e,key,json_object_get_string(val)); + callback_function(&e); + } else if (type == json_type_boolean) { + json_jsonc_set_boolean(&e,json_object_get_boolean(val)); + callback_function(&e); + } else if (type == json_type_int) { + json_jsonc_set_integer(&e,key,json_object_get_int64(val)); + callback_function(&e); + } + } + + return 1; +} +#else +/** + * Tree + * + * Call the correct walk function according its type. + * + * @param js the original string + * @param t the tokens + * @param callback_data additional data to be used together the callback function + * @param callback_function function used to create a silencer. 
+ * + * @return It always return 1 + */ +size_t json_walk_tree(char *js, jsmntok_t *t, void *callback_data, int (*callback_function)(struct json_entry *)) +{ + JSON_ENTRY e = { + .name = "", + .fullname = "", + .callback_data = callback_data, + .callback_function = callback_function + }; + + switch (t[0].type) { + case JSMN_OBJECT: + e.type = JSON_OBJECT; + json_walk_object(js, t, 0, 0, &e); + break; + + case JSMN_ARRAY: + e.type = JSON_ARRAY; + json_walk_array(js, t, 0, 0, &e); + break; + + case JSMN_PRIMITIVE: + case JSMN_STRING: + break; + } + + return 1; +} +#endif + +/** + * JSON Parse + * + * Parse the json message with the callback function + * + * @param js the string that the callback function will parse + * @param callback_data additional data to be used together the callback function + * @param callback_function function used to create a silencer. + * + * @return JSON_OK case everything happened as expected, JSON_CANNOT_PARSE case there were errors in the + * parsing process and JSON_CANNOT_DOWNLOAD case the string given(js) is NULL. 
+ */ +int json_parse(char *js, void *callback_data, int (*callback_function)(JSON_ENTRY *)) +{ + if(js) { +#ifdef ENABLE_JSONC + json_object *tokens = json_tokenise(js); +#else + size_t count; + jsmntok_t *tokens = json_tokenise(js, strlen(js), &count); +#endif + + if(tokens) { +#ifdef ENABLE_JSONC + json_walk(tokens, callback_data, callback_function); + json_object_put(tokens); +#else + json_walk_tree(js, tokens, callback_data, callback_function); + freez(tokens); +#endif + return JSON_OK; + } + + return JSON_CANNOT_PARSE; + } + + return JSON_CANNOT_DOWNLOAD; +} + +/* +int json_test(char *str) +{ + return json_parse(str, NULL, json_callback_print); +} + */ + diff --git a/libnetdata/json/json.h b/src/libnetdata/json/json.h index b43f06b50..5c3459ede 100644 --- a/libnetdata/json/json.h +++ b/src/libnetdata/json/json.h @@ -1,7 +1,6 @@ #ifndef CHECKIN_JSON_H #define CHECKIN_JSON_H 1 - #if ENABLE_JSONC #include <json-c/json.h> // fix an older json-c bug @@ -72,6 +71,10 @@ size_t json_walk_primitive(char *js, jsmntok_t *t, size_t start, JSON_ENTRY *e); int json_callback_print(JSON_ENTRY *e); +static inline void cleanup_json_object_pp(struct json_object **jobj) { + if(*jobj) + json_object_put(*jobj); +} +#define CLEAN_JSON_OBJECT _cleanup_(cleanup_json_object_pp) struct json_object - -#endif
\ No newline at end of file +#endif // CHECKIN_JSON_H diff --git a/src/libnetdata/july/README.md b/src/libnetdata/july/README.md new file mode 100644 index 000000000..72c862aae --- /dev/null +++ b/src/libnetdata/july/README.md @@ -0,0 +1,14 @@ +<!-- +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/july/README.md +sidebar_label: "July interface" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + + +# July + +An interface similar to `Judy` that uses minimal allocations (that can be cached) +for items that are mainly appended (just a few insertions in the middle) + diff --git a/libnetdata/july/july.c b/src/libnetdata/july/july.c index 56b8494b3..56b8494b3 100644 --- a/libnetdata/july/july.c +++ b/src/libnetdata/july/july.c diff --git a/libnetdata/july/july.h b/src/libnetdata/july/july.h index 672ed44e4..672ed44e4 100644 --- a/libnetdata/july/july.h +++ b/src/libnetdata/july/july.h diff --git a/libnetdata/libjudy/src/Judy.h b/src/libnetdata/libjudy/src/Judy.h index adfb5b53b..adfb5b53b 100644 --- a/libnetdata/libjudy/src/Judy.h +++ b/src/libnetdata/libjudy/src/Judy.h diff --git a/libnetdata/libjudy/src/JudyCommon/JudyMalloc.c b/src/libnetdata/libjudy/src/JudyCommon/JudyMalloc.c index 09a20e399..09a20e399 100644 --- a/libnetdata/libjudy/src/JudyCommon/JudyMalloc.c +++ b/src/libnetdata/libjudy/src/JudyCommon/JudyMalloc.c diff --git a/libnetdata/libjudy/src/JudyCommon/JudyPrivate.h b/src/libnetdata/libjudy/src/JudyCommon/JudyPrivate.h index 350631f01..350631f01 100644 --- a/libnetdata/libjudy/src/JudyCommon/JudyPrivate.h +++ b/src/libnetdata/libjudy/src/JudyCommon/JudyPrivate.h diff --git a/libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h b/src/libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h index 5b4704899..5b4704899 100644 --- a/libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h +++ b/src/libnetdata/libjudy/src/JudyCommon/JudyPrivate1L.h diff --git 
a/libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h b/src/libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h index 10295ba95..10295ba95 100644 --- a/libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h +++ b/src/libnetdata/libjudy/src/JudyCommon/JudyPrivateBranch.h diff --git a/libnetdata/libjudy/src/JudyHS/JudyHS.c b/src/libnetdata/libjudy/src/JudyHS/JudyHS.c index 21191babc..21191babc 100644 --- a/libnetdata/libjudy/src/JudyHS/JudyHS.c +++ b/src/libnetdata/libjudy/src/JudyHS/JudyHS.c diff --git a/libnetdata/libjudy/src/JudyL/JudyL.h b/src/libnetdata/libjudy/src/JudyL/JudyL.h index d901969d6..d901969d6 100644 --- a/libnetdata/libjudy/src/JudyL/JudyL.h +++ b/src/libnetdata/libjudy/src/JudyL/JudyL.h diff --git a/libnetdata/libjudy/src/JudyL/JudyLByCount.c b/src/libnetdata/libjudy/src/JudyL/JudyLByCount.c index c5a004796..c5a004796 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLByCount.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLByCount.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLCascade.c b/src/libnetdata/libjudy/src/JudyL/JudyLCascade.c index 6b52ddf5f..c1a26f413 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLCascade.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLCascade.c @@ -311,6 +311,7 @@ static int j__udyStageJBBtoJBB( // // NOTE: Caller must release the Leaf2 that was passed in. +__attribute__((no_sanitize("shift"))) FUNCTION static Pjlb_t j__udyJLL2toJLB1( uint16_t * Pjll, // array of 16-bit indexes. 
#ifdef JUDYL diff --git a/libnetdata/libjudy/src/JudyL/JudyLCount.c b/src/libnetdata/libjudy/src/JudyL/JudyLCount.c index 179757f0a..179757f0a 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLCount.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLCount.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c b/src/libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c index ffe6b3bde..ffe6b3bde 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLCreateBranch.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLDecascade.c b/src/libnetdata/libjudy/src/JudyL/JudyLDecascade.c index 39a89eff1..c2bf81ea1 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLDecascade.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLDecascade.c @@ -345,6 +345,7 @@ FUNCTION int j__udyBranchUToBranchB( // allocation and free, in order to allow the caller to continue with a LeafB1 // if allocation fails. +__attribute__((no_sanitize("shift"))) FUNCTION int j__udyLeafB1ToLeaf1( Pjp_t Pjp, // points to LeafB1 to shrink. Pvoid_t Pjpm) // for global accounting. @@ -431,6 +432,7 @@ FUNCTION int j__udyLeafB1ToLeaf1( // TBD: In this and all following functions, the caller should already be able // to compute the Pop1 return value, so why return it? +__attribute__((no_sanitize("shift"))) FUNCTION Word_t j__udyLeaf1ToLeaf2( uint16_t * PLeaf2, // destination uint16_t * Index portion of leaf. #ifdef JUDYL diff --git a/libnetdata/libjudy/src/JudyL/JudyLDel.c b/src/libnetdata/libjudy/src/JudyL/JudyLDel.c index ced4b5fb3..7c3d9108b 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLDel.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLDel.c @@ -147,6 +147,7 @@ extern Word_t j__udyLLeaf7ToLeafW(Pjlw_t, Pjv_t, Pjp_t, Word_t, Pvoid_t); DBGCODE(uint8_t parentJPtype;) // parent branch JP type. +__attribute__((no_sanitize("shift"))) FUNCTION static int j__udyDelWalk( Pjp_t Pjp, // current JP under which to delete. Word_t Index, // to delete. 
diff --git a/libnetdata/libjudy/src/JudyL/JudyLFirst.c b/src/libnetdata/libjudy/src/JudyL/JudyLFirst.c index aaf6639cf..aaf6639cf 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLFirst.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLFirst.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLFreeArray.c b/src/libnetdata/libjudy/src/JudyL/JudyLFreeArray.c index 34fac509e..34fac509e 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLFreeArray.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLFreeArray.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLGet.c b/src/libnetdata/libjudy/src/JudyL/JudyLGet.c index 0bb9971cc..e6853939b 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLGet.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLGet.c @@ -44,6 +44,8 @@ // See the manual entry for details. Note support for "shortcut" entries to // trees known to start with a JPM. +__attribute__((no_sanitize("shift"))) + #ifdef JUDY1 #ifdef JUDYGETINLINE diff --git a/libnetdata/libjudy/src/JudyL/JudyLIns.c b/src/libnetdata/libjudy/src/JudyL/JudyLIns.c index f96df4101..256a1ef3b 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLIns.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLIns.c @@ -152,6 +152,7 @@ extern int j__udyLInsertBranch(Pjp_t Pjp, Word_t Index, Word_t Btype, Pjpm_t); // Return -1 for error (details in JPM), 0 for Index already inserted, 1 for // new Index inserted. +__attribute__((no_sanitize("shift"))) FUNCTION static int j__udyInsWalk( Pjp_t Pjp, // current JP to descend. Word_t Index, // to insert. 
diff --git a/libnetdata/libjudy/src/JudyL/JudyLInsArray.c b/src/libnetdata/libjudy/src/JudyL/JudyLInsArray.c index f8e361f27..f8e361f27 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLInsArray.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLInsArray.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c b/src/libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c index cfa16bd6d..cfa16bd6d 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLInsertBranch.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLMallocIF.c b/src/libnetdata/libjudy/src/JudyL/JudyLMallocIF.c index 9a7d02f21..9a7d02f21 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLMallocIF.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLMallocIF.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLMemActive.c b/src/libnetdata/libjudy/src/JudyL/JudyLMemActive.c index fb58d0e25..fb58d0e25 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLMemActive.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLMemActive.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLMemUsed.c b/src/libnetdata/libjudy/src/JudyL/JudyLMemUsed.c index 81e3a79ce..81e3a79ce 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLMemUsed.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLMemUsed.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLNext.c b/src/libnetdata/libjudy/src/JudyL/JudyLNext.c index 4bcdccf10..4bcdccf10 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLNext.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLNext.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c b/src/libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c index 4da43565d..4da43565d 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLNextEmpty.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLPrev.c b/src/libnetdata/libjudy/src/JudyL/JudyLPrev.c index 4bcdccf10..4bcdccf10 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLPrev.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLPrev.c diff --git 
a/libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c b/src/libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c index 4da43565d..4da43565d 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLPrevEmpty.c diff --git a/libnetdata/libjudy/src/JudyL/JudyLTables.c b/src/libnetdata/libjudy/src/JudyL/JudyLTables.c index 21c974986..21c974986 100644 --- a/libnetdata/libjudy/src/JudyL/JudyLTables.c +++ b/src/libnetdata/libjudy/src/JudyL/JudyLTables.c diff --git a/libnetdata/libjudy/src/JudyL/j__udyLGet.c b/src/libnetdata/libjudy/src/JudyL/j__udyLGet.c index 0bb9971cc..0bb9971cc 100644 --- a/libnetdata/libjudy/src/JudyL/j__udyLGet.c +++ b/src/libnetdata/libjudy/src/JudyL/j__udyLGet.c diff --git a/libnetdata/libnetdata.c b/src/libnetdata/libnetdata.c index 37319a884..2dcd8865c 100644 --- a/libnetdata/libnetdata.c +++ b/src/libnetdata/libnetdata.c @@ -129,6 +129,9 @@ static void (*libc_free)(void *) = free_first_run; static char *strdup_first_run(const char *s); static char *(*libc_strdup)(const char *) = strdup_first_run; +static char *strndup_first_run(const char *s, size_t len); +static char *(*libc_strndup)(const char *, size_t) = strndup_first_run; + static size_t malloc_usable_size_first_run(void *ptr); #ifdef HAVE_MALLOC_USABLE_SIZE static size_t (*libc_malloc_usable_size)(void *) = malloc_usable_size_first_run; @@ -169,6 +172,11 @@ static char *strdup_first_run(const char *s) { return libc_strdup(s); } +static char *strndup_first_run(const char *s, size_t len) { + link_system_library_function((libc_function_t *) &libc_strndup, "strndup", true); + return libc_strndup(s, len); +} + static size_t malloc_usable_size_first_run(void *ptr) { link_system_library_function((libc_function_t *) &libc_malloc_usable_size, "malloc_usable_size", false); @@ -202,6 +210,10 @@ char *strdup(const char *s) { return strdupz(s); } +char *strndup(const char *s, size_t len) { + return strndupz(s, len); +} + size_t malloc_usable_size(void *ptr) { return 
mallocz_usable_size(ptr); } @@ -365,6 +377,30 @@ char *strdupz_int(const char *s, const char *file, const char *function, size_t return (char *)&t->data; } +char *strndupz_int(const char *s, size_t len, const char *file, const char *function, size_t line) { + struct malloc_trace *p = malloc_trace_find_or_create(file, function, line); + size_t size = len + 1; + + size_t_atomic_count(add, p->strdup_calls, 1); + size_t_atomic_count(add, p->allocations, 1); + size_t_atomic_bytes(add, p->bytes, size); + + struct malloc_header *t = (struct malloc_header *)libc_malloc(malloc_header_size + size); + if (unlikely(!t)) fatal("strndupz() cannot allocate %zu bytes of memory (%zu with header).", size, malloc_header_size + size); + t->signature.magic = 0x0BADCAFE; + t->signature.trace = p; + t->signature.size = size; + +#ifdef NETDATA_INTERNAL_CHECKS + for(ssize_t i = 0; i < (ssize_t)sizeof(t->padding) ;i++) // signed to avoid compiler warning when zero-padded + t->padding[i] = 0xFF; +#endif + + memcpy(&t->data, s, size); + t->data[len] = '\0'; + return (char *)&t->data; +} + static struct malloc_header *malloc_get_header(void *ptr, const char *caller, const char *file, const char *function, size_t line) { uint8_t *ret = (uint8_t *)ptr - malloc_header_size; struct malloc_header *t = (struct malloc_header *)ret; @@ -450,6 +486,12 @@ char *strdupz(const char *s) { return t; } +char *strndupz(const char *s, size_t len) { + char *t = strndup(s, len); + if (unlikely(!t)) fatal("Cannot strndup() string '%s' of len %zu", s, len); + return t; +} + // If ptr is NULL, no operation is performed. 
void freez(void *ptr) { free(ptr); @@ -1045,7 +1087,7 @@ void netdata_fix_chart_id(char *s) { static int memory_file_open(const char *filename, size_t size) { // netdata_log_info("memory_file_open('%s', %zu", filename, size); - int fd = open(filename, O_RDWR | O_CREAT | O_NOATIME, 0664); + int fd = open(filename, O_RDWR | O_CREAT | O_NOATIME | O_CLOEXEC, 0664); if (fd != -1) { if (lseek(fd, size, SEEK_SET) == (off_t) size) { if (write(fd, "", 1) == 1) { @@ -1218,37 +1260,6 @@ int netdata_munmap(void *ptr, size_t size) { return munmap(ptr, size); } -int memory_file_save(const char *filename, void *mem, size_t size) { - char tmpfilename[FILENAME_MAX + 1]; - - snprintfz(tmpfilename, FILENAME_MAX, "%s.%ld.tmp", filename, (long) getpid()); - - int fd = open(tmpfilename, O_RDWR | O_CREAT | O_NOATIME, 0664); - if (fd < 0) { - netdata_log_error("Cannot create/open file '%s'.", filename); - return -1; - } - - if (write(fd, mem, size) != (ssize_t) size) { - netdata_log_error("Cannot write to file '%s' %ld bytes.", filename, (long) size); - close(fd); - return -1; - } - - close(fd); - - if (rename(tmpfilename, filename)) { - netdata_log_error("Cannot rename '%s' to '%s'", tmpfilename, filename); - return -1; - } - - return 0; -} - -int fd_is_valid(int fd) { - return fcntl(fd, F_GETFD) != -1 || errno != EBADF; -} - char *fgets_trim_len(char *buf, size_t buf_size, FILE *fp, size_t *len) { char *s = fgets(buf, (int)buf_size, fp); if (!s) return NULL; @@ -1292,88 +1303,55 @@ int snprintfz(char *dst, size_t n, const char *fmt, ...) 
{ return ret; } -/* -// poor man cycle counting -static unsigned long tsc; -void begin_tsc(void) { - unsigned long a, d; - asm volatile ("cpuid\nrdtsc" : "=a" (a), "=d" (d) : "0" (0) : "ebx", "ecx"); - tsc = ((unsigned long)d << 32) | (unsigned long)a; -} -unsigned long end_tsc(void) { - unsigned long a, d; - asm volatile ("rdtscp" : "=a" (a), "=d" (d) : : "ecx"); - return (((unsigned long)d << 32) | (unsigned long)a) - tsc; -} -*/ +static int is_procfs(const char *path, char **reason) { +#if defined(__APPLE__) || defined(__FreeBSD__) + (void)path; + (void)reason; +#else + struct statfs stat; -int recursively_delete_dir(const char *path, const char *reason) { - DIR *dir = opendir(path); - if(!dir) { - netdata_log_error("Cannot read %s directory to be deleted '%s'", reason?reason:"", path); + if (statfs(path, &stat) == -1) { + if (reason) + *reason = "failed to statfs()"; return -1; } - int ret = 0; - struct dirent *de = NULL; - while((de = readdir(dir))) { - if(de->d_type == DT_DIR - && ( - (de->d_name[0] == '.' && de->d_name[1] == '\0') - || (de->d_name[0] == '.' && de->d_name[1] == '.' 
&& de->d_name[2] == '\0') - )) - continue; - - char fullpath[FILENAME_MAX + 1]; - snprintfz(fullpath, FILENAME_MAX, "%s/%s", path, de->d_name); - - if(de->d_type == DT_DIR) { - int r = recursively_delete_dir(fullpath, reason); - if(r > 0) ret += r; - continue; - } - - netdata_log_info("Deleting %s file '%s'", reason?reason:"", fullpath); - if(unlikely(unlink(fullpath) == -1)) - netdata_log_error("Cannot delete %s file '%s'", reason?reason:"", fullpath); - else - ret++; +#if defined PROC_SUPER_MAGIC + if (stat.f_type != PROC_SUPER_MAGIC) { + if (reason) + *reason = "type is not procfs"; + return -1; } +#endif - netdata_log_info("Deleting empty directory '%s'", path); - if(unlikely(rmdir(path) == -1)) - netdata_log_error("Cannot delete empty directory '%s'", path); - else - ret++; - - closedir(dir); +#endif - return ret; + return 0; } -static int is_virtual_filesystem(const char *path, char **reason) { - +static int is_sysfs(const char *path, char **reason) { #if defined(__APPLE__) || defined(__FreeBSD__) (void)path; (void)reason; #else struct statfs stat; - // stat.f_fsid.__val[0] is a file system id - // stat.f_fsid.__val[1] is the inode - // so their combination uniquely identifies the file/dir if (statfs(path, &stat) == -1) { - if(reason) *reason = "failed to statfs()"; + if (reason) + *reason = "failed to statfs()"; return -1; } - if(stat.f_fsid.__val[0] != 0 || stat.f_fsid.__val[1] != 0) { - errno = EINVAL; - if(reason) *reason = "is not a virtual file system"; +#if defined SYSFS_MAGIC + if (stat.f_type != SYSFS_MAGIC) { + if (reason) + *reason = "type is not sysfs"; return -1; } #endif +#endif + return 0; } @@ -1403,11 +1381,11 @@ int verify_netdata_host_prefix(bool log_msg) { path = buffer; snprintfz(path, FILENAME_MAX, "%s/proc", netdata_configured_host_prefix); - if(is_virtual_filesystem(path, &reason) == -1) + if(is_procfs(path, &reason) == -1) goto failed; snprintfz(path, FILENAME_MAX, "%s/sys", netdata_configured_host_prefix); - 
if(is_virtual_filesystem(path, &reason) == -1) + if(is_sysfs(path, &reason) == -1) goto failed; if (netdata_configured_host_prefix && *netdata_configured_host_prefix) { @@ -1525,12 +1503,15 @@ int path_is_file(const char *path, const char *subpath) { return is_file; } -void recursive_config_double_dir_load(const char *user_path, const char *stock_path, const char *subpath, int (*callback)(const char *filename, void *data), void *data, size_t depth) { +void recursive_config_double_dir_load(const char *user_path, const char *stock_path, const char *subpath, int (*callback)(const char *filename, void *data, bool stock_config), void *data, size_t depth) { if(depth > 3) { netdata_log_error("CONFIG: Max directory depth reached while reading user path '%s', stock path '%s', subpath '%s'", user_path, stock_path, subpath); return; } + if(!stock_path) + stock_path = user_path; + char *udir = strdupz_path_subpath(user_path, subpath); char *sdir = strdupz_path_subpath(stock_path, subpath); @@ -1564,7 +1545,7 @@ void recursive_config_double_dir_load(const char *user_path, const char *stock_p len > 5 && !strcmp(&de->d_name[len - 5], ".conf")) { char *filename = strdupz_path_subpath(udir, de->d_name); netdata_log_debug(D_HEALTH, "CONFIG calling callback for user file '%s'", filename); - callback(filename, data); + callback(filename, data, false); freez(filename); continue; } @@ -1612,7 +1593,7 @@ void recursive_config_double_dir_load(const char *user_path, const char *stock_p len > 5 && !strcmp(&de->d_name[len - 5], ".conf")) { char *filename = strdupz_path_subpath(sdir, de->d_name); netdata_log_debug(D_HEALTH, "CONFIG calling callback for stock file '%s'", filename); - callback(filename, data); + callback(filename, data, true); freez(filename); continue; } @@ -1741,6 +1722,11 @@ bool run_command_and_copy_output_to_stdout(const char *command, int max_line_len return true; } + +static int fd_is_valid(int fd) { + return fcntl(fd, F_GETFD) != -1 || errno != EBADF; +} + void 
for_each_open_fd(OPEN_FD_ACTION action, OPEN_FD_EXCLUDE excluded_fds){ int fd; @@ -2014,7 +2000,7 @@ bool rrdr_relative_window_to_absolute(time_t *after, time_t *before, time_t now) } // Returns 1 if an absolute period was requested or 0 if it was a relative period -bool rrdr_relative_window_to_absolute_query(time_t *after, time_t *before, time_t *now_ptr, bool unittest_running) { +bool rrdr_relative_window_to_absolute_query(time_t *after, time_t *before, time_t *now_ptr, bool unittest) { time_t now = now_realtime_sec() - 1; if(now_ptr) @@ -2028,16 +2014,16 @@ bool rrdr_relative_window_to_absolute_query(time_t *after, time_t *before, time_ time_t absolute_minimum_time = now - (10 * 365 * 86400); time_t absolute_maximum_time = now + (1 * 365 * 86400); - if (after_requested < absolute_minimum_time && !unittest_running) + if (after_requested < absolute_minimum_time && !unittest) after_requested = absolute_minimum_time; - if (after_requested > absolute_maximum_time && !unittest_running) + if (after_requested > absolute_maximum_time && !unittest) after_requested = absolute_maximum_time; - if (before_requested < absolute_minimum_time && !unittest_running) + if (before_requested < absolute_minimum_time && !unittest) before_requested = absolute_minimum_time; - if (before_requested > absolute_maximum_time && !unittest_running) + if (before_requested > absolute_maximum_time && !unittest) before_requested = absolute_maximum_time; *before = before_requested; diff --git a/src/libnetdata/libnetdata.h b/src/libnetdata/libnetdata.h new file mode 100644 index 000000000..effb4f8e2 --- /dev/null +++ b/src/libnetdata/libnetdata.h @@ -0,0 +1,601 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_LIB_H +#define NETDATA_LIB_H 1 + +# ifdef __cplusplus +extern "C" { +# endif + +#include "config.h" + +#ifdef ENABLE_OPENSSL +#define ENABLE_HTTPS 1 +#endif + +#ifdef HAVE_LIBDATACHANNEL +#define ENABLE_WEBRTC 1 +#endif + +#define STRINGIFY(x) #x +#define TOSTRING(x) 
STRINGIFY(x) + +#define JUDYHS_INDEX_SIZE_ESTIMATE(key_bytes) (((key_bytes) + sizeof(Word_t) - 1) / sizeof(Word_t) * 4) + +#if defined(NETDATA_DEV_MODE) && !defined(NETDATA_INTERNAL_CHECKS) +#define NETDATA_INTERNAL_CHECKS 1 +#endif + +#ifndef SIZEOF_VOID_P +#error SIZEOF_VOID_P is not defined +#endif + +#if SIZEOF_VOID_P == 4 +#define ENV32BIT 1 +#else +#define ENV64BIT 1 +#endif + +// NETDATA_TRACE_ALLOCATIONS does not work under musl libc, so don't enable it +//#if defined(NETDATA_INTERNAL_CHECKS) && !defined(NETDATA_TRACE_ALLOCATIONS) +//#define NETDATA_TRACE_ALLOCATIONS 1 +//#endif + +#define MALLOC_ALIGNMENT (sizeof(uintptr_t) * 2) +#define size_t_atomic_count(op, var, size) __atomic_## op ##_fetch(&(var), size, __ATOMIC_RELAXED) +#define size_t_atomic_bytes(op, var, size) __atomic_## op ##_fetch(&(var), ((size) % MALLOC_ALIGNMENT)?((size) + MALLOC_ALIGNMENT - ((size) % MALLOC_ALIGNMENT)):(size), __ATOMIC_RELAXED) + +// ---------------------------------------------------------------------------- +// system include files for all netdata C programs + +/* select the memory allocator, based on autoconf findings */ +#if defined(ENABLE_JEMALLOC) + +#if defined(HAVE_JEMALLOC_JEMALLOC_H) +#include <jemalloc/jemalloc.h> +#else // !defined(HAVE_JEMALLOC_JEMALLOC_H) +#include <malloc.h> +#endif // !defined(HAVE_JEMALLOC_JEMALLOC_H) + +#elif defined(ENABLE_TCMALLOC) + +#include <google/tcmalloc.h> + +#else /* !defined(ENABLE_JEMALLOC) && !defined(ENABLE_TCMALLOC) */ + +#if !(defined(__FreeBSD__) || defined(__APPLE__)) +#include <malloc.h> +#endif /* __FreeBSD__ || __APPLE__ */ + +#endif /* !defined(ENABLE_JEMALLOC) && !defined(ENABLE_TCMALLOC) */ + +// ---------------------------------------------------------------------------- + +#if defined(__FreeBSD__) +#include <pthread_np.h> +#define NETDATA_OS_TYPE "freebsd" +#elif defined(__APPLE__) +#define NETDATA_OS_TYPE "macos" +#else +#define NETDATA_OS_TYPE "linux" +#endif /* __FreeBSD__, __APPLE__*/ + +#include <pthread.h> 
+#include <errno.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <stdarg.h> +#include <stddef.h> +#include <ctype.h> +#include <string.h> +#include <strings.h> +#include <arpa/inet.h> +#include <netinet/tcp.h> +#include <sys/ioctl.h> +#include <libgen.h> +#include <dirent.h> +#include <fcntl.h> +#include <getopt.h> +#include <grp.h> +#include <pwd.h> +#include <limits.h> +#include <locale.h> +#include <net/if.h> +#include <poll.h> +#include <signal.h> +#include <syslog.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <sys/socket.h> +#include <sys/syscall.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <sys/un.h> +#include <time.h> +#include <unistd.h> +#include <uuid/uuid.h> +#include <spawn.h> +#include <uv.h> +#include <assert.h> + +// CentOS 7 has older version that doesn't define this +// same goes for MacOS +#ifndef UUID_STR_LEN +#define UUID_STR_LEN (37) +#endif + +#ifdef HAVE_NETINET_IN_H +#include <netinet/in.h> +#endif + +#ifdef HAVE_RESOLV_H +#include <resolv.h> +#endif + +#ifdef HAVE_NETDB_H +#include <netdb.h> +#endif + +#ifdef HAVE_SYS_PRCTL_H +#include <sys/prctl.h> +#endif + +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif + +#ifdef HAVE_SYS_VFS_H +#include <sys/vfs.h> +#endif + +#ifdef HAVE_SYS_STATFS_H +#include <sys/statfs.h> +#endif + +#ifdef HAVE_LINUX_MAGIC_H +#include <linux/magic.h> +#endif + +#ifdef HAVE_SYS_MOUNT_H +#include <sys/mount.h> +#endif + +#ifdef HAVE_SYS_STATVFS_H +#include <sys/statvfs.h> +#endif + +// #1408 +#ifdef MAJOR_IN_MKDEV +#include <sys/mkdev.h> +#endif +#ifdef MAJOR_IN_SYSMACROS +#include <sys/sysmacros.h> +#endif + +#include <math.h> +#include <float.h> + +#if defined(HAVE_INTTYPES_H) +#include <inttypes.h> +#elif defined(HAVE_STDINT_H) +#include <stdint.h> +#endif + +#include <zlib.h> + +#ifdef HAVE_SYS_CAPABILITY_H +#include <sys/capability.h> +#endif + + +// ---------------------------------------------------------------------------- 
+// netdata common definitions + +#define _cleanup_(x) __attribute__((__cleanup__(x))) + +#ifdef HAVE_FUNC_ATTRIBUTE_RETURNS_NONNULL +#define NEVERNULL __attribute__((returns_nonnull)) +#else +#define NEVERNULL +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_NOINLINE +#define NOINLINE __attribute__((noinline)) +#else +#define NOINLINE +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_MALLOC +#define MALLOCLIKE __attribute__((malloc)) +#else +#define MALLOCLIKE +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_FORMAT +#define PRINTFLIKE(f, a) __attribute__ ((format(__printf__, f, a))) +#else +#define PRINTFLIKE(f, a) +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_NORETURN +#define NORETURN __attribute__ ((noreturn)) +#else +#define NORETURN +#endif + +#ifdef HAVE_FUNC_ATTRIBUTE_WARN_UNUSED_RESULT +#define WARNUNUSED __attribute__ ((warn_unused_result)) +#else +#define WARNUNUSED +#endif + +void aral_judy_init(void); +size_t judy_aral_overhead(void); +size_t judy_aral_structures(void); + +#define ABS(x) (((x) < 0)? (-(x)) : (x)) +#define MIN(a,b) (((a)<(b))?(a):(b)) +#define MAX(a,b) (((a)>(b))?(a):(b)) +#define SWAP(a, b) do { \ + typeof(a) _tmp = b; \ + b = a; \ + a = _tmp; \ +} while(0) + +#define GUID_LEN 36 + +#include "linked-lists.h" +#include "storage-point.h" + +void netdata_fix_chart_id(char *s); +void netdata_fix_chart_name(char *s); + +int madvise_sequential(void *mem, size_t len); +int madvise_random(void *mem, size_t len); +int madvise_dontfork(void *mem, size_t len); +int madvise_willneed(void *mem, size_t len); +int madvise_dontneed(void *mem, size_t len); +int madvise_dontdump(void *mem, size_t len); +int madvise_mergeable(void *mem, size_t len); + +int vsnprintfz(char *dst, size_t n, const char *fmt, va_list args); +int snprintfz(char *dst, size_t n, const char *fmt, ...) 
PRINTFLIKE(3, 4); + +// memory allocation functions that handle failures +#ifdef NETDATA_TRACE_ALLOCATIONS +int malloc_trace_walkthrough(int (*callback)(void *item, void *data), void *data); + +#define strdupz(s) strdupz_int(s, __FILE__, __FUNCTION__, __LINE__) +#define strndupz(s, len) strndupz_int(s, len, __FILE__, __FUNCTION__, __LINE__) +#define callocz(nmemb, size) callocz_int(nmemb, size, __FILE__, __FUNCTION__, __LINE__) +#define mallocz(size) mallocz_int(size, __FILE__, __FUNCTION__, __LINE__) +#define reallocz(ptr, size) reallocz_int(ptr, size, __FILE__, __FUNCTION__, __LINE__) +#define freez(ptr) freez_int(ptr, __FILE__, __FUNCTION__, __LINE__) +#define mallocz_usable_size(ptr) mallocz_usable_size_int(ptr, __FILE__, __FUNCTION__, __LINE__) + +char *strdupz_int(const char *s, const char *file, const char *function, size_t line); +char *strndupz_int(const char *s, size_t len, const char *file, const char *function, size_t line); +void *callocz_int(size_t nmemb, size_t size, const char *file, const char *function, size_t line); +void *mallocz_int(size_t size, const char *file, const char *function, size_t line); +void *reallocz_int(void *ptr, size_t size, const char *file, const char *function, size_t line); +void freez_int(void *ptr, const char *file, const char *function, size_t line); +size_t mallocz_usable_size_int(void *ptr, const char *file, const char *function, size_t line); + +#else // NETDATA_TRACE_ALLOCATIONS +char *strdupz(const char *s) MALLOCLIKE NEVERNULL; +char *strndupz(const char *s, size_t len) MALLOCLIKE NEVERNULL; +void *callocz(size_t nmemb, size_t size) MALLOCLIKE NEVERNULL; +void *mallocz(size_t size) MALLOCLIKE NEVERNULL; +void *reallocz(void *ptr, size_t size) MALLOCLIKE NEVERNULL; +void freez(void *ptr); +#endif // NETDATA_TRACE_ALLOCATIONS + +void posix_memfree(void *ptr); + +void json_escape_string(char *dst, const char *src, size_t size); +void json_fix_string(char *s); + +void *netdata_mmap(const char *filename, size_t size, 
int flags, int ksm, bool read_only, int *open_fd); +int netdata_munmap(void *ptr, size_t size); +int memory_file_save(const char *filename, void *mem, size_t size); + +extern struct rlimit rlimit_nofile; + +extern int enable_ksm; + +char *fgets_trim_len(char *buf, size_t buf_size, FILE *fp, size_t *len); + +int verify_netdata_host_prefix(bool log_msg); + +extern volatile sig_atomic_t netdata_exit; + +extern const char *program_version; + +char *strdupz_path_subpath(const char *path, const char *subpath); +int path_is_dir(const char *path, const char *subpath); +int path_is_file(const char *path, const char *subpath); +void recursive_config_double_dir_load( + const char *user_path + , const char *stock_path + , const char *subpath + , int (*callback)(const char *filename, void *data, bool stock_config) + , void *data + , size_t depth +); +char *read_by_filename(const char *filename, long *file_size); +char *find_and_replace(const char *src, const char *find, const char *replace, const char *where); + +/* fix for alpine linux */ +#ifndef RUSAGE_THREAD +#ifdef RUSAGE_CHILDREN +#define RUSAGE_THREAD RUSAGE_CHILDREN +#endif +#endif + +#define BITS_IN_A_KILOBIT 1000 +#define KILOBITS_IN_A_MEGABIT 1000 + +/* misc. */ + +#define UNUSED(x) (void)(x) + +#ifdef __GNUC__ +#define UNUSED_FUNCTION(x) __attribute__((unused)) UNUSED_##x +#else +#define UNUSED_FUNCTION(x) UNUSED_##x +#endif + +#define error_report(x, args...) 
do { errno = 0; netdata_log_error(x, ##args); } while(0) + +// Taken from linux kernel +#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) + +#include "bitmap.h" + +#define COMPRESSION_MAX_CHUNK 0x4000 +#define COMPRESSION_MAX_OVERHEAD 128 +#define COMPRESSION_MAX_MSG_SIZE (COMPRESSION_MAX_CHUNK - COMPRESSION_MAX_OVERHEAD - 1) +#define PLUGINSD_LINE_MAX (COMPRESSION_MAX_MSG_SIZE - 768) + +bool run_command_and_copy_output_to_stdout(const char *command, int max_line_length); +struct web_buffer *run_command_and_get_output_to_buffer(const char *command, int max_line_length); + +typedef enum { + OPEN_FD_ACTION_CLOSE, + OPEN_FD_ACTION_FD_CLOEXEC +} OPEN_FD_ACTION; +typedef enum { + OPEN_FD_EXCLUDE_STDIN = 0x01, + OPEN_FD_EXCLUDE_STDOUT = 0x02, + OPEN_FD_EXCLUDE_STDERR = 0x04 +} OPEN_FD_EXCLUDE; +void for_each_open_fd(OPEN_FD_ACTION action, OPEN_FD_EXCLUDE excluded_fds); + +void netdata_cleanup_and_exit(int ret, const char *action, const char *action_result, const char *action_data) NORETURN; +extern char *netdata_configured_host_prefix; + +#define XXH_INLINE_ALL +#include "xxhash.h" + +#include "uuid/uuid.h" +#include "template-enum.h" +#include "http/http_access.h" +#include "http/content_type.h" +#include "config/dyncfg.h" +#include "libjudy/src/Judy.h" +#include "july/july.h" +#include "os.h" +#include "threads/threads.h" +#include "buffer/buffer.h" +#include "locks/locks.h" +#include "circular_buffer/circular_buffer.h" +#include "avl/avl.h" +#include "inlined.h" +#include "line_splitter/line_splitter.h" +#include "clocks/clocks.h" +#include "datetime/iso8601.h" +#include "datetime/rfc3339.h" +#include "datetime/rfc7231.h" +#include "completion/completion.h" +#include "popen/popen.h" +#include "simple_pattern/simple_pattern.h" +#ifdef ENABLE_HTTPS +# include "socket/security.h" +#endif +#include "socket/socket.h" +#include "config/appconfig.h" +#include "log/journal.h" +#include "log/log.h" +#include "buffered_reader/buffered_reader.h" +#include 
"procfile/procfile.h" +#include "string/string.h" +#include "dictionary/dictionary.h" +#include "dictionary/thread-cache.h" +#if defined(HAVE_LIBBPF) && !defined(__cplusplus) +#include "ebpf/ebpf.h" +#endif +#include "eval/eval.h" +#include "statistical/statistical.h" +#include "adaptive_resortable_list/adaptive_resortable_list.h" +#include "url/url.h" +#include "json/json.h" +#include "json/json-c-parser-inline.h" +#include "string/utf8.h" +#include "libnetdata/aral/aral.h" +#include "onewayalloc/onewayalloc.h" +#include "worker_utilization/worker_utilization.h" +#include "yaml.h" +#include "http/http_defs.h" +#include "gorilla/gorilla.h" +#include "facets/facets.h" +#include "functions_evloop/functions_evloop.h" +#include "query_progress/progress.h" + +// BEWARE: this exists in alarm-notify.sh +#define DEFAULT_CLOUD_BASE_URL "https://app.netdata.cloud" + +#define RRD_STORAGE_TIERS 5 + +static inline size_t struct_natural_alignment(size_t size) __attribute__((const)); + +#define STRUCT_NATURAL_ALIGNMENT (sizeof(uintptr_t) * 2) +static inline size_t struct_natural_alignment(size_t size) { + if(unlikely(size % STRUCT_NATURAL_ALIGNMENT)) + size = size + STRUCT_NATURAL_ALIGNMENT - (size % STRUCT_NATURAL_ALIGNMENT); + + return size; +} + +#ifdef NETDATA_TRACE_ALLOCATIONS +struct malloc_trace { + avl_t avl; + + const char *function; + const char *file; + size_t line; + + size_t malloc_calls; + size_t calloc_calls; + size_t realloc_calls; + size_t strdup_calls; + size_t free_calls; + + size_t mmap_calls; + size_t munmap_calls; + + size_t allocations; + size_t bytes; + + struct rrddim *rd_bytes; + struct rrddim *rd_allocations; + struct rrddim *rd_avg_alloc; + struct rrddim *rd_ops; +}; +#endif // NETDATA_TRACE_ALLOCATIONS + +static inline PPvoid_t JudyLFirstThenNext(Pcvoid_t PArray, Word_t * PIndex, bool *first) { + if(unlikely(*first)) { + *first = false; + return JudyLFirst(PArray, PIndex, PJE0); + } + + return JudyLNext(PArray, PIndex, PJE0); +} + +static inline 
PPvoid_t JudyLLastThenPrev(Pcvoid_t PArray, Word_t * PIndex, bool *first) { + if(unlikely(*first)) { + *first = false; + return JudyLLast(PArray, PIndex, PJE0); + } + + return JudyLPrev(PArray, PIndex, PJE0); +} + +typedef enum { + TIMING_STEP_INTERNAL = 0, + + TIMING_STEP_BEGIN2_PREPARE, + TIMING_STEP_BEGIN2_FIND_CHART, + TIMING_STEP_BEGIN2_PARSE, + TIMING_STEP_BEGIN2_ML, + TIMING_STEP_BEGIN2_PROPAGATE, + TIMING_STEP_BEGIN2_STORE, + + TIMING_STEP_SET2_PREPARE, + TIMING_STEP_SET2_LOOKUP_DIMENSION, + TIMING_STEP_SET2_PARSE, + TIMING_STEP_SET2_ML, + TIMING_STEP_SET2_PROPAGATE, + TIMING_STEP_RRDSET_STORE_METRIC, + TIMING_STEP_DBENGINE_FIRST_CHECK, + TIMING_STEP_DBENGINE_CHECK_DATA, + TIMING_STEP_DBENGINE_PACK, + TIMING_STEP_DBENGINE_PAGE_FIN, + TIMING_STEP_DBENGINE_MRG_UPDATE, + TIMING_STEP_DBENGINE_PAGE_ALLOC, + TIMING_STEP_DBENGINE_CREATE_NEW_PAGE, + TIMING_STEP_DBENGINE_FLUSH_PAGE, + TIMING_STEP_SET2_STORE, + + TIMING_STEP_END2_PREPARE, + TIMING_STEP_END2_PUSH_V1, + TIMING_STEP_END2_ML, + TIMING_STEP_END2_RRDSET, + TIMING_STEP_END2_PROPAGATE, + TIMING_STEP_END2_STORE, + + TIMING_STEP_FREEIPMI_CTX_CREATE, + TIMING_STEP_FREEIPMI_DSR_CACHE_DIR, + TIMING_STEP_FREEIPMI_SENSOR_CONFIG_FILE, + TIMING_STEP_FREEIPMI_SENSOR_READINGS_BY_X, + TIMING_STEP_FREEIPMI_READ_record_id, + TIMING_STEP_FREEIPMI_READ_sensor_number, + TIMING_STEP_FREEIPMI_READ_sensor_type, + TIMING_STEP_FREEIPMI_READ_sensor_name, + TIMING_STEP_FREEIPMI_READ_sensor_state, + TIMING_STEP_FREEIPMI_READ_sensor_units, + TIMING_STEP_FREEIPMI_READ_sensor_bitmask_type, + TIMING_STEP_FREEIPMI_READ_sensor_bitmask, + TIMING_STEP_FREEIPMI_READ_sensor_bitmask_strings, + TIMING_STEP_FREEIPMI_READ_sensor_reading_type, + TIMING_STEP_FREEIPMI_READ_sensor_reading, + TIMING_STEP_FREEIPMI_READ_event_reading_type_code, + TIMING_STEP_FREEIPMI_READ_record_type, + TIMING_STEP_FREEIPMI_READ_record_type_class, + TIMING_STEP_FREEIPMI_READ_sel_state, + TIMING_STEP_FREEIPMI_READ_event_direction, + 
TIMING_STEP_FREEIPMI_READ_event_type_code, + TIMING_STEP_FREEIPMI_READ_event_offset_type, + TIMING_STEP_FREEIPMI_READ_event_offset, + TIMING_STEP_FREEIPMI_READ_event_offset_string, + TIMING_STEP_FREEIPMI_READ_manufacturer_id, + + // terminator + TIMING_STEP_MAX, +} TIMING_STEP; + +typedef enum { + TIMING_ACTION_INIT, + TIMING_ACTION_STEP, + TIMING_ACTION_FINISH, +} TIMING_ACTION; + +#ifdef NETDATA_TIMING_REPORT +#define timing_init() timing_action(TIMING_ACTION_INIT, TIMING_STEP_INTERNAL) +#define timing_step(step) timing_action(TIMING_ACTION_STEP, step) +#define timing_report() timing_action(TIMING_ACTION_FINISH, TIMING_STEP_INTERNAL) +#else +#define timing_init() debug_dummy() +#define timing_step(step) debug_dummy() +#define timing_report() debug_dummy() +#endif +void timing_action(TIMING_ACTION action, TIMING_STEP step); + +int hash256_string(const unsigned char *string, size_t size, char *hash); + +extern bool unittest_running; +#define API_RELATIVE_TIME_MAX (3 * 365 * 86400) + +bool rrdr_relative_window_to_absolute(time_t *after, time_t *before, time_t now); +bool rrdr_relative_window_to_absolute_query(time_t *after, time_t *before, time_t *now_ptr, bool unittest); + +int netdata_base64_decode(const char *encoded, char *decoded, size_t decoded_size); + +static inline void freez_charp(char **p) { + freez(*p); +} + +static inline void freez_const_charp(const char **p) { + freez((void *)*p); +} + +#define CLEAN_CONST_CHAR_P _cleanup_(freez_const_charp) const char +#define CLEAN_CHAR_P _cleanup_(freez_charp) char + +# ifdef __cplusplus +} +# endif + +#endif // NETDATA_LIB_H diff --git a/src/libnetdata/line_splitter/README.md b/src/libnetdata/line_splitter/README.md new file mode 100644 index 000000000..b391a492c --- /dev/null +++ b/src/libnetdata/line_splitter/README.md @@ -0,0 +1,14 @@ +<!-- +title: "Log" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/log/README.md +sidebar_label: "Log" +learn_status: "Published" 
+learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# Log + +The netdata log library supports debug, info, error and fatal error logging. +By default we have an access log, an error log and a collectors log. + diff --git a/libnetdata/line_splitter/line_splitter.c b/src/libnetdata/line_splitter/line_splitter.c index a459d3347..6726d9096 100644 --- a/libnetdata/line_splitter/line_splitter.c +++ b/src/libnetdata/line_splitter/line_splitter.c @@ -56,14 +56,23 @@ inline int group_by_label_isspace(char c) { return 0; } +inline int dyncfg_id_isspace(char c) { + if(c == ':') + return 1; + + return 0; +} + bool isspace_map_pluginsd[256] = {}; bool isspace_map_config[256] = {}; bool isspace_map_group_by_label[256] = {}; +bool isspace_dyncfg_id_map[256] = {}; __attribute__((constructor)) void initialize_is_space_arrays(void) { for(int c = 0; c < 256 ; c++) { isspace_map_pluginsd[c] = pluginsd_isspace((char) c); isspace_map_config[c] = config_isspace((char) c); isspace_map_group_by_label[c] = group_by_label_isspace((char) c); + isspace_dyncfg_id_map[c] = dyncfg_id_isspace((char)c); } } diff --git a/libnetdata/line_splitter/line_splitter.h b/src/libnetdata/line_splitter/line_splitter.h index b5a59ad3a..968930410 100644 --- a/libnetdata/line_splitter/line_splitter.h +++ b/src/libnetdata/line_splitter/line_splitter.h @@ -22,10 +22,12 @@ static inline void line_splitter_reset(struct line_splitter *line) { int pluginsd_isspace(char c); int config_isspace(char c); int group_by_label_isspace(char c); +int dyncfg_id_isspace(char c); extern bool isspace_map_pluginsd[256]; extern bool isspace_map_config[256]; extern bool isspace_map_group_by_label[256]; +extern bool isspace_dyncfg_id_map[256]; static inline size_t quoted_strings_splitter(char *str, char **words, size_t max_words, bool *isspace_map) { char *s = str, quote = 0; @@ -110,6 +112,9 @@ static inline size_t quoted_strings_splitter(char *str, char **words, size_t max #define 
quoted_strings_splitter_pluginsd(str, words, max_words) \ quoted_strings_splitter(str, words, max_words, isspace_map_pluginsd) +#define quoted_strings_splitter_dyncfg_id(str, words, max_words) \ + quoted_strings_splitter(str, words, max_words, isspace_dyncfg_id_map) + static inline char *get_word(char **words, size_t num_words, size_t index) { if (unlikely(index >= num_words)) return NULL; diff --git a/src/libnetdata/linked-lists.h b/src/libnetdata/linked-lists.h new file mode 100644 index 000000000..033d11226 --- /dev/null +++ b/src/libnetdata/linked-lists.h @@ -0,0 +1,133 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_LINKED_LISTS_H +#define NETDATA_LINKED_LISTS_H + +// --------------------------------------------------------------------------------------------- +// double linked list management +// inspired by https://github.com/troydhanson/uthash/blob/master/src/utlist.h + +#define DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(head, item, prev, next) \ + do { \ + (item)->next = (head); \ + \ + if(likely(head)) { \ + (item)->prev = (head)->prev; \ + (head)->prev = (item); \ + } \ + else \ + (item)->prev = (item); \ + \ + (head) = (item); \ + } while (0) + +#define DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(head, item, prev, next) \ + do { \ + \ + (item)->next = NULL; \ + \ + if(likely(head)) { \ + (item)->prev = (head)->prev; \ + (head)->prev->next = (item); \ + (head)->prev = (item); \ + } \ + else { \ + (item)->prev = (item); \ + (head) = (item); \ + } \ + \ + } while (0) + +#define DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(head, item, prev, next) \ + do { \ + fatal_assert((head) != NULL); \ + fatal_assert((item)->prev != NULL); \ + \ + if((item)->prev == (item)) \ + /* it is the only item in the list */ \ + (head) = NULL; \ + \ + else if((item) == (head)) { \ + /* it is the first item */ \ + fatal_assert((item)->next != NULL); \ + (item)->next->prev = (item)->prev; \ + (head) = (item)->next; \ + } \ + else { \ + /* it is any other item */ \ + 
(item)->prev->next = (item)->next; \ + \ + if ((item)->next) \ + (item)->next->prev = (item)->prev; \ + else \ + (head)->prev = (item)->prev; \ + } \ + \ + (item)->next = NULL; \ + (item)->prev = NULL; \ + } while (0) + +#define DOUBLE_LINKED_LIST_INSERT_ITEM_BEFORE_UNSAFE(head, existing, item, prev, next) \ + do { \ + if (existing) { \ + fatal_assert((head) != NULL); \ + fatal_assert((item) != NULL); \ + \ + (item)->next = (existing); \ + (item)->prev = (existing)->prev; \ + (existing)->prev = (item); \ + \ + if ((head) == (existing)) \ + (head) = (item); \ + else \ + (item)->prev->next = (item); \ + \ + } \ + else \ + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(head, item, prev, next); \ + \ + } while (0) + +#define DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(head, existing, item, prev, next) \ + do { \ + if (existing) { \ + fatal_assert((head) != NULL); \ + fatal_assert((item) != NULL); \ + \ + (item)->next = (existing)->next; \ + (item)->prev = (existing); \ + (existing)->next = (item); \ + \ + if ((item)->next) \ + (item)->next->prev = (item); \ + else \ + (head)->prev = (item); \ + } \ + else \ + DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(head, item, prev, next); \ + \ + } while (0) + +#define DOUBLE_LINKED_LIST_APPEND_LIST_UNSAFE(head, head2, prev, next) \ + do { \ + if (head2) { \ + if (head) { \ + __typeof(head2) _head2_last_item = (head2)->prev; \ + \ + (head2)->prev = (head)->prev; \ + (head)->prev->next = (head2); \ + \ + (head)->prev = _head2_last_item; \ + } \ + else \ + (head) = (head2); \ + } \ + } while (0) + +#define DOUBLE_LINKED_LIST_FOREACH_FORWARD(head, var, prev, next) \ + for ((var) = (head); (var) ; (var) = (var)->next) + +#define DOUBLE_LINKED_LIST_FOREACH_BACKWARD(head, var, prev, next) \ + for ((var) = (head) ? (head)->prev : NULL ; (var) ; (var) = ((var) == (head)) ? 
NULL : (var)->prev) + +#endif //NETDATA_LINKED_LISTS_H diff --git a/src/libnetdata/locks/README.md b/src/libnetdata/locks/README.md new file mode 100644 index 000000000..35d602f2a --- /dev/null +++ b/src/libnetdata/locks/README.md @@ -0,0 +1,107 @@ +<!-- +title: "Locks" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/locks/README.md +sidebar_label: "Locks" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# Locks + +## How to trace netdata locks + +To enable tracing rwlocks in netdata, compile netdata by setting `CFLAGS="-DNETDATA_TRACE_RWLOCKS=1"`, like this: + +``` +CFLAGS="-O1 -ggdb -DNETDATA_TRACE_RWLOCKS=1" ./netdata-installer.sh +``` + +During compilation, the compiler will log: + +``` +libnetdata/locks/locks.c:105:2: warning: #warning NETDATA_TRACE_RWLOCKS ENABLED - EXPECT A LOT OF OUTPUT [-Wcpp] + 105 | #warning NETDATA_TRACE_RWLOCKS ENABLED - EXPECT A LOT OF OUTPUT + | ^~~~~~~ +``` + +Once compiled, netdata will do the following: + +Every call to `netdata_rwlock_*()` is now measured in time. + +### logging of slow locks/unlocks + +If any call takes more than 10 usec, it will be logged like this: + +``` +RW_LOCK ON LOCK 0x0x7fbe1f2e5190: 4157038, 'ACLK_Query_2' (function build_context_param_list() 99@web/api/formatters/rrd2json.c) WAITED to UNLOCK for 29 usec. +``` + +The time can be changed by setting this `-DNETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC=20` (or whatever number) to the CFLAGS. + +### logging of long hold times + +If any lock is holded for more than 10000 usec, it will be logged like this: + +``` +RW_LOCK ON LOCK 0x0x55a20afc1b20: 4187198, 'ANALYTICS' (function analytics_gather_mutable_meta_data() 532@daemon/analytics.c) holded a 'R' for 13232 usec. +``` + +The time can be changed by setting this `-DNETDATA_TRACE_RWLOCKS_HOLD_TIME_TO_IGNORE_USEC=20000` (or whatever number) to the CFLAGS. 
+ +### logging for probable pauses (predictive) + +The library maintains a linked-list of all the lock holders (one entry per thread). For this linked-list a mutex is used. So every call to the r/w locks now also has a mutex lock. + +If any call is expected to pause the caller (ie the caller is attempting a read lock while there is a write lock in place and vice versa), the library will log something like this: + +``` +RW_LOCK ON LOCK 0x0x5651c9fcce20: 4190039 'HEALTH' (function health_execute_pending_updates() 661@health/health.c) WANTS a 'W' lock (while holding 1 rwlocks and 1 mutexes). +There are 7 readers and 0 writers are holding the lock: + => 1: RW_LOCK: process 4190091 'WEB_SERVER[static14]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 709847 usec. + => 2: RW_LOCK: process 4190079 'WEB_SERVER[static6]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 709869 usec. + => 3: RW_LOCK: process 4190084 'WEB_SERVER[static10]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 709948 usec. + => 4: RW_LOCK: process 4190076 'WEB_SERVER[static3]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710190 usec. + => 5: RW_LOCK: process 4190092 'WEB_SERVER[static15]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710195 usec. + => 6: RW_LOCK: process 4190077 'WEB_SERVER[static4]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710208 usec. + => 7: RW_LOCK: process 4190044 'WEB_SERVER[static1]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710221 usec. 
+``` + +And each of the above is paired with a `GOT` log, like this: + +``` +RW_LOCK ON LOCK 0x0x5651c9fcce20: 4190039 'HEALTH' (function health_execute_pending_updates() 661@health/health.c) GOT a 'W' lock (while holding 2 rwlocks and 1 mutexes). +There are 0 readers and 1 writers are holding the lock: + => 1: RW_LOCK: process 4190039 'HEALTH' (function health_execute_pending_updates() 661@health/health.c) is having 1 'W' lock for 36 usec. +``` + +Keep in mind that taking the lock and logging it are not atomic. The list of callers is indicative (and sometimes just empty, because the original holders of the lock released it before we had the chance to print their names). + +### POSIX compliance check + +The library may also log messages about cases that are not supported by POSIX, like this: + +``` +RW_LOCK FATAL ON LOCK 0x0x622000109290: 3609368 'PLUGIN[proc]' (function __rrdset_check_rdlock() 10@database/rrdset.c) attempts to acquire a 'W' lock. +But it is not supported by POSIX because: ALREADY HAS THIS LOCK +At this attempt, the task is holding 1 rwlocks and 1 mutexes. +There are 1 readers and 0 writers are holding the lock requested now: + => 1: RW_LOCK: process 3609368 'PLUGIN[proc]' (function rrdset_done() 1398@database/rrdset.c) is having 1 'R' lock for 0 usec. +``` + +### nested read locks + +When compiled with `-DNETDATA_TRACE_RWLOCKS_LOG_NESTED=1` the library will also detect nested read locks and print them like this: + +``` +RW_LOCK ON LOCK 0x0x7ff6ea46d190: 4140225 'WEB_SERVER[static14]' (function rrdr_json_wrapper_begin() 34@web/api/formatters/json_wrapper.c) NESTED READ LOCK REQUEST a 'R' lock (while holding 1 rwlocks and 1 mutexes). +There are 5 readers and 0 writers are holding the lock: + => 1: RW_LOCK: process 4140225 'WEB_SERVER[static14]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 216667 usec. 
+ => 2: RW_LOCK: process 4140211 'WEB_SERVER[static6]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec. + => 3: RW_LOCK: process 4140218 'WEB_SERVER[static8]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec. + => 4: RW_LOCK: process 4140224 'WEB_SERVER[static13]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec. + => 5: RW_LOCK: process 4140227 'WEB_SERVER[static16]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec. +``` + + + diff --git a/libnetdata/locks/locks.c b/src/libnetdata/locks/locks.c index 625dd052c..adf683af2 100644 --- a/libnetdata/locks/locks.c +++ b/src/libnetdata/locks/locks.c @@ -297,14 +297,15 @@ void spinlock_init(SPINLOCK *spinlock) { memset(spinlock, 0, sizeof(SPINLOCK)); } -void spinlock_lock(SPINLOCK *spinlock) { +static inline void spinlock_lock_internal(SPINLOCK *spinlock, bool cancelable) { static const struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 }; #ifdef NETDATA_INTERNAL_CHECKS size_t spins = 0; #endif - netdata_thread_disable_cancelability(); + if (!cancelable) + netdata_thread_disable_cancelability(); for(int i = 1; __atomic_load_n(&spinlock->locked, __ATOMIC_RELAXED) || @@ -329,16 +330,19 @@ void spinlock_lock(SPINLOCK *spinlock) { #endif } -void spinlock_unlock(SPINLOCK *spinlock) { +static inline void spinlock_unlock_internal(SPINLOCK *spinlock, bool cancelable) { #ifdef NETDATA_INTERNAL_CHECKS spinlock->locker_pid = 0; #endif __atomic_clear(&spinlock->locked, __ATOMIC_RELEASE); - netdata_thread_enable_cancelability(); + + if (!cancelable) + netdata_thread_enable_cancelability(); } -bool spinlock_trylock(SPINLOCK *spinlock) { - netdata_thread_disable_cancelability(); +static inline bool spinlock_trylock_internal(SPINLOCK *spinlock, bool cancelable) { + if (!cancelable) + netdata_thread_disable_cancelability(); if(!__atomic_load_n(&spinlock->locked, 
__ATOMIC_RELAXED) && !__atomic_test_and_set(&spinlock->locked, __ATOMIC_ACQUIRE)) @@ -346,10 +350,41 @@ bool spinlock_trylock(SPINLOCK *spinlock) { return true; // we didn't get the lock - netdata_thread_enable_cancelability(); + if (!cancelable) + netdata_thread_enable_cancelability(); return false; } +void spinlock_lock(SPINLOCK *spinlock) +{ + spinlock_lock_internal(spinlock, false); +} + +void spinlock_unlock(SPINLOCK *spinlock) +{ + spinlock_unlock_internal(spinlock, false); +} + +bool spinlock_trylock(SPINLOCK *spinlock) +{ + return spinlock_trylock_internal(spinlock, false); +} + +void spinlock_lock_cancelable(SPINLOCK *spinlock) +{ + spinlock_lock_internal(spinlock, true); +} + +void spinlock_unlock_cancelable(SPINLOCK *spinlock) +{ + spinlock_unlock_internal(spinlock, true); +} + +bool spinlock_trylock_cancelable(SPINLOCK *spinlock) +{ + return spinlock_trylock_internal(spinlock, true); +} + // ---------------------------------------------------------------------------- // rw_spinlock implementation diff --git a/libnetdata/locks/locks.h b/src/libnetdata/locks/locks.h index 6b492ae47..09adfb41f 100644 --- a/libnetdata/locks/locks.h +++ b/src/libnetdata/locks/locks.h @@ -25,6 +25,10 @@ void spinlock_lock(SPINLOCK *spinlock); void spinlock_unlock(SPINLOCK *spinlock); bool spinlock_trylock(SPINLOCK *spinlock); +void spinlock_lock_cancelable(SPINLOCK *spinlock); +void spinlock_unlock_cancelable(SPINLOCK *spinlock); +bool spinlock_trylock_cancelable(SPINLOCK *spinlock); + typedef struct netdata_rw_spinlock { int32_t readers; SPINLOCK spinlock; diff --git a/src/libnetdata/log/README.md b/src/libnetdata/log/README.md new file mode 100644 index 000000000..ef9ca1ef3 --- /dev/null +++ b/src/libnetdata/log/README.md @@ -0,0 +1,223 @@ +<!-- +title: "Log" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/log/README.md +sidebar_label: "Log" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + 
+# Netdata Logging + +This document describes how Netdata generates its own logs, not how Netdata manages and queries logs databases. + +## Log sources + +Netdata supports the following log sources: + +1. **daemon**, logs generated by Netdata daemon. +2. **collector**, logs generated by Netdata collectors, including internal and external ones. +3. **access**, API requests received by Netdata +4. **health**, all alert transitions and notifications + +## Log outputs + +For each log source, Netdata supports the following output methods: + +- **off**, to disable this log source +- **journal**, to send the logs to systemd-journal. +- **syslog**, to send the logs to syslog. +- **system**, to send the output to `stderr` or `stdout` depending on the log source. +- **stdout**, to write the logs to Netdata's `stdout`. +- **stderr**, to write the logs to Netdata's `stderr`. +- **filename**, to send the logs to a file. + +For `daemon` and `collector` the default is `journal` when systemd-journal is available. +To decide if systemd-journal is available, Netdata checks: + +1. `stderr` is connected to systemd-journald +2. `/run/systemd/journal/socket` exists +3. `/host/run/systemd/journal/socket` exists (`/host` is configurable in containers) + +If any of the above is detected, Netdata will select `journal` for `daemon` and `collector` sources. + +All other sources default to a file. + +## Log formats + +| Format | Description | +|---------|--------------------------------------------------------------------------------------------------------| +| journal | journald-specific log format. Automatically selected when logging to systemd-journal. | +| logfmt | logs data as a series of key/value pairs. The default when logging to any output other than `journal`. | +| json | logs data in JSON format. | + +## Log levels + +Each time Netdata logs, it assigns a priority to the log. 
It can be one of these (in order of importance): + +| Level | Description | +|-----------|----------------------------------------------------------------------------------------| +| emergency | a fatal condition, Netdata will most likely exit immediately after. | +| alert | a very important issue that may affect how Netdata operates. | +| critical | a very important issue the user should know about, but which Netdata thinks it can survive. | +| error | an error condition indicating that Netdata is trying to do something, but it fails. | +| warning | something unexpected has happened that may or may not affect the operation of Netdata. | +| notice | something that does not affect the operation of Netdata, but the user should notice. | +| info | the default log level about information the user should know. | +| debug | these are more verbose logs that can be ignored. | + +## Logs Configuration + +In `netdata.conf`, there are the following settings: + +``` +[logs] + # logs to trigger flood protection = 1000 + # logs flood protection period = 60 + # facility = daemon + # level = info + # daemon = journal + # collector = journal + # access = /var/log/netdata/access.log + # health = /var/log/netdata/health.log +``` + +- `logs to trigger flood protection` and `logs flood protection period` enable logs flood protection for `daemon` and `collector` sources. It can also be configured per log source. +- `facility` is used only when Netdata logs to syslog. +- `level` defines the minimum [log level](#log-levels) of logs that will be logged. This setting is applied only to `daemon` and `collector` sources. It can also be configured per source. 
+ +### Configuring log sources + +Each of the sources (`daemon`, `collector`, `access`, `health`) accepts the following: + +``` +source = {FORMAT},level={LEVEL},protection={LOGS}/{PERIOD}@{OUTPUT} +``` + +Where: + +- `{FORMAT}`, is one of the [log formats](#log-formats), +- `{LEVEL}`, is the minimum [log level](#log-levels) to be logged, +- `{LOGS}` is the number of `logs to trigger flood protection` configured per output, +- `{PERIOD}` is the equivalent of `logs flood protection period` configured per output, +- `{OUTPUT}` is one of the [log outputs](#log-outputs), + +All parameters can be omitted, except `{OUTPUT}`. If `{OUTPUT}` is the only given parameter, `@` can be omitted. + +### Logs rotation + +Netdata comes with `logrotate` configuration to rotate its log files periodically. + +The default is usually found in `/etc/logrotate.d/netdata`. + +Sending a `SIGHUP` to Netdata will instruct it to re-open all its log files. + +## Log Fields + +<details> +<summary>All fields exposed by Netdata</summary> + +| journal | logfmt | json | Description | +|:--------------------------------------:|:------------------------------:|:------------------------------:|:---------------------------------------------------------------------------------------------------------:| +| `_SOURCE_REALTIME_TIMESTAMP` | `time` | `time` | the timestamp of the event | +| `SYSLOG_IDENTIFIER` | `comm` | `comm` | the program logging the event | +| `ND_LOG_SOURCE` | `source` | `source` | one of the [log sources](#log-sources) | +| `PRIORITY`<br/>numeric | `level`<br/>text | `level`<br/>numeric | one of the [log levels](#log-levels) | +| `ERRNO` | `errno` | `errno` | the numeric value of `errno` | +| `INVOCATION_ID` | - | - | a unique UUID of the Netdata session, reset on every Netdata restart, inherited by systemd when available | +| `CODE_LINE` | - | - | the line number of the source code logging this event | +| `CODE_FILE` | - | - | the filename of the source code logging this event | +| 
`CODE_FUNCTION` | - | - | the function name of the source code logging this event | +| `TID` | `tid` | `tid` | the thread id of the thread logging this event | +| `THREAD_TAG` | `thread` | `thread` | the name of the thread logging this event | +| `MESSAGE_ID` | `msg_id` | `msg_id` | see [message IDs](#message-ids) | +| `ND_MODULE` | `module` | `module` | the Netdata module logging this event | +| `ND_NIDL_NODE` | `node` | `node` | the hostname of the node the event is related to | +| `ND_NIDL_INSTANCE` | `instance` | `instance` | the instance of the node the event is related to | +| `ND_NIDL_CONTEXT` | `context` | `context` | the context the event is related to (this is usually the chart name, as shown on netdata dashboards | +| `ND_NIDL_DIMENSION` | `dimension` | `dimension` | the dimension the event is related to | +| `ND_SRC_TRANSPORT` | `src_transport` | `src_transport` | when the event happened during a request, this is the request transport | +| `ND_SRC_IP` | `src_ip` | `src_ip` | when the event happened during an inbound request, this is the IP the request came from | +| `ND_SRC_PORT` | `src_port` | `src_port` | when the event happened during an inbound request, this is the port the request came from | +| `ND_SRC_FORWARDED_HOST` | `src_forwarded_host` | `src_forwarded_host` | the contents of the HTTP header `X-Forwarded-Host` | +| `ND_SRC_FORWARDED_FOR` | `src_forwarded_for` | `src_forwarded_for` | the contents of the HTTP header `X-Forwarded-For` | +| `ND_SRC_CAPABILITIES` | `src_capabilities` | `src_capabilities` | when the request came from a child, this is the communication capabilities of the child | +| `ND_DST_TRANSPORT` | `dst_transport` | `dst_transport` | when the event happened during an outbound request, this is the outbound request transport | +| `ND_DST_IP` | `dst_ip` | `dst_ip` | when the event happened during an outbound request, this is the IP the request destination | +| `ND_DST_PORT` | `dst_port` | `dst_port` | when the event happened 
during an outbound request, this is the port of the request destination | +| `ND_DST_CAPABILITIES` | `dst_capabilities` | `dst_capabilities` | when the request goes to a parent, this is the communication capabilities of the parent | +| `ND_REQUEST_METHOD` | `req_method` | `req_method` | when the event happened during an inbound request, this is the method the request was received with | +| `ND_RESPONSE_CODE` | `code` | `code` | when responding to a request, this is the response code | +| `ND_CONNECTION_ID` | `conn` | `conn` | when there is a connection id for an inbound connection, this is the connection id | +| `ND_TRANSACTION_ID` | `transaction` | `transaction` | the transaction id (UUID) of all API requests | +| `ND_RESPONSE_SENT_BYTES` | `sent_bytes` | `sent_bytes` | the bytes we sent to API responses | +| `ND_RESPONSE_SIZE_BYTES` | `size_bytes` | `size_bytes` | the uncompressed bytes of the API responses | +| `ND_RESPONSE_PREP_TIME_USEC` | `prep_ut` | `prep_ut` | the time needed to prepare a response | +| `ND_RESPONSE_SENT_TIME_USEC` | `sent_ut` | `sent_ut` | the time needed to send a response | +| `ND_RESPONSE_TOTAL_TIME_USEC` | `total_ut` | `total_ut` | the total time needed to complete a response | +| `ND_ALERT_ID` | `alert_id` | `alert_id` | the alert id this event is related to | +| `ND_ALERT_EVENT_ID` | `alert_event_id` | `alert_event_id` | a sequential number of the alert transition (per host) | +| `ND_ALERT_UNIQUE_ID` | `alert_unique_id` | `alert_unique_id` | a sequential number of the alert transition (per alert) | +| `ND_ALERT_TRANSITION_ID` | `alert_transition_id` | `alert_transition_id` | the unique UUID of this alert transition | +| `ND_ALERT_CONFIG` | `alert_config` | `alert_config` | the alert configuration hash (UUID) | +| `ND_ALERT_NAME` | `alert` | `alert` | the alert name | +| `ND_ALERT_CLASS` | `alert_class` | `alert_class` | the alert classification | +| `ND_ALERT_COMPONENT` | `alert_component` | `alert_component` | the alert component | +| 
`ND_ALERT_TYPE` | `alert_type` | `alert_type` | the alert type | +| `ND_ALERT_EXEC` | `alert_exec` | `alert_exec` | the alert notification program | +| `ND_ALERT_RECIPIENT` | `alert_recipient` | `alert_recipient` | the alert recipient(s) | +| `ND_ALERT_VALUE` | `alert_value` | `alert_value` | the current alert value | +| `ND_ALERT_VALUE_OLD` | `alert_value_old` | `alert_value_old` | the previous alert value | +| `ND_ALERT_STATUS` | `alert_status` | `alert_status` | the current alert status | +| `ND_ALERT_STATUS_OLD` | `alert_value_old` | `alert_value_old` | the previous alert status | +| `ND_ALERT_UNITS` | `alert_units` | `alert_units` | the units of the alert | +| `ND_ALERT_SUMMARY` | `alert_summary` | `alert_summary` | the summary text of the alert | +| `ND_ALERT_INFO` | `alert_info` | `alert_info` | the info text of the alert | +| `ND_ALERT_DURATION` | `alert_duration` | `alert_duration` | the duration the alert was in its previous state | +| `ND_ALERT_NOTIFICATION_TIMESTAMP_USEC` | `alert_notification_timestamp` | `alert_notification_timestamp` | the timestamp the notification delivery is scheduled | +| `ND_REQUEST` | `request` | `request` | the full request during which the event happened | +| `MESSAGE` | `msg` | `msg` | the event message | + +</details> + +### Message IDs + +Netdata assigns specific message IDs to certain events: + +- `ed4cdb8f1beb4ad3b57cb3cae2d162fa` when a Netdata child connects to this Netdata +- `6e2e3839067648968b646045dbf28d66` when this Netdata connects to a Netdata parent +- `9ce0cb58ab8b44df82c4bf1ad9ee22de` when alerts change state +- `6db0018e83e34320ae2a659d78019fb7` when notifications are sent + +You can view these events using the Netdata systemd-journal.plugin at the `MESSAGE_ID` filter, +or using `journalctl` like this: + +```bash +# query children connection +journalctl MESSAGE_ID=ed4cdb8f1beb4ad3b57cb3cae2d162fa + +# query parent connection +journalctl MESSAGE_ID=6e2e3839067648968b646045dbf28d66 + +# query alert transitions 
+journalctl MESSAGE_ID=9ce0cb58ab8b44df82c4bf1ad9ee22de + +# query alert notifications +journalctl MESSAGE_ID=6db0018e83e34320ae2a659d78019fb7 +``` + +## Using journalctl to query Netdata logs + +The Netdata service's processes execute within the `netdata` journal namespace. To view the Netdata logs, you should +specify the `--namespace=netdata` option. + +```bash +# Netdata logs since the last time the service was started +journalctl _SYSTEMD_INVOCATION_ID="$(systemctl show --value --property=InvocationID netdata)" --namespace=netdata + +# All netdata logs, the oldest entries are displayed first +journalctl -u netdata --namespace=netdata + +# All netdata logs, the newest entries are displayed first +journalctl -u netdata --namespace=netdata -r +``` diff --git a/libnetdata/log/journal.c b/src/libnetdata/log/journal.c index 21978cf5f..0f1248de7 100644 --- a/libnetdata/log/journal.c +++ b/src/libnetdata/log/journal.c @@ -3,15 +3,12 @@ #include "journal.h" bool is_path_unix_socket(const char *path) { + // Check if the path is valid if(!path || !*path) return false; struct stat statbuf; - // Check if the path is valid - if (!path || !*path) - return false; - // Use stat to check if the file exists and is a socket if (stat(path, &statbuf) == -1) // The file does not exist or cannot be accessed @@ -51,9 +48,11 @@ int journal_direct_fd(const char *path) { if(!is_path_unix_socket(path)) return -1; - int fd = socket(AF_UNIX, SOCK_DGRAM, 0); + int fd = socket(AF_UNIX, SOCK_DGRAM| DEFAULT_SOCKET_FLAGS, 0); if (fd < 0) return -1; + sock_setcloexec(fd); + struct sockaddr_un addr; memset(&addr, 0, sizeof(struct sockaddr_un)); addr.sun_family = AF_UNIX; @@ -97,6 +96,11 @@ static inline bool journal_send_with_memfd(int fd, const char *msg, size_t msg_l msghdr.msg_controllen = sizeof(cmsgbuf); cmsghdr = CMSG_FIRSTHDR(&msghdr); + if(!cmsghdr) { + close(memfd); + return false; + } + cmsghdr->cmsg_level = SOL_SOCKET; cmsghdr->cmsg_type = SCM_RIGHTS; cmsghdr->cmsg_len = 
CMSG_LEN(sizeof(int)); diff --git a/libnetdata/log/journal.h b/src/libnetdata/log/journal.h index df8ece18b..df8ece18b 100644 --- a/libnetdata/log/journal.h +++ b/src/libnetdata/log/journal.h diff --git a/libnetdata/log/log.c b/src/libnetdata/log/log.c index c805716ce..bfba93ddb 100644 --- a/libnetdata/log/log.c +++ b/src/libnetdata/log/log.c @@ -1,9 +1,10 @@ // SPDX-License-Identifier: GPL-3.0-or-later +// do not REMOVE this, it is used by systemd-journal includes to prevent saving the file, function, line of the +// source code that makes the calls, allowing our loggers to log the lines of source code that actually log #define SD_JOURNAL_SUPPRESS_LOCATION #include "../libnetdata.h" -#include <daemon/main.h> #ifdef __FreeBSD__ #include <sys/endian.h> @@ -13,7 +14,7 @@ #include <machine/endian.h> #endif -#ifdef HAVE_BACKTRACE +#if !defined(ENABLE_SENTRY) && defined(HAVE_BACKTRACE) #include <execinfo.h> #endif @@ -1117,9 +1118,33 @@ static __thread struct log_field thread_log_fields[_NDF_MAX] = { .journal = "ND_SRC_TRANSPORT", .logfmt = "src_transport", }, + [NDF_ACCOUNT_ID] = { + .journal = "ND_ACCOUNT_ID", + .logfmt = "account", + }, + [NDF_USER_NAME] = { + .journal = "ND_USER_NAME", + .logfmt = "user", + }, + [NDF_USER_ROLE] = { + .journal = "ND_USER_ROLE", + .logfmt = "role", + }, + [NDF_USER_ACCESS] = { + .journal = "ND_USER_PERMISSIONS", + .logfmt = "permissions", + }, [NDF_SRC_IP] = { - .journal = "ND_SRC_IP", - .logfmt = "src_ip", + .journal = "ND_SRC_IP", + .logfmt = "src_ip", + }, + [NDF_SRC_FORWARDED_HOST] = { + .journal = "ND_SRC_FORWARDED_HOST", + .logfmt = "src_forwarded_host", + }, + [NDF_SRC_FORWARDED_FOR] = { + .journal = "ND_SRC_FORWARDED_FOR", + .logfmt = "src_forwarded_for", }, [NDF_SRC_PORT] = { .journal = "ND_SRC_PORT", @@ -1353,11 +1378,12 @@ static void nd_logger_json(BUFFER *wb, struct log_field *fields, size_t fields_m case NDFT_DBL: buffer_json_member_add_double(wb, key, fields[i].entry.dbl); break; - case NDFT_UUID:{ - char 
u[UUID_COMPACT_STR_LEN]; - uuid_unparse_lower_compact(*fields[i].entry.uuid, u); - buffer_json_member_add_string(wb, key, u); - } + case NDFT_UUID: + if(!uuid_is_null(*fields[i].entry.uuid)) { + char u[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(*fields[i].entry.uuid, u); + buffer_json_member_add_string(wb, key, u); + } break; case NDFT_CALLBACK: { if(!tmp) @@ -1423,10 +1449,7 @@ static int64_t log_field_to_int64(struct log_field *lf) { break; case NDFT_CALLBACK: - if(!tmp) - tmp = buffer_create(0, NULL); - else - buffer_flush(tmp); + tmp = buffer_create(0, NULL); if(lf->entry.cb.formatter(tmp, lf->entry.cb.formatter_data)) s = buffer_tostring(tmp); @@ -1435,13 +1458,13 @@ static int64_t log_field_to_int64(struct log_field *lf) { break; case NDFT_U64: - return lf->entry.u64; + return (int64_t)lf->entry.u64; case NDFT_I64: - return lf->entry.i64; + return (int64_t)lf->entry.i64; case NDFT_DBL: - return lf->entry.dbl; + return (int64_t)lf->entry.dbl; } if(s && *s) @@ -1487,10 +1510,7 @@ static uint64_t log_field_to_uint64(struct log_field *lf) { break; case NDFT_CALLBACK: - if(!tmp) - tmp = buffer_create(0, NULL); - else - buffer_flush(tmp); + tmp = buffer_create(0, NULL); if(lf->entry.cb.formatter(tmp, lf->entry.cb.formatter_data)) s = buffer_tostring(tmp); @@ -1505,7 +1525,7 @@ static uint64_t log_field_to_uint64(struct log_field *lf) { return lf->entry.i64; case NDFT_DBL: - return lf->entry.dbl; + return (uint64_t) lf->entry.dbl; } if(s && *s) @@ -1538,7 +1558,7 @@ static void errno_annotator(BUFFER *wb, const char *key, struct log_field *lf) { return; char buf[1024]; - const char *s = errno2str(errnum, buf, sizeof(buf)); + const char *s = errno2str((int)errnum, buf, sizeof(buf)); if(buffer_strlen(wb)) buffer_fast_strcat(wb, " ", 1); @@ -1562,7 +1582,8 @@ static void priority_annotator(BUFFER *wb, const char *key, struct log_field *lf buffer_strcat(wb, nd_log_id2priority(pri)); } -static bool needs_quotes_for_logfmt(const char *s) { +static bool 
needs_quotes_for_logfmt(const char *s) +{ static bool safe_for_logfmt[256] = { [' '] = true, ['!'] = true, ['"'] = false, ['#'] = true, ['$'] = true, ['%'] = true, ['&'] = true, ['\''] = true, ['('] = true, [')'] = true, ['*'] = true, ['+'] = true, [','] = true, ['-'] = true, @@ -1593,7 +1614,8 @@ static bool needs_quotes_for_logfmt(const char *s) { return false; } -static void string_to_logfmt(BUFFER *wb, const char *s) { +static void string_to_logfmt(BUFFER *wb, const char *s) +{ bool spaces = needs_quotes_for_logfmt(s); if(spaces) @@ -1605,7 +1627,8 @@ static void string_to_logfmt(BUFFER *wb, const char *s) { buffer_fast_strcat(wb, "\"", 1); } -static void nd_logger_logfmt(BUFFER *wb, struct log_field *fields, size_t fields_max) { +static void nd_logger_logfmt(BUFFER *wb, struct log_field *fields, size_t fields_max) +{ // --- FIELD_PARSER_VERSIONS --- // @@ -1670,13 +1693,14 @@ static void nd_logger_logfmt(BUFFER *wb, struct log_field *fields, size_t fields buffer_fast_strcat(wb, "=", 1); buffer_print_netdata_double(wb, fields[i].entry.dbl); break; - case NDFT_UUID: { - char u[UUID_COMPACT_STR_LEN]; - uuid_unparse_lower_compact(*fields[i].entry.uuid, u); - buffer_strcat(wb, key); - buffer_fast_strcat(wb, "=", 1); - buffer_fast_strcat(wb, u, sizeof(u) - 1); - } + case NDFT_UUID: + if(!uuid_is_null(*fields[i].entry.uuid)) { + char u[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(*fields[i].entry.uuid, u); + buffer_strcat(wb, key); + buffer_fast_strcat(wb, "=", 1); + buffer_fast_strcat(wb, u, sizeof(u) - 1); + } break; case NDFT_CALLBACK: { if(!tmp) @@ -1745,32 +1769,34 @@ static bool nd_logger_journal_libsystemd(struct log_field *fields, size_t fields const char *key = fields[i].journal; char *value = NULL; + int rc = 0; switch (fields[i].entry.type) { case NDFT_TXT: if(*fields[i].entry.txt) - asprintf(&value, "%s=%s", key, fields[i].entry.txt); + rc = asprintf(&value, "%s=%s", key, fields[i].entry.txt); break; case NDFT_STR: - asprintf(&value, "%s=%s", key, 
string2str(fields[i].entry.str)); + rc = asprintf(&value, "%s=%s", key, string2str(fields[i].entry.str)); break; case NDFT_BFR: if(buffer_strlen(fields[i].entry.bfr)) - asprintf(&value, "%s=%s", key, buffer_tostring(fields[i].entry.bfr)); + rc = asprintf(&value, "%s=%s", key, buffer_tostring(fields[i].entry.bfr)); break; case NDFT_U64: - asprintf(&value, "%s=%" PRIu64, key, fields[i].entry.u64); + rc = asprintf(&value, "%s=%" PRIu64, key, fields[i].entry.u64); break; case NDFT_I64: - asprintf(&value, "%s=%" PRId64, key, fields[i].entry.i64); + rc = asprintf(&value, "%s=%" PRId64, key, fields[i].entry.i64); break; case NDFT_DBL: - asprintf(&value, "%s=%f", key, fields[i].entry.dbl); + rc = asprintf(&value, "%s=%f", key, fields[i].entry.dbl); break; - case NDFT_UUID: { - char u[UUID_COMPACT_STR_LEN]; - uuid_unparse_lower_compact(*fields[i].entry.uuid, u); - asprintf(&value, "%s=%s", key, u); - } + case NDFT_UUID: + if(!uuid_is_null(*fields[i].entry.uuid)) { + char u[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(*fields[i].entry.uuid, u); + rc = asprintf(&value, "%s=%s", key, u); + } break; case NDFT_CALLBACK: { if(!tmp) @@ -1778,15 +1804,15 @@ static bool nd_logger_journal_libsystemd(struct log_field *fields, size_t fields else buffer_flush(tmp); if(fields[i].entry.cb.formatter(tmp, fields[i].entry.cb.formatter_data)) - asprintf(&value, "%s=%s", key, buffer_tostring(tmp)); + rc = asprintf(&value, "%s=%s", key, buffer_tostring(tmp)); } break; default: - asprintf(&value, "%s=%s", key, "UNHANDLED"); + rc = asprintf(&value, "%s=%s", key, "UNHANDLED"); break; } - if (value) { + if (rc != -1 && value) { iov[iov_count].iov_base = value; iov[iov_count].iov_len = strlen(value); iov_count++; @@ -1864,14 +1890,15 @@ static bool nd_logger_journal_direct(struct log_field *fields, size_t fields_max buffer_print_netdata_double(wb, fields[i].entry.dbl); buffer_putc(wb, '\n'); break; - case NDFT_UUID:{ - char u[UUID_COMPACT_STR_LEN]; - 
uuid_unparse_lower_compact(*fields[i].entry.uuid, u); - buffer_strcat(wb, key); - buffer_putc(wb, '='); - buffer_fast_strcat(wb, u, sizeof(u) - 1); - buffer_putc(wb, '\n'); - } + case NDFT_UUID: + if(!uuid_is_null(*fields[i].entry.uuid)) { + char u[UUID_COMPACT_STR_LEN]; + uuid_unparse_lower_compact(*fields[i].entry.uuid, u); + buffer_strcat(wb, key); + buffer_putc(wb, '='); + buffer_fast_strcat(wb, u, sizeof(u) - 1); + buffer_putc(wb, '\n'); + } break; case NDFT_CALLBACK: { if(!tmp) @@ -1913,7 +1940,7 @@ static bool nd_logger_journal_direct(struct log_field *fields, size_t fields_max // ---------------------------------------------------------------------------- // syslog logger - uses logfmt -static bool nd_logger_syslog(int priority, ND_LOG_FORMAT format, struct log_field *fields, size_t fields_max) { +static bool nd_logger_syslog(int priority, ND_LOG_FORMAT format __maybe_unused, struct log_field *fields, size_t fields_max) { CLEAN_BUFFER *wb = buffer_create(1024, NULL); nd_logger_logfmt(wb, fields, fields_max); @@ -2069,7 +2096,7 @@ static void nd_logger_merge_log_stack_to_thread_fields(void) { if((type == NDFT_TXT && (!e->txt || !*e->txt)) || (type == NDFT_BFR && (!e->bfr || !buffer_strlen(e->bfr))) || (type == NDFT_STR && !e->str) || - (type == NDFT_UUID && !e->uuid) || + (type == NDFT_UUID && (!e->uuid || uuid_is_null(*e->uuid))) || (type == NDFT_CALLBACK && !e->cb.formatter) || type == NDFT_UNSET) continue; @@ -2110,7 +2137,7 @@ static void nd_logger(const char *file, const char *function, const unsigned lon else if(thread_log_fields[NDF_LOG_SOURCE].entry.type == NDFT_U64) src = thread_log_fields[NDF_LOG_SOURCE].entry.u64; - if(src != source && src >= 0 && src < _NDLS_MAX) { + if(src != source && src < _NDLS_MAX) { source = src; output = nd_logger_select_output(source, &fp, &spinlock); if(output != NDLM_FILE && output != NDLM_JOURNAL && output != NDLM_SYSLOG) @@ -2137,13 +2164,11 @@ static void nd_logger(const char *file, const char *function, const 
unsigned lon char os_threadname[NETDATA_THREAD_NAME_MAX + 1]; if(likely(!thread_log_fields[NDF_THREAD_TAG].entry.set)) { const char *thread_tag = netdata_thread_tag(); - if(!netdata_thread_tag_exists()) { - if (!netdata_thread_tag_exists()) { - os_thread_get_current_name_np(os_threadname); - if ('\0' != os_threadname[0]) - /* If it is not an empty string replace "MAIN" thread_tag */ - thread_tag = os_threadname; - } + if (!netdata_thread_tag_exists()) { + os_thread_get_current_name_np(os_threadname); + if ('\0' != os_threadname[0]) + /* If it is not an empty string replace "MAIN" thread_tag */ + thread_tag = os_threadname; } thread_log_fields[NDF_THREAD_TAG].entry = ND_LOG_FIELD_TXT(NDF_THREAD_TAG, thread_tag); @@ -2224,7 +2249,8 @@ static ND_LOG_SOURCES nd_log_validate_source(ND_LOG_SOURCES source) { // ---------------------------------------------------------------------------- // public API for loggers -void netdata_logger(ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, const char *file, const char *function, unsigned long line, const char *fmt, ... ) { +void netdata_logger(ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, const char *file, const char *function, unsigned long line, const char *fmt, ... 
) +{ int saved_errno = errno; source = nd_log_validate_source(source); @@ -2285,17 +2311,14 @@ void netdata_logger_fatal( const char *file, const char *function, const unsigne char action_data[70+1]; snprintfz(action_data, 70, "%04lu@%-10.10s:%-15.15s/%d", line, file, function, saved_errno); - char action_result[60+1]; char os_threadname[NETDATA_THREAD_NAME_MAX + 1]; const char *thread_tag = netdata_thread_tag(); - if(!netdata_thread_tag_exists()) { - if (!netdata_thread_tag_exists()) { - os_thread_get_current_name_np(os_threadname); - if ('\0' != os_threadname[0]) - /* If it is not an empty string replace "MAIN" thread_tag */ - thread_tag = os_threadname; - } + if (!netdata_thread_tag_exists()) { + os_thread_get_current_name_np(os_threadname); + if ('\0' != os_threadname[0]) + /* If it is not an empty string replace "MAIN" thread_tag */ + thread_tag = os_threadname; } if(!thread_tag) thread_tag = "UNKNOWN"; @@ -2308,10 +2331,10 @@ void netdata_logger_fatal( const char *file, const char *function, const unsigne if(strncmp(thread_tag, THREAD_TAG_STREAM_SENDER, strlen(THREAD_TAG_STREAM_SENDER)) == 0) tag_to_send = THREAD_TAG_STREAM_SENDER; + char action_result[60+1]; snprintfz(action_result, 60, "%s:%s", program_name, tag_to_send); - send_statistics("FATAL", action_result, action_data); -#ifdef HAVE_BACKTRACE +#if !defined(ENABLE_SENTRY) && defined(HAVE_BACKTRACE) int fd = nd_log.sources[NDLS_DAEMON].fd; if(fd == -1) fd = STDERR_FILENO; @@ -2328,7 +2351,7 @@ void netdata_logger_fatal( const char *file, const char *function, const unsigne abort(); #endif - netdata_cleanup_and_exit(1); + netdata_cleanup_and_exit(1, "FATAL", action_result, action_data); } // ---------------------------------------------------------------------------- @@ -2407,7 +2430,8 @@ static bool nd_log_limit_reached(struct nd_log_source *source) { source->limits.logs_per_period, source->limits.throttle_period, program_name, - (int64_t)((source->limits.started_monotonic_ut + 
(source->limits.throttle_period * USEC_PER_SEC) - now_ut)) / USEC_PER_SEC); + (int64_t)(((source->limits.started_monotonic_ut + (source->limits.throttle_period * USEC_PER_SEC) - now_ut)) / USEC_PER_SEC) + ); if(source->pending_msg) freez((void *)source->pending_msg); diff --git a/libnetdata/log/log.h b/src/libnetdata/log/log.h index ad634693c..51d6c8bff 100644 --- a/libnetdata/log/log.h +++ b/src/libnetdata/log/log.h @@ -63,9 +63,17 @@ typedef enum __attribute__((__packed__)) { // web server, aclk and stream receiver NDF_SRC_TRANSPORT, // the transport we received the request, one of: http, https, pluginsd + // Netdata Cloud Related + NDF_ACCOUNT_ID, + NDF_USER_NAME, + NDF_USER_ROLE, + NDF_USER_ACCESS, + // web server and stream receiver NDF_SRC_IP, // the streaming / web server source IP NDF_SRC_PORT, // the streaming / web server source Port + NDF_SRC_FORWARDED_HOST, + NDF_SRC_FORWARDED_FOR, NDF_SRC_CAPABILITIES, // the stream receiver capabilities // stream sender (established links) @@ -286,12 +294,11 @@ typedef struct error_with_limit { #define nd_log_limit_static_global_var(var, log_every_secs, sleep_usecs) static ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) } #define nd_log_limit_static_thread_var(var, log_every_secs, sleep_usecs) static __thread ERROR_LIMIT var = { .last_logged = 0, .count = 0, .log_every = (log_every_secs), .sleep_ut = (sleep_usecs) } -void netdata_logger_with_limit(ERROR_LIMIT *erl, ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, const char *file, const char *function, unsigned long line, const char *fmt, ... ) PRINTFLIKE(7, 8);; +void netdata_logger_with_limit(ERROR_LIMIT *erl, ND_LOG_SOURCES source, ND_LOG_FIELD_PRIORITY priority, const char *file, const char *function, unsigned long line, const char *fmt, ... ) PRINTFLIKE(7, 8); #define nd_log_limit(erl, NDLS, NDLP, args...) 
netdata_logger_with_limit(erl, NDLS, NDLP, __FILE__, __FUNCTION__, __LINE__, ##args) // ---------------------------------------------------------------------------- -void send_statistics(const char *action, const char *action_result, const char *action_data); void netdata_logger_fatal( const char *file, const char *function, unsigned long line, const char *fmt, ... ) NORETURN PRINTFLIKE(4, 5); # ifdef __cplusplus diff --git a/libnetdata/log/systemd-cat-native.c b/src/libnetdata/log/systemd-cat-native.c index de6211cc0..0c89399d3 100644 --- a/libnetdata/log/systemd-cat-native.c +++ b/src/libnetdata/log/systemd-cat-native.c @@ -11,7 +11,7 @@ #include <machine/endian.h> #endif -static void log_message_to_stderr(BUFFER *msg) { +static inline void log_message_to_stderr(BUFFER *msg) { CLEAN_BUFFER *tmp = buffer_create(0, NULL); for(size_t i = 0; i < msg->len ;i++) { @@ -436,7 +436,7 @@ cleanup: static int help(void) { fprintf(stderr, "\n" - "Netdata systemd-cat-native " PACKAGE_VERSION "\n" + "Netdata systemd-cat-native " VERSION "\n" "\n" "This program reads from its standard input, lines in the format:\n" "\n" @@ -594,7 +594,7 @@ static int log_input_as_netdata(const char *newline, int timeout_ms) { // an empty line - we are done for this message nd_log(NDLS_HEALTH, priority, - "added %d fields", // if the user supplied a MESSAGE, this will be ignored + "added %zu fields", // if the user supplied a MESSAGE, this will be ignored fields_added); lgs_reset(lgs); @@ -627,7 +627,7 @@ static int log_input_as_netdata(const char *newline, int timeout_ms) { nd_log(NDLS_COLLECTORS, NDLP_ERR, "Field '%.*s' is not a Netdata field. 
Ignoring it.", - field_len, field); + (int)field_len, field); lgs[NDF_MESSAGE] = backup; } @@ -648,7 +648,7 @@ static int log_input_as_netdata(const char *newline, int timeout_ms) { } if(fields_added) { - nd_log(NDLS_HEALTH, priority, "added %d fields", fields_added); + nd_log(NDLS_HEALTH, priority, "added %zu fields", fields_added); messages_logged++; } diff --git a/libnetdata/log/systemd-cat-native.h b/src/libnetdata/log/systemd-cat-native.h index 34e7a3615..34e7a3615 100644 --- a/libnetdata/log/systemd-cat-native.h +++ b/src/libnetdata/log/systemd-cat-native.h diff --git a/libnetdata/log/systemd-cat-native.md b/src/libnetdata/log/systemd-cat-native.md index b0b15f403..b0b15f403 100644 --- a/libnetdata/log/systemd-cat-native.md +++ b/src/libnetdata/log/systemd-cat-native.md diff --git a/src/libnetdata/maps/local-sockets.h b/src/libnetdata/maps/local-sockets.h new file mode 100644 index 000000000..ce5201242 --- /dev/null +++ b/src/libnetdata/maps/local-sockets.h @@ -0,0 +1,1283 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_LOCAL_SOCKETS_H +#define NETDATA_LOCAL_SOCKETS_H + +#include "libnetdata/libnetdata.h" + +// disable libmnl for the moment +#undef HAVE_LIBMNL + +#ifdef HAVE_LIBMNL +#include <linux/inet_diag.h> +#include <linux/sock_diag.h> +#include <linux/unix_diag.h> +#include <linux/netlink.h> +#include <libmnl/libmnl.h> +#endif + +#define UID_UNSET (uid_t)(UINT32_MAX) + +// -------------------------------------------------------------------------------------------------------------------- +// hashtable for keeping the namespaces +// key and value is the namespace inode + +#define SIMPLE_HASHTABLE_VALUE_TYPE uint64_t +#define SIMPLE_HASHTABLE_NAME _NET_NS +#include "libnetdata/simple_hashtable.h" + +// -------------------------------------------------------------------------------------------------------------------- +// hashtable for keeping the sockets of PIDs +// key is the inode + +struct pid_socket; +#define 
SIMPLE_HASHTABLE_VALUE_TYPE struct pid_socket +#define SIMPLE_HASHTABLE_NAME _PID_SOCKET +#include "libnetdata/simple_hashtable.h" + +// -------------------------------------------------------------------------------------------------------------------- +// hashtable for keeping all the sockets +// key is the inode + +struct local_socket; +#define SIMPLE_HASHTABLE_VALUE_TYPE struct local_socket +#define SIMPLE_HASHTABLE_NAME _LOCAL_SOCKET +#include "libnetdata/simple_hashtable.h" + +// -------------------------------------------------------------------------------------------------------------------- +// hashtable for keeping all local IPs +// key is XXH3_64bits hash of the IP + +union ipv46; +#define SIMPLE_HASHTABLE_VALUE_TYPE union ipv46 +#define SIMPLE_HASHTABLE_NAME _LOCAL_IP +#include "libnetdata/simple_hashtable.h" + +// -------------------------------------------------------------------------------------------------------------------- +// hashtable for keeping all listening ports +// key is XXH3_64bits hash of the family, protocol, port number, namespace + +struct local_port; +#define SIMPLE_HASHTABLE_VALUE_TYPE struct local_port +#define SIMPLE_HASHTABLE_NAME _LISTENING_PORT +#include "libnetdata/simple_hashtable.h" + +// -------------------------------------------------------------------------------------------------------------------- + +struct local_socket_state; +typedef void (*local_sockets_cb_t)(struct local_socket_state *state, struct local_socket *n, void *data); + +typedef struct local_socket_state { + struct { + bool listening; + bool inbound; + bool outbound; + bool local; + bool tcp4; + bool tcp6; + bool udp4; + bool udp6; + bool pid; + bool cmdline; + bool comm; + bool uid; + bool namespaces; + size_t max_errors; + + local_sockets_cb_t cb; + void *data; + + const char *host_prefix; + } config; + + struct { + size_t pid_fds_processed; + size_t pid_fds_opendir_failed; + size_t pid_fds_readlink_failed; + size_t pid_fds_parse_failed; + size_t 
errors_encountered; + } stats; + +#ifdef HAVE_LIBMNL + bool use_nl; + struct mnl_socket *nl; + uint16_t tmp_protocol; +#endif + + ARAL *local_socket_aral; + ARAL *pid_socket_aral; + + uint64_t proc_self_net_ns_inode; + + SIMPLE_HASHTABLE_NET_NS ns_hashtable; + SIMPLE_HASHTABLE_PID_SOCKET pid_sockets_hashtable; + SIMPLE_HASHTABLE_LOCAL_SOCKET sockets_hashtable; + SIMPLE_HASHTABLE_LOCAL_IP local_ips_hashtable; + SIMPLE_HASHTABLE_LISTENING_PORT listening_ports_hashtable; +} LS_STATE; + +// -------------------------------------------------------------------------------------------------------------------- + +typedef enum __attribute__((packed)) { + SOCKET_DIRECTION_NONE = 0, + SOCKET_DIRECTION_LISTEN = (1 << 0), // a listening socket + SOCKET_DIRECTION_INBOUND = (1 << 1), // an inbound socket connecting a remote system to a local listening socket + SOCKET_DIRECTION_OUTBOUND = (1 << 2), // a socket initiated by this system, connecting to another system + SOCKET_DIRECTION_LOCAL_INBOUND = (1 << 3), // the socket connecting 2 localhost applications + SOCKET_DIRECTION_LOCAL_OUTBOUND = (1 << 4), // the socket connecting 2 localhost applications +} SOCKET_DIRECTION; + +#ifndef TASK_COMM_LEN +#define TASK_COMM_LEN 16 +#endif + +struct pid_socket { + uint64_t inode; + pid_t pid; + uid_t uid; + uint64_t net_ns_inode; + char *cmdline; + char comm[TASK_COMM_LEN]; +}; + +struct local_port { + uint16_t protocol; + uint16_t family; + uint16_t port; + uint64_t net_ns_inode; +}; + +union ipv46 { + uint32_t ipv4; + struct in6_addr ipv6; +}; + +struct socket_endpoint { + uint16_t protocol; + uint16_t family; + uint16_t port; + union ipv46 ip; +}; + +static inline void ipv6_to_in6_addr(const char *ipv6_str, struct in6_addr *d) { + char buf[9]; + + for (size_t k = 0; k < 4; ++k) { + memcpy(buf, ipv6_str + (k * 8), 8); + buf[sizeof(buf) - 1] = '\0'; + d->s6_addr32[k] = str2uint32_hex(buf, NULL); + } +} + +typedef struct local_socket { + uint64_t inode; + uint64_t net_ns_inode; + + int 
state; + struct socket_endpoint local; + struct socket_endpoint remote; + pid_t pid; + + SOCKET_DIRECTION direction; + + uint8_t timer; + uint8_t retransmits; + uint32_t expires; + uint32_t rqueue; + uint32_t wqueue; + uid_t uid; + + char comm[TASK_COMM_LEN]; + STRING *cmdline; + + struct local_port local_port_key; + + XXH64_hash_t local_ip_hash; + XXH64_hash_t remote_ip_hash; + XXH64_hash_t local_port_hash; + +#ifdef LOCAL_SOCKETS_EXTENDED_MEMBERS + LOCAL_SOCKETS_EXTENDED_MEMBERS +#endif +} LOCAL_SOCKET; + +// -------------------------------------------------------------------------------------------------------------------- + +static inline void local_sockets_log(LS_STATE *ls, const char *format, ...) __attribute__ ((format(__printf__, 2, 3))); +static inline void local_sockets_log(LS_STATE *ls, const char *format, ...) { + if(++ls->stats.errors_encountered == ls->config.max_errors) { + nd_log(NDLS_COLLECTORS, NDLP_ERR, "LOCAL-SOCKETS: max number of logs reached. Not logging anymore"); + return; + } + + if(ls->stats.errors_encountered > ls->config.max_errors) + return; + + char buf[16384]; + va_list args; + va_start(args, format); + vsnprintf(buf, sizeof(buf), format, args); + va_end(args); + + nd_log(NDLS_COLLECTORS, NDLP_ERR, "LOCAL-SOCKETS: %s", buf); +} + +// -------------------------------------------------------------------------------------------------------------------- + +static void local_sockets_foreach_local_socket_call_cb(LS_STATE *ls) { + for(SIMPLE_HASHTABLE_SLOT_LOCAL_SOCKET *sl = simple_hashtable_first_read_only_LOCAL_SOCKET(&ls->sockets_hashtable); + sl; + sl = simple_hashtable_next_read_only_LOCAL_SOCKET(&ls->sockets_hashtable, sl)) { + LOCAL_SOCKET *n = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if(!n) continue; + + if((ls->config.listening && n->direction & SOCKET_DIRECTION_LISTEN) || + (ls->config.local && n->direction & (SOCKET_DIRECTION_LOCAL_INBOUND|SOCKET_DIRECTION_LOCAL_OUTBOUND)) || + (ls->config.inbound && n->direction & 
SOCKET_DIRECTION_INBOUND) || + (ls->config.outbound && n->direction & SOCKET_DIRECTION_OUTBOUND) + ) { + // we have to call the callback for this socket + if (ls->config.cb) + ls->config.cb(ls, n, ls->config.data); + } + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +static inline void local_sockets_fix_cmdline(char* str) { + char *s = str; + + // map invalid characters to underscores + while(*s) { + if(*s == '|' || iscntrl(*s)) *s = '_'; + s++; + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +static inline bool +local_sockets_read_proc_inode_link(LS_STATE *ls, const char *filename, uint64_t *inode, const char *type) { + char link_target[FILENAME_MAX + 1]; + + *inode = 0; + + ssize_t len = readlink(filename, link_target, sizeof(link_target) - 1); + if (len == -1) { + local_sockets_log(ls, "cannot read '%s' link '%s'", type, filename); + + ls->stats.pid_fds_readlink_failed++; + return false; + } + link_target[len] = '\0'; + + len = strlen(type); + if(strncmp(link_target, type, len) == 0 && link_target[len] == ':' && link_target[len + 1] == '[' && isdigit(link_target[len + 2])) { + *inode = strtoull(&link_target[len + 2], NULL, 10); + // ll_log(ls, "read link of type '%s' '%s' from '%s', inode = %"PRIu64, type, link_target, filename, *inode); + return true; + } + else { + // ll_log(ls, "cannot read '%s' link '%s' from '%s'", type, link_target, filename); + ls->stats.pid_fds_processed++; + return false; + } +} + +static inline bool local_sockets_is_path_a_pid(const char *s) { + if(!s || !*s) return false; + + while(*s) { + if(!isdigit(*s++)) + return false; + } + + return true; +} + +static inline bool local_sockets_find_all_sockets_in_proc(LS_STATE *ls, const char *proc_filename) { + DIR *proc_dir; + struct dirent *proc_entry; + char filename[FILENAME_MAX + 1]; + char comm[TASK_COMM_LEN]; + char 
cmdline[8192]; + const char *cmdline_trimmed; + uint64_t net_ns_inode; + + proc_dir = opendir(proc_filename); + if (proc_dir == NULL) { + local_sockets_log(ls, "cannot opendir() '%s'", proc_filename); + ls->stats.pid_fds_readlink_failed++; + return false; + } + + while ((proc_entry = readdir(proc_dir)) != NULL) { + if(proc_entry->d_type != DT_DIR) + continue; + + if(!strcmp(proc_entry->d_name, ".") || !strcmp(proc_entry->d_name, "..")) + continue; + + if(!local_sockets_is_path_a_pid(proc_entry->d_name)) + continue; + + // Build the path to the fd directory of the process + snprintfz(filename, FILENAME_MAX, "%s/%s/fd/", proc_filename, proc_entry->d_name); + DIR *fd_dir = opendir(filename); + if (fd_dir == NULL) { + local_sockets_log(ls, "cannot opendir() '%s'", filename); + ls->stats.pid_fds_opendir_failed++; + continue; + } + + comm[0] = '\0'; + cmdline[0] = '\0'; + cmdline_trimmed = NULL; + pid_t pid = (pid_t)strtoul(proc_entry->d_name, NULL, 10); + if(!pid) { + local_sockets_log(ls, "cannot parse pid of '%s'", proc_entry->d_name); + closedir(fd_dir); + continue; + } + net_ns_inode = 0; + uid_t uid = UID_UNSET; + + struct dirent *fd_entry; + while ((fd_entry = readdir(fd_dir)) != NULL) { + if(fd_entry->d_type != DT_LNK) + continue; + + snprintfz(filename, sizeof(filename), "%s/%s/fd/%s", proc_filename, proc_entry->d_name, fd_entry->d_name); + uint64_t inode = 0; + if(!local_sockets_read_proc_inode_link(ls, filename, &inode, "socket")) + continue; + + SIMPLE_HASHTABLE_SLOT_PID_SOCKET *sl = simple_hashtable_get_slot_PID_SOCKET(&ls->pid_sockets_hashtable, inode, &inode, true); + struct pid_socket *ps = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if(!ps || (ps->pid == 1 && pid != 1)) { + if(uid == UID_UNSET && ls->config.uid) { + char status_buf[512]; + snprintfz(filename, sizeof(filename), "%s/%s/status", proc_filename, proc_entry->d_name); + if (read_txt_file(filename, status_buf, sizeof(status_buf))) + local_sockets_log(ls, "cannot open file: %s\n", filename); + else { + 
char *u = strstr(status_buf, "Uid:"); + if(u) { + u += 4; + while(isspace(*u)) u++; // skip spaces + while(*u >= '0' && *u <= '9') u++; // skip the first number (real uid) + while(isspace(*u)) u++; // skip spaces again + uid = strtol(u, NULL, 10); // parse the 2nd number (effective uid) + } + } + } + if(!comm[0] && ls->config.comm) { + snprintfz(filename, sizeof(filename), "%s/%s/comm", proc_filename, proc_entry->d_name); + if (read_txt_file(filename, comm, sizeof(comm))) + local_sockets_log(ls, "cannot open file: %s\n", filename); + else { + size_t clen = strlen(comm); + if(comm[clen - 1] == '\n') + comm[clen - 1] = '\0'; + } + } + if(!cmdline[0] && ls->config.cmdline) { + snprintfz(filename, sizeof(filename), "%s/%s/cmdline", proc_filename, proc_entry->d_name); + if (read_proc_cmdline(filename, cmdline, sizeof(cmdline))) + local_sockets_log(ls, "cannot open file: %s\n", filename); + else { + local_sockets_fix_cmdline(cmdline); + cmdline_trimmed = trim(cmdline); + } + } + if(!net_ns_inode && ls->config.namespaces) { + snprintfz(filename, sizeof(filename), "%s/%s/ns/net", proc_filename, proc_entry->d_name); + if(local_sockets_read_proc_inode_link(ls, filename, &net_ns_inode, "net")) { + SIMPLE_HASHTABLE_SLOT_NET_NS *sl_ns = simple_hashtable_get_slot_NET_NS(&ls->ns_hashtable, net_ns_inode, (uint64_t *)net_ns_inode, true); + simple_hashtable_set_slot_NET_NS(&ls->ns_hashtable, sl_ns, net_ns_inode, (uint64_t *)net_ns_inode); + } + } + + if(!ps) + ps = aral_callocz(ls->pid_socket_aral); + + ps->inode = inode; + ps->pid = pid; + ps->uid = uid; + ps->net_ns_inode = net_ns_inode; + strncpyz(ps->comm, comm, sizeof(ps->comm) - 1); + + if(ps->cmdline) + freez(ps->cmdline); + + ps->cmdline = cmdline_trimmed ? 
// Returns true when the IPv4 address 'ip' (given in network byte order) belongs to
// one of the non-public ranges listed below.
static inline bool local_sockets_is_ipv4_reserved_address(uint32_t ip) {
    // work on the host-order value, so that shifts select the leading octets
    ip = ntohl(ip);

    return (
        ((ip >> 24) == 10) ||                       // 10.0.0.0/8      private (class A)
        ((ip >> 20) == ((172u << 4) + 1)) ||        // 172.16.0.0/12   private (class B)
        ((ip >> 16) == ((192u << 8) + 168)) ||      // 192.168.0.0/16  private (class C)
        ((ip >> 24) == 127) ||                      // 127.0.0.0/8     loopback
        ((ip >> 24) == 0) ||                        // 0.0.0.0/8       "this network"
        // the previous test compared (ip >> 16), a 16-bit quantity, against 254,
        // so 169.254.0.0/16 could never match
        ((ip >> 16) == ((169u << 8) + 254)) ||      // 169.254.0.0/16  link-local
        // only these two /24s are special purpose; the rest of 192.0.0.0/16 is public space
        ((ip >> 8) == ((192u << 16) + 0)) ||        // 192.0.0.0/24    IETF protocol assignments
        ((ip >> 8) == ((192u << 16) + 2))           // 192.0.2.0/24    TEST-NET-1
    );
}
*local_sockets_address_space(struct socket_endpoint *se) { + if(local_sockets_is_zero_address(se)) + return "zero"; + else if(local_sockets_is_loopback_address(se)) + return "loopback"; + else if(local_sockets_is_multicast_address(se)) + return "multicast"; + else if(local_sockets_is_private_address(se)) + return "private"; + else + return "public"; +} + +// -------------------------------------------------------------------------------------------------------------------- + +static inline void local_sockets_index_listening_port(LS_STATE *ls, LOCAL_SOCKET *n) { + if(n->direction & SOCKET_DIRECTION_LISTEN) { + // for the listening sockets, keep a hashtable with all the local ports + // so that we will be able to detect INBOUND sockets + + SIMPLE_HASHTABLE_SLOT_LISTENING_PORT *sl_port = + simple_hashtable_get_slot_LISTENING_PORT(&ls->listening_ports_hashtable, n->local_port_hash, &n->local_port_key, true); + + struct local_port *port = SIMPLE_HASHTABLE_SLOT_DATA(sl_port); + if(!port) + simple_hashtable_set_slot_LISTENING_PORT(&ls->listening_ports_hashtable, sl_port, n->local_port_hash, &n->local_port_key); + } +} + +static inline bool local_sockets_add_socket(LS_STATE *ls, LOCAL_SOCKET *tmp) { + if(!tmp->inode) return false; + + SIMPLE_HASHTABLE_SLOT_LOCAL_SOCKET *sl = simple_hashtable_get_slot_LOCAL_SOCKET(&ls->sockets_hashtable, tmp->inode, &tmp->inode, true); + LOCAL_SOCKET *n = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if(n) { + local_sockets_log(ls, "inode %" PRIu64" already exists in hashtable - ignoring duplicate", tmp->inode); + return false; + } + + n = aral_mallocz(ls->local_socket_aral); + *n = *tmp; // copy all contents + + // fix the key + n->local_port_key.port = n->local.port; + n->local_port_key.family = n->local.family; + n->local_port_key.protocol = n->local.protocol; + n->local_port_key.net_ns_inode = ls->proc_self_net_ns_inode; + + n->local_ip_hash = XXH3_64bits(&n->local.ip, sizeof(n->local.ip)); + n->remote_ip_hash = XXH3_64bits(&n->remote.ip, 
sizeof(n->remote.ip)); + n->local_port_hash = XXH3_64bits(&n->local_port_key, sizeof(n->local_port_key)); + + // --- look up a pid for it ----------------------------------------------------------------------------------- + + SIMPLE_HASHTABLE_SLOT_PID_SOCKET *sl_pid = simple_hashtable_get_slot_PID_SOCKET(&ls->pid_sockets_hashtable, n->inode, &n->inode, false); + struct pid_socket *ps = SIMPLE_HASHTABLE_SLOT_DATA(sl_pid); + if(ps) { + n->net_ns_inode = ps->net_ns_inode; + n->pid = ps->pid; + + if(ps->uid != UID_UNSET && n->uid == UID_UNSET) + n->uid = ps->uid; + + if(ps->cmdline) + n->cmdline = string_strdupz(ps->cmdline); + + strncpyz(n->comm, ps->comm, sizeof(n->comm) - 1); + } + + // --- index it ----------------------------------------------------------------------------------------------- + + simple_hashtable_set_slot_LOCAL_SOCKET(&ls->sockets_hashtable, sl, n->inode, n); + + if(!local_sockets_is_zero_address(&n->local)) { + // put all the local IPs into the local_ips hashtable + // so, we learn all local IPs the system has + + SIMPLE_HASHTABLE_SLOT_LOCAL_IP *sl_ip = + simple_hashtable_get_slot_LOCAL_IP(&ls->local_ips_hashtable, n->local_ip_hash, &n->local.ip, true); + + union ipv46 *ip = SIMPLE_HASHTABLE_SLOT_DATA(sl_ip); + if(!ip) + simple_hashtable_set_slot_LOCAL_IP(&ls->local_ips_hashtable, sl_ip, n->local_ip_hash, &n->local.ip); + } + + // --- 1st phase for direction detection ---------------------------------------------------------------------- + + if((n->local.protocol == IPPROTO_TCP && n->state == TCP_LISTEN) || + local_sockets_is_zero_address(&n->local) || + local_sockets_is_zero_address(&n->remote)) { + // the socket is either in a TCP LISTEN, or + // the remote address is zero + n->direction |= SOCKET_DIRECTION_LISTEN; + } + else { + // we can't say yet if it is inbound or outboud + // so, mark it as both inbound and outbound + n->direction |= SOCKET_DIRECTION_INBOUND | SOCKET_DIRECTION_OUTBOUND; + } + + // --- index it in LISTENING_PORT 
#ifdef HAVE_LIBMNL

// Opens and binds the NETLINK_INET_DIAG socket stored in ls->nl.
// ls->use_nl is left true only when both steps succeed.
static inline void local_sockets_netlink_init(LS_STATE *ls) {
    ls->use_nl = true;
    ls->nl = mnl_socket_open(NETLINK_INET_DIAG);
    if (!ls->nl) {
        local_sockets_log(ls, "cannot open netlink socket");
        ls->use_nl = false;
        return; // there is no socket to bind; binding a NULL socket would crash
    }

    if (mnl_socket_bind(ls->nl, 0, MNL_SOCKET_AUTOPID) < 0) {
        local_sockets_log(ls, "cannot bind netlink socket");
        ls->use_nl = false;
        // the socket stays in ls->nl, local_sockets_netlink_cleanup() closes it
    }
}

// Closes the netlink socket, if one is open, and clears ls->nl.
static inline void local_sockets_netlink_cleanup(LS_STATE *ls) {
    if(ls->nl) {
        mnl_socket_close(ls->nl);
        ls->nl = NULL;
    }
}

// libmnl callback: converts one inet_diag message into a LOCAL_SOCKET and hands it
// to local_sockets_add_socket(). 'data' is the LS_STATE; the protocol is taken from
// ls->tmp_protocol, which local_sockets_netlink_get_sockets() sets before the dump.
static inline int local_sockets_netlink_cb_data(const struct nlmsghdr *nlh, void *data) {
    LS_STATE *ls = data;

    struct inet_diag_msg *diag_msg = mnl_nlmsg_get_payload(nlh);

    LOCAL_SOCKET n = {
        .inode = diag_msg->idiag_inode,
        .direction = SOCKET_DIRECTION_NONE,
        .state = diag_msg->idiag_state,
        .local = {
            .protocol = ls->tmp_protocol,
            .family = diag_msg->idiag_family,
            // inet_diag reports ports in network byte order; the /proc/net parser
            // stores them in host byte order, so convert to keep both sources consistent
            .port = ntohs(diag_msg->id.idiag_sport),
        },
        .remote = {
            .protocol = ls->tmp_protocol,
            .family = diag_msg->idiag_family,
            .port = ntohs(diag_msg->id.idiag_dport),
        },
        .timer = diag_msg->idiag_timer,
        .retransmits = diag_msg->idiag_retrans,
        .expires = diag_msg->idiag_expires,
        .rqueue = diag_msg->idiag_rqueue,
        .wqueue = diag_msg->idiag_wqueue,
        .uid = diag_msg->idiag_uid,
    };

    // addresses are copied as raw bytes, exactly as the kernel provides them
    if (diag_msg->idiag_family == AF_INET) {
        memcpy(&n.local.ip.ipv4, diag_msg->id.idiag_src, sizeof(n.local.ip.ipv4));
        memcpy(&n.remote.ip.ipv4, diag_msg->id.idiag_dst, sizeof(n.remote.ip.ipv4));
    }
    else if (diag_msg->idiag_family == AF_INET6) {
        memcpy(&n.local.ip.ipv6, diag_msg->id.idiag_src, sizeof(n.local.ip.ipv6));
        memcpy(&n.remote.ip.ipv6, diag_msg->id.idiag_dst, sizeof(n.remote.ip.ipv6));
    }

    local_sockets_add_socket(ls, &n);

    return MNL_CB_OK;
}

// Requests a dump of all sockets (all states) of the given family and protocol over
// ls->nl and feeds every reply to local_sockets_netlink_cb_data().
// Returns false when the request cannot be sent or the dump ends with an error.
static inline bool local_sockets_netlink_get_sockets(LS_STATE *ls, uint16_t family, uint16_t protocol) {
    ls->tmp_protocol = protocol;

    char buf[MNL_SOCKET_BUFFER_SIZE];
    struct nlmsghdr *nlh;
    struct inet_diag_req_v2 req;
    unsigned int seq, portid = mnl_socket_get_portid(ls->nl);

    memset(&req, 0, sizeof(req));
    req.sdiag_family = family;
    req.sdiag_protocol = protocol;
    req.idiag_states = -1; // all bits set: every socket state

    nlh = mnl_nlmsg_put_header(buf);
    nlh->nlmsg_type = SOCK_DIAG_BY_FAMILY;
    nlh->nlmsg_flags = NLM_F_DUMP | NLM_F_REQUEST;
    nlh->nlmsg_seq = seq = time(NULL);
    mnl_nlmsg_put_extra_header(nlh, sizeof(req));
    memcpy(mnl_nlmsg_get_payload(nlh), &req, sizeof(req));

    if (mnl_socket_sendto(ls->nl, nlh, nlh->nlmsg_len) < 0) {
        local_sockets_log(ls, "mnl_socket_send failed");
        return false;
    }

    ssize_t ret;
    while ((ret = mnl_socket_recvfrom(ls->nl, buf, sizeof(buf))) > 0) {
        ret = mnl_cb_run(buf, ret, seq, portid, local_sockets_netlink_cb_data, ls);
        if (ret <= MNL_CB_STOP)
            break;
    }
    if (ret == -1) {
        local_sockets_log(ls, "mnl_socket_recvfrom");
        return false;
    }

    return true;
}
#endif // HAVE_LIBMNL
105 : 155; + size_t counter = 0; + + // Read line by line + while ((read = getline(&line, &len, fp)) != -1) { + if(counter++ == 0) continue; // skip the first line + + if(read < min_line_length) { + local_sockets_log(ls, "too small line No %zu of filename '%s': %s", counter, filename, line); + continue; + } + + LOCAL_SOCKET n = { + .direction = SOCKET_DIRECTION_NONE, + .local = { + .family = family, + .protocol = protocol, + }, + .remote = { + .family = family, + .protocol = protocol, + }, + .uid = UID_UNSET, + }; + + char *words[32]; + size_t num_words = quoted_strings_splitter(line, words, 32, is_space); + // char *sl_txt = get_word(words, num_words, 0); + char *local_ip_txt = get_word(words, num_words, 1); + char *local_port_txt = get_word(words, num_words, 2); + char *remote_ip_txt = get_word(words, num_words, 3); + char *remote_port_txt = get_word(words, num_words, 4); + char *state_txt = get_word(words, num_words, 5); + char *tx_queue_txt = get_word(words, num_words, 6); + char *rx_queue_txt = get_word(words, num_words, 7); + char *tr_txt = get_word(words, num_words, 8); + char *tm_when_txt = get_word(words, num_words, 9); + char *retrans_txt = get_word(words, num_words, 10); + char *uid_txt = get_word(words, num_words, 11); + // char *timeout_txt = get_word(words, num_words, 12); + char *inode_txt = get_word(words, num_words, 13); + + if(!local_ip_txt || !local_port_txt || !remote_ip_txt || !remote_port_txt || !state_txt || + !tx_queue_txt || !rx_queue_txt || !tr_txt || !tm_when_txt || !retrans_txt || !uid_txt || !inode_txt) { + local_sockets_log(ls, "cannot parse ipv4 line No %zu of filename '%s'", counter, filename); + continue; + } + + n.local.port = str2uint32_hex(local_port_txt, NULL); + n.remote.port = str2uint32_hex(remote_port_txt, NULL); + n.state = str2uint32_hex(state_txt, NULL); + n.wqueue = str2uint32_hex(tx_queue_txt, NULL); + n.rqueue = str2uint32_hex(rx_queue_txt, NULL); + n.timer = str2uint32_hex(tr_txt, NULL); + n.expires = 
str2uint32_hex(tm_when_txt, NULL); + n.retransmits = str2uint32_hex(retrans_txt, NULL); + n.uid = str2uint32_t(uid_txt, NULL); + n.inode = str2uint64_t(inode_txt, NULL); + + if(family == AF_INET) { + n.local.ip.ipv4 = str2uint32_hex(local_ip_txt, NULL); + n.remote.ip.ipv4 = str2uint32_hex(remote_ip_txt, NULL); + } + else if(family == AF_INET6) { + ipv6_to_in6_addr(local_ip_txt, &n.local.ip.ipv6); + ipv6_to_in6_addr(remote_ip_txt, &n.remote.ip.ipv6); + } + + local_sockets_add_socket(ls, &n); + } + + fclose(fp); + + if (line) + free(line); // no freez() here because getline() may resize + + return true; +} + +// -------------------------------------------------------------------------------------------------------------------- + +static inline void local_sockets_detect_directions(LS_STATE *ls) { + for(SIMPLE_HASHTABLE_SLOT_LOCAL_SOCKET *sl = simple_hashtable_first_read_only_LOCAL_SOCKET(&ls->sockets_hashtable); + sl ; + sl = simple_hashtable_next_read_only_LOCAL_SOCKET(&ls->sockets_hashtable, sl)) { + LOCAL_SOCKET *n = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if (!n) continue; + + if ((n->direction & (SOCKET_DIRECTION_INBOUND|SOCKET_DIRECTION_OUTBOUND)) != + (SOCKET_DIRECTION_INBOUND|SOCKET_DIRECTION_OUTBOUND)) + continue; + + // check if the local port is one of our listening ports + { + SIMPLE_HASHTABLE_SLOT_LISTENING_PORT *sl_port = + simple_hashtable_get_slot_LISTENING_PORT(&ls->listening_ports_hashtable, n->local_port_hash, &n->local_port_key, false); + + struct local_port *port = SIMPLE_HASHTABLE_SLOT_DATA(sl_port); // do not reference this pointer - is invalid + if(port) { + // the local port of this socket is a port we listen to + n->direction &= ~SOCKET_DIRECTION_OUTBOUND; + } + else + n->direction &= ~SOCKET_DIRECTION_INBOUND; + } + + // check if the remote IP is one of our local IPs + { + SIMPLE_HASHTABLE_SLOT_LOCAL_IP *sl_ip = + simple_hashtable_get_slot_LOCAL_IP(&ls->local_ips_hashtable, n->remote_ip_hash, &n->remote.ip, false); + + union ipv46 *d = 
SIMPLE_HASHTABLE_SLOT_DATA(sl_ip); + if (d) { + // the remote IP of this socket is one of our local IPs + if(n->direction & SOCKET_DIRECTION_INBOUND) { + n->direction &= ~SOCKET_DIRECTION_INBOUND; + n->direction |= SOCKET_DIRECTION_LOCAL_INBOUND; + } + else if(n->direction & SOCKET_DIRECTION_OUTBOUND) { + n->direction &= ~SOCKET_DIRECTION_OUTBOUND; + n->direction |= SOCKET_DIRECTION_LOCAL_OUTBOUND; + } + continue; + } + } + + if (local_sockets_is_loopback_address(&n->local) || + local_sockets_is_loopback_address(&n->remote)) { + // both IP addresses are loopback + if(n->direction & SOCKET_DIRECTION_INBOUND) { + n->direction &= ~SOCKET_DIRECTION_INBOUND; + n->direction |= SOCKET_DIRECTION_LOCAL_INBOUND; + } + else if(n->direction & SOCKET_DIRECTION_OUTBOUND) { + n->direction &= ~SOCKET_DIRECTION_OUTBOUND; + n->direction |= SOCKET_DIRECTION_LOCAL_OUTBOUND; + } + } + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +static inline void local_sockets_init(LS_STATE *ls) { + simple_hashtable_init_NET_NS(&ls->ns_hashtable, 1024); + simple_hashtable_init_PID_SOCKET(&ls->pid_sockets_hashtable, 65535); + simple_hashtable_init_LOCAL_SOCKET(&ls->sockets_hashtable, 65535); + simple_hashtable_init_LOCAL_IP(&ls->local_ips_hashtable, 4096); + simple_hashtable_init_LISTENING_PORT(&ls->listening_ports_hashtable, 4096); + + ls->local_socket_aral = aral_create( + "local-sockets", + sizeof(LOCAL_SOCKET), + 65536, + 65536, + NULL, NULL, NULL, false, true); + + ls->pid_socket_aral = aral_create( + "pid-sockets", + sizeof(struct pid_socket), + 65536, + 65536, + NULL, NULL, NULL, false, true); +} + +static inline void local_sockets_cleanup(LS_STATE *ls) { + // free the sockets hashtable data + for(SIMPLE_HASHTABLE_SLOT_LOCAL_SOCKET *sl = simple_hashtable_first_read_only_LOCAL_SOCKET(&ls->sockets_hashtable); + sl; + sl = simple_hashtable_next_read_only_LOCAL_SOCKET(&ls->sockets_hashtable, sl)) { + LOCAL_SOCKET 
*n = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if(!n) continue; + + string_freez(n->cmdline); + aral_freez(ls->local_socket_aral, n); + } + + // free the pid_socket hashtable data + for(SIMPLE_HASHTABLE_SLOT_PID_SOCKET *sl = simple_hashtable_first_read_only_PID_SOCKET(&ls->pid_sockets_hashtable); + sl; + sl = simple_hashtable_next_read_only_PID_SOCKET(&ls->pid_sockets_hashtable, sl)) { + struct pid_socket *ps = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if(!ps) continue; + + freez(ps->cmdline); + aral_freez(ls->pid_socket_aral, ps); + } + + // free the hashtable + simple_hashtable_destroy_NET_NS(&ls->ns_hashtable); + simple_hashtable_destroy_PID_SOCKET(&ls->pid_sockets_hashtable); + simple_hashtable_destroy_LISTENING_PORT(&ls->listening_ports_hashtable); + simple_hashtable_destroy_LOCAL_IP(&ls->local_ips_hashtable); + simple_hashtable_destroy_LOCAL_SOCKET(&ls->sockets_hashtable); + + aral_destroy(ls->local_socket_aral); + aral_destroy(ls->pid_socket_aral); +} + +// -------------------------------------------------------------------------------------------------------------------- + +static inline void local_sockets_do_family_protocol(LS_STATE *ls, const char *filename, uint16_t family, uint16_t protocol) { +#ifdef HAVE_LIBMNL + if(ls->use_nl) { + ls->use_nl = local_sockets_netlink_get_sockets(ls, family, protocol); + + if(ls->use_nl) + return; + } +#endif + + local_sockets_read_proc_net_x(ls, filename, family, protocol); +} + +static inline void local_sockets_read_sockets_from_proc(LS_STATE *ls) { + char path[FILENAME_MAX + 1]; + + if(ls->config.namespaces) { + snprintfz(path, sizeof(path), "%s/proc/self/ns/net", ls->config.host_prefix); + local_sockets_read_proc_inode_link(ls, path, &ls->proc_self_net_ns_inode, "net"); + } + + if(ls->config.cmdline || ls->config.comm || ls->config.pid || ls->config.namespaces) { + snprintfz(path, sizeof(path), "%s/proc", ls->config.host_prefix); + local_sockets_find_all_sockets_in_proc(ls, path); + } + + if(ls->config.tcp4) { + snprintfz(path, 
sizeof(path), "%s/proc/net/tcp", ls->config.host_prefix); + local_sockets_do_family_protocol(ls, path, AF_INET, IPPROTO_TCP); + } + + if(ls->config.udp4) { + snprintfz(path, sizeof(path), "%s/proc/net/udp", ls->config.host_prefix); + local_sockets_do_family_protocol(ls, path, AF_INET, IPPROTO_UDP); + } + + if(ls->config.tcp6) { + snprintfz(path, sizeof(path), "%s/proc/net/tcp6", ls->config.host_prefix); + local_sockets_do_family_protocol(ls, path, AF_INET6, IPPROTO_TCP); + } + + if(ls->config.udp6) { + snprintfz(path, sizeof(path), "%s/proc/net/udp6", ls->config.host_prefix); + local_sockets_do_family_protocol(ls, path, AF_INET6, IPPROTO_UDP); + } +} + +// -------------------------------------------------------------------------------------------------------------------- + +struct local_sockets_child_work { + int fd; + uint64_t net_ns_inode; +}; + +static inline void local_sockets_send_to_parent(struct local_socket_state *ls __maybe_unused, struct local_socket *n, void *data) { + struct local_sockets_child_work *cw = data; + int fd = cw->fd; + + if(n->net_ns_inode != cw->net_ns_inode) + return; + + // local_sockets_log(ls, "child is sending inode %"PRIu64" of namespace %"PRIu64, n->inode, n->net_ns_inode); + + if(write(fd, n, sizeof(*n)) != sizeof(*n)) + local_sockets_log(ls, "failed to write local socket to pipe"); + + size_t len = n->cmdline ? 
string_strlen(n->cmdline) + 1 : 0; + if(write(fd, &len, sizeof(len)) != sizeof(len)) + local_sockets_log(ls, "failed to write cmdline length to pipe"); + + if(len) + if(write(fd, string2str(n->cmdline), len) != (ssize_t)len) + local_sockets_log(ls, "failed to write cmdline to pipe"); +} + +static inline bool local_sockets_get_namespace_sockets(LS_STATE *ls, struct pid_socket *ps, pid_t *pid) { + char filename[1024]; + snprintfz(filename, sizeof(filename), "%s/proc/%d/ns/net", ls->config.host_prefix, ps->pid); + + // verify the pid is in the target namespace + int fd = open(filename, O_RDONLY | O_CLOEXEC); + if (fd == -1) { + local_sockets_log(ls, "cannot open file '%s'", filename); + return false; + } + + struct stat statbuf; + if (fstat(fd, &statbuf) == -1) { + close(fd); + local_sockets_log(ls, "failed to get file statistics for '%s'", filename); + return false; + } + + if (statbuf.st_ino != ps->net_ns_inode) { + close(fd); + local_sockets_log(ls, "pid %d is not in the wanted network namespace", ps->pid); + return false; + } + + int pipefd[2]; + if (pipe(pipefd) != 0) { + local_sockets_log(ls, "cannot create pipe"); + close(fd); + return false; + } + + *pid = fork(); + if (*pid == 0) { + // Child process + close(pipefd[0]); + + // local_sockets_log(ls, "child is here for inode %"PRIu64" and namespace %"PRIu64, ps->inode, ps->net_ns_inode); + + struct local_sockets_child_work cw = { + .net_ns_inode = ps->net_ns_inode, + .fd = pipefd[1], + }; + + ls->config.host_prefix = ""; // we need the /proc of the container + ls->config.cb = local_sockets_send_to_parent; + ls->config.data = &cw; + ls->config.cmdline = false; // we have these already + ls->config.comm = false; // we have these already + ls->config.pid = false; // we have these already + ls->config.namespaces = false; + ls->proc_self_net_ns_inode = ps->net_ns_inode; + + + // switch namespace + if (setns(fd, CLONE_NEWNET) == -1) { + local_sockets_log(ls, "failed to switch network namespace at child process"); + 
exit(EXIT_FAILURE); + } + +#ifdef HAVE_LIBMNL + local_sockets_netlink_cleanup(ls); + local_sockets_netlink_init(ls); +#endif + + // read all sockets from /proc + local_sockets_read_sockets_from_proc(ls); + + // send all sockets to parent + local_sockets_foreach_local_socket_call_cb(ls); + + // send the terminating socket + struct local_socket zero = { + .net_ns_inode = ps->net_ns_inode, + }; + local_sockets_send_to_parent(ls, &zero, &cw); + +#ifdef HAVE_LIBMNL + local_sockets_netlink_cleanup(ls); +#endif + + close(pipefd[1]); // Close write end of pipe + exit(EXIT_SUCCESS); + } + // parent + + close(fd); + close(pipefd[1]); + + size_t received = 0; + struct local_socket buf; + while(read(pipefd[0], &buf, sizeof(buf)) == sizeof(buf)) { + size_t len = 0; + if(read(pipefd[0], &len, sizeof(len)) != sizeof(len)) + local_sockets_log(ls, "failed to read cmdline length from pipe"); + + if(len) { + char cmdline[len + 1]; + if(read(pipefd[0], cmdline, len) != (ssize_t)len) + local_sockets_log(ls, "failed to read cmdline from pipe"); + else { + cmdline[len] = '\0'; + buf.cmdline = string_strdupz(cmdline); + } + } + else + buf.cmdline = NULL; + + received++; + + struct local_socket zero = { + .net_ns_inode = ps->net_ns_inode, + }; + if(memcmp(&buf, &zero, sizeof(buf)) == 0) { + // the terminator + break; + } + + SIMPLE_HASHTABLE_SLOT_LOCAL_SOCKET *sl = simple_hashtable_get_slot_LOCAL_SOCKET(&ls->sockets_hashtable, buf.inode, &buf, true); + LOCAL_SOCKET *n = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if(n) { + string_freez(buf.cmdline); + +// local_sockets_log(ls, +// "ns inode %" PRIu64" (comm: '%s', pid: %u, ns: %"PRIu64") already exists in hashtable (comm: '%s', pid: %u, ns: %"PRIu64") - ignoring duplicate", +// buf.inode, buf.comm, buf.pid, buf.net_ns_inode, n->comm, n->pid, n->net_ns_inode); + continue; + } + else { + n = aral_mallocz(ls->local_socket_aral); + memcpy(n, &buf, sizeof(*n)); + simple_hashtable_set_slot_LOCAL_SOCKET(&ls->sockets_hashtable, sl, n->inode, n); + + 
local_sockets_index_listening_port(ls, n); + } + } + + close(pipefd[0]); + + return received > 0; +} + +static inline void local_socket_waitpid(LS_STATE *ls, pid_t pid) { + if(!pid) return; + + int status; + waitpid(pid, &status, 0); + + if (WIFEXITED(status) && WEXITSTATUS(status) != 0) + local_sockets_log(ls, "Child exited with status %d", WEXITSTATUS(status)); + else if (WIFSIGNALED(status)) + local_sockets_log(ls, "Child terminated by signal %d", WTERMSIG(status)); +} + +static inline void local_sockets_namespaces(LS_STATE *ls) { + pid_t children[5] = { 0 }; + size_t last_child = 0; + + for(SIMPLE_HASHTABLE_SLOT_NET_NS *sl = simple_hashtable_first_read_only_NET_NS(&ls->ns_hashtable); + sl; + sl = simple_hashtable_next_read_only_NET_NS(&ls->ns_hashtable, sl)) { + uint64_t inode = (uint64_t)SIMPLE_HASHTABLE_SLOT_DATA(sl); + + if(inode == ls->proc_self_net_ns_inode) + continue; + + // find a pid_socket that has this namespace + for(SIMPLE_HASHTABLE_SLOT_PID_SOCKET *sl_pid = simple_hashtable_first_read_only_PID_SOCKET(&ls->pid_sockets_hashtable) ; + sl_pid ; + sl_pid = simple_hashtable_next_read_only_PID_SOCKET(&ls->pid_sockets_hashtable, sl_pid)) { + struct pid_socket *ps = SIMPLE_HASHTABLE_SLOT_DATA(sl_pid); + if(!ps || ps->net_ns_inode != inode) continue; + + if(++last_child >= 5) + last_child = 0; + + local_socket_waitpid(ls, children[last_child]); + children[last_child] = 0; + + // now we have a pid that has the same namespace inode + if(local_sockets_get_namespace_sockets(ls, ps, &children[last_child])) + break; + } + } + + for(size_t i = 0; i < 5 ;i++) + local_socket_waitpid(ls, children[i]); +} + +// -------------------------------------------------------------------------------------------------------------------- + +static inline void local_sockets_process(LS_STATE *ls) { + +#ifdef HAVE_LIBMNL + local_sockets_netlink_init(ls); +#endif + + ls->config.host_prefix = netdata_configured_host_prefix; + + // initialize our hashtables + local_sockets_init(ls); + 
// Formats the IPv4 address 'ip' (network byte order, as stored in socket endpoints)
// as dotted decimal into 'dst'.
// 'dst' must have room for at least INET_ADDRSTRLEN bytes.
static inline void ipv4_address_to_txt(uint32_t ip, char *dst) {
    // take the octets in memory order: this is the network order on every host,
    // whereas extracting them by shifting yields them reversed on big-endian machines
    uint8_t octets[4];
    memcpy(octets, &ip, sizeof(octets));

    snprintf(dst, INET_ADDRSTRLEN, "%u.%u.%u.%u",
             (unsigned)octets[0], (unsigned)octets[1], (unsigned)octets[2], (unsigned)octets[3]);
}
SIMPLE_HASHTABLE_GROUPNAMES_CACHE ht; +} GROUPNAMES_CACHE; + +static inline STRING *system_groupnames_cache_lookup_gid(GROUPNAMES_CACHE *gc, gid_t gid) { + spinlock_lock(&gc->spinlock); + + SIMPLE_HASHTABLE_SLOT_GROUPNAMES_CACHE *sl = simple_hashtable_get_slot_GROUPNAMES_CACHE(&gc->ht, gid, &gid, true); + STRING *g = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if(!g) { + char tmp[1024 + 1]; + struct group grp, *result = NULL; + + if (getgrgid_r(gid, &grp, tmp, sizeof(tmp), &result) != 0 || !result || !grp.gr_name || !(*grp.gr_name)) { + char name[50]; + snprintfz(name, sizeof(name), "%u", gid); + g = string_strdupz(name); + } + else + g = string_strdupz(grp.gr_name); + + simple_hashtable_set_slot_GROUPNAMES_CACHE(&gc->ht, sl, gid, g); + } + + g = string_dup(g); + spinlock_unlock(&gc->spinlock); + return g; +} + +static inline GROUPNAMES_CACHE *system_groupnames_cache_init(void) { + GROUPNAMES_CACHE *gc = callocz(1, sizeof(*gc)); + spinlock_init(&gc->spinlock); + simple_hashtable_init_GROUPNAMES_CACHE(&gc->ht, 100); + return gc; +} + +static inline void system_groupnames_cache_destroy(GROUPNAMES_CACHE *gc) { + spinlock_lock(&gc->spinlock); + + for(SIMPLE_HASHTABLE_SLOT_GROUPNAMES_CACHE *sl = simple_hashtable_first_read_only_GROUPNAMES_CACHE(&gc->ht); + sl; + sl = simple_hashtable_next_read_only_GROUPNAMES_CACHE(&gc->ht, sl)) { + STRING *u = SIMPLE_HASHTABLE_SLOT_DATA(sl); + string_freez(u); + } + + simple_hashtable_destroy_GROUPNAMES_CACHE(&gc->ht); + freez(gc); +} + +#endif //NETDATA_SYSTEM_GROUPS_H diff --git a/src/libnetdata/maps/system-users.h b/src/libnetdata/maps/system-users.h new file mode 100644 index 000000000..5f7dfae1a --- /dev/null +++ b/src/libnetdata/maps/system-users.h @@ -0,0 +1,67 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SYSTEM_USERS_H +#define NETDATA_SYSTEM_USERS_H + +#include "libnetdata/libnetdata.h" + +// -------------------------------------------------------------------------------------------------------------------- +// 
hashtable for caching uid to username mappings +// key is the uid, value is username (STRING) + +#define SIMPLE_HASHTABLE_VALUE_TYPE STRING +#define SIMPLE_HASHTABLE_NAME _USERNAMES_CACHE +#include "libnetdata/simple_hashtable.h" + +typedef struct usernames_cache { + SPINLOCK spinlock; + SIMPLE_HASHTABLE_USERNAMES_CACHE ht; +} USERNAMES_CACHE; + +static inline STRING *system_usernames_cache_lookup_uid(USERNAMES_CACHE *uc, uid_t uid) { + spinlock_lock(&uc->spinlock); + + SIMPLE_HASHTABLE_SLOT_USERNAMES_CACHE *sl = simple_hashtable_get_slot_USERNAMES_CACHE(&uc->ht, uid, &uid, true); + STRING *u = SIMPLE_HASHTABLE_SLOT_DATA(sl); + if(!u) { + char tmp[1024 + 1]; + struct passwd pw, *result = NULL; + + if (getpwuid_r(uid, &pw, tmp, sizeof(tmp), &result) != 0 || !result || !pw.pw_name || !(*pw.pw_name)) { + char name[50]; + snprintfz(name, sizeof(name), "%u", uid); + u = string_strdupz(name); + } + else + u = string_strdupz(pw.pw_name); + + simple_hashtable_set_slot_USERNAMES_CACHE(&uc->ht, sl, uid, u); + } + + u = string_dup(u); + spinlock_unlock(&uc->spinlock); + return u; +} + +static inline USERNAMES_CACHE *system_usernames_cache_init(void) { + USERNAMES_CACHE *uc = callocz(1, sizeof(*uc)); + spinlock_init(&uc->spinlock); + simple_hashtable_init_USERNAMES_CACHE(&uc->ht, 100); + return uc; +} + +static inline void system_usernames_cache_destroy(USERNAMES_CACHE *uc) { + spinlock_lock(&uc->spinlock); + + for(SIMPLE_HASHTABLE_SLOT_USERNAMES_CACHE *sl = simple_hashtable_first_read_only_USERNAMES_CACHE(&uc->ht); + sl; + sl = simple_hashtable_next_read_only_USERNAMES_CACHE(&uc->ht, sl)) { + STRING *u = SIMPLE_HASHTABLE_SLOT_DATA(sl); + string_freez(u); + } + + simple_hashtable_destroy_USERNAMES_CACHE(&uc->ht); + freez(uc); +} + +#endif //NETDATA_SYSTEM_USERS_H diff --git a/src/libnetdata/onewayalloc/README.md b/src/libnetdata/onewayalloc/README.md new file mode 100644 index 000000000..082085db0 --- /dev/null +++ b/src/libnetdata/onewayalloc/README.md @@ -0,0 +1,75 @@ +<!-- 
+title: "One Way Allocator" +custom_edit_url: "https://github.com/netdata/netdata/edit/master/src/libnetdata/onewayalloc/README.md" +sidebar_label: "One way allocator" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# One Way Allocator + +This is a very fast single-threaded-only memory allocator, that minimizes system calls +when a lot of memory allocations need to be made to perform a task, all of which +can be freed together when the task finishes. + +It has been designed to be used for netdata context queries. + +For netdata to perform a context query, it builds a virtual chart, a chart that contains +all the dimensions of the charts having the same context. This process requires allocating +several structures for each of the dimensions to attach them to the virtual chart. All +these data can be freed immediately after the query finishes. + +## How it works + +1. The caller calls `ONEWAYALLOC *owa = onewayalloc_create(sizehint)` to create an OWA. + Internally this allocates the first memory buffer with size >= `sizehint`. + If `sizehint` is zero, it will allocate 1 hardware page (usually 4kb). + No need to check for success or failure. As with `mallocz()` in netdata, a `fatal()` + will be called if the allocation fails - although this will never fail, since Linux + does not really check if there is memory available for `mmap()` calls. + +2. The caller can then perform any number of the following calls to acquire memory: + - `onewayalloc_mallocz(owa, size)`, similar to `mallocz()` + - `onewayalloc_callocz(owa, nmemb, size)`, similar to `callocz()` + - `onewayalloc_strdupz(owa, string)`, similar to `strdupz()` + - `onewayalloc_memdupz(owa, ptr, size)`, similar to `mallocz()` and then `memcpy()` + +3. Once the caller has done all the work with the allocated buffers, all memory allocated + can be freed with `onewayalloc_destroy(owa)`. + +## How faster it is? 
+ +On modern hardware, for any single query the performance improvement is marginal and not +noticeable at all. + +We performed the following tests using the same huge context query (1000 charts, +100 dimensions each = 100k dimensions) + +1. using `mallocz()`, 1 caller, 256 queries (sequential) +2. using `mallocz()`, 256 callers, 1 query each (parallel) +3. using `OWA`, 1 caller, 256 queries (sequential) +4. using `OWA`, 256 callers, 1 query each (parallel) + +Netdata was configured to use 24 web threads on the 24 core server we used. + +The results are as follows: + +### sequential test + +branch|transactions|time to complete|transaction rate|average response time|min response time|max response time +:---:|:---:|:---:|:---:|:---:|:---:|:---:| +`malloc()`|256|322.35s|0.79/sec|1.26s|1.01s|1.87s +`OWA`|256|310.19s|0.83/sec|1.21s|1.04s|1.63s + +For a single query, the improvement is just marginal and not noticeable at all. + +### parallel test + +branch|transactions|time to complete|transaction rate|average response time|min response time|max response time +:---:|:---:|:---:|:---:|:---:|:---:|:---:| +`malloc()`|256|84.72s|3.02/sec|68.43s|50.20s|84.71s +`OWA`|256|39.35s|6.51/sec|34.48s|20.55s|39.34s + +For parallel workload, like the one executed by netdata.cloud, `OWA` provides a 54% overall speed improvement (more than double the overall +user-experienced speed, including the data query itself). 
diff --git a/libnetdata/onewayalloc/onewayalloc.c b/src/libnetdata/onewayalloc/onewayalloc.c index 05c9f2a9d..98ab1835b 100644 --- a/libnetdata/onewayalloc/onewayalloc.c +++ b/src/libnetdata/onewayalloc/onewayalloc.c @@ -178,8 +178,6 @@ void onewayalloc_freez(ONEWAYALLOC *owa __maybe_unused, const void *ptr __maybe_ // let's free it with the system allocator netdata_log_error("ONEWAYALLOC: request to free address 0x%p that is not allocated by this OWA", ptr); #endif - - return; } void *onewayalloc_doublesize(ONEWAYALLOC *owa, const void *src, size_t oldsize) { diff --git a/libnetdata/onewayalloc/onewayalloc.h b/src/libnetdata/onewayalloc/onewayalloc.h index a415b063b..a415b063b 100644 --- a/libnetdata/onewayalloc/onewayalloc.h +++ b/src/libnetdata/onewayalloc/onewayalloc.h diff --git a/libnetdata/os.c b/src/libnetdata/os.c index e4624be69..51001aa25 100644 --- a/libnetdata/os.c +++ b/src/libnetdata/os.c @@ -151,11 +151,11 @@ unsigned long read_cpuset_cpus(const char *filename, long system_cpus) { static size_t buf_size = 0; if(!buf) { - buf_size = 100U + 6 * system_cpus; // taken from kernel/cgroup/cpuset.c - buf = mallocz(buf_size + 1); + buf_size = 100U + 6 * system_cpus + 1; // taken from kernel/cgroup/cpuset.c + buf = mallocz(buf_size); } - int ret = read_file(filename, buf, buf_size); + int ret = read_txt_file(filename, buf, buf_size); if(!ret) { char *s = buf; diff --git a/libnetdata/os.h b/src/libnetdata/os.h index 197548b0d..0e4a36963 100644 --- a/libnetdata/os.h +++ b/src/libnetdata/os.h @@ -37,7 +37,7 @@ int getsysctl(const char *name, int *mib, size_t miblen, void *ptr, size_t *len) #if __APPLE__ #include <sys/sysctl.h> -#include "endian.h" +#include "byteorder.h" #define GETSYSCTL_BY_NAME(name, var) getsysctl_by_name(name, &(var), sizeof(var)) int getsysctl_by_name(const char *name, void *ptr, size_t len); diff --git a/src/libnetdata/popen/README.md b/src/libnetdata/popen/README.md new file mode 100644 index 000000000..ca4877c1a --- /dev/null +++ 
b/src/libnetdata/popen/README.md @@ -0,0 +1,15 @@ +<!-- +title: "popen" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/popen/README.md +sidebar_label: "popen" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# popen + +Process management library + + + diff --git a/libnetdata/popen/popen.c b/src/libnetdata/popen/popen.c index 5f8bd2b4a..5f8bd2b4a 100644 --- a/libnetdata/popen/popen.c +++ b/src/libnetdata/popen/popen.c diff --git a/libnetdata/popen/popen.h b/src/libnetdata/popen/popen.h index 4f86158bc..4f86158bc 100644 --- a/libnetdata/popen/popen.h +++ b/src/libnetdata/popen/popen.h diff --git a/src/libnetdata/procfile/README.md b/src/libnetdata/procfile/README.md new file mode 100644 index 000000000..9f93f8a4f --- /dev/null +++ b/src/libnetdata/procfile/README.md @@ -0,0 +1,71 @@ +<!-- +title: "PROCFILE" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/procfile/README.md +sidebar_label: "Procfile" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# PROCFILE + +procfile is a library for reading text data files (e.g. `/proc` files) in the fastest possible way. + +## How it works + +The library automatically adapts (through the iterations) its memory so that each file +is read with a single `read()` call. + +Then the library splits the file into words, using the supplied separators. +The library also supports quoted words (i.e. strings within which the separators are ignored). + +### Initialization + +Initially the caller: + +- calls `procfile_open()` to open the file and allocate the structures needed. + +### Iterations + +For each iteration, the caller: + +- calls `procfile_readall()` to read updated contents. + This call also rewinds (`lseek()` to 0) before reading it. 
+ + For every file, a [BUFFER](https://github.com/netdata/netdata/blob/master/src/libnetdata/buffer/README.md) is used that is automatically adjusted to fit the entire + contents of the file. So the file is read with a single `read()` call (providing atomicity / consistency when + the data are read from the kernel). + + Once the data are read, 2 arrays of pointers are updated: + + - a `words` array, pointing to each word in the data read + - a `lines` array, pointing to the first word for each line + + This is highly optimized. Both arrays are automatically adjusted to + fit all contents and are updated in a single pass on the data. + + The library provides a number of macros: + + - `procfile_lines()` returns the # of lines read + - `procfile_linewords()` returns the # of words in the given line + - `procfile_word()` returns a pointer to the given word # + - `procfile_line()` returns a pointer to the first word of the given line # + - `procfile_lineword()` returns a pointer to the given word # of the given line # + +### Cleanup + +When the caller exits: + +- calls `procfile_free()` to close the file and free all memory used. + +### Performance + +- a **Raspberry Pi 1** (the oldest single core one) can process 5.000+ `/proc` files per second. +- a **J1900 Celeron** processor can process 23.000+ `/proc` files per second per core. + +To achieve this kind of performance, the library tries to work in batches so that the code +and the data are inside the processor's caches. + +This library is extensively used in Netdata and its plugins. 
+ + diff --git a/libnetdata/procfile/procfile.c b/src/libnetdata/procfile/procfile.c index 0bc731d68..d9ebf4c93 100644 --- a/libnetdata/procfile/procfile.c +++ b/src/libnetdata/procfile/procfile.c @@ -8,7 +8,7 @@ #define PFLINES_INCREASE_STEP 200 #define PROCFILE_INCREMENT_BUFFER 4096 -int procfile_open_flags = O_RDONLY; +int procfile_open_flags = O_RDONLY | O_CLOEXEC; int procfile_adaptive_initial_allocation = 0; diff --git a/libnetdata/procfile/procfile.h b/src/libnetdata/procfile/procfile.h index 8db5b45f4..8db5b45f4 100644 --- a/libnetdata/procfile/procfile.h +++ b/src/libnetdata/procfile/procfile.h diff --git a/web/rtc/README.md b/src/libnetdata/query_progress/README.md index e69de29bb..e69de29bb 100644 --- a/web/rtc/README.md +++ b/src/libnetdata/query_progress/README.md diff --git a/src/libnetdata/query_progress/progress.c b/src/libnetdata/query_progress/progress.c new file mode 100644 index 000000000..4ddb45135 --- /dev/null +++ b/src/libnetdata/query_progress/progress.c @@ -0,0 +1,660 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "progress.h" + +#define PROGRESS_CACHE_SIZE 200 + +// ---------------------------------------------------------------------------- +// hashtable for HASHED_KEY + +// cleanup hashtable defines +#include "../simple_hashtable_undef.h" + +struct query; +#define SIMPLE_HASHTABLE_VALUE_TYPE struct query +#define SIMPLE_HASHTABLE_KEY_TYPE uuid_t +#define SIMPLE_HASHTABLE_NAME _QUERY +#define SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION query_transaction +#define SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION query_compare_keys +#include "../simple_hashtable.h" + +// ---------------------------------------------------------------------------- + +typedef struct query { + uuid_t transaction; + + BUFFER *query; + BUFFER *payload; + BUFFER *client; + + usec_t started_ut; + usec_t finished_ut; + + HTTP_REQUEST_MODE mode; + HTTP_ACL acl; + + uint32_t sent_size; + uint32_t response_size; + short response_code; + + bool indexed; + + uint32_t 
updates; + + usec_t duration_ut; + size_t all; + size_t done; + + struct query *prev, *next; +} QUERY_PROGRESS; + +static inline uuid_t *query_transaction(QUERY_PROGRESS *qp) { + return qp ? &qp->transaction : NULL; +} + +static inline bool query_compare_keys(uuid_t *t1, uuid_t *t2) { + if(t1 == t2 || (t1 && t2 && memcmp(t1, t2, sizeof(uuid_t)) == 0)) + return true; + + return false; +} + +static struct progress { + SPINLOCK spinlock; + bool initialized; + + struct { + size_t available; + QUERY_PROGRESS *list; + } cache; + + SIMPLE_HASHTABLE_QUERY hashtable; + +} progress = { + .initialized = false, + .spinlock = NETDATA_SPINLOCK_INITIALIZER, +}; + +SIMPLE_HASHTABLE_HASH query_hash(uuid_t *transaction) { + // the hash is the second 8 bytes of the UUID (the "lo" half); copy them out with memcpy(), + // because a uuid_t is a byte array: reading it through a uint64_t pointer may be misaligned + uint64_t lo; + memcpy(&lo, &(*transaction)[sizeof(uint64_t)], sizeof(lo)); + + return lo; +} + +static void query_progress_init_unsafe(void) { + if(!progress.initialized) { + simple_hashtable_init_QUERY(&progress.hashtable, PROGRESS_CACHE_SIZE * 4); + progress.initialized = true; + } +} + +// ---------------------------------------------------------------------------- + +static inline QUERY_PROGRESS *query_progress_find_in_hashtable_unsafe(uuid_t *transaction) { + SIMPLE_HASHTABLE_HASH hash = query_hash(transaction); + SIMPLE_HASHTABLE_SLOT_QUERY *slot = simple_hashtable_get_slot_QUERY(&progress.hashtable, hash, transaction, true); + QUERY_PROGRESS *qp = SIMPLE_HASHTABLE_SLOT_DATA(slot); + + assert(!qp || qp->indexed); + + return qp; +} + +static inline void query_progress_add_to_hashtable_unsafe(QUERY_PROGRESS *qp) { + assert(!qp->indexed); + + SIMPLE_HASHTABLE_HASH hash = query_hash(&qp->transaction); + SIMPLE_HASHTABLE_SLOT_QUERY *slot = + simple_hashtable_get_slot_QUERY(&progress.hashtable, hash, &qp->transaction, true); + + internal_fatal(SIMPLE_HASHTABLE_SLOT_DATA(slot) != NULL && SIMPLE_HASHTABLE_SLOT_DATA(slot) != qp, + "Attempt to overwrite a progress slot, with another value"); + + 
simple_hashtable_set_slot_QUERY(&progress.hashtable, slot, hash, qp); + + qp->indexed = true; +} + +static inline void query_progress_remove_from_hashtable_unsafe(QUERY_PROGRESS *qp) { + assert(qp->indexed); + + SIMPLE_HASHTABLE_HASH hash = query_hash(&qp->transaction); + SIMPLE_HASHTABLE_SLOT_QUERY *slot = + simple_hashtable_get_slot_QUERY(&progress.hashtable, hash, &qp->transaction, true); + + if(SIMPLE_HASHTABLE_SLOT_DATA(slot) == qp) + simple_hashtable_del_slot_QUERY(&progress.hashtable, slot); + else + internal_fatal(SIMPLE_HASHTABLE_SLOT_DATA(slot) != NULL, + "Attempt to remove from the hashtable a progress slot with a different value"); + + qp->indexed = false; +} + +// ---------------------------------------------------------------------------- + +static QUERY_PROGRESS *query_progress_alloc(uuid_t *transaction) { + QUERY_PROGRESS *qp; + qp = callocz(1, sizeof(*qp)); + uuid_copy(qp->transaction, *transaction); + qp->query = buffer_create(0, NULL); + qp->payload = buffer_create(0, NULL); + qp->client = buffer_create(0, NULL); + return qp; +} + +static void query_progress_free(QUERY_PROGRESS *qp) { + if(!qp) return; + + buffer_free(qp->query); + buffer_free(qp->payload); + buffer_free(qp->client); + freez(qp); +} + +static void query_progress_cleanup_to_reuse(QUERY_PROGRESS *qp, uuid_t *transaction) { + assert(qp && qp->prev == NULL && qp->next == NULL); + assert(!transaction || !qp->indexed); + + buffer_flush(qp->query); + buffer_flush(qp->payload); + buffer_flush(qp->client); + qp->started_ut = qp->finished_ut = qp->duration_ut = 0; + qp->all = qp->done = qp->updates = 0; + qp->acl = 0; + qp->next = qp->prev = NULL; + qp->response_size = qp->sent_size = 0; + qp->response_code = 0; + + if(transaction) + uuid_copy(qp->transaction, *transaction); +} + +static inline void query_progress_update(QUERY_PROGRESS *qp, usec_t started_ut, HTTP_REQUEST_MODE mode, HTTP_ACL acl, const char *query, BUFFER *payload, const char *client) { + qp->mode = mode; + qp->acl = acl; 
+ qp->started_ut = started_ut ? started_ut : now_realtime_usec(); + qp->finished_ut = 0; + qp->duration_ut = 0; + qp->response_size = 0; + qp->sent_size = 0; + qp->response_code = 0; + + if(query && *query && !buffer_strlen(qp->query)) + buffer_strcat(qp->query, query); + + if(payload && !buffer_strlen(qp->payload)) + buffer_copy(qp->payload, payload); + + if(client && *client && !buffer_strlen(qp->client)) + buffer_strcat(qp->client, client); +} + +// ---------------------------------------------------------------------------- + +static inline void query_progress_link_to_cache_unsafe(QUERY_PROGRESS *qp) { + assert(!qp->prev && !qp->next); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(progress.cache.list, qp, prev, next); + progress.cache.available++; +} + +static inline void query_progress_unlink_from_cache_unsafe(QUERY_PROGRESS *qp) { + assert(qp->prev); + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(progress.cache.list, qp, prev, next); + progress.cache.available--; +} + +// ---------------------------------------------------------------------------- +// Progress API + +void query_progress_start_or_update(uuid_t *transaction, usec_t started_ut, HTTP_REQUEST_MODE mode, HTTP_ACL acl, const char *query, BUFFER *payload, const char *client) { + if(!transaction) + return; + + spinlock_lock(&progress.spinlock); + query_progress_init_unsafe(); + + QUERY_PROGRESS *qp = query_progress_find_in_hashtable_unsafe(transaction); + if(qp) { + // the transaction is already there + if(qp->prev) { + // reusing a finished transaction + query_progress_unlink_from_cache_unsafe(qp); + query_progress_cleanup_to_reuse(qp, NULL); + } + } + else if (progress.cache.available >= PROGRESS_CACHE_SIZE && progress.cache.list) { + // transaction is not found - get the first available, if any. 
+ qp = progress.cache.list; + query_progress_unlink_from_cache_unsafe(qp); + + query_progress_remove_from_hashtable_unsafe(qp); + query_progress_cleanup_to_reuse(qp, transaction); + } + else { + qp = query_progress_alloc(transaction); + } + + query_progress_update(qp, started_ut, mode, acl, query, payload, client); + + if(!qp->indexed) + query_progress_add_to_hashtable_unsafe(qp); + + spinlock_unlock(&progress.spinlock); +} + +void query_progress_set_finish_line(uuid_t *transaction, size_t all) { + if(!transaction) + return; + + spinlock_lock(&progress.spinlock); + query_progress_init_unsafe(); + + QUERY_PROGRESS *qp = query_progress_find_in_hashtable_unsafe(transaction); + if(qp) { + qp->updates++; + + if(all > qp->all) + qp->all = all; + } + + spinlock_unlock(&progress.spinlock); +} + +void query_progress_done_step(uuid_t *transaction, size_t done) { + if(!transaction) + return; + + spinlock_lock(&progress.spinlock); + query_progress_init_unsafe(); + + QUERY_PROGRESS *qp = query_progress_find_in_hashtable_unsafe(transaction); + if(qp) { + qp->updates++; + qp->done += done; + } + + spinlock_unlock(&progress.spinlock); +} + +void query_progress_finished(uuid_t *transaction, usec_t finished_ut, short int response_code, usec_t duration_ut, size_t response_size, size_t sent_size) { + if(!transaction) + return; + + spinlock_lock(&progress.spinlock); + query_progress_init_unsafe(); + + // find this transaction to update it + { + QUERY_PROGRESS *qp = query_progress_find_in_hashtable_unsafe(transaction); + if(qp) { + qp->sent_size = sent_size; + qp->response_size = response_size; + qp->response_code = response_code; + qp->duration_ut = duration_ut; + qp->finished_ut = finished_ut ? 
finished_ut : now_realtime_usec(); + + if(qp->prev) + query_progress_unlink_from_cache_unsafe(qp); + + query_progress_link_to_cache_unsafe(qp); + } + } + + // find an item to free + { + QUERY_PROGRESS *qp_to_free = NULL; + if(progress.cache.available > PROGRESS_CACHE_SIZE && progress.cache.list) { + qp_to_free = progress.cache.list; + query_progress_unlink_from_cache_unsafe(qp_to_free); + query_progress_remove_from_hashtable_unsafe(qp_to_free); + } + + spinlock_unlock(&progress.spinlock); + + query_progress_free(qp_to_free); + } +} + +void query_progress_functions_update(uuid_t *transaction, size_t done, size_t all) { + // functions send to the total 'done', not the increment + + if(!transaction) + return; + + spinlock_lock(&progress.spinlock); + query_progress_init_unsafe(); + + QUERY_PROGRESS *qp = query_progress_find_in_hashtable_unsafe(transaction); + + if(qp) { + if(all) + qp->all = all; + + if(done) + qp->done = done; + + qp->updates++; + } + + spinlock_unlock(&progress.spinlock); +} + +// ---------------------------------------------------------------------------- +// /api/v2/progress - to get the progress of a transaction + +int web_api_v2_report_progress(uuid_t *transaction, BUFFER *wb) { + buffer_flush(wb); + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_MINIFY); + + if(!transaction) { + buffer_json_member_add_uint64(wb, "status", 400); + buffer_json_member_add_string(wb, "message", "No transaction given"); + buffer_json_finalize(wb); + return 400; + } + + spinlock_lock(&progress.spinlock); + query_progress_init_unsafe(); + + QUERY_PROGRESS *qp = query_progress_find_in_hashtable_unsafe(transaction); + if(!qp) { + spinlock_unlock(&progress.spinlock); + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_NOT_FOUND); + buffer_json_member_add_string(wb, "message", "Transaction not found"); + buffer_json_finalize(wb); + return HTTP_RESP_NOT_FOUND; + } + + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + + 
buffer_json_member_add_uint64(wb, "started_ut", qp->started_ut); + if(qp->finished_ut) { + buffer_json_member_add_uint64(wb, "finished_ut", qp->finished_ut); + buffer_json_member_add_double(wb, "progress", 100.0); + buffer_json_member_add_uint64(wb, "age_ut", qp->finished_ut - qp->started_ut); + } + else { + usec_t now_ut = now_realtime_usec(); + buffer_json_member_add_uint64(wb, "now_ut", now_ut); + buffer_json_member_add_uint64(wb, "age_ut", now_ut - qp->started_ut); + + if (qp->all) + buffer_json_member_add_double(wb, "progress", (double) qp->done * 100.0 / (double) qp->all); + else + buffer_json_member_add_uint64(wb, "working", qp->done); + } + + buffer_json_finalize(wb); + + spinlock_unlock(&progress.spinlock); + + return 200; +} + +// ---------------------------------------------------------------------------- +// function to show the progress of all current queries +// and the recent few completed queries + +int progress_function_result(BUFFER *wb, const char *hostname) { + buffer_flush(wb); + wb->content_type = CT_APPLICATION_JSON; + buffer_json_initialize(wb, "\"", "\"", 0, true, BUFFER_JSON_OPTIONS_DEFAULT); + + buffer_json_member_add_string(wb, "hostname", hostname); + buffer_json_member_add_uint64(wb, "status", HTTP_RESP_OK); + buffer_json_member_add_string(wb, "type", "table"); + buffer_json_member_add_time_t(wb, "update_every", 1); + buffer_json_member_add_boolean(wb, "has_history", false); + buffer_json_member_add_string(wb, "help", RRDFUNCTIONS_PROGRESS_HELP); + buffer_json_member_add_array(wb, "data"); + + spinlock_lock(&progress.spinlock); + query_progress_init_unsafe(); + + usec_t now_ut = now_realtime_usec(); + usec_t max_duration_ut = 0; + size_t max_size = 0, max_sent = 0; + size_t archived = 0, running = 0; + SIMPLE_HASHTABLE_FOREACH_READ_ONLY(&progress.hashtable, sl, _QUERY) { + QUERY_PROGRESS *qp = SIMPLE_HASHTABLE_FOREACH_READ_ONLY_VALUE(sl); + if(unlikely(!qp)) continue; // not really needed, just for completeness + + if(qp->prev) + 
archived++; + else + running++; + + bool finished = qp->finished_ut ? true : false; + usec_t duration_ut = finished ? qp->duration_ut : now_ut - qp->started_ut; + if(duration_ut > max_duration_ut) + max_duration_ut = duration_ut; + + if(finished) { + if(qp->response_size > max_size) + max_size = qp->response_size; + + if(qp->sent_size > max_sent) + max_sent = qp->sent_size; + } + + buffer_json_add_array_item_array(wb); // row + + buffer_json_add_array_item_uuid_compact(wb, &qp->transaction); + buffer_json_add_array_item_uint64(wb, qp->started_ut); + buffer_json_add_array_item_string(wb, HTTP_REQUEST_MODE_2str(qp->mode)); + buffer_json_add_array_item_string(wb, buffer_tostring(qp->query)); + + if(!buffer_strlen(qp->client)) { + if(qp->acl & HTTP_ACL_ACLK) + buffer_json_add_array_item_string(wb, "ACLK"); + else if(qp->acl & HTTP_ACL_WEBRTC) + buffer_json_add_array_item_string(wb, "WEBRTC"); + else + buffer_json_add_array_item_string(wb, "unknown"); + } + else + buffer_json_add_array_item_string(wb, buffer_tostring(qp->client)); + + if(finished) { + buffer_json_add_array_item_string(wb, "finished"); + buffer_json_add_array_item_string(wb, "100.00 %"); // plain string, not a printf format: single '%' to match the in-progress rows + } + else { + char buf[50]; + + buffer_json_add_array_item_string(wb, "in-progress"); + + if (qp->all) + snprintfz(buf, sizeof(buf), "%0.2f %%", (double) qp->done * 100.0 / (double) qp->all); + else + snprintfz(buf, sizeof(buf), "%zu", qp->done); + + buffer_json_add_array_item_string(wb, buf); + } + + buffer_json_add_array_item_double(wb, (double)duration_ut / USEC_PER_MS); + + if(finished) { + buffer_json_add_array_item_uint64(wb, qp->response_code); + buffer_json_add_array_item_uint64(wb, qp->response_size); + buffer_json_add_array_item_uint64(wb, qp->sent_size); + } + else { + buffer_json_add_array_item_string(wb, NULL); + buffer_json_add_array_item_string(wb, NULL); + buffer_json_add_array_item_string(wb, NULL); + } + + buffer_json_add_array_item_object(wb); // row options + { + char *severity = "notice"; + 
if(finished) { + if(qp->response_code == HTTP_RESP_NOT_MODIFIED || + qp->response_code == HTTP_RESP_CLIENT_CLOSED_REQUEST || + qp->response_code == HTTP_RESP_CONFLICT) + severity = "debug"; + else if(qp->response_code >= 500 && qp->response_code <= 599) + severity = "error"; + else if(qp->response_code >= 400 && qp->response_code <= 499) + severity = "warning"; + else if(qp->response_code >= 300 && qp->response_code <= 399) + severity = "notice"; + else + severity = "normal"; + } + buffer_json_member_add_string(wb, "severity", severity); + } + buffer_json_object_close(wb); // row options + + buffer_json_array_close(wb); // row + } + + assert(archived == progress.cache.available); + + spinlock_unlock(&progress.spinlock); + + buffer_json_array_close(wb); // data + buffer_json_member_add_object(wb, "columns"); + { + size_t field_id = 0; + + // transaction + buffer_rrdf_table_add_field(wb, field_id++, "Transaction", "Transaction ID", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_UNIQUE_KEY, + NULL); + + // timestamp + buffer_rrdf_table_add_field(wb, field_id++, "Started", "Query Start Timestamp", + RRDF_FIELD_TYPE_TIMESTAMP, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_DATETIME_USEC, + 0, NULL, NAN, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + // request method + buffer_rrdf_table_add_field(wb, field_id++, "Method", "Request Method", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + // query + buffer_rrdf_table_add_field(wb, field_id++, "Query", "Query", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, 
RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE | RRDF_FIELD_OPTS_FULL_WIDTH | RRDF_FIELD_OPTS_WRAP, NULL); + + // client + buffer_rrdf_table_add_field(wb, field_id++, "Client", "Client", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + // status + buffer_rrdf_table_add_field(wb, field_id++, "Status", "Query Status", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_ASCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + // progress + buffer_rrdf_table_add_field(wb, field_id++, "Progress", "Query Progress", + RRDF_FIELD_TYPE_STRING, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + // duration + buffer_rrdf_table_add_field(wb, field_id++, "Duration", "Query Duration", + RRDF_FIELD_TYPE_DURATION, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NUMBER, + 2, "ms", (double)max_duration_ut / USEC_PER_MS, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_MAX, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + // response code + buffer_rrdf_table_add_field(wb, field_id++, "Response", "Query Response Code", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_MULTISELECT, + RRDF_FIELD_OPTS_VISIBLE, NULL); + + // response size + buffer_rrdf_table_add_field(wb, field_id++, "Size", "Query Response Size", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, "bytes", (double)max_size, RRDF_FIELD_SORT_DESCENDING, NULL, + 
RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + // sent size + buffer_rrdf_table_add_field(wb, field_id++, "Sent", "Query Response Final Size", + RRDF_FIELD_TYPE_INTEGER, RRDF_FIELD_VISUAL_VALUE, RRDF_FIELD_TRANSFORM_NONE, + 0, "bytes", (double)max_sent, RRDF_FIELD_SORT_DESCENDING, NULL, + RRDF_FIELD_SUMMARY_SUM, RRDF_FIELD_FILTER_RANGE, + RRDF_FIELD_OPTS_NONE, NULL); + + // row options + buffer_rrdf_table_add_field(wb, field_id++, "rowOptions", "rowOptions", + RRDF_FIELD_TYPE_NONE, RRDR_FIELD_VISUAL_ROW_OPTIONS, RRDF_FIELD_TRANSFORM_NONE, + 0, NULL, NAN, RRDF_FIELD_SORT_FIXED, NULL, + RRDF_FIELD_SUMMARY_COUNT, RRDF_FIELD_FILTER_NONE, + RRDF_FIELD_OPTS_DUMMY, NULL); + } + + buffer_json_object_close(wb); // columns + buffer_json_member_add_string(wb, "default_sort_column", "Started"); + + buffer_json_member_add_time_t(wb, "expires", (time_t)((now_ut / USEC_PER_SEC) + 1)); + buffer_json_finalize(wb); + + return 200; +} + + +// ---------------------------------------------------------------------------- + +int progress_unittest(void) { + size_t permanent = 100; + uuid_t valid[permanent]; + + usec_t started = now_monotonic_usec(); + + for(size_t i = 0; i < permanent ;i++) { + uuid_generate_random(valid[i]); + query_progress_start_or_update(&valid[i], 0, HTTP_REQUEST_MODE_GET, HTTP_ACL_ACLK, "permanent", NULL, "test"); + } + + for(size_t n = 0; n < 5000000 ;n++) { + uuid_t t; + uuid_generate_random(t); + query_progress_start_or_update(&t, 0, HTTP_REQUEST_MODE_OPTIONS, HTTP_ACL_WEBRTC, "ephemeral", NULL, "test"); + query_progress_finished(&t, 0, 200, 1234, 123, 12); + + QUERY_PROGRESS *qp; + for(size_t i = 0; i < permanent ;i++) { + qp = query_progress_find_in_hashtable_unsafe(&valid[i]); + assert(qp); + (void)qp; + } + } + + usec_t ended = now_monotonic_usec(); + usec_t duration = ended - started; + + printf("progress hashtable resizes: %zu, size: %zu, used: %zu, deleted: %zu, searches: %zu, collisions: %zu, additions: %zu, deletions: 
%zu\n", + progress.hashtable.resizes, + progress.hashtable.size, progress.hashtable.used, progress.hashtable.deleted, + progress.hashtable.searches, progress.hashtable.collisions, progress.hashtable.additions, progress.hashtable.deletions); + + double d = (double)duration / USEC_PER_SEC; + printf("hashtable ops: %0.2f / sec\n", (double)progress.hashtable.searches / d); + + return 0; +} diff --git a/src/libnetdata/query_progress/progress.h b/src/libnetdata/query_progress/progress.h new file mode 100644 index 000000000..1adb8d2ba --- /dev/null +++ b/src/libnetdata/query_progress/progress.h @@ -0,0 +1,19 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_QUERY_PROGRESS_H +#define NETDATA_QUERY_PROGRESS_H 1 + +#include "../libnetdata.h" + +void query_progress_start_or_update(uuid_t *transaction, usec_t started_ut, HTTP_REQUEST_MODE mode, HTTP_ACL acl, const char *query, BUFFER *payload, const char *client); +void query_progress_done_step(uuid_t *transaction, size_t done); +void query_progress_set_finish_line(uuid_t *transaction, size_t all); +void query_progress_finished(uuid_t *transaction, usec_t finished_ut, short int response_code, usec_t duration_ut, size_t response_size, size_t sent_size); +void query_progress_functions_update(uuid_t *transaction, size_t done, size_t all); + +int web_api_v2_report_progress(uuid_t *transaction, BUFFER *wb); + +#define RRDFUNCTIONS_PROGRESS_HELP "View the progress on the running and latest Netdata API Requests" +int progress_function_result(BUFFER *wb, const char *hostname); + +#endif // NETDATA_QUERY_PROGRESS_H diff --git a/src/libnetdata/required_dummies.h b/src/libnetdata/required_dummies.h new file mode 100644 index 000000000..3b23b87f7 --- /dev/null +++ b/src/libnetdata/required_dummies.h @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_LIB_DUMMIES_H +#define NETDATA_LIB_DUMMIES_H 1 + +// callback required by fatal() +void netdata_cleanup_and_exit(int ret, const char *action, 
const char *action_result, const char *action_data) +{ + (void)action; + (void)action_result; + (void)action_data; + + exit(ret); +} + +// callbacks required by popen() +void signals_block(void){} +void signals_unblock(void){} +void signals_reset(void){} + +void rrdset_thread_rda_free(void){} +void sender_thread_buffer_free(void){} +void query_target_free(void){} +void service_exits(void){} +void rrd_collector_finished(void){} + +// required by get_system_cpus() +char *netdata_configured_host_prefix = ""; + +#endif // NETDATA_LIB_DUMMIES_H diff --git a/src/libnetdata/simple_hashtable.h b/src/libnetdata/simple_hashtable.h new file mode 100644 index 000000000..13cdcd10e --- /dev/null +++ b/src/libnetdata/simple_hashtable.h @@ -0,0 +1,544 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SIMPLE_HASHTABLE_H +#define NETDATA_SIMPLE_HASHTABLE_H + +typedef uint64_t SIMPLE_HASHTABLE_HASH; +#define SIMPLE_HASHTABLE_HASH_SECOND_HASH_SHIFTS 32 + +/* + * CONFIGURATION + * + * SIMPLE_HASHTABLE_NAME + * The name of the hashtable - all functions and defines will have this name appended + * Example: #define SIMPLE_HASHTABLE_NAME _FACET_KEY + * + * SIMPLE_HASHTABLE_VALUE_TYPE and SIMPLE_HASHTABLE_KEY_TYPE + * The data types of values and keys - optional - setting them will enable strict type checking by the compiler. + * If undefined, they both default to void. + * + * SIMPLE_HASHTABLE_SORT_FUNCTION + * A function name that accepts 2x values and compares them for sorting (returning -1, 0, 1). + * When set, the hashtable will maintain an always sorted array of the values in the hashtable. + * Do not use this for non-static hashtables. So, if your data is changing all the time, this can make the + * hashtable quite slower (it memmove()s an array of pointers to keep it sorted, on every single change). 
+ * + * SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION and SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION + * The hashtable can either compare just hashes (the default), or hashes and keys (when these are set). + * Both need to be set for this feature to be enabled. + * + * - SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION + * The name of a function accepting SIMPLE_HASHTABLE_VALUE_TYPE pointer. + * It should return a pointer to SIMPLE_HASHTABLE_KEY_TYPE. + * This function is called prior to SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION to extract the key from a value. + * It is also called during hashtable resize, to rehash all values in the hashtable. + * + * - SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION + * The name of a function accepting 2x SIMPLE_HASHTABLE_KEY_TYPE pointers. + * It should return true when the keys match. + * This function is only called when the hashes match, to verify that the keys also match. + * + * SIMPLE_HASHTABLE_SAMPLE_IMPLEMENTATION + * If defined, 3x functions will be injected for easily working with the hashtable. 
+ * + */ + + +#ifndef SIMPLE_HASHTABLE_NAME +#define SIMPLE_HASHTABLE_NAME +#endif + +#ifndef SIMPLE_HASHTABLE_VALUE_TYPE +#define SIMPLE_HASHTABLE_VALUE_TYPE void +#endif + +#ifndef SIMPLE_HASHTABLE_KEY_TYPE +#define SIMPLE_HASHTABLE_KEY_TYPE void +#endif + +#ifndef SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION +#undef SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION +#endif + +#if defined(SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION) +static inline SIMPLE_HASHTABLE_KEY_TYPE *SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION(SIMPLE_HASHTABLE_VALUE_TYPE *); +#endif + +#if defined(SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION) +static inline bool SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION(SIMPLE_HASHTABLE_KEY_TYPE *, SIMPLE_HASHTABLE_KEY_TYPE *); +#endif + +// First layer of macro for token concatenation +#define CONCAT_INTERNAL(a, b) a ## b +// Second layer of macro, which ensures proper expansion +#define CONCAT(a, b) CONCAT_INTERNAL(a, b) + +// define names for all structures and structures +#define simple_hashtable_init_named CONCAT(simple_hashtable_init, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_destroy_named CONCAT(simple_hashtable_destroy, SIMPLE_HASHTABLE_NAME) + +#define simple_hashtable_slot_named CONCAT(simple_hashtable_slot, SIMPLE_HASHTABLE_NAME) +#define SIMPLE_HASHTABLE_SLOT_NAMED CONCAT(SIMPLE_HASHTABLE_SLOT, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_named CONCAT(simple_hashtable, SIMPLE_HASHTABLE_NAME) +#define SIMPLE_HASHTABLE_NAMED CONCAT(SIMPLE_HASHTABLE, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_resize_named CONCAT(simple_hashtable_resize, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_can_use_slot_named CONCAT(simple_hashtable_keys_match, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_get_slot_named CONCAT(simple_hashtable_get_slot, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_del_slot_named CONCAT(simple_hashtable_del_slot, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_set_slot_named CONCAT(simple_hashtable_set_slot, SIMPLE_HASHTABLE_NAME) +#define 
simple_hashtable_first_read_only_named CONCAT(simple_hashtable_first_read_only, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_next_read_only_named CONCAT(simple_hashtable_next_read_only, SIMPLE_HASHTABLE_NAME) + +#define simple_hashtable_sorted_binary_search_named CONCAT(simple_hashtable_sorted_binary_search, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_add_value_sorted_named CONCAT(simple_hashtable_add_value_sorted, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_del_value_sorted_named CONCAT(simple_hashtable_del_value_sorted, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_replace_value_sorted_named CONCAT(simple_hashtable_replace_value_sorted, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_sorted_array_first_read_only_named CONCAT(simple_hashtable_sorted_array_first_read_only, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_sorted_array_next_read_only_named CONCAT(simple_hashtable_sorted_array_next_read_only, SIMPLE_HASHTABLE_NAME) + +typedef struct simple_hashtable_slot_named { + SIMPLE_HASHTABLE_HASH hash; + SIMPLE_HASHTABLE_VALUE_TYPE *data; +} SIMPLE_HASHTABLE_SLOT_NAMED; + +typedef struct simple_hashtable_named { + size_t resizes; + size_t searches; + size_t collisions; + size_t additions; + size_t deletions; + size_t deleted; + size_t used; + size_t size; + bool needs_cleanup; + SIMPLE_HASHTABLE_SLOT_NAMED *hashtable; + +#ifdef SIMPLE_HASHTABLE_SORT_FUNCTION + struct { + size_t used; + size_t size; + SIMPLE_HASHTABLE_VALUE_TYPE **array; + } sorted; +#endif +} SIMPLE_HASHTABLE_NAMED; + +#ifdef SIMPLE_HASHTABLE_SORT_FUNCTION +static inline size_t simple_hashtable_sorted_binary_search_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_VALUE_TYPE *value) { + size_t left = 0, right = ht->sorted.used; + + while (left < right) { + size_t mid = left + (right - left) / 2; + if (SIMPLE_HASHTABLE_SORT_FUNCTION(ht->sorted.array[mid], value) < 0) + left = mid + 1; + else + right = mid; + } + + return left; +} + +static inline void 
simple_hashtable_add_value_sorted_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_VALUE_TYPE *value) {
    // Inserts 'value' into ht->sorted.array at the position given by the binary search,
    // so the array stays ordered according to SIMPLE_HASHTABLE_SORT_FUNCTION.
    size_t index = simple_hashtable_sorted_binary_search_named(ht, value);

    // Ensure there's enough space in the sorted array (64 entries initially, doubled afterwards)
    if (ht->sorted.used >= ht->sorted.size) {
        size_t size = ht->sorted.size ? ht->sorted.size * 2 : 64;
        SIMPLE_HASHTABLE_VALUE_TYPE **array = mallocz(size * sizeof(SIMPLE_HASHTABLE_VALUE_TYPE *));
        if(ht->sorted.array) {
            memcpy(array, ht->sorted.array, ht->sorted.size * sizeof(SIMPLE_HASHTABLE_VALUE_TYPE *));
            freez(ht->sorted.array);
        }
        ht->sorted.array = array;
        ht->sorted.size = size;
    }

    // Use memmove to shift elements and create space for the new element
    memmove(&ht->sorted.array[index + 1], &ht->sorted.array[index], (ht->sorted.used - index) * sizeof(SIMPLE_HASHTABLE_VALUE_TYPE *));

    ht->sorted.array[index] = value;
    ht->sorted.used++;
}

// Removes the pointer 'value' from the sorted array.
// 'value' must be present in the array (this is asserted).
static inline void simple_hashtable_del_value_sorted_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_VALUE_TYPE *value) {
    size_t index = simple_hashtable_sorted_binary_search_named(ht, value);

    // The binary search returns the FIRST entry that does not sort before 'value'.
    // When several distinct values compare equal, that entry may be another pointer,
    // so walk over the run of equal entries until we reach the exact pointer.
    while(index < ht->sorted.used && ht->sorted.array[index] != value &&
          SIMPLE_HASHTABLE_SORT_FUNCTION(ht->sorted.array[index], value) == 0)
        index++;

    // Check if the value exists at the found index
    assert(index < ht->sorted.used && ht->sorted.array[index] == value);

    // Use memmove to shift elements and close the gap
    memmove(&ht->sorted.array[index], &ht->sorted.array[index + 1], (ht->sorted.used - index - 1) * sizeof(SIMPLE_HASHTABLE_VALUE_TYPE *));
    ht->sorted.used--;
}

// Replaces the pointer 'old_value' with 'new_value' in the sorted array, moving only the
// entries between the old and the new position. 'old_value' must be present (this is asserted).
static inline void simple_hashtable_replace_value_sorted_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_VALUE_TYPE *old_value, SIMPLE_HASHTABLE_VALUE_TYPE *new_value) {
    if(new_value == old_value)
        return;

    size_t old_value_index = simple_hashtable_sorted_binary_search_named(ht, old_value);

    // Same as in the delete path: skip entries that merely compare equal to old_value,
    // until the exact pointer is found.
    while(old_value_index < ht->sorted.used && ht->sorted.array[old_value_index] != old_value &&
          SIMPLE_HASHTABLE_SORT_FUNCTION(ht->sorted.array[old_value_index], old_value) == 0)
        old_value_index++;

    assert(old_value_index < ht->sorted.used && ht->sorted.array[old_value_index] == old_value);

    int r = SIMPLE_HASHTABLE_SORT_FUNCTION(old_value, new_value);
    if(r == 0) {
        // Same value, so use the same index
        ht->sorted.array[old_value_index] = new_value;
        return;
    }

    // Lower bound of new_value, computed while old_value is still in the array
    size_t new_value_index = simple_hashtable_sorted_binary_search_named(ht, new_value);
    if(old_value_index == new_value_index) {
        // Not the same value, but still at the same index
        ht->sorted.array[old_value_index] = new_value;
        return;
    }
    else if (old_value_index < new_value_index) {
        // The old value is before the new value:
        // close the gap left by old_value and put new_value just before its lower bound
        size_t shift_start = old_value_index + 1;
        size_t shift_end = new_value_index - 1;
        size_t shift_size = shift_end - old_value_index;

        memmove(&ht->sorted.array[old_value_index], &ht->sorted.array[shift_start], shift_size * sizeof(SIMPLE_HASHTABLE_VALUE_TYPE *));
        ht->sorted.array[shift_end] = new_value;
    }
    else {
        // The old value is after the new value:
        // open a gap at the lower bound of new_value, overwriting the slot of old_value
        size_t shift_start = new_value_index;
        size_t shift_end = old_value_index;
        size_t shift_size = shift_end - new_value_index;

        memmove(&ht->sorted.array[new_value_index + 1], &ht->sorted.array[shift_start], shift_size * sizeof(SIMPLE_HASHTABLE_VALUE_TYPE *));
        ht->sorted.array[new_value_index] = new_value;
    }
}

// Returns a pointer to the first element of the sorted array, or NULL when it is empty.
static inline SIMPLE_HASHTABLE_VALUE_TYPE **simple_hashtable_sorted_array_first_read_only_named(SIMPLE_HASHTABLE_NAMED *ht) {
    if (ht->sorted.used > 0) {
        return &ht->sorted.array[0];
    }
    return NULL;
}

// Returns a pointer to the element following 'last' in the sorted array,
// or NULL when 'last' is NULL or it was the final element.
static inline SIMPLE_HASHTABLE_VALUE_TYPE **simple_hashtable_sorted_array_next_read_only_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_VALUE_TYPE **last) {
    if (!last) return NULL;

    // Calculate the current position in the sorted array
    size_t currentIndex = last - ht->sorted.array;

    // Proceed to the next element if it exists
    if (currentIndex + 1 < ht->sorted.used) {
        return &ht->sorted.array[currentIndex + 1];
    }

    // If no more elements, return NULL
    return NULL;
}

// Iterates over the sorted array; 'name' is the SIMPLE_HASHTABLE_NAME suffix of the hashtable.
#define SIMPLE_HASHTABLE_SORTED_FOREACH_READ_ONLY(ht, var, type, name) \
    for (type **(var) = simple_hashtable_sorted_array_first_read_only ## name(ht); \
         var; \
         (var) = \
simple_hashtable_sorted_array_next_read_only ## name(ht, var)) + +#define SIMPLE_HASHTABLE_SORTED_FOREACH_READ_ONLY_VALUE(var) (*(var)) + +#else +static inline void simple_hashtable_add_value_sorted_named(SIMPLE_HASHTABLE_NAMED *ht __maybe_unused, SIMPLE_HASHTABLE_VALUE_TYPE *value __maybe_unused) { ; } +static inline void simple_hashtable_del_value_sorted_named(SIMPLE_HASHTABLE_NAMED *ht __maybe_unused, SIMPLE_HASHTABLE_VALUE_TYPE *value __maybe_unused) { ; } +static inline void simple_hashtable_replace_value_sorted_named(SIMPLE_HASHTABLE_NAMED *ht __maybe_unused, SIMPLE_HASHTABLE_VALUE_TYPE *old_value __maybe_unused, SIMPLE_HASHTABLE_VALUE_TYPE *new_value __maybe_unused) { ; } +#endif + +static inline void simple_hashtable_init_named(SIMPLE_HASHTABLE_NAMED *ht, size_t size) { + memset(ht, 0, sizeof(*ht)); + ht->size = size; + ht->hashtable = callocz(ht->size, sizeof(*ht->hashtable)); +} + +static inline void simple_hashtable_destroy_named(SIMPLE_HASHTABLE_NAMED *ht) { +#ifdef SIMPLE_HASHTABLE_SORT_FUNCTION + freez(ht->sorted.array); +#endif + + freez(ht->hashtable); + memset(ht, 0, sizeof(*ht)); +} + +static inline void simple_hashtable_resize_named(SIMPLE_HASHTABLE_NAMED *ht); + +#define simple_hashtable_data_unset ((void *)NULL) +#define simple_hashtable_data_deleted ((void *)UINT64_MAX) +#define simple_hashtable_data_usernull ((void *)(UINT64_MAX - 1)) +#define simple_hashtable_is_slot_unset(sl) ((sl)->data == simple_hashtable_data_unset) +#define simple_hashtable_is_slot_deleted(sl) ((sl)->data == simple_hashtable_data_deleted) +#define simple_hashtable_is_slot_usernull(sl) ((sl)->data == simple_hashtable_data_usernull) +#define SIMPLE_HASHTABLE_SLOT_DATA(sl) ((simple_hashtable_is_slot_unset(sl) || simple_hashtable_is_slot_deleted(sl) || simple_hashtable_is_slot_usernull(sl)) ? 
NULL : (sl)->data) + +static inline bool simple_hashtable_can_use_slot_named( + SIMPLE_HASHTABLE_SLOT_NAMED *sl, SIMPLE_HASHTABLE_HASH hash, + SIMPLE_HASHTABLE_KEY_TYPE *key __maybe_unused) { + + if(simple_hashtable_is_slot_unset(sl)) + return true; + + if(simple_hashtable_is_slot_deleted(sl)) + return false; + + if(sl->hash == hash) { +#if defined(SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION) && defined(SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION) + return SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION(SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION(SIMPLE_HASHTABLE_SLOT_DATA(sl)), key); +#else + return true; +#endif + } + + return false; +} + +#define SIMPLE_HASHTABLE_NEEDS_RESIZE(ht) ((ht)->size <= ((ht)->used - (ht)->deleted) << 1 || (ht)->used >= (ht)->size) + +// IMPORTANT: the pointer returned by this call is valid up to the next call of this function (or the resize one). +// If you need to cache something, cache the hash, not the slot pointer. +static inline SIMPLE_HASHTABLE_SLOT_NAMED *simple_hashtable_get_slot_named( + SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_HASH hash, + SIMPLE_HASHTABLE_KEY_TYPE *key, bool resize) { + + // This function finds the requested hash and key in the hashtable. + // It uses a second version of the hash in case of collisions, and then linear probing. + // It may resize the hashtable if it is more than 50% full. + + // Deleted items remain in the hashtable, but they are marked as DELETED. + // Reuse of DELETED slots happens only if the slot to be returned is UNSET. + // So, when looking up for an item, it tries to find it, assuming DELETED + // slots are occupied. If the item to be returned is UNSET, and it has + // encountered a DELETED slot, it returns the DELETED one instead of the UNSET. + + ht->searches++; + + size_t slot; + SIMPLE_HASHTABLE_SLOT_NAMED *sl; + SIMPLE_HASHTABLE_SLOT_NAMED *deleted; + + slot = hash % ht->size; + sl = &ht->hashtable[slot]; + deleted = simple_hashtable_is_slot_deleted(sl) ? 
sl : NULL; + if(likely(simple_hashtable_can_use_slot_named(sl, hash, key))) + return (simple_hashtable_is_slot_unset(sl) && deleted) ? deleted : sl; + + ht->collisions++; + + if(unlikely(resize && (ht->needs_cleanup || SIMPLE_HASHTABLE_NEEDS_RESIZE(ht)))) { + simple_hashtable_resize_named(ht); + deleted = NULL; // our deleted pointer is not valid anymore + + slot = hash % ht->size; + sl = &ht->hashtable[slot]; + if(likely(simple_hashtable_can_use_slot_named(sl, hash, key))) + return sl; + + ht->collisions++; + } + + slot = ((hash >> SIMPLE_HASHTABLE_HASH_SECOND_HASH_SHIFTS) + 1) % ht->size; + sl = &ht->hashtable[slot]; + deleted = (!deleted && simple_hashtable_is_slot_deleted(sl)) ? sl : deleted; + + // Linear probing until we find it + SIMPLE_HASHTABLE_SLOT_NAMED *sl_started = sl; + size_t collisions_started = ht->collisions; + while (!simple_hashtable_can_use_slot_named(sl, hash, key)) { + slot = (slot + 1) % ht->size; // Wrap around if necessary + sl = &ht->hashtable[slot]; + deleted = (!deleted && simple_hashtable_is_slot_deleted(sl)) ? sl : deleted; + ht->collisions++; + + if(sl == sl_started) { + if(deleted) { + // we looped through all items, and we didn't find a free slot, + // but we have found a deleted slot, so return it. + return deleted; + } + else if(resize) { + // the hashtable is full, without any deleted slots. + // we need to resize it now. + simple_hashtable_resize_named(ht); + return simple_hashtable_get_slot_named(ht, hash, key, false); + } + else { + // the hashtable is full, but resize is false. + // this should never happen. + assert(sl != sl_started); + } + } + } + + if((ht->collisions - collisions_started) > (ht->size / 2) && ht->deleted >= (ht->size / 3)) { + // we traversed through half of the hashtable to find a slot, + // but we have more than 1/3 deleted items + ht->needs_cleanup = true; + } + + return (simple_hashtable_is_slot_unset(sl) && deleted) ? 
deleted : sl; +} + +static inline bool simple_hashtable_del_slot_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_SLOT_NAMED *sl) { + if(simple_hashtable_is_slot_unset(sl) || simple_hashtable_is_slot_deleted(sl)) + return false; + + ht->deletions++; + ht->deleted++; + + simple_hashtable_del_value_sorted_named(ht, SIMPLE_HASHTABLE_SLOT_DATA(sl)); + + sl->data = simple_hashtable_data_deleted; + return true; +} + +static inline void simple_hashtable_set_slot_named( + SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_SLOT_NAMED *sl, + SIMPLE_HASHTABLE_HASH hash, SIMPLE_HASHTABLE_VALUE_TYPE *data) { + + if(data == NULL) + data = simple_hashtable_data_usernull; + + if(unlikely(data == simple_hashtable_data_unset || data == simple_hashtable_data_deleted)) { + simple_hashtable_del_slot_named(ht, sl); + return; + } + + if(likely(simple_hashtable_is_slot_unset(sl))) { + simple_hashtable_add_value_sorted_named(ht, data); + ht->used++; + } + + else if(unlikely(simple_hashtable_is_slot_deleted(sl))) { + ht->deleted--; + } + + else + simple_hashtable_replace_value_sorted_named(ht, SIMPLE_HASHTABLE_SLOT_DATA(sl), data); + + sl->hash = hash; + sl->data = data; + ht->additions++; +} + +// IMPORTANT +// this call invalidates all SIMPLE_HASHTABLE_SLOT_NAMED pointers +static inline void simple_hashtable_resize_named(SIMPLE_HASHTABLE_NAMED *ht) { + SIMPLE_HASHTABLE_SLOT_NAMED *old = ht->hashtable; + size_t old_size = ht->size; + + size_t new_size = ht->size; + + if(SIMPLE_HASHTABLE_NEEDS_RESIZE(ht)) + new_size = (ht->size << 1) - ((ht->size > 16) ? 
1 : 0); + + ht->resizes++; + ht->size = new_size; + ht->hashtable = callocz(new_size, sizeof(*ht->hashtable)); + size_t used = 0; + for(size_t i = 0 ; i < old_size ; i++) { + SIMPLE_HASHTABLE_SLOT_NAMED *slot = &old[i]; + if(simple_hashtable_is_slot_unset(slot) || simple_hashtable_is_slot_deleted(slot)) + continue; + + SIMPLE_HASHTABLE_KEY_TYPE *key = NULL; + +#if defined(SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION) && defined(SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION) + SIMPLE_HASHTABLE_VALUE_TYPE *value = SIMPLE_HASHTABLE_SLOT_DATA(slot); + key = SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION(value); +#endif + + SIMPLE_HASHTABLE_SLOT_NAMED *slot2 = simple_hashtable_get_slot_named(ht, slot->hash, key, false); + *slot2 = *slot; + used++; + } + + assert(used == ht->used - ht->deleted); + + ht->used = used; + ht->deleted = 0; + ht->needs_cleanup = false; + + freez(old); +} + +// ---------------------------------------------------------------------------- +// hashtable traversal, in read-only mode +// the hashtable should not be modified while the traversal is taking place + +static inline SIMPLE_HASHTABLE_SLOT_NAMED *simple_hashtable_first_read_only_named(SIMPLE_HASHTABLE_NAMED *ht) { + for(size_t i = 0; i < ht->size ;i++) { + SIMPLE_HASHTABLE_SLOT_NAMED *sl = &ht->hashtable[i]; + if(!simple_hashtable_is_slot_unset(sl) && !simple_hashtable_is_slot_deleted(sl)) + return sl; + } + + return NULL; +} + +static inline SIMPLE_HASHTABLE_SLOT_NAMED *simple_hashtable_next_read_only_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_SLOT_NAMED *last) { + if (!last) return NULL; + + // Calculate the current position in the array + size_t index = last - ht->hashtable; + + // Iterate over the hashtable starting from the next element + for (size_t i = index + 1; i < ht->size; i++) { + SIMPLE_HASHTABLE_SLOT_NAMED *sl = &ht->hashtable[i]; + if (!simple_hashtable_is_slot_unset(sl) && !simple_hashtable_is_slot_deleted(sl)) { + return sl; + } + } + + // If no more data slots are found, return NULL + return 
NULL; +} + +#define SIMPLE_HASHTABLE_FOREACH_READ_ONLY(ht, var, name) \ + for(struct simple_hashtable_slot ## name *(var) = simple_hashtable_first_read_only ## name(ht); \ + var; \ + (var) = simple_hashtable_next_read_only ## name(ht, var)) + +#define SIMPLE_HASHTABLE_FOREACH_READ_ONLY_VALUE(var) SIMPLE_HASHTABLE_SLOT_DATA(var) + +// ---------------------------------------------------------------------------- +// high level implementation + +#ifdef SIMPLE_HASHTABLE_SAMPLE_IMPLEMENTATION + +#ifndef XXH_INLINE_ALL +#define XXH_INLINE_ALL +#endif +#include "xxhash.h" + +#define simple_hashtable_set_named CONCAT(simple_hashtable_set, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_get_named CONCAT(simple_hashtable_get, SIMPLE_HASHTABLE_NAME) +#define simple_hashtable_del_named CONCAT(simple_hashtable_del, SIMPLE_HASHTABLE_NAME) + +static inline SIMPLE_HASHTABLE_VALUE_TYPE *simple_hashtable_set_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_KEY_TYPE *key, size_t key_len, SIMPLE_HASHTABLE_VALUE_TYPE *data) { + XXH64_hash_t hash = XXH3_64bits((void *)key, key_len); + SIMPLE_HASHTABLE_SLOT_NAMED *sl = simple_hashtable_get_slot_named(ht, hash, key, true); + simple_hashtable_set_slot_named(ht, sl, hash, data); + return SIMPLE_HASHTABLE_SLOT_DATA(sl); +} + +static inline SIMPLE_HASHTABLE_VALUE_TYPE *simple_hashtable_get_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_KEY_TYPE *key, size_t key_len, SIMPLE_HASHTABLE_VALUE_TYPE *data) { + XXH64_hash_t hash = XXH3_64bits((void *)key, key_len); + SIMPLE_HASHTABLE_SLOT_NAMED *sl = simple_hashtable_get_slot_named(ht, hash, key, true); + return SIMPLE_HASHTABLE_SLOT_DATA(sl); +} + +static inline bool simple_hashtable_del_named(SIMPLE_HASHTABLE_NAMED *ht, SIMPLE_HASHTABLE_KEY_TYPE *key, size_t key_len, SIMPLE_HASHTABLE_VALUE_TYPE *data) { + XXH64_hash_t hash = XXH3_64bits((void *)key, key_len); + SIMPLE_HASHTABLE_SLOT_NAMED *sl = simple_hashtable_get_slot_named(ht, hash, key, true); + return simple_hashtable_del_slot_named(ht, 
sl); +} + +#endif // SIMPLE_HASHTABLE_SAMPLE_IMPLEMENTATION + +// ---------------------------------------------------------------------------- +// Clear the preprocessor defines of simple_hashtable.h +// allowing simple_hashtable.h to be included multiple times +// with different configuration each time. + +#include "simple_hashtable_undef.h" + +#endif //NETDATA_SIMPLE_HASHTABLE_H diff --git a/src/libnetdata/simple_hashtable_undef.h b/src/libnetdata/simple_hashtable_undef.h new file mode 100644 index 000000000..3fe5a708d --- /dev/null +++ b/src/libnetdata/simple_hashtable_undef.h @@ -0,0 +1,35 @@ + +// this file clears the preprocessor defines of simple_hashtable.h +// allowing simple_hashtable.h to be included multiple times +// with different configuration each time. + +#undef SIMPLE_HASHTABLE_HASH_SECOND_HASH_SHIFTS + +#undef simple_hashtable_init_named +#undef simple_hashtable_destroy_named +#undef simple_hashtable_slot_named +#undef SIMPLE_HASHTABLE_SLOT_NAMED +#undef simple_hashtable_named +#undef SIMPLE_HASHTABLE_NAMED +#undef simple_hashtable_resize_named +#undef simple_hashtable_can_use_slot_named +#undef simple_hashtable_get_slot_named +#undef simple_hashtable_del_slot_named +#undef simple_hashtable_set_slot_named +#undef simple_hashtable_first_read_only_named +#undef simple_hashtable_next_read_only_named +#undef simple_hashtable_sorted_binary_search_named +#undef simple_hashtable_add_value_sorted_named +#undef simple_hashtable_del_value_sorted_named +#undef simple_hashtable_replace_value_sorted_named +#undef simple_hashtable_sorted_array_first_read_only_named +#undef simple_hashtable_sorted_array_next_read_only_named + +#undef SIMPLE_HASHTABLE_SAMPLE_IMPLEMENTATION +#undef SIMPLE_HASHTABLE_SORT_FUNCTION +#undef SIMPLE_HASHTABLE_VALUE_TYPE +#undef SIMPLE_HASHTABLE_KEY_TYPE +#undef SIMPLE_HASHTABLE_VALUE2KEY_FUNCTION +#undef SIMPLE_HASHTABLE_COMPARE_KEYS_FUNCTION +#undef SIMPLE_HASHTABLE_NAME +#undef NETDATA_SIMPLE_HASHTABLE_H diff --git 
a/src/libnetdata/simple_pattern/README.md b/src/libnetdata/simple_pattern/README.md new file mode 100644 index 000000000..cf8a0f640 --- /dev/null +++ b/src/libnetdata/simple_pattern/README.md @@ -0,0 +1,47 @@ +<!-- +title: "Simple patterns" +description: "Netdata supports simple patterns, which are less cryptic versions of regular expressions. Use familiar notation for powerful results." +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/simple_pattern/README.md +sidebar_label: "Simple patterns" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# Simple patterns + +Unix prefers regular expressions. But they are just too hard, too cryptic +to use, write and understand. + +So, Netdata supports **simple patterns**. + +Simple patterns are a space separated list of words, that can have `*` +as a wildcard. Each word may use any number of `*`. Simple patterns +allow **negative** matches by prefixing a word with `!`. + +So, `pattern = !*bad* *` will match anything, except all those that +contain the word `bad`. + +Simple patterns are quite powerful: `pattern = *foobar* !foo* !*bar *` +matches everything containing `foobar`, except strings that start +with `foo` or end with `bar`. + +You can use the Netdata command line to check simple patterns, +like this: + +```sh +# netdata -W simple-pattern '*foobar* !foo* !*bar *' 'hello world' +RESULT: MATCHED - pattern '*foobar* !foo* !*bar *' matches 'hello world' + +# netdata -W simple-pattern '*foobar* !foo* !*bar *' 'hello world bar' +RESULT: NOT MATCHED - pattern '*foobar* !foo* !*bar *' does not match 'hello world bar' + +# netdata -W simple-pattern '*foobar* !foo* !*bar *' 'hello world foobar' +RESULT: MATCHED - pattern '*foobar* !foo* !*bar *' matches 'hello world foobar' +``` + +Netdata stops processing at the first positive or negative match +(left to right). 
If it is not matched by either positive or negative +patterns, it is denied at the end. + + diff --git a/libnetdata/simple_pattern/simple_pattern.c b/src/libnetdata/simple_pattern/simple_pattern.c index a0051e8f0..7a7f41b1c 100644 --- a/libnetdata/simple_pattern/simple_pattern.c +++ b/src/libnetdata/simple_pattern/simple_pattern.c @@ -147,7 +147,7 @@ SIMPLE_PATTERN *simple_pattern_create(const char *list, const char *separators, if(default_mode == SIMPLE_PATTERN_SUBSTRING) { m->mode = SIMPLE_PATTERN_SUBSTRING; - struct simple_pattern *tm = m; + struct simple_pattern *tm; for(tm = m; tm->child ; tm = tm->child) ; tm->mode = SIMPLE_PATTERN_SUBSTRING; } @@ -396,27 +396,6 @@ extern int simple_pattern_is_potential_name(SIMPLE_PATTERN *p) return (alpha || wildcards) && !colon; } -char *simple_pattern_trim_around_equal(char *src) { - char *store = mallocz(strlen(src) + 1); - - char *dst = store; - while (*src) { - if (*src == '=') { - if (*(dst -1) == ' ') - dst--; - - *dst++ = *src++; - if (*src == ' ') - src++; - } - - *dst++ = *src++; - } - *dst = 0x00; - - return store; -} - char *simple_pattern_iterate(SIMPLE_PATTERN **p) { struct simple_pattern *root = (struct simple_pattern *) *p; diff --git a/libnetdata/simple_pattern/simple_pattern.h b/src/libnetdata/simple_pattern/simple_pattern.h index 1a8d8f7d6..1af0f87b9 100644 --- a/libnetdata/simple_pattern/simple_pattern.h +++ b/src/libnetdata/simple_pattern/simple_pattern.h @@ -5,7 +5,6 @@ #include "../libnetdata.h" - typedef enum __attribute__ ((__packed__)) { SIMPLE_PATTERN_EXACT, SIMPLE_PATTERN_PREFIX, @@ -19,7 +18,8 @@ typedef enum __attribute__ ((__packed__)) { SP_MATCHED_POSITIVE, } SIMPLE_PATTERN_RESULT; -typedef void SIMPLE_PATTERN; +struct simple_pattern; +typedef struct simple_pattern SIMPLE_PATTERN; // create a simple_pattern from the string given // default_mode is used in cases where EXACT matches, without an asterisk, @@ -47,9 +47,6 @@ void simple_pattern_dump(uint64_t debug_type, SIMPLE_PATTERN *p) ; int 
simple_pattern_is_potential_name(SIMPLE_PATTERN *p) ; char *simple_pattern_iterate(SIMPLE_PATTERN **p); -// Auxiliary function to create a pattern -char *simple_pattern_trim_around_equal(char *src); - #define SIMPLE_PATTERN_DEFAULT_WEB_SEPARATORS ",|\t\r\n\f\v" #define is_valid_sp(x) ((x) && *(x) && !((x)[0] == '*' && (x)[1] == '\0')) diff --git a/src/libnetdata/socket/README.md b/src/libnetdata/socket/README.md new file mode 100644 index 000000000..b81cbb8df --- /dev/null +++ b/src/libnetdata/socket/README.md @@ -0,0 +1,8 @@ +<!-- +Title: "Socket" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/socket/README.md +sidebar_label: "Socket" +learn_status: "Published" +learn_topic_type: "References" +learn_rel_path: "Developers/libnetdata" +--> diff --git a/libnetdata/socket/security.c b/src/libnetdata/socket/security.c index 4deb76623..4deb76623 100644 --- a/libnetdata/socket/security.c +++ b/src/libnetdata/socket/security.c diff --git a/libnetdata/socket/security.h b/src/libnetdata/socket/security.h index fd17b6f3f..fd17b6f3f 100644 --- a/libnetdata/socket/security.h +++ b/src/libnetdata/socket/security.h diff --git a/libnetdata/socket/socket.c b/src/libnetdata/socket/socket.c index 605e85635..b157b157b 100644 --- a/libnetdata/socket/socket.c +++ b/src/libnetdata/socket/socket.c @@ -191,6 +191,16 @@ int sock_setreuse(int fd, int reuse) { return ret; } +void sock_setcloexec(int fd) +{ + UNUSED(fd); +#ifndef SOCK_CLOEXEC + int flags = fcntl(fd, F_GETFD); + if (flags != -1) + (void) fcntl(fd, F_SETFD, flags | FD_CLOEXEC); +#endif +} + int sock_setreuse_port(int fd, int reuse) { int ret; @@ -262,7 +272,7 @@ char *strdup_client_description(int family, const char *protocol, const char *ip int create_listen_socket_unix(const char *path, int listen_backlog) { int sock; - sock = socket(AF_UNIX, SOCK_STREAM, 0); + sock = socket(AF_UNIX, SOCK_STREAM | DEFAULT_SOCKET_FLAGS, 0); if(sock < 0) { nd_log(NDLS_DAEMON, NDLP_ERR, "LISTENER: UNIX socket() 
on path '%s' failed.", @@ -272,6 +282,7 @@ int create_listen_socket_unix(const char *path, int listen_backlog) { } sock_setnonblock(sock); + sock_setcloexec(sock); sock_enlarge_in(sock); struct sockaddr_un name; @@ -316,7 +327,7 @@ int create_listen_socket_unix(const char *path, int listen_backlog) { int create_listen_socket4(int socktype, const char *ip, uint16_t port, int listen_backlog) { int sock; - sock = socket(AF_INET, socktype, 0); + sock = socket(AF_INET, socktype | DEFAULT_SOCKET_FLAGS, 0); if(sock < 0) { nd_log(NDLS_DAEMON, NDLP_ERR, "LISTENER: IPv4 socket() on ip '%s' port %d, socktype %d failed.", @@ -324,10 +335,10 @@ int create_listen_socket4(int socktype, const char *ip, uint16_t port, int liste return -1; } - sock_setreuse(sock, 1); sock_setreuse_port(sock, 0); sock_setnonblock(sock); + sock_setcloexec(sock); sock_enlarge_in(sock); struct sockaddr_in name; @@ -374,7 +385,7 @@ int create_listen_socket6(int socktype, uint32_t scope_id, const char *ip, int p int sock; int ipv6only = 1; - sock = socket(AF_INET6, socktype, 0); + sock = socket(AF_INET6, socktype | DEFAULT_SOCKET_FLAGS, 0); if (sock < 0) { nd_log(NDLS_DAEMON, NDLP_ERR, "LISTENER: IPv6 socket() on ip '%s' port %d, socktype %d, failed.", @@ -382,10 +393,10 @@ int create_listen_socket6(int socktype, uint32_t scope_id, const char *ip, int p return -1; } - sock_setreuse(sock, 1); sock_setreuse_port(sock, 0); sock_setnonblock(sock); + sock_setcloexec(sock); sock_enlarge_in(sock); /* IPv6 only */ @@ -500,7 +511,7 @@ void listen_sockets_close(LISTEN_SOCKETS *sockets) { * * @param acl is the acl given by the user. 
*/ -WEB_CLIENT_ACL socket_ssl_acl(char *acl) { +HTTP_ACL socket_ssl_acl(char *acl) { char *ssl = strchr(acl,'^'); if(ssl) { //Due the format of the SSL command it is always the last command, @@ -511,34 +522,34 @@ WEB_CLIENT_ACL socket_ssl_acl(char *acl) { if (!strncmp("SSL=",ssl,4)) { ssl += 4; if (!strcmp(ssl,"optional")) { - return WEB_CLIENT_ACL_SSL_OPTIONAL; + return HTTP_ACL_SSL_OPTIONAL; } else if (!strcmp(ssl,"force")) { - return WEB_CLIENT_ACL_SSL_FORCE; + return HTTP_ACL_SSL_FORCE; } } #endif } - return WEB_CLIENT_ACL_NONE; + return HTTP_ACL_NONE; } -WEB_CLIENT_ACL read_acl(char *st) { - WEB_CLIENT_ACL ret = socket_ssl_acl(st); +HTTP_ACL read_acl(char *st) { + HTTP_ACL ret = socket_ssl_acl(st); - if (!strcmp(st,"dashboard")) ret |= WEB_CLIENT_ACL_DASHBOARD; - if (!strcmp(st,"registry")) ret |= WEB_CLIENT_ACL_REGISTRY; - if (!strcmp(st,"badges")) ret |= WEB_CLIENT_ACL_BADGE; - if (!strcmp(st,"management")) ret |= WEB_CLIENT_ACL_MGMT; - if (!strcmp(st,"streaming")) ret |= WEB_CLIENT_ACL_STREAMING; - if (!strcmp(st,"netdata.conf")) ret |= WEB_CLIENT_ACL_NETDATACONF; + if (!strcmp(st,"dashboard")) ret |= HTTP_ACL_DASHBOARD; + if (!strcmp(st,"registry")) ret |= HTTP_ACL_REGISTRY; + if (!strcmp(st,"badges")) ret |= HTTP_ACL_BADGES; + if (!strcmp(st,"management")) ret |= HTTP_ACL_MANAGEMENT; + if (!strcmp(st,"streaming")) ret |= HTTP_ACL_STREAMING; + if (!strcmp(st,"netdata.conf")) ret |= HTTP_ACL_NETDATACONF; return ret; } static inline int bind_to_this(LISTEN_SOCKETS *sockets, const char *definition, uint16_t default_port, int listen_backlog) { int added = 0; - WEB_CLIENT_ACL acl_flags = WEB_CLIENT_ACL_NONE; + HTTP_ACL acl_flags = HTTP_ACL_NONE; struct addrinfo hints; struct addrinfo *result = NULL, *rp = NULL; @@ -549,7 +560,7 @@ static inline int bind_to_this(LISTEN_SOCKETS *sockets, const char *definition, char buffer2[10 + 1]; snprintfz(buffer2, 10, "%d", default_port); - char *ip = buffer, *port = buffer2, *interface = "", *portconfig;; + char *ip = 
buffer, *port = buffer2, *interface = "", *portconfig; int protocol = IPPROTO_TCP, socktype = SOCK_STREAM; const char *protocol_str = "tcp"; @@ -559,12 +570,14 @@ static inline int bind_to_this(LISTEN_SOCKETS *sockets, const char *definition, protocol = IPPROTO_TCP; socktype = SOCK_STREAM; protocol_str = "tcp"; + acl_flags |= HTTP_ACL_API; } else if(strncmp(ip, "udp:", 4) == 0) { ip += 4; protocol = IPPROTO_UDP; socktype = SOCK_DGRAM; protocol_str = "udp"; + acl_flags |= HTTP_ACL_API_UDP; } else if(strncmp(ip, "unix:", 5) == 0) { char *path = ip + 5; @@ -578,7 +591,8 @@ static inline int bind_to_this(LISTEN_SOCKETS *sockets, const char *definition, sockets->failed++; } else { - acl_flags = WEB_CLIENT_ACL_DASHBOARD | WEB_CLIENT_ACL_REGISTRY | WEB_CLIENT_ACL_BADGE | WEB_CLIENT_ACL_MGMT | WEB_CLIENT_ACL_NETDATACONF | WEB_CLIENT_ACL_STREAMING | WEB_CLIENT_ACL_SSL_DEFAULT; + acl_flags = HTTP_ACL_API_UNIX | HTTP_ACL_DASHBOARD | HTTP_ACL_REGISTRY | HTTP_ACL_BADGES | + HTTP_ACL_MANAGEMENT | HTTP_ACL_NETDATACONF | HTTP_ACL_STREAMING | HTTP_ACL_SSL_DEFAULT; listen_sockets_add(sockets, fd, AF_UNIX, socktype, protocol_str, path, 0, acl_flags); added++; } @@ -628,13 +642,13 @@ static inline int bind_to_this(LISTEN_SOCKETS *sockets, const char *definition, } acl_flags |= read_acl(portconfig); } else { - acl_flags = WEB_CLIENT_ACL_DASHBOARD | WEB_CLIENT_ACL_REGISTRY | WEB_CLIENT_ACL_BADGE | WEB_CLIENT_ACL_MGMT | WEB_CLIENT_ACL_NETDATACONF | WEB_CLIENT_ACL_STREAMING | WEB_CLIENT_ACL_SSL_DEFAULT; + acl_flags |= HTTP_ACL_DASHBOARD | HTTP_ACL_REGISTRY | HTTP_ACL_BADGES | HTTP_ACL_MANAGEMENT | HTTP_ACL_NETDATACONF | HTTP_ACL_STREAMING | HTTP_ACL_SSL_DEFAULT; } //Case the user does not set the option SSL in the "bind to", but he has //the certificates, I must redirect, so I am assuming here the default option - if(!(acl_flags & WEB_CLIENT_ACL_SSL_OPTIONAL) && !(acl_flags & WEB_CLIENT_ACL_SSL_FORCE)) { - acl_flags |= WEB_CLIENT_ACL_SSL_DEFAULT; + if(!(acl_flags & HTTP_ACL_SSL_OPTIONAL) 
&& !(acl_flags & HTTP_ACL_SSL_FORCE)) { + acl_flags |= HTTP_ACL_SSL_DEFAULT; } uint32_t scope_id = 0; @@ -778,7 +792,7 @@ int listen_sockets_setup(LISTEN_SOCKETS *sockets) { // timeout the timeout for establishing a connection static inline int connect_to_unix(const char *path, struct timeval *timeout) { - int fd = socket(AF_UNIX, SOCK_STREAM, 0); + int fd = socket(AF_UNIX, SOCK_STREAM | DEFAULT_SOCKET_FLAGS, 0); if(fd == -1) { nd_log(NDLS_DAEMON, NDLP_ERR, "Failed to create UNIX socket() for '%s'", @@ -794,6 +808,8 @@ static inline int connect_to_unix(const char *path, struct timeval *timeout) { path); } + sock_setcloexec(fd); + struct sockaddr_un addr; memset(&addr, 0, sizeof(addr)); addr.sun_family = AF_UNIX; @@ -891,7 +907,7 @@ int connect_to_this_ip46(int protocol, int socktype, const char *host, uint32_t } } - fd = socket(ai->ai_family, ai->ai_socktype, ai->ai_protocol); + fd = socket(ai->ai_family, ai->ai_socktype | DEFAULT_SOCKET_FLAGS, ai->ai_protocol); if(fd != -1) { if(timeout) { if(setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, (char *) timeout, sizeof(struct timeval)) < 0) @@ -899,6 +915,7 @@ int connect_to_this_ip46(int protocol, int socktype, const char *host, uint32_t "Failed to set timeout on the socket to ip '%s' port '%s'", hostBfr, servBfr); } + sock_setcloexec(fd); errno = 0; if(connect(fd, ai->ai_addr, ai->ai_addrlen) < 0) { @@ -1263,11 +1280,6 @@ int accept4(int sock, struct sockaddr *addr, socklen_t *addrlen, int flags) { if (fd < 0) return fd; - if (flags & SOCK_NONBLOCK) { - newflags |= O_NONBLOCK; - flags &= ~SOCK_NONBLOCK; - } - #ifdef SOCK_CLOEXEC #ifdef O_CLOEXEC if (flags & SOCK_CLOEXEC) { @@ -1384,7 +1396,7 @@ int accept_socket(int fd, int flags, char *client_ip, size_t ipsize, char *clien struct sockaddr_storage sadr; socklen_t addrlen = sizeof(sadr); - int nfd = accept4(fd, (struct sockaddr *)&sadr, &addrlen, flags); + int nfd = accept4(fd, (struct sockaddr *)&sadr, &addrlen, flags | DEFAULT_SOCKET_FLAGS); if (likely(nfd >= 0)) { if 
(getnameinfo((struct sockaddr *)&sadr, addrlen, client_ip, (socklen_t)ipsize, client_port, (socklen_t)portsize, NI_NUMERICHOST | NI_NUMERICSERV) != 0) { @@ -1398,6 +1410,7 @@ int accept_socket(int fd, int flags, char *client_ip, size_t ipsize, char *clien if (!strcmp(client_ip, "127.0.0.1") || !strcmp(client_ip, "::1")) { strncpyz(client_ip, "localhost", ipsize); } + sock_setcloexec(nfd); #ifdef __FreeBSD__ if(((struct sockaddr *)&sadr)->sa_family == AF_LOCAL) @@ -1463,7 +1476,7 @@ int accept_socket(int fd, int flags, char *client_ip, size_t ipsize, char *clien inline POLLINFO *poll_add_fd(POLLJOB *p , int fd , int socktype - , WEB_CLIENT_ACL port_acl + , HTTP_ACL port_acl , uint32_t flags , const char *client_ip , const char *client_port @@ -1782,12 +1795,25 @@ static int poll_process_new_tcp_connection(POLLJOB *p, POLLINFO *pi, struct poll char client_port[NI_MAXSERV] = ""; char client_host[NI_MAXHOST] = ""; +#ifdef SOCK_NONBLOCK + int flags = SOCK_NONBLOCK; +#else + int flags = 0; +#endif + int nfd = accept_socket( - pf->fd,SOCK_NONBLOCK, + pf->fd, flags, client_ip, INET6_ADDRSTRLEN, client_port,NI_MAXSERV, client_host, NI_MAXHOST, p->access_list, p->allow_dns ); +#ifndef SOCK_NONBLOCK + if (nfd >= 0) { // 0 is a valid descriptor; only negative values mean accept failed + int fl = fcntl(nfd, F_GETFL); // named 'fl' so it does not shadow the outer 'flags' + if (fl != -1) (void)fcntl(nfd, F_SETFL, fl | O_NONBLOCK); // never OR O_NONBLOCK into a failed (-1) F_GETFL result + } +#endif + if (unlikely(nfd < 0)) { // accept failed diff --git a/src/libnetdata/socket/socket.h b/src/libnetdata/socket/socket.h new file mode 100644 index 000000000..d506f7aae --- /dev/null +++ b/src/libnetdata/socket/socket.h @@ -0,0 +1,207 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_SOCKET_H +#define NETDATA_SOCKET_H + +#include "../libnetdata.h" + +#ifndef MAX_LISTEN_FDS +#define MAX_LISTEN_FDS 50 +#endif + +typedef struct listen_sockets { + struct config *config; // the config file to use + const char *config_section; // the netdata configuration section to read settings from + const char *default_bind_to; // the default bind to configuration string 
+ uint16_t default_port; // the default port to use + int backlog; // the default listen backlog to use + + size_t opened; // the number of sockets opened + size_t failed; // the number of sockets attempted to open, but failed + int fds[MAX_LISTEN_FDS]; // the open sockets + char *fds_names[MAX_LISTEN_FDS]; // descriptions for the open sockets + int fds_types[MAX_LISTEN_FDS]; // the socktype for the open sockets (SOCK_STREAM, SOCK_DGRAM) + int fds_families[MAX_LISTEN_FDS]; // the family of the open sockets (AF_UNIX, AF_INET, AF_INET6) + HTTP_ACL fds_acl_flags[MAX_LISTEN_FDS]; // the acl to apply to the open sockets (dashboard, badges, streaming, netdata.conf, management) +} LISTEN_SOCKETS; + +char *strdup_client_description(int family, const char *protocol, const char *ip, uint16_t port); + +int listen_sockets_setup(LISTEN_SOCKETS *sockets); +void listen_sockets_close(LISTEN_SOCKETS *sockets); + +void foreach_entry_in_connection_string(const char *destination, bool (*callback)(char *entry, void *data), void *data); +int connect_to_this_ip46(int protocol, int socktype, const char *host, uint32_t scope_id, const char *service, struct timeval *timeout); +int connect_to_this(const char *definition, int default_port, struct timeval *timeout); +int connect_to_one_of(const char *destination, int default_port, struct timeval *timeout, size_t *reconnects_counter, char *connected_to, size_t connected_to_size); +int connect_to_one_of_urls(const char *destination, int default_port, struct timeval *timeout, size_t *reconnects_counter, char *connected_to, size_t connected_to_size); + + +#ifdef ENABLE_HTTPS +ssize_t recv_timeout(NETDATA_SSL *ssl,int sockfd, void *buf, size_t len, int flags, int timeout); +ssize_t send_timeout(NETDATA_SSL *ssl,int sockfd, void *buf, size_t len, int flags, int timeout); +#else +ssize_t recv_timeout(int sockfd, void *buf, size_t len, int flags, int timeout); +ssize_t send_timeout(int sockfd, void *buf, size_t len, int flags, int timeout); +#endif + 
+bool fd_is_socket(int fd); +bool sock_has_output_error(int fd); + +int sock_setnonblock(int fd); +int sock_delnonblock(int fd); +int sock_setreuse(int fd, int reuse); +void sock_setcloexec(int fd); +int sock_setreuse_port(int fd, int reuse); +int sock_enlarge_in(int fd); +int sock_enlarge_out(int fd); + +int connection_allowed(int fd, char *client_ip, char *client_host, size_t hostsize, + SIMPLE_PATTERN *access_list, const char *patname, int allow_dns); +int accept_socket(int fd, int flags, char *client_ip, size_t ipsize, char *client_port, size_t portsize, + char *client_host, size_t hostsize, SIMPLE_PATTERN *access_list, int allow_dns); + +#ifndef HAVE_ACCEPT4 +int accept4(int sock, struct sockaddr *addr, socklen_t *addrlen, int flags); +#endif /* #ifndef HAVE_ACCEPT4 */ + +#ifdef SOCK_CLOEXEC +#define DEFAULT_SOCKET_FLAGS SOCK_CLOEXEC +#else +#define DEFAULT_SOCKET_FLAGS 0 +#endif + + +// ---------------------------------------------------------------------------- +// poll() based listener + +#define POLLINFO_FLAG_SERVER_SOCKET 0x00000001 +#define POLLINFO_FLAG_CLIENT_SOCKET 0x00000002 +#define POLLINFO_FLAG_DONT_CLOSE 0x00000004 + +typedef struct poll POLLJOB; + +typedef struct pollinfo { + POLLJOB *p; // the parent + size_t slot; // the slot id + + int fd; // the file descriptor + int socktype; // the client socket type + HTTP_ACL port_acl; // the access lists permitted on this web server port (it's -1 for client sockets) + char *client_ip; // Max INET6_ADDRSTRLEN bytes + char *client_port; // Max NI_MAXSERV bytes + char *client_host; // Max NI_MAXHOST bytes + + time_t connected_t; // the time the socket connected + time_t last_received_t; // the time the socket last received data + time_t last_sent_t; // the time the socket last sent data + + size_t recv_count; // the number of times the socket was ready for inbound traffic + size_t send_count; // the number of times the socket was ready for outbound traffic + + uint32_t flags; // internal flags + + // 
callbacks for this socket + void (*del_callback)(struct pollinfo *pi); + int (*rcv_callback)(struct pollinfo *pi, short int *events); + int (*snd_callback)(struct pollinfo *pi, short int *events); + + // the user data + void *data; + + // linking of free pollinfo structures + // for quickly finding the next available + // this is like a stack, it grows and shrinks + // (with gaps - lower empty slots are preferred) + struct pollinfo *next; +} POLLINFO; + +struct poll { + size_t slots; + size_t used; + size_t min; + size_t max; + + size_t limit; + + time_t complete_request_timeout; + time_t idle_timeout; + time_t checks_every; + + time_t timer_milliseconds; + void *timer_data; + + struct pollfd *fds; + struct pollinfo *inf; + struct pollinfo *first_free; + + SIMPLE_PATTERN *access_list; + int allow_dns; + + void *(*add_callback)(POLLINFO *pi, short int *events, void *data); + void (*del_callback)(POLLINFO *pi); + int (*rcv_callback)(POLLINFO *pi, short int *events); + int (*snd_callback)(POLLINFO *pi, short int *events); + void (*tmr_callback)(void *timer_data); +}; + +#define pollinfo_from_slot(p, slot) (&((p)->inf[(slot)])) + +int poll_default_snd_callback(POLLINFO *pi, short int *events); +int poll_default_rcv_callback(POLLINFO *pi, short int *events); +void poll_default_del_callback(POLLINFO *pi); +void *poll_default_add_callback(POLLINFO *pi, short int *events, void *data); + +POLLINFO *poll_add_fd(POLLJOB *p + , int fd + , int socktype + , HTTP_ACL port_acl + , uint32_t flags + , const char *client_ip + , const char *client_port + , const char *client_host + , void *(*add_callback)(POLLINFO *pi, short int *events, void *data) + , void (*del_callback)(POLLINFO *pi) + , int (*rcv_callback)(POLLINFO *pi, short int *events) + , int (*snd_callback)(POLLINFO *pi, short int *events) + , void *data +); +void poll_close_fd(POLLINFO *pi); + +void poll_events(LISTEN_SOCKETS *sockets + , void *(*add_callback)(POLLINFO *pi, short int *events, void *data) + , void 
(*del_callback)(POLLINFO *pi) + , int (*rcv_callback)(POLLINFO *pi, short int *events) + , int (*snd_callback)(POLLINFO *pi, short int *events) + , void (*tmr_callback)(void *timer_data) + , bool (*check_to_stop_callback)(void) + , SIMPLE_PATTERN *access_list + , int allow_dns + , void *data + , time_t tcp_request_timeout_seconds + , time_t tcp_idle_timeout_seconds + , time_t timer_milliseconds + , void *timer_data + , size_t max_tcp_sockets +); + +#ifndef INET6_ADDRSTRLEN +#define INET6_ADDRSTRLEN 46 +#endif + +typedef struct socket_peers { + struct { + char ip[INET6_ADDRSTRLEN]; + int port; + } local; + + struct { + char ip[INET6_ADDRSTRLEN]; + int port; + } peer; +} SOCKET_PEERS; + +SOCKET_PEERS socket_peers(int sock_fd); +bool ip_to_hostname(const char *ip, char *dst, size_t dst_len); + +#endif //NETDATA_SOCKET_H diff --git a/src/libnetdata/statistical/README.md b/src/libnetdata/statistical/README.md new file mode 100644 index 000000000..1d1d2afd4 --- /dev/null +++ b/src/libnetdata/statistical/README.md @@ -0,0 +1,12 @@ +<!-- +title: "Statistical functions" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/statistical/README.md +sidebar_label: "Statistical functions" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# Statistical functions + +A library for easy and fast calculations of statistical measurements like average, median etc. 
diff --git a/libnetdata/statistical/statistical.c b/src/libnetdata/statistical/statistical.c index ef9fe4e56..ef9fe4e56 100644 --- a/libnetdata/statistical/statistical.c +++ b/src/libnetdata/statistical/statistical.c diff --git a/libnetdata/statistical/statistical.h b/src/libnetdata/statistical/statistical.h index f3ecfadb4..f3ecfadb4 100644 --- a/libnetdata/statistical/statistical.h +++ b/src/libnetdata/statistical/statistical.h diff --git a/src/libnetdata/storage-point.h b/src/libnetdata/storage-point.h new file mode 100644 index 000000000..53e7506fa --- /dev/null +++ b/src/libnetdata/storage-point.h @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_STORAGE_POINT_H +#define NETDATA_STORAGE_POINT_H + +#include "storage_number/storage_number.h" + +typedef struct storage_point { + NETDATA_DOUBLE min; // when count > 1, this is the minimum among them + NETDATA_DOUBLE max; // when count > 1, this is the maximum among them + NETDATA_DOUBLE sum; // the point sum - divided by count gives the average + + // end_time - start_time = point duration + time_t start_time_s; // the time the point starts + time_t end_time_s; // the time the point ends + + uint32_t count; // the number of original points aggregated + uint32_t anomaly_count; // the number of original points found anomalous + + SN_FLAGS flags; // flags stored with the point +} STORAGE_POINT; + +#define storage_point_unset(x) do { \ + (x).min = (x).max = (x).sum = NAN; \ + (x).count = 0; \ + (x).anomaly_count = 0; \ + (x).flags = SN_FLAG_NONE; \ + (x).start_time_s = 0; \ + (x).end_time_s = 0; \ + } while(0) + +#define storage_point_empty(x, start_s, end_s) do { \ + (x).min = (x).max = (x).sum = NAN; \ + (x).count = 1; \ + (x).anomaly_count = 0; \ + (x).flags = SN_FLAG_NONE; \ + (x).start_time_s = start_s; \ + (x).end_time_s = end_s; \ + } while(0) + +#define STORAGE_POINT_UNSET (STORAGE_POINT){ .min = NAN, .max = NAN, .sum = NAN, .count = 0, .anomaly_count = 0, .flags = SN_FLAG_NONE, 
.start_time_s = 0, .end_time_s = 0 } + +#define storage_point_is_unset(x) (!(x).count) +#define storage_point_is_gap(x) (!netdata_double_isnumber((x).sum)) +#define storage_point_is_zero(x) (!(x).count || (netdata_double_is_zero((x).min) && netdata_double_is_zero((x).max) && netdata_double_is_zero((x).sum) && (x).anomaly_count == 0)) + +#define storage_point_merge_to(dst, src) do { \ + if(storage_point_is_unset(dst)) \ + (dst) = (src); \ + \ + else if(!storage_point_is_unset(src) && \ + !storage_point_is_gap(src)) { \ + \ + if((src).start_time_s < (dst).start_time_s) \ + (dst).start_time_s = (src).start_time_s;\ + \ + if((src).end_time_s > (dst).end_time_s) \ + (dst).end_time_s = (src).end_time_s; \ + \ + if((src).min < (dst).min) \ + (dst).min = (src).min; \ + \ + if((src).max > (dst).max) \ + (dst).max = (src).max; \ + \ + (dst).sum += (src).sum; \ + \ + (dst).count += (src).count; \ + (dst).anomaly_count += (src).anomaly_count; \ + \ + (dst).flags |= (src).flags & SN_FLAG_RESET; \ + } \ +} while(0) + +#define storage_point_add_to(dst, src) do { \ + if(storage_point_is_unset(dst)) \ + (dst) = (src); \ + \ + else if(!storage_point_is_unset(src) && \ + !storage_point_is_gap(src)) { \ + \ + if((src).start_time_s < (dst).start_time_s) \ + (dst).start_time_s = (src).start_time_s;\ + \ + if((src).end_time_s > (dst).end_time_s) \ + (dst).end_time_s = (src).end_time_s; \ + \ + (dst).min += (src).min; \ + (dst).max += (src).max; \ + (dst).sum += (src).sum; \ + \ + (dst).count += (src).count; \ + (dst).anomaly_count += (src).anomaly_count; \ + \ + (dst).flags |= (src).flags & SN_FLAG_RESET; \ + } \ +} while(0) + +#define storage_point_make_positive(sp) do { \ + if(!storage_point_is_unset(sp) && \ + !storage_point_is_gap(sp)) { \ + \ + if(unlikely(signbit((sp).sum))) \ + (sp).sum = -(sp).sum; \ + \ + if(unlikely(signbit((sp).min))) \ + (sp).min = -(sp).min; \ + \ + if(unlikely(signbit((sp).max))) \ + (sp).max = -(sp).max; \ + \ + if(unlikely((sp).min > (sp).max)) { \ + 
NETDATA_DOUBLE t = (sp).min; \ + (sp).min = (sp).max; \ + (sp).max = t; \ + } \ + } \ +} while(0) + +#define storage_point_anomaly_rate(sp) \ + (NETDATA_DOUBLE)(storage_point_is_unset(sp) ? 0.0 : (NETDATA_DOUBLE)((sp).anomaly_count) * 100.0 / (NETDATA_DOUBLE)((sp).count)) + +#define storage_point_average_value(sp) \ + ((sp).count ? (sp).sum / (NETDATA_DOUBLE)((sp).count) : 0.0) + + +#endif //NETDATA_STORAGE_POINT_H diff --git a/src/libnetdata/storage_number/README.md b/src/libnetdata/storage_number/README.md new file mode 100644 index 000000000..f0096fb9b --- /dev/null +++ b/src/libnetdata/storage_number/README.md @@ -0,0 +1,21 @@ +<!-- +title: "Netdata storage number" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/storage_number/README.md +sidebar_label: "Storage number" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# Netdata storage number + +Although `netdata` does all its calculations using `long double`, it stores all values using +a **custom-made 32-bit number**. + +This custom-made number can store in 29 bits values from `-167772150000000.0` to `167772150000000.0` +with a precision of 0.00001 (yes, it's a floating point number, meaning that higher integer values +have less decimal precision) and 3 bits for flags. + +This provides an extremely optimized memory footprint with just 0.0001% max accuracy loss. 
+ + diff --git a/libnetdata/storage_number/storage_number.c b/src/libnetdata/storage_number/storage_number.c index 6468951bd..89a67a532 100644 --- a/libnetdata/storage_number/storage_number.c +++ b/src/libnetdata/storage_number/storage_number.c @@ -147,13 +147,7 @@ storage_number pack_storage_number(NETDATA_DOUBLE value, SN_FLAGS flags) { r += (m << 27); // the divider m } -#ifdef STORAGE_WITH_MATH - // without this there are rounding problems - // example: 0.9 becomes 0.89 r += lrint((double) n); -#else - r += (storage_number)n; -#endif return r; } @@ -174,60 +168,3 @@ __attribute__((constructor)) void initialize_lut(void) { unpack_storage_number_lut10x[3 * 8 + i] = pow(100, i); // exp = 1 } } - -/* -int print_netdata_double(char *str, NETDATA_DOUBLE value) -{ - char *wstr = str; - - int sign = (value < 0) ? 1 : 0; - if(sign) value = -value; - -#ifdef STORAGE_WITH_MATH - // without llrintl() there are rounding problems - // for example 0.9 becomes 0.89 - unsigned long long uvalue = (unsigned long long int) llrintl(value * (NETDATA_DOUBLE)100000); -#else - unsigned long long uvalue = value * (NETDATA_DOUBLE)100000; -#endif - - wstr = print_number_llu_r_smart(str, uvalue); - - // make sure we have 6 bytes at least - while((wstr - str) < 6) *wstr++ = '0'; - - // put the sign back - if(sign) *wstr++ = '-'; - - // reverse it - char *begin = str, *end = --wstr, aux; - while (end > begin) aux = *end, *end-- = *begin, *begin++ = aux; - // wstr--; - // strreverse(str, wstr); - - // remove trailing zeros - int decimal = 5; - while(decimal > 0 && *wstr == '0') { - *wstr-- = '\0'; - decimal--; - } - - // terminate it, one position to the right - // to let space for a dot - wstr[2] = '\0'; - - // make space for the dot - int i; - for(i = 0; i < decimal ;i++) { - wstr[1] = wstr[0]; - wstr--; - } - - // put the dot - if(wstr[2] == '\0') { wstr[1] = '\0'; decimal--; } - else wstr[1] = '.'; - - // return the buffer length - return (int) ((wstr - str) + 2 + decimal ); -} -*/ diff 
--git a/libnetdata/storage_number/storage_number.h b/src/libnetdata/storage_number/storage_number.h index 82c870d69..9a95203cd 100644 --- a/libnetdata/storage_number/storage_number.h +++ b/src/libnetdata/storage_number/storage_number.h @@ -116,10 +116,10 @@ storage_number pack_storage_number(NETDATA_DOUBLE value, SN_FLAGS flags) __attri static inline NETDATA_DOUBLE unpack_storage_number(storage_number value) __attribute__((const)); // sign div/mul <--- multiplier / divider ---> 10/100 RESET EXISTS VALUE -#define STORAGE_NUMBER_POSITIVE_MAX_RAW (storage_number)( (0 << 31) | (1 << 30) | (1 << 29) | (1 << 28) | (1 << 27) | (1 << 26) | (0 << 25) | (1 << 24) | 0x00ffffff ) -#define STORAGE_NUMBER_POSITIVE_MIN_RAW (storage_number)( (0 << 31) | (0 << 30) | (1 << 29) | (1 << 28) | (1 << 27) | (0 << 26) | (0 << 25) | (1 << 24) | 0x00000001 ) -#define STORAGE_NUMBER_NEGATIVE_MAX_RAW (storage_number)( (1 << 31) | (0 << 30) | (1 << 29) | (1 << 28) | (1 << 27) | (0 << 26) | (0 << 25) | (1 << 24) | 0x00000001 ) -#define STORAGE_NUMBER_NEGATIVE_MIN_RAW (storage_number)( (1 << 31) | (1 << 30) | (1 << 29) | (1 << 28) | (1 << 27) | (1 << 26) | (0 << 25) | (1 << 24) | 0x00ffffff ) +#define STORAGE_NUMBER_POSITIVE_MAX_RAW (storage_number)( (0U << 31) | (1U << 30) | (1U << 29) | (1U << 28) | (1U << 27) | (1U << 26) | (0U << 25) | (1U << 24) | 0x00ffffff ) +#define STORAGE_NUMBER_POSITIVE_MIN_RAW (storage_number)( (0U << 31) | (0U << 30) | (1U << 29) | (1U << 28) | (1U << 27) | (0U << 26) | (0U << 25) | (1U << 24) | 0x00000001 ) +#define STORAGE_NUMBER_NEGATIVE_MAX_RAW (storage_number)( (1U << 31) | (0U << 30) | (1U << 29) | (1U << 28) | (1U << 27) | (0U << 26) | (0U << 25) | (1U << 24) | 0x00000001 ) +#define STORAGE_NUMBER_NEGATIVE_MIN_RAW (storage_number)( (1U << 31) | (1U << 30) | (1U << 29) | (1U << 28) | (1U << 27) | (1U << 26) | (0U << 25) | (1U << 24) | 0x00ffffff ) // accepted accuracy loss #define ACCURACY_LOSS_ACCEPTED_PERCENT 0.0001 @@ -155,10 +155,10 @@ static inline 
NETDATA_DOUBLE unpack_storage_number(storage_number value) { // bit 25 SN_FLAG_NOT_ANOMALOUS // bit 30, 29, 28 = (multiplier or divider) 0-7 (8 total) - int mul = (int)((value & ((1<<29)|(1<<28)|(1<<27))) >> 27); + int mul = (int)((value & ((1U<<29)|(1U<<28)|(1U<<27))) >> 27); // bit 24 to bit 1 = the value, so remove all other bits - value ^= value & ((1<<31)|(1<<30)|(1<<29)|(1<<28)|(1<<27)|(1<<26)|(1<<25)|(1<<24)); + value ^= value & ((1U <<31)|(1U <<30)|(1U <<29)|(1U <<28)|(1U <<27)|(1U <<26)|(1U <<25)|(1U<<24)); NETDATA_DOUBLE n = value; diff --git a/libnetdata/storage_number/tests/test_storage_number.c b/src/libnetdata/storage_number/tests/test_storage_number.c index 19309e5c2..19309e5c2 100644 --- a/libnetdata/storage_number/tests/test_storage_number.c +++ b/src/libnetdata/storage_number/tests/test_storage_number.c diff --git a/src/libnetdata/string/README.md b/src/libnetdata/string/README.md new file mode 100644 index 000000000..54c905946 --- /dev/null +++ b/src/libnetdata/string/README.md @@ -0,0 +1,25 @@ +<!-- +title: "String" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/string/README.md +sidebar_label: "String" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# STRING + +STRING provides a way to allocate and free text strings, while de-duplicating them. + +It can be used similarly to libc string functions: + + - `strdup()` and `strdupz()` become `string_strdupz()`. + - `strlen()` becomes `string_strlen()` (and it does not walk through the bytes of the string). + - `free()` and `freez()` become `string_freez()`. + +There is also a special `string_dup()` function that increases the reference counter of a STRING, avoiding the +index lookup to find it. + +Once there is a `STRING *`, the actual `const char *` can be accessed with `string2str()`. + +All STRINGs should be treated as constant. 
Changing the contents of a `const char *` that has been acquired by `string2str()` should never happen. diff --git a/libnetdata/string/string.c b/src/libnetdata/string/string.c index e1c8352a5..0b4a6470d 100644 --- a/libnetdata/string/string.c +++ b/src/libnetdata/string/string.c @@ -88,8 +88,8 @@ void string_statistics(size_t *inserts, size_t *deletes, size_t *searches, size_ } } -#define string_entry_acquire(se) __atomic_add_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST); -#define string_entry_release(se) __atomic_sub_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST); +#define string_entry_acquire(se) __atomic_add_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST) +#define string_entry_release(se) __atomic_sub_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST) static inline bool string_entry_check_and_acquire(STRING *se) { #ifdef NETDATA_INTERNAL_CHECKS @@ -307,6 +307,25 @@ STRING *string_strdupz(const char *str) { return string; } +STRING *string_strndupz(const char *str, size_t len) { + if(unlikely(!str || !*str || !len)) return NULL; + +#ifdef NETDATA_INTERNAL_CHECKS + uint8_t partition = string_partition_str(str); +#endif + + char buf[len + 1]; + memcpy(buf, str, len); + buf[len] = '\0'; + + STRING *string = string_index_search(buf, len + 1); + while(!string) + string = string_index_insert(buf, len + 1); + + string_stats_atomic_increment(partition, active_references); + return string; +} + void string_freez(STRING *string) { if(unlikely(!string)) return; diff --git a/libnetdata/string/string.h b/src/libnetdata/string/string.h index ba0e3876b..f2ff9666c 100644 --- a/libnetdata/string/string.h +++ b/src/libnetdata/string/string.h @@ -8,7 +8,10 @@ // STRING implementation typedef struct netdata_string STRING; + STRING *string_strdupz(const char *str); +STRING *string_strndupz(const char *str, size_t len); + STRING *string_dup(STRING *string); void string_freez(STRING *string); size_t string_strlen(STRING *string); diff --git a/libnetdata/string/utf8.h 
b/src/libnetdata/string/utf8.h index 3e6c8c288..3e6c8c288 100644 --- a/libnetdata/string/utf8.h +++ b/src/libnetdata/string/utf8.h diff --git a/src/libnetdata/template-enum.h b/src/libnetdata/template-enum.h new file mode 100644 index 000000000..393a6a945 --- /dev/null +++ b/src/libnetdata/template-enum.h @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_TEMPLATE_ENUM_H +#define NETDATA_TEMPLATE_ENUM_H + +#define ENUM_STR_MAP_DEFINE(type) \ + static struct { \ + type id; \ + const char *name; \ + } type ## _names[] + +#define ENUM_STR_DEFINE_FUNCTIONS_EXTERN(type) \ + type type ## _2id(const char *str); \ + const char *type##_2str(type id); + +#define ENUM_STR_DEFINE_FUNCTIONS(type, def, def_str) \ + type type##_2id(const char *str) \ + { \ + if (!str || !*str) \ + return def; \ + \ + for (size_t i = 0; type ## _names[i].name; i++) { \ + if (strcmp(type ## _names[i].name, str) == 0) \ + return type ## _names[i].id; \ + } \ + \ + return def; \ + } \ + \ + const char *type##_2str(type id) \ + { \ + for (size_t i = 0; type ## _names[i].name; i++) { \ + if (id == type ## _names[i].id) \ + return type ## _names[i].name; \ + } \ + \ + return def_str; \ + } + +#endif //NETDATA_TEMPLATE_ENUM_H diff --git a/libnetdata/tests/test_str2ld.c b/src/libnetdata/tests/test_str2ld.c index 8b97a70f8..8b97a70f8 100644 --- a/libnetdata/tests/test_str2ld.c +++ b/src/libnetdata/tests/test_str2ld.c diff --git a/src/libnetdata/threads/README.md b/src/libnetdata/threads/README.md new file mode 100644 index 000000000..906f47952 --- /dev/null +++ b/src/libnetdata/threads/README.md @@ -0,0 +1,12 @@ +<!-- +title: Threads +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/threads/README.md +sidebar_label: "Threads" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# Threads + +Netdata uses a custom threads library diff --git a/libnetdata/threads/threads.c 
b/src/libnetdata/threads/threads.c index c14f9a7eb..94761671a 100644 --- a/libnetdata/threads/threads.c +++ b/src/libnetdata/threads/threads.c @@ -281,8 +281,9 @@ static void *netdata_thread_init(void *ptr) { } void *ret = NULL; - pthread_cleanup_push(thread_cleanup, ptr); - ret = netdata_thread->start_routine(netdata_thread->arg); + pthread_cleanup_push(thread_cleanup, ptr) { + ret = netdata_thread->start_routine(netdata_thread->arg); + } pthread_cleanup_pop(1); return ret; diff --git a/libnetdata/threads/threads.h b/src/libnetdata/threads/threads.h index 97c3c8036..4f1d06f00 100644 --- a/libnetdata/threads/threads.h +++ b/src/libnetdata/threads/threads.h @@ -59,6 +59,10 @@ struct netdata_static_thread { const char *netdata_thread_tag(void); int netdata_thread_tag_exists(void); +#define THREAD_TAG_STREAM_RECEIVER "RCVR" +#define THREAD_TAG_STREAM_SENDER "SNDR" + + size_t netdata_threads_init(void); void netdata_threads_init_after_fork(size_t stacksize); void netdata_threads_init_for_external_plugins(size_t stacksize); diff --git a/src/libnetdata/url/README.md b/src/libnetdata/url/README.md new file mode 100644 index 000000000..01a2dddb6 --- /dev/null +++ b/src/libnetdata/url/README.md @@ -0,0 +1,14 @@ +<!-- +title: "URL" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/url/README.md +sidebar_label: "URL" +learn_status: "Published" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# URL + +The URL library contains common functions useful for URLs, like conversion from/to hex, +URL encode/decode and query string parsing. 
+ diff --git a/libnetdata/url/url.c b/src/libnetdata/url/url.c index 39366cbe8..e84971714 100644 --- a/libnetdata/url/url.c +++ b/src/libnetdata/url/url.c @@ -236,7 +236,8 @@ fail_cleanup: return NULL; } -inline bool url_is_request_complete(char *begin, char *end, size_t length, char **post_payload, size_t *post_payload_size) { +inline bool +url_is_request_complete_and_extract_payload(const char *begin, const char *end, size_t length, BUFFER **post_payload) { if (begin == end || length < 4) return false; @@ -244,29 +245,42 @@ inline bool url_is_request_complete(char *begin, char *end, size_t length, char return strstr(end - 4, "\r\n\r\n"); } else if(unlikely(strncmp(begin, "POST ", 5) == 0 || strncmp(begin, "PUT ", 4) == 0)) { - char *cl = strstr(begin, "Content-Length: "); + const char *cl = strcasestr(begin, "Content-Length: "); if(!cl) return false; cl = &cl[16]; size_t content_length = str2ul(cl); - char *payload = strstr(cl, "\r\n\r\n"); + const char *payload = strstr(cl, "\r\n\r\n"); if(!payload) return false; payload += 4; size_t payload_length = length - (payload - begin); if(payload_length == content_length) { - if(post_payload && post_payload_size) { - if (*post_payload) - freez(*post_payload); + if(!*post_payload) + *post_payload = buffer_create(payload_length + 1, NULL); - *post_payload = mallocz(payload_length + 1); - memcpy(*post_payload, payload, payload_length); - (*post_payload)[payload_length] = '\0'; + buffer_contents_replace(*post_payload, payload, payload_length); - *post_payload_size = payload_length; + // parse the content type + const char *ct = strcasestr(begin, "Content-Type: "); + if(ct && ct < payload) { // a match at or after the payload start is body text, not a header + ct = &ct[14]; + while (*ct && isspace((unsigned char)*ct)) ct++; // <ctype.h> needs values representable as unsigned char + const char *space = ct; + while (*space && !isspace((unsigned char)*space) && *space != ';') space++; + size_t ct_len = space - ct; + + char ct_copy[ct_len + 1]; + memcpy(ct_copy, ct, ct_len); + ct_copy[ct_len] = '\0'; + + (*post_payload)->content_type = content_type_string2id(ct_copy); } + else + 
(*post_payload)->content_type = CT_TEXT_PLAIN; + return true; } diff --git a/libnetdata/url/url.h b/src/libnetdata/url/url.h index 9db018f0b..f7a67dd5c 100644 --- a/libnetdata/url/url.h +++ b/src/libnetdata/url/url.h @@ -25,7 +25,7 @@ char *url_decode(char *str); char *url_decode_r(char *to, const char *url, size_t size); -bool url_is_request_complete(char *begin, char *end, size_t length, char **post_payload, size_t *post_payload_length); +bool url_is_request_complete_and_extract_payload(const char *begin, const char *end, size_t length, BUFFER **post_payload); char *url_find_protocol(char *s); #endif /* NETDATA_URL_H */ diff --git a/src/libnetdata/uuid/README.md b/src/libnetdata/uuid/README.md new file mode 100644 index 000000000..a0da380a9 --- /dev/null +++ b/src/libnetdata/uuid/README.md @@ -0,0 +1,13 @@ +<!-- +title: "UUID" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/uuid/README.md +sidebar_label: "UUID" +learn_topic_type: "Tasks" +learn_rel_path: "Developers/libnetdata" +--> + +# UUID + +Netdata uses libuuid for managing UUIDs. + +In this folder are a few custom helpers.
\ No newline at end of file diff --git a/libnetdata/uuid/uuid.c b/src/libnetdata/uuid/uuid.c index 55b66db9b..f062d0bc5 100644 --- a/libnetdata/uuid/uuid.c +++ b/src/libnetdata/uuid/uuid.c @@ -2,6 +2,26 @@ #include "../libnetdata.h" + +UUID UUID_generate_from_hash(const void *payload, size_t payload_len) { + assert(sizeof(XXH128_hash_t) == sizeof(UUID)); + + UUID uuid; + XXH128_hash_t *xxh3_128 = (XXH128_hash_t *)&uuid; + + // Hash the payload using XXH128 + // Assume xxh128_hash_function is your function to generate XXH128 hash + *xxh3_128 = XXH3_128bits(payload, payload_len); + + // Set the UUID version (here, setting it to 4) + uuid.uuid[6] = (uuid.uuid[6] & 0x0F) | 0x40; // Version 4 + + // Set the UUID variant (standard variant for UUID) + uuid.uuid[8] = (uuid.uuid[8] & 0x3F) | 0x80; // Variant is 10xxxxxx + + return uuid; +} + void uuid_unparse_lower_compact(const uuid_t uuid, char *out) { static const char *hex_chars = "0123456789abcdef"; for (int i = 0; i < 16; i++) { diff --git a/libnetdata/uuid/uuid.h b/src/libnetdata/uuid/uuid.h index 567646846..6d5f024f8 100644 --- a/libnetdata/uuid/uuid.h +++ b/src/libnetdata/uuid/uuid.h @@ -10,6 +10,24 @@ UUID_DEFINE(health_alert_transition_msgid, 0x9c, 0xe0, 0xcb, 0x58, 0xab, 0x8b, 0 // this is also defined in alarm-notify.sh.in UUID_DEFINE(health_alert_notification_msgid, 0x6d, 0xb0, 0x01, 0x8e, 0x83, 0xe3, 0x43, 0x20, 0xae, 0x2a, 0x65, 0x9d, 0x78, 0x01, 0x9f, 0xb7); +typedef struct { + union { + uuid_t uuid; + struct { + uint64_t hig64; + uint64_t low64; + } parts; + }; +} UUID; +UUID UUID_generate_from_hash(const void *payload, size_t payload_len); + +#define UUIDeq(a, b) ((a).parts.hig64 == (b).parts.hig64 && (a).parts.low64 == (b).parts.low64) + +static inline UUID uuid2UUID(uuid_t uu1) { + UUID *ret = (UUID *)uu1; + return *ret; +} + #define UUID_COMPACT_STR_LEN 33 void uuid_unparse_lower_compact(const uuid_t uuid, char *out); int uuid_parse_compact(const char *in, uuid_t uuid); diff --git 
a/src/libnetdata/worker_utilization/README.md b/src/libnetdata/worker_utilization/README.md new file mode 100644 index 000000000..1a354376c --- /dev/null +++ b/src/libnetdata/worker_utilization/README.md @@ -0,0 +1,94 @@ +<!-- +title: "Worker Utilization" +custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/worker_utilization/README.md +sidebar_label: "Worker Utilization" +learn_status: "Published" +learn_topic_type: "References" +learn_rel_path: "Developers/libnetdata" +--> + +# Worker Utilization + +This library is to be used when there are one or more worker threads accepting requests +of some kind and servicing them. The goal is to provide a very simple way to monitor +worker thread utilization, as a percentage of the time they are busy and the number +of requests served. + +## Design goals + +1. Minimal, if any, impact on the performance of the workers +2. Easy to integrate into any kind of worker +3. No state of any kind at the worker side + +## How to use + +When a worker thread starts, call: + +```c +void worker_register(const char *name); +``` + +This will create the necessary structures for the library to work. +No need to keep a pointer to them. They are allocated as `__thread` variables. + +Then job types need to be defined. A job type is anything a worker does that can be +counted and whose execution time needs to be reported. The library is fast enough to +be integrated even into workers that perform hundreds of thousands of actions per second. + +Job types are defined like this: + +```c +void worker_register_job_type(size_t id, const char *name); +``` + +`id` is a number starting from zero. The library is compiled with a fixed size of 50 +ids (0 to 49). More can be allocated by setting `WORKER_UTILIZATION_MAX_JOB_TYPES` in +`worker_utilization.h`. `name` can be any string up to 22 characters. This can be +changed by setting `WORKER_UTILIZATION_MAX_JOB_NAME_LENGTH` in `worker_utilization.h`. 
+ +Each thread that calls `worker_register(name)` will allocate about 3kB for maintaining +the information required. + +When the thread stops, call: + +```c +void worker_unregister(void); +``` + +Again, no parameters or return values. + +> IMPORTANT: cancellable threads need to add a call to `worker_unregister()` to the +> `pop` function that cleans up the thread. Failure to do so will leak about +> 3kB of memory for every thread that is stopped. + +When you are about to do some work in the worker thread, call: + +```c +void worker_is_busy(size_t id); +``` + +When you finish doing the job, call: + +```c +void worker_is_idle(void); +``` + +Calls to `worker_is_busy(id)` can be made one after another (without calling +`worker_is_idle()` between them) to switch jobs without losing any time between +them, which also eliminates one of the 2 clock calls involved. + +## Implementation details + +The library is totally lockless and extremely fast, so it should not introduce any kind of problem to the +workers. Every time `worker_is_busy(id)` or `worker_is_idle()` is called, a call to +`now_realtime_usec()` is made and a couple of variables are updated. That's it! + +The worker does not need to update the variables regularly. Based on the last status +of the worker, the statistics collector of netdata will calculate if the thread is +busy or idle all the time or part of the time. This works well for both thousands of jobs +per second and unlimited working time (being totally busy with a single request for +ages). + +The statistics collector is called by the global statistics thread of netdata. So, +even if the workers are extremely busy with their jobs, netdata will be able to know +how busy they are. 
diff --git a/libnetdata/worker_utilization/worker_utilization.c b/src/libnetdata/worker_utilization/worker_utilization.c index f39cea8a0..f39cea8a0 100644 --- a/libnetdata/worker_utilization/worker_utilization.c +++ b/src/libnetdata/worker_utilization/worker_utilization.c diff --git a/libnetdata/worker_utilization/worker_utilization.h b/src/libnetdata/worker_utilization/worker_utilization.h index e2f46c5a6..e2f46c5a6 100644 --- a/libnetdata/worker_utilization/worker_utilization.h +++ b/src/libnetdata/worker_utilization/worker_utilization.h diff --git a/libnetdata/xxhash.h b/src/libnetdata/xxhash.h index 5e2c0ed24..5e2c0ed24 100644 --- a/libnetdata/xxhash.h +++ b/src/libnetdata/xxhash.h |