From d079b656b4719739b2247dcd9d46e9bec793095a Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 6 Feb 2023 17:11:34 +0100 Subject: Merging upstream version 1.38.0. Signed-off-by: Daniel Baumann --- libnetdata/Makefile.am | 3 +- libnetdata/README.md | 4 + libnetdata/adaptive_resortable_list/README.md | 4 + libnetdata/aral/Makefile.am | 8 + libnetdata/aral/README.md | 173 ++++ libnetdata/aral/aral.c | 1081 ++++++++++++++++++++ libnetdata/aral/aral.h | 69 ++ libnetdata/arrayalloc/Makefile.am | 8 - libnetdata/arrayalloc/README.md | 7 - libnetdata/arrayalloc/arrayalloc.c | 489 --------- libnetdata/arrayalloc/arrayalloc.h | 48 - libnetdata/avl/README.md | 4 + libnetdata/buffer/README.md | 4 + libnetdata/buffer/buffer.c | 60 +- libnetdata/buffer/buffer.h | 3 +- libnetdata/circular_buffer/README.md | 6 +- libnetdata/circular_buffer/circular_buffer.c | 30 +- libnetdata/circular_buffer/circular_buffer.h | 3 +- libnetdata/clocks/clocks.c | 52 +- libnetdata/clocks/clocks.h | 3 +- libnetdata/completion/completion.c | 30 + libnetdata/completion/completion.h | 5 + libnetdata/config/README.md | 4 + libnetdata/dictionary/README.md | 4 + libnetdata/dictionary/dictionary.c | 130 ++- libnetdata/dictionary/dictionary.h | 13 +- libnetdata/ebpf/README.md | 8 + libnetdata/ebpf/ebpf.c | 27 +- libnetdata/ebpf/ebpf.h | 2 +- libnetdata/eval/eval.c | 4 +- libnetdata/json/README.md | 4 + libnetdata/json/json.c | 2 +- libnetdata/july/Makefile.am | 8 + libnetdata/july/README.md | 14 + libnetdata/july/july.c | 453 ++++++++ libnetdata/july/july.h | 40 + libnetdata/libnetdata.c | 215 +++- libnetdata/libnetdata.h | 185 +++- libnetdata/locks/README.md | 5 + libnetdata/locks/locks.c | 441 ++------ libnetdata/locks/locks.h | 31 +- libnetdata/log/README.md | 10 + libnetdata/log/log.c | 110 +- libnetdata/log/log.h | 19 +- libnetdata/onewayalloc/README.md | 4 + libnetdata/onewayalloc/onewayalloc.c | 13 + libnetdata/onewayalloc/onewayalloc.h | 2 + libnetdata/os.c | 128 ++- libnetdata/os.h | 6 +- libnetdata/popen/README.md | 10 + libnetdata/popen/popen.c | 18 +- libnetdata/procfile/README.md | 6 +- libnetdata/procfile/procfile.c | 9 +- libnetdata/procfile/procfile.h | 5 +- libnetdata/required_dummies.h | 1 + libnetdata/simple_pattern/README.md | 8 +- libnetdata/socket/security.c | 2 +- libnetdata/socket/socket.c | 37 +- libnetdata/socket/socket.h | 25 +- libnetdata/statistical/README.md | 7 + libnetdata/storage_number/README.md | 4 + libnetdata/string/README.md | 7 +- libnetdata/string/string.c | 35 +- libnetdata/threads/README.md | 7 + libnetdata/threads/threads.c | 39 +- libnetdata/url/README.md | 9 + libnetdata/worker_utilization/worker_utilization.c | 25 +- libnetdata/worker_utilization/worker_utilization.h | 1 + 68 files changed, 3017 insertions(+), 1214 deletions(-) create mode 100644 libnetdata/aral/Makefile.am create mode 100644 libnetdata/aral/README.md create mode 100644 libnetdata/aral/aral.c create mode 100644 libnetdata/aral/aral.h delete mode 100644 libnetdata/arrayalloc/Makefile.am delete mode 100644 libnetdata/arrayalloc/README.md delete mode 100644 libnetdata/arrayalloc/arrayalloc.c delete mode 100644 libnetdata/arrayalloc/arrayalloc.h create mode 100644 libnetdata/july/Makefile.am create mode 100644 libnetdata/july/README.md create mode 100644 libnetdata/july/july.c create mode 100644 libnetdata/july/july.h (limited to 'libnetdata') diff --git a/libnetdata/Makefile.am b/libnetdata/Makefile.am index 1208d16c2..b81d620ba 100644 --- a/libnetdata/Makefile.am +++ b/libnetdata/Makefile.am @@ -5,7 +5,7 @@ MAINTAINERCLEANFILES = $(srcdir)/Makefile.in SUBDIRS = \ adaptive_resortable_list \ - arrayalloc \ + aral \ avl \ buffer \ clocks \ @@ -15,6 +15,7 @@ SUBDIRS = \ ebpf \ eval \ json \ + july \ health \ locks \ log \ diff --git a/libnetdata/README.md b/libnetdata/README.md index fe0690d68..1424faf6c 100644 --- a/libnetdata/README.md +++ b/libnetdata/README.md @@ -1,6 +1,10 @@ # libnetdata diff --git a/libnetdata/adaptive_resortable_list/README.md b/libnetdata/adaptive_resortable_list/README.md index 9eb942bc8..957578487 100644 --- a/libnetdata/adaptive_resortable_list/README.md +++ b/libnetdata/adaptive_resortable_list/README.md @@ -1,6 +1,10 @@ # Adaptive Re-sortable List (ARL) diff --git a/libnetdata/aral/Makefile.am b/libnetdata/aral/Makefile.am new file mode 100644 index 000000000..161784b8f --- /dev/null +++ b/libnetdata/aral/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/aral/README.md b/libnetdata/aral/README.md new file mode 100644 index 000000000..3b0f5bbd6 --- /dev/null +++ b/libnetdata/aral/README.md @@ -0,0 +1,173 @@ + + +# Array Allocator + +Come on! Array allocators are embedded in libc! Why do we need such a thing in Netdata? + +Well, we have a couple of problems to solve: + +1. **Fragmentation** - It is important for Netdata to keeps its overall memory footprint as low as possible. libc does an amazing job when the same thread allocates and frees some memory. But it simply cannot do better without knowing the specifics of the application when memory is allocated and freed randomly between threads. +2. **Speed** - Especially when allocations and de-allocations happen across threads, the speed penalty is tremendous. + +In Netdata we have a few moments that are very tough. Imagine collecting 1 million metrics per second. You have a buffer for each metric and put append new points there. This works beautifully, of course! But then, when the buffers get full, imagine the situation. You suddenly need 1 million buffers, at once! + +To solve this problem we first spread out the buffers. So, the first time each metric asks for a buffer, it gets a smaller one. We added logic there to spread them as evenly as possible across time. Solved? Not exactly! + +We have 3 tiers for each metric. For the metrics of tier 0 (per second resolution) we have a max buffer for 1024 points and every new metrics gets a random size between 3 points and 1024. So they are distributed across time. For 1 million metrics, we have about 1000 buffers beings created every second. + +But at some point, the end of the minute will come, and suddenly all the metrics will need a new buffer for tier 1 (per minute). Oops! We will spread tier 1 buffers across time too, but the first minute is a tough one. We really need 1 million buffers instantly. + +And if that minute happens to also be the beginning of an hour... tier 2 (per hour) kicks in. For that instant we are going to need 2 million buffers instantly. + +The problem becomes even bigger when we collect 2, or even 10 million metrics... + +So solve it, Netdata uses a special implementation of an array allocator that is tightly integrated with the structures we need. + +## Features + +1. Malloc, or MMAP modes. File based MMAP is also supported to put the data in file backed up shared memory. +2. Fully asynchronous operations. There are just a couple of points where spin-locks protect a few counters and pointers. +3. Optional defragmenter, that once enabled it will make free operation slower while trying to maintain a sorted list of fragments to offer first during allocations. The defragmenter can be enabled / disabled at run time. The defragmenter can hurt performance on application with intense turn-around of allocation, like Netdata dbengine caches. So, it is disabled by default. +4. Without the defragmenter enabled, ARAL still tries to keep pages full, but the depth of the search is limited to 3 pages (so, a page with a free slot will either become 1st, 2nd, or 3rd). At the same time, during allocations, ARAL will evaluate the first 2 pages to find the one that is more full than the other, to use it for the new allocation. + +## How it works + +Allocations are organized in pages. Pages have a minimum size (a system page, usually 4KB) and a maximum defined by for each different kind of object. + +Initially every page is free. When an allocation request is made, the free space is split, and the first element is reserved. Free space is now considered there rest. + +This continuous until the page gets full, where a new page is allocated and the process is repeated. + +Each allocation returned has a pointer appended to it. The pointer points to the page the allocation belongs to. + +When a pointer is freed, the page it belongs is identified, its space is marked free, and it is prepended in a single linked list that resides in the page itself. So, each page has its own list of free slots to use. + +Pages are then on another linked list. This is a double linked list and at its beginning has the pages with free space and at the end the pages that are full. + +When the defragmenter is enabled the pages double linked list is also sorted, like this: the fewer the free slots on a page, the earlier in the linked list the page will be, except if it does not have any free slot, in which case it will be at the end. So, the defragmenter tries to have pages full. + +When a page is entirerly free, it is given back to the system immediately. There is no caching of free pages. + + +Parallelism is achieved like this: + +When some threads are waiting for a page to be allocated, free operations are allowed. If a free operation happens before a new page is allocated, any waiting thread will get the slot that is freed on another page. + +Free operations happen in parallel, even for the same page. There is a spin-lock on each page to protect the base pointer of the page's free slots single linked list. But, this is instant. All preparative work happens lockless, then to add the free slot to the page, the page spinlock is acquired, the free slot is prepended to the linked list on the page, the spinlock is released. Such free operations on different pages are totally parallel. + +Once the free operation on a page has finished, the pages double linked list spinlock is acquired to put the page first on that linked list. If the defragmenter is enabled, the spinlock is retained for a little longer, to find the exact position of the page in the linked list. + +During allocations, the reverse order is used. First get the pages double linked list spinlock, get the first page and decrement its free slots counter, then release the spinlock. If the first page does not have any free slots, a page allocation is spawn, without any locks acquired. All threads are spinning waiting for a page with free slots, either from the newly allocated one or from a free operation that may happen in parallel. + +Once a page is acquired, each thread locks its own page to get the first free slot and releases the lock immediately. This is guaranteed to succeed, because when the page was given to that thread its free slots counter was decremented. So, there is a free slot for every thread that got that page. All preparative work to return a pointer to the caller is done lock free. Allocations on different pages are done in parallel, without any intervention between them. + + +## What to expect + +Systems not designed for parallelism achieve their top performance single threaded. The single threaded speed is the baseline. Adding more threads makes them slower. + +The baseline for ARAL is the following, the included stress test when running single threaded: + +``` +Running stress test of 1 threads, with 10000 elements each, for 5 seconds... +2023-01-29 17:04:50: netdata INFO : TH[0] : set name of thread 1314983 to TH[0] +ARAL executes 12.27 M malloc and 12.26 M free operations/s +ARAL executes 12.29 M malloc and 12.29 M free operations/s +ARAL executes 12.30 M malloc and 12.30 M free operations/s +ARAL executes 12.30 M malloc and 12.29 M free operations/s +ARAL executes 12.29 M malloc and 12.29 M free operations/s +Waiting the threads to finish... +2023-01-29 17:04:55: netdata INFO : MAIN : ARAL: did 61487356 malloc, 61487356 free, using 1 threads, in 5003808 usecs +``` + +The same test with 2 threads, both threads on the same ARAL of course. As you see performance improved: + +``` +Running stress test of 2 threads, with 10000 elements each, for 5 seconds... +2023-01-29 17:05:25: netdata INFO : TH[0] : set name of thread 1315537 to TH[0] +2023-01-29 17:05:25: netdata INFO : TH[1] : set name of thread 1315538 to TH[1] +ARAL executes 17.75 M malloc and 17.73 M free operations/s +ARAL executes 17.93 M malloc and 17.93 M free operations/s +ARAL executes 18.17 M malloc and 18.18 M free operations/s +ARAL executes 18.33 M malloc and 18.32 M free operations/s +ARAL executes 18.36 M malloc and 18.36 M free operations/s +Waiting the threads to finish... +2023-01-29 17:05:30: netdata INFO : MAIN : ARAL: did 90976190 malloc, 90976190 free, using 2 threads, in 5029462 usecs +``` + +The same test with 4 threads: + +``` +Running stress test of 4 threads, with 10000 elements each, for 5 seconds... +2023-01-29 17:10:12: netdata INFO : TH[0] : set name of thread 1319552 to TH[0] +2023-01-29 17:10:12: netdata INFO : TH[1] : set name of thread 1319553 to TH[1] +2023-01-29 17:10:12: netdata INFO : TH[2] : set name of thread 1319554 to TH[2] +2023-01-29 17:10:12: netdata INFO : TH[3] : set name of thread 1319555 to TH[3] +ARAL executes 19.95 M malloc and 19.91 M free operations/s +ARAL executes 20.08 M malloc and 20.08 M free operations/s +ARAL executes 20.85 M malloc and 20.85 M free operations/s +ARAL executes 20.84 M malloc and 20.84 M free operations/s +ARAL executes 21.37 M malloc and 21.37 M free operations/s +Waiting the threads to finish... +2023-01-29 17:10:17: netdata INFO : MAIN : ARAL: did 103549747 malloc, 103549747 free, using 4 threads, in 5023325 usecs +``` + +The same with 8 threads: + +``` +Running stress test of 8 threads, with 10000 elements each, for 5 seconds... +2023-01-29 17:07:06: netdata INFO : TH[0] : set name of thread 1317608 to TH[0] +2023-01-29 17:07:06: netdata INFO : TH[1] : set name of thread 1317609 to TH[1] +2023-01-29 17:07:06: netdata INFO : TH[2] : set name of thread 1317610 to TH[2] +2023-01-29 17:07:06: netdata INFO : TH[3] : set name of thread 1317611 to TH[3] +2023-01-29 17:07:06: netdata INFO : TH[4] : set name of thread 1317612 to TH[4] +2023-01-29 17:07:06: netdata INFO : TH[5] : set name of thread 1317613 to TH[5] +2023-01-29 17:07:06: netdata INFO : TH[6] : set name of thread 1317614 to TH[6] +2023-01-29 17:07:06: netdata INFO : TH[7] : set name of thread 1317615 to TH[7] +ARAL executes 15.73 M malloc and 15.66 M free operations/s +ARAL executes 13.95 M malloc and 13.94 M free operations/s +ARAL executes 15.59 M malloc and 15.58 M free operations/s +ARAL executes 15.49 M malloc and 15.49 M free operations/s +ARAL executes 16.16 M malloc and 16.16 M free operations/s +Waiting the threads to finish... +2023-01-29 17:07:11: netdata INFO : MAIN : ARAL: did 78427750 malloc, 78427750 free, using 8 threads, in 5088591 usecs +``` + +The same with 16 threads: + +``` +Running stress test of 16 threads, with 10000 elements each, for 5 seconds... +2023-01-29 17:08:04: netdata INFO : TH[0] : set name of thread 1318663 to TH[0] +2023-01-29 17:08:04: netdata INFO : TH[1] : set name of thread 1318664 to TH[1] +2023-01-29 17:08:04: netdata INFO : TH[2] : set name of thread 1318665 to TH[2] +2023-01-29 17:08:04: netdata INFO : TH[3] : set name of thread 1318666 to TH[3] +2023-01-29 17:08:04: netdata INFO : TH[4] : set name of thread 1318667 to TH[4] +2023-01-29 17:08:04: netdata INFO : TH[5] : set name of thread 1318668 to TH[5] +2023-01-29 17:08:04: netdata INFO : TH[6] : set name of thread 1318669 to TH[6] +2023-01-29 17:08:04: netdata INFO : TH[7] : set name of thread 1318670 to TH[7] +2023-01-29 17:08:04: netdata INFO : TH[8] : set name of thread 1318671 to TH[8] +2023-01-29 17:08:04: netdata INFO : TH[9] : set name of thread 1318672 to TH[9] +2023-01-29 17:08:04: netdata INFO : TH[10] : set name of thread 1318673 to TH[10] +2023-01-29 17:08:04: netdata INFO : TH[11] : set name of thread 1318674 to TH[11] +2023-01-29 17:08:04: netdata INFO : TH[12] : set name of thread 1318675 to TH[12] +2023-01-29 17:08:04: netdata INFO : TH[13] : set name of thread 1318676 to TH[13] +2023-01-29 17:08:04: netdata INFO : TH[14] : set name of thread 1318677 to TH[14] +2023-01-29 17:08:04: netdata INFO : TH[15] : set name of thread 1318678 to TH[15] +ARAL executes 11.77 M malloc and 11.62 M free operations/s +ARAL executes 12.80 M malloc and 12.81 M free operations/s +ARAL executes 13.26 M malloc and 13.25 M free operations/s +ARAL executes 13.30 M malloc and 13.29 M free operations/s +ARAL executes 13.23 M malloc and 13.25 M free operations/s +Waiting the threads to finish... +2023-01-29 17:08:09: netdata INFO : MAIN : ARAL: did 65302122 malloc, 65302122 free, using 16 threads, in 5066009 usecs +``` + +As you can see, the top performance is with 4 threads, almost double the single thread speed. +16 threads performance is still better than single threaded, despite the intense concurrency. diff --git a/libnetdata/aral/aral.c b/libnetdata/aral/aral.c new file mode 100644 index 000000000..4505ee0f2 --- /dev/null +++ b/libnetdata/aral/aral.c @@ -0,0 +1,1081 @@ +#include "../libnetdata.h" +#include "aral.h" + +#ifdef NETDATA_TRACE_ALLOCATIONS +#define TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS , const char *file, const char *function, size_t line +#define TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS , file, function, line +#else +#define TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS +#define TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS +#endif + +#define ARAL_FREE_PAGES_DELTA_TO_REARRANGE_LIST 5 + +// max file size +#define ARAL_MAX_PAGE_SIZE_MMAP (1*1024*1024*1024) + +// max malloc size +// optimal at current versions of libc is up to 256k +// ideal to have the same overhead as libc is 4k +#define ARAL_MAX_PAGE_SIZE_MALLOC (65*1024) + +typedef struct aral_free { + size_t size; + struct aral_free *next; +} ARAL_FREE; + +typedef struct aral_page { + size_t size; // the allocation size of the page + const char *filename; + uint8_t *data; + + uint32_t free_elements_to_move_first; + uint32_t max_elements; // the number of elements that can fit on this page + + struct { + uint32_t used_elements; // the number of used elements on this page + uint32_t free_elements; // the number of free elements on this page + + struct aral_page *prev; // the prev page on the list + struct aral_page *next; // the next page on the list + } aral_lock; + + struct { + SPINLOCK spinlock; + ARAL_FREE *list; + } free; + +} ARAL_PAGE; + +typedef enum { + ARAL_LOCKLESS = (1 << 0), + ARAL_DEFRAGMENT = (1 << 1), + ARAL_ALLOCATED_STATS = (1 << 2), +} ARAL_OPTIONS; + +struct aral { + struct { + char name[ARAL_MAX_NAME + 1]; + + ARAL_OPTIONS options; + + size_t element_size; // calculated to take into account ARAL overheads + size_t max_allocation_size; // calculated in bytes + size_t max_page_elements; // calculated + size_t page_ptr_offset; // calculated + size_t natural_page_size; // calculated + + size_t initial_page_elements; + size_t requested_element_size; + size_t requested_max_page_size; + + struct { + bool enabled; + const char *filename; + char **cache_dir; + } mmap; + } config; + + struct { + SPINLOCK spinlock; + size_t file_number; // for mmap + struct aral_page *pages; // linked list of pages + + size_t user_malloc_operations; + size_t user_free_operations; + size_t defragment_operations; + size_t defragment_linked_list_traversals; + } aral_lock; + + struct { + SPINLOCK spinlock; + size_t allocating_elements; // currently allocating elements + size_t allocation_size; // current / next allocation size + } adders; + + struct { + size_t allocators; // the number of threads currently trying to allocate memory + } atomic; + + struct aral_statistics *stats; +}; + +size_t aral_structures_from_stats(struct aral_statistics *stats) { + return __atomic_load_n(&stats->structures.allocated_bytes, __ATOMIC_RELAXED); +} + +size_t aral_overhead_from_stats(struct aral_statistics *stats) { + return __atomic_load_n(&stats->malloc.allocated_bytes, __ATOMIC_RELAXED) - + __atomic_load_n(&stats->malloc.used_bytes, __ATOMIC_RELAXED); +} + +size_t aral_overhead(ARAL *ar) { + return aral_overhead_from_stats(ar->stats); +} + +size_t aral_structures(ARAL *ar) { + return aral_structures_from_stats(ar->stats); +} + +struct aral_statistics *aral_statistics(ARAL *ar) { + return ar->stats; +} + +#define ARAL_NATURAL_ALIGNMENT (sizeof(uintptr_t) * 2) +static inline size_t natural_alignment(size_t size, size_t alignment) { + if(unlikely(size % alignment)) + size = size + alignment - (size % alignment); + + return size; +} + +static size_t aral_align_alloc_size(ARAL *ar, uint64_t size) { + if(size % ar->config.natural_page_size) + size += ar->config.natural_page_size - (size % ar->config.natural_page_size) ; + + if(size % ar->config.element_size) + size -= size % ar->config.element_size; + + return size; +} + +static inline void aral_lock(ARAL *ar) { + if(likely(!(ar->config.options & ARAL_LOCKLESS))) + netdata_spinlock_lock(&ar->aral_lock.spinlock); +} + +static inline void aral_unlock(ARAL *ar) { + if(likely(!(ar->config.options & ARAL_LOCKLESS))) + netdata_spinlock_unlock(&ar->aral_lock.spinlock); +} + +static inline void aral_page_free_lock(ARAL *ar, ARAL_PAGE *page) { + if(likely(!(ar->config.options & ARAL_LOCKLESS))) + netdata_spinlock_lock(&page->free.spinlock); +} + +static inline void aral_page_free_unlock(ARAL *ar, ARAL_PAGE *page) { + if(likely(!(ar->config.options & ARAL_LOCKLESS))) + netdata_spinlock_unlock(&page->free.spinlock); +} + +static inline bool aral_adders_trylock(ARAL *ar) { + if(likely(!(ar->config.options & ARAL_LOCKLESS))) + return netdata_spinlock_trylock(&ar->adders.spinlock); + + return true; +} + +static inline void aral_adders_lock(ARAL *ar) { + if(likely(!(ar->config.options & ARAL_LOCKLESS))) + netdata_spinlock_lock(&ar->adders.spinlock); +} + +static inline void aral_adders_unlock(ARAL *ar) { + if(likely(!(ar->config.options & ARAL_LOCKLESS))) + netdata_spinlock_unlock(&ar->adders.spinlock); +} + +static void aral_delete_leftover_files(const char *name, const char *path, const char *required_prefix) { + DIR *dir = opendir(path); + if(!dir) return; + + char full_path[FILENAME_MAX + 1]; + size_t len = strlen(required_prefix); + + struct dirent *de = NULL; + while((de = readdir(dir))) { + if(de->d_type == DT_DIR) + continue; + + if(strncmp(de->d_name, required_prefix, len) != 0) + continue; + + snprintfz(full_path, FILENAME_MAX, "%s/%s", path, de->d_name); + info("ARAL: '%s' removing left-over file '%s'", name, full_path); + if(unlikely(unlink(full_path) == -1)) + error("ARAL: '%s' cannot delete file '%s'", name, full_path); + } + + closedir(dir); +} + +// ---------------------------------------------------------------------------- +// check a free slot + +#ifdef NETDATA_INTERNAL_CHECKS +static inline void aral_free_validate_internal_check(ARAL *ar, ARAL_FREE *fr) { + if(unlikely(fr->size < ar->config.element_size)) + fatal("ARAL: '%s' free item of size %zu, less than the expected element size %zu", + ar->config.name, fr->size, ar->config.element_size); + + if(unlikely(fr->size % ar->config.element_size)) + fatal("ARAL: '%s' free item of size %zu is not multiple to element size %zu", + ar->config.name, fr->size, ar->config.element_size); +} +#else +#define aral_free_validate_internal_check(ar, fr) debug_dummy() +#endif + +// ---------------------------------------------------------------------------- +// find the page a pointer belongs to + +#ifdef NETDATA_INTERNAL_CHECKS +static inline ARAL_PAGE *find_page_with_allocation_internal_check(ARAL *ar, void *ptr) { + aral_lock(ar); + + uintptr_t seeking = (uintptr_t)ptr; + ARAL_PAGE *page; + + for(page = ar->aral_lock.pages; page ; page = page->aral_lock.next) { + if(unlikely(seeking >= (uintptr_t)page->data && seeking < (uintptr_t)page->data + page->size)) + break; + } + + aral_unlock(ar); + + return page; +} +#endif + +// ---------------------------------------------------------------------------- +// find a page with a free slot (there shouldn't be any) + +#ifdef NETDATA_ARAL_INTERNAL_CHECKS +static inline ARAL_PAGE *find_page_with_free_slots_internal_check___with_aral_lock(ARAL *ar) { + ARAL_PAGE *page; + + for(page = ar->aral_lock.pages; page ; page = page->next) { + if(page->aral_lock.free_elements) + break; + + internal_fatal(page->size - page->aral_lock.used_elements * ar->config.element_size >= ar->config.element_size, + "ARAL: '%s' a page is marked full, but it is not!", ar->config.name); + + internal_fatal(page->size < page->aral_lock.used_elements * ar->config.element_size, + "ARAL: '%s' a page has been overflown!", ar->config.name); + } + + return page; +} +#endif + +size_t aral_next_allocation_size___adders_lock_needed(ARAL *ar) { + size_t size = ar->adders.allocation_size; + + if(size > ar->config.max_allocation_size) + size = ar->config.max_allocation_size; + else + ar->adders.allocation_size = aral_align_alloc_size(ar, (uint64_t)ar->adders.allocation_size * 2); + + return size; +} + +static ARAL_PAGE *aral_create_page___no_lock_needed(ARAL *ar, size_t size TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) { + ARAL_PAGE *page = callocz(1, sizeof(ARAL_PAGE)); + netdata_spinlock_init(&page->free.spinlock); + page->size = size; + page->max_elements = page->size / ar->config.element_size; + page->aral_lock.free_elements = page->max_elements; + page->free_elements_to_move_first = page->max_elements / 4; + if(unlikely(page->free_elements_to_move_first < 1)) + page->free_elements_to_move_first = 1; + + __atomic_add_fetch(&ar->stats->structures.allocations, 1, __ATOMIC_RELAXED); + __atomic_add_fetch(&ar->stats->structures.allocated_bytes, sizeof(ARAL_PAGE), __ATOMIC_RELAXED); + + if(unlikely(ar->config.mmap.enabled)) { + ar->aral_lock.file_number++; + char filename[FILENAME_MAX + 1]; + snprintfz(filename, FILENAME_MAX, "%s/array_alloc.mmap/%s.%zu", *ar->config.mmap.cache_dir, ar->config.mmap.filename, ar->aral_lock.file_number); + page->filename = strdupz(filename); + page->data = netdata_mmap(page->filename, page->size, MAP_SHARED, 0, false, NULL); + if (unlikely(!page->data)) + fatal("ARAL: '%s' cannot allocate aral buffer of size %zu on filename '%s'", + ar->config.name, page->size, page->filename); + __atomic_add_fetch(&ar->stats->mmap.allocations, 1, __ATOMIC_RELAXED); + __atomic_add_fetch(&ar->stats->mmap.allocated_bytes, page->size, __ATOMIC_RELAXED); + } + else { +#ifdef NETDATA_TRACE_ALLOCATIONS + page->data = mallocz_int(page->size TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS); +#else + page->data = mallocz(page->size); +#endif + __atomic_add_fetch(&ar->stats->malloc.allocations, 1, __ATOMIC_RELAXED); + __atomic_add_fetch(&ar->stats->malloc.allocated_bytes, page->size, __ATOMIC_RELAXED); + } + + // link the free space to its page + ARAL_FREE *fr = (ARAL_FREE *)page->data; + fr->size = page->size; + fr->next = NULL; + page->free.list = fr; + + aral_free_validate_internal_check(ar, fr); + + return page; +} + +void aral_del_page___no_lock_needed(ARAL *ar, ARAL_PAGE *page TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) { + + // free it + if (ar->config.mmap.enabled) { + netdata_munmap(page->data, page->size); + + if (unlikely(unlink(page->filename) == 1)) + error("Cannot delete file '%s'", page->filename); + + freez((void *)page->filename); + + __atomic_sub_fetch(&ar->stats->mmap.allocations, 1, __ATOMIC_RELAXED); + __atomic_sub_fetch(&ar->stats->mmap.allocated_bytes, page->size, __ATOMIC_RELAXED); + } + else { +#ifdef NETDATA_TRACE_ALLOCATIONS + freez_int(page->data TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS); +#else + freez(page->data); +#endif + __atomic_sub_fetch(&ar->stats->malloc.allocations, 1, __ATOMIC_RELAXED); + __atomic_sub_fetch(&ar->stats->malloc.allocated_bytes, page->size, __ATOMIC_RELAXED); + } + + freez(page); + + __atomic_sub_fetch(&ar->stats->structures.allocations, 1, __ATOMIC_RELAXED); + __atomic_sub_fetch(&ar->stats->structures.allocated_bytes, sizeof(ARAL_PAGE), __ATOMIC_RELAXED); +} + +static inline void aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ARAL *ar, ARAL_PAGE *page) { + ARAL_PAGE *first = ar->aral_lock.pages; + + if (page->aral_lock.free_elements <= page->free_elements_to_move_first || + !first || + !first->aral_lock.free_elements || + page->aral_lock.free_elements <= first->aral_lock.free_elements + ARAL_FREE_PAGES_DELTA_TO_REARRANGE_LIST) { + // first position + DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next); + } + else { + ARAL_PAGE *second = first->aral_lock.next; + + if (!second || + !second->aral_lock.free_elements || + page->aral_lock.free_elements <= second->aral_lock.free_elements) + // second position + DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, first, page, aral_lock.prev, aral_lock.next); + else + // third position + DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, second, page, aral_lock.prev, aral_lock.next); + } +} + +static inline ARAL_PAGE *aral_acquire_a_free_slot(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) { + __atomic_add_fetch(&ar->atomic.allocators, 1, __ATOMIC_RELAXED); + aral_lock(ar); + + ARAL_PAGE *page = ar->aral_lock.pages; + + while(!page || !page->aral_lock.free_elements) { +#ifdef NETDATA_ARAL_INTERNAL_CHECKS + internal_fatal(find_page_with_free_slots_internal_check___with_aral_lock(ar), "ARAL: '%s' found page with free slot!", ar->config.name); +#endif + aral_unlock(ar); + + if(aral_adders_trylock(ar)) { + if(ar->adders.allocating_elements < __atomic_load_n(&ar->atomic.allocators, __ATOMIC_RELAXED)) { + + size_t size = aral_next_allocation_size___adders_lock_needed(ar); + ar->adders.allocating_elements += size / ar->config.element_size; + aral_adders_unlock(ar); + + page = aral_create_page___no_lock_needed(ar, size TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS); + + aral_lock(ar); + aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ar, page); + + aral_adders_lock(ar); + ar->adders.allocating_elements -= size / ar->config.element_size; + aral_adders_unlock(ar); + + // we have a page that is all empty + // and only aral_lock() is held, so + // break the loop + break; + } + + aral_adders_unlock(ar); + } + + aral_lock(ar); + page = ar->aral_lock.pages; + } + + __atomic_sub_fetch(&ar->atomic.allocators, 1, __ATOMIC_RELAXED); + + // we have a page + // and aral locked + + { + ARAL_PAGE *first = ar->aral_lock.pages; + ARAL_PAGE *second = first->aral_lock.next; + + if (!second || + !second->aral_lock.free_elements || + first->aral_lock.free_elements <= second->aral_lock.free_elements + ARAL_FREE_PAGES_DELTA_TO_REARRANGE_LIST) + page = first; + else { + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, second, aral_lock.prev, aral_lock.next); + DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, second, aral_lock.prev, aral_lock.next); + page = second; + } + } + + internal_fatal(!page || !page->aral_lock.free_elements, + "ARAL: '%s' selected page does not have a free slot in it", + ar->config.name); + + internal_fatal(page->max_elements != page->aral_lock.used_elements + page->aral_lock.free_elements, + "ARAL: '%s' page element counters do not match, " + "page says it can handle %zu elements, " + "but there are %zu used and %zu free items, " + "total %zu items", + ar->config.name, + (size_t)page->max_elements, + (size_t)page->aral_lock.used_elements, (size_t)page->aral_lock.free_elements, + (size_t)page->aral_lock.used_elements + (size_t)page->aral_lock.free_elements + ); + + ar->aral_lock.user_malloc_operations++; + + // acquire a slot for the caller + page->aral_lock.used_elements++; + if(--page->aral_lock.free_elements == 0) { + // we are done with this page + // move the full page last + // so that pages with free items remain first in the list + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next); + } + + aral_unlock(ar); + + return page; +} + +void *aral_mallocz_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) { + + ARAL_PAGE *page = aral_acquire_a_free_slot(ar TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS); + + aral_page_free_lock(ar, page); + + internal_fatal(!page->free.list, + "ARAL: '%s' free item to use, cannot be NULL.", ar->config.name); + + internal_fatal(page->free.list->size < ar->config.element_size, + "ARAL: '%s' free item size %zu, cannot be smaller than %zu", + ar->config.name, page->free.list->size, ar->config.element_size); + + ARAL_FREE *found_fr = page->free.list; + + // check if the remaining size (after we use this slot) is not enough for another element + if(unlikely(found_fr->size - ar->config.element_size < ar->config.element_size)) { + // we can use the entire free space entry + + page->free.list = found_fr->next; + } + else { + // we can split the free space entry + + uint8_t *data = (uint8_t *)found_fr; + ARAL_FREE *fr = (ARAL_FREE *)&data[ar->config.element_size]; + fr->size = found_fr->size - ar->config.element_size; + + // link the free slot first in the page + fr->next = found_fr->next; + page->free.list = fr; + + aral_free_validate_internal_check(ar, fr); + } + + aral_page_free_unlock(ar, page); + + // put the page pointer after the element + uint8_t *data = (uint8_t *)found_fr; + ARAL_PAGE **page_ptr = (ARAL_PAGE **)&data[ar->config.page_ptr_offset]; + *page_ptr = page; + + if(unlikely(ar->config.mmap.enabled)) + __atomic_add_fetch(&ar->stats->mmap.used_bytes, ar->config.element_size, __ATOMIC_RELAXED); + else + __atomic_add_fetch(&ar->stats->malloc.used_bytes, ar->config.element_size, __ATOMIC_RELAXED); + + return (void *)found_fr; +} + +static inline ARAL_PAGE *aral_ptr_to_page___must_NOT_have_aral_lock(ARAL *ar, void *ptr) { + // given a data pointer we returned before, + // find the ARAL_PAGE it belongs to + + uint8_t *data = (uint8_t *)ptr; + ARAL_PAGE **page_ptr = (ARAL_PAGE **)&data[ar->config.page_ptr_offset]; + ARAL_PAGE *page = *page_ptr; + +#ifdef NETDATA_INTERNAL_CHECKS + // make it NULL so that we will fail on double free + // do not enable this on production, because the MMAP file + // will need to be saved again! + *page_ptr = NULL; +#endif + +#ifdef NETDATA_ARAL_INTERNAL_CHECKS + { + // find the page ptr belongs + ARAL_PAGE *page2 = find_page_with_allocation_internal_check(ar, ptr); + + internal_fatal(page != page2, + "ARAL: '%s' page pointers do not match!", + ar->name); + + internal_fatal(!page2, + "ARAL: '%s' free of pointer %p is not in ARAL address space.", + ar->name, ptr); + } +#endif + + internal_fatal(!page, + "ARAL: '%s' possible corruption or double free of pointer %p", + ar->config.name, ptr); + + return page; +} + +static void aral_defrag_sorted_page_position___aral_lock_needed(ARAL *ar, ARAL_PAGE *page) { + ARAL_PAGE *tmp; + + int action = 0; (void)action; + size_t move_later = 0, move_earlier = 0; + + for(tmp = page->aral_lock.next ; + tmp && tmp->aral_lock.free_elements && tmp->aral_lock.free_elements < page->aral_lock.free_elements ; + tmp = tmp->aral_lock.next) + move_later++; + + if(!tmp && page->aral_lock.next) { + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next); + action = 1; + } + else if(tmp != page->aral_lock.next) { + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next); + DOUBLE_LINKED_LIST_INSERT_ITEM_BEFORE_UNSAFE(ar->aral_lock.pages, tmp, page, aral_lock.prev, aral_lock.next); + action = 2; + } + else { + for(tmp = (page == ar->aral_lock.pages) ? NULL : page->aral_lock.prev ; + tmp && (!tmp->aral_lock.free_elements || tmp->aral_lock.free_elements > page->aral_lock.free_elements); + tmp = (tmp == ar->aral_lock.pages) ? NULL : tmp->aral_lock.prev) + move_earlier++; + + if(!tmp) { + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next); + DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next); + action = 3; + } + else if(tmp != page->aral_lock.prev){ + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next); + DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(ar->aral_lock.pages, tmp, page, aral_lock.prev, aral_lock.next); + action = 4; + } + } + + ar->aral_lock.defragment_operations++; + ar->aral_lock.defragment_linked_list_traversals += move_earlier + move_later; + + internal_fatal(page->aral_lock.next && page->aral_lock.next->aral_lock.free_elements && page->aral_lock.next->aral_lock.free_elements < page->aral_lock.free_elements, + "ARAL: '%s' item should be later in the list", ar->config.name); + + internal_fatal(page != ar->aral_lock.pages && (!page->aral_lock.prev->aral_lock.free_elements || page->aral_lock.prev->aral_lock.free_elements > page->aral_lock.free_elements), + "ARAL: '%s' item should be earlier in the list", ar->config.name); +} + +static inline void aral_move_page_with_free_list___aral_lock_needed(ARAL *ar, ARAL_PAGE *page) { + if(unlikely(page == ar->aral_lock.pages)) + // we are the first already + return; + + if(likely(!(ar->config.options & ARAL_DEFRAGMENT))) { + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next); + aral_insert_not_linked_page_with_free_items_to_proper_position___aral_lock_needed(ar, page); + } + else + aral_defrag_sorted_page_position___aral_lock_needed(ar, page); +} + +void aral_freez_internal(ARAL *ar, void *ptr TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) { + if(unlikely(!ptr)) return; + + // get the page pointer + ARAL_PAGE *page = aral_ptr_to_page___must_NOT_have_aral_lock(ar, ptr); + + if(unlikely(ar->config.mmap.enabled)) + __atomic_sub_fetch(&ar->stats->mmap.used_bytes, ar->config.element_size, __ATOMIC_RELAXED); + else + __atomic_sub_fetch(&ar->stats->malloc.used_bytes, ar->config.element_size, __ATOMIC_RELAXED); + + // make this element available + ARAL_FREE *fr = (ARAL_FREE *)ptr; + fr->size = ar->config.element_size; + + aral_page_free_lock(ar, page); + fr->next = page->free.list; + page->free.list = fr; + aral_page_free_unlock(ar, page); + + aral_lock(ar); + + internal_fatal(!page->aral_lock.used_elements, + "ARAL: '%s' pointer %p is inside a page without any active allocations.", + ar->config.name, ptr); + + internal_fatal(page->max_elements != page->aral_lock.used_elements + page->aral_lock.free_elements, + "ARAL: '%s' page element counters do not match, " + "page says it can handle %zu elements, " + "but there are %zu used and %zu free items, " + "total %zu items", + ar->config.name, + (size_t)page->max_elements, + (size_t)page->aral_lock.used_elements, (size_t)page->aral_lock.free_elements, + (size_t)page->aral_lock.used_elements + (size_t)page->aral_lock.free_elements + ); + + page->aral_lock.used_elements--; + page->aral_lock.free_elements++; + + ar->aral_lock.user_free_operations++; + + // if the page is empty, release it + if(unlikely(!page->aral_lock.used_elements)) { + bool is_this_page_the_last_one = ar->aral_lock.pages == page && !page->aral_lock.next; + + if(!is_this_page_the_last_one) + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next); + + aral_unlock(ar); + + if(!is_this_page_the_last_one) + aral_del_page___no_lock_needed(ar, page TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS); + } + else { + aral_move_page_with_free_list___aral_lock_needed(ar, page); + aral_unlock(ar); + } +} + +void aral_destroy_internal(ARAL *ar TRACE_ALLOCATIONS_FUNCTION_DEFINITION_PARAMS) { + aral_lock(ar); + + ARAL_PAGE *page; + while((page = ar->aral_lock.pages)) { + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(ar->aral_lock.pages, page, aral_lock.prev, aral_lock.next); + aral_del_page___no_lock_needed(ar, page TRACE_ALLOCATIONS_FUNCTION_CALL_PARAMS); + } + + aral_unlock(ar); + + if(ar->config.options & ARAL_ALLOCATED_STATS) + freez(ar->stats); + + freez(ar); +} + +size_t aral_element_size(ARAL *ar) { + return ar->config.requested_element_size; +} + +ARAL *aral_create(const char *name, size_t element_size, size_t initial_page_elements, size_t max_page_size, + struct aral_statistics *stats, const char *filename, char **cache_dir, bool mmap, bool lockless) { + ARAL *ar = callocz(1, sizeof(ARAL)); + ar->config.options = (lockless) ? ARAL_LOCKLESS : 0; + ar->config.requested_element_size = element_size; + ar->config.initial_page_elements = initial_page_elements; + ar->config.requested_max_page_size = max_page_size; + ar->config.mmap.filename = filename; + ar->config.mmap.cache_dir = cache_dir; + ar->config.mmap.enabled = mmap; + strncpyz(ar->config.name, name, ARAL_MAX_NAME); + netdata_spinlock_init(&ar->aral_lock.spinlock); + + if(stats) { + ar->stats = stats; + ar->config.options &= ~ARAL_ALLOCATED_STATS; + } + else { + ar->stats = callocz(1, sizeof(struct aral_statistics)); + ar->config.options |= ARAL_ALLOCATED_STATS; + } + + long int page_size = sysconf(_SC_PAGE_SIZE); + if (unlikely(page_size == -1)) + ar->config.natural_page_size = 4096; + else + ar->config.natural_page_size = page_size; + + // we need to add a page pointer after the element + // so, first align the element size to the pointer size + ar->config.element_size = natural_alignment(ar->config.requested_element_size, sizeof(uintptr_t)); + + // then add the size of a pointer to it + ar->config.element_size += sizeof(uintptr_t); + + // make sure it is at least what we need for an ARAL_FREE slot + if (ar->config.element_size < sizeof(ARAL_FREE)) + ar->config.element_size = sizeof(ARAL_FREE); + + // and finally align it to the natural alignment + ar->config.element_size = natural_alignment(ar->config.element_size, ARAL_NATURAL_ALIGNMENT); + + ar->config.max_page_elements = ar->config.requested_max_page_size / ar->config.element_size; + + // we write the page pointer just after each element + ar->config.page_ptr_offset = ar->config.element_size - sizeof(uintptr_t); + + if(ar->config.requested_element_size + sizeof(uintptr_t) > ar->config.element_size) + fatal("ARAL: '%s' failed to calculate properly page_ptr_offset: " + "element size %zu, sizeof(uintptr_t) %zu, natural alignment %zu, " + "final element size %zu, page_ptr_offset %zu", + ar->config.name, ar->config.requested_element_size, sizeof(uintptr_t), ARAL_NATURAL_ALIGNMENT, + ar->config.element_size, ar->config.page_ptr_offset); + + //info("ARAL: element size %zu, sizeof(uintptr_t) %zu, natural alignment %zu, final element size %zu, page_ptr_offset %zu", + // ar->element_size, sizeof(uintptr_t), ARAL_NATURAL_ALIGNMENT, ar->internal.element_size, ar->internal.page_ptr_offset); + + + if (ar->config.initial_page_elements < 2) + ar->config.initial_page_elements = 2; + + if(ar->config.mmap.enabled && (!ar->config.mmap.cache_dir || !*ar->config.mmap.cache_dir)) { + error("ARAL: '%s' mmap cache directory is not configured properly, disabling mmap.", ar->config.name); + ar->config.mmap.enabled = false; + internal_fatal(true, "ARAL: '%s' mmap cache directory is not configured properly", ar->config.name); + } + + uint64_t max_alloc_size; + if(!ar->config.max_page_elements) + max_alloc_size = ar->config.mmap.enabled ? ARAL_MAX_PAGE_SIZE_MMAP : ARAL_MAX_PAGE_SIZE_MALLOC; + else + max_alloc_size = ar->config.max_page_elements * ar->config.element_size; + + ar->config.max_allocation_size = aral_align_alloc_size(ar, max_alloc_size); + ar->adders.allocation_size = aral_align_alloc_size(ar, (uint64_t)ar->config.element_size * ar->config.initial_page_elements); + ar->aral_lock.pages = NULL; + ar->aral_lock.file_number = 0; + + if(ar->config.mmap.enabled) { + char directory_name[FILENAME_MAX + 1]; + snprintfz(directory_name, FILENAME_MAX, "%s/array_alloc.mmap", *ar->config.mmap.cache_dir); + int r = mkdir(directory_name, 0775); + if (r != 0 && errno != EEXIST) + fatal("Cannot create directory '%s'", directory_name); + + char file[FILENAME_MAX + 1]; + snprintfz(file, FILENAME_MAX, "%s.", ar->config.mmap.filename); + aral_delete_leftover_files(ar->config.name, directory_name, file); + } + + internal_error(true, + "ARAL: '%s' " + "element size %zu (requested %zu bytes), " + "min elements per page %zu (requested %zu), " + "max elements per page %zu, " + "max page size %zu bytes (requested %zu) " + , ar->config.name + , ar->config.element_size, ar->config.requested_element_size + , ar->adders.allocation_size / ar->config.element_size, ar->config.initial_page_elements + , ar->config.max_allocation_size / ar->config.element_size + , ar->config.max_allocation_size, ar->config.requested_max_page_size + ); + + __atomic_add_fetch(&ar->stats->structures.allocations, 1, __ATOMIC_RELAXED); + __atomic_add_fetch(&ar->stats->structures.allocated_bytes, sizeof(ARAL), __ATOMIC_RELAXED); + return ar; +} + +// ---------------------------------------------------------------------------- +// global aral caching + +#define ARAL_BY_SIZE_MAX_SIZE 1024 + +struct aral_by_size { + ARAL *ar; + int32_t refcount; +}; + +struct { + struct aral_statistics shared_statistics; + SPINLOCK spinlock; + struct aral_by_size array[ARAL_BY_SIZE_MAX_SIZE + 1]; +} aral_by_size_globals = {}; + +struct aral_statistics *aral_by_size_statistics(void) { + return &aral_by_size_globals.shared_statistics; +} + +size_t aral_by_size_structures(void) { + return aral_structures_from_stats(&aral_by_size_globals.shared_statistics); +} + +size_t aral_by_size_overhead(void) { + return aral_overhead_from_stats(&aral_by_size_globals.shared_statistics); +} + +ARAL *aral_by_size_acquire(size_t size) { + netdata_spinlock_lock(&aral_by_size_globals.spinlock); + + ARAL *ar = NULL; + + if(size <= ARAL_BY_SIZE_MAX_SIZE && aral_by_size_globals.array[size].ar) { + ar = aral_by_size_globals.array[size].ar; + aral_by_size_globals.array[size].refcount++; + + internal_fatal(aral_element_size(ar) != size, "DICTIONARY: aral has size %zu but we want %zu", + aral_element_size(ar), size); + } + + if(!ar) { + char buf[30 + 1]; + snprintf(buf, 30, "size-%zu", size); + ar = aral_create(buf, + size, + 0, + 65536 * ((size / 150) + 1), + &aral_by_size_globals.shared_statistics, + NULL, NULL, false, false); + + if(size <= ARAL_BY_SIZE_MAX_SIZE) { + aral_by_size_globals.array[size].ar = ar; + aral_by_size_globals.array[size].refcount = 1; + } + } + + netdata_spinlock_unlock(&aral_by_size_globals.spinlock); + + return ar; +} + +void aral_by_size_release(ARAL *ar) { + size_t size = aral_element_size(ar); + + if(size <= ARAL_BY_SIZE_MAX_SIZE) { + netdata_spinlock_lock(&aral_by_size_globals.spinlock); + + internal_fatal(aral_by_size_globals.array[size].ar != ar, + "ARAL BY SIZE: aral pointers do not match"); + + if(aral_by_size_globals.array[size].refcount <= 0) + fatal("ARAL BY SIZE: double release detected"); + + aral_by_size_globals.array[size].refcount--; + if(!aral_by_size_globals.array[size].refcount) { + aral_destroy(aral_by_size_globals.array[size].ar); + aral_by_size_globals.array[size].ar = NULL; + } + + netdata_spinlock_unlock(&aral_by_size_globals.spinlock); + } + else + aral_destroy(ar); +} + +// ---------------------------------------------------------------------------- +// unittest + +struct aral_unittest_config { + bool single_threaded; + bool stop; + ARAL *ar; + size_t elements; + size_t threads; + int errors; +}; + +static void *aral_test_thread(void *ptr) { + struct aral_unittest_config *auc = ptr; + ARAL *ar = auc->ar; + size_t elements = auc->elements; + + void **pointers = callocz(elements, sizeof(void *)); + + do { + for (size_t i = 0; i < elements; i++) { + pointers[i] = aral_mallocz(ar); + } + + for (size_t div = 5; div >= 2; div--) { + for (size_t i = 0; i < elements / div; i++) { + aral_freez(ar, pointers[i]); + pointers[i] = NULL; + } + + for (size_t i = 0; i < elements / div; i++) { + pointers[i] = aral_mallocz(ar); + } + } + + for (size_t step = 50; step >= 10; step -= 10) { + for (size_t i = 0; i < elements; i += step) { + aral_freez(ar, pointers[i]); + pointers[i] = NULL; + } + + for (size_t i = 0; i < elements; i += step) { + pointers[i] = aral_mallocz(ar); + } + } + + for (size_t i = 0; i < elements; i++) { + aral_freez(ar, pointers[i]); + pointers[i] = NULL; + } + + if (auc->single_threaded && ar->aral_lock.pages && ar->aral_lock.pages->aral_lock.used_elements) { + fprintf(stderr, "\n\nARAL leftovers detected (1)\n\n"); + __atomic_add_fetch(&auc->errors, 1, __ATOMIC_RELAXED); + } + + if(!auc->single_threaded && __atomic_load_n(&auc->stop, __ATOMIC_RELAXED)) + break; + + for (size_t i = 0; i < elements; i++) { + pointers[i] = aral_mallocz(ar); + } + + size_t increment = elements / ar->config.max_page_elements; + for (size_t all = increment; all <= elements / 2; all += increment) { + + size_t to_free = (all % ar->config.max_page_elements) + 1; + size_t step = elements / to_free; + if(!step) step = 1; + + // fprintf(stderr, "all %zu, to free %zu, step %zu\n", all, to_free, step); + + size_t free_list[to_free]; + for (size_t i = 0; i < to_free; i++) { + size_t pos = step * i; + aral_freez(ar, pointers[pos]); + pointers[pos] = NULL; + free_list[i] = pos; + } + + for (size_t i = 0; i < to_free; i++) { + size_t pos = free_list[i]; + pointers[pos] = aral_mallocz(ar); + } + } + + for (size_t i = 0; i < elements; i++) { + aral_freez(ar, pointers[i]); + pointers[i] = NULL; + } + + if (auc->single_threaded && ar->aral_lock.pages && ar->aral_lock.pages->aral_lock.used_elements) { + fprintf(stderr, "\n\nARAL leftovers detected (2)\n\n"); + __atomic_add_fetch(&auc->errors, 1, __ATOMIC_RELAXED); + } + + } while(!auc->single_threaded && !__atomic_load_n(&auc->stop, __ATOMIC_RELAXED)); + + freez(pointers); + + return ptr; +} + +int aral_stress_test(size_t threads, size_t elements, size_t seconds) { + fprintf(stderr, "Running stress test of %zu threads, with %zu elements each, for %zu seconds...\n", + threads, elements, seconds); + + struct aral_unittest_config auc = { + .single_threaded = false, + .threads = threads, + .ar = aral_create("aral-stress-test", 20, 0, 8192, NULL, "aral-stress-test", NULL, false, false), + .elements = elements, + .errors = 0, + }; + + usec_t started_ut = now_monotonic_usec(); + netdata_thread_t thread_ptrs[threads]; + + for(size_t i = 0; i < threads ; i++) { + char tag[NETDATA_THREAD_NAME_MAX + 1]; + snprintfz(tag, NETDATA_THREAD_NAME_MAX, "TH[%zu]", i); + netdata_thread_create(&thread_ptrs[i], tag, + NETDATA_THREAD_OPTION_JOINABLE | NETDATA_THREAD_OPTION_DONT_LOG, + aral_test_thread, &auc); + } + + size_t malloc_done = 0; + size_t free_done = 0; + size_t countdown = seconds; + while(countdown-- > 0) { + sleep_usec(1 * USEC_PER_SEC); + aral_lock(auc.ar); + size_t m = auc.ar->aral_lock.user_malloc_operations; + size_t f = auc.ar->aral_lock.user_free_operations; + aral_unlock(auc.ar); + fprintf(stderr, "ARAL executes %0.2f M malloc and %0.2f M free operations/s\n", + (double)(m - malloc_done) / 1000000.0, (double)(f - free_done) / 1000000.0); + malloc_done = m; + free_done = f; + } + + __atomic_store_n(&auc.stop, true, __ATOMIC_RELAXED); + +// fprintf(stderr, "Cancelling the threads...\n"); +// for(size_t i = 0; i < threads ; i++) { +// netdata_thread_cancel(thread_ptrs[i]); +// } + + fprintf(stderr, "Waiting the threads to finish...\n"); + for(size_t i = 0; i < threads ; i++) { + netdata_thread_join(thread_ptrs[i], NULL); + } + + usec_t ended_ut = now_monotonic_usec(); + + if (auc.ar->aral_lock.pages && auc.ar->aral_lock.pages->aral_lock.used_elements) { + fprintf(stderr, "\n\nARAL leftovers detected (3)\n\n"); + __atomic_add_fetch(&auc.errors, 1, __ATOMIC_RELAXED); + } + + info("ARAL: did %zu malloc, %zu free, " + "using %zu threads, in %llu usecs", + auc.ar->aral_lock.user_malloc_operations, + auc.ar->aral_lock.user_free_operations, + threads, + ended_ut - started_ut); + + aral_destroy(auc.ar); + + return auc.errors; +} + +int aral_unittest(size_t elements) { + char *cache_dir = "/tmp/"; + + struct aral_unittest_config auc = { + .single_threaded = true, + .threads = 1, + .ar = aral_create("aral-test", 20, 0, 8192, NULL, "aral-test", &cache_dir, false, false), + .elements = elements, + .errors = 0, + }; + + aral_test_thread(&auc); + + aral_destroy(auc.ar); + + int errors = aral_stress_test(2, elements, 5); + + return auc.errors + errors; +} diff --git a/libnetdata/aral/aral.h b/libnetdata/aral/aral.h new file mode 100644 index 000000000..96f5a9c44 --- /dev/null +++ b/libnetdata/aral/aral.h @@ -0,0 +1,69 @@ + +#ifndef ARAL_H +#define ARAL_H 1 + +#include "../libnetdata.h" + +#define ARAL_MAX_NAME 23 + +typedef struct aral ARAL; + +struct aral_statistics { + struct { + size_t allocations; + size_t allocated_bytes; + } structures; + + struct { + size_t allocations; + size_t allocated_bytes; + size_t used_bytes; + } malloc; + + struct { + size_t allocations; + size_t allocated_bytes; + size_t used_bytes; + } mmap; +}; + +ARAL *aral_create(const char *name, size_t element_size, size_t initial_page_elements, size_t max_page_size, + struct aral_statistics *stats, const char *filename, char **cache_dir, bool mmap, bool lockless); +size_t aral_element_size(ARAL *ar); +size_t aral_overhead(ARAL *ar); +size_t aral_structures(ARAL *ar); +struct aral_statistics *aral_statistics(ARAL *ar); +size_t aral_structures_from_stats(struct aral_statistics *stats); +size_t aral_overhead_from_stats(struct aral_statistics *stats); + +ARAL *aral_by_size_acquire(size_t size); +void aral_by_size_release(ARAL *ar); +size_t aral_by_size_structures(void); +size_t aral_by_size_overhead(void); +struct aral_statistics *aral_by_size_statistics(void); + +int aral_unittest(size_t elements); + +#ifdef NETDATA_TRACE_ALLOCATIONS + +#define aral_mallocz(ar) aral_mallocz_internal(ar, __FILE__, __FUNCTION__, __LINE__) +#define aral_freez(ar, ptr) aral_freez_internal(ar, ptr, __FILE__, __FUNCTION__, __LINE__) +#define aral_destroy(ar) aral_destroy_internal(ar, __FILE__, __FUNCTION__, __LINE__) + +void *aral_mallocz_internal(ARAL *ar, const char *file, const char *function, size_t line); +void aral_freez_internal(ARAL *ar, void *ptr, const char *file, const char *function, size_t line); +void aral_destroy_internal(ARAL *ar, const char *file, const char *function, size_t line); + +#else // NETDATA_TRACE_ALLOCATIONS + +#define aral_mallocz(ar) aral_mallocz_internal(ar) +#define aral_freez(ar, ptr) aral_freez_internal(ar, ptr) +#define aral_destroy(ar) aral_destroy_internal(ar) + +void *aral_mallocz_internal(ARAL *ar); +void aral_freez_internal(ARAL *ar, void *ptr); +void aral_destroy_internal(ARAL *ar); + +#endif // NETDATA_TRACE_ALLOCATIONS + +#endif // ARAL_H diff --git a/libnetdata/arrayalloc/Makefile.am b/libnetdata/arrayalloc/Makefile.am deleted file mode 100644 index 161784b8f..000000000 --- a/libnetdata/arrayalloc/Makefile.am +++ /dev/null @@ -1,8 +0,0 @@ -# SPDX-License-Identifier: GPL-3.0-or-later - -AUTOMAKE_OPTIONS = subdir-objects -MAINTAINERCLEANFILES = $(srcdir)/Makefile.in - -dist_noinst_DATA = \ - README.md \ - $(NULL) diff --git a/libnetdata/arrayalloc/README.md b/libnetdata/arrayalloc/README.md deleted file mode 100644 index 2f21bf3ff..000000000 --- a/libnetdata/arrayalloc/README.md +++ /dev/null @@ -1,7 +0,0 @@ - - -# Array Allocator - diff --git a/libnetdata/arrayalloc/arrayalloc.c b/libnetdata/arrayalloc/arrayalloc.c deleted file mode 100644 index f337279ae..000000000 --- a/libnetdata/arrayalloc/arrayalloc.c +++ /dev/null @@ -1,489 +0,0 @@ -#include "../libnetdata.h" -#include "arrayalloc.h" -#include "daemon/common.h" - -// max file size -#define ARAL_MAX_PAGE_SIZE_MMAP (1*1024*1024*1024) - -// max malloc size -// optimal at current versions of libc is up to 256k -// ideal to have the same overhead as libc is 4k -#define ARAL_MAX_PAGE_SIZE_MALLOC (64*1024) - -typedef struct arrayalloc_free { - size_t size; - struct arrayalloc_page *page; - struct arrayalloc_free *next; -} ARAL_FREE; - -typedef struct arrayalloc_page { - const char *filename; - size_t size; // the total size of the page - size_t used_elements; // the total number of used elements on this page - uint8_t *data; - ARAL_FREE *free_list; - struct arrayalloc_page *prev; // the prev page on the list - struct arrayalloc_page *next; // the next page on the list -} ARAL_PAGE; - -#define ARAL_NATURAL_ALIGNMENT (sizeof(uintptr_t) * 2) -static inline size_t natural_alignment(size_t size, size_t alignment) { - if(unlikely(size % alignment)) - size = size + alignment - (size % alignment); - - return size; -} - -static void arrayalloc_delete_leftover_files(const char *path, const char *required_prefix) { - DIR *dir = opendir(path); - if(!dir) return; - - char fullpath[FILENAME_MAX + 1]; - size_t len = strlen(required_prefix); - - struct dirent *de = NULL; - while((de = readdir(dir))) { - if(de->d_type == DT_DIR) - continue; - - if(strncmp(de->d_name, required_prefix, len) != 0) - continue; - - snprintfz(fullpath, FILENAME_MAX, "%s/%s", path, de->d_name); - info("ARRAYALLOC: removing left-over file '%s'", fullpath); - if(unlikely(unlink(fullpath) == -1)) - error("Cannot delete file '%s'", fullpath); - } - - closedir(dir); -} - -// ---------------------------------------------------------------------------- -// arrayalloc_init() - -static void arrayalloc_init(ARAL *ar) { - static netdata_mutex_t mutex = NETDATA_MUTEX_INITIALIZER; - netdata_mutex_lock(&mutex); - - if(!ar->internal.initialized) { - netdata_mutex_init(&ar->internal.mutex); - - long int page_size = sysconf(_SC_PAGE_SIZE); - if (unlikely(page_size == -1)) - ar->internal.natural_page_size = 4096; - else - ar->internal.natural_page_size = page_size; - - // we need to add a page pointer after the element - // so, first align the element size to the pointer size - ar->internal.element_size = natural_alignment(ar->requested_element_size, sizeof(uintptr_t)); - - // then add the size of a pointer to it - ar->internal.element_size += sizeof(uintptr_t); - - // make sure it is at least what we need for an ARAL_FREE slot - if (ar->internal.element_size < sizeof(ARAL_FREE)) - ar->internal.element_size = sizeof(ARAL_FREE); - - // and finally align it to the natural alignment - ar->internal.element_size = natural_alignment(ar->internal.element_size, ARAL_NATURAL_ALIGNMENT); - - // we write the page pointer just after each element - ar->internal.page_ptr_offset = ar->internal.element_size - sizeof(uintptr_t); - - if(ar->requested_element_size + sizeof(uintptr_t) > ar->internal.element_size) - fatal("ARRAYALLOC: failed to calculate properly page_ptr_offset: element size %zu, sizeof(uintptr_t) %zu, natural alignment %zu, final element size %zu, page_ptr_offset %zu", - ar->requested_element_size, sizeof(uintptr_t), ARAL_NATURAL_ALIGNMENT, ar->internal.element_size, ar->internal.page_ptr_offset); - - //info("ARRAYALLOC: element size %zu, sizeof(uintptr_t) %zu, natural alignment %zu, final element size %zu, page_ptr_offset %zu", - // ar->element_size, sizeof(uintptr_t), ARAL_NATURAL_ALIGNMENT, ar->internal.element_size, ar->internal.page_ptr_offset); - - if (ar->initial_elements < 10) - ar->initial_elements = 10; - - ar->internal.mmap = (ar->use_mmap && ar->cache_dir && *ar->cache_dir) ? true : false; - ar->internal.max_alloc_size = ar->internal.mmap ? ARAL_MAX_PAGE_SIZE_MMAP : ARAL_MAX_PAGE_SIZE_MALLOC; - - if(ar->internal.max_alloc_size % ar->internal.natural_page_size) - ar->internal.max_alloc_size += ar->internal.natural_page_size - (ar->internal.max_alloc_size % ar->internal.natural_page_size) ; - - if(ar->internal.max_alloc_size % ar->internal.element_size) - ar->internal.max_alloc_size -= ar->internal.max_alloc_size % ar->internal.element_size; - - ar->internal.pages = NULL; - ar->internal.allocation_multiplier = 1; - ar->internal.file_number = 0; - - if(ar->internal.mmap) { - char directory_name[FILENAME_MAX + 1]; - snprintfz(directory_name, FILENAME_MAX, "%s/array_alloc.mmap", *ar->cache_dir); - int r = mkdir(directory_name, 0775); - if (r != 0 && errno != EEXIST) - fatal("Cannot create directory '%s'", directory_name); - - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s.", ar->filename); - arrayalloc_delete_leftover_files(directory_name, filename); - } - - ar->internal.initialized = true; - } - - netdata_mutex_unlock(&mutex); -} - -// ---------------------------------------------------------------------------- -// check a free slot - -#ifdef NETDATA_INTERNAL_CHECKS -static inline void arrayalloc_free_validate_internal_check(ARAL *ar, ARAL_FREE *fr) { - if(fr->size < ar->internal.element_size) - fatal("ARRAYALLOC: free item of size %zu, less than the expected element size %zu", fr->size, ar->internal.element_size); - - if(fr->size % ar->internal.element_size) - fatal("ARRAYALLOC: free item of size %zu is not multiple to element size %zu", fr->size, ar->internal.element_size); -} -#else -#define arrayalloc_free_validate_internal_check(ar, fr) debug_dummy() -#endif - -// ---------------------------------------------------------------------------- -// find the page a pointer belongs to - -#ifdef NETDATA_INTERNAL_CHECKS -static inline ARAL_PAGE *find_page_with_allocation_internal_check(ARAL *ar, void *ptr) { - uintptr_t seeking = (uintptr_t)ptr; - ARAL_PAGE *page; - - for(page = ar->internal.pages; page ; page = page->next) { - if(unlikely(seeking >= (uintptr_t)page->data && seeking < (uintptr_t)page->data + page->size)) - break; - } - - return page; -} -#endif - -// ---------------------------------------------------------------------------- -// find a page with a free slot (there shouldn't be any) - -#ifdef NETDATA_INTERNAL_CHECKS -static inline ARAL_PAGE *find_page_with_free_slots_internal_check(ARAL *ar) { - ARAL_PAGE *page; - - for(page = ar->internal.pages; page ; page = page->next) { - if(page->free_list) - break; - - internal_fatal(page->size - page->used_elements * ar->internal.element_size >= ar->internal.element_size, - "ARRAYALLOC: a page is marked full, but it is not!"); - - internal_fatal(page->size < page->used_elements * ar->internal.element_size, - "ARRAYALLOC: a page has been overflown!"); - } - - return page; -} -#endif - -#ifdef NETDATA_TRACE_ALLOCATIONS -static void arrayalloc_add_page(ARAL *ar, const char *file, const char *function, size_t line) { -#else -static void arrayalloc_add_page(ARAL *ar) { -#endif - if(unlikely(!ar->internal.initialized)) - arrayalloc_init(ar); - - ARAL_PAGE *page = callocz(1, sizeof(ARAL_PAGE)); - page->size = ar->initial_elements * ar->internal.element_size * ar->internal.allocation_multiplier; - if(page->size > ar->internal.max_alloc_size) - page->size = ar->internal.max_alloc_size; - else - ar->internal.allocation_multiplier *= 2; - - if(ar->internal.mmap) { - ar->internal.file_number++; - char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/array_alloc.mmap/%s.%zu", *ar->cache_dir, ar->filename, ar->internal.file_number); - page->filename = strdupz(filename); - page->data = netdata_mmap(page->filename, page->size, MAP_SHARED, 0); - if (unlikely(!page->data)) - fatal("Cannot allocate arrayalloc buffer of size %zu on filename '%s'", page->size, page->filename); - } - else { -#ifdef NETDATA_TRACE_ALLOCATIONS - page->data = mallocz_int(page->size, file, function, line); -#else - page->data = mallocz(page->size); -#endif - } - - // link the free space to its page - ARAL_FREE *fr = (ARAL_FREE *)page->data; - fr->size = page->size; - fr->page = page; - fr->next = NULL; - page->free_list = fr; - - // link the new page at the front of the list of pages - DOUBLE_LINKED_LIST_PREPEND_UNSAFE(ar->internal.pages, page, prev, next); - - arrayalloc_free_validate_internal_check(ar, fr); -} - -static void arrayalloc_lock(ARAL *ar) { - if(!ar->internal.lockless) - netdata_mutex_lock(&ar->internal.mutex); -} - -static void arrayalloc_unlock(ARAL *ar) { - if(!ar->internal.lockless) - netdata_mutex_unlock(&ar->internal.mutex); -} - -ARAL *arrayalloc_create(size_t element_size, size_t elements, const char *filename, char **cache_dir, bool mmap) { - ARAL *ar = callocz(1, sizeof(ARAL)); - ar->requested_element_size = element_size; - ar->initial_elements = elements; - ar->filename = filename; - ar->cache_dir = cache_dir; - ar->use_mmap = mmap; - return ar; -} - -#ifdef NETDATA_TRACE_ALLOCATIONS -void *arrayalloc_mallocz_int(ARAL *ar, const char *file, const char *function, size_t line) { -#else -void *arrayalloc_mallocz(ARAL *ar) { -#endif - if(unlikely(!ar->internal.initialized)) - arrayalloc_init(ar); - - arrayalloc_lock(ar); - - if(unlikely(!ar->internal.pages || !ar->internal.pages->free_list)) { - internal_fatal(find_page_with_free_slots_internal_check(ar) != NULL, - "ARRAYALLOC: first page does not have any free slots, but there is another that has!"); - -#ifdef NETDATA_TRACE_ALLOCATIONS - arrayalloc_add_page(ar, file, function, line); -#else - arrayalloc_add_page(ar); -#endif - } - - ARAL_PAGE *page = ar->internal.pages; - ARAL_FREE *found_fr = page->free_list; - - internal_fatal(!found_fr, - "ARRAYALLOC: free item to use, cannot be NULL."); - - internal_fatal(found_fr->size < ar->internal.element_size, - "ARRAYALLOC: free item size %zu, cannot be smaller than %zu", - found_fr->size, ar->internal.element_size); - - if(unlikely(found_fr->size - ar->internal.element_size < ar->internal.element_size)) { - // we can use the entire free space entry - - page->free_list = found_fr->next; - - if(unlikely(!page->free_list)) { - // we are done with this page - // move the full page last - // so that pages with free items remain first in the list - DOUBLE_LINKED_LIST_REMOVE_UNSAFE(ar->internal.pages, page, prev, next); - DOUBLE_LINKED_LIST_APPEND_UNSAFE(ar->internal.pages, page, prev, next); - } - } - else { - // we can split the free space entry - - uint8_t *data = (uint8_t *)found_fr; - ARAL_FREE *fr = (ARAL_FREE *)&data[ar->internal.element_size]; - fr->page = page; - fr->size = found_fr->size - ar->internal.element_size; - - // link the free slot first in the page - fr->next = found_fr->next; - page->free_list = fr; - - arrayalloc_free_validate_internal_check(ar, fr); - } - - page->used_elements++; - - // put the page pointer after the element - uint8_t *data = (uint8_t *)found_fr; - ARAL_PAGE **page_ptr = (ARAL_PAGE **)&data[ar->internal.page_ptr_offset]; - *page_ptr = page; - - arrayalloc_unlock(ar); - return (void *)found_fr; -} - -#ifdef NETDATA_TRACE_ALLOCATIONS -void arrayalloc_freez_int(ARAL *ar, void *ptr, const char *file, const char *function, size_t line) { -#else -void arrayalloc_freez(ARAL *ar, void *ptr) { -#endif - if(unlikely(!ptr)) return; - arrayalloc_lock(ar); - - // get the page pointer - ARAL_PAGE *page; - { - uint8_t *data = (uint8_t *)ptr; - ARAL_PAGE **page_ptr = (ARAL_PAGE **)&data[ar->internal.page_ptr_offset]; - page = *page_ptr; - -#ifdef NETDATA_INTERNAL_CHECKS - // make it NULL so that we will fail on double free - // do not enable this on production, because the MMAP file - // will need to be saved again! - *page_ptr = NULL; -#endif - } - -#ifdef NETDATA_ARRAYALLOC_INTERNAL_CHECKS - { - // find the page ptr belongs - ARAL_PAGE *page2 = find_page_with_allocation_internal_check(ar, ptr); - - if(unlikely(page != page2)) - fatal("ARRAYALLOC: page pointers do not match!"); - - if (unlikely(!page2)) - fatal("ARRAYALLOC: free of pointer %p is not in arrayalloc address space.", ptr); - } -#endif - - if(unlikely(!page)) - fatal("ARRAYALLOC: possible corruption or double free of pointer %p", ptr); - - if (unlikely(!page->used_elements)) - fatal("ARRAYALLOC: free of pointer %p is inside a page without any active allocations.", ptr); - - page->used_elements--; - - // make this element available - ARAL_FREE *fr = (ARAL_FREE *)ptr; - fr->page = page; - fr->size = ar->internal.element_size; - fr->next = page->free_list; - page->free_list = fr; - - // if the page is empty, release it - if(!page->used_elements) { - DOUBLE_LINKED_LIST_REMOVE_UNSAFE(ar->internal.pages, page, prev, next); - - // free it - if(ar->internal.mmap) { - netdata_munmap(page->data, page->size); - if (unlikely(unlink(page->filename) == 1)) - error("Cannot delete file '%s'", page->filename); - freez((void *)page->filename); - } - else { -#ifdef NETDATA_TRACE_ALLOCATIONS - freez_int(page->data, file, function, line); -#else - freez(page->data); -#endif - } - - freez(page); - } - else if(page != ar->internal.pages) { - // move the page with free item first - // so that the next allocation will use this page - DOUBLE_LINKED_LIST_REMOVE_UNSAFE(ar->internal.pages, page, prev, next); - DOUBLE_LINKED_LIST_PREPEND_UNSAFE(ar->internal.pages, page, prev, next); - } - - arrayalloc_unlock(ar); -} - -int aral_unittest(size_t elements) { - char *cache_dir = "/tmp/"; - ARAL *ar = arrayalloc_create(20, 10, "test-aral", &cache_dir, false); - - void *pointers[elements]; - - for(size_t i = 0; i < elements ;i++) { - pointers[i] = arrayalloc_mallocz(ar); - } - - for(size_t div = 5; div >= 2 ;div--) { - for (size_t i = 0; i < elements / div; i++) { - arrayalloc_freez(ar, pointers[i]); - } - - for (size_t i = 0; i < elements / div; i++) { - pointers[i] = arrayalloc_mallocz(ar); - } - } - - for(size_t step = 50; step >= 10 ;step -= 10) { - for (size_t i = 0; i < elements; i += step) { - arrayalloc_freez(ar, pointers[i]); - } - - for (size_t i = 0; i < elements; i += step) { - pointers[i] = arrayalloc_mallocz(ar); - } - } - - for(size_t i = 0; i < elements ;i++) { - arrayalloc_freez(ar, pointers[i]); - } - - if(ar->internal.pages) { - fprintf(stderr, "ARAL leftovers detected (1)"); - return 1; - } - - size_t ops = 0; - size_t increment = elements / 10; - size_t allocated = 0; - for(size_t all = increment; all <= elements ; all += increment) { - - for(; allocated < all ; allocated++) { - pointers[allocated] = arrayalloc_mallocz(ar); - ops++; - } - - size_t to_free = now_realtime_usec() % all; - size_t free_list[to_free]; - for(size_t i = 0; i < to_free ;i++) { - size_t pos; - do { - pos = now_realtime_usec() % all; - } while(!pointers[pos]); - - arrayalloc_freez(ar, pointers[pos]); - pointers[pos] = NULL; - free_list[i] = pos; - ops++; - } - - for(size_t i = 0; i < to_free ;i++) { - size_t pos = free_list[i]; - pointers[pos] = arrayalloc_mallocz(ar); - ops++; - } - } - - for(size_t i = 0; i < allocated - 1 ;i++) { - arrayalloc_freez(ar, pointers[i]); - ops++; - } - - arrayalloc_freez(ar, pointers[allocated - 1]); - - if(ar->internal.pages) { - fprintf(stderr, "ARAL leftovers detected (2)"); - return 1; - } - - return 0; -} diff --git a/libnetdata/arrayalloc/arrayalloc.h b/libnetdata/arrayalloc/arrayalloc.h deleted file mode 100644 index cf80b73fd..000000000 --- a/libnetdata/arrayalloc/arrayalloc.h +++ /dev/null @@ -1,48 +0,0 @@ - -#ifndef ARRAYALLOC_H -#define ARRAYALLOC_H 1 - -#include "../libnetdata.h" - -typedef struct arrayalloc { - size_t requested_element_size; - size_t initial_elements; - const char *filename; - char **cache_dir; - bool use_mmap; - - // private members - do not touch - struct { - bool mmap; - bool lockless; - bool initialized; - size_t element_size; - size_t page_ptr_offset; - size_t file_number; - size_t natural_page_size; - size_t allocation_multiplier; - size_t max_alloc_size; - netdata_mutex_t mutex; - struct arrayalloc_page *pages; - } internal; -} ARAL; - -ARAL *arrayalloc_create(size_t element_size, size_t elements, const char *filename, char **cache_dir, bool mmap); -int aral_unittest(size_t elements); - -#ifdef NETDATA_TRACE_ALLOCATIONS - -#define arrayalloc_mallocz(ar) arrayalloc_mallocz_int(ar, __FILE__, __FUNCTION__, __LINE__) -#define arrayalloc_freez(ar, ptr) arrayalloc_freez_int(ar, ptr, __FILE__, __FUNCTION__, __LINE__) - -void *arrayalloc_mallocz_int(ARAL *ar, const char *file, const char *function, size_t line); -void arrayalloc_freez_int(ARAL *ar, void *ptr, const char *file, const char *function, size_t line); - -#else // NETDATA_TRACE_ALLOCATIONS - -void *arrayalloc_mallocz(ARAL *ar); -void arrayalloc_freez(ARAL *ar, void *ptr); - -#endif // NETDATA_TRACE_ALLOCATIONS - -#endif // ARRAYALLOC_H diff --git a/libnetdata/avl/README.md b/libnetdata/avl/README.md index 36392bd79..2b03fec4a 100644 --- a/libnetdata/avl/README.md +++ b/libnetdata/avl/README.md @@ -1,6 +1,10 @@ # AVL diff --git a/libnetdata/buffer/README.md b/libnetdata/buffer/README.md index c5f66e6e3..6a84fd8a3 100644 --- a/libnetdata/buffer/README.md +++ b/libnetdata/buffer/README.md @@ -1,6 +1,10 @@ # BUFFER diff --git a/libnetdata/buffer/buffer.c b/libnetdata/buffer/buffer.c index d0940588f..eeb283209 100644 --- a/libnetdata/buffer/buffer.c +++ b/libnetdata/buffer/buffer.c @@ -442,28 +442,28 @@ void buffer_date(BUFFER *wb, int year, int month, int day, int hours, int minute buffer_need_bytes(wb, 36); char *b = &wb->buffer[wb->len]; - char *p = b; - - *p++ = '0' + year / 1000; year %= 1000; - *p++ = '0' + year / 100; year %= 100; - *p++ = '0' + year / 10; - *p++ = '0' + year % 10; - *p++ = '-'; - *p++ = '0' + month / 10; - *p++ = '0' + month % 10; - *p++ = '-'; - *p++ = '0' + day / 10; - *p++ = '0' + day % 10; - *p++ = ' '; - *p++ = '0' + hours / 10; - *p++ = '0' + hours % 10; - *p++ = ':'; - *p++ = '0' + minutes / 10; - *p++ = '0' + minutes % 10; - *p++ = ':'; - *p++ = '0' + seconds / 10; - *p++ = '0' + seconds % 10; - *p = '\0'; + char *p = b; + + *p++ = '0' + year / 1000; year %= 1000; + *p++ = '0' + year / 100; year %= 100; + *p++ = '0' + year / 10; + *p++ = '0' + year % 10; + *p++ = '-'; + *p++ = '0' + month / 10; + *p++ = '0' + month % 10; + *p++ = '-'; + *p++ = '0' + day / 10; + *p++ = '0' + day % 10; + *p++ = ' '; + *p++ = '0' + hours / 10; + *p++ = '0' + hours % 10; + *p++ = ':'; + *p++ = '0' + minutes / 10; + *p++ = '0' + minutes % 10; + *p++ = ':'; + *p++ = '0' + seconds / 10; + *p++ = '0' + seconds % 10; + *p = '\0'; wb->len += (size_t)(p - b); @@ -472,7 +472,7 @@ void buffer_date(BUFFER *wb, int year, int month, int day, int hours, int minute buffer_overflow_check(wb); } -BUFFER *buffer_create(size_t size) +BUFFER *buffer_create(size_t size, size_t *statistics) { BUFFER *b; @@ -483,9 +483,13 @@ BUFFER *buffer_create(size_t size) b->buffer[0] = '\0'; b->size = size; b->contenttype = CT_TEXT_PLAIN; + b->statistics = statistics; buffer_overflow_init(b); buffer_overflow_check(b); + if(b->statistics) + __atomic_add_fetch(b->statistics, b->size + sizeof(BUFFER) + sizeof(BUFFER_OVERFLOW_EOF) + 2, __ATOMIC_RELAXED); + return(b); } @@ -496,6 +500,9 @@ void buffer_free(BUFFER *b) { debug(D_WEB_BUFFER, "Freeing web buffer of size %zu.", b->size); + if(b->statistics) + __atomic_sub_fetch(b->statistics, b->size + sizeof(BUFFER) + sizeof(BUFFER_OVERFLOW_EOF) + 2, __ATOMIC_RELAXED); + freez(b->buffer); freez(b); } @@ -510,9 +517,7 @@ void buffer_increase(BUFFER *b, size_t free_size_required) { size_t minimum = WEB_DATA_LENGTH_INCREASE_STEP; if(minimum > wanted) wanted = minimum; - size_t optimal = b->size; - if(b->size > 5*1024*1024) optimal = b->size / 2; - + size_t optimal = (b->size > 5*1024*1024) ? b->size / 2 : b->size; if(optimal > wanted) wanted = optimal; debug(D_WEB_BUFFER, "Increasing data buffer from size %zu to %zu.", b->size, b->size + wanted); @@ -520,6 +525,9 @@ void buffer_increase(BUFFER *b, size_t free_size_required) { b->buffer = reallocz(b->buffer, b->size + wanted + sizeof(BUFFER_OVERFLOW_EOF) + 2); b->size += wanted; + if(b->statistics) + __atomic_add_fetch(b->statistics, wanted, __ATOMIC_RELAXED); + buffer_overflow_init(b); buffer_overflow_check(b); } diff --git a/libnetdata/buffer/buffer.h b/libnetdata/buffer/buffer.h index ce6f52899..0fa3495b4 100644 --- a/libnetdata/buffer/buffer.h +++ b/libnetdata/buffer/buffer.h @@ -15,6 +15,7 @@ typedef struct web_buffer { uint8_t options; // options related to the content time_t date; // the timestamp this content has been generated time_t expires; // the timestamp this content expires + size_t *statistics; } BUFFER; // options @@ -61,7 +62,7 @@ void buffer_rrd_value(BUFFER *wb, NETDATA_DOUBLE value); void buffer_date(BUFFER *wb, int year, int month, int day, int hours, int minutes, int seconds); void buffer_jsdate(BUFFER *wb, int year, int month, int day, int hours, int minutes, int seconds); -BUFFER *buffer_create(size_t size); +BUFFER *buffer_create(size_t size, size_t *statistics); void buffer_free(BUFFER *b); void buffer_increase(BUFFER *b, size_t free_size_required); diff --git a/libnetdata/circular_buffer/README.md b/libnetdata/circular_buffer/README.md index 4482173d7..23980dff3 100644 --- a/libnetdata/circular_buffer/README.md +++ b/libnetdata/circular_buffer/README.md @@ -1,6 +1,10 @@ # Circular Buffer diff --git a/libnetdata/circular_buffer/circular_buffer.c b/libnetdata/circular_buffer/circular_buffer.c index c791b420b..b2bded179 100644 --- a/libnetdata/circular_buffer/circular_buffer.c +++ b/libnetdata/circular_buffer/circular_buffer.c @@ -1,16 +1,24 @@ #include "../libnetdata.h" -struct circular_buffer *cbuffer_new(size_t initial, size_t max) { - struct circular_buffer *result = mallocz(sizeof(*result)); - result->size = initial; - result->data = mallocz(initial); - result->write = 0; - result->read = 0; - result->max_size = max; - return result; +struct circular_buffer *cbuffer_new(size_t initial, size_t max, size_t *statistics) { + struct circular_buffer *buf = mallocz(sizeof(struct circular_buffer)); + buf->size = initial; + buf->data = mallocz(initial); + buf->write = 0; + buf->read = 0; + buf->max_size = max; + buf->statistics = statistics; + + if(buf->statistics) + __atomic_add_fetch(buf->statistics, sizeof(struct circular_buffer) + buf->size, __ATOMIC_RELAXED); + + return buf; } void cbuffer_free(struct circular_buffer *buf) { + if(buf && buf->statistics) + __atomic_sub_fetch(buf->statistics, sizeof(struct circular_buffer) + buf->size, __ATOMIC_RELAXED); + freez(buf->data); freez(buf); } @@ -19,6 +27,8 @@ static int cbuffer_realloc_unsafe(struct circular_buffer *buf) { // Check that we can grow if (buf->size >= buf->max_size) return 1; + + size_t old_size = buf->size; size_t new_size = buf->size * 2; if (new_size > buf->max_size) new_size = buf->max_size; @@ -43,6 +53,10 @@ static int cbuffer_realloc_unsafe(struct circular_buffer *buf) { freez(buf->data); buf->data = new_data; buf->size = new_size; + + if(buf->statistics) + __atomic_add_fetch(buf->statistics, new_size - old_size, __ATOMIC_RELAXED); + return 0; } diff --git a/libnetdata/circular_buffer/circular_buffer.h b/libnetdata/circular_buffer/circular_buffer.h index 8c42aa807..9d29a84d7 100644 --- a/libnetdata/circular_buffer/circular_buffer.h +++ b/libnetdata/circular_buffer/circular_buffer.h @@ -5,10 +5,11 @@ struct circular_buffer { size_t size, write, read, max_size; + size_t *statistics; char *data; }; -struct circular_buffer *cbuffer_new(size_t initial, size_t max); +struct circular_buffer *cbuffer_new(size_t initial, size_t max, size_t *statistics); void cbuffer_free(struct circular_buffer *buf); int cbuffer_add_unsafe(struct circular_buffer *buf, const char *d, size_t d_len); void cbuffer_remove_unsafe(struct circular_buffer *buf, size_t num); diff --git a/libnetdata/clocks/clocks.c b/libnetdata/clocks/clocks.c index cabc0000e..19c66f0a5 100644 --- a/libnetdata/clocks/clocks.c +++ b/libnetdata/clocks/clocks.c @@ -189,9 +189,13 @@ void sleep_to_absolute_time(usec_t usec) { .tv_nsec = (suseconds_t)((usec % USEC_PER_SEC) * NSEC_PER_USEC) }; + errno = 0; int ret = 0; while( (ret = clock_nanosleep(clock, TIMER_ABSTIME, &req, NULL)) != 0 ) { - if(ret == EINTR) continue; + if(ret == EINTR) { + errno = 0; + continue; + } else { if (ret == EINVAL) { if (!einval_printed) { @@ -296,7 +300,9 @@ usec_t heartbeat_next(heartbeat_t *hb, usec_t tick) { if(unlikely(hb->randomness > tick / 2)) { // TODO: The heartbeat tick should be specified at the heartbeat_init() function usec_t tmp = (now_realtime_usec() * clock_realtime_resolution) % (tick / 2); - info("heartbeat randomness of %llu is too big for a tick of %llu - setting it to %llu", hb->randomness, tick, tmp); + + error_limit_static_global_var(erl, 10, 0); + error_limit(&erl, "heartbeat randomness of %llu is too big for a tick of %llu - setting it to %llu", hb->randomness, tick, tmp); hb->randomness = tmp; } @@ -311,7 +317,7 @@ usec_t heartbeat_next(heartbeat_t *hb, usec_t tick) { // sleep_usec() has a loop to guarantee we will sleep for at least the requested time. // According the specs, when we sleep for a relative time, clock adjustments should not affect the duration // we sleep. - sleep_usec(next - now); + sleep_usec_with_now(next - now, now); now = now_realtime_usec(); dt = now - hb->realtime; @@ -322,11 +328,13 @@ usec_t heartbeat_next(heartbeat_t *hb, usec_t tick) { if(unlikely(now < next)) { errno = 0; - error("heartbeat clock: woke up %llu microseconds earlier than expected (can be due to the CLOCK_REALTIME set to the past).", next - now); + error_limit_static_global_var(erl, 10, 0); + error_limit(&erl, "heartbeat clock: woke up %llu microseconds earlier than expected (can be due to the CLOCK_REALTIME set to the past).", next - now); } else if(unlikely(now - next > tick / 2)) { errno = 0; - error("heartbeat clock: woke up %llu microseconds later than expected (can be due to system load or the CLOCK_REALTIME set to the future).", now - next); + error_limit_static_global_var(erl, 10, 0); + error_limit(&erl, "heartbeat clock: woke up %llu microseconds later than expected (can be due to system load or the CLOCK_REALTIME set to the future).", now - next); } if(unlikely(!hb->realtime)) { @@ -338,7 +346,7 @@ usec_t heartbeat_next(heartbeat_t *hb, usec_t tick) { return dt; } -void sleep_usec(usec_t usec) { +void sleep_usec_with_now(usec_t usec, usec_t started_ut) { // we expect microseconds (1.000.000 per second) // but timespec is nanoseconds (1.000.000.000 per second) struct timespec rem = { 0, 0 }, req = { @@ -346,21 +354,37 @@ void sleep_usec(usec_t usec) { .tv_nsec = (suseconds_t) ((usec % USEC_PER_SEC) * NSEC_PER_USEC) }; -#ifdef __linux__ - while (clock_nanosleep(CLOCK_REALTIME, 0, &req, &rem) != 0) { -#else + // make sure errno is not EINTR + errno = 0; + + if(!started_ut) + started_ut = now_realtime_usec(); + + usec_t end_ut = started_ut + usec; + while (nanosleep(&req, &rem) != 0) { -#endif if (likely(errno == EINTR && (rem.tv_sec || rem.tv_nsec))) { req = rem; rem = (struct timespec){ 0, 0 }; + + // break an infinite loop + errno = 0; + + usec_t now_ut = now_realtime_usec(); + if(now_ut >= end_ut) + break; + + usec_t remaining_ut = (usec_t)req.tv_sec * USEC_PER_SEC + (usec_t)req.tv_nsec * NSEC_PER_USEC > usec; + usec_t check_ut = now_ut - started_ut; + if(remaining_ut > check_ut) { + req = (struct timespec){ + .tv_sec = (time_t) ( check_ut / USEC_PER_SEC), + .tv_nsec = (suseconds_t) ((check_ut % USEC_PER_SEC) * NSEC_PER_USEC) + }; + } } else { -#ifdef __linux__ - error("Cannot clock_nanosleep(CLOCK_REALTIME) for %llu microseconds.", usec); -#else error("Cannot nanosleep() for %llu microseconds.", usec); -#endif break; } } diff --git a/libnetdata/clocks/clocks.h b/libnetdata/clocks/clocks.h index 7738a2c8e..b050b6254 100644 --- a/libnetdata/clocks/clocks.h +++ b/libnetdata/clocks/clocks.h @@ -141,7 +141,8 @@ usec_t heartbeat_next(heartbeat_t *hb, usec_t tick); void heartbeat_statistics(usec_t *min_ptr, usec_t *max_ptr, usec_t *average_ptr, size_t *count_ptr); -void sleep_usec(usec_t usec); +void sleep_usec_with_now(usec_t usec, usec_t started_ut); +#define sleep_usec(usec) sleep_usec_with_now(usec, 0); void clocks_init(void); diff --git a/libnetdata/completion/completion.c b/libnetdata/completion/completion.c index b5ac86e4f..6257e0299 100644 --- a/libnetdata/completion/completion.c +++ b/libnetdata/completion/completion.c @@ -5,6 +5,7 @@ void completion_init(struct completion *p) { p->completed = 0; + p->completed_jobs = 0; fatal_assert(0 == uv_cond_init(&p->cond)); fatal_assert(0 == uv_mutex_init(&p->mutex)); } @@ -32,3 +33,32 @@ void completion_mark_complete(struct completion *p) uv_cond_broadcast(&p->cond); uv_mutex_unlock(&p->mutex); } + +unsigned completion_wait_for_a_job(struct completion *p, unsigned completed_jobs) +{ + uv_mutex_lock(&p->mutex); + while (0 == p->completed && p->completed_jobs <= completed_jobs) { + uv_cond_wait(&p->cond, &p->mutex); + } + completed_jobs = p->completed_jobs; + uv_mutex_unlock(&p->mutex); + + return completed_jobs; +} + +void completion_mark_complete_a_job(struct completion *p) +{ + uv_mutex_lock(&p->mutex); + p->completed_jobs++; + uv_cond_broadcast(&p->cond); + uv_mutex_unlock(&p->mutex); +} + +bool completion_is_done(struct completion *p) +{ + bool ret; + uv_mutex_lock(&p->mutex); + ret = p->completed; + uv_mutex_unlock(&p->mutex); + return ret; +} diff --git a/libnetdata/completion/completion.h b/libnetdata/completion/completion.h index 667360a42..723f73688 100644 --- a/libnetdata/completion/completion.h +++ b/libnetdata/completion/completion.h @@ -9,6 +9,7 @@ struct completion { uv_mutex_t mutex; uv_cond_t cond; volatile unsigned completed; + volatile unsigned completed_jobs; }; void completion_init(struct completion *p); @@ -19,4 +20,8 @@ void completion_wait_for(struct completion *p); void completion_mark_complete(struct completion *p); +unsigned completion_wait_for_a_job(struct completion *p, unsigned completed_jobs); +void completion_mark_complete_a_job(struct completion *p); +bool completion_is_done(struct completion *p); + #endif /* NETDATA_COMPLETION_H */ diff --git a/libnetdata/config/README.md b/libnetdata/config/README.md index 2eccf7a21..c34cf9255 100644 --- a/libnetdata/config/README.md +++ b/libnetdata/config/README.md @@ -1,6 +1,10 @@ # Netdata ini config files diff --git a/libnetdata/dictionary/README.md b/libnetdata/dictionary/README.md index 6d7e55392..508c4e031 100644 --- a/libnetdata/dictionary/README.md +++ b/libnetdata/dictionary/README.md @@ -1,5 +1,9 @@ # Dictionaries diff --git a/libnetdata/dictionary/dictionary.c b/libnetdata/dictionary/dictionary.c index 0277e067f..061b671ab 100644 --- a/libnetdata/dictionary/dictionary.c +++ b/libnetdata/dictionary/dictionary.c @@ -143,6 +143,8 @@ struct dictionary { DICT_OPTIONS options; // the configuration flags of the dictionary (they never change - no atomics) DICT_FLAGS flags; // run time flags for the dictionary (they change all the time - atomics needed) + ARAL *value_aral; + struct { // support for multiple indexing engines Pvoid_t JudyHSArray; // the hash table netdata_rwlock_t rwlock; // protect the index @@ -179,7 +181,9 @@ struct dictionary { #endif }; +// ---------------------------------------------------------------------------- // forward definitions of functions used in reverse order in the code + static void garbage_collect_pending_deletes(DICTIONARY *dict); static inline void item_linked_list_remove(DICTIONARY *dict, DICTIONARY_ITEM *item); static size_t dict_item_free_with_hooks(DICTIONARY *dict, DICTIONARY_ITEM *item); @@ -260,7 +264,7 @@ static inline void pointer_del(DICTIONARY *dict __maybe_unused, DICTIONARY_ITEM static inline void DICTIONARY_STATS_PLUS_MEMORY(DICTIONARY *dict, size_t key_size, size_t item_size, size_t value_size) { if(key_size) - __atomic_fetch_add(&dict->stats->memory.indexed, (long)key_size, __ATOMIC_RELAXED); + __atomic_fetch_add(&dict->stats->memory.index, (long)JUDYHS_INDEX_SIZE_ESTIMATE(key_size), __ATOMIC_RELAXED); if(item_size) __atomic_fetch_add(&dict->stats->memory.dict, (long)item_size, __ATOMIC_RELAXED); @@ -270,7 +274,7 @@ static inline void DICTIONARY_STATS_PLUS_MEMORY(DICTIONARY *dict, size_t key_siz } static inline void DICTIONARY_STATS_MINUS_MEMORY(DICTIONARY *dict, size_t key_size, size_t item_size, size_t value_size) { if(key_size) - __atomic_fetch_sub(&dict->stats->memory.indexed, (long)key_size, __ATOMIC_RELAXED); + __atomic_fetch_sub(&dict->stats->memory.index, (long)JUDYHS_INDEX_SIZE_ESTIMATE(key_size), __ATOMIC_RELAXED); if(item_size) __atomic_fetch_sub(&dict->stats->memory.dict, (long)item_size, __ATOMIC_RELAXED); @@ -380,7 +384,7 @@ size_t dictionary_referenced_items(DICTIONARY *dict) { long int dictionary_stats_for_registry(DICTIONARY *dict) { if(unlikely(!dict)) return 0; - return (dict->stats->memory.indexed + dict->stats->memory.dict); + return (dict->stats->memory.index + dict->stats->memory.dict); } void dictionary_version_increment(DICTIONARY *dict) { __atomic_fetch_add(&dict->version, 1, __ATOMIC_SEQ_CST); @@ -789,7 +793,7 @@ static void garbage_collect_pending_deletes(DICTIONARY *dict) { // we didn't get a reference if(item_is_not_referenced_and_can_be_removed(dict, item)) { - DOUBLE_LINKED_LIST_REMOVE_UNSAFE(dict->items.list, item, prev, next); + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(dict->items.list, item, prev, next); dict_item_free_with_hooks(dict, item); deleted++; @@ -1167,9 +1171,9 @@ static inline void item_linked_list_add(DICTIONARY *dict, DICTIONARY_ITEM *item) ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); if(dict->options & DICT_OPTION_ADD_IN_FRONT) - DOUBLE_LINKED_LIST_PREPEND_UNSAFE(dict->items.list, item, prev, next); + DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(dict->items.list, item, prev, next); else - DOUBLE_LINKED_LIST_APPEND_UNSAFE(dict->items.list, item, prev, next); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(dict->items.list, item, prev, next); #ifdef NETDATA_INTERNAL_CHECKS item->ll_adder_pid = gettid(); @@ -1186,7 +1190,7 @@ static inline void item_linked_list_add(DICTIONARY *dict, DICTIONARY_ITEM *item) static inline void item_linked_list_remove(DICTIONARY *dict, DICTIONARY_ITEM *item) { ll_recursive_lock(dict, DICTIONARY_LOCK_WRITE); - DOUBLE_LINKED_LIST_REMOVE_UNSAFE(dict->items.list, item, prev, next); + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(dict->items.list, item, prev, next); #ifdef NETDATA_INTERNAL_CHECKS item->ll_remover_pid = gettid(); @@ -1234,11 +1238,45 @@ static inline size_t item_get_name_len(const DICTIONARY_ITEM *item) { return strlen(item->caller_name); } +static ARAL *dict_items_aral = NULL; +static ARAL *dict_shared_items_aral = NULL; + +void dictionary_static_items_aral_init(void) { + static SPINLOCK spinlock; + + if(unlikely(!dict_items_aral || !dict_shared_items_aral)) { + netdata_spinlock_lock(&spinlock); + + // we have to check again + if(!dict_items_aral) + dict_items_aral = aral_create( + "dict-items", + sizeof(DICTIONARY_ITEM), + 0, + 65536, + aral_by_size_statistics(), + NULL, NULL, false, false); + + // we have to check again + if(!dict_shared_items_aral) + dict_shared_items_aral = aral_create( + "dict-shared-items", + sizeof(DICTIONARY_ITEM_SHARED), + 0, + 65536, + aral_by_size_statistics(), + NULL, NULL, false, false); + + netdata_spinlock_unlock(&spinlock); + } +} + static DICTIONARY_ITEM *dict_item_create(DICTIONARY *dict __maybe_unused, size_t *allocated_bytes, DICTIONARY_ITEM *master_item) { DICTIONARY_ITEM *item; size_t size = sizeof(DICTIONARY_ITEM); - item = callocz(1, size); + item = aral_mallocz(dict_items_aral); + memset(item, 0, sizeof(DICTIONARY_ITEM)); #ifdef NETDATA_INTERNAL_CHECKS item->creator_pid = gettid(); @@ -1257,7 +1295,9 @@ static DICTIONARY_ITEM *dict_item_create(DICTIONARY *dict __maybe_unused, size_t } else { size = sizeof(DICTIONARY_ITEM_SHARED); - item->shared = callocz(1, size); + item->shared = aral_mallocz(dict_shared_items_aral); + memset(item->shared, 0, sizeof(DICTIONARY_ITEM_SHARED)); + item->shared->links = 1; *allocated_bytes += size; } @@ -1268,20 +1308,39 @@ static DICTIONARY_ITEM *dict_item_create(DICTIONARY *dict __maybe_unused, size_t return item; } -static void *dict_item_value_create(void *value, size_t value_len) { +static inline void *dict_item_value_mallocz(DICTIONARY *dict, size_t value_len) { + if(dict->value_aral) { + internal_fatal(aral_element_size(dict->value_aral) != value_len, + "DICTIONARY: item value size %zu does not match the configured fixed one %zu", + value_len, aral_element_size(dict->value_aral)); + return aral_mallocz(dict->value_aral); + } + else + return mallocz(value_len); +} + +static inline void dict_item_value_freez(DICTIONARY *dict, void *ptr) { + if(dict->value_aral) + aral_freez(dict->value_aral, ptr); + else + freez(ptr); +} + +static void *dict_item_value_create(DICTIONARY *dict, void *value, size_t value_len) { void *ptr = NULL; if(likely(value_len)) { if (likely(value)) { // a value has been supplied // copy it - ptr = mallocz(value_len); + ptr = dict_item_value_mallocz(dict, value_len); memcpy(ptr, value, value_len); } else { // no value has been supplied // allocate a clear memory block - ptr = callocz(1, value_len); + ptr = dict_item_value_mallocz(dict, value_len); + memset(ptr, 0, value_len); } } // else @@ -1320,7 +1379,7 @@ static DICTIONARY_ITEM *dict_item_create_with_hooks(DICTIONARY *dict, const char if(unlikely(dict->options & DICT_OPTION_VALUE_LINK_DONT_CLONE)) item->shared->value = value; else - item->shared->value = dict_item_value_create(value, value_len); + item->shared->value = dict_item_value_create(dict, value, value_len); item->shared->value_len = value_len; value_size += value_len; @@ -1360,7 +1419,7 @@ static void dict_item_reset_value_with_hooks(DICTIONARY *dict, DICTIONARY_ITEM * void *old_value = item->shared->value; void *new_value = NULL; if(value_len) { - new_value = mallocz(value_len); + new_value = dict_item_value_mallocz(dict, value_len); if(value) memcpy(new_value, value, value_len); else memset(new_value, 0, value_len); } @@ -1368,7 +1427,7 @@ static void dict_item_reset_value_with_hooks(DICTIONARY *dict, DICTIONARY_ITEM * item->shared->value_len = value_len; debug(D_DICTIONARY, "Dictionary: freeing old value of '%s'", item_get_name(item)); - freez(old_value); + dict_item_value_freez(dict, old_value); } dictionary_execute_insert_callback(dict, item, constructor_data); @@ -1391,17 +1450,18 @@ static size_t dict_item_free_with_hooks(DICTIONARY *dict, DICTIONARY_ITEM *item) if(unlikely(!(dict->options & DICT_OPTION_VALUE_LINK_DONT_CLONE))) { debug(D_DICTIONARY, "Dictionary freeing value of '%s'", item_get_name(item)); - freez(item->shared->value); + dict_item_value_freez(dict, item->shared->value); item->shared->value = NULL; } value_size += item->shared->value_len; - freez(item->shared); + aral_freez(dict_shared_items_aral, item->shared); item->shared = NULL; item_size += sizeof(DICTIONARY_ITEM_SHARED); } - freez(item); + aral_freez(dict_items_aral, item); + item_size += sizeof(DICTIONARY_ITEM); DICTIONARY_STATS_MINUS_MEMORY(dict, key_size, item_size, value_size); @@ -1749,6 +1809,9 @@ static bool dictionary_free_all_resources(DICTIONARY *dict, size_t *mem, bool fo dict_size += sizeof(DICTIONARY); DICTIONARY_STATS_MINUS_MEMORY(dict, 0, sizeof(DICTIONARY), 0); + if(dict->value_aral) + aral_by_size_release(dict->value_aral); + freez(dict); internal_error( @@ -1934,19 +1997,34 @@ static bool api_is_name_good_with_trace(DICTIONARY *dict __maybe_unused, const c // ---------------------------------------------------------------------------- // API - dictionary management -static DICTIONARY *dictionary_create_internal(DICT_OPTIONS options, struct dictionary_stats *stats) { +static DICTIONARY *dictionary_create_internal(DICT_OPTIONS options, struct dictionary_stats *stats, size_t fixed_size) { cleanup_destroyed_dictionaries(); DICTIONARY *dict = callocz(1, sizeof(DICTIONARY)); dict->options = options; dict->stats = stats; + if((dict->options & DICT_OPTION_FIXED_SIZE) && !fixed_size) { + dict->options &= ~DICT_OPTION_FIXED_SIZE; + internal_fatal(true, "DICTIONARY: requested fixed size dictionary, without setting the size"); + } + if(!(dict->options & DICT_OPTION_FIXED_SIZE) && fixed_size) { + dict->options |= DICT_OPTION_FIXED_SIZE; + internal_fatal(true, "DICTIONARY: set a fixed size for the items, without setting DICT_OPTION_FIXED_SIZE flag"); + } + + if(dict->options & DICT_OPTION_FIXED_SIZE) + dict->value_aral = aral_by_size_acquire(fixed_size); + else + dict->value_aral = NULL; + size_t dict_size = 0; dict_size += sizeof(DICTIONARY); dict_size += dictionary_locks_init(dict); dict_size += reference_counter_init(dict); dict_size += hashtable_init_unsafe(dict); + dictionary_static_items_aral_init(); pointer_index_init(dict); DICTIONARY_STATS_PLUS_MEMORY(dict, 0, dict_size, 0); @@ -1955,12 +2033,12 @@ static DICTIONARY *dictionary_create_internal(DICT_OPTIONS options, struct dicti } #ifdef NETDATA_INTERNAL_CHECKS -DICTIONARY *dictionary_create_advanced_with_trace(DICT_OPTIONS options, struct dictionary_stats *stats, const char *function, size_t line, const char *file) { +DICTIONARY *dictionary_create_advanced_with_trace(DICT_OPTIONS options, struct dictionary_stats *stats, size_t fixed_size, const char *function, size_t line, const char *file) { #else -DICTIONARY *dictionary_create_advanced(DICT_OPTIONS options, struct dictionary_stats *stats) { +DICTIONARY *dictionary_create_advanced(DICT_OPTIONS options, struct dictionary_stats *stats, size_t fixed_size) { #endif - DICTIONARY *dict = dictionary_create_internal(options, stats?stats:&dictionary_stats_category_other); + DICTIONARY *dict = dictionary_create_internal(options, stats?stats:&dictionary_stats_category_other, fixed_size); #ifdef NETDATA_INTERNAL_CHECKS dict->creation_function = function; @@ -1978,7 +2056,9 @@ DICTIONARY *dictionary_create_view_with_trace(DICTIONARY *master, const char *fu DICTIONARY *dictionary_create_view(DICTIONARY *master) { #endif - DICTIONARY *dict = dictionary_create_internal(master->options, master->stats); + DICTIONARY *dict = dictionary_create_internal(master->options, master->stats, + master->value_aral ? aral_element_size(master->value_aral) : 0); + dict->master = master; dictionary_hooks_allocate(master); @@ -3295,7 +3375,7 @@ static int dictionary_unittest_view_threads() { // threads testing of dictionary struct dictionary_stats stats_master = {}; struct dictionary_stats stats_view = {}; - tv.master = dictionary_create_advanced(DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE, &stats_master); + tv.master = dictionary_create_advanced(DICT_OPTION_NAME_LINK_DONT_CLONE | DICT_OPTION_DONT_OVERWRITE_VALUE, &stats_master, 0); tv.view = dictionary_create_view(tv.master); tv.view->stats = &stats_view; @@ -3388,7 +3468,7 @@ static int dictionary_unittest_view_threads() { size_t dictionary_unittest_views(void) { size_t errors = 0; struct dictionary_stats stats = {}; - DICTIONARY *master = dictionary_create_advanced(DICT_OPTION_NONE, &stats); + DICTIONARY *master = dictionary_create_advanced(DICT_OPTION_NONE, &stats, 0); DICTIONARY *view = dictionary_create_view(master); fprintf(stderr, "\n\nChecking dictionary views...\n"); diff --git a/libnetdata/dictionary/dictionary.h b/libnetdata/dictionary/dictionary.h index 0e7b3d39f..58220def0 100644 --- a/libnetdata/dictionary/dictionary.h +++ b/libnetdata/dictionary/dictionary.h @@ -53,6 +53,7 @@ typedef enum dictionary_options { DICT_OPTION_NAME_LINK_DONT_CLONE = (1 << 2), // don't copy the name, just point to the one provided (default: copy) DICT_OPTION_DONT_OVERWRITE_VALUE = (1 << 3), // don't overwrite values of dictionary items (default: overwrite) DICT_OPTION_ADD_IN_FRONT = (1 << 4), // add dictionary items at the front of the linked list (default: at the end) + DICT_OPTION_FIXED_SIZE = (1 << 5), // the items of the dictionary have a fixed size } DICT_OPTIONS; struct dictionary_stats { @@ -91,7 +92,7 @@ struct dictionary_stats { // memory struct { - long indexed; // bytes of keys indexed (indication of the index size) + long index; // bytes of keys indexed (indication of the index size) long values; // bytes of caller structures long dict; // bytes of the structures dictionary needs } memory; @@ -107,12 +108,12 @@ struct dictionary_stats { // Create a dictionary #ifdef NETDATA_INTERNAL_CHECKS -#define dictionary_create(options) dictionary_create_advanced_with_trace(options, NULL, __FUNCTION__, __LINE__, __FILE__) -#define dictionary_create_advanced(options, stats) dictionary_create_advanced_with_trace(options, stats, __FUNCTION__, __LINE__, __FILE__) -DICTIONARY *dictionary_create_advanced_with_trace(DICT_OPTIONS options, struct dictionary_stats *stats, const char *function, size_t line, const char *file); +#define dictionary_create(options) dictionary_create_advanced_with_trace(options, NULL, 0, __FUNCTION__, __LINE__, __FILE__) +#define dictionary_create_advanced(options, stats, fixed_size) dictionary_create_advanced_with_trace(options, stats, fixed_size, __FUNCTION__, __LINE__, __FILE__) +DICTIONARY *dictionary_create_advanced_with_trace(DICT_OPTIONS options, struct dictionary_stats *stats, size_t fixed_size, const char *function, size_t line, const char *file); #else -#define dictionary_create(options) dictionary_create_advanced(options, NULL); -DICTIONARY *dictionary_create_advanced(DICT_OPTIONS options, struct dictionary_stats *stats); +#define dictionary_create(options) dictionary_create_advanced(options, NULL, 0); +DICTIONARY *dictionary_create_advanced(DICT_OPTIONS options, struct dictionary_stats *stats, size_t fixed_size); #endif // Create a view on a dictionary diff --git a/libnetdata/ebpf/README.md b/libnetdata/ebpf/README.md index 534867f31..c2dabe102 100644 --- a/libnetdata/ebpf/README.md +++ b/libnetdata/ebpf/README.md @@ -1,5 +1,13 @@ +# eBPF library + +Netdata's eBPF library supports the [eBPF collector](https://github.com/netdata/netdata/blob/master/collectors/ebpf.plugin/README.md). diff --git a/libnetdata/ebpf/ebpf.c b/libnetdata/ebpf/ebpf.c index 382485e5f..7cad59785 100644 --- a/libnetdata/ebpf/ebpf.c +++ b/libnetdata/ebpf/ebpf.c @@ -809,7 +809,7 @@ static void ebpf_select_mode_string(char *output, size_t len, netdata_run_mode_t * * Convert the string given as argument to value present in enum. * - * @param str value read from configuraion file. + * @param str value read from configuration file. * * @return It returns the value to be used. */ @@ -901,7 +901,7 @@ netdata_ebpf_program_loaded_t ebpf_convert_core_type(char *str, netdata_run_mode /** * Adjust Thread Load * - * Adjust thread configuraton according specified load. + * Adjust thread configuration according specified load. * * @param mod the main structure that will be adjusted. * @param file the btf file used with thread. @@ -1060,7 +1060,7 @@ static netdata_ebpf_load_mode_t ebpf_select_load_mode(struct btf *btf_file, netd * Update configuration for a specific thread. * * @param modules structure that will be updated - * @oaram origin specify the configuration file loaded + * @param origin specify the configuration file loaded * @param btf_file a pointer to the loaded btf file. * @param is_rhf is Red Hat family? */ @@ -1124,7 +1124,7 @@ void ebpf_update_module(ebpf_module_t *em, struct btf *btf_file, int kver, int i error("Cannot load the ebpf configuration file %s", em->config_file); return; } - // If user defined data globaly, we will have here EBPF_LOADED_FROM_USER, we need to consider this, to avoid + // If user defined data globally, we will have here EBPF_LOADED_FROM_USER, we need to consider this, to avoid // forcing users to configure thread by thread. origin = (!(em->load & NETDATA_EBPF_LOAD_SOURCE)) ? EBPF_LOADED_FROM_STOCK : em->load & NETDATA_EBPF_LOAD_SOURCE; } else @@ -1139,7 +1139,7 @@ void ebpf_update_module(ebpf_module_t *em, struct btf *btf_file, int kver, int i * Apps and cgroup has internal cleanup that needs attaching tracers to release_task, to avoid overload the function * we will enable this integration by default, if and only if, we are running with trampolines. * - * @param em a poiter to the main thread structure. + * @param em a pointer to the main thread structure. * @param mode is the mode used with different */ void ebpf_adjust_apps_cgroup(ebpf_module_t *em, netdata_ebpf_program_loaded_t mode) @@ -1160,7 +1160,8 @@ void ebpf_adjust_apps_cgroup(ebpf_module_t *em, netdata_ebpf_program_loaded_t mo * Helper used to get address from /proc/kallsym * * @param fa address structure - * @param fd file descriptor loaded inside kernel. + * @param fd file descriptor loaded inside kernel. If a negative value is given + * the function will load address and it won't update hash table. */ void ebpf_load_addresses(ebpf_addresses_t *fa, int fd) { @@ -1182,11 +1183,15 @@ void ebpf_load_addresses(ebpf_addresses_t *fa, int fd) char *fcnt = procfile_lineword(ff, l, 2); uint32_t hash = simple_hash(fcnt); if (fa->hash == hash && !strcmp(fcnt, fa->function)) { - char addr[128]; - snprintf(addr, 127, "0x%s", procfile_lineword(ff, l, 0)); - fa->addr = (unsigned long) strtoul(addr, NULL, 16); - uint32_t key = 0; - bpf_map_update_elem(fd, &key, &fa->addr, BPF_ANY); + if (fd > 0) { + char addr[128]; + snprintf(addr, 127, "0x%s", procfile_lineword(ff, l, 0)); + fa->addr = (unsigned long) strtoul(addr, NULL, 16); + uint32_t key = 0; + bpf_map_update_elem(fd, &key, &fa->addr, BPF_ANY); + } else + fa->addr = 1; + break; } } diff --git a/libnetdata/ebpf/ebpf.h b/libnetdata/ebpf/ebpf.h index 5cff5134f..cf3fa7ccd 100644 --- a/libnetdata/ebpf/ebpf.h +++ b/libnetdata/ebpf/ebpf.h @@ -206,7 +206,7 @@ typedef struct ebpf_specify_name { typedef enum netdata_ebpf_load_mode { EBPF_LOAD_LEGACY = 1<<0, // Select legacy mode, this means we will load binaries - EBPF_LOAD_CORE = 1<<1, // When CO-RE is used, it is necessary to use the souce code + EBPF_LOAD_CORE = 1<<1, // When CO-RE is used, it is necessary to use the source code EBPF_LOAD_PLAY_DICE = 1<<2, // Take a look on environment and choose the best option EBPF_LOADED_FROM_STOCK = 1<<3, // Configuration loaded from Stock file EBPF_LOADED_FROM_USER = 1<<4 // Configuration loaded from user diff --git a/libnetdata/eval/eval.c b/libnetdata/eval/eval.c index 0e429a08c..c7570bd2f 100644 --- a/libnetdata/eval/eval.c +++ b/libnetdata/eval/eval.c @@ -1126,7 +1126,7 @@ EVAL_EXPRESSION *expression_parse(const char *string, const char **failed_at, in return NULL; } - BUFFER *out = buffer_create(1024); + BUFFER *out = buffer_create(1024, NULL); print_parsed_as_node(out, op, &err); if(err != EVAL_ERROR_OK) { error("failed to re-generate expression '%s' with reason: %s", string, expression_strerror(err)); @@ -1141,7 +1141,7 @@ EVAL_EXPRESSION *expression_parse(const char *string, const char **failed_at, in exp->parsed_as = strdupz(buffer_tostring(out)); buffer_free(out); - exp->error_msg = buffer_create(100); + exp->error_msg = buffer_create(100, NULL); exp->nodes = (void *)op; return exp; diff --git a/libnetdata/json/README.md b/libnetdata/json/README.md index 2e04b8b6b..e772f114d 100644 --- a/libnetdata/json/README.md +++ b/libnetdata/json/README.md @@ -1,6 +1,10 @@ # json diff --git a/libnetdata/json/json.c b/libnetdata/json/json.c index d5f62edaf..532b677ce 100644 --- a/libnetdata/json/json.c +++ b/libnetdata/json/json.c @@ -90,7 +90,7 @@ jsmntok_t *json_tokenise(char *js, size_t len, size_t *count) */ int json_callback_print(JSON_ENTRY *e) { - BUFFER *wb=buffer_create(300); + BUFFER *wb=buffer_create(300, NULL); buffer_sprintf(wb,"%s = ", e->name); char txt[50]; diff --git a/libnetdata/july/Makefile.am b/libnetdata/july/Makefile.am new file mode 100644 index 000000000..161784b8f --- /dev/null +++ b/libnetdata/july/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/july/README.md b/libnetdata/july/README.md new file mode 100644 index 000000000..df2a3d38c --- /dev/null +++ b/libnetdata/july/README.md @@ -0,0 +1,14 @@ + + + +# July + +An interface similar to `Judy` that uses minimal allocations (that can be cached) +for items that are mainly appended (just a few insertions in the middle) + diff --git a/libnetdata/july/july.c b/libnetdata/july/july.c new file mode 100644 index 000000000..0ad5f13e5 --- /dev/null +++ b/libnetdata/july/july.c @@ -0,0 +1,453 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "july.h" + +#define JULYL_MIN_ENTRIES 10 + +struct JulyL_item { + Word_t index; + void *value; +}; + +struct JulyL { + size_t entries; + size_t used; + + // statistics + size_t bytes; + size_t bytes_moved; + size_t reallocs; + + struct { + struct JulyL *prev; + struct JulyL *next; + } cache; + + struct JulyL_item array[]; +}; + +// ---------------------------------------------------------------------------- +// JulyL cache + +static struct { + struct { + SPINLOCK spinlock; + struct JulyL *available_items; + size_t available; + } protected; + + struct { + size_t bytes; + size_t allocated; + size_t bytes_moved; + size_t reallocs; + } atomics; +} julyl_globals = { + .protected = { + .spinlock = NETDATA_SPINLOCK_INITIALIZER, + .available_items = NULL, + .available = 0, + }, + .atomics = { + .bytes = 0, + .allocated = 0, + .bytes_moved = 0, + .reallocs = 0, + }, +}; + +void julyl_cleanup1(void) { + struct JulyL *item = NULL; + + if(!netdata_spinlock_trylock(&julyl_globals.protected.spinlock)) + return; + + if(julyl_globals.protected.available_items && julyl_globals.protected.available > 10) { + item = julyl_globals.protected.available_items; + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(julyl_globals.protected.available_items, item, cache.prev, cache.next); + julyl_globals.protected.available--; + } + + netdata_spinlock_unlock(&julyl_globals.protected.spinlock); + + if(item) { + size_t bytes = item->bytes; + freez(item); + __atomic_sub_fetch(&julyl_globals.atomics.bytes, bytes, __ATOMIC_RELAXED); + __atomic_sub_fetch(&julyl_globals.atomics.allocated, 1, __ATOMIC_RELAXED); + } +} + +struct JulyL *julyl_get(void) { + struct JulyL *j; + + netdata_spinlock_lock(&julyl_globals.protected.spinlock); + + j = julyl_globals.protected.available_items; + if(likely(j)) { + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(julyl_globals.protected.available_items, j, cache.prev, cache.next); + julyl_globals.protected.available--; + } + + netdata_spinlock_unlock(&julyl_globals.protected.spinlock); + + if(unlikely(!j)) { + size_t bytes = sizeof(struct JulyL) + JULYL_MIN_ENTRIES * sizeof(struct JulyL_item); + j = mallocz(bytes); + j->bytes = bytes; + j->entries = JULYL_MIN_ENTRIES; + __atomic_add_fetch(&julyl_globals.atomics.bytes, bytes, __ATOMIC_RELAXED); + __atomic_add_fetch(&julyl_globals.atomics.allocated, 1, __ATOMIC_RELAXED); + } + + j->used = 0; + j->bytes_moved = 0; + j->reallocs = 0; + j->cache.next = j->cache.prev = NULL; + return j; +} + +static void julyl_release(struct JulyL *j) { + if(unlikely(!j)) return; + + __atomic_add_fetch(&julyl_globals.atomics.bytes_moved, j->bytes_moved, __ATOMIC_RELAXED); + __atomic_add_fetch(&julyl_globals.atomics.reallocs, j->reallocs, __ATOMIC_RELAXED); + + netdata_spinlock_lock(&julyl_globals.protected.spinlock); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(julyl_globals.protected.available_items, j, cache.prev, cache.next); + julyl_globals.protected.available++; + netdata_spinlock_unlock(&julyl_globals.protected.spinlock); +} + +size_t julyl_cache_size(void) { + return __atomic_load_n(&julyl_globals.atomics.bytes, __ATOMIC_RELAXED); +} + +size_t julyl_bytes_moved(void) { + return __atomic_load_n(&julyl_globals.atomics.bytes_moved, __ATOMIC_RELAXED); +} + +// ---------------------------------------------------------------------------- +// JulyL + +size_t JulyLGet_binary_search_position_of_index(const struct JulyL *July, Word_t Index) { + // return the position of the first item >= Index + + size_t left = 0; + size_t right = July->used; + while(left < right) { + size_t middle = (left + right) >> 1; + + if(July->array[middle].index > Index) + right = middle; + + else + left = middle + 1; + } + + internal_fatal(left > July->used, "JULY: invalid position returned"); + + if(left > 0 && July->array[left - 1].index == Index) + return left - 1; + + internal_fatal( (left < July->used && July->array[left].index < Index) || + (left > 0 && July->array[left - 1].index >= Index) + , "JULY: wrong item returned"); + + return left; +} + +PPvoid_t JulyLGet(Pcvoid_t PArray, Word_t Index, PJError_t PJError __maybe_unused) { + const struct JulyL *July = PArray; + if(!July) + return NULL; + + size_t pos = JulyLGet_binary_search_position_of_index(July, Index); + + if(unlikely(pos >= July->used || July->array[pos].index != Index)) + return NULL; + + return (PPvoid_t)&July->array[pos].value; +} + +PPvoid_t JulyLIns(PPvoid_t PPArray, Word_t Index, PJError_t PJError __maybe_unused) { + struct JulyL *July = *PPArray; + if(unlikely(!July)) { + July = julyl_get(); + July->used = 0; + *PPArray = July; + } + + size_t pos = JulyLGet_binary_search_position_of_index(July, Index); + + if((pos == July->used || July->array[pos].index != Index)) { + // we have to add this entry + + if (unlikely(July->used == July->entries)) { + // we have to expand the array + size_t bytes = sizeof(struct JulyL) + July->entries * 2 * sizeof(struct JulyL_item); + __atomic_add_fetch(&julyl_globals.atomics.bytes, bytes - July->bytes, __ATOMIC_RELAXED); + July = reallocz(July, bytes); + July->bytes = bytes; + July->entries *= 2; + July->reallocs++; + *PPArray = July; + } + + if (unlikely(pos != July->used)) { + // we have to shift some members to make room + size_t size = (July->used - pos) * sizeof(struct JulyL_item); + memmove(&July->array[pos + 1], &July->array[pos], size); + July->bytes_moved += size; + } + + July->used++; + July->array[pos].value = NULL; + July->array[pos].index = Index; + } + + return &July->array[pos].value; +} + +PPvoid_t JulyLFirst(Pcvoid_t PArray, Word_t *Index, PJError_t PJError __maybe_unused) { + const struct JulyL *July = PArray; + if(!July) + return NULL; + + size_t pos = JulyLGet_binary_search_position_of_index(July, *Index); + // pos is >= Index + + if(unlikely(pos == July->used)) + return NULL; + + *Index = July->array[pos].index; + return (PPvoid_t)&July->array[pos].value; +} + +PPvoid_t JulyLNext(Pcvoid_t PArray, Word_t *Index, PJError_t PJError __maybe_unused) { + const struct JulyL *July = PArray; + if(!July) + return NULL; + + size_t pos = JulyLGet_binary_search_position_of_index(July, *Index); + // pos is >= Index + + if(unlikely(pos == July->used)) + return NULL; + + if(July->array[pos].index == *Index) { + pos++; + + if(unlikely(pos == July->used)) + return NULL; + } + + *Index = July->array[pos].index; + return (PPvoid_t)&July->array[pos].value; +} + +PPvoid_t JulyLLast(Pcvoid_t PArray, Word_t *Index, PJError_t PJError __maybe_unused) { + const struct JulyL *July = PArray; + if(!July) + return NULL; + + size_t pos = JulyLGet_binary_search_position_of_index(July, *Index); + // pos is >= Index + + if(pos > 0 && (pos == July->used || July->array[pos].index > *Index)) + pos--; + + if(unlikely(pos == 0 && July->array[0].index > *Index)) + return NULL; + + *Index = July->array[pos].index; + return (PPvoid_t)&July->array[pos].value; +} + +PPvoid_t JulyLPrev(Pcvoid_t PArray, Word_t *Index, PJError_t PJError __maybe_unused) { + const struct JulyL *July = PArray; + if(!July) + return NULL; + + size_t pos = JulyLGet_binary_search_position_of_index(July, *Index); + // pos is >= Index + + if(unlikely(pos == 0 || July->used == 0)) + return NULL; + + // get the previous one + pos--; + + *Index = July->array[pos].index; + return (PPvoid_t)&July->array[pos].value; +} + +Word_t JulyLFreeArray(PPvoid_t PPArray, PJError_t PJError __maybe_unused) { + struct JulyL *July = *PPArray; + if(unlikely(!July)) + return 0; + + size_t bytes = July->bytes; + julyl_release(July); + *PPArray = NULL; + return bytes; +} + +// ---------------------------------------------------------------------------- +// unittest + +#define item_index(i) (((i) * 2) + 100) + +int julytest(void) { + Word_t entries = 10000; + Pvoid_t array = NULL; + + // test additions + for(Word_t i = 0; i < entries ;i++) { + Pvoid_t *PValue = JulyLIns(&array, item_index(i), PJE0); + if(!PValue) + fatal("JULY: cannot insert item %lu", item_index(i)); + + *PValue = (void *)(item_index(i)); + } + + // test successful finds + for(Word_t i = 0; i < entries ;i++) { + Pvoid_t *PValue = JulyLGet(array, item_index(i), PJE0); + if(!PValue) + fatal("JULY: cannot find item %lu", item_index(i)); + + if(*PValue != (void *)(item_index(i))) + fatal("JULY: item %lu has the value %lu", item_index(i), (unsigned long)(*PValue)); + } + + // test finding the first item + for(Word_t i = 0; i < entries ;i++) { + Word_t index = item_index(i); + Pvoid_t *PValue = JulyLFirst(array, &index, PJE0); + if(!PValue) + fatal("JULY: cannot find first item %lu", item_index(i)); + + if(*PValue != (void *)(item_index(i))) + fatal("JULY: item %lu has the value %lu", item_index(i), (unsigned long)(*PValue)); + + if(index != item_index(i)) + fatal("JULY: item %lu has index %lu", item_index(i), index); + } + + // test finding the next item + for(Word_t i = 0; i < entries - 1 ;i++) { + Word_t index = item_index(i); + Pvoid_t *PValue = JulyLNext(array, &index, PJE0); + if(!PValue) + fatal("JULY: cannot find next item %lu", item_index(i)); + + if(*PValue != (void *)(item_index(i + 1))) + fatal("JULY: item %lu next has the value %lu", item_index(i), (unsigned long)(*PValue)); + + if(index != item_index(i + 1)) + fatal("JULY: item %lu next has index %lu", item_index(i), index); + } + + // test finding the last item + for(Word_t i = 0; i < entries ;i++) { + Word_t index = item_index(i); + Pvoid_t *PValue = JulyLLast(array, &index, PJE0); + if(!PValue) + fatal("JULY: cannot find last item %lu", item_index(i)); + + if(*PValue != (void *)(item_index(i))) + fatal("JULY: item %lu has the value %lu", item_index(i), (unsigned long)(*PValue)); + + if(index != item_index(i)) + fatal("JULY: item %lu has index %lu", item_index(i), index); + } + + // test finding the prev item + for(Word_t i = 1; i < entries ;i++) { + Word_t index = item_index(i); + Pvoid_t *PValue = JulyLPrev(array, &index, PJE0); + if(!PValue) + fatal("JULY: cannot find prev item %lu", item_index(i)); + + if(*PValue != (void *)(item_index(i - 1))) + fatal("JULY: item %lu prev has the value %lu", item_index(i), (unsigned long)(*PValue)); + + if(index != item_index(i - 1)) + fatal("JULY: item %lu prev has index %lu", item_index(i), index); + } + + // test full traversal forward + { + Word_t i = 0; + Word_t index = 0; + bool first = true; + Pvoid_t *PValue; + while((PValue = JulyLFirstThenNext(array, &index, &first))) { + if(*PValue != (void *)(item_index(i))) + fatal("JULY: item %lu traversal has the value %lu", item_index(i), (unsigned long)(*PValue)); + + if(index != item_index(i)) + fatal("JULY: item %lu traversal has index %lu", item_index(i), index); + + i++; + } + + if(i != entries) + fatal("JULY: expected to forward traverse %lu entries, but traversed %lu", entries, i); + } + + // test full traversal backward + { + Word_t i = 0; + Word_t index = (Word_t)(-1); + bool first = true; + Pvoid_t *PValue; + while((PValue = JulyLLastThenPrev(array, &index, &first))) { + if(*PValue != (void *)(item_index(entries - i - 1))) + fatal("JULY: item %lu traversal has the value %lu", item_index(i), (unsigned long)(*PValue)); + + if(index != item_index(entries - i - 1)) + fatal("JULY: item %lu traversal has index %lu", item_index(i), index); + + i++; + } + + if(i != entries) + fatal("JULY: expected to back traverse %lu entries, but traversed %lu", entries, i); + } + + // test finding non-existing first item + for(Word_t i = 0; i < entries ;i++) { + Word_t index = item_index(i) - 1; + Pvoid_t *PValue = JulyLFirst(array, &index, PJE0); + if(!PValue) + fatal("JULY: cannot find first item %lu", item_index(i) - 1); + + if(*PValue != (void *)(item_index(i))) + fatal("JULY: item %lu has the value %lu", item_index(i), (unsigned long)(*PValue)); + + if(index != item_index(i)) + fatal("JULY: item %lu has index %lu", item_index(i), index); + } + + // test finding non-existing last item + for(Word_t i = 0; i < entries ;i++) { + Word_t index = item_index(i) + 1; + Pvoid_t *PValue = JulyLLast(array, &index, PJE0); + if(!PValue) + fatal("JULY: cannot find last item %lu", item_index(i) + 1); + + if(*PValue != (void *)(item_index(i))) + fatal("JULY: item %lu has the value %lu", item_index(i), (unsigned long)(*PValue)); + + if(index != item_index(i)) + fatal("JULY: item %lu has index %lu", item_index(i), index); + } + + JulyLFreeArray(&array, PJE0); + + return 0; +} + + diff --git a/libnetdata/july/july.h b/libnetdata/july/july.h new file mode 100644 index 000000000..672ed44e4 --- /dev/null +++ b/libnetdata/july/july.h @@ -0,0 +1,40 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_JULY_H +#define NETDATA_JULY_H 1 + +#include "../libnetdata.h" + +// #define PDC_USE_JULYL 1 + +PPvoid_t JulyLGet(Pcvoid_t PArray, Word_t Index, PJError_t PJError); +PPvoid_t JulyLIns(PPvoid_t PPArray, Word_t Index, PJError_t PJError); +PPvoid_t JulyLFirst(Pcvoid_t PArray, Word_t *Index, PJError_t PJError); +PPvoid_t JulyLNext(Pcvoid_t PArray, Word_t *Index, PJError_t PJError); +PPvoid_t JulyLLast(Pcvoid_t PArray, Word_t *Index, PJError_t PJError); +PPvoid_t JulyLPrev(Pcvoid_t PArray, Word_t *Index, PJError_t PJError); +Word_t JulyLFreeArray(PPvoid_t PPArray, PJError_t PJError); + +static inline PPvoid_t JulyLFirstThenNext(Pcvoid_t PArray, Word_t * PIndex, bool *first) { + if(unlikely(*first)) { + *first = false; + return JulyLFirst(PArray, PIndex, PJE0); + } + + return JulyLNext(PArray, PIndex, PJE0); +} + +static inline PPvoid_t JulyLLastThenPrev(Pcvoid_t PArray, Word_t * PIndex, bool *first) { + if(unlikely(*first)) { + *first = false; + return JulyLLast(PArray, PIndex, PJE0); + } + + return JulyLPrev(PArray, PIndex, PJE0); +} + +void julyl_cleanup1(void); +size_t julyl_cache_size(void); +size_t julyl_bytes_moved(void); + +#endif // NETDATA_JULY_H diff --git a/libnetdata/libnetdata.c b/libnetdata/libnetdata.c index cc04a97eb..f6b6b026a 100644 --- a/libnetdata/libnetdata.c +++ b/libnetdata/libnetdata.c @@ -21,6 +21,81 @@ int enable_ksm = 0; volatile sig_atomic_t netdata_exit = 0; const char *program_version = VERSION; +#define MAX_JUDY_SIZE_TO_ARAL 24 +static bool judy_sizes_config[MAX_JUDY_SIZE_TO_ARAL + 1] = { + [3] = true, + [4] = true, + [5] = true, + [6] = true, + [7] = true, + [8] = true, + [10] = true, + [11] = true, + [15] = true, + [23] = true, +}; +static ARAL *judy_sizes_aral[MAX_JUDY_SIZE_TO_ARAL + 1] = {}; + +struct aral_statistics judy_sizes_aral_statistics = {}; + +void aral_judy_init(void) { + for(size_t Words = 0; Words <= MAX_JUDY_SIZE_TO_ARAL; Words++) + if(judy_sizes_config[Words]) { + char buf[30+1]; + snprintfz(buf, 30, "judy-%zu", Words * sizeof(Word_t)); + judy_sizes_aral[Words] = aral_create( + buf, + Words * sizeof(Word_t), + 0, + 65536, + &judy_sizes_aral_statistics, + NULL, NULL, false, false); + } +} + +size_t judy_aral_overhead(void) { + return aral_overhead_from_stats(&judy_sizes_aral_statistics); +} + +size_t judy_aral_structures(void) { + return aral_structures_from_stats(&judy_sizes_aral_statistics); +} + +static ARAL *judy_size_aral(Word_t Words) { + if(Words <= MAX_JUDY_SIZE_TO_ARAL && judy_sizes_aral[Words]) + return judy_sizes_aral[Words]; + + return NULL; +} + +inline Word_t JudyMalloc(Word_t Words) { + Word_t Addr; + + ARAL *ar = judy_size_aral(Words); + if(ar) + Addr = (Word_t) aral_mallocz(ar); + else + Addr = (Word_t) mallocz(Words * sizeof(Word_t)); + + return(Addr); +} + +inline void JudyFree(void * PWord, Word_t Words) { + ARAL *ar = judy_size_aral(Words); + if(ar) + aral_freez(ar, PWord); + else + freez(PWord); +} + +Word_t JudyMallocVirtual(Word_t Words) { + return JudyMalloc(Words); +} + +void JudyFreeVirtual(void * PWord, Word_t Words) { + JudyFree(PWord, Words); +} + // ---------------------------------------------------------------------------- // memory allocation functions that handle failures @@ -150,27 +225,6 @@ void posix_memfree(void *ptr) { libc_free(ptr); } -Word_t JudyMalloc(Word_t Words) { - Word_t Addr; - - Addr = (Word_t) mallocz(Words * sizeof(Word_t)); - return(Addr); -} -void JudyFree(void * PWord, Word_t Words) { - (void)Words; - freez(PWord); -} -Word_t JudyMallocVirtual(Word_t Words) { - Word_t Addr; - - Addr = (Word_t) mallocz(Words * sizeof(Word_t)); - return(Addr); -} -void JudyFreeVirtual(void * PWord, Word_t Words) { - (void)Words; - freez(PWord); -} - #define MALLOC_ALIGNMENT (sizeof(uintptr_t) * 2) #define size_t_atomic_count(op, var, size) __atomic_## op ##_fetch(&(var), size, __ATOMIC_RELAXED) #define size_t_atomic_bytes(op, var, size) __atomic_## op ##_fetch(&(var), ((size) % MALLOC_ALIGNMENT)?((size) + MALLOC_ALIGNMENT - ((size) % MALLOC_ALIGNMENT)):(size), __ATOMIC_RELAXED) @@ -1176,7 +1230,7 @@ static int memory_file_open(const char *filename, size_t size) { return fd; } -static inline int madvise_sequential(void *mem, size_t len) { +inline int madvise_sequential(void *mem, size_t len) { static int logger = 1; int ret = madvise(mem, len, MADV_SEQUENTIAL); @@ -1184,7 +1238,15 @@ static inline int madvise_sequential(void *mem, size_t len) { return ret; } -static inline int madvise_dontfork(void *mem, size_t len) { +inline int madvise_random(void *mem, size_t len) { + static int logger = 1; + int ret = madvise(mem, len, MADV_RANDOM); + + if (ret != 0 && logger-- > 0) error("madvise(MADV_RANDOM) failed."); + return ret; +} + +inline int madvise_dontfork(void *mem, size_t len) { static int logger = 1; int ret = madvise(mem, len, MADV_DONTFORK); @@ -1192,7 +1254,7 @@ static inline int madvise_dontfork(void *mem, size_t len) { return ret; } -static inline int madvise_willneed(void *mem, size_t len) { +inline int madvise_willneed(void *mem, size_t len) { static int logger = 1; int ret = madvise(mem, len, MADV_WILLNEED); @@ -1200,24 +1262,27 @@ static inline int madvise_willneed(void *mem, size_t len) { return ret; } +inline int madvise_dontneed(void *mem, size_t len) { + static int logger = 1; + int ret = madvise(mem, len, MADV_DONTNEED); + + if (ret != 0 && logger-- > 0) error("madvise(MADV_DONTNEED) failed."); + return ret; +} + +inline int madvise_dontdump(void *mem __maybe_unused, size_t len __maybe_unused) { #if __linux__ -static inline int madvise_dontdump(void *mem, size_t len) { static int logger = 1; int ret = madvise(mem, len, MADV_DONTDUMP); if (ret != 0 && logger-- > 0) error("madvise(MADV_DONTDUMP) failed."); return ret; -} #else -static inline int madvise_dontdump(void *mem, size_t len) { - UNUSED(mem); - UNUSED(len); - return 0; -} #endif +} -static inline int madvise_mergeable(void *mem, size_t len) { +inline int madvise_mergeable(void *mem __maybe_unused, size_t len __maybe_unused) { #ifdef MADV_MERGEABLE static int logger = 1; int ret = madvise(mem, len, MADV_MERGEABLE); @@ -1225,14 +1290,12 @@ static inline int madvise_mergeable(void *mem, size_t len) { if (ret != 0 && logger-- > 0) error("madvise(MADV_MERGEABLE) failed."); return ret; #else - UNUSED(mem); - UNUSED(len); - return 0; #endif } -void *netdata_mmap(const char *filename, size_t size, int flags, int ksm) { +void *netdata_mmap(const char *filename, size_t size, int flags, int ksm, bool read_only, int *open_fd) +{ // info("netdata_mmap('%s', %zu", filename, size); // MAP_SHARED is used in memory mode map @@ -1271,7 +1334,7 @@ void *netdata_mmap(const char *filename, size_t size, int flags, int ksm) { fd_for_mmap = -1; } - mem = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, fd_for_mmap, 0); + mem = mmap(NULL, size, read_only ? PROT_READ : PROT_READ | PROT_WRITE, flags, fd_for_mmap, 0); if (mem != MAP_FAILED) { #ifdef NETDATA_TRACE_ALLOCATIONS @@ -1288,15 +1351,20 @@ void *netdata_mmap(const char *filename, size_t size, int flags, int ksm) { else info("Cannot seek to beginning of file '%s'.", filename); } - madvise_sequential(mem, size); + // madvise_sequential(mem, size); madvise_dontfork(mem, size); madvise_dontdump(mem, size); - if(flags & MAP_SHARED) madvise_willneed(mem, size); + // if(flags & MAP_SHARED) madvise_willneed(mem, size); if(ksm) madvise_mergeable(mem, size); } cleanup: - if(fd != -1) close(fd); + if(fd != -1) { + if (open_fd) + *open_fd = fd; + else + close(fd); + } if(mem == MAP_FAILED) return NULL; errno = 0; return mem; @@ -1934,3 +2002,70 @@ bool run_command_and_copy_output_to_stdout(const char *command, int max_line_len netdata_pclose(NULL, fp, pid); return true; } + +void for_each_open_fd(OPEN_FD_ACTION action, OPEN_FD_EXCLUDE excluded_fds){ + int fd; + + switch(action){ + case OPEN_FD_ACTION_CLOSE: + if(!(excluded_fds & OPEN_FD_EXCLUDE_STDIN)) (void)close(STDIN_FILENO); + if(!(excluded_fds & OPEN_FD_EXCLUDE_STDOUT)) (void)close(STDOUT_FILENO); + if(!(excluded_fds & OPEN_FD_EXCLUDE_STDERR)) (void)close(STDERR_FILENO); + break; + case OPEN_FD_ACTION_FD_CLOEXEC: + if(!(excluded_fds & OPEN_FD_EXCLUDE_STDIN)) (void)fcntl(STDIN_FILENO, F_SETFD, FD_CLOEXEC); + if(!(excluded_fds & OPEN_FD_EXCLUDE_STDOUT)) (void)fcntl(STDOUT_FILENO, F_SETFD, FD_CLOEXEC); + if(!(excluded_fds & OPEN_FD_EXCLUDE_STDERR)) (void)fcntl(STDERR_FILENO, F_SETFD, FD_CLOEXEC); + break; + default: + break; // do nothing + } + +#if defined(HAVE_CLOSE_RANGE) + if(close_range(STDERR_FILENO + 1, ~0U, (action == OPEN_FD_ACTION_FD_CLOEXEC ? CLOSE_RANGE_CLOEXEC : 0)) == 0) return; + error("close_range() failed, will try to close fds manually"); +#endif + + DIR *dir = opendir("/proc/self/fd"); + if (dir == NULL) { + struct rlimit rl; + int open_max = -1; + + if(getrlimit(RLIMIT_NOFILE, &rl) == 0 && rl.rlim_max != RLIM_INFINITY) open_max = rl.rlim_max; +#ifdef _SC_OPEN_MAX + else open_max = sysconf(_SC_OPEN_MAX); +#endif + + if (open_max == -1) open_max = 65535; // 65535 arbitrary default if everything else fails + + for (fd = STDERR_FILENO + 1; fd < open_max; fd++) { + switch(action){ + case OPEN_FD_ACTION_CLOSE: + if(fd_is_valid(fd)) (void)close(fd); + break; + case OPEN_FD_ACTION_FD_CLOEXEC: + (void)fcntl(fd, F_SETFD, FD_CLOEXEC); + break; + default: + break; // do nothing + } + } + } else { + struct dirent *entry; + while ((entry = readdir(dir)) != NULL) { + fd = str2i(entry->d_name); + if(unlikely((fd == STDIN_FILENO ) || (fd == STDOUT_FILENO) || (fd == STDERR_FILENO) )) continue; + switch(action){ + case OPEN_FD_ACTION_CLOSE: + if(fd_is_valid(fd)) (void)close(fd); + break; + case OPEN_FD_ACTION_FD_CLOEXEC: + (void)fcntl(fd, F_SETFD, FD_CLOEXEC); + break; + default: + break; // do nothing + } + } + closedir(dir); + } +} diff --git a/libnetdata/libnetdata.h b/libnetdata/libnetdata.h index 58eaa9ded..c504bd4bd 100644 --- a/libnetdata/libnetdata.h +++ b/libnetdata/libnetdata.h @@ -11,10 +11,18 @@ extern "C" { #include #endif +#define JUDYHS_INDEX_SIZE_ESTIMATE(key_bytes) (((key_bytes) + sizeof(Word_t) - 1) / sizeof(Word_t) * 4) + #if defined(NETDATA_DEV_MODE) && !defined(NETDATA_INTERNAL_CHECKS) #define NETDATA_INTERNAL_CHECKS 1 #endif +#if SIZEOF_VOID_P == 4 +#define ENV32BIT 1 +#else +#define ENV64BIT 1 +#endif + // NETDATA_TRACE_ALLOCATIONS does not work under musl libc, so don't enable it //#if defined(NETDATA_INTERNAL_CHECKS) && !defined(NETDATA_TRACE_ALLOCATIONS) //#define NETDATA_TRACE_ALLOCATIONS 1 @@ -217,6 +225,10 @@ extern "C" { #define WARNUNUSED #endif +void aral_judy_init(void); +size_t judy_aral_overhead(void); +size_t judy_aral_structures(void); + #define ABS(x) (((x) < 0)? (-(x)) : (x)) #define MIN(a,b) (((a)<(b))?(a):(b)) #define MAX(a,b) (((a)>(b))?(a):(b)) @@ -225,8 +237,9 @@ extern "C" { // --------------------------------------------------------------------------------------------- // double linked list management +// inspired by https://github.com/troydhanson/uthash/blob/master/src/utlist.h -#define DOUBLE_LINKED_LIST_PREPEND_UNSAFE(head, item, prev, next) \ +#define DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(head, item, prev, next) \ do { \ (item)->next = (head); \ \ @@ -240,7 +253,7 @@ extern "C" { (head) = (item); \ } while (0) -#define DOUBLE_LINKED_LIST_APPEND_UNSAFE(head, item, prev, next) \ +#define DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(head, item, prev, next) \ do { \ if(likely(head)) { \ (item)->prev = (head)->prev; \ @@ -256,39 +269,97 @@ extern "C" { \ } while (0) -#define DOUBLE_LINKED_LIST_REMOVE_UNSAFE(head, item, prev, next) \ +#define DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(head, item, prev, next) \ do { \ fatal_assert((head) != NULL); \ fatal_assert((item)->prev != NULL); \ \ - if((item)->prev == (item)) { \ + if((item)->prev == (item)) \ /* it is the only item in the list */ \ (head) = NULL; \ - } \ + \ else if((item) == (head)) { \ /* it is the first item */ \ + fatal_assert((item)->next != NULL); \ (item)->next->prev = (item)->prev; \ (head) = (item)->next; \ } \ else { \ + /* it is any other item */ \ (item)->prev->next = (item)->next; \ - if ((item)->next) { \ + \ + if ((item)->next) \ (item)->next->prev = (item)->prev; \ - } \ - else { \ + else \ (head)->prev = (item)->prev; \ - } \ } \ \ (item)->next = NULL; \ (item)->prev = NULL; \ } while (0) +#define DOUBLE_LINKED_LIST_INSERT_ITEM_BEFORE_UNSAFE(head, existing, item, prev, next) \ + do { \ + if (existing) { \ + fatal_assert((head) != NULL); \ + fatal_assert((item) != NULL); \ + \ + (item)->next = (existing); \ + (item)->prev = (existing)->prev; \ + (existing)->prev = (item); \ + \ + if ((head) == (existing)) \ + (head) = (item); \ + else \ + (item)->prev->next = (item); \ + \ + } \ + else \ + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(head, item, prev, next); \ + \ + } while (0) + +#define DOUBLE_LINKED_LIST_INSERT_ITEM_AFTER_UNSAFE(head, existing, item, prev, next) \ + do { \ + if (existing) { \ + fatal_assert((head) != NULL); \ + fatal_assert((item) != NULL); \ + \ + (item)->next = (existing)->next; \ + (item)->prev = (existing); \ + (existing)->next = (item); \ + \ + if ((item)->next) \ + (item)->next->prev = (item); \ + else \ + (head)->prev = (item); \ + } \ + else \ + DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(head, item, prev, next); \ + \ + } while (0) + +#define DOUBLE_LINKED_LIST_APPEND_LIST_UNSAFE(head, head2, prev, next) \ + do { \ + if (head2) { \ + if (head) { \ + __typeof(head2) _head2_last_item = (head2)->prev; \ + \ + (head2)->prev = (head)->prev; \ + (head)->prev->next = (head2); \ + \ + (head)->prev = _head2_last_item; \ + } \ + else \ + (head) = (head2); \ + } \ + } while (0) + #define DOUBLE_LINKED_LIST_FOREACH_FORWARD(head, var, prev, next) \ for ((var) = (head); (var) ; (var) = (var)->next) #define DOUBLE_LINKED_LIST_FOREACH_BACKWARD(head, var, prev, next) \ - for ((var) = (head)?(head)->prev:NULL; (var) && (var) != (head)->prev ; (var) = (var)->prev) + for ((var) = (head) ? (head)->prev : NULL ; (var) ; (var) = ((var) == (head)) ? NULL : (var)->prev) // --------------------------------------------------------------------------------------------- @@ -301,6 +372,14 @@ char *mystrsep(char **ptr, char *s); char *trim(char *s); // remove leading and trailing spaces; may return NULL char *trim_all(char *buffer); // like trim(), but also remove duplicate spaces inside the string; may return NULL +int madvise_sequential(void *mem, size_t len); +int madvise_random(void *mem, size_t len); +int madvise_dontfork(void *mem, size_t len); +int madvise_willneed(void *mem, size_t len); +int madvise_dontneed(void *mem, size_t len); +int madvise_dontdump(void *mem, size_t len); +int madvise_mergeable(void *mem, size_t len); + int vsnprintfz(char *dst, size_t n, const char *fmt, va_list args); int snprintfz(char *dst, size_t n, const char *fmt, ...) PRINTFLIKE(3, 4); @@ -335,7 +414,7 @@ void posix_memfree(void *ptr); void json_escape_string(char *dst, const char *src, size_t size); void json_fix_string(char *s); -void *netdata_mmap(const char *filename, size_t size, int flags, int ksm); +void *netdata_mmap(const char *filename, size_t size, int flags, int ksm, bool read_only, int *open_fd); int netdata_munmap(void *ptr, size_t size); int memory_file_save(const char *filename, void *mem, size_t size); @@ -418,10 +497,22 @@ static inline char *get_word(char **words, size_t num_words, size_t index) { bool run_command_and_copy_output_to_stdout(const char *command, int max_line_length); +typedef enum { + OPEN_FD_ACTION_CLOSE, + OPEN_FD_ACTION_FD_CLOEXEC +} OPEN_FD_ACTION; +typedef enum { + OPEN_FD_EXCLUDE_STDIN = 0x01, + OPEN_FD_EXCLUDE_STDOUT = 0x02, + OPEN_FD_EXCLUDE_STDERR = 0x04 +} OPEN_FD_EXCLUDE; +void for_each_open_fd(OPEN_FD_ACTION action, OPEN_FD_EXCLUDE excluded_fds); + void netdata_cleanup_and_exit(int ret) NORETURN; void send_statistics(const char *action, const char *action_result, const char *action_data); extern char *netdata_configured_host_prefix; #include "libjudy/src/Judy.h" +#include "july/july.h" #include "os.h" #include "storage_number/storage_number.h" #include "threads/threads.h" @@ -453,7 +544,7 @@ extern char *netdata_configured_host_prefix; #include "json/json.h" #include "health/health.h" #include "string/utf8.h" -#include "arrayalloc/arrayalloc.h" +#include "libnetdata/aral/aral.h" #include "onewayalloc/onewayalloc.h" #include "worker_utilization/worker_utilization.h" @@ -500,6 +591,76 @@ struct malloc_trace { }; #endif // NETDATA_TRACE_ALLOCATIONS +static inline PPvoid_t JudyLFirstThenNext(Pcvoid_t PArray, Word_t * PIndex, bool *first) { + if(unlikely(*first)) { + *first = false; + return JudyLFirst(PArray, PIndex, PJE0); + } + + return JudyLNext(PArray, PIndex, PJE0); +} + +static inline PPvoid_t JudyLLastThenPrev(Pcvoid_t PArray, Word_t * PIndex, bool *first) { + if(unlikely(*first)) { + *first = false; + return JudyLLast(PArray, PIndex, PJE0); + } + + return JudyLPrev(PArray, PIndex, PJE0); +} + +static inline size_t indexing_partition_old(Word_t ptr, Word_t modulo) { + size_t total = 0; + + total += (ptr & 0xff) >> 0; + total += (ptr & 0xff00) >> 8; + total += (ptr & 0xff0000) >> 16; + total += (ptr & 0xff000000) >> 24; + + if(sizeof(Word_t) > 4) { + total += (ptr & 0xff00000000) >> 32; + total += (ptr & 0xff0000000000) >> 40; + total += (ptr & 0xff000000000000) >> 48; + total += (ptr & 0xff00000000000000) >> 56; + } + + return (total % modulo); +} + +static uint32_t murmur32(uint32_t h) __attribute__((const)); +static inline uint32_t murmur32(uint32_t h) { + h ^= h >> 16; + h *= 0x85ebca6b; + h ^= h >> 13; + h *= 0xc2b2ae35; + h ^= h >> 16; + + return h; +} + +static uint64_t murmur64(uint64_t h) __attribute__((const)); +static inline uint64_t murmur64(uint64_t k) { + k ^= k >> 33; + k *= 0xff51afd7ed558ccdUL; + k ^= k >> 33; + k *= 0xc4ceb9fe1a85ec53UL; + k ^= k >> 33; + + return k; +} + +static inline size_t indexing_partition(Word_t ptr, Word_t modulo) __attribute__((const)); +static inline size_t indexing_partition(Word_t ptr, Word_t modulo) { + if(sizeof(Word_t) == 8) { + uint64_t hash = murmur64(ptr); + return hash % modulo; + } + else { + uint32_t hash = murmur32(ptr); + return hash % modulo; + } +} + # ifdef __cplusplus } # endif diff --git a/libnetdata/locks/README.md b/libnetdata/locks/README.md index 9132edc43..8810e3d17 100644 --- a/libnetdata/locks/README.md +++ b/libnetdata/locks/README.md @@ -1,5 +1,10 @@ ## How to trace netdata locks diff --git a/libnetdata/locks/locks.c b/libnetdata/locks/locks.c index f7191be52..e73456d70 100644 --- a/libnetdata/locks/locks.c +++ b/libnetdata/locks/locks.c @@ -15,8 +15,6 @@ #ifndef NETDATA_THREAD_LOCKS_ARRAY_SIZE #define NETDATA_THREAD_LOCKS_ARRAY_SIZE 10 #endif -static __thread netdata_rwlock_t *netdata_thread_locks[NETDATA_THREAD_LOCKS_ARRAY_SIZE]; - #endif // NETDATA_TRACE_RWLOCKS @@ -120,8 +118,6 @@ int __netdata_mutex_unlock(netdata_mutex_t *mutex) { #ifdef NETDATA_TRACE_RWLOCKS -#warning NETDATA_TRACE_RWLOCKS ENABLED - EXPECT A LOT OF OUTPUT - int netdata_mutex_init_debug(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_mutex_t *mutex) { debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_init(%p) from %lu@%s, %s()", mutex, line, file, function); @@ -283,12 +279,16 @@ int __netdata_rwlock_trywrlock(netdata_rwlock_t *rwlock) { // https://www.youtube.com/watch?v=rmGJc9PXpuE&t=41s void netdata_spinlock_init(SPINLOCK *spinlock) { - *spinlock = NETDATA_SPINLOCK_INITIALIZER; + memset(spinlock, 0, sizeof(SPINLOCK)); } void netdata_spinlock_lock(SPINLOCK *spinlock) { static const struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 }; +#ifdef NETDATA_INTERNAL_CHECKS + size_t spins = 0; +#endif + netdata_thread_disable_cancelability(); for(int i = 1; @@ -296,254 +296,104 @@ void netdata_spinlock_lock(SPINLOCK *spinlock) { __atomic_test_and_set(&spinlock->locked, __ATOMIC_ACQUIRE) ; i++ ) { + +#ifdef NETDATA_INTERNAL_CHECKS + spins++; +#endif if(unlikely(i == 8)) { i = 0; nanosleep(&ns, NULL); } } + // we have the lock + +#ifdef NETDATA_INTERNAL_CHECKS + spinlock->spins += spins; + spinlock->locker_pid = gettid(); +#endif } void netdata_spinlock_unlock(SPINLOCK *spinlock) { +#ifdef NETDATA_INTERNAL_CHECKS + spinlock->locker_pid = 0; +#endif __atomic_clear(&spinlock->locked, __ATOMIC_RELEASE); netdata_thread_enable_cancelability(); } -#ifdef NETDATA_TRACE_RWLOCKS +bool netdata_spinlock_trylock(SPINLOCK *spinlock) { + netdata_thread_disable_cancelability(); -// ---------------------------------------------------------------------------- -// lockers list + if(!__atomic_load_n(&spinlock->locked, __ATOMIC_RELAXED) && + !__atomic_test_and_set(&spinlock->locked, __ATOMIC_ACQUIRE)) + // we got the lock + return true; -void not_supported_by_posix_rwlocks(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, char locktype, const char *reason) { - __netdata_mutex_lock(&rwlock->lockers_mutex); - fprintf(stderr, - "RW_LOCK FATAL ON LOCK %p: %d '%s' (function %s() %lu@%s) attempts to acquire a '%c' lock, but it is not supported by POSIX because: %s. At this attempt, the task is holding %zu rwlocks and %zu mutexes. There are %zu readers and %zu writers holding this lock:\n", - rwlock, - gettid(), netdata_thread_tag(), - function, line, file, - locktype, - reason, - netdata_locks_acquired_rwlocks, netdata_locks_acquired_mutexes, - rwlock->readers, rwlock->writers); - - int i; - usec_t now = now_monotonic_high_precision_usec(); - netdata_rwlock_locker *p; - for(i = 1, p = rwlock->lockers; p ;p = p->next, i++) { - fprintf(stderr, - " => %i: RW_LOCK %p: process %d '%s' (function %s() %lu@%s) is having %zu '%c' lock for %llu usec.\n", - i, rwlock, - p->pid, p->tag, - p->function, p->line, p->file, - p->callers, p->lock, - (now - p->start_s)); - } - __netdata_mutex_unlock(&rwlock->lockers_mutex); + // we didn't get the lock + return false; } -static void log_rwlock_lockers(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, const char *reason, char locktype) { - - // this function can only be used by one thread at a time - // because otherwise, the threads may deadlock waiting for each other - static netdata_mutex_t log_lockers_mutex = NETDATA_MUTEX_INITIALIZER; - __netdata_mutex_lock(&log_lockers_mutex); - - // now work on this locker - __netdata_mutex_lock(&rwlock->lockers_mutex); - fprintf(stderr, - "RW_LOCK ON LOCK %p: %d '%s' (function %s() %lu@%s) %s a '%c' lock (while holding %zu rwlocks and %zu mutexes). There are %zu readers and %zu writers holding this lock:\n", - rwlock, - gettid(), netdata_thread_tag(), - function, line, file, - reason, locktype, - netdata_locks_acquired_rwlocks, netdata_locks_acquired_mutexes, - rwlock->readers, rwlock->writers); - - int i; - usec_t now = now_monotonic_high_precision_usec(); - netdata_rwlock_locker *p; - for(i = 1, p = rwlock->lockers; p ;p = p->next, i++) { - fprintf(stderr, - " => %i: RW_LOCK %p: process %d '%s' (function %s() %lu@%s) is having %zu '%c' lock for %llu usec.\n", - i, rwlock, - p->pid, p->tag, - p->function, p->line, p->file, - p->callers, p->lock, - (now - p->start_s)); - - if(p->all_caller_locks) { - // find the lock in the netdata_thread_locks[] - // and remove it - int k; - for(k = 0; k < NETDATA_THREAD_LOCKS_ARRAY_SIZE ;k++) { - if (p->all_caller_locks[k] && p->all_caller_locks[k] != rwlock) { - - // lock the other lock lockers list - __netdata_mutex_lock(&p->all_caller_locks[k]->lockers_mutex); - - // print the list of lockers of the other lock - netdata_rwlock_locker *r; - int j; - for(j = 1, r = p->all_caller_locks[k]->lockers; r ;r = r->next, j++) { - fprintf( - stderr, - " ~~~> %i: RW_LOCK %p: process %d '%s' (function %s() %lu@%s) is having %zu '%c' lock for %llu usec.\n", - j, - p->all_caller_locks[k], - r->pid, - r->tag, - r->function, - r->line, - r->file, - r->callers, - r->lock, - (now - r->start_s)); - } - - // unlock the other lock lockers list - __netdata_mutex_unlock(&p->all_caller_locks[k]->lockers_mutex); - } - } - } +#ifdef NETDATA_TRACE_RWLOCKS - } - __netdata_mutex_unlock(&rwlock->lockers_mutex); +// ---------------------------------------------------------------------------- +// lockers list - // unlock this function for other threads - __netdata_mutex_unlock(&log_lockers_mutex); -} - -static netdata_rwlock_locker *add_rwlock_locker(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, char lock_type) { - netdata_rwlock_locker *p = mallocz(sizeof(netdata_rwlock_locker)); - p->pid = gettid(); - p->tag = netdata_thread_tag(); - p->lock = lock_type; - p->file = file; - p->function = function; - p->line = line; - p->callers = 1; - p->all_caller_locks = netdata_thread_locks; - p->start_s = now_monotonic_high_precision_usec(); - - // find a slot in the netdata_thread_locks[] - int i; - for(i = 0; i < NETDATA_THREAD_LOCKS_ARRAY_SIZE ;i++) { - if (!netdata_thread_locks[i]) { - netdata_thread_locks[i] = rwlock; - break; - } - } +static netdata_rwlock_locker *find_rwlock_locker(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + pid_t pid = gettid(); + netdata_rwlock_locker *locker = NULL; __netdata_mutex_lock(&rwlock->lockers_mutex); - p->next = rwlock->lockers; - rwlock->lockers = p; - if(lock_type == 'R') rwlock->readers++; - if(lock_type == 'W') rwlock->writers++; + Pvoid_t *PValue = JudyLGet(rwlock->lockers_pid_JudyL, pid, PJE0); + if(PValue && *PValue) + locker = *PValue; __netdata_mutex_unlock(&rwlock->lockers_mutex); - return p; + return locker; } -static void remove_rwlock_locker(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock, netdata_rwlock_locker *locker) { - usec_t end_s = now_monotonic_high_precision_usec(); +static netdata_rwlock_locker *add_rwlock_locker(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, LOCKER_REQUEST lock_type) { + netdata_rwlock_locker *locker; - if(locker->callers == 0) - fprintf(stderr, - "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) callers should be positive but it is zero\n", - rwlock, - locker->pid, locker->tag, - locker->function, locker->line, locker->file); - - if(locker->callers > 1 && locker->lock != 'R') - fprintf(stderr, - "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) only 'R' locks support multiple holders, but here we have %zu callers holding a '%c' lock.\n", - rwlock, - locker->pid, locker->tag, - locker->function, locker->line, locker->file, - locker->callers, locker->lock); - - __netdata_mutex_lock(&rwlock->lockers_mutex); - locker->callers--; - - if(!locker->callers) { - int doit = 0; - - if (rwlock->lockers == locker) { - rwlock->lockers = locker->next; - doit = 1; - } else { - netdata_rwlock_locker *p; - for (p = rwlock->lockers; p && p->next != locker; p = p->next) - ; - if (p && p->next == locker) { - p->next = locker->next; - doit = 1; - } - } - if(doit) { - if(locker->lock == 'R') rwlock->readers--; - if(locker->lock == 'W') rwlock->writers--; - } + locker = find_rwlock_locker(file, function, line, rwlock); + if(locker) { + locker->lock |= lock_type; + locker->refcount++; + } + else { + locker = mallocz(sizeof(netdata_rwlock_locker)); + locker->pid = gettid(); + locker->tag = netdata_thread_tag(); + locker->refcount = 1; + locker->lock = lock_type; + locker->got_it = false; + locker->file = file; + locker->function = function; + locker->line = line; - if(!doit) { - fprintf(stderr, - "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) with %zu x '%c' lock is not found.\n", - rwlock, - locker->pid, locker->tag, - locker->function, locker->line, locker->file, - locker->callers, locker->lock); - } - else { - // find the lock in the netdata_thread_locks[] - // and remove it - int i; - for(i = 0; i < NETDATA_THREAD_LOCKS_ARRAY_SIZE ;i++) { - if (netdata_thread_locks[i] == rwlock) - netdata_thread_locks[i] = NULL; - } - - if(end_s - locker->start_s >= NETDATA_TRACE_RWLOCKS_HOLD_TIME_TO_IGNORE_USEC) - fprintf(stderr, - "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) holded a '%c' for %llu usec.\n", - rwlock, - locker->pid, locker->tag, - locker->function, locker->line, locker->file, - locker->lock, end_s - locker->start_s); - - freez(locker); - } + __netdata_mutex_lock(&rwlock->lockers_mutex); + DOUBLE_LINKED_LIST_APPEND_UNSAFE(rwlock->lockers, locker, prev, next); + Pvoid_t *PValue = JudyLIns(&rwlock->lockers_pid_JudyL, locker->pid, PJE0); + *PValue = locker; + if (lock_type == RWLOCK_REQUEST_READ || lock_type == RWLOCK_REQUEST_TRYREAD) rwlock->readers++; + if (lock_type == RWLOCK_REQUEST_WRITE || lock_type == RWLOCK_REQUEST_TRYWRITE) rwlock->writers++; + __netdata_mutex_unlock(&rwlock->lockers_mutex); } - __netdata_mutex_unlock(&rwlock->lockers_mutex); + return locker; } -static netdata_rwlock_locker *find_rwlock_locker(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { - pid_t pid = gettid(); - netdata_rwlock_locker *p; - +static void remove_rwlock_locker(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock, netdata_rwlock_locker *locker) { __netdata_mutex_lock(&rwlock->lockers_mutex); - for(p = rwlock->lockers; p ;p = p->next) { - if(p->pid == pid) break; + locker->refcount--; + if(!locker->refcount) { + DOUBLE_LINKED_LIST_REMOVE_UNSAFE(rwlock->lockers, locker, prev, next); + JudyLDel(&rwlock->lockers_pid_JudyL, locker->pid, PJE0); + if (locker->lock == RWLOCK_REQUEST_READ || locker->lock == RWLOCK_REQUEST_TRYREAD) rwlock->readers--; + else if (locker->lock == RWLOCK_REQUEST_WRITE || locker->lock == RWLOCK_REQUEST_TRYWRITE) rwlock->writers--; + freez(locker); } __netdata_mutex_unlock(&rwlock->lockers_mutex); - - return p; -} - -static netdata_rwlock_locker *update_or_add_rwlock_locker(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, netdata_rwlock_locker *locker, char locktype) { - if(!locker) { - return add_rwlock_locker(file, function, line, rwlock, locktype); - } - else if(locker->lock == 'R' && locktype == 'R') { - __netdata_mutex_lock(&rwlock->lockers_mutex); - locker->callers++; - __netdata_mutex_unlock(&rwlock->lockers_mutex); - return locker; - } - else { - not_supported_by_posix_rwlocks(file, function, line, rwlock, locktype, "DEADLOCK - WANTS TO CHANGE LOCK TYPE BUT ALREADY HAS THIS LOCKED"); - return locker; - } } // ---------------------------------------------------------------------------- @@ -551,84 +401,41 @@ static netdata_rwlock_locker *update_or_add_rwlock_locker(const char *file, cons int netdata_rwlock_destroy_debug(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_destroy(%p) from %lu@%s, %s()", rwlock, line, file, function); - - if(rwlock->readers) - error("RW_LOCK: destroying a rwlock with %zu readers in it", rwlock->readers); - if(rwlock->writers) - error("RW_LOCK: destroying a rwlock with %zu writers in it", rwlock->writers); int ret = __netdata_rwlock_destroy(rwlock); if(!ret) { while (rwlock->lockers) remove_rwlock_locker(file, function, line, rwlock, rwlock->lockers); - - if (rwlock->readers) - error("RW_LOCK: internal error - empty rwlock with %zu readers in it", rwlock->readers); - if (rwlock->writers) - error("RW_LOCK: internal error - empty rwlock with %zu writers in it", rwlock->writers); } - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_destroy(%p) = %d, from %lu@%s, %s()", rwlock, ret, line, file, function); - return ret; } int netdata_rwlock_init_debug(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_init(%p) from %lu@%s, %s()", rwlock, line, file, function); int ret = __netdata_rwlock_init(rwlock); if(!ret) { __netdata_mutex_init(&rwlock->lockers_mutex); + rwlock->lockers_pid_JudyL = NULL; rwlock->lockers = NULL; rwlock->readers = 0; rwlock->writers = 0; } - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_init(%p) = %d, from %lu@%s, %s()", rwlock, ret, line, file, function); - return ret; } int netdata_rwlock_rdlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_rdlock(%p) from %lu@%s, %s()", rwlock, line, file, function); - - netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); - -#ifdef NETDATA_TRACE_RWLOCKS_LOG_NESTED - if(locker && locker->lock == 'R') { - log_rwlock_lockers(file, function, line, rwlock, "NESTED READ LOCK REQUEST", 'R'); - } -#endif // NETDATA_TRACE_RWLOCKS_LOG_NESTED + netdata_rwlock_locker *locker = add_rwlock_locker(file, function, line, rwlock, RWLOCK_REQUEST_READ); - int log = 0; - if(rwlock->writers) { - log_rwlock_lockers(file, function, line, rwlock, "WANTS", 'R'); - log = 1; - } - - usec_t start_s = now_monotonic_high_precision_usec(); int ret = __netdata_rwlock_rdlock(rwlock); - usec_t end_s = now_monotonic_high_precision_usec(); - - if(!ret) { - locker = update_or_add_rwlock_locker(file, function, line, rwlock, locker, 'R'); - if(log) log_rwlock_lockers(file, function, line, rwlock, "GOT", 'R'); - - } - - if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) - fprintf(stderr, - "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED for a READ lock for %llu usec.\n", - rwlock, - gettid(), netdata_thread_tag(), - function, line, file, - end_s - start_s); - - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_rdlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + if(!ret) + locker->got_it = true; + else + remove_rwlock_locker(file, function, line, rwlock, locker); return ret; } @@ -636,36 +443,13 @@ int netdata_rwlock_rdlock_debug(const char *file __maybe_unused, const char *fun int netdata_rwlock_wrlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_wrlock(%p) from %lu@%s, %s()", rwlock, line, file, function); - - netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); - if(locker) - not_supported_by_posix_rwlocks(file, function, line, rwlock, 'W', "DEADLOCK - WANTS A WRITE LOCK BUT ALREADY HAVE THIS LOCKED"); + netdata_rwlock_locker *locker = add_rwlock_locker(file, function, line, rwlock, RWLOCK_REQUEST_WRITE); - int log = 0; - if(rwlock->readers) { - log_rwlock_lockers(file, function, line, rwlock, "WANTS", 'W'); - log = 1; - } - - usec_t start_s = now_monotonic_high_precision_usec(); int ret = __netdata_rwlock_wrlock(rwlock); - usec_t end_s = now_monotonic_high_precision_usec(); - - if(!ret){ - locker = update_or_add_rwlock_locker(file, function, line, rwlock, locker, 'W'); - if(log) log_rwlock_lockers(file, function, line, rwlock, "GOT", 'W'); - } - - if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) - fprintf(stderr, - "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED for a WRITE lock for %llu usec.\n", - rwlock, - gettid(), netdata_thread_tag(), - function, line, file, - end_s - start_s); - - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_wrlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + if(!ret) + locker->got_it = true; + else + remove_rwlock_locker(file, function, line, rwlock, locker); return ret; } @@ -673,83 +457,42 @@ int netdata_rwlock_wrlock_debug(const char *file __maybe_unused, const char *fun int netdata_rwlock_unlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_unlock(%p) from %lu@%s, %s()", rwlock, line, file, function); - netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); + if(unlikely(!locker)) - not_supported_by_posix_rwlocks(file, function, line, rwlock, 'U', "UNLOCK WITHOUT LOCK"); + fatal("UNLOCK WITHOUT LOCK"); - usec_t start_s = now_monotonic_high_precision_usec(); int ret = __netdata_rwlock_unlock(rwlock); - usec_t end_s = now_monotonic_high_precision_usec(); - - if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) - fprintf(stderr, - "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED to UNLOCK for %llu usec.\n", - rwlock, - gettid(), netdata_thread_tag(), - function, line, file, - end_s - start_s); - - if(likely(!ret && locker)) remove_rwlock_locker(file, function, line, rwlock, locker); - - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_unlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + if(likely(!ret)) + remove_rwlock_locker(file, function, line, rwlock, locker); return ret; } int netdata_rwlock_tryrdlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_tryrdlock(%p) from %lu@%s, %s()", rwlock, line, file, function); - netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); - if(locker && locker->lock == 'W') - not_supported_by_posix_rwlocks(file, function, line, rwlock, 'R', "DEADLOCK - WANTS A READ LOCK BUT IT HAS A WRITE LOCK ALREADY"); + netdata_rwlock_locker *locker = add_rwlock_locker(file, function, line, rwlock, RWLOCK_REQUEST_TRYREAD); - usec_t start_s = now_monotonic_high_precision_usec(); int ret = __netdata_rwlock_tryrdlock(rwlock); - usec_t end_s = now_monotonic_high_precision_usec(); - if(!ret) - locker = update_or_add_rwlock_locker(file, function, line, rwlock, locker, 'R'); - - if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) - fprintf(stderr, - "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED to TRYREAD for %llu usec.\n", - rwlock, - gettid(), netdata_thread_tag(), - function, line, file, - end_s - start_s); - - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_tryrdlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + locker->got_it = true; + else + remove_rwlock_locker(file, function, line, rwlock, locker); return ret; } int netdata_rwlock_trywrlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_trywrlock(%p) from %lu@%s, %s()", rwlock, line, file, function); - netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); - if(locker) - not_supported_by_posix_rwlocks(file, function, line, rwlock, 'W', "ALREADY HAS THIS LOCK"); + netdata_rwlock_locker *locker = add_rwlock_locker(file, function, line, rwlock, RWLOCK_REQUEST_TRYWRITE); - usec_t start_s = now_monotonic_high_precision_usec(); int ret = __netdata_rwlock_trywrlock(rwlock); - usec_t end_s = now_monotonic_high_precision_usec(); - if(!ret) - locker = update_or_add_rwlock_locker(file, function, line, rwlock, locker, 'W'); - - if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) - fprintf(stderr, - "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED to TRYWRITE for %llu usec.\n", - rwlock, - gettid(), netdata_thread_tag(), - function, line, file, - end_s - start_s); - - debug(D_LOCKS, "RW_LOCK: netdata_rwlock_trywrlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + locker->got_it = true; + else + remove_rwlock_locker(file, function, line, rwlock, locker); return ret; } diff --git a/libnetdata/locks/locks.h b/libnetdata/locks/locks.h index 4d2d1655c..89b110d5e 100644 --- a/libnetdata/locks/locks.h +++ b/libnetdata/locks/locks.h @@ -11,24 +11,39 @@ typedef pthread_mutex_t netdata_mutex_t; typedef struct netdata_spinlock { bool locked; +#ifdef NETDATA_INTERNAL_CHECKS + pid_t locker_pid; + size_t spins; +#endif } SPINLOCK; -#define NETDATA_SPINLOCK_INITIALIZER (SPINLOCK){ .locked = false } + +#define NETDATA_SPINLOCK_INITIALIZER \ + { .locked = false } + void netdata_spinlock_init(SPINLOCK *spinlock); void netdata_spinlock_lock(SPINLOCK *spinlock); void netdata_spinlock_unlock(SPINLOCK *spinlock); +bool netdata_spinlock_trylock(SPINLOCK *spinlock); #ifdef NETDATA_TRACE_RWLOCKS + +typedef enum { + RWLOCK_REQUEST_READ = (1 << 0), + RWLOCK_REQUEST_WRITE = (1 << 1), + RWLOCK_REQUEST_TRYREAD = (1 << 2), + RWLOCK_REQUEST_TRYWRITE = (1 << 3), +} LOCKER_REQUEST; + typedef struct netdata_rwlock_locker { + LOCKER_REQUEST lock; + bool got_it; pid_t pid; + size_t refcount; const char *tag; - char lock; // 'R', 'W' const char *file; const char *function; unsigned long line; - size_t callers; - usec_t start_s; - struct netdata_rwlock_t **all_caller_locks; - struct netdata_rwlock_locker *next; + struct netdata_rwlock_locker *next, *prev; } netdata_rwlock_locker; typedef struct netdata_rwlock_t { @@ -37,6 +52,7 @@ typedef struct netdata_rwlock_t { size_t writers; // the number of writers on the lock netdata_mutex_t lockers_mutex; // a mutex to protect the linked list of the lock holding threads netdata_rwlock_locker *lockers; // the linked list of the lock holding threads + Pvoid_t lockers_pid_JudyL; } netdata_rwlock_t; #define NETDATA_RWLOCK_INITIALIZER { \ @@ -44,7 +60,8 @@ typedef struct netdata_rwlock_t { .readers = 0, \ .writers = 0, \ .lockers_mutex = NETDATA_MUTEX_INITIALIZER, \ - .lockers = NULL \ + .lockers = NULL, \ + .lockers_pid_JudyL = NULL, \ } #else // NETDATA_TRACE_RWLOCKS diff --git a/libnetdata/log/README.md b/libnetdata/log/README.md index a767dd446..5f9e5bc7b 100644 --- a/libnetdata/log/README.md +++ b/libnetdata/log/README.md @@ -1,5 +1,15 @@ +# Log + +The netdata log library supports debug, info, error and fatal error logging. +By default we have an access log, an error log and a collectors log. + diff --git a/libnetdata/log/log.c b/libnetdata/log/log.c index fb3b2d034..1dcdba9c2 100644 --- a/libnetdata/log/log.c +++ b/libnetdata/log/log.c @@ -14,6 +14,7 @@ uint64_t debug_flags = 0; int access_log_syslog = 1; int error_log_syslog = 1; +int collector_log_syslog = 1; int output_log_syslog = 1; // debug log int health_log_syslog = 1; @@ -23,11 +24,15 @@ FILE *stdaccess = NULL; int stdhealth_fd = -1; FILE *stdhealth = NULL; +int stdcollector_fd = -1; +FILE *stderror = NULL; + const char *stdaccess_filename = NULL; const char *stderr_filename = NULL; const char *stdout_filename = NULL; const char *facility_log = NULL; const char *stdhealth_filename = NULL; +const char *stdcollector_filename = NULL; #ifdef ENABLE_ACLK const char *aclklog_filename = NULL; @@ -573,8 +578,14 @@ void reopen_all_log_files() { if(stdout_filename) open_log_file(STDOUT_FILENO, stdout, stdout_filename, &output_log_syslog, 0, NULL); - if(stderr_filename) - open_log_file(STDERR_FILENO, stderr, stderr_filename, &error_log_syslog, 0, NULL); + if(stdcollector_filename) + open_log_file(STDERR_FILENO, stderr, stdcollector_filename, &collector_log_syslog, 0, NULL); + + if(stderr_filename) { + log_lock(); + stderror = open_log_file(stdcollector_fd, stderror, stderr_filename, &error_log_syslog, 1, &stdcollector_fd); + log_unlock(); + } #ifdef ENABLE_ACLK if (aclklog_enabled) @@ -593,7 +604,11 @@ void open_all_log_files() { open_log_file(STDIN_FILENO, stdin, "/dev/null", NULL, 0, NULL); open_log_file(STDOUT_FILENO, stdout, stdout_filename, &output_log_syslog, 0, NULL); - open_log_file(STDERR_FILENO, stderr, stderr_filename, &error_log_syslog, 0, NULL); + open_log_file(STDERR_FILENO, stderr, stdcollector_filename, &collector_log_syslog, 0, NULL); + + log_lock(); + stderror = open_log_file(stdcollector_fd, NULL, stderr_filename, &error_log_syslog, 1, &stdcollector_fd); + log_unlock(); #ifdef ENABLE_ACLK if(aclklog_enabled) @@ -616,7 +631,9 @@ int error_log_limit(int reset) { static time_t start = 0; static unsigned long counter = 0, prevented = 0; - // fprintf(stderr, "FLOOD: counter=%lu, allowed=%lu, backup=%lu, period=%llu\n", counter, error_log_errors_per_period, error_log_errors_per_period_backup, (unsigned long long)error_log_throttle_period); + FILE *fp = (!stderror) ? stderr : stderror; + + // fprintf(fp, "FLOOD: counter=%lu, allowed=%lu, backup=%lu, period=%llu\n", counter, error_log_errors_per_period, error_log_errors_per_period_backup, (unsigned long long)error_log_throttle_period); // do not throttle if the period is 0 if(error_log_throttle_period == 0) @@ -638,7 +655,7 @@ int error_log_limit(int reset) { char date[LOG_DATE_LENGTH]; log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf( - stderr, + fp, "%s: %s LOG FLOOD PROTECTION reset for process '%s' " "(prevented %lu logs in the last %"PRId64" seconds).\n", date, @@ -661,7 +678,7 @@ int error_log_limit(int reset) { char date[LOG_DATE_LENGTH]; log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf( - stderr, + fp, "%s: %s LOG FLOOD PROTECTION resuming logging from process '%s' " "(prevented %lu logs in the last %"PRId64" seconds).\n", date, @@ -685,7 +702,7 @@ int error_log_limit(int reset) { char date[LOG_DATE_LENGTH]; log_date(date, LOG_DATE_LENGTH, now_realtime_sec()); fprintf( - stderr, + fp, "%s: %s LOG FLOOD PROTECTION too many logs (%lu logs in %"PRId64" seconds, threshold is set to %lu logs " "in %"PRId64" seconds). Preventing more logs from process '%s' for %"PRId64" seconds.\n", date, @@ -758,9 +775,10 @@ void debug_int( const char *file, const char *function, const unsigned long line // ---------------------------------------------------------------------------- // info log -void info_int( const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) +void info_int( int is_collector, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { va_list args; + FILE *fp = (is_collector || !stderror) ? stderr : stderror; log_lock(); @@ -770,7 +788,7 @@ void info_int( const char *file __maybe_unused, const char *function __maybe_unu return; } - if(error_log_syslog) { + if(collector_log_syslog) { va_start( args, fmt ); vsyslog(LOG_INFO, fmt, args ); va_end( args ); @@ -781,14 +799,15 @@ void info_int( const char *file __maybe_unused, const char *function __maybe_unu va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS - fprintf(stderr, "%s: %s INFO : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, netdata_thread_tag(), line, file, function); + fprintf(fp, "%s: %s INFO : %s : (%04lu@%-20.20s:%-15.15s): ", + date, program_name, netdata_thread_tag(), line, file, function); #else - fprintf(stderr, "%s: %s INFO : %s : ", date, program_name, netdata_thread_tag()); + fprintf(fp, "%s: %s INFO : %s : ", date, program_name, netdata_thread_tag()); #endif - vfprintf( stderr, fmt, args ); + vfprintf(fp, fmt, args ); va_end( args ); - fputc('\n', stderr); + fputc('\n', fp); log_unlock(); } @@ -819,6 +838,8 @@ static const char *strerror_result_string(const char *a, const char *b) { (void) #endif void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { + FILE *fp = (!stderror) ? stderr : stderror; + if(erl->sleep_ut) sleep_usec(erl->sleep_ut); @@ -842,7 +863,7 @@ void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __ma return; } - if(error_log_syslog) { + if(collector_log_syslog) { va_start( args, fmt ); vsyslog(LOG_ERR, fmt, args ); va_end( args ); @@ -853,26 +874,29 @@ void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __ma va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS - fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function); + fprintf(fp, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", + date, program_name, prefix, netdata_thread_tag(), line, file, function); #else - fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); + fprintf(fp, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); #endif - vfprintf( stderr, fmt, args ); + vfprintf(fp, fmt, args ); va_end( args ); if(erl->count > 1) - fprintf(stderr, " (similar messages repeated %zu times in the last %llu secs)", erl->count, (unsigned long long)(erl->last_logged ? now - erl->last_logged : 0)); + fprintf(fp, " (similar messages repeated %zu times in the last %llu secs)", + erl->count, (unsigned long long)(erl->last_logged ? now - erl->last_logged : 0)); if(erl->sleep_ut) - fprintf(stderr, " (sleeping for %llu microseconds every time this happens)", erl->sleep_ut); + fprintf(fp, " (sleeping for %llu microseconds every time this happens)", erl->sleep_ut); if(__errno) { char buf[1024]; - fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); + fprintf(fp, + " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); errno = 0; } else - fputc('\n', stderr); + fputc('\n', fp); erl->last_logged = now; erl->count = 0; @@ -880,9 +904,10 @@ void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __ma log_unlock(); } -void error_int(const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { +void error_int(int is_collector, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, const char *fmt, ... ) { // save a copy of errno - just in case this function generates a new error int __errno = errno; + FILE *fp = (is_collector || !stderror) ? stderr : stderror; va_list args; @@ -894,7 +919,7 @@ void error_int(const char *prefix, const char *file __maybe_unused, const char * return; } - if(error_log_syslog) { + if(collector_log_syslog) { va_start( args, fmt ); vsyslog(LOG_ERR, fmt, args ); va_end( args ); @@ -905,20 +930,22 @@ void error_int(const char *prefix, const char *file __maybe_unused, const char * va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS - fprintf(stderr, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, prefix, netdata_thread_tag(), line, file, function); + fprintf(fp, "%s: %s %-5.5s : %s : (%04lu@%-20.20s:%-15.15s): ", + date, program_name, prefix, netdata_thread_tag(), line, file, function); #else - fprintf(stderr, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); + fprintf(fp, "%s: %s %-5.5s : %s : ", date, program_name, prefix, netdata_thread_tag()); #endif - vfprintf( stderr, fmt, args ); + vfprintf(fp, fmt, args ); va_end( args ); if(__errno) { char buf[1024]; - fprintf(stderr, " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); + fprintf(fp, + " (errno %d, %s)\n", __errno, strerror_result(strerror_r(__errno, buf, 1023), buf)); errno = 0; } else - fputc('\n', stderr); + fputc('\n', fp); log_unlock(); } @@ -933,23 +960,27 @@ static void crash_netdata(void) { #ifdef HAVE_BACKTRACE #define BT_BUF_SIZE 100 static void print_call_stack(void) { + FILE *fp = (!stderror) ? stderr : stderror; + int nptrs; void *buffer[BT_BUF_SIZE]; nptrs = backtrace(buffer, BT_BUF_SIZE); if(nptrs) - backtrace_symbols_fd(buffer, nptrs, fileno(stderr)); + backtrace_symbols_fd(buffer, nptrs, fileno(fp)); } #endif void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) { + FILE *fp = (!stderror) ? stderr : stderror; + // save a copy of errno - just in case this function generates a new error int __errno = errno; va_list args; const char *thread_tag; char os_threadname[NETDATA_THREAD_NAME_MAX + 1]; - if(error_log_syslog) { + if(collector_log_syslog) { va_start( args, fmt ); vsyslog(LOG_CRIT, fmt, args ); va_end( args ); @@ -970,15 +1001,16 @@ void fatal_int( const char *file, const char *function, const unsigned long line va_start( args, fmt ); #ifdef NETDATA_INTERNAL_CHECKS - fprintf(stderr, "%s: %s FATAL : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, thread_tag, line, file, function); + fprintf(fp, + "%s: %s FATAL : %s : (%04lu@%-20.20s:%-15.15s): ", date, program_name, thread_tag, line, file, function); #else - fprintf(stderr, "%s: %s FATAL : %s : ", date, program_name, thread_tag); + fprintf(fp, "%s: %s FATAL : %s : ", date, program_name, thread_tag); #endif - vfprintf( stderr, fmt, args ); + vfprintf(fp, fmt, args ); va_end( args ); perror(" # "); - fputc('\n', stderr); + fputc('\n', fp); log_unlock(); @@ -986,7 +1018,15 @@ void fatal_int( const char *file, const char *function, const unsigned long line snprintfz(action_data, 70, "%04lu@%-10.10s:%-15.15s/%d", line, file, function, __errno); char action_result[60+1]; - snprintfz(action_result, 60, "%s:%s", program_name, strncmp(thread_tag, "STREAM_RECEIVER", strlen("STREAM_RECEIVER")) ? thread_tag : "[x]"); + const char *tag_to_send = thread_tag; + + // anonymize thread names + if(strncmp(thread_tag, THREAD_TAG_STREAM_RECEIVER, strlen(THREAD_TAG_STREAM_RECEIVER)) == 0) + tag_to_send = THREAD_TAG_STREAM_RECEIVER; + if(strncmp(thread_tag, THREAD_TAG_STREAM_SENDER, strlen(THREAD_TAG_STREAM_SENDER)) == 0) + tag_to_send = THREAD_TAG_STREAM_SENDER; + + snprintfz(action_result, 60, "%s:%s", program_name, tag_to_send); send_statistics("FATAL", action_result, action_data); #ifdef HAVE_BACKTRACE diff --git a/libnetdata/log/log.h b/libnetdata/log/log.h index 11dab4c1d..3d9f0927d 100644 --- a/libnetdata/log/log.h +++ b/libnetdata/log/log.h @@ -61,10 +61,14 @@ extern FILE *stdaccess; extern int stdhealth_fd; extern FILE *stdhealth; +extern int stdcollector_fd; +extern FILE *stderror; + extern const char *stdaccess_filename; extern const char *stderr_filename; extern const char *stdout_filename; extern const char *stdhealth_filename; +extern const char *stdcollector_filename; extern const char *facility_log; #ifdef ENABLE_ACLK @@ -106,7 +110,7 @@ typedef struct error_with_limit { #ifdef NETDATA_INTERNAL_CHECKS #define debug(type, args...) do { if(unlikely(debug_flags & type)) debug_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) -#define internal_error(condition, args...) do { if(unlikely(condition)) error_int("IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0) +#define internal_error(condition, args...) do { if(unlikely(condition)) error_int(0, "IERR", __FILE__, __FUNCTION__, __LINE__, ##args); } while(0) #define internal_fatal(condition, args...) do { if(unlikely(condition)) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args); } while(0) #else #define debug(type, args...) debug_dummy() @@ -114,17 +118,20 @@ typedef struct error_with_limit { #define internal_fatal(args...) debug_dummy() #endif -#define info(args...) info_int(__FILE__, __FUNCTION__, __LINE__, ##args) -#define infoerr(args...) error_int("INFO", __FILE__, __FUNCTION__, __LINE__, ##args) -#define error(args...) error_int("ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) +#define info(args...) info_int(0, __FILE__, __FUNCTION__, __LINE__, ##args) +#define collector_info(args...) info_int(1, __FILE__, __FUNCTION__, __LINE__, ##args) +#define infoerr(args...) error_int(0, "INFO", __FILE__, __FUNCTION__, __LINE__, ##args) +#define error(args...) error_int(0, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) +#define collector_infoerr(args...) error_int(1, "INFO", __FILE__, __FUNCTION__, __LINE__, ##args) +#define collector_error(args...) error_int(1, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) #define error_limit(erl, args...) error_limit_int(erl, "ERROR", __FILE__, __FUNCTION__, __LINE__, ##args) #define fatal(args...) fatal_int(__FILE__, __FUNCTION__, __LINE__, ##args) #define fatal_assert(expr) ((expr) ? (void)(0) : fatal_int(__FILE__, __FUNCTION__, __LINE__, "Assertion `%s' failed", #expr)) void send_statistics(const char *action, const char *action_result, const char *action_data); void debug_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); -void info_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(4, 5); -void error_int( const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6); +void info_int( int is_collector, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(5, 6); +void error_int( int is_collector, const char *prefix, const char *file, const char *function, const unsigned long line, const char *fmt, ... ) PRINTFLIKE(6, 7); void error_limit_int(ERROR_LIMIT *erl, const char *prefix, const char *file __maybe_unused, const char *function __maybe_unused, unsigned long line __maybe_unused, const char *fmt, ... ) PRINTFLIKE(6, 7);; void fatal_int( const char *file, const char *function, const unsigned long line, const char *fmt, ... ) NORETURN PRINTFLIKE(4, 5); void log_access( const char *fmt, ... ) PRINTFLIKE(1, 2); diff --git a/libnetdata/onewayalloc/README.md b/libnetdata/onewayalloc/README.md index 1f459c263..3fa0d9fd3 100644 --- a/libnetdata/onewayalloc/README.md +++ b/libnetdata/onewayalloc/README.md @@ -1,6 +1,10 @@ # One Way Allocator diff --git a/libnetdata/onewayalloc/onewayalloc.c b/libnetdata/onewayalloc/onewayalloc.c index 59c3b6859..2f007b189 100644 --- a/libnetdata/onewayalloc/onewayalloc.c +++ b/libnetdata/onewayalloc/onewayalloc.c @@ -14,6 +14,12 @@ typedef struct owa_page { struct owa_page *last; // the last page on the list - we currently allocate on this } OWA_PAGE; +static size_t onewayalloc_total_memory = 0; + +size_t onewayalloc_allocated_memory(void) { + return __atomic_load_n(&onewayalloc_total_memory, __ATOMIC_RELAXED); +} + // allocations need to be aligned to CPU register width // https://en.wikipedia.org/wiki/Data_structure_alignment static inline size_t natural_alignment(size_t size) { @@ -60,6 +66,7 @@ static OWA_PAGE *onewayalloc_create_internal(OWA_PAGE *head, size_t size_hint) { // OWA_PAGE *page = (OWA_PAGE *)netdata_mmap(NULL, size, MAP_ANONYMOUS|MAP_PRIVATE, 0); // if(unlikely(!page)) fatal("Cannot allocate onewayalloc buffer of size %zu", size); OWA_PAGE *page = (OWA_PAGE *)mallocz(size); + __atomic_add_fetch(&onewayalloc_total_memory, size, __ATOMIC_RELAXED); page->size = size; page->offset = natural_alignment(sizeof(OWA_PAGE)); @@ -183,11 +190,17 @@ void onewayalloc_destroy(ONEWAYALLOC *owa) { // head->stats_mallocs_made, head->stats_mallocs_size, // head->stats_pages, head->stats_pages_size); + size_t total_size = 0; OWA_PAGE *page = head; while(page) { + total_size += page->size; + OWA_PAGE *p = page; page = page->next; + // munmap(p, p->size); freez(p); } + + __atomic_sub_fetch(&onewayalloc_total_memory, total_size, __ATOMIC_RELAXED); } diff --git a/libnetdata/onewayalloc/onewayalloc.h b/libnetdata/onewayalloc/onewayalloc.h index e536e0542..a415b063b 100644 --- a/libnetdata/onewayalloc/onewayalloc.h +++ b/libnetdata/onewayalloc/onewayalloc.h @@ -16,4 +16,6 @@ void onewayalloc_freez(ONEWAYALLOC *owa, const void *ptr); void *onewayalloc_doublesize(ONEWAYALLOC *owa, const void *src, size_t oldsize); +size_t onewayalloc_allocated_memory(void); + #endif // ONEWAYALLOC_H diff --git a/libnetdata/os.c b/libnetdata/os.c index 196288a6a..133c02248 100644 --- a/libnetdata/os.c +++ b/libnetdata/os.c @@ -6,61 +6,77 @@ // system functions // to retrieve settings of the system -int processors = 1; -long get_system_cpus(void) { - processors = 1; +#define CPUS_FOR_COLLECTORS 0 +#define CPUS_FOR_NETDATA 1 -#ifdef __APPLE__ - int32_t tmp_processors; +long get_system_cpus_with_cache(bool cache, bool for_netdata) { + static long processors[2] = { 0, 0 }; - if (unlikely(GETSYSCTL_BY_NAME("hw.logicalcpu", tmp_processors))) { - error("Assuming system has %d processors.", processors); - } else { - processors = tmp_processors; - } + int index = for_netdata ? CPUS_FOR_NETDATA : CPUS_FOR_COLLECTORS; + + if(likely(cache && processors[index] > 0)) + return processors[index]; + +#if defined(__APPLE__) || defined(__FreeBSD__) +#if defined(__APPLE__) +#define HW_CPU_NAME "hw.logicalcpu" +#else +#define HW_CPU_NAME "hw.ncpu" +#endif - return processors; -#elif __FreeBSD__ int32_t tmp_processors; + bool error = false; - if (unlikely(GETSYSCTL_BY_NAME("hw.ncpu", tmp_processors))) { - error("Assuming system has %d processors.", processors); - } else { - processors = tmp_processors; - } + if (unlikely(GETSYSCTL_BY_NAME(HW_CPU_NAME, tmp_processors))) + error = true; + else + processors[index] = tmp_processors; + + if(processors[index] < 1) { + processors[index] = 1; - return processors; + if(error) + error("Assuming system has %d processors.", processors[index]); + } + + return processors[index]; #else char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/stat", netdata_configured_host_prefix); + snprintfz(filename, FILENAME_MAX, "%s/proc/stat", + (!for_netdata && netdata_configured_host_prefix) ? netdata_configured_host_prefix : ""); procfile *ff = procfile_open(filename, NULL, PROCFILE_FLAG_DEFAULT); if(!ff) { - error("Cannot open file '%s'. Assuming system has %d processors.", filename, processors); - return processors; + processors[index] = 1; + error("Cannot open file '%s'. Assuming system has %ld processors.", filename, processors[index]); + return processors[index]; } ff = procfile_readall(ff); if(!ff) { - error("Cannot open file '%s'. Assuming system has %d processors.", filename, processors); - return processors; + processors[index] = 1; + error("Cannot open file '%s'. Assuming system has %ld processors.", filename, processors[index]); + return processors[index]; } - processors = 0; + long tmp_processors = 0; unsigned int i; for(i = 0; i < procfile_lines(ff); i++) { if(!procfile_linewords(ff, i)) continue; - if(strncmp(procfile_lineword(ff, i, 0), "cpu", 3) == 0) processors++; + if(strncmp(procfile_lineword(ff, i, 0), "cpu", 3) == 0) + tmp_processors++; } - processors--; - if(processors < 1) processors = 1; - procfile_close(ff); - debug(D_SYSTEM, "System has %d processors.", processors); - return processors; + processors[index] = --tmp_processors; + + if(processors[index] < 1) + processors[index] = 1; + + debug(D_SYSTEM, "System has %ld processors.", processors[index]); + return processors[index]; #endif /* __APPLE__, __FreeBSD__ */ } @@ -90,7 +106,7 @@ pid_t get_system_pid_max(void) { read = 1; char filename[FILENAME_MAX + 1]; - snprintfz(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", netdata_configured_host_prefix); + snprintfz(filename, FILENAME_MAX, "%s/proc/sys/kernel/pid_max", netdata_configured_host_prefix?netdata_configured_host_prefix:""); unsigned long long max = 0; if(read_single_number_file(filename, &max) != 0) { @@ -120,6 +136,56 @@ void get_system_HZ(void) { system_hz = (unsigned int) ticks; } +static inline unsigned long cpuset_str2ul(char **s) { + unsigned long n = 0; + char c; + for(c = **s; c >= '0' && c <= '9' ; c = *(++*s)) { + n *= 10; + n += c - '0'; + } + return n; +} + +unsigned long read_cpuset_cpus(const char *filename, long system_cpus) { + static char *buf = NULL; + static size_t buf_size = 0; + + if(!buf) { + buf_size = 100U + 6 * system_cpus; // taken from kernel/cgroup/cpuset.c + buf = mallocz(buf_size + 1); + } + + int ret = read_file(filename, buf, buf_size); + + if(!ret) { + char *s = buf; + unsigned long ncpus = 0; + + // parse the cpuset string and calculate the number of cpus the cgroup is allowed to use + while(*s) { + unsigned long n = cpuset_str2ul(&s); + ncpus++; + if(*s == ',') { + s++; + continue; + } + if(*s == '-') { + s++; + unsigned long m = cpuset_str2ul(&s); + ncpus += m - n; // calculate the number of cpus in the region + } + s++; + } + + if(!ncpus) + return 0; + + return ncpus; + } + + return 0; +} + // ===================================================================================================================== // FreeBSD diff --git a/libnetdata/os.h b/libnetdata/os.h index 67abf0be4..3cda79ed7 100644 --- a/libnetdata/os.h +++ b/libnetdata/os.h @@ -48,8 +48,10 @@ int getsysctl_by_name(const char *name, void *ptr, size_t len); extern const char *os_type; -extern int processors; -long get_system_cpus(void); +#define get_system_cpus() get_system_cpus_with_cache(true, false) +#define get_system_cpus_uncached() get_system_cpus_with_cache(false, false) +long get_system_cpus_with_cache(bool cache, bool for_netdata); +unsigned long read_cpuset_cpus(const char *filename, long system_cpus); extern pid_t pid_max; pid_t get_system_pid_max(void); diff --git a/libnetdata/popen/README.md b/libnetdata/popen/README.md index db4aefaed..804690d13 100644 --- a/libnetdata/popen/README.md +++ b/libnetdata/popen/README.md @@ -1,5 +1,15 @@ +# popen + +Process management library + + diff --git a/libnetdata/popen/popen.c b/libnetdata/popen/popen.c index 57f957f63..5ed74ae95 100644 --- a/libnetdata/popen/popen.c +++ b/libnetdata/popen/popen.c @@ -43,7 +43,7 @@ static void netdata_popen_tracking_add_pid_unsafe(pid_t pid) { mp = mallocz(sizeof(struct netdata_popen)); mp->pid = pid; - DOUBLE_LINKED_LIST_PREPEND_UNSAFE(netdata_popen_root, mp, prev, next); + DOUBLE_LINKED_LIST_PREPEND_ITEM_UNSAFE(netdata_popen_root, mp, prev, next); } // myp_del deletes pid if we're tracking. @@ -61,7 +61,7 @@ static void netdata_popen_tracking_del_pid(pid_t pid) { } if(mp) { - DOUBLE_LINKED_LIST_REMOVE_UNSAFE(netdata_popen_root, mp, prev, next); + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(netdata_popen_root, mp, prev, next); freez(mp); } else @@ -96,7 +96,7 @@ void netdata_popen_tracking_cleanup(void) { while(netdata_popen_root) { struct netdata_popen *mp = netdata_popen_root; - DOUBLE_LINKED_LIST_REMOVE_UNSAFE(netdata_popen_root, mp, prev, next); + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(netdata_popen_root, mp, prev, next); freez(mp); } @@ -163,8 +163,7 @@ static int popene_internal(volatile pid_t *pidptr, char **env, uint8_t flags, FI posix_spawnattr_t attr; posix_spawn_file_actions_t fa; - int stdin_fd_to_exclude_from_closing = -1; - int stdout_fd_to_exclude_from_closing = -1; + unsigned int fds_to_exclude_from_closing = OPEN_FD_EXCLUDE_STDERR; if(posix_spawn_file_actions_init(&fa)) { error("POPEN: posix_spawn_file_actions_init() failed."); @@ -195,7 +194,7 @@ static int popene_internal(volatile pid_t *pidptr, char **env, uint8_t flags, FI if (posix_spawn_file_actions_addopen(&fa, STDIN_FILENO, "/dev/null", O_RDONLY, 0)) { error("POPEN: posix_spawn_file_actions_addopen() on stdin to /dev/null failed."); // this is not a fatal error - stdin_fd_to_exclude_from_closing = STDIN_FILENO; + fds_to_exclude_from_closing |= OPEN_FD_EXCLUDE_STDIN; } } @@ -222,16 +221,13 @@ static int popene_internal(volatile pid_t *pidptr, char **env, uint8_t flags, FI if (posix_spawn_file_actions_addopen(&fa, STDOUT_FILENO, "/dev/null", O_WRONLY, 0)) { error("POPEN: posix_spawn_file_actions_addopen() on stdout to /dev/null failed."); // this is not a fatal error - stdout_fd_to_exclude_from_closing = STDOUT_FILENO; + fds_to_exclude_from_closing |= OPEN_FD_EXCLUDE_STDOUT; } } if(flags & POPEN_FLAG_CLOSE_FD) { // Mark all files to be closed by the exec() stage of posix_spawn() - for(int i = (int)(sysconf(_SC_OPEN_MAX) - 1); i >= 0; i--) { - if(likely(i != STDERR_FILENO && i != stdin_fd_to_exclude_from_closing && i != stdout_fd_to_exclude_from_closing)) - (void)fcntl(i, F_SETFD, FD_CLOEXEC); - } + for_each_open_fd(OPEN_FD_ACTION_FD_CLOEXEC, fds_to_exclude_from_closing); } attr_rc = posix_spawnattr_init(&attr); diff --git a/libnetdata/procfile/README.md b/libnetdata/procfile/README.md index 65638030d..8610e77e5 100644 --- a/libnetdata/procfile/README.md +++ b/libnetdata/procfile/README.md @@ -1,6 +1,10 @@ # PROCFILE @@ -28,7 +32,7 @@ For each iteration, the caller: - calls `procfile_readall()` to read updated contents. This call also rewinds (`lseek()` to 0) before reading it. - For every file, a [BUFFER](/libnetdata/buffer/README.md) is used that is automatically adjusted to fit the entire + For every file, a [BUFFER](https://github.com/netdata/netdata/blob/master/libnetdata/buffer/README.md) is used that is automatically adjusted to fit the entire file contents of the file. So the file is read with a single `read()` call (providing atomicity / consistency when the data are read from the kernel). diff --git a/libnetdata/procfile/procfile.c b/libnetdata/procfile/procfile.c index eb04316c3..cdf0f9723 100644 --- a/libnetdata/procfile/procfile.c +++ b/libnetdata/procfile/procfile.c @@ -296,7 +296,8 @@ procfile *procfile_readall(procfile *ff) { debug(D_PROCFILE, "Reading file '%s', from position %zd with length %zd", procfile_filename(ff), s, (ssize_t)(ff->size - s)); r = read(ff->fd, &ff->data[s], ff->size - s); if(unlikely(r == -1)) { - if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot read from file '%s' on fd %d", procfile_filename(ff), ff->fd); + if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) collector_error(PF_PREFIX ": Cannot read from file '%s' on fd %d", procfile_filename(ff), ff->fd); + else if(unlikely(ff->flags & PROCFILE_FLAG_ERROR_ON_ERROR_LOG)) error(PF_PREFIX ": Cannot read from file '%s' on fd %d", procfile_filename(ff), ff->fd); procfile_close(ff); return NULL; } @@ -306,7 +307,8 @@ procfile *procfile_readall(procfile *ff) { // debug(D_PROCFILE, "Rewinding file '%s'", ff->filename); if(unlikely(lseek(ff->fd, 0, SEEK_SET) == -1)) { - if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot rewind on file '%s'.", procfile_filename(ff)); + if(unlikely(!(ff->flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) collector_error(PF_PREFIX ": Cannot rewind on file '%s'.", procfile_filename(ff)); + else if(unlikely(ff->flags & PROCFILE_FLAG_ERROR_ON_ERROR_LOG)) error(PF_PREFIX ": Cannot rewind on file '%s'.", procfile_filename(ff)); procfile_close(ff); return NULL; } @@ -403,7 +405,8 @@ procfile *procfile_open(const char *filename, const char *separators, uint32_t f int fd = open(filename, procfile_open_flags, 0666); if(unlikely(fd == -1)) { - if(unlikely(!(flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) error(PF_PREFIX ": Cannot open file '%s'", filename); + if(unlikely(!(flags & PROCFILE_FLAG_NO_ERROR_ON_FILE_IO))) collector_error(PF_PREFIX ": Cannot open file '%s'", filename); + else if(unlikely(flags & PROCFILE_FLAG_ERROR_ON_ERROR_LOG)) error(PF_PREFIX ": Cannot open file '%s'", filename); return NULL; } diff --git a/libnetdata/procfile/procfile.h b/libnetdata/procfile/procfile.h index cae4ad484..8db5b45f4 100644 --- a/libnetdata/procfile/procfile.h +++ b/libnetdata/procfile/procfile.h @@ -34,8 +34,9 @@ typedef struct { // ---------------------------------------------------------------------------- // The procfile -#define PROCFILE_FLAG_DEFAULT 0x00000000 -#define PROCFILE_FLAG_NO_ERROR_ON_FILE_IO 0x00000001 +#define PROCFILE_FLAG_DEFAULT 0x00000000 // To store inside `collector.log` +#define PROCFILE_FLAG_NO_ERROR_ON_FILE_IO 0x00000001 // Do not store nothing +#define PROCFILE_FLAG_ERROR_ON_ERROR_LOG 0x00000002 // Store inside `error.log` typedef enum __attribute__ ((__packed__)) procfile_separator { PF_CHAR_IS_SEPARATOR, diff --git a/libnetdata/required_dummies.h b/libnetdata/required_dummies.h index ad1e8fb84..5a0d4e050 100644 --- a/libnetdata/required_dummies.h +++ b/libnetdata/required_dummies.h @@ -36,6 +36,7 @@ int health_variable_lookup(STRING *variable, struct rrdcalc *rc, NETDATA_DOUBLE void rrdset_thread_rda_free(void){}; void sender_thread_buffer_free(void){}; void query_target_free(void){}; +void service_exits(void){}; // required by get_system_cpus() char *netdata_configured_host_prefix = ""; diff --git a/libnetdata/simple_pattern/README.md b/libnetdata/simple_pattern/README.md index cb377f84e..a0a7cf688 100644 --- a/libnetdata/simple_pattern/README.md +++ b/libnetdata/simple_pattern/README.md @@ -1,10 +1,14 @@ -# Netdata simple patterns +# Simple patterns Unix prefers regular expressions. But they are just too hard, too cryptic to use, write and understand. diff --git a/libnetdata/socket/security.c b/libnetdata/socket/security.c index 88b3f6d93..7c5092150 100644 --- a/libnetdata/socket/security.c +++ b/libnetdata/socket/security.c @@ -310,7 +310,7 @@ int security_process_accept(SSL *ssl,int msg) { int counter = 0; while ((err = ERR_get_error()) != 0) { ERR_error_string_n(err, buf, sizeof(buf)); - info("%d SSL Handshake error (%s) on socket %d ", counter++, ERR_error_string((long)SSL_get_error(ssl, test), NULL), sock); + error("%d SSL Handshake error (%s) on socket %d", counter++, ERR_error_string((long)SSL_get_error(ssl, test), NULL), sock); } return NETDATA_SSL_NO_HANDSHAKE; } diff --git a/libnetdata/socket/socket.c b/libnetdata/socket/socket.c index 40271b623..69124b949 100644 --- a/libnetdata/socket/socket.c +++ b/libnetdata/socket/socket.c @@ -926,13 +926,17 @@ ssize_t netdata_ssl_read(SSL *ssl, void *buf, size_t num) { int bytes, err, retries = 0; //do { - bytes = SSL_read(ssl, buf, (int)num); - err = SSL_get_error(ssl, bytes); - retries++; - //} while (bytes <= 0 && (err == SSL_ERROR_WANT_READ)); + bytes = SSL_read(ssl, buf, (int)num); + err = SSL_get_error(ssl, bytes); + retries++; + //} while (bytes <= 0 && err == SSL_ERROR_WANT_READ); - if(unlikely(bytes <= 0)) - error("SSL_read() returned %d bytes, SSL error %d", bytes, err); + if(unlikely(bytes <= 0)) { + if (err == SSL_ERROR_WANT_WRITE || err == SSL_ERROR_WANT_READ) { + bytes = 0; + } else + error("SSL_write() returned %d bytes, SSL error %d", bytes, err); + } if(retries > 1) error_limit(&erl, "SSL_read() retried %d times", retries); @@ -947,17 +951,21 @@ ssize_t netdata_ssl_write(SSL *ssl, const void *buf, size_t num) { size_t total = 0; //do { - bytes = SSL_write(ssl, (uint8_t *)buf + total, (int)(num - total)); - err = SSL_get_error(ssl, bytes); - retries++; + bytes = SSL_write(ssl, (uint8_t *)buf + total, (int)(num - total)); + err = SSL_get_error(ssl, bytes); + retries++; - if(bytes > 0) - total += bytes; + if(bytes > 0) + total += bytes; //} while ((bytes <= 0 && (err == SSL_ERROR_WANT_WRITE)) || (bytes > 0 && total < num)); - if(unlikely(bytes <= 0)) - error("SSL_write() returned %d bytes, SSL error %d", bytes, err); + if(unlikely(bytes <= 0)) { + if (err == SSL_ERROR_WANT_WRITE || err == SSL_ERROR_WANT_READ) { + bytes = 0; + } else + error("SSL_write() returned %d bytes, SSL error %d", bytes, err); + } if(retries > 1) error_limit(&erl, "SSL_write() retried %d times", retries); @@ -1633,6 +1641,7 @@ void poll_events(LISTEN_SOCKETS *sockets , int (*rcv_callback)(POLLINFO * /*pi*/, short int * /*events*/) , int (*snd_callback)(POLLINFO * /*pi*/, short int * /*events*/) , void (*tmr_callback)(void * /*timer_data*/) + , bool (*check_to_stop_callback)(void) , SIMPLE_PATTERN *access_list , int allow_dns , void *data @@ -1715,7 +1724,7 @@ void poll_events(LISTEN_SOCKETS *sockets netdata_thread_cleanup_push(poll_events_cleanup, &p); - while(!netdata_exit) { + while(!check_to_stop_callback()) { if(unlikely(timer_usec)) { now_usec = now_boottime_usec(); diff --git a/libnetdata/socket/socket.h b/libnetdata/socket/socket.h index 282324273..9577453d5 100644 --- a/libnetdata/socket/socket.h +++ b/libnetdata/socket/socket.h @@ -10,18 +10,18 @@ #endif typedef enum web_client_acl { - WEB_CLIENT_ACL_NONE = 0, - WEB_CLIENT_ACL_NOCHECK = 0, - WEB_CLIENT_ACL_DASHBOARD = 1 << 0, - WEB_CLIENT_ACL_REGISTRY = 1 << 1, - WEB_CLIENT_ACL_BADGE = 1 << 2, - WEB_CLIENT_ACL_MGMT = 1 << 3, - WEB_CLIENT_ACL_STREAMING = 1 << 4, - WEB_CLIENT_ACL_NETDATACONF = 1 << 5, - WEB_CLIENT_ACL_SSL_OPTIONAL = 1 << 6, - WEB_CLIENT_ACL_SSL_FORCE = 1 << 7, - WEB_CLIENT_ACL_SSL_DEFAULT = 1 << 8, - WEB_CLIENT_ACL_ACLK = 1 << 9, + WEB_CLIENT_ACL_NONE = (0), + WEB_CLIENT_ACL_NOCHECK = (0), + WEB_CLIENT_ACL_DASHBOARD = (1 << 0), + WEB_CLIENT_ACL_REGISTRY = (1 << 1), + WEB_CLIENT_ACL_BADGE = (1 << 2), + WEB_CLIENT_ACL_MGMT = (1 << 3), + WEB_CLIENT_ACL_STREAMING = (1 << 4), + WEB_CLIENT_ACL_NETDATACONF = (1 << 5), + WEB_CLIENT_ACL_SSL_OPTIONAL = (1 << 6), + WEB_CLIENT_ACL_SSL_FORCE = (1 << 7), + WEB_CLIENT_ACL_SSL_DEFAULT = (1 << 8), + WEB_CLIENT_ACL_ACLK = (1 << 9), } WEB_CLIENT_ACL; #define WEB_CLIENT_ACL_ALL 0xFFFF @@ -202,6 +202,7 @@ void poll_events(LISTEN_SOCKETS *sockets , int (*rcv_callback)(POLLINFO *pi, short int *events) , int (*snd_callback)(POLLINFO *pi, short int *events) , void (*tmr_callback)(void *timer_data) + , bool (*check_to_stop_callback)(void) , SIMPLE_PATTERN *access_list , int allow_dns , void *data diff --git a/libnetdata/statistical/README.md b/libnetdata/statistical/README.md index f254081d2..8fa101f0e 100644 --- a/libnetdata/statistical/README.md +++ b/libnetdata/statistical/README.md @@ -1,5 +1,12 @@ +# Statistical functions +A library for easy and fast calculations of statistical measurements like average, median etc. diff --git a/libnetdata/storage_number/README.md b/libnetdata/storage_number/README.md index 4cd19a98b..da2c3ccfd 100644 --- a/libnetdata/storage_number/README.md +++ b/libnetdata/storage_number/README.md @@ -1,6 +1,10 @@ # Netdata storage number diff --git a/libnetdata/string/README.md b/libnetdata/string/README.md index e73ab2696..b1c6e61c3 100644 --- a/libnetdata/string/README.md +++ b/libnetdata/string/README.md @@ -1,5 +1,10 @@ # STRING @@ -17,4 +22,4 @@ index lookup to find it. Once there is a `STRING *`, the actual `const char *` can be accessed with `string2str()`. -All STRING should be constant. Changing the contents of a `const char *` that has been acquired by `string2str()` should never happen. \ No newline at end of file +All STRING should be constant. Changing the contents of a `const char *` that has been acquired by `string2str()` should never happen. diff --git a/libnetdata/string/string.c b/libnetdata/string/string.c index d2db8aab4..4e232523c 100644 --- a/libnetdata/string/string.c +++ b/libnetdata/string/string.c @@ -56,14 +56,29 @@ static struct string_hashtable { #define string_stats_atomic_decrement(var) __atomic_sub_fetch(&string_base.var, 1, __ATOMIC_RELAXED) void string_statistics(size_t *inserts, size_t *deletes, size_t *searches, size_t *entries, size_t *references, size_t *memory, size_t *duplications, size_t *releases) { - *inserts = string_base.inserts; - *deletes = string_base.deletes; - *searches = string_base.searches; - *entries = (size_t)string_base.entries; - *references = (size_t)string_base.active_references; - *memory = (size_t)string_base.memory; - *duplications = string_base.duplications; - *releases = string_base.releases; + if(inserts) + *inserts = string_base.inserts; + + if(deletes) + *deletes = string_base.deletes; + + if(searches) + *searches = string_base.searches; + + if(entries) + *entries = (size_t)string_base.entries; + + if(references) + *references = (size_t)string_base.active_references; + + if(memory) + *memory = (size_t)string_base.memory; + + if(duplications) + *duplications = string_base.duplications; + + if(releases) + *releases = string_base.releases; } #define string_entry_acquire(se) __atomic_add_fetch(&((se)->refcount), 1, __ATOMIC_SEQ_CST); @@ -186,7 +201,7 @@ static inline STRING *string_index_insert(const char *str, size_t length) { *ptr = string; string_base.inserts++; string_base.entries++; - string_base.memory += (long)mem_size; + string_base.memory += (long)(mem_size + JUDYHS_INDEX_SIZE_ESTIMATE(length)); } else { // the item is already in the index @@ -240,7 +255,7 @@ static inline void string_index_delete(STRING *string) { size_t mem_size = sizeof(STRING) + string->length; string_base.deletes++; string_base.entries--; - string_base.memory -= (long)mem_size; + string_base.memory -= (long)(mem_size + JUDYHS_INDEX_SIZE_ESTIMATE(string->length)); freez(string); } diff --git a/libnetdata/threads/README.md b/libnetdata/threads/README.md index 75ab11b1e..71979feac 100644 --- a/libnetdata/threads/README.md +++ b/libnetdata/threads/README.md @@ -1,5 +1,12 @@ +# Threads +Netdata uses a custom threads library diff --git a/libnetdata/threads/threads.c b/libnetdata/threads/threads.c index 5c3d2675c..16de45fd1 100644 --- a/libnetdata/threads/threads.c +++ b/libnetdata/threads/threads.c @@ -2,8 +2,7 @@ #include "../libnetdata.h" -static size_t default_stacksize = 0, wanted_stacksize = 0; -static pthread_attr_t *attr = NULL; +static pthread_attr_t *netdata_threads_attr = NULL; // ---------------------------------------------------------------------------- // per thread data @@ -69,46 +68,48 @@ size_t netdata_threads_init(void) { // -------------------------------------------------------------------- // get the required stack size of the threads of netdata - attr = callocz(1, sizeof(pthread_attr_t)); - i = pthread_attr_init(attr); + netdata_threads_attr = callocz(1, sizeof(pthread_attr_t)); + i = pthread_attr_init(netdata_threads_attr); if(i != 0) fatal("pthread_attr_init() failed with code %d.", i); - i = pthread_attr_getstacksize(attr, &default_stacksize); + size_t stacksize = 0; + i = pthread_attr_getstacksize(netdata_threads_attr, &stacksize); if(i != 0) fatal("pthread_attr_getstacksize() failed with code %d.", i); else - debug(D_OPTIONS, "initial pthread stack size is %zu bytes", default_stacksize); + debug(D_OPTIONS, "initial pthread stack size is %zu bytes", stacksize); - return default_stacksize; + return stacksize; } // ---------------------------------------------------------------------------- // late initialization void netdata_threads_init_after_fork(size_t stacksize) { - wanted_stacksize = stacksize; int i; // ------------------------------------------------------------------------ - // set default pthread stack size + // set pthread stack size - if(attr && default_stacksize < wanted_stacksize && wanted_stacksize > 0) { - i = pthread_attr_setstacksize(attr, wanted_stacksize); + if(netdata_threads_attr && stacksize > (size_t)PTHREAD_STACK_MIN) { + i = pthread_attr_setstacksize(netdata_threads_attr, stacksize); if(i != 0) - fatal("pthread_attr_setstacksize() to %zu bytes, failed with code %d.", wanted_stacksize, i); + error("pthread_attr_setstacksize() to %zu bytes, failed with code %d.", stacksize, i); else - debug(D_SYSTEM, "Successfully set pthread stacksize to %zu bytes", wanted_stacksize); + info("Set threads stack size to %zu bytes", stacksize); } + else + error("Invalid pthread stacksize %zu", stacksize); } - // ---------------------------------------------------------------------------- // netdata_thread_create -extern void rrdset_thread_rda_free(void); -extern void sender_thread_buffer_free(void); -extern void query_target_free(void); +void rrdset_thread_rda_free(void); +void sender_thread_buffer_free(void); +void query_target_free(void); +void service_exits(void); static void thread_cleanup(void *ptr) { if(netdata_thread != ptr) { @@ -123,6 +124,8 @@ static void thread_cleanup(void *ptr) { rrdset_thread_rda_free(); query_target_free(); thread_cache_destroy(); + service_exits(); + worker_unregister(); freez((void *)netdata_thread->tag); netdata_thread->tag = NULL; @@ -214,7 +217,7 @@ int netdata_thread_create(netdata_thread_t *thread, const char *tag, NETDATA_THR info->start_routine = start_routine; info->options = options; - int ret = pthread_create(thread, attr, thread_start, info); + int ret = pthread_create(thread, netdata_threads_attr, thread_start, info); if(ret != 0) error("failed to create new thread for %s. pthread_create() failed with code %d", tag, ret); diff --git a/libnetdata/url/README.md b/libnetdata/url/README.md index bd289d955..cca6f8731 100644 --- a/libnetdata/url/README.md +++ b/libnetdata/url/README.md @@ -1,5 +1,14 @@ +# URL + +The URL library contains common functions useful for URLs, like conversion from/to hex, +URL encode/decode and query string parsing. diff --git a/libnetdata/worker_utilization/worker_utilization.c b/libnetdata/worker_utilization/worker_utilization.c index afaff209b..8028e3a21 100644 --- a/libnetdata/worker_utilization/worker_utilization.c +++ b/libnetdata/worker_utilization/worker_utilization.c @@ -52,6 +52,7 @@ struct workers_workname { // this is what we add to Ju static struct workers_globals { SPINLOCK spinlock; Pvoid_t worknames_JudyHS; + size_t memory; } workers_globals = { // workers globals, the base of all worknames .spinlock = NETDATA_SPINLOCK_INITIALIZER, // a lock for the worknames index @@ -60,6 +61,14 @@ static struct workers_globals { static __thread struct worker *worker = NULL; // the current thread worker +size_t workers_allocated_memory(void) { + netdata_spinlock_lock(&workers_globals.spinlock); + size_t memory = workers_globals.memory; + netdata_spinlock_unlock(&workers_globals.spinlock); + + return memory; +} + void worker_register(const char *name) { if(unlikely(worker)) return; @@ -76,20 +85,22 @@ void worker_register(const char *name) { size_t name_size = strlen(name) + 1; netdata_spinlock_lock(&workers_globals.spinlock); - Pvoid_t *PValue = JudyHSGet(workers_globals.worknames_JudyHS, (void *)name, name_size); - if(!PValue) - PValue = JudyHSIns(&workers_globals.worknames_JudyHS, (void *)name, name_size, PJE0); + workers_globals.memory += sizeof(struct worker) + strlen(worker->tag) + 1 + strlen(worker->workname) + 1; + + Pvoid_t *PValue = JudyHSIns(&workers_globals.worknames_JudyHS, (void *)name, name_size, PJE0); struct workers_workname *workname = *PValue; if(!workname) { workname = mallocz(sizeof(struct workers_workname)); - workname->spinlock = NETDATA_SPINLOCK_INITIALIZER; + netdata_spinlock_init(&workname->spinlock); workname->base = NULL; *PValue = workname; + + workers_globals.memory += sizeof(struct workers_workname) + JUDYHS_INDEX_SIZE_ESTIMATE(name_size); } netdata_spinlock_lock(&workname->spinlock); - DOUBLE_LINKED_LIST_APPEND_UNSAFE(workname->base, worker, prev, next); + DOUBLE_LINKED_LIST_APPEND_ITEM_UNSAFE(workname->base, worker, prev, next); netdata_spinlock_unlock(&workname->spinlock); netdata_spinlock_unlock(&workers_globals.spinlock); @@ -130,14 +141,16 @@ void worker_unregister(void) { if(PValue) { struct workers_workname *workname = *PValue; netdata_spinlock_lock(&workname->spinlock); - DOUBLE_LINKED_LIST_REMOVE_UNSAFE(workname->base, worker, prev, next); + DOUBLE_LINKED_LIST_REMOVE_ITEM_UNSAFE(workname->base, worker, prev, next); netdata_spinlock_unlock(&workname->spinlock); if(!workname->base) { JudyHSDel(&workers_globals.worknames_JudyHS, (void *) worker->workname, workname_size, PJE0); freez(workname); + workers_globals.memory -= sizeof(struct workers_workname) + JUDYHS_INDEX_SIZE_ESTIMATE(workname_size); } } + workers_globals.memory -= sizeof(struct worker) + strlen(worker->tag) + 1 + strlen(worker->workname) + 1; netdata_spinlock_unlock(&workers_globals.spinlock); for(int i = 0; i < WORKER_UTILIZATION_MAX_JOB_TYPES ;i++) { diff --git a/libnetdata/worker_utilization/worker_utilization.h b/libnetdata/worker_utilization/worker_utilization.h index f1412e6b4..6745a010b 100644 --- a/libnetdata/worker_utilization/worker_utilization.h +++ b/libnetdata/worker_utilization/worker_utilization.h @@ -15,6 +15,7 @@ typedef enum { WORKER_METRIC_INCREMENTAL_TOTAL = 4, } WORKER_METRIC_TYPE; +size_t workers_allocated_memory(void); void worker_register(const char *name); void worker_register_job_name(size_t job_id, const char *name); void worker_register_job_custom_metric(size_t job_id, const char *name, const char *units, WORKER_METRIC_TYPE type); -- cgit v1.2.3