From 8020f71afd34d7696d7933659df2d763ab05542f Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 16:31:17 +0200 Subject: Adding upstream version 1.37.1. Signed-off-by: Daniel Baumann --- libnetdata/locks/Makefile.am | 8 + libnetdata/locks/README.md | 100 ++++++ libnetdata/locks/locks.c | 757 +++++++++++++++++++++++++++++++++++++++++++ libnetdata/locks/locks.h | 127 ++++++++ 4 files changed, 992 insertions(+) create mode 100644 libnetdata/locks/Makefile.am create mode 100644 libnetdata/locks/README.md create mode 100644 libnetdata/locks/locks.c create mode 100644 libnetdata/locks/locks.h (limited to 'libnetdata/locks') diff --git a/libnetdata/locks/Makefile.am b/libnetdata/locks/Makefile.am new file mode 100644 index 0000000..161784b --- /dev/null +++ b/libnetdata/locks/Makefile.am @@ -0,0 +1,8 @@ +# SPDX-License-Identifier: GPL-3.0-or-later + +AUTOMAKE_OPTIONS = subdir-objects +MAINTAINERCLEANFILES = $(srcdir)/Makefile.in + +dist_noinst_DATA = \ + README.md \ + $(NULL) diff --git a/libnetdata/locks/README.md b/libnetdata/locks/README.md new file mode 100644 index 0000000..9132edc --- /dev/null +++ b/libnetdata/locks/README.md @@ -0,0 +1,100 @@ + + +## How to trace netdata locks + +To enable tracing rwlocks in netdata, compile netdata by setting `CFLAGS="-DNETDATA_TRACE_RWLOCKS=1"`, like this: + +``` +CFLAGS="-O1 -ggdb -DNETDATA_TRACE_RWLOCKS=1" ./netdata-installer.sh +``` + +During compilation, the compiler will log: + +``` +libnetdata/locks/locks.c:105:2: warning: #warning NETDATA_TRACE_RWLOCKS ENABLED - EXPECT A LOT OF OUTPUT [-Wcpp] + 105 | #warning NETDATA_TRACE_RWLOCKS ENABLED - EXPECT A LOT OF OUTPUT + | ^~~~~~~ +``` + +Once compiled, netdata will do the following: + +Every call to `netdata_rwlock_*()` is now measured in time. + +### logging of slow locks/unlocks + +If any call takes more than 10 usec, it will be logged like this: + +``` +RW_LOCK ON LOCK 0x0x7fbe1f2e5190: 4157038, 'ACLK_Query_2' (function build_context_param_list() 99@web/api/formatters/rrd2json.c) WAITED to UNLOCK for 29 usec. +``` + +The time can be changed by setting this `-DNETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC=20` (or whatever number) to the CFLAGS. + +### logging of long hold times + +If any lock is holded for more than 10000 usec, it will be logged like this: + +``` +RW_LOCK ON LOCK 0x0x55a20afc1b20: 4187198, 'ANALYTICS' (function analytics_gather_mutable_meta_data() 532@daemon/analytics.c) holded a 'R' for 13232 usec. +``` + +The time can be changed by setting this `-DNETDATA_TRACE_RWLOCKS_HOLD_TIME_TO_IGNORE_USEC=20000` (or whatever number) to the CFLAGS. + +### logging for probable pauses (predictive) + +The library maintains a linked-list of all the lock holders (one entry per thread). For this linked-list a mutex is used. So every call to the r/w locks now also has a mutex lock. + +If any call is expected to pause the caller (ie the caller is attempting a read lock while there is a write lock in place and vice versa), the library will log something like this: + +``` +RW_LOCK ON LOCK 0x0x5651c9fcce20: 4190039 'HEALTH' (function health_execute_pending_updates() 661@health/health.c) WANTS a 'W' lock (while holding 1 rwlocks and 1 mutexes). +There are 7 readers and 0 writers are holding the lock: + => 1: RW_LOCK: process 4190091 'WEB_SERVER[static14]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 709847 usec. + => 2: RW_LOCK: process 4190079 'WEB_SERVER[static6]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 709869 usec. + => 3: RW_LOCK: process 4190084 'WEB_SERVER[static10]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 709948 usec. + => 4: RW_LOCK: process 4190076 'WEB_SERVER[static3]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710190 usec. + => 5: RW_LOCK: process 4190092 'WEB_SERVER[static15]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710195 usec. + => 6: RW_LOCK: process 4190077 'WEB_SERVER[static4]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710208 usec. + => 7: RW_LOCK: process 4190044 'WEB_SERVER[static1]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710221 usec. +``` + +And each of the above is paired with a `GOT` log, like this: + +``` +RW_LOCK ON LOCK 0x0x5651c9fcce20: 4190039 'HEALTH' (function health_execute_pending_updates() 661@health/health.c) GOT a 'W' lock (while holding 2 rwlocks and 1 mutexes). +There are 0 readers and 1 writers are holding the lock: + => 1: RW_LOCK: process 4190039 'HEALTH' (function health_execute_pending_updates() 661@health/health.c) is having 1 'W' lock for 36 usec. +``` + +Keep in mind that the lock and log are not atomic. The list of callers is indicative (and sometimes just empty because the original holders of the lock, unlocked it until we had the chance to print their names). + +### POSIX compliance check + +The library may also log messages about POSIX unsupported cases, like this: + +``` +RW_LOCK FATAL ON LOCK 0x0x622000109290: 3609368 'PLUGIN[proc]' (function __rrdset_check_rdlock() 10@database/rrdset.c) attempts to acquire a 'W' lock. +But it is not supported by POSIX because: ALREADY HAS THIS LOCK +At this attempt, the task is holding 1 rwlocks and 1 mutexes. +There are 1 readers and 0 writers are holding the lock requested now: + => 1: RW_LOCK: process 3609368 'PLUGIN[proc]' (function rrdset_done() 1398@database/rrdset.c) is having 1 'R' lock for 0 usec. +``` + +### nested read locks + +When compiled with `-DNETDATA_TRACE_RWLOCKS_LOG_NESTED=1` the library will also detect nested read locks and print them like this: + +``` +RW_LOCK ON LOCK 0x0x7ff6ea46d190: 4140225 'WEB_SERVER[static14]' (function rrdr_json_wrapper_begin() 34@web/api/formatters/json_wrapper.c) NESTED READ LOCK REQUEST a 'R' lock (while holding 1 rwlocks and 1 mutexes). +There are 5 readers and 0 writers are holding the lock: + => 1: RW_LOCK: process 4140225 'WEB_SERVER[static14]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 216667 usec. + => 2: RW_LOCK: process 4140211 'WEB_SERVER[static6]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec. + => 3: RW_LOCK: process 4140218 'WEB_SERVER[static8]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec. + => 4: RW_LOCK: process 4140224 'WEB_SERVER[static13]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec. + => 5: RW_LOCK: process 4140227 'WEB_SERVER[static16]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec. +``` + + + diff --git a/libnetdata/locks/locks.c b/libnetdata/locks/locks.c new file mode 100644 index 0000000..f7191be --- /dev/null +++ b/libnetdata/locks/locks.c @@ -0,0 +1,757 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "../libnetdata.h" + +#ifdef NETDATA_TRACE_RWLOCKS + +#ifndef NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC +#define NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC 10 +#endif + +#ifndef NETDATA_TRACE_RWLOCKS_HOLD_TIME_TO_IGNORE_USEC +#define NETDATA_TRACE_RWLOCKS_HOLD_TIME_TO_IGNORE_USEC 10000 +#endif + +#ifndef NETDATA_THREAD_LOCKS_ARRAY_SIZE +#define NETDATA_THREAD_LOCKS_ARRAY_SIZE 10 +#endif +static __thread netdata_rwlock_t *netdata_thread_locks[NETDATA_THREAD_LOCKS_ARRAY_SIZE]; + + +#endif // NETDATA_TRACE_RWLOCKS + +// ---------------------------------------------------------------------------- +// automatic thread cancelability management, based on locks + +static __thread int netdata_thread_first_cancelability = 0; +static __thread int netdata_thread_nested_disables = 0; + +static __thread size_t netdata_locks_acquired_rwlocks = 0; +static __thread size_t netdata_locks_acquired_mutexes = 0; + +inline void netdata_thread_disable_cancelability(void) { + int old; + int ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old); + if(ret != 0) + error("THREAD_CANCELABILITY: pthread_setcancelstate() on thread %s returned error %d", netdata_thread_tag(), ret); + else { + if(!netdata_thread_nested_disables) + netdata_thread_first_cancelability = old; + + netdata_thread_nested_disables++; + } +} + +inline void netdata_thread_enable_cancelability(void) { + if(netdata_thread_nested_disables < 1) { + error("THREAD_CANCELABILITY: netdata_thread_enable_cancelability(): invalid thread cancelability count %d on thread %s - results will be undefined - please report this!", + netdata_thread_nested_disables, netdata_thread_tag()); + } + else if(netdata_thread_nested_disables == 1) { + int old = 1; + int ret = pthread_setcancelstate(netdata_thread_first_cancelability, &old); + if(ret != 0) + error("THREAD_CANCELABILITY: pthread_setcancelstate() on thread %s returned error %d", netdata_thread_tag(), ret); + else { + if(old != PTHREAD_CANCEL_DISABLE) + error("THREAD_CANCELABILITY: netdata_thread_enable_cancelability(): old thread cancelability on thread %s was changed, expected DISABLED (%d), found %s (%d) - please report this!", netdata_thread_tag(), PTHREAD_CANCEL_DISABLE, (old == PTHREAD_CANCEL_ENABLE)?"ENABLED":"UNKNOWN", old); + } + + netdata_thread_nested_disables = 0; + } + else + netdata_thread_nested_disables--; +} + +// ---------------------------------------------------------------------------- +// mutex + +int __netdata_mutex_init(netdata_mutex_t *mutex) { + int ret = pthread_mutex_init(mutex, NULL); + if(unlikely(ret != 0)) + error("MUTEX_LOCK: failed to initialize (code %d).", ret); + return ret; +} + +int __netdata_mutex_destroy(netdata_mutex_t *mutex) { + int ret = pthread_mutex_destroy(mutex); + if(unlikely(ret != 0)) + error("MUTEX_LOCK: failed to destroy (code %d).", ret); + return ret; +} + +int __netdata_mutex_lock(netdata_mutex_t *mutex) { + netdata_thread_disable_cancelability(); + + int ret = pthread_mutex_lock(mutex); + if(unlikely(ret != 0)) { + netdata_thread_enable_cancelability(); + error("MUTEX_LOCK: failed to get lock (code %d)", ret); + } + else + netdata_locks_acquired_mutexes++; + + return ret; +} + +int __netdata_mutex_trylock(netdata_mutex_t *mutex) { + netdata_thread_disable_cancelability(); + + int ret = pthread_mutex_trylock(mutex); + if(ret != 0) + netdata_thread_enable_cancelability(); + else + netdata_locks_acquired_mutexes++; + + return ret; +} + +int __netdata_mutex_unlock(netdata_mutex_t *mutex) { + int ret = pthread_mutex_unlock(mutex); + if(unlikely(ret != 0)) + error("MUTEX_LOCK: failed to unlock (code %d).", ret); + else { + netdata_locks_acquired_mutexes--; + netdata_thread_enable_cancelability(); + } + + return ret; +} + +#ifdef NETDATA_TRACE_RWLOCKS + +#warning NETDATA_TRACE_RWLOCKS ENABLED - EXPECT A LOT OF OUTPUT + +int netdata_mutex_init_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_mutex_t *mutex) { + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_init(%p) from %lu@%s, %s()", mutex, line, file, function); + + int ret = __netdata_mutex_init(mutex); + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_init(%p) = %d, from %lu@%s, %s()", mutex, ret, line, file, function); + + return ret; +} + +int netdata_mutex_destroy_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_mutex_t *mutex) { + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_destroy(%p) from %lu@%s, %s()", mutex, line, file, function); + + int ret = __netdata_mutex_destroy(mutex); + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_destroy(%p) = %d, from %lu@%s, %s()", mutex, ret, line, file, function); + + return ret; +} + +int netdata_mutex_lock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_mutex_t *mutex) { + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_lock(%p) from %lu@%s, %s()", mutex, line, file, function); + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_mutex_lock(mutex); + usec_t end_s = now_monotonic_high_precision_usec(); + + // remove compiler unused variables warning + (void)start_s; + (void)end_s; + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_lock(%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, end_s - start_s, line, file, function); + + return ret; +} + +int netdata_mutex_trylock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_mutex_t *mutex) { + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_trylock(%p) from %lu@%s, %s()", mutex, line, file, function); + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_mutex_trylock(mutex); + usec_t end_s = now_monotonic_high_precision_usec(); + + // remove compiler unused variables warning + (void)start_s; + (void)end_s; + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_trylock(%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, end_s - start_s, line, file, function); + + return ret; +} + +int netdata_mutex_unlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_mutex_t *mutex) { + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_unlock(%p) from %lu@%s, %s()", mutex, line, file, function); + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_mutex_unlock(mutex); + usec_t end_s = now_monotonic_high_precision_usec(); + + // remove compiler unused variables warning + (void)start_s; + (void)end_s; + + debug(D_LOCKS, "MUTEX_LOCK: netdata_mutex_unlock(%p) = %d in %llu usec, from %lu@%s, %s()", mutex, ret, end_s - start_s, line, file, function); + + return ret; +} + +#endif // NETDATA_TRACE_RWLOCKS + +// ---------------------------------------------------------------------------- +// rwlock + +int __netdata_rwlock_destroy(netdata_rwlock_t *rwlock) { + int ret = pthread_rwlock_destroy(&rwlock->rwlock_t); + if(unlikely(ret != 0)) + error("RW_LOCK: failed to destroy lock (code %d)", ret); + return ret; +} + +int __netdata_rwlock_init(netdata_rwlock_t *rwlock) { + int ret = pthread_rwlock_init(&rwlock->rwlock_t, NULL); + if(unlikely(ret != 0)) + error("RW_LOCK: failed to initialize lock (code %d)", ret); + return ret; +} + +int __netdata_rwlock_rdlock(netdata_rwlock_t *rwlock) { + netdata_thread_disable_cancelability(); + + int ret = pthread_rwlock_rdlock(&rwlock->rwlock_t); + if(unlikely(ret != 0)) { + netdata_thread_enable_cancelability(); + error("RW_LOCK: failed to obtain read lock (code %d)", ret); + } + else + netdata_locks_acquired_rwlocks++; + + return ret; +} + +int __netdata_rwlock_wrlock(netdata_rwlock_t *rwlock) { + netdata_thread_disable_cancelability(); + + int ret = pthread_rwlock_wrlock(&rwlock->rwlock_t); + if(unlikely(ret != 0)) { + error("RW_LOCK: failed to obtain write lock (code %d)", ret); + netdata_thread_enable_cancelability(); + } + else + netdata_locks_acquired_rwlocks++; + + return ret; +} + +int __netdata_rwlock_unlock(netdata_rwlock_t *rwlock) { + int ret = pthread_rwlock_unlock(&rwlock->rwlock_t); + if(unlikely(ret != 0)) + error("RW_LOCK: failed to release lock (code %d)", ret); + else { + netdata_thread_enable_cancelability(); + netdata_locks_acquired_rwlocks--; + } + + return ret; +} + +int __netdata_rwlock_tryrdlock(netdata_rwlock_t *rwlock) { + netdata_thread_disable_cancelability(); + + int ret = pthread_rwlock_tryrdlock(&rwlock->rwlock_t); + if(ret != 0) + netdata_thread_enable_cancelability(); + else + netdata_locks_acquired_rwlocks++; + + return ret; +} + +int __netdata_rwlock_trywrlock(netdata_rwlock_t *rwlock) { + netdata_thread_disable_cancelability(); + + int ret = pthread_rwlock_trywrlock(&rwlock->rwlock_t); + if(ret != 0) + netdata_thread_enable_cancelability(); + else + netdata_locks_acquired_rwlocks++; + + return ret; +} + +// ---------------------------------------------------------------------------- +// spinlock implementation +// https://www.youtube.com/watch?v=rmGJc9PXpuE&t=41s + +void netdata_spinlock_init(SPINLOCK *spinlock) { + *spinlock = NETDATA_SPINLOCK_INITIALIZER; +} + +void netdata_spinlock_lock(SPINLOCK *spinlock) { + static const struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 }; + + netdata_thread_disable_cancelability(); + + for(int i = 1; + __atomic_load_n(&spinlock->locked, __ATOMIC_RELAXED) || + __atomic_test_and_set(&spinlock->locked, __ATOMIC_ACQUIRE) + ; i++ + ) { + if(unlikely(i == 8)) { + i = 0; + nanosleep(&ns, NULL); + } + } + // we have the lock +} + +void netdata_spinlock_unlock(SPINLOCK *spinlock) { + __atomic_clear(&spinlock->locked, __ATOMIC_RELEASE); + netdata_thread_enable_cancelability(); +} + +#ifdef NETDATA_TRACE_RWLOCKS + +// ---------------------------------------------------------------------------- +// lockers list + +void not_supported_by_posix_rwlocks(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, char locktype, const char *reason) { + __netdata_mutex_lock(&rwlock->lockers_mutex); + fprintf(stderr, + "RW_LOCK FATAL ON LOCK %p: %d '%s' (function %s() %lu@%s) attempts to acquire a '%c' lock, but it is not supported by POSIX because: %s. At this attempt, the task is holding %zu rwlocks and %zu mutexes. There are %zu readers and %zu writers holding this lock:\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + locktype, + reason, + netdata_locks_acquired_rwlocks, netdata_locks_acquired_mutexes, + rwlock->readers, rwlock->writers); + + int i; + usec_t now = now_monotonic_high_precision_usec(); + netdata_rwlock_locker *p; + for(i = 1, p = rwlock->lockers; p ;p = p->next, i++) { + fprintf(stderr, + " => %i: RW_LOCK %p: process %d '%s' (function %s() %lu@%s) is having %zu '%c' lock for %llu usec.\n", + i, rwlock, + p->pid, p->tag, + p->function, p->line, p->file, + p->callers, p->lock, + (now - p->start_s)); + } + __netdata_mutex_unlock(&rwlock->lockers_mutex); +} + +static void log_rwlock_lockers(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, const char *reason, char locktype) { + + // this function can only be used by one thread at a time + // because otherwise, the threads may deadlock waiting for each other + static netdata_mutex_t log_lockers_mutex = NETDATA_MUTEX_INITIALIZER; + __netdata_mutex_lock(&log_lockers_mutex); + + // now work on this locker + __netdata_mutex_lock(&rwlock->lockers_mutex); + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d '%s' (function %s() %lu@%s) %s a '%c' lock (while holding %zu rwlocks and %zu mutexes). There are %zu readers and %zu writers holding this lock:\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + reason, locktype, + netdata_locks_acquired_rwlocks, netdata_locks_acquired_mutexes, + rwlock->readers, rwlock->writers); + + int i; + usec_t now = now_monotonic_high_precision_usec(); + netdata_rwlock_locker *p; + for(i = 1, p = rwlock->lockers; p ;p = p->next, i++) { + fprintf(stderr, + " => %i: RW_LOCK %p: process %d '%s' (function %s() %lu@%s) is having %zu '%c' lock for %llu usec.\n", + i, rwlock, + p->pid, p->tag, + p->function, p->line, p->file, + p->callers, p->lock, + (now - p->start_s)); + + if(p->all_caller_locks) { + // find the lock in the netdata_thread_locks[] + // and remove it + int k; + for(k = 0; k < NETDATA_THREAD_LOCKS_ARRAY_SIZE ;k++) { + if (p->all_caller_locks[k] && p->all_caller_locks[k] != rwlock) { + + // lock the other lock lockers list + __netdata_mutex_lock(&p->all_caller_locks[k]->lockers_mutex); + + // print the list of lockers of the other lock + netdata_rwlock_locker *r; + int j; + for(j = 1, r = p->all_caller_locks[k]->lockers; r ;r = r->next, j++) { + fprintf( + stderr, + " ~~~> %i: RW_LOCK %p: process %d '%s' (function %s() %lu@%s) is having %zu '%c' lock for %llu usec.\n", + j, + p->all_caller_locks[k], + r->pid, + r->tag, + r->function, + r->line, + r->file, + r->callers, + r->lock, + (now - r->start_s)); + } + + // unlock the other lock lockers list + __netdata_mutex_unlock(&p->all_caller_locks[k]->lockers_mutex); + } + } + } + + } + __netdata_mutex_unlock(&rwlock->lockers_mutex); + + // unlock this function for other threads + __netdata_mutex_unlock(&log_lockers_mutex); +} + +static netdata_rwlock_locker *add_rwlock_locker(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, char lock_type) { + netdata_rwlock_locker *p = mallocz(sizeof(netdata_rwlock_locker)); + p->pid = gettid(); + p->tag = netdata_thread_tag(); + p->lock = lock_type; + p->file = file; + p->function = function; + p->line = line; + p->callers = 1; + p->all_caller_locks = netdata_thread_locks; + p->start_s = now_monotonic_high_precision_usec(); + + // find a slot in the netdata_thread_locks[] + int i; + for(i = 0; i < NETDATA_THREAD_LOCKS_ARRAY_SIZE ;i++) { + if (!netdata_thread_locks[i]) { + netdata_thread_locks[i] = rwlock; + break; + } + } + + __netdata_mutex_lock(&rwlock->lockers_mutex); + p->next = rwlock->lockers; + rwlock->lockers = p; + if(lock_type == 'R') rwlock->readers++; + if(lock_type == 'W') rwlock->writers++; + __netdata_mutex_unlock(&rwlock->lockers_mutex); + + return p; +} + +static void remove_rwlock_locker(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock, netdata_rwlock_locker *locker) { + usec_t end_s = now_monotonic_high_precision_usec(); + + if(locker->callers == 0) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) callers should be positive but it is zero\n", + rwlock, + locker->pid, locker->tag, + locker->function, locker->line, locker->file); + + if(locker->callers > 1 && locker->lock != 'R') + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) only 'R' locks support multiple holders, but here we have %zu callers holding a '%c' lock.\n", + rwlock, + locker->pid, locker->tag, + locker->function, locker->line, locker->file, + locker->callers, locker->lock); + + __netdata_mutex_lock(&rwlock->lockers_mutex); + locker->callers--; + + if(!locker->callers) { + int doit = 0; + + if (rwlock->lockers == locker) { + rwlock->lockers = locker->next; + doit = 1; + } else { + netdata_rwlock_locker *p; + for (p = rwlock->lockers; p && p->next != locker; p = p->next) + ; + if (p && p->next == locker) { + p->next = locker->next; + doit = 1; + } + } + if(doit) { + if(locker->lock == 'R') rwlock->readers--; + if(locker->lock == 'W') rwlock->writers--; + } + + if(!doit) { + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) with %zu x '%c' lock is not found.\n", + rwlock, + locker->pid, locker->tag, + locker->function, locker->line, locker->file, + locker->callers, locker->lock); + } + else { + // find the lock in the netdata_thread_locks[] + // and remove it + int i; + for(i = 0; i < NETDATA_THREAD_LOCKS_ARRAY_SIZE ;i++) { + if (netdata_thread_locks[i] == rwlock) + netdata_thread_locks[i] = NULL; + } + + if(end_s - locker->start_s >= NETDATA_TRACE_RWLOCKS_HOLD_TIME_TO_IGNORE_USEC) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) holded a '%c' for %llu usec.\n", + rwlock, + locker->pid, locker->tag, + locker->function, locker->line, locker->file, + locker->lock, end_s - locker->start_s); + + freez(locker); + } + } + + __netdata_mutex_unlock(&rwlock->lockers_mutex); +} + +static netdata_rwlock_locker *find_rwlock_locker(const char *file __maybe_unused, const char *function __maybe_unused, const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + pid_t pid = gettid(); + netdata_rwlock_locker *p; + + __netdata_mutex_lock(&rwlock->lockers_mutex); + for(p = rwlock->lockers; p ;p = p->next) { + if(p->pid == pid) break; + } + __netdata_mutex_unlock(&rwlock->lockers_mutex); + + return p; +} + +static netdata_rwlock_locker *update_or_add_rwlock_locker(const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock, netdata_rwlock_locker *locker, char locktype) { + if(!locker) { + return add_rwlock_locker(file, function, line, rwlock, locktype); + } + else if(locker->lock == 'R' && locktype == 'R') { + __netdata_mutex_lock(&rwlock->lockers_mutex); + locker->callers++; + __netdata_mutex_unlock(&rwlock->lockers_mutex); + return locker; + } + else { + not_supported_by_posix_rwlocks(file, function, line, rwlock, locktype, "DEADLOCK - WANTS TO CHANGE LOCK TYPE BUT ALREADY HAS THIS LOCKED"); + return locker; + } +} + +// ---------------------------------------------------------------------------- +// debug versions of rwlock + +int netdata_rwlock_destroy_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_destroy(%p) from %lu@%s, %s()", rwlock, line, file, function); + + if(rwlock->readers) + error("RW_LOCK: destroying a rwlock with %zu readers in it", rwlock->readers); + if(rwlock->writers) + error("RW_LOCK: destroying a rwlock with %zu writers in it", rwlock->writers); + + int ret = __netdata_rwlock_destroy(rwlock); + if(!ret) { + while (rwlock->lockers) + remove_rwlock_locker(file, function, line, rwlock, rwlock->lockers); + + if (rwlock->readers) + error("RW_LOCK: internal error - empty rwlock with %zu readers in it", rwlock->readers); + if (rwlock->writers) + error("RW_LOCK: internal error - empty rwlock with %zu writers in it", rwlock->writers); + } + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_destroy(%p) = %d, from %lu@%s, %s()", rwlock, ret, line, file, function); + + return ret; +} + +int netdata_rwlock_init_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_init(%p) from %lu@%s, %s()", rwlock, line, file, function); + + int ret = __netdata_rwlock_init(rwlock); + if(!ret) { + __netdata_mutex_init(&rwlock->lockers_mutex); + rwlock->lockers = NULL; + rwlock->readers = 0; + rwlock->writers = 0; + } + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_init(%p) = %d, from %lu@%s, %s()", rwlock, ret, line, file, function); + + return ret; +} + +int netdata_rwlock_rdlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_rdlock(%p) from %lu@%s, %s()", rwlock, line, file, function); + + netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); + +#ifdef NETDATA_TRACE_RWLOCKS_LOG_NESTED + if(locker && locker->lock == 'R') { + log_rwlock_lockers(file, function, line, rwlock, "NESTED READ LOCK REQUEST", 'R'); + } +#endif // NETDATA_TRACE_RWLOCKS_LOG_NESTED + + int log = 0; + if(rwlock->writers) { + log_rwlock_lockers(file, function, line, rwlock, "WANTS", 'R'); + log = 1; + } + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_rwlock_rdlock(rwlock); + usec_t end_s = now_monotonic_high_precision_usec(); + + if(!ret) { + locker = update_or_add_rwlock_locker(file, function, line, rwlock, locker, 'R'); + if(log) log_rwlock_lockers(file, function, line, rwlock, "GOT", 'R'); + + } + + if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED for a READ lock for %llu usec.\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + end_s - start_s); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_rdlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + + return ret; +} + +int netdata_rwlock_wrlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_wrlock(%p) from %lu@%s, %s()", rwlock, line, file, function); + + netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); + if(locker) + not_supported_by_posix_rwlocks(file, function, line, rwlock, 'W', "DEADLOCK - WANTS A WRITE LOCK BUT ALREADY HAVE THIS LOCKED"); + + int log = 0; + if(rwlock->readers) { + log_rwlock_lockers(file, function, line, rwlock, "WANTS", 'W'); + log = 1; + } + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_rwlock_wrlock(rwlock); + usec_t end_s = now_monotonic_high_precision_usec(); + + if(!ret){ + locker = update_or_add_rwlock_locker(file, function, line, rwlock, locker, 'W'); + if(log) log_rwlock_lockers(file, function, line, rwlock, "GOT", 'W'); + } + + if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED for a WRITE lock for %llu usec.\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + end_s - start_s); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_wrlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + + return ret; +} + +int netdata_rwlock_unlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_unlock(%p) from %lu@%s, %s()", rwlock, line, file, function); + + netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); + if(unlikely(!locker)) + not_supported_by_posix_rwlocks(file, function, line, rwlock, 'U', "UNLOCK WITHOUT LOCK"); + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_rwlock_unlock(rwlock); + usec_t end_s = now_monotonic_high_precision_usec(); + + if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED to UNLOCK for %llu usec.\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + end_s - start_s); + + if(likely(!ret && locker)) remove_rwlock_locker(file, function, line, rwlock, locker); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_unlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + + return ret; +} + +int netdata_rwlock_tryrdlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_tryrdlock(%p) from %lu@%s, %s()", rwlock, line, file, function); + + netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); + if(locker && locker->lock == 'W') + not_supported_by_posix_rwlocks(file, function, line, rwlock, 'R', "DEADLOCK - WANTS A READ LOCK BUT IT HAS A WRITE LOCK ALREADY"); + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_rwlock_tryrdlock(rwlock); + usec_t end_s = now_monotonic_high_precision_usec(); + + if(!ret) + locker = update_or_add_rwlock_locker(file, function, line, rwlock, locker, 'R'); + + if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED to TRYREAD for %llu usec.\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + end_s - start_s); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_tryrdlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + + return ret; +} + +int netdata_rwlock_trywrlock_debug(const char *file __maybe_unused, const char *function __maybe_unused, + const unsigned long line __maybe_unused, netdata_rwlock_t *rwlock) { + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_trywrlock(%p) from %lu@%s, %s()", rwlock, line, file, function); + + netdata_rwlock_locker *locker = find_rwlock_locker(file, function, line, rwlock); + if(locker) + not_supported_by_posix_rwlocks(file, function, line, rwlock, 'W', "ALREADY HAS THIS LOCK"); + + usec_t start_s = now_monotonic_high_precision_usec(); + int ret = __netdata_rwlock_trywrlock(rwlock); + usec_t end_s = now_monotonic_high_precision_usec(); + + if(!ret) + locker = update_or_add_rwlock_locker(file, function, line, rwlock, locker, 'W'); + + if(end_s - start_s >= NETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC) + fprintf(stderr, + "RW_LOCK ON LOCK %p: %d, '%s' (function %s() %lu@%s) WAITED to TRYWRITE for %llu usec.\n", + rwlock, + gettid(), netdata_thread_tag(), + function, line, file, + end_s - start_s); + + debug(D_LOCKS, "RW_LOCK: netdata_rwlock_trywrlock(%p) = %d in %llu usec, from %lu@%s, %s()", rwlock, ret, end_s - start_s, line, file, function); + + return ret; +} + +#endif // NETDATA_TRACE_RWLOCKS diff --git a/libnetdata/locks/locks.h b/libnetdata/locks/locks.h new file mode 100644 index 0000000..4d2d165 --- /dev/null +++ b/libnetdata/locks/locks.h @@ -0,0 +1,127 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#ifndef NETDATA_LOCKS_H +#define NETDATA_LOCKS_H 1 + +#include "../libnetdata.h" +#include "../clocks/clocks.h" + +typedef pthread_mutex_t netdata_mutex_t; +#define NETDATA_MUTEX_INITIALIZER PTHREAD_MUTEX_INITIALIZER + +typedef struct netdata_spinlock { + bool locked; +} SPINLOCK; +#define NETDATA_SPINLOCK_INITIALIZER (SPINLOCK){ .locked = false } +void netdata_spinlock_init(SPINLOCK *spinlock); +void netdata_spinlock_lock(SPINLOCK *spinlock); +void netdata_spinlock_unlock(SPINLOCK *spinlock); + +#ifdef NETDATA_TRACE_RWLOCKS +typedef struct netdata_rwlock_locker { + pid_t pid; + const char *tag; + char lock; // 'R', 'W' + const char *file; + const char *function; + unsigned long line; + size_t callers; + usec_t start_s; + struct netdata_rwlock_t **all_caller_locks; + struct netdata_rwlock_locker *next; +} netdata_rwlock_locker; + +typedef struct netdata_rwlock_t { + pthread_rwlock_t rwlock_t; // the lock + size_t readers; // the number of reader on the lock + size_t writers; // the number of writers on the lock + netdata_mutex_t lockers_mutex; // a mutex to protect the linked list of the lock holding threads + netdata_rwlock_locker *lockers; // the linked list of the lock holding threads +} netdata_rwlock_t; + +#define NETDATA_RWLOCK_INITIALIZER { \ + .rwlock_t = PTHREAD_RWLOCK_INITIALIZER, \ + .readers = 0, \ + .writers = 0, \ + .lockers_mutex = NETDATA_MUTEX_INITIALIZER, \ + .lockers = NULL \ + } + +#else // NETDATA_TRACE_RWLOCKS + +typedef struct netdata_rwlock_t { + pthread_rwlock_t rwlock_t; +} netdata_rwlock_t; + +#define NETDATA_RWLOCK_INITIALIZER { \ + .rwlock_t = PTHREAD_RWLOCK_INITIALIZER \ + } + +#endif // NETDATA_TRACE_RWLOCKS + +int __netdata_mutex_init(netdata_mutex_t *mutex); +int __netdata_mutex_destroy(netdata_mutex_t *mutex); +int __netdata_mutex_lock(netdata_mutex_t *mutex); +int __netdata_mutex_trylock(netdata_mutex_t *mutex); +int __netdata_mutex_unlock(netdata_mutex_t *mutex); + +int __netdata_rwlock_destroy(netdata_rwlock_t *rwlock); +int __netdata_rwlock_init(netdata_rwlock_t *rwlock); +int __netdata_rwlock_rdlock(netdata_rwlock_t *rwlock); +int __netdata_rwlock_wrlock(netdata_rwlock_t *rwlock); +int __netdata_rwlock_unlock(netdata_rwlock_t *rwlock); +int __netdata_rwlock_tryrdlock(netdata_rwlock_t *rwlock); +int __netdata_rwlock_trywrlock(netdata_rwlock_t *rwlock); + +void netdata_thread_disable_cancelability(void); +void netdata_thread_enable_cancelability(void); + +#ifdef NETDATA_TRACE_RWLOCKS + +int netdata_mutex_init_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); +int netdata_mutex_destroy_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); +int netdata_mutex_lock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); +int netdata_mutex_trylock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); +int netdata_mutex_unlock_debug( const char *file, const char *function, const unsigned long line, netdata_mutex_t *mutex); + +int netdata_rwlock_destroy_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +int netdata_rwlock_init_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +int netdata_rwlock_rdlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +int netdata_rwlock_wrlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +int netdata_rwlock_unlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +int netdata_rwlock_tryrdlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); +int netdata_rwlock_trywrlock_debug( const char *file, const char *function, const unsigned long line, netdata_rwlock_t *rwlock); + +#define netdata_mutex_init(mutex) netdata_mutex_init_debug(__FILE__, __FUNCTION__, __LINE__, mutex) +#define netdata_mutex_destroy(mutex) netdata_mutex_init_debug(__FILE__, __FUNCTION__, __LINE__, mutex) +#define netdata_mutex_lock(mutex) netdata_mutex_lock_debug(__FILE__, __FUNCTION__, __LINE__, mutex) +#define netdata_mutex_trylock(mutex) netdata_mutex_trylock_debug(__FILE__, __FUNCTION__, __LINE__, mutex) +#define netdata_mutex_unlock(mutex) netdata_mutex_unlock_debug(__FILE__, __FUNCTION__, __LINE__, mutex) + +#define netdata_rwlock_destroy(rwlock) netdata_rwlock_destroy_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) +#define netdata_rwlock_init(rwlock) netdata_rwlock_init_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) +#define netdata_rwlock_rdlock(rwlock) netdata_rwlock_rdlock_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) +#define netdata_rwlock_wrlock(rwlock) netdata_rwlock_wrlock_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) +#define netdata_rwlock_unlock(rwlock) netdata_rwlock_unlock_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) +#define netdata_rwlock_tryrdlock(rwlock) netdata_rwlock_tryrdlock_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) +#define netdata_rwlock_trywrlock(rwlock) netdata_rwlock_trywrlock_debug(__FILE__, __FUNCTION__, __LINE__, rwlock) + +#else // !NETDATA_TRACE_RWLOCKS + +#define netdata_mutex_init(mutex) __netdata_mutex_init(mutex) +#define netdata_mutex_destroy(mutex) __netdata_mutex_destroy(mutex) +#define netdata_mutex_lock(mutex) __netdata_mutex_lock(mutex) +#define netdata_mutex_trylock(mutex) __netdata_mutex_trylock(mutex) +#define netdata_mutex_unlock(mutex) __netdata_mutex_unlock(mutex) + +#define netdata_rwlock_destroy(rwlock) __netdata_rwlock_destroy(rwlock) +#define netdata_rwlock_init(rwlock) __netdata_rwlock_init(rwlock) +#define netdata_rwlock_rdlock(rwlock) __netdata_rwlock_rdlock(rwlock) +#define netdata_rwlock_wrlock(rwlock) __netdata_rwlock_wrlock(rwlock) +#define netdata_rwlock_unlock(rwlock) __netdata_rwlock_unlock(rwlock) +#define netdata_rwlock_tryrdlock(rwlock) __netdata_rwlock_tryrdlock(rwlock) +#define netdata_rwlock_trywrlock(rwlock) __netdata_rwlock_trywrlock(rwlock) + +#endif // NETDATA_TRACE_RWLOCKS + +#endif //NETDATA_LOCKS_H -- cgit v1.2.3