summaryrefslogtreecommitdiffstats
path: root/src/libnetdata/locks
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/libnetdata/locks/README.md107
-rw-r--r--src/libnetdata/locks/locks.c (renamed from libnetdata/locks/locks.c)49
-rw-r--r--src/libnetdata/locks/locks.h (renamed from libnetdata/locks/locks.h)4
3 files changed, 153 insertions, 7 deletions
diff --git a/src/libnetdata/locks/README.md b/src/libnetdata/locks/README.md
new file mode 100644
index 000000000..35d602f2a
--- /dev/null
+++ b/src/libnetdata/locks/README.md
@@ -0,0 +1,107 @@
+<!--
+title: "Locks"
+custom_edit_url: https://github.com/netdata/netdata/edit/master/src/libnetdata/locks/README.md
+sidebar_label: "Locks"
+learn_status: "Published"
+learn_topic_type: "Tasks"
+learn_rel_path: "Developers/libnetdata"
+-->
+
+# Locks
+
+## How to trace netdata locks
+
+To enable tracing rwlocks in netdata, compile netdata by setting `CFLAGS="-DNETDATA_TRACE_RWLOCKS=1"`, like this:
+
+```
+CFLAGS="-O1 -ggdb -DNETDATA_TRACE_RWLOCKS=1" ./netdata-installer.sh
+```
+
+During compilation, the compiler will log:
+
+```
+libnetdata/locks/locks.c:105:2: warning: #warning NETDATA_TRACE_RWLOCKS ENABLED - EXPECT A LOT OF OUTPUT [-Wcpp]
+ 105 | #warning NETDATA_TRACE_RWLOCKS ENABLED - EXPECT A LOT OF OUTPUT
+ | ^~~~~~~
+```
+
+Once compiled, netdata will do the following:
+
+The duration of every call to `netdata_rwlock_*()` is now measured.
+
+### logging of slow locks/unlocks
+
+If any call takes more than 10 usec, it will be logged like this:
+
+```
+RW_LOCK ON LOCK 0x0x7fbe1f2e5190: 4157038, 'ACLK_Query_2' (function build_context_param_list() 99@web/api/formatters/rrd2json.c) WAITED to UNLOCK for 29 usec.
+```
+
+This threshold can be changed by adding `-DNETDATA_TRACE_RWLOCKS_WAIT_TIME_TO_IGNORE_USEC=20` (or any other value) to the CFLAGS.
+
+### logging of long hold times
+
+If any lock is held for more than 10000 usec, it will be logged like this:
+
+```
+RW_LOCK ON LOCK 0x0x55a20afc1b20: 4187198, 'ANALYTICS' (function analytics_gather_mutable_meta_data() 532@daemon/analytics.c) holded a 'R' for 13232 usec.
+```
+
+This threshold can be changed by adding `-DNETDATA_TRACE_RWLOCKS_HOLD_TIME_TO_IGNORE_USEC=20000` (or any other value) to the CFLAGS.
+
+### logging for probable pauses (predictive)
+
+The library maintains a linked list of all the lock holders (one entry per thread). This linked list is protected by a mutex, so every call to the r/w locks now also takes a mutex lock.
+
+If any call is expected to pause the caller (i.e., the caller is attempting a read lock while there is a write lock in place, or vice versa), the library will log something like this:
+
+```
+RW_LOCK ON LOCK 0x0x5651c9fcce20: 4190039 'HEALTH' (function health_execute_pending_updates() 661@health/health.c) WANTS a 'W' lock (while holding 1 rwlocks and 1 mutexes).
+There are 7 readers and 0 writers are holding the lock:
+ => 1: RW_LOCK: process 4190091 'WEB_SERVER[static14]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 709847 usec.
+ => 2: RW_LOCK: process 4190079 'WEB_SERVER[static6]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 709869 usec.
+ => 3: RW_LOCK: process 4190084 'WEB_SERVER[static10]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 709948 usec.
+ => 4: RW_LOCK: process 4190076 'WEB_SERVER[static3]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710190 usec.
+ => 5: RW_LOCK: process 4190092 'WEB_SERVER[static15]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710195 usec.
+ => 6: RW_LOCK: process 4190077 'WEB_SERVER[static4]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710208 usec.
+ => 7: RW_LOCK: process 4190044 'WEB_SERVER[static1]' (function web_client_api_request_v1_data() 526@web/api/web_api_v1.c) is having 1 'R' lock for 710221 usec.
+```
+
+And each of the above is paired with a `GOT` log, like this:
+
+```
+RW_LOCK ON LOCK 0x0x5651c9fcce20: 4190039 'HEALTH' (function health_execute_pending_updates() 661@health/health.c) GOT a 'W' lock (while holding 2 rwlocks and 1 mutexes).
+There are 0 readers and 1 writers are holding the lock:
+ => 1: RW_LOCK: process 4190039 'HEALTH' (function health_execute_pending_updates() 661@health/health.c) is having 1 'W' lock for 36 usec.
+```
+
+Keep in mind that the lock and log are not atomic. The list of callers is indicative (and sometimes just empty, because the original holders of the lock unlocked it before we had the chance to print their names).
+
+### POSIX compliance check
+
+The library may also log messages about cases that POSIX does not support, like this:
+
+```
+RW_LOCK FATAL ON LOCK 0x0x622000109290: 3609368 'PLUGIN[proc]' (function __rrdset_check_rdlock() 10@database/rrdset.c) attempts to acquire a 'W' lock.
+But it is not supported by POSIX because: ALREADY HAS THIS LOCK
+At this attempt, the task is holding 1 rwlocks and 1 mutexes.
+There are 1 readers and 0 writers are holding the lock requested now:
+ => 1: RW_LOCK: process 3609368 'PLUGIN[proc]' (function rrdset_done() 1398@database/rrdset.c) is having 1 'R' lock for 0 usec.
+```
+
+### nested read locks
+
+When compiled with `-DNETDATA_TRACE_RWLOCKS_LOG_NESTED=1` the library will also detect nested read locks and print them like this:
+
+```
+RW_LOCK ON LOCK 0x0x7ff6ea46d190: 4140225 'WEB_SERVER[static14]' (function rrdr_json_wrapper_begin() 34@web/api/formatters/json_wrapper.c) NESTED READ LOCK REQUEST a 'R' lock (while holding 1 rwlocks and 1 mutexes).
+There are 5 readers and 0 writers are holding the lock:
+ => 1: RW_LOCK: process 4140225 'WEB_SERVER[static14]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 216667 usec.
+ => 2: RW_LOCK: process 4140211 'WEB_SERVER[static6]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec.
+ => 3: RW_LOCK: process 4140218 'WEB_SERVER[static8]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec.
+ => 4: RW_LOCK: process 4140224 'WEB_SERVER[static13]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec.
+ => 5: RW_LOCK: process 4140227 'WEB_SERVER[static16]' (function rrdr_lock_rrdset() 70@web/api/queries/rrdr.c) is having 1 'R' lock for 220001 usec.
+```
+
+
+
diff --git a/libnetdata/locks/locks.c b/src/libnetdata/locks/locks.c
index 625dd052c..adf683af2 100644
--- a/libnetdata/locks/locks.c
+++ b/src/libnetdata/locks/locks.c
@@ -297,14 +297,15 @@ void spinlock_init(SPINLOCK *spinlock) {
memset(spinlock, 0, sizeof(SPINLOCK));
}
-void spinlock_lock(SPINLOCK *spinlock) {
+static inline void spinlock_lock_internal(SPINLOCK *spinlock, bool cancelable) {
static const struct timespec ns = { .tv_sec = 0, .tv_nsec = 1 };
#ifdef NETDATA_INTERNAL_CHECKS
size_t spins = 0;
#endif
- netdata_thread_disable_cancelability();
+ if (!cancelable)
+ netdata_thread_disable_cancelability();
for(int i = 1;
__atomic_load_n(&spinlock->locked, __ATOMIC_RELAXED) ||
@@ -329,16 +330,19 @@ void spinlock_lock(SPINLOCK *spinlock) {
#endif
}
-void spinlock_unlock(SPINLOCK *spinlock) {
+static inline void spinlock_unlock_internal(SPINLOCK *spinlock, bool cancelable) {
#ifdef NETDATA_INTERNAL_CHECKS
spinlock->locker_pid = 0;
#endif
__atomic_clear(&spinlock->locked, __ATOMIC_RELEASE);
- netdata_thread_enable_cancelability();
+
+ if (!cancelable)
+ netdata_thread_enable_cancelability();
}
-bool spinlock_trylock(SPINLOCK *spinlock) {
- netdata_thread_disable_cancelability();
+static inline bool spinlock_trylock_internal(SPINLOCK *spinlock, bool cancelable) {
+ if (!cancelable)
+ netdata_thread_disable_cancelability();
if(!__atomic_load_n(&spinlock->locked, __ATOMIC_RELAXED) &&
!__atomic_test_and_set(&spinlock->locked, __ATOMIC_ACQUIRE))
@@ -346,10 +350,41 @@ bool spinlock_trylock(SPINLOCK *spinlock) {
return true;
// we didn't get the lock
- netdata_thread_enable_cancelability();
+ if (!cancelable)
+ netdata_thread_enable_cancelability();
return false;
}
+void spinlock_lock(SPINLOCK *spinlock)
+{
+ spinlock_lock_internal(spinlock, false);
+}
+
+void spinlock_unlock(SPINLOCK *spinlock)
+{
+ spinlock_unlock_internal(spinlock, false);
+}
+
+bool spinlock_trylock(SPINLOCK *spinlock)
+{
+ return spinlock_trylock_internal(spinlock, false);
+}
+
+void spinlock_lock_cancelable(SPINLOCK *spinlock)
+{
+ spinlock_lock_internal(spinlock, true);
+}
+
+void spinlock_unlock_cancelable(SPINLOCK *spinlock)
+{
+ spinlock_unlock_internal(spinlock, true);
+}
+
+bool spinlock_trylock_cancelable(SPINLOCK *spinlock)
+{
+ return spinlock_trylock_internal(spinlock, true);
+}
+
// ----------------------------------------------------------------------------
// rw_spinlock implementation
diff --git a/libnetdata/locks/locks.h b/src/libnetdata/locks/locks.h
index 6b492ae47..09adfb41f 100644
--- a/libnetdata/locks/locks.h
+++ b/src/libnetdata/locks/locks.h
@@ -25,6 +25,10 @@ void spinlock_lock(SPINLOCK *spinlock);
void spinlock_unlock(SPINLOCK *spinlock);
bool spinlock_trylock(SPINLOCK *spinlock);
+void spinlock_lock_cancelable(SPINLOCK *spinlock);
+void spinlock_unlock_cancelable(SPINLOCK *spinlock);
+bool spinlock_trylock_cancelable(SPINLOCK *spinlock);
+
typedef struct netdata_rw_spinlock {
int32_t readers;
SPINLOCK spinlock;