diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 17:35:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-18 17:39:31 +0000 |
commit | 85c675d0d09a45a135bddd15d7b385f8758c32fb (patch) | |
tree | 76267dbc9b9a130337be3640948fe397b04ac629 /kernel/locking | |
parent | Adding upstream version 6.6.15. (diff) | |
download | linux-85c675d0d09a45a135bddd15d7b385f8758c32fb.tar.xz linux-85c675d0d09a45a135bddd15d7b385f8758c32fb.zip |
Adding upstream version 6.7.7.upstream/6.7.7
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'kernel/locking')
-rw-r--r-- | kernel/locking/lock_events.c | 10 | ||||
-rw-r--r-- | kernel/locking/lockdep_proc.c | 2 | ||||
-rw-r--r-- | kernel/locking/locktorture.c | 214 | ||||
-rw-r--r-- | kernel/locking/mutex.c | 3 | ||||
-rw-r--r-- | kernel/locking/osq_lock.c | 37 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 37 | ||||
-rw-r--r-- | kernel/locking/rwbase_rt.c | 8 | ||||
-rw-r--r-- | kernel/locking/rwsem.c | 8 | ||||
-rw-r--r-- | kernel/locking/spinlock_rt.c | 6 | ||||
-rw-r--r-- | kernel/locking/test-ww_mutex.c | 28 | ||||
-rw-r--r-- | kernel/locking/ww_rt_mutex.c | 2 |
11 files changed, 269 insertions, 86 deletions
diff --git a/kernel/locking/lock_events.c b/kernel/locking/lock_events.c index fa2c2f951c..e68d820995 100644 --- a/kernel/locking/lock_events.c +++ b/kernel/locking/lock_events.c @@ -146,7 +146,7 @@ static int __init init_lockevent_counts(void) struct dentry *d_counts = debugfs_create_dir(LOCK_EVENTS_DIR, NULL); int i; - if (!d_counts) + if (IS_ERR(d_counts)) goto out; /* @@ -159,14 +159,14 @@ static int __init init_lockevent_counts(void) for (i = 0; i < lockevent_num; i++) { if (skip_lockevent(lockevent_names[i])) continue; - if (!debugfs_create_file(lockevent_names[i], 0400, d_counts, - (void *)(long)i, &fops_lockevent)) + if (IS_ERR(debugfs_create_file(lockevent_names[i], 0400, d_counts, + (void *)(long)i, &fops_lockevent))) goto fail_undo; } - if (!debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200, + if (IS_ERR(debugfs_create_file(lockevent_names[LOCKEVENT_reset_cnts], 0200, d_counts, (void *)(long)LOCKEVENT_reset_cnts, - &fops_lockevent)) + &fops_lockevent))) goto fail_undo; return 0; diff --git a/kernel/locking/lockdep_proc.c b/kernel/locking/lockdep_proc.c index 15fdc7fa5c..e2bfb1db58 100644 --- a/kernel/locking/lockdep_proc.c +++ b/kernel/locking/lockdep_proc.c @@ -440,7 +440,7 @@ static void snprint_time(char *buf, size_t bufsiz, s64 nr) static void seq_time(struct seq_file *m, s64 time) { - char num[15]; + char num[22]; snprint_time(num, sizeof(num), time); seq_printf(m, " %14s", num); diff --git a/kernel/locking/locktorture.c b/kernel/locking/locktorture.c index 270c7f80ce..69d3cd2cfc 100644 --- a/kernel/locking/locktorture.c +++ b/kernel/locking/locktorture.c @@ -33,21 +33,23 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Paul E. McKenney <paulmck@linux.ibm.com>"); -torture_param(int, nwriters_stress, -1, "Number of write-locking stress-test threads"); -torture_param(int, nreaders_stress, -1, "Number of read-locking stress-test threads"); +torture_param(int, acq_writer_lim, 0, "Write_acquisition time limit (jiffies)."); +torture_param(int, call_rcu_chains, 0, "Self-propagate call_rcu() chains during test (0=disable)."); torture_param(int, long_hold, 100, "Do occasional long hold of lock (ms), 0=disable"); +torture_param(int, nested_locks, 0, "Number of nested locks (max = 8)"); +torture_param(int, nreaders_stress, -1, "Number of read-locking stress-test threads"); +torture_param(int, nwriters_stress, -1, "Number of write-locking stress-test threads"); torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (s), 0=disable"); +torture_param(int, rt_boost, 2, + "Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types."); +torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens."); torture_param(int, shuffle_interval, 3, "Number of jiffies between shuffles, 0=disable"); torture_param(int, shutdown_secs, 0, "Shutdown time (j), <= zero to disable."); torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s"); torture_param(int, stutter, 5, "Number of jiffies to run/halt test, 0=disable"); -torture_param(int, rt_boost, 2, - "Do periodic rt-boost. 0=Disable, 1=Only for rt_mutex, 2=For all lock types."); -torture_param(int, rt_boost_factor, 50, "A factor determining how often rt-boost happens."); -torture_param(int, writer_fifo, 0, "Run writers at sched_set_fifo() priority"); torture_param(int, verbose, 1, "Enable verbose debugging printk()s"); -torture_param(int, nested_locks, 0, "Number of nested locks (max = 8)"); +torture_param(int, writer_fifo, 0, "Run writers at sched_set_fifo() priority"); /* Going much higher trips "BUG: MAX_LOCKDEP_CHAIN_HLOCKS too low!" errors */ #define MAX_NESTED_LOCKS 8 @@ -56,6 +58,55 @@ module_param(torture_type, charp, 0444); MODULE_PARM_DESC(torture_type, "Type of lock to torture (spin_lock, spin_lock_irq, mutex_lock, ...)"); +static cpumask_var_t bind_readers; // Bind the readers to the specified set of CPUs. +static cpumask_var_t bind_writers; // Bind the writers to the specified set of CPUs. + +// Parse a cpumask kernel parameter. If there are more users later on, +// this might need to got to a more central location. +static int param_set_cpumask(const char *val, const struct kernel_param *kp) +{ + cpumask_var_t *cm_bind = kp->arg; + int ret; + char *s; + + if (!alloc_cpumask_var(cm_bind, GFP_KERNEL)) { + s = "Out of memory"; + ret = -ENOMEM; + goto out_err; + } + ret = cpulist_parse(val, *cm_bind); + if (!ret) + return ret; + s = "Bad CPU range"; +out_err: + pr_warn("%s: %s, all CPUs set\n", kp->name, s); + cpumask_setall(*cm_bind); + return ret; +} + +// Output a cpumask kernel parameter. +static int param_get_cpumask(char *buffer, const struct kernel_param *kp) +{ + cpumask_var_t *cm_bind = kp->arg; + + return sprintf(buffer, "%*pbl", cpumask_pr_args(*cm_bind)); +} + +static bool cpumask_nonempty(cpumask_var_t mask) +{ + return cpumask_available(mask) && !cpumask_empty(mask); +} + +static const struct kernel_param_ops lt_bind_ops = { + .set = param_set_cpumask, + .get = param_get_cpumask, +}; + +module_param_cb(bind_readers, <_bind_ops, &bind_readers, 0644); +module_param_cb(bind_writers, <_bind_ops, &bind_writers, 0644); + +long torture_sched_setaffinity(pid_t pid, const struct cpumask *in_mask); + static struct task_struct *stats_task; static struct task_struct **writer_tasks; static struct task_struct **reader_tasks; @@ -69,6 +120,12 @@ struct lock_stress_stats { long n_lock_acquired; }; +struct call_rcu_chain { + struct rcu_head crc_rh; + bool crc_stop; +}; +struct call_rcu_chain *call_rcu_chain; + /* Forward reference. */ static void lock_torture_cleanup(void); @@ -116,12 +173,9 @@ static int torture_lock_busted_write_lock(int tid __maybe_unused) static void torture_lock_busted_write_delay(struct torture_random_state *trsp) { - const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX; - /* We want a long delay occasionally to force massive contention. */ - if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * 2000 * longdelay_ms))) - mdelay(longdelay_ms); + if (long_hold && !(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * long_hold))) + mdelay(long_hold); if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) torture_preempt_schedule(); /* Allow test to be preempted. */ } @@ -194,15 +248,14 @@ __acquires(torture_spinlock) static void torture_spin_lock_write_delay(struct torture_random_state *trsp) { const unsigned long shortdelay_us = 2; - const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX; unsigned long j; /* We want a short delay mostly to emulate likely code, and * we want a long delay occasionally to force massive contention. */ - if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * longdelay_ms))) { + if (long_hold && !(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * long_hold))) { j = jiffies; - mdelay(longdelay_ms); + mdelay(long_hold); pr_alert("%s: delay = %lu jiffies.\n", __func__, jiffies - j); } if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 200 * shortdelay_us))) @@ -320,14 +373,12 @@ __acquires(torture_rwlock) static void torture_rwlock_write_delay(struct torture_random_state *trsp) { const unsigned long shortdelay_us = 2; - const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX; /* We want a short delay mostly to emulate likely code, and * we want a long delay occasionally to force massive contention. */ - if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * 2000 * longdelay_ms))) - mdelay(longdelay_ms); + if (long_hold && !(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * long_hold))) + mdelay(long_hold); else udelay(shortdelay_us); } @@ -348,14 +399,12 @@ __acquires(torture_rwlock) static void torture_rwlock_read_delay(struct torture_random_state *trsp) { const unsigned long shortdelay_us = 10; - const unsigned long longdelay_ms = 100; /* We want a short delay mostly to emulate likely code, and * we want a long delay occasionally to force massive contention. */ - if (!(torture_random(trsp) % - (cxt.nrealreaders_stress * 2000 * longdelay_ms))) - mdelay(longdelay_ms); + if (long_hold && !(torture_random(trsp) % (cxt.nrealreaders_stress * 2000 * long_hold))) + mdelay(long_hold); else udelay(shortdelay_us); } @@ -453,12 +502,9 @@ __acquires(torture_mutex) static void torture_mutex_delay(struct torture_random_state *trsp) { - const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX; - /* We want a long delay occasionally to force massive contention. */ - if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * 2000 * longdelay_ms))) - mdelay(longdelay_ms * 5); + if (long_hold && !(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * long_hold))) + mdelay(long_hold * 5); if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) torture_preempt_schedule(); /* Allow test to be preempted. */ } @@ -626,15 +672,13 @@ __acquires(torture_rtmutex) static void torture_rtmutex_delay(struct torture_random_state *trsp) { const unsigned long shortdelay_us = 2; - const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX; /* * We want a short delay mostly to emulate likely code, and * we want a long delay occasionally to force massive contention. */ - if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * 2000 * longdelay_ms))) - mdelay(longdelay_ms); + if (long_hold && !(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * long_hold))) + mdelay(long_hold); if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 200 * shortdelay_us))) udelay(shortdelay_us); @@ -691,12 +735,9 @@ __acquires(torture_rwsem) static void torture_rwsem_write_delay(struct torture_random_state *trsp) { - const unsigned long longdelay_ms = long_hold ? long_hold : ULONG_MAX; - /* We want a long delay occasionally to force massive contention. */ - if (!(torture_random(trsp) % - (cxt.nrealwriters_stress * 2000 * longdelay_ms))) - mdelay(longdelay_ms * 10); + if (long_hold && !(torture_random(trsp) % (cxt.nrealwriters_stress * 2000 * long_hold))) + mdelay(long_hold * 10); if (!(torture_random(trsp) % (cxt.nrealwriters_stress * 20000))) torture_preempt_schedule(); /* Allow test to be preempted. */ } @@ -716,14 +757,11 @@ __acquires(torture_rwsem) static void torture_rwsem_read_delay(struct torture_random_state *trsp) { - const unsigned long longdelay_ms = 100; - /* We want a long delay occasionally to force massive contention. */ - if (!(torture_random(trsp) % - (cxt.nrealreaders_stress * 2000 * longdelay_ms))) - mdelay(longdelay_ms * 2); + if (long_hold && !(torture_random(trsp) % (cxt.nrealreaders_stress * 2000 * long_hold))) + mdelay(long_hold * 2); else - mdelay(longdelay_ms / 2); + mdelay(long_hold / 2); if (!(torture_random(trsp) % (cxt.nrealreaders_stress * 20000))) torture_preempt_schedule(); /* Allow test to be preempted. */ } @@ -803,11 +841,13 @@ static struct lock_torture_ops percpu_rwsem_lock_ops = { */ static int lock_torture_writer(void *arg) { + unsigned long j; + unsigned long j1; + u32 lockset_mask; struct lock_stress_stats *lwsp = arg; - int tid = lwsp - cxt.lwsa; DEFINE_TORTURE_RANDOM(rand); - u32 lockset_mask; bool skip_main_lock; + int tid = lwsp - cxt.lwsa; VERBOSE_TOROUT_STRING("lock_torture_writer task started"); if (!rt_task(current)) @@ -834,17 +874,24 @@ static int lock_torture_writer(void *arg) cxt.cur_ops->nested_lock(tid, lockset_mask); if (!skip_main_lock) { + if (acq_writer_lim > 0) + j = jiffies; cxt.cur_ops->writelock(tid); if (WARN_ON_ONCE(lock_is_write_held)) lwsp->n_lock_fail++; lock_is_write_held = true; if (WARN_ON_ONCE(atomic_read(&lock_is_read_held))) lwsp->n_lock_fail++; /* rare, but... */ - + if (acq_writer_lim > 0) { + j1 = jiffies; + WARN_ONCE(time_after(j1, j + acq_writer_lim), + "%s: Lock acquisition took %lu jiffies.\n", + __func__, j1 - j); + } lwsp->n_lock_acquired++; - } - if (!skip_main_lock) { + cxt.cur_ops->write_delay(&rand); + lock_is_write_held = false; WRITE_ONCE(last_lock_release, jiffies); cxt.cur_ops->writeunlock(tid); @@ -986,16 +1033,69 @@ static int lock_torture_stats(void *arg) return 0; } + static inline void lock_torture_print_module_parms(struct lock_torture_ops *cur_ops, const char *tag) { + static cpumask_t cpumask_all; + cpumask_t *rcmp = cpumask_nonempty(bind_readers) ? bind_readers : &cpumask_all; + cpumask_t *wcmp = cpumask_nonempty(bind_writers) ? bind_writers : &cpumask_all; + + cpumask_setall(&cpumask_all); pr_alert("%s" TORTURE_FLAG - "--- %s%s: nwriters_stress=%d nreaders_stress=%d nested_locks=%d stat_interval=%d verbose=%d shuffle_interval=%d stutter=%d shutdown_secs=%d onoff_interval=%d onoff_holdoff=%d\n", + "--- %s%s: acq_writer_lim=%d bind_readers=%*pbl bind_writers=%*pbl call_rcu_chains=%d long_hold=%d nested_locks=%d nreaders_stress=%d nwriters_stress=%d onoff_holdoff=%d onoff_interval=%d rt_boost=%d rt_boost_factor=%d shuffle_interval=%d shutdown_secs=%d stat_interval=%d stutter=%d verbose=%d writer_fifo=%d\n", torture_type, tag, cxt.debug_lock ? " [debug]": "", - cxt.nrealwriters_stress, cxt.nrealreaders_stress, - nested_locks, stat_interval, verbose, shuffle_interval, - stutter, shutdown_secs, onoff_interval, onoff_holdoff); + acq_writer_lim, cpumask_pr_args(rcmp), cpumask_pr_args(wcmp), + call_rcu_chains, long_hold, nested_locks, cxt.nrealreaders_stress, + cxt.nrealwriters_stress, onoff_holdoff, onoff_interval, rt_boost, + rt_boost_factor, shuffle_interval, shutdown_secs, stat_interval, stutter, + verbose, writer_fifo); +} + +// If requested, maintain call_rcu() chains to keep a grace period always +// in flight. These increase the probability of getting an RCU CPU stall +// warning and associated diagnostics when a locking primitive stalls. + +static void call_rcu_chain_cb(struct rcu_head *rhp) +{ + struct call_rcu_chain *crcp = container_of(rhp, struct call_rcu_chain, crc_rh); + + if (!smp_load_acquire(&crcp->crc_stop)) { + (void)start_poll_synchronize_rcu(); // Start one grace period... + call_rcu(&crcp->crc_rh, call_rcu_chain_cb); // ... and later start another. + } +} + +// Start the requested number of call_rcu() chains. +static int call_rcu_chain_init(void) +{ + int i; + + if (call_rcu_chains <= 0) + return 0; + call_rcu_chain = kcalloc(call_rcu_chains, sizeof(*call_rcu_chain), GFP_KERNEL); + if (!call_rcu_chain) + return -ENOMEM; + for (i = 0; i < call_rcu_chains; i++) { + call_rcu_chain[i].crc_stop = false; + call_rcu(&call_rcu_chain[i].crc_rh, call_rcu_chain_cb); + } + return 0; +} + +// Stop all of the call_rcu() chains. +static void call_rcu_chain_cleanup(void) +{ + int i; + + if (!call_rcu_chain) + return; + for (i = 0; i < call_rcu_chains; i++) + smp_store_release(&call_rcu_chain[i].crc_stop, true); + rcu_barrier(); + kfree(call_rcu_chain); + call_rcu_chain = NULL; } static void lock_torture_cleanup(void) @@ -1048,6 +1148,8 @@ static void lock_torture_cleanup(void) kfree(cxt.lrsa); cxt.lrsa = NULL; + call_rcu_chain_cleanup(); + end: if (cxt.init_called) { if (cxt.cur_ops->exit) @@ -1177,6 +1279,10 @@ static int __init lock_torture_init(void) } } + firsterr = call_rcu_chain_init(); + if (torture_init_error(firsterr)) + goto unwind; + lock_torture_print_module_parms(cxt.cur_ops, "Start of test"); /* Prepare torture context. */ @@ -1250,6 +1356,8 @@ static int __init lock_torture_init(void) writer_fifo ? sched_set_fifo : NULL); if (torture_init_error(firsterr)) goto unwind; + if (cpumask_nonempty(bind_writers)) + torture_sched_setaffinity(writer_tasks[i]->pid, bind_writers); create_reader: if (cxt.cur_ops->readlock == NULL || (j >= cxt.nrealreaders_stress)) @@ -1259,6 +1367,8 @@ static int __init lock_torture_init(void) reader_tasks[j]); if (torture_init_error(firsterr)) goto unwind; + if (cpumask_nonempty(bind_readers)) + torture_sched_setaffinity(reader_tasks[j]->pid, bind_readers); } if (stat_interval > 0) { firsterr = torture_create_kthread(lock_torture_stats, NULL, diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index d973fe6041..2deeeca3e7 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -1126,6 +1126,9 @@ EXPORT_SYMBOL(ww_mutex_lock_interruptible); #endif /* !CONFIG_DEBUG_LOCK_ALLOC */ #endif /* !CONFIG_PREEMPT_RT */ +EXPORT_TRACEPOINT_SYMBOL_GPL(contention_begin); +EXPORT_TRACEPOINT_SYMBOL_GPL(contention_end); + /** * atomic_dec_and_mutex_lock - return holding mutex if we dec to 0 * @cnt: the atomic which we are to dec diff --git a/kernel/locking/osq_lock.c b/kernel/locking/osq_lock.c index d5610ad52b..75a6f61338 100644 --- a/kernel/locking/osq_lock.c +++ b/kernel/locking/osq_lock.c @@ -11,6 +11,13 @@ * called from interrupt context and we have preemption disabled while * spinning. */ + +struct optimistic_spin_node { + struct optimistic_spin_node *next, *prev; + int locked; /* 1 if lock acquired */ + int cpu; /* encoded CPU # + 1 value */ +}; + static DEFINE_PER_CPU_SHARED_ALIGNED(struct optimistic_spin_node, osq_node); /* @@ -37,32 +44,28 @@ static inline struct optimistic_spin_node *decode_cpu(int encoded_cpu_val) /* * Get a stable @node->next pointer, either for unlock() or unqueue() purposes. * Can return NULL in case we were the last queued and we updated @lock instead. + * + * If osq_lock() is being cancelled there must be a previous node + * and 'old_cpu' is its CPU #. + * For osq_unlock() there is never a previous node and old_cpu is + * set to OSQ_UNLOCKED_VAL. */ static inline struct optimistic_spin_node * osq_wait_next(struct optimistic_spin_queue *lock, struct optimistic_spin_node *node, - struct optimistic_spin_node *prev) + int old_cpu) { - struct optimistic_spin_node *next = NULL; int curr = encode_cpu(smp_processor_id()); - int old; - - /* - * If there is a prev node in queue, then the 'old' value will be - * the prev node's CPU #, else it's set to OSQ_UNLOCKED_VAL since if - * we're currently last in queue, then the queue will then become empty. - */ - old = prev ? prev->cpu : OSQ_UNLOCKED_VAL; for (;;) { if (atomic_read(&lock->tail) == curr && - atomic_cmpxchg_acquire(&lock->tail, curr, old) == curr) { + atomic_cmpxchg_acquire(&lock->tail, curr, old_cpu) == curr) { /* * We were the last queued, we moved @lock back. @prev * will now observe @lock and will complete its * unlock()/unqueue(). */ - break; + return NULL; } /* @@ -76,15 +79,15 @@ osq_wait_next(struct optimistic_spin_queue *lock, * wait for a new @node->next from its Step-C. */ if (node->next) { + struct optimistic_spin_node *next; + next = xchg(&node->next, NULL); if (next) - break; + return next; } cpu_relax(); } - - return next; } bool osq_lock(struct optimistic_spin_queue *lock) @@ -186,7 +189,7 @@ bool osq_lock(struct optimistic_spin_queue *lock) * back to @prev. */ - next = osq_wait_next(lock, node, prev); + next = osq_wait_next(lock, node, prev->cpu); if (!next) return false; @@ -226,7 +229,7 @@ void osq_unlock(struct optimistic_spin_queue *lock) return; } - next = osq_wait_next(lock, node, NULL); + next = osq_wait_next(lock, node, OSQ_UNLOCKED_VAL); if (next) WRITE_ONCE(next->locked, 1); } diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 21db0df0eb..4a10e8c16f 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -218,6 +218,11 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, return try_cmpxchg_acquire(&lock->owner, &old, new); } +static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) +{ + return rt_mutex_cmpxchg_acquire(lock, NULL, current); +} + static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, struct task_struct *old, struct task_struct *new) @@ -297,6 +302,20 @@ static __always_inline bool rt_mutex_cmpxchg_acquire(struct rt_mutex_base *lock, } +static int __sched rt_mutex_slowtrylock(struct rt_mutex_base *lock); + +static __always_inline bool rt_mutex_try_acquire(struct rt_mutex_base *lock) +{ + /* + * With debug enabled rt_mutex_cmpxchg trylock() will always fail. + * + * Avoid unconditionally taking the slow path by using + * rt_mutex_slow_trylock() which is covered by the debug code and can + * acquire a non-contended rtmutex. + */ + return rt_mutex_slowtrylock(lock); +} + static __always_inline bool rt_mutex_cmpxchg_release(struct rt_mutex_base *lock, struct task_struct *old, struct task_struct *new) @@ -1613,7 +1632,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, raw_spin_unlock_irq(&lock->wait_lock); if (!owner || !rtmutex_spin_on_owner(lock, waiter, owner)) - schedule(); + rt_mutex_schedule(); raw_spin_lock_irq(&lock->wait_lock); set_current_state(state); @@ -1642,7 +1661,7 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, WARN(1, "rtmutex deadlock detected\n"); while (1) { set_current_state(TASK_INTERRUPTIBLE); - schedule(); + rt_mutex_schedule(); } } @@ -1738,6 +1757,15 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, int ret; /* + * Do all pre-schedule work here, before we queue a waiter and invoke + * PI -- any such work that trips on rtlock (PREEMPT_RT spinlock) would + * otherwise recurse back into task_blocks_on_rt_mutex() through + * rtlock_slowlock() and will then enqueue a second waiter for this + * same task and things get really confusing real fast. + */ + rt_mutex_pre_schedule(); + + /* * Technically we could use raw_spin_[un]lock_irq() here, but this can * be called in early boot if the cmpxchg() fast path is disabled * (debug, no architecture support). In this case we will acquire the @@ -1748,6 +1776,7 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, raw_spin_lock_irqsave(&lock->wait_lock, flags); ret = __rt_mutex_slowlock_locked(lock, ww_ctx, state); raw_spin_unlock_irqrestore(&lock->wait_lock, flags); + rt_mutex_post_schedule(); return ret; } @@ -1755,7 +1784,9 @@ static int __sched rt_mutex_slowlock(struct rt_mutex_base *lock, static __always_inline int __rt_mutex_lock(struct rt_mutex_base *lock, unsigned int state) { - if (likely(rt_mutex_cmpxchg_acquire(lock, NULL, current))) + lockdep_assert(!current->pi_blocked_on); + + if (likely(rt_mutex_try_acquire(lock))) return 0; return rt_mutex_slowlock(lock, NULL, state); diff --git a/kernel/locking/rwbase_rt.c b/kernel/locking/rwbase_rt.c index 25ec023947..34a59569db 100644 --- a/kernel/locking/rwbase_rt.c +++ b/kernel/locking/rwbase_rt.c @@ -71,6 +71,7 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, struct rt_mutex_base *rtm = &rwb->rtmutex; int ret; + rwbase_pre_schedule(); raw_spin_lock_irq(&rtm->wait_lock); /* @@ -125,12 +126,15 @@ static int __sched __rwbase_read_lock(struct rwbase_rt *rwb, rwbase_rtmutex_unlock(rtm); trace_contention_end(rwb, ret); + rwbase_post_schedule(); return ret; } static __always_inline int rwbase_read_lock(struct rwbase_rt *rwb, unsigned int state) { + lockdep_assert(!current->pi_blocked_on); + if (rwbase_read_trylock(rwb)) return 0; @@ -237,6 +241,8 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, /* Force readers into slow path */ atomic_sub(READER_BIAS, &rwb->readers); + rwbase_pre_schedule(); + raw_spin_lock_irqsave(&rtm->wait_lock, flags); if (__rwbase_write_trylock(rwb)) goto out_unlock; @@ -248,6 +254,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, if (rwbase_signal_pending_state(state, current)) { rwbase_restore_current_state(); __rwbase_write_unlock(rwb, 0, flags); + rwbase_post_schedule(); trace_contention_end(rwb, -EINTR); return -EINTR; } @@ -266,6 +273,7 @@ static int __sched rwbase_write_lock(struct rwbase_rt *rwb, out_unlock: raw_spin_unlock_irqrestore(&rtm->wait_lock, flags); + rwbase_post_schedule(); return 0; } diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 9eabd585ce..2340b6d90e 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -1427,8 +1427,14 @@ static inline void __downgrade_write(struct rw_semaphore *sem) #define rwbase_signal_pending_state(state, current) \ signal_pending_state(state, current) +#define rwbase_pre_schedule() \ + rt_mutex_pre_schedule() + #define rwbase_schedule() \ - schedule() + rt_mutex_schedule() + +#define rwbase_post_schedule() \ + rt_mutex_post_schedule() #include "rwbase_rt.c" diff --git a/kernel/locking/spinlock_rt.c b/kernel/locking/spinlock_rt.c index 48a19ed848..38e292454f 100644 --- a/kernel/locking/spinlock_rt.c +++ b/kernel/locking/spinlock_rt.c @@ -37,6 +37,8 @@ static __always_inline void rtlock_lock(struct rt_mutex_base *rtm) { + lockdep_assert(!current->pi_blocked_on); + if (unlikely(!rt_mutex_cmpxchg_acquire(rtm, NULL, current))) rtlock_slowlock(rtm); } @@ -184,9 +186,13 @@ static __always_inline int rwbase_rtmutex_trylock(struct rt_mutex_base *rtm) #define rwbase_signal_pending_state(state, current) (0) +#define rwbase_pre_schedule() + #define rwbase_schedule() \ schedule_rtlock() +#define rwbase_post_schedule() + #include "rwbase_rt.c" /* * The common functions which get wrapped into the rwlock API. diff --git a/kernel/locking/test-ww_mutex.c b/kernel/locking/test-ww_mutex.c index 7c5a8f0549..78719e1ef1 100644 --- a/kernel/locking/test-ww_mutex.c +++ b/kernel/locking/test-ww_mutex.c @@ -9,7 +9,7 @@ #include <linux/delay.h> #include <linux/kthread.h> #include <linux/module.h> -#include <linux/random.h> +#include <linux/prandom.h> #include <linux/slab.h> #include <linux/ww_mutex.h> @@ -386,6 +386,19 @@ struct stress { int nlocks; }; +struct rnd_state rng; +DEFINE_SPINLOCK(rng_lock); + +static inline u32 prandom_u32_below(u32 ceil) +{ + u32 ret; + + spin_lock(&rng_lock); + ret = prandom_u32_state(&rng) % ceil; + spin_unlock(&rng_lock); + return ret; +} + static int *get_random_order(int count) { int *order; @@ -399,7 +412,7 @@ static int *get_random_order(int count) order[n] = n; for (n = count - 1; n > 1; n--) { - r = get_random_u32_below(n + 1); + r = prandom_u32_below(n + 1); if (r != n) { tmp = order[n]; order[n] = order[r]; @@ -452,17 +465,18 @@ retry: ww_mutex_unlock(&locks[order[n]]); if (err == -EDEADLK) { - ww_mutex_lock_slow(&locks[order[contended]], &ctx); - goto retry; + if (!time_after(jiffies, stress->timeout)) { + ww_mutex_lock_slow(&locks[order[contended]], &ctx); + goto retry; + } } + ww_acquire_fini(&ctx); if (err) { pr_err_once("stress (%s) failed with %d\n", __func__, err); break; } - - ww_acquire_fini(&ctx); } while (!time_after(jiffies, stress->timeout)); kfree(order); @@ -629,6 +643,8 @@ static int __init test_ww_mutex_init(void) printk(KERN_INFO "Beginning ww mutex selftests\n"); + prandom_seed_state(&rng, get_random_u64()); + wq = alloc_workqueue("test-ww_mutex", WQ_UNBOUND, 0); if (!wq) return -ENOMEM; diff --git a/kernel/locking/ww_rt_mutex.c b/kernel/locking/ww_rt_mutex.c index d1473c6241..c7196de838 100644 --- a/kernel/locking/ww_rt_mutex.c +++ b/kernel/locking/ww_rt_mutex.c @@ -62,7 +62,7 @@ __ww_rt_mutex_lock(struct ww_mutex *lock, struct ww_acquire_ctx *ww_ctx, } mutex_acquire_nest(&rtm->dep_map, 0, 0, nest_lock, ip); - if (likely(rt_mutex_cmpxchg_acquire(&rtm->rtmutex, NULL, current))) { + if (likely(rt_mutex_try_acquire(&rtm->rtmutex))) { if (ww_ctx) ww_mutex_set_context_fastpath(lock, ww_ctx); return 0; |