Diffstat (limited to 'debian/patches-rt/0258-workqueue-Prevent-deadlock-stall-on-RT.patch')
-rw-r--r--  debian/patches-rt/0258-workqueue-Prevent-deadlock-stall-on-RT.patch | 207
1 file changed, 207 insertions(+), 0 deletions(-)
diff --git a/debian/patches-rt/0258-workqueue-Prevent-deadlock-stall-on-RT.patch b/debian/patches-rt/0258-workqueue-Prevent-deadlock-stall-on-RT.patch
new file mode 100644
index 000000000..10fe17fc0
--- /dev/null
+++ b/debian/patches-rt/0258-workqueue-Prevent-deadlock-stall-on-RT.patch
@@ -0,0 +1,207 @@
+From 95516b448b8dcc30ac9e5fc0e38c1c3d3f8ef930 Mon Sep 17 00:00:00 2001
+From: Thomas Gleixner <tglx@linutronix.de>
+Date: Fri, 27 Jun 2014 16:24:52 +0200
+Subject: [PATCH 258/347] workqueue: Prevent deadlock/stall on RT
+Origin: https://www.kernel.org/pub/linux/kernel/projects/rt/4.19/older/patches-4.19.246-rt110.tar.xz
+
+Austin reported an XFS deadlock/stall on RT where scheduled work never
+gets executed and tasks wait for each other forever.
+
+The underlying problem is the RT modification of the code which handles
+workers that are about to go to sleep. In mainline, a worker thread
+which goes to sleep wakes an idle worker if there is more work to do.
+This happens from the guts of the schedule() function. On RT this must
+happen outside of schedule(), and the accessed data structures are not
+protected against scheduling due to the spinlock-to-rtmutex conversion.
+So the naive solution was to move the code outside of the scheduler and
+protect the data structures with the pool lock. That approach turned
+out to be a little naive: we cannot call into that code when the thread
+blocks on a lock, because it is not allowed to block on two locks in
+parallel. So we don't call into the worker wakeup magic when the worker
+is blocked on a lock, which causes the deadlock/stall observed by
+Austin and Mike.
+
+Looking deeper into that worker code it turns out that the only
+relevant data structure which needs to be protected is the list of
+idle workers which can be woken up.
+
+So the solution is to protect the list manipulation operations with
+preempt_disable/enable pairs on RT and to call unconditionally into the
+worker code even when the worker is blocked on a lock. The preemption
+protection is safe as there is nothing which can fiddle with the list
+outside of thread context.
+
+Reported-and-tested-by: Austin Schuh <austin@peloton-tech.com>
+Reported-and-tested-by: Mike Galbraith <umgwanakikbuti@gmail.com>
+Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
+Link: http://vger.kernel.org/r/alpine.DEB.2.10.1406271249510.5170@nanos
+Cc: Richard Weinberger <richard.weinberger@gmail.com>
+Cc: Steven Rostedt <rostedt@goodmis.org>
+---
+ kernel/sched/core.c | 6 +++--
+ kernel/workqueue.c | 60 +++++++++++++++++++++++++++++++++++----------
+ 2 files changed, 51 insertions(+), 15 deletions(-)
+
+diff --git a/kernel/sched/core.c b/kernel/sched/core.c
+index 0995748a3a1d..feaf4d5683af 100644
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -3615,9 +3615,8 @@ void __noreturn do_task_dead(void)
+
+ static inline void sched_submit_work(struct task_struct *tsk)
+ {
+- if (!tsk->state || tsk_is_pi_blocked(tsk))
++ if (!tsk->state)
+ return;
+-
+ /*
+ * If a worker went to sleep, notify and ask workqueue whether
+ * it wants to wake up a task to maintain concurrency.
+@@ -3631,6 +3630,9 @@ static inline void sched_submit_work(struct task_struct *tsk)
+ preempt_enable_no_resched();
+ }
+
++ if (tsk_is_pi_blocked(tsk))
++ return;
++
+ /*
+ * If we are going to sleep and we have plugged IO queued,
+ * make sure to submit it to avoid deadlocks.
+diff --git a/kernel/workqueue.c b/kernel/workqueue.c
+index 0c529f920232..91f5696cf335 100644
+--- a/kernel/workqueue.c
++++ b/kernel/workqueue.c
+@@ -126,6 +126,11 @@ enum {
+ * cpu or grabbing pool->lock is enough for read access. If
+ * POOL_DISASSOCIATED is set, it's identical to L.
+ *
++ * On RT we need the extra protection via rt_lock_idle_list() for
++ * the list manipulations against read access from
++ * wq_worker_sleeping(). All other places are nicely serialized via
++ * pool->lock.
++ *
+ * A: wq_pool_attach_mutex protected.
+ *
+ * PL: wq_pool_mutex protected.
+@@ -431,6 +436,31 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq);
+ if (({ assert_rcu_or_wq_mutex(wq); false; })) { } \
+ else
+
++#ifdef CONFIG_PREEMPT_RT_BASE
++static inline void rt_lock_idle_list(struct worker_pool *pool)
++{
++ preempt_disable();
++}
++static inline void rt_unlock_idle_list(struct worker_pool *pool)
++{
++ preempt_enable();
++}
++static inline void sched_lock_idle_list(struct worker_pool *pool) { }
++static inline void sched_unlock_idle_list(struct worker_pool *pool) { }
++#else
++static inline void rt_lock_idle_list(struct worker_pool *pool) { }
++static inline void rt_unlock_idle_list(struct worker_pool *pool) { }
++static inline void sched_lock_idle_list(struct worker_pool *pool)
++{
++ spin_lock_irq(&pool->lock);
++}
++static inline void sched_unlock_idle_list(struct worker_pool *pool)
++{
++ spin_unlock_irq(&pool->lock);
++}
++#endif
++
++
+ #ifdef CONFIG_DEBUG_OBJECTS_WORK
+
+ static struct debug_obj_descr work_debug_descr;
+@@ -837,10 +867,16 @@ static struct worker *first_idle_worker(struct worker_pool *pool)
+ */
+ static void wake_up_worker(struct worker_pool *pool)
+ {
+- struct worker *worker = first_idle_worker(pool);
++ struct worker *worker;
++
++ rt_lock_idle_list(pool);
++
++ worker = first_idle_worker(pool);
+
+ if (likely(worker))
+ wake_up_process(worker->task);
++
++ rt_unlock_idle_list(pool);
+ }
+
+ /**
+@@ -869,7 +905,7 @@ void wq_worker_running(struct task_struct *task)
+ */
+ void wq_worker_sleeping(struct task_struct *task)
+ {
+- struct worker *next, *worker = kthread_data(task);
++ struct worker *worker = kthread_data(task);
+ struct worker_pool *pool;
+
+ /*
+@@ -886,26 +922,18 @@ void wq_worker_sleeping(struct task_struct *task)
+ return;
+
+ worker->sleeping = 1;
+- spin_lock_irq(&pool->lock);
+
+ /*
+ * The counterpart of the following dec_and_test, implied mb,
+ * worklist not empty test sequence is in insert_work().
+ * Please read comment there.
+- *
+- * NOT_RUNNING is clear. This means that we're bound to and
+- * running on the local cpu w/ rq lock held and preemption
+- * disabled, which in turn means that none else could be
+- * manipulating idle_list, so dereferencing idle_list without pool
+- * lock is safe.
+ */
+ if (atomic_dec_and_test(&pool->nr_running) &&
+ !list_empty(&pool->worklist)) {
+- next = first_idle_worker(pool);
+- if (next)
+- wake_up_process(next->task);
++ sched_lock_idle_list(pool);
++ wake_up_worker(pool);
++ sched_unlock_idle_list(pool);
+ }
+- spin_unlock_irq(&pool->lock);
+ }
+
+ /**
+@@ -1678,7 +1706,9 @@ static void worker_enter_idle(struct worker *worker)
+ worker->last_active = jiffies;
+
+ /* idle_list is LIFO */
++ rt_lock_idle_list(pool);
+ list_add(&worker->entry, &pool->idle_list);
++ rt_unlock_idle_list(pool);
+
+ if (too_many_workers(pool) && !timer_pending(&pool->idle_timer))
+ mod_timer(&pool->idle_timer, jiffies + IDLE_WORKER_TIMEOUT);
+@@ -1711,7 +1741,9 @@ static void worker_leave_idle(struct worker *worker)
+ return;
+ worker_clr_flags(worker, WORKER_IDLE);
+ pool->nr_idle--;
++ rt_lock_idle_list(pool);
+ list_del_init(&worker->entry);
++ rt_unlock_idle_list(pool);
+ }
+
+ static struct worker *alloc_worker(int node)
+@@ -1876,7 +1908,9 @@ static void destroy_worker(struct worker *worker)
+ pool->nr_workers--;
+ pool->nr_idle--;
+
++ rt_lock_idle_list(pool);
+ list_del_init(&worker->entry);
++ rt_unlock_idle_list(pool);
+ worker->flags |= WORKER_DIE;
+ wake_up_process(worker->task);
+ }
+--
+2.36.1
+
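For reference, a sketch of how sched_submit_work() reads once the kernel/sched/core.c hunk above is applied. It is reconstructed from the diff context; the PF_WQ_WORKER notification block and the block-plug flush at the end are assumed from the surrounding 4.19-rt source and are not introduced by this patch. The point of the reordering is that the workqueue notification now runs even when the task blocks on a sleeping lock (tsk_is_pi_blocked()), while the plugged-IO flush still bails out early in that case.

static inline void sched_submit_work(struct task_struct *tsk)
{
	if (!tsk->state)
		return;
	/*
	 * Notify the workqueue code first. After this patch the
	 * notification is no longer skipped for pi-blocked tasks, so
	 * an idle worker still gets woken when a worker blocks on a
	 * sleeping lock.
	 */
	if (tsk->flags & PF_WQ_WORKER) {	/* assumed from surrounding code */
		preempt_disable();
		wq_worker_sleeping(tsk);
		preempt_enable_no_resched();
	}

	if (tsk_is_pi_blocked(tsk))
		return;

	/*
	 * If we are going to sleep and we have plugged IO queued,
	 * make sure to submit it to avoid deadlocks.
	 */
	if (blk_needs_flush_plug(tsk))		/* assumed from surrounding code */
		blk_schedule_flush_plug(tsk);
}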
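The kernel/workqueue.c hunks read more easily as one condensed view of the scheme; the excerpt below is assembled from the hunks above (non-RT stubs and unrelated lines omitted). Writers of pool->idle_list (worker_enter_idle(), worker_leave_idle(), destroy_worker()) run in thread context with pool->lock held and additionally wrap the list operation in rt_lock_idle_list(); the reader, wake_up_worker() reached from wq_worker_sleeping(), relies on rt_lock_idle_list() alone on RT, so it can be called even while the worker is blocked on a sleeping lock. Per the changelog, this is safe because nothing manipulates the list outside of thread context.

#ifdef CONFIG_PREEMPT_RT_BASE
/* On RT the idle_list peek needs no pool->lock, only disabled preemption. */
static inline void rt_lock_idle_list(struct worker_pool *pool)   { preempt_disable(); }
static inline void rt_unlock_idle_list(struct worker_pool *pool) { preempt_enable(); }
#endif

/* Writer side, e.g. worker_enter_idle(), called with pool->lock held: */
	rt_lock_idle_list(pool);
	list_add(&worker->entry, &pool->idle_list);
	rt_unlock_idle_list(pool);

/* Reader side, reached from wq_worker_sleeping() when a worker goes to sleep: */
static void wake_up_worker(struct worker_pool *pool)
{
	struct worker *worker;

	rt_lock_idle_list(pool);
	worker = first_idle_worker(pool);
	if (likely(worker))
		wake_up_process(worker->task);
	rt_unlock_idle_list(pool);
}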