1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
|
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Wed, 26 Jun 2019 11:59:44 +0200
Subject: [PATCH 275/353] futex: Make the futex_hash_bucket lock raw
Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=2a322e04cc1a2220cedbada7ba39667daef47ec7
[ Upstream commit f646521aadedab78801c9befe193e2e8a0c99298 ]
Since commit 1a1fb985f2e2b ("futex: Handle early deadlock return
correctly") we can deadlock while attempting to re-acquire the HB lock
after we have failed to acquire the futex lock.
The RT waiter (for the futex lock) is still enqueued, and acquiring the
HB lock may build up a lock chain which leads to a deadlock if the owner
of the futex lock holds the HB lock.
Make the hash bucket lock raw so it does not participate in the
lockchain.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Steven Rostedt (VMware) <rostedt@goodmis.org>
---
kernel/futex.c | 86 +++++++++++++++++++++++++-------------------------
1 file changed, 43 insertions(+), 43 deletions(-)
diff --git a/kernel/futex.c b/kernel/futex.c
index 9bcfdcffb2dd..8a49dd71b233 100644
--- a/kernel/futex.c
+++ b/kernel/futex.c
@@ -243,7 +243,7 @@ struct futex_q {
struct plist_node list;
struct task_struct *task;
- spinlock_t *lock_ptr;
+ raw_spinlock_t *lock_ptr;
union futex_key key;
struct futex_pi_state *pi_state;
struct rt_mutex_waiter *rt_waiter;
@@ -264,7 +264,7 @@ static const struct futex_q futex_q_init = {
*/
struct futex_hash_bucket {
atomic_t waiters;
- spinlock_t lock;
+ raw_spinlock_t lock;
struct plist_head chain;
} ____cacheline_aligned_in_smp;
@@ -952,7 +952,7 @@ static void exit_pi_state_list(struct task_struct *curr)
}
raw_spin_unlock_irq(&curr->pi_lock);
- spin_lock(&hb->lock);
+ raw_spin_lock(&hb->lock);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
raw_spin_lock(&curr->pi_lock);
/*
@@ -962,7 +962,7 @@ static void exit_pi_state_list(struct task_struct *curr)
if (head->next != next) {
/* retain curr->pi_lock for the loop invariant */
raw_spin_unlock(&pi_state->pi_mutex.wait_lock);
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
put_pi_state(pi_state);
continue;
}
@@ -974,7 +974,7 @@ static void exit_pi_state_list(struct task_struct *curr)
raw_spin_unlock(&curr->pi_lock);
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
rt_mutex_futex_unlock(&pi_state->pi_mutex);
put_pi_state(pi_state);
@@ -1523,7 +1523,7 @@ static void __unqueue_futex(struct futex_q *q)
{
struct futex_hash_bucket *hb;
- if (WARN_ON_SMP(!q->lock_ptr || !spin_is_locked(q->lock_ptr))
+ if (WARN_ON_SMP(!q->lock_ptr || !raw_spin_is_locked(q->lock_ptr))
|| WARN_ON(plist_node_empty(&q->list)))
return;
@@ -1643,21 +1643,21 @@ static inline void
double_lock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
if (hb1 <= hb2) {
- spin_lock(&hb1->lock);
+ raw_spin_lock(&hb1->lock);
if (hb1 < hb2)
- spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock_nested(&hb2->lock, SINGLE_DEPTH_NESTING);
} else { /* hb1 > hb2 */
- spin_lock(&hb2->lock);
- spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
+ raw_spin_lock(&hb2->lock);
+ raw_spin_lock_nested(&hb1->lock, SINGLE_DEPTH_NESTING);
}
}
static inline void
double_unlock_hb(struct futex_hash_bucket *hb1, struct futex_hash_bucket *hb2)
{
- spin_unlock(&hb1->lock);
+ raw_spin_unlock(&hb1->lock);
if (hb1 != hb2)
- spin_unlock(&hb2->lock);
+ raw_spin_unlock(&hb2->lock);
}
/*
@@ -1685,7 +1685,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
if (!hb_waiters_pending(hb))
goto out_put_key;
- spin_lock(&hb->lock);
+ raw_spin_lock(&hb->lock);
plist_for_each_entry_safe(this, next, &hb->chain, list) {
if (match_futex (&this->key, &key)) {
@@ -1704,7 +1704,7 @@ futex_wake(u32 __user *uaddr, unsigned int flags, int nr_wake, u32 bitset)
}
}
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
wake_up_q(&wake_q);
out_put_key:
put_futex_key(&key);
@@ -2326,7 +2326,7 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
q->lock_ptr = &hb->lock;
- spin_lock(&hb->lock); /* implies smp_mb(); (A) */
+ raw_spin_lock(&hb->lock); /* implies smp_mb(); (A) */
return hb;
}
@@ -2334,7 +2334,7 @@ static inline void
queue_unlock(struct futex_hash_bucket *hb)
__releases(&hb->lock)
{
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
hb_waiters_dec(hb);
}
@@ -2373,7 +2373,7 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
__releases(&hb->lock)
{
__queue_me(q, hb);
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
}
/**
@@ -2389,41 +2389,41 @@ static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
*/
static int unqueue_me(struct futex_q *q)
{
- spinlock_t *lock_ptr;
+ raw_spinlock_t *lock_ptr;
int ret = 0;
/* In the common case we don't take the spinlock, which is nice. */
retry:
/*
- * q->lock_ptr can change between this read and the following spin_lock.
- * Use READ_ONCE to forbid the compiler from reloading q->lock_ptr and
- * optimizing lock_ptr out of the logic below.
+ * q->lock_ptr can change between this read and the following
+ * raw_spin_lock. Use READ_ONCE to forbid the compiler from reloading
+ * q->lock_ptr and optimizing lock_ptr out of the logic below.
*/
lock_ptr = READ_ONCE(q->lock_ptr);
if (lock_ptr != NULL) {
- spin_lock(lock_ptr);
+ raw_spin_lock(lock_ptr);
/*
* q->lock_ptr can change between reading it and
- * spin_lock(), causing us to take the wrong lock. This
+ * raw_spin_lock(), causing us to take the wrong lock. This
* corrects the race condition.
*
* Reasoning goes like this: if we have the wrong lock,
* q->lock_ptr must have changed (maybe several times)
- * between reading it and the spin_lock(). It can
- * change again after the spin_lock() but only if it was
- * already changed before the spin_lock(). It cannot,
+ * between reading it and the raw_spin_lock(). It can
+ * change again after the raw_spin_lock() but only if it was
+ * already changed before the raw_spin_lock(). It cannot,
* however, change back to the original value. Therefore
* we can detect whether we acquired the correct lock.
*/
if (unlikely(lock_ptr != q->lock_ptr)) {
- spin_unlock(lock_ptr);
+ raw_spin_unlock(lock_ptr);
goto retry;
}
__unqueue_futex(q);
BUG_ON(q->pi_state);
- spin_unlock(lock_ptr);
+ raw_spin_unlock(lock_ptr);
ret = 1;
}
@@ -2445,7 +2445,7 @@ static void unqueue_me_pi(struct futex_q *q)
put_pi_state(q->pi_state);
q->pi_state = NULL;
- spin_unlock(q->lock_ptr);
+ raw_spin_unlock(q->lock_ptr);
}
static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
@@ -2569,7 +2569,7 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
*/
handle_err:
raw_spin_unlock_irq(&pi_state->pi_mutex.wait_lock);
- spin_unlock(q->lock_ptr);
+ raw_spin_unlock(q->lock_ptr);
switch (err) {
case -EFAULT:
@@ -2586,7 +2586,7 @@ static int __fixup_pi_state_owner(u32 __user *uaddr, struct futex_q *q,
break;
}
- spin_lock(q->lock_ptr);
+ raw_spin_lock(q->lock_ptr);
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
/*
@@ -2700,7 +2700,7 @@ static void futex_wait_queue_me(struct futex_hash_bucket *hb, struct futex_q *q,
/*
* The task state is guaranteed to be set before another task can
* wake it. set_current_state() is implemented using smp_store_mb() and
- * queue_me() calls spin_unlock() upon completion, both serializing
+ * queue_me() calls raw_spin_unlock() upon completion, both serializing
* access to the hash list and forcing another memory barrier.
*/
set_current_state(TASK_INTERRUPTIBLE);
@@ -2998,7 +2998,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
* before __rt_mutex_start_proxy_lock() is done.
*/
raw_spin_lock_irq(&q.pi_state->pi_mutex.wait_lock);
- spin_unlock(q.lock_ptr);
+ raw_spin_unlock(q.lock_ptr);
/*
* __rt_mutex_start_proxy_lock() unconditionally enqueues the @rt_waiter
* such that futex_unlock_pi() is guaranteed to observe the waiter when
@@ -3019,7 +3019,7 @@ static int futex_lock_pi(u32 __user *uaddr, unsigned int flags,
ret = rt_mutex_wait_proxy_lock(&q.pi_state->pi_mutex, to, &rt_waiter);
cleanup:
- spin_lock(q.lock_ptr);
+ raw_spin_lock(q.lock_ptr);
/*
* If we failed to acquire the lock (deadlock/signal/timeout), we must
* first acquire the hb->lock before removing the lock from the
@@ -3106,7 +3106,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
return ret;
hb = hash_futex(&key);
- spin_lock(&hb->lock);
+ raw_spin_lock(&hb->lock);
/*
* Check waiters first. We do not trust user space values at
@@ -3140,7 +3140,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
* rt_waiter. Also see the WARN in wake_futex_pi().
*/
raw_spin_lock_irq(&pi_state->pi_mutex.wait_lock);
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
/* drops pi_state->pi_mutex.wait_lock */
ret = wake_futex_pi(uaddr, uval, pi_state);
@@ -3179,7 +3179,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
* owner.
*/
if ((ret = cmpxchg_futex_value_locked(&curval, uaddr, uval, 0))) {
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
switch (ret) {
case -EFAULT:
goto pi_faulted;
@@ -3199,7 +3199,7 @@ static int futex_unlock_pi(u32 __user *uaddr, unsigned int flags)
ret = (curval == uval) ? 0 : -EAGAIN;
out_unlock:
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
out_putkey:
put_futex_key(&key);
return ret;
@@ -3372,9 +3372,9 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
/* Queue the futex_q, drop the hb lock, wait for wakeup. */
futex_wait_queue_me(hb, &q, to);
- spin_lock(&hb->lock);
+ raw_spin_lock(&hb->lock);
ret = handle_early_requeue_pi_wakeup(hb, &q, &key2, to);
- spin_unlock(&hb->lock);
+ raw_spin_unlock(&hb->lock);
if (ret)
goto out_put_keys;
@@ -3394,7 +3394,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
* did a lock-steal - fix up the PI-state in that case.
*/
if (q.pi_state && (q.pi_state->owner != current)) {
- spin_lock(q.lock_ptr);
+ raw_spin_lock(q.lock_ptr);
ret = fixup_pi_state_owner(uaddr2, &q, current);
/*
* Drop the reference to the pi state which
@@ -3420,7 +3420,7 @@ static int futex_wait_requeue_pi(u32 __user *uaddr, unsigned int flags,
pi_mutex = &q.pi_state->pi_mutex;
ret = rt_mutex_wait_proxy_lock(pi_mutex, to, &rt_waiter);
- spin_lock(q.lock_ptr);
+ raw_spin_lock(q.lock_ptr);
if (ret && !rt_mutex_cleanup_proxy_lock(pi_mutex, &rt_waiter))
ret = 0;
@@ -4179,7 +4179,7 @@ static int __init futex_init(void)
for (i = 0; i < futex_hashsize; i++) {
atomic_set(&futex_queues[i].waiters, 0);
plist_head_init(&futex_queues[i].chain);
- spin_lock_init(&futex_queues[i].lock);
+ raw_spin_lock_init(&futex_queues[i].lock);
}
return 0;
|