1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
|
From: Luiz Capitulino <lcapitulino@redhat.com>
Date: Fri, 27 May 2016 15:03:28 +0200
Subject: [PATCH 078/342] mm: perform lru_add_drain_all() remotely
Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=0db71c6d6c532698314525788c98376e43ab8792
lru_add_drain_all() works by scheduling lru_add_drain_cpu() to run
on all CPUs that have non-empty LRU pagevecs and then waiting for
the scheduled work to complete. However, workqueue threads may never
have the chance to run on a CPU that's running a SCHED_FIFO task.
This causes lru_add_drain_all() to block forever.
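This commit solves this problem by changing lru_add_drain_all(), on
CONFIG_PREEMPT_RT_BASE kernels, to drain the LRU pagevecs of remote CPUs
directly from the calling CPU instead of queueing per-CPU work. This is
done by grabbing the target CPU's swapvec_lock and calling
lru_add_drain_cpu() for that CPU. Kernels without CONFIG_PREEMPT_RT_BASE
keep the existing workqueue-based drain.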
PS: This is based on an idea and initial implementation by
Rik van Riel.
Signed-off-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Luiz Capitulino <lcapitulino@redhat.com>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/swap.c | 36 ++++++++++++++++++++++++++++++------
1 file changed, 30 insertions(+), 6 deletions(-)
diff --git a/mm/swap.c b/mm/swap.c
index 92f994b962f0..3885645a45ce 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -585,9 +585,15 @@ void lru_add_drain_cpu(int cpu)
unsigned long flags;
/* No harm done if a racing interrupt already did this */
+#ifdef CONFIG_PREEMPT_RT_BASE
+ local_lock_irqsave_on(rotate_lock, flags, cpu);
+ pagevec_move_tail(pvec);
+ local_unlock_irqrestore_on(rotate_lock, flags, cpu);
+#else
local_lock_irqsave(rotate_lock, flags);
pagevec_move_tail(pvec);
local_unlock_irqrestore(rotate_lock, flags);
+#endif
}
pvec = &per_cpu(lru_deactivate_file_pvecs, cpu);
@@ -657,6 +663,16 @@ void lru_add_drain(void)
#ifdef CONFIG_SMP
+#ifdef CONFIG_PREEMPT_RT_BASE
+static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
+{
+ local_lock_on(swapvec_lock, cpu);
+ lru_add_drain_cpu(cpu);
+ local_unlock_on(swapvec_lock, cpu);
+}
+
+#else
+
static DEFINE_PER_CPU(struct work_struct, lru_add_drain_work);
static void lru_add_drain_per_cpu(struct work_struct *dummy)
@@ -664,6 +680,16 @@ static void lru_add_drain_per_cpu(struct work_struct *dummy)
lru_add_drain();
}
+static inline void remote_lru_add_drain(int cpu, struct cpumask *has_work)
+{
+ struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
+
+ INIT_WORK(work, lru_add_drain_per_cpu);
+ queue_work_on(cpu, mm_percpu_wq, work);
+ cpumask_set_cpu(cpu, has_work);
+}
+#endif
+
/*
* Doesn't need any cpu hotplug locking because we do rely on per-cpu
* kworkers being shut down before our page_alloc_cpu_dead callback is
@@ -688,21 +714,19 @@ void lru_add_drain_all(void)
cpumask_clear(&has_work);
for_each_online_cpu(cpu) {
- struct work_struct *work = &per_cpu(lru_add_drain_work, cpu);
if (pagevec_count(&per_cpu(lru_add_pvec, cpu)) ||
pagevec_count(&per_cpu(lru_rotate_pvecs, cpu)) ||
pagevec_count(&per_cpu(lru_deactivate_file_pvecs, cpu)) ||
pagevec_count(&per_cpu(lru_lazyfree_pvecs, cpu)) ||
- need_activate_page_drain(cpu)) {
- INIT_WORK(work, lru_add_drain_per_cpu);
- queue_work_on(cpu, mm_percpu_wq, work);
- cpumask_set_cpu(cpu, &has_work);
- }
+ need_activate_page_drain(cpu))
+ remote_lru_add_drain(cpu, &has_work);
}
+#ifndef CONFIG_PREEMPT_RT_BASE
for_each_cpu(cpu, &has_work)
flush_work(&per_cpu(lru_add_drain_work, cpu));
+#endif
mutex_unlock(&lock);
}
|