1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
|
From: Peter Zijlstra <peterz@infradead.org>
Date: Mon, 28 May 2018 15:24:21 +0200
Subject: [PATCH 073/354] Split IRQ-off and zone->lock while freeing pages from
PCP list #2
Origin: https://git.kernel.org/cgit/linux/kernel/git/rt/linux-stable-rt.git/commit?id=52f95db6745cd55fcf4a66f563f810ffd1bf8f47
Split the IRQ-off section while accessing the PCP list from zone->lock
while freeing pages.
Introcude isolate_pcp_pages() which separates the pages from the PCP
list onto a temporary list and then free the temporary list via
free_pcppages_bulk().
Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
---
mm/page_alloc.c | 60 ++++++++++++++++++++++++++++++++++++++++---------
1 file changed, 50 insertions(+), 10 deletions(-)
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index d5fc6a1c79a5..3b8613c96940 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1106,8 +1106,8 @@ static inline void prefetch_buddy(struct page *page)
* And clear the zone's pages_scanned counter, to hold off the "all pages are
* pinned" detection logic.
*/
-static void free_pcppages_bulk(struct zone *zone, int count,
- struct list_head *head)
+static void free_pcppages_bulk(struct zone *zone, struct list_head *head,
+ bool zone_retry)
{
bool isolated_pageblocks;
struct page *page, *tmp;
@@ -1122,12 +1122,27 @@ static void free_pcppages_bulk(struct zone *zone, int count,
*/
list_for_each_entry_safe(page, tmp, head, lru) {
int mt = get_pcppage_migratetype(page);
+
+ if (page_zone(page) != zone) {
+ /*
+ * free_unref_page_list() sorts pages by zone. If we end
+ * up with pages from a different NUMA nodes belonging
+ * to the same ZONE index then we need to redo with the
+ * correct ZONE pointer. Skip the page for now, redo it
+ * on the next iteration.
+ */
+ WARN_ON_ONCE(zone_retry == false);
+ if (zone_retry)
+ continue;
+ }
+
/* MIGRATE_ISOLATE page should not go to pcplists */
VM_BUG_ON_PAGE(is_migrate_isolate(mt), page);
/* Pageblock could have been isolated meanwhile */
if (unlikely(isolated_pageblocks))
mt = get_pageblock_migratetype(page);
+ list_del(&page->lru);
__free_one_page(page, page_to_pfn(page), zone, 0, mt);
trace_mm_page_pcpu_drain(page, 0, mt);
}
@@ -2564,7 +2579,7 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp)
local_irq_restore(flags);
if (to_drain > 0)
- free_pcppages_bulk(zone, to_drain, &dst);
+ free_pcppages_bulk(zone, &dst, false);
}
#endif
@@ -2594,7 +2609,7 @@ static void drain_pages_zone(unsigned int cpu, struct zone *zone)
local_irq_restore(flags);
if (count)
- free_pcppages_bulk(zone, count, &dst);
+ free_pcppages_bulk(zone, &dst, false);
}
/*
@@ -2787,7 +2802,8 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn)
return true;
}
-static void free_unref_page_commit(struct page *page, unsigned long pfn)
+static void free_unref_page_commit(struct page *page, unsigned long pfn,
+ struct list_head *dst)
{
struct zone *zone = page_zone(page);
struct per_cpu_pages *pcp;
@@ -2816,10 +2832,8 @@ static void free_unref_page_commit(struct page *page, unsigned long pfn)
pcp->count++;
if (pcp->count >= pcp->high) {
unsigned long batch = READ_ONCE(pcp->batch);
- LIST_HEAD(dst);
- isolate_pcp_pages(batch, pcp, &dst);
- free_pcppages_bulk(zone, batch, &dst);
+ isolate_pcp_pages(batch, pcp, dst);
}
}
@@ -2830,13 +2844,17 @@ void free_unref_page(struct page *page)
{
unsigned long flags;
unsigned long pfn = page_to_pfn(page);
+ struct zone *zone = page_zone(page);
+ LIST_HEAD(dst);
if (!free_unref_page_prepare(page, pfn))
return;
local_irq_save(flags);
- free_unref_page_commit(page, pfn);
+ free_unref_page_commit(page, pfn, &dst);
local_irq_restore(flags);
+ if (!list_empty(&dst))
+ free_pcppages_bulk(zone, &dst, false);
}
/*
@@ -2847,6 +2865,11 @@ void free_unref_page_list(struct list_head *list)
struct page *page, *next;
unsigned long flags, pfn;
int batch_count = 0;
+ struct list_head dsts[__MAX_NR_ZONES];
+ int i;
+
+ for (i = 0; i < __MAX_NR_ZONES; i++)
+ INIT_LIST_HEAD(&dsts[i]);
/* Prepare pages for freeing */
list_for_each_entry_safe(page, next, list, lru) {
@@ -2859,10 +2882,12 @@ void free_unref_page_list(struct list_head *list)
local_irq_save(flags);
list_for_each_entry_safe(page, next, list, lru) {
unsigned long pfn = page_private(page);
+ enum zone_type type;
set_page_private(page, 0);
trace_mm_page_free_batched(page);
- free_unref_page_commit(page, pfn);
+ type = page_zonenum(page);
+ free_unref_page_commit(page, pfn, &dsts[type]);
/*
* Guard against excessive IRQ disabled times when we get
@@ -2875,6 +2900,21 @@ void free_unref_page_list(struct list_head *list)
}
}
local_irq_restore(flags);
+
+ for (i = 0; i < __MAX_NR_ZONES; ) {
+ struct page *page;
+ struct zone *zone;
+
+ if (list_empty(&dsts[i])) {
+ i++;
+ continue;
+ }
+
+ page = list_first_entry(&dsts[i], struct page, lru);
+ zone = page_zone(page);
+
+ free_pcppages_bulk(zone, &dsts[i], true);
+ }
}
/*
|