summary refs log tree commit diff stats
path: root/mm/page_alloc.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- mm/page_alloc.c 209
1 file changed, 122 insertions, 87 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index a663202045..14d39f34d3 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -32,6 +32,7 @@
#include <linux/sysctl.h>
#include <linux/cpu.h>
#include <linux/cpuset.h>
+#include <linux/pagevec.h>
#include <linux/memory_hotplug.h>
#include <linux/nodemask.h>
#include <linux/vmstat.h>
@@ -464,19 +465,19 @@ static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
/*
* Temporary debugging check for pages not lying within a given zone.
*/
-static int __maybe_unused bad_range(struct zone *zone, struct page *page)
+static bool __maybe_unused bad_range(struct zone *zone, struct page *page)
{
if (page_outside_zone_boundaries(zone, page))
- return 1;
+ return true;
if (zone != page_zone(page))
- return 1;
+ return true;
- return 0;
+ return false;
}
#else
-static inline int __maybe_unused bad_range(struct zone *zone, struct page *page)
+static inline bool __maybe_unused bad_range(struct zone *zone, struct page *page)
{
- return 0;
+ return false;
}
#endif
@@ -1061,7 +1062,7 @@ out:
* on-demand allocation and then freed again before the deferred pages
* initialization is done, but this is not likely to happen.
*/
-static inline bool should_skip_kasan_poison(struct page *page, fpi_t fpi_flags)
+static inline bool should_skip_kasan_poison(struct page *page)
{
if (IS_ENABLED(CONFIG_KASAN_GENERIC))
return deferred_pages_enabled();
@@ -1080,11 +1081,11 @@ static void kernel_init_pages(struct page *page, int numpages)
kasan_enable_current();
}
-static __always_inline bool free_pages_prepare(struct page *page,
- unsigned int order, fpi_t fpi_flags)
+__always_inline bool free_pages_prepare(struct page *page,
+ unsigned int order)
{
int bad = 0;
- bool skip_kasan_poison = should_skip_kasan_poison(page, fpi_flags);
+ bool skip_kasan_poison = should_skip_kasan_poison(page);
bool init = want_init_on_free();
bool compound = PageCompound(page);
@@ -1266,7 +1267,7 @@ static void __free_pages_ok(struct page *page, unsigned int order,
unsigned long pfn = page_to_pfn(page);
struct zone *zone = page_zone(page);
- if (!free_pages_prepare(page, order, fpi_flags))
+ if (!free_pages_prepare(page, order))
return;
/*
@@ -1422,14 +1423,14 @@ static void check_new_page_bad(struct page *page)
/*
* This page is about to be returned from the page allocator
*/
-static int check_new_page(struct page *page)
+static bool check_new_page(struct page *page)
{
if (likely(page_expected_state(page,
PAGE_FLAGS_CHECK_AT_PREP|__PG_HWPOISON)))
- return 0;
+ return false;
check_new_page_bad(page);
- return 1;
+ return true;
}
static inline bool check_new_pages(struct page *page, unsigned int order)
@@ -2343,7 +2344,7 @@ static bool free_unref_page_prepare(struct page *page, unsigned long pfn,
{
int migratetype;
- if (!free_pages_prepare(page, order, FPI_NONE))
+ if (!free_pages_prepare(page, order))
return false;
migratetype = get_pfnblock_migratetype(page, pfn);
@@ -2515,66 +2516,70 @@ void free_unref_page(struct page *page, unsigned int order)
}
/*
- * Free a list of 0-order pages
+ * Free a batch of folios
*/
-void free_unref_page_list(struct list_head *list)
+void free_unref_folios(struct folio_batch *folios)
{
unsigned long __maybe_unused UP_flags;
- struct page *page, *next;
struct per_cpu_pages *pcp = NULL;
struct zone *locked_zone = NULL;
- int batch_count = 0;
- int migratetype;
+ int i, j, migratetype;
- /* Prepare pages for freeing */
- list_for_each_entry_safe(page, next, list, lru) {
- unsigned long pfn = page_to_pfn(page);
- if (!free_unref_page_prepare(page, pfn, 0)) {
- list_del(&page->lru);
+ /* Prepare folios for freeing */
+ for (i = 0, j = 0; i < folios->nr; i++) {
+ struct folio *folio = folios->folios[i];
+ unsigned long pfn = folio_pfn(folio);
+ unsigned int order = folio_order(folio);
+
+ if (order > 0 && folio_test_large_rmappable(folio))
+ folio_undo_large_rmappable(folio);
+ if (!free_unref_page_prepare(&folio->page, pfn, order))
continue;
- }
/*
- * Free isolated pages directly to the allocator, see
- * comment in free_unref_page.
+ * Free isolated folios and orders not handled on the PCP
+ * directly to the allocator, see comment in free_unref_page.
*/
- migratetype = get_pcppage_migratetype(page);
- if (unlikely(is_migrate_isolate(migratetype))) {
- list_del(&page->lru);
- free_one_page(page_zone(page), page, pfn, 0, migratetype, FPI_NONE);
+ migratetype = get_pcppage_migratetype(&folio->page);
+ if (!pcp_allowed_order(order) ||
+ is_migrate_isolate(migratetype)) {
+ free_one_page(folio_zone(folio), &folio->page, pfn,
+ order, migratetype, FPI_NONE);
continue;
}
+ folio->private = (void *)(unsigned long)order;
+ if (j != i)
+ folios->folios[j] = folio;
+ j++;
}
+ folios->nr = j;
- list_for_each_entry_safe(page, next, list, lru) {
- struct zone *zone = page_zone(page);
+ for (i = 0; i < folios->nr; i++) {
+ struct folio *folio = folios->folios[i];
+ struct zone *zone = folio_zone(folio);
+ unsigned int order = (unsigned long)folio->private;
- list_del(&page->lru);
- migratetype = get_pcppage_migratetype(page);
+ folio->private = NULL;
+ migratetype = get_pcppage_migratetype(&folio->page);
- /*
- * Either different zone requiring a different pcp lock or
- * excessive lock hold times when freeing a large list of
- * pages.
- */
- if (zone != locked_zone || batch_count == SWAP_CLUSTER_MAX) {
+ /* Different zone requires a different pcp lock */
+ if (zone != locked_zone) {
if (pcp) {
pcp_spin_unlock(pcp);
pcp_trylock_finish(UP_flags);
}
- batch_count = 0;
-
/*
- * trylock is necessary as pages may be getting freed
+ * trylock is necessary as folios may be getting freed
* from IRQ or SoftIRQ context after an IO completion.
*/
pcp_trylock_prepare(UP_flags);
pcp = pcp_spin_trylock(zone->per_cpu_pageset);
if (unlikely(!pcp)) {
pcp_trylock_finish(UP_flags);
- free_one_page(zone, page, page_to_pfn(page),
- 0, migratetype, FPI_NONE);
+ free_one_page(zone, &folio->page,
+ folio_pfn(folio), order,
+ migratetype, FPI_NONE);
locked_zone = NULL;
continue;
}
@@ -2588,15 +2593,16 @@ void free_unref_page_list(struct list_head *list)
if (unlikely(migratetype >= MIGRATE_PCPTYPES))
migratetype = MIGRATE_MOVABLE;
- trace_mm_page_free_batched(page);
- free_unref_page_commit(zone, pcp, page, migratetype, 0);
- batch_count++;
+ trace_mm_page_free_batched(&folio->page);
+ free_unref_page_commit(zone, pcp, &folio->page, migratetype,
+ order);
}
if (pcp) {
pcp_spin_unlock(pcp);
pcp_trylock_finish(UP_flags);
}
+ folio_batch_reinit(folios);
}
/*
@@ -2616,8 +2622,8 @@ void split_page(struct page *page, unsigned int order)
for (i = 1; i < (1 << order); i++)
set_page_refcounted(page + i);
- split_page_owner(page, 1 << order);
- split_page_memcg(page, 1 << order);
+ split_page_owner(page, order, 0);
+ split_page_memcg(page, order, 0);
}
EXPORT_SYMBOL_GPL(split_page);
@@ -4687,8 +4693,8 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
gfp_t gfp = gfp_mask;
#if (PAGE_SIZE < PAGE_FRAG_CACHE_MAX_SIZE)
- gfp_mask |= __GFP_COMP | __GFP_NOWARN | __GFP_NORETRY |
- __GFP_NOMEMALLOC;
+ gfp_mask = (gfp_mask & ~__GFP_DIRECT_RECLAIM) | __GFP_COMP |
+ __GFP_NOWARN | __GFP_NORETRY | __GFP_NOMEMALLOC;
page = alloc_pages_node(NUMA_NO_NODE, gfp_mask,
PAGE_FRAG_CACHE_MAX_ORDER);
nc->size = page ? PAGE_FRAG_CACHE_MAX_SIZE : PAGE_SIZE;
@@ -4701,6 +4707,16 @@ static struct page *__page_frag_cache_refill(struct page_frag_cache *nc,
return page;
}
+void page_frag_cache_drain(struct page_frag_cache *nc)
+{
+ if (!nc->va)
+ return;
+
+ __page_frag_cache_drain(virt_to_head_page(nc->va), nc->pagecnt_bias);
+ nc->va = NULL;
+}
+EXPORT_SYMBOL(page_frag_cache_drain);
+
void __page_frag_cache_drain(struct page *page, unsigned int count)
{
VM_BUG_ON_PAGE(page_ref_count(page) == 0, page);
@@ -4710,9 +4726,9 @@ void __page_frag_cache_drain(struct page *page, unsigned int count)
}
EXPORT_SYMBOL(__page_frag_cache_drain);
-void *page_frag_alloc_align(struct page_frag_cache *nc,
- unsigned int fragsz, gfp_t gfp_mask,
- unsigned int align_mask)
+void *__page_frag_alloc_align(struct page_frag_cache *nc,
+ unsigned int fragsz, gfp_t gfp_mask,
+ unsigned int align_mask)
{
unsigned int size = PAGE_SIZE;
struct page *page;
@@ -4781,7 +4797,7 @@ refill:
return nc->va + offset;
}
-EXPORT_SYMBOL(page_frag_alloc_align);
+EXPORT_SYMBOL(__page_frag_alloc_align);
/*
* Frees a page fragment allocated out of either a compound or order 0 page.
@@ -4803,8 +4819,8 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order,
struct page *page = virt_to_page((void *)addr);
struct page *last = page + nr;
- split_page_owner(page, 1 << order);
- split_page_memcg(page, 1 << order);
+ split_page_owner(page, order, 0);
+ split_page_memcg(page, order, 0);
while (page < --last)
set_page_refcounted(last);
@@ -5574,37 +5590,34 @@ static void zone_pcp_update(struct zone *zone, int cpu_online)
mutex_unlock(&pcp_batch_high_lock);
}
-static void zone_pcp_update_cacheinfo(struct zone *zone)
+static void zone_pcp_update_cacheinfo(struct zone *zone, unsigned int cpu)
{
- int cpu;
struct per_cpu_pages *pcp;
struct cpu_cacheinfo *cci;
- for_each_online_cpu(cpu) {
- pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
- cci = get_cpu_cacheinfo(cpu);
- /*
- * If data cache slice of CPU is large enough, "pcp->batch"
- * pages can be preserved in PCP before draining PCP for
- * consecutive high-order pages freeing without allocation.
- * This can reduce zone lock contention without hurting
- * cache-hot pages sharing.
- */
- spin_lock(&pcp->lock);
- if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
- pcp->flags |= PCPF_FREE_HIGH_BATCH;
- else
- pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
- spin_unlock(&pcp->lock);
- }
+ pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu);
+ cci = get_cpu_cacheinfo(cpu);
+ /*
+ * If data cache slice of CPU is large enough, "pcp->batch"
+ * pages can be preserved in PCP before draining PCP for
+ * consecutive high-order pages freeing without allocation.
+ * This can reduce zone lock contention without hurting
+ * cache-hot pages sharing.
+ */
+ spin_lock(&pcp->lock);
+ if ((cci->per_cpu_data_slice_size >> PAGE_SHIFT) > 3 * pcp->batch)
+ pcp->flags |= PCPF_FREE_HIGH_BATCH;
+ else
+ pcp->flags &= ~PCPF_FREE_HIGH_BATCH;
+ spin_unlock(&pcp->lock);
}
-void setup_pcp_cacheinfo(void)
+void setup_pcp_cacheinfo(unsigned int cpu)
{
struct zone *zone;
for_each_populated_zone(zone)
- zone_pcp_update_cacheinfo(zone);
+ zone_pcp_update_cacheinfo(zone, cpu);
}
/*
@@ -5847,7 +5860,7 @@ static void __setup_per_zone_wmarks(void)
spin_lock_irqsave(&zone->lock, flags);
tmp = (u64)pages_min * zone_managed_pages(zone);
- do_div(tmp, lowmem_pages);
+ tmp = div64_ul(tmp, lowmem_pages);
if (is_highmem(zone) || zone_idx(zone) == ZONE_MOVABLE) {
/*
* __GFP_HIGH and PF_MEMALLOC allocations usually don't
@@ -6221,9 +6234,14 @@ static void alloc_contig_dump_pages(struct list_head *page_list)
}
}
-/* [start, end) must belong to a single zone. */
+/*
+ * [start, end) must belong to a single zone.
+ * @migratetype: migratetype supplied by the caller; it is reported via
+ * trace_mm_alloc_contig_migrate_range_info so traces can be filtered on it.
+ */
int __alloc_contig_migrate_range(struct compact_control *cc,
- unsigned long start, unsigned long end)
+ unsigned long start, unsigned long end,
+ int migratetype)
{
/* This function is based on compact_zone() from compaction.c. */
unsigned int nr_reclaimed;
@@ -6234,6 +6252,10 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
.nid = zone_to_nid(cc->zone),
.gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL,
};
+ struct page *page;
+ unsigned long total_mapped = 0;
+ unsigned long total_migrated = 0;
+ unsigned long total_reclaimed = 0;
lru_cache_disable();
@@ -6259,9 +6281,18 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
&cc->migratepages);
cc->nr_migratepages -= nr_reclaimed;
+ if (trace_mm_alloc_contig_migrate_range_info_enabled()) {
+ total_reclaimed += nr_reclaimed;
+ list_for_each_entry(page, &cc->migratepages, lru)
+ total_mapped += page_mapcount(page);
+ }
+
ret = migrate_pages(&cc->migratepages, alloc_migration_target,
NULL, (unsigned long)&mtc, cc->mode, MR_CONTIG_RANGE, NULL);
+ if (trace_mm_alloc_contig_migrate_range_info_enabled() && !ret)
+ total_migrated += cc->nr_migratepages;
+
/*
* On -ENOMEM, migrate_pages() bails out right away. It is pointless
* to retry again over this error, so do the same here.
@@ -6275,9 +6306,13 @@ int __alloc_contig_migrate_range(struct compact_control *cc,
if (!(cc->gfp_mask & __GFP_NOWARN) && ret == -EBUSY)
alloc_contig_dump_pages(&cc->migratepages);
putback_movable_pages(&cc->migratepages);
- return ret;
}
- return 0;
+
+ trace_mm_alloc_contig_migrate_range_info(start, end, migratetype,
+ total_migrated,
+ total_reclaimed,
+ total_mapped);
+ return (ret < 0) ? ret : 0;
}
/**
@@ -6357,7 +6392,7 @@ int alloc_contig_range(unsigned long start, unsigned long end,
* allocated. So, if we fall through be sure to clear ret so that
* -EBUSY is not accidentally used or returned to caller.
*/
- ret = __alloc_contig_migrate_range(&cc, start, end);
+ ret = __alloc_contig_migrate_range(&cc, start, end, migratetype);
if (ret && ret != -EBUSY)
goto done;
ret = 0;