diff options
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 893 |
1 files changed, 494 insertions, 399 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 00fafda76b..df2c442f1c 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -54,6 +54,7 @@ #include <linux/khugepaged.h> #include <linux/delayacct.h> #include <linux/cacheinfo.h> +#include <linux/pgalloc_tag.h> #include <asm/div64.h> #include "internal.h" #include "shuffle.h" @@ -206,24 +207,6 @@ EXPORT_SYMBOL(node_states); gfp_t gfp_allowed_mask __read_mostly = GFP_BOOT_MASK; -/* - * A cached value of the page's pageblock's migratetype, used when the page is - * put on a pcplist. Used to avoid the pageblock migratetype lookup when - * freeing from pcplists in most cases, at the cost of possibly becoming stale. - * Also the migratetype set in the page does not necessarily match the pcplist - * index, e.g. page might have MIGRATE_CMA set but be on a pcplist with any - * other index - this ensures that it will be put on the correct CMA freelist. - */ -static inline int get_pcppage_migratetype(struct page *page) -{ - return page->index; -} - -static inline void set_pcppage_migratetype(struct page *page, int migratetype) -{ - page->index = migratetype; -} - #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE unsigned int pageblock_order __read_mostly; #endif @@ -332,7 +315,7 @@ static inline bool deferred_pages_enabled(void) static bool __ref _deferred_grow_zone(struct zone *zone, unsigned int order) { - return deferred_grow_zone(zone, order); + return deferred_grow_zone(zone, order); } #else static inline bool deferred_pages_enabled(void) @@ -525,7 +508,7 @@ static inline unsigned int order_to_pindex(int migratetype, int order) #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (order > PAGE_ALLOC_COSTLY_ORDER) { - VM_BUG_ON(order != pageblock_order); + VM_BUG_ON(order != HPAGE_PMD_ORDER); movable = migratetype == MIGRATE_MOVABLE; @@ -544,7 +527,7 @@ static inline int pindex_to_order(unsigned int pindex) #ifdef CONFIG_TRANSPARENT_HUGEPAGE if (pindex >= NR_LOWORDER_PCP_LISTS) - order = pageblock_order; + order = HPAGE_PMD_ORDER; #else VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER); #endif @@ -557,20 +540,12 @@ static inline bool pcp_allowed_order(unsigned int order) if (order <= PAGE_ALLOC_COSTLY_ORDER) return true; #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (order == pageblock_order) + if (order == HPAGE_PMD_ORDER) return true; #endif return false; } -static inline void free_the_page(struct page *page, unsigned int order) -{ - if (pcp_allowed_order(order)) /* Via pcp? */ - free_unref_page(page, order); - else - __free_pages_ok(page, order, FPI_NONE); -} - /* * Higher-order pages are called "compound pages". They are structured thusly: * @@ -595,20 +570,6 @@ void prep_compound_page(struct page *page, unsigned int order) prep_compound_head(page, order); } -void destroy_large_folio(struct folio *folio) -{ - if (folio_test_hugetlb(folio)) { - free_huge_folio(folio); - return; - } - - if (folio_test_large_rmappable(folio)) - folio_undo_large_rmappable(folio); - - mem_cgroup_uncharge(folio); - free_the_page(&folio->page, folio_order(folio)); -} - static inline void set_buddy_order(struct page *page, unsigned int order) { set_page_private(page, order); @@ -639,12 +600,14 @@ compaction_capture(struct capture_control *capc, struct page *page, return false; /* - * Do not let lower order allocations pollute a movable pageblock. + * Do not let lower order allocations pollute a movable pageblock + * unless compaction is also requesting movable pages. * This might let an unmovable request use a reclaimable pageblock * and vice-versa but no more than normal fallback logic which can * have trouble finding a high-order free page. */ - if (order < pageblock_order && migratetype == MIGRATE_MOVABLE) + if (order < pageblock_order && migratetype == MIGRATE_MOVABLE && + capc->cc->migratetype != MIGRATE_MOVABLE) return false; capc->page = page; @@ -665,23 +628,33 @@ compaction_capture(struct capture_control *capc, struct page *page, } #endif /* CONFIG_COMPACTION */ -/* Used for pages not on another list */ -static inline void add_to_free_list(struct page *page, struct zone *zone, - unsigned int order, int migratetype) +static inline void account_freepages(struct zone *zone, int nr_pages, + int migratetype) { - struct free_area *area = &zone->free_area[order]; + if (is_migrate_isolate(migratetype)) + return; - list_add(&page->buddy_list, &area->free_list[migratetype]); - area->nr_free++; + __mod_zone_page_state(zone, NR_FREE_PAGES, nr_pages); + + if (is_migrate_cma(migratetype)) + __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, nr_pages); } /* Used for pages not on another list */ -static inline void add_to_free_list_tail(struct page *page, struct zone *zone, - unsigned int order, int migratetype) +static inline void __add_to_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype, + bool tail) { struct free_area *area = &zone->free_area[order]; - list_add_tail(&page->buddy_list, &area->free_list[migratetype]); + VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype, + "page type is %lu, passed migratetype is %d (nr=%d)\n", + get_pageblock_migratetype(page), migratetype, 1 << order); + + if (tail) + list_add_tail(&page->buddy_list, &area->free_list[migratetype]); + else + list_add(&page->buddy_list, &area->free_list[migratetype]); area->nr_free++; } @@ -691,16 +664,28 @@ static inline void add_to_free_list_tail(struct page *page, struct zone *zone, * allocation again (e.g., optimization for memory onlining). */ static inline void move_to_free_list(struct page *page, struct zone *zone, - unsigned int order, int migratetype) + unsigned int order, int old_mt, int new_mt) { struct free_area *area = &zone->free_area[order]; - list_move_tail(&page->buddy_list, &area->free_list[migratetype]); + /* Free page moving can fail, so it happens before the type update */ + VM_WARN_ONCE(get_pageblock_migratetype(page) != old_mt, + "page type is %lu, passed migratetype is %d (nr=%d)\n", + get_pageblock_migratetype(page), old_mt, 1 << order); + + list_move_tail(&page->buddy_list, &area->free_list[new_mt]); + + account_freepages(zone, -(1 << order), old_mt); + account_freepages(zone, 1 << order, new_mt); } -static inline void del_page_from_free_list(struct page *page, struct zone *zone, - unsigned int order) +static inline void __del_page_from_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype) { + VM_WARN_ONCE(get_pageblock_migratetype(page) != migratetype, + "page type is %lu, passed migratetype is %d (nr=%d)\n", + get_pageblock_migratetype(page), migratetype, 1 << order); + /* clear reported state and update reported page count */ if (page_reported(page)) __ClearPageReported(page); @@ -711,6 +696,13 @@ static inline void del_page_from_free_list(struct page *page, struct zone *zone, zone->free_area[order].nr_free--; } +static inline void del_page_from_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype) +{ + __del_page_from_free_list(page, zone, order, migratetype); + account_freepages(zone, -(1 << order), migratetype); +} + static inline struct page *get_page_from_free_area(struct free_area *area, int migratetype) { @@ -782,16 +774,16 @@ static inline void __free_one_page(struct page *page, VM_BUG_ON_PAGE(page->flags & PAGE_FLAGS_CHECK_AT_PREP, page); VM_BUG_ON(migratetype == -1); - if (likely(!is_migrate_isolate(migratetype))) - __mod_zone_freepage_state(zone, 1 << order, migratetype); - VM_BUG_ON_PAGE(pfn & ((1 << order) - 1), page); VM_BUG_ON_PAGE(bad_range(zone, page), page); + account_freepages(zone, 1 << order, migratetype); + while (order < MAX_PAGE_ORDER) { + int buddy_mt = migratetype; + if (compaction_capture(capc, page, order, migratetype)) { - __mod_zone_freepage_state(zone, -(1 << order), - migratetype); + account_freepages(zone, -(1 << order), migratetype); return; } @@ -806,11 +798,11 @@ static inline void __free_one_page(struct page *page, * pageblock isolation could cause incorrect freepage or CMA * accounting or HIGHATOMIC accounting. */ - int buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn); + buddy_mt = get_pfnblock_migratetype(buddy, buddy_pfn); - if (migratetype != buddy_mt - && (!migratetype_is_mergeable(migratetype) || - !migratetype_is_mergeable(buddy_mt))) + if (migratetype != buddy_mt && + (!migratetype_is_mergeable(migratetype) || + !migratetype_is_mergeable(buddy_mt))) goto done_merging; } @@ -819,9 +811,19 @@ static inline void __free_one_page(struct page *page, * merge with it and move up one order. */ if (page_is_guard(buddy)) - clear_page_guard(zone, buddy, order, migratetype); + clear_page_guard(zone, buddy, order); else - del_page_from_free_list(buddy, zone, order); + __del_page_from_free_list(buddy, zone, order, buddy_mt); + + if (unlikely(buddy_mt != migratetype)) { + /* + * Match buddy type. This ensures that an + * expand() down the line puts the sub-blocks + * on the right freelists. + */ + set_pageblock_migratetype(buddy, migratetype); + } + combined_pfn = buddy_pfn & pfn; page = page + (combined_pfn - pfn); pfn = combined_pfn; @@ -838,74 +840,13 @@ done_merging: else to_tail = buddy_merge_likely(pfn, buddy_pfn, page, order); - if (to_tail) - add_to_free_list_tail(page, zone, order, migratetype); - else - add_to_free_list(page, zone, order, migratetype); + __add_to_free_list(page, zone, order, migratetype, to_tail); /* Notify page reporting subsystem of freed page */ if (!(fpi_flags & FPI_SKIP_REPORT_NOTIFY)) page_reporting_notify_free(order); } -/** - * split_free_page() -- split a free page at split_pfn_offset - * @free_page: the original free page - * @order: the order of the page - * @split_pfn_offset: split offset within the page - * - * Return -ENOENT if the free page is changed, otherwise 0 - * - * It is used when the free page crosses two pageblocks with different migratetypes - * at split_pfn_offset within the page. The split free page will be put into - * separate migratetype lists afterwards. Otherwise, the function achieves - * nothing. - */ -int split_free_page(struct page *free_page, - unsigned int order, unsigned long split_pfn_offset) -{ - struct zone *zone = page_zone(free_page); - unsigned long free_page_pfn = page_to_pfn(free_page); - unsigned long pfn; - unsigned long flags; - int free_page_order; - int mt; - int ret = 0; - - if (split_pfn_offset == 0) - return ret; - - spin_lock_irqsave(&zone->lock, flags); - - if (!PageBuddy(free_page) || buddy_order(free_page) != order) { - ret = -ENOENT; - goto out; - } - - mt = get_pfnblock_migratetype(free_page, free_page_pfn); - if (likely(!is_migrate_isolate(mt))) - __mod_zone_freepage_state(zone, -(1UL << order), mt); - - del_page_from_free_list(free_page, zone, order); - for (pfn = free_page_pfn; - pfn < free_page_pfn + (1UL << order);) { - int mt = get_pfnblock_migratetype(pfn_to_page(pfn), pfn); - - free_page_order = min_t(unsigned int, - pfn ? __ffs(pfn) : order, - __fls(split_pfn_offset)); - __free_one_page(pfn_to_page(pfn), pfn, zone, free_page_order, - mt, FPI_NONE); - pfn += 1UL << free_page_order; - split_pfn_offset -= (1UL << free_page_order); - /* we have done the first part, now switch to second part */ - if (split_pfn_offset == 0) - split_pfn_offset = (1UL << order) - (pfn - free_page_pfn); - } -out: - spin_unlock_irqrestore(&zone->lock, flags); - return ret; -} /* * A bad page could be due to a number of fields. Instead of multiple branches, * try and check multiple fields with one check. The caller must do a detailed @@ -1001,6 +942,10 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) bad_page(page, "nonzero entire_mapcount"); goto out; } + if (unlikely(folio_large_mapcount(folio))) { + bad_page(page, "nonzero large_mapcount"); + goto out; + } if (unlikely(atomic_read(&folio->_nr_pages_mapped))) { bad_page(page, "nonzero nr_pages_mapped"); goto out; @@ -1011,10 +956,11 @@ static int free_tail_page_prepare(struct page *head_page, struct page *page) } break; case 2: - /* - * the second tail page: ->mapping is - * deferred_list.next -- ignore value. - */ + /* the second tail page: deferred_list overlaps ->mapping */ + if (unlikely(!list_empty(&folio->_deferred_list))) { + bad_page(page, "on deferred list"); + goto out; + } break; default: if (page->mapping != TAIL_MAPPING) { @@ -1106,6 +1052,7 @@ __always_inline bool free_pages_prepare(struct page *page, /* Do not let hwpoison pages hit pcplists/buddy */ reset_page_owner(page, order); page_table_check_free(page, order); + pgalloc_tag_sub(page, 1 << order); return false; } @@ -1145,6 +1092,7 @@ __always_inline bool free_pages_prepare(struct page *page, page->flags &= ~PAGE_FLAGS_CHECK_AT_PREP; reset_page_owner(page, order); page_table_check_free(page, order); + pgalloc_tag_sub(page, 1 << order); if (!PageHighMem(page)) { debug_check_no_locks_freed(page_address(page), @@ -1196,7 +1144,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, { unsigned long flags; unsigned int order; - bool isolated_pageblocks; struct page *page; /* @@ -1209,7 +1156,6 @@ static void free_pcppages_bulk(struct zone *zone, int count, pindex = pindex - 1; spin_lock_irqsave(&zone->lock, flags); - isolated_pageblocks = has_isolate_pageblock(zone); while (count > 0) { struct list_head *list; @@ -1225,23 +1171,19 @@ static void free_pcppages_bulk(struct zone *zone, int count, order = pindex_to_order(pindex); nr_pages = 1 << order; do { + unsigned long pfn; int mt; page = list_last_entry(list, struct page, pcp_list); - mt = get_pcppage_migratetype(page); + pfn = page_to_pfn(page); + mt = get_pfnblock_migratetype(page, pfn); /* must delete to avoid corrupting pcp list */ list_del(&page->pcp_list); count -= nr_pages; pcp->count -= nr_pages; - /* MIGRATE_ISOLATE page should not go to pcplists */ - VM_BUG_ON_PAGE(is_migrate_isolate(mt), page); - /* Pageblock could have been isolated meanwhile */ - if (unlikely(isolated_pageblocks)) - mt = get_pageblock_migratetype(page); - - __free_one_page(page, page_to_pfn(page), zone, order, mt, FPI_NONE); + __free_one_page(page, pfn, zone, order, mt, FPI_NONE); trace_mm_page_pcpu_drain(page, order, mt); } while (count > 0 && !list_empty(list)); } @@ -1249,18 +1191,15 @@ static void free_pcppages_bulk(struct zone *zone, int count, spin_unlock_irqrestore(&zone->lock, flags); } -static void free_one_page(struct zone *zone, - struct page *page, unsigned long pfn, - unsigned int order, - int migratetype, fpi_t fpi_flags) +static void free_one_page(struct zone *zone, struct page *page, + unsigned long pfn, unsigned int order, + fpi_t fpi_flags) { unsigned long flags; + int migratetype; spin_lock_irqsave(&zone->lock, flags); - if (unlikely(has_isolate_pageblock(zone) || - is_migrate_isolate(migratetype))) { - migratetype = get_pfnblock_migratetype(page, pfn); - } + migratetype = get_pfnblock_migratetype(page, pfn); __free_one_page(page, pfn, zone, order, migratetype, fpi_flags); spin_unlock_irqrestore(&zone->lock, flags); } @@ -1268,21 +1207,13 @@ static void free_one_page(struct zone *zone, static void __free_pages_ok(struct page *page, unsigned int order, fpi_t fpi_flags) { - int migratetype; unsigned long pfn = page_to_pfn(page); struct zone *zone = page_zone(page); if (!free_pages_prepare(page, order)) return; - /* - * Calling get_pfnblock_migratetype() without spin_lock_irqsave() here - * is used to avoid calling get_pfnblock_migratetype() under the lock. - * This will reduce the lock holding time. - */ - migratetype = get_pfnblock_migratetype(page, pfn); - - free_one_page(zone, page, pfn, order, migratetype, fpi_flags); + free_one_page(zone, page, pfn, order, fpi_flags); __count_vm_events(PGFREE, 1 << order); } @@ -1393,6 +1324,7 @@ static inline void expand(struct zone *zone, struct page *page, int low, int high, int migratetype) { unsigned long size = 1 << high; + unsigned long nr_added = 0; while (high > low) { high--; @@ -1405,12 +1337,14 @@ static inline void expand(struct zone *zone, struct page *page, * Corresponding page table entries will not be touched, * pages will stay not present in virtual address space */ - if (set_page_guard(zone, &page[size], high, migratetype)) + if (set_page_guard(zone, &page[size], high)) continue; - add_to_free_list(&page[size], zone, high, migratetype); + __add_to_free_list(&page[size], zone, high, migratetype, false); set_buddy_order(&page[size], high); + nr_added += size; } + account_freepages(zone, nr_added, migratetype); } static void check_new_page_bad(struct page *page) @@ -1538,6 +1472,7 @@ inline void post_alloc_hook(struct page *page, unsigned int order, set_page_owner(page, order, gfp_flags); page_table_check_alloc(page, order); + pgalloc_tag_add(page, current, 1 << order); } static void prep_new_page(struct page *page, unsigned int order, gfp_t gfp_flags, @@ -1578,9 +1513,8 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, page = get_page_from_free_area(area, migratetype); if (!page) continue; - del_page_from_free_list(page, zone, current_order); + del_page_from_free_list(page, zone, current_order, migratetype); expand(zone, page, order, current_order, migratetype); - set_pcppage_migratetype(page, migratetype); trace_mm_page_alloc_zone_locked(page, order, migratetype, pcp_allowed_order(order) && migratetype < MIGRATE_PCPTYPES); @@ -1597,7 +1531,7 @@ struct page *__rmqueue_smallest(struct zone *zone, unsigned int order, * * The other migratetypes do not have fallbacks. */ -static int fallbacks[MIGRATE_TYPES][MIGRATE_PCPTYPES - 1] = { +static int fallbacks[MIGRATE_PCPTYPES][MIGRATE_PCPTYPES - 1] = { [MIGRATE_UNMOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE }, [MIGRATE_MOVABLE] = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE }, [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE }, @@ -1615,30 +1549,23 @@ static inline struct page *__rmqueue_cma_fallback(struct zone *zone, #endif /* - * Move the free pages in a range to the freelist tail of the requested type. - * Note that start_page and end_pages are not aligned on a pageblock - * boundary. If alignment is required, use move_freepages_block() + * Change the type of a block and move all its free pages to that + * type's freelist. */ -static int move_freepages(struct zone *zone, - unsigned long start_pfn, unsigned long end_pfn, - int migratetype, int *num_movable) +static int __move_freepages_block(struct zone *zone, unsigned long start_pfn, + int old_mt, int new_mt) { struct page *page; - unsigned long pfn; + unsigned long pfn, end_pfn; unsigned int order; int pages_moved = 0; - for (pfn = start_pfn; pfn <= end_pfn;) { + VM_WARN_ON(start_pfn & (pageblock_nr_pages - 1)); + end_pfn = pageblock_end_pfn(start_pfn); + + for (pfn = start_pfn; pfn < end_pfn;) { page = pfn_to_page(pfn); if (!PageBuddy(page)) { - /* - * We assume that pages that could be isolated for - * migration are movable. But we don't actually try - * isolating, as that would be expensive. - */ - if (num_movable && - (PageLRU(page) || __PageMovable(page))) - (*num_movable)++; pfn++; continue; } @@ -1648,36 +1575,187 @@ static int move_freepages(struct zone *zone, VM_BUG_ON_PAGE(page_zone(page) != zone, page); order = buddy_order(page); - move_to_free_list(page, zone, order, migratetype); + + move_to_free_list(page, zone, order, old_mt, new_mt); + pfn += 1 << order; pages_moved += 1 << order; } + set_pageblock_migratetype(pfn_to_page(start_pfn), new_mt); + return pages_moved; } -int move_freepages_block(struct zone *zone, struct page *page, - int migratetype, int *num_movable) +static bool prep_move_freepages_block(struct zone *zone, struct page *page, + unsigned long *start_pfn, + int *num_free, int *num_movable) { - unsigned long start_pfn, end_pfn, pfn; + unsigned long pfn, start, end; + + pfn = page_to_pfn(page); + start = pageblock_start_pfn(pfn); + end = pageblock_end_pfn(pfn); - if (num_movable) + /* + * The caller only has the lock for @zone, don't touch ranges + * that straddle into other zones. While we could move part of + * the range that's inside the zone, this call is usually + * accompanied by other operations such as migratetype updates + * which also should be locked. + */ + if (!zone_spans_pfn(zone, start)) + return false; + if (!zone_spans_pfn(zone, end - 1)) + return false; + + *start_pfn = start; + + if (num_free) { + *num_free = 0; *num_movable = 0; + for (pfn = start; pfn < end;) { + page = pfn_to_page(pfn); + if (PageBuddy(page)) { + int nr = 1 << buddy_order(page); - pfn = page_to_pfn(page); - start_pfn = pageblock_start_pfn(pfn); - end_pfn = pageblock_end_pfn(pfn) - 1; + *num_free += nr; + pfn += nr; + continue; + } + /* + * We assume that pages that could be isolated for + * migration are movable. But we don't actually try + * isolating, as that would be expensive. + */ + if (PageLRU(page) || __PageMovable(page)) + (*num_movable)++; + pfn++; + } + } - /* Do not cross zone boundaries */ - if (!zone_spans_pfn(zone, start_pfn)) - start_pfn = pfn; - if (!zone_spans_pfn(zone, end_pfn)) - return 0; + return true; +} + +static int move_freepages_block(struct zone *zone, struct page *page, + int old_mt, int new_mt) +{ + unsigned long start_pfn; - return move_freepages(zone, start_pfn, end_pfn, migratetype, - num_movable); + if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL)) + return -1; + + return __move_freepages_block(zone, start_pfn, old_mt, new_mt); } +#ifdef CONFIG_MEMORY_ISOLATION +/* Look for a buddy that straddles start_pfn */ +static unsigned long find_large_buddy(unsigned long start_pfn) +{ + int order = 0; + struct page *page; + unsigned long pfn = start_pfn; + + while (!PageBuddy(page = pfn_to_page(pfn))) { + /* Nothing found */ + if (++order > MAX_PAGE_ORDER) + return start_pfn; + pfn &= ~0UL << order; + } + + /* + * Found a preceding buddy, but does it straddle? + */ + if (pfn + (1 << buddy_order(page)) > start_pfn) + return pfn; + + /* Nothing found */ + return start_pfn; +} + +/* Split a multi-block free page into its individual pageblocks */ +static void split_large_buddy(struct zone *zone, struct page *page, + unsigned long pfn, int order) +{ + unsigned long end_pfn = pfn + (1 << order); + + VM_WARN_ON_ONCE(order <= pageblock_order); + VM_WARN_ON_ONCE(pfn & (pageblock_nr_pages - 1)); + + /* Caller removed page from freelist, buddy info cleared! */ + VM_WARN_ON_ONCE(PageBuddy(page)); + + while (pfn != end_pfn) { + int mt = get_pfnblock_migratetype(page, pfn); + + __free_one_page(page, pfn, zone, pageblock_order, mt, FPI_NONE); + pfn += pageblock_nr_pages; + page = pfn_to_page(pfn); + } +} + +/** + * move_freepages_block_isolate - move free pages in block for page isolation + * @zone: the zone + * @page: the pageblock page + * @migratetype: migratetype to set on the pageblock + * + * This is similar to move_freepages_block(), but handles the special + * case encountered in page isolation, where the block of interest + * might be part of a larger buddy spanning multiple pageblocks. + * + * Unlike the regular page allocator path, which moves pages while + * stealing buddies off the freelist, page isolation is interested in + * arbitrary pfn ranges that may have overlapping buddies on both ends. + * + * This function handles that. Straddling buddies are split into + * individual pageblocks. Only the block of interest is moved. + * + * Returns %true if pages could be moved, %false otherwise. + */ +bool move_freepages_block_isolate(struct zone *zone, struct page *page, + int migratetype) +{ + unsigned long start_pfn, pfn; + + if (!prep_move_freepages_block(zone, page, &start_pfn, NULL, NULL)) + return false; + + /* No splits needed if buddies can't span multiple blocks */ + if (pageblock_order == MAX_PAGE_ORDER) + goto move; + + /* We're a tail block in a larger buddy */ + pfn = find_large_buddy(start_pfn); + if (pfn != start_pfn) { + struct page *buddy = pfn_to_page(pfn); + int order = buddy_order(buddy); + + del_page_from_free_list(buddy, zone, order, + get_pfnblock_migratetype(buddy, pfn)); + set_pageblock_migratetype(page, migratetype); + split_large_buddy(zone, buddy, pfn, order); + return true; + } + + /* We're the starting block of a larger buddy */ + if (PageBuddy(page) && buddy_order(page) > pageblock_order) { + int order = buddy_order(page); + + del_page_from_free_list(page, zone, order, + get_pfnblock_migratetype(page, pfn)); + set_pageblock_migratetype(page, migratetype); + split_large_buddy(zone, page, pfn, order); + return true; + } +move: + __move_freepages_block(zone, start_pfn, + get_pfnblock_migratetype(page, start_pfn), + migratetype); + return true; +} +#endif /* CONFIG_MEMORY_ISOLATION */ + static void change_pageblock_range(struct page *pageblock_page, int start_order, int migratetype) { @@ -1760,33 +1838,37 @@ static inline bool boost_watermark(struct zone *zone) } /* - * This function implements actual steal behaviour. If order is large enough, - * we can steal whole pageblock. If not, we first move freepages in this - * pageblock to our migratetype and determine how many already-allocated pages - * are there in the pageblock with a compatible migratetype. If at least half - * of pages are free or compatible, we can change migratetype of the pageblock - * itself, so pages freed in the future will be put on the correct free list. + * This function implements actual steal behaviour. If order is large enough, we + * can claim the whole pageblock for the requested migratetype. If not, we check + * the pageblock for constituent pages; if at least half of the pages are free + * or compatible, we can still claim the whole block, so pages freed in the + * future will be put on the correct free list. Otherwise, we isolate exactly + * the order we need from the fallback block and leave its migratetype alone. */ -static void steal_suitable_fallback(struct zone *zone, struct page *page, - unsigned int alloc_flags, int start_type, bool whole_block) +static struct page * +steal_suitable_fallback(struct zone *zone, struct page *page, + int current_order, int order, int start_type, + unsigned int alloc_flags, bool whole_block) { - unsigned int current_order = buddy_order(page); int free_pages, movable_pages, alike_pages; - int old_block_type; + unsigned long start_pfn; + int block_type; - old_block_type = get_pageblock_migratetype(page); + block_type = get_pageblock_migratetype(page); /* * This can happen due to races and we want to prevent broken * highatomic accounting. */ - if (is_migrate_highatomic(old_block_type)) + if (is_migrate_highatomic(block_type)) goto single_page; /* Take ownership for orders >= pageblock_order */ if (current_order >= pageblock_order) { + del_page_from_free_list(page, zone, current_order, block_type); change_pageblock_range(page, current_order, start_type); - goto single_page; + expand(zone, page, order, current_order, start_type); + return page; } /* @@ -1801,10 +1883,9 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, if (!whole_block) goto single_page; - free_pages = move_freepages_block(zone, page, start_type, - &movable_pages); /* moving whole block can fail due to zone boundary conditions */ - if (!free_pages) + if (!prep_move_freepages_block(zone, page, &start_pfn, &free_pages, + &movable_pages)) goto single_page; /* @@ -1822,7 +1903,7 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, * vice versa, be conservative since we can't distinguish the * exact migratetype of non-movable pages. */ - if (old_block_type == MIGRATE_MOVABLE) + if (block_type == MIGRATE_MOVABLE) alike_pages = pageblock_nr_pages - (free_pages + movable_pages); else @@ -1833,13 +1914,15 @@ static void steal_suitable_fallback(struct zone *zone, struct page *page, * compatible migratability as our allocation, claim the whole block. */ if (free_pages + alike_pages >= (1 << (pageblock_order-1)) || - page_group_by_mobility_disabled) - set_pageblock_migratetype(page, start_type); - - return; + page_group_by_mobility_disabled) { + __move_freepages_block(zone, start_pfn, block_type, start_type); + return __rmqueue_smallest(zone, order, start_type); + } single_page: - move_to_free_list(page, zone, current_order, start_type); + del_page_from_free_list(page, zone, current_order, block_type); + expand(zone, page, order, current_order, block_type); + return page; } /* @@ -1877,10 +1960,12 @@ int find_suitable_fallback(struct free_area *area, unsigned int order, } /* - * Reserve a pageblock for exclusive use of high-order atomic allocations if - * there are no empty page blocks that contain a page with a suitable order + * Reserve the pageblock(s) surrounding an allocation request for + * exclusive use of high-order atomic allocations if there are no + * empty page blocks that contain a page with a suitable order */ -static void reserve_highatomic_pageblock(struct page *page, struct zone *zone) +static void reserve_highatomic_pageblock(struct page *page, int order, + struct zone *zone) { int mt; unsigned long max_managed, flags; @@ -1906,10 +1991,16 @@ static void reserve_highatomic_pageblock(struct page *page, struct zone *zone) /* Yoink! */ mt = get_pageblock_migratetype(page); /* Only reserve normal pageblocks (i.e., they can merge with others) */ - if (migratetype_is_mergeable(mt)) { + if (!migratetype_is_mergeable(mt)) + goto out_unlock; + + if (order < pageblock_order) { + if (move_freepages_block(zone, page, mt, MIGRATE_HIGHATOMIC) == -1) + goto out_unlock; zone->nr_reserved_highatomic += pageblock_nr_pages; - set_pageblock_migratetype(page, MIGRATE_HIGHATOMIC); - move_freepages_block(zone, page, MIGRATE_HIGHATOMIC, NULL); + } else { + change_pageblock_range(page, order, MIGRATE_HIGHATOMIC); + zone->nr_reserved_highatomic += 1 << order; } out_unlock: @@ -1922,7 +2013,7 @@ out_unlock: * intense memory pressure but failed atomic allocations should be easier * to recover from than an OOM. * - * If @force is true, try to unreserve a pageblock even though highatomic + * If @force is true, try to unreserve pageblocks even though highatomic * pageblock is exhausted. */ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, @@ -1934,7 +2025,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, struct zone *zone; struct page *page; int order; - bool ret; + int ret; for_each_zone_zonelist_nodemask(zone, z, zonelist, ac->highest_zoneidx, ac->nodemask) { @@ -1949,11 +2040,13 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < NR_PAGE_ORDERS; order++) { struct free_area *area = &(zone->free_area[order]); + int mt; page = get_page_from_free_area(area, MIGRATE_HIGHATOMIC); if (!page) continue; + mt = get_pageblock_migratetype(page); /* * In page freeing path, migratetype change is racy so * we can counter several free pages in a pageblock @@ -1961,7 +2054,8 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * from highatomic to ac->migratetype. So we should * adjust the count once. */ - if (is_migrate_highatomic_page(page)) { + if (is_migrate_highatomic(mt)) { + unsigned long size; /* * It should never happen but changes to * locking could inadvertently allow a per-cpu @@ -1969,9 +2063,9 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * while unreserving so be safe and watch for * underflows. */ - zone->nr_reserved_highatomic -= min( - pageblock_nr_pages, - zone->nr_reserved_highatomic); + size = max(pageblock_nr_pages, 1UL << order); + size = min(size, zone->nr_reserved_highatomic); + zone->nr_reserved_highatomic -= size; } /* @@ -1983,10 +2077,22 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * of pageblocks that cannot be completely freed * may increase. */ - set_pageblock_migratetype(page, ac->migratetype); - ret = move_freepages_block(zone, page, ac->migratetype, - NULL); - if (ret) { + if (order < pageblock_order) + ret = move_freepages_block(zone, page, mt, + ac->migratetype); + else { + move_to_free_list(page, zone, order, mt, + ac->migratetype); + change_pageblock_range(page, order, + ac->migratetype); + ret = 1; + } + /* + * Reserving the block(s) already succeeded, + * so this should not fail on zone boundaries. + */ + WARN_ON_ONCE(ret == -1); + if (ret > 0) { spin_unlock_irqrestore(&zone->lock, flags); return ret; } @@ -2007,7 +2113,7 @@ static bool unreserve_highatomic_pageblock(const struct alloc_context *ac, * deviation from the rest of this file, to make the for loop * condition simpler. */ -static __always_inline bool +static __always_inline struct page * __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, unsigned int alloc_flags) { @@ -2054,7 +2160,7 @@ __rmqueue_fallback(struct zone *zone, int order, int start_migratetype, goto do_steal; } - return false; + return NULL; find_smallest: for (current_order = order; current_order < NR_PAGE_ORDERS; current_order++) { @@ -2074,14 +2180,14 @@ find_smallest: do_steal: page = get_page_from_free_area(area, fallback_mt); - steal_suitable_fallback(zone, page, alloc_flags, start_migratetype, - can_steal); + /* take off list, maybe claim block, expand remainder */ + page = steal_suitable_fallback(zone, page, current_order, order, + start_migratetype, alloc_flags, can_steal); trace_mm_page_alloc_extfrag(page, order, current_order, start_migratetype, fallback_mt); - return true; - + return page; } /* @@ -2108,15 +2214,15 @@ __rmqueue(struct zone *zone, unsigned int order, int migratetype, return page; } } -retry: + page = __rmqueue_smallest(zone, order, migratetype); if (unlikely(!page)) { if (alloc_flags & ALLOC_CMA) page = __rmqueue_cma_fallback(zone, order); - if (!page && __rmqueue_fallback(zone, order, migratetype, - alloc_flags)) - goto retry; + if (!page) + page = __rmqueue_fallback(zone, order, migratetype, + alloc_flags); } return page; } @@ -2151,12 +2257,7 @@ static int rmqueue_bulk(struct zone *zone, unsigned int order, * pages are ordered properly. */ list_add_tail(&page->pcp_list, list); - if (is_migrate_cma(get_pcppage_migratetype(page))) - __mod_zone_page_state(zone, NR_FREE_CMA_PAGES, - -(1 << order)); } - - __mod_zone_page_state(zone, NR_FREE_PAGES, -(i << order)); spin_unlock_irqrestore(&zone->lock, flags); return i; @@ -2221,14 +2322,21 @@ void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp) */ static void drain_pages_zone(unsigned int cpu, struct zone *zone) { - struct per_cpu_pages *pcp; + struct per_cpu_pages *pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); + int count; - pcp = per_cpu_ptr(zone->per_cpu_pageset, cpu); - if (pcp->count) { + do { spin_lock(&pcp->lock); - free_pcppages_bulk(zone, pcp->count, pcp, 0); + count = pcp->count; + if (count) { + int to_drain = min(count, + pcp->batch << CONFIG_PCP_BATCH_SCALE_MAX); + + free_pcppages_bulk(zone, to_drain, pcp, 0); + count -= to_drain; + } spin_unlock(&pcp->lock); - } + } while (count); } /* @@ -2344,19 +2452,6 @@ void drain_all_pages(struct zone *zone) __drain_all_pages(zone, false); } -static bool free_unref_page_prepare(struct page *page, unsigned long pfn, - unsigned int order) -{ - int migratetype; - - if (!free_pages_prepare(page, order)) - return false; - - migratetype = get_pfnblock_migratetype(page, pfn); - set_pcppage_migratetype(page, migratetype); - return true; -} - static int nr_pcp_free(struct per_cpu_pages *pcp, int batch, int high, bool free_high) { int min_nr_free, max_nr_free; @@ -2487,9 +2582,14 @@ void free_unref_page(struct page *page, unsigned int order) struct per_cpu_pages *pcp; struct zone *zone; unsigned long pfn = page_to_pfn(page); - int migratetype, pcpmigratetype; + int migratetype; - if (!free_unref_page_prepare(page, pfn, order)) + if (!pcp_allowed_order(order)) { + __free_pages_ok(page, order, FPI_NONE); + return; + } + + if (!free_pages_prepare(page, order)) return; /* @@ -2499,23 +2599,23 @@ void free_unref_page(struct page *page, unsigned int order) * get those areas back if necessary. Otherwise, we may have to free * excessively into the page allocator */ - migratetype = pcpmigratetype = get_pcppage_migratetype(page); + migratetype = get_pfnblock_migratetype(page, pfn); if (unlikely(migratetype >= MIGRATE_PCPTYPES)) { if (unlikely(is_migrate_isolate(migratetype))) { - free_one_page(page_zone(page), page, pfn, order, migratetype, FPI_NONE); + free_one_page(page_zone(page), page, pfn, order, FPI_NONE); return; } - pcpmigratetype = MIGRATE_MOVABLE; + migratetype = MIGRATE_MOVABLE; } zone = page_zone(page); pcp_trylock_prepare(UP_flags); pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (pcp) { - free_unref_page_commit(zone, pcp, page, pcpmigratetype, order); + free_unref_page_commit(zone, pcp, page, migratetype, order); pcp_spin_unlock(pcp); } else { - free_one_page(zone, page, pfn, order, migratetype, FPI_NONE); + free_one_page(zone, page, pfn, order, FPI_NONE); } pcp_trylock_finish(UP_flags); } @@ -2528,7 +2628,7 @@ void free_unref_folios(struct folio_batch *folios) unsigned long __maybe_unused UP_flags; struct per_cpu_pages *pcp = NULL; struct zone *locked_zone = NULL; - int i, j, migratetype; + int i, j; /* Prepare folios for freeing */ for (i = 0, j = 0; i < folios->nr; i++) { @@ -2538,18 +2638,15 @@ void free_unref_folios(struct folio_batch *folios) if (order > 0 && folio_test_large_rmappable(folio)) folio_undo_large_rmappable(folio); - if (!free_unref_page_prepare(&folio->page, pfn, order)) + if (!free_pages_prepare(&folio->page, order)) continue; - /* - * Free isolated folios and orders not handled on the PCP - * directly to the allocator, see comment in free_unref_page. + * Free orders not handled on the PCP directly to the + * allocator. */ - migratetype = get_pcppage_migratetype(&folio->page); - if (!pcp_allowed_order(order) || - is_migrate_isolate(migratetype)) { - free_one_page(folio_zone(folio), &folio->page, pfn, - order, migratetype, FPI_NONE); + if (!pcp_allowed_order(order)) { + free_one_page(folio_zone(folio), &folio->page, + pfn, order, FPI_NONE); continue; } folio->private = (void *)(unsigned long)order; @@ -2562,16 +2659,31 @@ void free_unref_folios(struct folio_batch *folios) for (i = 0; i < folios->nr; i++) { struct folio *folio = folios->folios[i]; struct zone *zone = folio_zone(folio); + unsigned long pfn = folio_pfn(folio); unsigned int order = (unsigned long)folio->private; + int migratetype; folio->private = NULL; - migratetype = get_pcppage_migratetype(&folio->page); + migratetype = get_pfnblock_migratetype(&folio->page, pfn); /* Different zone requires a different pcp lock */ - if (zone != locked_zone) { + if (zone != locked_zone || + is_migrate_isolate(migratetype)) { if (pcp) { pcp_spin_unlock(pcp); pcp_trylock_finish(UP_flags); + locked_zone = NULL; + pcp = NULL; + } + + /* + * Free isolated pages directly to the + * allocator, see comment in free_unref_page. + */ + if (is_migrate_isolate(migratetype)) { + free_one_page(zone, &folio->page, pfn, + order, FPI_NONE); + continue; } /* @@ -2582,10 +2694,8 @@ void free_unref_folios(struct folio_batch *folios) pcp = pcp_spin_trylock(zone->per_cpu_pageset); if (unlikely(!pcp)) { pcp_trylock_finish(UP_flags); - free_one_page(zone, &folio->page, - folio_pfn(folio), order, - migratetype, FPI_NONE); - locked_zone = NULL; + free_one_page(zone, &folio->page, pfn, + order, FPI_NONE); continue; } locked_zone = zone; @@ -2628,6 +2738,7 @@ void split_page(struct page *page, unsigned int order) for (i = 1; i < (1 << order); i++) set_page_refcounted(page + i); split_page_owner(page, order, 0); + pgalloc_tag_split(page, 1 << order); split_page_memcg(page, order, 0); } EXPORT_SYMBOL_GPL(split_page); @@ -2648,11 +2759,9 @@ int __isolate_free_page(struct page *page, unsigned int order) watermark = zone->_watermark[WMARK_MIN] + (1UL << order); if (!zone_watermark_ok(zone, 0, watermark, 0, ALLOC_CMA)) return 0; - - __mod_zone_freepage_state(zone, -(1UL << order), mt); } - del_page_from_free_list(page, zone, order); + del_page_from_free_list(page, zone, order, mt); /* * Set the pageblock if the isolated page is at least half of a @@ -2667,8 +2776,8 @@ int __isolate_free_page(struct page *page, unsigned int order) * with others) */ if (migratetype_is_mergeable(mt)) - set_pageblock_migratetype(page, - MIGRATE_MOVABLE); + move_freepages_block(zone, page, mt, + MIGRATE_MOVABLE); } } @@ -2752,8 +2861,6 @@ struct page *rmqueue_buddy(struct zone *preferred_zone, struct zone *zone, return NULL; } } - __mod_zone_freepage_state(zone, -(1 << order), - get_pcppage_migratetype(page)); spin_unlock_irqrestore(&zone->lock, flags); } while (check_new_pages(page, order)); @@ -3326,7 +3433,7 @@ try_this_zone: * if the pageblock should be reserved for the future */ if (unlikely(alloc_flags & ALLOC_HIGHATOMIC)) - reserve_highatomic_pageblock(page, zone); + reserve_highatomic_pageblock(page, order, zone); return page; } else { @@ -4389,7 +4496,7 @@ static inline bool prepare_alloc_pages(gfp_t gfp_mask, unsigned int order, * * Returns the number of pages on the list or array. */ -unsigned long __alloc_pages_bulk(gfp_t gfp, int preferred_nid, +unsigned long alloc_pages_bulk_noprof(gfp_t gfp, int preferred_nid, nodemask_t *nodemask, int nr_pages, struct list_head *page_list, struct page **page_array) @@ -4525,7 +4632,7 @@ failed_irq: pcp_trylock_finish(UP_flags); failed: - page = __alloc_pages(gfp, 0, preferred_nid, nodemask); + page = __alloc_pages_noprof(gfp, 0, preferred_nid, nodemask); if (page) { if (page_list) list_add(&page->lru, page_list); @@ -4536,13 +4643,13 @@ failed: goto out; } -EXPORT_SYMBOL_GPL(__alloc_pages_bulk); +EXPORT_SYMBOL_GPL(alloc_pages_bulk_noprof); /* * This is the 'heart' of the zoned buddy allocator. */ -struct page *__alloc_pages(gfp_t gfp, unsigned int order, int preferred_nid, - nodemask_t *nodemask) +struct page *__alloc_pages_noprof(gfp_t gfp, unsigned int order, + int preferred_nid, nodemask_t *nodemask) { struct page *page; unsigned int alloc_flags = ALLOC_WMARK_LOW; @@ -4604,38 +4711,38 @@ out: return page; } -EXPORT_SYMBOL(__alloc_pages); +EXPORT_SYMBOL(__alloc_pages_noprof); -struct folio *__folio_alloc(gfp_t gfp, unsigned int order, int preferred_nid, +struct folio *__folio_alloc_noprof(gfp_t gfp, unsigned int order, int preferred_nid, nodemask_t *nodemask) { - struct page *page = __alloc_pages(gfp | __GFP_COMP, order, + struct page *page = __alloc_pages_noprof(gfp | __GFP_COMP, order, preferred_nid, nodemask); return page_rmappable_folio(page); } -EXPORT_SYMBOL(__folio_alloc); +EXPORT_SYMBOL(__folio_alloc_noprof); /* * Common helper functions. Never use with __GFP_HIGHMEM because the returned * address cannot represent highmem pages. Use alloc_pages and then kmap if * you need to access high mem. */ -unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order) +unsigned long get_free_pages_noprof(gfp_t gfp_mask, unsigned int order) { struct page *page; - page = alloc_pages(gfp_mask & ~__GFP_HIGHMEM, order); + page = alloc_pages_noprof(gfp_mask & ~__GFP_HIGHMEM, order); if (!page) return 0; return (unsigned long) page_address(page); } -EXPORT_SYMBOL(__get_free_pages); +EXPORT_SYMBOL(get_free_pages_noprof); -unsigned long get_zeroed_page(gfp_t gfp_mask) +unsigned long get_zeroed_page_noprof(gfp_t gfp_mask) { - return __get_free_page(gfp_mask | __GFP_ZERO); + return get_free_pages_noprof(gfp_mask | __GFP_ZERO, 0); } -EXPORT_SYMBOL(get_zeroed_page); +EXPORT_SYMBOL(get_zeroed_page_noprof); /** * __free_pages - Free pages allocated with alloc_pages(). @@ -4661,12 +4768,15 @@ void __free_pages(struct page *page, unsigned int order) { /* get PageHead before we drop reference */ int head = PageHead(page); + struct alloc_tag *tag = pgalloc_tag_get(page); if (put_page_testzero(page)) - free_the_page(page, order); - else if (!head) + free_unref_page(page, order); + else if (!head) { + pgalloc_tag_sub_pages(tag, (1 << order) - 1); while (order-- > 0) - free_the_page(page + (1 << order), order); + free_unref_page(page + (1 << order), order); + } } EXPORT_SYMBOL(__free_pages); @@ -4727,7 +4837,7 @@ void __page_frag_cache_drain(struct page *page, unsigned int count) VM_BUG_ON_PAGE(page_ref_count(page) == 0, page); if (page_ref_sub_and_test(page, count)) - free_the_page(page, compound_order(page)); + free_unref_page(page, compound_order(page)); } EXPORT_SYMBOL(__page_frag_cache_drain); @@ -4768,7 +4878,7 @@ refill: goto refill; if (unlikely(nc->pfmemalloc)) { - free_the_page(page, compound_order(page)); + free_unref_page(page, compound_order(page)); goto refill; } @@ -4812,7 +4922,7 @@ void page_frag_free(void *addr) struct page *page = virt_to_head_page(addr); if (unlikely(put_page_testzero(page))) - free_the_page(page, compound_order(page)); + free_unref_page(page, compound_order(page)); } EXPORT_SYMBOL(page_frag_free); @@ -4825,6 +4935,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order, struct page *last = page + nr; split_page_owner(page, order, 0); + pgalloc_tag_split(page, 1 << order); split_page_memcg(page, order, 0); while (page < --last) set_page_refcounted(last); @@ -4851,7 +4962,7 @@ static void *make_alloc_exact(unsigned long addr, unsigned int order, * * Return: pointer to the allocated area or %NULL in case of error. */ -void *alloc_pages_exact(size_t size, gfp_t gfp_mask) +void *alloc_pages_exact_noprof(size_t size, gfp_t gfp_mask) { unsigned int order = get_order(size); unsigned long addr; @@ -4859,10 +4970,10 @@ void *alloc_pages_exact(size_t size, gfp_t gfp_mask) if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM))) gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM); - addr = __get_free_pages(gfp_mask, order); + addr = get_free_pages_noprof(gfp_mask, order); return make_alloc_exact(addr, order, size); } -EXPORT_SYMBOL(alloc_pages_exact); +EXPORT_SYMBOL(alloc_pages_exact_noprof); /** * alloc_pages_exact_nid - allocate an exact number of physically-contiguous @@ -4876,7 +4987,7 @@ EXPORT_SYMBOL(alloc_pages_exact); * * Return: pointer to the allocated area or %NULL in case of error. */ -void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) +void * __meminit alloc_pages_exact_nid_noprof(int nid, size_t size, gfp_t gfp_mask) { unsigned int order = get_order(size); struct page *p; @@ -4884,7 +4995,7 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask) if (WARN_ON_ONCE(gfp_mask & (__GFP_COMP | __GFP_HIGHMEM))) gfp_mask &= ~(__GFP_COMP | __GFP_HIGHMEM); - p = alloc_pages_node(nid, gfp_mask, order); + p = alloc_pages_node_noprof(nid, gfp_mask, order); if (!p) return NULL; return make_alloc_exact((unsigned long)page_address(p), order, size); @@ -5185,37 +5296,13 @@ static void setup_min_slab_ratio(void); static void build_zonelists(pg_data_t *pgdat) { - int node, local_node; struct zoneref *zonerefs; int nr_zones; - local_node = pgdat->node_id; - zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs; nr_zones = build_zonerefs_node(pgdat, zonerefs); zonerefs += nr_zones; - /* - * Now we build the zonelist so that it contains the zones - * of all the other nodes. - * We don't want to pressure a particular node, so when - * building the zones for node N, we make sure that the - * zones coming right after the local ones are those from - * node N+1 (modulo N) - */ - for (node = local_node + 1; node < MAX_NUMNODES; node++) { - if (!node_online(node)) - continue; - nr_zones = build_zonerefs_node(NODE_DATA(node), zonerefs); - zonerefs += nr_zones; - } - for (node = 0; node < local_node; node++) { - if (!node_online(node)) - continue; - nr_zones = build_zonerefs_node(NODE_DATA(node), zonerefs); - zonerefs += nr_zones; - } - zonerefs->zone = NULL; zonerefs->zone_idx = 0; } @@ -5722,6 +5809,23 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char return pages; } +void free_reserved_page(struct page *page) +{ + if (mem_alloc_profiling_enabled()) { + union codetag_ref *ref = get_page_tag_ref(page); + + if (ref) { + set_codetag_empty(ref); + put_page_tag_ref(ref); + } + } + ClearPageReserved(page); + init_page_count(page); + __free_page(page); + adjust_managed_page_count(page, 1); +} +EXPORT_SYMBOL(free_reserved_page); + static int page_alloc_cpu_dead(unsigned int cpu) { struct zone *zone; @@ -5832,10 +5936,11 @@ static void setup_per_zone_lowmem_reserve(void) for (j = i + 1; j < MAX_NR_ZONES; j++) { struct zone *upper_zone = &pgdat->node_zones[j]; + bool empty = !zone_managed_pages(upper_zone); managed_pages += zone_managed_pages(upper_zone); - if (clear) + if (clear || empty) zone->lowmem_reserve[j] = 0; else zone->lowmem_reserve[j] = managed_pages / ratio; @@ -6216,7 +6321,6 @@ static struct ctl_table page_alloc_sysctl_table[] = { .extra2 = SYSCTL_ONE_HUNDRED, }, #endif - {} }; void __init page_alloc_sysctl_init(void) @@ -6256,6 +6360,7 @@ int __alloc_contig_migrate_range(struct compact_control *cc, struct migration_target_control mtc = { .nid = zone_to_nid(cc->zone), .gfp_mask = GFP_USER | __GFP_MOVABLE | __GFP_RETRY_MAYFAIL, + .reason = MR_CONTIG_RANGE, }; struct page *page; unsigned long total_mapped = 0; @@ -6288,8 +6393,12 @@ int __alloc_contig_migrate_range(struct compact_control *cc, if (trace_mm_alloc_contig_migrate_range_info_enabled()) { total_reclaimed += nr_reclaimed; - list_for_each_entry(page, &cc->migratepages, lru) - total_mapped += page_mapcount(page); + list_for_each_entry(page, &cc->migratepages, lru) { + struct folio *folio = page_folio(page); + + total_mapped += folio_mapped(folio) * + folio_nr_pages(folio); + } } ret = migrate_pages(&cc->migratepages, alloc_migration_target, @@ -6341,11 +6450,10 @@ int __alloc_contig_migrate_range(struct compact_control *cc, * pages which PFN is in [start, end) are allocated for the caller and * need to be freed with free_contig_range(). */ -int alloc_contig_range(unsigned long start, unsigned long end, +int alloc_contig_range_noprof(unsigned long start, unsigned long end, unsigned migratetype, gfp_t gfp_mask) { unsigned long outer_start, outer_end; - int order; int ret = 0; struct compact_control cc = { @@ -6418,29 +6526,7 @@ int alloc_contig_range(unsigned long start, unsigned long end, * We don't have to hold zone->lock here because the pages are * isolated thus they won't get removed from buddy. */ - - order = 0; - outer_start = start; - while (!PageBuddy(pfn_to_page(outer_start))) { - if (++order > MAX_PAGE_ORDER) { - outer_start = start; - break; - } - outer_start &= ~0UL << order; - } - - if (outer_start != start) { - order = buddy_order(pfn_to_page(outer_start)); - - /* - * outer_start page could be small order buddy page and - * it doesn't include start page. Adjust outer_start - * in this case to report failed page properly - * on tracepoint in test_pages_isolated() - */ - if (outer_start + (1UL << order) <= start) - outer_start = start; - } + outer_start = find_large_buddy(start); /* Make sure the range is really isolated. */ if (test_pages_isolated(outer_start, end, 0)) { @@ -6465,15 +6551,15 @@ done: undo_isolate_page_range(start, end, migratetype); return ret; } -EXPORT_SYMBOL(alloc_contig_range); +EXPORT_SYMBOL(alloc_contig_range_noprof); static int __alloc_contig_pages(unsigned long start_pfn, unsigned long nr_pages, gfp_t gfp_mask) { unsigned long end_pfn = start_pfn + nr_pages; - return alloc_contig_range(start_pfn, end_pfn, MIGRATE_MOVABLE, - gfp_mask); + return alloc_contig_range_noprof(start_pfn, end_pfn, MIGRATE_MOVABLE, + gfp_mask); } static bool pfn_range_valid_contig(struct zone *z, unsigned long start_pfn, @@ -6528,8 +6614,8 @@ static bool zone_spans_last_pfn(const struct zone *zone, * * Return: pointer to contiguous pages on success, or NULL if not successful. */ -struct page *alloc_contig_pages(unsigned long nr_pages, gfp_t gfp_mask, - int nid, nodemask_t *nodemask) +struct page *alloc_contig_pages_noprof(unsigned long nr_pages, gfp_t gfp_mask, + int nid, nodemask_t *nodemask) { unsigned long ret, pfn, flags; struct zonelist *zonelist; @@ -6660,8 +6746,9 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) BUG_ON(page_count(page)); BUG_ON(!PageBuddy(page)); + VM_WARN_ON(get_pageblock_migratetype(page) != MIGRATE_ISOLATE); order = buddy_order(page); - del_page_from_free_list(page, zone, order); + del_page_from_free_list(page, zone, order, MIGRATE_ISOLATE); pfn += (1 << order); } spin_unlock_irqrestore(&zone->lock, flags); @@ -6671,16 +6758,16 @@ void __offline_isolated_pages(unsigned long start_pfn, unsigned long end_pfn) /* * This function returns a stable result only if called under zone lock. */ -bool is_free_buddy_page(struct page *page) +bool is_free_buddy_page(const struct page *page) { unsigned long pfn = page_to_pfn(page); unsigned int order; for (order = 0; order < NR_PAGE_ORDERS; order++) { - struct page *page_head = page - (pfn & ((1 << order) - 1)); + const struct page *head = page - (pfn & ((1 << order) - 1)); - if (PageBuddy(page_head) && - buddy_order_unsafe(page_head) >= order) + if (PageBuddy(head) && + buddy_order_unsafe(head) >= order) break; } @@ -6689,6 +6776,14 @@ bool is_free_buddy_page(struct page *page) EXPORT_SYMBOL(is_free_buddy_page); #ifdef CONFIG_MEMORY_FAILURE +static inline void add_to_free_list(struct page *page, struct zone *zone, + unsigned int order, int migratetype, + bool tail) +{ + __add_to_free_list(page, zone, order, migratetype, tail); + account_freepages(zone, 1 << order, migratetype); +} + /* * Break down a higher-order page in sub-pages, and keep our target out of * buddy allocator. @@ -6711,10 +6806,10 @@ static void break_down_buddy_pages(struct zone *zone, struct page *page, current_buddy = page + size; } - if (set_page_guard(zone, current_buddy, high, migratetype)) + if (set_page_guard(zone, current_buddy, high)) continue; - add_to_free_list(current_buddy, zone, high, migratetype); + add_to_free_list(current_buddy, zone, high, migratetype, false); set_buddy_order(current_buddy, high); } } @@ -6740,12 +6835,11 @@ bool take_page_off_buddy(struct page *page) int migratetype = get_pfnblock_migratetype(page_head, pfn_head); - del_page_from_free_list(page_head, zone, page_order); + del_page_from_free_list(page_head, zone, page_order, + migratetype); break_down_buddy_pages(zone, page_head, page, 0, page_order, migratetype); SetPageHWPoisonTakenOff(page); - if (!is_migrate_isolate(migratetype)) - __mod_zone_freepage_state(zone, -1, migratetype); ret = true; break; } @@ -6762,13 +6856,14 @@ bool take_page_off_buddy(struct page *page) bool put_page_back_buddy(struct page *page) { struct zone *zone = page_zone(page); - unsigned long pfn = page_to_pfn(page); unsigned long flags; - int migratetype = get_pfnblock_migratetype(page, pfn); bool ret = false; spin_lock_irqsave(&zone->lock, flags); if (put_page_testzero(page)) { + unsigned long pfn = page_to_pfn(page); + int migratetype = get_pfnblock_migratetype(page, pfn); + ClearPageHWPoisonTakenOff(page); __free_one_page(page, pfn, zone, 0, migratetype, FPI_NONE); if (TestClearPageHWPoison(page)) { @@ -6852,7 +6947,7 @@ static bool try_to_accept_memory_one(struct zone *zone) list_del(&page->lru); last = list_empty(&zone->unaccepted_pages); - __mod_zone_freepage_state(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); + account_freepages(zone, -MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, -MAX_ORDER_NR_PAGES); spin_unlock_irqrestore(&zone->lock, flags); @@ -6904,7 +6999,7 @@ static bool __free_unaccepted(struct page *page) spin_lock_irqsave(&zone->lock, flags); first = list_empty(&zone->unaccepted_pages); list_add_tail(&page->lru, &zone->unaccepted_pages); - __mod_zone_freepage_state(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); + account_freepages(zone, MAX_ORDER_NR_PAGES, MIGRATE_MOVABLE); __mod_zone_page_state(zone, NR_UNACCEPTED, MAX_ORDER_NR_PAGES); spin_unlock_irqrestore(&zone->lock, flags); |