path: root/mm/mm_init.c
author     Daniel Baumann <daniel.baumann@progress-linux.org>  2024-08-07 13:17:46 +0000
committer  Daniel Baumann <daniel.baumann@progress-linux.org>  2024-08-07 13:17:46 +0000
commit     7f3a4257159dea8e7ef66d1a539dc6df708b8ed3 (patch)
tree       bcc69b5f4609f348fac49e2f59e210b29eaea783 /mm/mm_init.c
parent     Adding upstream version 6.9.12. (diff)
download   linux-7f3a4257159dea8e7ef66d1a539dc6df708b8ed3.tar.xz
           linux-7f3a4257159dea8e7ef66d1a539dc6df708b8ed3.zip
Adding upstream version 6.10.3. (upstream/6.10.3)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'mm/mm_init.c')
-rw-r--r--  mm/mm_init.c  175
1 file changed, 56 insertions, 119 deletions
diff --git a/mm/mm_init.c b/mm/mm_init.c
index 549e76af8f..3ec04933f7 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -24,9 +24,11 @@
#include <linux/page_ext.h>
#include <linux/pti.h>
#include <linux/pgtable.h>
+#include <linux/stackdepot.h>
#include <linux/swap.h>
#include <linux/cma.h>
#include <linux/crash_dump.h>
+#include <linux/execmem.h>
#include "internal.h"
#include "slab.h"
#include "shuffle.h"
@@ -226,7 +228,6 @@ static unsigned long required_movablecore_percent __initdata;
static unsigned long nr_kernel_pages __initdata;
static unsigned long nr_all_pages __initdata;
-static unsigned long dma_reserve __initdata;
static bool deferred_struct_pages __meminitdata;
@@ -1144,7 +1145,7 @@ static void __init adjust_zone_range_for_zone_movable(int nid,
* Return the number of holes in a range on a node. If nid is MAX_NUMNODES,
* then all holes in the requested range will be accounted for.
*/
-unsigned long __init __absent_pages_in_range(int nid,
+static unsigned long __init __absent_pages_in_range(int nid,
unsigned long range_start_pfn,
unsigned long range_end_pfn)
{
@@ -1265,6 +1266,30 @@ static void __init reset_memoryless_node_totalpages(struct pglist_data *pgdat)
pr_debug("On node %d totalpages: 0\n", pgdat->node_id);
}
+static void __init calc_nr_kernel_pages(void)
+{
+ unsigned long start_pfn, end_pfn;
+ phys_addr_t start_addr, end_addr;
+ u64 u;
+#ifdef CONFIG_HIGHMEM
+ unsigned long high_zone_low = arch_zone_lowest_possible_pfn[ZONE_HIGHMEM];
+#endif
+
+ for_each_free_mem_range(u, NUMA_NO_NODE, MEMBLOCK_NONE, &start_addr, &end_addr, NULL) {
+ start_pfn = PFN_UP(start_addr);
+ end_pfn = PFN_DOWN(end_addr);
+
+ if (start_pfn < end_pfn) {
+ nr_all_pages += end_pfn - start_pfn;
+#ifdef CONFIG_HIGHMEM
+ start_pfn = clamp(start_pfn, 0, high_zone_low);
+ end_pfn = clamp(end_pfn, 0, high_zone_low);
+#endif
+ nr_kernel_pages += end_pfn - start_pfn;
+ }
+ }
+}
+
static void __init calculate_node_totalpages(struct pglist_data *pgdat,
unsigned long node_start_pfn,
unsigned long node_end_pfn)
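The new calc_nr_kernel_pages() above walks every free memblock range, rounds it inward to whole page frames, counts all of it toward nr_all_pages, and (under CONFIG_HIGHMEM) clamps to the lowmem boundary before adding to nr_kernel_pages. A minimal userspace sketch of that accounting follows; PAGE_SHIFT, the example ranges and HIGHMEM_START_PFN are illustrative assumptions, not values from the patch.

/* Sketch only: mimics the free-range accounting of calc_nr_kernel_pages(). */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT        12
#define PFN_UP(addr)      (((addr) + (1UL << PAGE_SHIFT) - 1) >> PAGE_SHIFT)
#define PFN_DOWN(addr)    ((addr) >> PAGE_SHIFT)
#define HIGHMEM_START_PFN 0x38000UL   /* assumed lowmem/highmem boundary */

struct range { uint64_t start, end; };    /* free memory, in bytes */

int main(void)
{
	/* stand-in for for_each_free_mem_range() */
	struct range free_ranges[] = {
		{ 0x00100000, 0x30000000 },   /* fully below the boundary */
		{ 0x30000000, 0x80000000 },   /* straddles the boundary   */
	};
	unsigned long nr_all_pages = 0, nr_kernel_pages = 0;

	for (size_t i = 0; i < sizeof(free_ranges) / sizeof(free_ranges[0]); i++) {
		unsigned long start_pfn = PFN_UP(free_ranges[i].start);
		unsigned long end_pfn   = PFN_DOWN(free_ranges[i].end);

		if (start_pfn >= end_pfn)
			continue;

		nr_all_pages += end_pfn - start_pfn;      /* every free page */

		/* clamp to lowmem, as the CONFIG_HIGHMEM branch does */
		if (start_pfn > HIGHMEM_START_PFN)
			start_pfn = HIGHMEM_START_PFN;
		if (end_pfn > HIGHMEM_START_PFN)
			end_pfn = HIGHMEM_START_PFN;

		nr_kernel_pages += end_pfn - start_pfn;   /* free lowmem pages */
	}

	printf("nr_all_pages=%lu nr_kernel_pages=%lu\n",
	       nr_all_pages, nr_kernel_pages);
	return 0;
}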
@@ -1308,26 +1333,6 @@ static void __init calculate_node_totalpages(struct pglist_data *pgdat,
pr_debug("On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
}
-static unsigned long __init calc_memmap_size(unsigned long spanned_pages,
- unsigned long present_pages)
-{
- unsigned long pages = spanned_pages;
-
- /*
- * Provide a more accurate estimation if there are holes within
- * the zone and SPARSEMEM is in use. If there are holes within the
- * zone, each populated memory region may cost us one or two extra
- * memmap pages due to alignment because memmap pages for each
- * populated regions may not be naturally aligned on page boundary.
- * So the (present_pages >> 4) heuristic is a tradeoff for that.
- */
- if (spanned_pages > present_pages + (present_pages >> 4) &&
- IS_ENABLED(CONFIG_SPARSEMEM))
- pages = present_pages;
-
- return PAGE_ALIGN(pages * sizeof(struct page)) >> PAGE_SHIFT;
-}
-
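For reference, the deleted calc_memmap_size() estimated how many pages the struct page array (the "memmap") of a zone occupies, basing the estimate on present_pages rather than spanned_pages when the zone is sparsely populated. A rough worked example under an assumed 4 KiB page size and 64-byte struct page (both assumptions, not taken from the patch):

/* Illustration only: the estimate the removed calc_memmap_size() made. */
#include <stdio.h>

#define PAGE_SIZE        4096UL
#define PAGE_SHIFT       12
#define STRUCT_PAGE_SIZE 64UL    /* assumed sizeof(struct page) */
#define PAGE_ALIGN(x)    (((x) + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1))

int main(void)
{
	unsigned long spanned = 262144;   /* 1 GiB worth of 4 KiB frames */
	unsigned long present = 200000;   /* the zone has holes          */
	unsigned long pages = spanned;

	/* with large holes, base the estimate on present pages instead */
	if (spanned > present + (present >> 4))
		pages = present;

	/* 200000 * 64 bytes of memmap ~= 3125 pages */
	printf("memmap pages ~= %lu\n",
	       PAGE_ALIGN(pages * STRUCT_PAGE_SIZE) >> PAGE_SHIFT);
	return 0;
}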
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
static void pgdat_init_split_queue(struct pglist_data *pgdat)
{
@@ -1542,15 +1547,6 @@ void __ref free_area_init_core_hotplug(struct pglist_data *pgdat)
}
#endif
-/*
- * Set up the zone data structures:
- * - mark all pages reserved
- * - mark all memory queues empty
- * - clear the memory bitmaps
- *
- * NOTE: pgdat should get zeroed by caller.
- * NOTE: this function is only called during early init.
- */
static void __init free_area_init_core(struct pglist_data *pgdat)
{
enum zone_type j;
@@ -1561,47 +1557,13 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
for (j = 0; j < MAX_NR_ZONES; j++) {
struct zone *zone = pgdat->node_zones + j;
- unsigned long size, freesize, memmap_pages;
-
- size = zone->spanned_pages;
- freesize = zone->present_pages;
-
- /*
- * Adjust freesize so that it accounts for how much memory
- * is used by this zone for memmap. This affects the watermark
- * and per-cpu initialisations
- */
- memmap_pages = calc_memmap_size(size, freesize);
- if (!is_highmem_idx(j)) {
- if (freesize >= memmap_pages) {
- freesize -= memmap_pages;
- if (memmap_pages)
- pr_debug(" %s zone: %lu pages used for memmap\n",
- zone_names[j], memmap_pages);
- } else
- pr_warn(" %s zone: %lu memmap pages exceeds freesize %lu\n",
- zone_names[j], memmap_pages, freesize);
- }
-
- /* Account for reserved pages */
- if (j == 0 && freesize > dma_reserve) {
- freesize -= dma_reserve;
- pr_debug(" %s zone: %lu pages reserved\n", zone_names[0], dma_reserve);
- }
-
- if (!is_highmem_idx(j))
- nr_kernel_pages += freesize;
- /* Charge for highmem memmap if there are enough kernel pages */
- else if (nr_kernel_pages > memmap_pages * 2)
- nr_kernel_pages -= memmap_pages;
- nr_all_pages += freesize;
+ unsigned long size = zone->spanned_pages;
/*
- * Set an approximate value for lowmem here, it will be adjusted
- * when the bootmem allocator frees pages into the buddy system.
- * And all highmem pages will be managed by the buddy system.
+ * Initialize zone->managed_pages to 0; it will be reset
+ * when the memblock allocator frees pages into the buddy system.
*/
- zone_init_internals(zone, j, nid, freesize);
+ zone_init_internals(zone, j, nid, zone->present_pages);
if (!size)
continue;
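With the per-zone memmap and dma_reserve adjustments gone, zone_init_internals() now receives present_pages, and managed_pages starts at zero, growing only as the memblock allocator actually releases pages to the buddy lists (the release path, e.g. __free_pages_core(), bumps the counter). A userspace toy model of that accounting, with made-up structures and numbers:

/* Toy model, not kernel code: managed_pages starts at 0 and only counts
 * pages that really reach the buddy allocator. */
#include <stdio.h>

struct toy_zone {
	unsigned long present_pages;
	unsigned long managed_pages;
};

static void release_to_buddy(struct toy_zone *z, unsigned long nr_pages)
{
	/* mirrors the release path adding freed pages to managed_pages */
	z->managed_pages += nr_pages;
}

int main(void)
{
	struct toy_zone dma32 = { .present_pages = 100000, .managed_pages = 0 };

	release_to_buddy(&dma32, 60000);   /* early memblock releases */
	release_to_buddy(&dma32, 30000);   /* remaining free ranges   */

	/* memmap, reservations, etc. never reach the buddy system and
	 * therefore never show up in managed_pages */
	printf("present=%lu managed=%lu\n",
	       dma32.present_pages, dma32.managed_pages);
	return 0;
}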
@@ -1874,30 +1836,26 @@ void __init free_area_init(unsigned long *max_zone_pfn)
panic("Cannot allocate %zuB for node %d.\n",
sizeof(*pgdat), nid);
arch_refresh_nodedata(nid, pgdat);
- free_area_init_node(nid);
-
- /*
- * We do not want to confuse userspace by sysfs
- * files/directories for node without any memory
- * attached to it, so this node is not marked as
- * N_MEMORY and not marked online so that no sysfs
- * hierarchy will be created via register_one_node for
- * it. The pgdat will get fully initialized by
- * hotadd_init_pgdat() when memory is hotplugged into
- * this node.
- */
- continue;
}
pgdat = NODE_DATA(nid);
free_area_init_node(nid);
- /* Any memory on that node */
- if (pgdat->node_present_pages)
+ /*
+ * No sysfs hierarchy will be created via register_one_node()
+ * for a memory-less node because it is not marked as N_MEMORY
+ * here and won't be set online later. The benefit is that
+ * userspace programs won't be confused by sysfs files/directories
+ * of a memory-less node. The pgdat will get fully initialized by
+ * hotadd_init_pgdat() when memory is hotplugged into this node.
+ */
+ if (pgdat->node_present_pages) {
node_set_state(nid, N_MEMORY);
- check_for_memory(pgdat);
+ check_for_memory(pgdat);
+ }
}
+ calc_nr_kernel_pages();
memmap_init();
/* disable hash distribution for systems with a single node */
@@ -2057,7 +2015,7 @@ static unsigned long __init deferred_init_pages(struct zone *zone,
__init_single_page(page, pfn, zid, nid);
nr_pages++;
}
- return (nr_pages);
+ return nr_pages;
}
/*
@@ -2259,10 +2217,6 @@ zone_empty:
* Return true when zone was grown, otherwise return false. We return true even
* when we grow less than requested, to let the caller decide if there are
* enough pages to satisfy the allocation.
- *
- * Note: We use noinline because this function is needed only during boot, and
- * it is called from a __ref function _deferred_grow_zone. This way we are
- * making sure that it is not inlined into permanent text section.
*/
bool __init deferred_grow_zone(struct zone *zone, unsigned int order)
{
@@ -2412,17 +2366,6 @@ void __init page_alloc_init_late(void)
page_alloc_sysctl_init();
}
-#ifndef __HAVE_ARCH_RESERVED_KERNEL_PAGES
-/*
- * Returns the number of pages that arch has reserved but
- * is not known to alloc_large_system_hash().
- */
-static unsigned long __init arch_reserved_kernel_pages(void)
-{
- return 0;
-}
-#endif
-
/*
* Adaptive scale is meant to reduce sizes of hash tables on large memory
* machines. As memory size is increased the scale is also increased but at
@@ -2465,7 +2408,6 @@ void *__init alloc_large_system_hash(const char *tablename,
if (!numentries) {
/* round applicable memory size up to nearest megabyte */
numentries = nr_kernel_pages;
- numentries -= arch_reserved_kernel_pages();
/* It isn't necessary when PAGE_SIZE >= 1MB */
if (PAGE_SIZE < SZ_1M)
@@ -2547,26 +2489,9 @@ void *__init alloc_large_system_hash(const char *tablename,
return table;
}
-/**
- * set_dma_reserve - set the specified number of pages reserved in the first zone
- * @new_dma_reserve: The number of pages to mark reserved
- *
- * The per-cpu batchsize and zone watermarks are determined by managed_pages.
- * In the DMA zone, a significant percentage may be consumed by kernel image
- * and other unfreeable allocations which can skew the watermarks badly. This
- * function may optionally be used to account for unfreeable pages in the
- * first zone (e.g., ZONE_DMA). The effect will be lower watermarks and
- * smaller per-cpu batchsize.
- */
-void __init set_dma_reserve(unsigned long new_dma_reserve)
-{
- dma_reserve = new_dma_reserve;
-}
-
void __init memblock_free_pages(struct page *page, unsigned long pfn,
unsigned int order)
{
-
if (IS_ENABLED(CONFIG_DEFERRED_STRUCT_PAGE_INIT)) {
int nid = early_pfn_to_nid(pfn);
@@ -2578,6 +2503,17 @@ void __init memblock_free_pages(struct page *page, unsigned long pfn,
/* KMSAN will take care of these pages. */
return;
}
+
+ /* pages were reserved and not allocated */
+ if (mem_alloc_profiling_enabled()) {
+ union codetag_ref *ref = get_page_tag_ref(page);
+
+ if (ref) {
+ set_codetag_empty(ref);
+ put_page_tag_ref(ref);
+ }
+ }
+
__free_pages_core(page, order);
}
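The added hunk above handles pages that were memblock-reserved rather than allocated: when memory allocation profiling is enabled, their tag reference is marked empty before the pages are fed to the buddy allocator, so they are not attributed to a bogus call site. A toy, userspace-only sketch of that pattern; every name below is a mock stand-in, not the kernel's codetag API:

/* Toy illustration only: clear the "allocation tag" of a page that was
 * reserved at boot rather than allocated by anyone. */
#include <stdbool.h>
#include <stdio.h>

struct mock_tag { const char *alloc_site; };   /* stand-in for a codetag */
struct mock_page { struct mock_tag tag; };

static bool profiling_enabled = true;          /* cf. mem_alloc_profiling_enabled() */

static void release_reserved_page(struct mock_page *page)
{
	if (profiling_enabled)
		page->tag.alloc_site = "(empty)"; /* cf. set_codetag_empty() */

	/* ...then hand the page to the buddy allocator */
	printf("freed reserved page, tag=%s\n", page->tag.alloc_site);
}

int main(void)
{
	struct mock_page p = { .tag = { .alloc_site = "(untracked)" } };

	release_reserved_page(&p);
	return 0;
}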
@@ -2793,4 +2729,5 @@ void __init mm_core_init(void)
pti_init();
kmsan_init_runtime();
mm_cache_init();
+ execmem_init();
}