diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-08 03:43:39 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-08 03:43:39 +0000 |
commit | 27a2c36c9538c7e5536a1b20fd74f0fc911d7950 (patch) | |
tree | 3101221cb1458bce62f7ae76638517d93f8b64a8 /mm/page_alloc.c | |
parent | Releasing progress-linux version 4.19.260-1progress5u1. (diff) | |
download | linux-27a2c36c9538c7e5536a1b20fd74f0fc911d7950.tar.xz linux-27a2c36c9538c7e5536a1b20fd74f0fc911d7950.zip |
Merging upstream version 4.19.269.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'mm/page_alloc.c')
-rw-r--r-- | mm/page_alloc.c | 65 |
1 files changed, 55 insertions, 10 deletions
diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 9c35403d9..1cffd4e1f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -3779,6 +3779,30 @@ void fs_reclaim_release(gfp_t gfp_mask) EXPORT_SYMBOL_GPL(fs_reclaim_release); #endif +/* + * Zonelists may change due to hotplug during allocation. Detect when zonelists + * have been rebuilt so allocation retries. Reader side does not lock and + * retries the allocation if zonelist changes. Writer side is protected by the + * embedded spin_lock. + */ +static DEFINE_SEQLOCK(zonelist_update_seq); + +static unsigned int zonelist_iter_begin(void) +{ + if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE)) + return read_seqbegin(&zonelist_update_seq); + + return 0; +} + +static unsigned int check_retry_zonelist(unsigned int seq) +{ + if (IS_ENABLED(CONFIG_MEMORY_HOTREMOVE)) + return read_seqretry(&zonelist_update_seq, seq); + + return seq; +} + /* Perform direct synchronous page reclaim */ static int __perform_reclaim(gfp_t gfp_mask, unsigned int order, @@ -4084,6 +4108,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, int compaction_retries; int no_progress_loops; unsigned int cpuset_mems_cookie; + unsigned int zonelist_iter_cookie; int reserve_flags; /* @@ -4094,11 +4119,12 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, (__GFP_ATOMIC|__GFP_DIRECT_RECLAIM))) gfp_mask &= ~__GFP_ATOMIC; -retry_cpuset: +restart: compaction_retries = 0; no_progress_loops = 0; compact_priority = DEF_COMPACT_PRIORITY; cpuset_mems_cookie = read_mems_allowed_begin(); + zonelist_iter_cookie = zonelist_iter_begin(); /* * The fast path uses conservative alloc_flags to succeed only until @@ -4247,9 +4273,13 @@ retry: goto retry; - /* Deal with possible cpuset update races before we start OOM killing */ - if (check_retry_cpuset(cpuset_mems_cookie, ac)) - goto retry_cpuset; + /* + * Deal with possible cpuset update races or zonelist updates to avoid + * a unnecessary OOM kill. + */ + if (check_retry_cpuset(cpuset_mems_cookie, ac) || + check_retry_zonelist(zonelist_iter_cookie)) + goto restart; /* Reclaim has failed us, start killing things */ page = __alloc_pages_may_oom(gfp_mask, order, ac, &did_some_progress); @@ -4269,9 +4299,13 @@ retry: } nopage: - /* Deal with possible cpuset update races before we fail */ - if (check_retry_cpuset(cpuset_mems_cookie, ac)) - goto retry_cpuset; + /* + * Deal with possible cpuset update races or zonelist updates to avoid + * a unnecessary OOM kill. + */ + if (check_retry_cpuset(cpuset_mems_cookie, ac) || + check_retry_zonelist(zonelist_iter_cookie)) + goto restart; /* * Make sure that __GFP_NOFAIL request doesn't leak out and make sure @@ -4569,6 +4603,18 @@ refill: /* reset page count bias and offset to start of new frag */ nc->pagecnt_bias = PAGE_FRAG_CACHE_MAX_SIZE + 1; offset = size - fragsz; + if (unlikely(offset < 0)) { + /* + * The caller is trying to allocate a fragment + * with fragsz > PAGE_SIZE but the cache isn't big + * enough to satisfy the request, this may + * happen in low memory conditions. + * We don't release the cache page because + * it could make memory pressure worse + * so we simply return NULL here. + */ + return NULL; + } } nc->pagecnt_bias--; @@ -5379,9 +5425,8 @@ static void __build_all_zonelists(void *data) int nid; int __maybe_unused cpu; pg_data_t *self = data; - static DEFINE_SPINLOCK(lock); - spin_lock(&lock); + write_seqlock(&zonelist_update_seq); #ifdef CONFIG_NUMA memset(node_load, 0, sizeof(node_load)); @@ -5414,7 +5459,7 @@ static void __build_all_zonelists(void *data) #endif } - spin_unlock(&lock); + write_sequnlock(&zonelist_update_seq); } static noinline void __init |