From e313beb668f41d49c10c546180fc02c62ab2cbe3 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 20:48:00 +0200 Subject: Merging upstream version 6.7.12. Signed-off-by: Daniel Baumann --- mm/compaction.c | 7 +------ mm/filemap.c | 16 ++++++++++++++++ mm/kasan/kasan_test.c | 3 ++- mm/memcontrol.c | 10 +++++----- mm/memtest.c | 4 ++-- mm/mmap.c | 10 +++++++++- mm/page_alloc.c | 10 ++++++---- mm/readahead.c | 4 ++-- mm/shmem.c | 2 +- mm/shmem_quota.c | 10 +++++++--- mm/swapfile.c | 13 ++++++++++++- mm/vmpressure.c | 2 +- mm/vmscan.c | 5 ++++- 13 files changed, 68 insertions(+), 28 deletions(-) (limited to 'mm') diff --git a/mm/compaction.c b/mm/compaction.c index 01ba298739..f31d18741a 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2701,16 +2701,11 @@ enum compact_result try_to_compact_pages(gfp_t gfp_mask, unsigned int order, unsigned int alloc_flags, const struct alloc_context *ac, enum compact_priority prio, struct page **capture) { - int may_perform_io = (__force int)(gfp_mask & __GFP_IO); struct zoneref *z; struct zone *zone; enum compact_result rc = COMPACT_SKIPPED; - /* - * Check if the GFP flags allow compaction - GFP_NOIO is really - * tricky context because the migration might require IO - */ - if (!may_perform_io) + if (!gfp_compaction_allowed(gfp_mask)) return COMPACT_SKIPPED; trace_mm_compaction_try_to_compact_pages(order, gfp_mask, prio); diff --git a/mm/filemap.c b/mm/filemap.c index 5be7887957..321349e2c9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -4150,7 +4150,23 @@ static void filemap_cachestat(struct address_space *mapping, /* shmem file - in swap cache */ swp_entry_t swp = radix_to_swp_entry(folio); + /* swapin error results in poisoned entry */ + if (non_swap_entry(swp)) + goto resched; + + /* + * Getting a swap entry from the shmem + * inode means we beat + * shmem_unuse(). rcu_read_lock() + * ensures swapoff waits for us before + * freeing the swapper space. However, + * we can race with swapping and + * invalidation, so there might not be + * a shadow in the swapcache (yet). + */ shadow = get_shadow_from_swap_cache(swp); + if (!shadow) + goto resched; } #endif if (workingset_test_recent(shadow, true, &workingset)) diff --git a/mm/kasan/kasan_test.c b/mm/kasan/kasan_test.c index 34515a106c..23906e886b 100644 --- a/mm/kasan/kasan_test.c +++ b/mm/kasan/kasan_test.c @@ -451,7 +451,8 @@ static void kmalloc_oob_16(struct kunit *test) /* This test is specifically crafted for the generic mode. */ KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC); - ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); + /* RELOC_HIDE to prevent gcc from warning about short alloc */ + ptr1 = RELOC_HIDE(kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL), 0); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 792fb3a5ce..27c9f451d4 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4379,7 +4379,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap) * only one element of the array here. */ for (; i >= 0 && unlikely(t->entries[i].threshold > usage); i--) - eventfd_signal(t->entries[i].eventfd, 1); + eventfd_signal(t->entries[i].eventfd); /* i = current_threshold + 1 */ i++; @@ -4391,7 +4391,7 @@ static void __mem_cgroup_threshold(struct mem_cgroup *memcg, bool swap) * only one element of the array here. 
*/ for (; i < t->size && unlikely(t->entries[i].threshold <= usage); i++) - eventfd_signal(t->entries[i].eventfd, 1); + eventfd_signal(t->entries[i].eventfd); /* Update current_threshold */ t->current_threshold = i - 1; @@ -4431,7 +4431,7 @@ static int mem_cgroup_oom_notify_cb(struct mem_cgroup *memcg) spin_lock(&memcg_oom_lock); list_for_each_entry(ev, &memcg->oom_notify, list) - eventfd_signal(ev->eventfd, 1); + eventfd_signal(ev->eventfd); spin_unlock(&memcg_oom_lock); return 0; @@ -4650,7 +4650,7 @@ static int mem_cgroup_oom_register_event(struct mem_cgroup *memcg, /* already in OOM ? */ if (memcg->under_oom) - eventfd_signal(eventfd, 1); + eventfd_signal(eventfd); spin_unlock(&memcg_oom_lock); return 0; @@ -4942,7 +4942,7 @@ static void memcg_event_remove(struct work_struct *work) event->unregister_event(memcg, event->eventfd); /* Notify userspace the event is going away. */ - eventfd_signal(event->eventfd, 1); + eventfd_signal(event->eventfd); eventfd_ctx_put(event->eventfd); kfree(event); diff --git a/mm/memtest.c b/mm/memtest.c index 32f3e9dda8..c2c609c391 100644 --- a/mm/memtest.c +++ b/mm/memtest.c @@ -51,10 +51,10 @@ static void __init memtest(u64 pattern, phys_addr_t start_phys, phys_addr_t size last_bad = 0; for (p = start; p < end; p++) - *p = pattern; + WRITE_ONCE(*p, pattern); for (p = start; p < end; p++, start_phys_aligned += incr) { - if (*p == pattern) + if (READ_ONCE(*p) == pattern) continue; if (start_phys_aligned == last_bad + incr) { last_bad += incr; diff --git a/mm/mmap.c b/mm/mmap.c index b9a43872ac..23072589fe 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -954,13 +954,21 @@ static struct vm_area_struct } else if (merge_prev) { /* case 2 */ if (curr) { vma_start_write(curr); - err = dup_anon_vma(prev, curr, &anon_dup); if (end == curr->vm_end) { /* case 7 */ + /* + * can_vma_merge_after() assumed we would not be + * removing prev vma, so it skipped the check + * for vm_ops->close, but we are removing curr + */ + if (curr->vm_ops && curr->vm_ops->close) + err = -EINVAL; remove = curr; } else { /* case 5 */ adjust = curr; adj_start = (end - curr->vm_start); } + if (!err) + err = dup_anon_vma(prev, curr, &anon_dup); } } else { /* merge_next */ vma_start_write(next); diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6d2a74138f..b1d9dcdabd 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -4042,6 +4042,7 @@ __alloc_pages_slowpath(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac) { bool can_direct_reclaim = gfp_mask & __GFP_DIRECT_RECLAIM; + bool can_compact = gfp_compaction_allowed(gfp_mask); const bool costly_order = order > PAGE_ALLOC_COSTLY_ORDER; struct page *page = NULL; unsigned int alloc_flags; @@ -4112,7 +4113,7 @@ restart: * Don't try this for allocations that are allowed to ignore * watermarks, as the ALLOC_NO_WATERMARKS attempt didn't yet happen. 
*/ - if (can_direct_reclaim && + if (can_direct_reclaim && can_compact && (costly_order || (order > 0 && ac->migratetype != MIGRATE_MOVABLE)) && !gfp_pfmemalloc_allowed(gfp_mask)) { @@ -4210,9 +4211,10 @@ retry: /* * Do not retry costly high order allocations unless they are - * __GFP_RETRY_MAYFAIL + * __GFP_RETRY_MAYFAIL and we can compact */ - if (costly_order && !(gfp_mask & __GFP_RETRY_MAYFAIL)) + if (costly_order && (!can_compact || + !(gfp_mask & __GFP_RETRY_MAYFAIL))) goto nopage; if (should_reclaim_retry(gfp_mask, order, ac, alloc_flags, @@ -4225,7 +4227,7 @@ retry: * implementation of the compaction depends on the sufficient amount * of free memory (see __compaction_suitable) */ - if (did_some_progress > 0 && + if (did_some_progress > 0 && can_compact && should_compact_retry(ac, order, alloc_flags, compact_result, &compact_priority, &compaction_retries)) diff --git a/mm/readahead.c b/mm/readahead.c index 6925e6959f..1d1a84deb5 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -469,7 +469,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, if (!folio) return -ENOMEM; - mark = round_up(mark, 1UL << order); + mark = round_down(mark, 1UL << order); if (index == mark) folio_set_readahead(folio); err = filemap_add_folio(ractl->mapping, folio, index, gfp); @@ -577,7 +577,7 @@ static void ondemand_readahead(struct readahead_control *ractl, * It's the expected callback index, assume sequential access. * Ramp up sizes, and push forward the readahead window. */ - expected = round_up(ra->start + ra->size - ra->async_size, + expected = round_down(ra->start + ra->size - ra->async_size, 1UL << order); if (index == expected || index == (ra->start + ra->size)) { ra->start += ra->size; diff --git a/mm/shmem.c b/mm/shmem.c index 0d1ce70bce..fb2daf89e2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -310,7 +310,7 @@ static void shmem_disable_quotas(struct super_block *sb) dquot_quota_off(sb, type); } -static struct dquot **shmem_get_dquots(struct inode *inode) +static struct dquot __rcu **shmem_get_dquots(struct inode *inode) { return SHMEM_I(inode)->i_dquot; } diff --git a/mm/shmem_quota.c b/mm/shmem_quota.c index 062d1c1097..ce514e700d 100644 --- a/mm/shmem_quota.c +++ b/mm/shmem_quota.c @@ -116,7 +116,7 @@ static int shmem_free_file_info(struct super_block *sb, int type) static int shmem_get_next_id(struct super_block *sb, struct kqid *qid) { struct mem_dqinfo *info = sb_dqinfo(sb, qid->type); - struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node; + struct rb_node *node; qid_t id = from_kqid(&init_user_ns, *qid); struct quota_info *dqopt = sb_dqopt(sb); struct quota_id *entry = NULL; @@ -126,6 +126,7 @@ static int shmem_get_next_id(struct super_block *sb, struct kqid *qid) return -ESRCH; down_read(&dqopt->dqio_sem); + node = ((struct rb_root *)info->dqi_priv)->rb_node; while (node) { entry = rb_entry(node, struct quota_id, node); @@ -165,7 +166,7 @@ out_unlock: static int shmem_acquire_dquot(struct dquot *dquot) { struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type); - struct rb_node **n = &((struct rb_root *)info->dqi_priv)->rb_node; + struct rb_node **n; struct shmem_sb_info *sbinfo = dquot->dq_sb->s_fs_info; struct rb_node *parent = NULL, *new_node = NULL; struct quota_id *new_entry, *entry; @@ -176,6 +177,8 @@ static int shmem_acquire_dquot(struct dquot *dquot) mutex_lock(&dquot->dq_lock); down_write(&dqopt->dqio_sem); + n = &((struct rb_root *)info->dqi_priv)->rb_node; + while (*n) { parent = *n; entry = rb_entry(parent, struct 
quota_id, node); @@ -264,7 +267,7 @@ static bool shmem_is_empty_dquot(struct dquot *dquot) static int shmem_release_dquot(struct dquot *dquot) { struct mem_dqinfo *info = sb_dqinfo(dquot->dq_sb, dquot->dq_id.type); - struct rb_node *node = ((struct rb_root *)info->dqi_priv)->rb_node; + struct rb_node *node; qid_t id = from_kqid(&init_user_ns, dquot->dq_id); struct quota_info *dqopt = sb_dqopt(dquot->dq_sb); struct quota_id *entry = NULL; @@ -275,6 +278,7 @@ static int shmem_release_dquot(struct dquot *dquot) goto out_dqlock; down_write(&dqopt->dqio_sem); + node = ((struct rb_root *)info->dqi_priv)->rb_node; while (node) { entry = rb_entry(node, struct quota_id, node); diff --git a/mm/swapfile.c b/mm/swapfile.c index 022581ec40..91397a2539 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1226,6 +1226,11 @@ static unsigned char __swap_entry_free_locked(struct swap_info_struct *p, * with get_swap_device() and put_swap_device(), unless the swap * functions call get/put_swap_device() by themselves. * + * Note that when only holding the PTL, swapoff might succeed immediately + * after freeing a swap entry. Therefore, immediately after + * __swap_entry_free(), the swap info might become stale and should not + * be touched without a prior get_swap_device(). + * * Check whether swap entry is valid in the swap device. If so, * return pointer to swap_info_struct, and keep the swap entry valid * via preventing the swap device from being swapoff, until @@ -1603,13 +1608,19 @@ int free_swap_and_cache(swp_entry_t entry) if (non_swap_entry(entry)) return 1; - p = _swap_info_get(entry); + p = get_swap_device(entry); if (p) { + if (WARN_ON(data_race(!p->swap_map[swp_offset(entry)]))) { + put_swap_device(p); + return 0; + } + count = __swap_entry_free(p, entry); if (count == SWAP_HAS_CACHE && !swap_page_trans_huge_swapped(p, entry)) __try_to_reclaim_swap(p, swp_offset(entry), TTRS_UNMAPPED | TTRS_FULL); + put_swap_device(p); } return p != NULL; } diff --git a/mm/vmpressure.c b/mm/vmpressure.c index 22c6689d93..bd5183dfd8 100644 --- a/mm/vmpressure.c +++ b/mm/vmpressure.c @@ -169,7 +169,7 @@ static bool vmpressure_event(struct vmpressure *vmpr, continue; if (level < ev->level) continue; - eventfd_signal(ev->efd, 1); + eventfd_signal(ev->efd); ret = true; } mutex_unlock(&vmpr->events_lock); diff --git a/mm/vmscan.c b/mm/vmscan.c index bba207f41b..ebaf79d683 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5733,7 +5733,7 @@ static void shrink_lruvec(struct lruvec *lruvec, struct scan_control *sc) /* Use reclaim/compaction for costly allocs or under memory pressure */ static bool in_reclaim_compaction(struct scan_control *sc) { - if (IS_ENABLED(CONFIG_COMPACTION) && sc->order && + if (gfp_compaction_allowed(sc->gfp_mask) && sc->order && (sc->order > PAGE_ALLOC_COSTLY_ORDER || sc->priority < DEF_PRIORITY - 2)) return true; @@ -5978,6 +5978,9 @@ static inline bool compaction_ready(struct zone *zone, struct scan_control *sc) { unsigned long watermark; + if (!gfp_compaction_allowed(sc->gfp_mask)) + return false; + /* Allocation can already succeed, nothing to do */ if (zone_watermark_ok(zone, sc->order, min_wmark_pages(zone), sc->reclaim_idx, 0)) -- cgit v1.2.3
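
Note on gfp_compaction_allowed(): the mm/compaction.c, mm/page_alloc.c and mm/vmscan.c hunks above all gate compaction work on this helper instead of open-coding the __GFP_IO test. The helper itself is not part of this diff (it lives in include/linux/gfp.h upstream); as a sketch, and assuming the upstream definition has not diverged, it amounts to:

	/*
	 * Compaction is only worth attempting when it is compiled in and the
	 * allocation context may perform IO, because page migration can
	 * require IO -- GFP_NOIO is the tricky case the removed compaction.c
	 * comment described.
	 */
	static inline bool gfp_compaction_allowed(gfp_t gfp_mask)
	{
		return IS_ENABLED(CONFIG_COMPACTION) && (gfp_mask & __GFP_IO);
	}

If the helper does look like this, it also explains why the vmscan.c in_reclaim_compaction() hunk can drop its own IS_ENABLED(CONFIG_COMPACTION) check: the helper already covers it.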
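
Note on the eventfd_signal() calls: the mm/memcontrol.c and mm/vmpressure.c hunks track an upstream eventfd API change in which eventfd_signal() no longer takes a count argument and always adds one to the eventfd counter, so callers that used to pass a literal 1 simply drop it:

	/* before: count passed explicitly, always 1 in these callers */
	eventfd_signal(ev->eventfd, 1);

	/* after: the counter is incremented by one unconditionally */
	eventfd_signal(ev->eventfd);

Since every call site touched here passed 1, there is no behaviour change for mm.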
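
Note on the mm/memtest.c hunk: switching the pattern store and the read-back to WRITE_ONCE()/READ_ONCE() keeps the compiler from caching the value or eliding the accesses, so the loop genuinely writes and re-reads the memory under test. A minimal user-space illustration of the same write-then-verify idea (volatile stands in for the kernel accessors; the buffer and pattern are made up for the example):

	#include <stdint.h>
	#include <stdio.h>

	/* Write a pattern, then read it back through forced accesses so the
	 * compiler cannot fold the comparison into "always equal". */
	static size_t count_bad_words(uint64_t *buf, size_t words, uint64_t pattern)
	{
		volatile uint64_t *p = buf;
		size_t bad = 0;
		size_t i;

		for (i = 0; i < words; i++)
			p[i] = pattern;			/* forced store */
		for (i = 0; i < words; i++)
			if (p[i] != pattern)		/* forced load */
				bad++;
		return bad;
	}

	int main(void)
	{
		static uint64_t buf[1024];

		printf("bad words: %zu\n",
		       count_bad_words(buf, 1024, 0xaaaaaaaaaaaaaaaaULL));
		return 0;
	}

On working memory this prints "bad words: 0"; memtest() additionally coalesces the bad ranges it finds and reserves them via memblock, which the sketch leaves out.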
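
Note on the mm/readahead.c hunks: the readahead mark has to be aligned to the start of the folio that contains it, which is a round_down to the folio order, not a round_up; rounding up compares against the start of the *next* folio, so PG_readahead can end up on a folio the reader never reaches and the async-readahead ramp-up stalls. The arithmetic, with numbers invented purely for illustration:

	#include <stdio.h>

	/* User-space copies of the kernel's power-of-two alignment helpers,
	 * just to show the arithmetic behind the change. */
	#define round_down(x, y)	((x) & ~((y) - 1))
	#define round_up(x, y)		round_down((x) + (y) - 1, (y))

	int main(void)
	{
		unsigned long order = 2;	/* order-2 folio = 4 pages */
		unsigned long mark = 5;		/* index meant to trigger readahead */

		/* round_up lands on the next folio boundary (8); round_down
		 * lands on the folio that actually contains index 5 (4). */
		printf("round_up   = %lu\n", round_up(mark, 1UL << order));
		printf("round_down = %lu\n", round_down(mark, 1UL << order));
		return 0;
	}

With order-2 folios a mark of 5 belongs to the folio starting at index 4, so only the round_down form marks a folio the read stream will actually hit.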