summary | refs | log | tree | commit | diff | stats
path: root/mm/filemap.c
diff options
context:
space:
mode:
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- mm/filemap.c 217
1 files changed, 83 insertions, 134 deletions
diff --git a/mm/filemap.c b/mm/filemap.c
index b1ef7be120..ad5b4aa049 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -131,11 +131,8 @@ static void page_cache_delete(struct address_space *mapping,
mapping_set_update(&xas, mapping);
- /* hugetlb pages are represented by a single entry in the xarray */
- if (!folio_test_hugetlb(folio)) {
- xas_set_order(&xas, folio->index, folio_order(folio));
- nr = folio_nr_pages(folio);
- }
+ xas_set_order(&xas, folio->index, folio_order(folio));
+ nr = folio_nr_pages(folio);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
@@ -234,7 +231,7 @@ void filemap_free_folio(struct address_space *mapping, struct folio *folio)
if (free_folio)
free_folio(folio);
- if (folio_test_large(folio) && !folio_test_hugetlb(folio))
+ if (folio_test_large(folio))
refs = folio_nr_pages(folio);
folio_put_refs(folio, refs);
}
@@ -819,7 +816,7 @@ void replace_page_cache_folio(struct folio *old, struct folio *new)
new->mapping = mapping;
new->index = offset;
- mem_cgroup_migrate(old, new);
+ mem_cgroup_replace_folio(old, new);
xas_lock_irq(&xas);
xas_store(&xas, new);
@@ -855,14 +852,15 @@ noinline int __filemap_add_folio(struct address_space *mapping,
if (!huge) {
int error = mem_cgroup_charge(folio, NULL, gfp);
- VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
if (error)
return error;
charged = true;
- xas_set_order(&xas, index, folio_order(folio));
- nr = folio_nr_pages(folio);
}
+ VM_BUG_ON_FOLIO(index & (folio_nr_pages(folio) - 1), folio);
+ xas_set_order(&xas, index, folio_order(folio));
+ nr = folio_nr_pages(folio);
+
gfp &= GFP_RECLAIM_MASK;
folio_ref_add(folio, nr);
folio->mapping = mapping;
@@ -1135,32 +1133,13 @@ static void folio_wake_bit(struct folio *folio, int bit_nr)
wait_queue_head_t *q = folio_waitqueue(folio);
struct wait_page_key key;
unsigned long flags;
- wait_queue_entry_t bookmark;
key.folio = folio;
key.bit_nr = bit_nr;
key.page_match = 0;
- bookmark.flags = 0;
- bookmark.private = NULL;
- bookmark.func = NULL;
- INIT_LIST_HEAD(&bookmark.entry);
-
spin_lock_irqsave(&q->lock, flags);
- __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
-
- while (bookmark.flags & WQ_FLAG_BOOKMARK) {
- /*
- * Take a breather from holding the lock,
- * allow pages that finish wake up asynchronously
- * to acquire the lock and remove themselves
- * from wait queue
- */
- spin_unlock_irqrestore(&q->lock, flags);
- cpu_relax();
- spin_lock_irqsave(&q->lock, flags);
- __wake_up_locked_key_bookmark(q, TASK_NORMAL, &key, &bookmark);
- }
+ __wake_up_locked_key(q, TASK_NORMAL, &key);
/*
* It's possible to miss clearing waiters here, when we woke our page
@@ -1177,13 +1156,6 @@ static void folio_wake_bit(struct folio *folio, int bit_nr)
spin_unlock_irqrestore(&q->lock, flags);
}
-static void folio_wake(struct folio *folio, int bit)
-{
- if (!folio_test_waiters(folio))
- return;
- folio_wake_bit(folio, bit);
-}
-
/*
* A choice of three behaviors for folio_wait_bit_common():
*/
@@ -1484,29 +1456,6 @@ void folio_add_wait_queue(struct folio *folio, wait_queue_entry_t *waiter)
}
EXPORT_SYMBOL_GPL(folio_add_wait_queue);
-#ifndef clear_bit_unlock_is_negative_byte
-
-/*
- * PG_waiters is the high bit in the same byte as PG_lock.
- *
- * On x86 (and on many other architectures), we can clear PG_lock and
- * test the sign bit at the same time. But if the architecture does
- * not support that special operation, we just do this all by hand
- * instead.
- *
- * The read of PG_waiters has to be after (or concurrently with) PG_locked
- * being cleared, but a memory barrier should be unnecessary since it is
- * in the same byte as PG_locked.
- */
-static inline bool clear_bit_unlock_is_negative_byte(long nr, volatile void *mem)
-{
- clear_bit_unlock(nr, mem);
- /* smp_mb__after_atomic(); */
- return test_bit(PG_waiters, mem);
-}
-
-#endif
-
/**
* folio_unlock - Unlock a locked folio.
* @folio: The folio.
@@ -1522,12 +1471,42 @@ void folio_unlock(struct folio *folio)
BUILD_BUG_ON(PG_waiters != 7);
BUILD_BUG_ON(PG_locked > 7);
VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
- if (clear_bit_unlock_is_negative_byte(PG_locked, folio_flags(folio, 0)))
+ if (folio_xor_flags_has_waiters(folio, 1 << PG_locked))
folio_wake_bit(folio, PG_locked);
}
EXPORT_SYMBOL(folio_unlock);
/**
+ * folio_end_read - End read on a folio.
+ * @folio: The folio.
+ * @success: True if all reads completed successfully.
+ *
+ * When all reads against a folio have completed, filesystems should
+ * call this function to let the pagecache know that no more reads
+ * are outstanding. This will unlock the folio and wake up any thread
+ * sleeping on the lock. The folio will also be marked uptodate if all
+ * reads succeeded.
+ *
+ * Context: May be called from interrupt or process context. May not be
+ * called from NMI context.
+ */
+void folio_end_read(struct folio *folio, bool success)
+{
+ unsigned long mask = 1 << PG_locked;
+
+ /* Must be in bottom byte for x86 to work */
+ BUILD_BUG_ON(PG_uptodate > 7);
+ VM_BUG_ON_FOLIO(!folio_test_locked(folio), folio);
+ VM_BUG_ON_FOLIO(folio_test_uptodate(folio), folio);
+
+ if (likely(success))
+ mask |= 1 << PG_uptodate;
+ if (folio_xor_flags_has_waiters(folio, mask))
+ folio_wake_bit(folio, PG_locked);
+}
+EXPORT_SYMBOL(folio_end_read);
+
+/**
* folio_end_private_2 - Clear PG_private_2 and wake any waiters.
* @folio: The folio.
*
@@ -1588,9 +1567,15 @@ EXPORT_SYMBOL(folio_wait_private_2_killable);
/**
* folio_end_writeback - End writeback against a folio.
* @folio: The folio.
+ *
+ * The folio must actually be under writeback.
+ *
+ * Context: May be called from process or interrupt context.
*/
void folio_end_writeback(struct folio *folio)
{
+ VM_BUG_ON_FOLIO(!folio_test_writeback(folio), folio);
+
/*
* folio_test_clear_reclaim() could be used here but it is an
* atomic operation and overkill in this particular case. Failing
@@ -1607,14 +1592,11 @@ void folio_end_writeback(struct folio *folio)
* Writeback does not hold a folio reference of its own, relying
* on truncation to wait for the clearing of PG_writeback.
* But here we must make sure that the folio is not freed and
- * reused before the folio_wake().
+ * reused before the folio_wake_bit().
*/
folio_get(folio);
- if (!__folio_end_writeback(folio))
- BUG();
-
- smp_mb__after_atomic();
- folio_wake(folio, PG_writeback);
+ if (__folio_end_writeback(folio))
+ folio_wake_bit(folio, PG_writeback);
acct_reclaim_writeback(folio);
folio_put(folio);
}
@@ -2040,7 +2022,7 @@ unsigned find_get_entries(struct address_space *mapping, pgoff_t *start,
int idx = folio_batch_count(fbatch) - 1;
folio = fbatch->folios[idx];
- if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+ if (!xa_is_value(folio))
nr = folio_nr_pages(folio);
*start = indices[idx] + nr;
}
@@ -2104,7 +2086,7 @@ put:
int idx = folio_batch_count(fbatch) - 1;
folio = fbatch->folios[idx];
- if (!xa_is_value(folio) && !folio_test_hugetlb(folio))
+ if (!xa_is_value(folio))
nr = folio_nr_pages(folio);
*start = indices[idx] + nr;
}
@@ -2122,51 +2104,13 @@ put:
* index @start and up to index @end (inclusive). The folios are returned
* in @fbatch with an elevated reference count.
*
- * The first folio may start before @start; if it does, it will contain
- * @start. The final folio may extend beyond @end; if it does, it will
- * contain @end. The folios have ascending indices. There may be gaps
- * between the folios if there are indices which have no folio in the
- * page cache. If folios are added to or removed from the page cache
- * while this is running, they may or may not be found by this call.
- *
* Return: The number of folios which were found.
* We also update @start to index the next folio for the traversal.
*/
unsigned filemap_get_folios(struct address_space *mapping, pgoff_t *start,
pgoff_t end, struct folio_batch *fbatch)
{
- XA_STATE(xas, &mapping->i_pages, *start);
- struct folio *folio;
-
- rcu_read_lock();
- while ((folio = find_get_entry(&xas, end, XA_PRESENT)) != NULL) {
- /* Skip over shadow, swap and DAX entries */
- if (xa_is_value(folio))
- continue;
- if (!folio_batch_add(fbatch, folio)) {
- unsigned long nr = folio_nr_pages(folio);
-
- if (folio_test_hugetlb(folio))
- nr = 1;
- *start = folio->index + nr;
- goto out;
- }
- }
-
- /*
- * We come here when there is no page beyond @end. We take care to not
- * overflow the index @start as it confuses some of the callers. This
- * breaks the iteration when there is a page at index -1 but that is
- * already broken anyway.
- */
- if (end == (pgoff_t)-1)
- *start = (pgoff_t)-1;
- else
- *start = end + 1;
-out:
- rcu_read_unlock();
-
- return folio_batch_count(fbatch);
+ return filemap_get_folios_tag(mapping, start, end, XA_PRESENT, fbatch);
}
EXPORT_SYMBOL(filemap_get_folios);
@@ -2213,9 +2157,6 @@ unsigned filemap_get_folios_contig(struct address_space *mapping,
if (!folio_batch_add(fbatch, folio)) {
nr = folio_nr_pages(folio);
-
- if (folio_test_hugetlb(folio))
- nr = 1;
*start = folio->index + nr;
goto out;
}
@@ -2232,10 +2173,7 @@ update_start:
if (nr) {
folio = fbatch->folios[nr - 1];
- if (folio_test_hugetlb(folio))
- *start = folio->index + 1;
- else
- *start = folio_next_index(folio);
+ *start = folio->index + folio_nr_pages(folio);
}
out:
rcu_read_unlock();
@@ -2251,7 +2189,13 @@ EXPORT_SYMBOL(filemap_get_folios_contig);
* @tag: The tag index
* @fbatch: The batch to fill
*
- * Same as filemap_get_folios(), but only returning folios tagged with @tag.
+ * The first folio may start before @start; if it does, it will contain
+ * @start. The final folio may extend beyond @end; if it does, it will
+ * contain @end. The folios have ascending indices. There may be gaps
+ * between the folios if there are indices which have no folio in the
+ * page cache. If folios are added to or removed from the page cache
+ * while this is running, they may or may not be found by this call.
+ * Only returns folios that are tagged with @tag.
*
* Return: The number of folios found.
* Also update @start to index the next folio for traversal.
@@ -2273,9 +2217,6 @@ unsigned filemap_get_folios_tag(struct address_space *mapping, pgoff_t *start,
continue;
if (!folio_batch_add(fbatch, folio)) {
unsigned long nr = folio_nr_pages(folio);
-
- if (folio_test_hugetlb(folio))
- nr = 1;
*start = folio->index + nr;
goto out;
}
@@ -3113,7 +3054,7 @@ static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio,
/*
* NOTE! This will make us return with VM_FAULT_RETRY, but with
- * the mmap_lock still held. That's how FAULT_FLAG_RETRY_NOWAIT
+ * the fault lock still held. That's how FAULT_FLAG_RETRY_NOWAIT
* is supposed to work. We have way too many special cases..
*/
if (vmf->flags & FAULT_FLAG_RETRY_NOWAIT)
@@ -3123,13 +3064,14 @@ static int lock_folio_maybe_drop_mmap(struct vm_fault *vmf, struct folio *folio,
if (vmf->flags & FAULT_FLAG_KILLABLE) {
if (__folio_lock_killable(folio)) {
/*
- * We didn't have the right flags to drop the mmap_lock,
- * but all fault_handlers only check for fatal signals
- * if we return VM_FAULT_RETRY, so we need to drop the
- * mmap_lock here and return 0 if we don't have a fpin.
+ * We didn't have the right flags to drop the
+ * fault lock, but all fault_handlers only check
+ * for fatal signals if we return VM_FAULT_RETRY,
+ * so we need to drop the fault lock here and
+ * return 0 if we don't have a fpin.
*/
if (*fpin == NULL)
- mmap_read_unlock(vmf->vma->vm_mm);
+ release_fault_lock(vmf);
return 0;
}
} else
@@ -3330,21 +3272,28 @@ retry_find:
VM_BUG_ON_FOLIO(!folio_contains(folio, index), folio);
/*
- * We have a locked page in the page cache, now we need to check
- * that it's up-to-date. If not, it is going to be due to an error.
+ * We have a locked folio in the page cache, now we need to check
+ * that it's up-to-date. If not, it is going to be due to an error,
+ * or because readahead was otherwise unable to retrieve it.
*/
if (unlikely(!folio_test_uptodate(folio))) {
/*
- * The page was in cache and uptodate and now it is not.
- * Strange but possible since we didn't hold the page lock all
- * the time. Let's drop everything get the invalidate lock and
- * try again.
+ * If the invalidate lock is not held, the folio was in cache
+ * and uptodate and now it is not. Strange but possible since we
+ * didn't hold the page lock all the time. Let's drop
+ * everything, get the invalidate lock and try again.
*/
if (!mapping_locked) {
folio_unlock(folio);
folio_put(folio);
goto retry_find;
}
+
+ /*
+ * OK, the folio is really not uptodate. This can be because the
+ * VMA has the VM_RAND_READ flag set, or because an error
+ * arose. Let's read it in directly.
+ */
goto page_not_uptodate;
}
@@ -3503,7 +3452,7 @@ static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf,
* handled in the specific fault path, and it'll prohibit the
* fault-around logic.
*/
- if (!pte_none(vmf->pte[count]))
+ if (!pte_none(ptep_get(&vmf->pte[count])))
goto skip;
count++;
@@ -3600,7 +3549,7 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf,
addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT;
vmf->pte += xas.xa_index - last_pgoff;
last_pgoff = xas.xa_index;
- end = folio->index + folio_nr_pages(folio) - 1;
+ end = folio_next_index(folio) - 1;
nr_pages = min(end, end_pgoff) - xas.xa_index + 1;
if (!folio_test_large(folio))
@@ -3678,7 +3627,7 @@ int generic_file_mmap(struct file *file, struct vm_area_struct *vma)
*/
int generic_file_readonly_mmap(struct file *file, struct vm_area_struct *vma)
{
- if ((vma->vm_flags & VM_SHARED) && (vma->vm_flags & VM_MAYWRITE))
+ if (vma_is_shared_maywrite(vma))
return -EINVAL;
return generic_file_mmap(file, vma);
}