diff options
Diffstat (limited to '')
-rw-r--r-- | fs/btrfs/extent_io.c | 75 |
1 files changed, 58 insertions, 17 deletions
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index eade0432bd..87082f9732 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2734,16 +2734,34 @@ static int fiemap_process_hole(struct btrfs_inode *inode, * it beyond i_size. */ while (cur_offset < end && cur_offset < i_size) { + struct extent_state *cached_state = NULL; u64 delalloc_start; u64 delalloc_end; u64 prealloc_start; + u64 lockstart; + u64 lockend; u64 prealloc_len = 0; bool delalloc; + lockstart = round_down(cur_offset, inode->root->fs_info->sectorsize); + lockend = round_up(end, inode->root->fs_info->sectorsize); + + /* + * We are only locking for the delalloc range because that's the + * only thing that can change here. With fiemap we have a lock + * on the inode, so no buffered or direct writes can happen. + * + * However mmaps and normal page writeback will cause this to + * change arbitrarily. We have to lock the extent lock here to + * make sure that nobody messes with the tree while we're doing + * btrfs_find_delalloc_in_range. + */ + lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); delalloc = btrfs_find_delalloc_in_range(inode, cur_offset, end, delalloc_cached_state, &delalloc_start, &delalloc_end); + unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); if (!delalloc) break; @@ -2911,15 +2929,15 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len) { const u64 ino = btrfs_ino(inode); - struct extent_state *cached_state = NULL; struct extent_state *delalloc_cached_state = NULL; struct btrfs_path *path; struct fiemap_cache cache = { 0 }; struct btrfs_backref_share_check_ctx *backref_ctx; u64 last_extent_end; u64 prev_extent_end; - u64 lockstart; - u64 lockend; + u64 range_start; + u64 range_end; + const u64 sectorsize = inode->root->fs_info->sectorsize; bool stopped = false; int ret; @@ -2930,12 +2948,11 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, goto out; } - lockstart = round_down(start, inode->root->fs_info->sectorsize); - lockend = round_up(start + len, inode->root->fs_info->sectorsize); - prev_extent_end = lockstart; + range_start = round_down(start, sectorsize); + range_end = round_up(start + len, sectorsize); + prev_extent_end = range_start; btrfs_inode_lock(inode, BTRFS_ILOCK_SHARED); - lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); ret = fiemap_find_last_extent_offset(inode, path, &last_extent_end); if (ret < 0) @@ -2943,7 +2960,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, btrfs_release_path(path); path->reada = READA_FORWARD; - ret = fiemap_search_slot(inode, path, lockstart); + ret = fiemap_search_slot(inode, path, range_start); if (ret < 0) { goto out_unlock; } else if (ret > 0) { @@ -2955,7 +2972,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, goto check_eof_delalloc; } - while (prev_extent_end < lockend) { + while (prev_extent_end < range_end) { struct extent_buffer *leaf = path->nodes[0]; struct btrfs_file_extent_item *ei; struct btrfs_key key; @@ -2978,19 +2995,19 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, * The first iteration can leave us at an extent item that ends * before our range's start. Move to the next item. */ - if (extent_end <= lockstart) + if (extent_end <= range_start) goto next_item; backref_ctx->curr_leaf_bytenr = leaf->start; /* We have in implicit hole (NO_HOLES feature enabled). */ if (prev_extent_end < key.offset) { - const u64 range_end = min(key.offset, lockend) - 1; + const u64 hole_end = min(key.offset, range_end) - 1; ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, 0, 0, 0, - prev_extent_end, range_end); + prev_extent_end, hole_end); if (ret < 0) { goto out_unlock; } else if (ret > 0) { @@ -3000,7 +3017,7 @@ int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, } /* We've reached the end of the fiemap range, stop. */ - if (key.offset >= lockend) { + if (key.offset >= range_end) { stopped = true; break; } @@ -3094,29 +3111,41 @@ check_eof_delalloc: btrfs_free_path(path); path = NULL; - if (!stopped && prev_extent_end < lockend) { + if (!stopped && prev_extent_end < range_end) { ret = fiemap_process_hole(inode, fieinfo, &cache, &delalloc_cached_state, backref_ctx, - 0, 0, 0, prev_extent_end, lockend - 1); + 0, 0, 0, prev_extent_end, range_end - 1); if (ret < 0) goto out_unlock; - prev_extent_end = lockend; + prev_extent_end = range_end; } if (cache.cached && cache.offset + cache.len >= last_extent_end) { const u64 i_size = i_size_read(&inode->vfs_inode); if (prev_extent_end < i_size) { + struct extent_state *cached_state = NULL; u64 delalloc_start; u64 delalloc_end; + u64 lockstart; + u64 lockend; bool delalloc; + lockstart = round_down(prev_extent_end, sectorsize); + lockend = round_up(i_size, sectorsize); + + /* + * See the comment in fiemap_process_hole as to why + * we're doing the locking here. + */ + lock_extent(&inode->io_tree, lockstart, lockend, &cached_state); delalloc = btrfs_find_delalloc_in_range(inode, prev_extent_end, i_size - 1, &delalloc_cached_state, &delalloc_start, &delalloc_end); + unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); if (!delalloc) cache.flags |= FIEMAP_EXTENT_LAST; } else { @@ -3127,7 +3156,6 @@ check_eof_delalloc: ret = emit_last_fiemap_cache(fieinfo, &cache); out_unlock: - unlock_extent(&inode->io_tree, lockstart, lockend, &cached_state); btrfs_inode_unlock(inode, BTRFS_ILOCK_SHARED); out: free_extent_state(delalloc_cached_state); @@ -4024,6 +4052,19 @@ int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num, if (test_and_set_bit(EXTENT_BUFFER_READING, &eb->bflags)) goto done; + /* + * Between the initial test_bit(EXTENT_BUFFER_UPTODATE) and the above + * test_and_set_bit(EXTENT_BUFFER_READING), someone else could have + * started and finished reading the same eb. In this case, UPTODATE + * will now be set, and we shouldn't read it in again. + */ + if (unlikely(test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags))) { + clear_bit(EXTENT_BUFFER_READING, &eb->bflags); + smp_mb__after_atomic(); + wake_up_bit(&eb->bflags, EXTENT_BUFFER_READING); + return 0; + } + clear_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags); eb->read_mirror = 0; check_buffer_tree_ref(eb); |