summaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/btrfs/block-group.c13
-rw-r--r--fs/btrfs/extent-tree.c3
-rw-r--r--fs/btrfs/free-space-cache.c4
-rw-r--r--fs/btrfs/inode.c16
-rw-r--r--fs/btrfs/space-info.c5
-rw-r--r--fs/btrfs/space-info.h1
-rw-r--r--fs/ceph/caps.c35
-rw-r--r--fs/ceph/super.h7
-rw-r--r--fs/ext4/inode.c76
-rw-r--r--fs/f2fs/segment.c4
-rw-r--r--fs/file.c1
11 files changed, 110 insertions, 55 deletions
diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c
index 60066822b5..350cd1201c 100644
--- a/fs/btrfs/block-group.c
+++ b/fs/btrfs/block-group.c
@@ -1216,8 +1216,8 @@ int btrfs_remove_block_group(struct btrfs_trans_handle *trans,
block_group->space_info->total_bytes -= block_group->length;
block_group->space_info->bytes_readonly -=
(block_group->length - block_group->zone_unusable);
- block_group->space_info->bytes_zone_unusable -=
- block_group->zone_unusable;
+ btrfs_space_info_update_bytes_zone_unusable(fs_info, block_group->space_info,
+ -block_group->zone_unusable);
block_group->space_info->disk_total -= block_group->length * factor;
spin_unlock(&block_group->space_info->lock);
@@ -1389,7 +1389,8 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
if (btrfs_is_zoned(cache->fs_info)) {
/* Migrate zone_unusable bytes to readonly */
sinfo->bytes_readonly += cache->zone_unusable;
- sinfo->bytes_zone_unusable -= cache->zone_unusable;
+ btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo,
+ -cache->zone_unusable);
cache->zone_unusable = 0;
}
cache->ro++;
@@ -3034,9 +3035,11 @@ void btrfs_dec_block_group_ro(struct btrfs_block_group *cache)
if (btrfs_is_zoned(cache->fs_info)) {
/* Migrate zone_unusable bytes back */
cache->zone_unusable =
- (cache->alloc_offset - cache->used) +
+ (cache->alloc_offset - cache->used - cache->pinned -
+ cache->reserved) +
(cache->length - cache->zone_capacity);
- sinfo->bytes_zone_unusable += cache->zone_unusable;
+ btrfs_space_info_update_bytes_zone_unusable(cache->fs_info, sinfo,
+ cache->zone_unusable);
sinfo->bytes_readonly -= cache->zone_unusable;
}
num_bytes = cache->length - cache->reserved -
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 3774c191e3..b75e14f399 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2806,7 +2806,8 @@ static int unpin_extent_range(struct btrfs_fs_info *fs_info,
readonly = true;
} else if (btrfs_is_zoned(fs_info)) {
/* Need reset before reusing in a zoned block group */
- space_info->bytes_zone_unusable += len;
+ btrfs_space_info_update_bytes_zone_unusable(fs_info, space_info,
+ len);
readonly = true;
}
spin_unlock(&cache->lock);
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index dabc3d0793..d674f21065 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -2723,8 +2723,10 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
* If the block group is read-only, we should account freed space into
* bytes_readonly.
*/
- if (!block_group->ro)
+ if (!block_group->ro) {
block_group->zone_unusable += to_unusable;
+ WARN_ON(block_group->zone_unusable > block_group->length);
+ }
spin_unlock(&ctl->tree_lock);
if (!used) {
spin_lock(&block_group->lock);
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 3a2b902b2d..39d22693e4 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -737,8 +737,9 @@ out:
return ret;
}
-static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset,
- u64 end,
+static noinline int cow_file_range_inline(struct btrfs_inode *inode,
+ struct page *locked_page,
+ u64 offset, u64 end,
size_t compressed_size,
int compress_type,
struct folio *compressed_folio,
@@ -762,7 +763,10 @@ static noinline int cow_file_range_inline(struct btrfs_inode *inode, u64 offset,
return ret;
}
- extent_clear_unlock_delalloc(inode, offset, end, NULL, &cached,
+ if (ret == 0)
+ locked_page = NULL;
+
+ extent_clear_unlock_delalloc(inode, offset, end, locked_page, &cached,
clear_flags,
PAGE_UNLOCK | PAGE_START_WRITEBACK |
PAGE_END_WRITEBACK);
@@ -1037,10 +1041,10 @@ again:
* extent for the subpage case.
*/
if (total_in < actual_end)
- ret = cow_file_range_inline(inode, start, end, 0,
+ ret = cow_file_range_inline(inode, NULL, start, end, 0,
BTRFS_COMPRESS_NONE, NULL, false);
else
- ret = cow_file_range_inline(inode, start, end, total_compressed,
+ ret = cow_file_range_inline(inode, NULL, start, end, total_compressed,
compress_type, folios[0], false);
if (ret <= 0) {
if (ret < 0)
@@ -1359,7 +1363,7 @@ static noinline int cow_file_range(struct btrfs_inode *inode,
if (!no_inline) {
/* lets try to make an inline extent */
- ret = cow_file_range_inline(inode, start, end, 0,
+ ret = cow_file_range_inline(inode, locked_page, start, end, 0,
BTRFS_COMPRESS_NONE, NULL, false);
if (ret <= 0) {
/*
diff --git a/fs/btrfs/space-info.c b/fs/btrfs/space-info.c
index ae8c564425..8f194eefd3 100644
--- a/fs/btrfs/space-info.c
+++ b/fs/btrfs/space-info.c
@@ -311,7 +311,7 @@ void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
found->bytes_used += block_group->used;
found->disk_used += block_group->used * factor;
found->bytes_readonly += block_group->bytes_super;
- found->bytes_zone_unusable += block_group->zone_unusable;
+ btrfs_space_info_update_bytes_zone_unusable(info, found, block_group->zone_unusable);
if (block_group->length > 0)
found->full = 0;
btrfs_try_granting_tickets(info, found);
@@ -573,8 +573,7 @@ again:
spin_lock(&cache->lock);
avail = cache->length - cache->used - cache->pinned -
- cache->reserved - cache->delalloc_bytes -
- cache->bytes_super - cache->zone_unusable;
+ cache->reserved - cache->bytes_super - cache->zone_unusable;
btrfs_info(fs_info,
"block group %llu has %llu bytes, %llu used %llu pinned %llu reserved %llu delalloc %llu super %llu zone_unusable (%llu bytes available) %s",
cache->start, cache->length, cache->used, cache->pinned,
diff --git a/fs/btrfs/space-info.h b/fs/btrfs/space-info.h
index a733458fd1..3e304300fc 100644
--- a/fs/btrfs/space-info.h
+++ b/fs/btrfs/space-info.h
@@ -207,6 +207,7 @@ btrfs_space_info_update_##name(struct btrfs_fs_info *fs_info, \
DECLARE_SPACE_INFO_UPDATE(bytes_may_use, "space_info");
DECLARE_SPACE_INFO_UPDATE(bytes_pinned, "pinned");
+DECLARE_SPACE_INFO_UPDATE(bytes_zone_unusable, "zone_unusable");
int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
void btrfs_add_bg_to_space_info(struct btrfs_fs_info *info,
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index c4941ba245..68df96800e 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -2016,6 +2016,8 @@ bool __ceph_should_report_size(struct ceph_inode_info *ci)
* CHECK_CAPS_AUTHONLY - we should only check the auth cap
* CHECK_CAPS_FLUSH - we should flush any dirty caps immediately, without
* further delay.
+ * CHECK_CAPS_FLUSH_FORCE - we should flush any caps immediately, without
+ * further delay.
*/
void ceph_check_caps(struct ceph_inode_info *ci, int flags)
{
@@ -2097,7 +2099,7 @@ retry:
}
doutc(cl, "%p %llx.%llx file_want %s used %s dirty %s "
- "flushing %s issued %s revoking %s retain %s %s%s%s\n",
+ "flushing %s issued %s revoking %s retain %s %s%s%s%s\n",
inode, ceph_vinop(inode), ceph_cap_string(file_wanted),
ceph_cap_string(used), ceph_cap_string(ci->i_dirty_caps),
ceph_cap_string(ci->i_flushing_caps),
@@ -2105,7 +2107,8 @@ retry:
ceph_cap_string(retain),
(flags & CHECK_CAPS_AUTHONLY) ? " AUTHONLY" : "",
(flags & CHECK_CAPS_FLUSH) ? " FLUSH" : "",
- (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "");
+ (flags & CHECK_CAPS_NOINVAL) ? " NOINVAL" : "",
+ (flags & CHECK_CAPS_FLUSH_FORCE) ? " FLUSH_FORCE" : "");
/*
* If we no longer need to hold onto old our caps, and we may
@@ -2180,6 +2183,11 @@ retry:
queue_writeback = true;
}
+ if (flags & CHECK_CAPS_FLUSH_FORCE) {
+ doutc(cl, "force to flush caps\n");
+ goto ack;
+ }
+
if (cap == ci->i_auth_cap &&
(cap->issued & CEPH_CAP_FILE_WR)) {
/* request larger max_size from MDS? */
@@ -3504,6 +3512,8 @@ static void handle_cap_grant(struct inode *inode,
bool queue_invalidate = false;
bool deleted_inode = false;
bool fill_inline = false;
+ bool revoke_wait = false;
+ int flags = 0;
/*
* If there is at least one crypto block then we'll trust
@@ -3699,16 +3709,18 @@ static void handle_cap_grant(struct inode *inode,
ceph_cap_string(cap->issued), ceph_cap_string(newcaps),
ceph_cap_string(revoking));
if (S_ISREG(inode->i_mode) &&
- (revoking & used & CEPH_CAP_FILE_BUFFER))
+ (revoking & used & CEPH_CAP_FILE_BUFFER)) {
writeback = true; /* initiate writeback; will delay ack */
- else if (queue_invalidate &&
+ revoke_wait = true;
+ } else if (queue_invalidate &&
revoking == CEPH_CAP_FILE_CACHE &&
- (newcaps & CEPH_CAP_FILE_LAZYIO) == 0)
- ; /* do nothing yet, invalidation will be queued */
- else if (cap == ci->i_auth_cap)
+ (newcaps & CEPH_CAP_FILE_LAZYIO) == 0) {
+ revoke_wait = true; /* do nothing yet, invalidation will be queued */
+ } else if (cap == ci->i_auth_cap) {
check_caps = 1; /* check auth cap only */
- else
+ } else {
check_caps = 2; /* check all caps */
+ }
/* If there is new caps, try to wake up the waiters */
if (~cap->issued & newcaps)
wake = true;
@@ -3735,8 +3747,9 @@ static void handle_cap_grant(struct inode *inode,
BUG_ON(cap->issued & ~cap->implemented);
/* don't let check_caps skip sending a response to MDS for revoke msgs */
- if (le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
+ if (!revoke_wait && le32_to_cpu(grant->op) == CEPH_CAP_OP_REVOKE) {
cap->mds_wanted = 0;
+ flags |= CHECK_CAPS_FLUSH_FORCE;
if (cap == ci->i_auth_cap)
check_caps = 1; /* check auth cap only */
else
@@ -3792,9 +3805,9 @@ static void handle_cap_grant(struct inode *inode,
mutex_unlock(&session->s_mutex);
if (check_caps == 1)
- ceph_check_caps(ci, CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
+ ceph_check_caps(ci, flags | CHECK_CAPS_AUTHONLY | CHECK_CAPS_NOINVAL);
else if (check_caps == 2)
- ceph_check_caps(ci, CHECK_CAPS_NOINVAL);
+ ceph_check_caps(ci, flags | CHECK_CAPS_NOINVAL);
}
/*
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index b63b4cd9b5..6e817bf133 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -200,9 +200,10 @@ struct ceph_cap {
struct list_head caps_item;
};
-#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */
-#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */
-#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */
+#define CHECK_CAPS_AUTHONLY 1 /* only check auth cap */
+#define CHECK_CAPS_FLUSH 2 /* flush any dirty caps */
+#define CHECK_CAPS_NOINVAL 4 /* don't invalidate pagecache */
+#define CHECK_CAPS_FLUSH_FORCE 8 /* force flush any caps */
struct ceph_cap_flush {
u64 tid;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 4bae9ccf5f..4b0d64a76e 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -453,6 +453,35 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
}
#endif /* ES_AGGRESSIVE_TEST */
+static int ext4_map_query_blocks(handle_t *handle, struct inode *inode,
+ struct ext4_map_blocks *map)
+{
+ unsigned int status;
+ int retval;
+
+ if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
+ retval = ext4_ext_map_blocks(handle, inode, map, 0);
+ else
+ retval = ext4_ind_map_blocks(handle, inode, map, 0);
+
+ if (retval <= 0)
+ return retval;
+
+ if (unlikely(retval != map->m_len)) {
+ ext4_warning(inode->i_sb,
+ "ES len assertion failed for inode "
+ "%lu: retval %d != map->m_len %d",
+ inode->i_ino, retval, map->m_len);
+ WARN_ON(1);
+ }
+
+ status = map->m_flags & EXT4_MAP_UNWRITTEN ?
+ EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
+ ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
+ map->m_pblk, status);
+ return retval;
+}
+
/*
* The ext4_map_blocks() function tries to look up the requested blocks,
* and returns if the blocks are already mapped.
@@ -1708,6 +1737,7 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
if (ext4_es_is_hole(&es))
goto add_delayed;
+found:
/*
* Delayed extent could be allocated by fallocate.
* So we need to check it.
@@ -1744,36 +1774,34 @@ static int ext4_da_map_blocks(struct inode *inode, sector_t iblock,
down_read(&EXT4_I(inode)->i_data_sem);
if (ext4_has_inline_data(inode))
retval = 0;
- else if (ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))
- retval = ext4_ext_map_blocks(NULL, inode, map, 0);
else
- retval = ext4_ind_map_blocks(NULL, inode, map, 0);
- if (retval < 0) {
- up_read(&EXT4_I(inode)->i_data_sem);
+ retval = ext4_map_query_blocks(NULL, inode, map);
+ up_read(&EXT4_I(inode)->i_data_sem);
+ if (retval)
return retval;
- }
- if (retval > 0) {
- unsigned int status;
- if (unlikely(retval != map->m_len)) {
- ext4_warning(inode->i_sb,
- "ES len assertion failed for inode "
- "%lu: retval %d != map->m_len %d",
- inode->i_ino, retval, map->m_len);
- WARN_ON(1);
+add_delayed:
+ down_write(&EXT4_I(inode)->i_data_sem);
+ /*
+ * Page fault path (ext4_page_mkwrite does not take i_rwsem)
+ * and fallocate path (no folio lock) can race. Make sure we
+ * lookup the extent status tree here again while i_data_sem
+ * is held in write mode, before inserting a new da entry in
+ * the extent status tree.
+ */
+ if (ext4_es_lookup_extent(inode, iblock, NULL, &es)) {
+ if (!ext4_es_is_hole(&es)) {
+ up_write(&EXT4_I(inode)->i_data_sem);
+ goto found;
+ }
+ } else if (!ext4_has_inline_data(inode)) {
+ retval = ext4_map_query_blocks(NULL, inode, map);
+ if (retval) {
+ up_write(&EXT4_I(inode)->i_data_sem);
+ return retval;
}
-
- status = map->m_flags & EXT4_MAP_UNWRITTEN ?
- EXTENT_STATUS_UNWRITTEN : EXTENT_STATUS_WRITTEN;
- ext4_es_insert_extent(inode, map->m_lblk, map->m_len,
- map->m_pblk, status);
- up_read(&EXT4_I(inode)->i_data_sem);
- return retval;
}
- up_read(&EXT4_I(inode)->i_data_sem);
-add_delayed:
- down_write(&EXT4_I(inode)->i_data_sem);
retval = ext4_insert_delayed_block(inode, map->m_lblk);
up_write(&EXT4_I(inode)->i_data_sem);
if (retval)
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 259e235bec..6018257852 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -3483,7 +3483,9 @@ static int __get_segment_type_6(struct f2fs_io_info *fio)
if (page_private_gcing(fio->page)) {
if (fio->sbi->am.atgc_enabled &&
(fio->io_type == FS_DATA_IO) &&
- (fio->sbi->gc_mode != GC_URGENT_HIGH))
+ (fio->sbi->gc_mode != GC_URGENT_HIGH) &&
+ __is_valid_data_blkaddr(fio->old_blkaddr) &&
+ !is_inode_flag_set(inode, FI_OPU_WRITE))
return CURSEG_ALL_DATA_ATGC;
else
return CURSEG_COLD_DATA;
diff --git a/fs/file.c b/fs/file.c
index a3b72aa64f..a11e59b5d6 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -1248,6 +1248,7 @@ __releases(&files->file_lock)
* tables and this condition does not arise without those.
*/
fdt = files_fdtable(files);
+ fd = array_index_nospec(fd, fdt->max_fds);
tofree = fdt->fd[fd];
if (!tofree && fd_is_open(fd, fdt))
goto Ebusy;