From e616d72f8b12df275e5afd05b0f5ed251f6d56a1 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 1 Jul 2024 19:14:06 +0200 Subject: Merging upstream version 6.9.7. Signed-off-by: Daniel Baumann --- fs/9p/vfs_dentry.c | 9 +- fs/afs/mntpt.c | 5 + fs/btrfs/bio.c | 4 +- fs/btrfs/block-group.c | 11 ++- fs/btrfs/disk-io.c | 10 +- fs/btrfs/extent_io.c | 70 ++++++++------ fs/btrfs/qgroup.c | 34 +++++-- fs/btrfs/super.c | 8 ++ fs/btrfs/tree-log.c | 17 ++-- fs/btrfs/zoned.c | 13 ++- fs/cachefiles/daemon.c | 3 +- fs/cachefiles/internal.h | 5 + fs/cachefiles/ondemand.c | 165 +++++++++++++++++++++++--------- fs/dlm/ast.c | 14 +++ fs/dlm/dlm_internal.h | 1 + fs/dlm/user.c | 15 +-- fs/ecryptfs/keystore.c | 4 +- fs/erofs/decompressor_deflate.c | 55 +++++------ fs/exec.c | 11 +++ fs/ext4/inode.c | 5 +- fs/ext4/mballoc.c | 5 + fs/ext4/mballoc.h | 2 +- fs/ext4/namei.c | 2 +- fs/ext4/super.c | 22 ++--- fs/ext4/sysfs.c | 24 +++-- fs/ext4/xattr.c | 117 +++++++++++------------ fs/f2fs/checkpoint.c | 9 +- fs/f2fs/data.c | 19 ++-- fs/f2fs/f2fs.h | 15 +-- fs/f2fs/file.c | 92 +++++++++++------- fs/f2fs/gc.c | 9 +- fs/f2fs/inode.c | 6 ++ fs/f2fs/node.c | 14 ++- fs/f2fs/segment.c | 2 + fs/f2fs/super.c | 12 ++- fs/fs-writeback.c | 7 +- fs/fuse/dev.c | 3 +- fs/gfs2/glock.c | 91 +++++++++++++----- fs/gfs2/glock.h | 1 + fs/gfs2/glops.c | 3 + fs/gfs2/incore.h | 1 + fs/gfs2/lock_dlm.c | 32 +++++-- fs/gfs2/ops_fstype.c | 1 + fs/gfs2/super.c | 3 - fs/gfs2/util.c | 1 - fs/iomap/buffered-io.c | 2 +- fs/jffs2/xattr.c | 3 + fs/jfs/xattr.c | 4 +- fs/libfs.c | 55 +++++++++-- fs/netfs/buffered_write.c | 2 +- fs/nfs/dir.c | 47 +++++++--- fs/nfs/filelayout/filelayout.c | 4 +- fs/nfs/fs_context.c | 9 +- fs/nfs/internal.h | 4 +- fs/nfs/nfs4proc.c | 25 ++++- fs/nfs/nfs4state.c | 12 +-- fs/nfsd/nfsctl.c | 4 +- fs/nfsd/nfsfh.c | 4 +- fs/nilfs2/dir.c | 2 +- fs/nilfs2/ioctl.c | 2 +- fs/nilfs2/segment.c | 66 ++++++++++--- fs/ntfs3/dir.c | 1 + fs/ntfs3/fslog.c | 3 +- fs/ntfs3/index.c | 6 ++ fs/ntfs3/inode.c | 24 +++-- fs/ntfs3/ntfs.h | 2 +- fs/ntfs3/record.c | 11 +-- fs/ntfs3/super.c | 2 - fs/ocfs2/file.c | 2 + fs/ocfs2/journal.c | 192 +++++++++++++++++++++----------------- fs/ocfs2/localalloc.c | 19 ++-- fs/ocfs2/namei.c | 4 +- fs/ocfs2/ocfs2.h | 27 ++++++ fs/ocfs2/reservations.c | 2 +- fs/ocfs2/suballoc.c | 6 +- fs/ocfs2/super.c | 4 +- fs/openpromfs/inode.c | 8 +- fs/overlayfs/dir.c | 3 - fs/overlayfs/export.c | 6 +- fs/proc/base.c | 2 +- fs/proc/fd.c | 42 ++++----- fs/proc/task_mmu.c | 9 +- fs/proc/vmcore.c | 2 + fs/smb/client/cifsfs.c | 14 ++- fs/smb/client/cifspdu.h | 2 +- fs/smb/client/inode.c | 4 + fs/smb/client/smb2ops.c | 4 + fs/smb/client/smb2transport.c | 2 +- fs/smb/server/mgmt/share_config.c | 6 +- fs/smb/server/oplock.c | 21 +++-- fs/smb/server/smb2pdu.c | 22 ++--- fs/smb/server/vfs.c | 17 ++-- fs/smb/server/vfs.h | 3 +- fs/smb/server/vfs_cache.c | 3 +- fs/tracefs/event_inode.c | 64 +++++++++---- fs/tracefs/inode.c | 33 +++---- fs/udf/inode.c | 27 +++--- fs/udf/udftime.c | 11 ++- fs/verity/init.c | 7 +- 99 files changed, 1164 insertions(+), 650 deletions(-) (limited to 'fs') diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index f16f735816..01338d4c2d 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -48,12 +48,17 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry) static void v9fs_dentry_release(struct dentry *dentry) { struct hlist_node *p, *n; + struct hlist_head head; p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n", dentry, dentry); - hlist_for_each_safe(p, n, (struct hlist_head 
*)&dentry->d_fsdata) + + spin_lock(&dentry->d_lock); + hlist_move_list((struct hlist_head *)&dentry->d_fsdata, &head); + spin_unlock(&dentry->d_lock); + + hlist_for_each_safe(p, n, &head) p9_fid_put(hlist_entry(p, struct p9_fid, dlist)); - dentry->d_fsdata = NULL; } static int v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c index 97f50e9fd9..297487ee83 100644 --- a/fs/afs/mntpt.c +++ b/fs/afs/mntpt.c @@ -140,6 +140,11 @@ static int afs_mntpt_set_params(struct fs_context *fc, struct dentry *mntpt) put_page(page); if (ret < 0) return ret; + + /* Don't cross a backup volume mountpoint from a backup volume */ + if (src_as->volume && src_as->volume->type == AFSVL_BACKVOL && + ctx->type == AFSVL_BACKVOL) + return -ENODEV; } return 0; diff --git a/fs/btrfs/bio.c b/fs/btrfs/bio.c index 477f350a8b..e3a57196b0 100644 --- a/fs/btrfs/bio.c +++ b/fs/btrfs/bio.c @@ -741,7 +741,9 @@ static bool btrfs_submit_chunk(struct btrfs_bio *bbio, int mirror_num) ret = btrfs_bio_csum(bbio); if (ret) goto fail_put_bio; - } else if (use_append) { + } else if (use_append || + (btrfs_is_zoned(fs_info) && inode && + inode->flags & BTRFS_INODE_NODATASUM)) { ret = btrfs_alloc_dummy_sum(bbio); if (ret) goto fail_put_bio; diff --git a/fs/btrfs/block-group.c b/fs/btrfs/block-group.c index 1e09aeea69..1a66be33bb 100644 --- a/fs/btrfs/block-group.c +++ b/fs/btrfs/block-group.c @@ -1785,6 +1785,7 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) container_of(work, struct btrfs_fs_info, reclaim_bgs_work); struct btrfs_block_group *bg; struct btrfs_space_info *space_info; + LIST_HEAD(retry_list); if (!test_bit(BTRFS_FS_OPEN, &fs_info->flags)) return; @@ -1921,8 +1922,11 @@ void btrfs_reclaim_bgs_work(struct work_struct *work) } next: - if (ret) - btrfs_mark_bg_to_reclaim(bg); + if (ret) { + /* Refcount held by the reclaim_bgs list after splice. */ + btrfs_get_block_group(bg); + list_add_tail(&bg->bg_list, &retry_list); + } btrfs_put_block_group(bg); mutex_unlock(&fs_info->reclaim_bgs_lock); @@ -1942,6 +1946,9 @@ next: spin_unlock(&fs_info->unused_bgs_lock); mutex_unlock(&fs_info->reclaim_bgs_lock); end: + spin_lock(&fs_info->unused_bgs_lock); + list_splice_tail(&retry_list, &fs_info->reclaim_bgs); + spin_unlock(&fs_info->unused_bgs_lock); btrfs_exclop_finish(fs_info); sb_end_write(fs_info->sb); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 3df5477d48..3a47eec87b 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -4544,18 +4544,10 @@ static void btrfs_destroy_delayed_refs(struct btrfs_transaction *trans, struct btrfs_fs_info *fs_info) { struct rb_node *node; - struct btrfs_delayed_ref_root *delayed_refs; + struct btrfs_delayed_ref_root *delayed_refs = &trans->delayed_refs; struct btrfs_delayed_ref_node *ref; - delayed_refs = &trans->delayed_refs; - spin_lock(&delayed_refs->lock); - if (atomic_read(&delayed_refs->num_entries) == 0) { - spin_unlock(&delayed_refs->lock); - btrfs_debug(fs_info, "delayed_refs has NO entry"); - return; - } - while ((node = rb_first_cached(&delayed_refs->href_root)) != NULL) { struct btrfs_delayed_ref_head *head; struct rb_node *n; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 2776112dbd..41173701f1 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -2773,13 +2773,19 @@ static int fiemap_next_leaf_item(struct btrfs_inode *inode, struct btrfs_path *p goto out; } - /* See the comment at fiemap_search_slot() about why we clone. 
*/ - copy_extent_buffer_full(clone, path->nodes[0]); /* * Important to preserve the start field, for the optimizations when * checking if extents are shared (see extent_fiemap()). + * + * We must set ->start before calling copy_extent_buffer_full(). If we + * are on sub-pagesize blocksize, we use ->start to determine the offset + * into the folio where our eb exists, and if we update ->start after + * the fact then any subsequent reads of the eb may read from a + * different offset in the folio than where we originally copied into. */ clone->start = path->nodes[0]->start; + /* See the comment at fiemap_search_slot() about why we clone. */ + copy_extent_buffer_full(clone, path->nodes[0]); slot = path->slots[0]; btrfs_release_path(path); @@ -3656,6 +3662,8 @@ static struct extent_buffer *grab_extent_buffer( struct folio *folio = page_folio(page); struct extent_buffer *exists; + lockdep_assert_held(&page->mapping->i_private_lock); + /* * For subpage case, we completely rely on radix tree to ensure we * don't try to insert two ebs for the same bytenr. So here we always @@ -3723,13 +3731,14 @@ static int check_eb_alignment(struct btrfs_fs_info *fs_info, u64 start) * The caller needs to free the existing folios and retry using the same order. */ static int attach_eb_folio_to_filemap(struct extent_buffer *eb, int i, + struct btrfs_subpage *prealloc, struct extent_buffer **found_eb_ret) { struct btrfs_fs_info *fs_info = eb->fs_info; struct address_space *mapping = fs_info->btree_inode->i_mapping; const unsigned long index = eb->start >> PAGE_SHIFT; - struct folio *existing_folio; + struct folio *existing_folio = NULL; int ret; ASSERT(found_eb_ret); @@ -3741,12 +3750,14 @@ retry: ret = filemap_add_folio(mapping, eb->folios[i], index + i, GFP_NOFS | __GFP_NOFAIL); if (!ret) - return 0; + goto finish; existing_folio = filemap_lock_folio(mapping, index + i); /* The page cache only exists for a very short time, just retry. */ - if (IS_ERR(existing_folio)) + if (IS_ERR(existing_folio)) { + existing_folio = NULL; goto retry; + } /* For now, we should only have single-page folios for btree inode. */ ASSERT(folio_nr_pages(existing_folio) == 1); @@ -3757,14 +3768,13 @@ retry: return -EAGAIN; } - if (fs_info->nodesize < PAGE_SIZE) { - /* - * We're going to reuse the existing page, can drop our page - * and subpage structure now. - */ +finish: + spin_lock(&mapping->i_private_lock); + if (existing_folio && fs_info->nodesize < PAGE_SIZE) { + /* We're going to reuse the existing page, can drop our folio now. */ __free_page(folio_page(eb->folios[i], 0)); eb->folios[i] = existing_folio; - } else { + } else if (existing_folio) { struct extent_buffer *existing_eb; existing_eb = grab_extent_buffer(fs_info, @@ -3772,6 +3782,7 @@ retry: if (existing_eb) { /* The extent buffer still exists, we can use it directly. */ *found_eb_ret = existing_eb; + spin_unlock(&mapping->i_private_lock); folio_unlock(existing_folio); folio_put(existing_folio); return 1; @@ -3780,6 +3791,22 @@ retry: __free_page(folio_page(eb->folios[i], 0)); eb->folios[i] = existing_folio; } + eb->folio_size = folio_size(eb->folios[i]); + eb->folio_shift = folio_shift(eb->folios[i]); + /* Should not fail, as we have preallocated the memory. */ + ret = attach_extent_buffer_folio(eb, eb->folios[i], prealloc); + ASSERT(!ret); + /* + * To inform we have an extra eb under allocation, so that + * detach_extent_buffer_page() won't release the folio private when the + * eb hasn't been inserted into radix tree yet. 
+ * + * The ref will be decreased when the eb releases the page, in + * detach_extent_buffer_page(). Thus needs no special handling in the + * error path. + */ + btrfs_folio_inc_eb_refs(fs_info, eb->folios[i]); + spin_unlock(&mapping->i_private_lock); return 0; } @@ -3791,7 +3818,6 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info, int attached = 0; struct extent_buffer *eb; struct extent_buffer *existing_eb = NULL; - struct address_space *mapping = fs_info->btree_inode->i_mapping; struct btrfs_subpage *prealloc = NULL; u64 lockdep_owner = owner_root; bool page_contig = true; @@ -3857,7 +3883,7 @@ reallocate: for (int i = 0; i < num_folios; i++) { struct folio *folio; - ret = attach_eb_folio_to_filemap(eb, i, &existing_eb); + ret = attach_eb_folio_to_filemap(eb, i, prealloc, &existing_eb); if (ret > 0) { ASSERT(existing_eb); goto out; @@ -3894,24 +3920,6 @@ reallocate: * and free the allocated page. */ folio = eb->folios[i]; - eb->folio_size = folio_size(folio); - eb->folio_shift = folio_shift(folio); - spin_lock(&mapping->i_private_lock); - /* Should not fail, as we have preallocated the memory */ - ret = attach_extent_buffer_folio(eb, folio, prealloc); - ASSERT(!ret); - /* - * To inform we have extra eb under allocation, so that - * detach_extent_buffer_page() won't release the folio private - * when the eb hasn't yet been inserted into radix tree. - * - * The ref will be decreased when the eb released the page, in - * detach_extent_buffer_page(). - * Thus needs no special handling in error path. - */ - btrfs_folio_inc_eb_refs(fs_info, folio); - spin_unlock(&mapping->i_private_lock); - WARN_ON(btrfs_folio_test_dirty(fs_info, folio, eb->start, eb->len)); /* diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 40e5f7f2fc..1167899a16 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -468,6 +468,7 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) } if (!qgroup) { struct btrfs_qgroup *prealloc; + struct btrfs_root *tree_root = fs_info->tree_root; prealloc = kzalloc(sizeof(*prealloc), GFP_KERNEL); if (!prealloc) { @@ -475,6 +476,25 @@ int btrfs_read_qgroup_config(struct btrfs_fs_info *fs_info) goto out; } qgroup = add_qgroup_rb(fs_info, prealloc, found_key.offset); + /* + * If a qgroup exists for a subvolume ID, it is possible + * that subvolume has been deleted, in which case + * re-using that ID would lead to incorrect accounting. + * + * Ensure that we skip any such subvol ids. + * + * We don't need to lock because this is only called + * during mount before we start doing things like creating + * subvolumes. + */ + if (is_fstree(qgroup->qgroupid) && + qgroup->qgroupid > tree_root->free_objectid) + /* + * Don't need to check against BTRFS_LAST_FREE_OBJECTID, + * as it will get checked on the next call to + * btrfs_get_free_objectid. 
+ */ + tree_root->free_objectid = qgroup->qgroupid + 1; } ret = btrfs_sysfs_add_one_qgroup(fs_info, qgroup); if (ret < 0) @@ -3129,7 +3149,7 @@ static int qgroup_auto_inherit(struct btrfs_fs_info *fs_info, qgids = res->qgroups; list_for_each_entry(qg_list, &inode_qg->groups, next_group) - qgids[i] = qg_list->group->qgroupid; + qgids[i++] = qg_list->group->qgroupid; *inherit = res; return 0; @@ -3826,14 +3846,14 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, /* we're resuming qgroup rescan at mount time */ if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN)) { - btrfs_warn(fs_info, + btrfs_debug(fs_info, "qgroup rescan init failed, qgroup rescan is not queued"); ret = -EINVAL; } else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { - btrfs_warn(fs_info, + btrfs_debug(fs_info, "qgroup rescan init failed, qgroup is not enabled"); - ret = -EINVAL; + ret = -ENOTCONN; } if (ret) @@ -3844,14 +3864,12 @@ qgroup_rescan_init(struct btrfs_fs_info *fs_info, u64 progress_objectid, if (init_flags) { if (fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_RESCAN) { - btrfs_warn(fs_info, - "qgroup rescan is already in progress"); ret = -EINPROGRESS; } else if (!(fs_info->qgroup_flags & BTRFS_QGROUP_STATUS_FLAG_ON)) { - btrfs_warn(fs_info, + btrfs_debug(fs_info, "qgroup rescan init failed, qgroup is not enabled"); - ret = -EINVAL; + ret = -ENOTCONN; } else if (btrfs_qgroup_mode(fs_info) == BTRFS_QGROUP_MODE_DISABLED) { /* Quota disable is in progress */ ret = -EBUSY; diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 7e44ccaf34..fa6964de34 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -119,6 +119,7 @@ enum { Opt_thread_pool, Opt_treelog, Opt_user_subvol_rm_allowed, + Opt_norecovery, /* Rescue options */ Opt_rescue, @@ -245,6 +246,8 @@ static const struct fs_parameter_spec btrfs_fs_parameters[] = { __fsparam(NULL, "nologreplay", Opt_nologreplay, fs_param_deprecated, NULL), /* Deprecated, with alias rescue=usebackuproot */ __fsparam(NULL, "usebackuproot", Opt_usebackuproot, fs_param_deprecated, NULL), + /* For compatibility only, alias for "rescue=nologreplay". */ + fsparam_flag("norecovery", Opt_norecovery), /* Debugging options. */ fsparam_flag_no("enospc_debug", Opt_enospc_debug), @@ -438,6 +441,11 @@ static int btrfs_parse_param(struct fs_context *fc, struct fs_parameter *param) "'nologreplay' is deprecated, use 'rescue=nologreplay' instead"); btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY); break; + case Opt_norecovery: + btrfs_info(NULL, +"'norecovery' is for compatibility only, recommended to use 'rescue=nologreplay'"); + btrfs_set_opt(ctx->mount_opt, NOLOGREPLAY); + break; case Opt_flushoncommit: if (result.negated) btrfs_clear_opt(ctx->mount_opt, FLUSHONCOMMIT); diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index 472918a5bc..d4fc5fedd8 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -4856,18 +4856,23 @@ static int btrfs_log_prealloc_extents(struct btrfs_trans_handle *trans, path->slots[0]++; continue; } - if (!dropped_extents) { - /* - * Avoid logging extent items logged in past fsync calls - * and leading to duplicate keys in the log tree. - */ + /* + * Avoid overlapping items in the log tree. The first time we + * get here, get rid of everything from a past fsync. After + * that, if the current extent starts before the end of the last + * extent we copied, truncate the last one. This can happen if + * an ordered extent completion modifies the subvolume tree + * while btrfs_next_leaf() has the tree unlocked. 
+ */ + if (!dropped_extents || key.offset < truncate_offset) { ret = truncate_inode_items(trans, root->log_root, inode, - truncate_offset, + min(key.offset, truncate_offset), BTRFS_EXTENT_DATA_KEY); if (ret) goto out; dropped_extents = true; } + truncate_offset = btrfs_file_extent_end(path); if (ins_nr == 0) start_slot = slot; ins_nr++; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 4cba80b343..459514da16 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -1290,7 +1290,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, struct btrfs_chunk_map *map) { struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace; - struct btrfs_device *device = map->stripes[zone_idx].dev; + struct btrfs_device *device; int dev_replace_is_ongoing = 0; unsigned int nofs_flag; struct blk_zone zone; @@ -1298,7 +1298,11 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, info->physical = map->stripes[zone_idx].physical; + down_read(&dev_replace->rwsem); + device = map->stripes[zone_idx].dev; + if (!device->bdev) { + up_read(&dev_replace->rwsem); info->alloc_offset = WP_MISSING_DEV; return 0; } @@ -1308,6 +1312,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, __set_bit(zone_idx, active); if (!btrfs_dev_is_sequential(device, info->physical)) { + up_read(&dev_replace->rwsem); info->alloc_offset = WP_CONVENTIONAL; return 0; } @@ -1315,11 +1320,9 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, /* This zone will be used for allocation, so mark this zone non-empty. */ btrfs_dev_clear_zone_empty(device, info->physical); - down_read(&dev_replace->rwsem); dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace); if (dev_replace_is_ongoing && dev_replace->tgtdev != NULL) btrfs_dev_clear_zone_empty(dev_replace->tgtdev, info->physical); - up_read(&dev_replace->rwsem); /* * The group is mapped to a sequential zone. 
Get the zone write pointer @@ -1330,6 +1333,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, ret = btrfs_get_dev_zone(device, info->physical, &zone); memalloc_nofs_restore(nofs_flag); if (ret) { + up_read(&dev_replace->rwsem); if (ret != -EIO && ret != -EOPNOTSUPP) return ret; info->alloc_offset = WP_MISSING_DEV; @@ -1341,6 +1345,7 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, "zoned: unexpected conventional zone %llu on device %s (devid %llu)", zone.start << SECTOR_SHIFT, rcu_str_deref(device->name), device->devid); + up_read(&dev_replace->rwsem); return -EIO; } @@ -1368,6 +1373,8 @@ static int btrfs_load_zone_info(struct btrfs_fs_info *fs_info, int zone_idx, break; } + up_read(&dev_replace->rwsem); + return 0; } diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c index 6465e25742..06cdf1a8a1 100644 --- a/fs/cachefiles/daemon.c +++ b/fs/cachefiles/daemon.c @@ -133,7 +133,7 @@ static int cachefiles_daemon_open(struct inode *inode, struct file *file) return 0; } -static void cachefiles_flush_reqs(struct cachefiles_cache *cache) +void cachefiles_flush_reqs(struct cachefiles_cache *cache) { struct xarray *xa = &cache->reqs; struct cachefiles_req *req; @@ -159,6 +159,7 @@ static void cachefiles_flush_reqs(struct cachefiles_cache *cache) xa_for_each(xa, index, req) { req->error = -EIO; complete(&req->done); + __xa_erase(xa, index); } xa_unlock(xa); diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index d33169f001..6845a90cdf 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -55,6 +55,7 @@ struct cachefiles_ondemand_info { int ondemand_id; enum cachefiles_object_state state; struct cachefiles_object *object; + spinlock_t lock; }; /* @@ -138,6 +139,7 @@ static inline bool cachefiles_in_ondemand_mode(struct cachefiles_cache *cache) struct cachefiles_req { struct cachefiles_object *object; struct completion done; + refcount_t ref; int error; struct cachefiles_msg msg; }; @@ -186,6 +188,7 @@ extern int cachefiles_has_space(struct cachefiles_cache *cache, * daemon.c */ extern const struct file_operations cachefiles_daemon_fops; +extern void cachefiles_flush_reqs(struct cachefiles_cache *cache); extern void cachefiles_get_unbind_pincount(struct cachefiles_cache *cache); extern void cachefiles_put_unbind_pincount(struct cachefiles_cache *cache); @@ -424,6 +427,8 @@ do { \ pr_err("I/O Error: " FMT"\n", ##__VA_ARGS__); \ fscache_io_error((___cache)->cache); \ set_bit(CACHEFILES_DEAD, &(___cache)->flags); \ + if (cachefiles_in_ondemand_mode(___cache)) \ + cachefiles_flush_reqs(___cache); \ } while (0) #define cachefiles_io_error_obj(object, FMT, ...) 
\ diff --git a/fs/cachefiles/ondemand.c b/fs/cachefiles/ondemand.c index 4ba42f1fa3..89f118d68d 100644 --- a/fs/cachefiles/ondemand.c +++ b/fs/cachefiles/ondemand.c @@ -4,19 +4,40 @@ #include #include "internal.h" +struct ondemand_anon_file { + struct file *file; + int fd; +}; + +static inline void cachefiles_req_put(struct cachefiles_req *req) +{ + if (refcount_dec_and_test(&req->ref)) + kfree(req); +} + static int cachefiles_ondemand_fd_release(struct inode *inode, struct file *file) { struct cachefiles_object *object = file->private_data; - struct cachefiles_cache *cache = object->volume->cache; - struct cachefiles_ondemand_info *info = object->ondemand; - int object_id = info->ondemand_id; + struct cachefiles_cache *cache; + struct cachefiles_ondemand_info *info; + int object_id; struct cachefiles_req *req; - XA_STATE(xas, &cache->reqs, 0); + XA_STATE(xas, NULL, 0); + + if (!object) + return 0; + + info = object->ondemand; + cache = object->volume->cache; + xas.xa = &cache->reqs; xa_lock(&cache->reqs); + spin_lock(&info->lock); + object_id = info->ondemand_id; info->ondemand_id = CACHEFILES_ONDEMAND_ID_CLOSED; cachefiles_ondemand_set_object_close(object); + spin_unlock(&info->lock); /* Only flush CACHEFILES_REQ_NEW marked req to avoid race with daemon_read */ xas_for_each_marked(&xas, req, ULONG_MAX, CACHEFILES_REQ_NEW) { @@ -116,6 +137,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) { struct cachefiles_req *req; struct fscache_cookie *cookie; + struct cachefiles_ondemand_info *info; char *pid, *psize; unsigned long id; long size; @@ -166,6 +188,33 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) goto out; } + info = req->object->ondemand; + spin_lock(&info->lock); + /* + * The anonymous fd was closed before copen ? Fail the request. + * + * t1 | t2 + * --------------------------------------------------------- + * cachefiles_ondemand_copen + * req = xa_erase(&cache->reqs, id) + * // Anon fd is maliciously closed. + * cachefiles_ondemand_fd_release + * xa_lock(&cache->reqs) + * cachefiles_ondemand_set_object_close(object) + * xa_unlock(&cache->reqs) + * cachefiles_ondemand_set_object_open + * // No one will ever close it again. + * cachefiles_ondemand_daemon_read + * cachefiles_ondemand_select_req + * + * Get a read req but its fd is already closed. The daemon can't + * issue a cread ioctl with an closed fd, then hung. 
+ */ + if (info->ondemand_id == CACHEFILES_ONDEMAND_ID_CLOSED) { + spin_unlock(&info->lock); + req->error = -EBADFD; + goto out; + } cookie = req->object->cookie; cookie->object_size = size; if (size) @@ -175,6 +224,7 @@ int cachefiles_ondemand_copen(struct cachefiles_cache *cache, char *args) trace_cachefiles_ondemand_copen(req->object, id, size); cachefiles_ondemand_set_object_open(req->object); + spin_unlock(&info->lock); wake_up_all(&cache->daemon_pollwq); out: @@ -205,14 +255,14 @@ int cachefiles_ondemand_restore(struct cachefiles_cache *cache, char *args) return 0; } -static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) +static int cachefiles_ondemand_get_fd(struct cachefiles_req *req, + struct ondemand_anon_file *anon_file) { struct cachefiles_object *object; struct cachefiles_cache *cache; struct cachefiles_open *load; - struct file *file; u32 object_id; - int ret, fd; + int ret; object = cachefiles_grab_object(req->object, cachefiles_obj_get_ondemand_fd); @@ -224,35 +274,53 @@ static int cachefiles_ondemand_get_fd(struct cachefiles_req *req) if (ret < 0) goto err; - fd = get_unused_fd_flags(O_WRONLY); - if (fd < 0) { - ret = fd; + anon_file->fd = get_unused_fd_flags(O_WRONLY); + if (anon_file->fd < 0) { + ret = anon_file->fd; goto err_free_id; } - file = anon_inode_getfile("[cachefiles]", &cachefiles_ondemand_fd_fops, - object, O_WRONLY); - if (IS_ERR(file)) { - ret = PTR_ERR(file); + anon_file->file = anon_inode_getfile("[cachefiles]", + &cachefiles_ondemand_fd_fops, object, O_WRONLY); + if (IS_ERR(anon_file->file)) { + ret = PTR_ERR(anon_file->file); goto err_put_fd; } - file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; - fd_install(fd, file); + spin_lock(&object->ondemand->lock); + if (object->ondemand->ondemand_id > 0) { + spin_unlock(&object->ondemand->lock); + /* Pair with check in cachefiles_ondemand_fd_release(). */ + anon_file->file->private_data = NULL; + ret = -EEXIST; + goto err_put_file; + } + + anon_file->file->f_mode |= FMODE_PWRITE | FMODE_LSEEK; load = (void *)req->msg.data; - load->fd = fd; + load->fd = anon_file->fd; object->ondemand->ondemand_id = object_id; + spin_unlock(&object->ondemand->lock); cachefiles_get_unbind_pincount(cache); trace_cachefiles_ondemand_open(object, &req->msg, load); return 0; +err_put_file: + fput(anon_file->file); + anon_file->file = NULL; err_put_fd: - put_unused_fd(fd); + put_unused_fd(anon_file->fd); + anon_file->fd = ret; err_free_id: xa_erase(&cache->ondemand_ids, object_id); err: + spin_lock(&object->ondemand->lock); + /* Avoid marking an opened object as closed. 
*/ + if (object->ondemand->ondemand_id <= 0) + cachefiles_ondemand_set_object_close(object); + spin_unlock(&object->ondemand->lock); cachefiles_put_object(object, cachefiles_obj_put_ondemand_fd); return ret; } @@ -299,9 +367,9 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, { struct cachefiles_req *req; struct cachefiles_msg *msg; - unsigned long id = 0; size_t n; int ret = 0; + struct ondemand_anon_file anon_file; XA_STATE(xas, &cache->reqs, cache->req_id_next); xa_lock(&cache->reqs); @@ -330,42 +398,45 @@ ssize_t cachefiles_ondemand_daemon_read(struct cachefiles_cache *cache, xas_clear_mark(&xas, CACHEFILES_REQ_NEW); cache->req_id_next = xas.xa_index + 1; + refcount_inc(&req->ref); + cachefiles_grab_object(req->object, cachefiles_obj_get_read_req); xa_unlock(&cache->reqs); - id = xas.xa_index; - if (msg->opcode == CACHEFILES_OP_OPEN) { - ret = cachefiles_ondemand_get_fd(req); - if (ret) { - cachefiles_ondemand_set_object_close(req->object); - goto error; - } + ret = cachefiles_ondemand_get_fd(req, &anon_file); + if (ret) + goto out; } - msg->msg_id = id; + msg->msg_id = xas.xa_index; msg->object_id = req->object->ondemand->ondemand_id; - if (copy_to_user(_buffer, msg, n) != 0) { + if (copy_to_user(_buffer, msg, n) != 0) ret = -EFAULT; - goto err_put_fd; - } - /* CLOSE request has no reply */ - if (msg->opcode == CACHEFILES_OP_CLOSE) { - xa_erase(&cache->reqs, id); - complete(&req->done); + if (msg->opcode == CACHEFILES_OP_OPEN) { + if (ret < 0) { + fput(anon_file.file); + put_unused_fd(anon_file.fd); + goto out; + } + fd_install(anon_file.fd, anon_file.file); } - - return n; - -err_put_fd: - if (msg->opcode == CACHEFILES_OP_OPEN) - close_fd(((struct cachefiles_open *)msg->data)->fd); -error: - xa_erase(&cache->reqs, id); - req->error = ret; - complete(&req->done); - return ret; +out: + cachefiles_put_object(req->object, cachefiles_obj_put_read_req); + /* Remove error request and CLOSE request has no reply */ + if (ret || msg->opcode == CACHEFILES_OP_CLOSE) { + xas_reset(&xas); + xas_lock(&xas); + if (xas_load(&xas) == req) { + req->error = ret; + complete(&req->done); + xas_store(&xas, NULL); + } + xas_unlock(&xas); + } + cachefiles_req_put(req); + return ret ? ret : n; } typedef int (*init_req_fn)(struct cachefiles_req *req, void *private); @@ -395,6 +466,7 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, goto out; } + refcount_set(&req->ref, 1); req->object = object; init_completion(&req->done); req->msg.opcode = opcode; @@ -456,7 +528,7 @@ static int cachefiles_ondemand_send_req(struct cachefiles_object *object, wake_up_all(&cache->daemon_pollwq); wait_for_completion(&req->done); ret = req->error; - kfree(req); + cachefiles_req_put(req); return ret; out: /* Reset the object to close state in error handling path. 
@@ -578,6 +650,7 @@ int cachefiles_ondemand_init_obj_info(struct cachefiles_object *object, return -ENOMEM; object->ondemand->object = object; + spin_lock_init(&object->ondemand->lock); INIT_WORK(&object->ondemand->ondemand_work, ondemand_object_worker); return 0; } diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c index 1f2f70a1b8..decedc4ee1 100644 --- a/fs/dlm/ast.c +++ b/fs/dlm/ast.c @@ -12,6 +12,7 @@ #include #include "dlm_internal.h" +#include "lvb_table.h" #include "memory.h" #include "lock.h" #include "user.h" @@ -42,6 +43,7 @@ int dlm_enqueue_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, struct dlm_ls *ls = lkb->lkb_resource->res_ls; int rv = DLM_ENQUEUE_CALLBACK_SUCCESS; struct dlm_callback *cb; + int copy_lvb = 0; int prev_mode; if (flags & DLM_CB_BAST) { @@ -73,6 +75,17 @@ int dlm_enqueue_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, goto out; } } + } else if (flags & DLM_CB_CAST) { + if (test_bit(DLM_DFL_USER_BIT, &lkb->lkb_dflags)) { + if (lkb->lkb_last_cast) + prev_mode = lkb->lkb_last_cb->mode; + else + prev_mode = -1; + + if (!status && lkb->lkb_lksb->sb_lvbptr && + dlm_lvb_operations[prev_mode + 1][mode + 1]) + copy_lvb = 1; + } } cb = dlm_allocate_cb(); @@ -85,6 +98,7 @@ int dlm_enqueue_lkb_callback(struct dlm_lkb *lkb, uint32_t flags, int mode, cb->mode = mode; cb->sb_status = status; cb->sb_flags = (sbflags & 0x000000FF); + cb->copy_lvb = copy_lvb; kref_init(&cb->ref); if (!test_and_set_bit(DLM_IFL_CB_PENDING_BIT, &lkb->lkb_iflags)) rv = DLM_ENQUEUE_CALLBACK_NEED_SCHED; diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index 3b4dbce849..a9137c90f3 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -222,6 +222,7 @@ struct dlm_callback { int sb_status; /* copy to lksb status */ uint8_t sb_flags; /* copy to lksb flags */ int8_t mode; /* rq mode of bast, gr mode of cast */ + int copy_lvb; struct list_head list; struct kref ref; diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 9f9b684488..12a483deee 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -21,7 +21,6 @@ #include "dlm_internal.h" #include "lockspace.h" #include "lock.h" -#include "lvb_table.h" #include "user.h" #include "ast.h" #include "config.h" @@ -806,8 +805,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, struct dlm_lkb *lkb; DECLARE_WAITQUEUE(wait, current); struct dlm_callback *cb; - int rv, ret, copy_lvb = 0; - int old_mode, new_mode; + int rv, ret; if (count == sizeof(struct dlm_device_version)) { rv = copy_version_to_user(buf, count); @@ -864,9 +862,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, lkb = list_first_entry(&proc->asts, struct dlm_lkb, lkb_cb_list); - /* rem_lkb_callback sets a new lkb_last_cast */ - old_mode = lkb->lkb_last_cast->mode; - rv = dlm_dequeue_lkb_callback(lkb, &cb); switch (rv) { case DLM_DEQUEUE_CALLBACK_EMPTY: @@ -895,12 +890,6 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, if (cb->flags & DLM_CB_BAST) { trace_dlm_bast(lkb->lkb_resource->res_ls, lkb, cb->mode); } else if (cb->flags & DLM_CB_CAST) { - new_mode = cb->mode; - - if (!cb->sb_status && lkb->lkb_lksb->sb_lvbptr && - dlm_lvb_operations[old_mode + 1][new_mode + 1]) - copy_lvb = 1; - lkb->lkb_lksb->sb_status = cb->sb_status; lkb->lkb_lksb->sb_flags = cb->sb_flags; trace_dlm_ast(lkb->lkb_resource->res_ls, lkb); @@ -908,7 +897,7 @@ static ssize_t device_read(struct file *file, char __user *buf, size_t count, ret = copy_result_to_user(lkb->lkb_ua, test_bit(DLM_PROC_FLAGS_COMPAT, 
&proc->flags), - cb->flags, cb->mode, copy_lvb, buf, count); + cb->flags, cb->mode, cb->copy_lvb, buf, count); kref_put(&cb->ref, dlm_release_callback); diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c index 3fe41964c0..7f9f68c00e 100644 --- a/fs/ecryptfs/keystore.c +++ b/fs/ecryptfs/keystore.c @@ -300,9 +300,11 @@ write_tag_66_packet(char *signature, u8 cipher_code, * | Key Identifier Size | 1 or 2 bytes | * | Key Identifier | arbitrary | * | File Encryption Key Size | 1 or 2 bytes | + * | Cipher Code | 1 byte | * | File Encryption Key | arbitrary | + * | Checksum | 2 bytes | */ - data_len = (5 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size); + data_len = (8 + ECRYPTFS_SIG_SIZE_HEX + crypt_stat->key_size); *packet = kmalloc(data_len, GFP_KERNEL); message = *packet; if (!message) { diff --git a/fs/erofs/decompressor_deflate.c b/fs/erofs/decompressor_deflate.c index 81e65c453e..3a3461561a 100644 --- a/fs/erofs/decompressor_deflate.c +++ b/fs/erofs/decompressor_deflate.c @@ -46,39 +46,15 @@ int __init z_erofs_deflate_init(void) /* by default, use # of possible CPUs instead */ if (!z_erofs_deflate_nstrms) z_erofs_deflate_nstrms = num_possible_cpus(); - - for (; z_erofs_deflate_avail_strms < z_erofs_deflate_nstrms; - ++z_erofs_deflate_avail_strms) { - struct z_erofs_deflate *strm; - - strm = kzalloc(sizeof(*strm), GFP_KERNEL); - if (!strm) - goto out_failed; - - /* XXX: in-kernel zlib cannot shrink windowbits currently */ - strm->z.workspace = vmalloc(zlib_inflate_workspacesize()); - if (!strm->z.workspace) { - kfree(strm); - goto out_failed; - } - - spin_lock(&z_erofs_deflate_lock); - strm->next = z_erofs_deflate_head; - z_erofs_deflate_head = strm; - spin_unlock(&z_erofs_deflate_lock); - } return 0; - -out_failed: - erofs_err(NULL, "failed to allocate zlib workspace"); - z_erofs_deflate_exit(); - return -ENOMEM; } int z_erofs_load_deflate_config(struct super_block *sb, struct erofs_super_block *dsb, void *data, int size) { struct z_erofs_deflate_cfgs *dfl = data; + static DEFINE_MUTEX(deflate_resize_mutex); + static bool inited; if (!dfl || size < sizeof(struct z_erofs_deflate_cfgs)) { erofs_err(sb, "invalid deflate cfgs, size=%u", size); @@ -89,9 +65,36 @@ int z_erofs_load_deflate_config(struct super_block *sb, erofs_err(sb, "unsupported windowbits %u", dfl->windowbits); return -EOPNOTSUPP; } + mutex_lock(&deflate_resize_mutex); + if (!inited) { + for (; z_erofs_deflate_avail_strms < z_erofs_deflate_nstrms; + ++z_erofs_deflate_avail_strms) { + struct z_erofs_deflate *strm; + + strm = kzalloc(sizeof(*strm), GFP_KERNEL); + if (!strm) + goto failed; + /* XXX: in-kernel zlib cannot customize windowbits */ + strm->z.workspace = vmalloc(zlib_inflate_workspacesize()); + if (!strm->z.workspace) { + kfree(strm); + goto failed; + } + spin_lock(&z_erofs_deflate_lock); + strm->next = z_erofs_deflate_head; + z_erofs_deflate_head = strm; + spin_unlock(&z_erofs_deflate_lock); + } + inited = true; + } + mutex_unlock(&deflate_resize_mutex); erofs_info(sb, "EXPERIMENTAL DEFLATE feature in use. 
Use at your own risk!"); return 0; +failed: + mutex_unlock(&deflate_resize_mutex); + z_erofs_deflate_exit(); + return -ENOMEM; } int z_erofs_deflate_decompress(struct z_erofs_decompress_req *rq, diff --git a/fs/exec.c b/fs/exec.c index cf1df7f16e..0c5f06d08c 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include @@ -267,6 +268,14 @@ static int __bprm_mm_init(struct linux_binprm *bprm) goto err_free; } + /* + * Need to be called with mmap write lock + * held, to avoid race with ksmd. + */ + err = ksm_execve(mm); + if (err) + goto err_ksm; + /* * Place the stack at the largest stack address the architecture * supports. Later, we'll move this to an appropriate place. We don't @@ -288,6 +297,8 @@ static int __bprm_mm_init(struct linux_binprm *bprm) bprm->p = vma->vm_end - sizeof(void *); return 0; err: + ksm_exit(mm); +err_ksm: mmap_write_unlock(mm); err_free: bprm->vma = NULL; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 537803250c..30e8248662 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -2334,7 +2334,7 @@ static int mpage_journal_page_buffers(handle_t *handle, if (folio_pos(folio) + len > size && !ext4_verity_in_progress(inode)) - len = size - folio_pos(folio); + len = size & (len - 1); return ext4_journal_folio_buffers(handle, folio, len); } @@ -2887,9 +2887,6 @@ retry: if (IS_ERR(folio)) return PTR_ERR(folio); - /* In case writeback began while the folio was unlocked */ - folio_wait_stable(folio); - #ifdef CONFIG_FS_ENCRYPTION ret = ext4_block_write_begin(folio, pos, len, ext4_da_get_block_prep); #else diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c index 12b3f19601..66b5a68b02 100644 --- a/fs/ext4/mballoc.c +++ b/fs/ext4/mballoc.c @@ -831,6 +831,8 @@ static int mb_avg_fragment_size_order(struct super_block *sb, ext4_grpblk_t len) return 0; if (order == MB_NUM_ORDERS(sb)) order--; + if (WARN_ON_ONCE(order > MB_NUM_ORDERS(sb))) + order = MB_NUM_ORDERS(sb) - 1; return order; } @@ -1008,6 +1010,8 @@ static void ext4_mb_choose_next_group_best_avail(struct ext4_allocation_context * goal length. 
*/ order = fls(ac->ac_g_ex.fe_len) - 1; + if (WARN_ON_ONCE(order - 1 > MB_NUM_ORDERS(ac->ac_sb))) + order = MB_NUM_ORDERS(ac->ac_sb); min_order = order - sbi->s_mb_best_avail_max_trim_order; if (min_order < 0) min_order = 0; @@ -6113,6 +6117,7 @@ ext4_mb_new_blocks_simple(struct ext4_allocation_request *ar, int *errp) ext4_mb_mark_bb(sb, block, 1, true); ar->len = 1; + *errp = 0; return block; } diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h index 56938532b4..7bfc5fb5a1 100644 --- a/fs/ext4/mballoc.h +++ b/fs/ext4/mballoc.h @@ -193,8 +193,8 @@ struct ext4_allocation_context { ext4_grpblk_t ac_orig_goal_len; __u32 ac_flags; /* allocation hints */ + __u32 ac_groups_linear_remaining; __u16 ac_groups_scanned; - __u16 ac_groups_linear_remaining; __u16 ac_found; __u16 ac_cX_found[EXT4_MB_NUM_CRS]; __u16 ac_tail; diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c index 5e4f65c14d..a630b27a4c 100644 --- a/fs/ext4/namei.c +++ b/fs/ext4/namei.c @@ -2897,7 +2897,7 @@ retry: inode = ext4_new_inode_start_handle(idmap, dir, mode, NULL, 0, NULL, EXT4_HT_DIR, - EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) + + EXT4_MAXQUOTAS_TRANS_BLOCKS(dir->i_sb) + 4 + EXT4_XATTR_TRANS_BLOCKS); handle = ext4_journal_current_handle(); err = PTR_ERR(inode); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 044135796f..4b368f4dbc 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -5551,19 +5551,15 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) if (err) goto failed_mount6; - err = ext4_register_sysfs(sb); - if (err) - goto failed_mount7; - err = ext4_init_orphan_info(sb); if (err) - goto failed_mount8; + goto failed_mount7; #ifdef CONFIG_QUOTA /* Enable quota usage during mount. */ if (ext4_has_feature_quota(sb) && !sb_rdonly(sb)) { err = ext4_enable_quotas(sb); if (err) - goto failed_mount9; + goto failed_mount8; } #endif /* CONFIG_QUOTA */ @@ -5589,7 +5585,7 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) ext4_msg(sb, KERN_INFO, "recovery complete"); err = ext4_mark_recovery_complete(sb, es); if (err) - goto failed_mount10; + goto failed_mount9; } if (test_opt(sb, DISCARD) && !bdev_max_discard_sectors(sb->s_bdev)) @@ -5606,15 +5602,17 @@ static int __ext4_fill_super(struct fs_context *fc, struct super_block *sb) atomic_set(&sbi->s_warning_count, 0); atomic_set(&sbi->s_msg_count, 0); + /* Register sysfs after all initializations are complete. 
*/ + err = ext4_register_sysfs(sb); + if (err) + goto failed_mount9; + return 0; -failed_mount10: +failed_mount9: ext4_quotas_off(sb, EXT4_MAXQUOTAS); -failed_mount9: __maybe_unused +failed_mount8: __maybe_unused ext4_release_orphan_info(sb); -failed_mount8: - ext4_unregister_sysfs(sb); - kobject_put(&sbi->s_kobj); failed_mount7: ext4_unregister_li_request(sb); failed_mount6: diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 6d332dff79..63cbda3700 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -29,6 +29,7 @@ typedef enum { attr_trigger_test_error, attr_first_error_time, attr_last_error_time, + attr_clusters_in_group, attr_feature, attr_pointer_ui, attr_pointer_ul, @@ -104,7 +105,7 @@ static ssize_t reserved_clusters_store(struct ext4_sb_info *sbi, int ret; ret = kstrtoull(skip_spaces(buf), 0, &val); - if (ret || val >= clusters) + if (ret || val >= clusters || (s64)val < 0) return -EINVAL; atomic64_set(&sbi->s_resv_clusters, val); @@ -207,13 +208,14 @@ EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444); EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead, ext4_sb_info, s_inode_readahead_blks); +EXT4_ATTR_OFFSET(mb_group_prealloc, 0644, clusters_in_group, + ext4_sb_info, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); EXT4_RW_ATTR_SBI_UI(mb_stats, s_mb_stats); EXT4_RW_ATTR_SBI_UI(mb_max_to_scan, s_mb_max_to_scan); EXT4_RW_ATTR_SBI_UI(mb_min_to_scan, s_mb_min_to_scan); EXT4_RW_ATTR_SBI_UI(mb_order2_req, s_mb_order2_reqs); EXT4_RW_ATTR_SBI_UI(mb_stream_req, s_mb_stream_request); -EXT4_RW_ATTR_SBI_UI(mb_group_prealloc, s_mb_group_prealloc); EXT4_RW_ATTR_SBI_UI(mb_max_linear_groups, s_mb_max_linear_groups); EXT4_RW_ATTR_SBI_UI(extent_max_zeroout_kb, s_extent_max_zeroout_kb); EXT4_ATTR(trigger_fs_error, 0200, trigger_test_error); @@ -392,6 +394,7 @@ static ssize_t ext4_attr_show(struct kobject *kobj, (unsigned long long) percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit)); case attr_inode_readahead: + case attr_clusters_in_group: case attr_pointer_ui: if (!ptr) return 0; @@ -451,7 +454,8 @@ static ssize_t ext4_attr_store(struct kobject *kobj, s_kobj); struct ext4_attr *a = container_of(attr, struct ext4_attr, attr); void *ptr = calc_ptr(a, sbi); - unsigned long t; + unsigned int t; + unsigned long lt; int ret; switch (a->attr_id) { @@ -460,7 +464,7 @@ static ssize_t ext4_attr_store(struct kobject *kobj, case attr_pointer_ui: if (!ptr) return 0; - ret = kstrtoul(skip_spaces(buf), 0, &t); + ret = kstrtouint(skip_spaces(buf), 0, &t); if (ret) return ret; if (a->attr_ptr == ptr_ext4_super_block_offset) @@ -468,13 +472,21 @@ static ssize_t ext4_attr_store(struct kobject *kobj, else *((unsigned int *) ptr) = t; return len; + case attr_clusters_in_group: + ret = kstrtouint(skip_spaces(buf), 0, &t); + if (ret) + return ret; + if (t > sbi->s_clusters_per_group) + return -EINVAL; + *((unsigned int *) ptr) = t; + return len; case attr_pointer_ul: if (!ptr) return 0; - ret = kstrtoul(skip_spaces(buf), 0, &t); + ret = kstrtoul(skip_spaces(buf), 0, <); if (ret) return ret; - *((unsigned long *) ptr) = t; + *((unsigned long *) ptr) = lt; return len; case attr_inode_readahead: return inode_readahead_blks_store(sbi, buf, len); diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c index b67a176bfc..78f06f86c3 100644 --- a/fs/ext4/xattr.c +++ b/fs/ext4/xattr.c @@ -1619,6 +1619,7 @@ out_err: static int ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s, handle_t *handle, struct inode *inode, + struct inode *new_ea_inode, bool is_block) { struct 
ext4_xattr_entry *last, *next; @@ -1626,7 +1627,6 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, size_t min_offs = s->end - s->base, name_len = strlen(i->name); int in_inode = i->in_inode; struct inode *old_ea_inode = NULL; - struct inode *new_ea_inode = NULL; size_t old_size, new_size; int ret; @@ -1711,38 +1711,11 @@ static int ext4_xattr_set_entry(struct ext4_xattr_info *i, old_ea_inode = NULL; goto out; } - } - if (i->value && in_inode) { - WARN_ON_ONCE(!i->value_len); - - new_ea_inode = ext4_xattr_inode_lookup_create(handle, inode, - i->value, i->value_len); - if (IS_ERR(new_ea_inode)) { - ret = PTR_ERR(new_ea_inode); - new_ea_inode = NULL; - goto out; - } - } - if (old_ea_inode) { /* We are ready to release ref count on the old_ea_inode. */ ret = ext4_xattr_inode_dec_ref(handle, old_ea_inode); - if (ret) { - /* Release newly required ref count on new_ea_inode. */ - if (new_ea_inode) { - int err; - - err = ext4_xattr_inode_dec_ref(handle, - new_ea_inode); - if (err) - ext4_warning_inode(new_ea_inode, - "dec ref new_ea_inode err=%d", - err); - ext4_xattr_inode_free_quota(inode, new_ea_inode, - i->value_len); - } + if (ret) goto out; - } ext4_xattr_inode_free_quota(inode, old_ea_inode, le32_to_cpu(here->e_value_size)); @@ -1866,7 +1839,6 @@ update_hash: ret = 0; out: iput(old_ea_inode); - iput(new_ea_inode); return ret; } @@ -1929,9 +1901,21 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, size_t old_ea_inode_quota = 0; unsigned int ea_ino; - #define header(x) ((struct ext4_xattr_header *)(x)) + /* If we need EA inode, prepare it before locking the buffer */ + if (i->value && i->in_inode) { + WARN_ON_ONCE(!i->value_len); + + ea_inode = ext4_xattr_inode_lookup_create(handle, inode, + i->value, i->value_len); + if (IS_ERR(ea_inode)) { + error = PTR_ERR(ea_inode); + ea_inode = NULL; + goto cleanup; + } + } + if (s->base) { int offset = (char *)s->here - bs->bh->b_data; @@ -1940,6 +1924,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, EXT4_JTR_NONE); if (error) goto cleanup; + lock_buffer(bs->bh); if (header(s->base)->h_refcount == cpu_to_le32(1)) { @@ -1966,7 +1951,7 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode, } ea_bdebug(bs->bh, "modifying in-place"); error = ext4_xattr_set_entry(i, s, handle, inode, - true /* is_block */); + ea_inode, true /* is_block */); ext4_xattr_block_csum_set(inode, bs->bh); unlock_buffer(bs->bh); if (error == -EFSCORRUPTED) @@ -2034,29 +2019,13 @@ clone_block: s->end = s->base + sb->s_blocksize; } - error = ext4_xattr_set_entry(i, s, handle, inode, true /* is_block */); + error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode, + true /* is_block */); if (error == -EFSCORRUPTED) goto bad_block; if (error) goto cleanup; - if (i->value && s->here->e_value_inum) { - /* - * A ref count on ea_inode has been taken as part of the call to - * ext4_xattr_set_entry() above. We would like to drop this - * extra ref but we have to wait until the xattr block is - * initialized and has its own ref count on the ea_inode. 
- */ - ea_ino = le32_to_cpu(s->here->e_value_inum); - error = ext4_xattr_inode_iget(inode, ea_ino, - le32_to_cpu(s->here->e_hash), - &ea_inode); - if (error) { - ea_inode = NULL; - goto cleanup; - } - } - inserted: if (!IS_LAST_ENTRY(s->first)) { new_bh = ext4_xattr_block_cache_find(inode, header(s->base), @@ -2198,17 +2167,16 @@ getblk_failed: cleanup: if (ea_inode) { - int error2; - - error2 = ext4_xattr_inode_dec_ref(handle, ea_inode); - if (error2) - ext4_warning_inode(ea_inode, "dec ref error=%d", - error2); + if (error) { + int error2; - /* If there was an error, revert the quota charge. */ - if (error) + error2 = ext4_xattr_inode_dec_ref(handle, ea_inode); + if (error2) + ext4_warning_inode(ea_inode, "dec ref error=%d", + error2); ext4_xattr_inode_free_quota(inode, ea_inode, i_size_read(ea_inode)); + } iput(ea_inode); } if (ce) @@ -2266,14 +2234,38 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, { struct ext4_xattr_ibody_header *header; struct ext4_xattr_search *s = &is->s; + struct inode *ea_inode = NULL; int error; if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) return -ENOSPC; - error = ext4_xattr_set_entry(i, s, handle, inode, false /* is_block */); - if (error) + /* If we need EA inode, prepare it before locking the buffer */ + if (i->value && i->in_inode) { + WARN_ON_ONCE(!i->value_len); + + ea_inode = ext4_xattr_inode_lookup_create(handle, inode, + i->value, i->value_len); + if (IS_ERR(ea_inode)) + return PTR_ERR(ea_inode); + } + error = ext4_xattr_set_entry(i, s, handle, inode, ea_inode, + false /* is_block */); + if (error) { + if (ea_inode) { + int error2; + + error2 = ext4_xattr_inode_dec_ref(handle, ea_inode); + if (error2) + ext4_warning_inode(ea_inode, "dec ref error=%d", + error2); + + ext4_xattr_inode_free_quota(inode, ea_inode, + i_size_read(ea_inode)); + iput(ea_inode); + } return error; + } header = IHDR(inode, ext4_raw_inode(&is->iloc)); if (!IS_LAST_ENTRY(s->first)) { header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC); @@ -2282,6 +2274,7 @@ int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode, header->h_magic = cpu_to_le32(0); ext4_clear_inode_state(inode, EXT4_STATE_XATTR); } + iput(ea_inode); return 0; } @@ -3113,8 +3106,10 @@ ext4_xattr_block_cache_find(struct inode *inode, bh = ext4_sb_bread(inode->i_sb, ce->e_value, REQ_PRIO); if (IS_ERR(bh)) { - if (PTR_ERR(bh) == -ENOMEM) + if (PTR_ERR(bh) == -ENOMEM) { + mb_cache_entry_put(ea_block_cache, ce); return NULL; + } bh = NULL; EXT4_ERROR_INODE(inode, "block %lu read error", (unsigned long)ce->e_value); diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index eac698b8dd..b013205026 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -179,22 +179,22 @@ static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, break; case META_SIT: if (unlikely(blkaddr >= SIT_BLK_CNT(sbi))) - goto err; + goto check_only; break; case META_SSA: if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) || blkaddr < SM_I(sbi)->ssa_blkaddr)) - goto err; + goto check_only; break; case META_CP: if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr || blkaddr < __start_cp_addr(sbi))) - goto err; + goto check_only; break; case META_POR: if (unlikely(blkaddr >= MAX_BLKADDR(sbi) || blkaddr < MAIN_BLKADDR(sbi))) - goto err; + goto check_only; break; case DATA_GENERIC: case DATA_GENERIC_ENHANCE: @@ -228,6 +228,7 @@ static bool __f2fs_is_valid_blkaddr(struct f2fs_sb_info *sbi, return true; err: f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); +check_only: return false; } diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 
d9494b5fc7..5f77f9df24 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -3896,15 +3896,14 @@ static int check_swap_activate(struct swap_info_struct *sis, struct address_space *mapping = swap_file->f_mapping; struct inode *inode = mapping->host; struct f2fs_sb_info *sbi = F2FS_I_SB(inode); - sector_t cur_lblock; - sector_t last_lblock; - sector_t pblock; - sector_t lowest_pblock = -1; - sector_t highest_pblock = 0; + block_t cur_lblock; + block_t last_lblock; + block_t pblock; + block_t lowest_pblock = -1; + block_t highest_pblock = 0; int nr_extents = 0; - unsigned long nr_pblocks; + unsigned int nr_pblocks; unsigned int blks_per_sec = BLKS_PER_SEC(sbi); - unsigned int sec_blks_mask = BLKS_PER_SEC(sbi) - 1; unsigned int not_aligned = 0; int ret = 0; @@ -3942,8 +3941,8 @@ retry: pblock = map.m_pblk; nr_pblocks = map.m_len; - if ((pblock - SM_I(sbi)->main_blkaddr) & sec_blks_mask || - nr_pblocks & sec_blks_mask || + if ((pblock - SM_I(sbi)->main_blkaddr) % blks_per_sec || + nr_pblocks % blks_per_sec || !f2fs_valid_pinned_area(sbi, pblock)) { bool last_extent = false; @@ -4185,7 +4184,7 @@ static int f2fs_iomap_begin(struct inode *inode, loff_t offset, loff_t length, if (WARN_ON_ONCE(map.m_pblk == COMPRESS_ADDR)) return -EINVAL; - if (map.m_pblk != NULL_ADDR) { + if (map.m_flags & F2FS_MAP_MAPPED) { iomap->length = blks_to_bytes(inode, map.m_len); iomap->type = IOMAP_MAPPED; iomap->flags |= IOMAP_F_MERGED; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index fced2b7652..07b3675ea1 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2309,7 +2309,7 @@ static inline void f2fs_i_blocks_write(struct inode *, block_t, bool, bool); static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, struct inode *inode, blkcnt_t *count, bool partial) { - blkcnt_t diff = 0, release = 0; + long long diff = 0, release = 0; block_t avail_user_block_count; int ret; @@ -2329,26 +2329,27 @@ static inline int inc_valid_block_count(struct f2fs_sb_info *sbi, percpu_counter_add(&sbi->alloc_valid_block_count, (*count)); spin_lock(&sbi->stat_lock); - sbi->total_valid_block_count += (block_t)(*count); - avail_user_block_count = get_available_block_count(sbi, inode, true); - if (unlikely(sbi->total_valid_block_count > avail_user_block_count)) { + avail_user_block_count = get_available_block_count(sbi, inode, true); + diff = (long long)sbi->total_valid_block_count + *count - + avail_user_block_count; + if (unlikely(diff > 0)) { if (!partial) { spin_unlock(&sbi->stat_lock); + release = *count; goto enospc; } - - diff = sbi->total_valid_block_count - avail_user_block_count; if (diff > *count) diff = *count; *count -= diff; release = diff; - sbi->total_valid_block_count -= diff; if (!*count) { spin_unlock(&sbi->stat_lock); goto enospc; } } + sbi->total_valid_block_count += (block_t)(*count); + spin_unlock(&sbi->stat_lock); if (unlikely(release)) { diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 1761ad125f..208dedc161 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -952,9 +952,14 @@ int f2fs_setattr(struct mnt_idmap *idmap, struct dentry *dentry, ATTR_GID | ATTR_TIMES_SET)))) return -EPERM; - if ((attr->ia_valid & ATTR_SIZE) && - !f2fs_is_compress_backend_ready(inode)) - return -EOPNOTSUPP; + if ((attr->ia_valid & ATTR_SIZE)) { + if (!f2fs_is_compress_backend_ready(inode)) + return -EOPNOTSUPP; + if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED) && + !IS_ALIGNED(attr->ia_size, + F2FS_BLK_TO_BYTES(F2FS_I(inode)->i_cluster_size))) + return -EINVAL; + } err = setattr_prepare(idmap, dentry, attr); if (err) @@ 
-1325,6 +1330,9 @@ static int __clone_blkaddrs(struct inode *src_inode, struct inode *dst_inode, f2fs_put_page(psrc, 1); return PTR_ERR(pdst); } + + f2fs_wait_on_page_writeback(pdst, DATA, true, true); + memcpy_page(pdst, 0, psrc, 0, PAGE_SIZE); set_page_dirty(pdst); set_page_private_gcing(pdst); @@ -1817,15 +1825,6 @@ static long f2fs_fallocate(struct file *file, int mode, (mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE))) return -EOPNOTSUPP; - /* - * Pinned file should not support partial truncation since the block - * can be used by applications. - */ - if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && - (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | - FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) - return -EOPNOTSUPP; - if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE)) @@ -1833,6 +1832,17 @@ static long f2fs_fallocate(struct file *file, int mode, inode_lock(inode); + /* + * Pinned file should not support partial truncation since the block + * can be used by applications. + */ + if ((f2fs_compressed_file(inode) || f2fs_is_pinned_file(inode)) && + (mode & (FALLOC_FL_PUNCH_HOLE | FALLOC_FL_COLLAPSE_RANGE | + FALLOC_FL_ZERO_RANGE | FALLOC_FL_INSERT_RANGE))) { + ret = -EOPNOTSUPP; + goto out; + } + ret = file_modified(file); if (ret) goto out; @@ -2837,7 +2847,8 @@ static int f2fs_move_file_range(struct file *file_in, loff_t pos_in, goto out; } - if (f2fs_compressed_file(src) || f2fs_compressed_file(dst)) { + if (f2fs_compressed_file(src) || f2fs_compressed_file(dst) || + f2fs_is_pinned_file(src) || f2fs_is_pinned_file(dst)) { ret = -EOPNOTSUPP; goto out_unlock; } @@ -3522,9 +3533,6 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - if (f2fs_readonly(sbi->sb)) return -EROFS; @@ -3543,7 +3551,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -3570,9 +3579,12 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) struct dnode_of_data dn; pgoff_t end_offset, count; + f2fs_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); if (ret) { + f2fs_unlock_op(sbi); if (ret == -ENOENT) { page_idx = f2fs_get_next_page_offset(&dn, page_idx); @@ -3590,6 +3602,8 @@ static int f2fs_release_compress_blocks(struct file *filp, unsigned long arg) f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + if (ret < 0) break; @@ -3641,7 +3655,8 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, while (count) { int compr_blocks = 0; - blkcnt_t reserved; + blkcnt_t reserved = 0; + blkcnt_t to_reserved; int ret; for (i = 0; i < cluster_size; i++) { @@ -3661,20 +3676,26 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, * fails in release_compress_blocks(), so NEW_ADDR * is a possible case. 
*/ - if (blkaddr == NEW_ADDR || - __is_valid_data_blkaddr(blkaddr)) { + if (blkaddr == NEW_ADDR) { + reserved++; + continue; + } + if (__is_valid_data_blkaddr(blkaddr)) { compr_blocks++; continue; } } - reserved = cluster_size - compr_blocks; + to_reserved = cluster_size - compr_blocks - reserved; /* for the case all blocks in cluster were reserved */ - if (reserved == 1) + if (to_reserved == 1) { + dn->ofs_in_node += cluster_size; goto next; + } - ret = inc_valid_block_count(sbi, dn->inode, &reserved, false); + ret = inc_valid_block_count(sbi, dn->inode, + &to_reserved, false); if (unlikely(ret)) return ret; @@ -3685,7 +3706,7 @@ static int reserve_compress_blocks(struct dnode_of_data *dn, pgoff_t count, f2fs_i_compr_blocks_update(dn->inode, compr_blocks, true); - *reserved_blocks += reserved; + *reserved_blocks += to_reserved; next: count -= cluster_size; } @@ -3704,9 +3725,6 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) if (!f2fs_sb_has_compression(sbi)) return -EOPNOTSUPP; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - if (f2fs_readonly(sbi->sb)) return -EROFS; @@ -3718,7 +3736,8 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) inode_lock(inode); - if (!is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + !is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto unlock_inode; } @@ -3735,9 +3754,12 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) struct dnode_of_data dn; pgoff_t end_offset, count; + f2fs_lock_op(sbi); + set_new_dnode(&dn, inode, NULL, NULL, 0); ret = f2fs_get_dnode_of_data(&dn, page_idx, LOOKUP_NODE); if (ret) { + f2fs_unlock_op(sbi); if (ret == -ENOENT) { page_idx = f2fs_get_next_page_offset(&dn, page_idx); @@ -3755,6 +3777,8 @@ static int f2fs_reserve_compress_blocks(struct file *filp, unsigned long arg) f2fs_put_dnode(&dn); + f2fs_unlock_op(sbi); + if (ret < 0) break; @@ -4119,9 +4143,6 @@ static int f2fs_ioc_decompress_file(struct file *filp) if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - f2fs_balance_fs(sbi, true); file_start_write(filp); @@ -4132,7 +4153,8 @@ static int f2fs_ioc_decompress_file(struct file *filp) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } @@ -4197,9 +4219,6 @@ static int f2fs_ioc_compress_file(struct file *filp) if (!(filp->f_mode & FMODE_WRITE)) return -EBADF; - if (!f2fs_compressed_file(inode)) - return -EINVAL; - f2fs_balance_fs(sbi, true); file_start_write(filp); @@ -4210,7 +4229,8 @@ static int f2fs_ioc_compress_file(struct file *filp) goto out; } - if (is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { + if (!f2fs_compressed_file(inode) || + is_inode_flag_set(inode, FI_COMPRESS_RELEASED)) { ret = -EINVAL; goto out; } diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c index 8852814dab..e86c7f0153 100644 --- a/fs/f2fs/gc.c +++ b/fs/f2fs/gc.c @@ -1554,10 +1554,15 @@ next_step: int err; inode = f2fs_iget(sb, dni.ino); - if (IS_ERR(inode) || is_bad_inode(inode) || - special_file(inode->i_mode)) + if (IS_ERR(inode)) continue; + if (is_bad_inode(inode) || + special_file(inode->i_mode)) { + iput(inode); + continue; + } + err = f2fs_gc_pinned_control(inode, gc_type, segno); if (err == -EAGAIN) { iput(inode); diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c index c26effdce9..2af50bc34f 100644 --- 
a/fs/f2fs/inode.c +++ b/fs/f2fs/inode.c @@ -361,6 +361,12 @@ static bool sanity_check_inode(struct inode *inode, struct page *node_page) return false; } + if (fi->i_xattr_nid && f2fs_check_nid_range(sbi, fi->i_xattr_nid)) { + f2fs_warn(sbi, "%s: inode (ino=%lx) has corrupted i_xattr_nid: %u, run fsck to fix.", + __func__, inode->i_ino, fi->i_xattr_nid); + return false; + } + return true; } diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c index b3de6d6cdb..15c9a9f575 100644 --- a/fs/f2fs/node.c +++ b/fs/f2fs/node.c @@ -1187,7 +1187,17 @@ skip_partial: default: BUG(); } - if (err < 0 && err != -ENOENT) + if (err == -ENOENT) { + set_sbi_flag(F2FS_P_SB(page), SBI_NEED_FSCK); + f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); + f2fs_err_ratelimited(sbi, + "truncate node fail, ino:%lu, nid:%u, " + "offset[0]:%d, offset[1]:%d, nofs:%d", + inode->i_ino, dn.nid, offset[0], + offset[1], nofs); + err = 0; + } + if (err < 0) goto fail; if (offset[1] == 0 && ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) { @@ -1319,6 +1329,7 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) } if (unlikely(new_ni.blk_addr != NULL_ADDR)) { err = -EFSCORRUPTED; + dec_valid_node_count(sbi, dn->inode, !ofs); set_sbi_flag(sbi, SBI_NEED_FSCK); f2fs_handle_error(sbi, ERROR_INVALID_BLKADDR); goto fail; @@ -1345,7 +1356,6 @@ struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs) if (ofs == 0) inc_valid_inode_count(sbi); return page; - fail: clear_node_page_dirty(page); f2fs_put_page(page, 1); diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c index 4fd76e867e..6474b7338e 100644 --- a/fs/f2fs/segment.c +++ b/fs/f2fs/segment.c @@ -3559,6 +3559,8 @@ int f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page, if (segment_full) { if (type == CURSEG_COLD_DATA_PINNED && !((curseg->segno + 1) % sbi->segs_per_sec)) { + write_sum_page(sbi, curseg->sum_blk, + GET_SUM_BLOCK(sbi, curseg->segno)); reset_curseg_fields(curseg); goto skip_new_segment; } diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index a4bc26dfdb..2f75a7dfc3 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -2132,8 +2132,6 @@ static void default_options(struct f2fs_sb_info *sbi, bool remount) F2FS_OPTION(sbi).memory_mode = MEMORY_MODE_NORMAL; F2FS_OPTION(sbi).errors = MOUNT_ERRORS_CONTINUE; - sbi->sb->s_flags &= ~SB_INLINECRYPT; - set_opt(sbi, INLINE_XATTR); set_opt(sbi, INLINE_DATA); set_opt(sbi, INLINE_DENTRY); @@ -4131,9 +4129,15 @@ void f2fs_handle_critical_error(struct f2fs_sb_info *sbi, unsigned char reason, if (shutdown) set_sbi_flag(sbi, SBI_IS_SHUTDOWN); - /* continue filesystem operators if errors=continue */ - if (continue_fs || f2fs_readonly(sb)) + /* + * Continue filesystem operators if errors=continue. Should not set + * RO by shutdown, since RO bypasses thaw_super which can hang the + * system. 
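+ * A shutdown already set SBI_IS_SHUTDOWN above, so the early return
+ * here leaves the read-only remount to the other error paths.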
+ */ + if (continue_fs || f2fs_readonly(sb) || shutdown) { + f2fs_warn(sbi, "Stopped filesystem due to reason: %d", reason); return; + } f2fs_warn(sbi, "Remounting filesystem read-only"); /* diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index e4f17c53dd..d31853032a 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -2069,6 +2069,7 @@ static long wb_writeback(struct bdi_writeback *wb, struct inode *inode; long progress; struct blk_plug plug; + bool queued = false; blk_start_plug(&plug); for (;;) { @@ -2111,8 +2112,10 @@ static long wb_writeback(struct bdi_writeback *wb, dirtied_before = jiffies; trace_writeback_start(wb, work); - if (list_empty(&wb->b_io)) + if (list_empty(&wb->b_io)) { queue_io(wb, work, dirtied_before); + queued = true; + } if (work->sb) progress = writeback_sb_inodes(work->sb, wb, work); else @@ -2127,7 +2130,7 @@ static long wb_writeback(struct bdi_writeback *wb, * mean the overall work is done. So we keep looping as long * as made some progress on cleaning pages or inodes. */ - if (progress) { + if (progress || !queued) { spin_unlock(&wb->list_lock); continue; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 3ec8bb5e68..9eb191b5c4 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -1813,7 +1813,8 @@ static void fuse_resend(struct fuse_conn *fc) spin_unlock(&fc->lock); list_for_each_entry_safe(req, next, &to_queue, list) { - __set_bit(FR_PENDING, &req->flags); + set_bit(FR_PENDING, &req->flags); + clear_bit(FR_SENT, &req->flags); /* mark the request as resend request */ req->in.h.unique |= FUSE_UNIQUE_RESEND; } diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 34540f9d01..2507fe34cb 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -166,19 +166,45 @@ static bool glock_blocked_by_withdraw(struct gfs2_glock *gl) return true; } -void gfs2_glock_free(struct gfs2_glock *gl) +static void __gfs2_glock_free(struct gfs2_glock *gl) { - struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; - - gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0); rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms); smp_mb(); wake_up_glock(gl); call_rcu(&gl->gl_rcu, gfs2_glock_dealloc); +} + +void gfs2_glock_free(struct gfs2_glock *gl) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + __gfs2_glock_free(gl); + if (atomic_dec_and_test(&sdp->sd_glock_disposal)) + wake_up(&sdp->sd_kill_wait); +} + +void gfs2_glock_free_later(struct gfs2_glock *gl) { + struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + + spin_lock(&lru_lock); + list_add(&gl->gl_lru, &sdp->sd_dead_glocks); + spin_unlock(&lru_lock); if (atomic_dec_and_test(&sdp->sd_glock_disposal)) wake_up(&sdp->sd_kill_wait); } +static void gfs2_free_dead_glocks(struct gfs2_sbd *sdp) +{ + struct list_head *list = &sdp->sd_dead_glocks; + + while(!list_empty(list)) { + struct gfs2_glock *gl; + + gl = list_first_entry(list, struct gfs2_glock, gl_lru); + list_del_init(&gl->gl_lru); + __gfs2_glock_free(gl); + } +} + /** * gfs2_glock_hold() - increment reference count on glock * @gl: The glock to hold @@ -591,7 +617,6 @@ static void finish_xmote(struct gfs2_glock *gl, unsigned int ret) struct gfs2_holder *gh; unsigned state = ret & LM_OUT_ST_MASK; - spin_lock(&gl->gl_lockref.lock); trace_gfs2_glock_state_change(gl, state); state_change(gl, state); gh = find_first_waiter(gl); @@ -639,7 +664,6 @@ retry: gl->gl_target, state); GLOCK_BUG_ON(gl, 1); } - spin_unlock(&gl->gl_lockref.lock); return; } @@ -662,7 +686,6 @@ retry: } out: clear_bit(GLF_LOCK, &gl->gl_flags); - spin_unlock(&gl->gl_lockref.lock); } static bool 
is_system_glock(struct gfs2_glock *gl) @@ -690,6 +713,7 @@ __acquires(&gl->gl_lockref.lock) { const struct gfs2_glock_operations *glops = gl->gl_ops; struct gfs2_sbd *sdp = gl->gl_name.ln_sbd; + struct lm_lockstruct *ls = &sdp->sd_lockstruct; unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0); int ret; @@ -718,6 +742,9 @@ __acquires(&gl->gl_lockref.lock) (gl->gl_state == LM_ST_EXCLUSIVE) || (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB))) clear_bit(GLF_BLOCKING, &gl->gl_flags); + if (!glops->go_inval && !glops->go_sync) + goto skip_inval; + spin_unlock(&gl->gl_lockref.lock); if (glops->go_sync) { ret = glops->go_sync(gl); @@ -730,6 +757,7 @@ __acquires(&gl->gl_lockref.lock) fs_err(sdp, "Error %d syncing glock \n", ret); gfs2_dump_glock(NULL, gl, true); } + spin_lock(&gl->gl_lockref.lock); goto skip_inval; } } @@ -750,9 +778,10 @@ __acquires(&gl->gl_lockref.lock) glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA); clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); } + spin_lock(&gl->gl_lockref.lock); skip_inval: - gfs2_glock_hold(gl); + gl->gl_lockref.count++; /* * Check for an error encountered since we called go_sync and go_inval. * If so, we can't withdraw from the glock code because the withdraw @@ -794,31 +823,37 @@ skip_inval: */ clear_bit(GLF_LOCK, &gl->gl_flags); clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags); - gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); - goto out; + __gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD); + return; } else { clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags); } } - if (sdp->sd_lockstruct.ls_ops->lm_lock) { - /* lock_dlm */ - ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags); + if (ls->ls_ops->lm_lock) { + spin_unlock(&gl->gl_lockref.lock); + ret = ls->ls_ops->lm_lock(gl, target, lck_flags); + spin_lock(&gl->gl_lockref.lock); + if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED && target == LM_ST_UNLOCKED && - test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) { - finish_xmote(gl, target); - gfs2_glock_queue_work(gl, 0); + test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { + /* + * The lockspace has been released and the lock has + * been unlocked implicitly. + */ } else if (ret) { fs_err(sdp, "lm_lock ret %d\n", ret); - GLOCK_BUG_ON(gl, !gfs2_withdrawing_or_withdrawn(sdp)); + target = gl->gl_state | LM_OUT_ERROR; + } else { + /* The operation will be completed asynchronously. */ + return; } - } else { /* lock_nolock */ - finish_xmote(gl, target); - gfs2_glock_queue_work(gl, 0); } -out: - spin_lock(&gl->gl_lockref.lock); + + /* Complete the operation now. 
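+ * Reached for lock_nolock, for an implicit unlock during unmount, or
+ * after an lm_lock error; finish_xmote() runs under gl_lockref.lock.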
*/ + finish_xmote(gl, target); + __gfs2_glock_queue_work(gl, 0); } /** @@ -1071,11 +1106,12 @@ static void glock_work_func(struct work_struct *work) struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work); unsigned int drop_refs = 1; - if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { + spin_lock(&gl->gl_lockref.lock); + if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags)) { + clear_bit(GLF_REPLY_PENDING, &gl->gl_flags); finish_xmote(gl, gl->gl_reply); drop_refs++; } - spin_lock(&gl->gl_lockref.lock); if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) && gl->gl_state != LM_ST_UNLOCKED && gl->gl_demote_state != LM_ST_EXCLUSIVE) { @@ -2148,8 +2184,11 @@ static void thaw_glock(struct gfs2_glock *gl) return; if (!lockref_get_not_dead(&gl->gl_lockref)) return; + + spin_lock(&gl->gl_lockref.lock); set_bit(GLF_REPLY_PENDING, &gl->gl_flags); - gfs2_glock_queue_work(gl, 0); + __gfs2_glock_queue_work(gl, 0); + spin_unlock(&gl->gl_lockref.lock); } /** @@ -2225,6 +2264,8 @@ void gfs2_gl_hash_clear(struct gfs2_sbd *sdp) wait_event_timeout(sdp->sd_kill_wait, atomic_read(&sdp->sd_glock_disposal) == 0, HZ * 600); + gfs2_lm_unmount(sdp); + gfs2_free_dead_glocks(sdp); glock_hash_walk(dump_glock_func, sdp); } diff --git a/fs/gfs2/glock.h b/fs/gfs2/glock.h index 0114f3e0eb..86987a59a0 100644 --- a/fs/gfs2/glock.h +++ b/fs/gfs2/glock.h @@ -252,6 +252,7 @@ void gfs2_gl_dq_holders(struct gfs2_sbd *sdp); void gfs2_glock_thaw(struct gfs2_sbd *sdp); void gfs2_glock_add_to_lru(struct gfs2_glock *gl); void gfs2_glock_free(struct gfs2_glock *gl); +void gfs2_glock_free_later(struct gfs2_glock *gl); int __init gfs2_glock_init(void); void gfs2_glock_exit(void); diff --git a/fs/gfs2/glops.c b/fs/gfs2/glops.c index 45653cbc8a..e0e8dfeee7 100644 --- a/fs/gfs2/glops.c +++ b/fs/gfs2/glops.c @@ -82,6 +82,9 @@ static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync, GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count)); spin_unlock(&sdp->sd_ail_lock); gfs2_log_unlock(sdp); + + if (gfs2_withdrawing(sdp)) + gfs2_withdraw(sdp); } diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index 95a334d64d..60abd7050c 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -838,6 +838,7 @@ struct gfs2_sbd { /* For quiescing the filesystem */ struct gfs2_holder sd_freeze_gh; struct mutex sd_freeze_mutex; + struct list_head sd_dead_glocks; char sd_fsname[GFS2_FSNAME_LEN + 3 * sizeof(int) + 2]; char sd_table_name[GFS2_FSNAME_LEN]; diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index d1ac5d0679..e028e55e67 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -121,6 +121,11 @@ static void gdlm_ast(void *arg) struct gfs2_glock *gl = arg; unsigned ret = gl->gl_state; + /* If the glock is dead, we only react to a dlm_unlock() reply. 
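+ * That reply is recognized by sb_status == -DLM_EUNLOCK; any other
+ * callback for a dead glock is stale and is ignored.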
*/ + if (__lockref_is_dead(&gl->gl_lockref) && + gl->gl_lksb.sb_status != -DLM_EUNLOCK) + return; + gfs2_update_reply_times(gl); BUG_ON(gl->gl_lksb.sb_flags & DLM_SBF_DEMOTED); @@ -171,6 +176,9 @@ static void gdlm_bast(void *arg, int mode) { struct gfs2_glock *gl = arg; + if (__lockref_is_dead(&gl->gl_lockref)) + return; + switch (mode) { case DLM_LOCK_EX: gfs2_glock_cb(gl, LM_ST_UNLOCKED); @@ -291,8 +299,12 @@ static void gdlm_put_lock(struct gfs2_glock *gl) struct lm_lockstruct *ls = &sdp->sd_lockstruct; int error; - if (gl->gl_lksb.sb_lkid == 0) - goto out_free; + BUG_ON(!__lockref_is_dead(&gl->gl_lockref)); + + if (gl->gl_lksb.sb_lkid == 0) { + gfs2_glock_free(gl); + return; + } clear_bit(GLF_BLOCKING, &gl->gl_flags); gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); @@ -300,13 +312,17 @@ static void gdlm_put_lock(struct gfs2_glock *gl) gfs2_update_request_times(gl); /* don't want to call dlm if we've unmounted the lock protocol */ - if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) - goto out_free; + if (test_bit(DFL_UNMOUNT, &ls->ls_recover_flags)) { + gfs2_glock_free(gl); + return; + } /* don't want to skip dlm_unlock writing the lvb when lock has one */ if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) && - !gl->gl_lksb.sb_lvbptr) - goto out_free; + !gl->gl_lksb.sb_lvbptr) { + gfs2_glock_free_later(gl); + return; + } again: error = dlm_unlock(ls->ls_dlm, gl->gl_lksb.sb_lkid, DLM_LKF_VALBLK, @@ -321,10 +337,6 @@ again: gl->gl_name.ln_type, (unsigned long long)gl->gl_name.ln_number, error); } - return; - -out_free: - gfs2_glock_free(gl); } static void gdlm_cancel(struct gfs2_glock *gl) diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c index 572d58e862..cde7118599 100644 --- a/fs/gfs2/ops_fstype.c +++ b/fs/gfs2/ops_fstype.c @@ -136,6 +136,7 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb) atomic_set(&sdp->sd_log_in_flight, 0); init_waitqueue_head(&sdp->sd_log_flush_wait); mutex_init(&sdp->sd_freeze_mutex); + INIT_LIST_HEAD(&sdp->sd_dead_glocks); return sdp; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e5f7946634..2d780b4701 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -646,10 +646,7 @@ restart: gfs2_gl_hash_clear(sdp); truncate_inode_pages_final(&sdp->sd_aspace); gfs2_delete_debugfs_file(sdp); - /* Unmount the locking protocol */ - gfs2_lm_unmount(sdp); - /* At this point, we're through participating in the lockspace */ gfs2_sys_fs_del(sdp); free_sbd(sdp); } diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index f52141ce94..fc3ecb180a 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -350,7 +350,6 @@ int gfs2_withdraw(struct gfs2_sbd *sdp) fs_err(sdp, "telling LM to unmount\n"); lm->lm_unmount(sdp); } - set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags); fs_err(sdp, "File system withdrawn\n"); dump_stack(); clear_bit(SDF_WITHDRAW_IN_PROG, &sdp->sd_flags); diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 4e8e41c8b3..4ac6c8c403 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -909,11 +909,11 @@ static size_t iomap_write_end(struct iomap_iter *iter, loff_t pos, size_t len, static loff_t iomap_write_iter(struct iomap_iter *iter, struct iov_iter *i) { loff_t length = iomap_length(iter); - size_t chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER; loff_t pos = iter->pos; ssize_t written = 0; long status = 0; struct address_space *mapping = iter->inode->i_mapping; + size_t chunk = mapping_max_folio_size(mapping); unsigned int bdp_flags = (iter->flags & IOMAP_NOWAIT) ? 
BDP_ASYNC : 0; do { diff --git a/fs/jffs2/xattr.c b/fs/jffs2/xattr.c index 00224f3a8d..defb4162c3 100644 --- a/fs/jffs2/xattr.c +++ b/fs/jffs2/xattr.c @@ -1110,6 +1110,9 @@ int do_jffs2_setxattr(struct inode *inode, int xprefix, const char *xname, return rc; request = PAD(sizeof(struct jffs2_raw_xattr) + strlen(xname) + 1 + size); + if (request > c->sector_size - c->cleanmarker_size) + return -ERANGE; + rc = jffs2_reserve_space(c, request, &length, ALLOC_NORMAL, JFFS2_SUMMARY_XATTR_SIZE); if (rc) { diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c index 0fb7afac29..9987055293 100644 --- a/fs/jfs/xattr.c +++ b/fs/jfs/xattr.c @@ -557,9 +557,11 @@ static int ea_get(struct inode *inode, struct ea_buffer *ea_buf, int min_size) size_check: if (EALIST_SIZE(ea_buf->xattr) != ea_size) { + int size = min_t(int, EALIST_SIZE(ea_buf->xattr), ea_size); + printk(KERN_ERR "ea_get: invalid extended attribute\n"); print_hex_dump(KERN_ERR, "", DUMP_PREFIX_ADDRESS, 16, 1, - ea_buf->xattr, ea_size, 1); + ea_buf->xattr, size, 1); ea_release(inode, ea_buf); rc = -EIO; goto clean_up; diff --git a/fs/libfs.c b/fs/libfs.c index 3a6f2cb364..b635ee5adb 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -295,6 +295,18 @@ int simple_offset_add(struct offset_ctx *octx, struct dentry *dentry) return 0; } +static int simple_offset_replace(struct offset_ctx *octx, struct dentry *dentry, + long offset) +{ + int ret; + + ret = mtree_store(&octx->mt, offset, dentry, GFP_KERNEL); + if (ret) + return ret; + offset_set(dentry, offset); + return 0; +} + /** * simple_offset_remove - Remove an entry to a directory's offset map * @octx: directory offset ctx to be updated @@ -345,6 +357,36 @@ int simple_offset_empty(struct dentry *dentry) return ret; } +/** + * simple_offset_rename - handle directory offsets for rename + * @old_dir: parent directory of source entry + * @old_dentry: dentry of source entry + * @new_dir: parent_directory of destination entry + * @new_dentry: dentry of destination + * + * Caller provides appropriate serialization. + * + * User space expects the directory offset value of the replaced + * (new) directory entry to be unchanged after a rename. + * + * Returns zero on success, a negative errno value on failure. + */ +int simple_offset_rename(struct inode *old_dir, struct dentry *old_dentry, + struct inode *new_dir, struct dentry *new_dentry) +{ + struct offset_ctx *old_ctx = old_dir->i_op->get_offset_ctx(old_dir); + struct offset_ctx *new_ctx = new_dir->i_op->get_offset_ctx(new_dir); + long new_offset = dentry2offset(new_dentry); + + simple_offset_remove(old_ctx, old_dentry); + + if (new_offset) { + offset_set(new_dentry, 0); + return simple_offset_replace(new_ctx, old_dentry, new_offset); + } + return simple_offset_add(new_ctx, old_dentry); +} + /** * simple_offset_rename_exchange - exchange rename with directory offsets * @old_dir: parent of dentry being moved @@ -352,6 +394,9 @@ int simple_offset_empty(struct dentry *dentry) * @new_dir: destination parent * @new_dentry: destination dentry * + * This API preserves the directory offset values. Caller provides + * appropriate serialization. + * * Returns zero on success. Otherwise a negative errno is returned and the * rename is rolled back. 
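 * simple_offset_replace() stores a dentry at a caller-chosen offset,
 * overwriting whatever entry the maple tree already held at that index.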
 */
@@ -369,11 +414,11 @@ int simple_offset_rename_exchange(struct inode *old_dir,
 	simple_offset_remove(old_ctx, old_dentry);
 	simple_offset_remove(new_ctx, new_dentry);
-	ret = simple_offset_add(new_ctx, old_dentry);
+	ret = simple_offset_replace(new_ctx, old_dentry, new_index);
 	if (ret)
 		goto out_restore;
-	ret = simple_offset_add(old_ctx, new_dentry);
+	ret = simple_offset_replace(old_ctx, new_dentry, old_index);
 	if (ret) {
 		simple_offset_remove(new_ctx, old_dentry);
 		goto out_restore;
@@ -388,10 +433,8 @@ int simple_offset_rename_exchange(struct inode *old_dir,
 	return 0;
 
 out_restore:
-	offset_set(old_dentry, old_index);
-	mtree_store(&old_ctx->mt, old_index, old_dentry, GFP_KERNEL);
-	offset_set(new_dentry, new_index);
-	mtree_store(&new_ctx->mt, new_index, new_dentry, GFP_KERNEL);
+	(void)simple_offset_replace(old_ctx, old_dentry, old_index);
+	(void)simple_offset_replace(new_ctx, new_dentry, new_index);
 	return ret;
 }
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 267b622d92..912ad0a1df 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -163,7 +163,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
 	struct folio *folio;
 	enum netfs_how_to_modify howto;
 	enum netfs_folio_trace trace;
-	unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
+	unsigned int bdp_flags = (iocb->ki_flags & IOCB_NOWAIT) ? BDP_ASYNC : 0;
 	ssize_t written = 0, ret, ret2;
 	loff_t i_size, pos = iocb->ki_pos, from, to;
 	size_t max_chunk = PAGE_SIZE << MAX_PAGECACHE_ORDER;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index ac505671ef..bdd6cb33a3 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1802,9 +1802,10 @@ __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
 		if (parent != READ_ONCE(dentry->d_parent))
 			return -ECHILD;
 	} else {
-		/* Wait for unlink to complete */
+		/* Wait for unlink to complete - see unblock_revalidate() */
 		wait_var_event(&dentry->d_fsdata,
-			       dentry->d_fsdata != NFS_FSDATA_BLOCKED);
+			       smp_load_acquire(&dentry->d_fsdata)
+			       != NFS_FSDATA_BLOCKED);
 		parent = dget_parent(dentry);
 		ret = reval(d_inode(parent), dentry, flags);
 		dput(parent);
@@ -1817,6 +1818,29 @@ static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
 	return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
 }
+static void block_revalidate(struct dentry *dentry)
+{
+	/* old devname - just in case */
+	kfree(dentry->d_fsdata);
+
+	/* Any new reference that could lead to an open
+	 * will take ->d_lock in lookup_open() -> d_lookup().
+	 * Holding this lock ensures we cannot race with
+	 * __nfs_lookup_revalidate() and removes any need
+	 * for further barriers.
+	 */
+	lockdep_assert_held(&dentry->d_lock);
+
+	dentry->d_fsdata = NFS_FSDATA_BLOCKED;
+}
+
+static void unblock_revalidate(struct dentry *dentry)
+{
+	/* store_release ensures wait_var_event() sees the update */
+	smp_store_release(&dentry->d_fsdata, NULL);
+	wake_up_var(&dentry->d_fsdata);
+}
+
 /*
 * A weaker form of d_revalidate for revalidating just the d_inode(dentry)
 * when we don't really care about the dentry name.
This is called when a @@ -2501,15 +2525,12 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) spin_unlock(&dentry->d_lock); goto out; } - /* old devname */ - kfree(dentry->d_fsdata); - dentry->d_fsdata = NFS_FSDATA_BLOCKED; + block_revalidate(dentry); spin_unlock(&dentry->d_lock); error = nfs_safe_remove(dentry); nfs_dentry_remove_handle_error(dir, dentry, error); - dentry->d_fsdata = NULL; - wake_up_var(&dentry->d_fsdata); + unblock_revalidate(dentry); out: trace_nfs_unlink_exit(dir, dentry, error); return error; @@ -2616,8 +2637,7 @@ nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data) { struct dentry *new_dentry = data->new_dentry; - new_dentry->d_fsdata = NULL; - wake_up_var(&new_dentry->d_fsdata); + unblock_revalidate(new_dentry); } /* @@ -2679,11 +2699,6 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) || WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED)) goto out; - if (new_dentry->d_fsdata) { - /* old devname */ - kfree(new_dentry->d_fsdata); - new_dentry->d_fsdata = NULL; - } spin_lock(&new_dentry->d_lock); if (d_count(new_dentry) > 2) { @@ -2705,7 +2720,7 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, new_dentry = dentry; new_inode = NULL; } else { - new_dentry->d_fsdata = NFS_FSDATA_BLOCKED; + block_revalidate(new_dentry); must_unblock = true; spin_unlock(&new_dentry->d_lock); } @@ -2717,6 +2732,8 @@ int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir, task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry, must_unblock ? nfs_unblock_rename : NULL); if (IS_ERR(task)) { + if (must_unblock) + unblock_revalidate(new_dentry); error = PTR_ERR(task); goto out; } diff --git a/fs/nfs/filelayout/filelayout.c b/fs/nfs/filelayout/filelayout.c index ce8f8934bc..569ae4ec60 100644 --- a/fs/nfs/filelayout/filelayout.c +++ b/fs/nfs/filelayout/filelayout.c @@ -883,7 +883,7 @@ filelayout_pg_init_read(struct nfs_pageio_descriptor *pgio, NFS4_MAX_UINT64, IOMODE_READ, false, - GFP_KERNEL); + nfs_io_gfp_mask()); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; @@ -907,7 +907,7 @@ filelayout_pg_init_write(struct nfs_pageio_descriptor *pgio, NFS4_MAX_UINT64, IOMODE_RW, false, - GFP_NOFS); + nfs_io_gfp_mask()); if (IS_ERR(pgio->pg_lseg)) { pgio->pg_error = PTR_ERR(pgio->pg_lseg); pgio->pg_lseg = NULL; diff --git a/fs/nfs/fs_context.c b/fs/nfs/fs_context.c index d0a0956f8a..cac1157be2 100644 --- a/fs/nfs/fs_context.c +++ b/fs/nfs/fs_context.c @@ -1112,9 +1112,12 @@ static int nfs23_parse_monolithic(struct fs_context *fc, ctx->acdirmax = data->acdirmax; ctx->need_mount = false; - memcpy(sap, &data->addr, sizeof(data->addr)); - ctx->nfs_server.addrlen = sizeof(data->addr); - ctx->nfs_server.port = ntohs(data->addr.sin_port); + if (!is_remount_fc(fc)) { + memcpy(sap, &data->addr, sizeof(data->addr)); + ctx->nfs_server.addrlen = sizeof(data->addr); + ctx->nfs_server.port = ntohs(data->addr.sin_port); + } + if (sap->ss_family != AF_INET || !nfs_verify_server_address(sap)) goto out_no_address; diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 06253695fe..2ff0c35d80 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -710,9 +710,9 @@ unsigned long nfs_block_bits(unsigned long bsize, unsigned char *nrbitsp) if ((bsize & (bsize - 1)) || nrbitsp) { unsigned char nrbits; - for (nrbits = 31; nrbits && !(bsize & (1 << nrbits)); nrbits--) + for (nrbits = 31; nrbits && !(bsize & (1UL << nrbits)); nrbits--) ; - bsize = 1 << 
nrbits; + bsize = 1UL << nrbits; if (nrbitsp) *nrbitsp = nrbits; } diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index ea390db94b..3a816c4a6d 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -4023,6 +4023,23 @@ static void test_fs_location_for_trunking(struct nfs4_fs_location *location, } } +static bool _is_same_nfs4_pathname(struct nfs4_pathname *path1, + struct nfs4_pathname *path2) +{ + int i; + + if (path1->ncomponents != path2->ncomponents) + return false; + for (i = 0; i < path1->ncomponents; i++) { + if (path1->components[i].len != path2->components[i].len) + return false; + if (memcmp(path1->components[i].data, path2->components[i].data, + path1->components[i].len)) + return false; + } + return true; +} + static int _nfs4_discover_trunking(struct nfs_server *server, struct nfs_fh *fhandle) { @@ -4056,9 +4073,13 @@ static int _nfs4_discover_trunking(struct nfs_server *server, if (status) goto out_free_3; - for (i = 0; i < locations->nlocations; i++) + for (i = 0; i < locations->nlocations; i++) { + if (!_is_same_nfs4_pathname(&locations->fs_path, + &locations->locations[i].rootpath)) + continue; test_fs_location_for_trunking(&locations->locations[i], clp, server); + } out_free_3: kfree(locations->fattr); out_free_2: @@ -5456,7 +5477,7 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task, struct rpc_message *msg = &task->tk_msg; if (msg->rpc_proc == &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS] && - server->caps & NFS_CAP_READ_PLUS && task->tk_status == -ENOTSUPP) { + task->tk_status == -ENOTSUPP) { server->caps &= ~NFS_CAP_READ_PLUS; msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ]; rpc_restart_call_prepare(task); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 662e86ea3a..5b452411e8 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -2116,6 +2116,7 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred { struct nfs_client *clp = server->nfs_client; struct nfs4_fs_locations *locations = NULL; + struct nfs_fattr *fattr; struct inode *inode; struct page *page; int status, result; @@ -2125,19 +2126,16 @@ static int nfs4_try_migration(struct nfs_server *server, const struct cred *cred (unsigned long long)server->fsid.minor, clp->cl_hostname); - result = 0; page = alloc_page(GFP_KERNEL); locations = kmalloc(sizeof(struct nfs4_fs_locations), GFP_KERNEL); - if (page == NULL || locations == NULL) { - dprintk("<-- %s: no memory\n", __func__); - goto out; - } - locations->fattr = nfs_alloc_fattr(); - if (locations->fattr == NULL) { + fattr = nfs_alloc_fattr(); + if (page == NULL || locations == NULL || fattr == NULL) { dprintk("<-- %s: no memory\n", __func__); + result = 0; goto out; } + locations->fattr = fattr; inode = d_inode(server->super->s_root); result = nfs4_proc_get_locations(server, NFS_FH(inode), locations, page, cred); diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index ecd18bffee..4d23bb1d08 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -48,12 +48,10 @@ enum { NFSD_MaxBlkSize, NFSD_MaxConnections, NFSD_Filecache, -#ifdef CONFIG_NFSD_V4 NFSD_Leasetime, NFSD_Gracetime, NFSD_RecoveryDir, NFSD_V4EndGrace, -#endif NFSD_MaxReserved }; @@ -1360,7 +1358,9 @@ static int nfsd_fill_super(struct super_block *sb, struct fs_context *fc) #ifdef CONFIG_NFSD_V4 [NFSD_Leasetime] = {"nfsv4leasetime", &transaction_ops, S_IWUSR|S_IRUSR}, [NFSD_Gracetime] = {"nfsv4gracetime", &transaction_ops, S_IWUSR|S_IRUSR}, +#ifdef CONFIG_NFSD_LEGACY_CLIENT_TRACKING [NFSD_RecoveryDir] = {"nfsv4recoverydir", &transaction_ops, 
S_IWUSR|S_IRUSR}, +#endif [NFSD_V4EndGrace] = {"v4_end_grace", &transaction_ops, S_IWUSR|S_IRUGO}, #endif /* last one */ {""} diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index 40fecf7b22..0b75305fb5 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -573,7 +573,7 @@ fh_compose(struct svc_fh *fhp, struct svc_export *exp, struct dentry *dentry, _fh_update(fhp, exp, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) { fh_put(fhp); - return nfserr_opnotsupp; + return nfserr_stale; } return 0; @@ -599,7 +599,7 @@ fh_update(struct svc_fh *fhp) _fh_update(fhp, fhp->fh_export, dentry); if (fhp->fh_handle.fh_fileid_type == FILEID_INVALID) - return nfserr_opnotsupp; + return nfserr_stale; return 0; out_bad: printk(KERN_ERR "fh_update: fh not verified!\n"); diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c index aee40db7a0..35e6c55a0d 100644 --- a/fs/nilfs2/dir.c +++ b/fs/nilfs2/dir.c @@ -608,7 +608,7 @@ int nilfs_empty_dir(struct inode *inode) kaddr = nilfs_get_folio(inode, i, &folio); if (IS_ERR(kaddr)) - continue; + return 0; de = (struct nilfs_dir_entry *)kaddr; kaddr += nilfs_last_byte(inode, i) - NILFS_DIR_REC_LEN(1); diff --git a/fs/nilfs2/ioctl.c b/fs/nilfs2/ioctl.c index f1a01c191c..8be471ce4f 100644 --- a/fs/nilfs2/ioctl.c +++ b/fs/nilfs2/ioctl.c @@ -60,7 +60,7 @@ static int nilfs_ioctl_wrap_copy(struct the_nilfs *nilfs, if (argv->v_nmembs == 0) return 0; - if (argv->v_size > PAGE_SIZE) + if ((size_t)argv->v_size > PAGE_SIZE) return -EINVAL; /* diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index aa5290cb74..a1b011cb4b 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -1652,6 +1652,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) if (bh->b_folio != bd_folio) { if (bd_folio) { folio_lock(bd_folio); + folio_wait_writeback(bd_folio); folio_clear_dirty_for_io(bd_folio); folio_start_writeback(bd_folio); folio_unlock(bd_folio); @@ -1665,6 +1666,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) if (bh == segbuf->sb_super_root) { if (bh->b_folio != bd_folio) { folio_lock(bd_folio); + folio_wait_writeback(bd_folio); folio_clear_dirty_for_io(bd_folio); folio_start_writeback(bd_folio); folio_unlock(bd_folio); @@ -1681,6 +1683,7 @@ static void nilfs_segctor_prepare_write(struct nilfs_sc_info *sci) } if (bd_folio) { folio_lock(bd_folio); + folio_wait_writeback(bd_folio); folio_clear_dirty_for_io(bd_folio); folio_start_writeback(bd_folio); folio_unlock(bd_folio); @@ -2124,8 +2127,10 @@ static void nilfs_segctor_start_timer(struct nilfs_sc_info *sci) { spin_lock(&sci->sc_state_lock); if (!(sci->sc_state & NILFS_SEGCTOR_COMMIT)) { - sci->sc_timer.expires = jiffies + sci->sc_interval; - add_timer(&sci->sc_timer); + if (sci->sc_task) { + sci->sc_timer.expires = jiffies + sci->sc_interval; + add_timer(&sci->sc_timer); + } sci->sc_state |= NILFS_SEGCTOR_COMMIT; } spin_unlock(&sci->sc_state_lock); @@ -2172,19 +2177,36 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci) struct nilfs_segctor_wait_request wait_req; int err = 0; - spin_lock(&sci->sc_state_lock); init_wait(&wait_req.wq); wait_req.err = 0; atomic_set(&wait_req.done, 0); + init_waitqueue_entry(&wait_req.wq, current); + + /* + * To prevent a race issue where completion notifications from the + * log writer thread are missed, increment the request sequence count + * "sc_seq_request" and insert a wait queue entry using the current + * sequence number into the "sc_wait_request" queue at the same time + * within the lock section of "sc_state_lock". 
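+ * Enqueueing the waiter before dropping the lock means a wakeup for
+ * this sequence number can no longer be missed.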
+ */ + spin_lock(&sci->sc_state_lock); wait_req.seq = ++sci->sc_seq_request; + add_wait_queue(&sci->sc_wait_request, &wait_req.wq); spin_unlock(&sci->sc_state_lock); - init_waitqueue_entry(&wait_req.wq, current); - add_wait_queue(&sci->sc_wait_request, &wait_req.wq); - set_current_state(TASK_INTERRUPTIBLE); wake_up(&sci->sc_wait_daemon); for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + /* + * Synchronize only while the log writer thread is alive. + * Leave flushing out after the log writer thread exits to + * the cleanup work in nilfs_segctor_destroy(). + */ + if (!sci->sc_task) + break; + if (atomic_read(&wait_req.done)) { err = wait_req.err; break; @@ -2200,7 +2222,7 @@ static int nilfs_segctor_sync(struct nilfs_sc_info *sci) return err; } -static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) +static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err, bool force) { struct nilfs_segctor_wait_request *wrq, *n; unsigned long flags; @@ -2208,7 +2230,7 @@ static void nilfs_segctor_wakeup(struct nilfs_sc_info *sci, int err) spin_lock_irqsave(&sci->sc_wait_request.lock, flags); list_for_each_entry_safe(wrq, n, &sci->sc_wait_request.head, wq.entry) { if (!atomic_read(&wrq->done) && - nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq)) { + (force || nilfs_cnt32_ge(sci->sc_seq_done, wrq->seq))) { wrq->err = err; atomic_set(&wrq->done, 1); } @@ -2326,10 +2348,21 @@ int nilfs_construct_dsync_segment(struct super_block *sb, struct inode *inode, */ static void nilfs_segctor_accept(struct nilfs_sc_info *sci) { + bool thread_is_alive; + spin_lock(&sci->sc_state_lock); sci->sc_seq_accepted = sci->sc_seq_request; + thread_is_alive = (bool)sci->sc_task; spin_unlock(&sci->sc_state_lock); - del_timer_sync(&sci->sc_timer); + + /* + * This function does not race with the log writer thread's + * termination. Therefore, deleting sc_timer, which should not be + * done after the log writer thread exits, can be done safely outside + * the area protected by sc_state_lock. + */ + if (thread_is_alive) + del_timer_sync(&sci->sc_timer); } /** @@ -2346,7 +2379,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) if (mode == SC_LSEG_SR) { sci->sc_state &= ~NILFS_SEGCTOR_COMMIT; sci->sc_seq_done = sci->sc_seq_accepted; - nilfs_segctor_wakeup(sci, err); + nilfs_segctor_wakeup(sci, err, false); sci->sc_flush_request = 0; } else { if (mode == SC_FLUSH_FILE) @@ -2355,7 +2388,7 @@ static void nilfs_segctor_notify(struct nilfs_sc_info *sci, int mode, int err) sci->sc_flush_request &= ~FLUSH_DAT_BIT; /* re-enable timer if checkpoint creation was not done */ - if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && + if ((sci->sc_state & NILFS_SEGCTOR_COMMIT) && sci->sc_task && time_before(jiffies, sci->sc_timer.expires)) add_timer(&sci->sc_timer); } @@ -2545,6 +2578,7 @@ static int nilfs_segctor_thread(void *arg) int timeout = 0; sci->sc_timer_task = current; + timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); /* start sync. */ sci->sc_task = current; @@ -2612,6 +2646,7 @@ static int nilfs_segctor_thread(void *arg) end_thread: /* end sync. 
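 * The timer is now owned by this thread: it was set up at thread start
 * and timer_shutdown_sync() below waits for any pending callback.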
*/ sci->sc_task = NULL; + timer_shutdown_sync(&sci->sc_timer); wake_up(&sci->sc_wait_task); /* for nilfs_segctor_kill_thread() */ spin_unlock(&sci->sc_state_lock); return 0; @@ -2675,7 +2710,6 @@ static struct nilfs_sc_info *nilfs_segctor_new(struct super_block *sb, INIT_LIST_HEAD(&sci->sc_gc_inodes); INIT_LIST_HEAD(&sci->sc_iput_queue); INIT_WORK(&sci->sc_iput_work, nilfs_iput_work_func); - timer_setup(&sci->sc_timer, nilfs_construction_timeout, 0); sci->sc_interval = HZ * NILFS_SC_DEFAULT_TIMEOUT; sci->sc_mjcp_freq = HZ * NILFS_SC_DEFAULT_SR_FREQ; @@ -2729,6 +2763,13 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) || sci->sc_seq_request != sci->sc_seq_done); spin_unlock(&sci->sc_state_lock); + /* + * Forcibly wake up tasks waiting in nilfs_segctor_sync(), which can + * be called from delayed iput() via nilfs_evict_inode() and can race + * with the above log writer thread termination. + */ + nilfs_segctor_wakeup(sci, 0, true); + if (flush_work(&sci->sc_iput_work)) flag = true; @@ -2754,7 +2795,6 @@ static void nilfs_segctor_destroy(struct nilfs_sc_info *sci) down_write(&nilfs->ns_segctor_sem); - timer_shutdown_sync(&sci->sc_timer); kfree(sci); } diff --git a/fs/ntfs3/dir.c b/fs/ntfs3/dir.c index 263635199b..1937e8e612 100644 --- a/fs/ntfs3/dir.c +++ b/fs/ntfs3/dir.c @@ -475,6 +475,7 @@ static int ntfs_readdir(struct file *file, struct dir_context *ctx) vbo = (u64)bit << index_bits; if (vbo >= i_size) { ntfs_inode_err(dir, "Looks like your dir is corrupt"); + ctx->pos = eod; err = -EINVAL; goto out; } diff --git a/fs/ntfs3/fslog.c b/fs/ntfs3/fslog.c index 855519713b..4085fe30bf 100644 --- a/fs/ntfs3/fslog.c +++ b/fs/ntfs3/fslog.c @@ -1184,7 +1184,8 @@ out: static int log_read_rst(struct ntfs_log *log, bool first, struct restart_info *info) { - u32 skip, vbo; + u32 skip; + u64 vbo; struct RESTART_HDR *r_page = NULL; /* Determine which restart area we are looking for. */ diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c index daabaad63a..14284f0ed4 100644 --- a/fs/ntfs3/index.c +++ b/fs/ntfs3/index.c @@ -1533,6 +1533,11 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, goto out1; } + if (data_size <= le64_to_cpu(alloc->nres.data_size)) { + /* Reuse index. */ + goto out; + } + /* Increase allocation. 
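 * Reached only when the current allocation is too small; equal or
 * larger allocations take the "Reuse index" path above.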
*/ err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len, &indx->alloc_run, data_size, &data_size, true, @@ -1546,6 +1551,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni, if (in->name == I30_NAME) i_size_write(&ni->vfs_inode, data_size); +out: *vbn = bit << indx->idx2vbn_bits; return 0; diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index d273eda1cf..90d4e9a985 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -37,7 +37,7 @@ static struct inode *ntfs_read_mft(struct inode *inode, bool is_dir; unsigned long ino = inode->i_ino; u32 rp_fa = 0, asize, t32; - u16 roff, rsize, names = 0; + u16 roff, rsize, names = 0, links = 0; const struct ATTR_FILE_NAME *fname = NULL; const struct INDEX_ROOT *root; struct REPARSE_DATA_BUFFER rp; // 0x18 bytes @@ -200,11 +200,12 @@ next_attr: rsize < SIZEOF_ATTRIBUTE_FILENAME) goto out; + names += 1; fname = Add2Ptr(attr, roff); if (fname->type == FILE_NAME_DOS) goto next_attr; - names += 1; + links += 1; if (name && name->len == fname->name_len && !ntfs_cmp_names_cpu(name, (struct le_str *)&fname->name_len, NULL, false)) @@ -429,7 +430,7 @@ end_enum: ni->mi.dirty = true; } - set_nlink(inode, names); + set_nlink(inode, links); if (S_ISDIR(mode)) { ni->std_fa |= FILE_ATTRIBUTE_DIRECTORY; @@ -576,13 +577,18 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, clear_buffer_uptodate(bh); if (is_resident(ni)) { - ni_lock(ni); - err = attr_data_read_resident(ni, &folio->page); - ni_unlock(ni); - - if (!err) - set_buffer_uptodate(bh); + bh->b_blocknr = RESIDENT_LCN; bh->b_size = block_size; + if (!folio) { + err = 0; + } else { + ni_lock(ni); + err = attr_data_read_resident(ni, &folio->page); + ni_unlock(ni); + + if (!err) + set_buffer_uptodate(bh); + } return err; } diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h index 9c7478150a..3d6143c7ab 100644 --- a/fs/ntfs3/ntfs.h +++ b/fs/ntfs3/ntfs.h @@ -59,7 +59,7 @@ struct GUID { struct cpu_str { u8 len; u8 unused; - u16 name[10]; + u16 name[]; }; struct le_str { diff --git a/fs/ntfs3/record.c b/fs/ntfs3/record.c index 6aa3a9d44d..6c76503edc 100644 --- a/fs/ntfs3/record.c +++ b/fs/ntfs3/record.c @@ -534,16 +534,9 @@ bool mi_remove_attr(struct ntfs_inode *ni, struct mft_inode *mi, if (aoff + asize > used) return false; - if (ni && is_attr_indexed(attr)) { + if (ni && is_attr_indexed(attr) && attr->type == ATTR_NAME) { u16 links = le16_to_cpu(ni->mi.mrec->hard_links); - struct ATTR_FILE_NAME *fname = - attr->type != ATTR_NAME ? - NULL : - resident_data_ex(attr, - SIZEOF_ATTRIBUTE_FILENAME); - if (fname && fname->type == FILE_NAME_DOS) { - /* Do not decrease links count deleting DOS name. */ - } else if (!links) { + if (!links) { /* minor error. Not critical. 
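+ * links is already zero here, so there is nothing left to decrement.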
*/ } else { ni->mi.mrec->hard_links = cpu_to_le16(links - 1); diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c index b26d95a8d3..0ed534f759 100644 --- a/fs/ntfs3/super.c +++ b/fs/ntfs3/super.c @@ -1861,8 +1861,6 @@ static int __init init_ntfs_fs(void) { int err; - pr_info("ntfs3: Max link count %u\n", NTFS_LINK_MAX); - if (IS_ENABLED(CONFIG_NTFS3_FS_POSIX_ACL)) pr_info("ntfs3: Enabled Linux POSIX ACLs support\n"); if (IS_ENABLED(CONFIG_NTFS3_64BIT_CLUSTER)) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 0da8e7bd32..ccc57038a9 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1936,6 +1936,8 @@ static int __ocfs2_change_file_space(struct file *file, struct inode *inode, inode_lock(inode); + /* Wait all existing dio workers, newcomers will block on i_rwsem */ + inode_dio_wait(inode); /* * This prevents concurrent writes on other nodes */ diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c index 604fea3a26..86807086b2 100644 --- a/fs/ocfs2/journal.c +++ b/fs/ocfs2/journal.c @@ -479,12 +479,6 @@ bail: return status; } - -struct ocfs2_triggers { - struct jbd2_buffer_trigger_type ot_triggers; - int ot_offset; -}; - static inline struct ocfs2_triggers *to_ocfs2_trigger(struct jbd2_buffer_trigger_type *triggers) { return container_of(triggers, struct ocfs2_triggers, ot_triggers); @@ -548,85 +542,76 @@ static void ocfs2_db_frozen_trigger(struct jbd2_buffer_trigger_type *triggers, static void ocfs2_abort_trigger(struct jbd2_buffer_trigger_type *triggers, struct buffer_head *bh) { + struct ocfs2_triggers *ot = to_ocfs2_trigger(triggers); + mlog(ML_ERROR, "ocfs2_abort_trigger called by JBD2. bh = 0x%lx, " "bh->b_blocknr = %llu\n", (unsigned long)bh, (unsigned long long)bh->b_blocknr); - ocfs2_error(bh->b_assoc_map->host->i_sb, + ocfs2_error(ot->sb, "JBD2 has aborted our journal, ocfs2 cannot continue\n"); } -static struct ocfs2_triggers di_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_dinode, i_check), -}; - -static struct ocfs2_triggers eb_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_extent_block, h_check), -}; - -static struct ocfs2_triggers rb_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_refcount_block, rf_check), -}; - -static struct ocfs2_triggers gd_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_group_desc, bg_check), -}; - -static struct ocfs2_triggers db_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_db_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, -}; +static void ocfs2_setup_csum_triggers(struct super_block *sb, + enum ocfs2_journal_trigger_type type, + struct ocfs2_triggers *ot) +{ + BUG_ON(type >= OCFS2_JOURNAL_TRIGGER_COUNT); -static struct ocfs2_triggers xb_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_xattr_block, xb_check), -}; + switch (type) { + case OCFS2_JTR_DI: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_dinode, i_check); + break; + case OCFS2_JTR_EB: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_extent_block, h_check); + break; + case OCFS2_JTR_RB: + ot->ot_triggers.t_frozen = 
ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_refcount_block, rf_check); + break; + case OCFS2_JTR_GD: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_group_desc, bg_check); + break; + case OCFS2_JTR_DB: + ot->ot_triggers.t_frozen = ocfs2_db_frozen_trigger; + break; + case OCFS2_JTR_XB: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_xattr_block, xb_check); + break; + case OCFS2_JTR_DQ: + ot->ot_triggers.t_frozen = ocfs2_dq_frozen_trigger; + break; + case OCFS2_JTR_DR: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check); + break; + case OCFS2_JTR_DL: + ot->ot_triggers.t_frozen = ocfs2_frozen_trigger; + ot->ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check); + break; + case OCFS2_JTR_NONE: + /* To make compiler happy... */ + return; + } -static struct ocfs2_triggers dq_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_dq_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, -}; + ot->ot_triggers.t_abort = ocfs2_abort_trigger; + ot->sb = sb; +} -static struct ocfs2_triggers dr_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_dx_root_block, dr_check), -}; +void ocfs2_initialize_journal_triggers(struct super_block *sb, + struct ocfs2_triggers triggers[]) +{ + enum ocfs2_journal_trigger_type type; -static struct ocfs2_triggers dl_triggers = { - .ot_triggers = { - .t_frozen = ocfs2_frozen_trigger, - .t_abort = ocfs2_abort_trigger, - }, - .ot_offset = offsetof(struct ocfs2_dx_leaf, dl_check), -}; + for (type = OCFS2_JTR_DI; type < OCFS2_JOURNAL_TRIGGER_COUNT; type++) + ocfs2_setup_csum_triggers(sb, type, &triggers[type]); +} static int __ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci, @@ -708,56 +693,91 @@ static int __ocfs2_journal_access(handle_t *handle, int ocfs2_journal_access_di(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &di_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DI], + type); } int ocfs2_journal_access_eb(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &eb_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_EB], + type); } int ocfs2_journal_access_rb(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &rb_triggers, + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_RB], type); } int ocfs2_journal_access_gd(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &gd_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_GD], + type); } int ocfs2_journal_access_db(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, 
&db_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DB], + type); } int ocfs2_journal_access_xb(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &xb_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_XB], + type); } int ocfs2_journal_access_dq(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &dq_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DQ], + type); } int ocfs2_journal_access_dr(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &dr_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DR], + type); } int ocfs2_journal_access_dl(handle_t *handle, struct ocfs2_caching_info *ci, struct buffer_head *bh, int type) { - return __ocfs2_journal_access(handle, ci, bh, &dl_triggers, type); + struct ocfs2_super *osb = OCFS2_SB(ocfs2_metadata_cache_get_super(ci)); + + return __ocfs2_journal_access(handle, ci, bh, + &osb->s_journal_triggers[OCFS2_JTR_DL], + type); } int ocfs2_journal_access(handle_t *handle, struct ocfs2_caching_info *ci, @@ -778,13 +798,15 @@ void ocfs2_journal_dirty(handle_t *handle, struct buffer_head *bh) if (!is_handle_aborted(handle)) { journal_t *journal = handle->h_transaction->t_journal; - mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed. " - "Aborting transaction and journal.\n"); + mlog(ML_ERROR, "jbd2_journal_dirty_metadata failed: " + "handle type %u started at line %u, credits %u/%u " + "errcode %d. Aborting transaction and journal.\n", + handle->h_type, handle->h_line_no, + handle->h_requested_credits, + jbd2_handle_buffer_credits(handle), status); handle->h_err = status; jbd2_journal_abort_handle(handle); jbd2_journal_abort(journal, status); - ocfs2_abort(bh->b_assoc_map->host->i_sb, - "Journal already aborted.\n"); } } } diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c index c803c10dd9..33aeaaa056 100644 --- a/fs/ocfs2/localalloc.c +++ b/fs/ocfs2/localalloc.c @@ -863,14 +863,8 @@ static int ocfs2_local_alloc_find_clear_bits(struct ocfs2_super *osb, numfound = bitoff = startoff = 0; left = le32_to_cpu(alloc->id1.bitmap1.i_total); - while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) != -1) { - if (bitoff == left) { - /* mlog(0, "bitoff (%d) == left", bitoff); */ - break; - } - /* mlog(0, "Found a zero: bitoff = %d, startoff = %d, " - "numfound = %d\n", bitoff, startoff, numfound);*/ - + while ((bitoff = ocfs2_find_next_zero_bit(bitmap, left, startoff)) < + left) { /* Ok, we found a zero bit... is it contig. 
or do we * start over?*/ if (bitoff == startoff) { @@ -976,9 +970,9 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, start = count = 0; left = le32_to_cpu(alloc->id1.bitmap1.i_total); - while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) - != -1) { - if ((bit_off < left) && (bit_off == start)) { + while ((bit_off = ocfs2_find_next_zero_bit(bitmap, left, start)) < + left) { + if (bit_off == start) { count++; start++; continue; @@ -1002,8 +996,7 @@ static int ocfs2_sync_local_to_main(struct ocfs2_super *osb, goto bail; } } - if (bit_off >= left) - break; + count = 1; start = bit_off + 1; } diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 9221a33f91..4d1ea8703f 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -566,7 +566,7 @@ static int __ocfs2_mknod_locked(struct inode *dir, fe->i_last_eb_blk = 0; strcpy(fe->i_signature, OCFS2_INODE_SIGNATURE); fe->i_flags |= cpu_to_le32(OCFS2_VALID_FL); - ktime_get_real_ts64(&ts); + ktime_get_coarse_real_ts64(&ts); fe->i_atime = fe->i_ctime = fe->i_mtime = cpu_to_le64(ts.tv_sec); fe->i_mtime_nsec = fe->i_ctime_nsec = fe->i_atime_nsec = @@ -797,6 +797,7 @@ static int ocfs2_link(struct dentry *old_dentry, ocfs2_set_links_count(fe, inode->i_nlink); fe->i_ctime = cpu_to_le64(inode_get_ctime_sec(inode)); fe->i_ctime_nsec = cpu_to_le32(inode_get_ctime_nsec(inode)); + ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_journal_dirty(handle, fe_bh); err = ocfs2_add_entry(handle, dentry, inode, @@ -993,6 +994,7 @@ static int ocfs2_unlink(struct inode *dir, drop_nlink(inode); drop_nlink(inode); ocfs2_set_links_count(fe, inode->i_nlink); + ocfs2_update_inode_fsync_trans(handle, inode, 0); ocfs2_journal_dirty(handle, fe_bh); inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h index a503c553ba..8fe826143d 100644 --- a/fs/ocfs2/ocfs2.h +++ b/fs/ocfs2/ocfs2.h @@ -284,6 +284,30 @@ enum ocfs2_mount_options #define OCFS2_OSB_ERROR_FS 0x0004 #define OCFS2_DEFAULT_ATIME_QUANTUM 60 +struct ocfs2_triggers { + struct jbd2_buffer_trigger_type ot_triggers; + int ot_offset; + struct super_block *sb; +}; + +enum ocfs2_journal_trigger_type { + OCFS2_JTR_DI, + OCFS2_JTR_EB, + OCFS2_JTR_RB, + OCFS2_JTR_GD, + OCFS2_JTR_DB, + OCFS2_JTR_XB, + OCFS2_JTR_DQ, + OCFS2_JTR_DR, + OCFS2_JTR_DL, + OCFS2_JTR_NONE /* This must be the last entry */ +}; + +#define OCFS2_JOURNAL_TRIGGER_COUNT OCFS2_JTR_NONE + +void ocfs2_initialize_journal_triggers(struct super_block *sb, + struct ocfs2_triggers triggers[]); + struct ocfs2_journal; struct ocfs2_slot_info; struct ocfs2_recovery_map; @@ -351,6 +375,9 @@ struct ocfs2_super struct ocfs2_journal *journal; unsigned long osb_commit_interval; + /* Journal triggers for checksum */ + struct ocfs2_triggers s_journal_triggers[OCFS2_JOURNAL_TRIGGER_COUNT]; + struct delayed_work la_enable_wq; /* diff --git a/fs/ocfs2/reservations.c b/fs/ocfs2/reservations.c index a9d1296d73..1fe61974d9 100644 --- a/fs/ocfs2/reservations.c +++ b/fs/ocfs2/reservations.c @@ -414,7 +414,7 @@ static int ocfs2_resmap_find_free_bits(struct ocfs2_reservation_map *resmap, start = search_start; while ((offset = ocfs2_find_next_zero_bit(bitmap, resmap->m_bitmap_len, - start)) != -1) { + start)) < resmap->m_bitmap_len) { /* Search reached end of the region */ if (offset >= (search_start + search_len)) break; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index 166c8918c8..9619984153 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -1290,10 +1290,8 @@ static int 
ocfs2_block_group_find_clear_bits(struct ocfs2_super *osb, found = start = best_offset = best_size = 0; bitmap = bg->bg_bitmap; - while((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) != -1) { - if (offset == total_bits) - break; - + while ((offset = ocfs2_find_next_zero_bit(bitmap, total_bits, start)) < + total_bits) { if (!ocfs2_test_bg_bit_allocatable(bg_bh, offset)) { /* We found a zero, but we can't use it as it * hasn't been put to disk yet! */ diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index 8aabaed2c1..afee70125a 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1075,9 +1075,11 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent) debugfs_create_file("fs_state", S_IFREG|S_IRUSR, osb->osb_debug_root, osb, &ocfs2_osb_debug_fops); - if (ocfs2_meta_ecc(osb)) + if (ocfs2_meta_ecc(osb)) { + ocfs2_initialize_journal_triggers(sb, osb->s_journal_triggers); ocfs2_blockcheck_stats_debugfs_install( &osb->osb_ecc_stats, osb->osb_debug_root); + } status = ocfs2_mount_volume(sb); if (status < 0) diff --git a/fs/openpromfs/inode.c b/fs/openpromfs/inode.c index 4a0779e3ef..a7b527ea50 100644 --- a/fs/openpromfs/inode.c +++ b/fs/openpromfs/inode.c @@ -355,10 +355,10 @@ static struct inode *openprom_iget(struct super_block *sb, ino_t ino) return inode; } -static int openprom_remount(struct super_block *sb, int *flags, char *data) +static int openpromfs_reconfigure(struct fs_context *fc) { - sync_filesystem(sb); - *flags |= SB_NOATIME; + sync_filesystem(fc->root->d_sb); + fc->sb_flags |= SB_NOATIME; return 0; } @@ -366,7 +366,6 @@ static const struct super_operations openprom_sops = { .alloc_inode = openprom_alloc_inode, .free_inode = openprom_free_inode, .statfs = simple_statfs, - .remount_fs = openprom_remount, }; static int openprom_fill_super(struct super_block *s, struct fs_context *fc) @@ -415,6 +414,7 @@ static int openpromfs_get_tree(struct fs_context *fc) static const struct fs_context_operations openpromfs_context_ops = { .get_tree = openpromfs_get_tree, + .reconfigure = openpromfs_reconfigure, }; static int openpromfs_init_fs_context(struct fs_context *fc) diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 0f8b4a7192..02d89a285d 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -327,9 +327,6 @@ static int ovl_create_upper(struct dentry *dentry, struct inode *inode, struct dentry *newdentry; int err; - if (!attr->hardlink && !IS_POSIXACL(udir)) - attr->mode &= ~current_umask(); - inode_lock_nested(udir, I_MUTEX_PARENT); newdentry = ovl_create_real(ofs, udir, ovl_lookup_upper(ofs, dentry->d_name.name, diff --git a/fs/overlayfs/export.c b/fs/overlayfs/export.c index 063409069f..5868cb2229 100644 --- a/fs/overlayfs/export.c +++ b/fs/overlayfs/export.c @@ -181,6 +181,10 @@ static int ovl_check_encode_origin(struct dentry *dentry) struct ovl_fs *ofs = OVL_FS(dentry->d_sb); bool decodable = ofs->config.nfs_export; + /* No upper layer? */ + if (!ovl_upper_mnt(ofs)) + return 1; + /* Lower file handle for non-upper non-decodable */ if (!ovl_dentry_upper(dentry) && !decodable) return 1; @@ -209,7 +213,7 @@ static int ovl_check_encode_origin(struct dentry *dentry) * ovl_connect_layer() will try to make origin's layer "connected" by * copying up a "connectable" ancestor. 
*/ - if (d_is_dir(dentry) && ovl_upper_mnt(ofs) && decodable) + if (d_is_dir(dentry) && decodable) return ovl_connect_layer(dentry); /* Lower file handle for indexed and non-upper dir/non-dir */ diff --git a/fs/proc/base.c b/fs/proc/base.c index 18550c071d..72a1acd036 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3214,7 +3214,7 @@ static int proc_pid_ksm_stat(struct seq_file *m, struct pid_namespace *ns, mm = get_task_mm(task); if (mm) { seq_printf(m, "ksm_rmap_items %lu\n", mm->ksm_rmap_items); - seq_printf(m, "ksm_zero_pages %lu\n", mm->ksm_zero_pages); + seq_printf(m, "ksm_zero_pages %ld\n", mm_ksm_zero_pages(mm)); seq_printf(m, "ksm_merging_pages %lu\n", mm->ksm_merging_pages); seq_printf(m, "ksm_process_profit %ld\n", ksm_process_profit(mm)); mmput(mm); diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 6e72e5ad42..f4b1c8b42a 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -74,7 +74,18 @@ out: return 0; } -static int proc_fdinfo_access_allowed(struct inode *inode) +static int seq_fdinfo_open(struct inode *inode, struct file *file) +{ + return single_open(file, seq_show, inode); +} + +/** + * Shared /proc/pid/fdinfo and /proc/pid/fdinfo/fd permission helper to ensure + * that the current task has PTRACE_MODE_READ in addition to the normal + * POSIX-like checks. + */ +static int proc_fdinfo_permission(struct mnt_idmap *idmap, struct inode *inode, + int mask) { bool allowed = false; struct task_struct *task = get_proc_task(inode); @@ -88,18 +99,13 @@ static int proc_fdinfo_access_allowed(struct inode *inode) if (!allowed) return -EACCES; - return 0; + return generic_permission(idmap, inode, mask); } -static int seq_fdinfo_open(struct inode *inode, struct file *file) -{ - int ret = proc_fdinfo_access_allowed(inode); - - if (ret) - return ret; - - return single_open(file, seq_show, inode); -} +static const struct inode_operations proc_fdinfo_file_inode_operations = { + .permission = proc_fdinfo_permission, + .setattr = proc_setattr, +}; static const struct file_operations proc_fdinfo_file_operations = { .open = seq_fdinfo_open, @@ -388,6 +394,8 @@ static struct dentry *proc_fdinfo_instantiate(struct dentry *dentry, ei = PROC_I(inode); ei->fd = data->fd; + inode->i_op = &proc_fdinfo_file_inode_operations; + inode->i_fop = &proc_fdinfo_file_operations; tid_fd_update_inode(task, inode, 0); @@ -407,23 +415,13 @@ static int proc_readfdinfo(struct file *file, struct dir_context *ctx) proc_fdinfo_instantiate); } -static int proc_open_fdinfo(struct inode *inode, struct file *file) -{ - int ret = proc_fdinfo_access_allowed(inode); - - if (ret) - return ret; - - return 0; -} - const struct inode_operations proc_fdinfo_inode_operations = { .lookup = proc_lookupfdinfo, + .permission = proc_fdinfo_permission, .setattr = proc_setattr, }; const struct file_operations proc_fdinfo_operations = { - .open = proc_open_fdinfo, .read = generic_read_dir, .iterate_shared = proc_readfdinfo, .llseek = generic_file_llseek, diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 102f48668c..a097c9a652 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -965,12 +965,17 @@ static int show_smaps_rollup(struct seq_file *m, void *v) break; /* Case 1 and 2 above */ - if (vma->vm_start >= last_vma_end) + if (vma->vm_start >= last_vma_end) { + smap_gather_stats(vma, &mss, 0); + last_vma_end = vma->vm_end; continue; + } /* Case 4 above */ - if (vma->vm_end > last_vma_end) + if (vma->vm_end > last_vma_end) { smap_gather_stats(vma, &mss, last_vma_end); + last_vma_end = vma->vm_end; + } } } for_each_vma(vmi, vma); 
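A note on the ocfs2 bitmap scans converted in the hunks above (localalloc.c, reservations.c, suballoc.c): ocfs2_find_next_zero_bit() wraps find_next_zero_bit(), which returns the bitmap size when no zero bit remains at or after the start offset; it never returns -1. The old "!= -1" loop conditions therefore could never terminate the scan on their own, and each caller had to open-code an extra "== size" escape inside the loop body. Bounding the while condition with "< size" folds both checks into one. A minimal sketch of the resulting idiom (illustrative only, not part of this diff):

	unsigned long bit, start = 0;

	/*
	 * find_next_zero_bit() returns "size" once the bitmap is
	 * exhausted, so the loop condition doubles as the
	 * termination check.
	 */
	while ((bit = find_next_zero_bit(bitmap, size, start)) < size) {
		/* "bit" is a zero bit strictly below "size"; consume it. */
		start = bit + 1;
	}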
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 1fb213f379..d06607a1f1 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -383,6 +383,8 @@ static ssize_t __read_vmcore(struct iov_iter *iter, loff_t *fpos) /* leave now if filled buffer already */ if (!iov_iter_count(iter)) return acc; + + cond_resched(); } list_for_each_entry(m, &vmcore_list, list) { diff --git a/fs/smb/client/cifsfs.c b/fs/smb/client/cifsfs.c index 39277c3718..d8ede68260 100644 --- a/fs/smb/client/cifsfs.c +++ b/fs/smb/client/cifsfs.c @@ -134,7 +134,7 @@ module_param(enable_oplocks, bool, 0644); MODULE_PARM_DESC(enable_oplocks, "Enable or disable oplocks. Default: y/Y/1"); module_param(enable_gcm_256, bool, 0644); -MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: n/N/0"); +MODULE_PARM_DESC(enable_gcm_256, "Enable requesting strongest (256 bit) GCM encryption. Default: y/Y/0"); module_param(require_gcm_256, bool, 0644); MODULE_PARM_DESC(require_gcm_256, "Require strongest (256 bit) GCM encryption. Default: n/N/0"); @@ -1277,7 +1277,7 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, struct cifsFileInfo *smb_file_src = src_file->private_data; struct cifsFileInfo *smb_file_target = dst_file->private_data; struct cifs_tcon *target_tcon, *src_tcon; - unsigned long long destend, fstart, fend, new_size; + unsigned long long destend, fstart, fend, old_size, new_size; unsigned int xid; int rc; @@ -1342,6 +1342,9 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false); if (rc) goto unlock; + if (fend > target_cifsi->netfs.zero_point) + target_cifsi->netfs.zero_point = fend + 1; + old_size = target_cifsi->netfs.remote_i_size; /* Discard all the folios that overlap the destination region. */ cifs_dbg(FYI, "about to discard pages %llx-%llx\n", fstart, fend); @@ -1354,12 +1357,13 @@ static loff_t cifs_remap_file_range(struct file *src_file, loff_t off, if (target_tcon->ses->server->ops->duplicate_extents) { rc = target_tcon->ses->server->ops->duplicate_extents(xid, smb_file_src, smb_file_target, off, len, destoff); - if (rc == 0 && new_size > i_size_read(target_inode)) { + if (rc == 0 && new_size > old_size) { truncate_setsize(target_inode, new_size); - netfs_resize_file(&target_cifsi->netfs, new_size, true); fscache_resize_cookie(cifs_inode_cookie(target_inode), new_size); } + if (rc == 0 && new_size > target_cifsi->netfs.zero_point) + target_cifsi->netfs.zero_point = new_size; } /* force revalidate of size and timestamps of target file now @@ -1451,6 +1455,8 @@ ssize_t cifs_file_copychunk_range(unsigned int xid, rc = cifs_flush_folio(target_inode, destend, &fstart, &fend, false); if (rc) goto unlock; + if (fend > target_cifsi->netfs.zero_point) + target_cifsi->netfs.zero_point = fend + 1; /* Discard all the folios that overlap the destination region. 
*/ truncate_inode_pages_range(&target_inode->i_data, fstart, fend); diff --git a/fs/smb/client/cifspdu.h b/fs/smb/client/cifspdu.h index c46d418c1c..a2072ab9e5 100644 --- a/fs/smb/client/cifspdu.h +++ b/fs/smb/client/cifspdu.h @@ -2574,7 +2574,7 @@ typedef struct { struct win_dev { - unsigned char type[8]; /* IntxCHR or IntxBLK or LnxFIFO*/ + unsigned char type[8]; /* IntxCHR or IntxBLK or LnxFIFO or LnxSOCK */ __le64 major; __le64 minor; } __attribute__((packed)); diff --git a/fs/smb/client/inode.c b/fs/smb/client/inode.c index 60afab5c83..d0e6959133 100644 --- a/fs/smb/client/inode.c +++ b/fs/smb/client/inode.c @@ -591,6 +591,10 @@ cifs_sfu_type(struct cifs_fattr *fattr, const char *path, mnr = le64_to_cpu(*(__le64 *)(pbuf+16)); fattr->cf_rdev = MKDEV(mjr, mnr); } + } else if (memcmp("LnxSOCK", pbuf, 8) == 0) { + cifs_dbg(FYI, "Socket\n"); + fattr->cf_mode |= S_IFSOCK; + fattr->cf_dtype = DT_SOCK; } else if (memcmp("IntxLNK", pbuf, 7) == 0) { cifs_dbg(FYI, "Symlink\n"); fattr->cf_mode |= S_IFLNK; diff --git a/fs/smb/client/smb2ops.c b/fs/smb/client/smb2ops.c index 28f0b7d19d..d35c45f3a2 100644 --- a/fs/smb/client/smb2ops.c +++ b/fs/smb/client/smb2ops.c @@ -2028,6 +2028,7 @@ smb2_duplicate_extents(const unsigned int xid, * size will be queried on next revalidate, but it is important * to make sure that file's cached size is updated immediately */ + netfs_resize_file(netfs_inode(inode), dest_off + len, true); cifs_setsize(inode, dest_off + len); } rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, @@ -4995,6 +4996,9 @@ static int __cifs_sfu_make_node(unsigned int xid, struct inode *inode, pdev.major = cpu_to_le64(MAJOR(dev)); pdev.minor = cpu_to_le64(MINOR(dev)); break; + case S_IFSOCK: + strscpy(pdev.type, "LnxSOCK"); + break; case S_IFIFO: strscpy(pdev.type, "LnxFIFO"); break; diff --git a/fs/smb/client/smb2transport.c b/fs/smb/client/smb2transport.c index 02135a6053..1476c445ca 100644 --- a/fs/smb/client/smb2transport.c +++ b/fs/smb/client/smb2transport.c @@ -216,8 +216,8 @@ smb2_find_smb_tcon(struct TCP_Server_Info *server, __u64 ses_id, __u32 tid) } tcon = smb2_find_smb_sess_tcon_unlocked(ses, tid); if (!tcon) { - cifs_put_smb_ses(ses); spin_unlock(&cifs_tcp_ses_lock); + cifs_put_smb_ses(ses); return NULL; } spin_unlock(&cifs_tcp_ses_lock); diff --git a/fs/smb/server/mgmt/share_config.c b/fs/smb/server/mgmt/share_config.c index a2f0a2edce..e0a6b75809 100644 --- a/fs/smb/server/mgmt/share_config.c +++ b/fs/smb/server/mgmt/share_config.c @@ -165,8 +165,12 @@ static struct ksmbd_share_config *share_config_request(struct unicode_map *um, share->path = kstrndup(ksmbd_share_config_path(resp), path_len, GFP_KERNEL); - if (share->path) + if (share->path) { share->path_sz = strlen(share->path); + while (share->path_sz > 1 && + share->path[share->path_sz - 1] == '/') + share->path[--share->path_sz] = '\0'; + } share->create_mask = resp->create_mask; share->directory_mask = resp->directory_mask; share->force_create_mode = resp->force_create_mode; diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index b9d9116fc2..a8f52c4ebb 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -610,19 +610,24 @@ static int oplock_break_pending(struct oplock_info *opinfo, int req_op_level) if (opinfo->op_state == OPLOCK_CLOSING) return -ENOENT; else if (opinfo->level <= req_op_level) { - if (opinfo->is_lease && - opinfo->o_lease->state != - (SMB2_LEASE_HANDLE_CACHING_LE | - SMB2_LEASE_READ_CACHING_LE)) + if (opinfo->is_lease == false) + return 1; + + if (opinfo->o_lease->state != + 
(SMB2_LEASE_HANDLE_CACHING_LE | + SMB2_LEASE_READ_CACHING_LE)) return 1; } } if (opinfo->level <= req_op_level) { - if (opinfo->is_lease && - opinfo->o_lease->state != - (SMB2_LEASE_HANDLE_CACHING_LE | - SMB2_LEASE_READ_CACHING_LE)) { + if (opinfo->is_lease == false) { + wake_up_oplock_break(opinfo); + return 1; + } + if (opinfo->o_lease->state != + (SMB2_LEASE_HANDLE_CACHING_LE | + SMB2_LEASE_READ_CACHING_LE)) { wake_up_oplock_break(opinfo); return 1; } diff --git a/fs/smb/server/smb2pdu.c b/fs/smb/server/smb2pdu.c index b6c5a8ea38..e7e0789178 100644 --- a/fs/smb/server/smb2pdu.c +++ b/fs/smb/server/smb2pdu.c @@ -630,6 +630,12 @@ smb2_get_name(const char *src, const int maxlen, struct nls_table *local_nls) return name; } + if (*name == '\\') { + pr_err("not allow directory name included leading slash\n"); + kfree(name); + return ERR_PTR(-EINVAL); + } + ksmbd_conv_path_to_unix(name); ksmbd_strip_last_slash(name); return name; @@ -2361,7 +2367,8 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, if (rc > 0) { rc = ksmbd_vfs_remove_xattr(idmap, path, - attr_name); + attr_name, + get_write); if (rc < 0) { ksmbd_debug(SMB, @@ -2376,7 +2383,7 @@ static int smb2_set_ea(struct smb2_ea_info *eabuf, unsigned int buf_len, } else { rc = ksmbd_vfs_setxattr(idmap, path, attr_name, value, le16_to_cpu(eabuf->EaValueLength), - 0, true); + 0, get_write); if (rc < 0) { ksmbd_debug(SMB, "ksmbd_vfs_setxattr is failed(%d)\n", @@ -2468,7 +2475,7 @@ static int smb2_remove_smb_xattrs(const struct path *path) !strncmp(&name[XATTR_USER_PREFIX_LEN], STREAM_PREFIX, STREAM_PREFIX_LEN)) { err = ksmbd_vfs_remove_xattr(idmap, path, - name); + name, true); if (err) ksmbd_debug(SMB, "remove xattr failed : %s\n", name); @@ -2842,20 +2849,11 @@ int smb2_open(struct ksmbd_work *work) } if (req->NameLength) { - if ((req->CreateOptions & FILE_DIRECTORY_FILE_LE) && - *(char *)req->Buffer == '\\') { - pr_err("not allow directory name included leading slash\n"); - rc = -EINVAL; - goto err_out2; - } - name = smb2_get_name((char *)req + le16_to_cpu(req->NameOffset), le16_to_cpu(req->NameLength), work->conn->local_nls); if (IS_ERR(name)) { rc = PTR_ERR(name); - if (rc != -ENOMEM) - rc = -ENOENT; name = NULL; goto err_out2; } diff --git a/fs/smb/server/vfs.c b/fs/smb/server/vfs.c index 51b1b0bed6..9e859ba010 100644 --- a/fs/smb/server/vfs.c +++ b/fs/smb/server/vfs.c @@ -1058,16 +1058,21 @@ int ksmbd_vfs_fqar_lseek(struct ksmbd_file *fp, loff_t start, loff_t length, } int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, - const struct path *path, char *attr_name) + const struct path *path, char *attr_name, + bool get_write) { int err; - err = mnt_want_write(path->mnt); - if (err) - return err; + if (get_write == true) { + err = mnt_want_write(path->mnt); + if (err) + return err; + } err = vfs_removexattr(idmap, path->dentry, attr_name); - mnt_drop_write(path->mnt); + + if (get_write == true) + mnt_drop_write(path->mnt); return err; } @@ -1380,7 +1385,7 @@ int ksmbd_vfs_remove_sd_xattrs(struct mnt_idmap *idmap, const struct path *path) ksmbd_debug(SMB, "%s, len %zd\n", name, strlen(name)); if (!strncmp(name, XATTR_NAME_SD, XATTR_NAME_SD_LEN)) { - err = ksmbd_vfs_remove_xattr(idmap, path, name); + err = ksmbd_vfs_remove_xattr(idmap, path, name, true); if (err) ksmbd_debug(SMB, "remove xattr failed : %s\n", name); } diff --git a/fs/smb/server/vfs.h b/fs/smb/server/vfs.h index cfe1c8092f..cb76f4b5ba 100644 --- a/fs/smb/server/vfs.h +++ b/fs/smb/server/vfs.h @@ -114,7 +114,8 @@ int ksmbd_vfs_setxattr(struct mnt_idmap 
*idmap, int ksmbd_vfs_xattr_stream_name(char *stream_name, char **xattr_stream_name, size_t *xattr_stream_name_size, int s_type); int ksmbd_vfs_remove_xattr(struct mnt_idmap *idmap, - const struct path *path, char *attr_name); + const struct path *path, char *attr_name, + bool get_write); int ksmbd_vfs_kern_path_locked(struct ksmbd_work *work, char *name, unsigned int flags, struct path *parent_path, struct path *path, bool caseless); diff --git a/fs/smb/server/vfs_cache.c b/fs/smb/server/vfs_cache.c index 6cb599cd28..8b2e37c871 100644 --- a/fs/smb/server/vfs_cache.c +++ b/fs/smb/server/vfs_cache.c @@ -254,7 +254,8 @@ static void __ksmbd_inode_close(struct ksmbd_file *fp) ci->m_flags &= ~S_DEL_ON_CLS_STREAM; err = ksmbd_vfs_remove_xattr(file_mnt_idmap(filp), &filp->f_path, - fp->stream.name); + fp->stream.name, + true); if (err) pr_err("remove xattr failed : %s\n", fp->stream.name); diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index a878cea70f..129d0f54ba 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -50,8 +50,12 @@ static struct eventfs_root_inode *get_root_inode(struct eventfs_inode *ei) /* Just try to make something consistent and unique */ static int eventfs_dir_ino(struct eventfs_inode *ei) { - if (!ei->ino) + if (!ei->ino) { ei->ino = get_next_ino(); + /* Must not have the file inode number */ + if (ei->ino == EVENTFS_FILE_INODE_INO) + ei->ino = get_next_ino(); + } return ei->ino; } @@ -301,33 +305,60 @@ static const struct file_operations eventfs_file_operations = { .llseek = generic_file_llseek, }; -/* - * On a remount of tracefs, if UID or GID options are set, then - * the mount point inode permissions should be used. - * Reset the saved permission flags appropriately. - */ -void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid) +static void eventfs_set_attrs(struct eventfs_inode *ei, bool update_uid, kuid_t uid, + bool update_gid, kgid_t gid, int level) { - struct eventfs_inode *ei = ti->private; + struct eventfs_inode *ei_child; - if (!ei) + /* Update events// */ + if (WARN_ON_ONCE(level > 3)) return; - if (update_uid) + if (update_uid) { ei->attr.mode &= ~EVENTFS_SAVE_UID; + ei->attr.uid = uid; + } - if (update_gid) + if (update_gid) { ei->attr.mode &= ~EVENTFS_SAVE_GID; + ei->attr.gid = gid; + } + + list_for_each_entry(ei_child, &ei->children, list) { + eventfs_set_attrs(ei_child, update_uid, uid, update_gid, gid, level + 1); + } if (!ei->entry_attrs) return; for (int i = 0; i < ei->nr_entries; i++) { - if (update_uid) + if (update_uid) { ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_UID; - if (update_gid) + ei->entry_attrs[i].uid = uid; + } + if (update_gid) { ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_GID; + ei->entry_attrs[i].gid = gid; + } } + +} + +/* + * On a remount of tracefs, if UID or GID options are set, then + * the mount point inode permissions should be used. + * Reset the saved permission flags appropriately. + */ +void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid) +{ + struct eventfs_inode *ei = ti->private; + + /* Only the events directory does the updates */ + if (!ei || !ei->is_events || ei->is_freed) + return; + + eventfs_set_attrs(ei, update_uid, ti->vfs_inode.i_uid, + update_gid, ti->vfs_inode.i_gid, 0); } /* Return the evenfs_inode of the "events" directory */ @@ -345,10 +376,9 @@ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) * If the ei is being freed, the ownership of the children * doesn't matter. 
*/ - if (ei->is_freed) { - ei = NULL; - break; - } + if (ei->is_freed) + return NULL; + // Walk upwards until you find the events inode } while (!ei->is_events); diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index 417c840e64..d8c60c2b76 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -439,10 +439,26 @@ static int tracefs_show_options(struct seq_file *m, struct dentry *root) return 0; } +static int tracefs_drop_inode(struct inode *inode) +{ + struct tracefs_inode *ti = get_tracefs(inode); + + /* + * This inode is being freed and cannot be used for + * eventfs. Clear the flag so that it doesn't call into + * eventfs during the remount flag updates. The eventfs_inode + * gets freed after an RCU cycle, so the content will still + * be safe if the iteration is going on now. + */ + ti->flags &= ~TRACEFS_EVENT_INODE; + + return 1; +} + static const struct super_operations tracefs_super_operations = { .alloc_inode = tracefs_alloc_inode, .free_inode = tracefs_free_inode, - .drop_inode = generic_delete_inode, + .drop_inode = tracefs_drop_inode, .statfs = simple_statfs, .remount_fs = tracefs_remount, .show_options = tracefs_show_options, @@ -469,22 +485,7 @@ static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) return !(ei && ei->is_freed); } -static void tracefs_d_iput(struct dentry *dentry, struct inode *inode) -{ - struct tracefs_inode *ti = get_tracefs(inode); - - /* - * This inode is being freed and cannot be used for - * eventfs. Clear the flag so that it doesn't call into - * eventfs during the remount flag updates. The eventfs_inode - * gets freed after an RCU cycle, so the content will still - * be safe if the iteration is going on now. - */ - ti->flags &= ~TRACEFS_EVENT_INODE; -} - static const struct dentry_operations tracefs_dentry_operations = { - .d_iput = tracefs_d_iput, .d_revalidate = tracefs_d_revalidate, .d_release = tracefs_d_release, }; diff --git a/fs/udf/inode.c b/fs/udf/inode.c index 2f831a3a91..bbf8918417 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -341,7 +341,7 @@ const struct address_space_operations udf_aops = { */ int udf_expand_file_adinicb(struct inode *inode) { - struct page *page; + struct folio *folio; struct udf_inode_info *iinfo = UDF_I(inode); int err; @@ -357,12 +357,13 @@ int udf_expand_file_adinicb(struct inode *inode) return 0; } - page = find_or_create_page(inode->i_mapping, 0, GFP_KERNEL); - if (!page) - return -ENOMEM; + folio = __filemap_get_folio(inode->i_mapping, 0, + FGP_LOCK | FGP_ACCESSED | FGP_CREAT, GFP_KERNEL); + if (IS_ERR(folio)) + return PTR_ERR(folio); - if (!PageUptodate(page)) - udf_adinicb_readpage(page); + if (!folio_test_uptodate(folio)) + udf_adinicb_readpage(&folio->page); down_write(&iinfo->i_data_sem); memset(iinfo->i_data + iinfo->i_lenEAttr, 0x00, iinfo->i_lenAlloc); @@ -371,22 +372,22 @@ int udf_expand_file_adinicb(struct inode *inode) iinfo->i_alloc_type = ICBTAG_FLAG_AD_SHORT; else iinfo->i_alloc_type = ICBTAG_FLAG_AD_LONG; - set_page_dirty(page); - unlock_page(page); + folio_mark_dirty(folio); + folio_unlock(folio); up_write(&iinfo->i_data_sem); err = filemap_fdatawrite(inode->i_mapping); if (err) { /* Restore everything back so that we don't lose data... 
*/ - lock_page(page); + folio_lock(folio); down_write(&iinfo->i_data_sem); - memcpy_to_page(page, 0, iinfo->i_data + iinfo->i_lenEAttr, - inode->i_size); - unlock_page(page); + memcpy_from_folio(iinfo->i_data + iinfo->i_lenEAttr, + folio, 0, inode->i_size); + folio_unlock(folio); iinfo->i_alloc_type = ICBTAG_FLAG_AD_IN_ICB; iinfo->i_lenAlloc = inode->i_size; up_write(&iinfo->i_data_sem); } - put_page(page); + folio_put(folio); mark_inode_dirty(inode); return err; diff --git a/fs/udf/udftime.c b/fs/udf/udftime.c index 758163af39..78ecc63360 100644 --- a/fs/udf/udftime.c +++ b/fs/udf/udftime.c @@ -46,13 +46,18 @@ udf_disk_stamp_to_time(struct timespec64 *dest, struct timestamp src) dest->tv_sec = mktime64(year, src.month, src.day, src.hour, src.minute, src.second); dest->tv_sec -= offset * 60; - dest->tv_nsec = 1000 * (src.centiseconds * 10000 + - src.hundredsOfMicroseconds * 100 + src.microseconds); + /* * Sanitize nanosecond field since reportedly some filesystems are * recorded with bogus sub-second values. */ - dest->tv_nsec %= NSEC_PER_SEC; + if (src.centiseconds < 100 && src.hundredsOfMicroseconds < 100 && + src.microseconds < 100) { + dest->tv_nsec = 1000 * (src.centiseconds * 10000 + + src.hundredsOfMicroseconds * 100 + src.microseconds); + } else { + dest->tv_nsec = 0; + } } void diff --git a/fs/verity/init.c b/fs/verity/init.c index cb2c9aac61..f440f0e61e 100644 --- a/fs/verity/init.c +++ b/fs/verity/init.c @@ -10,8 +10,6 @@ #include #ifdef CONFIG_SYSCTL -static struct ctl_table_header *fsverity_sysctl_header; - static struct ctl_table fsverity_sysctl_table[] = { #ifdef CONFIG_FS_VERITY_BUILTIN_SIGNATURES { @@ -28,10 +26,7 @@ static struct ctl_table fsverity_sysctl_table[] = { static void __init fsverity_init_sysctl(void) { - fsverity_sysctl_header = register_sysctl("fs/verity", - fsverity_sysctl_table); - if (!fsverity_sysctl_header) - panic("fsverity sysctl registration failed"); + register_sysctl_init("fs/verity", fsverity_sysctl_table); } #else /* CONFIG_SYSCTL */ static inline void fsverity_init_sysctl(void) -- cgit v1.2.3
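A note on the udf_disk_stamp_to_time() change above: once each sub-second field is validated to be below 100, the worst case is 1000 * (99 * 10000 + 99 * 100 + 99) = 999,999,000 ns, which is strictly less than NSEC_PER_SEC (10^9), so no modulo is needed on the valid path. The old "dest->tv_nsec %= NSEC_PER_SEC" could wrap a corrupted on-disk field into an in-range but meaningless value; the new code instead drops bogus sub-second data and uses 0. A small self-contained check of that bound (illustrative userspace sketch, not kernel code):

	#include <assert.h>

	int main(void)
	{
		/* Maximum tv_nsec reachable once every field is < 100. */
		long max_ns = 1000L * (99 * 10000 + 99 * 100 + 99);

		assert(max_ns == 999999000); /* < NSEC_PER_SEC == 1e9 */
		return 0;
	}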