diff options
Diffstat (limited to 'fs')
47 files changed, 556 insertions, 281 deletions
diff --git a/fs/9p/fid.h b/fs/9p/fid.h index 29281b7c38..0d6138bee2 100644 --- a/fs/9p/fid.h +++ b/fs/9p/fid.h @@ -49,9 +49,6 @@ static inline struct p9_fid *v9fs_fid_clone(struct dentry *dentry) static inline void v9fs_fid_add_modes(struct p9_fid *fid, unsigned int s_flags, unsigned int s_cache, unsigned int f_flags) { - if (fid->qid.type != P9_QTFILE) - return; - if ((!s_cache) || ((fid->qid.version == 0) && !(s_flags & V9FS_IGNORE_QV)) || (s_flags & V9FS_DIRECT_IO) || (f_flags & O_DIRECT)) { diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index bae330c2f0..a504240c81 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -520,6 +520,7 @@ const struct file_operations v9fs_file_operations = { .splice_read = v9fs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync, + .setlease = simple_nosetlease, }; const struct file_operations v9fs_file_operations_dotl = { @@ -534,4 +535,5 @@ const struct file_operations v9fs_file_operations_dotl = { .splice_read = v9fs_file_splice_read, .splice_write = iter_file_splice_write, .fsync = v9fs_file_fsync_dotl, + .setlease = simple_nosetlease, }; diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 32572982f7..0fde0ab77e 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -83,7 +83,7 @@ static int p9mode2perm(struct v9fs_session_info *v9ses, int res; int mode = stat->mode; - res = mode & S_IALLUGO; + res = mode & 0777; /* S_IRWXUGO */ if (v9fs_proto_dotu(v9ses)) { if ((mode & P9_DMSETUID) == P9_DMSETUID) res |= S_ISUID; @@ -178,6 +178,9 @@ int v9fs_uflags2omode(int uflags, int extended) break; } + if (uflags & O_TRUNC) + ret |= P9_OTRUNC; + if (extended) { if (uflags & O_EXCL) ret |= P9_OEXCL; @@ -371,17 +374,21 @@ void v9fs_evict_inode(struct inode *inode) struct v9fs_inode __maybe_unused *v9inode = V9FS_I(inode); __le32 __maybe_unused version; - truncate_inode_pages_final(&inode->i_data); + if (!is_bad_inode(inode)) { + truncate_inode_pages_final(&inode->i_data); - version = cpu_to_le32(v9inode->qid.version); - netfs_clear_inode_writeback(inode, &version); + version = cpu_to_le32(v9inode->qid.version); + netfs_clear_inode_writeback(inode, &version); - clear_inode(inode); - filemap_fdatawrite(&inode->i_data); + clear_inode(inode); + filemap_fdatawrite(&inode->i_data); #ifdef CONFIG_9P_FSCACHE - fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false); + if (v9fs_inode_cookie(v9inode)) + fscache_relinquish_cookie(v9fs_inode_cookie(v9inode), false); #endif + } else + clear_inode(inode); } static int v9fs_test_inode(struct inode *inode, void *data) @@ -1145,8 +1152,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode, struct v9fs_session_info *v9ses = sb->s_fs_info; struct v9fs_inode *v9inode = V9FS_I(inode); - set_nlink(inode, 1); - inode_set_atime(inode, stat->atime, 0); inode_set_mtime(inode, stat->mtime, 0); inode_set_ctime(inode, stat->mtime, 0); diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 941f7d0e0b..23cc67f29a 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -310,6 +310,7 @@ static const struct super_operations v9fs_super_ops = { .alloc_inode = v9fs_alloc_inode, .free_inode = v9fs_free_inode, .statfs = simple_statfs, + .drop_inode = v9fs_drop_inode, .evict_inode = v9fs_evict_inode, .show_options = v9fs_show_options, .umount_begin = v9fs_umount_begin, diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index b28bb1c93d..d3c534c1bf 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -2522,7 +2522,7 @@ void btrfs_clear_delalloc_extent(struct btrfs_inode *inode, */ if (bits & EXTENT_CLEAR_META_RESV && root != fs_info->tree_root) - btrfs_delalloc_release_metadata(inode, len, false); + btrfs_delalloc_release_metadata(inode, len, true); /* For sanity tests. */ if (btrfs_is_testing(fs_info)) diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c index 59850dc17b..81f67ebf74 100644 --- a/fs/btrfs/ordered-data.c +++ b/fs/btrfs/ordered-data.c @@ -1189,6 +1189,7 @@ struct btrfs_ordered_extent *btrfs_split_ordered_extent( ordered->disk_bytenr += len; ordered->num_bytes -= len; ordered->disk_num_bytes -= len; + ordered->ram_bytes -= len; if (test_bit(BTRFS_ORDERED_IO_DONE, &ordered->flags)) { ASSERT(ordered->bytes_left == 0); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 132802bd80..82d4559eb4 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -3052,6 +3052,8 @@ int btrfs_qgroup_check_inherit(struct btrfs_fs_info *fs_info, struct btrfs_qgroup_inherit *inherit, size_t size) { + if (!btrfs_qgroup_enabled(fs_info)) + return 0; if (inherit->flags & ~BTRFS_QGROUP_INHERIT_FLAGS_SUPP) return -EOPNOTSUPP; if (size < sizeof(*inherit) || size > PAGE_SIZE) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index f1705ae59e..b93cdf5f17 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -1496,6 +1496,7 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) radix_tree_tag_clear(&fs_info->fs_roots_radix, (unsigned long)root->root_key.objectid, BTRFS_ROOT_TRANS_TAG); + btrfs_qgroup_free_meta_all_pertrans(root); spin_unlock(&fs_info->fs_roots_radix_lock); btrfs_free_log(trans, root); @@ -1520,7 +1521,6 @@ static noinline int commit_fs_roots(struct btrfs_trans_handle *trans) if (ret2) return ret2; spin_lock(&fs_info->fs_roots_radix_lock); - btrfs_qgroup_free_meta_all_pertrans(root); } } spin_unlock(&fs_info->fs_roots_radix_lock); diff --git a/fs/btrfs/tree-checker.c b/fs/btrfs/tree-checker.c index 6eccf84964..75c78a5556 100644 --- a/fs/btrfs/tree-checker.c +++ b/fs/btrfs/tree-checker.c @@ -1793,6 +1793,11 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf) return BTRFS_TREE_BLOCK_INVALID_LEVEL; } + if (unlikely(!btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN))) { + generic_err(leaf, 0, "invalid flag for leaf, WRITTEN not set"); + return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET; + } + /* * Extent buffers from a relocation tree have a owner field that * corresponds to the subvolume tree they are based on. So just from an @@ -1854,6 +1859,7 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf) for (slot = 0; slot < nritems; slot++) { u32 item_end_expected; u64 item_data_end; + enum btrfs_tree_block_status ret; btrfs_item_key_to_cpu(leaf, &key, slot); @@ -1909,21 +1915,10 @@ enum btrfs_tree_block_status __btrfs_check_leaf(struct extent_buffer *leaf) return BTRFS_TREE_BLOCK_INVALID_OFFSETS; } - /* - * We only want to do this if WRITTEN is set, otherwise the leaf - * may be in some intermediate state and won't appear valid. - */ - if (btrfs_header_flag(leaf, BTRFS_HEADER_FLAG_WRITTEN)) { - enum btrfs_tree_block_status ret; - - /* - * Check if the item size and content meet other - * criteria - */ - ret = check_leaf_item(leaf, &key, slot, &prev_key); - if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN)) - return ret; - } + /* Check if the item size and content meet other criteria. */ + ret = check_leaf_item(leaf, &key, slot, &prev_key); + if (unlikely(ret != BTRFS_TREE_BLOCK_CLEAN)) + return ret; prev_key.objectid = key.objectid; prev_key.type = key.type; @@ -1953,6 +1948,11 @@ enum btrfs_tree_block_status __btrfs_check_node(struct extent_buffer *node) int level = btrfs_header_level(node); u64 bytenr; + if (unlikely(!btrfs_header_flag(node, BTRFS_HEADER_FLAG_WRITTEN))) { + generic_err(node, 0, "invalid flag for node, WRITTEN not set"); + return BTRFS_TREE_BLOCK_WRITTEN_NOT_SET; + } + if (unlikely(level <= 0 || level >= BTRFS_MAX_LEVEL)) { generic_err(node, 0, "invalid level for node, have %d expect [1, %d]", diff --git a/fs/btrfs/tree-checker.h b/fs/btrfs/tree-checker.h index 14b9fbe82d..580ec4fde0 100644 --- a/fs/btrfs/tree-checker.h +++ b/fs/btrfs/tree-checker.h @@ -51,6 +51,7 @@ enum btrfs_tree_block_status { BTRFS_TREE_BLOCK_INVALID_BLOCKPTR, BTRFS_TREE_BLOCK_INVALID_ITEM, BTRFS_TREE_BLOCK_INVALID_OWNER, + BTRFS_TREE_BLOCK_WRITTEN_NOT_SET, }; /* diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index f3890f7c78..3ebadc5ec8 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1182,23 +1182,30 @@ static int open_fs_devices(struct btrfs_fs_devices *fs_devices, struct btrfs_device *device; struct btrfs_device *latest_dev = NULL; struct btrfs_device *tmp_device; + int ret = 0; list_for_each_entry_safe(device, tmp_device, &fs_devices->devices, dev_list) { - int ret; + int ret2; - ret = btrfs_open_one_device(fs_devices, device, flags, holder); - if (ret == 0 && + ret2 = btrfs_open_one_device(fs_devices, device, flags, holder); + if (ret2 == 0 && (!latest_dev || device->generation > latest_dev->generation)) { latest_dev = device; - } else if (ret == -ENODATA) { + } else if (ret2 == -ENODATA) { fs_devices->num_devices--; list_del(&device->dev_list); btrfs_free_device(device); } + if (ret == 0 && ret2 != 0) + ret = ret2; } - if (fs_devices->open_devices == 0) + + if (fs_devices->open_devices == 0) { + if (ret) + return ret; return -EINVAL; + } fs_devices->opened = 1; fs_devices->latest_dev = latest_dev; @@ -3449,6 +3456,7 @@ again: * alignment and size). */ ret = -EUCLEAN; + mutex_unlock(&fs_info->reclaim_bgs_lock); goto error; } diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index 410f5af623..c69174675c 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -84,13 +84,6 @@ struct erofs_dev_context { bool flatdev; }; -struct erofs_fs_context { - struct erofs_mount_opts opt; - struct erofs_dev_context *devs; - char *fsid; - char *domain_id; -}; - /* all filesystem-wide lz4 configurations */ struct erofs_sb_lz4_info { /* # of pages needed for EROFS lz4 rolling decompression */ diff --git a/fs/erofs/super.c b/fs/erofs/super.c index 24788c230b..a2fa745585 100644 --- a/fs/erofs/super.c +++ b/fs/erofs/super.c @@ -370,18 +370,18 @@ out: return ret; } -static void erofs_default_options(struct erofs_fs_context *ctx) +static void erofs_default_options(struct erofs_sb_info *sbi) { #ifdef CONFIG_EROFS_FS_ZIP - ctx->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND; - ctx->opt.max_sync_decompress_pages = 3; - ctx->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO; + sbi->opt.cache_strategy = EROFS_ZIP_CACHE_READAROUND; + sbi->opt.max_sync_decompress_pages = 3; + sbi->opt.sync_decompress = EROFS_SYNC_DECOMPRESS_AUTO; #endif #ifdef CONFIG_EROFS_FS_XATTR - set_opt(&ctx->opt, XATTR_USER); + set_opt(&sbi->opt, XATTR_USER); #endif #ifdef CONFIG_EROFS_FS_POSIX_ACL - set_opt(&ctx->opt, POSIX_ACL); + set_opt(&sbi->opt, POSIX_ACL); #endif } @@ -426,17 +426,17 @@ static const struct fs_parameter_spec erofs_fs_parameters[] = { static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) { #ifdef CONFIG_FS_DAX - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = fc->s_fs_info; switch (mode) { case EROFS_MOUNT_DAX_ALWAYS: warnfc(fc, "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); - set_opt(&ctx->opt, DAX_ALWAYS); - clear_opt(&ctx->opt, DAX_NEVER); + set_opt(&sbi->opt, DAX_ALWAYS); + clear_opt(&sbi->opt, DAX_NEVER); return true; case EROFS_MOUNT_DAX_NEVER: - set_opt(&ctx->opt, DAX_NEVER); - clear_opt(&ctx->opt, DAX_ALWAYS); + set_opt(&sbi->opt, DAX_NEVER); + clear_opt(&sbi->opt, DAX_ALWAYS); return true; default: DBG_BUGON(1); @@ -451,7 +451,7 @@ static bool erofs_fc_set_dax_mode(struct fs_context *fc, unsigned int mode) static int erofs_fc_parse_param(struct fs_context *fc, struct fs_parameter *param) { - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = fc->s_fs_info; struct fs_parse_result result; struct erofs_device_info *dif; int opt, ret; @@ -464,9 +464,9 @@ static int erofs_fc_parse_param(struct fs_context *fc, case Opt_user_xattr: #ifdef CONFIG_EROFS_FS_XATTR if (result.boolean) - set_opt(&ctx->opt, XATTR_USER); + set_opt(&sbi->opt, XATTR_USER); else - clear_opt(&ctx->opt, XATTR_USER); + clear_opt(&sbi->opt, XATTR_USER); #else errorfc(fc, "{,no}user_xattr options not supported"); #endif @@ -474,16 +474,16 @@ static int erofs_fc_parse_param(struct fs_context *fc, case Opt_acl: #ifdef CONFIG_EROFS_FS_POSIX_ACL if (result.boolean) - set_opt(&ctx->opt, POSIX_ACL); + set_opt(&sbi->opt, POSIX_ACL); else - clear_opt(&ctx->opt, POSIX_ACL); + clear_opt(&sbi->opt, POSIX_ACL); #else errorfc(fc, "{,no}acl options not supported"); #endif break; case Opt_cache_strategy: #ifdef CONFIG_EROFS_FS_ZIP - ctx->opt.cache_strategy = result.uint_32; + sbi->opt.cache_strategy = result.uint_32; #else errorfc(fc, "compression not supported, cache_strategy ignored"); #endif @@ -505,27 +505,27 @@ static int erofs_fc_parse_param(struct fs_context *fc, kfree(dif); return -ENOMEM; } - down_write(&ctx->devs->rwsem); - ret = idr_alloc(&ctx->devs->tree, dif, 0, 0, GFP_KERNEL); - up_write(&ctx->devs->rwsem); + down_write(&sbi->devs->rwsem); + ret = idr_alloc(&sbi->devs->tree, dif, 0, 0, GFP_KERNEL); + up_write(&sbi->devs->rwsem); if (ret < 0) { kfree(dif->path); kfree(dif); return ret; } - ++ctx->devs->extra_devices; + ++sbi->devs->extra_devices; break; #ifdef CONFIG_EROFS_FS_ONDEMAND case Opt_fsid: - kfree(ctx->fsid); - ctx->fsid = kstrdup(param->string, GFP_KERNEL); - if (!ctx->fsid) + kfree(sbi->fsid); + sbi->fsid = kstrdup(param->string, GFP_KERNEL); + if (!sbi->fsid) return -ENOMEM; break; case Opt_domain_id: - kfree(ctx->domain_id); - ctx->domain_id = kstrdup(param->string, GFP_KERNEL); - if (!ctx->domain_id) + kfree(sbi->domain_id); + sbi->domain_id = kstrdup(param->string, GFP_KERNEL); + if (!sbi->domain_id) return -ENOMEM; break; #else @@ -582,8 +582,7 @@ static const struct export_operations erofs_export_ops = { static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) { struct inode *inode; - struct erofs_sb_info *sbi; - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = EROFS_SB(sb); int err; sb->s_magic = EROFS_SUPER_MAGIC; @@ -591,19 +590,6 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) sb->s_maxbytes = MAX_LFS_FILESIZE; sb->s_op = &erofs_sops; - sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); - if (!sbi) - return -ENOMEM; - - sb->s_fs_info = sbi; - sbi->opt = ctx->opt; - sbi->devs = ctx->devs; - ctx->devs = NULL; - sbi->fsid = ctx->fsid; - ctx->fsid = NULL; - sbi->domain_id = ctx->domain_id; - ctx->domain_id = NULL; - sbi->blkszbits = PAGE_SHIFT; if (erofs_is_fscache_mode(sb)) { sb->s_blocksize = PAGE_SIZE; @@ -707,9 +693,9 @@ static int erofs_fc_fill_super(struct super_block *sb, struct fs_context *fc) static int erofs_fc_get_tree(struct fs_context *fc) { - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = fc->s_fs_info; - if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && ctx->fsid) + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) return get_tree_nodev(fc, erofs_fc_fill_super); return get_tree_bdev(fc, erofs_fc_fill_super); @@ -719,19 +705,19 @@ static int erofs_fc_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; struct erofs_sb_info *sbi = EROFS_SB(sb); - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *new_sbi = fc->s_fs_info; DBG_BUGON(!sb_rdonly(sb)); - if (ctx->fsid || ctx->domain_id) + if (new_sbi->fsid || new_sbi->domain_id) erofs_info(sb, "ignoring reconfiguration for fsid|domain_id."); - if (test_opt(&ctx->opt, POSIX_ACL)) + if (test_opt(&new_sbi->opt, POSIX_ACL)) fc->sb_flags |= SB_POSIXACL; else fc->sb_flags &= ~SB_POSIXACL; - sbi->opt = ctx->opt; + sbi->opt = new_sbi->opt; fc->sb_flags |= SB_RDONLY; return 0; @@ -762,12 +748,15 @@ static void erofs_free_dev_context(struct erofs_dev_context *devs) static void erofs_fc_free(struct fs_context *fc) { - struct erofs_fs_context *ctx = fc->fs_private; + struct erofs_sb_info *sbi = fc->s_fs_info; - erofs_free_dev_context(ctx->devs); - kfree(ctx->fsid); - kfree(ctx->domain_id); - kfree(ctx); + if (!sbi) + return; + + erofs_free_dev_context(sbi->devs); + kfree(sbi->fsid); + kfree(sbi->domain_id); + kfree(sbi); } static const struct fs_context_operations erofs_context_ops = { @@ -779,38 +768,35 @@ static const struct fs_context_operations erofs_context_ops = { static int erofs_init_fs_context(struct fs_context *fc) { - struct erofs_fs_context *ctx; + struct erofs_sb_info *sbi; - ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); - if (!ctx) + sbi = kzalloc(sizeof(*sbi), GFP_KERNEL); + if (!sbi) return -ENOMEM; - ctx->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); - if (!ctx->devs) { - kfree(ctx); + + sbi->devs = kzalloc(sizeof(struct erofs_dev_context), GFP_KERNEL); + if (!sbi->devs) { + kfree(sbi); return -ENOMEM; } - fc->fs_private = ctx; + fc->s_fs_info = sbi; - idr_init(&ctx->devs->tree); - init_rwsem(&ctx->devs->rwsem); - erofs_default_options(ctx); + idr_init(&sbi->devs->tree); + init_rwsem(&sbi->devs->rwsem); + erofs_default_options(sbi); fc->ops = &erofs_context_ops; return 0; } static void erofs_kill_sb(struct super_block *sb) { - struct erofs_sb_info *sbi; + struct erofs_sb_info *sbi = EROFS_SB(sb); - if (erofs_is_fscache_mode(sb)) + if (IS_ENABLED(CONFIG_EROFS_FS_ONDEMAND) && sbi->fsid) kill_anon_super(sb); else kill_block_super(sb); - sbi = EROFS_SB(sb); - if (!sbi) - return; - erofs_free_dev_context(sbi->devs); fs_put_dax(sbi->dax_dev, NULL); erofs_fscache_unregister_fs(sb); diff --git a/fs/exfat/file.c b/fs/exfat/file.c index cc00f1a7a1..9adfc38ca7 100644 --- a/fs/exfat/file.c +++ b/fs/exfat/file.c @@ -51,7 +51,7 @@ static int exfat_cont_expand(struct inode *inode, loff_t size) clu.flags = ei->flags; ret = exfat_alloc_cluster(inode, new_num_clusters - num_clusters, - &clu, IS_DIRSYNC(inode)); + &clu, inode_needs_sync(inode)); if (ret) return ret; @@ -77,12 +77,11 @@ out: ei->i_size_aligned = round_up(size, sb->s_blocksize); ei->i_size_ondisk = ei->i_size_aligned; inode->i_blocks = round_up(size, sbi->cluster_size) >> 9; + mark_inode_dirty(inode); - if (IS_DIRSYNC(inode)) + if (IS_SYNC(inode)) return write_inode_now(inode, 1); - mark_inode_dirty(inode); - return 0; free_clu: diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index d9ccfd27e4..643175498d 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -1718,7 +1718,8 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) struct buffer_head *dibh, *bh; struct gfs2_holder rd_gh; unsigned int bsize_shift = sdp->sd_sb.sb_bsize_shift; - u64 lblock = (offset + (1 << bsize_shift) - 1) >> bsize_shift; + unsigned int bsize = 1 << bsize_shift; + u64 lblock = (offset + bsize - 1) >> bsize_shift; __u16 start_list[GFS2_MAX_META_HEIGHT]; __u16 __end_list[GFS2_MAX_META_HEIGHT], *end_list = NULL; unsigned int start_aligned, end_aligned; @@ -1729,7 +1730,7 @@ static int punch_hole(struct gfs2_inode *ip, u64 offset, u64 length) u64 prev_bnr = 0; __be64 *start, *end; - if (offset >= maxsize) { + if (offset + bsize - 1 >= maxsize) { /* * The starting point lies beyond the allocated metadata; * there are no blocks to deallocate. diff --git a/fs/nfs/client.c b/fs/nfs/client.c index fbdc9ca80f..a8fad331df 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -73,7 +73,6 @@ const struct rpc_program nfs_program = { .number = NFS_PROGRAM, .nrvers = ARRAY_SIZE(nfs_version), .version = nfs_version, - .stats = &nfs_rpcstat, .pipe_dir_name = NFS_PIPE_DIRNAME, }; @@ -502,6 +501,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, const struct nfs_client_initdata *cl_init, rpc_authflavor_t flavor) { + struct nfs_net *nn = net_generic(clp->cl_net, nfs_net_id); struct rpc_clnt *clnt = NULL; struct rpc_create_args args = { .net = clp->cl_net, @@ -513,6 +513,7 @@ int nfs_create_rpc_client(struct nfs_client *clp, .servername = clp->cl_hostname, .nodename = cl_init->nodename, .program = &nfs_program, + .stats = &nn->rpcstats, .version = clp->rpc_ops->version, .authflavor = flavor, .cred = cl_init->cred, @@ -1182,6 +1183,8 @@ void nfs_clients_init(struct net *net) #endif spin_lock_init(&nn->nfs_client_lock); nn->boot_time = ktime_get_real(); + memset(&nn->rpcstats, 0, sizeof(nn->rpcstats)); + nn->rpcstats.program = &nfs_program; nfs_netns_sysfs_setup(nn, net); } diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index ebb8d60e11..6fe4b47c39 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -2426,12 +2426,21 @@ EXPORT_SYMBOL_GPL(nfs_net_id); static int nfs_net_init(struct net *net) { + struct nfs_net *nn = net_generic(net, nfs_net_id); + nfs_clients_init(net); + + if (!rpc_proc_register(net, &nn->rpcstats)) { + nfs_clients_exit(net); + return -ENOMEM; + } + return nfs_fs_proc_net_init(net); } static void nfs_net_exit(struct net *net) { + rpc_proc_unregister(net, "nfs"); nfs_fs_proc_net_exit(net); nfs_clients_exit(net); } @@ -2486,15 +2495,12 @@ static int __init init_nfs_fs(void) if (err) goto out1; - rpc_proc_register(&init_net, &nfs_rpcstat); - err = register_nfs_fs(); if (err) goto out0; return 0; out0: - rpc_proc_unregister(&init_net, "nfs"); nfs_destroy_directcache(); out1: nfs_destroy_writepagecache(); @@ -2524,7 +2530,6 @@ static void __exit exit_nfs_fs(void) nfs_destroy_inodecache(); nfs_destroy_nfspagecache(); unregister_pernet_subsys(&nfs_net_ops); - rpc_proc_unregister(&init_net, "nfs"); unregister_nfs_fs(); nfs_fs_proc_exit(); nfsiod_stop(); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index e3722ce672..06253695fe 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -449,8 +449,6 @@ int nfs_try_get_tree(struct fs_context *); int nfs_get_tree_common(struct fs_context *); void nfs_kill_super(struct super_block *); -extern struct rpc_stat nfs_rpcstat; - extern int __init register_nfs_fs(void); extern void __exit unregister_nfs_fs(void); extern bool nfs_sb_active(struct super_block *sb); diff --git a/fs/nfs/netns.h b/fs/nfs/netns.h index c8374f74dc..a68b21603e 100644 --- a/fs/nfs/netns.h +++ b/fs/nfs/netns.h @@ -9,6 +9,7 @@ #include <linux/nfs4.h> #include <net/net_namespace.h> #include <net/netns/generic.h> +#include <linux/sunrpc/stats.h> struct bl_dev_msg { int32_t status; @@ -34,6 +35,7 @@ struct nfs_net { struct nfs_netns_client *nfs_client; spinlock_t nfs_client_lock; ktime_t boot_time; + struct rpc_stat rpcstats; #ifdef CONFIG_PROC_FS struct proc_dir_entry *proc_nfsfs; #endif diff --git a/fs/nfsd/cache.h b/fs/nfsd/cache.h index 4cbe0434cb..66a05fefae 100644 --- a/fs/nfsd/cache.h +++ b/fs/nfsd/cache.h @@ -80,8 +80,6 @@ enum { int nfsd_drc_slab_create(void); void nfsd_drc_slab_free(void); -int nfsd_net_reply_cache_init(struct nfsd_net *nn); -void nfsd_net_reply_cache_destroy(struct nfsd_net *nn); int nfsd_reply_cache_init(struct nfsd_net *); void nfsd_reply_cache_shutdown(struct nfsd_net *); int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, diff --git a/fs/nfsd/netns.h b/fs/nfsd/netns.h index 74b4360779..0cef4bb407 100644 --- a/fs/nfsd/netns.h +++ b/fs/nfsd/netns.h @@ -11,6 +11,7 @@ #include <net/net_namespace.h> #include <net/netns/generic.h> #include <linux/filelock.h> +#include <linux/nfs4.h> #include <linux/percpu_counter.h> #include <linux/siphash.h> @@ -26,10 +27,22 @@ struct nfsd4_client_tracking_ops; enum { /* cache misses due only to checksum comparison failures */ - NFSD_NET_PAYLOAD_MISSES, + NFSD_STATS_PAYLOAD_MISSES, /* amount of memory (in bytes) currently consumed by the DRC */ - NFSD_NET_DRC_MEM_USAGE, - NFSD_NET_COUNTERS_NUM + NFSD_STATS_DRC_MEM_USAGE, + NFSD_STATS_RC_HITS, /* repcache hits */ + NFSD_STATS_RC_MISSES, /* repcache misses */ + NFSD_STATS_RC_NOCACHE, /* uncached reqs */ + NFSD_STATS_FH_STALE, /* FH stale error */ + NFSD_STATS_IO_READ, /* bytes returned to read requests */ + NFSD_STATS_IO_WRITE, /* bytes passed in write requests */ +#ifdef CONFIG_NFSD_V4 + NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */ + NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP, +#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op)) + NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */ +#endif + NFSD_STATS_COUNTERS_NUM }; /* @@ -164,7 +177,7 @@ struct nfsd_net { atomic_t num_drc_entries; /* Per-netns stats counters */ - struct percpu_counter counter[NFSD_NET_COUNTERS_NUM]; + struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM]; /* longest hash chain seen */ unsigned int longest_chain; diff --git a/fs/nfsd/nfs4callback.c b/fs/nfsd/nfs4callback.c index 926c29879c..30aa241038 100644 --- a/fs/nfsd/nfs4callback.c +++ b/fs/nfsd/nfs4callback.c @@ -85,7 +85,21 @@ static void encode_uint32(struct xdr_stream *xdr, u32 n) static void encode_bitmap4(struct xdr_stream *xdr, const __u32 *bitmap, size_t len) { - WARN_ON_ONCE(xdr_stream_encode_uint32_array(xdr, bitmap, len) < 0); + xdr_stream_encode_uint32_array(xdr, bitmap, len); +} + +static int decode_cb_fattr4(struct xdr_stream *xdr, uint32_t *bitmap, + struct nfs4_cb_fattr *fattr) +{ + fattr->ncf_cb_change = 0; + fattr->ncf_cb_fsize = 0; + if (bitmap[0] & FATTR4_WORD0_CHANGE) + if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_change) < 0) + return -NFSERR_BAD_XDR; + if (bitmap[0] & FATTR4_WORD0_SIZE) + if (xdr_stream_decode_u64(xdr, &fattr->ncf_cb_fsize) < 0) + return -NFSERR_BAD_XDR; + return 0; } static void encode_nfs_cb_opnum4(struct xdr_stream *xdr, enum nfs_cb_opnum4 op) @@ -334,6 +348,30 @@ encode_cb_recallany4args(struct xdr_stream *xdr, } /* + * CB_GETATTR4args + * struct CB_GETATTR4args { + * nfs_fh4 fh; + * bitmap4 attr_request; + * }; + * + * The size and change attributes are the only one + * guaranteed to be serviced by the client. + */ +static void +encode_cb_getattr4args(struct xdr_stream *xdr, struct nfs4_cb_compound_hdr *hdr, + struct nfs4_cb_fattr *fattr) +{ + struct nfs4_delegation *dp = + container_of(fattr, struct nfs4_delegation, dl_cb_fattr); + struct knfsd_fh *fh = &dp->dl_stid.sc_file->fi_fhandle; + + encode_nfs_cb_opnum4(xdr, OP_CB_GETATTR); + encode_nfs_fh4(xdr, fh); + encode_bitmap4(xdr, fattr->ncf_cb_bmap, ARRAY_SIZE(fattr->ncf_cb_bmap)); + hdr->nops++; +} + +/* * CB_SEQUENCE4args * * struct CB_SEQUENCE4args { @@ -469,6 +507,26 @@ static void nfs4_xdr_enc_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, } /* + * 20.1. Operation 3: CB_GETATTR - Get Attributes + */ +static void nfs4_xdr_enc_cb_getattr(struct rpc_rqst *req, + struct xdr_stream *xdr, const void *data) +{ + const struct nfsd4_callback *cb = data; + struct nfs4_cb_fattr *ncf = + container_of(cb, struct nfs4_cb_fattr, ncf_getattr); + struct nfs4_cb_compound_hdr hdr = { + .ident = cb->cb_clp->cl_cb_ident, + .minorversion = cb->cb_clp->cl_minorversion, + }; + + encode_cb_compound4args(xdr, &hdr); + encode_cb_sequence4args(xdr, cb, &hdr); + encode_cb_getattr4args(xdr, &hdr, ncf); + encode_cb_nops(&hdr); +} + +/* * 20.2. Operation 4: CB_RECALL - Recall a Delegation */ static void nfs4_xdr_enc_cb_recall(struct rpc_rqst *req, struct xdr_stream *xdr, @@ -524,6 +582,42 @@ static int nfs4_xdr_dec_cb_null(struct rpc_rqst *req, struct xdr_stream *xdr, } /* + * 20.1. Operation 3: CB_GETATTR - Get Attributes + */ +static int nfs4_xdr_dec_cb_getattr(struct rpc_rqst *rqstp, + struct xdr_stream *xdr, + void *data) +{ + struct nfsd4_callback *cb = data; + struct nfs4_cb_compound_hdr hdr; + int status; + u32 bitmap[3] = {0}; + u32 attrlen; + struct nfs4_cb_fattr *ncf = + container_of(cb, struct nfs4_cb_fattr, ncf_getattr); + + status = decode_cb_compound4res(xdr, &hdr); + if (unlikely(status)) + return status; + + status = decode_cb_sequence4res(xdr, cb); + if (unlikely(status || cb->cb_seq_status)) + return status; + + status = decode_cb_op_status(xdr, OP_CB_GETATTR, &cb->cb_status); + if (status) + return status; + if (xdr_stream_decode_uint32_array(xdr, bitmap, 3) < 0) + return -NFSERR_BAD_XDR; + if (xdr_stream_decode_u32(xdr, &attrlen) < 0) + return -NFSERR_BAD_XDR; + if (attrlen > (sizeof(ncf->ncf_cb_change) + sizeof(ncf->ncf_cb_fsize))) + return -NFSERR_BAD_XDR; + status = decode_cb_fattr4(xdr, bitmap, ncf); + return status; +} + +/* * 20.2. Operation 4: CB_RECALL - Recall a Delegation */ static int nfs4_xdr_dec_cb_recall(struct rpc_rqst *rqstp, @@ -831,6 +925,7 @@ static const struct rpc_procinfo nfs4_cb_procedures[] = { PROC(CB_NOTIFY_LOCK, COMPOUND, cb_notify_lock, cb_notify_lock), PROC(CB_OFFLOAD, COMPOUND, cb_offload, cb_offload), PROC(CB_RECALL_ANY, COMPOUND, cb_recall_any, cb_recall_any), + PROC(CB_GETATTR, COMPOUND, cb_getattr, cb_getattr), }; static unsigned int nfs4_cb_counts[ARRAY_SIZE(nfs4_cb_procedures)]; diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 14712fa08f..648ff42700 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -2490,10 +2490,10 @@ nfsd4_proc_null(struct svc_rqst *rqstp) return rpc_success; } -static inline void nfsd4_increment_op_stats(u32 opnum) +static inline void nfsd4_increment_op_stats(struct nfsd_net *nn, u32 opnum) { if (opnum >= FIRST_NFS4_OP && opnum <= LAST_NFS4_OP) - percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_NFS4_OP(opnum)]); + percpu_counter_inc(&nn->counter[NFSD_STATS_NFS4_OP(opnum)]); } static const struct nfsd4_operation nfsd4_ops[]; @@ -2768,7 +2768,7 @@ encode_op: status, nfsd4_op_name(op->opnum)); nfsd4_cstate_clear_replay(cstate); - nfsd4_increment_op_stats(op->opnum); + nfsd4_increment_op_stats(nn, op->opnum); } fh_put(current_fh); diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 692ede4882..71f9442c5c 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -8447,6 +8447,7 @@ __be32 nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode) { __be32 status; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct file_lock_context *ctx; struct file_lock *fl; struct nfs4_delegation *dp; @@ -8476,7 +8477,7 @@ nfsd4_deleg_getattr_conflict(struct svc_rqst *rqstp, struct inode *inode) } break_lease: spin_unlock(&ctx->flc_lock); - nfsd_stats_wdeleg_getattr_inc(); + nfsd_stats_wdeleg_getattr_inc(nn); status = nfserrno(nfsd_open_break_lease(inode, NFSD_MAY_READ)); if (status != nfserr_jukebox || !nfsd_wait_for_delegreturn(rqstp, inode)) diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index c17bdf973c..24db9f9ea8 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -3513,6 +3513,7 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, args.exp = exp; args.dentry = dentry; args.ignore_crossmnt = (ignore_crossmnt != 0); + args.acl = NULL; /* * Make a local copy of the attribute bitmap that can be modified. @@ -3567,7 +3568,6 @@ nfsd4_encode_fattr4(struct svc_rqst *rqstp, struct xdr_stream *xdr, } else args.fhp = fhp; - args.acl = NULL; if (attrmask[0] & FATTR4_WORD0_ACL) { err = nfsd4_get_nfs4_acl(rqstp, dentry, &args.acl); if (err == -EOPNOTSUPP) diff --git a/fs/nfsd/nfscache.c b/fs/nfsd/nfscache.c index 5c1a4a0aa6..cfcc6ac8f2 100644 --- a/fs/nfsd/nfscache.c +++ b/fs/nfsd/nfscache.c @@ -176,27 +176,6 @@ void nfsd_drc_slab_free(void) kmem_cache_destroy(drc_slab); } -/** - * nfsd_net_reply_cache_init - per net namespace reply cache set-up - * @nn: nfsd_net being initialized - * - * Returns zero on succes; otherwise a negative errno is returned. - */ -int nfsd_net_reply_cache_init(struct nfsd_net *nn) -{ - return nfsd_percpu_counters_init(nn->counter, NFSD_NET_COUNTERS_NUM); -} - -/** - * nfsd_net_reply_cache_destroy - per net namespace reply cache tear-down - * @nn: nfsd_net being freed - * - */ -void nfsd_net_reply_cache_destroy(struct nfsd_net *nn) -{ - nfsd_percpu_counters_destroy(nn->counter, NFSD_NET_COUNTERS_NUM); -} - int nfsd_reply_cache_init(struct nfsd_net *nn) { unsigned int hashsize; @@ -501,7 +480,7 @@ out: int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, unsigned int len, struct nfsd_cacherep **cacherep) { - struct nfsd_net *nn; + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct nfsd_cacherep *rp, *found; __wsum csum; struct nfsd_drc_bucket *b; @@ -510,7 +489,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, int rtn = RC_DOIT; if (type == RC_NOCACHE) { - nfsd_stats_rc_nocache_inc(); + nfsd_stats_rc_nocache_inc(nn); goto out; } @@ -520,7 +499,6 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, * Since the common case is a cache miss followed by an insert, * preallocate an entry. */ - nn = net_generic(SVC_NET(rqstp), nfsd_net_id); rp = nfsd_cacherep_alloc(rqstp, csum, nn); if (!rp) goto out; @@ -537,7 +515,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, nfsd_cacherep_dispose(&dispose); - nfsd_stats_rc_misses_inc(); + nfsd_stats_rc_misses_inc(nn); atomic_inc(&nn->num_drc_entries); nfsd_stats_drc_mem_usage_add(nn, sizeof(*rp)); goto out; @@ -545,7 +523,7 @@ int nfsd_cache_lookup(struct svc_rqst *rqstp, unsigned int start, found_entry: /* We found a matching entry which is either in progress or done. */ nfsd_reply_cache_free_locked(NULL, rp, nn); - nfsd_stats_rc_hits_inc(); + nfsd_stats_rc_hits_inc(nn); rtn = RC_DROPIT; rp = found; @@ -687,15 +665,15 @@ int nfsd_reply_cache_stats_show(struct seq_file *m, void *v) atomic_read(&nn->num_drc_entries)); seq_printf(m, "hash buckets: %u\n", 1 << nn->maskbits); seq_printf(m, "mem usage: %lld\n", - percpu_counter_sum_positive(&nn->counter[NFSD_NET_DRC_MEM_USAGE])); + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_DRC_MEM_USAGE])); seq_printf(m, "cache hits: %lld\n", - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS])); + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS])); seq_printf(m, "cache misses: %lld\n", - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES])); + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES])); seq_printf(m, "not cached: %lld\n", - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE])); + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE])); seq_printf(m, "payload misses: %lld\n", - percpu_counter_sum_positive(&nn->counter[NFSD_NET_PAYLOAD_MISSES])); + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_PAYLOAD_MISSES])); seq_printf(m, "longest chain len: %u\n", nn->longest_chain); seq_printf(m, "cachesize at longest: %u\n", nn->longest_chain_cachesize); return 0; diff --git a/fs/nfsd/nfsctl.c b/fs/nfsd/nfsctl.c index f206ca32e7..ea3c811424 100644 --- a/fs/nfsd/nfsctl.c +++ b/fs/nfsd/nfsctl.c @@ -1671,7 +1671,7 @@ static __net_init int nfsd_net_init(struct net *net) retval = nfsd_idmap_init(net); if (retval) goto out_idmap_error; - retval = nfsd_net_reply_cache_init(nn); + retval = nfsd_stat_counters_init(nn); if (retval) goto out_repcache_error; nn->nfsd_versions = NULL; @@ -1679,6 +1679,7 @@ static __net_init int nfsd_net_init(struct net *net) nfsd4_init_leases_net(nn); get_random_bytes(&nn->siphash_key, sizeof(nn->siphash_key)); seqlock_init(&nn->writeverf_lock); + nfsd_proc_stat_init(net); return 0; @@ -1699,7 +1700,8 @@ static __net_exit void nfsd_net_exit(struct net *net) { struct nfsd_net *nn = net_generic(net, nfsd_net_id); - nfsd_net_reply_cache_destroy(nn); + nfsd_proc_stat_shutdown(net); + nfsd_stat_counters_destroy(nn); nfsd_idmap_shutdown(net); nfsd_export_shutdown(net); nfsd_netns_free_versions(nn); @@ -1722,12 +1724,9 @@ static int __init init_nfsd(void) retval = nfsd4_init_pnfs(); if (retval) goto out_free_slabs; - retval = nfsd_stat_init(); /* Statistics */ - if (retval) - goto out_free_pnfs; retval = nfsd_drc_slab_create(); if (retval) - goto out_free_stat; + goto out_free_pnfs; nfsd_lockd_init(); /* lockd->nfsd callbacks */ retval = create_proc_exports_entry(); if (retval) @@ -1761,8 +1760,6 @@ out_free_exports: out_free_lockd: nfsd_lockd_shutdown(); nfsd_drc_slab_free(); -out_free_stat: - nfsd_stat_shutdown(); out_free_pnfs: nfsd4_exit_pnfs(); out_free_slabs: @@ -1780,7 +1777,6 @@ static void __exit exit_nfsd(void) nfsd_drc_slab_free(); remove_proc_entry("fs/nfs/exports", NULL); remove_proc_entry("fs/nfs", NULL); - nfsd_stat_shutdown(); nfsd_lockd_shutdown(); nfsd4_free_slabs(); nfsd4_exit_pnfs(); diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index dbfa0ac135..40fecf7b22 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -327,6 +327,7 @@ out: __be32 fh_verify(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, int access) { + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); struct svc_export *exp = NULL; struct dentry *dentry; __be32 error; @@ -395,7 +396,7 @@ skip_pseudoflavor_check: out: trace_nfsd_fh_verify_err(rqstp, fhp, type, access, error); if (error == nfserr_stale) - nfsd_stats_fh_stale_inc(exp); + nfsd_stats_fh_stale_inc(nn, exp); return error; } diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h index 41bdc913fa..0bbbe57e02 100644 --- a/fs/nfsd/state.h +++ b/fs/nfsd/state.h @@ -117,6 +117,16 @@ struct nfs4_cpntf_state { time64_t cpntf_time; /* last time stateid used */ }; +struct nfs4_cb_fattr { + struct nfsd4_callback ncf_getattr; + u32 ncf_cb_status; + u32 ncf_cb_bmap[1]; + + /* from CB_GETATTR reply */ + u64 ncf_cb_change; + u64 ncf_cb_fsize; +}; + /* * Represents a delegation stateid. The nfs4_client holds references to these * and they are put when it is being destroyed or when the delegation is @@ -150,6 +160,9 @@ struct nfs4_delegation { int dl_retries; struct nfsd4_callback dl_recall; bool dl_recalled; + + /* for CB_GETATTR */ + struct nfs4_cb_fattr dl_cb_fattr; }; #define cb_to_delegation(cb) \ @@ -640,6 +653,7 @@ enum nfsd4_cb_op { NFSPROC4_CLNT_CB_SEQUENCE, NFSPROC4_CLNT_CB_NOTIFY_LOCK, NFSPROC4_CLNT_CB_RECALL_ANY, + NFSPROC4_CLNT_CB_GETATTR, }; /* Returns true iff a is later than b: */ diff --git a/fs/nfsd/stats.c b/fs/nfsd/stats.c index 12d79f5d4e..44e275324b 100644 --- a/fs/nfsd/stats.c +++ b/fs/nfsd/stats.c @@ -34,15 +34,17 @@ struct svc_stat nfsd_svcstats = { static int nfsd_show(struct seq_file *seq, void *v) { + struct net *net = pde_data(file_inode(seq->file)); + struct nfsd_net *nn = net_generic(net, nfsd_net_id); int i; seq_printf(seq, "rc %lld %lld %lld\nfh %lld 0 0 0 0\nio %lld %lld\n", - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_HITS]), - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_MISSES]), - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]), - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_FH_STALE]), - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_READ]), - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_IO_WRITE])); + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_HITS]), + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_MISSES]), + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_RC_NOCACHE]), + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_FH_STALE]), + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_READ]), + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_IO_WRITE])); /* thread usage: */ seq_printf(seq, "th %u 0", atomic_read(&nfsdstats.th_cnt)); @@ -63,10 +65,10 @@ static int nfsd_show(struct seq_file *seq, void *v) seq_printf(seq, "proc4ops %u", LAST_NFS4_OP + 1); for (i = 0; i <= LAST_NFS4_OP; i++) { seq_printf(seq, " %lld", - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_NFS4_OP(i)])); + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_NFS4_OP(i)])); } seq_printf(seq, "\nwdeleg_getattr %lld", - percpu_counter_sum_positive(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR])); + percpu_counter_sum_positive(&nn->counter[NFSD_STATS_WDELEG_GETATTR])); seq_putc(seq, '\n'); #endif @@ -108,31 +110,22 @@ void nfsd_percpu_counters_destroy(struct percpu_counter counters[], int num) percpu_counter_destroy(&counters[i]); } -static int nfsd_stat_counters_init(void) +int nfsd_stat_counters_init(struct nfsd_net *nn) { - return nfsd_percpu_counters_init(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); + return nfsd_percpu_counters_init(nn->counter, NFSD_STATS_COUNTERS_NUM); } -static void nfsd_stat_counters_destroy(void) +void nfsd_stat_counters_destroy(struct nfsd_net *nn) { - nfsd_percpu_counters_destroy(nfsdstats.counter, NFSD_STATS_COUNTERS_NUM); + nfsd_percpu_counters_destroy(nn->counter, NFSD_STATS_COUNTERS_NUM); } -int nfsd_stat_init(void) +void nfsd_proc_stat_init(struct net *net) { - int err; - - err = nfsd_stat_counters_init(); - if (err) - return err; - - svc_proc_register(&init_net, &nfsd_svcstats, &nfsd_proc_ops); - - return 0; + svc_proc_register(net, &nfsd_svcstats, &nfsd_proc_ops); } -void nfsd_stat_shutdown(void) +void nfsd_proc_stat_shutdown(struct net *net) { - nfsd_stat_counters_destroy(); - svc_proc_unregister(&init_net, "nfsd"); + svc_proc_unregister(net, "nfsd"); } diff --git a/fs/nfsd/stats.h b/fs/nfsd/stats.h index 14f50c660b..c24be4ddbe 100644 --- a/fs/nfsd/stats.h +++ b/fs/nfsd/stats.h @@ -10,26 +10,7 @@ #include <uapi/linux/nfsd/stats.h> #include <linux/percpu_counter.h> - -enum { - NFSD_STATS_RC_HITS, /* repcache hits */ - NFSD_STATS_RC_MISSES, /* repcache misses */ - NFSD_STATS_RC_NOCACHE, /* uncached reqs */ - NFSD_STATS_FH_STALE, /* FH stale error */ - NFSD_STATS_IO_READ, /* bytes returned to read requests */ - NFSD_STATS_IO_WRITE, /* bytes passed in write requests */ -#ifdef CONFIG_NFSD_V4 - NFSD_STATS_FIRST_NFS4_OP, /* count of individual nfsv4 operations */ - NFSD_STATS_LAST_NFS4_OP = NFSD_STATS_FIRST_NFS4_OP + LAST_NFS4_OP, -#define NFSD_STATS_NFS4_OP(op) (NFSD_STATS_FIRST_NFS4_OP + (op)) - NFSD_STATS_WDELEG_GETATTR, /* count of getattr conflict with wdeleg */ -#endif - NFSD_STATS_COUNTERS_NUM -}; - struct nfsd_stats { - struct percpu_counter counter[NFSD_STATS_COUNTERS_NUM]; - atomic_t th_cnt; /* number of available threads */ }; @@ -40,64 +21,69 @@ extern struct svc_stat nfsd_svcstats; int nfsd_percpu_counters_init(struct percpu_counter *counters, int num); void nfsd_percpu_counters_reset(struct percpu_counter *counters, int num); void nfsd_percpu_counters_destroy(struct percpu_counter *counters, int num); -int nfsd_stat_init(void); -void nfsd_stat_shutdown(void); +int nfsd_stat_counters_init(struct nfsd_net *nn); +void nfsd_stat_counters_destroy(struct nfsd_net *nn); +void nfsd_proc_stat_init(struct net *net); +void nfsd_proc_stat_shutdown(struct net *net); -static inline void nfsd_stats_rc_hits_inc(void) +static inline void nfsd_stats_rc_hits_inc(struct nfsd_net *nn) { - percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_HITS]); + percpu_counter_inc(&nn->counter[NFSD_STATS_RC_HITS]); } -static inline void nfsd_stats_rc_misses_inc(void) +static inline void nfsd_stats_rc_misses_inc(struct nfsd_net *nn) { - percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_MISSES]); + percpu_counter_inc(&nn->counter[NFSD_STATS_RC_MISSES]); } -static inline void nfsd_stats_rc_nocache_inc(void) +static inline void nfsd_stats_rc_nocache_inc(struct nfsd_net *nn) { - percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_RC_NOCACHE]); + percpu_counter_inc(&nn->counter[NFSD_STATS_RC_NOCACHE]); } -static inline void nfsd_stats_fh_stale_inc(struct svc_export *exp) +static inline void nfsd_stats_fh_stale_inc(struct nfsd_net *nn, + struct svc_export *exp) { - percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_FH_STALE]); + percpu_counter_inc(&nn->counter[NFSD_STATS_FH_STALE]); if (exp && exp->ex_stats) percpu_counter_inc(&exp->ex_stats->counter[EXP_STATS_FH_STALE]); } -static inline void nfsd_stats_io_read_add(struct svc_export *exp, s64 amount) +static inline void nfsd_stats_io_read_add(struct nfsd_net *nn, + struct svc_export *exp, s64 amount) { - percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_READ], amount); + percpu_counter_add(&nn->counter[NFSD_STATS_IO_READ], amount); if (exp && exp->ex_stats) percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_READ], amount); } -static inline void nfsd_stats_io_write_add(struct svc_export *exp, s64 amount) +static inline void nfsd_stats_io_write_add(struct nfsd_net *nn, + struct svc_export *exp, s64 amount) { - percpu_counter_add(&nfsdstats.counter[NFSD_STATS_IO_WRITE], amount); + percpu_counter_add(&nn->counter[NFSD_STATS_IO_WRITE], amount); if (exp && exp->ex_stats) percpu_counter_add(&exp->ex_stats->counter[EXP_STATS_IO_WRITE], amount); } static inline void nfsd_stats_payload_misses_inc(struct nfsd_net *nn) { - percpu_counter_inc(&nn->counter[NFSD_NET_PAYLOAD_MISSES]); + percpu_counter_inc(&nn->counter[NFSD_STATS_PAYLOAD_MISSES]); } static inline void nfsd_stats_drc_mem_usage_add(struct nfsd_net *nn, s64 amount) { - percpu_counter_add(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount); + percpu_counter_add(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount); } static inline void nfsd_stats_drc_mem_usage_sub(struct nfsd_net *nn, s64 amount) { - percpu_counter_sub(&nn->counter[NFSD_NET_DRC_MEM_USAGE], amount); + percpu_counter_sub(&nn->counter[NFSD_STATS_DRC_MEM_USAGE], amount); } #ifdef CONFIG_NFSD_V4 -static inline void nfsd_stats_wdeleg_getattr_inc(void) +static inline void nfsd_stats_wdeleg_getattr_inc(struct nfsd_net *nn) { - percpu_counter_inc(&nfsdstats.counter[NFSD_STATS_WDELEG_GETATTR]); + percpu_counter_inc(&nn->counter[NFSD_STATS_WDELEG_GETATTR]); } #endif #endif /* _NFSD_STATS_H */ diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 4ed1e83def..9b5e20b2d0 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1002,7 +1002,9 @@ static __be32 nfsd_finish_read(struct svc_rqst *rqstp, struct svc_fh *fhp, unsigned long *count, u32 *eof, ssize_t host_err) { if (host_err >= 0) { - nfsd_stats_io_read_add(fhp->fh_export, host_err); + struct nfsd_net *nn = net_generic(SVC_NET(rqstp), nfsd_net_id); + + nfsd_stats_io_read_add(nn, fhp->fh_export, host_err); *eof = nfsd_eof_on_read(file, offset, host_err, *count); *count = host_err; fsnotify_access(file); @@ -1185,7 +1187,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct nfsd_file *nf, goto out_nfserr; } *cnt = host_err; - nfsd_stats_io_write_add(exp, *cnt); + nfsd_stats_io_write_add(nn, exp, *cnt); fsnotify_modify(file); host_err = filemap_check_wb_err(file->f_mapping, since); if (host_err < 0) diff --git a/fs/nfsd/xdr4cb.h b/fs/nfsd/xdr4cb.h index 0d39af1b00..e8b00309c4 100644 --- a/fs/nfsd/xdr4cb.h +++ b/fs/nfsd/xdr4cb.h @@ -54,3 +54,21 @@ #define NFS4_dec_cb_recall_any_sz (cb_compound_dec_hdr_sz + \ cb_sequence_dec_sz + \ op_dec_sz) + +/* + * 1: CB_GETATTR opcode (32-bit) + * N: file_handle + * 1: number of entry in attribute array (32-bit) + * 1: entry 0 in attribute array (32-bit) + */ +#define NFS4_enc_cb_getattr_sz (cb_compound_enc_hdr_sz + \ + cb_sequence_enc_sz + \ + 1 + enc_nfs4_fh_sz + 1 + 1) +/* + * 4: fattr_bitmap_maxsz + * 1: attribute array len + * 2: change attr (64-bit) + * 2: size (64-bit) + */ +#define NFS4_dec_cb_getattr_sz (cb_compound_dec_hdr_sz + \ + cb_sequence_dec_sz + 4 + 1 + 2 + 2 + op_dec_sz) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3f78ebbb79..8433f076e9 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1818,15 +1818,13 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p, } static void make_uffd_wp_pte(struct vm_area_struct *vma, - unsigned long addr, pte_t *pte) + unsigned long addr, pte_t *pte, pte_t ptent) { - pte_t ptent = ptep_get(pte); - if (pte_present(ptent)) { pte_t old_pte; old_pte = ptep_modify_prot_start(vma, addr, pte); - ptent = pte_mkuffd_wp(ptent); + ptent = pte_mkuffd_wp(old_pte); ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); } else if (is_swap_pte(ptent)) { ptent = pte_swp_mkuffd_wp(ptent); @@ -2176,9 +2174,12 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) { /* Fast path for performing exclusive WP */ for (addr = start; addr != end; pte++, addr += PAGE_SIZE) { - if (pte_uffd_wp(ptep_get(pte))) + pte_t ptent = ptep_get(pte); + + if ((pte_present(ptent) && pte_uffd_wp(ptent)) || + pte_swp_uffd_wp_any(ptent)) continue; - make_uffd_wp_pte(vma, addr, pte); + make_uffd_wp_pte(vma, addr, pte, ptent); if (!flush_end) start = addr; flush_end = addr + PAGE_SIZE; @@ -2191,8 +2192,10 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, p->arg.return_mask == PAGE_IS_WRITTEN) { for (addr = start; addr < end; pte++, addr += PAGE_SIZE) { unsigned long next = addr + PAGE_SIZE; + pte_t ptent = ptep_get(pte); - if (pte_uffd_wp(ptep_get(pte))) + if ((pte_present(ptent) && pte_uffd_wp(ptent)) || + pte_swp_uffd_wp_any(ptent)) continue; ret = pagemap_scan_output(p->cur_vma_category | PAGE_IS_WRITTEN, p, addr, &next); @@ -2200,7 +2203,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, break; if (~p->arg.flags & PM_SCAN_WP_MATCHING) continue; - make_uffd_wp_pte(vma, addr, pte); + make_uffd_wp_pte(vma, addr, pte, ptent); if (!flush_end) start = addr; flush_end = next; @@ -2209,8 +2212,9 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, } for (addr = start; addr != end; pte++, addr += PAGE_SIZE) { + pte_t ptent = ptep_get(pte); unsigned long categories = p->cur_vma_category | - pagemap_page_category(p, vma, addr, ptep_get(pte)); + pagemap_page_category(p, vma, addr, ptent); unsigned long next = addr + PAGE_SIZE; if (!pagemap_scan_is_interesting_page(categories, p)) @@ -2225,7 +2229,7 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start, if (~categories & PAGE_IS_WRITTEN) continue; - make_uffd_wp_pte(vma, addr, pte); + make_uffd_wp_pte(vma, addr, pte, ptent); if (!flush_end) start = addr; flush_end = next; diff --git a/fs/smb/client/cifsglob.h b/fs/smb/client/cifsglob.h index 0c3311de5d..a393d505e8 100644 --- a/fs/smb/client/cifsglob.h +++ b/fs/smb/client/cifsglob.h @@ -1063,6 +1063,7 @@ struct cifs_ses { and after mount option parsing we fill it */ char *domainName; char *password; + char *password2; /* When key rotation used, new password may be set before it expires */ char workstation_name[CIFS_MAX_WORKSTATION_LEN]; struct session_key auth_key; struct ntlmssp_auth *ntlmssp; /* ciphertext, flags, server challenge */ diff --git a/fs/smb/client/connect.c b/fs/smb/client/connect.c index 7516c7d455..e28f011f11 100644 --- a/fs/smb/client/connect.c +++ b/fs/smb/client/connect.c @@ -2186,6 +2186,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) } ++delim; + /* BB consider adding support for password2 (Key Rotation) for multiuser in future */ ctx->password = kstrndup(delim, len, GFP_KERNEL); if (!ctx->password) { cifs_dbg(FYI, "Unable to allocate %zd bytes for password\n", @@ -2209,6 +2210,7 @@ cifs_set_cifscreds(struct smb3_fs_context *ctx, struct cifs_ses *ses) kfree(ctx->username); ctx->username = NULL; kfree_sensitive(ctx->password); + /* no need to free ctx->password2 since not allocated in this path */ ctx->password = NULL; goto out_key_put; } @@ -2320,6 +2322,12 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx) if (!ses->password) goto get_ses_fail; } + /* ctx->password freed at unmount */ + if (ctx->password2) { + ses->password2 = kstrdup(ctx->password2, GFP_KERNEL); + if (!ses->password2) + goto get_ses_fail; + } if (ctx->domainname) { ses->domainName = kstrdup(ctx->domainname, GFP_KERNEL); if (!ses->domainName) diff --git a/fs/smb/client/fs_context.c b/fs/smb/client/fs_context.c index 775b0ca605..f119035a82 100644 --- a/fs/smb/client/fs_context.c +++ b/fs/smb/client/fs_context.c @@ -162,6 +162,7 @@ const struct fs_parameter_spec smb3_fs_parameters[] = { fsparam_string("username", Opt_user), fsparam_string("pass", Opt_pass), fsparam_string("password", Opt_pass), + fsparam_string("password2", Opt_pass2), fsparam_string("ip", Opt_ip), fsparam_string("addr", Opt_ip), fsparam_string("domain", Opt_domain), @@ -315,6 +316,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx new_ctx->nodename = NULL; new_ctx->username = NULL; new_ctx->password = NULL; + new_ctx->password2 = NULL; new_ctx->server_hostname = NULL; new_ctx->domainname = NULL; new_ctx->UNC = NULL; @@ -327,6 +329,7 @@ smb3_fs_context_dup(struct smb3_fs_context *new_ctx, struct smb3_fs_context *ctx DUP_CTX_STR(prepath); DUP_CTX_STR(username); DUP_CTX_STR(password); + DUP_CTX_STR(password2); DUP_CTX_STR(server_hostname); DUP_CTX_STR(UNC); DUP_CTX_STR(source); @@ -885,6 +888,8 @@ static int smb3_reconfigure(struct fs_context *fc) else { kfree_sensitive(ses->password); ses->password = kstrdup(ctx->password, GFP_KERNEL); + kfree_sensitive(ses->password2); + ses->password2 = kstrdup(ctx->password2, GFP_KERNEL); } STEAL_STRING(cifs_sb, ctx, domainname); STEAL_STRING(cifs_sb, ctx, nodename); @@ -1287,6 +1292,18 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, goto cifs_parse_mount_err; } break; + case Opt_pass2: + kfree_sensitive(ctx->password2); + ctx->password2 = NULL; + if (strlen(param->string) == 0) + break; + + ctx->password2 = kstrdup(param->string, GFP_KERNEL); + if (ctx->password2 == NULL) { + cifs_errorf(fc, "OOM when copying password2 string\n"); + goto cifs_parse_mount_err; + } + break; case Opt_ip: if (strlen(param->string) == 0) { ctx->got_ip = false; @@ -1586,6 +1603,8 @@ static int smb3_fs_context_parse_param(struct fs_context *fc, cifs_parse_mount_err: kfree_sensitive(ctx->password); ctx->password = NULL; + kfree_sensitive(ctx->password2); + ctx->password2 = NULL; return -EINVAL; } @@ -1690,6 +1709,8 @@ smb3_cleanup_fs_context_contents(struct smb3_fs_context *ctx) ctx->username = NULL; kfree_sensitive(ctx->password); ctx->password = NULL; + kfree_sensitive(ctx->password2); + ctx->password2 = NULL; kfree(ctx->server_hostname); ctx->server_hostname = NULL; kfree(ctx->UNC); diff --git a/fs/smb/client/fs_context.h b/fs/smb/client/fs_context.h index 61776572b8..369a3fea1d 100644 --- a/fs/smb/client/fs_context.h +++ b/fs/smb/client/fs_context.h @@ -138,6 +138,7 @@ enum cifs_param { Opt_source, Opt_user, Opt_pass, + Opt_pass2, Opt_ip, Opt_domain, Opt_srcaddr, @@ -171,6 +172,7 @@ struct smb3_fs_context { char *username; char *password; + char *password2; char *domainname; char *source; char *server_hostname; diff --git a/fs/smb/client/misc.c b/fs/smb/client/misc.c index 0d13db80e6..d56959d02e 100644 --- a/fs/smb/client/misc.c +++ b/fs/smb/client/misc.c @@ -101,6 +101,7 @@ sesInfoFree(struct cifs_ses *buf_to_free) kfree(buf_to_free->serverDomain); kfree(buf_to_free->serverNOS); kfree_sensitive(buf_to_free->password); + kfree_sensitive(buf_to_free->password2); kfree(buf_to_free->user_name); kfree(buf_to_free->domainName); kfree_sensitive(buf_to_free->auth_key.response); diff --git a/fs/smb/client/smb2pdu.c b/fs/smb/client/smb2pdu.c index b71e32d66e..60793143e2 100644 --- a/fs/smb/client/smb2pdu.c +++ b/fs/smb/client/smb2pdu.c @@ -367,6 +367,17 @@ again: } rc = cifs_setup_session(0, ses, server, nls_codepage); + if ((rc == -EACCES) || (rc == -EKEYEXPIRED) || (rc == -EKEYREVOKED)) { + /* + * Try alternate password for next reconnect (key rotation + * could be enabled on the server e.g.) if an alternate + * password is available and the current password is expired, + * but do not swap on non pwd related errors like host down + */ + if (ses->password2) + swap(ses->password2, ses->password); + } + if ((rc == -EACCES) && !tcon->retry) { mutex_unlock(&ses->session_mutex); rc = -EHOSTDOWN; diff --git a/fs/smb/server/oplock.c b/fs/smb/server/oplock.c index 53dfaac425..dc729ab980 100644 --- a/fs/smb/server/oplock.c +++ b/fs/smb/server/oplock.c @@ -612,13 +612,23 @@ static int oplock_break_pending(struct oplock_info *opinfo, int req_op_level) if (opinfo->op_state == OPLOCK_CLOSING) return -ENOENT; - else if (!opinfo->is_lease && opinfo->level <= req_op_level) - return 1; + else if (opinfo->level <= req_op_level) { + if (opinfo->is_lease && + opinfo->o_lease->state != + (SMB2_LEASE_HANDLE_CACHING_LE | + SMB2_LEASE_READ_CACHING_LE)) + return 1; + } } - if (!opinfo->is_lease && opinfo->level <= req_op_level) { - wake_up_oplock_break(opinfo); - return 1; + if (opinfo->level <= req_op_level) { + if (opinfo->is_lease && + opinfo->o_lease->state != + (SMB2_LEASE_HANDLE_CACHING_LE | + SMB2_LEASE_READ_CACHING_LE)) { + wake_up_oplock_break(opinfo); + return 1; + } } return 0; } @@ -886,7 +896,6 @@ static int oplock_break(struct oplock_info *brk_opinfo, int req_op_level) struct lease *lease = brk_opinfo->o_lease; atomic_inc(&brk_opinfo->breaking_cnt); - err = oplock_break_pending(brk_opinfo, req_op_level); if (err) return err < 0 ? err : 0; @@ -1199,7 +1208,9 @@ int smb_grant_oplock(struct ksmbd_work *work, int req_op_level, u64 pid, /* Only v2 leases handle the directory */ if (S_ISDIR(file_inode(fp->filp)->i_mode)) { - if (!lctx || lctx->version != 2) + if (!lctx || lctx->version != 2 || + (lctx->flags != SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE && + !lctx->epoch)) return 0; } @@ -1461,8 +1472,9 @@ void create_lease_buf(u8 *rbuf, struct lease *lease) buf->lcontext.LeaseFlags = lease->flags; buf->lcontext.Epoch = cpu_to_le16(lease->epoch); buf->lcontext.LeaseState = lease->state; - memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key, - SMB2_LEASE_KEY_SIZE); + if (lease->flags == SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE) + memcpy(buf->lcontext.ParentLeaseKey, lease->parent_lease_key, + SMB2_LEASE_KEY_SIZE); buf->ccontext.DataOffset = cpu_to_le16(offsetof (struct create_lease_v2, lcontext)); buf->ccontext.DataLength = cpu_to_le32(sizeof(struct lease_context_v2)); @@ -1527,8 +1539,9 @@ struct lease_ctx_info *parse_lease_state(void *open_req, bool is_dir) lreq->flags = lc->lcontext.LeaseFlags; lreq->epoch = lc->lcontext.Epoch; lreq->duration = lc->lcontext.LeaseDuration; - memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey, - SMB2_LEASE_KEY_SIZE); + if (lreq->flags == SMB2_LEASE_FLAG_PARENT_LEASE_KEY_SET_LE) + memcpy(lreq->parent_lease_key, lc->lcontext.ParentLeaseKey, + SMB2_LEASE_KEY_SIZE); lreq->version = 2; } else { struct create_lease *lc = (struct create_lease *)cc; diff --git a/fs/smb/server/transport_tcp.c b/fs/smb/server/transport_tcp.c index 002a3f0dc7..6633fa78e9 100644 --- a/fs/smb/server/transport_tcp.c +++ b/fs/smb/server/transport_tcp.c @@ -448,6 +448,10 @@ static int create_socket(struct interface *iface) sin6.sin6_family = PF_INET6; sin6.sin6_addr = in6addr_any; sin6.sin6_port = htons(server_conf.tcp_port); + + lock_sock(ksmbd_socket->sk); + ksmbd_socket->sk->sk_ipv6only = false; + release_sock(ksmbd_socket->sk); } ksmbd_tcp_nodelay(ksmbd_socket); diff --git a/fs/tracefs/event_inode.c b/fs/tracefs/event_inode.c index 110e8a2721..56d1741fe0 100644 --- a/fs/tracefs/event_inode.c +++ b/fs/tracefs/event_inode.c @@ -164,21 +164,7 @@ static int eventfs_set_attr(struct mnt_idmap *idmap, struct dentry *dentry, * determined by the parent directory. */ if (dentry->d_inode->i_mode & S_IFDIR) { - /* - * The events directory dentry is never freed, unless its - * part of an instance that is deleted. It's attr is the - * default for its child files and directories. - * Do not update it. It's not used for its own mode or ownership. - */ - if (ei->is_events) { - /* But it still needs to know if it was modified */ - if (iattr->ia_valid & ATTR_UID) - ei->attr.mode |= EVENTFS_SAVE_UID; - if (iattr->ia_valid & ATTR_GID) - ei->attr.mode |= EVENTFS_SAVE_GID; - } else { - update_attr(&ei->attr, iattr); - } + update_attr(&ei->attr, iattr); } else { name = dentry->d_name.name; @@ -265,6 +251,35 @@ static const struct file_operations eventfs_file_operations = { .llseek = generic_file_llseek, }; +/* + * On a remount of tracefs, if UID or GID options are set, then + * the mount point inode permissions should be used. + * Reset the saved permission flags appropriately. + */ +void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid) +{ + struct eventfs_inode *ei = ti->private; + + if (!ei) + return; + + if (update_uid) + ei->attr.mode &= ~EVENTFS_SAVE_UID; + + if (update_gid) + ei->attr.mode &= ~EVENTFS_SAVE_GID; + + if (!ei->entry_attrs) + return; + + for (int i = 0; i < ei->nr_entries; i++) { + if (update_uid) + ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_UID; + if (update_gid) + ei->entry_attrs[i].mode &= ~EVENTFS_SAVE_GID; + } +} + /* Return the evenfs_inode of the "events" directory */ static struct eventfs_inode *eventfs_find_events(struct dentry *dentry) { diff --git a/fs/tracefs/inode.c b/fs/tracefs/inode.c index d65ffad4c3..e30b74228e 100644 --- a/fs/tracefs/inode.c +++ b/fs/tracefs/inode.c @@ -30,20 +30,47 @@ static struct vfsmount *tracefs_mount; static int tracefs_mount_count; static bool tracefs_registered; +/* + * Keep track of all tracefs_inodes in order to update their + * flags if necessary on a remount. + */ +static DEFINE_SPINLOCK(tracefs_inode_lock); +static LIST_HEAD(tracefs_inodes); + static struct inode *tracefs_alloc_inode(struct super_block *sb) { struct tracefs_inode *ti; + unsigned long flags; ti = kmem_cache_alloc(tracefs_inode_cachep, GFP_KERNEL); if (!ti) return NULL; + spin_lock_irqsave(&tracefs_inode_lock, flags); + list_add_rcu(&ti->list, &tracefs_inodes); + spin_unlock_irqrestore(&tracefs_inode_lock, flags); + return &ti->vfs_inode; } +static void tracefs_free_inode_rcu(struct rcu_head *rcu) +{ + struct tracefs_inode *ti; + + ti = container_of(rcu, struct tracefs_inode, rcu); + kmem_cache_free(tracefs_inode_cachep, ti); +} + static void tracefs_free_inode(struct inode *inode) { - kmem_cache_free(tracefs_inode_cachep, get_tracefs(inode)); + struct tracefs_inode *ti = get_tracefs(inode); + unsigned long flags; + + spin_lock_irqsave(&tracefs_inode_lock, flags); + list_del_rcu(&ti->list); + spin_unlock_irqrestore(&tracefs_inode_lock, flags); + + call_rcu(&ti->rcu, tracefs_free_inode_rcu); } static ssize_t default_read_file(struct file *file, char __user *buf, @@ -153,16 +180,39 @@ static void set_tracefs_inode_owner(struct inode *inode) { struct tracefs_inode *ti = get_tracefs(inode); struct inode *root_inode = ti->private; + kuid_t uid; + kgid_t gid; + + uid = root_inode->i_uid; + gid = root_inode->i_gid; + + /* + * If the root is not the mount point, then check the root's + * permissions. If it was never set, then default to the + * mount point. + */ + if (root_inode != d_inode(root_inode->i_sb->s_root)) { + struct tracefs_inode *rti; + + rti = get_tracefs(root_inode); + root_inode = d_inode(root_inode->i_sb->s_root); + + if (!(rti->flags & TRACEFS_UID_PERM_SET)) + uid = root_inode->i_uid; + + if (!(rti->flags & TRACEFS_GID_PERM_SET)) + gid = root_inode->i_gid; + } /* * If this inode has never been referenced, then update * the permissions to the superblock. */ if (!(ti->flags & TRACEFS_UID_PERM_SET)) - inode->i_uid = root_inode->i_uid; + inode->i_uid = uid; if (!(ti->flags & TRACEFS_GID_PERM_SET)) - inode->i_gid = root_inode->i_gid; + inode->i_gid = gid; } static int tracefs_permission(struct mnt_idmap *idmap, @@ -313,6 +363,8 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) struct tracefs_fs_info *fsi = sb->s_fs_info; struct inode *inode = d_inode(sb->s_root); struct tracefs_mount_opts *opts = &fsi->mount_opts; + struct tracefs_inode *ti; + bool update_uid, update_gid; umode_t tmp_mode; /* @@ -332,6 +384,25 @@ static int tracefs_apply_options(struct super_block *sb, bool remount) if (!remount || opts->opts & BIT(Opt_gid)) inode->i_gid = opts->gid; + if (remount && (opts->opts & BIT(Opt_uid) || opts->opts & BIT(Opt_gid))) { + + update_uid = opts->opts & BIT(Opt_uid); + update_gid = opts->opts & BIT(Opt_gid); + + rcu_read_lock(); + list_for_each_entry_rcu(ti, &tracefs_inodes, list) { + if (update_uid) + ti->flags &= ~TRACEFS_UID_PERM_SET; + + if (update_gid) + ti->flags &= ~TRACEFS_GID_PERM_SET; + + if (ti->flags & TRACEFS_EVENT_INODE) + eventfs_remount(ti, update_uid, update_gid); + } + rcu_read_unlock(); + } + return 0; } @@ -398,7 +469,22 @@ static int tracefs_d_revalidate(struct dentry *dentry, unsigned int flags) return !(ei && ei->is_freed); } +static void tracefs_d_iput(struct dentry *dentry, struct inode *inode) +{ + struct tracefs_inode *ti = get_tracefs(inode); + + /* + * This inode is being freed and cannot be used for + * eventfs. Clear the flag so that it doesn't call into + * eventfs during the remount flag updates. The eventfs_inode + * gets freed after an RCU cycle, so the content will still + * be safe if the iteration is going on now. + */ + ti->flags &= ~TRACEFS_EVENT_INODE; +} + static const struct dentry_operations tracefs_dentry_operations = { + .d_iput = tracefs_d_iput, .d_revalidate = tracefs_d_revalidate, .d_release = tracefs_d_release, }; diff --git a/fs/tracefs/internal.h b/fs/tracefs/internal.h index beb3dcd0e4..824cbe8367 100644 --- a/fs/tracefs/internal.h +++ b/fs/tracefs/internal.h @@ -11,8 +11,12 @@ enum { }; struct tracefs_inode { - struct inode vfs_inode; + union { + struct inode vfs_inode; + struct rcu_head rcu; + }; /* The below gets initialized with memset_after(ti, 0, vfs_inode) */ + struct list_head list; unsigned long flags; void *private; }; @@ -75,6 +79,7 @@ struct dentry *tracefs_end_creating(struct dentry *dentry); struct dentry *tracefs_failed_creating(struct dentry *dentry); struct inode *tracefs_get_inode(struct super_block *sb); +void eventfs_remount(struct tracefs_inode *ti, bool update_uid, bool update_gid); void eventfs_d_release(struct dentry *dentry); #endif /* _TRACEFS_INTERNAL_H */ diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 959551ff9a..13ef4e1fc1 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -925,6 +925,10 @@ static int userfaultfd_release(struct inode *inode, struct file *file) prev = vma; continue; } + /* Reset ptes for the whole vma range if wr-protected */ + if (userfaultfd_wp(vma)) + uffd_wp_range(vma, vma->vm_start, + vma->vm_end - vma->vm_start, false); new_flags = vma->vm_flags & ~__VM_UFFD_FLAGS; vma = vma_modify_flags_uffd(&vmi, prev, vma, vma->vm_start, vma->vm_end, new_flags, diff --git a/fs/vboxsf/file.c b/fs/vboxsf/file.c index 2307f8037e..118dedef8e 100644 --- a/fs/vboxsf/file.c +++ b/fs/vboxsf/file.c @@ -218,6 +218,7 @@ const struct file_operations vboxsf_reg_fops = { .release = vboxsf_file_release, .fsync = noop_fsync, .splice_read = filemap_splice_read, + .setlease = simple_nosetlease, }; const struct inode_operations vboxsf_reg_iops = { |