diff options
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r-- | fs/btrfs/file.c | 60 |
1 files changed, 46 insertions, 14 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d90138683a..ca434f0cd2 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1550,21 +1550,37 @@ relock: * So here we disable page faults in the iov_iter and then retry if we * got -EFAULT, faulting in the pages before the retry. */ +again: from->nofault = true; dio = btrfs_dio_write(iocb, from, written); from->nofault = false; - /* - * iomap_dio_complete() will call btrfs_sync_file() if we have a dsync - * iocb, and that needs to lock the inode. So unlock it before calling - * iomap_dio_complete() to avoid a deadlock. - */ - btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); - - if (IS_ERR_OR_NULL(dio)) + if (IS_ERR_OR_NULL(dio)) { ret = PTR_ERR_OR_ZERO(dio); - else + } else { + struct btrfs_file_private stack_private = { 0 }; + struct btrfs_file_private *private; + const bool have_private = (file->private_data != NULL); + + if (!have_private) + file->private_data = &stack_private; + + /* + * If we have a synchoronous write, we must make sure the fsync + * triggered by the iomap_dio_complete() call below doesn't + * deadlock on the inode lock - we are already holding it and we + * can't call it after unlocking because we may need to complete + * partial writes due to the input buffer (or parts of it) not + * being already faulted in. + */ + private = file->private_data; + private->fsync_skip_inode_lock = true; ret = iomap_dio_complete(dio); + private->fsync_skip_inode_lock = false; + + if (!have_private) + file->private_data = NULL; + } /* No increment (+=) because iomap returns a cumulative value. */ if (ret > 0) @@ -1591,10 +1607,12 @@ relock: } else { fault_in_iov_iter_readable(from, left); prev_left = left; - goto relock; + goto again; } } + btrfs_inode_unlock(BTRFS_I(inode), ilock_flags); + /* * If 'ret' is -ENOTBLK or we have not written all data, then it means * we must fallback to buffered IO. @@ -1793,6 +1811,7 @@ static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx) */ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) { + struct btrfs_file_private *private = file->private_data; struct dentry *dentry = file_dentry(file); struct inode *inode = d_inode(dentry); struct btrfs_fs_info *fs_info = inode_to_fs_info(inode); @@ -1802,6 +1821,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) int ret = 0, err; u64 len; bool full_sync; + const bool skip_ilock = (private ? private->fsync_skip_inode_lock : false); trace_btrfs_sync_file(file, datasync); @@ -1829,7 +1849,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) if (ret) goto out; - btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); + if (skip_ilock) + down_write(&BTRFS_I(inode)->i_mmap_lock); + else + btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); atomic_inc(&root->log_batch); @@ -1853,7 +1876,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) */ ret = start_ordered_ops(inode, start, end); if (ret) { - btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&BTRFS_I(inode)->i_mmap_lock); + else + btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); goto out; } @@ -1982,7 +2008,10 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) * file again, but that will end up using the synchronization * inside btrfs_sync_log to keep things safe. */ - btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&BTRFS_I(inode)->i_mmap_lock); + else + btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); if (ret == BTRFS_NO_LOG_SYNC) { ret = btrfs_end_transaction(trans); @@ -2051,7 +2080,10 @@ out: out_release_extents: btrfs_release_log_ctx_extents(&ctx); - btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); + if (skip_ilock) + up_write(&BTRFS_I(inode)->i_mmap_lock); + else + btrfs_inode_unlock(BTRFS_I(inode), BTRFS_ILOCK_MMAP); goto out; } |