Diffstat (limited to 'fs/xfs/xfs_exchrange.c')
-rw-r--r-- | fs/xfs/xfs_exchrange.c | 804
1 file changed, 804 insertions, 0 deletions
diff --git a/fs/xfs/xfs_exchrange.c b/fs/xfs/xfs_exchrange.c new file mode 100644 index 0000000000..c8a655c92c --- /dev/null +++ b/fs/xfs/xfs_exchrange.c @@ -0,0 +1,804 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Copyright (c) 2020-2024 Oracle. All Rights Reserved. + * Author: Darrick J. Wong <djwong@kernel.org> + */ +#include "xfs.h" +#include "xfs_shared.h" +#include "xfs_format.h" +#include "xfs_log_format.h" +#include "xfs_trans_resv.h" +#include "xfs_mount.h" +#include "xfs_defer.h" +#include "xfs_inode.h" +#include "xfs_trans.h" +#include "xfs_quota.h" +#include "xfs_bmap_util.h" +#include "xfs_reflink.h" +#include "xfs_trace.h" +#include "xfs_exchrange.h" +#include "xfs_exchmaps.h" +#include "xfs_sb.h" +#include "xfs_icache.h" +#include "xfs_log.h" +#include "xfs_rtbitmap.h" +#include <linux/fsnotify.h> + +/* Lock (and optionally join) two inodes for a file range exchange. */ +void +xfs_exchrange_ilock( + struct xfs_trans *tp, + struct xfs_inode *ip1, + struct xfs_inode *ip2) +{ + if (ip1 != ip2) + xfs_lock_two_inodes(ip1, XFS_ILOCK_EXCL, + ip2, XFS_ILOCK_EXCL); + else + xfs_ilock(ip1, XFS_ILOCK_EXCL); + if (tp) { + xfs_trans_ijoin(tp, ip1, 0); + if (ip2 != ip1) + xfs_trans_ijoin(tp, ip2, 0); + } + +} + +/* Unlock two inodes after a file range exchange operation. */ +void +xfs_exchrange_iunlock( + struct xfs_inode *ip1, + struct xfs_inode *ip2) +{ + if (ip2 != ip1) + xfs_iunlock(ip2, XFS_ILOCK_EXCL); + xfs_iunlock(ip1, XFS_ILOCK_EXCL); +} + +/* + * Estimate the resource requirements to exchange file contents between the two + * files. The caller is required to hold the IOLOCK and the MMAPLOCK and to + * have flushed both inodes' pagecache and active direct-ios. + */ +int +xfs_exchrange_estimate( + struct xfs_exchmaps_req *req) +{ + int error; + + xfs_exchrange_ilock(NULL, req->ip1, req->ip2); + error = xfs_exchmaps_estimate(req); + xfs_exchrange_iunlock(req->ip1, req->ip2); + return error; +} + +#define QRETRY_IP1 (0x1) +#define QRETRY_IP2 (0x2) + +/* + * Obtain a quota reservation to make sure we don't hit EDQUOT. We can skip + * this if quota enforcement is disabled or if both inodes' dquots are the + * same. The qretry structure must be initialized to zeroes before the first + * call to this function. + */ +STATIC int +xfs_exchrange_reserve_quota( + struct xfs_trans *tp, + const struct xfs_exchmaps_req *req, + unsigned int *qretry) +{ + int64_t ddelta, rdelta; + int ip1_error = 0; + int error; + + /* + * Don't bother with a quota reservation if we're not enforcing them + * or the two inodes have the same dquots. + */ + if (!XFS_IS_QUOTA_ON(tp->t_mountp) || req->ip1 == req->ip2 || + (req->ip1->i_udquot == req->ip2->i_udquot && + req->ip1->i_gdquot == req->ip2->i_gdquot && + req->ip1->i_pdquot == req->ip2->i_pdquot)) + return 0; + + *qretry = 0; + + /* + * For each file, compute the net gain in the number of regular blocks + * that will be mapped into that file and reserve that much quota. The + * quota counts must be able to absorb at least that much space. + */ + ddelta = req->ip2_bcount - req->ip1_bcount; + rdelta = req->ip2_rtbcount - req->ip1_rtbcount; + if (ddelta > 0 || rdelta > 0) { + error = xfs_trans_reserve_quota_nblks(tp, req->ip1, + ddelta > 0 ? ddelta : 0, + rdelta > 0 ? rdelta : 0, + false); + if (error == -EDQUOT || error == -ENOSPC) { + /* + * Save this error and see what happens if we try to + * reserve quota for ip2. Then report both. 
+ */ + *qretry |= QRETRY_IP1; + ip1_error = error; + error = 0; + } + if (error) + return error; + } + if (ddelta < 0 || rdelta < 0) { + error = xfs_trans_reserve_quota_nblks(tp, req->ip2, + ddelta < 0 ? -ddelta : 0, + rdelta < 0 ? -rdelta : 0, + false); + if (error == -EDQUOT || error == -ENOSPC) + *qretry |= QRETRY_IP2; + if (error) + return error; + } + if (ip1_error) + return ip1_error; + + /* + * For each file, forcibly reserve the gross gain in mapped blocks so + * that we don't trip over any quota block reservation assertions. + * We must reserve the gross gain because the quota code subtracts from + * bcount the number of blocks that we unmap; it does not add that + * quantity back to the quota block reservation. + */ + error = xfs_trans_reserve_quota_nblks(tp, req->ip1, req->ip1_bcount, + req->ip1_rtbcount, true); + if (error) + return error; + + return xfs_trans_reserve_quota_nblks(tp, req->ip2, req->ip2_bcount, + req->ip2_rtbcount, true); +} + +/* Exchange the mappings (and hence the contents) of two files' forks. */ +STATIC int +xfs_exchrange_mappings( + const struct xfs_exchrange *fxr, + struct xfs_inode *ip1, + struct xfs_inode *ip2) +{ + struct xfs_mount *mp = ip1->i_mount; + struct xfs_exchmaps_req req = { + .ip1 = ip1, + .ip2 = ip2, + .startoff1 = XFS_B_TO_FSBT(mp, fxr->file1_offset), + .startoff2 = XFS_B_TO_FSBT(mp, fxr->file2_offset), + .blockcount = XFS_B_TO_FSB(mp, fxr->length), + }; + struct xfs_trans *tp; + unsigned int qretry; + bool retried = false; + int error; + + trace_xfs_exchrange_mappings(fxr, ip1, ip2); + + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) + req.flags |= XFS_EXCHMAPS_SET_SIZES; + if (fxr->flags & XFS_EXCHANGE_RANGE_FILE1_WRITTEN) + req.flags |= XFS_EXCHMAPS_INO1_WRITTEN; + + /* + * Round the request length up to the nearest file allocation unit. + * The prep function already checked that the request offsets and + * length in @fxr are safe to round up. + */ + if (xfs_inode_has_bigrtalloc(ip2)) + req.blockcount = xfs_rtb_roundup_rtx(mp, req.blockcount); + + error = xfs_exchrange_estimate(&req); + if (error) + return error; + +retry: + /* Allocate the transaction, lock the inodes, and join them. */ + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_write, req.resblks, 0, + XFS_TRANS_RES_FDBLKS, &tp); + if (error) + return error; + + xfs_exchrange_ilock(tp, ip1, ip2); + + trace_xfs_exchrange_before(ip2, 2); + trace_xfs_exchrange_before(ip1, 1); + + error = xfs_exchmaps_check_forks(mp, &req); + if (error) + goto out_trans_cancel; + + /* + * Reserve ourselves some quota if any of them are in enforcing mode. + * In theory we only need enough to satisfy the change in the number + * of blocks between the two ranges being remapped. + */ + error = xfs_exchrange_reserve_quota(tp, &req, &qretry); + if ((error == -EDQUOT || error == -ENOSPC) && !retried) { + xfs_trans_cancel(tp); + xfs_exchrange_iunlock(ip1, ip2); + if (qretry & QRETRY_IP1) + xfs_blockgc_free_quota(ip1, 0); + if (qretry & QRETRY_IP2) + xfs_blockgc_free_quota(ip2, 0); + retried = true; + goto retry; + } + if (error) + goto out_trans_cancel; + + /* If we got this far on a dry run, all parameters are ok. */ + if (fxr->flags & XFS_EXCHANGE_RANGE_DRY_RUN) + goto out_trans_cancel; + + /* Update the mtime and ctime of both files. 
*/ + if (fxr->flags & __XFS_EXCHANGE_RANGE_UPD_CMTIME1) + xfs_trans_ichgtime(tp, ip1, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + if (fxr->flags & __XFS_EXCHANGE_RANGE_UPD_CMTIME2) + xfs_trans_ichgtime(tp, ip2, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); + + xfs_exchange_mappings(tp, &req); + + /* + * Force the log to persist metadata updates if the caller or the + * administrator requires this. The generic prep function already + * flushed the relevant parts of the page cache. + */ + if (xfs_has_wsync(mp) || (fxr->flags & XFS_EXCHANGE_RANGE_DSYNC)) + xfs_trans_set_sync(tp); + + error = xfs_trans_commit(tp); + + trace_xfs_exchrange_after(ip2, 2); + trace_xfs_exchrange_after(ip1, 1); + + if (error) + goto out_unlock; + + /* + * If the caller wanted us to exchange the contents of two complete + * files of unequal length, exchange the incore sizes now. This should + * be safe because we flushed both files' page caches, exchanged all + * the mappings, and updated the ondisk sizes. + */ + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { + loff_t temp; + + temp = i_size_read(VFS_I(ip2)); + i_size_write(VFS_I(ip2), i_size_read(VFS_I(ip1))); + i_size_write(VFS_I(ip1), temp); + } + +out_unlock: + xfs_exchrange_iunlock(ip1, ip2); + return error; + +out_trans_cancel: + xfs_trans_cancel(tp); + goto out_unlock; +} + +/* + * Generic code for exchanging ranges of two files via XFS_IOC_EXCHANGE_RANGE. + * This part deals with struct file objects and byte ranges and does not deal + * with XFS-specific data structures such as xfs_inodes and block ranges. This + * separation may some day facilitate porting to another filesystem. + * + * The goal is to exchange fxr.length bytes starting at fxr.file1_offset in + * file1 with the same number of bytes starting at fxr.file2_offset in file2. + * Implementations must call xfs_exchange_range_prep to prepare the two + * files prior to taking locks; and they must update the inode change and mod + * times of both files as part of the metadata update. The timestamp update + * and freshness checks must be done atomically as part of the data exchange + * operation to ensure correctness of the freshness check. + * xfs_exchange_range_finish must be called after the operation completes + * successfully but before locks are dropped. + */ + +/* Verify that we have security clearance to perform this operation. */ +static int +xfs_exchange_range_verify_area( + struct xfs_exchrange *fxr) +{ + int ret; + + ret = remap_verify_area(fxr->file1, fxr->file1_offset, fxr->length, + true); + if (ret) + return ret; + + return remap_verify_area(fxr->file2, fxr->file2_offset, fxr->length, + true); +} + +/* + * Performs necessary checks before doing a range exchange, having stabilized + * mutable inode attributes via i_rwsem. + */ +static inline int +xfs_exchange_range_checks( + struct xfs_exchrange *fxr, + unsigned int alloc_unit) +{ + struct inode *inode1 = file_inode(fxr->file1); + struct inode *inode2 = file_inode(fxr->file2); + uint64_t allocmask = alloc_unit - 1; + int64_t test_len; + uint64_t blen; + loff_t size1, size2, tmp; + int error; + + /* Don't touch certain kinds of inodes */ + if (IS_IMMUTABLE(inode1) || IS_IMMUTABLE(inode2)) + return -EPERM; + if (IS_SWAPFILE(inode1) || IS_SWAPFILE(inode2)) + return -ETXTBSY; + + size1 = i_size_read(inode1); + size2 = i_size_read(inode2); + + /* Ranges cannot start after EOF. 
*/ + if (fxr->file1_offset > size1 || fxr->file2_offset > size2) + return -EINVAL; + + /* + * If the caller said to exchange to EOF, we set the length of the + * request large enough to cover everything to the end of both files. + */ + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) { + fxr->length = max_t(int64_t, size1 - fxr->file1_offset, + size2 - fxr->file2_offset); + + error = xfs_exchange_range_verify_area(fxr); + if (error) + return error; + } + + /* + * The start of both ranges must be aligned to the file allocation + * unit. + */ + if (!IS_ALIGNED(fxr->file1_offset, alloc_unit) || + !IS_ALIGNED(fxr->file2_offset, alloc_unit)) + return -EINVAL; + + /* Ensure offsets don't wrap. */ + if (check_add_overflow(fxr->file1_offset, fxr->length, &tmp) || + check_add_overflow(fxr->file2_offset, fxr->length, &tmp)) + return -EINVAL; + + /* + * We require both ranges to end within EOF, unless we're exchanging + * to EOF. + */ + if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) && + (fxr->file1_offset + fxr->length > size1 || + fxr->file2_offset + fxr->length > size2)) + return -EINVAL; + + /* + * Make sure we don't hit any file size limits. If we hit any size + * limits such that test_length was adjusted, we abort the whole + * operation. + */ + test_len = fxr->length; + error = generic_write_check_limits(fxr->file2, fxr->file2_offset, + &test_len); + if (error) + return error; + error = generic_write_check_limits(fxr->file1, fxr->file1_offset, + &test_len); + if (error) + return error; + if (test_len != fxr->length) + return -EINVAL; + + /* + * If the user wanted us to exchange up to the infile's EOF, round up + * to the next allocation unit boundary for this check. Do the same + * for the outfile. + * + * Otherwise, reject the range length if it's not aligned to an + * allocation unit. + */ + if (fxr->file1_offset + fxr->length == size1) + blen = ALIGN(size1, alloc_unit) - fxr->file1_offset; + else if (fxr->file2_offset + fxr->length == size2) + blen = ALIGN(size2, alloc_unit) - fxr->file2_offset; + else if (!IS_ALIGNED(fxr->length, alloc_unit)) + return -EINVAL; + else + blen = fxr->length; + + /* Don't allow overlapped exchanges within the same file. */ + if (inode1 == inode2 && + fxr->file2_offset + blen > fxr->file1_offset && + fxr->file1_offset + blen > fxr->file2_offset) + return -EINVAL; + + /* + * Ensure that we don't exchange a partial EOF block into the middle of + * another file. + */ + if ((fxr->length & allocmask) == 0) + return 0; + + blen = fxr->length; + if (fxr->file2_offset + blen < size2) + blen &= ~allocmask; + + if (fxr->file1_offset + blen < size1) + blen &= ~allocmask; + + return blen == fxr->length ? 0 : -EINVAL; +} + +/* + * Check that the two inodes are eligible for range exchanges, the ranges make + * sense, and then flush all dirty data. Caller must ensure that the inodes + * have been locked against any other modifications. + */ +static inline int +xfs_exchange_range_prep( + struct xfs_exchrange *fxr, + unsigned int alloc_unit) +{ + struct inode *inode1 = file_inode(fxr->file1); + struct inode *inode2 = file_inode(fxr->file2); + bool same_inode = (inode1 == inode2); + int error; + + /* Check that we don't violate system file offset limits. 
*/ + error = xfs_exchange_range_checks(fxr, alloc_unit); + if (error || fxr->length == 0) + return error; + + /* Wait for the completion of any pending IOs on both files */ + inode_dio_wait(inode1); + if (!same_inode) + inode_dio_wait(inode2); + + error = filemap_write_and_wait_range(inode1->i_mapping, + fxr->file1_offset, + fxr->file1_offset + fxr->length - 1); + if (error) + return error; + + error = filemap_write_and_wait_range(inode2->i_mapping, + fxr->file2_offset, + fxr->file2_offset + fxr->length - 1); + if (error) + return error; + + /* + * If the files or inodes involved require synchronous writes, amend + * the request to force the filesystem to flush all data and metadata + * to disk after the operation completes. + */ + if (((fxr->file1->f_flags | fxr->file2->f_flags) & O_SYNC) || + IS_SYNC(inode1) || IS_SYNC(inode2)) + fxr->flags |= XFS_EXCHANGE_RANGE_DSYNC; + + return 0; +} + +/* + * Finish a range exchange operation, if it was successful. Caller must ensure + * that the inodes are still locked against any other modifications. + */ +static inline int +xfs_exchange_range_finish( + struct xfs_exchrange *fxr) +{ + int error; + + error = file_remove_privs(fxr->file1); + if (error) + return error; + if (file_inode(fxr->file1) == file_inode(fxr->file2)) + return 0; + + return file_remove_privs(fxr->file2); +} + +/* + * Check the alignment of an exchange request when the allocation unit size + * isn't a power of two. The generic file-level helpers use (fast) + * bitmask-based alignment checks, but here we have to use slow long division. + */ +static int +xfs_exchrange_check_rtalign( + const struct xfs_exchrange *fxr, + struct xfs_inode *ip1, + struct xfs_inode *ip2, + unsigned int alloc_unit) +{ + uint64_t length = fxr->length; + uint64_t blen; + loff_t size1, size2; + + size1 = i_size_read(VFS_I(ip1)); + size2 = i_size_read(VFS_I(ip2)); + + /* The start of both ranges must be aligned to a rt extent. */ + if (!isaligned_64(fxr->file1_offset, alloc_unit) || + !isaligned_64(fxr->file2_offset, alloc_unit)) + return -EINVAL; + + if (fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF) + length = max_t(int64_t, size1 - fxr->file1_offset, + size2 - fxr->file2_offset); + + /* + * If the user wanted us to exchange up to the infile's EOF, round up + * to the next rt extent boundary for this check. Do the same for the + * outfile. + * + * Otherwise, reject the range length if it's not rt extent aligned. + * We already confirmed the starting offsets' rt extent block + * alignment. + */ + if (fxr->file1_offset + length == size1) + blen = roundup_64(size1, alloc_unit) - fxr->file1_offset; + else if (fxr->file2_offset + length == size2) + blen = roundup_64(size2, alloc_unit) - fxr->file2_offset; + else if (!isaligned_64(length, alloc_unit)) + return -EINVAL; + else + blen = length; + + /* Don't allow overlapped exchanges within the same file. */ + if (ip1 == ip2 && + fxr->file2_offset + blen > fxr->file1_offset && + fxr->file1_offset + blen > fxr->file2_offset) + return -EINVAL; + + /* + * Ensure that we don't exchange a partial EOF rt extent into the + * middle of another file. + */ + if (isaligned_64(length, alloc_unit)) + return 0; + + blen = length; + if (fxr->file2_offset + length < size2) + blen = rounddown_64(blen, alloc_unit); + + if (fxr->file1_offset + blen < size1) + blen = rounddown_64(blen, alloc_unit); + + return blen == length ? 0 : -EINVAL; +} + +/* Prepare two files to have their data exchanged. 
*/ +STATIC int +xfs_exchrange_prep( + struct xfs_exchrange *fxr, + struct xfs_inode *ip1, + struct xfs_inode *ip2) +{ + struct xfs_mount *mp = ip2->i_mount; + unsigned int alloc_unit = xfs_inode_alloc_unitsize(ip2); + int error; + + trace_xfs_exchrange_prep(fxr, ip1, ip2); + + /* Verify both files are either real-time or non-realtime */ + if (XFS_IS_REALTIME_INODE(ip1) != XFS_IS_REALTIME_INODE(ip2)) + return -EINVAL; + + /* Check non-power of two alignment issues, if necessary. */ + if (!is_power_of_2(alloc_unit)) { + error = xfs_exchrange_check_rtalign(fxr, ip1, ip2, alloc_unit); + if (error) + return error; + + /* + * Do the generic file-level checks with the regular block + * alignment. + */ + alloc_unit = mp->m_sb.sb_blocksize; + } + + error = xfs_exchange_range_prep(fxr, alloc_unit); + if (error || fxr->length == 0) + return error; + + /* Attach dquots to both inodes before changing block maps. */ + error = xfs_qm_dqattach(ip2); + if (error) + return error; + error = xfs_qm_dqattach(ip1); + if (error) + return error; + + trace_xfs_exchrange_flush(fxr, ip1, ip2); + + /* Flush the relevant ranges of both files. */ + error = xfs_flush_unmap_range(ip2, fxr->file2_offset, fxr->length); + if (error) + return error; + error = xfs_flush_unmap_range(ip1, fxr->file1_offset, fxr->length); + if (error) + return error; + + /* + * Cancel CoW fork preallocations for the ranges of both files. The + * prep function should have flushed all the dirty data, so the only + * CoW mappings remaining should be speculative. + */ + if (xfs_inode_has_cow_data(ip1)) { + error = xfs_reflink_cancel_cow_range(ip1, fxr->file1_offset, + fxr->length, true); + if (error) + return error; + } + + if (xfs_inode_has_cow_data(ip2)) { + error = xfs_reflink_cancel_cow_range(ip2, fxr->file2_offset, + fxr->length, true); + if (error) + return error; + } + + return 0; +} + +/* + * Exchange contents of files. This is the binding between the generic + * file-level concepts and the XFS inode-specific implementation. + */ +STATIC int +xfs_exchrange_contents( + struct xfs_exchrange *fxr) +{ + struct inode *inode1 = file_inode(fxr->file1); + struct inode *inode2 = file_inode(fxr->file2); + struct xfs_inode *ip1 = XFS_I(inode1); + struct xfs_inode *ip2 = XFS_I(inode2); + struct xfs_mount *mp = ip1->i_mount; + int error; + + if (!xfs_has_exchange_range(mp)) + return -EOPNOTSUPP; + + if (fxr->flags & ~(XFS_EXCHANGE_RANGE_ALL_FLAGS | + XFS_EXCHANGE_RANGE_PRIV_FLAGS)) + return -EINVAL; + + if (xfs_is_shutdown(mp)) + return -EIO; + + /* Lock both files against IO */ + error = xfs_ilock2_io_mmap(ip1, ip2); + if (error) + goto out_err; + + /* Prepare and then exchange file contents. */ + error = xfs_exchrange_prep(fxr, ip1, ip2); + if (error) + goto out_unlock; + + error = xfs_exchrange_mappings(fxr, ip1, ip2); + if (error) + goto out_unlock; + + /* + * Finish the exchange by removing special file privileges like any + * other file write would do. This may involve turning on support for + * logged xattrs if either file has security capabilities. + */ + error = xfs_exchange_range_finish(fxr); + if (error) + goto out_unlock; + +out_unlock: + xfs_iunlock2_io_mmap(ip1, ip2); +out_err: + if (error) + trace_xfs_exchrange_error(ip2, error, _RET_IP_); + return error; +} + +/* Exchange parts of two files. 
*/ +static int +xfs_exchange_range( + struct xfs_exchrange *fxr) +{ + struct inode *inode1 = file_inode(fxr->file1); + struct inode *inode2 = file_inode(fxr->file2); + int ret; + + BUILD_BUG_ON(XFS_EXCHANGE_RANGE_ALL_FLAGS & + XFS_EXCHANGE_RANGE_PRIV_FLAGS); + + /* Both files must be on the same mount/filesystem. */ + if (fxr->file1->f_path.mnt != fxr->file2->f_path.mnt) + return -EXDEV; + + if (fxr->flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS) + return -EINVAL; + + /* Userspace requests only honored for regular files. */ + if (S_ISDIR(inode1->i_mode) || S_ISDIR(inode2->i_mode)) + return -EISDIR; + if (!S_ISREG(inode1->i_mode) || !S_ISREG(inode2->i_mode)) + return -EINVAL; + + /* Both files must be opened for read and write. */ + if (!(fxr->file1->f_mode & FMODE_READ) || + !(fxr->file1->f_mode & FMODE_WRITE) || + !(fxr->file2->f_mode & FMODE_READ) || + !(fxr->file2->f_mode & FMODE_WRITE)) + return -EBADF; + + /* Neither file can be opened append-only. */ + if ((fxr->file1->f_flags & O_APPEND) || + (fxr->file2->f_flags & O_APPEND)) + return -EBADF; + + /* + * If we're not exchanging to EOF, we can check the areas before + * stabilizing both files' i_size. + */ + if (!(fxr->flags & XFS_EXCHANGE_RANGE_TO_EOF)) { + ret = xfs_exchange_range_verify_area(fxr); + if (ret) + return ret; + } + + /* Update cmtime if the fd/inode don't forbid it. */ + if (!(fxr->file1->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode1)) + fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME1; + if (!(fxr->file2->f_mode & FMODE_NOCMTIME) && !IS_NOCMTIME(inode2)) + fxr->flags |= __XFS_EXCHANGE_RANGE_UPD_CMTIME2; + + file_start_write(fxr->file2); + ret = xfs_exchrange_contents(fxr); + file_end_write(fxr->file2); + if (ret) + return ret; + + fsnotify_modify(fxr->file1); + if (fxr->file2 != fxr->file1) + fsnotify_modify(fxr->file2); + return 0; +} + +/* Collect exchange-range arguments from userspace. */ +long +xfs_ioc_exchange_range( + struct file *file, + struct xfs_exchange_range __user *argp) +{ + struct xfs_exchrange fxr = { + .file2 = file, + }; + struct xfs_exchange_range args; + struct fd file1; + int error; + + if (copy_from_user(&args, argp, sizeof(args))) + return -EFAULT; + if (memchr_inv(&args.pad, 0, sizeof(args.pad))) + return -EINVAL; + if (args.flags & ~XFS_EXCHANGE_RANGE_ALL_FLAGS) + return -EINVAL; + + fxr.file1_offset = args.file1_offset; + fxr.file2_offset = args.file2_offset; + fxr.length = args.length; + fxr.flags = args.flags; + + file1 = fdget(args.file1_fd); + if (!file1.file) + return -EBADF; + fxr.file1 = file1.file; + + error = xfs_exchange_range(&fxr); + fdput(file1); + return error; +} |
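
For reference, the ioctl entry point above (xfs_ioc_exchange_range) copies a struct xfs_exchange_range from userspace: the caller supplies file1 as a file descriptor inside the argument structure, fills in the two byte offsets, the length, and the flags, and issues the ioctl on file2's descriptor. The sketch below is a minimal, hedged userspace example of exchanging the complete contents of two files; it assumes the UAPI definitions (struct xfs_exchange_range, XFS_IOC_EXCHANGE_RANGE, XFS_EXCHANGE_RANGE_TO_EOF) are available from the installed xfsprogs/kernel headers via <xfs/xfs.h>, and that both files live on the same XFS filesystem with the exchange-range feature enabled.

/*
 * Minimal userspace sketch (not part of the patch): atomically exchange
 * the full contents of two files with XFS_IOC_EXCHANGE_RANGE.  The
 * header location below is an assumption; the struct and ioctl number
 * come from the kernel UAPI as shipped by xfsprogs.
 */
#include <stdio.h>
#include <string.h>
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <xfs/xfs.h>	/* assumed to provide struct xfs_exchange_range */

int
main(int argc, char *argv[])
{
	struct xfs_exchange_range	fxr;
	int				fd1, fd2;

	if (argc != 3) {
		fprintf(stderr, "usage: %s file1 file2\n", argv[0]);
		return 1;
	}

	/* Both files must be opened for read and write, not append-only. */
	fd1 = open(argv[1], O_RDWR);
	fd2 = open(argv[2], O_RDWR);
	if (fd1 < 0 || fd2 < 0) {
		perror("open");
		return 1;
	}

	/* The pad area must be zeroed, so clear the whole structure. */
	memset(&fxr, 0, sizeof(fxr));
	fxr.file1_fd = fd1;
	fxr.file1_offset = 0;
	fxr.file2_offset = 0;
	fxr.length = 0;		/* recomputed by the kernel for TO_EOF */
	fxr.flags = XFS_EXCHANGE_RANGE_TO_EOF;

	/* file2 is the ioctl target; file1 travels in the argument struct. */
	if (ioctl(fd2, XFS_IOC_EXCHANGE_RANGE, &fxr) < 0) {
		perror("XFS_IOC_EXCHANGE_RANGE");
		return 1;
	}

	close(fd1);
	close(fd2);
	return 0;
}

With XFS_EXCHANGE_RANGE_TO_EOF the kernel recomputes the length to cover everything to the end of both files and exchanges the incore and ondisk sizes as part of the same operation, so unequal file sizes are handled; without that flag, the offsets and length must be aligned to the file allocation unit and both ranges must end within EOF.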