From 50ba0232fd5312410f1b65247e774244f89a628e Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 18 May 2024 20:50:36 +0200 Subject: Merging upstream version 6.8.9. Signed-off-by: Daniel Baumann --- fs/ceph/addr.c | 41 ++++++++++++----------------------------- fs/ceph/cache.h | 45 +++++++-------------------------------------- fs/ceph/caps.c | 4 ++-- fs/ceph/dir.c | 23 ++++++++++++++--------- fs/ceph/export.c | 2 -- fs/ceph/file.c | 21 +++++++++++++-------- fs/ceph/inode.c | 4 ++-- fs/ceph/mds_client.c | 44 +++++++++++++++++++++++++++++--------------- fs/ceph/mds_client.h | 3 +-- fs/ceph/super.h | 14 ++++++++++++++ 10 files changed, 94 insertions(+), 107 deletions(-) (limited to 'fs/ceph') diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index 85be3bf18c..ee9caf7916 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -159,27 +159,7 @@ static void ceph_invalidate_folio(struct folio *folio, size_t offset, ceph_put_snap_context(snapc); } - folio_wait_fscache(folio); -} - -static bool ceph_release_folio(struct folio *folio, gfp_t gfp) -{ - struct inode *inode = folio->mapping->host; - struct ceph_client *cl = ceph_inode_to_client(inode); - - doutc(cl, "%llx.%llx idx %lu (%sdirty)\n", ceph_vinop(inode), - folio->index, folio_test_dirty(folio) ? "" : "not "); - - if (folio_test_private(folio)) - return false; - - if (folio_test_fscache(folio)) { - if (current_is_kswapd() || !(gfp & __GFP_FS)) - return false; - folio_wait_fscache(folio); - } - ceph_fscache_note_page_release(inode); - return true; + netfs_invalidate_folio(folio, offset, length); } static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) @@ -357,6 +337,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) u64 len = subreq->len; bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD); u64 off = subreq->start; + int extent_cnt; if (ceph_inode_is_shutdown(inode)) { err = -EIO; @@ -370,8 +351,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino, off, &len, 0, 1, sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ, - CEPH_OSD_FLAG_READ | fsc->client->osdc.client->options->read_from_replica, - NULL, ci->i_truncate_seq, ci->i_truncate_size, false); + CEPH_OSD_FLAG_READ, NULL, ci->i_truncate_seq, + ci->i_truncate_size, false); if (IS_ERR(req)) { err = PTR_ERR(req); req = NULL; @@ -379,7 +360,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq) } if (sparse) { - err = ceph_alloc_sparse_ext_map(&req->r_ops[0]); + extent_cnt = __ceph_sparse_read_ext_count(inode, len); + err = ceph_alloc_sparse_ext_map(&req->r_ops[0], extent_cnt); if (err) goto out; } @@ -509,7 +491,6 @@ static void ceph_netfs_free_request(struct netfs_io_request *rreq) const struct netfs_request_ops ceph_netfs_ops = { .init_request = ceph_init_request, .free_request = ceph_netfs_free_request, - .begin_cache_operation = ceph_begin_cache_operation, .issue_read = ceph_netfs_issue_read, .expand_readahead = ceph_netfs_expand_readahead, .clamp_length = ceph_netfs_clamp_length, @@ -814,8 +795,10 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc) ihold(inode); if (wbc->sync_mode == WB_SYNC_NONE && - ceph_inode_to_fs_client(inode)->write_congested) + ceph_inode_to_fs_client(inode)->write_congested) { + redirty_page_for_writepage(wbc, page); return AOP_WRITEPAGE_ACTIVATE; + } wait_on_page_fscache(page); @@ -907,8 +890,8 @@ static void writepages_finish(struct ceph_osd_request *req) doutc(cl, "unlocking %p\n", page); if (remove_page) - generic_error_remove_page(inode->i_mapping, - page); + generic_error_remove_folio(inode->i_mapping, + page_folio(page)); unlock_page(page); } @@ -1586,7 +1569,7 @@ const struct address_space_operations ceph_aops = { .write_end = ceph_write_end, .dirty_folio = ceph_dirty_folio, .invalidate_folio = ceph_invalidate_folio, - .release_folio = ceph_release_folio, + .release_folio = netfs_release_folio, .direct_IO = noop_direct_IO, }; diff --git a/fs/ceph/cache.h b/fs/ceph/cache.h index dc502daac4..20efac0203 100644 --- a/fs/ceph/cache.h +++ b/fs/ceph/cache.h @@ -43,38 +43,19 @@ static inline void ceph_fscache_resize(struct inode *inode, loff_t to) } } -static inline void ceph_fscache_unpin_writeback(struct inode *inode, +static inline int ceph_fscache_unpin_writeback(struct inode *inode, struct writeback_control *wbc) { - fscache_unpin_writeback(wbc, ceph_fscache_cookie(ceph_inode(inode))); + return netfs_unpin_writeback(inode, wbc); } -static inline int ceph_fscache_dirty_folio(struct address_space *mapping, - struct folio *folio) -{ - struct ceph_inode_info *ci = ceph_inode(mapping->host); - - return fscache_dirty_folio(mapping, folio, ceph_fscache_cookie(ci)); -} - -static inline int ceph_begin_cache_operation(struct netfs_io_request *rreq) -{ - struct fscache_cookie *cookie = ceph_fscache_cookie(ceph_inode(rreq->inode)); - - return fscache_begin_read_operation(&rreq->cache_resources, cookie); -} +#define ceph_fscache_dirty_folio netfs_dirty_folio static inline bool ceph_is_cache_enabled(struct inode *inode) { return fscache_cookie_enabled(ceph_fscache_cookie(ceph_inode(inode))); } -static inline void ceph_fscache_note_page_release(struct inode *inode) -{ - struct ceph_inode_info *ci = ceph_inode(inode); - - fscache_note_page_release(ceph_fscache_cookie(ci)); -} #else /* CONFIG_CEPH_FSCACHE */ static inline int ceph_fscache_register_fs(struct ceph_fs_client* fsc, struct fs_context *fc) @@ -119,30 +100,18 @@ static inline void ceph_fscache_resize(struct inode *inode, loff_t to) { } -static inline void ceph_fscache_unpin_writeback(struct inode *inode, - struct writeback_control *wbc) +static inline int ceph_fscache_unpin_writeback(struct inode *inode, + struct writeback_control *wbc) { + return 0; } -static inline int ceph_fscache_dirty_folio(struct address_space *mapping, - struct folio *folio) -{ - return filemap_dirty_folio(mapping, folio); -} +#define ceph_fscache_dirty_folio filemap_dirty_folio static inline bool ceph_is_cache_enabled(struct inode *inode) { return false; } - -static inline int ceph_begin_cache_operation(struct netfs_io_request *rreq) -{ - return -ENOBUFS; -} - -static inline void ceph_fscache_note_page_release(struct inode *inode) -{ -} #endif /* CONFIG_CEPH_FSCACHE */ #endif diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index 7fb4aae974..e8d8e3e633 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -4775,13 +4775,13 @@ int ceph_drop_caps_for_unlink(struct inode *inode) doutc(mdsc->fsc->client, "%p %llx.%llx\n", inode, ceph_vinop(inode)); - spin_lock(&mdsc->cap_unlink_delay_lock); + spin_lock(&mdsc->cap_delay_lock); ci->i_ceph_flags |= CEPH_I_FLUSH; if (!list_empty(&ci->i_cap_delay_list)) list_del_init(&ci->i_cap_delay_list); list_add_tail(&ci->i_cap_delay_list, &mdsc->cap_unlink_delay_list); - spin_unlock(&mdsc->cap_unlink_delay_lock); + spin_unlock(&mdsc->cap_delay_lock); /* * Fire the work immediately, because the MDS maybe diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 91709934c8..0e9f56eaba 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -174,7 +174,7 @@ __dcache_find_get_entry(struct dentry *parent, u64 idx, /* * When possible, we try to satisfy a readdir by peeking at the * dcache. We make this work by carefully ordering dentries on - * d_child when we initially get results back from the MDS, and + * d_children when we initially get results back from the MDS, and * falling back to a "normal" sync readdir if any dentries in the dir * are dropped. * @@ -1593,10 +1593,12 @@ struct ceph_lease_walk_control { unsigned long dir_lease_ttl; }; +static int __dir_lease_check(const struct dentry *, struct ceph_lease_walk_control *); +static int __dentry_lease_check(const struct dentry *); + static unsigned long __dentry_leases_walk(struct ceph_mds_client *mdsc, - struct ceph_lease_walk_control *lwc, - int (*check)(struct dentry*, void*)) + struct ceph_lease_walk_control *lwc) { struct ceph_dentry_info *di, *tmp; struct dentry *dentry, *last = NULL; @@ -1624,7 +1626,10 @@ __dentry_leases_walk(struct ceph_mds_client *mdsc, goto next; } - ret = check(dentry, lwc); + if (lwc->dir_lease) + ret = __dir_lease_check(dentry, lwc); + else + ret = __dentry_lease_check(dentry); if (ret & TOUCH) { /* move it into tail of dir lease list */ __dentry_dir_lease_touch(mdsc, di); @@ -1681,7 +1686,7 @@ next: return freed; } -static int __dentry_lease_check(struct dentry *dentry, void *arg) +static int __dentry_lease_check(const struct dentry *dentry) { struct ceph_dentry_info *di = ceph_dentry(dentry); int ret; @@ -1696,9 +1701,9 @@ static int __dentry_lease_check(struct dentry *dentry, void *arg) return DELETE; } -static int __dir_lease_check(struct dentry *dentry, void *arg) +static int __dir_lease_check(const struct dentry *dentry, + struct ceph_lease_walk_control *lwc) { - struct ceph_lease_walk_control *lwc = arg; struct ceph_dentry_info *di = ceph_dentry(dentry); int ret = __dir_lease_try_check(dentry); @@ -1737,7 +1742,7 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc) lwc.dir_lease = false; lwc.nr_to_scan = CEPH_CAPS_PER_RELEASE * 2; - freed = __dentry_leases_walk(mdsc, &lwc, __dentry_lease_check); + freed = __dentry_leases_walk(mdsc, &lwc); if (!lwc.nr_to_scan) /* more invalid leases */ return -EAGAIN; @@ -1747,7 +1752,7 @@ int ceph_trim_dentries(struct ceph_mds_client *mdsc) lwc.dir_lease = true; lwc.expire_dir_lease = freed < count; lwc.dir_lease_ttl = mdsc->fsc->mount_options->caps_wanted_delay_max * HZ; - freed +=__dentry_leases_walk(mdsc, &lwc, __dir_lease_check); + freed +=__dentry_leases_walk(mdsc, &lwc); if (!lwc.nr_to_scan) /* more to check */ return -EAGAIN; diff --git a/fs/ceph/export.c b/fs/ceph/export.c index 726af69d4d..a79f163ae4 100644 --- a/fs/ceph/export.c +++ b/fs/ceph/export.c @@ -286,8 +286,6 @@ static struct dentry *__snapfh_to_dentry(struct super_block *sb, doutc(cl, "%llx.%llx parent %llx hash %x err=%d", vino.ino, vino.snap, sfh->parent_ino, sfh->hash, err); } - if (IS_ERR(inode)) - return ERR_CAST(inode); /* see comments in ceph_get_parent() */ return unlinked ? d_obtain_root(inode) : d_obtain_alias(inode); } diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 523debc6f2..3d1cd079db 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "super.h" #include "mds_client.h" @@ -1028,6 +1029,7 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, struct ceph_osd_req_op *op; u64 read_off = off; u64 read_len = len; + int extent_cnt; /* determine new offset/length if encrypted */ ceph_fscrypt_adjust_off_and_len(inode, &read_off, &read_len); @@ -1067,7 +1069,8 @@ ssize_t __ceph_sync_read(struct inode *inode, loff_t *ki_pos, op = &req->r_ops[0]; if (sparse) { - ret = ceph_alloc_sparse_ext_map(op); + extent_cnt = __ceph_sparse_read_ext_count(inode, read_len); + ret = ceph_alloc_sparse_ext_map(op, extent_cnt); if (ret) { ceph_osdc_put_request(req); break; @@ -1467,6 +1470,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, ssize_t len; struct ceph_osd_req_op *op; int readop = sparse ? CEPH_OSD_OP_SPARSE_READ : CEPH_OSD_OP_READ; + int extent_cnt; if (write) size = min_t(u64, size, fsc->mount_options->wsize); @@ -1530,7 +1534,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter, osd_req_op_extent_osd_data_bvecs(req, 0, bvecs, num_pages, len); op = &req->r_ops[0]; if (sparse) { - ret = ceph_alloc_sparse_ext_map(op); + extent_cnt = __ceph_sparse_read_ext_count(inode, size); + ret = ceph_alloc_sparse_ext_map(op, extent_cnt); if (ret) { ceph_osdc_put_request(req); break; @@ -3013,8 +3018,8 @@ static ssize_t __ceph_copy_file_range(struct file *src_file, loff_t src_off, * {read,write}_iter, which will get caps again. */ put_rd_wr_caps(src_ci, src_got, dst_ci, dst_got); - ret = do_splice_direct(src_file, &src_off, dst_file, - &dst_off, src_objlen, flags); + ret = splice_file_range(src_file, &src_off, dst_file, &dst_off, + src_objlen); /* Abort on short copies or on error */ if (ret < (long)src_objlen) { doutc(cl, "Failed partial copy (%zd)\n", ret); @@ -3068,8 +3073,8 @@ out_caps: */ if (len && (len < src_ci->i_layout.object_size)) { doutc(cl, "Final partial copy of %zu bytes\n", len); - bytes = do_splice_direct(src_file, &src_off, dst_file, - &dst_off, len, flags); + bytes = splice_file_range(src_file, &src_off, dst_file, + &dst_off, len); if (bytes > 0) ret += bytes; else @@ -3092,8 +3097,8 @@ static ssize_t ceph_copy_file_range(struct file *src_file, loff_t src_off, len, flags); if (ret == -EOPNOTSUPP || ret == -EXDEV) - ret = generic_copy_file_range(src_file, src_off, dst_file, - dst_off, len, flags); + ret = splice_copy_file_range(src_file, src_off, dst_file, + dst_off, len); return ret; } diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 7d41c3e034..7b2e77517f 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -576,7 +576,7 @@ struct inode *ceph_alloc_inode(struct super_block *sb) doutc(fsc->client, "%p\n", &ci->netfs.inode); /* Set parameters for the netfs library */ - netfs_inode_init(&ci->netfs, &ceph_netfs_ops); + netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false); spin_lock_init(&ci->i_ceph_lock); @@ -696,7 +696,7 @@ void ceph_evict_inode(struct inode *inode) percpu_counter_dec(&mdsc->metric.total_inodes); truncate_inode_pages_final(&inode->i_data); - if (inode->i_state & I_PINNING_FSCACHE_WB) + if (inode->i_state & I_PINNING_NETFS_WB) ceph_fscache_unuse_cookie(inode, true); clear_inode(inode); diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 950360b075..360b686c3c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -1534,7 +1534,8 @@ static int encode_metric_spec(void **p, void *end) * session message, specialization for CEPH_SESSION_REQUEST_OPEN * to include additional client metadata fields. */ -static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u64 seq) +static struct ceph_msg * +create_session_full_msg(struct ceph_mds_client *mdsc, int op, u64 seq) { struct ceph_msg *msg; struct ceph_mds_session_head *h; @@ -1578,6 +1579,9 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 size = METRIC_BYTES(count); extra_bytes += 2 + 4 + 4 + size; + /* flags, mds auth caps and oldest_client_tid */ + extra_bytes += 4 + 4 + 8; + /* Allocate the message */ msg = ceph_msg_new(CEPH_MSG_CLIENT_SESSION, sizeof(*h) + extra_bytes, GFP_NOFS, false); @@ -1589,16 +1593,16 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 end = p + msg->front.iov_len; h = p; - h->op = cpu_to_le32(CEPH_SESSION_REQUEST_OPEN); + h->op = cpu_to_le32(op); h->seq = cpu_to_le64(seq); /* * Serialize client metadata into waiting buffer space, using * the format that userspace expects for map * - * ClientSession messages with metadata are v4 + * ClientSession messages with metadata are v7 */ - msg->hdr.version = cpu_to_le16(4); + msg->hdr.version = cpu_to_le16(7); msg->hdr.compat_version = cpu_to_le16(1); /* The write pointer, following the session_head structure */ @@ -1634,6 +1638,15 @@ static struct ceph_msg *create_session_open_msg(struct ceph_mds_client *mdsc, u6 return ERR_PTR(ret); } + /* version == 5, flags */ + ceph_encode_32(&p, 0); + + /* version == 6, mds auth caps */ + ceph_encode_32(&p, 0); + + /* version == 7, oldest_client_tid */ + ceph_encode_64(&p, mdsc->oldest_tid); + msg->front.iov_len = p - msg->front.iov_base; msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); @@ -1663,7 +1676,8 @@ static int __open_session(struct ceph_mds_client *mdsc, session->s_renew_requested = jiffies; /* send connect message */ - msg = create_session_open_msg(mdsc, session->s_seq); + msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_OPEN, + session->s_seq); if (IS_ERR(msg)) return PTR_ERR(msg); ceph_con_send(&session->s_con, msg); @@ -2028,10 +2042,10 @@ static int send_renew_caps(struct ceph_mds_client *mdsc, doutc(cl, "to mds%d (%s)\n", session->s_mds, ceph_mds_state_name(state)); - msg = ceph_create_session_msg(CEPH_SESSION_REQUEST_RENEWCAPS, + msg = create_session_full_msg(mdsc, CEPH_SESSION_REQUEST_RENEWCAPS, ++session->s_renew_seq); - if (!msg) - return -ENOMEM; + if (IS_ERR(msg)) + return PTR_ERR(msg); ceph_con_send(&session->s_con, msg); return 0; } @@ -2128,7 +2142,7 @@ static bool drop_negative_children(struct dentry *dentry) goto out; spin_lock(&dentry->d_lock); - list_for_each_entry(child, &dentry->d_subdirs, d_child) { + hlist_for_each_entry(child, &dentry->d_children, d_sib) { if (d_really_is_positive(child)) { all_negative = false; break; @@ -2490,7 +2504,7 @@ static void ceph_cap_unlink_work(struct work_struct *work) struct ceph_client *cl = mdsc->fsc->client; doutc(cl, "begin\n"); - spin_lock(&mdsc->cap_unlink_delay_lock); + spin_lock(&mdsc->cap_delay_lock); while (!list_empty(&mdsc->cap_unlink_delay_list)) { struct ceph_inode_info *ci; struct inode *inode; @@ -2502,15 +2516,15 @@ static void ceph_cap_unlink_work(struct work_struct *work) inode = igrab(&ci->netfs.inode); if (inode) { - spin_unlock(&mdsc->cap_unlink_delay_lock); + spin_unlock(&mdsc->cap_delay_lock); doutc(cl, "on %p %llx.%llx\n", inode, ceph_vinop(inode)); ceph_check_caps(ci, CHECK_CAPS_FLUSH); iput(inode); - spin_lock(&mdsc->cap_unlink_delay_lock); + spin_lock(&mdsc->cap_delay_lock); } } - spin_unlock(&mdsc->cap_unlink_delay_lock); + spin_unlock(&mdsc->cap_delay_lock); doutc(cl, "done\n"); } @@ -5390,7 +5404,6 @@ int ceph_mdsc_init(struct ceph_fs_client *fsc) INIT_LIST_HEAD(&mdsc->cap_wait_list); spin_lock_init(&mdsc->cap_delay_lock); INIT_LIST_HEAD(&mdsc->cap_unlink_delay_list); - spin_lock_init(&mdsc->cap_unlink_delay_lock); INIT_LIST_HEAD(&mdsc->snap_flush_list); spin_lock_init(&mdsc->snap_flush_lock); mdsc->last_cap_flush_tid = 1; @@ -5917,7 +5930,8 @@ static void mds_peer_reset(struct ceph_connection *con) pr_warn_client(mdsc->fsc->client, "mds%d closed our session\n", s->s_mds); - if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO) + if (READ_ONCE(mdsc->fsc->mount_state) != CEPH_MOUNT_FENCE_IO && + ceph_mdsmap_get_state(mdsc->mdsmap, s->s_mds) >= CEPH_MDS_STATE_RECONNECT) send_mds_reconnect(mdsc, s); } diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index 03f8ff0087..b88e804152 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -461,9 +461,8 @@ struct ceph_mds_client { struct delayed_work delayed_work; /* delayed work */ unsigned long last_renew_caps; /* last time we renewed our caps */ struct list_head cap_delay_list; /* caps with delayed release */ - spinlock_t cap_delay_lock; /* protects cap_delay_list */ struct list_head cap_unlink_delay_list; /* caps with delayed release for unlink */ - spinlock_t cap_unlink_delay_lock; /* protects cap_unlink_delay_list */ + spinlock_t cap_delay_lock; /* protects cap_delay_list and cap_unlink_delay_list */ struct list_head snap_flush_list; /* cap_snaps ready to flush */ spinlock_t snap_flush_lock; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index 15d00bdd92..b63b4cd9b5 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -3,6 +3,7 @@ #define _FS_CEPH_SUPER_H #include +#include #include #include @@ -1405,6 +1406,19 @@ static inline void __ceph_update_quota(struct ceph_inode_info *ci, ceph_adjust_quota_realms_count(&ci->netfs.inode, has_quota); } +static inline int __ceph_sparse_read_ext_count(struct inode *inode, u64 len) +{ + int cnt = 0; + + if (IS_ENCRYPTED(inode)) { + cnt = len >> CEPH_FSCRYPT_BLOCK_SHIFT; + if (cnt > CEPH_SPARSE_EXT_ARRAY_INITIAL) + cnt = 0; + } + + return cnt; +} + extern void ceph_handle_quota(struct ceph_mds_client *mdsc, struct ceph_mds_session *session, struct ceph_msg *msg); -- cgit v1.2.3