diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-07 13:17:52 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-08-07 13:17:52 +0000 |
commit | 3afb00d3f86d3d924f88b56fa8285d4e9db85852 (patch) | |
tree | 95a985d3019522cea546b7d8df621369bc44fc6c /fs/ceph | |
parent | Adding debian version 6.9.12-1. (diff) | |
download | linux-3afb00d3f86d3d924f88b56fa8285d4e9db85852.tar.xz linux-3afb00d3f86d3d924f88b56fa8285d4e9db85852.zip |
Merging upstream version 6.10.3.
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'fs/ceph')
-rw-r--r-- | fs/ceph/addr.c | 24 | ||||
-rw-r--r-- | fs/ceph/dir.c | 28 | ||||
-rw-r--r-- | fs/ceph/file.c | 66 | ||||
-rw-r--r-- | fs/ceph/inode.c | 48 | ||||
-rw-r--r-- | fs/ceph/mds_client.c | 270 | ||||
-rw-r--r-- | fs/ceph/mds_client.h | 28 | ||||
-rw-r--r-- | fs/ceph/super.c | 3 |
7 files changed, 433 insertions, 34 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c index ee9caf7916..8c16bc5250 100644 --- a/fs/ceph/addr.c +++ b/fs/ceph/addr.c @@ -193,7 +193,7 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq) * block, but do not exceed the file size, unless the original * request already exceeds it. */ - new_end = min(round_up(end, lo->stripe_unit), rreq->i_size); + new_end = umin(round_up(end, lo->stripe_unit), rreq->i_size); if (new_end > end && new_end <= rreq->start + max_len) rreq->len = new_end - rreq->start; @@ -498,11 +498,6 @@ const struct netfs_request_ops ceph_netfs_ops = { }; #ifdef CONFIG_CEPH_FSCACHE -static void ceph_set_page_fscache(struct page *page) -{ - set_page_fscache(page); -} - static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async) { struct inode *inode = priv; @@ -517,13 +512,9 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b struct fscache_cookie *cookie = ceph_fscache_cookie(ci); fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode), - ceph_fscache_write_terminated, inode, caching); + ceph_fscache_write_terminated, inode, true, caching); } #else -static inline void ceph_set_page_fscache(struct page *page) -{ -} - static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching) { } @@ -715,8 +706,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc) len = wlen; set_page_writeback(page); - if (caching) - ceph_set_page_fscache(page); ceph_fscache_write_to_cache(inode, page_off, len, caching); if (IS_ENCRYPTED(inode)) { @@ -800,8 +789,6 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc) return AOP_WRITEPAGE_ACTIVATE; } - wait_on_page_fscache(page); - err = writepage_nounlock(page, wbc); if (err == -ERESTARTSYS) { /* direct memory reclaimer was killed by SIGKILL. return 0 @@ -1075,7 +1062,7 @@ get_more_pages: unlock_page(page); break; } - if (PageWriteback(page) || PageFsCache(page)) { + if (PageWriteback(page)) { if (wbc->sync_mode == WB_SYNC_NONE) { doutc(cl, "%p under writeback\n", page); unlock_page(page); @@ -1083,7 +1070,6 @@ get_more_pages: } doutc(cl, "waiting on writeback %p\n", page); wait_on_page_writeback(page); - wait_on_page_fscache(page); } if (!clear_page_dirty_for_io(page)) { @@ -1268,8 +1254,6 @@ new_request: } set_page_writeback(page); - if (caching) - ceph_set_page_fscache(page); len += thp_size(page); } ceph_fscache_write_to_cache(inode, offset, len, caching); @@ -1513,7 +1497,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping, if (r < 0) return r; - folio_wait_fscache(folio); + folio_wait_private_2(folio); /* [DEPRECATED] */ WARN_ON_ONCE(!folio_test_locked(folio)); *pagep = &folio->page; return 0; diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c index 0e9f56eaba..82a2e2a06a 100644 --- a/fs/ceph/dir.c +++ b/fs/ceph/dir.c @@ -1336,8 +1336,12 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) struct inode *inode = d_inode(dentry); struct ceph_mds_request *req; bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); + struct dentry *dn; int err = -EROFS; int op; + char *path; + int pathlen; + u64 pathbase; if (ceph_snap(dir) == CEPH_SNAPDIR) { /* rmdir .snap/foo is RMSNAP */ @@ -1351,6 +1355,30 @@ static int ceph_unlink(struct inode *dir, struct dentry *dentry) CEPH_MDS_OP_RMDIR : CEPH_MDS_OP_UNLINK; } else goto out; + + dn = d_find_alias(dir); + if (!dn) { + try_async = false; + } else { + path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0); + if (IS_ERR(path)) { + try_async = false; + err = 0; + } else { + err = ceph_mds_check_access(mdsc, path, MAY_WRITE); + } + ceph_mdsc_free_path(path, pathlen); + dput(dn); + + /* For none EACCES cases will let the MDS do the mds auth check */ + if (err == -EACCES) { + return err; + } else if (err < 0) { + try_async = false; + err = 0; + } + } + retry: req = ceph_mdsc_create_request(mdsc, op, USE_AUTH_MDS); if (IS_ERR(req)) { diff --git a/fs/ceph/file.c b/fs/ceph/file.c index 16873d0769..4b8d59ebda 100644 --- a/fs/ceph/file.c +++ b/fs/ceph/file.c @@ -366,6 +366,12 @@ int ceph_open(struct inode *inode, struct file *file) struct ceph_file_info *fi = file->private_data; int err; int flags, fmode, wanted; + struct dentry *dentry; + char *path; + int pathlen; + u64 pathbase; + bool do_sync = false; + int mask = MAY_READ; if (fi) { doutc(cl, "file %p is already opened\n", file); @@ -387,6 +393,31 @@ int ceph_open(struct inode *inode, struct file *file) fmode = ceph_flags_to_mode(flags); wanted = ceph_caps_for_mode(fmode); + if (fmode & CEPH_FILE_MODE_WR) + mask |= MAY_WRITE; + dentry = d_find_alias(inode); + if (!dentry) { + do_sync = true; + } else { + path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0); + if (IS_ERR(path)) { + do_sync = true; + err = 0; + } else { + err = ceph_mds_check_access(mdsc, path, mask); + } + ceph_mdsc_free_path(path, pathlen); + dput(dentry); + + /* For none EACCES cases will let the MDS do the mds auth check */ + if (err == -EACCES) { + return err; + } else if (err < 0) { + do_sync = true; + err = 0; + } + } + /* snapped files are read-only */ if (ceph_snap(inode) != CEPH_NOSNAP && (file->f_mode & FMODE_WRITE)) return -EROFS; @@ -402,7 +433,7 @@ int ceph_open(struct inode *inode, struct file *file) * asynchronously. */ spin_lock(&ci->i_ceph_lock); - if (__ceph_is_any_real_caps(ci) && + if (!do_sync && __ceph_is_any_real_caps(ci) && (((fmode & CEPH_FILE_MODE_WR) == 0) || ci->i_auth_cap)) { int mds_wanted = __ceph_caps_mds_wanted(ci, true); int issued = __ceph_caps_issued(ci, NULL); @@ -420,7 +451,7 @@ int ceph_open(struct inode *inode, struct file *file) ceph_check_caps(ci, 0); return ceph_init_file(inode, file, fmode); - } else if (ceph_snap(inode) != CEPH_NOSNAP && + } else if (!do_sync && ceph_snap(inode) != CEPH_NOSNAP && (ci->i_snap_caps & wanted) == wanted) { __ceph_touch_fmode(ci, mdsc, fmode); spin_unlock(&ci->i_ceph_lock); @@ -759,6 +790,9 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, bool try_async = ceph_test_mount_opt(fsc, ASYNC_DIROPS); int mask; int err; + char *path; + int pathlen; + u64 pathbase; doutc(cl, "%p %llx.%llx dentry %p '%pd' %s flags %d mode 0%o\n", dir, ceph_vinop(dir), dentry, dentry, @@ -776,6 +810,34 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry, */ flags &= ~O_TRUNC; + dn = d_find_alias(dir); + if (!dn) { + try_async = false; + } else { + path = ceph_mdsc_build_path(mdsc, dn, &pathlen, &pathbase, 0); + if (IS_ERR(path)) { + try_async = false; + err = 0; + } else { + int fmode = ceph_flags_to_mode(flags); + + mask = MAY_READ; + if (fmode & CEPH_FILE_MODE_WR) + mask |= MAY_WRITE; + err = ceph_mds_check_access(mdsc, path, mask); + } + ceph_mdsc_free_path(path, pathlen); + dput(dn); + + /* For none EACCES cases will let the MDS do the mds auth check */ + if (err == -EACCES) { + return err; + } else if (err < 0) { + try_async = false; + err = 0; + } + } + retry: if (flags & O_CREAT) { if (ceph_quota_is_max_files_exceeded(dir)) diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index 7b2e77517f..249ddfbb1b 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -577,6 +577,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb) /* Set parameters for the netfs library */ netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false); + /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */ + __set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags); spin_lock_init(&ci->i_ceph_lock); @@ -2480,6 +2482,34 @@ int __ceph_setattr(struct mnt_idmap *idmap, struct inode *inode, bool lock_snap_rwsem = false; bool fill_fscrypt; int truncate_retry = 20; /* The RMW will take around 50ms */ + struct dentry *dentry; + char *path; + int pathlen; + u64 pathbase; + bool do_sync = false; + + dentry = d_find_alias(inode); + if (!dentry) { + do_sync = true; + } else { + path = ceph_mdsc_build_path(mdsc, dentry, &pathlen, &pathbase, 0); + if (IS_ERR(path)) { + do_sync = true; + err = 0; + } else { + err = ceph_mds_check_access(mdsc, path, MAY_WRITE); + } + ceph_mdsc_free_path(path, pathlen); + dput(dentry); + + /* For none EACCES cases will let the MDS do the mds auth check */ + if (err == -EACCES) { + return err; + } else if (err < 0) { + do_sync = true; + err = 0; + } + } retry: prealloc_cf = ceph_alloc_cap_flush(); @@ -2526,7 +2556,7 @@ retry: /* It should never be re-set once set */ WARN_ON_ONCE(ci->fscrypt_auth); - if (issued & CEPH_CAP_AUTH_EXCL) { + if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) { dirtied |= CEPH_CAP_AUTH_EXCL; kfree(ci->fscrypt_auth); ci->fscrypt_auth = (u8 *)cia->fscrypt_auth; @@ -2555,7 +2585,7 @@ retry: ceph_vinop(inode), from_kuid(&init_user_ns, inode->i_uid), from_kuid(&init_user_ns, attr->ia_uid)); - if (issued & CEPH_CAP_AUTH_EXCL) { + if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) { inode->i_uid = fsuid; dirtied |= CEPH_CAP_AUTH_EXCL; } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || @@ -2573,7 +2603,7 @@ retry: ceph_vinop(inode), from_kgid(&init_user_ns, inode->i_gid), from_kgid(&init_user_ns, attr->ia_gid)); - if (issued & CEPH_CAP_AUTH_EXCL) { + if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) { inode->i_gid = fsgid; dirtied |= CEPH_CAP_AUTH_EXCL; } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || @@ -2587,7 +2617,7 @@ retry: if (ia_valid & ATTR_MODE) { doutc(cl, "%p %llx.%llx mode 0%o -> 0%o\n", inode, ceph_vinop(inode), inode->i_mode, attr->ia_mode); - if (issued & CEPH_CAP_AUTH_EXCL) { + if (!do_sync && (issued & CEPH_CAP_AUTH_EXCL)) { inode->i_mode = attr->ia_mode; dirtied |= CEPH_CAP_AUTH_EXCL; } else if ((issued & CEPH_CAP_AUTH_SHARED) == 0 || @@ -2606,11 +2636,11 @@ retry: inode, ceph_vinop(inode), atime.tv_sec, atime.tv_nsec, attr->ia_atime.tv_sec, attr->ia_atime.tv_nsec); - if (issued & CEPH_CAP_FILE_EXCL) { + if (!do_sync && (issued & CEPH_CAP_FILE_EXCL)) { ci->i_time_warp_seq++; inode_set_atime_to_ts(inode, attr->ia_atime); dirtied |= CEPH_CAP_FILE_EXCL; - } else if ((issued & CEPH_CAP_FILE_WR) && + } else if (!do_sync && (issued & CEPH_CAP_FILE_WR) && timespec64_compare(&atime, &attr->ia_atime) < 0) { inode_set_atime_to_ts(inode, attr->ia_atime); @@ -2646,7 +2676,7 @@ retry: CEPH_FSCRYPT_BLOCK_SIZE)); req->r_fscrypt_file = attr->ia_size; fill_fscrypt = true; - } else if ((issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) { + } else if (!do_sync && (issued & CEPH_CAP_FILE_EXCL) && attr->ia_size >= isize) { if (attr->ia_size > isize) { i_size_write(inode, attr->ia_size); inode->i_blocks = calc_inode_blocks(attr->ia_size); @@ -2683,11 +2713,11 @@ retry: inode, ceph_vinop(inode), mtime.tv_sec, mtime.tv_nsec, attr->ia_mtime.tv_sec, attr->ia_mtime.tv_nsec); - if (issued & CEPH_CAP_FILE_EXCL) { + if (!do_sync && (issued & CEPH_CAP_FILE_EXCL)) { ci->i_time_warp_seq++; inode_set_mtime_to_ts(inode, attr->ia_mtime); dirtied |= CEPH_CAP_FILE_EXCL; - } else if ((issued & CEPH_CAP_FILE_WR) && + } else if (!do_sync && (issued & CEPH_CAP_FILE_WR) && timespec64_compare(&mtime, &attr->ia_mtime) < 0) { inode_set_mtime_to_ts(inode, attr->ia_mtime); dirtied |= CEPH_CAP_FILE_WR; diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c index 360b686c3c..c2157f6e0c 100644 --- a/fs/ceph/mds_client.c +++ b/fs/ceph/mds_client.c @@ -4112,10 +4112,13 @@ static void handle_session(struct ceph_mds_session *session, void *p = msg->front.iov_base; void *end = p + msg->front.iov_len; struct ceph_mds_session_head *h; - u32 op; + struct ceph_mds_cap_auth *cap_auths = NULL; + u32 op, cap_auths_num = 0; u64 seq, features = 0; int wake = 0; bool blocklisted = false; + u32 i; + /* decode */ ceph_decode_need(&p, end, sizeof(*h), bad); @@ -4160,7 +4163,101 @@ static void handle_session(struct ceph_mds_session *session, } } + if (msg_version >= 6) { + ceph_decode_32_safe(&p, end, cap_auths_num, bad); + doutc(cl, "cap_auths_num %d\n", cap_auths_num); + + if (cap_auths_num && op != CEPH_SESSION_OPEN) { + WARN_ON_ONCE(op != CEPH_SESSION_OPEN); + goto skip_cap_auths; + } + + cap_auths = kcalloc(cap_auths_num, + sizeof(struct ceph_mds_cap_auth), + GFP_KERNEL); + if (!cap_auths) { + pr_err_client(cl, "No memory for cap_auths\n"); + return; + } + + for (i = 0; i < cap_auths_num; i++) { + u32 _len, j; + + /* struct_v, struct_compat, and struct_len in MDSCapAuth */ + ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad); + + /* struct_v, struct_compat, and struct_len in MDSCapMatch */ + ceph_decode_skip_n(&p, end, 2 + sizeof(u32), bad); + ceph_decode_64_safe(&p, end, cap_auths[i].match.uid, bad); + ceph_decode_32_safe(&p, end, _len, bad); + if (_len) { + cap_auths[i].match.gids = kcalloc(_len, sizeof(u32), + GFP_KERNEL); + if (!cap_auths[i].match.gids) { + pr_err_client(cl, "No memory for gids\n"); + goto fail; + } + + cap_auths[i].match.num_gids = _len; + for (j = 0; j < _len; j++) + ceph_decode_32_safe(&p, end, + cap_auths[i].match.gids[j], + bad); + } + + ceph_decode_32_safe(&p, end, _len, bad); + if (_len) { + cap_auths[i].match.path = kcalloc(_len + 1, sizeof(char), + GFP_KERNEL); + if (!cap_auths[i].match.path) { + pr_err_client(cl, "No memory for path\n"); + goto fail; + } + ceph_decode_copy(&p, cap_auths[i].match.path, _len); + + /* Remove the tailing '/' */ + while (_len && cap_auths[i].match.path[_len - 1] == '/') { + cap_auths[i].match.path[_len - 1] = '\0'; + _len -= 1; + } + } + + ceph_decode_32_safe(&p, end, _len, bad); + if (_len) { + cap_auths[i].match.fs_name = kcalloc(_len + 1, sizeof(char), + GFP_KERNEL); + if (!cap_auths[i].match.fs_name) { + pr_err_client(cl, "No memory for fs_name\n"); + goto fail; + } + ceph_decode_copy(&p, cap_auths[i].match.fs_name, _len); + } + + ceph_decode_8_safe(&p, end, cap_auths[i].match.root_squash, bad); + ceph_decode_8_safe(&p, end, cap_auths[i].readable, bad); + ceph_decode_8_safe(&p, end, cap_auths[i].writeable, bad); + doutc(cl, "uid %lld, num_gids %u, path %s, fs_name %s, root_squash %d, readable %d, writeable %d\n", + cap_auths[i].match.uid, cap_auths[i].match.num_gids, + cap_auths[i].match.path, cap_auths[i].match.fs_name, + cap_auths[i].match.root_squash, + cap_auths[i].readable, cap_auths[i].writeable); + } + } + +skip_cap_auths: mutex_lock(&mdsc->mutex); + if (op == CEPH_SESSION_OPEN) { + if (mdsc->s_cap_auths) { + for (i = 0; i < mdsc->s_cap_auths_num; i++) { + kfree(mdsc->s_cap_auths[i].match.gids); + kfree(mdsc->s_cap_auths[i].match.path); + kfree(mdsc->s_cap_auths[i].match.fs_name); + } + kfree(mdsc->s_cap_auths); + } + mdsc->s_cap_auths_num = cap_auths_num; + mdsc->s_cap_auths = cap_auths; + } if (op == CEPH_SESSION_CLOSE) { ceph_get_mds_session(session); __unregister_session(mdsc, session); @@ -4290,6 +4387,13 @@ bad: pr_err_client(cl, "corrupt message mds%d len %d\n", mds, (int)msg->front.iov_len); ceph_msg_dump(msg); +fail: + for (i = 0; i < cap_auths_num; i++) { + kfree(cap_auths[i].match.gids); + kfree(cap_auths[i].match.path); + kfree(cap_auths[i].match.fs_name); + } + kfree(cap_auths); return; } @@ -5499,6 +5603,170 @@ void send_flush_mdlog(struct ceph_mds_session *s) mutex_unlock(&s->s_mutex); } +static int ceph_mds_auth_match(struct ceph_mds_client *mdsc, + struct ceph_mds_cap_auth *auth, + char *tpath) +{ + const struct cred *cred = get_current_cred(); + u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid); + u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid); + struct ceph_client *cl = mdsc->fsc->client; + const char *spath = mdsc->fsc->mount_options->server_path; + bool gid_matched = false; + u32 gid, tlen, len; + int i, j; + + doutc(cl, "match.uid %lld\n", auth->match.uid); + if (auth->match.uid != MDS_AUTH_UID_ANY) { + if (auth->match.uid != caller_uid) + return 0; + if (auth->match.num_gids) { + for (i = 0; i < auth->match.num_gids; i++) { + if (caller_gid == auth->match.gids[i]) + gid_matched = true; + } + if (!gid_matched && cred->group_info->ngroups) { + for (i = 0; i < cred->group_info->ngroups; i++) { + gid = from_kgid(&init_user_ns, + cred->group_info->gid[i]); + for (j = 0; j < auth->match.num_gids; j++) { + if (gid == auth->match.gids[j]) { + gid_matched = true; + break; + } + } + if (gid_matched) + break; + } + } + if (!gid_matched) + return 0; + } + } + + /* path match */ + if (auth->match.path) { + if (!tpath) + return 0; + + tlen = strlen(tpath); + len = strlen(auth->match.path); + if (len) { + char *_tpath = tpath; + bool free_tpath = false; + int m, n; + + doutc(cl, "server path %s, tpath %s, match.path %s\n", + spath, tpath, auth->match.path); + if (spath && (m = strlen(spath)) != 1) { + /* mount path + '/' + tpath + an extra space */ + n = m + 1 + tlen + 1; + _tpath = kmalloc(n, GFP_NOFS); + if (!_tpath) + return -ENOMEM; + /* remove the leading '/' */ + snprintf(_tpath, n, "%s/%s", spath + 1, tpath); + free_tpath = true; + tlen = strlen(_tpath); + } + + /* + * Please note the tailing '/' for match.path has already + * been removed when parsing. + * + * Remove the tailing '/' for the target path. + */ + while (tlen && _tpath[tlen - 1] == '/') { + _tpath[tlen - 1] = '\0'; + tlen -= 1; + } + doutc(cl, "_tpath %s\n", _tpath); + + /* + * In case first == _tpath && tlen == len: + * match.path=/foo --> /foo _path=/foo --> match + * match.path=/foo/ --> /foo _path=/foo --> match + * + * In case first == _tmatch.path && tlen > len: + * match.path=/foo/ --> /foo _path=/foo/ --> match + * match.path=/foo --> /foo _path=/foo/ --> match + * match.path=/foo/ --> /foo _path=/foo/d --> match + * match.path=/foo --> /foo _path=/food --> mismatch + * + * All the other cases --> mismatch + */ + char *first = strstr(_tpath, auth->match.path); + if (first != _tpath) { + if (free_tpath) + kfree(_tpath); + return 0; + } + + if (tlen > len && _tpath[len] != '/') { + if (free_tpath) + kfree(_tpath); + return 0; + } + } + } + + doutc(cl, "matched\n"); + return 1; +} + +int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath, int mask) +{ + const struct cred *cred = get_current_cred(); + u32 caller_uid = from_kuid(&init_user_ns, cred->fsuid); + u32 caller_gid = from_kgid(&init_user_ns, cred->fsgid); + struct ceph_mds_cap_auth *rw_perms_s = NULL; + struct ceph_client *cl = mdsc->fsc->client; + bool root_squash_perms = true; + int i, err; + + doutc(cl, "tpath '%s', mask %d, caller_uid %d, caller_gid %d\n", + tpath, mask, caller_uid, caller_gid); + + for (i = 0; i < mdsc->s_cap_auths_num; i++) { + struct ceph_mds_cap_auth *s = &mdsc->s_cap_auths[i]; + + err = ceph_mds_auth_match(mdsc, s, tpath); + if (err < 0) { + return err; + } else if (err > 0) { + /* always follow the last auth caps' permision */ + root_squash_perms = true; + rw_perms_s = NULL; + if ((mask & MAY_WRITE) && s->writeable && + s->match.root_squash && (!caller_uid || !caller_gid)) + root_squash_perms = false; + + if (((mask & MAY_WRITE) && !s->writeable) || + ((mask & MAY_READ) && !s->readable)) + rw_perms_s = s; + } + } + + doutc(cl, "root_squash_perms %d, rw_perms_s %p\n", root_squash_perms, + rw_perms_s); + if (root_squash_perms && rw_perms_s == NULL) { + doutc(cl, "access allowed\n"); + return 0; + } + + if (!root_squash_perms) { + doutc(cl, "root_squash is enabled and user(%d %d) isn't allowed to write", + caller_uid, caller_gid); + } + if (rw_perms_s) { + doutc(cl, "mds auth caps readable/writeable %d/%d while request r/w %d/%d", + rw_perms_s->readable, rw_perms_s->writeable, + !!(mask & MAY_READ), !!(mask & MAY_WRITE)); + } + doutc(cl, "access denied\n"); + return -EACCES; +} + /* * called before mount is ro, and before dentries are torn down. * (hmm, does this still race with new lookups?) diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h index b88e804152..cfa18cf915 100644 --- a/fs/ceph/mds_client.h +++ b/fs/ceph/mds_client.h @@ -35,8 +35,9 @@ enum ceph_feature_type { CEPHFS_FEATURE_32BITS_RETRY_FWD, CEPHFS_FEATURE_NEW_SNAPREALM_INFO, CEPHFS_FEATURE_HAS_OWNER_UIDGID, + CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK, - CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_HAS_OWNER_UIDGID, + CEPHFS_FEATURE_MAX = CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK, }; #define CEPHFS_FEATURES_CLIENT_SUPPORTED { \ @@ -52,6 +53,7 @@ enum ceph_feature_type { CEPHFS_FEATURE_OP_GETVXATTR, \ CEPHFS_FEATURE_32BITS_RETRY_FWD, \ CEPHFS_FEATURE_HAS_OWNER_UIDGID, \ + CEPHFS_FEATURE_MDS_AUTH_CAPS_CHECK, \ } /* @@ -71,6 +73,24 @@ enum ceph_feature_type { struct ceph_fs_client; struct ceph_cap; +#define MDS_AUTH_UID_ANY -1 + +struct ceph_mds_cap_match { + s64 uid; /* default to MDS_AUTH_UID_ANY */ + u32 num_gids; + u32 *gids; /* use these GIDs */ + char *path; /* require path to be child of this + (may be "" or "/" for any) */ + char *fs_name; + bool root_squash; /* default to false */ +}; + +struct ceph_mds_cap_auth { + struct ceph_mds_cap_match match; + bool readable; + bool writeable; +}; + /* * parsed info about a single inode. pointers are into the encoded * on-wire structures within the mds reply message payload. @@ -513,6 +533,9 @@ struct ceph_mds_client { struct rw_semaphore pool_perm_rwsem; struct rb_root pool_perm_tree; + u32 s_cap_auths_num; + struct ceph_mds_cap_auth *s_cap_auths; + char nodename[__NEW_UTS_LEN + 1]; }; @@ -581,6 +604,9 @@ extern void ceph_queue_cap_unlink_work(struct ceph_mds_client *mdsc); extern int ceph_iterate_session_caps(struct ceph_mds_session *session, int (*cb)(struct inode *, int mds, void *), void *arg); +extern int ceph_mds_check_access(struct ceph_mds_client *mdsc, char *tpath, + int mask); + extern void ceph_mdsc_pre_umount(struct ceph_mds_client *mdsc); static inline void ceph_mdsc_free_path(char *path, int len) diff --git a/fs/ceph/super.c b/fs/ceph/super.c index 885cb5d4e7..0cdf84cd17 100644 --- a/fs/ceph/super.c +++ b/fs/ceph/super.c @@ -961,7 +961,8 @@ static int __init init_caches(void) if (!ceph_mds_request_cachep) goto bad_mds_req; - ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT); + ceph_wb_pagevec_pool = mempool_create_kmalloc_pool(10, + (CEPH_MAX_WRITE_SIZE >> PAGE_SHIFT) * sizeof(struct page *)); if (!ceph_wb_pagevec_pool) goto bad_pagevec_pool; |