diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/client/fuse_ll.cc | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/client/fuse_ll.cc | 1814 |
1 files changed, 1814 insertions, 0 deletions
diff --git a/src/client/fuse_ll.cc b/src/client/fuse_ll.cc new file mode 100644 index 000000000..7f92dd668 --- /dev/null +++ b/src/client/fuse_ll.cc @@ -0,0 +1,1814 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <sys/file.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <limits.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> + +#if defined(__linux__) +#include <libgen.h> +#include <sys/vfs.h> +#include <sys/xattr.h> +#include <linux/magic.h> +#endif + +// ceph +#include "common/errno.h" +#include "common/safe_io.h" +#include "include/types.h" +#include "Client.h" +#include "Fh.h" +#include "ioctl.h" +#include "common/config.h" +#include "include/ceph_assert.h" +#include "include/cephfs/ceph_ll_client.h" +#include "include/ceph_fuse.h" + +#include "fuse_ll.h" +#include <fuse_lowlevel.h> + +#define dout_context g_ceph_context + +#define FINO_INO(x) ((x) & ((1ull<<48)-1ull)) +#define FINO_STAG(x) ((x) >> 48) +#define MAKE_FINO(i,s) ((i) | ((int64_t)(s) << 48)) +#define STAG_MASK 0xffff +#define G_NOSNAP_STAG 0 // for all CEPH_NOSNAP +#define G_SNAPDIR_STAG 1 // for all CEPH_SNAPDIR + +#define MINORBITS 20 +#define MINORMASK ((1U << MINORBITS) - 1) + +#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS)) +#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK)) +#define MKDEV(ma,mi) (((ma) << MINORBITS) | (mi)) + +#if defined(__linux__) +#ifndef FUSE_SUPER_MAGIC +#define FUSE_SUPER_MAGIC 0x65735546 +#endif + +#define _CEPH_CLIENT_ID "ceph.client_id" +#endif + +/* + * The dedicated struct for snapid <-> stag map for each ceph + * inode, and the stag is a number in range [2, 0xffff], and + * the stag number 0 is reserved for CEPH_NOSNAP and 1 is + * reserved for CEPH_SNAPDIR. + */ +struct ceph_fuse_fake_inode_stag { + ceph::unordered_map<uint64_t,int> snap_stag_map; // <snapid, stagid> + ceph::unordered_map<int, uint64_t> stag_snap_map; // <stagid, snapid> + int last_stag = 1; +}; + +using namespace std; + +static const ceph::unordered_map<int,int> cephfs_errno_to_system_errno = { + {CEPHFS_EBLOCKLISTED, ESHUTDOWN}, + {CEPHFS_EPERM, EPERM}, + {CEPHFS_ESTALE, ESTALE}, + {CEPHFS_ENOSPC, ENOSPC}, + {CEPHFS_ETIMEDOUT, ETIMEDOUT}, + {CEPHFS_EIO, EIO}, + {CEPHFS_ENOTCONN, ENOTCONN}, + {CEPHFS_EEXIST, EEXIST}, + {CEPHFS_EINTR, EINTR}, + {CEPHFS_EINVAL, EINVAL}, + {CEPHFS_EBADF, EBADF}, + {CEPHFS_EROFS, EROFS}, + {CEPHFS_EAGAIN, EAGAIN}, + {CEPHFS_EACCES, EACCES}, + {CEPHFS_ELOOP, ELOOP}, + {CEPHFS_EISDIR, EISDIR}, + {CEPHFS_ENOENT, ENOENT}, + {CEPHFS_ENOTDIR, ENOTDIR}, + {CEPHFS_ENAMETOOLONG, ENAMETOOLONG}, + {CEPHFS_EBUSY, EBUSY}, + {CEPHFS_EDQUOT, EDQUOT}, + {CEPHFS_EFBIG, EFBIG}, + {CEPHFS_ERANGE, ERANGE}, + {CEPHFS_ENXIO, ENXIO}, + {CEPHFS_ECANCELED, ECANCELED}, + {CEPHFS_ENODATA, ENODATA}, + {CEPHFS_EOPNOTSUPP, EOPNOTSUPP}, + {CEPHFS_EXDEV, EXDEV}, + {CEPHFS_ENOMEM, ENOMEM}, + {CEPHFS_ENOTRECOVERABLE, ENOTRECOVERABLE}, + {CEPHFS_ENOSYS, ENOSYS}, + {CEPHFS_ENOTEMPTY, ENOTEMPTY}, + {CEPHFS_EDEADLK, EDEADLK}, + {CEPHFS_EDOM, EDOM}, + {CEPHFS_EMLINK, EMLINK}, + {CEPHFS_ETIME, ETIME}, + {CEPHFS_EOLDSNAPC, EIO} // forcing to EIO for now +}; + +/* Requirements: + * cephfs_errno >= 0 + */ +static int get_sys_errno(int cephfs_errno) +{ + if (cephfs_errno == 0) + return 0; + + auto it = cephfs_errno_to_system_errno.find(cephfs_errno); + if (it != cephfs_errno_to_system_errno.end()) + return it->second; + return EIO; +} + +static uint32_t new_encode_dev(dev_t dev) +{ + unsigned major = MAJOR(dev); + unsigned minor = MINOR(dev); + return (minor & 0xff) | (major << 8) | ((minor & ~0xff) << 12); +} + +static dev_t new_decode_dev(uint32_t dev) +{ + unsigned major = (dev & 0xfff00) >> 8; + unsigned minor = (dev & 0xff) | ((dev >> 12) & 0xfff00); + return MKDEV(major, minor); +} + +class CephFuse::Handle { +public: + Handle(Client *c, int fd); + ~Handle(); + + int init(int argc, const char *argv[]); + int start(); + int loop(); + void finalize(); + + uint64_t fino_snap(uint64_t fino); + uint64_t make_fake_ino(inodeno_t ino, snapid_t snapid); + Inode * iget(fuse_ino_t fino); + void iput(Inode *in); + + int fd_on_success; + Client *client; + + struct fuse_session *se = nullptr; +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + struct fuse_cmdline_opts opts; + struct fuse_conn_info_opts *conn_opts; +#else + struct fuse_chan *ch = nullptr; + char *mountpoint = nullptr; +#endif + + ceph::mutex stag_lock = ceph::make_mutex("fuse_ll.cc stag_lock"); + + // a map of <ceph ino, fino stag/snapid map> + ceph::unordered_map<uint64_t, struct ceph_fuse_fake_inode_stag> g_fino_maps; + + pthread_key_t fuse_req_key = 0; + void set_fuse_req(fuse_req_t); + fuse_req_t get_fuse_req(); + + struct fuse_args args; +}; + +#if defined(__linux__) +static int already_fuse_mounted(const char *path, bool &already_mounted) +{ + struct statx path_statx; + struct statx parent_statx; + char path_copy[PATH_MAX] = {0}; + char *parent_path = NULL; + int err = 0; + + already_mounted = false; + + strncpy(path_copy, path, sizeof(path_copy)-1); + parent_path = dirname(path_copy); + + // get stat information for original path + if (-1 == statx(AT_FDCWD, path, AT_STATX_DONT_SYNC, STATX_INO, &path_statx)) { + err = errno; + derr << "fuse_ll: already_fuse_mounted: statx(" << path << ") failed with error " + << cpp_strerror(err) << dendl; + return err; + } + + // if path isn't directory, then it can't be a mountpoint. + if (!(path_statx.stx_mode & S_IFDIR)) { + err = EINVAL; + derr << "fuse_ll: already_fuse_mounted: " + << path << " is not a directory" << dendl; + return err; + } + + // get stat information for parent path + if (-1 == statx(AT_FDCWD, parent_path, AT_STATX_DONT_SYNC, STATX_INO, &parent_statx)) { + err = errno; + derr << "fuse_ll: already_fuse_mounted: statx(" << parent_path << ") failed with error " + << cpp_strerror(err) << dendl; + return err; + } + + // if original path and parent have different device ids, + // then the path is a mount point + // or, if they refer to the same path, then it's probably + // the root directory '/' and therefore path is a mountpoint + if( path_statx.stx_dev_major != parent_statx.stx_dev_major || + path_statx.stx_dev_minor != parent_statx.stx_dev_minor || + ( path_statx.stx_dev_major == parent_statx.stx_dev_major && + path_statx.stx_dev_minor == parent_statx.stx_dev_minor && + path_statx.stx_ino == parent_statx.stx_ino + ) + ) { + struct statfs path_statfs; + if (-1 == statfs(path, &path_statfs)) { + err = errno; + derr << "fuse_ll: already_fuse_mounted: statfs(" << path << ") failed with error " + << cpp_strerror(err) << dendl; + return err; + } + + if(FUSE_SUPER_MAGIC == path_statfs.f_type) { + // if getxattr returns positive length means value exist for ceph.client_id + // then ceph fuse is already mounted on path + char client_id[128] = {0}; + if (getxattr(path, _CEPH_CLIENT_ID, &client_id, sizeof(client_id)) > 0) { + already_mounted = true; + derr << path << " already mounted by " << client_id << dendl; + } + } + } + + return err; +} +#else // non-linux platforms +static int already_fuse_mounted(const char *path, bool &already_mounted) +{ + already_mounted = false; + return 0; +} +#endif + +static int getgroups(fuse_req_t req, gid_t **sgids) +{ +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) + ceph_assert(sgids); + int c = fuse_req_getgroups(req, 0, NULL); + if (c < 0) { + return c; + } + if (c == 0) { + return 0; + } + + gid_t *gids = new (std::nothrow) gid_t[c]; + if (!gids) { + return -get_sys_errno(CEPHFS_ENOMEM); + } + c = fuse_req_getgroups(req, c, gids); + if (c < 0) { + delete[] gids; + } else { + *sgids = gids; + } + return c; +#endif + return -get_sys_errno(CEPHFS_ENOSYS); +} + +static void get_fuse_groups(UserPerm& perms, fuse_req_t req) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req); + if (cfuse->client->cct->_conf.get_val<bool>("fuse_set_user_groups")) { + gid_t *gids = NULL; + int count = getgroups(req, &gids); + + if (count > 0) { + perms.init_gids(gids, count); + } else if (count < 0) { + derr << __func__ << ": getgroups failed: " << cpp_strerror(-count) + << dendl; + } + } +} + + +static CephFuse::Handle *fuse_ll_req_prepare(fuse_req_t req) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(req); + cfuse->set_fuse_req(req); + return cfuse; +} + +static void fuse_ll_lookup(fuse_req_t req, fuse_ino_t parent, const char *name) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + struct fuse_entry_param fe; + Inode *i2, *i1 = cfuse->iget(parent); // see below + int r; + UserPerm perms(ctx->uid, ctx->gid); + get_fuse_groups(perms, req); + + if (!i1) + { + r = cfuse->client->lookup_ino(parent, perms, &i1); + if (r < 0) { + fuse_reply_err(req, get_sys_errno(-r)); + return; + } + } + + memset(&fe, 0, sizeof(fe)); + r = cfuse->client->ll_lookup(i1, name, &fe.attr, &i2, perms); + if (r >= 0) { + fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev); + fe.attr.st_rdev = new_encode_dev(fe.attr.st_rdev); + fuse_reply_entry(req, &fe); + } else { + fuse_reply_err(req, get_sys_errno(-r)); + } + + // XXX NB, we dont iput(i2) because FUSE will do so in a matching + // fuse_ll_forget() + cfuse->iput(i1); +} + +// fuse3 has changed forget function signature +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) +static void fuse_ll_forget(fuse_req_t req, fuse_ino_t ino, + uint64_t nlookup) +#else +static void fuse_ll_forget(fuse_req_t req, fuse_ino_t ino, + long unsigned nlookup) +#endif +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Inode *in = cfuse->iget(ino); + if (in) + cfuse->client->ll_forget(in, nlookup+1); + fuse_reply_none(req); +} + +static void fuse_ll_getattr(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + struct stat stbuf; + UserPerm perms(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(ino); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + (void) fi; // XXX + + if (cfuse->client->ll_getattr(in, &stbuf, perms) + == 0) { + stbuf.st_ino = cfuse->make_fake_ino(stbuf.st_ino, stbuf.st_dev); + stbuf.st_rdev = new_encode_dev(stbuf.st_rdev); + fuse_reply_attr(req, &stbuf, 0); + } else + fuse_reply_err(req, ENOENT); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_setattr(fuse_req_t req, fuse_ino_t ino, struct stat *attr, + int to_set, struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + UserPerm perms(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(ino); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + int mask = 0; + if (to_set & FUSE_SET_ATTR_MODE) mask |= CEPH_SETATTR_MODE; + if (to_set & FUSE_SET_ATTR_UID) mask |= CEPH_SETATTR_UID; + if (to_set & FUSE_SET_ATTR_GID) mask |= CEPH_SETATTR_GID; + if (to_set & FUSE_SET_ATTR_MTIME) mask |= CEPH_SETATTR_MTIME; + if (to_set & FUSE_SET_ATTR_ATIME) mask |= CEPH_SETATTR_ATIME; + if (to_set & FUSE_SET_ATTR_SIZE) mask |= CEPH_SETATTR_SIZE; +#if !defined(__APPLE__) + if (to_set & FUSE_SET_ATTR_MTIME_NOW) mask |= CEPH_SETATTR_MTIME_NOW; + if (to_set & FUSE_SET_ATTR_ATIME_NOW) mask |= CEPH_SETATTR_ATIME_NOW; +#endif + + int r = cfuse->client->ll_setattr(in, attr, mask, perms); + if (r == 0) + fuse_reply_attr(req, attr, 0); + else + fuse_reply_err(req, get_sys_errno(-r)); + + cfuse->iput(in); // iput required +} + +// XATTRS + +static void fuse_ll_setxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + const char *value, size_t size, + int flags +#if defined(__APPLE__) + ,uint32_t pos +#endif + ) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + UserPerm perms(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(ino); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + int r = cfuse->client->ll_setxattr(in, name, value, size, flags, perms); + fuse_reply_err(req, get_sys_errno(-r)); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_listxattr(fuse_req_t req, fuse_ino_t ino, size_t size) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + char buf[size]; + UserPerm perms(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(ino); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + int r = cfuse->client->ll_listxattr(in, buf, size, perms); + if (size == 0 && r >= 0) + fuse_reply_xattr(req, r); + else if (r >= 0) + fuse_reply_buf(req, buf, r); + else + fuse_reply_err(req, get_sys_errno(-r)); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_getxattr(fuse_req_t req, fuse_ino_t ino, const char *name, + size_t size +#if defined(__APPLE__) + ,uint32_t position +#endif + ) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + char buf[size]; + UserPerm perms(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(ino); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + int r = cfuse->client->ll_getxattr(in, name, buf, size, perms); + if (size == 0 && r >= 0) + fuse_reply_xattr(req, r); + else if (r >= 0) + fuse_reply_buf(req, buf, r); + else + fuse_reply_err(req, get_sys_errno(-r)); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_removexattr(fuse_req_t req, fuse_ino_t ino, + const char *name) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + UserPerm perms(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(ino); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + int r = cfuse->client->ll_removexattr(in, name, perms); + fuse_reply_err(req, get_sys_errno(-r)); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_opendir(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + UserPerm perms(ctx->uid, ctx->gid); + void *dirp; + Inode *in = cfuse->iget(ino); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + int r = cfuse->client->ll_opendir(in, fi->flags, (dir_result_t **)&dirp, + perms); + if (r >= 0) { + fi->fh = (uint64_t)dirp; + fuse_reply_open(req, fi); + } else { + fuse_reply_err(req, get_sys_errno(-r)); + } + + cfuse->iput(in); // iput required +} + +static void fuse_ll_readlink(fuse_req_t req, fuse_ino_t ino) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + char buf[PATH_MAX + 1]; // leave room for a null terminator + UserPerm perms(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(ino); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + int r = cfuse->client->ll_readlink(in, buf, sizeof(buf) - 1, perms); + if (r >= 0) { + buf[r] = '\0'; + fuse_reply_readlink(req, buf); + } else { + fuse_reply_err(req, get_sys_errno(-r)); + } + + cfuse->iput(in); // iput required +} + +static void fuse_ll_mknod(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, dev_t rdev) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + struct fuse_entry_param fe; + UserPerm perms(ctx->uid, ctx->gid); + Inode *i2, *i1 = cfuse->iget(parent); + if (!i1) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + memset(&fe, 0, sizeof(fe)); + + int r = cfuse->client->ll_mknod(i1, name, mode, new_decode_dev(rdev), + &fe.attr, &i2, perms); + if (r == 0) { + fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev); + fe.attr.st_rdev = new_encode_dev(fe.attr.st_rdev); + fuse_reply_entry(req, &fe); + } else { + fuse_reply_err(req, get_sys_errno(-r)); + } + + // XXX NB, we dont iput(i2) because FUSE will do so in a matching + // fuse_ll_forget() + cfuse->iput(i1); // iput required +} + +static void fuse_ll_mkdir(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Inode *i2, *i1; + struct fuse_entry_param fe; + + memset(&fe, 0, sizeof(fe)); + UserPerm perm(ctx->uid, ctx->gid); + get_fuse_groups(perm, req); +#ifdef HAVE_SYS_SYNCFS + auto fuse_multithreaded = cfuse->client->cct->_conf.get_val<bool>( + "fuse_multithreaded"); + auto fuse_syncfs_on_mksnap = cfuse->client->cct->_conf.get_val<bool>( + "fuse_syncfs_on_mksnap"); + if (cfuse->fino_snap(parent) == CEPH_SNAPDIR && + fuse_multithreaded && fuse_syncfs_on_mksnap) { + int err = 0; +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + int fd = ::open(cfuse->opts.mountpoint, O_RDONLY | O_DIRECTORY | O_CLOEXEC); +#else + int fd = ::open(cfuse->mountpoint, O_RDONLY | O_DIRECTORY | O_CLOEXEC); +#endif + if (fd < 0) { + err = errno; + } else { + int r = ::syncfs(fd); + if (r < 0) + err = errno; + ::close(fd); + } + if (err) { + fuse_reply_err(req, err); + return; + } + } +#endif + + i1 = cfuse->iget(parent); + if (!i1) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + int r = cfuse->client->ll_mkdir(i1, name, mode, &fe.attr, &i2, perm); + if (r == 0) { + fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev); + fe.attr.st_rdev = new_encode_dev(fe.attr.st_rdev); + fuse_reply_entry(req, &fe); + } else { + fuse_reply_err(req, get_sys_errno(-r)); + } + + // XXX NB, we dont iput(i2) because FUSE will do so in a matching + // fuse_ll_forget() + cfuse->iput(i1); // iput required +} + +static void fuse_ll_unlink(fuse_req_t req, fuse_ino_t parent, const char *name) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + UserPerm perm(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(parent); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perm, req); + + int r = cfuse->client->ll_unlink(in, name, perm); + fuse_reply_err(req, get_sys_errno(-r)); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_rmdir(fuse_req_t req, fuse_ino_t parent, const char *name) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + UserPerm perms(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(parent); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + int r = cfuse->client->ll_rmdir(in, name, perms); + fuse_reply_err(req, get_sys_errno(-r)); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_symlink(fuse_req_t req, const char *existing, + fuse_ino_t parent, const char *name) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + struct fuse_entry_param fe; + UserPerm perms(ctx->uid, ctx->gid); + Inode *i2, *i1 = cfuse->iget(parent); + if (!i1) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + memset(&fe, 0, sizeof(fe)); + + int r = cfuse->client->ll_symlink(i1, name, existing, &fe.attr, &i2, perms); + if (r == 0) { + fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev); + fe.attr.st_rdev = new_encode_dev(fe.attr.st_rdev); + fuse_reply_entry(req, &fe); + } else { + fuse_reply_err(req, get_sys_errno(-r)); + } + + // XXX NB, we dont iput(i2) because FUSE will do so in a matching + // fuse_ll_forget() + cfuse->iput(i1); // iput required +} + +static void fuse_ll_rename(fuse_req_t req, fuse_ino_t parent, const char *name, + fuse_ino_t newparent, const char *newname +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + , unsigned int flags +#endif + ) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + UserPerm perm(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(parent); + Inode *nin = cfuse->iget(newparent); + if (!in || !nin) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perm, req); + + int r = cfuse->client->ll_rename(in, name, nin, newname, perm); + fuse_reply_err(req, get_sys_errno(-r)); + + cfuse->iput(in); // iputs required + cfuse->iput(nin); +} + +static void fuse_ll_link(fuse_req_t req, fuse_ino_t ino, fuse_ino_t newparent, + const char *newname) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + struct fuse_entry_param fe; + Inode *in = cfuse->iget(ino); + Inode *nin = cfuse->iget(newparent); + if (!in || !nin) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + memset(&fe, 0, sizeof(fe)); + UserPerm perm(ctx->uid, ctx->gid); + get_fuse_groups(perm, req); + + /* + * Note that we could successfully link, but then fail the subsequent + * getattr and return an error. Perhaps we should ignore getattr errors, + * but then how do we tell FUSE that the attrs are bogus? + */ + int r = cfuse->client->ll_link(in, nin, newname, perm); + if (r == 0) { + r = cfuse->client->ll_getattr(in, &fe.attr, perm); + if (r == 0) { + fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev); + fe.attr.st_rdev = new_encode_dev(fe.attr.st_rdev); + fuse_reply_entry(req, &fe); + } + } + + if (r != 0) { + /* + * Many ll operations in libcephfs return an extra inode reference, but + * ll_link currently does not. Still, FUSE needs one for the new dentry, + * so we commandeer the reference taken earlier when ll_link is successful. + * On error however, we must put that reference. + */ + cfuse->iput(in); + fuse_reply_err(req, get_sys_errno(-r)); + } + + cfuse->iput(nin); +} + +static void fuse_ll_open(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + Fh *fh = NULL; + UserPerm perms(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(ino); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + int r = cfuse->client->ll_open(in, fi->flags, &fh, perms); + if (r == 0) { + fi->fh = (uint64_t)fh; +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) + auto fuse_disable_pagecache = cfuse->client->cct->_conf.get_val<bool>( + "fuse_disable_pagecache"); + auto fuse_use_invalidate_cb = cfuse->client->cct->_conf.get_val<bool>( + "fuse_use_invalidate_cb"); + if (fuse_disable_pagecache) + fi->direct_io = 1; + else if (fuse_use_invalidate_cb) + fi->keep_cache = 1; +#endif + fuse_reply_open(req, fi); + } else { + fuse_reply_err(req, get_sys_errno(-r)); + } + + cfuse->iput(in); // iput required +} + +static void fuse_ll_read(fuse_req_t req, fuse_ino_t ino, size_t size, off_t off, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast<Fh*>(fi->fh); + bufferlist bl; + int r = cfuse->client->ll_read(fh, off, size, &bl); + if (r >= 0) { + vector<iovec> iov; + size_t len; + struct fuse_bufvec *bufv; + + if (bl.get_num_buffers() > IOV_MAX) + bl.rebuild(); + + bl.prepare_iov(&iov); + len = sizeof(struct fuse_bufvec) + sizeof(struct fuse_buf) * (iov.size() - 1); + bufv = (struct fuse_bufvec *)calloc(1, len); + if (bufv) { + int i = 0; + bufv->count = iov.size(); + for (auto &v: iov) { + bufv->buf[i].mem = v.iov_base; + bufv->buf[i++].size = v.iov_len; + } + fuse_reply_data(req, bufv, FUSE_BUF_SPLICE_MOVE); + free(bufv); + return; + } + iov.insert(iov.begin(), {0}); // the first one is reserved for fuse_out_header + fuse_reply_iov(req, &iov[0], iov.size()); + } else + fuse_reply_err(req, get_sys_errno(-r)); +} + +static void fuse_ll_write(fuse_req_t req, fuse_ino_t ino, const char *buf, + size_t size, off_t off, struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast<Fh*>(fi->fh); + int r = cfuse->client->ll_write(fh, off, size, buf); + if (r >= 0) + fuse_reply_write(req, r); + else + fuse_reply_err(req, get_sys_errno(-r)); +} + +static void fuse_ll_flush(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast<Fh*>(fi->fh); + int r = cfuse->client->ll_flush(fh); + fuse_reply_err(req, get_sys_errno(-r)); +} + +#ifdef FUSE_IOCTL_COMPAT +static void fuse_ll_ioctl(fuse_req_t req, fuse_ino_t ino, +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 5) + unsigned int cmd, +#else + int cmd, +#endif + void *arg, struct fuse_file_info *fi, + unsigned flags, const void *in_buf, size_t in_bufsz, size_t out_bufsz) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + + if (flags & FUSE_IOCTL_COMPAT) { + fuse_reply_err(req, ENOSYS); + return; + } + + switch (static_cast<unsigned>(cmd)) { + case CEPH_IOC_GET_LAYOUT: { + file_layout_t layout; + struct ceph_ioctl_layout l; + Fh *fh = (Fh*)fi->fh; + cfuse->client->ll_file_layout(fh, &layout); + l.stripe_unit = layout.stripe_unit; + l.stripe_count = layout.stripe_count; + l.object_size = layout.object_size; + l.data_pool = layout.pool_id; + fuse_reply_ioctl(req, 0, &l, sizeof(struct ceph_ioctl_layout)); + } + break; + default: + fuse_reply_err(req, EINVAL); + } +} +#endif + +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9) + +static void fuse_ll_fallocate(fuse_req_t req, fuse_ino_t ino, int mode, + off_t offset, off_t length, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = (Fh*)fi->fh; + int r = cfuse->client->ll_fallocate(fh, mode, offset, length); + fuse_reply_err(req, get_sys_errno(-r)); +} + +#endif + +static void fuse_ll_release(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast<Fh*>(fi->fh); + int r = cfuse->client->ll_release(fh); + fuse_reply_err(req, get_sys_errno(-r)); +} + +static void fuse_ll_fsync(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast<Fh*>(fi->fh); + int r = cfuse->client->ll_fsync(fh, datasync); + fuse_reply_err(req, get_sys_errno(-r)); +} + +struct readdir_context { + fuse_req_t req; + char *buf; + size_t size; + size_t pos; /* in buf */ + uint64_t snap; +}; + +/* + * return 0 on success, -1 if out of space + */ +static int fuse_ll_add_dirent(void *p, struct dirent *de, + struct ceph_statx *stx, off_t next_off, + Inode *in) +{ + struct readdir_context *c = (struct readdir_context *)p; + CephFuse::Handle *cfuse = (CephFuse::Handle *)fuse_req_userdata(c->req); + + struct stat st; + st.st_ino = cfuse->make_fake_ino(stx->stx_ino, c->snap); + st.st_mode = stx->stx_mode; + st.st_rdev = new_encode_dev(stx->stx_rdev); + + size_t room = c->size - c->pos; + size_t entrysize = fuse_add_direntry(c->req, c->buf + c->pos, room, + de->d_name, &st, next_off); + if (entrysize > room) + return -ENOSPC; + + /* success */ + c->pos += entrysize; + return 0; +} + +static void fuse_ll_readdir(fuse_req_t req, fuse_ino_t ino, size_t size, + off_t off, struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + + dir_result_t *dirp = reinterpret_cast<dir_result_t*>(fi->fh); + cfuse->client->seekdir(dirp, off); + + struct readdir_context rc; + rc.req = req; + rc.snap = cfuse->fino_snap(ino); + if (rc.snap == CEPH_MAXSNAP) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + rc.buf = new char[size]; + rc.size = size; + rc.pos = 0; + + int r = cfuse->client->readdir_r_cb(dirp, fuse_ll_add_dirent, &rc); + if (r == 0 || r == -CEPHFS_ENOSPC) /* ignore ENOSPC from our callback */ + fuse_reply_buf(req, rc.buf, rc.pos); + else + fuse_reply_err(req, get_sys_errno(-r)); + delete[] rc.buf; +} + +static void fuse_ll_releasedir(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + dir_result_t *dirp = reinterpret_cast<dir_result_t*>(fi->fh); + cfuse->client->ll_releasedir(dirp); + fuse_reply_err(req, 0); +} + +static void fuse_ll_fsyncdir(fuse_req_t req, fuse_ino_t ino, int datasync, + struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + dir_result_t *dirp = reinterpret_cast<dir_result_t*>(fi->fh); + int r = cfuse->client->ll_fsyncdir(dirp); + fuse_reply_err(req, get_sys_errno(-r)); +} + +static void fuse_ll_access(fuse_req_t req, fuse_ino_t ino, int mask) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + UserPerm perms(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(ino); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + int r = cfuse->client->inode_permission(in, perms, mask); + fuse_reply_err(req, get_sys_errno(-r)); + cfuse->iput(in); +} + +static void fuse_ll_create(fuse_req_t req, fuse_ino_t parent, const char *name, + mode_t mode, struct fuse_file_info *fi) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + struct fuse_entry_param fe; + Fh *fh = NULL; + UserPerm perms(ctx->uid, ctx->gid); + Inode *i1 = cfuse->iget(parent), *i2; + if (!i1) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + memset(&fe, 0, sizeof(fe)); + + // pass &i2 for the created inode so that ll_create takes an initial ll_ref + int r = cfuse->client->ll_create(i1, name, mode, fi->flags, &fe.attr, &i2, + &fh, perms); + if (r == 0) { + fi->fh = (uint64_t)fh; + fe.ino = cfuse->make_fake_ino(fe.attr.st_ino, fe.attr.st_dev); +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) + auto fuse_disable_pagecache = cfuse->client->cct->_conf.get_val<bool>( + "fuse_disable_pagecache"); + auto fuse_use_invalidate_cb = cfuse->client->cct->_conf.get_val<bool>( + "fuse_use_invalidate_cb"); + if (fuse_disable_pagecache) + fi->direct_io = 1; + else if (fuse_use_invalidate_cb) + fi->keep_cache = 1; +#endif + fuse_reply_create(req, &fe, fi); + } else + fuse_reply_err(req, get_sys_errno(-r)); + // XXX NB, we dont iput(i2) because FUSE will do so in a matching + // fuse_ll_forget() + cfuse->iput(i1); // iput required +} + +static void fuse_ll_statfs(fuse_req_t req, fuse_ino_t ino) +{ + struct statvfs stbuf; + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + UserPerm perms(ctx->uid, ctx->gid); + Inode *in = cfuse->iget(ino); + if (!in) { + fuse_reply_err(req, get_sys_errno(CEPHFS_EINVAL)); + return; + } + + get_fuse_groups(perms, req); + + int r = cfuse->client->ll_statfs(in, &stbuf, perms); + if (r == 0) + fuse_reply_statfs(req, &stbuf); + else + fuse_reply_err(req, get_sys_errno(-r)); + + cfuse->iput(in); // iput required +} + +static void fuse_ll_getlk(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, struct flock *lock) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast<Fh*>(fi->fh); + + int r = cfuse->client->ll_getlk(fh, lock, fi->lock_owner); + if (r == 0) + fuse_reply_lock(req, lock); + else + fuse_reply_err(req, get_sys_errno(-r)); +} + +static void fuse_ll_setlk(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, struct flock *lock, int sleep) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = reinterpret_cast<Fh*>(fi->fh); + + // must use multithread if operation may block + auto fuse_multithreaded = cfuse->client->cct->_conf.get_val<bool>( + "fuse_multithreaded"); + if (!fuse_multithreaded && sleep && lock->l_type != F_UNLCK) { + fuse_reply_err(req, EDEADLK); + return; + } + + int r = cfuse->client->ll_setlk(fh, lock, fi->lock_owner, sleep); + fuse_reply_err(req, get_sys_errno(-r)); +} + +static void fuse_ll_interrupt(fuse_req_t req, void* data) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + cfuse->client->ll_interrupt(data); +} + +static void switch_interrupt_cb(void *handle, void* data) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)handle; + fuse_req_t req = cfuse->get_fuse_req(); + + if (data) + fuse_req_interrupt_func(req, fuse_ll_interrupt, data); + else + fuse_req_interrupt_func(req, NULL, NULL); +} + +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9) +static void fuse_ll_flock(fuse_req_t req, fuse_ino_t ino, + struct fuse_file_info *fi, int cmd) +{ + CephFuse::Handle *cfuse = fuse_ll_req_prepare(req); + Fh *fh = (Fh*)fi->fh; + + // must use multithread if operation may block + auto fuse_multithreaded = cfuse->client->cct->_conf.get_val<bool>( + "fuse_multithreaded"); + if (!fuse_multithreaded && !(cmd & (LOCK_NB | LOCK_UN))) { + fuse_reply_err(req, EDEADLK); + return; + } + + int r = cfuse->client->ll_flock(fh, cmd, fi->lock_owner); + fuse_reply_err(req, get_sys_errno(-r)); +} +#endif + +#if !defined(__APPLE__) +static mode_t umask_cb(void *handle) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)handle; + fuse_req_t req = cfuse->get_fuse_req(); + const struct fuse_ctx *ctx = fuse_req_ctx(req); + return ctx->umask; +} +#endif + +static void ino_invalidate_cb(void *handle, vinodeno_t vino, int64_t off, + int64_t len) +{ +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) + CephFuse::Handle *cfuse = (CephFuse::Handle *)handle; + fuse_ino_t fino = cfuse->make_fake_ino(vino.ino, vino.snapid); +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + fuse_lowlevel_notify_inval_inode(cfuse->se, fino, off, len); +#else + fuse_lowlevel_notify_inval_inode(cfuse->ch, fino, off, len); +#endif +#endif +} + +static void dentry_invalidate_cb(void *handle, vinodeno_t dirino, + vinodeno_t ino, const char *name, size_t len) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)handle; + fuse_ino_t fdirino = cfuse->make_fake_ino(dirino.ino, dirino.snapid); +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9) + fuse_ino_t fino = 0; + if (ino.ino != inodeno_t()) + fino = cfuse->make_fake_ino(ino.ino, ino.snapid); +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + fuse_lowlevel_notify_delete(cfuse->se, fdirino, fino, name, len); +#else + fuse_lowlevel_notify_delete(cfuse->ch, fdirino, fino, name, len); +#endif +#elif FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) + fuse_lowlevel_notify_inval_entry(cfuse->ch, fdirino, name, len); +#endif +} + +static int remount_cb(void *handle) +{ + // used for trimming kernel dcache. when remounting a file system, linux kernel + // trims all unused dentries in the file system + char cmd[128+PATH_MAX]; + CephFuse::Handle *cfuse = (CephFuse::Handle *)handle; + snprintf(cmd, sizeof(cmd), "LIBMOUNT_FSTAB=/dev/null mount -i -o remount %s", +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + cfuse->opts.mountpoint); +#else + cfuse->mountpoint); +#endif + int r = system(cmd); + if (r != 0 && r != -1) { + r = WEXITSTATUS(r); + } + + return r; +} + +static void do_init(void *data, fuse_conn_info *conn) +{ + CephFuse::Handle *cfuse = (CephFuse::Handle *)data; + Client *client = cfuse->client; + +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + fuse_apply_conn_info_opts(cfuse->conn_opts, conn); +#endif + + if(conn->capable & FUSE_CAP_SPLICE_MOVE) + conn->want |= FUSE_CAP_SPLICE_MOVE; + +#if !defined(__APPLE__) + if (!client->fuse_default_permissions && client->ll_handle_umask()) { + // apply umask in userspace if posix acl is enabled + if(conn->capable & FUSE_CAP_DONT_MASK) + conn->want |= FUSE_CAP_DONT_MASK; + } + if(conn->capable & FUSE_CAP_EXPORT_SUPPORT) + conn->want |= FUSE_CAP_EXPORT_SUPPORT; +#endif + + if (cfuse->fd_on_success) { + //cout << "fuse init signaling on fd " << fd_on_success << std::endl; + // see Preforker::daemonize(), ceph-fuse's parent process expects a `-1` + // from a daemonized child process. + uint32_t r = -1; + int err = safe_write(cfuse->fd_on_success, &r, sizeof(r)); + if (err) { + derr << "fuse_ll: do_init: safe_write failed with error " + << cpp_strerror(err) << dendl; + ceph_abort(); + } + //cout << "fuse init done signaling on fd " << fd_on_success << std::endl; + + // close stdout, etc. + ::close(0); + ::close(1); + ::close(2); + } +} + +const static struct fuse_lowlevel_ops fuse_ll_oper = { + init: do_init, + destroy: 0, + lookup: fuse_ll_lookup, + forget: fuse_ll_forget, + getattr: fuse_ll_getattr, + setattr: fuse_ll_setattr, + readlink: fuse_ll_readlink, + mknod: fuse_ll_mknod, + mkdir: fuse_ll_mkdir, + unlink: fuse_ll_unlink, + rmdir: fuse_ll_rmdir, + symlink: fuse_ll_symlink, + rename: fuse_ll_rename, + link: fuse_ll_link, + open: fuse_ll_open, + read: fuse_ll_read, + write: fuse_ll_write, + flush: fuse_ll_flush, + release: fuse_ll_release, + fsync: fuse_ll_fsync, + opendir: fuse_ll_opendir, + readdir: fuse_ll_readdir, + releasedir: fuse_ll_releasedir, + fsyncdir: fuse_ll_fsyncdir, + statfs: fuse_ll_statfs, + setxattr: fuse_ll_setxattr, + getxattr: fuse_ll_getxattr, + listxattr: fuse_ll_listxattr, + removexattr: fuse_ll_removexattr, + access: fuse_ll_access, + create: fuse_ll_create, + getlk: fuse_ll_getlk, + setlk: fuse_ll_setlk, + bmap: 0, +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 8) +#ifdef FUSE_IOCTL_COMPAT + ioctl: fuse_ll_ioctl, +#else + ioctl: 0, +#endif + poll: 0, +#endif +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9) + write_buf: 0, + retrieve_reply: 0, + forget_multi: 0, + flock: fuse_ll_flock, +#endif +#if FUSE_VERSION >= FUSE_MAKE_VERSION(2, 9) + fallocate: fuse_ll_fallocate +#endif +}; + + +CephFuse::Handle::Handle(Client *c, int fd) : + fd_on_success(fd), + client(c) +{ + memset(&args, 0, sizeof(args)); +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + memset(&opts, 0, sizeof(opts)); +#endif +} + +CephFuse::Handle::~Handle() +{ + fuse_opt_free_args(&args); +} + +void CephFuse::Handle::finalize() +{ +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + if (se) { + fuse_remove_signal_handlers(se); + fuse_session_unmount(se); + fuse_session_destroy(se); + } + if (conn_opts) + free(conn_opts); + if (opts.mountpoint) + free(opts.mountpoint); +#else + if (se) + fuse_remove_signal_handlers(se); + if (ch) + fuse_session_remove_chan(ch); + if (se) + fuse_session_destroy(se); + if (ch) + fuse_unmount(mountpoint, ch); +#endif + + pthread_key_delete(fuse_req_key); +} + +int CephFuse::Handle::init(int argc, const char *argv[]) +{ + + int r = pthread_key_create(&fuse_req_key, NULL); + if (r) { + derr << "pthread_key_create failed." << dendl; + return r; + } + + // set up fuse argc/argv + int newargc = 0; + const char **newargv = (const char **) malloc((argc + 17) * sizeof(char *)); + if(!newargv) + return ENOMEM; + + newargv[newargc++] = argv[0]; + newargv[newargc++] = "-f"; // stay in foreground + + auto fuse_allow_other = client->cct->_conf.get_val<bool>( + "fuse_allow_other"); + auto fuse_default_permissions = client->cct->_conf.get_val<bool>( + "fuse_default_permissions"); +#if FUSE_VERSION < FUSE_MAKE_VERSION(3, 0) + auto fuse_big_writes = client->cct->_conf.get_val<bool>( + "fuse_big_writes"); +#endif + auto fuse_max_write = client->cct->_conf.get_val<Option::size_t>( + "fuse_max_write"); + auto fuse_atomic_o_trunc = client->cct->_conf.get_val<bool>( + "fuse_atomic_o_trunc"); + auto fuse_splice_read = client->cct->_conf.get_val<bool>( + "fuse_splice_read"); + auto fuse_splice_write = client->cct->_conf.get_val<bool>( + "fuse_splice_write"); + auto fuse_splice_move = client->cct->_conf.get_val<bool>( + "fuse_splice_move"); + auto fuse_debug = client->cct->_conf.get_val<bool>( + "fuse_debug"); + + if (fuse_allow_other) { + newargv[newargc++] = "-o"; + newargv[newargc++] = "allow_other"; + } + if (fuse_default_permissions) { + newargv[newargc++] = "-o"; + newargv[newargc++] = "default_permissions"; + } +#if defined(__linux__) +#if FUSE_VERSION < FUSE_MAKE_VERSION(3, 0) + if (fuse_big_writes) { + newargv[newargc++] = "-o"; + newargv[newargc++] = "big_writes"; + } +#endif + if (fuse_max_write > 0) { + char strsplice[65]; + newargv[newargc++] = "-o"; + sprintf(strsplice, "max_write=%zu", (size_t)fuse_max_write); + newargv[newargc++] = strsplice; + } + if (fuse_atomic_o_trunc) { + newargv[newargc++] = "-o"; + newargv[newargc++] = "atomic_o_trunc"; + } + if (fuse_splice_read) { + newargv[newargc++] = "-o"; + newargv[newargc++] = "splice_read"; + } + if (fuse_splice_write) { + newargv[newargc++] = "-o"; + newargv[newargc++] = "splice_write"; + } + if (fuse_splice_move) { + newargv[newargc++] = "-o"; + newargv[newargc++] = "splice_move"; + } +#endif + if (fuse_debug) + newargv[newargc++] = "-d"; + + for (int argctr = 1; argctr < argc; argctr++) + newargv[newargc++] = argv[argctr]; + + derr << "init, newargv = " << newargv << " newargc=" << newargc << dendl; + struct fuse_args a = FUSE_ARGS_INIT(newargc, (char**)newargv); + args = a; // Roundabout construction b/c FUSE_ARGS_INIT is for initialization not assignment + +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + if (fuse_parse_cmdline(&args, &opts) == -1) { +#else + if (fuse_parse_cmdline(&args, &mountpoint, NULL, NULL) == -1) { +#endif + derr << "fuse_parse_cmdline failed." << dendl; + fuse_opt_free_args(&args); + free(newargv); + return EINVAL; + } + +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + derr << "init, args.argv = " << args.argv << " args.argc=" << args.argc << dendl; + conn_opts = fuse_parse_conn_info_opts(&args); + if (!conn_opts) { + derr << "fuse_parse_conn_info_opts failed" << dendl; + fuse_opt_free_args(&args); + free(newargv); + return EINVAL; + } +#endif + + ceph_assert(args.allocated); // Checking fuse has realloc'd args so we can free newargv + free(newargv); + + struct ceph_client_callback_args cb_args = { + handle: this, + ino_cb: client->cct->_conf.get_val<bool>("fuse_use_invalidate_cb") ? + ino_invalidate_cb : NULL, + dentry_cb: dentry_invalidate_cb, + switch_intr_cb: switch_interrupt_cb, +#if defined(__linux__) + remount_cb: remount_cb, +#endif +#if !defined(__APPLE__) + umask_cb: umask_cb, +#endif + }; + r = client->ll_register_callbacks2(&cb_args); + if (r) { + derr << "registering callbacks failed: " << r << dendl; + return r; + } + + return 0; +} + +int CephFuse::Handle::start() +{ + bool is_mounted = false; +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + int err = already_fuse_mounted(opts.mountpoint, is_mounted); +#else + int err = already_fuse_mounted(mountpoint, is_mounted); +#endif + if (err) { + return err; + } + + if (is_mounted) { + return EBUSY; + } + +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + se = fuse_session_new(&args, &fuse_ll_oper, sizeof(fuse_ll_oper), this); + if (!se) { + derr << "fuse_session_new failed" << dendl; + return EDOM; + } +#else + ch = fuse_mount(mountpoint, &args); + if (!ch) { + derr << "fuse_mount(mountpoint=" << mountpoint << ") failed." << dendl; + return EIO; + } + + se = fuse_lowlevel_new(&args, &fuse_ll_oper, sizeof(fuse_ll_oper), this); + if (!se) { + derr << "fuse_lowlevel_new failed" << dendl; + return EDOM; + } +#endif + + signal(SIGTERM, SIG_DFL); + signal(SIGINT, SIG_DFL); + if (fuse_set_signal_handlers(se) == -1) { + derr << "fuse_set_signal_handlers failed" << dendl; + return ENOSYS; + } + +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + if (fuse_session_mount(se, opts.mountpoint) != 0) { + derr << "fuse_session_mount failed" << dendl; + return ENOSYS; + } +#else + fuse_session_add_chan(se, ch); +#endif + + return 0; +} + +int CephFuse::Handle::loop() +{ + auto fuse_multithreaded = client->cct->_conf.get_val<bool>( + "fuse_multithreaded"); + if (fuse_multithreaded) { +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 12) + { + struct fuse_loop_config *conf = fuse_loop_cfg_create(); + ceph_assert(conf != nullptr); + + fuse_loop_cfg_set_clone_fd(conf, opts.clone_fd); + fuse_loop_cfg_set_idle_threads(conf, opts.max_idle_threads); + fuse_loop_cfg_set_max_threads(conf, opts.max_threads); + + int r = fuse_session_loop_mt(se, conf); + + fuse_loop_cfg_destroy(conf); + return r; + } +#elif FUSE_VERSION >= FUSE_MAKE_VERSION(3, 1) + { + struct fuse_loop_config conf = { + clone_fd: opts.clone_fd, + max_idle_threads: opts.max_idle_threads + }; + return fuse_session_loop_mt(se, &conf); + } +#elif FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + return fuse_session_loop_mt(se, opts.clone_fd); +#else + return fuse_session_loop_mt(se); +#endif + } else { + return fuse_session_loop(se); + } +} + +uint64_t CephFuse::Handle::fino_snap(uint64_t fino) +{ + if (fino == FUSE_ROOT_ID) + return CEPH_NOSNAP; + + if (client->use_faked_inos()) { + vinodeno_t vino = client->map_faked_ino(fino); + return vino.snapid; + } else { + std::lock_guard l(stag_lock); + uint64_t stag = FINO_STAG(fino); + if (stag == 0) + return CEPH_NOSNAP; + else if (stag == 1) + return CEPH_SNAPDIR; + + inodeno_t ino = FINO_INO(fino); + + // does the fino_maps for the ino exist ? + if (!g_fino_maps.count(ino)) + return CEPH_MAXSNAP; + + auto &fino_maps = g_fino_maps[ino]; + + // does the stagid <--> snapid map exist ? + if (!fino_maps.stag_snap_map.count(stag)) + return CEPH_MAXSNAP; + + // get the snapid + return fino_maps.stag_snap_map[stag]; + } +} + +Inode * CephFuse::Handle::iget(fuse_ino_t fino) +{ + if (fino == FUSE_ROOT_ID) + return client->get_root(); + + if (client->use_faked_inos()) { + return client->ll_get_inode((ino_t)fino); + } else { + uint64_t snap = fino_snap(fino); + if (snap == CEPH_MAXSNAP) + return NULL; + vinodeno_t vino(FINO_INO(fino), snap); + return client->ll_get_inode(vino); + } +} + +void CephFuse::Handle::iput(Inode *in) +{ + client->ll_put(in); +} + +uint64_t CephFuse::Handle::make_fake_ino(inodeno_t ino, snapid_t snapid) +{ + if (client->use_faked_inos()) { + // already faked by libcephfs + if (ino == client->get_root_ino()) + return FUSE_ROOT_ID; + + return ino; + } else { + if (snapid == CEPH_NOSNAP && ino == client->get_root_ino()) + return FUSE_ROOT_ID; + + int stag; + if (snapid == CEPH_NOSNAP) { + stag = G_NOSNAP_STAG; + } else if (snapid == CEPH_SNAPDIR) { + stag = G_SNAPDIR_STAG; + } else { + std::lock_guard l(stag_lock); + auto &fino_maps = g_fino_maps[ino]; // will insert it anyway if not exists + + // already exist ? + if (fino_maps.snap_stag_map.count(snapid)) { + inodeno_t fino = MAKE_FINO(ino, fino_maps.snap_stag_map[snapid]); + return fino; + } + + // create a new snapid <--> stagid map + int first = fino_maps.last_stag & STAG_MASK; + stag = (++fino_maps.last_stag) & STAG_MASK; + for (; stag != first; stag = (++fino_maps.last_stag) & STAG_MASK) { + // stag 0 is reserved for CEPH_NOSNAP and 1 for CEPH_SNAPDIR + if (stag == 0 || stag == 1) + continue; + + // the new stag is not used ? + if (!fino_maps.stag_snap_map.count(stag)) { + fino_maps.snap_stag_map[snapid] = stag; + fino_maps.stag_snap_map[stag] = snapid; + break; + } + + // the stag is already used by a snpaid, + // try to free it + auto _snapid = fino_maps.stag_snap_map[stag]; + if (!client->ll_get_snap_ref(_snapid)) { + fino_maps.snap_stag_map.erase(_snapid); + fino_maps.snap_stag_map[snapid] = stag; + fino_maps.stag_snap_map[stag] = snapid; + break; + } + } + if (stag == first) { + /* + * It shouldn't be here because the max snapshots for each + * directory is 4_K, and here we have around 64_K, which is + * from 0xffff - 2, stags could be used for each directory. + * + * More detail please see mds 'mds_max_snaps_per_dir' option. + */ + ceph_abort_msg("run out of stag"); + } + } + + inodeno_t fino = MAKE_FINO(ino, stag); + //cout << "make_fake_ino " << ino << "." << snapid << " -> " << fino << std::endl; + return fino; + } +} + +void CephFuse::Handle::set_fuse_req(fuse_req_t req) +{ + pthread_setspecific(fuse_req_key, (void*)req); +} + +fuse_req_t CephFuse::Handle::get_fuse_req() +{ + return (fuse_req_t) pthread_getspecific(fuse_req_key); +} + + +CephFuse::CephFuse(Client *c, int fd) : _handle(new CephFuse::Handle(c, fd)) +{ +} + +CephFuse::~CephFuse() +{ + delete _handle; +} + +int CephFuse::init(int argc, const char *argv[]) +{ + return _handle->init(argc, argv); +} + +int CephFuse::start() +{ + return _handle->start(); +} + +int CephFuse::loop() +{ + return _handle->loop(); +} + +void CephFuse::finalize() +{ + return _handle->finalize(); +} + +std::string CephFuse::get_mount_point() const +{ +#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0) + if (_handle->opts.mountpoint) { + return _handle->opts.mountpoint; +#else + if (_handle->mountpoint) { + return _handle->mountpoint; +#endif + } else { + return ""; + } +} |