summaryrefslogtreecommitdiffstats
path: root/src/include/cephfs
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-21 11:54:28 +0000
commite6918187568dbd01842d8d1d2c808ce16a894239 (patch)
tree64f88b554b444a49f656b6c656111a145cbbaa28 /src/include/cephfs
parentInitial commit. (diff)
downloadceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz
ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/include/cephfs')
-rw-r--r--src/include/cephfs/ceph_ll_client.h215
-rw-r--r--src/include/cephfs/libcephfs.h2201
-rw-r--r--src/include/cephfs/metrics/Types.h699
-rw-r--r--src/include/cephfs/types.h970
4 files changed, 4085 insertions, 0 deletions
diff --git a/src/include/cephfs/ceph_ll_client.h b/src/include/cephfs/ceph_ll_client.h
new file mode 100644
index 000000000..ac5b7c224
--- /dev/null
+++ b/src/include/cephfs/ceph_ll_client.h
@@ -0,0 +1,215 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * scalable distributed file system
+ *
+ * Copyright (C) Jeff Layton <jlayton@redhat.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ */
+
+#ifndef CEPH_CEPH_LL_CLIENT_H
+#define CEPH_CEPH_LL_CLIENT_H
+#include <stdint.h>
+
+#ifdef _WIN32
+#include "include/win32/fs_compat.h"
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+
+class Fh;
+
+struct inodeno_t;
+struct vinodeno_t;
+typedef struct vinodeno_t vinodeno;
+
+#else /* __cplusplus */
+
+typedef struct Fh Fh;
+
+typedef struct inodeno_t {
+ uint64_t val;
+} inodeno_t;
+
+typedef struct _snapid_t {
+ uint64_t val;
+} snapid_t;
+
+typedef struct vinodeno_t {
+ inodeno_t ino;
+ snapid_t snapid;
+} vinodeno_t;
+
+#endif /* __cplusplus */
+
+/*
+ * Heavily borrowed from David Howells' draft statx patchset.
+ *
+ * Since the xstat patches are still a work in progress, we borrow its data
+ * structures and #defines to implement ceph_getattrx. Once the xstat stuff
+ * has been merged we should drop this and switch over to using that instead.
+ */
+struct ceph_statx {
+ uint32_t stx_mask;
+ uint32_t stx_blksize;
+ uint32_t stx_nlink;
+ uint32_t stx_uid;
+ uint32_t stx_gid;
+ uint16_t stx_mode;
+ uint64_t stx_ino;
+ uint64_t stx_size;
+ uint64_t stx_blocks;
+ dev_t stx_dev;
+ dev_t stx_rdev;
+ struct timespec stx_atime;
+ struct timespec stx_ctime;
+ struct timespec stx_mtime;
+ struct timespec stx_btime;
+ uint64_t stx_version;
+};
+
+#define CEPH_STATX_MODE 0x00000001U /* Want/got stx_mode */
+#define CEPH_STATX_NLINK 0x00000002U /* Want/got stx_nlink */
+#define CEPH_STATX_UID 0x00000004U /* Want/got stx_uid */
+#define CEPH_STATX_GID 0x00000008U /* Want/got stx_gid */
+#define CEPH_STATX_RDEV 0x00000010U /* Want/got stx_rdev */
+#define CEPH_STATX_ATIME 0x00000020U /* Want/got stx_atime */
+#define CEPH_STATX_MTIME 0x00000040U /* Want/got stx_mtime */
+#define CEPH_STATX_CTIME 0x00000080U /* Want/got stx_ctime */
+#define CEPH_STATX_INO 0x00000100U /* Want/got stx_ino */
+#define CEPH_STATX_SIZE 0x00000200U /* Want/got stx_size */
+#define CEPH_STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */
+#define CEPH_STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */
+#define CEPH_STATX_BTIME 0x00000800U /* Want/got stx_btime */
+#define CEPH_STATX_VERSION 0x00001000U /* Want/got stx_version */
+#define CEPH_STATX_ALL_STATS 0x00001fffU /* All supported stats */
+
+/*
+ * Compatibility macros until these defines make their way into glibc
+ */
+#ifndef AT_STATX_DONT_SYNC
+#define AT_STATX_SYNC_TYPE 0x6000
+#define AT_STATX_SYNC_AS_STAT 0x0000
+#define AT_STATX_FORCE_SYNC 0x2000
+#define AT_STATX_DONT_SYNC 0x4000 /* Don't sync attributes with the server */
+#endif
+
+/*
+ * This is deprecated and just for backwards compatibility.
+ * Please use AT_STATX_DONT_SYNC instead.
+ */
+#define AT_NO_ATTR_SYNC AT_STATX_DONT_SYNC /* Deprecated */
+
+/*
+ * The statx interfaces only allow these flags. In order to allow us to add
+ * others in the future, we disallow setting any that aren't recognized.
+ */
+#define CEPH_REQ_FLAG_MASK (AT_SYMLINK_NOFOLLOW|AT_STATX_DONT_SYNC)
+
+/* fallocate mode flags */
+#ifndef FALLOC_FL_KEEP_SIZE
+#define FALLOC_FL_KEEP_SIZE 0x01
+#endif
+#ifndef FALLOC_FL_PUNCH_HOLE
+#define FALLOC_FL_PUNCH_HOLE 0x02
+#endif
+
+/** ceph_deleg_cb_t: Delegation recalls
+ *
+ * Called when there is an outstanding Delegation and there is conflicting
+ * access, either locally or via cap activity.
+ * @fh: open filehandle
+ * @priv: private info registered when delegation was acquired
+ */
+typedef void (*ceph_deleg_cb_t)(Fh *fh, void *priv);
+
+/**
+ * client_ino_callback_t: Inode data/metadata invalidation
+ *
+ * Called when the client wants to invalidate the cached data for a range
+ * in the file.
+ * @handle: client callback handle
+ * @ino: vino of inode to be invalidated
+ * @off: starting offset of content to be invalidated
+ * @len: length of region to invalidate
+ */
+typedef void (*client_ino_callback_t)(void *handle, vinodeno_t ino,
+ int64_t off, int64_t len);
+
+/**
+ * client_dentry_callback_t: Dentry invalidation
+ *
+ * Called when the client wants to purge a dentry from its cache.
+ * @handle: client callback handle
+ * @dirino: vino of directory that contains dentry to be invalidate
+ * @ino: vino of inode attached to dentry to be invalidated
+ * @name: name of dentry to be invalidated
+ * @len: length of @name
+ */
+typedef void (*client_dentry_callback_t)(void *handle, vinodeno_t dirino,
+ vinodeno_t ino, const char *name,
+ size_t len);
+
+/**
+ * client_remount_callback_t: Remount entire fs
+ *
+ * Called when the client needs to purge the dentry cache and the application
+ * doesn't have a way to purge an individual dentry. Mostly used for ceph-fuse
+ * on older kernels.
+ * @handle: client callback handle
+ */
+
+typedef int (*client_remount_callback_t)(void *handle);
+
+/**
+ * client_switch_interrupt_callback_t: Lock request interrupted
+ *
+ * Called before file lock request to set the interrupt handler while waiting
+ * After the wait, called with "data" set to NULL pointer.
+ * @handle: client callback handle
+ * @data: opaque data passed to interrupt before call, NULL pointer after.
+ */
+typedef void (*client_switch_interrupt_callback_t)(void *handle, void *data);
+
+/**
+ * client_umask_callback_t: Fetch umask of actor
+ *
+ * Called when the client needs the umask of the requestor.
+ * @handle: client callback handle
+ */
+typedef mode_t (*client_umask_callback_t)(void *handle);
+
+/**
+ * client_ino_release_t: Request that application release Inode references
+ *
+ * Called when the MDS wants to trim caps and Inode records.
+ * @handle: client callback handle
+ * @ino: vino of Inode being released
+ */
+typedef void (*client_ino_release_t)(void *handle, vinodeno_t ino);
+
+/*
+ * The handle is an opaque value that gets passed to some callbacks. Any fields
+ * set to NULL will be left alone. There is no way to unregister callbacks.
+ */
+struct ceph_client_callback_args {
+ void *handle;
+ client_ino_callback_t ino_cb;
+ client_dentry_callback_t dentry_cb;
+ client_switch_interrupt_callback_t switch_intr_cb;
+ client_remount_callback_t remount_cb;
+ client_umask_callback_t umask_cb;
+ client_ino_release_t ino_release_cb;
+};
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* CEPH_STATX_H */
+
diff --git a/src/include/cephfs/libcephfs.h b/src/include/cephfs/libcephfs.h
new file mode 100644
index 000000000..dc62698fa
--- /dev/null
+++ b/src/include/cephfs/libcephfs.h
@@ -0,0 +1,2201 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2009-2011 New Dream Network
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_LIB_H
+#define CEPH_LIB_H
+
+#if defined(__linux__)
+#include <features.h>
+#endif
+#include <utime.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/statvfs.h>
+#include <sys/socket.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <dirent.h>
+
+#include "ceph_ll_client.h"
+
+#ifdef __cplusplus
+namespace ceph::common {
+ class CephContext;
+}
+using CephContext = ceph::common::CephContext;
+extern "C" {
+#endif
+
+#define LIBCEPHFS_VER_MAJOR 10
+#define LIBCEPHFS_VER_MINOR 0
+#define LIBCEPHFS_VER_EXTRA 3
+
+#define LIBCEPHFS_VERSION(maj, min, extra) ((maj << 16) + (min << 8) + extra)
+#define LIBCEPHFS_VERSION_CODE LIBCEPHFS_VERSION(LIBCEPHFS_VER_MAJOR, LIBCEPHFS_VER_MINOR, LIBCEPHFS_VER_EXTRA)
+
+#if __GNUC__ >= 4
+ #define LIBCEPHFS_DEPRECATED __attribute__((deprecated))
+ #pragma GCC diagnostic push
+ #pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+#else
+ #define LIBCEPHFS_DEPRECATED
+#endif
+
+/*
+ * If using glibc check that file offset is 64-bit.
+ */
+#if defined(__GLIBC__) && !defined(__USE_FILE_OFFSET64)
+# error libceph: glibc must define __USE_FILE_OFFSET64 or readdir results will be corrupted
+#endif
+
+/*
+ * XXXX redeclarations from ceph_fs.h, rados.h, etc. We need more of this
+ * in the interface, but shouldn't be re-typing it (and using different
+ * C data types).
+ */
+#ifndef __cplusplus
+
+#define CEPH_INO_ROOT 1
+#define CEPH_NOSNAP ((uint64_t)(-2))
+
+struct ceph_file_layout {
+ /* file -> object mapping */
+ uint32_t fl_stripe_unit; /* stripe unit, in bytes. must be multiple
+ of page size. */
+ uint32_t fl_stripe_count; /* over this many objects */
+ uint32_t fl_object_size; /* until objects are this big, then move to
+ new objects */
+ uint32_t fl_cas_hash; /* 0 = none; 1 = sha256 */
+
+ /* pg -> disk layout */
+ uint32_t fl_object_stripe_unit; /* for per-object parity, if any */
+
+ /* object -> pg layout */
+ uint32_t fl_pg_preferred; /* preferred primary for pg (-1 for none) */
+ uint32_t fl_pg_pool; /* namespace, crush rule, rep level */
+} __attribute__ ((packed));
+
+struct CephContext;
+#endif /* ! __cplusplus */
+
+struct UserPerm;
+typedef struct UserPerm UserPerm;
+
+struct Inode;
+typedef struct Inode Inode;
+
+struct ceph_mount_info;
+struct ceph_dir_result;
+
+// user supplied key,value pair to be associated with a snapshot.
+// callers can supply an array of this struct via ceph_mksnap().
+struct snap_metadata {
+ const char *key;
+ const char *value;
+};
+
+struct snap_info {
+ uint64_t id;
+ size_t nr_snap_metadata;
+ struct snap_metadata *snap_metadata;
+};
+
+struct ceph_snapdiff_entry_t {
+ struct dirent dir_entry;
+ uint64_t snapid; //should be snapid_t but prefer not to exposure it
+};
+
+/* setattr mask bits (up to an int in size) */
+#ifndef CEPH_SETATTR_MODE
+#define CEPH_SETATTR_MODE (1 << 0)
+#define CEPH_SETATTR_UID (1 << 1)
+#define CEPH_SETATTR_GID (1 << 2)
+#define CEPH_SETATTR_MTIME (1 << 3)
+#define CEPH_SETATTR_ATIME (1 << 4)
+#define CEPH_SETATTR_SIZE (1 << 5)
+#define CEPH_SETATTR_CTIME (1 << 6)
+#define CEPH_SETATTR_MTIME_NOW (1 << 7)
+#define CEPH_SETATTR_ATIME_NOW (1 << 8)
+#define CEPH_SETATTR_BTIME (1 << 9)
+#define CEPH_SETATTR_KILL_SGUID (1 << 10)
+#define CEPH_SETATTR_FSCRYPT_AUTH (1 << 11)
+#define CEPH_SETATTR_FSCRYPT_FILE (1 << 12)
+#define CEPH_SETATTR_KILL_SUID (1 << 13)
+#define CEPH_SETATTR_KILL_SGID (1 << 14)
+#endif
+
+/* define error codes for the mount function*/
+# define CEPHFS_ERROR_MON_MAP_BUILD 1000
+# define CEPHFS_ERROR_NEW_CLIENT 1002
+# define CEPHFS_ERROR_MESSENGER_START 1003
+
+/**
+ * Create a UserPerm credential object.
+ *
+ * Some calls (most notably, the ceph_ll_* ones), take a credential object
+ * that represents the credentials that the calling program is using. This
+ * function creates a new credential object for this purpose. Returns a
+ * pointer to the object, or NULL if it can't be allocated.
+ *
+ * Note that the gidlist array is used directly and is not copied. It must
+ * remain valid over the lifetime of the created UserPerm object.
+ *
+ * @param uid uid to be used
+ * @param gid gid to be used
+ * @param ngids number of gids in supplemental grouplist
+ * @param gidlist array of gid_t's in the list of groups
+ */
+UserPerm *ceph_userperm_new(uid_t uid, gid_t gid, int ngids, gid_t *gidlist);
+
+/**
+ * Destroy a UserPerm credential object.
+ *
+ * @param perm pointer to object to be destroyed
+ *
+ * Currently this just frees the object. Note that the gidlist array is not
+ * freed. The caller must do so if it's necessary.
+ */
+void ceph_userperm_destroy(UserPerm *perm);
+
+/**
+ * Get a pointer to the default UserPerm object for the mount.
+ *
+ * @param cmount the mount info handle
+ *
+ * Every cmount has a default set of credentials. This returns a pointer to
+ * that object.
+ *
+ * Unlike with ceph_userperm_new, this object should not be freed.
+ */
+struct UserPerm *ceph_mount_perms(struct ceph_mount_info *cmount);
+
+/**
+ * Set cmount's default permissions
+ *
+ * @param cmount the mount info handle
+ * @param perm permissions to set to default for mount
+ *
+ * Every cmount has a default set of credentials. This does a deep copy of
+ * the given permissions to the ones in the cmount. Must be done after
+ * ceph_init but before ceph_mount.
+ *
+ * Returns 0 on success, and -EISCONN if the cmount is already mounted.
+ */
+int ceph_mount_perms_set(struct ceph_mount_info *cmount, UserPerm *perm);
+
+/**
+ * @defgroup libcephfs_h_init Setup and Teardown
+ * These are the first and last functions that should be called
+ * when using libcephfs.
+ *
+ * @{
+ */
+
+/**
+ * Get the version of libcephfs.
+ *
+ * The version number is major.minor.patch.
+ *
+ * @param major where to store the major version number
+ * @param minor where to store the minor version number
+ * @param patch where to store the extra version number
+ */
+const char *ceph_version(int *major, int *minor, int *patch);
+
+/**
+ * Create a mount handle for interacting with Ceph. All libcephfs
+ * functions operate on a mount info handle.
+ *
+ * @param cmount the mount info handle to initialize
+ * @param id the id of the client. This can be a unique id that identifies
+ * this client, and will get appended onto "client.". Callers can
+ * pass in NULL, and the id will be the process id of the client.
+ * @returns 0 on success, negative error code on failure
+ */
+int ceph_create(struct ceph_mount_info **cmount, const char * const id);
+
+/**
+ * Create a mount handle from a CephContext, which holds the configuration
+ * for the ceph cluster. A CephContext can be acquired from an existing ceph_mount_info
+ * handle, using the @ref ceph_get_mount_context call. Note that using the same CephContext
+ * for two different mount handles results in the same client entity id being used.
+ *
+ * @param cmount the mount info handle to initialize
+ * @param conf reuse this pre-existing CephContext config
+ * @returns 0 on success, negative error code on failure
+ */
+#ifdef __cplusplus
+int ceph_create_with_context(struct ceph_mount_info **cmount, CephContext *conf);
+#else
+int ceph_create_with_context(struct ceph_mount_info **cmount, struct CephContext *conf);
+#endif
+
+#ifndef VOIDPTR_RADOS_T
+#define VOIDPTR_RADOS_T
+typedef void *rados_t;
+#endif // VOIDPTR_RADOS_T
+
+/**
+ * Create a mount handle from a rados_t, for using libcephfs in the
+ * same process as librados.
+ *
+ * @param cmount the mount info handle to initialize
+ * @param cluster reference to already-initialized librados handle
+ * @returns 0 on success, negative error code on failure
+ */
+int ceph_create_from_rados(struct ceph_mount_info **cmount, rados_t cluster);
+
+/**
+ * Initialize the filesystem client (but do not mount the filesystem yet)
+ *
+ * @returns 0 on success, negative error code on failure
+ */
+int ceph_init(struct ceph_mount_info *cmount);
+
+/**
+ * Optionally set which filesystem to mount, before calling mount.
+ *
+ * An error will be returned if this libcephfs instance is already
+ * mounted. This function is an alternative to setting the global
+ * client_fs setting. Using this function enables multiple libcephfs
+ * instances in the same process to mount different filesystems.
+ *
+ * The filesystem name is *not* validated in this function. That happens
+ * during mount(), where an ENOENT error will result if a non-existent
+ * filesystem was specified here.
+ *
+ * @param cmount the mount info handle
+ * @returns 0 on success, negative error code on failure
+ */
+int ceph_select_filesystem(struct ceph_mount_info *cmount, const char *fs_name);
+
+
+/**
+ * Perform a mount using the path for the root of the mount.
+ *
+ * It is optional to call ceph_init before this. If ceph_init has
+ * not already been called, it will be called in the course of this operation.
+ *
+ * @param cmount the mount info handle
+ * @param root the path for the root of the mount. This can be an existing
+ * directory within the ceph cluster, but most likely it will
+ * be "/". Passing in NULL is equivalent to "/".
+ * @returns 0 on success, negative error code on failure
+ */
+int ceph_mount(struct ceph_mount_info *cmount, const char *root);
+
+/**
+ * Return cluster ID for a mounted ceph filesystem
+ *
+ * Every ceph filesystem has a filesystem ID associated with it. This
+ * function returns that value. If the ceph_mount_info does not refer to a
+ * mounted filesystem, this returns a negative error code.
+ */
+int64_t ceph_get_fs_cid(struct ceph_mount_info *cmount);
+
+/**
+ * Execute a management command remotely on an MDS.
+ *
+ * Must have called ceph_init or ceph_mount before calling this.
+ *
+ * @param mds_spec string representing rank, MDS name, GID or '*'
+ * @param cmd array of null-terminated strings
+ * @param cmdlen length of cmd array
+ * @param inbuf non-null-terminated input data to command
+ * @param inbuflen length in octets of inbuf
+ * @param outbuf populated with pointer to buffer (command output data)
+ * @param outbuflen length of allocated outbuf
+ * @param outs populated with pointer to buffer (command error strings)
+ * @param outslen length of allocated outs
+ *
+ * @return 0 on success, negative error code on failure
+ *
+ */
+int ceph_mds_command(struct ceph_mount_info *cmount,
+ const char *mds_spec,
+ const char **cmd,
+ size_t cmdlen,
+ const char *inbuf, size_t inbuflen,
+ char **outbuf, size_t *outbuflen,
+ char **outs, size_t *outslen);
+
+/**
+ * Free a buffer, such as those used for output arrays from ceph_mds_command
+ */
+void ceph_buffer_free(char *buf);
+
+/**
+ * Unmount a mount handle.
+ *
+ * @param cmount the mount handle
+ * @return 0 on success, negative error code on failure
+ */
+int ceph_unmount(struct ceph_mount_info *cmount);
+
+/**
+ * Abort mds connections
+ *
+ * @param cmount the mount handle
+ * @return 0 on success, negative error code on failure
+ */
+int ceph_abort_conn(struct ceph_mount_info *cmount);
+
+/**
+ * Destroy the mount handle.
+ *
+ * The handle should not be mounted. This should be called on completion of
+ * all libcephfs functions.
+ *
+ * @param cmount the mount handle
+ * @return 0 on success, negative error code on failure.
+ */
+int ceph_release(struct ceph_mount_info *cmount);
+
+/**
+ * Deprecated. Unmount and destroy the ceph mount handle. This should be
+ * called on completion of all libcephfs functions.
+ *
+ * Equivalent to ceph_unmount() + ceph_release() without error handling.
+ *
+ * @param cmount the mount handle to shutdown
+ */
+void ceph_shutdown(struct ceph_mount_info *cmount);
+
+/**
+ * Return associated client addresses
+ *
+ * @param cmount the mount handle
+ * @param addrs the output addresses
+ * @returns 0 on success, a negative error code on failure
+ * @note the returned addrs should be free by the caller
+ */
+int ceph_getaddrs(struct ceph_mount_info *cmount, char** addrs);
+
+/**
+ * Get a global id for current instance
+ *
+ * The handle should not be mounted. This should be called on completion of
+ * all libcephfs functions.
+ *
+ * @param cmount the mount handle
+ * @returns instance global id
+ */
+uint64_t ceph_get_instance_id(struct ceph_mount_info *cmount);
+
+/**
+ * Extract the CephContext from the mount point handle.
+ *
+ * @param cmount the ceph mount handle to get the context from.
+ * @returns the CephContext associated with the mount handle.
+ */
+#ifdef __cplusplus
+CephContext *ceph_get_mount_context(struct ceph_mount_info *cmount);
+#else
+struct CephContext *ceph_get_mount_context(struct ceph_mount_info *cmount);
+#endif
+/*
+ * Check mount status.
+ *
+ * Return non-zero value if mounted. Otherwise, zero.
+ */
+int ceph_is_mounted(struct ceph_mount_info *cmount);
+
+/** @} init */
+
+/**
+ * @defgroup libcephfs_h_config Config
+ * Functions for manipulating the Ceph configuration at runtime.
+ *
+ * @{
+ */
+
+/**
+ * Load the ceph configuration from the specified config file.
+ *
+ * @param cmount the mount handle to load the configuration into.
+ * @param path_list the configuration file path
+ * @returns 0 on success, negative error code on failure
+ */
+int ceph_conf_read_file(struct ceph_mount_info *cmount, const char *path_list);
+
+/**
+ * Parse the command line arguments and load the configuration parameters.
+ *
+ * @param cmount the mount handle to load the configuration parameters into.
+ * @param argc count of the arguments in argv
+ * @param argv the argument list
+ * @returns 0 on success, negative error code on failure
+ */
+int ceph_conf_parse_argv(struct ceph_mount_info *cmount, int argc, const char **argv);
+
+/**
+ * Configure the cluster handle based on an environment variable
+ *
+ * The contents of the environment variable are parsed as if they were
+ * Ceph command line options. If var is NULL, the CEPH_ARGS
+ * environment variable is used.
+ *
+ * @pre ceph_mount() has not been called on the handle
+ *
+ * @note BUG: this is not threadsafe - it uses a static buffer
+ *
+ * @param cmount handle to configure
+ * @param var name of the environment variable to read
+ * @returns 0 on success, negative error code on failure
+ */
+int ceph_conf_parse_env(struct ceph_mount_info *cmount, const char *var);
+
+/** Sets a configuration value from a string.
+ *
+ * @param cmount the mount handle to set the configuration value on
+ * @param option the configuration option to set
+ * @param value the value of the configuration option to set
+ *
+ * @returns 0 on success, negative error code otherwise.
+ */
+int ceph_conf_set(struct ceph_mount_info *cmount, const char *option, const char *value);
+
+/** Set mount timeout.
+ *
+ * @param cmount mount handle to set the configuration value on
+ * @param timeout mount timeout interval
+ *
+ * @returns 0 on success, negative error code otherwise.
+ */
+int ceph_set_mount_timeout(struct ceph_mount_info *cmount, uint32_t timeout);
+
+/**
+ * Gets the configuration value as a string.
+ *
+ * @param cmount the mount handle to set the configuration value on
+ * @param option the config option to get
+ * @param buf the buffer to fill with the value
+ * @param len the length of the buffer.
+ * @returns the size of the buffer filled in with the value, or negative error code on failure
+ */
+int ceph_conf_get(struct ceph_mount_info *cmount, const char *option, char *buf, size_t len);
+
+/** @} config */
+
+/**
+ * @defgroup libcephfs_h_fsops File System Operations.
+ * Functions for getting/setting file system wide information specific to a particular
+ * mount handle.
+ *
+ * @{
+ */
+
+/**
+ * Perform a statfs on the ceph file system. This call fills in file system wide statistics
+ * into the passed in buffer.
+ *
+ * @param cmount the ceph mount handle to use for performing the statfs.
+ * @param path can be any path within the mounted filesystem
+ * @param stbuf the file system statistics filled in by this function.
+ * @return 0 on success, negative error code otherwise.
+ */
+int ceph_statfs(struct ceph_mount_info *cmount, const char *path, struct statvfs *stbuf);
+
+/**
+ * Synchronize all filesystem data to persistent media.
+ *
+ * @param cmount the ceph mount handle to use for performing the sync_fs.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_sync_fs(struct ceph_mount_info *cmount);
+
+/**
+ * Get the current working directory.
+ *
+ * @param cmount the ceph mount to get the current working directory for.
+ * @returns the path to the current working directory
+ */
+const char* ceph_getcwd(struct ceph_mount_info *cmount);
+
+/**
+ * Change the current working directory.
+ *
+ * @param cmount the ceph mount to change the current working directory for.
+ * @param path the path to the working directory to change into.
+ * @returns 0 on success, negative error code otherwise.
+ */
+int ceph_chdir(struct ceph_mount_info *cmount, const char *path);
+
+/** @} fsops */
+
+/**
+ * @defgroup libcephfs_h_dir Directory Operations.
+ * Functions for manipulating and listing directories.
+ *
+ * @{
+ */
+
+/**
+ * Open the given directory.
+ *
+ * @param cmount the ceph mount handle to use to open the directory
+ * @param name the path name of the directory to open. Must be either an absolute path
+ * or a path relative to the current working directory.
+ * @param dirpp the directory result pointer structure to fill in.
+ * @returns 0 on success or negative error code otherwise.
+ */
+int ceph_opendir(struct ceph_mount_info *cmount, const char *name, struct ceph_dir_result **dirpp);
+
+/**
+ * Open a directory referred to by a file descriptor
+ *
+ * @param cmount the ceph mount handle to use to open the directory
+ * @param dirfd open file descriptor for the directory
+ * @param dirpp the directory result pointer structure to fill in
+ * @returns 0 on success or negative error code otherwise
+ */
+int ceph_fdopendir(struct ceph_mount_info *cmount, int dirfd, struct ceph_dir_result **dirpp);
+
+/**
+ * Close the open directory.
+ *
+ * @param cmount the ceph mount handle to use for closing the directory
+ * @param dirp the directory result pointer (set by ceph_opendir) to close
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_closedir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp);
+
+/**
+ * Get the next entry in an open directory.
+ *
+ * @param cmount the ceph mount handle to use for performing the readdir.
+ * @param dirp the directory stream pointer from an opendir holding the state of the
+ * next entry to return.
+ * @returns the next directory entry or NULL if at the end of the directory (or the directory
+ * is empty. This pointer should not be freed by the caller, and is only safe to
+ * access between return and the next call to ceph_readdir or ceph_closedir.
+ */
+struct dirent * ceph_readdir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp);
+
+/**
+ * A safe version of ceph_readdir, where the directory entry struct is allocated by the caller.
+ *
+ * @param cmount the ceph mount handle to use for performing the readdir.
+ * @param dirp the directory stream pointer from an opendir holding the state of the
+ * next entry to return.
+ * @param de the directory entry pointer filled in with the next directory entry of the dirp state.
+ * @returns 1 if the next entry was filled in, 0 if the end of the directory stream was reached,
+ * and a negative error code on failure.
+ */
+int ceph_readdir_r(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, struct dirent *de);
+
+/**
+ * A safe version of ceph_readdir that also returns the file statistics (readdir+stat).
+ *
+ * @param cmount the ceph mount handle to use for performing the readdir_plus_r.
+ * @param dirp the directory stream pointer from an opendir holding the state of the
+ * next entry to return.
+ * @param de the directory entry pointer filled in with the next directory entry of the dirp state.
+ * @param stx the stats of the file/directory of the entry returned
+ * @param want mask showing desired inode attrs for returned entry
+ * @param flags bitmask of flags to use when filling out attributes
+ * @param out optional returned Inode argument. If non-NULL, then a reference will be taken on
+ * the inode and the pointer set on success.
+ * @returns 1 if the next entry was filled in, 0 if the end of the directory stream was reached,
+ * and a negative error code on failure.
+ */
+int ceph_readdirplus_r(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, struct dirent *de,
+ struct ceph_statx *stx, unsigned want, unsigned flags, struct Inode **out);
+
+struct ceph_snapdiff_info
+{
+ struct ceph_mount_info* cmount;
+ struct ceph_dir_result* dir1; // primary dir entry to build snapdiff for.
+ struct ceph_dir_result* dir_aux; // aux dir entry to identify the second snapshot.
+ // Can point to the parent dir entry if entry-in-question
+ // doesn't exist in the second snapshot
+};
+
+/**
+ * Opens snapdiff stream to get snapshots delta (aka snapdiff).
+ *
+ * @param cmount the ceph mount handle to use for snapdiff retrieval.
+ * @param root_path root path for snapshots-in-question
+ * @param rel_path subpath under the root to build delta for
+ * @param snap1 the first snapshot name
+ * @param snap2 the second snapshot name
+ * @param out resulting snapdiff stream handle to be used for snapdiff results
+ retrieval via ceph_readdir_snapdiff
+ * @returns 0 on success and negative error code otherwise
+ */
+int ceph_open_snapdiff(struct ceph_mount_info* cmount,
+ const char* root_path,
+ const char* rel_path,
+ const char* snap1,
+ const char* snap2,
+ struct ceph_snapdiff_info* out);
+/**
+ * Get the next snapshot delta entry.
+ *
+ * @param info snapdiff stream handle opened via ceph_open_snapdiff()
+ * @param out the next snapdiff entry which includes directory entry and the
+ * entry's snapshot id - later one for emerged/existing entry or
+ * former snapshot id for the removed entry.
+ * @returns >0 on success, 0 if no more entries in the stream and negative
+ * error code otherwise
+ */
+int ceph_readdir_snapdiff(struct ceph_snapdiff_info* snapdiff,
+ struct ceph_snapdiff_entry_t* out);
+/**
+ * Close snapdiff stream.
+ *
+ * @param info snapdiff stream handle opened via ceph_open_snapdiff()
+ * @returns 0 on success and negative error code otherwise
+ */
+int ceph_close_snapdiff(struct ceph_snapdiff_info* snapdiff);
+
+/**
+ * Gets multiple directory entries.
+ *
+ * @param cmount the ceph mount handle to use for performing the getdents.
+ * @param dirp the directory stream pointer from an opendir holding the state of the
+ * next entry/entries to return.
+ * @param name an array of struct dirent that gets filled in with the to fill returned directory entries into.
+ * @param buflen the length of the buffer, which should be the number of dirent structs * sizeof(struct dirent).
+ * @returns the length of the buffer that was filled in, will always be multiples of sizeof(struct dirent), or a
+ * negative error code. If the buffer is not large enough for a single entry, -ERANGE is returned.
+ */
+int ceph_getdents(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, char *name, int buflen);
+
+/**
+ * Gets multiple directory names.
+ *
+ * @param cmount the ceph mount handle to use for performing the getdents.
+ * @param dirp the directory stream pointer from an opendir holding the state of the
+ * next entry/entries to return.
+ * @param name a buffer to fill in with directory entry names.
+ * @param buflen the length of the buffer that can be filled in.
+ * @returns the length of the buffer filled in with entry names, or a negative error code on failure.
+ * If the buffer isn't large enough for a single entry, -ERANGE is returned.
+ */
+int ceph_getdnames(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, char *name, int buflen);
+
+/**
+ * Rewind the directory stream to the beginning of the directory.
+ *
+ * @param cmount the ceph mount handle to use for performing the rewinddir.
+ * @param dirp the directory stream pointer to rewind.
+ */
+void ceph_rewinddir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp);
+
+/**
+ * Get the current position of a directory stream.
+ *
+ * @param cmount the ceph mount handle to use for performing the telldir.
+ * @param dirp the directory stream pointer to get the current position of.
+ * @returns the position of the directory stream. Note that the offsets returned
+ * by ceph_telldir do not have a particular order (cannot be compared with
+ * inequality).
+ */
+int64_t ceph_telldir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp);
+
+/**
+ * Move the directory stream to a position specified by the given offset.
+ *
+ * @param cmount the ceph mount handle to use for performing the seekdir.
+ * @param dirp the directory stream pointer to move.
+ * @param offset the position to move the directory stream to. This offset should be
+ * a value returned by telldir. Note that this value does not refer to the nth
+ * entry in a directory, and can not be manipulated with plus or minus.
+ */
+void ceph_seekdir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, int64_t offset);
+
+/**
+ * Create a directory.
+ *
+ * @param cmount the ceph mount handle to use for making the directory.
+ * @param path the path of the directory to create. This must be either an
+ * absolute path or a relative path off of the current working directory.
+ * @param mode the permissions the directory should have once created.
+ * @returns 0 on success or a negative return code on error.
+ */
+int ceph_mkdir(struct ceph_mount_info *cmount, const char *path, mode_t mode);
+
+/**
+ * Create a directory relative to a file descriptor
+ *
+ * @param cmount the ceph mount handle to use for making the directory.
+ * @param dirfd open file descriptor for a directory (or CEPHFS_AT_FDCWD)
+ * @param relpath the path of the directory to create.
+ * @param mode the permissions the directory should have once created.
+ * @returns 0 on success or a negative return code on error.
+ */
+int ceph_mkdirat(struct ceph_mount_info *cmount, int dirfd, const char *relpath, mode_t mode);
+
+/**
+ * Create a snapshot
+ *
+ * @param cmount the ceph mount handle to use for making the directory.
+ * @param path the path of the directory to create snapshot. This must be either an
+ * absolute path or a relative path off of the current working directory.
+ * @param name snapshot name
+ * @param mode the permissions the directory should have once created.
+ * @param snap_metadata array of snap metadata structs
+ * @param nr_snap_metadata number of snap metadata struct entries
+ * @returns 0 on success or a negative return code on error.
+ */
+int ceph_mksnap(struct ceph_mount_info *cmount, const char *path, const char *name,
+ mode_t mode, struct snap_metadata *snap_metadata, size_t nr_snap_metadata);
+
+/**
+ * Remove a snapshot
+ *
+ * @param cmount the ceph mount handle to use for making the directory.
+ * @param path the path of the directory to create snapshot. This must be either an
+ * absolute path or a relative path off of the current working directory.
+ * @param name snapshot name
+ * @returns 0 on success or a negative return code on error.
+ */
+int ceph_rmsnap(struct ceph_mount_info *cmount, const char *path, const char *name);
+
+/**
+ * Create multiple directories at once.
+ *
+ * @param cmount the ceph mount handle to use for making the directories.
+ * @param path the full path of directories and sub-directories that should
+ * be created.
+ * @param mode the permissions the directory should have once created.
+ * @returns 0 on success or a negative return code on error.
+ */
+int ceph_mkdirs(struct ceph_mount_info *cmount, const char *path, mode_t mode);
+
+/**
+ * Remove a directory.
+ *
+ * @param cmount the ceph mount handle to use for removing directories.
+ * @param path the path of the directory to remove.
+ * @returns 0 on success or a negative return code on error.
+ */
+int ceph_rmdir(struct ceph_mount_info *cmount, const char *path);
+
+/** @} dir */
+
+/**
+ * @defgroup libcephfs_h_links Links and Link Handling.
+ * Functions for creating and manipulating hard links and symbolic inks.
+ *
+ * @{
+ */
+
+/**
+ * Create a link.
+ *
+ * @param cmount the ceph mount handle to use for creating the link.
+ * @param existing the path to the existing file/directory to link to.
+ * @param newname the path to the new file/directory to link from.
+ * @returns 0 on success or a negative return code on error.
+ */
+int ceph_link(struct ceph_mount_info *cmount, const char *existing, const char *newname);
+
+/**
+ * Read a symbolic link.
+ *
+ * @param cmount the ceph mount handle to use for creating the link.
+ * @param path the path to the symlink to read
+ * @param buf the buffer to hold the path of the file that the symlink points to.
+ * @param size the length of the buffer
+ * @returns number of bytes copied on success or negative error code on failure
+ */
+int ceph_readlink(struct ceph_mount_info *cmount, const char *path, char *buf, int64_t size);
+
+/**
+ * Read a symbolic link relative to a file descriptor
+ *
+ * @param cmount the ceph mount handle to use for creating the link.
+ * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD)
+ * @param relpath the path to the symlink to read
+ * @param buf the buffer to hold the path of the file that the symlink points to.
+ * @param size the length of the buffer
+ * @returns number of bytes copied on success or negative error code on failure
+ */
+int ceph_readlinkat(struct ceph_mount_info *cmount, int dirfd, const char *relpath, char *buf,
+ int64_t size);
+
+/**
+ * Creates a symbolic link.
+ *
+ * @param cmount the ceph mount handle to use for creating the symbolic link.
+ * @param existing the path to the existing file/directory to link to.
+ * @param newname the path to the new file/directory to link from.
+ * @returns 0 on success or a negative return code on failure.
+ */
+int ceph_symlink(struct ceph_mount_info *cmount, const char *existing, const char *newname);
+
+/**
+ * Creates a symbolic link relative to a file descriptor
+ *
+ * @param cmount the ceph mount handle to use for creating the symbolic link.
+ * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD)
+ * @param existing the path to the existing file/directory to link to.
+ * @param newname the path to the new file/directory to link from.
+ * @returns 0 on success or a negative return code on failure.
+ */
+int ceph_symlinkat(struct ceph_mount_info *cmount, const char *existing, int dirfd,
+ const char *newname);
+
+/** @} links */
+
+/**
+ * @defgroup libcephfs_h_files File manipulation and handling.
+ * Functions for creating and manipulating files.
+ *
+ * @{
+ */
+
+
+/**
+ * Checks if deleting a file, link or directory is allowed.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param path the path of the file, link or directory.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_may_delete(struct ceph_mount_info *cmount, const char *path);
+
+/**
+ * Removes a file, link, or symbolic link. If the file/link has multiple links to it, the
+ * file will not disappear from the namespace until all references to it are removed.
+ *
+ * @param cmount the ceph mount handle to use for performing the unlink.
+ * @param path the path of the file or link to unlink.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_unlink(struct ceph_mount_info *cmount, const char *path);
+
+/**
+ * Removes a file, link, or symbolic link relative to a file descriptor.
+ * If the file/link has multiple links to it, the file will not
+ * disappear from the namespace until all references to it are removed.
+ *
+ * @param cmount the ceph mount handle to use for performing the unlink.
+ * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD)
+ * @param relpath the path of the file or link to unlink.
+ * @param flags bitfield that can be used to set AT_* modifier flags (only AT_REMOVEDIR)
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_unlinkat(struct ceph_mount_info *cmount, int dirfd, const char *relpath, int flags);
+
+/**
+ * Rename a file or directory.
+ *
+ * @param cmount the ceph mount handle to use for performing the rename.
+ * @param from the path to the existing file or directory.
+ * @param to the new name of the file or directory
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_rename(struct ceph_mount_info *cmount, const char *from, const char *to);
+
+/**
+ * Get an open file's extended statistics and attributes.
+ *
+ * @param cmount the ceph mount handle to use for performing the stat.
+ * @param fd the file descriptor of the file to get statistics of.
+ * @param stx the ceph_statx struct that will be filled in with the file's statistics.
+ * @param want bitfield of CEPH_STATX_* flags showing designed attributes
+ * @param flags bitfield that can be used to set AT_* modifier flags (AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC, AT_STATX_DONT_SYNC and AT_SYMLINK_NOFOLLOW)
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_fstatx(struct ceph_mount_info *cmount, int fd, struct ceph_statx *stx,
+ unsigned int want, unsigned int flags);
+
+/**
+ * Get attributes of a file relative to a file descriptor
+ *
+ * @param cmount the ceph mount handle to use for performing the stat.
+ * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD)
+ * @param relpath to the file/directory to get statistics of
+ * @param stx the ceph_statx struct that will be filled in with the file's statistics.
+ * @param want bitfield of CEPH_STATX_* flags showing designed attributes
+ * @param flags bitfield that can be used to set AT_* modifier flags (AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC, AT_STATX_DONT_SYNC and AT_SYMLINK_NOFOLLOW)
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_statxat(struct ceph_mount_info *cmount, int dirfd, const char *relpath,
+ struct ceph_statx *stx, unsigned int want, unsigned int flags);
+
+/**
+ * Get a file's extended statistics and attributes.
+ *
+ * @param cmount the ceph mount handle to use for performing the stat.
+ * @param path the file or directory to get the statistics of.
+ * @param stx the ceph_statx struct that will be filled in with the file's statistics.
+ * @param want bitfield of CEPH_STATX_* flags showing designed attributes
+ * @param flags bitfield that can be used to set AT_* modifier flags (AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC, AT_STATX_DONT_SYNC and AT_SYMLINK_NOFOLLOW)
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_statx(struct ceph_mount_info *cmount, const char *path, struct ceph_statx *stx,
+ unsigned int want, unsigned int flags);
+
+/**
+ * Get a file's statistics and attributes.
+ *
+ * ceph_stat() is deprecated, use ceph_statx() instead.
+ *
+ * @param cmount the ceph mount handle to use for performing the stat.
+ * @param path the file or directory to get the statistics of.
+ * @param stbuf the stat struct that will be filled in with the file's statistics.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_stat(struct ceph_mount_info *cmount, const char *path, struct stat *stbuf)
+ LIBCEPHFS_DEPRECATED;
+
+/**
+ * Get a file's statistics and attributes, without following symlinks.
+ *
+ * ceph_lstat() is deprecated, use ceph_statx(.., AT_SYMLINK_NOFOLLOW) instead.
+ *
+ * @param cmount the ceph mount handle to use for performing the stat.
+ * @param path the file or directory to get the statistics of.
+ * @param stbuf the stat struct that will be filled in with the file's statistics.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_lstat(struct ceph_mount_info *cmount, const char *path, struct stat *stbuf)
+ LIBCEPHFS_DEPRECATED;
+
+/**
+ * Get the open file's statistics.
+ *
+ * ceph_fstat() is deprecated, use ceph_fstatx() instead.
+ *
+ * @param cmount the ceph mount handle to use for performing the fstat.
+ * @param fd the file descriptor of the file to get statistics of.
+ * @param stbuf the stat struct of the file's statistics, filled in by the
+ * function.
+ * @returns 0 on success or a negative error code on failure
+ */
+int ceph_fstat(struct ceph_mount_info *cmount, int fd, struct stat *stbuf)
+ LIBCEPHFS_DEPRECATED;
+
+/**
+ * Set a file's attributes.
+ *
+ * @param cmount the ceph mount handle to use for performing the setattr.
+ * @param relpath the path to the file/directory to set the attributes of.
+ * @param stx the statx struct that must include attribute values to set on the file.
+ * @param mask a mask of all the CEPH_SETATTR_* values that have been set in the statx struct.
+ * @param flags mask of AT_* flags (only AT_ATTR_NOFOLLOW is respected for now)
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_setattrx(struct ceph_mount_info *cmount, const char *relpath, struct ceph_statx *stx, int mask, int flags);
+
+/**
+ * Set a file's attributes (extended version).
+ *
+ * @param cmount the ceph mount handle to use for performing the setattr.
+ * @param fd the fd of the open file/directory to set the attributes of.
+ * @param stx the statx struct that must include attribute values to set on the file.
+ * @param mask a mask of all the stat values that have been set on the stat struct.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_fsetattrx(struct ceph_mount_info *cmount, int fd, struct ceph_statx *stx, int mask);
+
+/**
+ * Change the mode bits (permissions) of a file/directory.
+ *
+ * @param cmount the ceph mount handle to use for performing the chmod.
+ * @param path the path to the file/directory to change the mode bits on.
+ * @param mode the new permissions to set.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_chmod(struct ceph_mount_info *cmount, const char *path, mode_t mode);
+
+/**
+ * Change the mode bits (permissions) of a file/directory. If the path is a
+ * symbolic link, it's not de-referenced.
+ *
+ * @param cmount the ceph mount handle to use for performing the chmod.
+ * @param path the path of file/directory to change the mode bits on.
+ * @param mode the new permissions to set.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_lchmod(struct ceph_mount_info *cmount, const char *path, mode_t mode);
+
+/**
+ * Change the mode bits (permissions) of an open file.
+ *
+ * @param cmount the ceph mount handle to use for performing the chmod.
+ * @param fd the open file descriptor to change the mode bits on.
+ * @param mode the new permissions to set.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_fchmod(struct ceph_mount_info *cmount, int fd, mode_t mode);
+
+/**
+ * Change the mode bits (permissions) of a file relative to a file descriptor.
+ *
+ * @param cmount the ceph mount handle to use for performing the chown.
+ * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD)
+ * @param relpath the relpath of the file/directory to change the ownership of.
+ * @param mode the new permissions to set.
+ * @param flags bitfield that can be used to set AT_* modifier flags (AT_SYMLINK_NOFOLLOW)
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_chmodat(struct ceph_mount_info *cmount, int dirfd, const char *relpath,
+ mode_t mode, int flags);
+
+/**
+ * Change the ownership of a file/directory.
+ *
+ * @param cmount the ceph mount handle to use for performing the chown.
+ * @param path the path of the file/directory to change the ownership of.
+ * @param uid the user id to set on the file/directory.
+ * @param gid the group id to set on the file/directory.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_chown(struct ceph_mount_info *cmount, const char *path, int uid, int gid);
+
+/**
+ * Change the ownership of a file from an open file descriptor.
+ *
+ * @param cmount the ceph mount handle to use for performing the chown.
+ * @param fd the fd of the open file/directory to change the ownership of.
+ * @param uid the user id to set on the file/directory.
+ * @param gid the group id to set on the file/directory.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_fchown(struct ceph_mount_info *cmount, int fd, int uid, int gid);
+
+/**
+ * Change the ownership of a file/directory, don't follow symlinks.
+ *
+ * @param cmount the ceph mount handle to use for performing the chown.
+ * @param path the path of the file/directory to change the ownership of.
+ * @param uid the user id to set on the file/directory.
+ * @param gid the group id to set on the file/directory.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_lchown(struct ceph_mount_info *cmount, const char *path, int uid, int gid);
+
+/**
+ * Change the ownership of a file/directory releative to a file descriptor.
+ *
+ * @param cmount the ceph mount handle to use for performing the chown.
+ * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD)
+ * @param relpath the relpath of the file/directory to change the ownership of.
+ * @param uid the user id to set on the file/directory.
+ * @param gid the group id to set on the file/directory.
+ * @param flags bitfield that can be used to set AT_* modifier flags (AT_SYMLINK_NOFOLLOW)
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_chownat(struct ceph_mount_info *cmount, int dirfd, const char *relpath,
+ uid_t uid, gid_t gid, int flags);
+
+/**
+ * Change file/directory last access and modification times.
+ *
+ * @param cmount the ceph mount handle to use for performing the utime.
+ * @param path the path to the file/directory to set the time values of.
+ * @param buf holding the access and modification times to set on the file.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_utime(struct ceph_mount_info *cmount, const char *path, struct utimbuf *buf);
+
+/**
+ * Change file/directory last access and modification times.
+ *
+ * @param cmount the ceph mount handle to use for performing the utime.
+ * @param fd the fd of the open file/directory to set the time values of.
+ * @param buf holding the access and modification times to set on the file.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_futime(struct ceph_mount_info *cmount, int fd, struct utimbuf *buf);
+
+/**
+ * Change file/directory last access and modification times.
+ *
+ * @param cmount the ceph mount handle to use for performing the utime.
+ * @param path the path to the file/directory to set the time values of.
+ * @param times holding the access and modification times to set on the file.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_utimes(struct ceph_mount_info *cmount, const char *path, struct timeval times[2]);
+
+/**
+ * Change file/directory last access and modification times, don't follow symlinks.
+ *
+ * @param cmount the ceph mount handle to use for performing the utime.
+ * @param path the path to the file/directory to set the time values of.
+ * @param times holding the access and modification times to set on the file.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_lutimes(struct ceph_mount_info *cmount, const char *path, struct timeval times[2]);
+
+/**
+ * Change file/directory last access and modification times.
+ *
+ * @param cmount the ceph mount handle to use for performing the utime.
+ * @param fd the fd of the open file/directory to set the time values of.
+ * @param times holding the access and modification times to set on the file.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_futimes(struct ceph_mount_info *cmount, int fd, struct timeval times[2]);
+
+/**
+ * Change file/directory last access and modification times.
+ *
+ * @param cmount the ceph mount handle to use for performing the utime.
+ * @param fd the fd of the open file/directory to set the time values of.
+ * @param times holding the access and modification times to set on the file.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_futimens(struct ceph_mount_info *cmount, int fd, struct timespec times[2]);
+
+/**
+ * Change file/directory last access and modification times relative
+ * to a file descriptor.
+ *
+ * @param cmount the ceph mount handle to use for performing the utime.
+ * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD)
+ * @param relpath the relpath of the file/directory to change the ownership of.
+ * @param dirfd the fd of the open file/directory to set the time values of.
+ * @param times holding the access and modification times to set on the file.
+ * @param flags bitfield that can be used to set AT_* modifier flags (AT_SYMLINK_NOFOLLOW)
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_utimensat(struct ceph_mount_info *cmount, int dirfd, const char *relpath,
+ struct timespec times[2], int flags);
+
+/**
+ * Apply or remove an advisory lock.
+ *
+ * @param cmount the ceph mount handle to use for performing the lock.
+ * @param fd the open file descriptor to change advisory lock.
+ * @param operation the advisory lock operation to be performed on the file
+ * descriptor among LOCK_SH (shared lock), LOCK_EX (exclusive lock),
+ * or LOCK_UN (remove lock). The LOCK_NB value can be ORed to perform a
+ * non-blocking operation.
+ * @param owner the user-supplied owner identifier (an arbitrary integer)
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_flock(struct ceph_mount_info *cmount, int fd, int operation,
+ uint64_t owner);
+
+/**
+ * Truncate the file to the given size. If this operation causes the
+ * file to expand, the empty bytes will be filled in with zeros.
+ *
+ * @param cmount the ceph mount handle to use for performing the truncate.
+ * @param path the path to the file to truncate.
+ * @param size the new size of the file.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_truncate(struct ceph_mount_info *cmount, const char *path, int64_t size);
+
+/**
+ * Make a block or character special file.
+ *
+ * @param cmount the ceph mount handle to use for performing the mknod.
+ * @param path the path to the special file.
+ * @param mode the permissions to use and the type of special file. The type can be
+ * one of S_IFREG, S_IFCHR, S_IFBLK, S_IFIFO.
+ * @param rdev If the file type is S_IFCHR or S_IFBLK then this parameter specifies the
+ * major and minor numbers of the newly created device special file. Otherwise,
+ * it is ignored.
+ * @returns 0 on success or negative error code on failure.
+ */
+int ceph_mknod(struct ceph_mount_info *cmount, const char *path, mode_t mode, dev_t rdev);
+/**
+ * Create and/or open a file.
+ *
+ * @param cmount the ceph mount handle to use for performing the open.
+ * @param path the path of the file to open. If the flags parameter includes O_CREAT,
+ * the file will first be created before opening.
+ * @param flags a set of option masks that control how the file is created/opened.
+ * @param mode the permissions to place on the file if the file does not exist and O_CREAT
+ * is specified in the flags.
+ * @returns a non-negative file descriptor number on success or a negative error code on failure.
+ */
+int ceph_open(struct ceph_mount_info *cmount, const char *path, int flags, mode_t mode);
+
+/**
+ * Create and/or open a file relative to a directory
+ *
+ * @param cmount the ceph mount handle to use for performing the open.
+ * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD)
+ * @param relpath the path of the file to open. If the flags parameter includes O_CREAT,
+ * the file will first be created before opening.
+ * @param flags a set of option masks that control how the file is created/opened.
+ * @param mode the permissions to place on the file if the file does not exist and O_CREAT
+ * is specified in the flags.
+ * @returns a non-negative file descriptor number on success or a negative error code on failure.
+ */
+int ceph_openat(struct ceph_mount_info *cmount, int dirfd, const char *relpath, int flags, mode_t mode);
+
+/**
+ * Create and/or open a file with a specific file layout.
+ *
+ * @param cmount the ceph mount handle to use for performing the open.
+ * @param path the path of the file to open. If the flags parameter includes O_CREAT,
+ * the file will first be created before opening.
+ * @param flags a set of option masks that control how the file is created/opened.
+ * @param mode the permissions to place on the file if the file does not exist and O_CREAT
+ * is specified in the flags.
+ * @param stripe_unit the stripe unit size (option, 0 for default)
+ * @param stripe_count the stripe count (optional, 0 for default)
+ * @param object_size the object size (optional, 0 for default)
+ * @param data_pool name of target data pool name (optional, NULL or empty string for default)
+ * @returns a non-negative file descriptor number on success or a negative error code on failure.
+ */
+int ceph_open_layout(struct ceph_mount_info *cmount, const char *path, int flags,
+ mode_t mode, int stripe_unit, int stripe_count, int object_size,
+ const char *data_pool);
+
+/**
+ * Close the open file.
+ *
+ * @param cmount the ceph mount handle to use for performing the close.
+ * @param fd the file descriptor referring to the open file.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_close(struct ceph_mount_info *cmount, int fd);
+
+/**
+ * Reposition the open file stream based on the given offset.
+ *
+ * @param cmount the ceph mount handle to use for performing the lseek.
+ * @param fd the open file descriptor referring to the open file and holding the
+ * current position of the stream.
+ * @param offset the offset to set the stream to
+ * @param whence the flag to indicate what type of seeking to perform:
+ * SEEK_SET: the offset is set to the given offset in the file.
+ * SEEK_CUR: the offset is set to the current location plus @e offset bytes.
+ * SEEK_END: the offset is set to the end of the file plus @e offset bytes.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int64_t ceph_lseek(struct ceph_mount_info *cmount, int fd, int64_t offset, int whence);
+/**
+ * Read data from the file.
+ *
+ * @param cmount the ceph mount handle to use for performing the read.
+ * @param fd the file descriptor of the open file to read from.
+ * @param buf the buffer to read data into
+ * @param size the initial size of the buffer
+ * @param offset the offset in the file to read from. If this value is negative, the
+ * function reads from the current offset of the file descriptor.
+ * @returns the number of bytes read into buf, or a negative error code on failure.
+ */
+int ceph_read(struct ceph_mount_info *cmount, int fd, char *buf, int64_t size, int64_t offset);
+
+/**
+ * Read data from the file.
+ * @param cmount the ceph mount handle to use for performing the read.
+ * @param fd the file descriptor of the open file to read from.
+ * @param iov the iov structure to read data into
+ * @param iovcnt the number of items that iov includes
+ * @param offset the offset in the file to read from. If this value is negative, the
+ * function reads from the current offset of the file descriptor.
+ * @returns the number of bytes read into buf, or a negative error code on failure.
+ */
+int ceph_preadv(struct ceph_mount_info *cmount, int fd, const struct iovec *iov, int iovcnt,
+ int64_t offset);
+
+/**
+ * Write data to a file.
+ *
+ * @param cmount the ceph mount handle to use for performing the write.
+ * @param fd the file descriptor of the open file to write to
+ * @param buf the bytes to write to the file
+ * @param size the size of the buf array
+ * @param offset the offset of the file write into. If this value is negative, the
+ * function writes to the current offset of the file descriptor.
+ * @returns the number of bytes written, or a negative error code
+ */
+int ceph_write(struct ceph_mount_info *cmount, int fd, const char *buf, int64_t size,
+ int64_t offset);
+
+/**
+ * Write data to a file.
+ *
+ * @param cmount the ceph mount handle to use for performing the write.
+ * @param fd the file descriptor of the open file to write to
+ * @param iov the iov structure to read data into
+ * @param iovcnt the number of items that iov includes
+ * @param offset the offset of the file write into. If this value is negative, the
+ * function writes to the current offset of the file descriptor.
+ * @returns the number of bytes written, or a negative error code
+ */
+int ceph_pwritev(struct ceph_mount_info *cmount, int fd, const struct iovec *iov, int iovcnt,
+ int64_t offset);
+
+/**
+ * Truncate a file to the given size.
+ *
+ * @param cmount the ceph mount handle to use for performing the ftruncate.
+ * @param fd the file descriptor of the file to truncate
+ * @param size the new size of the file
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_ftruncate(struct ceph_mount_info *cmount, int fd, int64_t size);
+
+/**
+ * Synchronize an open file to persistent media.
+ *
+ * @param cmount the ceph mount handle to use for performing the fsync.
+ * @param fd the file descriptor of the file to sync.
+ * @param syncdataonly a boolean whether to synchronize metadata and data (0)
+ * or just data (1).
+ * @return 0 on success or a negative error code on failure.
+ */
+int ceph_fsync(struct ceph_mount_info *cmount, int fd, int syncdataonly);
+
+/**
+ * Preallocate or release disk space for the file for the byte range.
+ *
+ * @param cmount the ceph mount handle to use for performing the fallocate.
+ * @param fd the file descriptor of the file to fallocate.
+ * @param mode the flags determines the operation to be performed on the given range.
+ * default operation (0) allocate and initialize to zero the file in the byte range,
+ * and the file size will be changed if offset + length is greater than
+ * the file size. if the FALLOC_FL_KEEP_SIZE flag is specified in the mode,
+ * the file size will not be changed. if the FALLOC_FL_PUNCH_HOLE flag is
+ * specified in the mode, the operation is deallocate space and zero the byte range.
+ * @param offset the byte range starting.
+ * @param length the length of the range.
+ * @return 0 on success or a negative error code on failure.
+ */
+int ceph_fallocate(struct ceph_mount_info *cmount, int fd, int mode,
+ int64_t offset, int64_t length);
+
+/**
+ * Enable/disable lazyio for the file.
+ *
+ * @param cmount the ceph mount handle to use for performing the fsync.
+ * @param fd the file descriptor of the file to sync.
+ * @param enable a boolean to enable lazyio or disable lazyio.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_lazyio(struct ceph_mount_info *cmount, int fd, int enable);
+
+
+/**
+ * Flushes the write buffer for the file thereby propogating the buffered write to the file.
+ *
+ * @param cmount the ceph mount handle to use for performing the fsync.
+ * @param fd the file descriptor of the file to sync.
+ * @param offset a boolean to enable lazyio or disable lazyio.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_lazyio_propagate(struct ceph_mount_info *cmount, int fd, int64_t offset, size_t count);
+
+
+/**
+ * Flushes the write buffer for the file and invalidate the read cache. This allows a subsequent read operation to read and cache data directly from the file and hence everyone's propagated writes would be visible.
+ *
+ * @param cmount the ceph mount handle to use for performing the fsync.
+ * @param fd the file descriptor of the file to sync.
+ * @param offset a boolean to enable lazyio or disable lazyio.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_lazyio_synchronize(struct ceph_mount_info *cmount, int fd, int64_t offset, size_t count);
+
+/** @} file */
+
+/**
+ * @defgroup libcephfs_h_xattr Extended Attribute manipulation and handling.
+ * Functions for creating and manipulating extended attributes on files.
+ *
+ * @{
+ */
+
+/**
+ * Get an extended attribute.
+ *
+ * @param cmount the ceph mount handle to use for performing the getxattr.
+ * @param path the path to the file
+ * @param name the name of the extended attribute to get
+ * @param value a pre-allocated buffer to hold the xattr's value
+ * @param size the size of the pre-allocated buffer
+ * @returns the size of the value or a negative error code on failure.
+ */
+int ceph_getxattr(struct ceph_mount_info *cmount, const char *path, const char *name,
+ void *value, size_t size);
+
+/**
+ * Get an extended attribute.
+ *
+ * @param cmount the ceph mount handle to use for performing the getxattr.
+ * @param fd the open file descriptor referring to the file to get extended attribute from.
+ * @param name the name of the extended attribute to get
+ * @param value a pre-allocated buffer to hold the xattr's value
+ * @param size the size of the pre-allocated buffer
+ * @returns the size of the value or a negative error code on failure.
+ */
+int ceph_fgetxattr(struct ceph_mount_info *cmount, int fd, const char *name,
+ void *value, size_t size);
+
+/**
+ * Get an extended attribute without following symbolic links. This function is
+ * identical to ceph_getxattr, but if the path refers to a symbolic link,
+ * we get the extended attributes of the symlink rather than the attributes
+ * of the link itself.
+ *
+ * @param cmount the ceph mount handle to use for performing the lgetxattr.
+ * @param path the path to the file
+ * @param name the name of the extended attribute to get
+ * @param value a pre-allocated buffer to hold the xattr's value
+ * @param size the size of the pre-allocated buffer
+ * @returns the size of the value or a negative error code on failure.
+ */
+int ceph_lgetxattr(struct ceph_mount_info *cmount, const char *path, const char *name,
+ void *value, size_t size);
+
+/**
+ * List the extended attribute keys on a file.
+ *
+ * @param cmount the ceph mount handle to use for performing the listxattr.
+ * @param path the path to the file.
+ * @param list a buffer to be filled in with the list of extended attributes keys.
+ * @param size the size of the list buffer.
+ * @returns the size of the resulting list filled in.
+ */
+int ceph_listxattr(struct ceph_mount_info *cmount, const char *path, char *list, size_t size);
+
+/**
+ * List the extended attribute keys on a file.
+ *
+ * @param cmount the ceph mount handle to use for performing the listxattr.
+ * @param fd the open file descriptor referring to the file to list extended attributes on.
+ * @param list a buffer to be filled in with the list of extended attributes keys.
+ * @param size the size of the list buffer.
+ * @returns the size of the resulting list filled in.
+ */
+int ceph_flistxattr(struct ceph_mount_info *cmount, int fd, char *list, size_t size);
+
+/**
+ * Get the list of extended attribute keys on a file, but do not follow symbolic links.
+ *
+ * @param cmount the ceph mount handle to use for performing the llistxattr.
+ * @param path the path to the file.
+ * @param list a buffer to be filled in with the list of extended attributes keys.
+ * @param size the size of the list buffer.
+ * @returns the size of the resulting list filled in.
+ */
+int ceph_llistxattr(struct ceph_mount_info *cmount, const char *path, char *list, size_t size);
+
+/**
+ * Remove an extended attribute from a file.
+ *
+ * @param cmount the ceph mount handle to use for performing the removexattr.
+ * @param path the path to the file.
+ * @param name the name of the extended attribute to remove.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_removexattr(struct ceph_mount_info *cmount, const char *path, const char *name);
+
+/**
+ * Remove an extended attribute from a file.
+ *
+ * @param cmount the ceph mount handle to use for performing the removexattr.
+ * @param fd the open file descriptor referring to the file to remove extended attribute from.
+ * @param name the name of the extended attribute to remove.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_fremovexattr(struct ceph_mount_info *cmount, int fd, const char *name);
+
+/**
+ * Remove the extended attribute from a file, do not follow symbolic links.
+ *
+ * @param cmount the ceph mount handle to use for performing the lremovexattr.
+ * @param path the path to the file.
+ * @param name the name of the extended attribute to remove.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_lremovexattr(struct ceph_mount_info *cmount, const char *path, const char *name);
+
+/**
+ * Set an extended attribute on a file.
+ *
+ * @param cmount the ceph mount handle to use for performing the setxattr.
+ * @param path the path to the file.
+ * @param name the name of the extended attribute to set.
+ * @param value the bytes of the extended attribute value
+ * @param size the size of the extended attribute value
+ * @param flags the flags can be:
+ * CEPH_XATTR_CREATE: create the extended attribute. Must not exist.
+ * CEPH_XATTR_REPLACE: replace the extended attribute, Must already exist.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_setxattr(struct ceph_mount_info *cmount, const char *path, const char *name,
+ const void *value, size_t size, int flags);
+
+/**
+ * Set an extended attribute on a file.
+ *
+ * @param cmount the ceph mount handle to use for performing the setxattr.
+ * @param fd the open file descriptor referring to the file to set extended attribute on.
+ * @param name the name of the extended attribute to set.
+ * @param value the bytes of the extended attribute value
+ * @param size the size of the extended attribute value
+ * @param flags the flags can be:
+ * CEPH_XATTR_CREATE: create the extended attribute. Must not exist.
+ * CEPH_XATTR_REPLACE: replace the extended attribute, Must already exist.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_fsetxattr(struct ceph_mount_info *cmount, int fd, const char *name,
+ const void *value, size_t size, int flags);
+
+/**
+ * Set an extended attribute on a file, do not follow symbolic links.
+ *
+ * @param cmount the ceph mount handle to use for performing the lsetxattr.
+ * @param path the path to the file.
+ * @param name the name of the extended attribute to set.
+ * @param value the bytes of the extended attribute value
+ * @param size the size of the extended attribute value
+ * @param flags the flags can be:
+ * CEPH_XATTR_CREATE: create the extended attribute. Must not exist.
+ * CEPH_XATTR_REPLACE: replace the extended attribute, Must already exist.
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_lsetxattr(struct ceph_mount_info *cmount, const char *path, const char *name,
+ const void *value, size_t size, int flags);
+
+/** @} xattr */
+
+/**
+ * @defgroup libcephfs_h_filelayout Control File Layout.
+ * Functions for setting and getting the file layout of existing files.
+ *
+ * @{
+ */
+
+/**
+ * Get the file striping unit from an open file descriptor.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param fh the open file descriptor referring to the file to get the striping unit of.
+ * @returns the striping unit of the file or a negative error code on failure.
+ */
+int ceph_get_file_stripe_unit(struct ceph_mount_info *cmount, int fh);
+
+/**
+ * Get the file striping unit.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param path the path of the file/directory get the striping unit of.
+ * @returns the striping unit of the file or a negative error code on failure.
+ */
+int ceph_get_path_stripe_unit(struct ceph_mount_info *cmount, const char *path);
+
+/**
+ * Get the file striping count from an open file descriptor.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param fh the open file descriptor referring to the file to get the striping count of.
+ * @returns the striping count of the file or a negative error code on failure.
+ */
+int ceph_get_file_stripe_count(struct ceph_mount_info *cmount, int fh);
+
+/**
+ * Get the file striping count.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param path the path of the file/directory get the striping count of.
+ * @returns the striping count of the file or a negative error code on failure.
+ */
+int ceph_get_path_stripe_count(struct ceph_mount_info *cmount, const char *path);
+
+/**
+ * Get the file object size from an open file descriptor.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param fh the open file descriptor referring to the file to get the object size of.
+ * @returns the object size of the file or a negative error code on failure.
+ */
+int ceph_get_file_object_size(struct ceph_mount_info *cmount, int fh);
+
+/**
+ * Get the file object size.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param path the path of the file/directory get the object size of.
+ * @returns the object size of the file or a negative error code on failure.
+ */
+int ceph_get_path_object_size(struct ceph_mount_info *cmount, const char *path);
+
+/**
+ * Get the file pool information from an open file descriptor.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param fh the open file descriptor referring to the file to get the pool information of.
+ * @returns the ceph pool id that the file is in
+ */
+int ceph_get_file_pool(struct ceph_mount_info *cmount, int fh);
+
+/**
+ * Get the file pool information.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param path the path of the file/directory get the pool information of.
+ * @returns the ceph pool id that the file is in
+ */
+int ceph_get_path_pool(struct ceph_mount_info *cmount, const char *path);
+
+/**
+ * Get the name of the pool a opened file is stored in,
+ *
+ * Write the name of the file's pool to the buffer. If buflen is 0, return
+ * a suggested length for the buffer.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param fh the open file descriptor referring to the file
+ * @param buf buffer to store the name in
+ * @param buflen size of the buffer
+ * @returns length in bytes of the pool name, or -ERANGE if the buffer is not large enough.
+ */
+int ceph_get_file_pool_name(struct ceph_mount_info *cmount, int fh, char *buf, size_t buflen);
+
+/**
+ * get the name of a pool by id
+ *
+ * Given a pool's numeric identifier, get the pool's alphanumeric name.
+ *
+ * @param cmount the ceph mount handle to use
+ * @param pool the numeric pool id
+ * @param buf buffer to sore the name in
+ * @param buflen size of the buffer
+ * @returns length in bytes of the pool name, or -ERANGE if the buffer is not large enough
+ */
+int ceph_get_pool_name(struct ceph_mount_info *cmount, int pool, char *buf, size_t buflen);
+
+/**
+ * Get the name of the pool a file is stored in
+ *
+ * Write the name of the file's pool to the buffer. If buflen is 0, return
+ * a suggested length for the buffer.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param path the path of the file/directory
+ * @param buf buffer to store the name in
+ * @param buflen size of the buffer
+ * @returns length in bytes of the pool name, or -ERANGE if the buffer is not large enough.
+ */
+int ceph_get_path_pool_name(struct ceph_mount_info *cmount, const char *path, char *buf, size_t buflen);
+
+/**
+ * Get the default pool name of cephfs
+ * Write the name of the default pool to the buffer. If buflen is 0, return
+ * a suggested length for the buffer.
+ * @param cmount the ceph mount handle to use.
+ * @param buf buffer to store the name in
+ * @param buflen size of the buffer
+ * @returns length in bytes of the pool name, or -ERANGE if the buffer is not large enough.
+ */
+int ceph_get_default_data_pool_name(struct ceph_mount_info *cmount, char *buf, size_t buflen);
+
+/**
+ * Get the file layout from an open file descriptor.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param fh the open file descriptor referring to the file to get the layout of.
+ * @param stripe_unit where to store the striping unit of the file
+ * @param stripe_count where to store the striping count of the file
+ * @param object_size where to store the object size of the file
+ * @param pg_pool where to store the ceph pool id that the file is in
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_get_file_layout(struct ceph_mount_info *cmount, int fh, int *stripe_unit, int *stripe_count, int *object_size, int *pg_pool);
+
+/**
+ * Get the file layout.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param path the path of the file/directory get the layout of.
+ * @param stripe_unit where to store the striping unit of the file
+ * @param stripe_count where to store the striping count of the file
+ * @param object_size where to store the object size of the file
+ * @param pg_pool where to store the ceph pool id that the file is in
+ * @returns 0 on success or a negative error code on failure.
+ */
+int ceph_get_path_layout(struct ceph_mount_info *cmount, const char *path, int *stripe_unit, int *stripe_count, int *object_size, int *pg_pool);
+
+/**
+ * Get the file replication information from an open file descriptor.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param fh the open file descriptor referring to the file to get the replication information of.
+ * @returns the replication factor of the file.
+ */
+int ceph_get_file_replication(struct ceph_mount_info *cmount, int fh);
+
+/**
+ * Get the file replication information.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param path the path of the file/directory get the replication information of.
+ * @returns the replication factor of the file.
+ */
+int ceph_get_path_replication(struct ceph_mount_info *cmount, const char *path);
+
+/**
+ * Get the id of the named pool.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param pool_name the name of the pool.
+ * @returns the pool id, or a negative error code on failure.
+ */
+int ceph_get_pool_id(struct ceph_mount_info *cmount, const char *pool_name);
+
+/**
+ * Get the pool replication factor.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param pool_id the pool id to look up
+ * @returns the replication factor, or a negative error code on failure.
+ */
+int ceph_get_pool_replication(struct ceph_mount_info *cmount, int pool_id);
+
+/**
+ * Get the OSD address where the primary copy of a file stripe is located.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param fd the open file descriptor referring to the file to get the striping unit of.
+ * @param offset the offset into the file to specify the stripe. The offset can be
+ * anywhere within the stripe unit.
+ * @param addr the address of the OSD holding that stripe
+ * @param naddr the capacity of the address passed in.
+ * @returns the size of the addressed filled into the @e addr parameter, or a negative
+ * error code on failure.
+ */
+int ceph_get_file_stripe_address(struct ceph_mount_info *cmount, int fd, int64_t offset,
+ struct sockaddr_storage *addr, int naddr);
+
+/**
+ * Get the list of OSDs where the objects containing a file offset are located.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param fd the open file descriptor referring to the file.
+ * @param offset the offset within the file.
+ * @param length return the number of bytes between the offset and the end of
+ * the stripe unit (optional).
+ * @param osds an integer array to hold the OSD ids.
+ * @param nosds the size of the integer array.
+ * @returns the number of items stored in the output array, or -ERANGE if the
+ * array is not large enough.
+ */
+int ceph_get_file_extent_osds(struct ceph_mount_info *cmount, int fd,
+ int64_t offset, int64_t *length, int *osds, int nosds);
+
+/**
+ * Get the fully qualified CRUSH location of an OSD.
+ *
+ * Returns (type, name) string pairs for each device in the CRUSH bucket
+ * hierarchy starting from the given osd to the root. Each pair element is
+ * separated by a NULL character.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param osd the OSD id.
+ * @param path buffer to store location.
+ * @param len size of buffer.
+ * @returns the amount of bytes written into the buffer, or -ERANGE if the
+ * array is not large enough.
+ */
+int ceph_get_osd_crush_location(struct ceph_mount_info *cmount,
+ int osd, char *path, size_t len);
+
+/**
+ * Get the network address of an OSD.
+ *
+ * @param cmount the ceph mount handle.
+ * @param osd the OSD id.
+ * @param addr the OSD network address.
+ * @returns zero on success, other returns a negative error code.
+ */
+int ceph_get_osd_addr(struct ceph_mount_info *cmount, int osd,
+ struct sockaddr_storage *addr);
+
+/**
+ * Get the file layout stripe unit granularity.
+ * @param cmount the ceph mount handle.
+ * @returns the stripe unit granularity or a negative error code on failure.
+ */
+int ceph_get_stripe_unit_granularity(struct ceph_mount_info *cmount);
+
+/** @} filelayout */
+
+/**
+ * No longer available. Do not use.
+ * These functions will return -EOPNOTSUPP.
+ */
+int ceph_set_default_file_stripe_unit(struct ceph_mount_info *cmount, int stripe);
+int ceph_set_default_file_stripe_count(struct ceph_mount_info *cmount, int count);
+int ceph_set_default_object_size(struct ceph_mount_info *cmount, int size);
+int ceph_set_default_preferred_pg(struct ceph_mount_info *cmount, int osd);
+int ceph_set_default_file_replication(struct ceph_mount_info *cmount, int replication);
+
+/**
+ * Read from local replicas when possible.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param val a boolean to set (1) or clear (0) the option to favor local objects
+ * for reads.
+ * @returns 0
+ */
+int ceph_localize_reads(struct ceph_mount_info *cmount, int val);
+
+/**
+ * Get the osd id of the local osd (if any)
+ *
+ * @param cmount the ceph mount handle to use.
+ * @returns the osd (if any) local to the node where this call is made, otherwise
+ * -1 is returned.
+ */
+int ceph_get_local_osd(struct ceph_mount_info *cmount);
+
+/** @} default_filelayout */
+
+/**
+ * Get the capabilities currently issued to the client.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param fd the file descriptor to get issued
+ * @returns the current capabilities issued to this client
+ * for the open file
+ */
+int ceph_debug_get_fd_caps(struct ceph_mount_info *cmount, int fd);
+
+/**
+ * Get the capabilities currently issued to the client.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param path the path to the file
+ * @returns the current capabilities issued to this client
+ * for the file
+ */
+int ceph_debug_get_file_caps(struct ceph_mount_info *cmount, const char *path);
+
+/* Low Level */
+struct Inode *ceph_ll_get_inode(struct ceph_mount_info *cmount,
+ vinodeno_t vino);
+
+int ceph_ll_lookup_vino(struct ceph_mount_info *cmount, vinodeno_t vino,
+ Inode **inode);
+
+int ceph_ll_lookup_inode(
+ struct ceph_mount_info *cmount,
+ struct inodeno_t ino,
+ Inode **inode);
+
+/**
+ * Get the root inode of FS. Increase counter of references for root Inode. You must call ceph_ll_forget for it!
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param parent pointer to pointer to Inode struct. Pointer to root inode will be returned
+ * @returns 0 if all good
+ */
+int ceph_ll_lookup_root(struct ceph_mount_info *cmount,
+ Inode **parent);
+int ceph_ll_lookup(struct ceph_mount_info *cmount, Inode *parent,
+ const char *name, Inode **out, struct ceph_statx *stx,
+ unsigned want, unsigned flags, const UserPerm *perms);
+int ceph_ll_put(struct ceph_mount_info *cmount, struct Inode *in);
+int ceph_ll_forget(struct ceph_mount_info *cmount, struct Inode *in,
+ int count);
+int ceph_ll_walk(struct ceph_mount_info *cmount, const char* name, Inode **i,
+ struct ceph_statx *stx, unsigned int want, unsigned int flags,
+ const UserPerm *perms);
+int ceph_ll_getattr(struct ceph_mount_info *cmount, struct Inode *in,
+ struct ceph_statx *stx, unsigned int want, unsigned int flags,
+ const UserPerm *perms);
+int ceph_ll_setattr(struct ceph_mount_info *cmount, struct Inode *in,
+ struct ceph_statx *stx, int mask, const UserPerm *perms);
+int ceph_ll_open(struct ceph_mount_info *cmount, struct Inode *in, int flags,
+ struct Fh **fh, const UserPerm *perms);
+off_t ceph_ll_lseek(struct ceph_mount_info *cmount, struct Fh* filehandle,
+ off_t offset, int whence);
+int ceph_ll_read(struct ceph_mount_info *cmount, struct Fh* filehandle,
+ int64_t off, uint64_t len, char* buf);
+int ceph_ll_fsync(struct ceph_mount_info *cmount, struct Fh *fh,
+ int syncdataonly);
+int ceph_ll_sync_inode(struct ceph_mount_info *cmount, struct Inode *in,
+ int syncdataonly);
+int ceph_ll_fallocate(struct ceph_mount_info *cmount, struct Fh *fh,
+ int mode, int64_t offset, int64_t length);
+int ceph_ll_write(struct ceph_mount_info *cmount, struct Fh* filehandle,
+ int64_t off, uint64_t len, const char *data);
+int64_t ceph_ll_readv(struct ceph_mount_info *cmount, struct Fh *fh,
+ const struct iovec *iov, int iovcnt, int64_t off);
+int64_t ceph_ll_writev(struct ceph_mount_info *cmount, struct Fh *fh,
+ const struct iovec *iov, int iovcnt, int64_t off);
+int ceph_ll_close(struct ceph_mount_info *cmount, struct Fh* filehandle);
+int ceph_ll_iclose(struct ceph_mount_info *cmount, struct Inode *in, int mode);
+/**
+ * Get xattr value by xattr name.
+ *
+ * @param cmount the ceph mount handle to use.
+ * @param in file handle
+ * @param name name of attribute
+ * @param value pointer to begin buffer
+ * @param size buffer size
+ * @param perms pointer to UserPerms object
+ * @returns size of returned buffer. Negative number in error case
+ */
+int ceph_ll_getxattr(struct ceph_mount_info *cmount, struct Inode *in,
+ const char *name, void *value, size_t size,
+ const UserPerm *perms);
+int ceph_ll_setxattr(struct ceph_mount_info *cmount, struct Inode *in,
+ const char *name, const void *value, size_t size,
+ int flags, const UserPerm *perms);
+int ceph_ll_listxattr(struct ceph_mount_info *cmount, struct Inode *in,
+ char *list, size_t buf_size, size_t *list_size,
+ const UserPerm *perms);
+int ceph_ll_removexattr(struct ceph_mount_info *cmount, struct Inode *in,
+ const char *name, const UserPerm *perms);
+int ceph_ll_create(struct ceph_mount_info *cmount, Inode *parent,
+ const char *name, mode_t mode, int oflags, Inode **outp,
+ Fh **fhp, struct ceph_statx *stx, unsigned want,
+ unsigned lflags, const UserPerm *perms);
+int ceph_ll_mknod(struct ceph_mount_info *cmount, Inode *parent,
+ const char *name, mode_t mode, dev_t rdev, Inode **out,
+ struct ceph_statx *stx, unsigned want, unsigned flags,
+ const UserPerm *perms);
+int ceph_ll_mkdir(struct ceph_mount_info *cmount, Inode *parent,
+ const char *name, mode_t mode, Inode **out,
+ struct ceph_statx *stx, unsigned want,
+ unsigned flags, const UserPerm *perms);
+int ceph_ll_link(struct ceph_mount_info *cmount, struct Inode *in,
+ struct Inode *newparent, const char *name,
+ const UserPerm *perms);
+int ceph_ll_opendir(struct ceph_mount_info *cmount, struct Inode *in,
+ struct ceph_dir_result **dirpp, const UserPerm *perms);
+int ceph_ll_releasedir(struct ceph_mount_info *cmount,
+ struct ceph_dir_result* dir);
+int ceph_ll_rename(struct ceph_mount_info *cmount, struct Inode *parent,
+ const char *name, struct Inode *newparent,
+ const char *newname, const UserPerm *perms);
+int ceph_ll_unlink(struct ceph_mount_info *cmount, struct Inode *in,
+ const char *name, const UserPerm *perms);
+int ceph_ll_statfs(struct ceph_mount_info *cmount, struct Inode *in,
+ struct statvfs *stbuf);
+int ceph_ll_readlink(struct ceph_mount_info *cmount, struct Inode *in,
+ char *buf, size_t bufsize, const UserPerm *perms);
+int ceph_ll_symlink(struct ceph_mount_info *cmount,
+ Inode *in, const char *name, const char *value,
+ Inode **out, struct ceph_statx *stx,
+ unsigned want, unsigned flags,
+ const UserPerm *perms);
+int ceph_ll_rmdir(struct ceph_mount_info *cmount, struct Inode *in,
+ const char *name, const UserPerm *perms);
+uint32_t ceph_ll_stripe_unit(struct ceph_mount_info *cmount,
+ struct Inode *in);
+uint32_t ceph_ll_file_layout(struct ceph_mount_info *cmount,
+ struct Inode *in,
+ struct ceph_file_layout *layout);
+uint64_t ceph_ll_snap_seq(struct ceph_mount_info *cmount,
+ struct Inode *in);
+int ceph_ll_get_stripe_osd(struct ceph_mount_info *cmount,
+ struct Inode *in,
+ uint64_t blockno,
+ struct ceph_file_layout* layout);
+int ceph_ll_num_osds(struct ceph_mount_info *cmount);
+int ceph_ll_osdaddr(struct ceph_mount_info *cmount,
+ int osd, uint32_t *addr);
+uint64_t ceph_ll_get_internal_offset(struct ceph_mount_info *cmount,
+ struct Inode *in, uint64_t blockno);
+int ceph_ll_read_block(struct ceph_mount_info *cmount,
+ struct Inode *in, uint64_t blockid,
+ char* bl, uint64_t offset, uint64_t length,
+ struct ceph_file_layout* layout);
+int ceph_ll_write_block(struct ceph_mount_info *cmount,
+ struct Inode *in, uint64_t blockid,
+ char* buf, uint64_t offset,
+ uint64_t length, struct ceph_file_layout* layout,
+ uint64_t snapseq, uint32_t sync);
+int ceph_ll_commit_blocks(struct ceph_mount_info *cmount,
+ struct Inode *in, uint64_t offset, uint64_t range);
+
+
+int ceph_ll_getlk(struct ceph_mount_info *cmount,
+ Fh *fh, struct flock *fl, uint64_t owner);
+int ceph_ll_setlk(struct ceph_mount_info *cmount,
+ Fh *fh, struct flock *fl, uint64_t owner, int sleep);
+
+int ceph_ll_lazyio(struct ceph_mount_info *cmount, Fh *fh, int enable);
+
+/*
+ * Delegation support
+ *
+ * Delegations are way for an application to request exclusive or
+ * semi-exclusive access to an Inode. The client requests the delegation and
+ * if it's successful it can reliably cache file data and metadata until the
+ * delegation is recalled.
+ *
+ * Recalls are issued via a callback function, provided by the application.
+ * Callback functions should act something like signal handlers. You want to
+ * do as little as possible in the callback. Any major work should be deferred
+ * in some fashion as it's difficult to predict the context in which this
+ * function will be called.
+ *
+ * Once the delegation has been recalled, the application should return it as
+ * soon as possible. The application has client_deleg_timeout seconds to
+ * return it, after which the cmount structure is forcibly unmounted and
+ * further calls into it fail.
+ *
+ * The application can set the client_deleg_timeout config option to suit its
+ * needs, but it should take care to choose a value that allows it to avoid
+ * forcible eviction from the cluster in the event of an application bug.
+ */
+
+/* Commands for manipulating delegation state */
+#ifndef CEPH_DELEGATION_NONE
+# define CEPH_DELEGATION_NONE 0
+# define CEPH_DELEGATION_RD 1
+# define CEPH_DELEGATION_WR 2
+#endif
+
+/**
+ * Get the amount of time that the client has to return caps
+ * @param cmount the ceph mount handle to use.
+ *
+ * In the event that a client does not return its caps, the MDS may blocklist
+ * it after this timeout. Applications should check this value and ensure
+ * that they set the delegation timeout to a value lower than this.
+ *
+ * This call returns the cap return timeout (in seconds) for this cmount, or
+ * zero if it's not mounted.
+ */
+uint32_t ceph_get_cap_return_timeout(struct ceph_mount_info *cmount);
+
+/**
+ * Set the delegation timeout for the mount (thereby enabling delegations)
+ * @param cmount the ceph mount handle to use.
+ * @param timeout the delegation timeout (in seconds)
+ *
+ * Since the client could end up blocklisted if it doesn't return delegations
+ * in time, we mandate that any application wanting to use delegations
+ * explicitly set the timeout beforehand. Until this call is done on the
+ * mount, attempts to set a delegation will return -ETIME.
+ *
+ * Once a delegation is recalled, if it is not returned in this amount of
+ * time, the cmount will be forcibly unmounted and further access attempts
+ * will fail (usually with -ENOTCONN errors).
+ *
+ * This value is further vetted against the cap return timeout, and this call
+ * can fail with -EINVAL if the timeout value is too long. Delegations can be
+ * disabled again by setting the timeout to 0.
+ */
+int ceph_set_deleg_timeout(struct ceph_mount_info *cmount, uint32_t timeout);
+
+/**
+ * Request a delegation on an open Fh
+ * @param cmount the ceph mount handle to use.
+ * @param fh file handle
+ * @param cmd CEPH_DELEGATION_* command
+ * @param cb callback function for recalling delegation
+ * @param priv opaque token passed back during recalls
+ *
+ * Returns 0 if the delegation was granted, -EAGAIN if there was a conflict
+ * and other error codes if there is a fatal error of some sort (e.g. -ENOMEM,
+ * -ETIME)
+ */
+int ceph_ll_delegation(struct ceph_mount_info *cmount, Fh *fh,
+ unsigned int cmd, ceph_deleg_cb_t cb, void *priv);
+
+mode_t ceph_umask(struct ceph_mount_info *cmount, mode_t mode);
+
+/* state reclaim */
+#define CEPH_RECLAIM_RESET 1
+
+/**
+ * Set ceph client uuid
+ * @param cmount the ceph mount handle to use.
+ * @param uuid the uuid to set
+ *
+ * Must be called before mount.
+ */
+void ceph_set_uuid(struct ceph_mount_info *cmount, const char *uuid);
+
+/**
+ * Set ceph client session timeout
+ * @param cmount the ceph mount handle to use.
+ * @param timeout the timeout to set
+ *
+ * Must be called before mount.
+ */
+void ceph_set_session_timeout(struct ceph_mount_info *cmount, unsigned timeout);
+
+/**
+ * Start to reclaim states of other client
+ * @param cmount the ceph mount handle to use.
+ * @param uuid uuid of client whose states need to be reclaimed
+ * @param flags flags that control how states get reclaimed
+ *
+ * Returns 0 success, -EOPNOTSUPP if mds does not support the operation,
+ * -ENOENT if CEPH_RECLAIM_RESET is specified and there is no client
+ * with the given uuid, -ENOTRECOVERABLE in all other error cases.
+ */
+int ceph_start_reclaim(struct ceph_mount_info *cmount,
+ const char *uuid, unsigned flags);
+
+/**
+ * finish reclaiming states of other client (
+ * @param cmount the ceph mount handle to use.
+ */
+void ceph_finish_reclaim(struct ceph_mount_info *cmount);
+
+/**
+ * Register a set of callbacks to be used with this cmount
+ *
+ * This is deprecated, use ceph_ll_register_callbacks2() instead.
+ *
+ * @param cmount the ceph mount handle on which the cb's should be registerd
+ * @param args callback arguments to register with the cmount
+ *
+ * Any fields set to NULL will be ignored. There currently is no way to
+ * unregister these callbacks, so this is a one-way change.
+ */
+void ceph_ll_register_callbacks(struct ceph_mount_info *cmount,
+ struct ceph_client_callback_args *args);
+
+/**
+ * Register a set of callbacks to be used with this cmount
+ * @param cmount the ceph mount handle on which the cb's should be registerd
+ * @param args callback arguments to register with the cmount
+ *
+ * Any fields set to NULL will be ignored. There currently is no way to
+ * unregister these callbacks, so this is a one-way change.
+ *
+ * Returns 0 on success or -EBUSY if the cmount is mounting or already mounted.
+ */
+int ceph_ll_register_callbacks2(struct ceph_mount_info *cmount,
+ struct ceph_client_callback_args *args);
+
+/**
+ * Get snapshot info
+ *
+ * @param cmount the ceph mount handle to use for making the directory.
+ * @param path the path of the snapshot. This must be either an
+ * absolute path or a relative path off of the current working directory.
+ * @returns 0 on success or a negative return code on error.
+ */
+int ceph_get_snap_info(struct ceph_mount_info *cmount,
+ const char *path, struct snap_info *snap_info);
+
+/**
+ * Free snapshot info buffers
+ *
+ * @param snap_info snapshot info struct (fetched via call to ceph_get_snap_info()).
+ */
+void ceph_free_snap_info_buffer(struct snap_info *snap_info);
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/src/include/cephfs/metrics/Types.h b/src/include/cephfs/metrics/Types.h
new file mode 100644
index 000000000..d7cf56138
--- /dev/null
+++ b/src/include/cephfs/metrics/Types.h
@@ -0,0 +1,699 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef CEPH_INCLUDE_CEPHFS_METRICS_TYPES_H
+#define CEPH_INCLUDE_CEPHFS_METRICS_TYPES_H
+
+#include <string>
+#include <boost/variant.hpp>
+
+#include "common/Formatter.h"
+#include "include/buffer_fwd.h"
+#include "include/encoding.h"
+#include "include/int_types.h"
+#include "include/stringify.h"
+#include "include/utime.h"
+
+namespace ceph { class Formatter; }
+
+enum ClientMetricType {
+ CLIENT_METRIC_TYPE_CAP_INFO,
+ CLIENT_METRIC_TYPE_READ_LATENCY,
+ CLIENT_METRIC_TYPE_WRITE_LATENCY,
+ CLIENT_METRIC_TYPE_METADATA_LATENCY,
+ CLIENT_METRIC_TYPE_DENTRY_LEASE,
+ CLIENT_METRIC_TYPE_OPENED_FILES,
+ CLIENT_METRIC_TYPE_PINNED_ICAPS,
+ CLIENT_METRIC_TYPE_OPENED_INODES,
+ CLIENT_METRIC_TYPE_READ_IO_SIZES,
+ CLIENT_METRIC_TYPE_WRITE_IO_SIZES,
+ CLIENT_METRIC_TYPE_AVG_READ_LATENCY,
+ CLIENT_METRIC_TYPE_STDEV_READ_LATENCY,
+ CLIENT_METRIC_TYPE_AVG_WRITE_LATENCY,
+ CLIENT_METRIC_TYPE_STDEV_WRITE_LATENCY,
+ CLIENT_METRIC_TYPE_AVG_METADATA_LATENCY,
+ CLIENT_METRIC_TYPE_STDEV_METADATA_LATENCY,
+};
+inline std::ostream &operator<<(std::ostream &os, const ClientMetricType &type) {
+ switch(type) {
+ case ClientMetricType::CLIENT_METRIC_TYPE_CAP_INFO:
+ os << "CAP_INFO";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_READ_LATENCY:
+ os << "READ_LATENCY";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_WRITE_LATENCY:
+ os << "WRITE_LATENCY";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_METADATA_LATENCY:
+ os << "METADATA_LATENCY";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_DENTRY_LEASE:
+ os << "DENTRY_LEASE";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_OPENED_FILES:
+ os << "OPENED_FILES";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_PINNED_ICAPS:
+ os << "PINNED_ICAPS";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_OPENED_INODES:
+ os << "OPENED_INODES";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_READ_IO_SIZES:
+ os << "READ_IO_SIZES";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_WRITE_IO_SIZES:
+ os << "WRITE_IO_SIZES";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_AVG_READ_LATENCY:
+ os << "AVG_READ_LATENCY";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_STDEV_READ_LATENCY:
+ os << "STDEV_READ_LATENCY";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_AVG_WRITE_LATENCY:
+ os << "AVG_WRITE_LATENCY";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_STDEV_WRITE_LATENCY:
+ os << "STDEV_WRITE_LATENCY";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_AVG_METADATA_LATENCY:
+ os << "AVG_METADATA_LATENCY";
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_STDEV_METADATA_LATENCY:
+ os << "STDEV_METADATA_LATENCY";
+ break;
+ default:
+ os << "(UNKNOWN:" << static_cast<std::underlying_type<ClientMetricType>::type>(type) << ")";
+ break;
+ }
+
+ return os;
+}
+
+struct ClientMetricPayloadBase {
+ ClientMetricPayloadBase(ClientMetricType type) : metric_type(type) {}
+
+ ClientMetricType get_type() const {
+ return metric_type;
+ }
+
+ void print_type(std::ostream *out) const {
+ *out << metric_type;
+ }
+
+ private:
+ ClientMetricType metric_type;
+};
+
+struct CapInfoPayload : public ClientMetricPayloadBase {
+ uint64_t cap_hits = 0;
+ uint64_t cap_misses = 0;
+ uint64_t nr_caps = 0;
+
+ CapInfoPayload()
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_CAP_INFO) { }
+ CapInfoPayload(uint64_t cap_hits, uint64_t cap_misses, uint64_t nr_caps)
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_CAP_INFO),
+ cap_hits(cap_hits), cap_misses(cap_misses), nr_caps(nr_caps) {
+ }
+
+ void encode(bufferlist &bl) const {
+ using ceph::encode;
+ ENCODE_START(1, 1, bl);
+ encode(cap_hits, bl);
+ encode(cap_misses, bl);
+ encode(nr_caps, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ DECODE_START(1, iter);
+ decode(cap_hits, iter);
+ decode(cap_misses, iter);
+ decode(nr_caps, iter);
+ DECODE_FINISH(iter);
+ }
+
+ void dump(Formatter *f) const {
+ f->dump_int("cap_hits", cap_hits);
+ f->dump_int("cap_misses", cap_misses);
+ f->dump_int("num_caps", nr_caps);
+ }
+
+ void print(std::ostream *out) const {
+ *out << "cap_hits: " << cap_hits << " "
+ << "cap_misses: " << cap_misses << " "
+ << "num_caps: " << nr_caps;
+ }
+};
+
+struct ReadLatencyPayload : public ClientMetricPayloadBase {
+ utime_t lat;
+ utime_t mean;
+ uint64_t sq_sum; // sum of squares
+ uint64_t count; // IO count
+
+ ReadLatencyPayload()
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_READ_LATENCY) { }
+ ReadLatencyPayload(utime_t lat, utime_t mean, uint64_t sq_sum, uint64_t count)
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_READ_LATENCY),
+ lat(lat),
+ mean(mean),
+ sq_sum(sq_sum),
+ count(count) {
+ }
+
+ void encode(bufferlist &bl) const {
+ using ceph::encode;
+ ENCODE_START(2, 1, bl);
+ encode(lat, bl);
+ encode(mean, bl);
+ encode(sq_sum, bl);
+ encode(count, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ DECODE_START(2, iter);
+ decode(lat, iter);
+ if (struct_v >= 2) {
+ decode(mean, iter);
+ decode(sq_sum, iter);
+ decode(count, iter);
+ }
+ DECODE_FINISH(iter);
+ }
+
+ void dump(Formatter *f) const {
+ f->dump_int("latency", lat);
+ f->dump_int("avg_latency", mean);
+ f->dump_unsigned("sq_sum", sq_sum);
+ f->dump_unsigned("count", count);
+ }
+
+ void print(std::ostream *out) const {
+ *out << "latency: " << lat << ", avg_latency: " << mean
+ << ", sq_sum: " << sq_sum << ", count=" << count;
+ }
+};
+
+struct WriteLatencyPayload : public ClientMetricPayloadBase {
+ utime_t lat;
+ utime_t mean;
+ uint64_t sq_sum; // sum of squares
+ uint64_t count; // IO count
+
+ WriteLatencyPayload()
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_WRITE_LATENCY) { }
+ WriteLatencyPayload(utime_t lat, utime_t mean, uint64_t sq_sum, uint64_t count)
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_WRITE_LATENCY),
+ lat(lat),
+ mean(mean),
+ sq_sum(sq_sum),
+ count(count){
+ }
+
+ void encode(bufferlist &bl) const {
+ using ceph::encode;
+ ENCODE_START(2, 1, bl);
+ encode(lat, bl);
+ encode(mean, bl);
+ encode(sq_sum, bl);
+ encode(count, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ DECODE_START(2, iter);
+ decode(lat, iter);
+ if (struct_v >= 2) {
+ decode(mean, iter);
+ decode(sq_sum, iter);
+ decode(count, iter);
+ }
+ DECODE_FINISH(iter);
+ }
+
+ void dump(Formatter *f) const {
+ f->dump_int("latency", lat);
+ f->dump_int("avg_latency", mean);
+ f->dump_unsigned("sq_sum", sq_sum);
+ f->dump_unsigned("count", count);
+ }
+
+ void print(std::ostream *out) const {
+ *out << "latency: " << lat << ", avg_latency: " << mean
+ << ", sq_sum: " << sq_sum << ", count=" << count;
+ }
+};
+
+struct MetadataLatencyPayload : public ClientMetricPayloadBase {
+ utime_t lat;
+ utime_t mean;
+ uint64_t sq_sum; // sum of squares
+ uint64_t count; // IO count
+
+ MetadataLatencyPayload()
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_METADATA_LATENCY) { }
+ MetadataLatencyPayload(utime_t lat, utime_t mean, uint64_t sq_sum, uint64_t count)
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_METADATA_LATENCY),
+ lat(lat),
+ mean(mean),
+ sq_sum(sq_sum),
+ count(count) {
+ }
+
+ void encode(bufferlist &bl) const {
+ using ceph::encode;
+ ENCODE_START(2, 1, bl);
+ encode(lat, bl);
+ encode(mean, bl);
+ encode(sq_sum, bl);
+ encode(count, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ DECODE_START(2, iter);
+ decode(lat, iter);
+ if (struct_v >= 2) {
+ decode(mean, iter);
+ decode(sq_sum, iter);
+ decode(count, iter);
+ }
+ DECODE_FINISH(iter);
+ }
+
+ void dump(Formatter *f) const {
+ f->dump_int("latency", lat);
+ f->dump_int("avg_latency", mean);
+ f->dump_unsigned("sq_sum", sq_sum);
+ f->dump_unsigned("count", count);
+ }
+
+ void print(std::ostream *out) const {
+ *out << "latency: " << lat << ", avg_latency: " << mean
+ << ", sq_sum: " << sq_sum << ", count=" << count;
+ }
+};
+
+struct DentryLeasePayload : public ClientMetricPayloadBase {
+ uint64_t dlease_hits = 0;
+ uint64_t dlease_misses = 0;
+ uint64_t nr_dentries = 0;
+
+ DentryLeasePayload()
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_DENTRY_LEASE) { }
+ DentryLeasePayload(uint64_t dlease_hits, uint64_t dlease_misses, uint64_t nr_dentries)
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_DENTRY_LEASE),
+ dlease_hits(dlease_hits), dlease_misses(dlease_misses), nr_dentries(nr_dentries) { }
+
+ void encode(bufferlist &bl) const {
+ using ceph::encode;
+ ENCODE_START(1, 1, bl);
+ encode(dlease_hits, bl);
+ encode(dlease_misses, bl);
+ encode(nr_dentries, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ DECODE_START(1, iter);
+ decode(dlease_hits, iter);
+ decode(dlease_misses, iter);
+ decode(nr_dentries, iter);
+ DECODE_FINISH(iter);
+ }
+
+ void dump(Formatter *f) const {
+ f->dump_int("dlease_hits", dlease_hits);
+ f->dump_int("dlease_misses", dlease_misses);
+ f->dump_int("num_dentries", nr_dentries);
+ }
+
+ void print(std::ostream *out) const {
+ *out << "dlease_hits: " << dlease_hits << " "
+ << "dlease_misses: " << dlease_misses << " "
+ << "num_dentries: " << nr_dentries;
+ }
+};
+
+struct OpenedFilesPayload : public ClientMetricPayloadBase {
+ uint64_t opened_files = 0;
+ uint64_t total_inodes = 0;
+
+ OpenedFilesPayload()
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_OPENED_FILES) { }
+ OpenedFilesPayload(uint64_t opened_files, uint64_t total_inodes)
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_OPENED_FILES),
+ opened_files(opened_files), total_inodes(total_inodes) { }
+
+ void encode(bufferlist &bl) const {
+ using ceph::encode;
+ ENCODE_START(1, 1, bl);
+ encode(opened_files, bl);
+ encode(total_inodes, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ DECODE_START(1, iter);
+ decode(opened_files, iter);
+ decode(total_inodes, iter);
+ DECODE_FINISH(iter);
+ }
+
+ void dump(Formatter *f) const {
+ f->dump_int("opened_files", opened_files);
+ f->dump_int("total_inodes", total_inodes);
+ }
+
+ void print(std::ostream *out) const {
+ *out << "opened_files: " << opened_files << " "
+ << "total_inodes: " << total_inodes;
+ }
+};
+
+struct PinnedIcapsPayload : public ClientMetricPayloadBase {
+ uint64_t pinned_icaps = 0;
+ uint64_t total_inodes = 0;
+
+ PinnedIcapsPayload()
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_PINNED_ICAPS) { }
+ PinnedIcapsPayload(uint64_t pinned_icaps, uint64_t total_inodes)
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_PINNED_ICAPS),
+ pinned_icaps(pinned_icaps), total_inodes(total_inodes) { }
+
+ void encode(bufferlist &bl) const {
+ using ceph::encode;
+ ENCODE_START(1, 1, bl);
+ encode(pinned_icaps, bl);
+ encode(total_inodes, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ DECODE_START(1, iter);
+ decode(pinned_icaps, iter);
+ decode(total_inodes, iter);
+ DECODE_FINISH(iter);
+ }
+
+ void dump(Formatter *f) const {
+ f->dump_int("pinned_icaps", pinned_icaps);
+ f->dump_int("total_inodes", total_inodes);
+ }
+
+ void print(std::ostream *out) const {
+ *out << "pinned_icaps: " << pinned_icaps << " "
+ << "total_inodes: " << total_inodes;
+ }
+};
+
+struct OpenedInodesPayload : public ClientMetricPayloadBase {
+ uint64_t opened_inodes = 0;
+ uint64_t total_inodes = 0;
+
+ OpenedInodesPayload()
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_OPENED_INODES) { }
+ OpenedInodesPayload(uint64_t opened_inodes, uint64_t total_inodes)
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_OPENED_INODES),
+ opened_inodes(opened_inodes), total_inodes(total_inodes) { }
+
+ void encode(bufferlist &bl) const {
+ using ceph::encode;
+ ENCODE_START(1, 1, bl);
+ encode(opened_inodes, bl);
+ encode(total_inodes, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ DECODE_START(1, iter);
+ decode(opened_inodes, iter);
+ decode(total_inodes, iter);
+ DECODE_FINISH(iter);
+ }
+
+ void dump(Formatter *f) const {
+ f->dump_int("opened_inodes", opened_inodes);
+ f->dump_int("total_inodes", total_inodes);
+ }
+
+ void print(std::ostream *out) const {
+ *out << "opened_inodes: " << opened_inodes << " "
+ << "total_inodes: " << total_inodes;
+ }
+};
+
+struct ReadIoSizesPayload : public ClientMetricPayloadBase {
+ uint64_t total_ops = 0;
+ uint64_t total_size = 0;
+
+ ReadIoSizesPayload()
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_READ_IO_SIZES) { }
+ ReadIoSizesPayload(uint64_t total_ops, uint64_t total_size)
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_READ_IO_SIZES),
+ total_ops(total_ops), total_size(total_size) { }
+
+ void encode(bufferlist &bl) const {
+ using ceph::encode;
+ ENCODE_START(1, 1, bl);
+ encode(total_ops, bl);
+ encode(total_size, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ DECODE_START(1, iter);
+ decode(total_ops, iter);
+ decode(total_size, iter);
+ DECODE_FINISH(iter);
+ }
+
+ void dump(Formatter *f) const {
+ f->dump_int("total_ops", total_ops);
+ f->dump_int("total_size", total_size);
+ }
+
+ void print(std::ostream *out) const {
+ *out << "total_ops: " << total_ops << " total_size: " << total_size;
+ }
+};
+
+struct WriteIoSizesPayload : public ClientMetricPayloadBase {
+ uint64_t total_ops = 0;
+ uint64_t total_size = 0;
+
+ WriteIoSizesPayload()
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_WRITE_IO_SIZES) { }
+ WriteIoSizesPayload(uint64_t total_ops, uint64_t total_size)
+ : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_WRITE_IO_SIZES),
+ total_ops(total_ops), total_size(total_size) {
+ }
+
+ void encode(bufferlist &bl) const {
+ using ceph::encode;
+ ENCODE_START(1, 1, bl);
+ encode(total_ops, bl);
+ encode(total_size, bl);
+ ENCODE_FINISH(bl);
+ }
+
+ void decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ DECODE_START(1, iter);
+ decode(total_ops, iter);
+ decode(total_size, iter);
+ DECODE_FINISH(iter);
+ }
+
+ void dump(Formatter *f) const {
+ f->dump_int("total_ops", total_ops);
+ f->dump_int("total_size", total_size);
+ }
+
+ void print(std::ostream *out) const {
+ *out << "total_ops: " << total_ops << " total_size: " << total_size;
+ }
+};
+
+struct UnknownPayload : public ClientMetricPayloadBase {
+ UnknownPayload()
+ : ClientMetricPayloadBase(static_cast<ClientMetricType>(-1)) { }
+ UnknownPayload(ClientMetricType metric_type)
+ : ClientMetricPayloadBase(metric_type) { }
+
+ void encode(bufferlist &bl) const {
+ }
+
+ void decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+ DECODE_START(254, iter);
+ iter.seek(struct_len);
+ DECODE_FINISH(iter);
+ }
+
+ void dump(Formatter *f) const {
+ }
+
+ void print(std::ostream *out) const {
+ }
+};
+
+typedef boost::variant<CapInfoPayload,
+ ReadLatencyPayload,
+ WriteLatencyPayload,
+ MetadataLatencyPayload,
+ DentryLeasePayload,
+ OpenedFilesPayload,
+ PinnedIcapsPayload,
+ OpenedInodesPayload,
+ ReadIoSizesPayload,
+ WriteIoSizesPayload,
+ UnknownPayload> ClientMetricPayload;
+
+// metric update message sent by clients
+struct ClientMetricMessage {
+public:
+ ClientMetricMessage(const ClientMetricPayload &payload = UnknownPayload())
+ : payload(payload) {
+ }
+
+ class EncodePayloadVisitor : public boost::static_visitor<void> {
+ public:
+ explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) {
+ }
+
+ template <typename ClientMetricPayload>
+ inline void operator()(const ClientMetricPayload &payload) const {
+ using ceph::encode;
+ encode(static_cast<uint32_t>(payload.get_type()), m_bl);
+ payload.encode(m_bl);
+ }
+
+ private:
+ bufferlist &m_bl;
+ };
+
+ class DecodePayloadVisitor : public boost::static_visitor<void> {
+ public:
+ DecodePayloadVisitor(bufferlist::const_iterator &iter) : m_iter(iter) {
+ }
+
+ template <typename ClientMetricPayload>
+ inline void operator()(ClientMetricPayload &payload) const {
+ using ceph::decode;
+ payload.decode(m_iter);
+ }
+
+ private:
+ bufferlist::const_iterator &m_iter;
+ };
+
+ class DumpPayloadVisitor : public boost::static_visitor<void> {
+ public:
+ explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) {
+ }
+
+ template <typename ClientMetricPayload>
+ inline void operator()(const ClientMetricPayload &payload) const {
+ m_formatter->dump_string("client_metric_type", stringify(payload.get_type()));
+ payload.dump(m_formatter);
+ }
+
+ private:
+ Formatter *m_formatter;
+ };
+
+ class PrintPayloadVisitor : public boost::static_visitor<void> {
+ public:
+ explicit PrintPayloadVisitor(std::ostream *out) : _out(out) {
+ }
+
+ template <typename ClientMetricPayload>
+ inline void operator()(const ClientMetricPayload &payload) const {
+ *_out << "[client_metric_type: ";
+ payload.print_type(_out);
+ *_out << " ";
+ payload.print(_out);
+ *_out << "]";
+ }
+
+ private:
+ std::ostream *_out;
+ };
+
+ void encode(bufferlist &bl) const {
+ boost::apply_visitor(EncodePayloadVisitor(bl), payload);
+ }
+
+ void decode(bufferlist::const_iterator &iter) {
+ using ceph::decode;
+
+ uint32_t metric_type;
+ decode(metric_type, iter);
+
+ switch (metric_type) {
+ case ClientMetricType::CLIENT_METRIC_TYPE_CAP_INFO:
+ payload = CapInfoPayload();
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_READ_LATENCY:
+ payload = ReadLatencyPayload();
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_WRITE_LATENCY:
+ payload = WriteLatencyPayload();
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_METADATA_LATENCY:
+ payload = MetadataLatencyPayload();
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_DENTRY_LEASE:
+ payload = DentryLeasePayload();
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_OPENED_FILES:
+ payload = OpenedFilesPayload();
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_PINNED_ICAPS:
+ payload = PinnedIcapsPayload();
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_OPENED_INODES:
+ payload = OpenedInodesPayload();
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_READ_IO_SIZES:
+ payload = ReadIoSizesPayload();
+ break;
+ case ClientMetricType::CLIENT_METRIC_TYPE_WRITE_IO_SIZES:
+ payload = WriteIoSizesPayload();
+ break;
+ default:
+ payload = UnknownPayload(static_cast<ClientMetricType>(metric_type));
+ break;
+ }
+
+ boost::apply_visitor(DecodePayloadVisitor(iter), payload);
+ }
+
+ void dump(Formatter *f) const {
+ apply_visitor(DumpPayloadVisitor(f), payload);
+ }
+
+ void print(std::ostream *out) const {
+ apply_visitor(PrintPayloadVisitor(out), payload);
+ }
+
+ ClientMetricPayload payload;
+};
+WRITE_CLASS_ENCODER(ClientMetricMessage);
+
+#endif // CEPH_INCLUDE_CEPHFS_METRICS_TYPES_H
diff --git a/src/include/cephfs/types.h b/src/include/cephfs/types.h
new file mode 100644
index 000000000..cca0a6193
--- /dev/null
+++ b/src/include/cephfs/types.h
@@ -0,0 +1,970 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2020 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ */
+#ifndef CEPH_CEPHFS_TYPES_H
+#define CEPH_CEPHFS_TYPES_H
+#include "include/int_types.h"
+
+#include <ostream>
+#include <set>
+#include <map>
+#include <string_view>
+
+#include "common/config.h"
+#include "common/Clock.h"
+#include "common/DecayCounter.h"
+#include "common/StackStringStream.h"
+#include "common/entity_name.h"
+
+#include "include/compat.h"
+#include "include/Context.h"
+#include "include/frag.h"
+#include "include/xlist.h"
+#include "include/interval_set.h"
+#include "include/compact_set.h"
+#include "include/fs_types.h"
+#include "include/ceph_fs.h"
+
+#include "mds/inode_backtrace.h"
+
+#include <boost/spirit/include/qi.hpp>
+#include <boost/pool/pool.hpp>
+#include "include/ceph_assert.h"
+#include <boost/serialization/strong_typedef.hpp>
+#include "common/ceph_json.h"
+
+#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v011"
+#define MAX_MDS 0x100
+
+BOOST_STRONG_TYPEDEF(uint64_t, mds_gid_t)
+extern const mds_gid_t MDS_GID_NONE;
+
+typedef int32_t fs_cluster_id_t;
+constexpr fs_cluster_id_t FS_CLUSTER_ID_NONE = -1;
+
+// The namespace ID of the anonymous default filesystem from legacy systems
+constexpr fs_cluster_id_t FS_CLUSTER_ID_ANONYMOUS = 0;
+
+typedef int32_t mds_rank_t;
+constexpr mds_rank_t MDS_RANK_NONE = -1;
+constexpr mds_rank_t MDS_RANK_EPHEMERAL_DIST = -2;
+constexpr mds_rank_t MDS_RANK_EPHEMERAL_RAND = -3;
+
+struct scatter_info_t {
+ version_t version = 0;
+};
+
+struct frag_info_t : public scatter_info_t {
+ int64_t size() const { return nfiles + nsubdirs; }
+
+ void zero() {
+ *this = frag_info_t();
+ }
+
+ // *this += cur - acc;
+ void add_delta(const frag_info_t &cur, const frag_info_t &acc, bool *touched_mtime=0, bool *touched_chattr=0) {
+ if (cur.mtime > mtime) {
+ mtime = cur.mtime;
+ if (touched_mtime)
+ *touched_mtime = true;
+ }
+ if (cur.change_attr > change_attr) {
+ change_attr = cur.change_attr;
+ if (touched_chattr)
+ *touched_chattr = true;
+ }
+ nfiles += cur.nfiles - acc.nfiles;
+ nsubdirs += cur.nsubdirs - acc.nsubdirs;
+ }
+
+ void add(const frag_info_t& other) {
+ if (other.mtime > mtime)
+ mtime = other.mtime;
+ if (other.change_attr > change_attr)
+ change_attr = other.change_attr;
+ nfiles += other.nfiles;
+ nsubdirs += other.nsubdirs;
+ }
+
+ bool same_sums(const frag_info_t &o) const {
+ return mtime <= o.mtime &&
+ nfiles == o.nfiles &&
+ nsubdirs == o.nsubdirs;
+ }
+
+ void encode(ceph::buffer::list &bl) const;
+ void decode(ceph::buffer::list::const_iterator& bl);
+ void dump(ceph::Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ static void generate_test_instances(std::list<frag_info_t*>& ls);
+
+ // this frag
+ utime_t mtime;
+ uint64_t change_attr = 0;
+ int64_t nfiles = 0; // files
+ int64_t nsubdirs = 0; // subdirs
+};
+WRITE_CLASS_ENCODER(frag_info_t)
+
+inline bool operator==(const frag_info_t &l, const frag_info_t &r) {
+ return memcmp(&l, &r, sizeof(l)) == 0;
+}
+inline bool operator!=(const frag_info_t &l, const frag_info_t &r) {
+ return !(l == r);
+}
+
+std::ostream& operator<<(std::ostream &out, const frag_info_t &f);
+
+struct nest_info_t : public scatter_info_t {
+ int64_t rsize() const { return rfiles + rsubdirs; }
+
+ void zero() {
+ *this = nest_info_t();
+ }
+
+ void sub(const nest_info_t &other) {
+ add(other, -1);
+ }
+ void add(const nest_info_t &other, int fac=1) {
+ if (other.rctime > rctime)
+ rctime = other.rctime;
+ rbytes += fac*other.rbytes;
+ rfiles += fac*other.rfiles;
+ rsubdirs += fac*other.rsubdirs;
+ rsnaps += fac*other.rsnaps;
+ }
+
+ // *this += cur - acc;
+ void add_delta(const nest_info_t &cur, const nest_info_t &acc) {
+ if (cur.rctime > rctime)
+ rctime = cur.rctime;
+ rbytes += cur.rbytes - acc.rbytes;
+ rfiles += cur.rfiles - acc.rfiles;
+ rsubdirs += cur.rsubdirs - acc.rsubdirs;
+ rsnaps += cur.rsnaps - acc.rsnaps;
+ }
+
+ bool same_sums(const nest_info_t &o) const {
+ return rctime <= o.rctime &&
+ rbytes == o.rbytes &&
+ rfiles == o.rfiles &&
+ rsubdirs == o.rsubdirs &&
+ rsnaps == o.rsnaps;
+ }
+
+ void encode(ceph::buffer::list &bl) const;
+ void decode(ceph::buffer::list::const_iterator& bl);
+ void dump(ceph::Formatter *f) const;
+ void decode_json(JSONObj *obj);
+ static void generate_test_instances(std::list<nest_info_t*>& ls);
+
+ // this frag + children
+ utime_t rctime;
+ int64_t rbytes = 0;
+ int64_t rfiles = 0;
+ int64_t rsubdirs = 0;
+ int64_t rsnaps = 0;
+};
+WRITE_CLASS_ENCODER(nest_info_t)
+
+inline bool operator==(const nest_info_t &l, const nest_info_t &r) {
+ return memcmp(&l, &r, sizeof(l)) == 0;
+}
+inline bool operator!=(const nest_info_t &l, const nest_info_t &r) {
+ return !(l == r);
+}
+
+std::ostream& operator<<(std::ostream &out, const nest_info_t &n);
+
+struct vinodeno_t {
+ vinodeno_t() {}
+ vinodeno_t(inodeno_t i, snapid_t s) : ino(i), snapid(s) {}
+
+ void encode(ceph::buffer::list& bl) const {
+ using ceph::encode;
+ encode(ino, bl);
+ encode(snapid, bl);
+ }
+ void decode(ceph::buffer::list::const_iterator& p) {
+ using ceph::decode;
+ decode(ino, p);
+ decode(snapid, p);
+ }
+
+ inodeno_t ino;
+ snapid_t snapid;
+};
+WRITE_CLASS_ENCODER(vinodeno_t)
+
+inline bool operator==(const vinodeno_t &l, const vinodeno_t &r) {
+ return l.ino == r.ino && l.snapid == r.snapid;
+}
+inline bool operator!=(const vinodeno_t &l, const vinodeno_t &r) {
+ return !(l == r);
+}
+inline bool operator<(const vinodeno_t &l, const vinodeno_t &r) {
+ return
+ l.ino < r.ino ||
+ (l.ino == r.ino && l.snapid < r.snapid);
+}
+
+typedef enum {
+ QUOTA_MAX_FILES,
+ QUOTA_MAX_BYTES,
+ QUOTA_ANY
+} quota_max_t;
+
+struct quota_info_t
+{
+ void encode(ceph::buffer::list& bl) const {
+ ENCODE_START(1, 1, bl);
+ encode(max_bytes, bl);
+ encode(max_files, bl);
+ ENCODE_FINISH(bl);
+ }
+ void decode(ceph::buffer::list::const_iterator& p) {
+ DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, p);
+ decode(max_bytes, p);
+ decode(max_files, p);
+ DECODE_FINISH(p);
+ }
+
+ void dump(ceph::Formatter *f) const;
+ static void generate_test_instances(std::list<quota_info_t *>& ls);
+
+ bool is_valid() const {
+ return max_bytes >=0 && max_files >=0;
+ }
+ bool is_enabled(quota_max_t type=QUOTA_ANY) const {
+ switch (type) {
+ case QUOTA_MAX_FILES:
+ return !!max_files;
+ case QUOTA_MAX_BYTES:
+ return !!max_bytes;
+ case QUOTA_ANY:
+ default:
+ return !!max_bytes || !!max_files;
+ }
+ }
+ void decode_json(JSONObj *obj);
+
+ int64_t max_bytes = 0;
+ int64_t max_files = 0;
+};
+WRITE_CLASS_ENCODER(quota_info_t)
+
+inline bool operator==(const quota_info_t &l, const quota_info_t &r) {
+ return memcmp(&l, &r, sizeof(l)) == 0;
+}
+
+std::ostream& operator<<(std::ostream &out, const quota_info_t &n);
+
+struct client_writeable_range_t {
+ struct byte_range_t {
+ uint64_t first = 0, last = 0; // interval client can write to
+ byte_range_t() {}
+ void decode_json(JSONObj *obj);
+ };
+
+ void encode(ceph::buffer::list &bl) const;
+ void decode(ceph::buffer::list::const_iterator& bl);
+ void dump(ceph::Formatter *f) const;
+ static void generate_test_instances(std::list<client_writeable_range_t*>& ls);
+
+ byte_range_t range;
+ snapid_t follows = 0; // aka "data+metadata flushed thru"
+};
+
+inline void decode(client_writeable_range_t::byte_range_t& range, ceph::buffer::list::const_iterator& bl) {
+ using ceph::decode;
+ decode(range.first, bl);
+ decode(range.last, bl);
+}
+
+WRITE_CLASS_ENCODER(client_writeable_range_t)
+
+std::ostream& operator<<(std::ostream& out, const client_writeable_range_t& r);
+
+inline bool operator==(const client_writeable_range_t& l,
+ const client_writeable_range_t& r) {
+ return l.range.first == r.range.first && l.range.last == r.range.last &&
+ l.follows == r.follows;
+}
+
+struct inline_data_t {
+public:
+ inline_data_t() {}
+ inline_data_t(const inline_data_t& o) : version(o.version) {
+ if (o.blp)
+ set_data(*o.blp);
+ }
+ inline_data_t& operator=(const inline_data_t& o) {
+ version = o.version;
+ if (o.blp)
+ set_data(*o.blp);
+ else
+ free_data();
+ return *this;
+ }
+
+ void free_data() {
+ blp.reset();
+ }
+ void get_data(ceph::buffer::list& ret) const {
+ if (blp)
+ ret = *blp;
+ else
+ ret.clear();
+ }
+ void set_data(const ceph::buffer::list& bl) {
+ if (!blp)
+ blp.reset(new ceph::buffer::list);
+ *blp = bl;
+ }
+ size_t length() const { return blp ? blp->length() : 0; }
+
+ bool operator==(const inline_data_t& o) const {
+ return length() == o.length() &&
+ (length() == 0 ||
+ (*const_cast<ceph::buffer::list*>(blp.get()) == *const_cast<ceph::buffer::list*>(o.blp.get())));
+ }
+ bool operator!=(const inline_data_t& o) const {
+ return !(*this == o);
+ }
+ void encode(ceph::buffer::list &bl) const;
+ void decode(ceph::buffer::list::const_iterator& bl);
+
+ version_t version = 1;
+
+private:
+ std::unique_ptr<ceph::buffer::list> blp;
+};
+WRITE_CLASS_ENCODER(inline_data_t)
+
+enum {
+ DAMAGE_STATS, // statistics (dirstat, size, etc)
+ DAMAGE_RSTATS, // recursive statistics (rstat, accounted_rstat)
+ DAMAGE_FRAGTREE // fragtree -- repair by searching
+};
+
+template<template<typename> class Allocator = std::allocator>
+struct inode_t {
+ /**
+ * ***************
+ * Do not forget to add any new fields to the compare() function.
+ * ***************
+ */
+ using client_range_map = std::map<client_t,client_writeable_range_t,std::less<client_t>,Allocator<std::pair<const client_t,client_writeable_range_t>>>;
+
+ inode_t()
+ {
+ clear_layout();
+ }
+
+ // file type
+ bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; }
+ bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; }
+ bool is_file() const { return (mode & S_IFMT) == S_IFREG; }
+
+ bool is_truncating() const { return (truncate_pending > 0); }
+ void truncate(uint64_t old_size, uint64_t new_size, const bufferlist &fbl) {
+ truncate(old_size, new_size);
+ fscrypt_last_block = fbl;
+ }
+ void truncate(uint64_t old_size, uint64_t new_size) {
+ ceph_assert(new_size <= old_size);
+ if (old_size > max_size_ever)
+ max_size_ever = old_size;
+ truncate_from = old_size;
+ size = new_size;
+ rstat.rbytes = new_size;
+ truncate_size = size;
+ truncate_seq++;
+ truncate_pending++;
+ }
+
+ bool has_layout() const {
+ return layout != file_layout_t();
+ }
+
+ void clear_layout() {
+ layout = file_layout_t();
+ }
+
+ uint64_t get_layout_size_increment() const {
+ return layout.get_period();
+ }
+
+ bool is_dirty_rstat() const { return !(rstat == accounted_rstat); }
+
+ uint64_t get_client_range(client_t client) const {
+ auto it = client_ranges.find(client);
+ return it != client_ranges.end() ? it->second.range.last : 0;
+ }
+
+ uint64_t get_max_size() const {
+ uint64_t max = 0;
+ for (std::map<client_t,client_writeable_range_t>::const_iterator p = client_ranges.begin();
+ p != client_ranges.end();
+ ++p)
+ if (p->second.range.last > max)
+ max = p->second.range.last;
+ return max;
+ }
+ void set_max_size(uint64_t new_max) {
+ if (new_max == 0) {
+ client_ranges.clear();
+ } else {
+ for (std::map<client_t,client_writeable_range_t>::iterator p = client_ranges.begin();
+ p != client_ranges.end();
+ ++p)
+ p->second.range.last = new_max;
+ }
+ }
+
+ void trim_client_ranges(snapid_t last) {
+ std::map<client_t, client_writeable_range_t>::iterator p = client_ranges.begin();
+ while (p != client_ranges.end()) {
+ if (p->second.follows >= last)
+ client_ranges.erase(p++);
+ else
+ ++p;
+ }
+ }
+
+ bool is_backtrace_updated() const {
+ return backtrace_version == version;
+ }
+ void update_backtrace(version_t pv=0) {
+ backtrace_version = pv ? pv : version;
+ }
+
+ void add_old_pool(int64_t l) {
+ backtrace_version = version;
+ old_pools.insert(l);
+ }
+
+ void encode(ceph::buffer::list &bl, uint64_t features) const;
+ void decode(ceph::buffer::list::const_iterator& bl);
+ void dump(ceph::Formatter *f) const;
+ static void client_ranges_cb(client_range_map& c, JSONObj *obj);
+ static void old_pools_cb(compact_set<int64_t, std::less<int64_t>, Allocator<int64_t> >& c, JSONObj *obj);
+ void decode_json(JSONObj *obj);
+ static void generate_test_instances(std::list<inode_t*>& ls);
+ /**
+ * Compare this inode_t with another that represent *the same inode*
+ * at different points in time.
+ * @pre The inodes are the same ino
+ *
+ * @param other The inode_t to compare ourselves with
+ * @param divergent A bool pointer which will be set to true
+ * if the values are different in a way that can't be explained
+ * by one being a newer version than the other.
+ *
+ * @returns 1 if we are newer than the other, 0 if equal, -1 if older.
+ */
+ int compare(const inode_t &other, bool *divergent) const;
+
+ // base (immutable)
+ inodeno_t ino = 0;
+ uint32_t rdev = 0; // if special file
+
+ // affected by any inode change...
+ utime_t ctime; // inode change time
+ utime_t btime; // birth time
+
+ // perm (namespace permissions)
+ uint32_t mode = 0;
+ uid_t uid = 0;
+ gid_t gid = 0;
+
+ // nlink
+ int32_t nlink = 0;
+
+ // file (data access)
+ ceph_dir_layout dir_layout = {}; // [dir only]
+ file_layout_t layout;
+ compact_set<int64_t, std::less<int64_t>, Allocator<int64_t>> old_pools;
+ uint64_t size = 0; // on directory, # dentries
+ uint64_t max_size_ever = 0; // max size the file has ever been
+ uint32_t truncate_seq = 0;
+ uint64_t truncate_size = 0, truncate_from = 0;
+ uint32_t truncate_pending = 0;
+ utime_t mtime; // file data modify time.
+ utime_t atime; // file data access time.
+ uint32_t time_warp_seq = 0; // count of (potential) mtime/atime timewarps (i.e., utimes())
+ inline_data_t inline_data; // FIXME check
+
+ // change attribute
+ uint64_t change_attr = 0;
+
+ client_range_map client_ranges; // client(s) can write to these ranges
+
+ // dirfrag, recursive accountin
+ frag_info_t dirstat; // protected by my filelock
+ nest_info_t rstat; // protected by my nestlock
+ nest_info_t accounted_rstat; // protected by parent's nestlock
+
+ quota_info_t quota;
+
+ mds_rank_t export_pin = MDS_RANK_NONE;
+
+ double export_ephemeral_random_pin = 0;
+ bool export_ephemeral_distributed_pin = false;
+
+ // special stuff
+ version_t version = 0; // auth only
+ version_t file_data_version = 0; // auth only
+ version_t xattr_version = 0;
+
+ utime_t last_scrub_stamp; // start time of last complete scrub
+ version_t last_scrub_version = 0;// (parent) start version of last complete scrub
+
+ version_t backtrace_version = 0;
+
+ snapid_t oldest_snap;
+
+ std::basic_string<char,std::char_traits<char>,Allocator<char>> stray_prior_path; //stores path before unlink
+
+ std::vector<uint8_t> fscrypt_auth;
+ std::vector<uint8_t> fscrypt_file;
+
+ bufferlist fscrypt_last_block;
+
+private:
+ bool older_is_consistent(const inode_t &other) const;
+};
+
+// These methods may be moved back to mdstypes.cc when we have pmr
+template<template<typename> class Allocator>
+void inode_t<Allocator>::encode(ceph::buffer::list &bl, uint64_t features) const
+{
+ ENCODE_START(19, 6, bl);
+
+ encode(ino, bl);
+ encode(rdev, bl);
+ encode(ctime, bl);
+
+ encode(mode, bl);
+ encode(uid, bl);
+ encode(gid, bl);
+
+ encode(nlink, bl);
+ {
+ // removed field
+ bool anchored = 0;
+ encode(anchored, bl);
+ }
+
+ encode(dir_layout, bl);
+ encode(layout, bl, features);
+ encode(size, bl);
+ encode(truncate_seq, bl);
+ encode(truncate_size, bl);
+ encode(truncate_from, bl);
+ encode(truncate_pending, bl);
+ encode(mtime, bl);
+ encode(atime, bl);
+ encode(time_warp_seq, bl);
+ encode(client_ranges, bl);
+
+ encode(dirstat, bl);
+ encode(rstat, bl);
+ encode(accounted_rstat, bl);
+
+ encode(version, bl);
+ encode(file_data_version, bl);
+ encode(xattr_version, bl);
+ encode(backtrace_version, bl);
+ encode(old_pools, bl);
+ encode(max_size_ever, bl);
+ encode(inline_data, bl);
+ encode(quota, bl);
+
+ encode(stray_prior_path, bl);
+
+ encode(last_scrub_version, bl);
+ encode(last_scrub_stamp, bl);
+
+ encode(btime, bl);
+ encode(change_attr, bl);
+
+ encode(export_pin, bl);
+
+ encode(export_ephemeral_random_pin, bl);
+ encode(export_ephemeral_distributed_pin, bl);
+
+ encode(!fscrypt_auth.empty(), bl);
+ encode(fscrypt_auth, bl);
+ encode(fscrypt_file, bl);
+ encode(fscrypt_last_block, bl);
+ ENCODE_FINISH(bl);
+}
+
+template<template<typename> class Allocator>
+void inode_t<Allocator>::decode(ceph::buffer::list::const_iterator &p)
+{
+ DECODE_START_LEGACY_COMPAT_LEN(19, 6, 6, p);
+
+ decode(ino, p);
+ decode(rdev, p);
+ decode(ctime, p);
+
+ decode(mode, p);
+ decode(uid, p);
+ decode(gid, p);
+
+ decode(nlink, p);
+ {
+ bool anchored;
+ decode(anchored, p);
+ }
+
+ if (struct_v >= 4)
+ decode(dir_layout, p);
+ else {
+ // FIPS zeroization audit 20191117: this memset is not security related.
+ memset(&dir_layout, 0, sizeof(dir_layout));
+ }
+ decode(layout, p);
+ decode(size, p);
+ decode(truncate_seq, p);
+ decode(truncate_size, p);
+ decode(truncate_from, p);
+ if (struct_v >= 5)
+ decode(truncate_pending, p);
+ else
+ truncate_pending = 0;
+ decode(mtime, p);
+ decode(atime, p);
+ decode(time_warp_seq, p);
+ if (struct_v >= 3) {
+ decode(client_ranges, p);
+ } else {
+ std::map<client_t, client_writeable_range_t::byte_range_t> m;
+ decode(m, p);
+ for (auto q = m.begin(); q != m.end(); ++q)
+ client_ranges[q->first].range = q->second;
+ }
+
+ decode(dirstat, p);
+ decode(rstat, p);
+ decode(accounted_rstat, p);
+
+ decode(version, p);
+ decode(file_data_version, p);
+ decode(xattr_version, p);
+ if (struct_v >= 2)
+ decode(backtrace_version, p);
+ if (struct_v >= 7)
+ decode(old_pools, p);
+ if (struct_v >= 8)
+ decode(max_size_ever, p);
+ if (struct_v >= 9) {
+ decode(inline_data, p);
+ } else {
+ inline_data.version = CEPH_INLINE_NONE;
+ }
+ if (struct_v < 10)
+ backtrace_version = 0; // force update backtrace
+ if (struct_v >= 11)
+ decode(quota, p);
+
+ if (struct_v >= 12) {
+ std::string tmp;
+ decode(tmp, p);
+ stray_prior_path = std::string_view(tmp);
+ }
+
+ if (struct_v >= 13) {
+ decode(last_scrub_version, p);
+ decode(last_scrub_stamp, p);
+ }
+ if (struct_v >= 14) {
+ decode(btime, p);
+ decode(change_attr, p);
+ } else {
+ btime = utime_t();
+ change_attr = 0;
+ }
+
+ if (struct_v >= 15) {
+ decode(export_pin, p);
+ } else {
+ export_pin = MDS_RANK_NONE;
+ }
+
+ if (struct_v >= 16) {
+ decode(export_ephemeral_random_pin, p);
+ decode(export_ephemeral_distributed_pin, p);
+ } else {
+ export_ephemeral_random_pin = 0;
+ export_ephemeral_distributed_pin = false;
+ }
+
+ if (struct_v >= 17) {
+ bool fscrypt_flag;
+ decode(fscrypt_flag, p); // ignored
+ }
+
+ if (struct_v >= 18) {
+ decode(fscrypt_auth, p);
+ decode(fscrypt_file, p);
+ }
+
+ if (struct_v >= 19) {
+ decode(fscrypt_last_block, p);
+ }
+ DECODE_FINISH(p);
+}
+
+template<template<typename> class Allocator>
+void inode_t<Allocator>::dump(ceph::Formatter *f) const
+{
+ f->dump_unsigned("ino", ino);
+ f->dump_unsigned("rdev", rdev);
+ f->dump_stream("ctime") << ctime;
+ f->dump_stream("btime") << btime;
+ f->dump_unsigned("mode", mode);
+ f->dump_unsigned("uid", uid);
+ f->dump_unsigned("gid", gid);
+ f->dump_unsigned("nlink", nlink);
+
+ f->open_object_section("dir_layout");
+ ::dump(dir_layout, f);
+ f->close_section();
+
+ f->dump_object("layout", layout);
+
+ f->open_array_section("old_pools");
+ for (const auto &p : old_pools) {
+ f->dump_int("pool", p);
+ }
+ f->close_section();
+
+ f->dump_unsigned("size", size);
+ f->dump_unsigned("truncate_seq", truncate_seq);
+ f->dump_unsigned("truncate_size", truncate_size);
+ f->dump_unsigned("truncate_from", truncate_from);
+ f->dump_unsigned("truncate_pending", truncate_pending);
+ f->dump_stream("mtime") << mtime;
+ f->dump_stream("atime") << atime;
+ f->dump_unsigned("time_warp_seq", time_warp_seq);
+ f->dump_unsigned("change_attr", change_attr);
+ f->dump_int("export_pin", export_pin);
+ f->dump_int("export_ephemeral_random_pin", export_ephemeral_random_pin);
+ f->dump_bool("export_ephemeral_distributed_pin", export_ephemeral_distributed_pin);
+
+ f->open_array_section("client_ranges");
+ for (const auto &p : client_ranges) {
+ f->open_object_section("client");
+ f->dump_unsigned("client", p.first.v);
+ p.second.dump(f);
+ f->close_section();
+ }
+ f->close_section();
+
+ f->open_object_section("dirstat");
+ dirstat.dump(f);
+ f->close_section();
+
+ f->open_object_section("rstat");
+ rstat.dump(f);
+ f->close_section();
+
+ f->open_object_section("accounted_rstat");
+ accounted_rstat.dump(f);
+ f->close_section();
+
+ f->dump_unsigned("version", version);
+ f->dump_unsigned("file_data_version", file_data_version);
+ f->dump_unsigned("xattr_version", xattr_version);
+ f->dump_unsigned("backtrace_version", backtrace_version);
+
+ f->dump_string("stray_prior_path", stray_prior_path);
+ f->dump_unsigned("max_size_ever", max_size_ever);
+
+ f->open_object_section("quota");
+ quota.dump(f);
+ f->close_section();
+
+ f->dump_stream("last_scrub_stamp") << last_scrub_stamp;
+ f->dump_unsigned("last_scrub_version", last_scrub_version);
+}
+
+template<template<typename> class Allocator>
+void inode_t<Allocator>::client_ranges_cb(typename inode_t<Allocator>::client_range_map& c, JSONObj *obj){
+
+ int64_t client;
+ JSONDecoder::decode_json("client", client, obj, true);
+ client_writeable_range_t client_range_tmp;
+ JSONDecoder::decode_json("byte range", client_range_tmp.range, obj, true);
+ JSONDecoder::decode_json("follows", client_range_tmp.follows.val, obj, true);
+ c[client] = client_range_tmp;
+}
+
+template<template<typename> class Allocator>
+void inode_t<Allocator>::old_pools_cb(compact_set<int64_t, std::less<int64_t>, Allocator<int64_t> >& c, JSONObj *obj){
+
+ int64_t tmp;
+ decode_json_obj(tmp, obj);
+ c.insert(tmp);
+}
+
+template<template<typename> class Allocator>
+void inode_t<Allocator>::decode_json(JSONObj *obj)
+{
+
+ JSONDecoder::decode_json("ino", ino.val, obj, true);
+ JSONDecoder::decode_json("rdev", rdev, obj, true);
+ //JSONDecoder::decode_json("ctime", ctime, obj, true);
+ //JSONDecoder::decode_json("btime", btime, obj, true);
+ JSONDecoder::decode_json("mode", mode, obj, true);
+ JSONDecoder::decode_json("uid", uid, obj, true);
+ JSONDecoder::decode_json("gid", gid, obj, true);
+ JSONDecoder::decode_json("nlink", nlink, obj, true);
+ JSONDecoder::decode_json("dir_layout", dir_layout, obj, true);
+ JSONDecoder::decode_json("layout", layout, obj, true);
+ JSONDecoder::decode_json("old_pools", old_pools, inode_t<Allocator>::old_pools_cb, obj, true);
+ JSONDecoder::decode_json("size", size, obj, true);
+ JSONDecoder::decode_json("truncate_seq", truncate_seq, obj, true);
+ JSONDecoder::decode_json("truncate_size", truncate_size, obj, true);
+ JSONDecoder::decode_json("truncate_from", truncate_from, obj, true);
+ JSONDecoder::decode_json("truncate_pending", truncate_pending, obj, true);
+ //JSONDecoder::decode_json("mtime", mtime, obj, true);
+ //JSONDecoder::decode_json("atime", atime, obj, true);
+ JSONDecoder::decode_json("time_warp_seq", time_warp_seq, obj, true);
+ JSONDecoder::decode_json("change_attr", change_attr, obj, true);
+ JSONDecoder::decode_json("export_pin", export_pin, obj, true);
+ JSONDecoder::decode_json("client_ranges", client_ranges, inode_t<Allocator>::client_ranges_cb, obj, true);
+ JSONDecoder::decode_json("dirstat", dirstat, obj, true);
+ JSONDecoder::decode_json("rstat", rstat, obj, true);
+ JSONDecoder::decode_json("accounted_rstat", accounted_rstat, obj, true);
+ JSONDecoder::decode_json("version", version, obj, true);
+ JSONDecoder::decode_json("file_data_version", file_data_version, obj, true);
+ JSONDecoder::decode_json("xattr_version", xattr_version, obj, true);
+ JSONDecoder::decode_json("backtrace_version", backtrace_version, obj, true);
+ JSONDecoder::decode_json("stray_prior_path", stray_prior_path, obj, true);
+ JSONDecoder::decode_json("max_size_ever", max_size_ever, obj, true);
+ JSONDecoder::decode_json("quota", quota, obj, true);
+ JSONDecoder::decode_json("last_scrub_stamp", last_scrub_stamp, obj, true);
+ JSONDecoder::decode_json("last_scrub_version", last_scrub_version, obj, true);
+}
+
+template<template<typename> class Allocator>
+void inode_t<Allocator>::generate_test_instances(std::list<inode_t*>& ls)
+{
+ ls.push_back(new inode_t<Allocator>);
+ ls.push_back(new inode_t<Allocator>);
+ ls.back()->ino = 1;
+ // i am lazy.
+}
+
+template<template<typename> class Allocator>
+int inode_t<Allocator>::compare(const inode_t<Allocator> &other, bool *divergent) const
+{
+ ceph_assert(ino == other.ino);
+ *divergent = false;
+ if (version == other.version) {
+ if (rdev != other.rdev ||
+ ctime != other.ctime ||
+ btime != other.btime ||
+ mode != other.mode ||
+ uid != other.uid ||
+ gid != other.gid ||
+ nlink != other.nlink ||
+ memcmp(&dir_layout, &other.dir_layout, sizeof(dir_layout)) ||
+ layout != other.layout ||
+ old_pools != other.old_pools ||
+ size != other.size ||
+ max_size_ever != other.max_size_ever ||
+ truncate_seq != other.truncate_seq ||
+ truncate_size != other.truncate_size ||
+ truncate_from != other.truncate_from ||
+ truncate_pending != other.truncate_pending ||
+ change_attr != other.change_attr ||
+ mtime != other.mtime ||
+ atime != other.atime ||
+ time_warp_seq != other.time_warp_seq ||
+ inline_data != other.inline_data ||
+ client_ranges != other.client_ranges ||
+ !(dirstat == other.dirstat) ||
+ !(rstat == other.rstat) ||
+ !(accounted_rstat == other.accounted_rstat) ||
+ file_data_version != other.file_data_version ||
+ xattr_version != other.xattr_version ||
+ backtrace_version != other.backtrace_version) {
+ *divergent = true;
+ }
+ return 0;
+ } else if (version > other.version) {
+ *divergent = !older_is_consistent(other);
+ return 1;
+ } else {
+ ceph_assert(version < other.version);
+ *divergent = !other.older_is_consistent(*this);
+ return -1;
+ }
+}
+
+template<template<typename> class Allocator>
+bool inode_t<Allocator>::older_is_consistent(const inode_t<Allocator> &other) const
+{
+ if (max_size_ever < other.max_size_ever ||
+ truncate_seq < other.truncate_seq ||
+ time_warp_seq < other.time_warp_seq ||
+ inline_data.version < other.inline_data.version ||
+ dirstat.version < other.dirstat.version ||
+ rstat.version < other.rstat.version ||
+ accounted_rstat.version < other.accounted_rstat.version ||
+ file_data_version < other.file_data_version ||
+ xattr_version < other.xattr_version ||
+ backtrace_version < other.backtrace_version) {
+ return false;
+ }
+ return true;
+}
+
+template<template<typename> class Allocator>
+inline void encode(const inode_t<Allocator> &c, ::ceph::buffer::list &bl, uint64_t features)
+{
+ ENCODE_DUMP_PRE();
+ c.encode(bl, features);
+ ENCODE_DUMP_POST(cl);
+}
+template<template<typename> class Allocator>
+inline void decode(inode_t<Allocator> &c, ::ceph::buffer::list::const_iterator &p)
+{
+ c.decode(p);
+}
+
+// parse a map of keys/values.
+namespace qi = boost::spirit::qi;
+
+template <typename Iterator>
+struct keys_and_values
+ : qi::grammar<Iterator, std::map<std::string, std::string>()>
+{
+ keys_and_values()
+ : keys_and_values::base_type(query)
+ {
+ query = pair >> *(qi::lit(' ') >> pair);
+ pair = key >> '=' >> value;
+ key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9");
+ value = +qi::char_("a-zA-Z0-9-_.");
+ }
+ qi::rule<Iterator, std::map<std::string, std::string>()> query;
+ qi::rule<Iterator, std::pair<std::string, std::string>()> pair;
+ qi::rule<Iterator, std::string()> key, value;
+};
+
+#endif