diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-21 11:54:28 +0000 |
commit | e6918187568dbd01842d8d1d2c808ce16a894239 (patch) | |
tree | 64f88b554b444a49f656b6c656111a145cbbaa28 /src/include/cephfs | |
parent | Initial commit. (diff) | |
download | ceph-e6918187568dbd01842d8d1d2c808ce16a894239.tar.xz ceph-e6918187568dbd01842d8d1d2c808ce16a894239.zip |
Adding upstream version 18.2.2.upstream/18.2.2
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/include/cephfs')
-rw-r--r-- | src/include/cephfs/ceph_ll_client.h | 215 | ||||
-rw-r--r-- | src/include/cephfs/libcephfs.h | 2201 | ||||
-rw-r--r-- | src/include/cephfs/metrics/Types.h | 699 | ||||
-rw-r--r-- | src/include/cephfs/types.h | 970 |
4 files changed, 4085 insertions, 0 deletions
diff --git a/src/include/cephfs/ceph_ll_client.h b/src/include/cephfs/ceph_ll_client.h new file mode 100644 index 000000000..ac5b7c224 --- /dev/null +++ b/src/include/cephfs/ceph_ll_client.h @@ -0,0 +1,215 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * scalable distributed file system + * + * Copyright (C) Jeff Layton <jlayton@redhat.com> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + +#ifndef CEPH_CEPH_LL_CLIENT_H +#define CEPH_CEPH_LL_CLIENT_H +#include <stdint.h> + +#ifdef _WIN32 +#include "include/win32/fs_compat.h" +#endif + +#ifdef __cplusplus +extern "C" { + +class Fh; + +struct inodeno_t; +struct vinodeno_t; +typedef struct vinodeno_t vinodeno; + +#else /* __cplusplus */ + +typedef struct Fh Fh; + +typedef struct inodeno_t { + uint64_t val; +} inodeno_t; + +typedef struct _snapid_t { + uint64_t val; +} snapid_t; + +typedef struct vinodeno_t { + inodeno_t ino; + snapid_t snapid; +} vinodeno_t; + +#endif /* __cplusplus */ + +/* + * Heavily borrowed from David Howells' draft statx patchset. + * + * Since the xstat patches are still a work in progress, we borrow its data + * structures and #defines to implement ceph_getattrx. Once the xstat stuff + * has been merged we should drop this and switch over to using that instead. 
+ */ +struct ceph_statx { + uint32_t stx_mask; + uint32_t stx_blksize; + uint32_t stx_nlink; + uint32_t stx_uid; + uint32_t stx_gid; + uint16_t stx_mode; + uint64_t stx_ino; + uint64_t stx_size; + uint64_t stx_blocks; + dev_t stx_dev; + dev_t stx_rdev; + struct timespec stx_atime; + struct timespec stx_ctime; + struct timespec stx_mtime; + struct timespec stx_btime; + uint64_t stx_version; +}; + +#define CEPH_STATX_MODE 0x00000001U /* Want/got stx_mode */ +#define CEPH_STATX_NLINK 0x00000002U /* Want/got stx_nlink */ +#define CEPH_STATX_UID 0x00000004U /* Want/got stx_uid */ +#define CEPH_STATX_GID 0x00000008U /* Want/got stx_gid */ +#define CEPH_STATX_RDEV 0x00000010U /* Want/got stx_rdev */ +#define CEPH_STATX_ATIME 0x00000020U /* Want/got stx_atime */ +#define CEPH_STATX_MTIME 0x00000040U /* Want/got stx_mtime */ +#define CEPH_STATX_CTIME 0x00000080U /* Want/got stx_ctime */ +#define CEPH_STATX_INO 0x00000100U /* Want/got stx_ino */ +#define CEPH_STATX_SIZE 0x00000200U /* Want/got stx_size */ +#define CEPH_STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ +#define CEPH_STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ +#define CEPH_STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define CEPH_STATX_VERSION 0x00001000U /* Want/got stx_version */ +#define CEPH_STATX_ALL_STATS 0x00001fffU /* All supported stats */ + +/* + * Compatibility macros until these defines make their way into glibc + */ +#ifndef AT_STATX_DONT_SYNC +#define AT_STATX_SYNC_TYPE 0x6000 +#define AT_STATX_SYNC_AS_STAT 0x0000 +#define AT_STATX_FORCE_SYNC 0x2000 +#define AT_STATX_DONT_SYNC 0x4000 /* Don't sync attributes with the server */ +#endif + +/* + * This is deprecated and just for backwards compatibility. + * Please use AT_STATX_DONT_SYNC instead. + */ +#define AT_NO_ATTR_SYNC AT_STATX_DONT_SYNC /* Deprecated */ + +/* + * The statx interfaces only allow these flags. In order to allow us to add + * others in the future, we disallow setting any that aren't recognized. 
+ */ +#define CEPH_REQ_FLAG_MASK (AT_SYMLINK_NOFOLLOW|AT_STATX_DONT_SYNC) + +/* fallocate mode flags */ +#ifndef FALLOC_FL_KEEP_SIZE +#define FALLOC_FL_KEEP_SIZE 0x01 +#endif +#ifndef FALLOC_FL_PUNCH_HOLE +#define FALLOC_FL_PUNCH_HOLE 0x02 +#endif + +/** ceph_deleg_cb_t: Delegation recalls + * + * Called when there is an outstanding Delegation and there is conflicting + * access, either locally or via cap activity. + * @fh: open filehandle + * @priv: private info registered when delegation was acquired + */ +typedef void (*ceph_deleg_cb_t)(Fh *fh, void *priv); + +/** + * client_ino_callback_t: Inode data/metadata invalidation + * + * Called when the client wants to invalidate the cached data for a range + * in the file. + * @handle: client callback handle + * @ino: vino of inode to be invalidated + * @off: starting offset of content to be invalidated + * @len: length of region to invalidate + */ +typedef void (*client_ino_callback_t)(void *handle, vinodeno_t ino, + int64_t off, int64_t len); + +/** + * client_dentry_callback_t: Dentry invalidation + * + * Called when the client wants to purge a dentry from its cache. + * @handle: client callback handle + * @dirino: vino of directory that contains dentry to be invalidated + * @ino: vino of inode attached to dentry to be invalidated + * @name: name of dentry to be invalidated + * @len: length of @name + */ +typedef void (*client_dentry_callback_t)(void *handle, vinodeno_t dirino, + vinodeno_t ino, const char *name, + size_t len); + +/** + * client_remount_callback_t: Remount entire fs + * + * Called when the client needs to purge the dentry cache and the application + * doesn't have a way to purge an individual dentry. Mostly used for ceph-fuse + * on older kernels. 
+ * @handle: client callback handle + */ + +typedef int (*client_remount_callback_t)(void *handle); + +/** + * client_switch_interrupt_callback_t: Lock request interrupted + * + * Called before file lock request to set the interrupt handler while waiting + * After the wait, called with "data" set to NULL pointer. + * @handle: client callback handle + * @data: opaque data passed to interrupt before call, NULL pointer after. + */ +typedef void (*client_switch_interrupt_callback_t)(void *handle, void *data); + +/** + * client_umask_callback_t: Fetch umask of actor + * + * Called when the client needs the umask of the requestor. + * @handle: client callback handle + */ +typedef mode_t (*client_umask_callback_t)(void *handle); + +/** + * client_ino_release_t: Request that application release Inode references + * + * Called when the MDS wants to trim caps and Inode records. + * @handle: client callback handle + * @ino: vino of Inode being released + */ +typedef void (*client_ino_release_t)(void *handle, vinodeno_t ino); + +/* + * The handle is an opaque value that gets passed to some callbacks. Any fields + * set to NULL will be left alone. There is no way to unregister callbacks. 
+ */ +struct ceph_client_callback_args { + void *handle; + client_ino_callback_t ino_cb; + client_dentry_callback_t dentry_cb; + client_switch_interrupt_callback_t switch_intr_cb; + client_remount_callback_t remount_cb; + client_umask_callback_t umask_cb; + client_ino_release_t ino_release_cb; +}; + +#ifdef __cplusplus +} +#endif + +#endif /* CEPH_STATX_H */ + diff --git a/src/include/cephfs/libcephfs.h b/src/include/cephfs/libcephfs.h new file mode 100644 index 000000000..dc62698fa --- /dev/null +++ b/src/include/cephfs/libcephfs.h @@ -0,0 +1,2201 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2009-2011 New Dream Network + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_LIB_H +#define CEPH_LIB_H + +#if defined(__linux__) +#include <features.h> +#endif +#include <utime.h> +#include <sys/stat.h> +#include <sys/time.h> +#include <sys/types.h> +#include <sys/statvfs.h> +#include <sys/socket.h> +#include <stdint.h> +#include <stdbool.h> +#include <fcntl.h> +#include <dirent.h> + +#include "ceph_ll_client.h" + +#ifdef __cplusplus +namespace ceph::common { + class CephContext; +} +using CephContext = ceph::common::CephContext; +extern "C" { +#endif + +#define LIBCEPHFS_VER_MAJOR 10 +#define LIBCEPHFS_VER_MINOR 0 +#define LIBCEPHFS_VER_EXTRA 3 + +#define LIBCEPHFS_VERSION(maj, min, extra) ((maj << 16) + (min << 8) + extra) +#define LIBCEPHFS_VERSION_CODE LIBCEPHFS_VERSION(LIBCEPHFS_VER_MAJOR, LIBCEPHFS_VER_MINOR, LIBCEPHFS_VER_EXTRA) + +#if __GNUC__ >= 4 + #define LIBCEPHFS_DEPRECATED __attribute__((deprecated)) + #pragma GCC diagnostic push + #pragma GCC diagnostic ignored "-Wdeprecated-declarations" +#else + #define LIBCEPHFS_DEPRECATED +#endif + +/* + * If 
using glibc check that file offset is 64-bit. + */ +#if defined(__GLIBC__) && !defined(__USE_FILE_OFFSET64) +# error libceph: glibc must define __USE_FILE_OFFSET64 or readdir results will be corrupted +#endif + +/* + * XXXX redeclarations from ceph_fs.h, rados.h, etc. We need more of this + * in the interface, but shouldn't be re-typing it (and using different + * C data types). + */ +#ifndef __cplusplus + +#define CEPH_INO_ROOT 1 +#define CEPH_NOSNAP ((uint64_t)(-2)) + +struct ceph_file_layout { + /* file -> object mapping */ + uint32_t fl_stripe_unit; /* stripe unit, in bytes. must be multiple + of page size. */ + uint32_t fl_stripe_count; /* over this many objects */ + uint32_t fl_object_size; /* until objects are this big, then move to + new objects */ + uint32_t fl_cas_hash; /* 0 = none; 1 = sha256 */ + + /* pg -> disk layout */ + uint32_t fl_object_stripe_unit; /* for per-object parity, if any */ + + /* object -> pg layout */ + uint32_t fl_pg_preferred; /* preferred primary for pg (-1 for none) */ + uint32_t fl_pg_pool; /* namespace, crush rule, rep level */ +} __attribute__ ((packed)); + +struct CephContext; +#endif /* ! __cplusplus */ + +struct UserPerm; +typedef struct UserPerm UserPerm; + +struct Inode; +typedef struct Inode Inode; + +struct ceph_mount_info; +struct ceph_dir_result; + +// user supplied key,value pair to be associated with a snapshot. +// callers can supply an array of this struct via ceph_mksnap(). 
+struct snap_metadata { + const char *key; + const char *value; +}; + +struct snap_info { + uint64_t id; + size_t nr_snap_metadata; + struct snap_metadata *snap_metadata; +}; + +struct ceph_snapdiff_entry_t { + struct dirent dir_entry; + uint64_t snapid; //should be snapid_t but prefer not to expose it +}; + +/* setattr mask bits (up to an int in size) */ +#ifndef CEPH_SETATTR_MODE +#define CEPH_SETATTR_MODE (1 << 0) +#define CEPH_SETATTR_UID (1 << 1) +#define CEPH_SETATTR_GID (1 << 2) +#define CEPH_SETATTR_MTIME (1 << 3) +#define CEPH_SETATTR_ATIME (1 << 4) +#define CEPH_SETATTR_SIZE (1 << 5) +#define CEPH_SETATTR_CTIME (1 << 6) +#define CEPH_SETATTR_MTIME_NOW (1 << 7) +#define CEPH_SETATTR_ATIME_NOW (1 << 8) +#define CEPH_SETATTR_BTIME (1 << 9) +#define CEPH_SETATTR_KILL_SGUID (1 << 10) +#define CEPH_SETATTR_FSCRYPT_AUTH (1 << 11) +#define CEPH_SETATTR_FSCRYPT_FILE (1 << 12) +#define CEPH_SETATTR_KILL_SUID (1 << 13) +#define CEPH_SETATTR_KILL_SGID (1 << 14) +#endif + +/* define error codes for the mount function*/ +# define CEPHFS_ERROR_MON_MAP_BUILD 1000 +# define CEPHFS_ERROR_NEW_CLIENT 1002 +# define CEPHFS_ERROR_MESSENGER_START 1003 + +/** + * Create a UserPerm credential object. + * + * Some calls (most notably, the ceph_ll_* ones), take a credential object + * that represents the credentials that the calling program is using. This + * function creates a new credential object for this purpose. Returns a + * pointer to the object, or NULL if it can't be allocated. + * + * Note that the gidlist array is used directly and is not copied. It must + * remain valid over the lifetime of the created UserPerm object. + * + * @param uid uid to be used + * @param gid gid to be used + * @param ngids number of gids in supplemental grouplist + * @param gidlist array of gid_t's in the list of groups + */ +UserPerm *ceph_userperm_new(uid_t uid, gid_t gid, int ngids, gid_t *gidlist); + +/** + * Destroy a UserPerm credential object. 
+ * + * @param perm pointer to object to be destroyed + * + * Currently this just frees the object. Note that the gidlist array is not + * freed. The caller must do so if it's necessary. + */ +void ceph_userperm_destroy(UserPerm *perm); + +/** + * Get a pointer to the default UserPerm object for the mount. + * + * @param cmount the mount info handle + * + * Every cmount has a default set of credentials. This returns a pointer to + * that object. + * + * Unlike with ceph_userperm_new, this object should not be freed. + */ +struct UserPerm *ceph_mount_perms(struct ceph_mount_info *cmount); + +/** + * Set cmount's default permissions + * + * @param cmount the mount info handle + * @param perm permissions to set to default for mount + * + * Every cmount has a default set of credentials. This does a deep copy of + * the given permissions to the ones in the cmount. Must be done after + * ceph_init but before ceph_mount. + * + * Returns 0 on success, and -EISCONN if the cmount is already mounted. + */ +int ceph_mount_perms_set(struct ceph_mount_info *cmount, UserPerm *perm); + +/** + * @defgroup libcephfs_h_init Setup and Teardown + * These are the first and last functions that should be called + * when using libcephfs. + * + * @{ + */ + +/** + * Get the version of libcephfs. + * + * The version number is major.minor.patch. + * + * @param major where to store the major version number + * @param minor where to store the minor version number + * @param patch where to store the extra version number + */ +const char *ceph_version(int *major, int *minor, int *patch); + +/** + * Create a mount handle for interacting with Ceph. All libcephfs + * functions operate on a mount info handle. + * + * @param cmount the mount info handle to initialize + * @param id the id of the client. This can be a unique id that identifies + * this client, and will get appended onto "client.". Callers can + * pass in NULL, and the id will be the process id of the client. 
+ * @returns 0 on success, negative error code on failure + */ +int ceph_create(struct ceph_mount_info **cmount, const char * const id); + +/** + * Create a mount handle from a CephContext, which holds the configuration + * for the ceph cluster. A CephContext can be acquired from an existing ceph_mount_info + * handle, using the @ref ceph_get_mount_context call. Note that using the same CephContext + * for two different mount handles results in the same client entity id being used. + * + * @param cmount the mount info handle to initialize + * @param conf reuse this pre-existing CephContext config + * @returns 0 on success, negative error code on failure + */ +#ifdef __cplusplus +int ceph_create_with_context(struct ceph_mount_info **cmount, CephContext *conf); +#else +int ceph_create_with_context(struct ceph_mount_info **cmount, struct CephContext *conf); +#endif + +#ifndef VOIDPTR_RADOS_T +#define VOIDPTR_RADOS_T +typedef void *rados_t; +#endif // VOIDPTR_RADOS_T + +/** + * Create a mount handle from a rados_t, for using libcephfs in the + * same process as librados. + * + * @param cmount the mount info handle to initialize + * @param cluster reference to already-initialized librados handle + * @returns 0 on success, negative error code on failure + */ +int ceph_create_from_rados(struct ceph_mount_info **cmount, rados_t cluster); + +/** + * Initialize the filesystem client (but do not mount the filesystem yet) + * + * @returns 0 on success, negative error code on failure + */ +int ceph_init(struct ceph_mount_info *cmount); + +/** + * Optionally set which filesystem to mount, before calling mount. + * + * An error will be returned if this libcephfs instance is already + * mounted. This function is an alternative to setting the global + * client_fs setting. Using this function enables multiple libcephfs + * instances in the same process to mount different filesystems. + * + * The filesystem name is *not* validated in this function. 
That happens + * during mount(), where an ENOENT error will result if a non-existent + * filesystem was specified here. + * + * @param cmount the mount info handle + * @returns 0 on success, negative error code on failure + */ +int ceph_select_filesystem(struct ceph_mount_info *cmount, const char *fs_name); + + +/** + * Perform a mount using the path for the root of the mount. + * + * It is optional to call ceph_init before this. If ceph_init has + * not already been called, it will be called in the course of this operation. + * + * @param cmount the mount info handle + * @param root the path for the root of the mount. This can be an existing + * directory within the ceph cluster, but most likely it will + * be "/". Passing in NULL is equivalent to "/". + * @returns 0 on success, negative error code on failure + */ +int ceph_mount(struct ceph_mount_info *cmount, const char *root); + +/** + * Return cluster ID for a mounted ceph filesystem + * + * Every ceph filesystem has a filesystem ID associated with it. This + * function returns that value. If the ceph_mount_info does not refer to a + * mounted filesystem, this returns a negative error code. + */ +int64_t ceph_get_fs_cid(struct ceph_mount_info *cmount); + +/** + * Execute a management command remotely on an MDS. + * + * Must have called ceph_init or ceph_mount before calling this. 
+ * + * @param mds_spec string representing rank, MDS name, GID or '*' + * @param cmd array of null-terminated strings + * @param cmdlen length of cmd array + * @param inbuf non-null-terminated input data to command + * @param inbuflen length in octets of inbuf + * @param outbuf populated with pointer to buffer (command output data) + * @param outbuflen length of allocated outbuf + * @param outs populated with pointer to buffer (command error strings) + * @param outslen length of allocated outs + * + * @return 0 on success, negative error code on failure + * + */ +int ceph_mds_command(struct ceph_mount_info *cmount, + const char *mds_spec, + const char **cmd, + size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen); + +/** + * Free a buffer, such as those used for output arrays from ceph_mds_command + */ +void ceph_buffer_free(char *buf); + +/** + * Unmount a mount handle. + * + * @param cmount the mount handle + * @return 0 on success, negative error code on failure + */ +int ceph_unmount(struct ceph_mount_info *cmount); + +/** + * Abort mds connections + * + * @param cmount the mount handle + * @return 0 on success, negative error code on failure + */ +int ceph_abort_conn(struct ceph_mount_info *cmount); + +/** + * Destroy the mount handle. + * + * The handle should not be mounted. This should be called on completion of + * all libcephfs functions. + * + * @param cmount the mount handle + * @return 0 on success, negative error code on failure. + */ +int ceph_release(struct ceph_mount_info *cmount); + +/** + * Deprecated. Unmount and destroy the ceph mount handle. This should be + * called on completion of all libcephfs functions. + * + * Equivalent to ceph_unmount() + ceph_release() without error handling. 
+ * + * @param cmount the mount handle to shut down + */ +void ceph_shutdown(struct ceph_mount_info *cmount); + +/** + * Return associated client addresses + * + * @param cmount the mount handle + * @param addrs the output addresses + * @returns 0 on success, a negative error code on failure + * @note the returned addrs should be freed by the caller + */ +int ceph_getaddrs(struct ceph_mount_info *cmount, char** addrs); + +/** + * Get a global id for current instance + * + * The handle should not be mounted. This should be called on completion of + * all libcephfs functions. + * + * @param cmount the mount handle + * @returns instance global id + */ +uint64_t ceph_get_instance_id(struct ceph_mount_info *cmount); + +/** + * Extract the CephContext from the mount point handle. + * + * @param cmount the ceph mount handle to get the context from. + * @returns the CephContext associated with the mount handle. + */ +#ifdef __cplusplus +CephContext *ceph_get_mount_context(struct ceph_mount_info *cmount); +#else +struct CephContext *ceph_get_mount_context(struct ceph_mount_info *cmount); +#endif +/* + * Check mount status. + * + * Return non-zero value if mounted. Otherwise, zero. + */ +int ceph_is_mounted(struct ceph_mount_info *cmount); + +/** @} init */ + +/** + * @defgroup libcephfs_h_config Config + * Functions for manipulating the Ceph configuration at runtime. + * + * @{ + */ + +/** + * Load the ceph configuration from the specified config file. + * + * @param cmount the mount handle to load the configuration into. + * @param path_list the configuration file path + * @returns 0 on success, negative error code on failure + */ +int ceph_conf_read_file(struct ceph_mount_info *cmount, const char *path_list); + +/** + * Parse the command line arguments and load the configuration parameters. + * + * @param cmount the mount handle to load the configuration parameters into. 
+ * @param argc count of the arguments in argv + * @param argv the argument list + * @returns 0 on success, negative error code on failure + */ +int ceph_conf_parse_argv(struct ceph_mount_info *cmount, int argc, const char **argv); + +/** + * Configure the cluster handle based on an environment variable + * + * The contents of the environment variable are parsed as if they were + * Ceph command line options. If var is NULL, the CEPH_ARGS + * environment variable is used. + * + * @pre ceph_mount() has not been called on the handle + * + * @note BUG: this is not threadsafe - it uses a static buffer + * + * @param cmount handle to configure + * @param var name of the environment variable to read + * @returns 0 on success, negative error code on failure + */ +int ceph_conf_parse_env(struct ceph_mount_info *cmount, const char *var); + +/** Sets a configuration value from a string. + * + * @param cmount the mount handle to set the configuration value on + * @param option the configuration option to set + * @param value the value of the configuration option to set + * + * @returns 0 on success, negative error code otherwise. + */ +int ceph_conf_set(struct ceph_mount_info *cmount, const char *option, const char *value); + +/** Set mount timeout. + * + * @param cmount mount handle to set the configuration value on + * @param timeout mount timeout interval + * + * @returns 0 on success, negative error code otherwise. + */ +int ceph_set_mount_timeout(struct ceph_mount_info *cmount, uint32_t timeout); + +/** + * Gets the configuration value as a string. + * + * @param cmount the mount handle to set the configuration value on + * @param option the config option to get + * @param buf the buffer to fill with the value + * @param len the length of the buffer. 
+ * @returns the size of the buffer filled in with the value, or negative error code on failure + */ +int ceph_conf_get(struct ceph_mount_info *cmount, const char *option, char *buf, size_t len); + +/** @} config */ + +/** + * @defgroup libcephfs_h_fsops File System Operations. + * Functions for getting/setting file system wide information specific to a particular + * mount handle. + * + * @{ + */ + +/** + * Perform a statfs on the ceph file system. This call fills in file system wide statistics + * into the passed in buffer. + * + * @param cmount the ceph mount handle to use for performing the statfs. + * @param path can be any path within the mounted filesystem + * @param stbuf the file system statistics filled in by this function. + * @return 0 on success, negative error code otherwise. + */ +int ceph_statfs(struct ceph_mount_info *cmount, const char *path, struct statvfs *stbuf); + +/** + * Synchronize all filesystem data to persistent media. + * + * @param cmount the ceph mount handle to use for performing the sync_fs. + * @returns 0 on success or negative error code on failure. + */ +int ceph_sync_fs(struct ceph_mount_info *cmount); + +/** + * Get the current working directory. + * + * @param cmount the ceph mount to get the current working directory for. + * @returns the path to the current working directory + */ +const char* ceph_getcwd(struct ceph_mount_info *cmount); + +/** + * Change the current working directory. + * + * @param cmount the ceph mount to change the current working directory for. + * @param path the path to the working directory to change into. + * @returns 0 on success, negative error code otherwise. + */ +int ceph_chdir(struct ceph_mount_info *cmount, const char *path); + +/** @} fsops */ + +/** + * @defgroup libcephfs_h_dir Directory Operations. + * Functions for manipulating and listing directories. + * + * @{ + */ + +/** + * Open the given directory. 
+ * + * @param cmount the ceph mount handle to use to open the directory + * @param name the path name of the directory to open. Must be either an absolute path + * or a path relative to the current working directory. + * @param dirpp the directory result pointer structure to fill in. + * @returns 0 on success or negative error code otherwise. + */ +int ceph_opendir(struct ceph_mount_info *cmount, const char *name, struct ceph_dir_result **dirpp); + +/** + * Open a directory referred to by a file descriptor + * + * @param cmount the ceph mount handle to use to open the directory + * @param dirfd open file descriptor for the directory + * @param dirpp the directory result pointer structure to fill in + * @returns 0 on success or negative error code otherwise + */ +int ceph_fdopendir(struct ceph_mount_info *cmount, int dirfd, struct ceph_dir_result **dirpp); + +/** + * Close the open directory. + * + * @param cmount the ceph mount handle to use for closing the directory + * @param dirp the directory result pointer (set by ceph_opendir) to close + * @returns 0 on success or negative error code on failure. + */ +int ceph_closedir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp); + +/** + * Get the next entry in an open directory. + * + * @param cmount the ceph mount handle to use for performing the readdir. + * @param dirp the directory stream pointer from an opendir holding the state of the + * next entry to return. + * @returns the next directory entry or NULL if at the end of the directory (or the directory + * is empty). This pointer should not be freed by the caller, and is only safe to + * access between return and the next call to ceph_readdir or ceph_closedir. + */ +struct dirent * ceph_readdir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp); + +/** + * A safe version of ceph_readdir, where the directory entry struct is allocated by the caller. + * + * @param cmount the ceph mount handle to use for performing the readdir. 
+ * @param dirp the directory stream pointer from an opendir holding the state of the + * next entry to return. + * @param de the directory entry pointer filled in with the next directory entry of the dirp state. + * @returns 1 if the next entry was filled in, 0 if the end of the directory stream was reached, + * and a negative error code on failure. + */ +int ceph_readdir_r(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, struct dirent *de); + +/** + * A safe version of ceph_readdir that also returns the file statistics (readdir+stat). + * + * @param cmount the ceph mount handle to use for performing the readdir_plus_r. + * @param dirp the directory stream pointer from an opendir holding the state of the + * next entry to return. + * @param de the directory entry pointer filled in with the next directory entry of the dirp state. + * @param stx the stats of the file/directory of the entry returned + * @param want mask showing desired inode attrs for returned entry + * @param flags bitmask of flags to use when filling out attributes + * @param out optional returned Inode argument. If non-NULL, then a reference will be taken on + * the inode and the pointer set on success. + * @returns 1 if the next entry was filled in, 0 if the end of the directory stream was reached, + * and a negative error code on failure. + */ +int ceph_readdirplus_r(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, struct dirent *de, + struct ceph_statx *stx, unsigned want, unsigned flags, struct Inode **out); + +struct ceph_snapdiff_info +{ + struct ceph_mount_info* cmount; + struct ceph_dir_result* dir1; // primary dir entry to build snapdiff for. + struct ceph_dir_result* dir_aux; // aux dir entry to identify the second snapshot. + // Can point to the parent dir entry if entry-in-question + // doesn't exist in the second snapshot +}; + +/** + * Opens snapdiff stream to get snapshots delta (aka snapdiff). 
+ * + * @param cmount the ceph mount handle to use for snapdiff retrieval. + * @param root_path root path for snapshots-in-question + * @param rel_path subpath under the root to build delta for + * @param snap1 the first snapshot name + * @param snap2 the second snapshot name + * @param out resulting snapdiff stream handle to be used for snapdiff results + retrieval via ceph_readdir_snapdiff + * @returns 0 on success and negative error code otherwise + */ +int ceph_open_snapdiff(struct ceph_mount_info* cmount, + const char* root_path, + const char* rel_path, + const char* snap1, + const char* snap2, + struct ceph_snapdiff_info* out); +/** + * Get the next snapshot delta entry. + * + * @param snapdiff snapdiff stream handle opened via ceph_open_snapdiff() + * @param out the next snapdiff entry which includes directory entry and the + * entry's snapshot id - later one for emerged/existing entry or + * former snapshot id for the removed entry. + * @returns >0 on success, 0 if no more entries in the stream and negative + * error code otherwise + */ +int ceph_readdir_snapdiff(struct ceph_snapdiff_info* snapdiff, + struct ceph_snapdiff_entry_t* out); +/** + * Close snapdiff stream. + * + * @param snapdiff snapdiff stream handle opened via ceph_open_snapdiff() + * @returns 0 on success and negative error code otherwise + */ +int ceph_close_snapdiff(struct ceph_snapdiff_info* snapdiff); + +/** + * Gets multiple directory entries. + * + * @param cmount the ceph mount handle to use for performing the getdents. + * @param dirp the directory stream pointer from an opendir holding the state of the + * next entry/entries to return. + * @param name an array of struct dirent that gets filled in with the returned directory entries. + * @param buflen the length of the buffer, which should be the number of dirent structs * sizeof(struct dirent). 
+ * @returns the length of the buffer that was filled in, will always be multiples of sizeof(struct dirent), or a + * negative error code. If the buffer is not large enough for a single entry, -ERANGE is returned. + */ +int ceph_getdents(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, char *name, int buflen); + +/** + * Gets multiple directory names. + * + * @param cmount the ceph mount handle to use for performing the getdents. + * @param dirp the directory stream pointer from an opendir holding the state of the + * next entry/entries to return. + * @param name a buffer to fill in with directory entry names. + * @param buflen the length of the buffer that can be filled in. + * @returns the length of the buffer filled in with entry names, or a negative error code on failure. + * If the buffer isn't large enough for a single entry, -ERANGE is returned. + */ +int ceph_getdnames(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, char *name, int buflen); + +/** + * Rewind the directory stream to the beginning of the directory. + * + * @param cmount the ceph mount handle to use for performing the rewinddir. + * @param dirp the directory stream pointer to rewind. + */ +void ceph_rewinddir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp); + +/** + * Get the current position of a directory stream. + * + * @param cmount the ceph mount handle to use for performing the telldir. + * @param dirp the directory stream pointer to get the current position of. + * @returns the position of the directory stream. Note that the offsets returned + * by ceph_telldir do not have a particular order (cannot be compared with + * inequality). + */ +int64_t ceph_telldir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp); + +/** + * Move the directory stream to a position specified by the given offset. + * + * @param cmount the ceph mount handle to use for performing the seekdir. + * @param dirp the directory stream pointer to move. 
+ * @param offset the position to move the directory stream to. This offset should be + * a value returned by telldir. Note that this value does not refer to the nth + * entry in a directory, and can not be manipulated with plus or minus. + */ +void ceph_seekdir(struct ceph_mount_info *cmount, struct ceph_dir_result *dirp, int64_t offset); + +/** + * Create a directory. + * + * @param cmount the ceph mount handle to use for making the directory. + * @param path the path of the directory to create. This must be either an + * absolute path or a relative path off of the current working directory. + * @param mode the permissions the directory should have once created. + * @returns 0 on success or a negative return code on error. + */ +int ceph_mkdir(struct ceph_mount_info *cmount, const char *path, mode_t mode); + +/** + * Create a directory relative to a file descriptor + * + * @param cmount the ceph mount handle to use for making the directory. + * @param dirfd open file descriptor for a directory (or CEPHFS_AT_FDCWD) + * @param relpath the path of the directory to create. + * @param mode the permissions the directory should have once created. + * @returns 0 on success or a negative return code on error. + */ +int ceph_mkdirat(struct ceph_mount_info *cmount, int dirfd, const char *relpath, mode_t mode); + +/** + * Create a snapshot + * + * @param cmount the ceph mount handle to use for making the directory. + * @param path the path of the directory to create snapshot. This must be either an + * absolute path or a relative path off of the current working directory. + * @param name snapshot name + * @param mode the permissions the directory should have once created. + * @param snap_metadata array of snap metadata structs + * @param nr_snap_metadata number of snap metadata struct entries + * @returns 0 on success or a negative return code on error. 
+ */ +int ceph_mksnap(struct ceph_mount_info *cmount, const char *path, const char *name, + mode_t mode, struct snap_metadata *snap_metadata, size_t nr_snap_metadata); + +/** + * Remove a snapshot + * + * @param cmount the ceph mount handle to use for removing the snapshot. + * @param path the path of the directory to remove the snapshot from. This must be either an + * absolute path or a relative path off of the current working directory. + * @param name snapshot name + * @returns 0 on success or a negative return code on error. + */ +int ceph_rmsnap(struct ceph_mount_info *cmount, const char *path, const char *name); + +/** + * Create multiple directories at once. + * + * @param cmount the ceph mount handle to use for making the directories. + * @param path the full path of directories and sub-directories that should + * be created. + * @param mode the permissions the directory should have once created. + * @returns 0 on success or a negative return code on error. + */ +int ceph_mkdirs(struct ceph_mount_info *cmount, const char *path, mode_t mode); + +/** + * Remove a directory. + * + * @param cmount the ceph mount handle to use for removing directories. + * @param path the path of the directory to remove. + * @returns 0 on success or a negative return code on error. + */ +int ceph_rmdir(struct ceph_mount_info *cmount, const char *path); + +/** @} dir */ + +/** + * @defgroup libcephfs_h_links Links and Link Handling. + * Functions for creating and manipulating hard links and symbolic links. + * + * @{ + */ + +/** + * Create a link. + * + * @param cmount the ceph mount handle to use for creating the link. + * @param existing the path to the existing file/directory to link to. + * @param newname the path to the new file/directory to link from. + * @returns 0 on success or a negative return code on error. + */ +int ceph_link(struct ceph_mount_info *cmount, const char *existing, const char *newname); + +/** + * Read a symbolic link.
+ * + * @param cmount the ceph mount handle to use for reading the link. + * @param path the path to the symlink to read + * @param buf the buffer to hold the path of the file that the symlink points to. + * @param size the length of the buffer + * @returns number of bytes copied on success or negative error code on failure + */ +int ceph_readlink(struct ceph_mount_info *cmount, const char *path, char *buf, int64_t size); + +/** + * Read a symbolic link relative to a file descriptor + * + * @param cmount the ceph mount handle to use for reading the link. + * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD) + * @param relpath the path to the symlink to read + * @param buf the buffer to hold the path of the file that the symlink points to. + * @param size the length of the buffer + * @returns number of bytes copied on success or negative error code on failure + */ +int ceph_readlinkat(struct ceph_mount_info *cmount, int dirfd, const char *relpath, char *buf, + int64_t size); + +/** + * Creates a symbolic link. + * + * @param cmount the ceph mount handle to use for creating the symbolic link. + * @param existing the path to the existing file/directory to link to. + * @param newname the path to the new file/directory to link from. + * @returns 0 on success or a negative return code on failure. + */ +int ceph_symlink(struct ceph_mount_info *cmount, const char *existing, const char *newname); + +/** + * Creates a symbolic link relative to a file descriptor + * + * @param cmount the ceph mount handle to use for creating the symbolic link. + * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD) + * @param existing the path to the existing file/directory to link to. + * @param newname the path to the new file/directory to link from. + * @returns 0 on success or a negative return code on failure.
+ */ +int ceph_symlinkat(struct ceph_mount_info *cmount, const char *existing, int dirfd, + const char *newname); + +/** @} links */ + +/** + * @defgroup libcephfs_h_files File manipulation and handling. + * Functions for creating and manipulating files. + * + * @{ + */ + + +/** + * Checks if deleting a file, link or directory is allowed. + * + * @param cmount the ceph mount handle to use. + * @param path the path of the file, link or directory. + * @returns 0 on success or negative error code on failure. + */ +int ceph_may_delete(struct ceph_mount_info *cmount, const char *path); + +/** + * Removes a file, link, or symbolic link. If the file/link has multiple links to it, the + * file will not disappear from the namespace until all references to it are removed. + * + * @param cmount the ceph mount handle to use for performing the unlink. + * @param path the path of the file or link to unlink. + * @returns 0 on success or negative error code on failure. + */ +int ceph_unlink(struct ceph_mount_info *cmount, const char *path); + +/** + * Removes a file, link, or symbolic link relative to a file descriptor. + * If the file/link has multiple links to it, the file will not + * disappear from the namespace until all references to it are removed. + * + * @param cmount the ceph mount handle to use for performing the unlink. + * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD) + * @param relpath the path of the file or link to unlink. + * @param flags bitfield that can be used to set AT_* modifier flags (only AT_REMOVEDIR) + * @returns 0 on success or negative error code on failure. + */ +int ceph_unlinkat(struct ceph_mount_info *cmount, int dirfd, const char *relpath, int flags); + +/** + * Rename a file or directory. + * + * @param cmount the ceph mount handle to use for performing the rename. + * @param from the path to the existing file or directory. + * @param to the new name of the file or directory + * @returns 0 on success or negative error code on failure. 
+ */ +int ceph_rename(struct ceph_mount_info *cmount, const char *from, const char *to); + +/** + * Get an open file's extended statistics and attributes. + * + * @param cmount the ceph mount handle to use for performing the stat. + * @param fd the file descriptor of the file to get statistics of. + * @param stx the ceph_statx struct that will be filled in with the file's statistics. + * @param want bitfield of CEPH_STATX_* flags showing desired attributes + * @param flags bitfield that can be used to set AT_* modifier flags (AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC, AT_STATX_DONT_SYNC and AT_SYMLINK_NOFOLLOW) + * @returns 0 on success or negative error code on failure. + */ +int ceph_fstatx(struct ceph_mount_info *cmount, int fd, struct ceph_statx *stx, + unsigned int want, unsigned int flags); + +/** + * Get attributes of a file relative to a file descriptor + * + * @param cmount the ceph mount handle to use for performing the stat. + * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD) + * @param relpath the path to the file/directory to get statistics of + * @param stx the ceph_statx struct that will be filled in with the file's statistics. + * @param want bitfield of CEPH_STATX_* flags showing desired attributes + * @param flags bitfield that can be used to set AT_* modifier flags (AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC, AT_STATX_DONT_SYNC and AT_SYMLINK_NOFOLLOW) + * @returns 0 on success or negative error code on failure. + */ +int ceph_statxat(struct ceph_mount_info *cmount, int dirfd, const char *relpath, + struct ceph_statx *stx, unsigned int want, unsigned int flags); + +/** + * Get a file's extended statistics and attributes. + * + * @param cmount the ceph mount handle to use for performing the stat. + * @param path the file or directory to get the statistics of. + * @param stx the ceph_statx struct that will be filled in with the file's statistics.
+ * @param want bitfield of CEPH_STATX_* flags showing desired attributes + * @param flags bitfield that can be used to set AT_* modifier flags (AT_STATX_SYNC_AS_STAT, AT_STATX_FORCE_SYNC, AT_STATX_DONT_SYNC and AT_SYMLINK_NOFOLLOW) + * @returns 0 on success or negative error code on failure. + */ +int ceph_statx(struct ceph_mount_info *cmount, const char *path, struct ceph_statx *stx, + unsigned int want, unsigned int flags); + +/** + * Get a file's statistics and attributes. + * + * ceph_stat() is deprecated, use ceph_statx() instead. + * + * @param cmount the ceph mount handle to use for performing the stat. + * @param path the file or directory to get the statistics of. + * @param stbuf the stat struct that will be filled in with the file's statistics. + * @returns 0 on success or negative error code on failure. + */ +int ceph_stat(struct ceph_mount_info *cmount, const char *path, struct stat *stbuf) + LIBCEPHFS_DEPRECATED; + +/** + * Get a file's statistics and attributes, without following symlinks. + * + * ceph_lstat() is deprecated, use ceph_statx(.., AT_SYMLINK_NOFOLLOW) instead. + * + * @param cmount the ceph mount handle to use for performing the stat. + * @param path the file or directory to get the statistics of. + * @param stbuf the stat struct that will be filled in with the file's statistics. + * @returns 0 on success or negative error code on failure. + */ +int ceph_lstat(struct ceph_mount_info *cmount, const char *path, struct stat *stbuf) + LIBCEPHFS_DEPRECATED; + +/** + * Get the open file's statistics. + * + * ceph_fstat() is deprecated, use ceph_fstatx() instead. + * + * @param cmount the ceph mount handle to use for performing the fstat. + * @param fd the file descriptor of the file to get statistics of. + * @param stbuf the stat struct of the file's statistics, filled in by the + * function.
+ * @returns 0 on success or a negative error code on failure + */ +int ceph_fstat(struct ceph_mount_info *cmount, int fd, struct stat *stbuf) + LIBCEPHFS_DEPRECATED; + +/** + * Set a file's attributes. + * + * @param cmount the ceph mount handle to use for performing the setattr. + * @param relpath the path to the file/directory to set the attributes of. + * @param stx the statx struct that must include attribute values to set on the file. + * @param mask a mask of all the CEPH_SETATTR_* values that have been set in the statx struct. + * @param flags mask of AT_* flags (only AT_ATTR_NOFOLLOW is respected for now) + * @returns 0 on success or negative error code on failure. + */ +int ceph_setattrx(struct ceph_mount_info *cmount, const char *relpath, struct ceph_statx *stx, int mask, int flags); + +/** + * Set a file's attributes (extended version). + * + * @param cmount the ceph mount handle to use for performing the setattr. + * @param fd the fd of the open file/directory to set the attributes of. + * @param stx the statx struct that must include attribute values to set on the file. + * @param mask a mask of all the stat values that have been set on the stat struct. + * @returns 0 on success or negative error code on failure. + */ +int ceph_fsetattrx(struct ceph_mount_info *cmount, int fd, struct ceph_statx *stx, int mask); + +/** + * Change the mode bits (permissions) of a file/directory. + * + * @param cmount the ceph mount handle to use for performing the chmod. + * @param path the path to the file/directory to change the mode bits on. + * @param mode the new permissions to set. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_chmod(struct ceph_mount_info *cmount, const char *path, mode_t mode); + +/** + * Change the mode bits (permissions) of a file/directory. If the path is a + * symbolic link, it's not de-referenced. + * + * @param cmount the ceph mount handle to use for performing the chmod. 
+ * @param path the path of file/directory to change the mode bits on. + * @param mode the new permissions to set. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_lchmod(struct ceph_mount_info *cmount, const char *path, mode_t mode); + +/** + * Change the mode bits (permissions) of an open file. + * + * @param cmount the ceph mount handle to use for performing the chmod. + * @param fd the open file descriptor to change the mode bits on. + * @param mode the new permissions to set. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_fchmod(struct ceph_mount_info *cmount, int fd, mode_t mode); + +/** + * Change the mode bits (permissions) of a file relative to a file descriptor. + * + * @param cmount the ceph mount handle to use for performing the chmod. + * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD) + * @param relpath the relpath of the file/directory to change the mode bits on. + * @param mode the new permissions to set. + * @param flags bitfield that can be used to set AT_* modifier flags (AT_SYMLINK_NOFOLLOW) + * @returns 0 on success or negative error code on failure. + */ +int ceph_chmodat(struct ceph_mount_info *cmount, int dirfd, const char *relpath, + mode_t mode, int flags); + +/** + * Change the ownership of a file/directory. + * + * @param cmount the ceph mount handle to use for performing the chown. + * @param path the path of the file/directory to change the ownership of. + * @param uid the user id to set on the file/directory. + * @param gid the group id to set on the file/directory. + * @returns 0 on success or negative error code on failure. + */ +int ceph_chown(struct ceph_mount_info *cmount, const char *path, int uid, int gid); + +/** + * Change the ownership of a file from an open file descriptor. + * + * @param cmount the ceph mount handle to use for performing the chown. + * @param fd the fd of the open file/directory to change the ownership of.
+ * @param uid the user id to set on the file/directory. + * @param gid the group id to set on the file/directory. + * @returns 0 on success or negative error code on failure. + */ +int ceph_fchown(struct ceph_mount_info *cmount, int fd, int uid, int gid); + +/** + * Change the ownership of a file/directory, don't follow symlinks. + * + * @param cmount the ceph mount handle to use for performing the chown. + * @param path the path of the file/directory to change the ownership of. + * @param uid the user id to set on the file/directory. + * @param gid the group id to set on the file/directory. + * @returns 0 on success or negative error code on failure. + */ +int ceph_lchown(struct ceph_mount_info *cmount, const char *path, int uid, int gid); + +/** + * Change the ownership of a file/directory relative to a file descriptor. + * + * @param cmount the ceph mount handle to use for performing the chown. + * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD) + * @param relpath the relpath of the file/directory to change the ownership of. + * @param uid the user id to set on the file/directory. + * @param gid the group id to set on the file/directory. + * @param flags bitfield that can be used to set AT_* modifier flags (AT_SYMLINK_NOFOLLOW) + * @returns 0 on success or negative error code on failure. + */ +int ceph_chownat(struct ceph_mount_info *cmount, int dirfd, const char *relpath, + uid_t uid, gid_t gid, int flags); + +/** + * Change file/directory last access and modification times. + * + * @param cmount the ceph mount handle to use for performing the utime. + * @param path the path to the file/directory to set the time values of. + * @param buf holding the access and modification times to set on the file. + * @returns 0 on success or negative error code on failure. + */ +int ceph_utime(struct ceph_mount_info *cmount, const char *path, struct utimbuf *buf); + +/** + * Change file/directory last access and modification times.
+ * + * @param cmount the ceph mount handle to use for performing the utime. + * @param fd the fd of the open file/directory to set the time values of. + * @param buf holding the access and modification times to set on the file. + * @returns 0 on success or negative error code on failure. + */ +int ceph_futime(struct ceph_mount_info *cmount, int fd, struct utimbuf *buf); + +/** + * Change file/directory last access and modification times. + * + * @param cmount the ceph mount handle to use for performing the utime. + * @param path the path to the file/directory to set the time values of. + * @param times holding the access and modification times to set on the file. + * @returns 0 on success or negative error code on failure. + */ +int ceph_utimes(struct ceph_mount_info *cmount, const char *path, struct timeval times[2]); + +/** + * Change file/directory last access and modification times, don't follow symlinks. + * + * @param cmount the ceph mount handle to use for performing the utime. + * @param path the path to the file/directory to set the time values of. + * @param times holding the access and modification times to set on the file. + * @returns 0 on success or negative error code on failure. + */ +int ceph_lutimes(struct ceph_mount_info *cmount, const char *path, struct timeval times[2]); + +/** + * Change file/directory last access and modification times. + * + * @param cmount the ceph mount handle to use for performing the utime. + * @param fd the fd of the open file/directory to set the time values of. + * @param times holding the access and modification times to set on the file. + * @returns 0 on success or negative error code on failure. + */ +int ceph_futimes(struct ceph_mount_info *cmount, int fd, struct timeval times[2]); + +/** + * Change file/directory last access and modification times. + * + * @param cmount the ceph mount handle to use for performing the utime. + * @param fd the fd of the open file/directory to set the time values of. 
+ * @param times holding the access and modification times to set on the file. + * @returns 0 on success or negative error code on failure. + */ +int ceph_futimens(struct ceph_mount_info *cmount, int fd, struct timespec times[2]); + +/** + * Change file/directory last access and modification times relative + * to a file descriptor. + * + * @param cmount the ceph mount handle to use for performing the utime. + * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD) + * @param relpath the relpath of the file/directory to set the time values of. + * @param times holding the access and modification times to set on the file. + * @param flags bitfield that can be used to set AT_* modifier flags (AT_SYMLINK_NOFOLLOW) + * @returns 0 on success or negative error code on failure. + */ +int ceph_utimensat(struct ceph_mount_info *cmount, int dirfd, const char *relpath, + struct timespec times[2], int flags); + +/** + * Apply or remove an advisory lock. + * + * @param cmount the ceph mount handle to use for performing the lock. + * @param fd the open file descriptor to change advisory lock. + * @param operation the advisory lock operation to be performed on the file + * descriptor among LOCK_SH (shared lock), LOCK_EX (exclusive lock), + * or LOCK_UN (remove lock). The LOCK_NB value can be ORed to perform a + * non-blocking operation. + * @param owner the user-supplied owner identifier (an arbitrary integer) + * @returns 0 on success or negative error code on failure. + */ +int ceph_flock(struct ceph_mount_info *cmount, int fd, int operation, + uint64_t owner); + +/** + * Truncate the file to the given size. If this operation causes the + * file to expand, the empty bytes will be filled in with zeros. + * + * @param cmount the ceph mount handle to use for performing the truncate. + * @param path the path to the file to truncate. + * @param size the new size of the file.
+ * @returns 0 on success or a negative error code on failure. + */ +int ceph_truncate(struct ceph_mount_info *cmount, const char *path, int64_t size); + +/** + * Make a block or character special file. + * + * @param cmount the ceph mount handle to use for performing the mknod. + * @param path the path to the special file. + * @param mode the permissions to use and the type of special file. The type can be + * one of S_IFREG, S_IFCHR, S_IFBLK, S_IFIFO. + * @param rdev If the file type is S_IFCHR or S_IFBLK then this parameter specifies the + * major and minor numbers of the newly created device special file. Otherwise, + * it is ignored. + * @returns 0 on success or negative error code on failure. + */ +int ceph_mknod(struct ceph_mount_info *cmount, const char *path, mode_t mode, dev_t rdev); +/** + * Create and/or open a file. + * + * @param cmount the ceph mount handle to use for performing the open. + * @param path the path of the file to open. If the flags parameter includes O_CREAT, + * the file will first be created before opening. + * @param flags a set of option masks that control how the file is created/opened. + * @param mode the permissions to place on the file if the file does not exist and O_CREAT + * is specified in the flags. + * @returns a non-negative file descriptor number on success or a negative error code on failure. + */ +int ceph_open(struct ceph_mount_info *cmount, const char *path, int flags, mode_t mode); + +/** + * Create and/or open a file relative to a directory + * + * @param cmount the ceph mount handle to use for performing the open. + * @param dirfd open file descriptor (or CEPHFS_AT_FDCWD) + * @param relpath the path of the file to open. If the flags parameter includes O_CREAT, + * the file will first be created before opening. + * @param flags a set of option masks that control how the file is created/opened. + * @param mode the permissions to place on the file if the file does not exist and O_CREAT + * is specified in the flags. 
+ * @returns a non-negative file descriptor number on success or a negative error code on failure. + */ +int ceph_openat(struct ceph_mount_info *cmount, int dirfd, const char *relpath, int flags, mode_t mode); + +/** + * Create and/or open a file with a specific file layout. + * + * @param cmount the ceph mount handle to use for performing the open. + * @param path the path of the file to open. If the flags parameter includes O_CREAT, + * the file will first be created before opening. + * @param flags a set of option masks that control how the file is created/opened. + * @param mode the permissions to place on the file if the file does not exist and O_CREAT + * is specified in the flags. + * @param stripe_unit the stripe unit size (optional, 0 for default) + * @param stripe_count the stripe count (optional, 0 for default) + * @param object_size the object size (optional, 0 for default) + * @param data_pool name of target data pool (optional, NULL or empty string for default) + * @returns a non-negative file descriptor number on success or a negative error code on failure. + */ +int ceph_open_layout(struct ceph_mount_info *cmount, const char *path, int flags, + mode_t mode, int stripe_unit, int stripe_count, int object_size, + const char *data_pool); + +/** + * Close the open file. + * + * @param cmount the ceph mount handle to use for performing the close. + * @param fd the file descriptor referring to the open file. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_close(struct ceph_mount_info *cmount, int fd); + +/** + * Reposition the open file stream based on the given offset. + * + * @param cmount the ceph mount handle to use for performing the lseek. + * @param fd the open file descriptor referring to the open file and holding the + * current position of the stream.
+ * @param offset the offset to set the stream to + * @param whence the flag to indicate what type of seeking to perform: + * SEEK_SET: the offset is set to the given offset in the file. + * SEEK_CUR: the offset is set to the current location plus @e offset bytes. + * SEEK_END: the offset is set to the end of the file plus @e offset bytes. + * @returns the resulting offset from the beginning of the file on success or a negative error code on failure. + */ +int64_t ceph_lseek(struct ceph_mount_info *cmount, int fd, int64_t offset, int whence); +/** + * Read data from the file. + * + * @param cmount the ceph mount handle to use for performing the read. + * @param fd the file descriptor of the open file to read from. + * @param buf the buffer to read data into + * @param size the initial size of the buffer + * @param offset the offset in the file to read from. If this value is negative, the + * function reads from the current offset of the file descriptor. + * @returns the number of bytes read into buf, or a negative error code on failure. + */ +int ceph_read(struct ceph_mount_info *cmount, int fd, char *buf, int64_t size, int64_t offset); + +/** + * Read data from the file. + * @param cmount the ceph mount handle to use for performing the read. + * @param fd the file descriptor of the open file to read from. + * @param iov the iov structure to read data into + * @param iovcnt the number of items that iov includes + * @param offset the offset in the file to read from. If this value is negative, the + * function reads from the current offset of the file descriptor. + * @returns the number of bytes read into iov, or a negative error code on failure. + */ +int ceph_preadv(struct ceph_mount_info *cmount, int fd, const struct iovec *iov, int iovcnt, + int64_t offset); + +/** + * Write data to a file. + * + * @param cmount the ceph mount handle to use for performing the write.
+ * @param fd the file descriptor of the open file to write to + * @param buf the bytes to write to the file + * @param size the size of the buf array + * @param offset the offset in the file to write to. If this value is negative, the + * function writes to the current offset of the file descriptor. + * @returns the number of bytes written, or a negative error code + */ +int ceph_write(struct ceph_mount_info *cmount, int fd, const char *buf, int64_t size, + int64_t offset); + +/** + * Write data to a file. + * + * @param cmount the ceph mount handle to use for performing the write. + * @param fd the file descriptor of the open file to write to + * @param iov the iov structure holding the data to write + * @param iovcnt the number of items that iov includes + * @param offset the offset in the file to write to. If this value is negative, the + * function writes to the current offset of the file descriptor. + * @returns the number of bytes written, or a negative error code + */ +int ceph_pwritev(struct ceph_mount_info *cmount, int fd, const struct iovec *iov, int iovcnt, + int64_t offset); + +/** + * Truncate a file to the given size. + * + * @param cmount the ceph mount handle to use for performing the ftruncate. + * @param fd the file descriptor of the file to truncate + * @param size the new size of the file + * @returns 0 on success or a negative error code on failure. + */ +int ceph_ftruncate(struct ceph_mount_info *cmount, int fd, int64_t size); + +/** + * Synchronize an open file to persistent media. + * + * @param cmount the ceph mount handle to use for performing the fsync. + * @param fd the file descriptor of the file to sync. + * @param syncdataonly a boolean whether to synchronize metadata and data (0) + * or just data (1). + * @return 0 on success or a negative error code on failure. + */ +int ceph_fsync(struct ceph_mount_info *cmount, int fd, int syncdataonly); + +/** + * Preallocate or release disk space for the file for the byte range.
+ * + * @param cmount the ceph mount handle to use for performing the fallocate. + * @param fd the file descriptor of the file to fallocate. + * @param mode the flags determines the operation to be performed on the given range. + * default operation (0) allocate and initialize to zero the file in the byte range, + * and the file size will be changed if offset + length is greater than + * the file size. if the FALLOC_FL_KEEP_SIZE flag is specified in the mode, + * the file size will not be changed. if the FALLOC_FL_PUNCH_HOLE flag is + * specified in the mode, the operation is deallocate space and zero the byte range. + * @param offset the byte range starting. + * @param length the length of the range. + * @return 0 on success or a negative error code on failure. + */ +int ceph_fallocate(struct ceph_mount_info *cmount, int fd, int mode, + int64_t offset, int64_t length); + +/** + * Enable/disable lazyio for the file. + * + * @param cmount the ceph mount handle to use for performing the fsync. + * @param fd the file descriptor of the file to sync. + * @param enable a boolean to enable lazyio or disable lazyio. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_lazyio(struct ceph_mount_info *cmount, int fd, int enable); + + +/** + * Flushes the write buffer for the file thereby propagating the buffered write to the file. + * + * @param cmount the ceph mount handle to use for performing the fsync. + * @param fd the file descriptor of the file to sync. + * @param offset the starting offset of the byte range to propagate. + * @param count the length in bytes of the range to propagate. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_lazyio_propagate(struct ceph_mount_info *cmount, int fd, int64_t offset, size_t count); + + +/** + * Flushes the write buffer for the file and invalidates the read cache. This allows a subsequent read operation to read and cache data directly from the file and hence everyone's propagated writes would be visible.
+ * + * @param cmount the ceph mount handle to use for performing the fsync. + * @param fd the file descriptor of the file to sync. + * @param offset the starting offset of the byte range to synchronize. + * @param count the length in bytes of the range to synchronize. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_lazyio_synchronize(struct ceph_mount_info *cmount, int fd, int64_t offset, size_t count); + +/** @} file */ + +/** + * @defgroup libcephfs_h_xattr Extended Attribute manipulation and handling. + * Functions for creating and manipulating extended attributes on files. + * + * @{ + */ + +/** + * Get an extended attribute. + * + * @param cmount the ceph mount handle to use for performing the getxattr. + * @param path the path to the file + * @param name the name of the extended attribute to get + * @param value a pre-allocated buffer to hold the xattr's value + * @param size the size of the pre-allocated buffer + * @returns the size of the value or a negative error code on failure. + */ +int ceph_getxattr(struct ceph_mount_info *cmount, const char *path, const char *name, + void *value, size_t size); + +/** + * Get an extended attribute. + * + * @param cmount the ceph mount handle to use for performing the getxattr. + * @param fd the open file descriptor referring to the file to get extended attribute from. + * @param name the name of the extended attribute to get + * @param value a pre-allocated buffer to hold the xattr's value + * @param size the size of the pre-allocated buffer + * @returns the size of the value or a negative error code on failure. + */ +int ceph_fgetxattr(struct ceph_mount_info *cmount, int fd, const char *name, + void *value, size_t size); + +/** + * Get an extended attribute without following symbolic links. This function is + * identical to ceph_getxattr, but if the path refers to a symbolic link, + * we get the extended attributes of the symlink rather than the attributes + * of the file it refers to. + * + * @param cmount the ceph mount handle to use for performing the lgetxattr.
+ * @param path the path to the file + * @param name the name of the extended attribute to get + * @param value a pre-allocated buffer to hold the xattr's value + * @param size the size of the pre-allocated buffer + * @returns the size of the value or a negative error code on failure. + */ +int ceph_lgetxattr(struct ceph_mount_info *cmount, const char *path, const char *name, + void *value, size_t size); + +/** + * List the extended attribute keys on a file. + * + * @param cmount the ceph mount handle to use for performing the listxattr. + * @param path the path to the file. + * @param list a buffer to be filled in with the list of extended attributes keys. + * @param size the size of the list buffer. + * @returns the size of the resulting list filled in. + */ +int ceph_listxattr(struct ceph_mount_info *cmount, const char *path, char *list, size_t size); + +/** + * List the extended attribute keys on a file. + * + * @param cmount the ceph mount handle to use for performing the listxattr. + * @param fd the open file descriptor referring to the file to list extended attributes on. + * @param list a buffer to be filled in with the list of extended attributes keys. + * @param size the size of the list buffer. + * @returns the size of the resulting list filled in. + */ +int ceph_flistxattr(struct ceph_mount_info *cmount, int fd, char *list, size_t size); + +/** + * Get the list of extended attribute keys on a file, but do not follow symbolic links. + * + * @param cmount the ceph mount handle to use for performing the llistxattr. + * @param path the path to the file. + * @param list a buffer to be filled in with the list of extended attributes keys. + * @param size the size of the list buffer. + * @returns the size of the resulting list filled in. + */ +int ceph_llistxattr(struct ceph_mount_info *cmount, const char *path, char *list, size_t size); + +/** + * Remove an extended attribute from a file. 
+ * + * @param cmount the ceph mount handle to use for performing the removexattr. + * @param path the path to the file. + * @param name the name of the extended attribute to remove. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_removexattr(struct ceph_mount_info *cmount, const char *path, const char *name); + +/** + * Remove an extended attribute from a file. + * + * @param cmount the ceph mount handle to use for performing the removexattr. + * @param fd the open file descriptor referring to the file to remove extended attribute from. + * @param name the name of the extended attribute to remove. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_fremovexattr(struct ceph_mount_info *cmount, int fd, const char *name); + +/** + * Remove the extended attribute from a file, do not follow symbolic links. + * + * @param cmount the ceph mount handle to use for performing the lremovexattr. + * @param path the path to the file. + * @param name the name of the extended attribute to remove. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_lremovexattr(struct ceph_mount_info *cmount, const char *path, const char *name); + +/** + * Set an extended attribute on a file. + * + * @param cmount the ceph mount handle to use for performing the setxattr. + * @param path the path to the file. + * @param name the name of the extended attribute to set. + * @param value the bytes of the extended attribute value + * @param size the size of the extended attribute value + * @param flags the flags can be: + * CEPH_XATTR_CREATE: create the extended attribute. Must not exist. + * CEPH_XATTR_REPLACE: replace the extended attribute, Must already exist. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_setxattr(struct ceph_mount_info *cmount, const char *path, const char *name, + const void *value, size_t size, int flags); + +/** + * Set an extended attribute on a file. 
+ * + * @param cmount the ceph mount handle to use for performing the setxattr. + * @param fd the open file descriptor referring to the file to set extended attribute on. + * @param name the name of the extended attribute to set. + * @param value the bytes of the extended attribute value + * @param size the size of the extended attribute value + * @param flags the flags can be: + * CEPH_XATTR_CREATE: create the extended attribute. Must not exist. + * CEPH_XATTR_REPLACE: replace the extended attribute, Must already exist. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_fsetxattr(struct ceph_mount_info *cmount, int fd, const char *name, + const void *value, size_t size, int flags); + +/** + * Set an extended attribute on a file, do not follow symbolic links. + * + * @param cmount the ceph mount handle to use for performing the lsetxattr. + * @param path the path to the file. + * @param name the name of the extended attribute to set. + * @param value the bytes of the extended attribute value + * @param size the size of the extended attribute value + * @param flags the flags can be: + * CEPH_XATTR_CREATE: create the extended attribute. Must not exist. + * CEPH_XATTR_REPLACE: replace the extended attribute, Must already exist. + * @returns 0 on success or a negative error code on failure. + */ +int ceph_lsetxattr(struct ceph_mount_info *cmount, const char *path, const char *name, + const void *value, size_t size, int flags); + +/** @} xattr */ + +/** + * @defgroup libcephfs_h_filelayout Control File Layout. + * Functions for setting and getting the file layout of existing files. + * + * @{ + */ + +/** + * Get the file striping unit from an open file descriptor. + * + * @param cmount the ceph mount handle to use. + * @param fh the open file descriptor referring to the file to get the striping unit of. + * @returns the striping unit of the file or a negative error code on failure. 
+ */ +int ceph_get_file_stripe_unit(struct ceph_mount_info *cmount, int fh); + +/** + * Get the striping unit of the file or directory at the given path. + * + * @param cmount the ceph mount handle to use. + * @param path the path of the file/directory to get the striping unit of. + * @returns the striping unit of the file or a negative error code on failure. + */ +int ceph_get_path_stripe_unit(struct ceph_mount_info *cmount, const char *path); + +/** + * Get the file striping count from an open file descriptor. + * + * @param cmount the ceph mount handle to use. + * @param fh the open file descriptor referring to the file to get the striping count of. + * @returns the striping count of the file or a negative error code on failure. + */ +int ceph_get_file_stripe_count(struct ceph_mount_info *cmount, int fh); + +/** + * Get the striping count of the file or directory at the given path. + * + * @param cmount the ceph mount handle to use. + * @param path the path of the file/directory to get the striping count of. + * @returns the striping count of the file or a negative error code on failure. + */ +int ceph_get_path_stripe_count(struct ceph_mount_info *cmount, const char *path); + +/** + * Get the file object size from an open file descriptor. + * + * @param cmount the ceph mount handle to use. + * @param fh the open file descriptor referring to the file to get the object size of. + * @returns the object size of the file or a negative error code on failure. + */ +int ceph_get_file_object_size(struct ceph_mount_info *cmount, int fh); + +/** + * Get the object size of the file or directory at the given path. + * + * @param cmount the ceph mount handle to use. + * @param path the path of the file/directory to get the object size of. + * @returns the object size of the file or a negative error code on failure. + */ +int ceph_get_path_object_size(struct ceph_mount_info *cmount, const char *path); + +/** + * Get the file pool information from an open file descriptor. + * + * @param cmount the ceph mount handle to use. 
+ * @param fh the open file descriptor referring to the file to get the pool information of. + * @returns the ceph pool id that the file is in + */ +int ceph_get_file_pool(struct ceph_mount_info *cmount, int fh); + +/** + * Get the pool information of the file or directory at the given path. + * + * @param cmount the ceph mount handle to use. + * @param path the path of the file/directory to get the pool information of. + * @returns the ceph pool id that the file is in + */ +int ceph_get_path_pool(struct ceph_mount_info *cmount, const char *path); + +/** + * Get the name of the pool an opened file is stored in. + * + * Write the name of the file's pool to the buffer. If buflen is 0, return + * a suggested length for the buffer. + * + * @param cmount the ceph mount handle to use. + * @param fh the open file descriptor referring to the file + * @param buf buffer to store the name in + * @param buflen size of the buffer + * @returns length in bytes of the pool name, or -ERANGE if the buffer is not large enough. + */ +int ceph_get_file_pool_name(struct ceph_mount_info *cmount, int fh, char *buf, size_t buflen); + +/** + * Get the name of a pool by id. + * + * Given a pool's numeric identifier, get the pool's alphanumeric name. + * + * @param cmount the ceph mount handle to use + * @param pool the numeric pool id + * @param buf buffer to store the name in + * @param buflen size of the buffer + * @returns length in bytes of the pool name, or -ERANGE if the buffer is not large enough + */ +int ceph_get_pool_name(struct ceph_mount_info *cmount, int pool, char *buf, size_t buflen); + +/** + * Get the name of the pool a file is stored in + * + * Write the name of the file's pool to the buffer. If buflen is 0, return + * a suggested length for the buffer. + * + * @param cmount the ceph mount handle to use. 
+ * @param path the path of the file/directory + * @param buf buffer to store the name in + * @param buflen size of the buffer + * @returns length in bytes of the pool name, or -ERANGE if the buffer is not large enough. + */ +int ceph_get_path_pool_name(struct ceph_mount_info *cmount, const char *path, char *buf, size_t buflen); + +/** + * Get the default pool name of cephfs + * Write the name of the default pool to the buffer. If buflen is 0, return + * a suggested length for the buffer. + * @param cmount the ceph mount handle to use. + * @param buf buffer to store the name in + * @param buflen size of the buffer + * @returns length in bytes of the pool name, or -ERANGE if the buffer is not large enough. + */ +int ceph_get_default_data_pool_name(struct ceph_mount_info *cmount, char *buf, size_t buflen); + +/** + * Get the file layout from an open file descriptor. + * + * @param cmount the ceph mount handle to use. + * @param fh the open file descriptor referring to the file to get the layout of. + * @param stripe_unit where to store the striping unit of the file + * @param stripe_count where to store the striping count of the file + * @param object_size where to store the object size of the file + * @param pg_pool where to store the ceph pool id that the file is in + * @returns 0 on success or a negative error code on failure. + */ +int ceph_get_file_layout(struct ceph_mount_info *cmount, int fh, int *stripe_unit, int *stripe_count, int *object_size, int *pg_pool); + +/** + * Get the file layout. + * + * @param cmount the ceph mount handle to use. + * @param path the path of the file/directory get the layout of. + * @param stripe_unit where to store the striping unit of the file + * @param stripe_count where to store the striping count of the file + * @param object_size where to store the object size of the file + * @param pg_pool where to store the ceph pool id that the file is in + * @returns 0 on success or a negative error code on failure. 
+ */ +int ceph_get_path_layout(struct ceph_mount_info *cmount, const char *path, int *stripe_unit, int *stripe_count, int *object_size, int *pg_pool); + +/** + * Get the file replication information from an open file descriptor. + * + * @param cmount the ceph mount handle to use. + * @param fh the open file descriptor referring to the file to get the replication information of. + * @returns the replication factor of the file. + */ +int ceph_get_file_replication(struct ceph_mount_info *cmount, int fh); + +/** + * Get the replication information of the file or directory at the given path. + * + * @param cmount the ceph mount handle to use. + * @param path the path of the file/directory to get the replication information of. + * @returns the replication factor of the file. + */ +int ceph_get_path_replication(struct ceph_mount_info *cmount, const char *path); + +/** + * Get the id of the named pool. + * + * @param cmount the ceph mount handle to use. + * @param pool_name the name of the pool. + * @returns the pool id, or a negative error code on failure. + */ +int ceph_get_pool_id(struct ceph_mount_info *cmount, const char *pool_name); + +/** + * Get the pool replication factor. + * + * @param cmount the ceph mount handle to use. + * @param pool_id the pool id to look up + * @returns the replication factor, or a negative error code on failure. + */ +int ceph_get_pool_replication(struct ceph_mount_info *cmount, int pool_id); + +/** + * Get the OSD address where the primary copy of a file stripe is located. + * + * @param cmount the ceph mount handle to use. + * @param fd the open file descriptor referring to the file to get the stripe address of. + * @param offset the offset into the file to specify the stripe. The offset can be + * anywhere within the stripe unit. + * @param addr the address of the OSD holding that stripe + * @param naddr the capacity of the address passed in. + * @returns the size of the addresses filled into the @e addr parameter, or a negative + * error code on failure. 
+ */ +int ceph_get_file_stripe_address(struct ceph_mount_info *cmount, int fd, int64_t offset, + struct sockaddr_storage *addr, int naddr); + +/** + * Get the list of OSDs where the objects containing a file offset are located. + * + * @param cmount the ceph mount handle to use. + * @param fd the open file descriptor referring to the file. + * @param offset the offset within the file. + * @param length return the number of bytes between the offset and the end of + * the stripe unit (optional). + * @param osds an integer array to hold the OSD ids. + * @param nosds the size of the integer array. + * @returns the number of items stored in the output array, or -ERANGE if the + * array is not large enough. + */ +int ceph_get_file_extent_osds(struct ceph_mount_info *cmount, int fd, + int64_t offset, int64_t *length, int *osds, int nosds); + +/** + * Get the fully qualified CRUSH location of an OSD. + * + * Returns (type, name) string pairs for each device in the CRUSH bucket + * hierarchy starting from the given osd to the root. Each pair element is + * separated by a NUL character. + * + * @param cmount the ceph mount handle to use. + * @param osd the OSD id. + * @param path buffer to store location. + * @param len size of buffer. + * @returns the amount of bytes written into the buffer, or -ERANGE if the + * buffer is not large enough. + */ +int ceph_get_osd_crush_location(struct ceph_mount_info *cmount, + int osd, char *path, size_t len); + +/** + * Get the network address of an OSD. + * + * @param cmount the ceph mount handle. + * @param osd the OSD id. + * @param addr the OSD network address. + * @returns zero on success, otherwise returns a negative error code. + */ +int ceph_get_osd_addr(struct ceph_mount_info *cmount, int osd, + struct sockaddr_storage *addr); + +/** + * Get the file layout stripe unit granularity. + * @param cmount the ceph mount handle. + * @returns the stripe unit granularity or a negative error code on failure. 
+ */ +int ceph_get_stripe_unit_granularity(struct ceph_mount_info *cmount); + +/** @} filelayout */ + +/** + * No longer available. Do not use. + * These functions will return -EOPNOTSUPP. + */ +int ceph_set_default_file_stripe_unit(struct ceph_mount_info *cmount, int stripe); +int ceph_set_default_file_stripe_count(struct ceph_mount_info *cmount, int count); +int ceph_set_default_object_size(struct ceph_mount_info *cmount, int size); +int ceph_set_default_preferred_pg(struct ceph_mount_info *cmount, int osd); +int ceph_set_default_file_replication(struct ceph_mount_info *cmount, int replication); + +/** + * Read from local replicas when possible. + * + * @param cmount the ceph mount handle to use. + * @param val a boolean to set (1) or clear (0) the option to favor local objects + * for reads. + * @returns 0 + */ +int ceph_localize_reads(struct ceph_mount_info *cmount, int val); + +/** + * Get the osd id of the local osd (if any) + * + * @param cmount the ceph mount handle to use. + * @returns the osd (if any) local to the node where this call is made, otherwise + * -1 is returned. + */ +int ceph_get_local_osd(struct ceph_mount_info *cmount); + +/** @} default_filelayout */ + +/** + * Get the capabilities currently issued to the client. + * + * @param cmount the ceph mount handle to use. + * @param fd the file descriptor to get issued + * @returns the current capabilities issued to this client + * for the open file + */ +int ceph_debug_get_fd_caps(struct ceph_mount_info *cmount, int fd); + +/** + * Get the capabilities currently issued to the client. + * + * @param cmount the ceph mount handle to use. 
+ * @param path the path to the file + * @returns the current capabilities issued to this client + * for the file + */ +int ceph_debug_get_file_caps(struct ceph_mount_info *cmount, const char *path); + +/* Low Level */ +struct Inode *ceph_ll_get_inode(struct ceph_mount_info *cmount, + vinodeno_t vino); + +int ceph_ll_lookup_vino(struct ceph_mount_info *cmount, vinodeno_t vino, + Inode **inode); + +int ceph_ll_lookup_inode( + struct ceph_mount_info *cmount, + struct inodeno_t ino, + Inode **inode); + +/** + * Get the root inode of FS. Increase counter of references for root Inode. You must call ceph_ll_forget for it! + * + * @param cmount the ceph mount handle to use. + * @param parent pointer to pointer to Inode struct. Pointer to root inode will be returned + * @returns 0 if all good + */ +int ceph_ll_lookup_root(struct ceph_mount_info *cmount, + Inode **parent); +int ceph_ll_lookup(struct ceph_mount_info *cmount, Inode *parent, + const char *name, Inode **out, struct ceph_statx *stx, + unsigned want, unsigned flags, const UserPerm *perms); +int ceph_ll_put(struct ceph_mount_info *cmount, struct Inode *in); +int ceph_ll_forget(struct ceph_mount_info *cmount, struct Inode *in, + int count); +int ceph_ll_walk(struct ceph_mount_info *cmount, const char* name, Inode **i, + struct ceph_statx *stx, unsigned int want, unsigned int flags, + const UserPerm *perms); +int ceph_ll_getattr(struct ceph_mount_info *cmount, struct Inode *in, + struct ceph_statx *stx, unsigned int want, unsigned int flags, + const UserPerm *perms); +int ceph_ll_setattr(struct ceph_mount_info *cmount, struct Inode *in, + struct ceph_statx *stx, int mask, const UserPerm *perms); +int ceph_ll_open(struct ceph_mount_info *cmount, struct Inode *in, int flags, + struct Fh **fh, const UserPerm *perms); +off_t ceph_ll_lseek(struct ceph_mount_info *cmount, struct Fh* filehandle, + off_t offset, int whence); +int ceph_ll_read(struct ceph_mount_info *cmount, struct Fh* filehandle, + int64_t off, uint64_t 
len, char* buf); +int ceph_ll_fsync(struct ceph_mount_info *cmount, struct Fh *fh, + int syncdataonly); +int ceph_ll_sync_inode(struct ceph_mount_info *cmount, struct Inode *in, + int syncdataonly); +int ceph_ll_fallocate(struct ceph_mount_info *cmount, struct Fh *fh, + int mode, int64_t offset, int64_t length); +int ceph_ll_write(struct ceph_mount_info *cmount, struct Fh* filehandle, + int64_t off, uint64_t len, const char *data); +int64_t ceph_ll_readv(struct ceph_mount_info *cmount, struct Fh *fh, + const struct iovec *iov, int iovcnt, int64_t off); +int64_t ceph_ll_writev(struct ceph_mount_info *cmount, struct Fh *fh, + const struct iovec *iov, int iovcnt, int64_t off); +int ceph_ll_close(struct ceph_mount_info *cmount, struct Fh* filehandle); +int ceph_ll_iclose(struct ceph_mount_info *cmount, struct Inode *in, int mode); +/** + * Get xattr value by xattr name. + * + * @param cmount the ceph mount handle to use. + * @param in file handle + * @param name name of attribute + * @param value pointer to begin buffer + * @param size buffer size + * @param perms pointer to UserPerms object + * @returns size of returned buffer. 
Negative number in error case + */ +int ceph_ll_getxattr(struct ceph_mount_info *cmount, struct Inode *in, + const char *name, void *value, size_t size, + const UserPerm *perms); +int ceph_ll_setxattr(struct ceph_mount_info *cmount, struct Inode *in, + const char *name, const void *value, size_t size, + int flags, const UserPerm *perms); +int ceph_ll_listxattr(struct ceph_mount_info *cmount, struct Inode *in, + char *list, size_t buf_size, size_t *list_size, + const UserPerm *perms); +int ceph_ll_removexattr(struct ceph_mount_info *cmount, struct Inode *in, + const char *name, const UserPerm *perms); +int ceph_ll_create(struct ceph_mount_info *cmount, Inode *parent, + const char *name, mode_t mode, int oflags, Inode **outp, + Fh **fhp, struct ceph_statx *stx, unsigned want, + unsigned lflags, const UserPerm *perms); +int ceph_ll_mknod(struct ceph_mount_info *cmount, Inode *parent, + const char *name, mode_t mode, dev_t rdev, Inode **out, + struct ceph_statx *stx, unsigned want, unsigned flags, + const UserPerm *perms); +int ceph_ll_mkdir(struct ceph_mount_info *cmount, Inode *parent, + const char *name, mode_t mode, Inode **out, + struct ceph_statx *stx, unsigned want, + unsigned flags, const UserPerm *perms); +int ceph_ll_link(struct ceph_mount_info *cmount, struct Inode *in, + struct Inode *newparent, const char *name, + const UserPerm *perms); +int ceph_ll_opendir(struct ceph_mount_info *cmount, struct Inode *in, + struct ceph_dir_result **dirpp, const UserPerm *perms); +int ceph_ll_releasedir(struct ceph_mount_info *cmount, + struct ceph_dir_result* dir); +int ceph_ll_rename(struct ceph_mount_info *cmount, struct Inode *parent, + const char *name, struct Inode *newparent, + const char *newname, const UserPerm *perms); +int ceph_ll_unlink(struct ceph_mount_info *cmount, struct Inode *in, + const char *name, const UserPerm *perms); +int ceph_ll_statfs(struct ceph_mount_info *cmount, struct Inode *in, + struct statvfs *stbuf); +int ceph_ll_readlink(struct 
ceph_mount_info *cmount, struct Inode *in, + char *buf, size_t bufsize, const UserPerm *perms); +int ceph_ll_symlink(struct ceph_mount_info *cmount, + Inode *in, const char *name, const char *value, + Inode **out, struct ceph_statx *stx, + unsigned want, unsigned flags, + const UserPerm *perms); +int ceph_ll_rmdir(struct ceph_mount_info *cmount, struct Inode *in, + const char *name, const UserPerm *perms); +uint32_t ceph_ll_stripe_unit(struct ceph_mount_info *cmount, + struct Inode *in); +uint32_t ceph_ll_file_layout(struct ceph_mount_info *cmount, + struct Inode *in, + struct ceph_file_layout *layout); +uint64_t ceph_ll_snap_seq(struct ceph_mount_info *cmount, + struct Inode *in); +int ceph_ll_get_stripe_osd(struct ceph_mount_info *cmount, + struct Inode *in, + uint64_t blockno, + struct ceph_file_layout* layout); +int ceph_ll_num_osds(struct ceph_mount_info *cmount); +int ceph_ll_osdaddr(struct ceph_mount_info *cmount, + int osd, uint32_t *addr); +uint64_t ceph_ll_get_internal_offset(struct ceph_mount_info *cmount, + struct Inode *in, uint64_t blockno); +int ceph_ll_read_block(struct ceph_mount_info *cmount, + struct Inode *in, uint64_t blockid, + char* bl, uint64_t offset, uint64_t length, + struct ceph_file_layout* layout); +int ceph_ll_write_block(struct ceph_mount_info *cmount, + struct Inode *in, uint64_t blockid, + char* buf, uint64_t offset, + uint64_t length, struct ceph_file_layout* layout, + uint64_t snapseq, uint32_t sync); +int ceph_ll_commit_blocks(struct ceph_mount_info *cmount, + struct Inode *in, uint64_t offset, uint64_t range); + + +int ceph_ll_getlk(struct ceph_mount_info *cmount, + Fh *fh, struct flock *fl, uint64_t owner); +int ceph_ll_setlk(struct ceph_mount_info *cmount, + Fh *fh, struct flock *fl, uint64_t owner, int sleep); + +int ceph_ll_lazyio(struct ceph_mount_info *cmount, Fh *fh, int enable); + +/* + * Delegation support + * + * Delegations are way for an application to request exclusive or + * semi-exclusive access to an Inode. 
The client requests the delegation and + * if it's successful it can reliably cache file data and metadata until the + * delegation is recalled. + * + * Recalls are issued via a callback function, provided by the application. + * Callback functions should act something like signal handlers. You want to + * do as little as possible in the callback. Any major work should be deferred + * in some fashion as it's difficult to predict the context in which this + * function will be called. + * + * Once the delegation has been recalled, the application should return it as + * soon as possible. The application has client_deleg_timeout seconds to + * return it, after which the cmount structure is forcibly unmounted and + * further calls into it fail. + * + * The application can set the client_deleg_timeout config option to suit its + * needs, but it should take care to choose a value that allows it to avoid + * forcible eviction from the cluster in the event of an application bug. + */ + +/* Commands for manipulating delegation state */ +#ifndef CEPH_DELEGATION_NONE +# define CEPH_DELEGATION_NONE 0 +# define CEPH_DELEGATION_RD 1 +# define CEPH_DELEGATION_WR 2 +#endif + +/** + * Get the amount of time that the client has to return caps + * @param cmount the ceph mount handle to use. + * + * In the event that a client does not return its caps, the MDS may blocklist + * it after this timeout. Applications should check this value and ensure + * that they set the delegation timeout to a value lower than this. + * + * This call returns the cap return timeout (in seconds) for this cmount, or + * zero if it's not mounted. + */ +uint32_t ceph_get_cap_return_timeout(struct ceph_mount_info *cmount); + +/** + * Set the delegation timeout for the mount (thereby enabling delegations) + * @param cmount the ceph mount handle to use. 
+ * @param timeout the delegation timeout (in seconds) + * + * Since the client could end up blocklisted if it doesn't return delegations + * in time, we mandate that any application wanting to use delegations + * explicitly set the timeout beforehand. Until this call is done on the + * mount, attempts to set a delegation will return -ETIME. + * + * Once a delegation is recalled, if it is not returned in this amount of + * time, the cmount will be forcibly unmounted and further access attempts + * will fail (usually with -ENOTCONN errors). + * + * This value is further vetted against the cap return timeout, and this call + * can fail with -EINVAL if the timeout value is too long. Delegations can be + * disabled again by setting the timeout to 0. + */ +int ceph_set_deleg_timeout(struct ceph_mount_info *cmount, uint32_t timeout); + +/** + * Request a delegation on an open Fh + * @param cmount the ceph mount handle to use. + * @param fh file handle + * @param cmd CEPH_DELEGATION_* command + * @param cb callback function for recalling delegation + * @param priv opaque token passed back during recalls + * + * Returns 0 if the delegation was granted, -EAGAIN if there was a conflict + * and other error codes if there is a fatal error of some sort (e.g. -ENOMEM, + * -ETIME) + */ +int ceph_ll_delegation(struct ceph_mount_info *cmount, Fh *fh, + unsigned int cmd, ceph_deleg_cb_t cb, void *priv); + +mode_t ceph_umask(struct ceph_mount_info *cmount, mode_t mode); + +/* state reclaim */ +#define CEPH_RECLAIM_RESET 1 + +/** + * Set ceph client uuid + * @param cmount the ceph mount handle to use. + * @param uuid the uuid to set + * + * Must be called before mount. + */ +void ceph_set_uuid(struct ceph_mount_info *cmount, const char *uuid); + +/** + * Set ceph client session timeout + * @param cmount the ceph mount handle to use. + * @param timeout the timeout to set + * + * Must be called before mount. 
+ */ +void ceph_set_session_timeout(struct ceph_mount_info *cmount, unsigned timeout); + +/** + * Start to reclaim states of other client + * @param cmount the ceph mount handle to use. + * @param uuid uuid of client whose states need to be reclaimed + * @param flags flags that control how states get reclaimed + * + * Returns 0 on success, -EOPNOTSUPP if mds does not support the operation, + * -ENOENT if CEPH_RECLAIM_RESET is specified and there is no client + * with the given uuid, -ENOTRECOVERABLE in all other error cases. + */ +int ceph_start_reclaim(struct ceph_mount_info *cmount, + const char *uuid, unsigned flags); + +/** + * Finish reclaiming states of other client + * @param cmount the ceph mount handle to use. + */ +void ceph_finish_reclaim(struct ceph_mount_info *cmount); + +/** + * Register a set of callbacks to be used with this cmount + * + * This is deprecated, use ceph_ll_register_callbacks2() instead. + * + * @param cmount the ceph mount handle on which the cb's should be registered + * @param args callback arguments to register with the cmount + * + * Any fields set to NULL will be ignored. There currently is no way to + * unregister these callbacks, so this is a one-way change. + */ +void ceph_ll_register_callbacks(struct ceph_mount_info *cmount, + struct ceph_client_callback_args *args); + +/** + * Register a set of callbacks to be used with this cmount + * @param cmount the ceph mount handle on which the cb's should be registered + * @param args callback arguments to register with the cmount + * + * Any fields set to NULL will be ignored. There currently is no way to + * unregister these callbacks, so this is a one-way change. + * + * Returns 0 on success or -EBUSY if the cmount is mounting or already mounted. + */ +int ceph_ll_register_callbacks2(struct ceph_mount_info *cmount, + struct ceph_client_callback_args *args); + +/** + * Get snapshot info + * + * @param cmount the ceph mount handle to use for fetching the snapshot info. 
+ * @param path the path of the snapshot. This must be either an + * absolute path or a relative path off of the current working directory. + * @returns 0 on success or a negative return code on error. + */ +int ceph_get_snap_info(struct ceph_mount_info *cmount, + const char *path, struct snap_info *snap_info); + +/** + * Free snapshot info buffers + * + * @param snap_info snapshot info struct (fetched via call to ceph_get_snap_info()). + */ +void ceph_free_snap_info_buffer(struct snap_info *snap_info); +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/include/cephfs/metrics/Types.h b/src/include/cephfs/metrics/Types.h new file mode 100644 index 000000000..d7cf56138 --- /dev/null +++ b/src/include/cephfs/metrics/Types.h @@ -0,0 +1,699 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef CEPH_INCLUDE_CEPHFS_METRICS_TYPES_H +#define CEPH_INCLUDE_CEPHFS_METRICS_TYPES_H + +#include <string> +#include <boost/variant.hpp> + +#include "common/Formatter.h" +#include "include/buffer_fwd.h" +#include "include/encoding.h" +#include "include/int_types.h" +#include "include/stringify.h" +#include "include/utime.h" + +namespace ceph { class Formatter; } + +enum ClientMetricType { + CLIENT_METRIC_TYPE_CAP_INFO, + CLIENT_METRIC_TYPE_READ_LATENCY, + CLIENT_METRIC_TYPE_WRITE_LATENCY, + CLIENT_METRIC_TYPE_METADATA_LATENCY, + CLIENT_METRIC_TYPE_DENTRY_LEASE, + CLIENT_METRIC_TYPE_OPENED_FILES, + CLIENT_METRIC_TYPE_PINNED_ICAPS, + CLIENT_METRIC_TYPE_OPENED_INODES, + CLIENT_METRIC_TYPE_READ_IO_SIZES, + CLIENT_METRIC_TYPE_WRITE_IO_SIZES, + CLIENT_METRIC_TYPE_AVG_READ_LATENCY, + CLIENT_METRIC_TYPE_STDEV_READ_LATENCY, + CLIENT_METRIC_TYPE_AVG_WRITE_LATENCY, + CLIENT_METRIC_TYPE_STDEV_WRITE_LATENCY, + CLIENT_METRIC_TYPE_AVG_METADATA_LATENCY, + CLIENT_METRIC_TYPE_STDEV_METADATA_LATENCY, +}; +inline std::ostream &operator<<(std::ostream &os, const ClientMetricType &type) { + switch(type) { + case 
ClientMetricType::CLIENT_METRIC_TYPE_CAP_INFO: + os << "CAP_INFO"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_READ_LATENCY: + os << "READ_LATENCY"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_WRITE_LATENCY: + os << "WRITE_LATENCY"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_METADATA_LATENCY: + os << "METADATA_LATENCY"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_DENTRY_LEASE: + os << "DENTRY_LEASE"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_OPENED_FILES: + os << "OPENED_FILES"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_PINNED_ICAPS: + os << "PINNED_ICAPS"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_OPENED_INODES: + os << "OPENED_INODES"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_READ_IO_SIZES: + os << "READ_IO_SIZES"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_WRITE_IO_SIZES: + os << "WRITE_IO_SIZES"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_AVG_READ_LATENCY: + os << "AVG_READ_LATENCY"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_STDEV_READ_LATENCY: + os << "STDEV_READ_LATENCY"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_AVG_WRITE_LATENCY: + os << "AVG_WRITE_LATENCY"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_STDEV_WRITE_LATENCY: + os << "STDEV_WRITE_LATENCY"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_AVG_METADATA_LATENCY: + os << "AVG_METADATA_LATENCY"; + break; + case ClientMetricType::CLIENT_METRIC_TYPE_STDEV_METADATA_LATENCY: + os << "STDEV_METADATA_LATENCY"; + break; + default: + os << "(UNKNOWN:" << static_cast<std::underlying_type<ClientMetricType>::type>(type) << ")"; + break; + } + + return os; +} + +struct ClientMetricPayloadBase { + ClientMetricPayloadBase(ClientMetricType type) : metric_type(type) {} + + ClientMetricType get_type() const { + return metric_type; + } + + void print_type(std::ostream *out) const { + *out << metric_type; + } + + private: + ClientMetricType metric_type; +}; + +struct CapInfoPayload : 
public ClientMetricPayloadBase { + uint64_t cap_hits = 0; + uint64_t cap_misses = 0; + uint64_t nr_caps = 0; + + CapInfoPayload() + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_CAP_INFO) { } + CapInfoPayload(uint64_t cap_hits, uint64_t cap_misses, uint64_t nr_caps) + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_CAP_INFO), + cap_hits(cap_hits), cap_misses(cap_misses), nr_caps(nr_caps) { + } + + void encode(bufferlist &bl) const { + using ceph::encode; + ENCODE_START(1, 1, bl); + encode(cap_hits, bl); + encode(cap_misses, bl); + encode(nr_caps, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator &iter) { + using ceph::decode; + DECODE_START(1, iter); + decode(cap_hits, iter); + decode(cap_misses, iter); + decode(nr_caps, iter); + DECODE_FINISH(iter); + } + + void dump(Formatter *f) const { + f->dump_int("cap_hits", cap_hits); + f->dump_int("cap_misses", cap_misses); + f->dump_int("num_caps", nr_caps); + } + + void print(std::ostream *out) const { + *out << "cap_hits: " << cap_hits << " " + << "cap_misses: " << cap_misses << " " + << "num_caps: " << nr_caps; + } +}; + +struct ReadLatencyPayload : public ClientMetricPayloadBase { + utime_t lat; + utime_t mean; + uint64_t sq_sum; // sum of squares + uint64_t count; // IO count + + ReadLatencyPayload() + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_READ_LATENCY) { } + ReadLatencyPayload(utime_t lat, utime_t mean, uint64_t sq_sum, uint64_t count) + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_READ_LATENCY), + lat(lat), + mean(mean), + sq_sum(sq_sum), + count(count) { + } + + void encode(bufferlist &bl) const { + using ceph::encode; + ENCODE_START(2, 1, bl); + encode(lat, bl); + encode(mean, bl); + encode(sq_sum, bl); + encode(count, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator &iter) { + using ceph::decode; + DECODE_START(2, iter); + decode(lat, iter); + if (struct_v >= 2) { + decode(mean, iter); + 
decode(sq_sum, iter); + decode(count, iter); + } + DECODE_FINISH(iter); + } + + void dump(Formatter *f) const { + f->dump_int("latency", lat); + f->dump_int("avg_latency", mean); + f->dump_unsigned("sq_sum", sq_sum); + f->dump_unsigned("count", count); + } + + void print(std::ostream *out) const { + *out << "latency: " << lat << ", avg_latency: " << mean + << ", sq_sum: " << sq_sum << ", count=" << count; + } +}; + +struct WriteLatencyPayload : public ClientMetricPayloadBase { + utime_t lat; + utime_t mean; + uint64_t sq_sum; // sum of squares + uint64_t count; // IO count + + WriteLatencyPayload() + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_WRITE_LATENCY) { } + WriteLatencyPayload(utime_t lat, utime_t mean, uint64_t sq_sum, uint64_t count) + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_WRITE_LATENCY), + lat(lat), + mean(mean), + sq_sum(sq_sum), + count(count){ + } + + void encode(bufferlist &bl) const { + using ceph::encode; + ENCODE_START(2, 1, bl); + encode(lat, bl); + encode(mean, bl); + encode(sq_sum, bl); + encode(count, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator &iter) { + using ceph::decode; + DECODE_START(2, iter); + decode(lat, iter); + if (struct_v >= 2) { + decode(mean, iter); + decode(sq_sum, iter); + decode(count, iter); + } + DECODE_FINISH(iter); + } + + void dump(Formatter *f) const { + f->dump_int("latency", lat); + f->dump_int("avg_latency", mean); + f->dump_unsigned("sq_sum", sq_sum); + f->dump_unsigned("count", count); + } + + void print(std::ostream *out) const { + *out << "latency: " << lat << ", avg_latency: " << mean + << ", sq_sum: " << sq_sum << ", count=" << count; + } +}; + +struct MetadataLatencyPayload : public ClientMetricPayloadBase { + utime_t lat; + utime_t mean; + uint64_t sq_sum; // sum of squares + uint64_t count; // IO count + + MetadataLatencyPayload() + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_METADATA_LATENCY) { } + 
MetadataLatencyPayload(utime_t lat, utime_t mean, uint64_t sq_sum, uint64_t count) + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_METADATA_LATENCY), + lat(lat), + mean(mean), + sq_sum(sq_sum), + count(count) { + } + + void encode(bufferlist &bl) const { + using ceph::encode; + ENCODE_START(2, 1, bl); + encode(lat, bl); + encode(mean, bl); + encode(sq_sum, bl); + encode(count, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator &iter) { + using ceph::decode; + DECODE_START(2, iter); + decode(lat, iter); + if (struct_v >= 2) { + decode(mean, iter); + decode(sq_sum, iter); + decode(count, iter); + } + DECODE_FINISH(iter); + } + + void dump(Formatter *f) const { + f->dump_int("latency", lat); + f->dump_int("avg_latency", mean); + f->dump_unsigned("sq_sum", sq_sum); + f->dump_unsigned("count", count); + } + + void print(std::ostream *out) const { + *out << "latency: " << lat << ", avg_latency: " << mean + << ", sq_sum: " << sq_sum << ", count=" << count; + } +}; + +struct DentryLeasePayload : public ClientMetricPayloadBase { + uint64_t dlease_hits = 0; + uint64_t dlease_misses = 0; + uint64_t nr_dentries = 0; + + DentryLeasePayload() + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_DENTRY_LEASE) { } + DentryLeasePayload(uint64_t dlease_hits, uint64_t dlease_misses, uint64_t nr_dentries) + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_DENTRY_LEASE), + dlease_hits(dlease_hits), dlease_misses(dlease_misses), nr_dentries(nr_dentries) { } + + void encode(bufferlist &bl) const { + using ceph::encode; + ENCODE_START(1, 1, bl); + encode(dlease_hits, bl); + encode(dlease_misses, bl); + encode(nr_dentries, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator &iter) { + using ceph::decode; + DECODE_START(1, iter); + decode(dlease_hits, iter); + decode(dlease_misses, iter); + decode(nr_dentries, iter); + DECODE_FINISH(iter); + } + + void dump(Formatter *f) const { + 
f->dump_int("dlease_hits", dlease_hits); + f->dump_int("dlease_misses", dlease_misses); + f->dump_int("num_dentries", nr_dentries); + } + + void print(std::ostream *out) const { + *out << "dlease_hits: " << dlease_hits << " " + << "dlease_misses: " << dlease_misses << " " + << "num_dentries: " << nr_dentries; + } +}; + +struct OpenedFilesPayload : public ClientMetricPayloadBase { + uint64_t opened_files = 0; + uint64_t total_inodes = 0; + + OpenedFilesPayload() + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_OPENED_FILES) { } + OpenedFilesPayload(uint64_t opened_files, uint64_t total_inodes) + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_OPENED_FILES), + opened_files(opened_files), total_inodes(total_inodes) { } + + void encode(bufferlist &bl) const { + using ceph::encode; + ENCODE_START(1, 1, bl); + encode(opened_files, bl); + encode(total_inodes, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator &iter) { + using ceph::decode; + DECODE_START(1, iter); + decode(opened_files, iter); + decode(total_inodes, iter); + DECODE_FINISH(iter); + } + + void dump(Formatter *f) const { + f->dump_int("opened_files", opened_files); + f->dump_int("total_inodes", total_inodes); + } + + void print(std::ostream *out) const { + *out << "opened_files: " << opened_files << " " + << "total_inodes: " << total_inodes; + } +}; + +struct PinnedIcapsPayload : public ClientMetricPayloadBase { + uint64_t pinned_icaps = 0; + uint64_t total_inodes = 0; + + PinnedIcapsPayload() + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_PINNED_ICAPS) { } + PinnedIcapsPayload(uint64_t pinned_icaps, uint64_t total_inodes) + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_PINNED_ICAPS), + pinned_icaps(pinned_icaps), total_inodes(total_inodes) { } + + void encode(bufferlist &bl) const { + using ceph::encode; + ENCODE_START(1, 1, bl); + encode(pinned_icaps, bl); + encode(total_inodes, bl); + ENCODE_FINISH(bl); + } + + void 
decode(bufferlist::const_iterator &iter) { + using ceph::decode; + DECODE_START(1, iter); + decode(pinned_icaps, iter); + decode(total_inodes, iter); + DECODE_FINISH(iter); + } + + void dump(Formatter *f) const { + f->dump_int("pinned_icaps", pinned_icaps); + f->dump_int("total_inodes", total_inodes); + } + + void print(std::ostream *out) const { + *out << "pinned_icaps: " << pinned_icaps << " " + << "total_inodes: " << total_inodes; + } +}; + +struct OpenedInodesPayload : public ClientMetricPayloadBase { + uint64_t opened_inodes = 0; + uint64_t total_inodes = 0; + + OpenedInodesPayload() + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_OPENED_INODES) { } + OpenedInodesPayload(uint64_t opened_inodes, uint64_t total_inodes) + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_OPENED_INODES), + opened_inodes(opened_inodes), total_inodes(total_inodes) { } + + void encode(bufferlist &bl) const { + using ceph::encode; + ENCODE_START(1, 1, bl); + encode(opened_inodes, bl); + encode(total_inodes, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator &iter) { + using ceph::decode; + DECODE_START(1, iter); + decode(opened_inodes, iter); + decode(total_inodes, iter); + DECODE_FINISH(iter); + } + + void dump(Formatter *f) const { + f->dump_int("opened_inodes", opened_inodes); + f->dump_int("total_inodes", total_inodes); + } + + void print(std::ostream *out) const { + *out << "opened_inodes: " << opened_inodes << " " + << "total_inodes: " << total_inodes; + } +}; + +struct ReadIoSizesPayload : public ClientMetricPayloadBase { + uint64_t total_ops = 0; + uint64_t total_size = 0; + + ReadIoSizesPayload() + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_READ_IO_SIZES) { } + ReadIoSizesPayload(uint64_t total_ops, uint64_t total_size) + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_READ_IO_SIZES), + total_ops(total_ops), total_size(total_size) { } + + void encode(bufferlist &bl) const { + using 
ceph::encode; + ENCODE_START(1, 1, bl); + encode(total_ops, bl); + encode(total_size, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator &iter) { + using ceph::decode; + DECODE_START(1, iter); + decode(total_ops, iter); + decode(total_size, iter); + DECODE_FINISH(iter); + } + + void dump(Formatter *f) const { + f->dump_int("total_ops", total_ops); + f->dump_int("total_size", total_size); + } + + void print(std::ostream *out) const { + *out << "total_ops: " << total_ops << " total_size: " << total_size; + } +}; + +struct WriteIoSizesPayload : public ClientMetricPayloadBase { + uint64_t total_ops = 0; + uint64_t total_size = 0; + + WriteIoSizesPayload() + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_WRITE_IO_SIZES) { } + WriteIoSizesPayload(uint64_t total_ops, uint64_t total_size) + : ClientMetricPayloadBase(ClientMetricType::CLIENT_METRIC_TYPE_WRITE_IO_SIZES), + total_ops(total_ops), total_size(total_size) { + } + + void encode(bufferlist &bl) const { + using ceph::encode; + ENCODE_START(1, 1, bl); + encode(total_ops, bl); + encode(total_size, bl); + ENCODE_FINISH(bl); + } + + void decode(bufferlist::const_iterator &iter) { + using ceph::decode; + DECODE_START(1, iter); + decode(total_ops, iter); + decode(total_size, iter); + DECODE_FINISH(iter); + } + + void dump(Formatter *f) const { + f->dump_int("total_ops", total_ops); + f->dump_int("total_size", total_size); + } + + void print(std::ostream *out) const { + *out << "total_ops: " << total_ops << " total_size: " << total_size; + } +}; + +struct UnknownPayload : public ClientMetricPayloadBase { + UnknownPayload() + : ClientMetricPayloadBase(static_cast<ClientMetricType>(-1)) { } + UnknownPayload(ClientMetricType metric_type) + : ClientMetricPayloadBase(metric_type) { } + + void encode(bufferlist &bl) const { + } + + void decode(bufferlist::const_iterator &iter) { + using ceph::decode; + DECODE_START(254, iter); + iter.seek(struct_len); + DECODE_FINISH(iter); + } + + void 
dump(Formatter *f) const { + } + + void print(std::ostream *out) const { + } +}; + +typedef boost::variant<CapInfoPayload, + ReadLatencyPayload, + WriteLatencyPayload, + MetadataLatencyPayload, + DentryLeasePayload, + OpenedFilesPayload, + PinnedIcapsPayload, + OpenedInodesPayload, + ReadIoSizesPayload, + WriteIoSizesPayload, + UnknownPayload> ClientMetricPayload; + +// metric update message sent by clients +struct ClientMetricMessage { +public: + ClientMetricMessage(const ClientMetricPayload &payload = UnknownPayload()) + : payload(payload) { + } + + class EncodePayloadVisitor : public boost::static_visitor<void> { + public: + explicit EncodePayloadVisitor(bufferlist &bl) : m_bl(bl) { + } + + template <typename ClientMetricPayload> + inline void operator()(const ClientMetricPayload &payload) const { + using ceph::encode; + encode(static_cast<uint32_t>(payload.get_type()), m_bl); + payload.encode(m_bl); + } + + private: + bufferlist &m_bl; + }; + + class DecodePayloadVisitor : public boost::static_visitor<void> { + public: + DecodePayloadVisitor(bufferlist::const_iterator &iter) : m_iter(iter) { + } + + template <typename ClientMetricPayload> + inline void operator()(ClientMetricPayload &payload) const { + using ceph::decode; + payload.decode(m_iter); + } + + private: + bufferlist::const_iterator &m_iter; + }; + + class DumpPayloadVisitor : public boost::static_visitor<void> { + public: + explicit DumpPayloadVisitor(Formatter *formatter) : m_formatter(formatter) { + } + + template <typename ClientMetricPayload> + inline void operator()(const ClientMetricPayload &payload) const { + m_formatter->dump_string("client_metric_type", stringify(payload.get_type())); + payload.dump(m_formatter); + } + + private: + Formatter *m_formatter; + }; + + class PrintPayloadVisitor : public boost::static_visitor<void> { + public: + explicit PrintPayloadVisitor(std::ostream *out) : _out(out) { + } + + template <typename ClientMetricPayload> + inline void operator()(const 
ClientMetricPayload &payload) const { + *_out << "[client_metric_type: "; + payload.print_type(_out); + *_out << " "; + payload.print(_out); + *_out << "]"; + } + + private: + std::ostream *_out; + }; + + void encode(bufferlist &bl) const { + boost::apply_visitor(EncodePayloadVisitor(bl), payload); + } + + void decode(bufferlist::const_iterator &iter) { + using ceph::decode; + + uint32_t metric_type; + decode(metric_type, iter); + + switch (metric_type) { + case ClientMetricType::CLIENT_METRIC_TYPE_CAP_INFO: + payload = CapInfoPayload(); + break; + case ClientMetricType::CLIENT_METRIC_TYPE_READ_LATENCY: + payload = ReadLatencyPayload(); + break; + case ClientMetricType::CLIENT_METRIC_TYPE_WRITE_LATENCY: + payload = WriteLatencyPayload(); + break; + case ClientMetricType::CLIENT_METRIC_TYPE_METADATA_LATENCY: + payload = MetadataLatencyPayload(); + break; + case ClientMetricType::CLIENT_METRIC_TYPE_DENTRY_LEASE: + payload = DentryLeasePayload(); + break; + case ClientMetricType::CLIENT_METRIC_TYPE_OPENED_FILES: + payload = OpenedFilesPayload(); + break; + case ClientMetricType::CLIENT_METRIC_TYPE_PINNED_ICAPS: + payload = PinnedIcapsPayload(); + break; + case ClientMetricType::CLIENT_METRIC_TYPE_OPENED_INODES: + payload = OpenedInodesPayload(); + break; + case ClientMetricType::CLIENT_METRIC_TYPE_READ_IO_SIZES: + payload = ReadIoSizesPayload(); + break; + case ClientMetricType::CLIENT_METRIC_TYPE_WRITE_IO_SIZES: + payload = WriteIoSizesPayload(); + break; + default: + payload = UnknownPayload(static_cast<ClientMetricType>(metric_type)); + break; + } + + boost::apply_visitor(DecodePayloadVisitor(iter), payload); + } + + void dump(Formatter *f) const { + apply_visitor(DumpPayloadVisitor(f), payload); + } + + void print(std::ostream *out) const { + apply_visitor(PrintPayloadVisitor(out), payload); + } + + ClientMetricPayload payload; +}; +WRITE_CLASS_ENCODER(ClientMetricMessage); + +#endif // CEPH_INCLUDE_CEPHFS_METRICS_TYPES_H diff --git a/src/include/cephfs/types.h 
b/src/include/cephfs/types.h new file mode 100644 index 000000000..cca0a6193 --- /dev/null +++ b/src/include/cephfs/types.h @@ -0,0 +1,970 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2020 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ +#ifndef CEPH_CEPHFS_TYPES_H +#define CEPH_CEPHFS_TYPES_H +#include "include/int_types.h" + +#include <ostream> +#include <set> +#include <map> +#include <string_view> + +#include "common/config.h" +#include "common/Clock.h" +#include "common/DecayCounter.h" +#include "common/StackStringStream.h" +#include "common/entity_name.h" + +#include "include/compat.h" +#include "include/Context.h" +#include "include/frag.h" +#include "include/xlist.h" +#include "include/interval_set.h" +#include "include/compact_set.h" +#include "include/fs_types.h" +#include "include/ceph_fs.h" + +#include "mds/inode_backtrace.h" + +#include <boost/spirit/include/qi.hpp> +#include <boost/pool/pool.hpp> +#include "include/ceph_assert.h" +#include <boost/serialization/strong_typedef.hpp> +#include "common/ceph_json.h" + +#define CEPH_FS_ONDISK_MAGIC "ceph fs volume v011" +#define MAX_MDS 0x100 + +BOOST_STRONG_TYPEDEF(uint64_t, mds_gid_t) +extern const mds_gid_t MDS_GID_NONE; + +typedef int32_t fs_cluster_id_t; +constexpr fs_cluster_id_t FS_CLUSTER_ID_NONE = -1; + +// The namespace ID of the anonymous default filesystem from legacy systems +constexpr fs_cluster_id_t FS_CLUSTER_ID_ANONYMOUS = 0; + +typedef int32_t mds_rank_t; +constexpr mds_rank_t MDS_RANK_NONE = -1; +constexpr mds_rank_t MDS_RANK_EPHEMERAL_DIST = -2; +constexpr mds_rank_t MDS_RANK_EPHEMERAL_RAND = -3; + +struct scatter_info_t { + version_t version = 0; +}; + +struct frag_info_t : 
public scatter_info_t { + int64_t size() const { return nfiles + nsubdirs; } + + void zero() { + *this = frag_info_t(); + } + + // *this += cur - acc; + void add_delta(const frag_info_t &cur, const frag_info_t &acc, bool *touched_mtime=0, bool *touched_chattr=0) { + if (cur.mtime > mtime) { + mtime = cur.mtime; + if (touched_mtime) + *touched_mtime = true; + } + if (cur.change_attr > change_attr) { + change_attr = cur.change_attr; + if (touched_chattr) + *touched_chattr = true; + } + nfiles += cur.nfiles - acc.nfiles; + nsubdirs += cur.nsubdirs - acc.nsubdirs; + } + + void add(const frag_info_t& other) { + if (other.mtime > mtime) + mtime = other.mtime; + if (other.change_attr > change_attr) + change_attr = other.change_attr; + nfiles += other.nfiles; + nsubdirs += other.nsubdirs; + } + + bool same_sums(const frag_info_t &o) const { + return mtime <= o.mtime && + nfiles == o.nfiles && + nsubdirs == o.nsubdirs; + } + + void encode(ceph::buffer::list &bl) const; + void decode(ceph::buffer::list::const_iterator& bl); + void dump(ceph::Formatter *f) const; + void decode_json(JSONObj *obj); + static void generate_test_instances(std::list<frag_info_t*>& ls); + + // this frag + utime_t mtime; + uint64_t change_attr = 0; + int64_t nfiles = 0; // files + int64_t nsubdirs = 0; // subdirs +}; +WRITE_CLASS_ENCODER(frag_info_t) + +inline bool operator==(const frag_info_t &l, const frag_info_t &r) { + return memcmp(&l, &r, sizeof(l)) == 0; +} +inline bool operator!=(const frag_info_t &l, const frag_info_t &r) { + return !(l == r); +} + +std::ostream& operator<<(std::ostream &out, const frag_info_t &f); + +struct nest_info_t : public scatter_info_t { + int64_t rsize() const { return rfiles + rsubdirs; } + + void zero() { + *this = nest_info_t(); + } + + void sub(const nest_info_t &other) { + add(other, -1); + } + void add(const nest_info_t &other, int fac=1) { + if (other.rctime > rctime) + rctime = other.rctime; + rbytes += fac*other.rbytes; + rfiles += fac*other.rfiles; + 
rsubdirs += fac*other.rsubdirs; + rsnaps += fac*other.rsnaps; + } + + // *this += cur - acc; + void add_delta(const nest_info_t &cur, const nest_info_t &acc) { + if (cur.rctime > rctime) + rctime = cur.rctime; + rbytes += cur.rbytes - acc.rbytes; + rfiles += cur.rfiles - acc.rfiles; + rsubdirs += cur.rsubdirs - acc.rsubdirs; + rsnaps += cur.rsnaps - acc.rsnaps; + } + + bool same_sums(const nest_info_t &o) const { + return rctime <= o.rctime && + rbytes == o.rbytes && + rfiles == o.rfiles && + rsubdirs == o.rsubdirs && + rsnaps == o.rsnaps; + } + + void encode(ceph::buffer::list &bl) const; + void decode(ceph::buffer::list::const_iterator& bl); + void dump(ceph::Formatter *f) const; + void decode_json(JSONObj *obj); + static void generate_test_instances(std::list<nest_info_t*>& ls); + + // this frag + children + utime_t rctime; + int64_t rbytes = 0; + int64_t rfiles = 0; + int64_t rsubdirs = 0; + int64_t rsnaps = 0; +}; +WRITE_CLASS_ENCODER(nest_info_t) + +inline bool operator==(const nest_info_t &l, const nest_info_t &r) { + return memcmp(&l, &r, sizeof(l)) == 0; +} +inline bool operator!=(const nest_info_t &l, const nest_info_t &r) { + return !(l == r); +} + +std::ostream& operator<<(std::ostream &out, const nest_info_t &n); + +struct vinodeno_t { + vinodeno_t() {} + vinodeno_t(inodeno_t i, snapid_t s) : ino(i), snapid(s) {} + + void encode(ceph::buffer::list& bl) const { + using ceph::encode; + encode(ino, bl); + encode(snapid, bl); + } + void decode(ceph::buffer::list::const_iterator& p) { + using ceph::decode; + decode(ino, p); + decode(snapid, p); + } + + inodeno_t ino; + snapid_t snapid; +}; +WRITE_CLASS_ENCODER(vinodeno_t) + +inline bool operator==(const vinodeno_t &l, const vinodeno_t &r) { + return l.ino == r.ino && l.snapid == r.snapid; +} +inline bool operator!=(const vinodeno_t &l, const vinodeno_t &r) { + return !(l == r); +} +inline bool operator<(const vinodeno_t &l, const vinodeno_t &r) { + return + l.ino < r.ino || + (l.ino == r.ino && l.snapid < 
r.snapid); +} + +typedef enum { + QUOTA_MAX_FILES, + QUOTA_MAX_BYTES, + QUOTA_ANY +} quota_max_t; + +struct quota_info_t +{ + void encode(ceph::buffer::list& bl) const { + ENCODE_START(1, 1, bl); + encode(max_bytes, bl); + encode(max_files, bl); + ENCODE_FINISH(bl); + } + void decode(ceph::buffer::list::const_iterator& p) { + DECODE_START_LEGACY_COMPAT_LEN(1, 1, 1, p); + decode(max_bytes, p); + decode(max_files, p); + DECODE_FINISH(p); + } + + void dump(ceph::Formatter *f) const; + static void generate_test_instances(std::list<quota_info_t *>& ls); + + bool is_valid() const { + return max_bytes >=0 && max_files >=0; + } + bool is_enabled(quota_max_t type=QUOTA_ANY) const { + switch (type) { + case QUOTA_MAX_FILES: + return !!max_files; + case QUOTA_MAX_BYTES: + return !!max_bytes; + case QUOTA_ANY: + default: + return !!max_bytes || !!max_files; + } + } + void decode_json(JSONObj *obj); + + int64_t max_bytes = 0; + int64_t max_files = 0; +}; +WRITE_CLASS_ENCODER(quota_info_t) + +inline bool operator==(const quota_info_t &l, const quota_info_t &r) { + return memcmp(&l, &r, sizeof(l)) == 0; +} + +std::ostream& operator<<(std::ostream &out, const quota_info_t &n); + +struct client_writeable_range_t { + struct byte_range_t { + uint64_t first = 0, last = 0; // interval client can write to + byte_range_t() {} + void decode_json(JSONObj *obj); + }; + + void encode(ceph::buffer::list &bl) const; + void decode(ceph::buffer::list::const_iterator& bl); + void dump(ceph::Formatter *f) const; + static void generate_test_instances(std::list<client_writeable_range_t*>& ls); + + byte_range_t range; + snapid_t follows = 0; // aka "data+metadata flushed thru" +}; + +inline void decode(client_writeable_range_t::byte_range_t& range, ceph::buffer::list::const_iterator& bl) { + using ceph::decode; + decode(range.first, bl); + decode(range.last, bl); +} + +WRITE_CLASS_ENCODER(client_writeable_range_t) + +std::ostream& operator<<(std::ostream& out, const client_writeable_range_t& r); + 
+inline bool operator==(const client_writeable_range_t& l, + const client_writeable_range_t& r) { + return l.range.first == r.range.first && l.range.last == r.range.last && + l.follows == r.follows; +} + +struct inline_data_t { +public: + inline_data_t() {} + inline_data_t(const inline_data_t& o) : version(o.version) { + if (o.blp) + set_data(*o.blp); + } + inline_data_t& operator=(const inline_data_t& o) { + version = o.version; + if (o.blp) + set_data(*o.blp); + else + free_data(); + return *this; + } + + void free_data() { + blp.reset(); + } + void get_data(ceph::buffer::list& ret) const { + if (blp) + ret = *blp; + else + ret.clear(); + } + void set_data(const ceph::buffer::list& bl) { + if (!blp) + blp.reset(new ceph::buffer::list); + *blp = bl; + } + size_t length() const { return blp ? blp->length() : 0; } + + bool operator==(const inline_data_t& o) const { + return length() == o.length() && + (length() == 0 || + (*const_cast<ceph::buffer::list*>(blp.get()) == *const_cast<ceph::buffer::list*>(o.blp.get()))); + } + bool operator!=(const inline_data_t& o) const { + return !(*this == o); + } + void encode(ceph::buffer::list &bl) const; + void decode(ceph::buffer::list::const_iterator& bl); + + version_t version = 1; + +private: + std::unique_ptr<ceph::buffer::list> blp; +}; +WRITE_CLASS_ENCODER(inline_data_t) + +enum { + DAMAGE_STATS, // statistics (dirstat, size, etc) + DAMAGE_RSTATS, // recursive statistics (rstat, accounted_rstat) + DAMAGE_FRAGTREE // fragtree -- repair by searching +}; + +template<template<typename> class Allocator = std::allocator> +struct inode_t { + /** + * *************** + * Do not forget to add any new fields to the compare() function. 
+ * *************** + */ + using client_range_map = std::map<client_t,client_writeable_range_t,std::less<client_t>,Allocator<std::pair<const client_t,client_writeable_range_t>>>; + + inode_t() + { + clear_layout(); + } + + // file type + bool is_symlink() const { return (mode & S_IFMT) == S_IFLNK; } + bool is_dir() const { return (mode & S_IFMT) == S_IFDIR; } + bool is_file() const { return (mode & S_IFMT) == S_IFREG; } + + bool is_truncating() const { return (truncate_pending > 0); } + void truncate(uint64_t old_size, uint64_t new_size, const bufferlist &fbl) { + truncate(old_size, new_size); + fscrypt_last_block = fbl; + } + void truncate(uint64_t old_size, uint64_t new_size) { + ceph_assert(new_size <= old_size); + if (old_size > max_size_ever) + max_size_ever = old_size; + truncate_from = old_size; + size = new_size; + rstat.rbytes = new_size; + truncate_size = size; + truncate_seq++; + truncate_pending++; + } + + bool has_layout() const { + return layout != file_layout_t(); + } + + void clear_layout() { + layout = file_layout_t(); + } + + uint64_t get_layout_size_increment() const { + return layout.get_period(); + } + + bool is_dirty_rstat() const { return !(rstat == accounted_rstat); } + + uint64_t get_client_range(client_t client) const { + auto it = client_ranges.find(client); + return it != client_ranges.end() ? 
it->second.range.last : 0; + } + + uint64_t get_max_size() const { + uint64_t max = 0; + for (std::map<client_t,client_writeable_range_t>::const_iterator p = client_ranges.begin(); + p != client_ranges.end(); + ++p) + if (p->second.range.last > max) + max = p->second.range.last; + return max; + } + void set_max_size(uint64_t new_max) { + if (new_max == 0) { + client_ranges.clear(); + } else { + for (std::map<client_t,client_writeable_range_t>::iterator p = client_ranges.begin(); + p != client_ranges.end(); + ++p) + p->second.range.last = new_max; + } + } + + void trim_client_ranges(snapid_t last) { + std::map<client_t, client_writeable_range_t>::iterator p = client_ranges.begin(); + while (p != client_ranges.end()) { + if (p->second.follows >= last) + client_ranges.erase(p++); + else + ++p; + } + } + + bool is_backtrace_updated() const { + return backtrace_version == version; + } + void update_backtrace(version_t pv=0) { + backtrace_version = pv ? pv : version; + } + + void add_old_pool(int64_t l) { + backtrace_version = version; + old_pools.insert(l); + } + + void encode(ceph::buffer::list &bl, uint64_t features) const; + void decode(ceph::buffer::list::const_iterator& bl); + void dump(ceph::Formatter *f) const; + static void client_ranges_cb(client_range_map& c, JSONObj *obj); + static void old_pools_cb(compact_set<int64_t, std::less<int64_t>, Allocator<int64_t> >& c, JSONObj *obj); + void decode_json(JSONObj *obj); + static void generate_test_instances(std::list<inode_t*>& ls); + /** + * Compare this inode_t with another that represent *the same inode* + * at different points in time. + * @pre The inodes are the same ino + * + * @param other The inode_t to compare ourselves with + * @param divergent A bool pointer which will be set to true + * if the values are different in a way that can't be explained + * by one being a newer version than the other. + * + * @returns 1 if we are newer than the other, 0 if equal, -1 if older. 
+ */ + int compare(const inode_t &other, bool *divergent) const; + + // base (immutable) + inodeno_t ino = 0; + uint32_t rdev = 0; // if special file + + // affected by any inode change... + utime_t ctime; // inode change time + utime_t btime; // birth time + + // perm (namespace permissions) + uint32_t mode = 0; + uid_t uid = 0; + gid_t gid = 0; + + // nlink + int32_t nlink = 0; + + // file (data access) + ceph_dir_layout dir_layout = {}; // [dir only] + file_layout_t layout; + compact_set<int64_t, std::less<int64_t>, Allocator<int64_t>> old_pools; + uint64_t size = 0; // on directory, # dentries + uint64_t max_size_ever = 0; // max size the file has ever been + uint32_t truncate_seq = 0; + uint64_t truncate_size = 0, truncate_from = 0; + uint32_t truncate_pending = 0; + utime_t mtime; // file data modify time. + utime_t atime; // file data access time. + uint32_t time_warp_seq = 0; // count of (potential) mtime/atime timewarps (i.e., utimes()) + inline_data_t inline_data; // FIXME check + + // change attribute + uint64_t change_attr = 0; + + client_range_map client_ranges; // client(s) can write to these ranges + + // dirfrag, recursive accountin + frag_info_t dirstat; // protected by my filelock + nest_info_t rstat; // protected by my nestlock + nest_info_t accounted_rstat; // protected by parent's nestlock + + quota_info_t quota; + + mds_rank_t export_pin = MDS_RANK_NONE; + + double export_ephemeral_random_pin = 0; + bool export_ephemeral_distributed_pin = false; + + // special stuff + version_t version = 0; // auth only + version_t file_data_version = 0; // auth only + version_t xattr_version = 0; + + utime_t last_scrub_stamp; // start time of last complete scrub + version_t last_scrub_version = 0;// (parent) start version of last complete scrub + + version_t backtrace_version = 0; + + snapid_t oldest_snap; + + std::basic_string<char,std::char_traits<char>,Allocator<char>> stray_prior_path; //stores path before unlink + + std::vector<uint8_t> fscrypt_auth; + 
std::vector<uint8_t> fscrypt_file; + + bufferlist fscrypt_last_block; + +private: + bool older_is_consistent(const inode_t &other) const; +}; + +// These methods may be moved back to mdstypes.cc when we have pmr +template<template<typename> class Allocator> +void inode_t<Allocator>::encode(ceph::buffer::list &bl, uint64_t features) const +{ + ENCODE_START(19, 6, bl); + + encode(ino, bl); + encode(rdev, bl); + encode(ctime, bl); + + encode(mode, bl); + encode(uid, bl); + encode(gid, bl); + + encode(nlink, bl); + { + // removed field + bool anchored = 0; + encode(anchored, bl); + } + + encode(dir_layout, bl); + encode(layout, bl, features); + encode(size, bl); + encode(truncate_seq, bl); + encode(truncate_size, bl); + encode(truncate_from, bl); + encode(truncate_pending, bl); + encode(mtime, bl); + encode(atime, bl); + encode(time_warp_seq, bl); + encode(client_ranges, bl); + + encode(dirstat, bl); + encode(rstat, bl); + encode(accounted_rstat, bl); + + encode(version, bl); + encode(file_data_version, bl); + encode(xattr_version, bl); + encode(backtrace_version, bl); + encode(old_pools, bl); + encode(max_size_ever, bl); + encode(inline_data, bl); + encode(quota, bl); + + encode(stray_prior_path, bl); + + encode(last_scrub_version, bl); + encode(last_scrub_stamp, bl); + + encode(btime, bl); + encode(change_attr, bl); + + encode(export_pin, bl); + + encode(export_ephemeral_random_pin, bl); + encode(export_ephemeral_distributed_pin, bl); + + encode(!fscrypt_auth.empty(), bl); + encode(fscrypt_auth, bl); + encode(fscrypt_file, bl); + encode(fscrypt_last_block, bl); + ENCODE_FINISH(bl); +} + +template<template<typename> class Allocator> +void inode_t<Allocator>::decode(ceph::buffer::list::const_iterator &p) +{ + DECODE_START_LEGACY_COMPAT_LEN(19, 6, 6, p); + + decode(ino, p); + decode(rdev, p); + decode(ctime, p); + + decode(mode, p); + decode(uid, p); + decode(gid, p); + + decode(nlink, p); + { + bool anchored; + decode(anchored, p); + } + + if (struct_v >= 4) + 
decode(dir_layout, p); + else { + // FIPS zeroization audit 20191117: this memset is not security related. + memset(&dir_layout, 0, sizeof(dir_layout)); + } + decode(layout, p); + decode(size, p); + decode(truncate_seq, p); + decode(truncate_size, p); + decode(truncate_from, p); + if (struct_v >= 5) + decode(truncate_pending, p); + else + truncate_pending = 0; + decode(mtime, p); + decode(atime, p); + decode(time_warp_seq, p); + if (struct_v >= 3) { + decode(client_ranges, p); + } else { + std::map<client_t, client_writeable_range_t::byte_range_t> m; + decode(m, p); + for (auto q = m.begin(); q != m.end(); ++q) + client_ranges[q->first].range = q->second; + } + + decode(dirstat, p); + decode(rstat, p); + decode(accounted_rstat, p); + + decode(version, p); + decode(file_data_version, p); + decode(xattr_version, p); + if (struct_v >= 2) + decode(backtrace_version, p); + if (struct_v >= 7) + decode(old_pools, p); + if (struct_v >= 8) + decode(max_size_ever, p); + if (struct_v >= 9) { + decode(inline_data, p); + } else { + inline_data.version = CEPH_INLINE_NONE; + } + if (struct_v < 10) + backtrace_version = 0; // force update backtrace + if (struct_v >= 11) + decode(quota, p); + + if (struct_v >= 12) { + std::string tmp; + decode(tmp, p); + stray_prior_path = std::string_view(tmp); + } + + if (struct_v >= 13) { + decode(last_scrub_version, p); + decode(last_scrub_stamp, p); + } + if (struct_v >= 14) { + decode(btime, p); + decode(change_attr, p); + } else { + btime = utime_t(); + change_attr = 0; + } + + if (struct_v >= 15) { + decode(export_pin, p); + } else { + export_pin = MDS_RANK_NONE; + } + + if (struct_v >= 16) { + decode(export_ephemeral_random_pin, p); + decode(export_ephemeral_distributed_pin, p); + } else { + export_ephemeral_random_pin = 0; + export_ephemeral_distributed_pin = false; + } + + if (struct_v >= 17) { + bool fscrypt_flag; + decode(fscrypt_flag, p); // ignored + } + + if (struct_v >= 18) { + decode(fscrypt_auth, p); + decode(fscrypt_file, p); + 
} + + if (struct_v >= 19) { + decode(fscrypt_last_block, p); + } + DECODE_FINISH(p); +} + +template<template<typename> class Allocator> +void inode_t<Allocator>::dump(ceph::Formatter *f) const +{ + f->dump_unsigned("ino", ino); + f->dump_unsigned("rdev", rdev); + f->dump_stream("ctime") << ctime; + f->dump_stream("btime") << btime; + f->dump_unsigned("mode", mode); + f->dump_unsigned("uid", uid); + f->dump_unsigned("gid", gid); + f->dump_unsigned("nlink", nlink); + + f->open_object_section("dir_layout"); + ::dump(dir_layout, f); + f->close_section(); + + f->dump_object("layout", layout); + + f->open_array_section("old_pools"); + for (const auto &p : old_pools) { + f->dump_int("pool", p); + } + f->close_section(); + + f->dump_unsigned("size", size); + f->dump_unsigned("truncate_seq", truncate_seq); + f->dump_unsigned("truncate_size", truncate_size); + f->dump_unsigned("truncate_from", truncate_from); + f->dump_unsigned("truncate_pending", truncate_pending); + f->dump_stream("mtime") << mtime; + f->dump_stream("atime") << atime; + f->dump_unsigned("time_warp_seq", time_warp_seq); + f->dump_unsigned("change_attr", change_attr); + f->dump_int("export_pin", export_pin); + f->dump_int("export_ephemeral_random_pin", export_ephemeral_random_pin); + f->dump_bool("export_ephemeral_distributed_pin", export_ephemeral_distributed_pin); + + f->open_array_section("client_ranges"); + for (const auto &p : client_ranges) { + f->open_object_section("client"); + f->dump_unsigned("client", p.first.v); + p.second.dump(f); + f->close_section(); + } + f->close_section(); + + f->open_object_section("dirstat"); + dirstat.dump(f); + f->close_section(); + + f->open_object_section("rstat"); + rstat.dump(f); + f->close_section(); + + f->open_object_section("accounted_rstat"); + accounted_rstat.dump(f); + f->close_section(); + + f->dump_unsigned("version", version); + f->dump_unsigned("file_data_version", file_data_version); + f->dump_unsigned("xattr_version", xattr_version); + 
f->dump_unsigned("backtrace_version", backtrace_version); + + f->dump_string("stray_prior_path", stray_prior_path); + f->dump_unsigned("max_size_ever", max_size_ever); + + f->open_object_section("quota"); + quota.dump(f); + f->close_section(); + + f->dump_stream("last_scrub_stamp") << last_scrub_stamp; + f->dump_unsigned("last_scrub_version", last_scrub_version); +} + +template<template<typename> class Allocator> +void inode_t<Allocator>::client_ranges_cb(typename inode_t<Allocator>::client_range_map& c, JSONObj *obj){ + + int64_t client; + JSONDecoder::decode_json("client", client, obj, true); + client_writeable_range_t client_range_tmp; + JSONDecoder::decode_json("byte range", client_range_tmp.range, obj, true); + JSONDecoder::decode_json("follows", client_range_tmp.follows.val, obj, true); + c[client] = client_range_tmp; +} + +template<template<typename> class Allocator> +void inode_t<Allocator>::old_pools_cb(compact_set<int64_t, std::less<int64_t>, Allocator<int64_t> >& c, JSONObj *obj){ + + int64_t tmp; + decode_json_obj(tmp, obj); + c.insert(tmp); +} + +template<template<typename> class Allocator> +void inode_t<Allocator>::decode_json(JSONObj *obj) +{ + + JSONDecoder::decode_json("ino", ino.val, obj, true); + JSONDecoder::decode_json("rdev", rdev, obj, true); + //JSONDecoder::decode_json("ctime", ctime, obj, true); + //JSONDecoder::decode_json("btime", btime, obj, true); + JSONDecoder::decode_json("mode", mode, obj, true); + JSONDecoder::decode_json("uid", uid, obj, true); + JSONDecoder::decode_json("gid", gid, obj, true); + JSONDecoder::decode_json("nlink", nlink, obj, true); + JSONDecoder::decode_json("dir_layout", dir_layout, obj, true); + JSONDecoder::decode_json("layout", layout, obj, true); + JSONDecoder::decode_json("old_pools", old_pools, inode_t<Allocator>::old_pools_cb, obj, true); + JSONDecoder::decode_json("size", size, obj, true); + JSONDecoder::decode_json("truncate_seq", truncate_seq, obj, true); + JSONDecoder::decode_json("truncate_size", 
truncate_size, obj, true); + JSONDecoder::decode_json("truncate_from", truncate_from, obj, true); + JSONDecoder::decode_json("truncate_pending", truncate_pending, obj, true); + //JSONDecoder::decode_json("mtime", mtime, obj, true); + //JSONDecoder::decode_json("atime", atime, obj, true); + JSONDecoder::decode_json("time_warp_seq", time_warp_seq, obj, true); + JSONDecoder::decode_json("change_attr", change_attr, obj, true); + JSONDecoder::decode_json("export_pin", export_pin, obj, true); + JSONDecoder::decode_json("client_ranges", client_ranges, inode_t<Allocator>::client_ranges_cb, obj, true); + JSONDecoder::decode_json("dirstat", dirstat, obj, true); + JSONDecoder::decode_json("rstat", rstat, obj, true); + JSONDecoder::decode_json("accounted_rstat", accounted_rstat, obj, true); + JSONDecoder::decode_json("version", version, obj, true); + JSONDecoder::decode_json("file_data_version", file_data_version, obj, true); + JSONDecoder::decode_json("xattr_version", xattr_version, obj, true); + JSONDecoder::decode_json("backtrace_version", backtrace_version, obj, true); + JSONDecoder::decode_json("stray_prior_path", stray_prior_path, obj, true); + JSONDecoder::decode_json("max_size_ever", max_size_ever, obj, true); + JSONDecoder::decode_json("quota", quota, obj, true); + JSONDecoder::decode_json("last_scrub_stamp", last_scrub_stamp, obj, true); + JSONDecoder::decode_json("last_scrub_version", last_scrub_version, obj, true); +} + +template<template<typename> class Allocator> +void inode_t<Allocator>::generate_test_instances(std::list<inode_t*>& ls) +{ + ls.push_back(new inode_t<Allocator>); + ls.push_back(new inode_t<Allocator>); + ls.back()->ino = 1; + // i am lazy. 
+} + +template<template<typename> class Allocator> +int inode_t<Allocator>::compare(const inode_t<Allocator> &other, bool *divergent) const +{ + ceph_assert(ino == other.ino); + *divergent = false; + if (version == other.version) { + if (rdev != other.rdev || + ctime != other.ctime || + btime != other.btime || + mode != other.mode || + uid != other.uid || + gid != other.gid || + nlink != other.nlink || + memcmp(&dir_layout, &other.dir_layout, sizeof(dir_layout)) || + layout != other.layout || + old_pools != other.old_pools || + size != other.size || + max_size_ever != other.max_size_ever || + truncate_seq != other.truncate_seq || + truncate_size != other.truncate_size || + truncate_from != other.truncate_from || + truncate_pending != other.truncate_pending || + change_attr != other.change_attr || + mtime != other.mtime || + atime != other.atime || + time_warp_seq != other.time_warp_seq || + inline_data != other.inline_data || + client_ranges != other.client_ranges || + !(dirstat == other.dirstat) || + !(rstat == other.rstat) || + !(accounted_rstat == other.accounted_rstat) || + file_data_version != other.file_data_version || + xattr_version != other.xattr_version || + backtrace_version != other.backtrace_version) { + *divergent = true; + } + return 0; + } else if (version > other.version) { + *divergent = !older_is_consistent(other); + return 1; + } else { + ceph_assert(version < other.version); + *divergent = !other.older_is_consistent(*this); + return -1; + } +} + +template<template<typename> class Allocator> +bool inode_t<Allocator>::older_is_consistent(const inode_t<Allocator> &other) const +{ + if (max_size_ever < other.max_size_ever || + truncate_seq < other.truncate_seq || + time_warp_seq < other.time_warp_seq || + inline_data.version < other.inline_data.version || + dirstat.version < other.dirstat.version || + rstat.version < other.rstat.version || + accounted_rstat.version < other.accounted_rstat.version || + file_data_version < other.file_data_version || 
+ xattr_version < other.xattr_version || + backtrace_version < other.backtrace_version) { + return false; + } + return true; +} + +template<template<typename> class Allocator> +inline void encode(const inode_t<Allocator> &c, ::ceph::buffer::list &bl, uint64_t features) +{ + ENCODE_DUMP_PRE(); + c.encode(bl, features); + ENCODE_DUMP_POST(cl); +} +template<template<typename> class Allocator> +inline void decode(inode_t<Allocator> &c, ::ceph::buffer::list::const_iterator &p) +{ + c.decode(p); +} + +// parse a map of keys/values. +namespace qi = boost::spirit::qi; + +template <typename Iterator> +struct keys_and_values + : qi::grammar<Iterator, std::map<std::string, std::string>()> +{ + keys_and_values() + : keys_and_values::base_type(query) + { + query = pair >> *(qi::lit(' ') >> pair); + pair = key >> '=' >> value; + key = qi::char_("a-zA-Z_") >> *qi::char_("a-zA-Z_0-9"); + value = +qi::char_("a-zA-Z0-9-_."); + } + qi::rule<Iterator, std::map<std::string, std::string>()> query; + qi::rule<Iterator, std::pair<std::string, std::string>()> pair; + qi::rule<Iterator, std::string()> key, value; +}; + +#endif |