diff options
Diffstat (limited to '')
-rw-r--r-- | src/include/radosstriper/libradosstriper.h | 620 | ||||
-rw-r--r-- | src/include/radosstriper/libradosstriper.hpp | 241 |
2 files changed, 861 insertions, 0 deletions
diff --git a/src/include/radosstriper/libradosstriper.h b/src/include/radosstriper/libradosstriper.h new file mode 100644 index 000000000..a35345f7d --- /dev/null +++ b/src/include/radosstriper/libradosstriper.h @@ -0,0 +1,620 @@ +#ifndef CEPH_LIBRADOSSTRIPER_H +#define CEPH_LIBRADOSSTRIPER_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <string.h> + +#include "../rados/librados.h" + +#define LIBRADOSSTRIPER_VER_MAJOR 0 +#define LIBRADOSSTRIPER_VER_MINOR 0 +#define LIBRADOSSTRIPER_VER_EXTRA 0 + +#define LIBRADOSSTRIPER_VERSION(maj, min, extra) ((maj << 16) + (min << 8) + extra) + +#define LIBRADOSSTRIPER_VERSION_CODE LIBRADOSSTRIPER_VERSION(LIBRADOSSTRIPER_VER_MAJOR, LIBRADOSSTRIPER_VER_MINOR, LIBRADOSSTRIPER_VER_EXTRA) + +/** + * @typedef rados_striper_t + * + * A handle for interacting with striped objects in a RADOS cluster. + */ +typedef void *rados_striper_t; + +/** + * @defgroup libradosstriper_h_init Setup and Teardown + * These are the first and last functions to that should be called + * when using libradosstriper. + * + * @{ + */ + +/** + * Creates a rados striper using the given io context + * Striper has initially default object layout. + * See rados_striper_set_object_layout_*() to change this + * + * @param ioctx the rados context to use + * @param striper where to store the rados striper + * @returns 0 on success, negative error code on failure + */ + int rados_striper_create(rados_ioctx_t ioctx, + rados_striper_t *striper); + +/** + * Destroys a rados striper + * + * @param striper the striper to destroy + */ +void rados_striper_destroy(rados_striper_t striper); + +/** + * Sets the object layout's stripe unit of a rados striper for future objects. + * This layout will be used when new objects are created (by writing to them) + * Already existing objects will be opened with their own layout. + * + * @param striper the targeted striper + * @param stripe_unit the stripe_unit value of the new object layout + * @returns 0 on success, negative error code on failure + */ +int rados_striper_set_object_layout_stripe_unit(rados_striper_t striper, + unsigned int stripe_unit); + +/** + * Sets the object layout's stripe count of a rados striper for future objects. + * This layout will be used when new objects are created (by writing to them) + * Already existing objects will be opened with their own layout. + * + * @param striper the targeted striper + * @param stripe_count the stripe_count value of the new object layout + * @returns 0 on success, negative error code on failure + */ +int rados_striper_set_object_layout_stripe_count(rados_striper_t striper, + unsigned int stripe_count); + +/** + * Sets the object layout's object_size of a rados striper for future objects. + * This layout will be used when new objects are created (by writing to them) + * Already existing objects will be opened with their own layout. + * + * @param striper the targeted striper + * @param object_size the object_size value of the new object layout + * @returns 0 on success, negative error code on failure + */ +int rados_striper_set_object_layout_object_size(rados_striper_t striper, + unsigned int object_size); + +/** @} init */ + +/** + * @defgroup libradosstriper_h_synch_io Synchronous I/O + * Writes are striped to several rados objects which are then + * replicated to a number of OSDs based on the configuration + * of the pool they are in. These write functions block + * until data is in memory on all replicas of the object they're + * writing to - they are equivalent to doing the corresponding + * asynchronous write, and the calling + * rados_striper_ioctx_wait_for_complete(). + * + * @{ + */ + +/** + * Synchronously write data to a striped object at the specified offset + * + * @param striper the striper in which the write will occur + * @param soid the name of the striped object + * @param buf data to write + * @param len length of the data, in bytes + * @param off byte offset in the object to begin writing at + * @returns 0 on success, negative error code on failure + * failure + */ +int rados_striper_write(rados_striper_t striper, + const char *soid, + const char *buf, + size_t len, + uint64_t off); + +/** + * Synchronously write an entire striped object + * + * The striped object is filled with the provided data. If the striped object exists, + * it is truncated and then written. + * + * @param striper the striper in which the write will occur + * @param soid the name of the striped object + * @param buf data to write + * @param len length of the data, in bytes + * @returns 0 on success, negative error code on failure + */ +int rados_striper_write_full(rados_striper_t striper, + const char *soid, + const char *buf, + size_t len); + +/** + * Append data to an object + * + * @param striper the striper in which the write will occur + * @param soid the name of the striped object + * @param buf the data to append + * @param len length of buf (in bytes) + * @returns 0 on success, negative error code on failure + * failure + */ +int rados_striper_append(rados_striper_t striper, + const char *soid, + const char *buf, + size_t len); + +/** + * Synchronously read data from a striped object at the specified offset + * + * @param striper the striper in which the read will occur + * @param soid the name of the striped object + * @param buf where to store the results + * @param len the number of bytes to read + * @param off the offset to start reading from in the object + * @returns number of bytes read on success, negative error code on + * failure + */ +int rados_striper_read(rados_striper_t striper, + const char *soid, + char *buf, + size_t len, + uint64_t off); + +/** + * Synchronously removes a striped object + * + * @note There is no atomicity of the deletion and the striped + * object may be left incomplete if an error is returned (metadata + * all present, but some stripes missing) + * However, there is a atomicity of the metadata deletion and + * the deletion can not happen if any I/O is ongoing (it + * will return EBUSY). Identically, no I/O will be able to start + * during deletion (same EBUSY return code) + * @param striper the striper in which the remove will occur + * @param soid the name of the striped object + * @returns 0 on success, negative error code on failure + */ +int rados_striper_remove(rados_striper_t striper, + const char* soid); + +/** + * Resize an object + * + * If this enlarges the object, the new area is logically filled with + * zeroes. If this shrinks the object, the excess data is removed. + * + * @note the truncation is not fully atomic. The metadata part is, + * so the behavior will be atomic from user point of view when + * the object size is reduced. However, in case of failure, old data + * may stay around, hidden. They may reappear if the object size is + * later grown, instead of the expected 0s. When growing the + * object and in case of failure, the new 0 data may not be + * fully created. This can lead to ENOENT errors when + * writing/reading the missing parts. + * @note the truncation can not happen if any I/O is ongoing (it + * will return EBUSY). Identically, no I/O will be able to start + * during truncation (same EBUSY return code) + * @param io the rados context to use + * @param soid the name of the striped object + * @param size the new size of the object in bytes + * @returns 0 on success, negative error code on failure + */ +int rados_striper_trunc(rados_striper_t striper, const char *soid, uint64_t size); + +/** @} Synchronous I/O */ + +/** + * @defgroup libradosstriper_h_xattrs Xattrs + * Extended attributes are stored as extended attributes on the + * first rados regular object of the striped object. + * Thus, they have the same limitations as the underlying + * rados extended attributes. + * + * @{ + */ + +/** + * Get the value of an extended attribute on a striped object. + * + * @param striper the striper in which the getxattr will occur + * @param oid name of the striped object + * @param name which extended attribute to read + * @param buf where to store the result + * @param len size of buf in bytes + * @returns length of xattr value on success, negative error code on failure + */ +int rados_striper_getxattr(rados_striper_t striper, + const char *oid, + const char *name, + char *buf, + size_t len); + +/** + * Set an extended attribute on a striped object. + * + * @param striper the striper in which the setxattr will occur + * @param oid name of the object + * @param name which extended attribute to set + * @param buf what to store in the xattr + * @param len the number of bytes in buf + * @returns 0 on success, negative error code on failure + */ +int rados_striper_setxattr(rados_striper_t striper, + const char *oid, + const char *name, + const char *buf, + size_t len); + +/** + * Delete an extended attribute from a striped object. + * + * @param striper the striper in which the rmxattr will occur + * @param oid name of the object + * @param name which xattr to delete + * @returns 0 on success, negative error code on failure + */ +int rados_striper_rmxattr(rados_striper_t striper, + const char *oid, + const char *name); + +/** + * Start iterating over xattrs on a striped object. + * + * @post iter is a valid iterator + * + * @param striper the striper in which the getxattrs will occur + * @param oid name of the object + * @param iter where to store the iterator + * @returns 0 on success, negative error code on failure + */ +int rados_striper_getxattrs(rados_striper_t striper, + const char *oid, + rados_xattrs_iter_t *iter); + +/** + * Get the next xattr on the striped object + * + * @pre iter is a valid iterator + * + * @post name is the NULL-terminated name of the next xattr, and val + * contains the value of the xattr, which is of length len. If the end + * of the list has been reached, name and val are NULL, and len is 0. + * + * @param iter iterator to advance + * @param name where to store the name of the next xattr + * @param val where to store the value of the next xattr + * @param len the number of bytes in val + * @returns 0 on success, negative error code on failure + */ +int rados_striper_getxattrs_next(rados_xattrs_iter_t iter, + const char **name, + const char **val, + size_t *len); + +/** + * Close the xattr iterator. + * + * iter should not be used after this is called. + * + * @param iter the iterator to close + */ +void rados_striper_getxattrs_end(rados_xattrs_iter_t iter); + +/** @} Xattrs */ + +/** + * Synchronously get object stats (size/mtime) + * + * @param striper the striper in which the stat will occur + * @param soid the id of the striped object + * @param psize where to store object size + * @param pmtime where to store modification time + * @returns 0 on success, negative error code on failure + */ +int rados_striper_stat(rados_striper_t striper, + const char* soid, + uint64_t *psize, + time_t *pmtime); + +int rados_striper_stat2(rados_striper_t striper, + const char* soid, + uint64_t *psize, + struct timespec *pmtime); + +/** + * @defgroup libradosstriper_h_asynch_io Asynchronous I/O + * Read and write to objects without blocking. + * + * @{ + */ + +/** + * @typedef rados_striper_multi_completion_t + * Represents the state of a set of asynchronous operations + * it contains the aggregated return value once the operations complete + * and can be used to block until all operations are complete and/or safe. + */ +typedef void *rados_striper_multi_completion_t; + +/** + * Constructs a multi completion to use with asynchronous operations + * + * The complete and safe callbacks correspond to operations being + * acked and committed, respectively. The callbacks are called in + * order of receipt, so the safe callback may be triggered before the + * complete callback, and vice versa. This is affected by journalling + * on the OSDs. + * + * @note Read operations only get a complete callback. + * @note BUG: this should check for ENOMEM instead of throwing an exception + * + * @param cb_arg application-defined data passed to the callback functions + * @param cb_complete the function to be called when the operation is + * in memory on all relpicas + * @param cb_safe the function to be called when the operation is on + * stable storage on all replicas + * @param pc where to store the completion + * @returns 0 + */ +int rados_striper_multi_aio_create_completion(void *cb_arg, + rados_callback_t cb_complete, + rados_callback_t cb_safe, + rados_striper_multi_completion_t *pc); + +/** + * Block until all operation complete + * + * This means data is in memory on all replicas. + * + * @param c operations to wait for + * @returns 0 + */ +void rados_striper_multi_aio_wait_for_complete(rados_striper_multi_completion_t c); + +/** + * Block until all operation are safe + * + * This means data is on stable storage on all replicas. + * + * @param c operations to wait for + * @returns 0 + */ +void rados_striper_multi_aio_wait_for_safe(rados_striper_multi_completion_t c); + +/** + * Has a multi asynchronous operation completed? + * + * @warning This does not imply that the complete callback has + * finished + * + * @param c async operations to inspect + * @returns whether c is complete + */ +int rados_striper_multi_aio_is_complete(rados_striper_multi_completion_t c); + +/** + * Is a multi asynchronous operation safe? + * + * @warning This does not imply that the safe callback has + * finished + * + * @param c async operations to inspect + * @returns whether c is safe + */ +int rados_striper_multi_aio_is_safe(rados_striper_multi_completion_t c); + +/** + * Block until all operations complete and callback completes + * + * This means data is in memory on all replicas and can be read. + * + * @param c operations to wait for + * @returns 0 + */ +void rados_striper_multi_aio_wait_for_complete_and_cb(rados_striper_multi_completion_t c); + +/** + * Block until all operations are safe and callback has completed + * + * This means data is on stable storage on all replicas. + * + * @param c operations to wait for + * @returns 0 + */ +void rados_striper_multi_aio_wait_for_safe_and_cb(rados_striper_multi_completion_t c); + +/** + * Has a multi asynchronous operation and callback completed + * + * @param c async operations to inspect + * @returns whether c is complete + */ +int rados_striper_multi_aio_is_complete_and_cb(rados_striper_multi_completion_t c); + +/** + * Is a multi asynchronous operation safe and has the callback completed + * + * @param c async operations to inspect + * @returns whether c is safe + */ +int rados_striper_multi_aio_is_safe_and_cb(rados_striper_multi_completion_t c); + +/** + * Get the return value of a multi asychronous operation + * + * The return value is set when all operations are complete or safe, + * whichever comes first. + * + * @pre The operation is safe or complete + * + * @note BUG: complete callback may never be called when the safe + * message is received before the complete message + * + * @param c async operations to inspect + * @returns aggregated return value of the operations + */ +int rados_striper_multi_aio_get_return_value(rados_striper_multi_completion_t c); + +/** + * Release a multi asynchrnous IO completion + * + * Call this when you no longer need the completion. It may not be + * freed immediately if the operation is not acked and committed. + * + * @param c multi completion to release + */ +void rados_striper_multi_aio_release(rados_striper_multi_completion_t c); + +/** + * Asynchronously write data to a striped object at the specified offset + * + * The return value of the completion will be 0 on success, negative + * error code on failure. + * + * @param striper the striper in which the write will occur + * @param soid the name of the striped object + * @param completion what to do when the write is safe and complete + * @param buf data to write + * @param len length of the data, in bytes + * @param off byte offset in the object to begin writing at + * @returns 0 on success, negative error code on + * failure + */ +int rados_striper_aio_write(rados_striper_t striper, + const char *soid, + rados_completion_t completion, + const char *buf, + size_t len, + uint64_t off); + +/** + * Asynchronously appends data to a striped object + * + * The return value of the completion will be 0 on success, negative + * error code on failure. + * + * @param striper the striper in which the write will occur + * @param soid the name of the striped object + * @param completion what to do when the write is safe and complete + * @param buf data to write + * @param len length of the data, in bytes + * @returns 0 on success, negative error code on + * failure + */ +int rados_striper_aio_append(rados_striper_t striper, + const char *soid, + rados_completion_t completion, + const char *buf, + size_t len); + +/** + * Asynchronously fills and object with the provided data. + * If the object exists, it is truncated and then written. + * + * The return value of the completion will be 0 on success, negative + * error code on failure. + * + * @param striper the striper in which the write will occur + * @param soid the name of the striped object + * @param completion what to do when the write is safe and complete + * @param buf data to write + * @param len length of the data, in bytes + * @returns 0 on success, negative error code on + * failure + */ +int rados_striper_aio_write_full(rados_striper_t striper, + const char *soid, + rados_completion_t completion, + const char *buf, + size_t len); + +/** + * Asynchronously read data from a striped object at the specified offset + * + * The return value of the completion will be number of bytes read on + * success, negative error code on failure. + * + * @param striper the striper in which the read will occur + * @param soid the name of the striped object + * @param completion what to do when the read is safe and complete + * @param buf where to store the results + * @param len the number of bytes to read + * @param off the offset to start reading from in the object + * @returns 0 on success, negative error code on + * failure + */ +int rados_striper_aio_read(rados_striper_t striper, + const char *soid, + rados_completion_t completion, + char *buf, + const size_t len, + uint64_t off); + +/** + * Asynchronously removes a striped object + * + * @note There is no atomicity of the deletion and the striped + * object may be left incomplete if an error is returned (metadata + * all present, but some stripes missing) + * However, there is a atomicity of the metadata deletion and + * the deletion can not happen if any I/O is ongoing (it + * will return EBUSY). Identically, no I/O will be able to start + * during deletion (same EBUSY return code) + * @param striper the striper in which the remove will occur + * @param soid the name of the striped object + * @param completion what to do when the remove is safe and complete + * @returns 0 on success, negative error code on failure + */ + +int rados_striper_aio_remove(rados_striper_t striper, + const char* soid, + rados_completion_t completion); + +/** + * Block until all pending writes in a striper are safe + * + * This is not equivalent to calling rados_striper_multi_aio_wait_for_safe() on all + * write completions, since this waits for the associated callbacks to + * complete as well. + * + * @param striper the striper in which the flush will occur + * @returns 0 on success, negative error code on failure +*/ +void rados_striper_aio_flush(rados_striper_t striper); + +/** + * Asynchronously get object stats (size/mtime) + * + * @param striper the striper in which the stat will occur + * @param soid the id of the striped object + * @param psize where to store object size + * @param pmtime where to store modification time + * @param completion what to do when the stats is complete + * @returns 0 on success, negative error code on failure + */ +int rados_striper_aio_stat(rados_striper_t striper, + const char* soid, + rados_completion_t completion, + uint64_t *psize, + time_t *pmtime); + +int rados_striper_aio_stat2(rados_striper_t striper, + const char* soid, + rados_completion_t completion, + uint64_t *psize, + struct timespec *pmtime); +/** @} Asynchronous I/O */ + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/include/radosstriper/libradosstriper.hpp b/src/include/radosstriper/libradosstriper.hpp new file mode 100644 index 000000000..fb790b0d7 --- /dev/null +++ b/src/include/radosstriper/libradosstriper.hpp @@ -0,0 +1,241 @@ +#ifndef __LIBRADOSSTRIPER_HPP +#define __LIBRADOSSTRIPER_HPP + +#include <string.h> +#include <string> +#include <map> +#include "../rados/buffer.h" +#include "../rados/librados.hpp" + +#include "libradosstriper.h" + +namespace libradosstriper +{ + struct RadosStriperImpl; + struct MultiAioCompletionImpl; + + /* + * Completion object for multiple asynchronous IO + * It allows to internally handle several "requests" + */ + struct MultiAioCompletion { + MultiAioCompletion(MultiAioCompletionImpl *pc_) : pc(pc_) {} + ~MultiAioCompletion(); + int set_complete_callback(void *cb_arg, librados::callback_t cb); + int set_safe_callback(void *cb_arg, librados::callback_t cb) __attribute__ ((deprecated)); + void wait_for_complete(); + void wait_for_safe() __attribute__ ((deprecated)); + void wait_for_complete_and_cb(); + void wait_for_safe_and_cb() __attribute__ ((deprecated)); + bool is_complete(); + bool is_safe() __attribute__ ((deprecated)); + bool is_complete_and_cb(); + bool is_safe_and_cb() __attribute__ ((deprecated)); + int get_return_value(); + void release(); + MultiAioCompletionImpl *pc; + }; + + /* RadosStriper : This class allows to perform read/writes on striped objects + * + * Typical use (error checking omitted): + * + * RadosStriper rs; + * RadosStriper.striper_create("my_cluster", rs); + * bufferlist bl; + * ... put data in bl ... + * rs.write(object_name, bl, len, offset); + * bufferlist bl2; + * rs.read(object_name, &bl2, len, offset); + * ... + */ + class RadosStriper + { + public: + + /* + * constructor + */ + RadosStriper(); + + /* + * builds the C counter part of a RadosStriper + */ + static void to_rados_striper_t(RadosStriper &striper, + rados_striper_t *s); + + /* + * copy constructor + */ + RadosStriper(const RadosStriper& rs); + + /* + * operator= + */ + RadosStriper& operator=(const RadosStriper& rs); + + /* + * destructor + * Internally calling close() if an object is currently opened + */ + ~RadosStriper(); + + /* + * create method + */ + static int striper_create(librados::IoCtx& ioctx, + RadosStriper *striper); + + /* + * set object layout's stripe unit + * This layout will be used when new objects are created (by writing to them) + * Already existing objects will be opened with their own layout. + */ + int set_object_layout_stripe_unit(unsigned int stripe_unit); + + /* + * set object layout's stripe count + * This layout will be used when new objects are created (by writing to them) + * Already existing objects will be opened with their own layout. + */ + int set_object_layout_stripe_count(unsigned int stripe_count); + + /* + * set object layout's object size + * This layout will be used when new objects are created (by writing to them) + * Already existing objects will be opened with their own layout. + */ + int set_object_layout_object_size(unsigned int object_size); + + /** + * Get the value of an extended attribute on a striped object + */ + int getxattr(const std::string& oid, const char *name, ceph::bufferlist& bl); + + /** + * Set the value of an extended attribute on a striped object + */ + int setxattr(const std::string& oid, const char *name, ceph::bufferlist& bl); + + /** + * Delete an extended attribute from a striped object + */ + int rmxattr(const std::string& oid, const char *name); + + /** + * Start iterating over xattrs on a striped object. + */ + int getxattrs(const std::string& oid, + std::map<std::string, ceph::bufferlist>& attrset); + + /** + * synchronously write to the striped object at the specified offset. + * NOTE: this call steals the contents of @param bl. + */ + int write(const std::string& soid, const ceph::bufferlist& bl, size_t len, uint64_t off); + + /** + * synchronously fill the striped object with the specified data + * NOTE: this call steals the contents of @param bl. + */ + int write_full(const std::string& soid, const ceph::bufferlist& bl); + + /** + * synchronously append data to the striped object + * NOTE: this call steals the contents of @p bl. + */ + int append(const std::string& soid, const ceph::bufferlist& bl, size_t len); + + /** + * asynchronously write to the striped object at the specified offset. + * NOTE: this call steals the contents of @p bl. + */ + int aio_write(const std::string& soid, librados::AioCompletion *c, const ceph::bufferlist& bl, size_t len, uint64_t off); + + /** + * asynchronously fill the striped object with the specified data + * NOTE: this call steals the contents of @p bl. + */ + int aio_write_full(const std::string& soid, librados::AioCompletion *c, const ceph::bufferlist& bl); + + /** + * asynchronously append data to the striped object + * NOTE: this call steals the contents of @p bl. + */ + int aio_append(const std::string& soid, librados::AioCompletion *c, const ceph::bufferlist& bl, size_t len); + + /** + * synchronously read from the striped object at the specified offset. + */ + int read(const std::string& soid, ceph::bufferlist* pbl, size_t len, uint64_t off); + + /** + * asynchronously read from the striped object at the specified offset. + */ + int aio_read(const std::string& soid, librados::AioCompletion *c, ceph::bufferlist *pbl, size_t len, uint64_t off); + + /** + * synchronously get striped object stats (size/mtime) + */ + int stat(const std::string& soid, uint64_t *psize, time_t *pmtime); + int stat2(const std::string& soid, uint64_t *psize, struct timespec *pts); + + /** + * asynchronously get striped object stats (size/mtime) + */ + int aio_stat(const std::string& soid, librados::AioCompletion *c, + uint64_t *psize, time_t *pmtime); + int aio_stat2(const std::string& soid, librados::AioCompletion *c, + uint64_t *psize, struct timespec *pts); + + /** + * deletes a striped object. + * There is no atomicity of the deletion and the striped + * object may be left incomplete if an error is returned (metadata + * all present, but some stripes missing) + * However, there is a atomicity of the metadata deletion and + * the deletion can not happen if any I/O is ongoing (it + * will return EBUSY). Identically, no I/O will be able to start + * during deletion (same EBUSY return code) + */ + int remove(const std::string& soid); + int remove(const std::string& soid, int flags); + + /** + * asynchronous remove of striped objects + * See synchronous version for comments on (lack of) atomicity + */ + int aio_remove(const std::string& soid, librados::AioCompletion *c); + int aio_remove(const std::string& soid, librados::AioCompletion *c, int flags); + + /** + * Resizes a striped object + * the truncation can not happen if any I/O is ongoing (it + * will return EBUSY). Identically, no I/O will be able to start + * during truncation (same EBUSY return code) + */ + int trunc(const std::string& oid, uint64_t size); + + /** + * Wait for all currently pending aio writes to be safe. + * + * @returns 0 on success, negative error code on failure + */ + int aio_flush(); + + /** + * creation of multi aio completion objects + */ + static MultiAioCompletion *multi_aio_create_completion(); + static MultiAioCompletion *multi_aio_create_completion(void *cb_arg, + librados::callback_t cb_complete, + librados::callback_t cb_safe); + + private: + RadosStriperImpl *rados_striper_impl; + + }; + +} + +#endif |