summaryrefslogtreecommitdiffstats
path: root/src/libradosstriper
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 18:45:59 +0000
commit19fcec84d8d7d21e796c7624e521b60d28ee21ed (patch)
tree42d26aa27d1e3f7c0b8bd3fd14e7d7082f5008dc /src/libradosstriper
parentInitial commit. (diff)
downloadceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.tar.xz
ceph-19fcec84d8d7d21e796c7624e521b60d28ee21ed.zip
Adding upstream version 16.2.11+ds.upstream/16.2.11+dsupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/libradosstriper')
-rw-r--r--src/libradosstriper/CMakeLists.txt17
-rw-r--r--src/libradosstriper/MultiAioCompletionImpl.cc60
-rw-r--r--src/libradosstriper/MultiAioCompletionImpl.h169
-rw-r--r--src/libradosstriper/RadosStriperImpl.cc1606
-rw-r--r--src/libradosstriper/RadosStriperImpl.h276
-rw-r--r--src/libradosstriper/libradosstriper.cc669
6 files changed, 2797 insertions, 0 deletions
diff --git a/src/libradosstriper/CMakeLists.txt b/src/libradosstriper/CMakeLists.txt
new file mode 100644
index 000000000..a69192465
--- /dev/null
+++ b/src/libradosstriper/CMakeLists.txt
@@ -0,0 +1,17 @@
+set(libradosstriper_srcs  # sources compiled into the radosstriper library
+  libradosstriper.cc
+  RadosStriperImpl.cc
+  MultiAioCompletionImpl.cc)
+add_library(radosstriper ${CEPH_SHARED}
+  ${libradosstriper_srcs})
+target_link_libraries(radosstriper
+  PRIVATE
+    librados
+    librados_impl cls_lock_client osdc ceph-common
+    pthread ${CRYPTO_LIBS} ${EXTRALIBS})
+set_target_properties(radosstriper PROPERTIES
+  OUTPUT_NAME radosstriper  # was misspelled "OUPUT_NAME", which only set an unused custom property
+  VERSION 1.0.0
+  SOVERSION 1)
+
+install(TARGETS radosstriper DESTINATION ${CMAKE_INSTALL_LIBDIR})
diff --git a/src/libradosstriper/MultiAioCompletionImpl.cc b/src/libradosstriper/MultiAioCompletionImpl.cc
new file mode 100644
index 000000000..acf9e0b6b
--- /dev/null
+++ b/src/libradosstriper/MultiAioCompletionImpl.cc
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/dout.h"
+
+#include "libradosstriper/MultiAioCompletionImpl.h"
+
+void libradosstriper::MultiAioCompletionImpl::complete_request(ssize_t r)
+{
+ lock.lock(); // released by put_unlock() at the end of this function
+ if (rval >= 0) { // once rval holds an error it is never overwritten
+ if (r < 0 && r != -EEXIST) // -EEXIST from a sub-request is not recorded as a failure
+ rval = r;
+ else if (r > 0)
+ rval += r; // positive results are accumulated
+ }
+ ceph_assert(pending_complete);
+ int count = --pending_complete;
+ if (!count && !building) { // last pending request, and no further ones are being added
+ complete();
+ }
+ put_unlock(); // drops one reference and unlocks; deletes this if it was the last reference
+}
+
+void libradosstriper::MultiAioCompletionImpl::safe_request(ssize_t r)
+{
+ lock.lock(); // released by put_unlock() at the end of this function
+ if (rval >= 0) { // an error already stored in rval is kept
+ if (r < 0 && r != -EEXIST) // -EEXIST is not recorded as a failure
+ rval = r;
+ }
+ ceph_assert(pending_safe);
+ int count = --pending_safe;
+ if (!count && !building) { // last pending safe notification, and building is over
+ safe();
+ }
+ put_unlock(); // drops one reference and unlocks; deletes this if it was the last reference
+}
+
+void libradosstriper::MultiAioCompletionImpl::finish_adding_requests()
+{
+ std::scoped_lock l{lock};
+ ceph_assert(building); // a second call would trip this assert
+ building = false;
+ if (!pending_complete) // nothing is pending any more (or nothing was ever added)
+ complete();
+ if (!pending_safe) // same check for the safe notifications
+ safe();
+}
diff --git a/src/libradosstriper/MultiAioCompletionImpl.h b/src/libradosstriper/MultiAioCompletionImpl.h
new file mode 100644
index 000000000..3ac3aae44
--- /dev/null
+++ b/src/libradosstriper/MultiAioCompletionImpl.h
@@ -0,0 +1,169 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_LIBRADOSSTRIPERSTRIPER_MULTIAIOCOMPLETIONIMPL_H
+#define CEPH_LIBRADOSSTRIPERSTRIPER_MULTIAIOCOMPLETIONIMPL_H
+
+#include <list>
+#include <mutex>
+#include "common/ceph_mutex.h"
+#include "include/radosstriper/libradosstriper.hpp"
+
+namespace libradosstriper {
+
+struct MultiAioCompletionImpl {
+ // Reference-counted completion that tracks several pending sub-requests as one.
+ ceph::mutex lock = ceph::make_mutex("MultiAioCompletionImpl lock", false); ///< taken by the accessors below
+ ceph::condition_variable cond; ///< notified by complete() and safe()
+ int ref, rval; ///< reference count (starts at 1) and aggregated return value
+ int pending_complete, pending_safe; ///< numbers of outstanding complete / safe notifications
+ rados_callback_t callback_complete, callback_safe; ///< user callbacks; each is reset to 0 once invoked
+ void *callback_complete_arg, *callback_safe_arg; ///< opaque arguments passed to the callbacks
+ bool building; ///< true if we are still building this completion
+ bufferlist bl; /// only used for read case in C api of rados striper
+ std::list<bufferlist*> bllist; /// keep temporary buffer lists used for destriping
+ // A new completion holds one reference, has nothing pending and is in "building" state.
+ MultiAioCompletionImpl()
+ : ref(1), rval(0),
+ pending_complete(0), pending_safe(0),
+ callback_complete(0), callback_safe(0),
+ callback_complete_arg(0), callback_safe_arg(0),
+ building(true) {};
+
+ ~MultiAioCompletionImpl() {
+ // deallocate the temporary buffer lists stored in bllist
+ for (std::list<bufferlist*>::iterator it = bllist.begin();
+ it != bllist.end();
+ it++) {
+ delete *it;
+ }
+ bllist.clear();
+ }
+ // Callback setters: store the callback and its argument under the lock; always return 0.
+ int set_complete_callback(void *cb_arg, rados_callback_t cb) {
+ std::scoped_lock l{lock};
+ callback_complete = cb;
+ callback_complete_arg = cb_arg;
+ return 0;
+ }
+ int set_safe_callback(void *cb_arg, rados_callback_t cb) {
+ std::scoped_lock l{lock};
+ callback_safe = cb;
+ callback_safe_arg = cb_arg;
+ return 0;
+ }
+ int wait_for_complete() { // blocks until no complete notification is pending; returns 0
+ std::unique_lock l{lock};
+ cond.wait(l, [this] { return !pending_complete; });
+ return 0;
+ }
+ int wait_for_safe() { // blocks until no safe notification is pending; returns 0
+ std::unique_lock l{lock};
+ cond.wait(l, [this] { return !pending_safe; });
+ return 0;
+ }
+ bool is_complete() {
+ std::scoped_lock l{lock};
+ return pending_complete == 0;
+ }
+ bool is_safe() {
+ std::scoped_lock l{lock};
+ return pending_safe == 0;
+ }
+ void wait_for_complete_and_cb() { // additionally waits until callback_complete has been reset to 0
+ std::unique_lock l{lock};
+ cond.wait(l, [this] { return !pending_complete && !callback_complete; });
+ }
+ void wait_for_safe_and_cb() { // additionally waits until callback_safe has been reset to 0
+ std::unique_lock l{lock};
+ cond.wait(l, [this] { return !pending_safe && !callback_safe; });
+ }
+ bool is_complete_and_cb() {
+ std::scoped_lock l{lock};
+ return ((0 == pending_complete) && !callback_complete);
+ }
+ bool is_safe_and_cb() {
+ std::scoped_lock l{lock};
+ return ((0 == pending_safe) && !callback_safe);
+ }
+ int get_return_value() {
+ std::scoped_lock l{lock};
+ return rval;
+ }
+ void get() { // takes one reference, acquiring the lock itself
+ std::scoped_lock l{lock};
+ _get();
+ }
+ void _get() { // takes one reference; the caller must already hold the lock
+ ceph_assert(ceph_mutex_is_locked(lock));
+ ceph_assert(ref > 0);
+ ++ref;
+ }
+ void put() { // drops one reference, acquiring the lock itself
+ lock.lock();
+ put_unlock();
+ }
+ void put_unlock() { // called with the lock held (see put()): drops one reference and unlocks
+ ceph_assert(ref > 0);
+ int n = --ref;
+ lock.unlock();
+ if (!n) // that was the last reference
+ delete this;
+ }
+ void add_request() { // registers one pending complete and one pending safe notification
+ std::scoped_lock l{lock};
+ pending_complete++;
+ _get(); // reference dropped by complete_request()
+ pending_safe++;
+ _get(); // reference dropped by safe_request()
+ }
+ void add_safe_request() { // NOTE(review): despite its name this bumps pending_complete, not pending_safe; confirm callers pair it with complete_request()
+ std::scoped_lock l{lock};
+ pending_complete++;
+ _get();
+ }
+ void complete() { // lock must be held: runs the complete callback at most once, then wakes waiters
+ ceph_assert(ceph_mutex_is_locked(lock));
+ if (callback_complete) {
+ callback_complete(this, callback_complete_arg);
+ callback_complete = 0;
+ }
+ cond.notify_all();
+ }
+ void safe() { // lock must be held: runs the safe callback at most once, then wakes waiters
+ ceph_assert(ceph_mutex_is_locked(lock));
+ if (callback_safe) {
+ callback_safe(this, callback_safe_arg);
+ callback_safe = 0;
+ }
+ cond.notify_all();
+ };
+ // The three methods below are defined in MultiAioCompletionImpl.cc.
+ void complete_request(ssize_t r);
+ void safe_request(ssize_t r);
+ void finish_adding_requests();
+};
+
+/// Reference-count hook following the intrusive_ptr naming convention: takes one reference.
+inline void intrusive_ptr_add_ref(MultiAioCompletionImpl* ptr) {
+  ptr->get();
+}
+
+/// Reference-count hook following the intrusive_ptr naming convention: drops one reference.
+inline void intrusive_ptr_release(MultiAioCompletionImpl* ptr) {
+  ptr->put();
+}
+}
+
+#endif // CEPH_LIBRADOSSTRIPERSTRIPER_MULTIAIOCOMPLETIONIMPL_H
diff --git a/src/libradosstriper/RadosStriperImpl.cc b/src/libradosstriper/RadosStriperImpl.cc
new file mode 100644
index 000000000..60fafd463
--- /dev/null
+++ b/src/libradosstriper/RadosStriperImpl.cc
@@ -0,0 +1,1606 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <boost/algorithm/string/replace.hpp>
+
+#include "libradosstriper/RadosStriperImpl.h"
+
+#include <errno.h>
+
+#include <sstream>
+#include <iomanip>
+#include <algorithm>
+
+#include "include/types.h"
+#include "include/uuid.h"
+#include "include/ceph_fs.h"
+#include "common/dout.h"
+#include "common/strtol.h"
+#include "common/RefCountedObj.h"
+#include "osdc/Striper.h"
+#include "librados/AioCompletionImpl.h"
+#include <cls/lock/cls_lock_client.h>
+
+/*
+ * This file contains the actual implementation of the rados striped objects interface.
+ *
+ * Striped objects are stored in rados in a set of regular rados objects, after their
+ * content has been striped using the osdc/Striper interface.
+ *
+ * The external attributes of the striped object are mapped to the attributes of the
+ * first underlying object. This first object has a set of extra external attributes
+ * storing the layout of the striped object for future read back. These attributes are :
+ * - striper.layout.object_size : the size of rados objects used.
+ * Must be a multiple of striper.layout.stripe_unit
+ * - striper.layout.stripe_unit : the size of a stripe unit
+ * - striper.layout.stripe_count : the number of stripes used
+ * - striper.size : total striped object size
+ *
+ * In general operations on striped objects are not atomic.
+ * However, a certain number of safety guards have been put to make the interface closer
+ * to atomicity :
+ * - each data operation takes a shared lock on the first rados object for the
+ * whole time of the operation
+ * - the remove and trunc operations take an exclusive lock on the first rados object
+ * for the whole time of the operation
+ * This makes sure that no removal/truncation of a striped object occurs while
+ * data operations are happening and vice versa. It thus makes sure that the layout
+ * of a striped object does not change during data operation, which is essential for
+ * data consistency.
+ *
+ * Still the writing to a striped object is not atomic. This means in particular that
+ * the size of an object may not be in sync with its content at all times.
+ * As the size is always guaranteed to be updated first and in an atomic way, and as
+ * sparse striped objects are supported (see below), what will typically happen is
+ * that a reader that comes too soon after a write will read 0s instead of the actual
+ * data.
+ *
+ * Note that remove handles the pieces of the striped object in reverse order,
+ * so that the head object is removed last, making the completion of the deletion atomic.
+ *
+ * Striped objects can be sparse, typically in case data was written at the end of the
+ * striped object only. In such a case, some rados objects constituting the striped object
+ * may be missing. Others can be partial (only the beginning will have data).
+ * When dealing with such sparse striped files, missing objects are detected and
+ * considered as full of 0s. They are however not created until real data is written
+ * to them.
+ *
+ * There are a number of missing features/improvements that could be implemented.
+ * Here are some ideas :
+ * - implementation of missing entry points (compared to rados)
+ * In particular : clone_range, sparse_read, exec, aio_flush_async, tmaps, omaps, ...
+ *
+ */
+
+#define dout_subsys ceph_subsys_rados
+#undef dout_prefix
+#define dout_prefix *_dout << "libradosstriper: "
+
+/// size of xattr buffer
+#define XATTR_BUFFER_SIZE 32
+
+/// names of the different xattr entries
+#define XATTR_LAYOUT_STRIPE_UNIT "striper.layout.stripe_unit"
+#define XATTR_LAYOUT_STRIPE_COUNT "striper.layout.stripe_count"
+#define XATTR_LAYOUT_OBJECT_SIZE "striper.layout.object_size"
+#define XATTR_SIZE "striper.size"
+#define LOCK_PREFIX "lock."
+
+/// name of the lock used on objects to ensure layout stability during IO
+#define RADOS_LOCK_NAME "striper.lock"
+
+/// format of the extension of rados objects created for a given striped object
+#define RADOS_OBJECT_EXTENSION_FORMAT ".%016llx"
+
+/// default object layout: one stripe, with stripe unit and object size both 1<<22 bytes (4 MiB)
+struct ceph_file_layout default_file_layout = {
+ init_le32(1<<22), // fl_stripe_unit (4 MiB)
+ init_le32(1), // fl_stripe_count
+ init_le32(1<<22), // fl_object_size (4 MiB)
+ init_le32(0), // fl_cas_hash
+ init_le32(0), // fl_object_stripe_unit
+ init_le32(-1), // fl_unused
+ init_le32(-1), // fl_pg_pool
+};
+
+using libradosstriper::MultiAioCompletionImplPtr;
+
+namespace {
+
+///////////////////////// CompletionData /////////////////////////////
+
+/**
+ * struct handling the data needed to pass to the callback
+ * function in asynchronous operations (base of the more specific *CompletionData structs)
+ */
+struct CompletionData : RefCountedObject {
+ /// forwards r to the user completion handler m_ack, if there is one
+ void complete(int r);
+ /// striper handling the operation; a reference is held from construction to destruction
+ libradosstriper::RadosStriperImpl *m_striper;
+ /// name of the striped object concerned by the operation
+ std::string m_soid;
+ /// cookie of the shared lock to be released at completion
+ std::string m_lockCookie;
+ /// completion handler wrapping the user completion; 0 when none was given
+ librados::IoCtxImpl::C_aio_Complete *m_ack;
+protected:
+ CompletionData(libradosstriper::RadosStriperImpl * striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion = 0);
+ ~CompletionData() override;
+
+};
+
+/// Takes a reference on the striper and, when a user completion is supplied, wraps it
+/// in a C_aio_Complete handler and points its io member at the striper's IoCtxImpl.
+CompletionData::CompletionData(libradosstriper::RadosStriperImpl* striper,
+                               const std::string& soid,
+                               const std::string& lockCookie,
+                               librados::AioCompletionImpl *userCompletion)
+  : RefCountedObject(striper->cct()),
+    m_striper(striper), m_soid(soid), m_lockCookie(lockCookie), m_ack(nullptr) {
+  m_striper->get();
+  if (!userCompletion) return;  // no user completion: m_ack stays null
+  m_ack = new librados::IoCtxImpl::C_aio_Complete(userCompletion);
+  userCompletion->io = striper->m_ioCtxImpl;
+}
+
+CompletionData::~CompletionData() {
+  delete m_ack;      // null (and thus a no-op) when no user completion was supplied
+  m_striper->put();  // releases the reference taken in the constructor
+}
+
+void CompletionData::complete(int r) {
+  if (m_ack != nullptr) { m_ack->finish(r); }  // forward r to the user completion, if one was given
+}
+
+/**
+ * struct handling the data needed to pass to the callback
+ * functions in asynchronous read operations
+ */
+struct ReadCompletionData : CompletionData {
+ /// bufferlist receiving the final, reassembled result (not deleted by this struct)
+ bufferlist* m_bl;
+ /// extents that will be read; deleted by the destructor
+ std::vector<ObjectExtent>* m_extents;
+ /// intermediate results, one bufferlist per extent; deleted by the destructor
+ std::vector<bufferlist>* m_resultbl;
+ /// return code of read completion, to be remembered until unlocking happened
+ int m_readRc;
+ /// completion object for the unlocking of the striped object at the end of the read
+ librados::AioCompletion *m_unlockCompletion;
+ /// called when reading is over: assembles the result into m_bl and stores r in m_readRc
+ void complete_read(int r);
+ /// called when the object is unlocked: completes the user completion
+ void complete_unlock(int r);
+
+private:
+ FRIEND_MAKE_REF(ReadCompletionData); // instances are created through ceph::make_ref (see aio_read)
+ ReadCompletionData(libradosstriper::RadosStriperImpl * striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion,
+ bufferlist* bl,
+ std::vector<ObjectExtent>* extents,
+ std::vector<bufferlist>* resultbl);
+ ~ReadCompletionData() override;
+};
+
+/// Stores the destination buffer plus the extents and resultbl vectors (both deleted by
+/// the destructor); the unlock completion starts out null and is assigned by the creator.
+ReadCompletionData::ReadCompletionData(libradosstriper::RadosStriperImpl* striper,
+                                       const std::string& soid,
+                                       const std::string& lockCookie,
+                                       librados::AioCompletionImpl *userCompletion,
+                                       bufferlist* bl,
+                                       std::vector<ObjectExtent>* extents,
+                                       std::vector<bufferlist>* resultbl)
+  : CompletionData(striper, soid, lockCookie, userCompletion), m_bl(bl), m_extents(extents),
+    m_resultbl(resultbl), m_readRc(0), m_unlockCompletion(nullptr) {}
+
+ReadCompletionData::~ReadCompletionData() {
+  if (m_unlockCompletion) m_unlockCompletion->release(); // null until the creator assigns it: guard the dereference
+  delete m_extents;   // extents vector handed over at construction
+  delete m_resultbl;  // per-extent result buffers handed over at construction
+}
+
+/// Reassembles the per-extent buffers into *m_bl and remembers the read's return code.
+void ReadCompletionData::complete_read(int r) {
+  Striper::StripedReadResult gathered;
+  // m_resultbl and m_extents are walked in lockstep: entry idx of one belongs to
+  // entry idx of the other
+  for (size_t idx = 0; idx < m_extents->size(); ++idx) {
+    gathered.add_partial_result(m_striper->cct(), (*m_resultbl)[idx],
+                                (*m_extents)[idx].buffer_extents);
+  }
+  m_bl->clear();
+  gathered.assemble_result(m_striper->cct(), *m_bl, true);
+  // kept until the unlock has finished; complete_unlock() reports it
+  m_readRc = r;
+}
+
+void ReadCompletionData::complete_unlock(int r) {
+  // r (the unlock result) is unused on purpose; a non-zero read code wins over the buffer length
+  const int outcome = m_readRc ? m_readRc : m_bl->length();
+  CompletionData::complete(outcome);
+}
+
+/**
+ * struct handling the data needed to pass to the callback
+ * functions in asynchronous write operations
+ */
+struct WriteCompletionData : CompletionData {
+ /// safe completion handler wrapping the user completion; 0 when none was given
+ librados::IoCtxImpl::C_aio_Complete *m_safe;
+ /// completion object for the unlocking of the striped object at the end of the write
+ librados::AioCompletion *m_unlockCompletion;
+ /// return code of write completion, to be remembered until unlocking happened
+ int m_writeRc;
+ /// called when writing is over: stores r in m_writeRc
+ void complete_write(int r);
+ /// called when the object is unlocked: completes the user completion with m_writeRc
+ void complete_unlock(int r);
+ /// forwards r to the safe handler m_safe, if there is one
+ void safe(int r);
+private:
+ FRIEND_MAKE_REF(WriteCompletionData);
+ /// constructor
+ WriteCompletionData(libradosstriper::RadosStriperImpl * striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion);
+ /// destructor
+ ~WriteCompletionData() override;
+};
+
+/// Builds the common completion data and, when a user completion is given, a second
+/// C_aio_Complete handler (m_safe) on it; the unlock completion starts out null.
+WriteCompletionData::WriteCompletionData(libradosstriper::RadosStriperImpl* striper,
+                                         const std::string& soid,
+                                         const std::string& lockCookie,
+                                         librados::AioCompletionImpl *userCompletion)
+  : CompletionData(striper, soid, lockCookie, userCompletion),
+    m_safe(userCompletion ? new librados::IoCtxImpl::C_aio_Complete(userCompletion) : nullptr),
+    m_unlockCompletion(nullptr),
+    m_writeRc(0) {
+}
+
+WriteCompletionData::~WriteCompletionData() {
+  if (m_unlockCompletion) m_unlockCompletion->release(); // constructed as null: guard the dereference
+  delete m_safe;                                         // null when no user completion was given
+}
+
+/// Called once the striped object is unlocked: reports the remembered write code.
+void WriteCompletionData::complete_unlock(int r)
+{
+  this->CompletionData::complete(m_writeRc);  // r, the unlock result, is ignored on purpose
+}
+
+/// Stores the outcome of the write; complete_unlock() reports it later.
+void WriteCompletionData::complete_write(int r) {
+  this->m_writeRc = r;
+}
+
+void WriteCompletionData::safe(int r) {
+  if (m_safe != nullptr) { m_safe->finish(r); }  // nothing to do without a user completion
+}
+
+struct RemoveCompletionData : CompletionData { // completion data carrying removal flags
+ /// removal flags, stored exactly as given to the constructor
+ int flags;
+
+private:
+ FRIEND_MAKE_REF(RemoveCompletionData);
+ /**
+ * constructor
+ * note that the constructed object will take ownership of the lock
+ */
+ RemoveCompletionData(libradosstriper::RadosStriperImpl * striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion,
+ int flags = 0) :
+ CompletionData(striper, soid, lockCookie, userCompletion), flags(flags) {}
+};
+
+/**
+ * struct handling the data needed to pass to the callback
+ * function in asynchronous truncate operations
+ */
+struct TruncateCompletionData : RefCountedObject {
+ /// striper to be used; a reference is held from construction to destruction
+ libradosstriper::RadosStriperImpl *m_striper;
+ /// striped object concerned by the truncate operation
+ std::string m_soid;
+ /// the final size of the truncated object
+ uint64_t m_size;
+
+private:
+ FRIEND_MAKE_REF(TruncateCompletionData);
+ /// constructor: stores its arguments and takes a reference on the striper
+ TruncateCompletionData(libradosstriper::RadosStriperImpl* striper,
+ const std::string& soid,
+ uint64_t size) :
+ RefCountedObject(striper->cct()),
+ m_striper(striper), m_soid(soid), m_size(size) {
+ m_striper->get();
+ }
+ /// destructor: releases the striper reference taken by the constructor
+ ~TruncateCompletionData() override {
+ m_striper->put();
+ }
+};
+
+/**
+ * struct handling the data needed to pass to the callback
+ * function in asynchronous read operations of a single rados object
+ */
+struct RadosReadCompletionData : RefCountedObject {
+ /// the multi async io completion object to notify when this read is done
+ MultiAioCompletionImplPtr m_multiAioCompl;
+ /// the expected number of bytes; shorter reads are padded with zeros up to it
+ uint64_t m_expectedBytes;
+ /// the bufferlist object where data have been written
+ bufferlist *m_bl;
+
+private:
+ FRIEND_MAKE_REF(RadosReadCompletionData);
+ /// constructor: only stores its arguments
+ RadosReadCompletionData(MultiAioCompletionImplPtr multiAioCompl,
+ uint64_t expectedBytes,
+ bufferlist *bl,
+ CephContext *context) :
+ RefCountedObject(context),
+ m_multiAioCompl(multiAioCompl), m_expectedBytes(expectedBytes), m_bl(bl) {}
+};
+
+/**
+ * struct handling (most of) the data needed to pass to the callback
+ * function in asynchronous stat operations.
+ * Inherited by the actual type for adding time information in different
+ * versions (time_t or struct timespec)
+ */
+struct BasicStatCompletionData : CompletionData {
+ // MultiAioCompletionImpl used to handle the double async
+ // call in the back (stat + getxattr)
+ libradosstriper::MultiAioCompletionImpl *m_multiCompletion;
+ // where to store the size of the first object
+ // this will be ignored but we need a place to store it when
+ // async stat is called (not initialized by the constructor)
+ uint64_t m_objectSize;
+ // where to store the file size
+ uint64_t *m_psize;
+ /// the bufferlist object used for the getxattr call
+ bufferlist m_bl;
+ /// return code of the stat
+ int m_statRC;
+ /// return code of the getxattr
+ int m_getxattrRC;
+
+protected:
+ /// constructor: passes an empty lock cookie to CompletionData and zeroes both return codes
+ BasicStatCompletionData(libradosstriper::RadosStriperImpl* striper,
+ const std::string& soid,
+ librados::AioCompletionImpl *userCompletion,
+ libradosstriper::MultiAioCompletionImpl *multiCompletion,
+ uint64_t *psize) :
+ CompletionData(striper, soid, "", userCompletion),
+ m_multiCompletion(multiCompletion), m_psize(psize),
+ m_statRC(0), m_getxattrRC(0) {};
+
+};
+
+/**
+ * struct handling the data needed to pass to the callback
+ * function in asynchronous stat operations.
+ * Simple templated extension of BasicStatCompletionData.
+ * The template parameter is the type of the time information
+ * (used with time_t for stat and struct timespec for stat2)
+ */
+template<class TimeType>
+struct StatCompletionData : BasicStatCompletionData {
+ // where to store the file time (type chosen by the template parameter)
+ TimeType *m_pmtime;
+private:
+ FRIEND_MAKE_REF(StatCompletionData);
+ /// constructor: forwards everything but pmtime to BasicStatCompletionData
+ StatCompletionData<TimeType>(libradosstriper::RadosStriperImpl* striper,
+ const std::string& soid,
+ librados::AioCompletionImpl *userCompletion,
+ libradosstriper::MultiAioCompletionImpl *multiCompletion,
+ uint64_t *psize,
+ TimeType *pmtime) :
+ BasicStatCompletionData(striper, soid, userCompletion, multiCompletion, psize),
+ m_pmtime(pmtime) {};
+};
+
+/**
+ * struct handling the data needed to pass to the callback
+ * function in asynchronous remove operations of a Rados File
+ */
+struct RadosRemoveCompletionData : RefCountedObject {
+ /// the multi async io completion object to be used
+ MultiAioCompletionImplPtr m_multiAioCompl;
+private:
+ FRIEND_MAKE_REF(RadosRemoveCompletionData);
+ /// constructor: only stores the multi completion pointer
+ RadosRemoveCompletionData(MultiAioCompletionImplPtr multiAioCompl,
+ CephContext *context) :
+ RefCountedObject(context),
+ m_multiAioCompl(multiAioCompl) {};
+};
+
+
+} // namespace {
+
+///////////////////////// constructor /////////////////////////////
+
+// Starts with a zero reference count, the given io context objects and default_file_layout.
+libradosstriper::RadosStriperImpl::RadosStriperImpl(librados::IoCtx& ioctx, librados::IoCtxImpl *ioctx_impl)
+  : m_refCnt(0), m_radosCluster(ioctx), m_ioCtx(ioctx), m_ioCtxImpl(ioctx_impl), m_layout(default_file_layout) {}
+
+///////////////////////// layout /////////////////////////////
+
+/// Sets the stripe unit of this striper's layout (m_layout). Returns 0 on success, or
+/// -EINVAL when stripe_unit is zero or not aligned on CEPH_MIN_STRIPE_UNIT (64k increments).
+int libradosstriper::RadosStriperImpl::setObjectLayoutStripeUnit(unsigned int stripe_unit)
+{
+  const bool is_zero = (stripe_unit == 0);
+  const bool misaligned = (stripe_unit & (CEPH_MIN_STRIPE_UNIT-1)) != 0;
+  if (is_zero || misaligned) return -EINVAL;
+  m_layout.fl_stripe_unit = stripe_unit; return 0;
+}
+
+/// Sets the stripe count of this striper's layout (m_layout).
+/// Returns 0 on success, -EINVAL when stripe_count is zero.
+int libradosstriper::RadosStriperImpl::setObjectLayoutStripeCount(unsigned int stripe_count)
+{
+  // a layout needs at least one stripe
+  if (stripe_count == 0) return -EINVAL;
+  m_layout.fl_stripe_count = stripe_count;
+  return 0;
+}
+
+/// Sets the object size of this striper's layout (m_layout).
+/// Returns -EINVAL unless object_size is non-zero, a 64k increment and a
+/// multiple of the layout's current stripe unit; returns 0 once stored.
+int libradosstriper::RadosStriperImpl::setObjectLayoutObjectSize(unsigned int object_size)
+{
+  const bool zero_or_misaligned = !object_size || (object_size & (CEPH_MIN_STRIPE_UNIT-1));
+  if (zero_or_misaligned) return -EINVAL;
+  // only reached for a non-zero, aligned size: now check it against the stripe unit
+  if (object_size < m_layout.fl_stripe_unit || object_size % m_layout.fl_stripe_unit)
+    return -EINVAL;
+  m_layout.fl_object_size = object_size;
+  return 0;
+}
+
+///////////////////////// xattrs /////////////////////////////
+
+/// Reads xattr `name` of striped object soid from its first rados object into bl.
+int libradosstriper::RadosStriperImpl::getxattr(const object_t& soid,
+                                                const char *name, bufferlist& bl)
+{
+  std::string head_oid = getObjectId(soid, 0);
+  return m_ioCtx.getxattr(head_oid, name, bl);
+}
+
+/// Sets xattr `name` of striped object soid to bl on its first rados object.
+int libradosstriper::RadosStriperImpl::setxattr(const object_t& soid,
+                                                const char *name, bufferlist& bl)
+{
+  std::string head_oid = getObjectId(soid, 0);
+  return m_ioCtx.setxattr(head_oid, name, bl);
+}
+
+/// Fetches all xattrs of striped object soid (held by its first rados object) and
+/// removes from the result the entries the striper keeps for layout, size and locking.
+int libradosstriper::RadosStriperImpl::getxattrs(const object_t& soid,
+                                                 map<string, bufferlist>& attrset)
+{
+  std::string head_oid = getObjectId(soid, 0);
+  const int rc = m_ioCtx.getxattrs(head_oid, attrset);
+  if (rc != 0) return rc;
+  const std::string internal_names[] = {
+    XATTR_LAYOUT_STRIPE_UNIT, XATTR_LAYOUT_STRIPE_COUNT, XATTR_LAYOUT_OBJECT_SIZE,
+    XATTR_SIZE, std::string(LOCK_PREFIX) + RADOS_LOCK_NAME};
+  for (const std::string& hidden : internal_names) attrset.erase(hidden);
+  return 0;
+}
+
+/// Removes xattr `name` from the first rados object of striped object soid.
+int libradosstriper::RadosStriperImpl::rmxattr(const object_t& soid, const char *name)
+{
+  std::string head_oid = getObjectId(soid, 0);
+  return m_ioCtx.rmxattr(head_oid, name);
+}
+
+///////////////////////// io /////////////////////////////
+
+/// Opens striped object soid and hands bl/len/off over to write_in_open_object().
+/// Returns the open call's code when it is non-zero.
+int libradosstriper::RadosStriperImpl::write(const std::string& soid, const bufferlist& bl,
+                                             size_t len, uint64_t off)
+{
+  // Opening creates the object if needed, retrieves its layout and size, and
+  // takes a shared lock on it; the lock cookie is passed on with the write.
+  std::string cookie;
+  ceph_file_layout striping;
+  const int open_rc = createAndOpenStripedObject(soid, &striping, len+off, &cookie, true);
+  if (open_rc != 0) return open_rc;
+  return write_in_open_object(soid, striping, cookie, bl, len, off);
+}
+
+/// Appends len bytes of bl to striped object soid.
+/// `size` is handed to openStripedObjectForWrite() holding len and is used afterwards
+/// as the offset passed to write_in_open_object().
+int libradosstriper::RadosStriperImpl::append(const std::string& soid,
+                                              const bufferlist& bl, size_t len)
+{
+  std::string cookie;
+  ceph_file_layout striping;
+  uint64_t size = len;
+  const int open_rc = openStripedObjectForWrite(soid, &striping, &size, &cookie, false);
+  if (open_rc != 0) return open_rc;
+  return write_in_open_object(soid, striping, cookie, bl, len, size);
+}
+
+/// Replaces the content of soid with bl: truncates to 0, then writes all of bl at offset 0.
+int libradosstriper::RadosStriperImpl::write_full(const std::string& soid, const bufferlist& bl)
+{
+  const int trunc_rc = trunc(soid, 0);
+  if (trunc_rc != 0 && trunc_rc != -ENOENT) return trunc_rc;  // -ENOENT is accepted
+  return write(soid, bl, bl.length(), 0);
+}
+
+/// Blocking read implemented on top of aio_read().
+/// Returns aio_read()'s code when it is non-zero; otherwise waits for the completion
+/// (and its callback) and returns the completion's return value.
+int libradosstriper::RadosStriperImpl::read(const std::string& soid,
+                                            bufferlist* bl,
+                                            size_t len,
+                                            uint64_t off)
+{
+  // the completion object lives on the stack for the duration of the call
+  librados::AioCompletionImpl completion;
+  const int submit_rc = aio_read(soid, &completion, bl, len, off);
+  if (submit_rc != 0) {
+    // aio_read() reported an error: return it without waiting
+    return submit_rc;
+  }
+  completion.wait_for_complete_and_cb();
+  return completion.get_return_value();
+}
+
+///////////////////////// asynchronous io /////////////////////////////
+
+/// Opens (via createAndOpenStripedObject) striped object soid for len+off bytes and hands
+/// the write of bl over to aio_write_in_open_object(), passing completion c along.
+int libradosstriper::RadosStriperImpl::aio_write(const std::string& soid,
+                                                 librados::AioCompletionImpl *c,
+                                                 const bufferlist& bl, size_t len, uint64_t off)
+{
+  std::string cookie;
+  ceph_file_layout striping;
+  const int open_rc = createAndOpenStripedObject(soid, &striping, len+off, &cookie, true);
+  if (open_rc != 0) return open_rc;
+  return aio_write_in_open_object(soid, c, striping, cookie, bl, len, off);
+}
+
+/// Asynchronous append: `size` goes into openStripedObjectForWrite() holding len and is
+/// then used as the offset given to aio_write_in_open_object(), together with c.
+int libradosstriper::RadosStriperImpl::aio_append(const std::string& soid,
+                                                  librados::AioCompletionImpl *c,
+                                                  const bufferlist& bl, size_t len)
+{
+  std::string cookie;
+  ceph_file_layout striping;
+  uint64_t size = len;
+  const int open_rc = openStripedObjectForWrite(soid, &striping, &size, &cookie, false);
+  if (open_rc != 0) return open_rc;
+  return aio_write_in_open_object(soid, c, striping, cookie, bl, len, size);
+}
+
+/// Async write_full(): truncate soid to 0, then aio_write() all of bl at offset 0 via c.
+int libradosstriper::RadosStriperImpl::aio_write_full(const std::string& soid,
+    librados::AioCompletionImpl *c, const bufferlist& bl)
+{
+  const int rc = trunc(soid, 0);
+  if (rc && rc != -ENOENT) return rc; // -ENOENT from trunc is tolerated, as in write_full()
+  return aio_write(soid, c, bl, bl.length(), 0);
+}
+
+/// Callback of the unlock completion created in aio_read(); arg carries one ReadCompletionData ref.
+static void rados_read_aio_unlock_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  auto cdata = ceph::ref_t<ReadCompletionData>(static_cast<ReadCompletionData*>(arg), false); // adopt that ref
+  // c comes from librados::Rados::aio_create_completion(), so read it as a rados completion
+  cdata->complete_unlock(rados_aio_get_return_value(c));
+}
+
+static void striper_read_aio_req_complete(rados_striper_multi_completion_t c, void *arg)
+{
+ auto cdata = static_cast<ReadCompletionData*>(arg); // plain pointer: no reference is taken or dropped here
+ // launch the async unlocking of the object
+ cdata->m_striper->aio_unlockObject(cdata->m_soid, cdata->m_lockCookie, cdata->m_unlockCompletion);
+ // complete the read part in parallel
+ libradosstriper::MultiAioCompletionImpl *comp =
+ reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+ cdata->complete_read(comp->rval); // rval is read directly: MultiAioCompletionImpl::complete() passes itself as c and runs with its lock held
+}
+
+/// librados callback for one per-object read issued by aio_read(): pads short or missing
+/// data with zeros (sparse striped objects) and notifies the shared multi completion.
+static void rados_req_read_complete(rados_completion_t c, void *arg)
+{
+  // adopt (without adding a ref) the reference aio_read() handed over with detach(), so
+  // that the completion data is released when this callback returns instead of leaking
+  auto data = ceph::ref_t<RadosReadCompletionData>(static_cast<RadosReadCompletionData*>(arg), false);
+  int rc = rados_aio_get_return_value(c);
+  if (rc == -ENOENT) {
+    // a missing object is normal for sparse files: treat it as 0 bytes read
+    rc = 0;
+  }
+  ssize_t nread = rc;
+  if (rc >= 0 && (((uint64_t)rc) < data->m_expectedBytes)) {
+    // fewer bytes than expected (possibly none): complete the buffer with 0s
+    unsigned int lenOfZeros = data->m_expectedBytes-rc;
+    unsigned int existingDataToZero = min(data->m_bl->length()-rc, lenOfZeros);
+    if (existingDataToZero > 0) {
+      data->m_bl->zero(rc, existingDataToZero);   // zero what the buffer already holds past rc
+    }
+    if (lenOfZeros > existingDataToZero) {
+      ceph::bufferptr zeros(ceph::buffer::create(lenOfZeros-existingDataToZero));
+      zeros.zero();
+      data->m_bl->push_back(zeros);               // and append the remainder as fresh zeros
+    }
+    nread = data->m_expectedBytes;
+  }
+  auto multi_aio_comp = data->m_multiAioCompl;
+  multi_aio_comp->complete_request(nread);
+  multi_aio_comp->safe_request(rc);
+}
+
+int libradosstriper::RadosStriperImpl::aio_read(const std::string& soid,
+ librados::AioCompletionImpl *c,
+ bufferlist* bl,
+ size_t len,
+ uint64_t off)
+{
+ // open the object. This will retrieve its layout and size
+ // and take a shared lock on it
+ ceph_file_layout layout;
+ uint64_t size;
+ std::string lockCookie;
+ int rc = openStripedObjectForRead(soid, &layout, &size, &lockCookie);
+ if (rc) return rc;
+ // find out the actual number of bytes we can read
+ uint64_t read_len;
+ if (off >= size) {
+ // nothing to read ! We are done.
+ read_len = 0;
+ } else {
+ read_len = min(len, (size_t)(size-off)); // never read past the end of the striped object
+ }
+ // get list of extents to be read from
+ vector<ObjectExtent> *extents = new vector<ObjectExtent>();
+ if (read_len > 0) {
+ std::string format = soid;
+ boost::replace_all(format, "%", "%%"); // escape '%' since the name is used as a format string
+ format += RADOS_OBJECT_EXTENSION_FORMAT;
+ file_layout_t l;
+ l.from_legacy(layout);
+ Striper::file_to_extents(cct(), format.c_str(), &l, off, read_len,
+ 0, *extents);
+ }
+
+ // create a completion object and transfer ownership of extents and resultbl
+ vector<bufferlist> *resultbl = new vector<bufferlist>(extents->size()); // one result buffer per extent
+ auto cdata = ceph::make_ref<ReadCompletionData>(this, soid, lockCookie, c, bl, extents, resultbl);
+ c->is_read = true;
+ c->io = m_ioCtxImpl;
+ // create a completion for the unlocking of the striped object at the end of the read
+ librados::AioCompletion *unlock_completion =
+ librados::Rados::aio_create_completion(cdata->get() /* create ref! */, rados_read_aio_unlock_complete);
+ cdata->m_unlockCompletion = unlock_completion;
+ // create the multiCompletion object handling the reads
+ MultiAioCompletionImplPtr nc{new libradosstriper::MultiAioCompletionImpl,
+ false};
+ nc->set_complete_callback(cdata.get(), striper_read_aio_req_complete); // plain pointer: no extra reference for this callback
+ // go through the extents
+ int r = 0, i = 0;
+ for (vector<ObjectExtent>::iterator p = extents->begin(); p != extents->end(); ++p) {
+ // create a buffer list describing where to place data read from current extent
+ bufferlist *oid_bl = &((*resultbl)[i++]);
+ for (vector<pair<uint64_t,uint64_t> >::iterator q = p->buffer_extents.begin();
+ q != p->buffer_extents.end();
+ ++q) {
+ bufferlist buffer_bl;
+ buffer_bl.substr_of(*bl, q->first, q->second);
+ oid_bl->append(buffer_bl);
+ }
+ // read all extents of a given object in one go
+ nc->add_request();
+ // the reference created by make_ref is handed over to the completion callback
+ // through detach(); rados_req_read_complete receives it as its arg
+ auto data = ceph::make_ref<RadosReadCompletionData>(nc, p->length, oid_bl, cct());
+ librados::AioCompletion *rados_completion =
+ librados::Rados::aio_create_completion(data.detach(), rados_req_read_complete);
+ r = m_ioCtx.aio_read(p->oid.name, rados_completion, oid_bl, p->length, p->offset);
+ rados_completion->release();
+ if (r < 0) // NOTE(review): the request counted by add_request() above is not completed on this path; confirm whether the callback still fires
+ break;
+ }
+ nc->finish_adding_requests(); // from here on the complete callback may run, possibly right away
+ return r;
+}
+
+// Raw-buffer flavour of aio_read: wraps `buf` in the completion's own
+// bufferlist and forwards to the bufferlist overload.
+int libradosstriper::RadosStriperImpl::aio_read(const std::string& soid,
+                                                librados::AioCompletionImpl *c,
+                                                char* buf,
+                                                size_t len,
+                                                uint64_t off)
+{
+  // the bufferlist lives inside the completion object
+  bufferlist& target = c->bl;
+  target.clear();
+  target.push_back(buffer::create_static(len, buf));
+  // delegate to the bufferlist version of this method
+  return aio_read(soid, c, &target, len, off);
+}
+
+// Flush at the rados level, then block until m_refCnt has dropped to 1 or
+// below. Returns the rados-level flush result.
+int libradosstriper::RadosStriperImpl::aio_flush()
+{
+  // pass to the rados level
+  const int ret = m_ioCtx.aio_flush();
+  if (ret >= 0) {
+    // wait until all CompletionData objects have been released
+    std::unique_lock l{lock};
+    cond.wait(l, [this] { return m_refCnt <= 1; });
+  }
+  return ret;
+}
+
+///////////////////////// stat and deletion /////////////////////////////
+
+// Synchronous stat: starts aio_stat on a local completion and, if it could
+// be started, waits for it and returns its result.
+int libradosstriper::RadosStriperImpl::stat(const std::string& soid, uint64_t *psize, time_t *pmtime)
+{
+  // local completion object driving the asynchronous call
+  librados::AioCompletionImpl c;
+  const int rc = aio_stat(soid, &c, psize, pmtime);
+  if (rc != 0) {
+    // the asynchronous stat did not report success: hand its code back as is
+    return rc;
+  }
+  // wait for completion of the stat and report its result
+  c.wait_for_complete();
+  return c.get_return_value();
+}
+
+// Callback of the rados stat call: adopts the reference passed as `arg`,
+// records an -ENOENT result and signals the multi completion.
+static void striper_stat_aio_stat_complete(rados_completion_t c, void *arg) {
+  auto *raw = static_cast<BasicStatCompletionData*>(arg);
+  // adopt the reference without incrementing it
+  ceph::ref_t<BasicStatCompletionData> data(raw, false);
+  const int rc = rados_aio_get_return_value(c);
+  // only a missing object is remembered as a stat failure
+  if (-ENOENT == rc) {
+    data->m_statRC = rc;
+  }
+  data->m_multiCompletion->complete_request(rc);
+}
+
+// Callback of the getxattr call that fetches the striped object size:
+// adopts the reference passed as `arg`, parses the size on success, records
+// failures in m_getxattrRC and signals the multi completion.
+static void striper_stat_aio_getxattr_complete(rados_completion_t c, void *arg) {
+  auto *raw = static_cast<BasicStatCompletionData*>(arg);
+  ceph::ref_t<BasicStatCompletionData> data(raw, false);
+  int rc = rados_aio_get_return_value(c);
+  // We need to handle the case of sparse files here
+  if (rc >= 0) {
+    // copy into a std::string so that the text is null terminated for parsing
+    const std::string strsize(data->m_bl.c_str(), data->m_bl.length());
+    std::string err;
+    *data->m_psize = strict_strtoll(strsize.c_str(), 10, &err);
+    if (!err.empty()) {
+      lderr(data->m_striper->cct()) << XATTR_SIZE << " : " << err << dendl;
+      data->m_getxattrRC = -EINVAL;
+    }
+    // the request itself is reported as successful
+    rc = 0;
+  } else {
+    // remember this has failed
+    data->m_getxattrRC = rc;
+  }
+  data->m_multiCompletion->complete_request(rc);
+}
+
+// Final callback of the stat multi completion: completes the user request
+// with the stat error if any, else the getxattr error if any, else 0.
+static void striper_stat_aio_req_complete(rados_striper_multi_completion_t c,
+                                          void *arg) {
+  auto *raw = static_cast<BasicStatCompletionData*>(arg);
+  ceph::ref_t<BasicStatCompletionData> data(raw, false);
+  // a stat failure takes precedence over a getxattr failure
+  if (data->m_statRC) {
+    data->complete(data->m_statRC);
+    return;
+  }
+  if (data->m_getxattrRC < 0) {
+    data->complete(data->m_getxattrRC);
+    return;
+  }
+  data->complete(0);
+}
+
+// Common implementation of aio_stat and aio_stat2.
+// Issues two asynchronous calls on the first rados object of `soid`:
+// `statFunction` (object size and mtime) and a getxattr of XATTR_SIZE
+// (size of the striped object). Both are tracked by a MultiAioCompletionImpl
+// whose completion callback is striper_stat_aio_req_complete.
+// Returns 0 when both calls were started, otherwise the error of the call
+// that could not be started.
+template<class TimeType>
+int libradosstriper::RadosStriperImpl::aio_generic_stat
+(const std::string& soid,
+ librados::AioCompletionImpl *c,
+ uint64_t *psize,
+ TimeType *pmtime,
+ typename libradosstriper::RadosStriperImpl::StatFunction<TimeType>::Type statFunction)
+{
+ // use a MultiAioCompletion object for dealing with the fact
+ // that we'll do 2 asynchronous calls in parallel
+ MultiAioCompletionImplPtr multi_completion{
+ new libradosstriper::MultiAioCompletionImpl, false};
+ // Data object used for passing context to asynchronous calls
+ std::string firstObjOid = getObjectId(soid, 0);
+ auto cdata = ceph::make_ref<StatCompletionData<TimeType>>(this, firstObjOid, c, multi_completion.get(), psize, pmtime);
+ // each cdata->get() below takes one extra reference that the matching
+ // callback adopts when it runs
+ multi_completion->set_complete_callback(cdata->get() /* create ref! */, striper_stat_aio_req_complete);
+ // use a regular AioCompletion for the stat async call
+ librados::AioCompletion *stat_completion =
+ librados::Rados::aio_create_completion(cdata->get() /* create ref! */, striper_stat_aio_stat_complete);
+ multi_completion->add_safe_request();
+ object_t obj(firstObjOid);
+ int rc = (m_ioCtxImpl->*statFunction)(obj, stat_completion->pc,
+ &cdata->m_objectSize, cdata->m_pmtime);
+ stat_completion->release();
+ if (rc < 0) {
+ // nothing is really started so cancel everything
+ // (the data object is deleted directly, whatever its reference count)
+ delete cdata.detach();
+ return rc;
+ }
+ // use a regular AioCompletion for the getxattr async call
+ librados::AioCompletion *getxattr_completion =
+ librados::Rados::aio_create_completion(cdata->get() /* create ref! */, striper_stat_aio_getxattr_complete);
+ multi_completion->add_safe_request();
+ // in parallel, get the size of the striped object from the first object asynchronously
+ rc = m_ioCtxImpl->aio_getxattr(obj, getxattr_completion->pc,
+ XATTR_SIZE, cdata->m_bl);
+ getxattr_completion->release();
+ multi_completion->finish_adding_requests();
+ if (rc < 0) {
+ // the async stat is ongoing, so we need to go on
+ // we mark the getxattr as failed in the data object
+ // NOTE(review): the reference taken for the getxattr callback is not
+ // dropped on this path, and the error is returned while the stat is
+ // still in flight -- confirm callers cope with `c` completing later
+ cdata->m_getxattrRC = rc;
+ multi_completion->complete_request(rc);
+ return rc;
+ }
+ return 0;
+}
+
+// Asynchronous stat reporting the modification time as a time_t.
+int libradosstriper::RadosStriperImpl::aio_stat(const std::string& soid,
+                                                librados::AioCompletionImpl *c,
+                                                uint64_t *psize,
+                                                time_t *pmtime)
+{
+  // the generic implementation does the work, using the time_t based rados stat
+  const int rc =
+    aio_generic_stat<time_t>(soid, c, psize, pmtime, &librados::IoCtxImpl::aio_stat);
+  return rc;
+}
+
+// Synchronous stat with timespec precision: starts aio_stat2 on a local
+// completion and, if it could be started, waits for it (callback included)
+// and returns its result.
+int libradosstriper::RadosStriperImpl::stat2(const std::string& soid, uint64_t *psize, struct timespec *pts)
+{
+  // local completion object driving the asynchronous call
+  librados::AioCompletionImpl c;
+  const int rc = aio_stat2(soid, &c, psize, pts);
+  if (rc != 0) {
+    return rc;
+  }
+  // wait for completion of the stat and report its result
+  c.wait_for_complete_and_cb();
+  return c.get_return_value();
+}
+
+// Asynchronous stat reporting the modification time as a struct timespec.
+int libradosstriper::RadosStriperImpl::aio_stat2(const std::string& soid,
+                                                 librados::AioCompletionImpl *c,
+                                                 uint64_t *psize,
+                                                 struct timespec *pts)
+{
+  // the generic implementation does the work, using the timespec based rados stat
+  const int rc =
+    aio_generic_stat<struct timespec>(soid, c, psize, pts, &librados::IoCtxImpl::aio_stat2);
+  return rc;
+}
+
+// Callback of a single rados aio_remove issued while removing or truncating
+// a striped object.
+// `arg` is the RadosRemoveCompletionData whose reference was taken for this
+// callback when the rados completion was created; it is adopted here so that
+// it is released once the callback returns (previously it was never
+// released, leaking the data object and the multi completion it refers to).
+static void rados_req_remove_complete(rados_completion_t c, void *arg)
+{
+  // adopt the callback's reference without incrementing it
+  auto cdata = ceph::ref_t<RadosRemoveCompletionData>(static_cast<RadosRemoveCompletionData*>(arg), false);
+  int rc = rados_aio_get_return_value(c);
+  // in case the object did not exist, it means we had a sparse file, all is fine
+  if (rc == -ENOENT) {
+    rc = 0;
+  }
+  // cdata keeps the multi completion alive for both calls
+  cdata->m_multiAioCompl->complete_request(rc);
+  cdata->m_multiAioCompl->safe_request(rc);
+}
+
+// Final callback of a striped object removal: when every other rados object
+// was removed successfully, synchronously removes the first one; otherwise
+// logs that the removal is incomplete. The user request is completed with
+// the resulting code.
+static void striper_remove_aio_req_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  auto *raw = static_cast<RemoveCompletionData*>(arg);
+  // adopt the reference without incrementing it
+  ceph::ref_t<RemoveCompletionData> cdata(raw, false);
+  auto *comp = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  ldout(cdata->m_striper->cct(), 10)
+    << "RadosStriperImpl : striper_remove_aio_req_complete called for "
+    << cdata->m_soid << dendl;
+  int rc = comp->rval;
+  if (rc != 0) {
+    lderr(cdata->m_striper->cct())
+      << "RadosStriperImpl : deletion/truncation incomplete for " << cdata->m_soid
+      << ", as errors were encountered. The file is left present but it's content "
+      << " has been partially removed"
+      << dendl;
+  } else {
+    // All went fine, synchronously remove first object
+    const std::string firstObjOid = cdata->m_striper->getObjectId(cdata->m_soid, 0);
+    rc = cdata->m_striper->m_ioCtx.remove(firstObjOid, cdata->flags);
+  }
+  cdata->complete(rc);
+}
+
+// Synchronous removal: starts aio_remove on a local completion and, if it
+// could be started, waits for it (callback included) and returns its result.
+int libradosstriper::RadosStriperImpl::remove(const std::string& soid, int flags)
+{
+  // local completion object driving the asynchronous call
+  librados::AioCompletionImpl c;
+  const int rc = aio_remove(soid, &c, flags);
+  if (rc != 0) {
+    return rc;
+  }
+  // wait for completion of the remove and report its result
+  c.wait_for_complete_and_cb();
+  return c.get_return_value();
+}
+
+// Asynchronous removal of the striped object `soid`.
+// Takes an exclusive lock on the first rados object, then starts the removal
+// of all rados objects but the first; the first one is removed by
+// striper_remove_aio_req_complete once the others are gone.
+// `flags` is forwarded to internal_aio_remove so that it applies to every
+// rados object of the striped object, not only to the first one.
+// Returns the locking error if the lock could not be taken, otherwise the
+// return code of internal_aio_remove.
+int libradosstriper::RadosStriperImpl::aio_remove(const std::string& soid,
+                                                  librados::AioCompletionImpl *c,
+                                                  int flags)
+{
+  // the RemoveCompletionData object will lock the given soid for the duration
+  // of the removal
+  std::string lockCookie = getUUID();
+  int rc = m_ioCtx.lock_exclusive(getObjectId(soid, 0), RADOS_LOCK_NAME, lockCookie, "", 0, 0);
+  if (rc) return rc;
+  // create CompletionData for the async remove call
+  auto cdata = ceph::make_ref<RemoveCompletionData>(this, soid, lockCookie, c, flags);
+  MultiAioCompletionImplPtr multi_completion{
+    new libradosstriper::MultiAioCompletionImpl, false};
+  multi_completion->set_complete_callback(cdata->get() /* create ref! */, striper_remove_aio_req_complete);
+  // call asynchronous internal version of remove
+  ldout(cct(), 10)
+    << "RadosStriperImpl : Aio_remove starting for "
+    << soid << dendl;
+  // forward the caller's flags (they used to be dropped here)
+  rc = internal_aio_remove(soid, multi_completion, flags);
+  return rc;
+}
+
+// Starts the asynchronous removal of every rados object of the striped
+// object `soid` except the first one, registering one request per object on
+// `multi_completion`.
+// The number of rados objects is derived from the XATTR_SIZE attribute and
+// the striper layout, or found by probing objects one by one when the
+// attribute cannot be read.
+// Returns -EINVAL if the size attribute cannot be parsed, the code carried by
+// an ErrorCode exception if one is caught, otherwise the return code of the
+// last aio_remove issued (0 if none was issued).
+int libradosstriper::RadosStriperImpl::internal_aio_remove(
+  const std::string& soid,
+  MultiAioCompletionImplPtr multi_completion,
+  int flags)
+{
+  try {
+    // check size and get number of rados objects to delete
+    uint64_t nb_objects = 0;
+    bufferlist bl2;
+    int rc = getxattr(soid, XATTR_SIZE, bl2);
+    if (rc < 0) {
+      // no object size (or not able to get it)
+      // try to find the number of object "by hand"
+      uint64_t psize;
+      time_t pmtime;
+      while (!m_ioCtx.stat(getObjectId(soid, nb_objects), &psize, &pmtime)) {
+        nb_objects++;
+      }
+    } else {
+      // count total number of rados objects in the striped object
+      std::string err;
+      // this intermediate string allows to add a null terminator before calling strtol
+      std::string strsize(bl2.c_str(), bl2.length());
+      uint64_t size = strict_strtoll(strsize.c_str(), 10, &err);
+      if (!err.empty()) {
+        lderr(cct()) << XATTR_SIZE << " : " << err << dendl;
+
+        // NOTE(review): finish_adding_requests() is not called on this path,
+        // so the multi completion callback never runs -- confirm callers
+        // release the lock they took
+        return -EINVAL;
+      }
+      uint64_t object_size = m_layout.fl_object_size;
+      uint64_t su = m_layout.fl_stripe_unit;
+      uint64_t stripe_count = m_layout.fl_stripe_count;
+      uint64_t nb_complete_sets = size / (object_size*stripe_count);
+      uint64_t remaining_data = size % (object_size*stripe_count);
+      uint64_t remaining_stripe_units = (remaining_data + su -1) / su;
+      uint64_t remaining_objects = std::min(remaining_stripe_units, stripe_count);
+      nb_objects = nb_complete_sets * stripe_count + remaining_objects;
+    }
+    // delete rados objects in reverse order
+    // Note that we do not drop the first object. This one will only be dropped
+    // if all other removals have been successful, and this is done in the
+    // callback of the multi_completion object
+    int rcr = 0;
+    // 64 bit index: nb_objects is a uint64_t and must not be narrowed to int
+    for (int64_t i = static_cast<int64_t>(nb_objects) - 1; i >= 1; i--) {
+      multi_completion->add_request();
+      auto data = ceph::make_ref<RadosRemoveCompletionData>(multi_completion, cct());
+      librados::AioCompletion *rados_completion =
+        librados::Rados::aio_create_completion(data->get() /* create ref! */,
+                                               rados_req_remove_complete);
+      if (flags == 0) {
+        rcr = m_ioCtx.aio_remove(getObjectId(soid, i), rados_completion);
+      } else {
+        rcr = m_ioCtx.aio_remove(getObjectId(soid, i), rados_completion, flags);
+      }
+      rados_completion->release();
+      if (rcr < 0 and -ENOENT != rcr) {
+        // report the code of the failed removal (rcr), not the getxattr one
+        lderr(cct()) << "RadosStriperImpl::remove : deletion incomplete for " << soid
+                     << ", as " << getObjectId(soid, i) << " could not be deleted (rc=" << rcr << ")"
+                     << dendl;
+        break;
+      }
+    }
+    // we are over adding requests to the multi_completion object
+    multi_completion->finish_adding_requests();
+    // return
+    return rcr;
+  } catch (ErrorCode &e) {
+    // an ErrorCode exception carries the error to report
+    return e.m_code;
+  }
+
+}
+
+// Resize the striped object `soid` to `size` bytes.
+// The first rados object is locked exclusively (the operation also asserts
+// that it exists), the current layout and size are loaded, and the object is
+// truncated or grown as needed before being unlocked again.
+int libradosstriper::RadosStriperImpl::trunc(const std::string& soid, uint64_t size)
+{
+  const std::string firstObjOid = getObjectId(soid, 0);
+  const std::string lockCookie = RadosStriperImpl::getUUID();
+  // existence check and exclusive lock go into a single operation
+  librados::ObjectWriteOperation op;
+  op.assert_exists();
+  utime_t dur = utime_t();
+  rados::cls::lock::lock(&op, RADOS_LOCK_NAME, ClsLockType::EXCLUSIVE, lockCookie, "", "", dur, 0);
+  int rc = m_ioCtx.operate(firstObjOid, &op);
+  if (rc) {
+    return rc;
+  }
+  // load layout and size
+  ceph_file_layout layout;
+  uint64_t original_size;
+  rc = internal_get_layout_and_size(firstObjOid, &layout, &original_size);
+  if (rc == 0) {
+    // nothing to do when the size is unchanged
+    if (size > original_size) {
+      rc = grow(soid, original_size, size, layout);
+    } else if (size < original_size) {
+      rc = truncate(soid, original_size, size, layout);
+    }
+  }
+  // unlock object, ignore return code as we cannot do much
+  m_ioCtx.unlock(firstObjOid, RADOS_LOCK_NAME, lockCookie);
+  return rc;
+}
+
+
+///////////////////////// private helpers /////////////////////////////
+
+// Name of rados object number `objectno` of striped object `soid`:
+// "<soid>." followed by the number as 16 zero-padded hexadecimal digits.
+std::string libradosstriper::RadosStriperImpl::getObjectId(const object_t& soid,
+                                                           long long unsigned objectno)
+{
+  std::ostringstream name;
+  name << soid << '.';
+  // fixed width hexadecimal suffix
+  name << std::hex << std::setw(16) << std::setfill('0') << objectno;
+  return name.str();
+}
+
+// Synchronously release the lock identified by `lockCookie` on the first
+// rados object of `soid`. The result of the unlock is not reported.
+void libradosstriper::RadosStriperImpl::unlockObject(const std::string& soid,
+                                                     const std::string& lockCookie)
+{
+  // the lock is held on the first rados object
+  m_ioCtx.unlock(getObjectId(soid, 0), RADOS_LOCK_NAME, lockCookie);
+}
+
+// Asynchronously release the lock identified by `lockCookie` on the first
+// rados object of `soid`; `c` is the completion passed to the unlock call.
+void libradosstriper::RadosStriperImpl::aio_unlockObject(const std::string& soid,
+                                                         const std::string& lockCookie,
+                                                         librados::AioCompletion *c)
+{
+  // the lock is held on the first rados object
+  m_ioCtx.aio_unlock(getObjectId(soid, 0), RADOS_LOCK_NAME, lockCookie, c);
+}
+
+// Callback of the asynchronous unlock that follows a write: adopts the
+// reference passed as `arg` and reports the unlock result.
+static void rados_write_aio_unlock_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  auto *raw = static_cast<WriteCompletionData*>(arg);
+  // adopt the reference without incrementing it
+  ceph::ref_t<WriteCompletionData> cdata(raw, false);
+  auto *comp = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  cdata->complete_unlock(comp->rval);
+}
+
+// Completion callback of a striped write: adopts the reference passed as
+// `arg`, launches the asynchronous unlock of the striped object and then
+// reports the write result.
+static void striper_write_aio_req_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  auto *raw = static_cast<WriteCompletionData*>(arg);
+  ceph::ref_t<WriteCompletionData> cdata(raw, false);
+  auto *comp = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  // launch the async unlocking of the object
+  cdata->m_striper->aio_unlockObject(cdata->m_soid, cdata->m_lockCookie, cdata->m_unlockCompletion);
+  // complete the write part in parallel
+  cdata->complete_write(comp->rval);
+}
+
+// Safe callback of a striped write: adopts the reference passed as `arg`
+// and reports the result through WriteCompletionData::safe.
+static void striper_write_aio_req_safe(rados_striper_multi_completion_t c, void *arg)
+{
+  auto *raw = static_cast<WriteCompletionData*>(arg);
+  // adopt the reference without incrementing it
+  ceph::ref_t<WriteCompletionData> cdata(raw, false);
+  auto *comp = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  cdata->safe(comp->rval);
+}
+
+// Synchronous write of `len` bytes of `bl` at offset `off` into the striped
+// object `soid`, for which the caller supplies the layout and the lock cookie.
+// The write is started through internal_aio_write; when that succeeds this
+// method waits for completion, safety and the unlocking of the object before
+// returning the result of the multi completion.
+int libradosstriper::RadosStriperImpl::write_in_open_object(const std::string& soid,
+ const ceph_file_layout& layout,
+ const std::string& lockCookie,
+ const bufferlist& bl,
+ size_t len,
+ uint64_t off) {
+ // create a completion object to be passed to the callbacks of the multicompletion
+ // one extra reference is taken below for each of the three callbacks
+ // (unlock, complete and safe), each of which adopts and releases its own
+ // no user completion here (nullptr) as this call is synchronous
+ auto cdata = ceph::make_ref<WriteCompletionData>(this, soid, lockCookie, nullptr);
+ // create a completion object for the unlocking of the striped object at the end of the write
+ librados::AioCompletion *unlock_completion =
+ librados::Rados::aio_create_completion(cdata->get() /* create ref! */, rados_write_aio_unlock_complete);
+ cdata->m_unlockCompletion = unlock_completion;
+ // create the multicompletion that will handle the write completion
+ MultiAioCompletionImplPtr c{new libradosstriper::MultiAioCompletionImpl,
+ false};
+ c->set_complete_callback(cdata->get() /* create ref! */, striper_write_aio_req_complete);
+ c->set_safe_callback(cdata->get() /* create ref! */, striper_write_aio_req_safe);
+ // call the asynchronous API
+ int rc = internal_aio_write(soid, c, bl, len, off, layout);
+ if (!rc) {
+ // wait for completion and safety of data
+ c->wait_for_complete_and_cb();
+ c->wait_for_safe_and_cb();
+ // wait for the unlocking
+ unlock_completion->wait_for_complete();
+ // return result
+ rc = c->get_return_value();
+ }
+ return rc;
+}
+
+// Asynchronous write of `len` bytes of `bl` at offset `off` into the striped
+// object `soid`, for which the caller supplies the layout and the lock cookie.
+// `c` is the user completion, handed to the WriteCompletionData.
+// Returns the return code of internal_aio_write.
+int libradosstriper::RadosStriperImpl::aio_write_in_open_object(const std::string& soid,
+                                                                librados::AioCompletionImpl *c,
+                                                                const ceph_file_layout& layout,
+                                                                const std::string& lockCookie,
+                                                                const bufferlist& bl,
+                                                                size_t len,
+                                                                uint64_t off) {
+  // context object shared by the three callbacks registered below; each
+  // registration takes its own reference through cdata->get()
+  auto cdata = ceph::make_ref<WriteCompletionData>(this, soid, lockCookie, c);
+  // the user completion refers to our io context, on which a reference is taken
+  m_ioCtxImpl->get();
+  c->io = m_ioCtxImpl;
+  // completion used for the unlocking of the striped object at the end of the write
+  cdata->m_unlockCompletion =
+    librados::Rados::aio_create_completion(cdata->get() /* create ref! */, rados_write_aio_unlock_complete);
+  // multicompletion that will handle the write completion
+  libradosstriper::MultiAioCompletionImplPtr nc{
+    new libradosstriper::MultiAioCompletionImpl, false};
+  nc->set_complete_callback(cdata->get() /* create ref! */, striper_write_aio_req_complete);
+  nc->set_safe_callback(cdata->get() /* create ref! */, striper_write_aio_req_safe);
+  // internal asynchronous API
+  return internal_aio_write(soid, nc, bl, len, off, layout);
+}
+
+// Callback of a single rados aio_write issued by internal_aio_write: reports
+// the rados return value to the multi completion passed as `arg`, both as
+// completion and as safety of the request.
+static void rados_req_write_complete(rados_completion_t c, void *arg)
+{
+  auto *multi = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(arg);
+  const auto rval = rados_aio_get_return_value(c);
+  multi->complete_request(rval);
+  multi->safe_request(rval);
+}
+
+// Maps the range [off, off+len) of striped object `soid` onto rados object
+// extents according to `layout` and issues one asynchronous rados write per
+// extent, each registered as a request on `c`.
+// Returns the return code of the last aio_write issued (0 if none was).
+int
+libradosstriper::RadosStriperImpl::internal_aio_write(const std::string& soid,
+                                                      libradosstriper::MultiAioCompletionImplPtr c,
+                                                      const bufferlist& bl,
+                                                      size_t len,
+                                                      uint64_t off,
+                                                      const ceph_file_layout& layout)
+{
+  int r = 0;
+  // Do not try anything if we are called with empty buffer,
+  // file_to_extents would raise an exception
+  if (len > 0) {
+    // '%' is doubled because the object name is used as a format string
+    std::string format = soid;
+    boost::replace_all(format, "%", "%%");
+    format += RADOS_OBJECT_EXTENSION_FORMAT;
+    file_layout_t l;
+    l.from_legacy(layout);
+    // get list of extents to be written to
+    vector<ObjectExtent> extents;
+    Striper::file_to_extents(cct(), format.c_str(), &l, off, len, 0, extents);
+    for (auto& extent : extents) {
+      // assemble pieces of a given object into a single buffer list
+      bufferlist oid_bl;
+      for (auto& piece : extent.buffer_extents) {
+        bufferlist buffer_bl;
+        buffer_bl.substr_of(bl, piece.first, piece.second);
+        oid_bl.append(buffer_bl);
+      }
+      // and write the object
+      c->add_request();
+      librados::AioCompletion *rados_completion =
+        librados::Rados::aio_create_completion(c.get(),
+                                               rados_req_write_complete);
+      r = m_ioCtx.aio_write(extent.oid.name, rados_completion, oid_bl,
+                            extent.length, extent.offset);
+      rados_completion->release();
+      if (r < 0) {
+        break;
+      }
+    }
+  }
+  c->finish_adding_requests();
+  return r;
+}
+
+// Parses attribute `key` of `attrs` as a decimal number into `*value`.
+// Returns 0 on success, -ENOENT when the attribute is absent and -EINVAL
+// when its content cannot be parsed.
+int libradosstriper::RadosStriperImpl::extract_uint32_attr
+(std::map<std::string, bufferlist> &attrs,
+ const std::string& key,
+ ceph_le32 *value)
+{
+  auto attrsIt = attrs.find(key);
+  if (attrsIt == attrs.end()) {
+    return -ENOENT;
+  }
+  // copy into a std::string so that the text is null terminated for parsing
+  std::string strvalue(attrsIt->second.c_str(), attrsIt->second.length());
+  std::string err;
+  *value = strict_strtol(strvalue.c_str(), 10, &err);
+  if (err.empty()) {
+    return 0;
+  }
+  lderr(cct()) << key << " : " << err << dendl;
+  return -EINVAL;
+}
+
+// Parses attribute `key` of `attrs` as a decimal number into `*value`.
+// Returns 0 on success, -ENOENT when the attribute is absent and -EINVAL
+// when its content cannot be parsed.
+int libradosstriper::RadosStriperImpl::extract_sizet_attr
+(std::map<std::string, bufferlist> &attrs,
+ const std::string& key,
+ size_t *value)
+{
+  auto attrsIt = attrs.find(key);
+  if (attrsIt == attrs.end()) {
+    return -ENOENT;
+  }
+  // copy into a std::string so that the text is null terminated for parsing
+  std::string strvalue(attrsIt->second.c_str(), attrsIt->second.length());
+  std::string err;
+  *value = strict_strtoll(strvalue.c_str(), 10, &err);
+  if (err.empty()) {
+    return 0;
+  }
+  lderr(cct()) << key << " : " << err << dendl;
+  return -EINVAL;
+}
+
+// Loads the extended attributes of rados object `oid` and extracts from them
+// the stripe unit, stripe count and object size into `*layout`, and the
+// striped object size into `*size`.
+// Returns 0 on success, otherwise the first error encountered.
+int libradosstriper::RadosStriperImpl::internal_get_layout_and_size(
+  const std::string& oid,
+  ceph_file_layout *layout,
+  uint64_t *size)
+{
+  // get external attributes of the first rados object
+  std::map<std::string, bufferlist> attrs;
+  int rc = m_ioCtx.getxattrs(oid, attrs);
+  if (rc) {
+    return rc;
+  }
+  // stripe_unit
+  rc = extract_uint32_attr(attrs, XATTR_LAYOUT_STRIPE_UNIT, &layout->fl_stripe_unit);
+  if (rc) {
+    return rc;
+  }
+  // stripe_count
+  rc = extract_uint32_attr(attrs, XATTR_LAYOUT_STRIPE_COUNT, &layout->fl_stripe_count);
+  if (rc) {
+    return rc;
+  }
+  // object_size
+  rc = extract_uint32_attr(attrs, XATTR_LAYOUT_OBJECT_SIZE, &layout->fl_object_size);
+  if (rc) {
+    return rc;
+  }
+  // size of the striped object
+  size_t ssize;
+  rc = extract_sizet_attr(attrs, XATTR_SIZE, &ssize);
+  if (rc) {
+    return rc;
+  }
+  *size = ssize;
+  // make valgrind happy by setting unused fl_pg_pool
+  layout->fl_pg_pool = 0;
+  return 0;
+}
+
+// Takes a shared lock on the first rados object of `soid` (failing if that
+// object does not exist), then loads the layout and size of the striped
+// object. The cookie of the lock is returned through `*lockCookie`.
+// When layout and size cannot be loaded, the lock is released again.
+int libradosstriper::RadosStriperImpl::openStripedObjectForRead(
+  const std::string& soid,
+  ceph_file_layout *layout,
+  uint64_t *size,
+  std::string *lockCookie)
+{
+  // existence check and locking must be atomic and are thus done within a
+  // single operation
+  librados::ObjectWriteOperation op;
+  op.assert_exists();
+  *lockCookie = getUUID();
+  utime_t dur = utime_t();
+  rados::cls::lock::lock(&op, RADOS_LOCK_NAME, ClsLockType::SHARED, *lockCookie, "Tag", "", dur, 0);
+  const std::string firstObjOid = getObjectId(soid, 0);
+  int rc = m_ioCtx.operate(firstObjOid, &op);
+  if (rc != 0) {
+    // error case (including -ENOENT)
+    return rc;
+  }
+  rc = internal_get_layout_and_size(firstObjOid, layout, size);
+  if (rc == 0) {
+    return 0;
+  }
+  // could not read the metadata: give the lock back and report
+  unlockObject(soid, *lockCookie);
+  lderr(cct()) << "RadosStriperImpl::openStripedObjectForRead : "
+               << "could not load layout and size for "
+               << soid << " : rc = " << rc << dendl;
+  return rc;
+}
+
+// Takes a shared lock on the first rados object of `soid` in preparation of
+// a write, creating the striped object through createAndOpenStripedObject
+// when it does not exist yet.
+// `*size` is an in/out parameter: on input it is the size the object should
+// have after the write (absolute if `isFileSizeAbsolute`, else relative to
+// the current size); on output it is the size of the object before the call
+// (0 when the object had to be created).
+// The layout is returned through `*layout` and the lock cookie through
+// `*lockCookie`. Returns 0 on success; on error after locking, the lock is
+// released.
+int libradosstriper::RadosStriperImpl::openStripedObjectForWrite(const std::string& soid,
+ ceph_file_layout *layout,
+ uint64_t *size,
+ std::string *lockCookie,
+ bool isFileSizeAbsolute)
+{
+ // take a lock the first rados object, if it exists
+ // check and lock must be atomic and are thus done within a single operation
+ librados::ObjectWriteOperation op;
+ op.assert_exists();
+ *lockCookie = getUUID();
+ utime_t dur = utime_t();
+ rados::cls::lock::lock(&op, RADOS_LOCK_NAME, ClsLockType::SHARED, *lockCookie, "Tag", "", dur, 0);
+ std::string firstObjOid = getObjectId(soid, 0);
+ int rc = m_ioCtx.operate(firstObjOid, &op);
+ if (rc) {
+ if (rc == -ENOENT) {
+ // object does not exist, delegate to createAndOpenStripedObject
+ // (which calls back into this method once the object is created)
+ int rc = createAndOpenStripedObject(soid, layout, *size, lockCookie, isFileSizeAbsolute);
+ // return original size
+ *size = 0;
+ return rc;
+ } else {
+ return rc;
+ }
+ }
+ // all fine
+ uint64_t curSize;
+ rc = internal_get_layout_and_size(firstObjOid, layout, &curSize);
+ if (rc) {
+ unlockObject(soid, *lockCookie);
+ lderr(cct()) << "RadosStriperImpl::openStripedObjectForWrite : "
+ << "could not load layout and size for "
+ << soid << " : rc = " << rc << dendl;
+ return rc;
+ }
+ // atomically update the stored size: the comparison on XATTR_SIZE and the
+ // setxattr are part of the same operation
+ // a relative size is first converted into an absolute one
+ if (!isFileSizeAbsolute)
+ *size += curSize;
+ librados::ObjectWriteOperation writeOp;
+ writeOp.cmpxattr(XATTR_SIZE, LIBRADOS_CMPXATTR_OP_GT, *size);
+ std::ostringstream oss;
+ oss << *size;
+ bufferlist bl;
+ bl.append(oss.str());
+ writeOp.setxattr(XATTR_SIZE, bl);
+ rc = m_ioCtx.operate(firstObjOid, &writeOp);
+ // return current size
+ *size = curSize;
+ // handle case where objectsize is already bigger than size
+ // (the comparison failed, so the stored size was left untouched)
+ if (-ECANCELED == rc)
+ rc = 0;
+ if (rc) {
+ unlockObject(soid, *lockCookie);
+ lderr(cct()) << "RadosStriperImpl::openStripedObjectForWrite : "
+ << "could not set new size for "
+ << soid << " : rc = " << rc << dendl;
+ }
+ return rc;
+}
+
+// Creates the first rados object of striped object `soid`, storing the
+// striper layout (m_layout) and the initial size as extended attributes in
+// one write operation, then opens the object through
+// openStripedObjectForWrite.
+// An -EEXIST result of the creation is not treated as an error: the object
+// is then simply opened.
+int libradosstriper::RadosStriperImpl::createAndOpenStripedObject(const std::string& soid,
+                                                                  ceph_file_layout *layout,
+                                                                  uint64_t size,
+                                                                  std::string *lockCookie,
+                                                                  bool isFileSizeAbsolute)
+{
+  // build atomic write operation
+  librados::ObjectWriteOperation writeOp;
+  writeOp.create(true);
+  // stores `value`, formatted through an ostringstream, as attribute `name`
+  auto add_xattr = [&writeOp](const char *name, const auto& value) {
+    std::ostringstream oss;
+    oss << value;
+    bufferlist bl;
+    bl.append(oss.str());
+    writeOp.setxattr(name, bl);
+  };
+  // layout of the striped object
+  add_xattr(XATTR_LAYOUT_OBJECT_SIZE, m_layout.fl_object_size);
+  add_xattr(XATTR_LAYOUT_STRIPE_UNIT, m_layout.fl_stripe_unit);
+  add_xattr(XATTR_LAYOUT_STRIPE_COUNT, m_layout.fl_stripe_count);
+  // initial size: the requested one if absolute, 0 otherwise
+  add_xattr(XATTR_SIZE, (isFileSizeAbsolute?size:0));
+  // effectively create the object and set its attributes
+  const std::string firstObjOid = getObjectId(soid, 0);
+  const int rc = m_ioCtx.operate(firstObjOid, &writeOp);
+  // in case of error (but no EEXIST which would mean the object existed), return
+  if (rc != 0 && rc != -EEXIST) {
+    return rc;
+  }
+  // Otherwise open the object
+  uint64_t fileSize = size;
+  return openStripedObjectForWrite(soid, layout, &fileSize, lockCookie, isFileSizeAbsolute);
+}
+
+// Final callback of a truncation: adopts the reference passed as `arg` and,
+// when the multi completion reports success, stores the new size in the
+// XATTR_SIZE attribute of the striped object.
+static void striper_truncate_aio_req_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  auto *raw = static_cast<TruncateCompletionData*>(arg);
+  // adopt the reference without incrementing it
+  ceph::ref_t<TruncateCompletionData> cdata(raw, false);
+  auto *comp = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  if (comp->rval != 0) {
+    // something failed: the recorded size is left untouched
+    return;
+  }
+  // all went fine, change size in the external attributes
+  std::ostringstream oss;
+  oss << cdata->m_size;
+  bufferlist bl;
+  bl.append(oss.str());
+  cdata->m_striper->setxattr(cdata->m_soid, XATTR_SIZE, bl);
+}
+
+// Synchronous truncation of striped object `soid` from `original_size` to
+// `size`: runs aio_truncate and waits for all its requests (callback
+// included).
+// Returns the error of aio_truncate if any, otherwise the return value of
+// the multi completion.
+int libradosstriper::RadosStriperImpl::truncate(const std::string& soid,
+                                                uint64_t original_size,
+                                                uint64_t size,
+                                                ceph_file_layout &layout)
+{
+  auto cdata = ceph::make_ref<TruncateCompletionData>(this, soid, size);
+  libradosstriper::MultiAioCompletionImplPtr multi_completion{
+    new libradosstriper::MultiAioCompletionImpl, false};
+  multi_completion->set_complete_callback(cdata->get() /* create ref! */, striper_truncate_aio_req_complete);
+  // call asynchronous version of truncate
+  const int rc = aio_truncate(soid, multi_completion, original_size, size, layout);
+  // wait for completion of the truncation, whatever aio_truncate returned
+  multi_completion->finish_adding_requests();
+  multi_completion->wait_for_complete_and_cb();
+  if (rc != 0) {
+    return rc;
+  }
+  return multi_completion->get_return_value();
+}
+
+// Shrinks the rados objects backing striped object `soid` from
+// `original_size` to `size` bytes, according to `layout`.
+// Removals are issued asynchronously and registered on `multi_completion`;
+// the truncation of a rados object is synchronous. The caller is in charge
+// of calling finish_adding_requests() on `multi_completion` and of updating
+// the recorded size.
+// Returns 0, or the first error other than -ENOENT reported when issuing an
+// operation.
+int libradosstriper::RadosStriperImpl::aio_truncate
+(const std::string& soid,
+ libradosstriper::MultiAioCompletionImplPtr multi_completion,
+ uint64_t original_size,
+ uint64_t size,
+ ceph_file_layout &layout)
+{
+  // handle the underlying rados objects. 3 cases here :
+  //  -- the objects belonging to object sets entirely located
+  //     before the truncation are unchanged
+  //  -- the objects belonging to the object set where the
+  //     truncation took place are truncated or removed
+  //  -- the objects belonging to object sets entirely located
+  //     after the truncation are removed
+  // Note that we do it backward and that we change the size in
+  // the external attributes only at the end. This make sure that
+  // no rados object stays behind if we remove the striped object
+  // after a truncation has failed
+  const uint64_t object_size = layout.fl_object_size;
+  const uint64_t stripe_unit = layout.fl_stripe_unit;
+  const uint64_t stripe_count = layout.fl_stripe_count;
+  // Offset in the striped object of the first byte stored in rados object
+  // `objectno`: start of its object set plus its first stripe unit in the
+  // set. Shared by both loops (the second loop used to omit the
+  // stripe_count factor of the object set offset).
+  auto object_start_off = [&](uint64_t objectno) -> uint64_t {
+    const uint64_t nb_full_object_set = objectno / stripe_count;
+    const uint64_t object_index_in_set = objectno % stripe_count;
+    const uint64_t set_start_off = nb_full_object_set * object_size * stripe_count;
+    return set_start_off + object_index_in_set * stripe_unit;
+  };
+  uint64_t trunc_objectsetno = size / object_size / stripe_count;
+  uint64_t last_objectsetno = original_size / object_size / stripe_count;
+  // walking backward, once one object is known to exist all the previous
+  // ones are handled as existing too
+  bool exists = false;
+  // object sets entirely located after the truncation point: remove
+  for (int64_t objectno = (last_objectsetno+1) * stripe_count-1;
+       objectno >= (int64_t)((trunc_objectsetno + 1) * stripe_count);
+       objectno--) {
+    // if no object existed so far, check object existence
+    if (!exists) {
+      exists = (original_size > object_start_off(objectno));
+    }
+    if (exists) {
+      // remove asynchronously
+      multi_completion->add_request();
+      auto data = ceph::make_ref<RadosRemoveCompletionData>(multi_completion, cct());
+      librados::AioCompletion *rados_completion =
+        librados::Rados::aio_create_completion(data->get() /* create ref! */,
+                                               rados_req_remove_complete);
+      int rc = m_ioCtx.aio_remove(getObjectId(soid, objectno), rados_completion);
+      rados_completion->release();
+      // in case the object did not exist, it means we had a sparse file, all is fine
+      if (rc && rc != -ENOENT) return rc;
+    }
+  }
+  // object set where the truncation takes place: truncate or remove
+  file_layout_t l;
+  l.from_legacy(layout);
+  for (int64_t objectno = ((trunc_objectsetno + 1) * stripe_count) -1;
+       objectno >= (int64_t)(trunc_objectsetno * stripe_count);
+       objectno--) {
+    // if no object existed so far, check object existence
+    if (!exists) {
+      exists = (original_size > object_start_off(objectno));
+    }
+    if (exists) {
+      // truncate
+      uint64_t new_object_size = Striper::object_truncate_size(cct(), &l, objectno, size);
+      int rc;
+      if (new_object_size > 0 or 0 == objectno) {
+        // trunc is synchronous as there is no async version
+        // but note that only a single object will be truncated
+        // reducing the overload to a fixed amount
+        // (the first object is never removed here, only truncated)
+        rc = m_ioCtx.trunc(getObjectId(soid, objectno), new_object_size);
+      } else {
+        // removes are asynchronous in order to speed up truncations of big files
+        multi_completion->add_request();
+        auto data = ceph::make_ref<RadosRemoveCompletionData>(multi_completion, cct());
+        librados::AioCompletion *rados_completion =
+          librados::Rados::aio_create_completion(data->get() /* create ref! */,
+                                                 rados_req_remove_complete);
+        rc = m_ioCtx.aio_remove(getObjectId(soid, objectno), rados_completion);
+        rados_completion->release();
+      }
+      // in case the object did not exist, it means we had a sparse file, all is fine
+      if (rc && rc != -ENOENT) return rc;
+    }
+  }
+  return 0;
+}
+
+// Grows striped object `soid` to `size` bytes. As sparse objects are
+// supported, only the XATTR_SIZE attribute of the first rados object is
+// rewritten; `original_size` and `layout` are not used.
+// Returns the result of the setxattr call.
+int libradosstriper::RadosStriperImpl::grow(const std::string& soid,
+                                            uint64_t original_size,
+                                            uint64_t size,
+                                            ceph_file_layout &layout)
+{
+  // the size is stored as text
+  std::ostringstream oss;
+  oss << size;
+  bufferlist bl;
+  bl.append(oss.str());
+  return m_ioCtx.setxattr(getObjectId(soid, 0), XATTR_SIZE, bl);
+}
+
+// Returns a freshly generated random uuid in its printed form.
+std::string libradosstriper::RadosStriperImpl::getUUID()
+{
+  struct uuid_d uuid;
+  uuid.generate_random();
+  // buffer receiving the printed uuid (37 bytes, as in the original code)
+  char text[37];
+  uuid.print(text);
+  std::string result(text);
+  return result;
+}
diff --git a/src/libradosstriper/RadosStriperImpl.h b/src/libradosstriper/RadosStriperImpl.h
new file mode 100644
index 000000000..8226a9ba2
--- /dev/null
+++ b/src/libradosstriper/RadosStriperImpl.h
@@ -0,0 +1,276 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_LIBRADOSSTRIPER_RADOSSTRIPERIMPL_H
+#define CEPH_LIBRADOSSTRIPER_RADOSSTRIPERIMPL_H
+
+#include <string>
+
+#include <boost/intrusive_ptr.hpp>
+
+#include "include/rados/librados.h"
+#include "include/rados/librados.hpp"
+#include "include/radosstriper/libradosstriper.h"
+#include "include/radosstriper/libradosstriper.hpp"
+#include "MultiAioCompletionImpl.h"
+
+#include "librados/IoCtxImpl.h"
+#include "librados/AioCompletionImpl.h"
+#include "common/RefCountedObj.h"
+#include "common/ceph_context.h"
+
+namespace libradosstriper {
+
+using MultiAioCompletionImplPtr =
+ boost::intrusive_ptr<MultiAioCompletionImpl>;
+
+/**
+ * Implementation object behind the RadosStriper C and C++ APIs.
+ * Instances are reference counted through get() and put() and delete
+ * themselves when the last reference is dropped.
+ */
+struct RadosStriperImpl {
+
+  /**
+   * exception wrapper around an error code
+   */
+  struct ErrorCode {
+    ErrorCode(int error) : m_code(error) {};
+    int m_code;
+  };
+
+  /**
+   * Constructor
+   * @param ioctx io context the striper is created on
+   * @param ioctx_impl implementation object of that io context
+   */
+  RadosStriperImpl(librados::IoCtx& ioctx, librados::IoCtxImpl *ioctx_impl);
+  /// Destructor
+  ~RadosStriperImpl() {};
+
+  // configuration
+  int setObjectLayoutStripeUnit(unsigned int stripe_unit);
+  int setObjectLayoutStripeCount(unsigned int stripe_count);
+  int setObjectLayoutObjectSize(unsigned int object_size);
+
+  // xattrs
+  int getxattr(const object_t& soid, const char *name, bufferlist& bl);
+  int setxattr(const object_t& soid, const char *name, bufferlist& bl);
+  // fully qualified so that this header does not rely on a
+  // "using namespace std" leaking in from another include
+  int getxattrs(const object_t& soid, std::map<std::string, bufferlist>& attrset);
+  int rmxattr(const object_t& soid, const char *name);
+
+  // io
+  int write(const std::string& soid, const bufferlist& bl, size_t len, uint64_t off);
+  int append(const std::string& soid, const bufferlist& bl, size_t len);
+  int write_full(const std::string& soid, const bufferlist& bl);
+  int read(const std::string& soid, bufferlist* pbl, size_t len, uint64_t off);
+
+  // asynchronous io
+  int aio_write(const std::string& soid, librados::AioCompletionImpl *c,
+                const bufferlist& bl, size_t len, uint64_t off);
+  int aio_append(const std::string& soid, librados::AioCompletionImpl *c,
+                 const bufferlist& bl, size_t len);
+  int aio_write_full(const std::string& soid, librados::AioCompletionImpl *c,
+                     const bufferlist& bl);
+  int aio_read(const std::string& soid, librados::AioCompletionImpl *c,
+               bufferlist* pbl, size_t len, uint64_t off);
+  int aio_read(const std::string& soid, librados::AioCompletionImpl *c,
+               char* buf, size_t len, uint64_t off);
+  int aio_flush();
+
+  // stat, deletion and truncation
+  int stat(const std::string& soid, uint64_t *psize, time_t *pmtime);
+  int stat2(const std::string& soid, uint64_t *psize, struct timespec *pts);
+  // type of the IoCtxImpl member function used by aio_generic_stat,
+  // parameterized on the type used to report the modification time
+  template<class TimeType>
+  struct StatFunction {
+    typedef int (librados::IoCtxImpl::*Type) (const object_t& oid,
+                                              librados::AioCompletionImpl *c,
+                                              uint64_t *psize, TimeType *pmtime);
+  };
+  template<class TimeType>
+  int aio_generic_stat(const std::string& soid, librados::AioCompletionImpl *c,
+                       uint64_t *psize, TimeType *pmtime,
+                       typename StatFunction<TimeType>::Type statFunction);
+  int aio_stat(const std::string& soid, librados::AioCompletionImpl *c,
+               uint64_t *psize, time_t *pmtime);
+  int aio_stat2(const std::string& soid, librados::AioCompletionImpl *c,
+                uint64_t *psize, struct timespec *pts);
+  int remove(const std::string& soid, int flags=0);
+  int trunc(const std::string& soid, uint64_t size);
+
+  // asynchronous remove. Note that the removal is not 100% parallelized :
+  // the removal of the first rados object of the striped object will be
+  // done via a synchronous call after the completion of all other removals.
+  // These are done asynchronously and in parallel
+  int aio_remove(const std::string& soid, librados::AioCompletionImpl *c, int flags=0);
+
+  // reference counting
+  void get() {
+    std::lock_guard l{lock};
+    m_refCnt ++ ;
+  }
+  void put() {
+    bool deleteme = false;
+    lock.lock();
+    m_refCnt --;
+    if (m_refCnt == 0)
+      deleteme = true;
+    cond.notify_all();
+    lock.unlock();
+    // the mutex is a member of this object, so it has to be released
+    // before the object deletes itself
+    if (deleteme)
+      delete this;
+  }
+
+  // objectid manipulation
+  std::string getObjectId(const object_t& soid, long long unsigned objectno);
+
+  // opening and closing of striped objects
+  void unlockObject(const std::string& soid,
+                    const std::string& lockCookie);
+  void aio_unlockObject(const std::string& soid,
+                        const std::string& lockCookie,
+                        librados::AioCompletion *c);
+
+  // internal versions of IO method
+  int write_in_open_object(const std::string& soid,
+                           const ceph_file_layout& layout,
+                           const std::string& lockCookie,
+                           const bufferlist& bl,
+                           size_t len,
+                           uint64_t off);
+  int aio_write_in_open_object(const std::string& soid,
+                               librados::AioCompletionImpl *c,
+                               const ceph_file_layout& layout,
+                               const std::string& lockCookie,
+                               const bufferlist& bl,
+                               size_t len,
+                               uint64_t off);
+  int internal_aio_write(const std::string& soid,
+                         MultiAioCompletionImplPtr c,
+                         const bufferlist& bl,
+                         size_t len,
+                         uint64_t off,
+                         const ceph_file_layout& layout);
+
+  int extract_uint32_attr(std::map<std::string, bufferlist> &attrs,
+                          const std::string& key,
+                          ceph_le32 *value);
+
+  int extract_sizet_attr(std::map<std::string, bufferlist> &attrs,
+                         const std::string& key,
+                         size_t *value);
+
+  int internal_get_layout_and_size(const std::string& oid,
+                                   ceph_file_layout *layout,
+                                   uint64_t *size);
+
+  int internal_aio_remove(const std::string& soid,
+                          MultiAioCompletionImplPtr multi_completion,
+                          int flags=0);
+
+  /**
+   * opens an existing striped object and takes a shared lock on it
+   * @return 0 if everything is ok and the lock was taken. -errcode otherwise
+   * In particular, if the striped object does not exist, -ENOENT is returned
+   * In case the return code is not 0, no lock is taken
+   */
+  int openStripedObjectForRead(const std::string& soid,
+                               ceph_file_layout *layout,
+                               uint64_t *size,
+                               std::string *lockCookie);
+
+  /**
+   * opens an existing striped object, takes a shared lock on it
+   * and sets its size to the size it will have after the write.
+   * In case the striped object does not exist, it will create it by
+   * calling createAndOpenStripedObject.
+   * @param layout this is filled with the layout of the file
+   * @param size new size of the file (together with isFileSizeAbsolute)
+   * In case of success, this is filled with the size of the file before the opening
+   * @param isFileSizeAbsolute if false, this means that the given size should
+   * be added to the current file size (append mode)
+   * @return 0 if everything is ok and the lock was taken. -errcode otherwise
+   * In case the return code is not 0, no lock is taken
+   */
+  int openStripedObjectForWrite(const std::string& soid,
+                                ceph_file_layout *layout,
+                                uint64_t *size,
+                                std::string *lockCookie,
+                                bool isFileSizeAbsolute);
+  /**
+   * creates an empty striped object with the given size and opens it calling
+   * openStripedObjectForWrite, which implies taking a shared lock on it
+   * Also deals with the cases where the object was created in the mean time
+   * @param isFileSizeAbsolute if false, this means that the given size should
+   * be added to the current file size (append mode). This of course only makes
+   * sense in case the striped object already exists
+   * @return 0 if everything is ok and the lock was taken. -errcode otherwise
+   * In case the return code is not 0, no lock is taken
+   */
+  int createAndOpenStripedObject(const std::string& soid,
+                                 ceph_file_layout *layout,
+                                 uint64_t size,
+                                 std::string *lockCookie,
+                                 bool isFileSizeAbsolute);
+
+  /**
+   * truncates an object synchronously. Should only be called with size < original_size
+   */
+  int truncate(const std::string& soid,
+               uint64_t original_size,
+               uint64_t size,
+               ceph_file_layout &layout);
+
+  /**
+   * truncates an object asynchronously. Should only be called with size < original_size
+   * note that the method is not 100% asynchronous, only the removal of rados objects
+   * is, the (potential) truncation of the rados object residing just at the truncation
+   * point is synchronous for lack of asynchronous truncation in the rados layer
+   */
+  int aio_truncate(const std::string& soid,
+                   MultiAioCompletionImplPtr c,
+                   uint64_t original_size,
+                   uint64_t size,
+                   ceph_file_layout &layout);
+
+  /**
+   * grows an object (adding 0s). Should only be called with size > original_size
+   */
+  int grow(const std::string& soid,
+           uint64_t original_size,
+           uint64_t size,
+           ceph_file_layout &layout);
+
+  /**
+   * creates a unique identifier
+   */
+  static std::string getUUID();
+
+  /// returns the CephContext of the rados cluster handle
+  CephContext *cct() {
+    return (CephContext*)m_radosCluster.cct();
+  }
+
+  // reference counting
+  std::condition_variable cond;
+  int m_refCnt;
+  std::mutex lock;
+
+
+  // Context
+  librados::Rados m_radosCluster;
+  librados::IoCtx m_ioCtx;
+  librados::IoCtxImpl *m_ioCtxImpl;
+
+  // Default layout
+  ceph_file_layout m_layout;
+};
+}
+#endif
diff --git a/src/libradosstriper/libradosstriper.cc b/src/libradosstriper/libradosstriper.cc
new file mode 100644
index 000000000..e98dfc179
--- /dev/null
+++ b/src/libradosstriper/libradosstriper.cc
@@ -0,0 +1,669 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <errno.h>
+
+#include <new>
+
+#include "libradosstriper/RadosStriperImpl.h"
+#include "libradosstriper/MultiAioCompletionImpl.h"
+
+#include "include/types.h"
+
+#include "include/radosstriper/libradosstriper.h"
+#include "include/radosstriper/libradosstriper.hpp"
+#include "librados/RadosXattrIter.h"
+
+/*
+ * This file implements the rados striper API.
+ * There are 2 flavours of it :
+ * - the C API, found in include/radosstriper/libradosstriper.h
+ * - the C++ API, found in include/radosstriper/libradosstriper.hpp
+ */
+
+///////////////////////////// C++ API //////////////////////////////
+
+// Destructor: asserts that exactly one reference to the implementation
+// object is left, then drops that reference.
+libradosstriper::MultiAioCompletion::~MultiAioCompletion()
+{
+ ceph_assert(pc->ref == 1);
+ pc->put();
+}
+
+/// Forwards the callback and its argument to
+/// MultiAioCompletionImpl::set_complete_callback.
+int libradosstriper::MultiAioCompletion::set_complete_callback
+(void *cb_arg, rados_callback_t cb)
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  return impl->set_complete_callback(cb_arg, cb);
+}
+
+/// Forwards the callback and its argument to
+/// MultiAioCompletionImpl::set_safe_callback.
+int libradosstriper::MultiAioCompletion::set_safe_callback
+(void *cb_arg, rados_callback_t cb)
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  return impl->set_safe_callback(cb_arg, cb);
+}
+
+/// Forwards to MultiAioCompletionImpl::wait_for_complete.
+void libradosstriper::MultiAioCompletion::wait_for_complete()
+{
+  static_cast<MultiAioCompletionImpl *>(pc)->wait_for_complete();
+}
+
+/// Forwards to MultiAioCompletionImpl::wait_for_safe.
+void libradosstriper::MultiAioCompletion::wait_for_safe()
+{
+  static_cast<MultiAioCompletionImpl *>(pc)->wait_for_safe();
+}
+
+/// Forwards to MultiAioCompletionImpl::is_complete.
+bool libradosstriper::MultiAioCompletion::is_complete()
+{
+  return static_cast<MultiAioCompletionImpl *>(pc)->is_complete();
+}
+
+/// Forwards to MultiAioCompletionImpl::is_safe.
+bool libradosstriper::MultiAioCompletion::is_safe()
+{
+  return static_cast<MultiAioCompletionImpl *>(pc)->is_safe();
+}
+
+/// Forwards to MultiAioCompletionImpl::wait_for_complete_and_cb.
+void libradosstriper::MultiAioCompletion::wait_for_complete_and_cb()
+{
+  static_cast<MultiAioCompletionImpl *>(pc)->wait_for_complete_and_cb();
+}
+
+/// Forwards to MultiAioCompletionImpl::wait_for_safe_and_cb.
+// The class name is spelled once in the qualifier, like in every other
+// member definition of this file.
+void libradosstriper::MultiAioCompletion::wait_for_safe_and_cb()
+{
+  MultiAioCompletionImpl *c = static_cast<MultiAioCompletionImpl *>(pc);
+  c->wait_for_safe_and_cb();
+}
+
+/// Forwards to MultiAioCompletionImpl::is_complete_and_cb.
+bool libradosstriper::MultiAioCompletion::is_complete_and_cb()
+{
+  return static_cast<MultiAioCompletionImpl *>(pc)->is_complete_and_cb();
+}
+
+/// Forwards to MultiAioCompletionImpl::is_safe_and_cb.
+bool libradosstriper::MultiAioCompletion::is_safe_and_cb()
+{
+  return static_cast<MultiAioCompletionImpl *>(pc)->is_safe_and_cb();
+}
+
+/// Forwards to MultiAioCompletionImpl::get_return_value.
+int libradosstriper::MultiAioCompletion::get_return_value()
+{
+  return static_cast<MultiAioCompletionImpl *>(pc)->get_return_value();
+}
+
+/// Drops one reference on the implementation object, then deletes this
+/// wrapper.
+void libradosstriper::MultiAioCompletion::release()
+{
+  static_cast<MultiAioCompletionImpl *>(pc)->put();
+  delete this;
+}
+
+/// Default constructor: the striper starts without an implementation object.
+libradosstriper::RadosStriper::RadosStriper()
+  : rados_striper_impl(nullptr)
+{
+}
+
+/// Stores the implementation pointer of striper in *s as a C handle and
+/// takes one additional reference on it.
+void libradosstriper::RadosStriper::to_rados_striper_t(RadosStriper &striper, rados_striper_t *s)
+{
+  auto *impl = striper.rados_striper_impl;
+  *s = static_cast<rados_striper_t>(impl);
+  impl->get();
+}
+
+/// Copy constructor: shares the implementation object of rs and, when there
+/// is one, takes a reference on it.
+libradosstriper::RadosStriper::RadosStriper(const RadosStriper& rs)
+  : rados_striper_impl(rs.rados_striper_impl)
+{
+  if (rados_striper_impl != nullptr)
+    rados_striper_impl->get();
+}
+
+/**
+ * Copy assignment: shares the implementation object of rs.
+ *
+ * The reference on the incoming implementation is taken before the current
+ * one is dropped, so that self assignment cannot delete the object that is
+ * about to be referenced. A source without implementation object leaves
+ * this striper without one as well.
+ *
+ * @return *this
+ */
+libradosstriper::RadosStriper& libradosstriper::RadosStriper::operator=(const RadosStriper& rs)
+{
+  auto *incoming = rs.rados_striper_impl;
+  if (incoming)
+    incoming->get();
+  if (rados_striper_impl)
+    rados_striper_impl->put();
+  rados_striper_impl = incoming;
+  return *this;
+}
+
+/// Destructor: drops the reference held on the implementation object, if
+/// any, and resets the pointer.
+libradosstriper::RadosStriper::~RadosStriper()
+{
+  if (rados_striper_impl != nullptr) {
+    rados_striper_impl->put();
+    rados_striper_impl = nullptr;
+  }
+}
+
+/**
+ * Creates a new implementation object on top of ioctx and attaches it to
+ * *striper, holding one reference on it.
+ *
+ * If *striper already referenced an implementation object, that reference
+ * is dropped once the new object is ready, so that it is not leaked.
+ *
+ * @param ioctx   io context handed to the new RadosStriperImpl
+ * @param striper striper receiving the new implementation object
+ * @return 0 on success, or the int thrown during creation
+ */
+int libradosstriper::RadosStriper::striper_create(librados::IoCtx& ioctx,
+                                                  RadosStriper *striper)
+{
+  try {
+    auto *fresh = new libradosstriper::RadosStriperImpl(ioctx, ioctx.io_ctx_impl);
+    fresh->get();
+    if (striper->rados_striper_impl)
+      striper->rados_striper_impl->put();
+    striper->rados_striper_impl = fresh;
+  } catch (int rc) {
+    return rc;
+  }
+  return 0;
+}
+
+/// Delegates to RadosStriperImpl::setObjectLayoutStripeUnit.
+int libradosstriper::RadosStriper::set_object_layout_stripe_unit
+(unsigned int stripe_unit)
+{
+  const int rc = rados_striper_impl->setObjectLayoutStripeUnit(stripe_unit);
+  return rc;
+}
+
+/// Delegates to RadosStriperImpl::setObjectLayoutStripeCount.
+int libradosstriper::RadosStriper::set_object_layout_stripe_count
+(unsigned int stripe_count)
+{
+  const int rc = rados_striper_impl->setObjectLayoutStripeCount(stripe_count);
+  return rc;
+}
+
+/// Delegates to RadosStriperImpl::setObjectLayoutObjectSize.
+int libradosstriper::RadosStriper::set_object_layout_object_size
+(unsigned int object_size)
+{
+  const int rc = rados_striper_impl->setObjectLayoutObjectSize(object_size);
+  return rc;
+}
+
+/// Delegates to RadosStriperImpl::getxattr for the object named oid.
+int libradosstriper::RadosStriper::getxattr(const std::string& oid, const char *name, bufferlist& bl)
+{
+  const object_t obj(oid);
+  return rados_striper_impl->getxattr(obj, name, bl);
+}
+
+/// Delegates to RadosStriperImpl::setxattr for the object named oid.
+int libradosstriper::RadosStriper::setxattr(const std::string& oid, const char *name, bufferlist& bl)
+{
+  const object_t obj(oid);
+  return rados_striper_impl->setxattr(obj, name, bl);
+}
+
+/// Delegates to RadosStriperImpl::rmxattr for the object named oid.
+int libradosstriper::RadosStriper::rmxattr(const std::string& oid, const char *name)
+{
+  const object_t obj(oid);
+  return rados_striper_impl->rmxattr(obj, name);
+}
+
+/// Delegates to RadosStriperImpl::getxattrs for the object named oid.
+int libradosstriper::RadosStriper::getxattrs(const std::string& oid,
+                                             std::map<std::string, bufferlist>& attrset)
+{
+  const object_t obj(oid);
+  return rados_striper_impl->getxattrs(obj, attrset);
+}
+
+/// Delegates to RadosStriperImpl::write and returns its result.
+int libradosstriper::RadosStriper::write(const std::string& soid,
+                                         const bufferlist& bl,
+                                         size_t len,
+                                         uint64_t off)
+{
+  const int rc = rados_striper_impl->write(soid, bl, len, off);
+  return rc;
+}
+
+/// Delegates to RadosStriperImpl::write_full and returns its result.
+int libradosstriper::RadosStriper::write_full(const std::string& soid,
+                                              const bufferlist& bl)
+{
+  const int rc = rados_striper_impl->write_full(soid, bl);
+  return rc;
+}
+
+/// Delegates to RadosStriperImpl::append and returns its result.
+int libradosstriper::RadosStriper::append(const std::string& soid,
+                                          const bufferlist& bl,
+                                          size_t len)
+{
+  const int rc = rados_striper_impl->append(soid, bl, len);
+  return rc;
+}
+
+/// Delegates to RadosStriperImpl::aio_write, handing over the
+/// implementation object of the given completion.
+int libradosstriper::RadosStriper::aio_write(const std::string& soid,
+                                             librados::AioCompletion *c,
+                                             const bufferlist& bl,
+                                             size_t len,
+                                             uint64_t off)
+{
+  auto *completion_impl = c->pc;
+  return rados_striper_impl->aio_write(soid, completion_impl, bl, len, off);
+}
+
+/// Delegates to RadosStriperImpl::aio_write_full, handing over the
+/// implementation object of the given completion.
+int libradosstriper::RadosStriper::aio_write_full(const std::string& soid,
+                                                  librados::AioCompletion *c,
+                                                  const bufferlist& bl)
+{
+  auto *completion_impl = c->pc;
+  return rados_striper_impl->aio_write_full(soid, completion_impl, bl);
+}
+
+/// Delegates to RadosStriperImpl::aio_append, handing over the
+/// implementation object of the given completion.
+int libradosstriper::RadosStriper::aio_append(const std::string& soid,
+                                              librados::AioCompletion *c,
+                                              const bufferlist& bl,
+                                              size_t len)
+{
+  auto *completion_impl = c->pc;
+  return rados_striper_impl->aio_append(soid, completion_impl, bl, len);
+}
+
+/// Resets *bl to a single freshly created buffer of len bytes, then
+/// delegates to RadosStriperImpl::read and returns its result.
+int libradosstriper::RadosStriper::read(const std::string& soid,
+                                        bufferlist* bl,
+                                        size_t len,
+                                        uint64_t off)
+{
+  bl->clear();
+  bl->push_back(buffer::create(len));
+  const int rc = rados_striper_impl->read(soid, bl, len, off);
+  return rc;
+}
+
+/// Resets *bl to a single freshly created buffer of len bytes, then
+/// delegates to RadosStriperImpl::aio_read with the implementation object
+/// of the given completion.
+int libradosstriper::RadosStriper::aio_read(const std::string& soid,
+                                            librados::AioCompletion *c,
+                                            bufferlist* bl,
+                                            size_t len,
+                                            uint64_t off)
+{
+  bl->clear();
+  bl->push_back(buffer::create(len));
+  auto *completion_impl = c->pc;
+  return rados_striper_impl->aio_read(soid, completion_impl, bl, len, off);
+}
+
+/// Delegates to RadosStriperImpl::stat and returns its result.
+int libradosstriper::RadosStriper::stat(const std::string& soid, uint64_t *psize, time_t *pmtime)
+{
+  const int rc = rados_striper_impl->stat(soid, psize, pmtime);
+  return rc;
+}
+
+/// Delegates to RadosStriperImpl::aio_stat, handing over the
+/// implementation object of the given completion.
+int libradosstriper::RadosStriper::aio_stat(const std::string& soid,
+                                            librados::AioCompletion *c,
+                                            uint64_t *psize,
+                                            time_t *pmtime)
+{
+  auto *completion_impl = c->pc;
+  return rados_striper_impl->aio_stat(soid, completion_impl, psize, pmtime);
+}
+
+/// Delegates to RadosStriperImpl::stat2 and returns its result.
+int libradosstriper::RadosStriper::stat2(const std::string& soid, uint64_t *psize, struct timespec *pts)
+{
+  const int rc = rados_striper_impl->stat2(soid, psize, pts);
+  return rc;
+}
+
+/// Delegates to RadosStriperImpl::aio_stat2, handing over the
+/// implementation object of the given completion.
+int libradosstriper::RadosStriper::aio_stat2(const std::string& soid,
+                                             librados::AioCompletion *c,
+                                             uint64_t *psize,
+                                             struct timespec *pts)
+{
+  auto *completion_impl = c->pc;
+  return rados_striper_impl->aio_stat2(soid, completion_impl, psize, pts);
+}
+
+/// Delegates to RadosStriperImpl::remove, leaving flags at its default.
+int libradosstriper::RadosStriper::remove(const std::string& soid)
+{
+  const int rc = rados_striper_impl->remove(soid);
+  return rc;
+}
+
+/// Delegates to RadosStriperImpl::aio_remove with the implementation object
+/// of the given completion, leaving flags at its default.
+int libradosstriper::RadosStriper::aio_remove(const std::string& soid,
+                                              librados::AioCompletion *c)
+{
+  auto *completion_impl = c->pc;
+  return rados_striper_impl->aio_remove(soid, completion_impl);
+}
+
+/// Delegates to RadosStriperImpl::remove with the given flags.
+int libradosstriper::RadosStriper::remove(const std::string& soid, int flags)
+{
+  const int rc = rados_striper_impl->remove(soid, flags);
+  return rc;
+}
+
+/// Delegates to RadosStriperImpl::aio_remove with the implementation object
+/// of the given completion and the given flags.
+int libradosstriper::RadosStriper::aio_remove(const std::string& soid,
+                                              librados::AioCompletion *c,
+                                              int flags)
+{
+  auto *completion_impl = c->pc;
+  return rados_striper_impl->aio_remove(soid, completion_impl, flags);
+}
+
+/// Delegates to RadosStriperImpl::trunc and returns its result.
+int libradosstriper::RadosStriper::trunc(const std::string& soid, uint64_t size)
+{
+  const int rc = rados_striper_impl->trunc(soid, size);
+  return rc;
+}
+
+/// Delegates to RadosStriperImpl::aio_flush and returns its result.
+int libradosstriper::RadosStriper::aio_flush()
+{
+  const int rc = rados_striper_impl->aio_flush();
+  return rc;
+}
+
+/// Allocates a MultiAioCompletionImpl and returns a new MultiAioCompletion
+/// wrapping it. No callback is registered.
+libradosstriper::MultiAioCompletion* libradosstriper::RadosStriper::multi_aio_create_completion()
+{
+  auto *completion_impl = new MultiAioCompletionImpl;
+  return new MultiAioCompletion(completion_impl);
+}
+
+/**
+ * Creates a completion through the C API, with the given callbacks, and
+ * wraps it in a new MultiAioCompletion.
+ *
+ * The C call fills an opaque handle, which is converted back to the
+ * implementation type afterwards. This avoids writing through a
+ * reinterpreted pointer-to-pointer.
+ *
+ * @param cb_arg      argument handed to the callbacks
+ * @param cb_complete callback registered as complete callback
+ * @param cb_safe     callback registered as safe callback
+ * @return the newly allocated MultiAioCompletion
+ */
+libradosstriper::MultiAioCompletion*
+libradosstriper::RadosStriper::multi_aio_create_completion(void *cb_arg,
+                                                           librados::callback_t cb_complete,
+                                                           librados::callback_t cb_safe)
+{
+  rados_striper_multi_completion_t handle = nullptr;
+  int r = rados_striper_multi_aio_create_completion(cb_arg, cb_complete, cb_safe, &handle);
+  ceph_assert(r == 0);
+  return new MultiAioCompletion(static_cast<MultiAioCompletionImpl *>(handle));
+}
+
+///////////////////////////// C API //////////////////////////////
+
+/// C entry point: builds a striper on top of the given io context and, on
+/// success, stores its handle in *striper. Returns the code of
+/// RadosStriper::striper_create.
+extern "C" int rados_striper_create(rados_ioctx_t ioctx,
+                                    rados_striper_t *striper)
+{
+  librados::IoCtx ctx;
+  librados::IoCtx::from_rados_ioctx_t(ioctx, ctx);
+  libradosstriper::RadosStriper striperp;
+  const int rc = libradosstriper::RadosStriper::striper_create(ctx, &striperp);
+  if (rc != 0)
+    return rc;
+  libradosstriper::RadosStriper::to_rados_striper_t(striperp, striper);
+  return 0;
+}
+
+/// C entry point: drops one reference on the striper behind the handle.
+extern "C" void rados_striper_destroy(rados_striper_t striper)
+{
+  static_cast<libradosstriper::RadosStriperImpl *>(striper)->put();
+}
+
+/// C entry point forwarding to RadosStriperImpl::setObjectLayoutStripeUnit.
+extern "C" int rados_striper_set_object_layout_stripe_unit(rados_striper_t striper,
+                                                           unsigned int stripe_unit)
+{
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return striper_impl->setObjectLayoutStripeUnit(stripe_unit);
+}
+
+/// C entry point forwarding to RadosStriperImpl::setObjectLayoutStripeCount.
+extern "C" int rados_striper_set_object_layout_stripe_count(rados_striper_t striper,
+                                                            unsigned int stripe_count)
+{
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return striper_impl->setObjectLayoutStripeCount(stripe_count);
+}
+
+/// C entry point forwarding to RadosStriperImpl::setObjectLayoutObjectSize.
+extern "C" int rados_striper_set_object_layout_object_size(rados_striper_t striper,
+                                                           unsigned int object_size)
+{
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return striper_impl->setObjectLayoutObjectSize(object_size);
+}
+
+/// C entry point: copies len bytes of buf into a bufferlist and forwards it
+/// to RadosStriperImpl::write at offset off.
+extern "C" int rados_striper_write(rados_striper_t striper,
+                                   const char *soid,
+                                   const char *buf,
+                                   size_t len,
+                                   uint64_t off)
+{
+  bufferlist payload;
+  payload.append(buf, len);
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return striper_impl->write(soid, payload, len, off);
+}
+
+/// C entry point: copies len bytes of buf into a bufferlist and forwards it
+/// to RadosStriperImpl::write_full.
+extern "C" int rados_striper_write_full(rados_striper_t striper,
+                                        const char *soid,
+                                        const char *buf,
+                                        size_t len)
+{
+  bufferlist payload;
+  payload.append(buf, len);
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return striper_impl->write_full(soid, payload);
+}
+
+
+/// C entry point: copies len bytes of buf into a bufferlist and forwards it
+/// to RadosStriperImpl::append.
+extern "C" int rados_striper_append(rados_striper_t striper,
+                                    const char *soid,
+                                    const char *buf,
+                                    size_t len)
+{
+  bufferlist payload;
+  payload.append(buf, len);
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return striper_impl->append(soid, payload, len);
+}
+
+/// C entry point: reads up to len bytes at offset off into buf.
+/// The bufferlist handed to RadosStriperImpl::read wraps buf directly; if
+/// the result ends up elsewhere it is copied into buf afterwards.
+/// Returns the number of bytes in the resulting bufferlist, -ERANGE if that
+/// exceeds len, or the negative code returned by the read.
+extern "C" int rados_striper_read(rados_striper_t striper,
+                                  const char *soid,
+                                  char *buf,
+                                  size_t len,
+                                  uint64_t off)
+{
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  bufferlist bl;
+  bufferptr bp = buffer::create_static(len, buf);
+  bl.push_back(bp);
+  const int ret = striper_impl->read(soid, &bl, len, off);
+  if (ret < 0)
+    return ret;
+  if (bl.length() > len)
+    return -ERANGE;
+  if (!bl.is_provided_buffer(buf))
+    bl.begin().copy(bl.length(), buf);
+  return bl.length(); // hrm :/
+}
+
+/// C entry point forwarding to RadosStriperImpl::remove.
+extern "C" int rados_striper_remove(rados_striper_t striper, const char* soid)
+{
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return striper_impl->remove(soid);
+}
+
+/// C entry point forwarding to RadosStriperImpl::trunc.
+extern "C" int rados_striper_trunc(rados_striper_t striper, const char* soid, uint64_t size)
+{
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return striper_impl->trunc(soid, size);
+}
+
+/**
+ * C entry point: fetches the extended attribute name of object oid into buf.
+ *
+ * @param striper striper handle
+ * @param oid     name of the striped object
+ * @param name    name of the attribute
+ * @param buf     destination buffer
+ * @param len     capacity of buf in bytes
+ * @return the length of the attribute value copied into buf, -ERANGE if the
+ *         value does not fit in len bytes, or the negative code returned by
+ *         RadosStriperImpl::getxattr
+ */
+extern "C" int rados_striper_getxattr(rados_striper_t striper,
+                                      const char *oid,
+                                      const char *name,
+                                      char *buf,
+                                      size_t len)
+{
+  libradosstriper::RadosStriperImpl *impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  object_t obj(oid);
+  bufferlist bl;
+  // pass the object_t built above, as rados_striper_setxattr and
+  // rados_striper_rmxattr do, instead of converting oid a second time
+  int ret = impl->getxattr(obj, name, bl);
+  if (ret >= 0) {
+    if (bl.length() > len)
+      return -ERANGE;
+    bl.begin().copy(bl.length(), buf);
+    ret = bl.length();
+  }
+  return ret;
+}
+
+/// C entry point: copies len bytes of buf into a bufferlist and forwards it
+/// to RadosStriperImpl::setxattr for attribute name of object oid.
+extern "C" int rados_striper_setxattr(rados_striper_t striper,
+                                      const char *oid,
+                                      const char *name,
+                                      const char *buf,
+                                      size_t len)
+{
+  bufferlist value;
+  value.append(buf, len);
+  object_t target(oid);
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return striper_impl->setxattr(target, name, value);
+}
+
+/// C entry point forwarding to RadosStriperImpl::rmxattr for attribute name
+/// of object oid.
+extern "C" int rados_striper_rmxattr(rados_striper_t striper,
+                                     const char *oid,
+                                     const char *name)
+{
+  object_t target(oid);
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return striper_impl->rmxattr(target, name);
+}
+
+/**
+ * C entry point: loads all extended attributes of object oid into a newly
+ * allocated iterator.
+ *
+ * @param striper striper handle
+ * @param oid     name of the striped object
+ * @param iter    receives the iterator on success; left untouched on error
+ * @return 0 on success, -ENOMEM if the iterator cannot be allocated, or the
+ *         non zero code returned by RadosStriperImpl::getxattrs
+ */
+extern "C" int rados_striper_getxattrs(rados_striper_t striper,
+                                       const char *oid,
+                                       rados_xattrs_iter_t *iter)
+{
+  libradosstriper::RadosStriperImpl *impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  object_t obj(oid);
+  // non throwing allocation: a plain new never returns a null pointer, so
+  // the -ENOMEM branch below would otherwise be unreachable
+  librados::RadosXattrsIter *it = new (std::nothrow) librados::RadosXattrsIter();
+  if (!it)
+    return -ENOMEM;
+  int ret = impl->getxattrs(obj, it->attrset);
+  if (ret) {
+    delete it;
+    return ret;
+  }
+  it->i = it->attrset.begin();
+  *iter = it;
+  return 0;
+}
+
+/// C entry point: thin alias of rados_getxattrs_next.
+extern "C" int rados_striper_getxattrs_next(rados_xattrs_iter_t iter,
+                                            const char **name,
+                                            const char **val,
+                                            size_t *len)
+{
+  const int rc = rados_getxattrs_next(iter, name, val, len);
+  return rc;
+}
+
+/// C entry point: thin alias of rados_getxattrs_end.
+extern "C" void rados_striper_getxattrs_end(rados_xattrs_iter_t iter)
+{
+  rados_getxattrs_end(iter);
+}
+
+/// C entry point forwarding to RadosStriperImpl::stat.
+extern "C" int rados_striper_stat(rados_striper_t striper,
+                                  const char* soid,
+                                  uint64_t *psize,
+                                  time_t *pmtime)
+{
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return striper_impl->stat(soid, psize, pmtime);
+}
+
+/// C entry point: allocates a MultiAioCompletionImpl, registers the non
+/// null callbacks with cb_arg, and stores the object in *pc.
+/// Always returns 0.
+extern "C" int rados_striper_multi_aio_create_completion(void *cb_arg,
+                                                         rados_callback_t cb_complete,
+                                                         rados_callback_t cb_safe,
+                                                         rados_striper_multi_completion_t *pc)
+{
+  auto *completion = new libradosstriper::MultiAioCompletionImpl;
+  if (cb_complete) {
+    completion->set_complete_callback(cb_arg, cb_complete);
+  }
+  if (cb_safe) {
+    completion->set_safe_callback(cb_arg, cb_safe);
+  }
+  *pc = completion;
+  return 0;
+}
+
+/// C entry point forwarding to MultiAioCompletionImpl::wait_for_complete.
+extern "C" void rados_striper_multi_aio_wait_for_complete(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl *>(c);
+  completion->wait_for_complete();
+}
+
+/// C entry point forwarding to MultiAioCompletionImpl::wait_for_safe.
+extern "C" void rados_striper_multi_aio_wait_for_safe(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl *>(c);
+  completion->wait_for_safe();
+}
+
+/// C entry point forwarding to MultiAioCompletionImpl::is_complete.
+extern "C" int rados_striper_multi_aio_is_complete(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl *>(c);
+  return completion->is_complete();
+}
+
+/// C entry point forwarding to MultiAioCompletionImpl::is_safe.
+extern "C" int rados_striper_multi_aio_is_safe(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl *>(c);
+  return completion->is_safe();
+}
+
+/// C entry point forwarding to
+/// MultiAioCompletionImpl::wait_for_complete_and_cb.
+extern "C" void rados_striper_multi_aio_wait_for_complete_and_cb(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl *>(c);
+  completion->wait_for_complete_and_cb();
+}
+
+/// C entry point forwarding to MultiAioCompletionImpl::wait_for_safe_and_cb.
+extern "C" void rados_striper_multi_aio_wait_for_safe_and_cb(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl *>(c);
+  completion->wait_for_safe_and_cb();
+}
+
+/// C entry point forwarding to MultiAioCompletionImpl::is_complete_and_cb.
+extern "C" int rados_striper_multi_aio_is_complete_and_cb(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl *>(c);
+  return completion->is_complete_and_cb();
+}
+
+/// C entry point forwarding to MultiAioCompletionImpl::is_safe_and_cb.
+extern "C" int rados_striper_multi_aio_is_safe_and_cb(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl *>(c);
+  return completion->is_safe_and_cb();
+}
+
+/// C entry point forwarding to MultiAioCompletionImpl::get_return_value.
+extern "C" int rados_striper_multi_aio_get_return_value(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl *>(c);
+  return completion->get_return_value();
+}
+
+/// C entry point: drops one reference on the completion behind the handle.
+extern "C" void rados_striper_multi_aio_release(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl *>(c);
+  completion->put();
+}
+
+/// C entry point: copies len bytes of buf into a bufferlist and forwards it
+/// to RadosStriperImpl::aio_write together with the completion handle.
+extern "C" int rados_striper_aio_write(rados_striper_t striper,
+                                       const char* soid,
+                                       rados_completion_t completion,
+                                       const char *buf,
+                                       size_t len,
+                                       uint64_t off)
+{
+  bufferlist payload;
+  payload.append(buf, len);
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  auto *completion_impl = static_cast<librados::AioCompletionImpl *>(completion);
+  return striper_impl->aio_write(soid, completion_impl, payload, len, off);
+}
+
+/// C entry point: copies len bytes of buf into a bufferlist and forwards it
+/// to RadosStriperImpl::aio_append together with the completion handle.
+extern "C" int rados_striper_aio_append(rados_striper_t striper,
+                                        const char* soid,
+                                        rados_completion_t completion,
+                                        const char *buf,
+                                        size_t len)
+{
+  bufferlist payload;
+  payload.append(buf, len);
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  auto *completion_impl = static_cast<librados::AioCompletionImpl *>(completion);
+  return striper_impl->aio_append(soid, completion_impl, payload, len);
+}
+
+/// C entry point: copies len bytes of buf into a bufferlist and forwards it
+/// to RadosStriperImpl::aio_write_full together with the completion handle.
+extern "C" int rados_striper_aio_write_full(rados_striper_t striper,
+                                            const char* soid,
+                                            rados_completion_t completion,
+                                            const char *buf,
+                                            size_t len)
+{
+  bufferlist payload;
+  payload.append(buf, len);
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  auto *completion_impl = static_cast<librados::AioCompletionImpl *>(completion);
+  return striper_impl->aio_write_full(soid, completion_impl, payload);
+}
+
+/// C entry point forwarding to the char buffer overload of
+/// RadosStriperImpl::aio_read.
+extern "C" int rados_striper_aio_read(rados_striper_t striper,
+                                      const char *soid,
+                                      rados_completion_t completion,
+                                      char *buf,
+                                      size_t len,
+                                      uint64_t off)
+{
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  auto *completion_impl = static_cast<librados::AioCompletionImpl *>(completion);
+  return striper_impl->aio_read(soid, completion_impl, buf, len, off);
+}
+
+/// C entry point forwarding to RadosStriperImpl::aio_remove, leaving flags
+/// at its default.
+extern "C" int rados_striper_aio_remove(rados_striper_t striper,
+                                        const char* soid,
+                                        rados_completion_t completion)
+{
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  auto *completion_impl = static_cast<librados::AioCompletionImpl *>(completion);
+  return striper_impl->aio_remove(soid, completion_impl);
+}
+
+/// C entry point forwarding to RadosStriperImpl::aio_flush; the return code
+/// of the flush is not reported to the caller.
+extern "C" void rados_striper_aio_flush(rados_striper_t striper)
+{
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  static_cast<void>(striper_impl->aio_flush());
+}
+
+/// C entry point forwarding to RadosStriperImpl::aio_stat together with the
+/// completion handle.
+extern "C" int rados_striper_aio_stat(rados_striper_t striper,
+                                      const char* soid,
+                                      rados_completion_t completion,
+                                      uint64_t *psize,
+                                      time_t *pmtime)
+{
+  auto *striper_impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  auto *completion_impl = static_cast<librados::AioCompletionImpl *>(completion);
+  return striper_impl->aio_stat(soid, completion_impl, psize, pmtime);
+}