summaryrefslogtreecommitdiffstats
path: root/src/librados
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/librados-config.cc59
-rw-r--r--src/librados/AioCompletionImpl.h208
-rw-r--r--src/librados/CMakeLists.txt42
-rw-r--r--src/librados/IoCtxImpl.cc2217
-rw-r--r--src/librados/IoCtxImpl.h299
-rw-r--r--src/librados/ListObjectImpl.h81
-rw-r--r--src/librados/PoolAsyncCompletionImpl.h110
-rw-r--r--src/librados/RadosClient.cc1180
-rw-r--r--src/librados/RadosClient.h198
-rw-r--r--src/librados/RadosXattrIter.cc29
-rw-r--r--src/librados/RadosXattrIter.h38
-rw-r--r--src/librados/librados.map41
-rw-r--r--src/librados/librados_asio.h213
-rw-r--r--src/librados/librados_c.cc4611
-rw-r--r--src/librados/librados_c.h29
-rw-r--r--src/librados/librados_cxx.cc3177
-rw-r--r--src/librados/librados_tp.cc9
-rw-r--r--src/librados/librados_util.cc63
-rw-r--r--src/librados/librados_util.h34
-rw-r--r--src/librados/snap_set_diff.cc116
-rw-r--r--src/librados/snap_set_diff.h18
-rw-r--r--src/libradosstriper/CMakeLists.txt17
-rw-r--r--src/libradosstriper/MultiAioCompletionImpl.cc60
-rw-r--r--src/libradosstriper/MultiAioCompletionImpl.h169
-rw-r--r--src/libradosstriper/RadosStriperImpl.cc1606
-rw-r--r--src/libradosstriper/RadosStriperImpl.h276
-rw-r--r--src/libradosstriper/libradosstriper.cc669
27 files changed, 15569 insertions, 0 deletions
diff --git a/src/librados-config.cc b/src/librados-config.cc
new file mode 100644
index 000000000..7948598b1
--- /dev/null
+++ b/src/librados-config.cc
@@ -0,0 +1,59 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License version 2, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+#include <iostream>
+
+#include <boost/program_options/cmdline.hpp>
+#include <boost/program_options/option.hpp>
+#include <boost/program_options/options_description.hpp>
+#include <boost/program_options/parsers.hpp>
+#include <boost/program_options/variables_map.hpp>
+
+#include "include/rados/librados.h"
+#include "ceph_ver.h"
+
+namespace po = boost::program_options;
+
+int main(int argc, const char **argv)
+{
+  // Prints the librados version, its version code or the Ceph release name.
+  po::options_description desc{"usage: librados-config [option]"};
+  desc.add_options()
+    ("help,h", "print this help message")
+    ("version", "library version")
+    ("vernum", "library version code")
+    ("release", "print release name");
+  po::variables_map vm;
+  try {  // parsing throws a po::error subclass on unknown or malformed options
+    po::store(po::command_line_parser(argc, argv).options(desc).run(), vm);
+    po::notify(vm);
+  } catch (const po::error& e) {
+    std::cerr << argv[0] << ": " << e.what() << std::endl;
+    return 1;
+  }
+  if (vm.count("help")) {
+    std::cout << desc << std::endl;
+  } else if (vm.count("version")) {
+    int maj, min, ext;
+    rados_version(&maj, &min, &ext);
+    std::cout << maj << "." << min << "." << ext << std::endl;
+  } else if (vm.count("vernum")) {
+    std::cout << std::hex << LIBRADOS_VERSION_CODE << std::dec << std::endl;
+  } else if (vm.count("release")) {
+    std::cout << CEPH_RELEASE_NAME << " (" << CEPH_RELEASE_TYPE << ')' << std::endl;
+  } else {
+    std::cerr << argv[0] << ": -h or --help for usage" << std::endl;
+    return 1;
+  }
+}
+
diff --git a/src/librados/AioCompletionImpl.h b/src/librados/AioCompletionImpl.h
new file mode 100644
index 000000000..6f7e1b628
--- /dev/null
+++ b/src/librados/AioCompletionImpl.h
@@ -0,0 +1,208 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_LIBRADOS_AIOCOMPLETIONIMPL_H
+#define CEPH_LIBRADOS_AIOCOMPLETIONIMPL_H
+
+#include "common/ceph_mutex.h"
+#include "include/buffer.h"
+#include "include/xlist.h"
+#include "osd/osd_types.h"
+
+class IoCtxImpl;
+
+struct librados::AioCompletionImpl { // reference-counted state of one asynchronous operation
+ ceph::mutex lock = ceph::make_mutex("AioCompletionImpl lock", false); // taken, or required held, by the methods below
+ ceph::condition_variable cond; // waited on by the wait_for_* methods
+ int ref = 1, rval = 0; // ref: reference count, see put_unlock(); rval: value returned by get_return_value()
+ bool released = false; // set exactly once by release()
+ bool complete = false; // the wait_for_* methods block until this is true
+ version_t objver = 0; // returned by get_version()
+ ceph_tid_t tid = 0;
+ // optional user callbacks and the opaque argument passed to each
+ rados_callback_t callback_complete = nullptr, callback_safe = nullptr;
+ void *callback_complete_arg = nullptr, *callback_safe_arg = nullptr;
+
+ // for read
+ bool is_read = false;
+ bufferlist bl;
+ bufferlist *blp = nullptr;
+ char *out_buf = nullptr;
+ // context this completion is bound to, and its slot in that context's write list
+ IoCtxImpl *io = nullptr;
+ ceph_tid_t aio_write_seq = 0;
+ xlist<AioCompletionImpl*>::item aio_write_list_item;
+
+ AioCompletionImpl() : aio_write_list_item(this) { }
+ // --- callback registration; both setters always return 0
+ int set_complete_callback(void *cb_arg, rados_callback_t cb) {
+ std::scoped_lock l{lock};
+ callback_complete = cb;
+ callback_complete_arg = cb_arg;
+ return 0;
+ }
+ int set_safe_callback(void *cb_arg, rados_callback_t cb) {
+ std::scoped_lock l{lock};
+ callback_safe = cb;
+ callback_safe_arg = cb_arg;
+ return 0;
+ }
+ int wait_for_complete() { // blocks until complete is set; always returns 0
+ std::unique_lock l{lock};
+ cond.wait(l, [this] { return complete; });
+ return 0;
+ }
+ int wait_for_safe() { // same as wait_for_complete()
+ return wait_for_complete();
+ }
+ int is_complete() { // non-blocking snapshot of complete
+ std::scoped_lock l{lock};
+ return complete;
+ }
+ int is_safe() { // same as is_complete()
+ return is_complete();
+ }
+ int wait_for_complete_and_cb() { // also waits until both callback pointers are null again
+ std::unique_lock l{lock};
+ cond.wait(l, [this] { return complete && !callback_complete && !callback_safe; });
+ return 0;
+ }
+ int wait_for_safe_and_cb() { // same as wait_for_complete_and_cb()
+ return wait_for_complete_and_cb();
+ }
+ int is_complete_and_cb() { // non-blocking form of wait_for_complete_and_cb()
+ std::scoped_lock l{lock};
+ return complete && !callback_complete && !callback_safe;
+ }
+ int is_safe_and_cb() { // same as is_complete_and_cb()
+ return is_complete_and_cb();
+ }
+ int get_return_value() {
+ std::scoped_lock l{lock};
+ return rval;
+ }
+ uint64_t get_version() {
+ std::scoped_lock l{lock};
+ return objver;
+ }
+ // --- reference counting: get()/put() lock internally, _get()/put_unlock() need the lock held
+ void get() {
+ std::scoped_lock l{lock};
+ _get();
+ }
+ void _get() {
+ ceph_assert(ceph_mutex_is_locked(lock));
+ ceph_assert(ref > 0);
+ ++ref;
+ }
+ void release() { // marks the object released (asserting this happens once) and drops one reference
+ lock.lock();
+ ceph_assert(!released);
+ released = true;
+ put_unlock();
+ }
+ void put() {
+ lock.lock();
+ put_unlock();
+ }
+ void put_unlock() { // caller holds lock; it is released here before any delete
+ ceph_assert(ref > 0);
+ int n = --ref;
+ lock.unlock();
+ if (!n) // that was the last reference
+ delete this;
+ }
+};
+
+namespace librados {
+struct CB_AioComplete {
+  AioCompletionImpl *c;
+  // Takes a reference via _get(), which asserts that c->lock is already held.
+  explicit CB_AioComplete(AioCompletionImpl *cc) : c(cc) {
+    c->_get();
+  }
+
+  // Runs the complete callback, then the safe callback (each only if set),
+  // then clears both, wakes waiters on c->cond and drops the reference.
+  void operator()() {
+    if (rados_callback_t on_complete = c->callback_complete) {
+      void *arg = c->callback_complete_arg;
+      on_complete(c, arg);
+    }
+    if (rados_callback_t on_safe = c->callback_safe) {
+      void *arg = c->callback_safe_arg;
+      on_safe(c, arg);
+    }
+    c->lock.lock();
+    c->callback_complete = nullptr;
+    c->callback_safe = nullptr;
+    c->cond.notify_all();
+    c->put_unlock();
+  }
+};
+
+/**
+ * Fills in all completed request data, and calls both
+ * complete and safe callbacks if they exist.
+ *
+ * Not useful for usual I/O, but for special things like
+ * flush where we only want to wait for things to be safe,
+ * but allow users to specify any of the callbacks.
+ */
+struct CB_AioCompleteAndSafe {
+  AioCompletionImpl *c;
+
+  explicit CB_AioCompleteAndSafe(AioCompletionImpl *cc) : c(cc) {
+    c->get();  // reference is dropped by operator()
+  }
+  CB_AioCompleteAndSafe(const CB_AioCompleteAndSafe&) = delete;
+  CB_AioCompleteAndSafe& operator =(const CB_AioCompleteAndSafe&) = delete;
+  CB_AioCompleteAndSafe(CB_AioCompleteAndSafe&& rhs) noexcept : c(rhs.c) {
+    rhs.c = nullptr;  // the moved-from object no longer refers to the completion
+  }
+  CB_AioCompleteAndSafe& operator =(CB_AioCompleteAndSafe&& rhs) noexcept {
+    if (this != &rhs) {  // self-move must not null out our own pointer
+      c = rhs.c;
+      rhs.c = nullptr;
+    }
+    return *this;
+  }
+
+  // Stores r as the result and marks the completion done, runs the complete
+  // and safe callbacks (if set), then clears them, wakes waiters and drops the ref.
+  void operator()(int r = 0) {
+    c->lock.lock();
+    c->rval = r;
+    c->complete = true;
+    c->lock.unlock();
+
+    rados_callback_t cb_complete = c->callback_complete;
+    void *cb_complete_arg = c->callback_complete_arg;
+    if (cb_complete)
+      cb_complete(c, cb_complete_arg);
+    rados_callback_t cb_safe = c->callback_safe;
+    void *cb_safe_arg = c->callback_safe_arg;
+    if (cb_safe)
+      cb_safe(c, cb_safe_arg);
+
+    c->lock.lock();
+    c->callback_complete = nullptr;
+    c->callback_safe = nullptr;
+    c->cond.notify_all();
+    c->put_unlock();
+  }
+};
+}
+
+#endif
diff --git a/src/librados/CMakeLists.txt b/src/librados/CMakeLists.txt
new file mode 100644
index 000000000..9e469eb17
--- /dev/null
+++ b/src/librados/CMakeLists.txt
@@ -0,0 +1,42 @@
+add_library(librados_impl STATIC
+ IoCtxImpl.cc
+ RadosXattrIter.cc
+ RadosClient.cc
+ librados_util.cc
+ librados_tp.cc)
+# librados_impl is linked privately into the librados target defined below
+# C/C++ API
+add_library(librados ${CEPH_SHARED}
+ librados_c.cc
+ librados_cxx.cc
+ $<TARGET_OBJECTS:common_buffer_obj>)
+if(ENABLE_SHARED) # output name, version and linker flags for the shared build
+ set_target_properties(librados PROPERTIES
+ OUTPUT_NAME rados
+ VERSION 2.0.0
+ SOVERSION 2
+ VISIBILITY_INLINES_HIDDEN ON)
+ if(HAVE_LINK_EXCLUDE_LIBS AND NOT WIN32)
+ set_property(TARGET librados APPEND_STRING PROPERTY
+ LINK_FLAGS " -Wl,--exclude-libs,ALL")
+ endif()
+ if(HAVE_LINK_VERSION_SCRIPT AND NOT WIN32) # pass librados.map as the linker version script
+ set_property(TARGET librados APPEND_STRING PROPERTY
+ LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/librados.map")
+ endif()
+ if(WITH_STATIC_LIBSTDCXX)
+ set_property(TARGET librados APPEND_STRING PROPERTY
+ LINK_FLAGS " -static-libstdc++ -static-libgcc")
+ endif()
+endif()
+target_link_libraries(librados PRIVATE
+ librados_impl osdc ceph-common cls_lock_client
+ ${BLKID_LIBRARIES} ${CRYPTO_LIBS} ${EXTRALIBS} ${GSSAPI_LIBRARIES})
+install(TARGETS librados DESTINATION ${CMAKE_INSTALL_LIBDIR})
+# with LTTng enabled, librados_impl depends on the librados-tp target
+if(WITH_LTTNG)
+ add_dependencies(librados_impl librados-tp)
+ if(WITH_EVENTTRACE)
+ add_dependencies(librados_impl eventtrace_tp)
+ endif()
+endif()
diff --git a/src/librados/IoCtxImpl.cc b/src/librados/IoCtxImpl.cc
new file mode 100644
index 000000000..e54dd062b
--- /dev/null
+++ b/src/librados/IoCtxImpl.cc
@@ -0,0 +1,2217 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <limits.h>
+
+#include "IoCtxImpl.h"
+
+#include "librados/librados_c.h"
+#include "librados/AioCompletionImpl.h"
+#include "librados/PoolAsyncCompletionImpl.h"
+#include "librados/RadosClient.h"
+#include "include/ceph_assert.h"
+#include "common/valgrind.h"
+#include "common/EventTrace.h"
+
+#define dout_subsys ceph_subsys_rados
+#undef dout_prefix
+#define dout_prefix *_dout << "librados: "
+
+namespace bs = boost::system;
+namespace ca = ceph::async;
+namespace cb = ceph::buffer;
+
+namespace librados {
+namespace {
+
+struct CB_notify_Finish {
+  CephContext *cct;
+  Context *ctx;
+  Objecter *objecter;
+  Objecter::LingerOp *linger_op;
+  bufferlist *preply_bl;
+  char **preply_buf;
+  size_t *preply_buf_len;
+
+  CB_notify_Finish(CephContext *_cct, Context *_ctx, Objecter *_objecter,
+                   Objecter::LingerOp *_linger_op, bufferlist *_preply_bl,
+                   char **_preply_buf, size_t *_preply_buf_len)
+    : cct(_cct), ctx(_ctx), objecter(_objecter), linger_op(_linger_op),
+      preply_bl(_preply_bl), preply_buf(_preply_buf),
+      preply_buf_len(_preply_buf_len) {}
+
+  // move-only
+  CB_notify_Finish(const CB_notify_Finish&) = delete;
+  CB_notify_Finish& operator =(const CB_notify_Finish&) = delete;
+  CB_notify_Finish(CB_notify_Finish&&) = default;
+  CB_notify_Finish& operator =(CB_notify_Finish&&) = default;
+  // Copies the notify reply into the caller-supplied outputs, then completes ctx.
+  void operator()(bs::error_code ec, bufferlist&& reply_bl) {
+    ldout(cct, 10) << __func__ << " completed notify (linger op "
+                   << linger_op << "), ec = " << ec << dendl;
+    int r = ceph::from_error_code(ec);
+    size_t len = reply_bl.length();
+    // pass result back to user, regardless of what error code we return
+    if (preply_buf) {
+      *preply_buf = len ? static_cast<char*>(malloc(len)) : nullptr;
+      if (*preply_buf) {
+        memcpy(*preply_buf, reply_bl.c_str(), len);
+      } else if (len) {  // malloc failed: report an empty buffer and -ENOMEM
+        len = 0;
+        if (r >= 0)
+          r = -ENOMEM;
+      }
+    }
+    if (preply_buf_len)
+      *preply_buf_len = len;
+    if (preply_bl)
+      *preply_bl = std::move(reply_bl);
+    ctx->complete(r);
+  }
+};
+
+struct CB_aio_linger_cancel {
+  Objecter *objecter;
+  Objecter::LingerOp *linger_op;
+
+  // Remembers which linger op to cancel and on which Objecter.
+  CB_aio_linger_cancel(Objecter *_objecter, Objecter::LingerOp *_linger_op)
+    : objecter(_objecter), linger_op(_linger_op) {}
+
+  // Cancels the stored linger op when invoked.
+  void operator()() {
+    objecter->linger_cancel(linger_op);
+  }
+};
+
+struct C_aio_linger_Complete : public Context {
+  AioCompletionImpl *c;
+  Objecter::LingerOp *linger_op;
+  bool cancel;
+
+  // Takes a reference on the completion; finish() releases it.
+  C_aio_linger_Complete(AioCompletionImpl *_c, Objecter::LingerOp *_linger_op, bool _cancel)
+    : c(_c), linger_op(_linger_op), cancel(_cancel) {
+    c->get();
+  }
+
+  // Queues a linger cancel when asked to (or on error), publishes r on the
+  // completion and schedules the user callbacks if any are registered.
+  void finish(int r) override {
+    const bool must_cancel = cancel || r < 0;
+    if (must_cancel) {
+      boost::asio::defer(c->io->client->finish_strand,
+                         CB_aio_linger_cancel(c->io->objecter, linger_op));
+    }
+    c->lock.lock();
+    c->rval = r;
+    c->complete = true;
+    c->cond.notify_all();
+    const bool has_callback = c->callback_complete || c->callback_safe;
+    if (has_callback)
+      boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c));
+    c->put_unlock();
+  }
+};
+
+struct C_aio_notify_Complete : public C_aio_linger_Complete {
+  ceph::mutex lock = ceph::make_mutex("C_aio_notify_Complete::lock");
+  bool acked = false;
+  bool finished = false;
+  int ret_val = 0;
+
+  // Starts with cancel == false; it is switched on once both events have arrived.
+  C_aio_notify_Complete(AioCompletionImpl *_c, Objecter::LingerOp *_linger_op)
+    : C_aio_linger_Complete(_c, _linger_op, false) {}
+
+  // invoked by C_aio_notify_Ack
+  void handle_ack(int r) {
+    lock.lock();
+    acked = true;
+    complete_unlock(r);
+  }
+
+  // invoked when the notify itself finishes (presumably via CB_notify_Finish)
+  void complete(int r) override {
+    lock.lock();
+    finished = true;
+    complete_unlock(r);
+  }
+
+  // Called with lock held; always releases it.  Remembers the first negative
+  // result and forwards to the base class once both ack and finish were seen.
+  void complete_unlock(int r) {
+    if (r < 0 && ret_val == 0)
+      ret_val = r;
+
+    const bool both_seen = acked && finished;
+    lock.unlock();
+    if (both_seen) {
+      cancel = true;
+      C_aio_linger_Complete::complete(ret_val);
+    }
+  }
+};
+
+struct C_aio_notify_Ack : public Context {
+  // Context that relays its result to a C_aio_notify_Complete via handle_ack().
+  CephContext *cct;
+  C_aio_notify_Complete *oncomplete;
+
+  C_aio_notify_Ack(CephContext *_cct,
+                   C_aio_notify_Complete *_oncomplete)
+    : cct(_cct), oncomplete(_oncomplete) {}
+
+  // Logs the result together with the linger op it belongs to, then
+  // forwards it to oncomplete.
+  void finish(int r) override {
+    ldout(cct, 10) << __func__ << " linger op " << oncomplete->linger_op << " "
+                   << "acked (" << r << ")" << dendl;
+    oncomplete->handle_ack(r);
+  }
+};
+
+struct C_aio_selfmanaged_snap_op_Complete : public Context {
+  librados::RadosClient *client;
+  librados::AioCompletionImpl *c;
+  // Holds a reference on the completion until finish() drops it.
+  C_aio_selfmanaged_snap_op_Complete(librados::RadosClient *client,
+                                     librados::AioCompletionImpl *c)
+    : client(client), c(c) {
+    c->get();
+  }
+
+  // Records r, wakes waiters, queues user callbacks (if any) and drops the ref.
+  void finish(int r) override {
+    c->lock.lock();
+    c->rval = r;
+    c->complete = true;
+    c->cond.notify_all();
+    const bool has_callback = c->callback_complete || c->callback_safe;
+    if (has_callback)
+      boost::asio::defer(client->finish_strand, librados::CB_AioComplete(c));
+    c->put_unlock();
+  }
+};
+
+struct C_aio_selfmanaged_snap_create_Complete : public C_aio_selfmanaged_snap_op_Complete {
+  snapid_t snapid;        // written through the pointer given to allocate_selfmanaged_snap
+  uint64_t *dest_snapid;  // caller-supplied slot that receives the id on success
+
+  C_aio_selfmanaged_snap_create_Complete(librados::RadosClient *client,
+                                         librados::AioCompletionImpl *c,
+                                         uint64_t *dest_snapid)
+    : C_aio_selfmanaged_snap_op_Complete(client, c),
+      dest_snapid(dest_snapid) {}
+
+  // Copies the new snap id out only on success, then completes as the base does.
+  void finish(int r) override {
+    const bool succeeded = (r >= 0);
+    if (succeeded)
+      *dest_snapid = snapid;
+    C_aio_selfmanaged_snap_op_Complete::finish(r);
+  }
+};
+
+} // anonymous namespace
+} // namespace librados
+
+librados::IoCtxImpl::IoCtxImpl() = default;
+// Binds this context to pool `poolid` of client `c`; `s` becomes the initial snap_seq.
+librados::IoCtxImpl::IoCtxImpl(RadosClient *c, Objecter *objecter,
+ int64_t poolid, snapid_t s)
+ : client(c), poolid(poolid), snap_seq(s),
+ notify_timeout(c->cct->_conf->client_notify_timeout), // taken from the client's configuration
+ oloc(poolid),
+ aio_write_seq(0), objecter(objecter)
+{
+}
+
+void librados::IoCtxImpl::set_snap_read(snapid_t s)
+{ // selects the snapshot id stored in snap_seq, which the read paths pass to the Objecter
+ if (!s) // a zero snap id is replaced by CEPH_NOSNAP
+ s = CEPH_NOSNAP;
+ ldout(client->cct, 10) << "set snap read " << snap_seq << " -> " << s << dendl;
+ snap_seq = s;
+}
+
+int librados::IoCtxImpl::set_snap_write_context(snapid_t seq, vector<snapid_t>& snaps)
+{
+  ldout(client->cct, 10) << "set snap write context: seq = " << seq
+                         << " and snaps = " << snaps << dendl;
+  ::SnapContext candidate;  // only installed as snapc once it has been validated
+  candidate.seq = seq;
+  candidate.snaps = snaps;
+  if (!candidate.is_valid())
+    return -EINVAL;  // snapc is left unchanged
+  snapc = candidate;
+  return 0;
+}
+
+int librados::IoCtxImpl::get_object_hash_position(
+    const std::string& oid, uint32_t *hash_position)
+{
+  const int64_t pos = objecter->get_object_hash_position(poolid, oid, oloc.nspace);
+  if (pos < 0)
+    return pos;  // negative results are passed through; *hash_position is untouched
+  *hash_position = static_cast<uint32_t>(pos);
+  return 0;
+}
+
+int librados::IoCtxImpl::get_object_pg_hash_position(
+    const std::string& oid, uint32_t *pg_hash_position)
+{
+  const int64_t pos = objecter->get_object_pg_hash_position(poolid, oid, oloc.nspace);
+  if (pos < 0)
+    return pos;  // negative results are passed through; *pg_hash_position is untouched
+  *pg_hash_position = static_cast<uint32_t>(pos);
+  return 0;
+}
+
+void librados::IoCtxImpl::queue_aio_write(AioCompletionImpl *c)
+{
+ get(); // presumably balanced by the put() at the end of complete_aio_write()
+ std::scoped_lock l{aio_write_list_lock};
+ ceph_assert(c->io == this); // the caller must have bound c to this context already
+ c->aio_write_seq = ++aio_write_seq; // give c the next write sequence number
+ ldout(client->cct, 20) << "queue_aio_write " << this << " completion " << c
+ << " write_seq " << aio_write_seq << dendl;
+ aio_write_list.push_back(&c->aio_write_list_item); // appended, so the list stays in sequence order
+}
+
+void librados::IoCtxImpl::complete_aio_write(AioCompletionImpl *c)
+{
+  // Unlinks c from the in-flight list, then completes every flush waiter whose
+  // sequence number is older than the oldest write still outstanding.
+  ldout(client->cct, 20) << "complete_aio_write " << c << dendl;
+  {
+    std::scoped_lock l{aio_write_list_lock};
+    ceph_assert(c->io == this);
+    c->aio_write_list_item.remove_myself();
+    for (auto waiter = aio_write_waiters.begin();
+         waiter != aio_write_waiters.end();
+         waiter = aio_write_waiters.erase(waiter)) {
+      if (!aio_write_list.empty() &&
+          aio_write_list.front()->aio_write_seq <= waiter->first) {
+        ldout(client->cct, 20) << " next outstanding write is " << aio_write_list.front()->aio_write_seq
+                               << " <= waiter " << waiter->first
+                               << ", stopping" << dendl;
+        break;
+      }
+      ldout(client->cct, 20) << " waking waiters on seq " << waiter->first << dendl;
+      for (AioCompletionImpl *flush_c : waiter->second) {
+        boost::asio::defer(client->finish_strand, CB_AioCompleteAndSafe(flush_c));
+        flush_c->put();
+      }
+    }
+    aio_write_cond.notify_all();
+  }
+  put();  // called after the list lock has been released
+}
+
+void librados::IoCtxImpl::flush_aio_writes_async(AioCompletionImpl *c)
+{
+  // Completes c at once when nothing is in flight; otherwise parks it as a waiter.
+  ldout(client->cct, 20) << "flush_aio_writes_async " << this
+                         << " completion " << c << dendl;
+  std::lock_guard l(aio_write_list_lock);
+  const ceph_tid_t seq = aio_write_seq;
+  if (!aio_write_list.empty()) {
+    ldout(client->cct, 20) << "flush_aio_writes_async " << aio_write_list.size()
+                           << " writes in flight; waiting on tid " << seq << dendl;
+    c->get();  // released by complete_aio_write() when the waiter is woken
+    aio_write_waiters[seq].push_back(c);
+    return;
+  }
+  ldout(client->cct, 20) << "flush_aio_writes_async no writes. (tid " << seq << ")" << dendl;
+  boost::asio::defer(client->finish_strand, CB_AioCompleteAndSafe(c));
+}
+
+void librados::IoCtxImpl::flush_aio_writes()
+{ // blocks until the write list is empty or its front entry is newer than the sequence seen on entry
+ ldout(client->cct, 20) << "flush_aio_writes" << dendl;
+ std::unique_lock l{aio_write_list_lock};
+ aio_write_cond.wait(l, [seq=aio_write_seq, this] { // seq is captured once, with the lock held
+ return (aio_write_list.empty() ||
+ aio_write_list.front()->aio_write_seq > seq);
+ });
+}
+
+string librados::IoCtxImpl::get_cached_pool_name()
+{
+ std::string pn; // stays empty unless pool_get_name() fills it in
+ client->pool_get_name(get_id(), &pn); // return value is deliberately not checked here
+ return pn;
+}
+
+// SNAPS
+
+int librados::IoCtxImpl::snap_create(const char *snapName)
+{
+  // Issues the pool-snapshot request, then blocks until C_SafeCond reports done.
+  ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::snap_create::mylock");
+  ceph::condition_variable cond;
+  bool done;
+  int reply;
+  string sName(snapName);
+  Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply);
+  objecter->create_pool_snap(poolid, sName, onfinish);
+
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done; });
+  return reply;
+}
+
+int librados::IoCtxImpl::selfmanaged_snap_create(uint64_t *psnapid)
+{
+  // Synchronous wrapper: requests a snap id and waits for the request to finish.
+  ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::selfmanaged_snap_create::mylock");
+  ceph::condition_variable cond;
+  bool done;
+  int reply;
+  snapid_t snapid;
+  Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply);
+  objecter->allocate_selfmanaged_snap(poolid, &snapid, onfinish);
+
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done; });
+  l.unlock();
+  // *psnapid is only written when the request reported success
+  if (reply == 0)
+    *psnapid = snapid;
+  return reply;
+}
+
+void librados::IoCtxImpl::aio_selfmanaged_snap_create(uint64_t *snapid,
+                                                      AioCompletionImpl *c)
+{
+  // The context object holds the snapid_t handed to the Objecter; its finish()
+  // copies that id to *snapid when the request succeeds.
+  auto *onfinish = new C_aio_selfmanaged_snap_create_Complete(client, c, snapid);
+  objecter->allocate_selfmanaged_snap(poolid, &onfinish->snapid, onfinish);
+}
+
+int librados::IoCtxImpl::snap_remove(const char *snapName)
+{
+  // Requests deletion of the pool snapshot snapName and waits for the result.
+  int reply;
+  string sName(snapName);
+  ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::snap_remove::mylock");
+  ceph::condition_variable cond;
+  bool done;
+  Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply);
+  objecter->delete_pool_snap(poolid, sName, onfinish);
+  std::unique_lock l{mylock};  // qualified like every other waiter in this file
+  cond.wait(l, [&done] { return done; });
+  return reply;
+}
+
+int librados::IoCtxImpl::selfmanaged_snap_rollback_object(const object_t& oid,
+                                                          ::SnapContext& snapc,
+                                                          uint64_t snapid)
+{
+  // Builds a rollback-to-snapid mutation, submits it and waits for the result.
+  ::ObjectOperation op;
+  prepare_assert_ops(&op);
+  op.rollback(snapid);
+
+  ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::snap_rollback::mylock");
+  ceph::condition_variable cond;
+  bool done;
+  int reply;
+  Context *onack = new C_SafeCond(mylock, cond, &done, &reply);
+  objecter->mutate(oid, oloc,
+                   op, snapc, ceph::real_clock::now(),
+                   extra_op_flags,
+                   onack, NULL);
+
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done; });
+  return reply;
+}
+
+int librados::IoCtxImpl::rollback(const object_t& oid, const char *snapName)
+{
+  // Translate the pool snapshot name into an id; a failed lookup is returned
+  // unchanged.
+  snapid_t snap;
+  if (int r = objecter->pool_snap_by_name(poolid, snapName, &snap); r < 0)
+    return r;
+
+  // the rollback itself uses this context's snapc member
+  return selfmanaged_snap_rollback_object(oid, snapc, snap);
+}
+
+int librados::IoCtxImpl::selfmanaged_snap_remove(uint64_t snapid)
+{
+  // Requests deletion of the self-managed snapshot and waits for the result.
+  ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::selfmanaged_snap_remove::mylock");
+  ceph::condition_variable cond;
+  bool done;
+  int reply;
+  Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply);
+  objecter->delete_selfmanaged_snap(poolid, snapid_t(snapid), onfinish);
+
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done; });
+  return reply;
+}
+
+void librados::IoCtxImpl::aio_selfmanaged_snap_remove(uint64_t snapid,
+ AioCompletionImpl *c)
+{ // asynchronous variant: the result is delivered through completion c
+ Context *onfinish = new C_aio_selfmanaged_snap_op_Complete(client, c); // takes its own reference on c
+ objecter->delete_selfmanaged_snap(poolid, snapid, onfinish);
+}
+
+int librados::IoCtxImpl::snap_list(vector<uint64_t> *snaps)
+{ // thin wrapper: forwards to the Objecter for this context's pool
+ return objecter->pool_snap_list(poolid, snaps);
+}
+
+int librados::IoCtxImpl::snap_lookup(const char *name, uint64_t *snapid)
+{ // NOTE(review): the cast below treats the caller's uint64_t as a snapid_t -- confirm the two are layout-compatible
+ return objecter->pool_snap_by_name(poolid, name, (snapid_t *)snapid);
+}
+
+int librados::IoCtxImpl::snap_get_name(uint64_t snapid, std::string *s)
+{
+  pool_snap_info_t info;  // filled in by the Objecter lookup below
+  const int ret = objecter->pool_snap_get_info(poolid, snapid, &info);
+  if (ret >= 0) {
+    *s = info.name.c_str();
+    return 0;
+  }
+  return ret;  // lookup failed; *s is left untouched
+}
+
+int librados::IoCtxImpl::snap_get_stamp(uint64_t snapid, time_t *t)
+{
+  pool_snap_info_t info;  // filled in by the Objecter lookup below
+  const int ret = objecter->pool_snap_get_info(poolid, snapid, &info);
+  if (ret >= 0) {
+    *t = info.stamp.sec();
+    return 0;
+  }
+  return ret;  // lookup failed; *t is left untouched
+}
+
+
+// IO
+
+int librados::IoCtxImpl::nlist(Objecter::NListContext *context, int max_entries)
+{
+  // Fetches the next batch of a listing synchronously; 0 once the end is reached.
+  if (context->at_end())
+    return 0;
+
+  context->max_entries = max_entries;
+  context->nspace = oloc.nspace;
+
+  ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::nlist::mylock");
+  ceph::condition_variable cond;
+  bool done;
+  int r = 0;
+  objecter->list_nobjects(context, new C_SafeCond(mylock, cond, &done, &r));
+
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done; });
+  return r;
+}
+
+uint32_t librados::IoCtxImpl::nlist_seek(Objecter::NListContext *context,
+ uint32_t pos)
+{ // repositions the listing at pos
+ context->list.clear(); // discard entries already buffered in the context
+ return objecter->list_nobjects_seek(context, pos);
+}
+
+uint32_t librados::IoCtxImpl::nlist_seek(Objecter::NListContext *context,
+ const rados_object_list_cursor& cursor)
+{ // repositions the listing at cursor, which is read here as a pointer to hobject_t
+ context->list.clear(); // discard entries already buffered in the context
+ return objecter->list_nobjects_seek(context, *(const hobject_t *)cursor);
+}
+
+rados_object_list_cursor librados::IoCtxImpl::nlist_get_cursor(Objecter::NListContext *context)
+{
+ hobject_t *c = new hobject_t; // not freed here; handed back to the caller as an opaque cursor
+ // let the Objecter fill the cursor from the listing context
+ objecter->list_nobjects_get_cursor(context, c);
+ return (rados_object_list_cursor)c;
+}
+
+int librados::IoCtxImpl::create(const object_t& oid, bool exclusive)
+{ // synchronous object creation, executed through operate()
+ ::ObjectOperation op;
+ prepare_assert_ops(&op); // adds a pending version assert, if one is set
+ op.create(exclusive);
+ return operate(oid, &op, NULL);
+}
+
+/*
+ * add any version assert operations that are appropriate given the
+ * state in the IoCtx, either the target version assert or any src
+ * object asserts. these affect a single ioctx operation, so clear
+ * the ioctx state when we're done.
+ *
+ * return a pointer to the ObjectOperation if we added any events;
+ * this is convenient for passing the extra_ops argument into Objecter
+ * methods.
+ */
+::ObjectOperation *librados::IoCtxImpl::prepare_assert_ops(::ObjectOperation *op)
+{
+ ::ObjectOperation *pop = NULL; // stays NULL when no assert was added
+ if (assert_ver) {
+ op->assert_version(assert_ver);
+ assert_ver = 0; // one-shot: the assert applies to this operation only
+ pop = op;
+ }
+ return pop;
+}
+
+int librados::IoCtxImpl::write(const object_t& oid, bufferlist& bl,
+ size_t len, uint64_t off)
+{ // synchronous write of the first len bytes of bl at offset off
+ if (len > UINT_MAX/2) // oversized requests are rejected before anything is built
+ return -E2BIG;
+ ::ObjectOperation op;
+ prepare_assert_ops(&op);
+ bufferlist mybl;
+ mybl.substr_of(bl, 0, len); // limit the payload to the first len bytes of bl
+ op.write(off, mybl);
+ return operate(oid, &op, NULL);
+}
+
+int librados::IoCtxImpl::append(const object_t& oid, bufferlist& bl, size_t len)
+{ // synchronous append of the first len bytes of bl
+ if (len > UINT_MAX/2) // oversized requests are rejected before anything is built
+ return -E2BIG;
+ ::ObjectOperation op;
+ prepare_assert_ops(&op);
+ bufferlist mybl;
+ mybl.substr_of(bl, 0, len); // limit the payload to the first len bytes of bl
+ op.append(mybl);
+ return operate(oid, &op, NULL);
+}
+
+int librados::IoCtxImpl::write_full(const object_t& oid, bufferlist& bl)
+{ // synchronous write_full of the whole of bl, executed through operate()
+ if (bl.length() > UINT_MAX/2) // same size limit as write() and append()
+ return -E2BIG;
+ ::ObjectOperation op;
+ prepare_assert_ops(&op);
+ op.write_full(bl);
+ return operate(oid, &op, NULL);
+}
+
+int librados::IoCtxImpl::writesame(const object_t& oid, bufferlist& bl,
+ size_t write_len, uint64_t off)
+{ // synchronous writesame: bl is the pattern, write_len the total length at offset off
+ if ((bl.length() > UINT_MAX/2) || (write_len > UINT_MAX/2))
+ return -E2BIG;
+ if ((bl.length() == 0) || (write_len % bl.length())) // write_len must be a whole multiple of a non-empty bl
+ return -EINVAL;
+ ::ObjectOperation op;
+ prepare_assert_ops(&op);
+ bufferlist mybl;
+ mybl.substr_of(bl, 0, bl.length()); // covers all of bl
+ op.writesame(off, write_len, mybl);
+ return operate(oid, &op, NULL);
+}
+
+int librados::IoCtxImpl::operate(const object_t& oid, ::ObjectOperation *o,
+                                 ceph::real_time *pmtime, int flags)
+{
+  // Synchronous mutation: submits *o and blocks until the Objecter reports back.
+  // The mtime is the caller's *pmtime when given, otherwise the current time.
+  ceph::real_time ut = pmtime ? *pmtime : ceph::real_clock::now();
+
+  /* can't write to a snapshot */
+  if (snap_seq != CEPH_NOSNAP)
+    return -EROFS;
+
+  // an operation without any ops succeeds without being submitted
+  if (!o->size())
+    return 0;
+
+  ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::operate::mylock");
+  ceph::condition_variable cond;
+  bool done;
+  int r;
+  version_t ver;
+  Context *oncommit = new C_SafeCond(mylock, cond, &done, &r);
+
+  const int first_opcode = o->ops[0].op.op;  // used for logging only
+  ldout(client->cct, 10) << ceph_osd_op_name(first_opcode) << " oid=" << oid
+                         << " nspace=" << oloc.nspace << dendl;
+  Objecter::Op *objecter_op = objecter->prepare_mutate_op(
+    oid, oloc, *o, snapc, ut,
+    flags | extra_op_flags,
+    oncommit, &ver);
+  objecter->op_submit(objecter_op);
+
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done; });
+  l.unlock();
+
+  ldout(client->cct, 10) << "Objecter returned from "
+                         << ceph_osd_op_name(first_opcode) << " r=" << r << dendl;
+  // pass the version reported for this op to set_sync_op_version()
+  set_sync_op_version(ver);
+
+  return r;
+}
+
+int librados::IoCtxImpl::operate_read(const object_t& oid,
+                                      ::ObjectOperation *o,
+                                      bufferlist *pbl,
+                                      int flags)
+{
+  // Synchronous read: submits *o against snap_seq and blocks until the
+  // Objecter reports back.
+  // an operation without any ops succeeds without being submitted
+  if (!o->size())
+    return 0;
+
+  ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::operate_read::mylock");
+  ceph::condition_variable cond;
+  bool done;
+  int r;
+  version_t ver;
+  Context *onack = new C_SafeCond(mylock, cond, &done, &r);
+
+  const int first_opcode = o->ops[0].op.op;  // used for logging only
+  ldout(client->cct, 10) << ceph_osd_op_name(first_opcode) << " oid=" << oid << " nspace=" << oloc.nspace << dendl;
+  Objecter::Op *objecter_op = objecter->prepare_read_op(
+    oid, oloc, *o, snap_seq, pbl,
+    flags | extra_op_flags, onack, &ver);
+  objecter->op_submit(objecter_op);
+
+  // wait for C_SafeCond to flag completion
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done; });
+  l.unlock();
+
+  ldout(client->cct, 10) << "Objecter returned from "
+                         << ceph_osd_op_name(first_opcode) << " r=" << r << dendl;
+  set_sync_op_version(ver);
+
+  return r;
+}
+
+int librados::IoCtxImpl::aio_operate_read(const object_t &oid,
+ ::ObjectOperation *o,
+ AioCompletionImpl *c,
+ int flags,
+ bufferlist *pbl,
+ const blkin_trace_info *trace_info)
+{ // asynchronous read operation; the outcome is delivered through completion c
+ FUNCTRACE(client->cct);
+ Context *oncomplete = new C_aio_Complete(c);
+
+#if defined(WITH_EVENTTRACE)
+ ((C_aio_Complete *) oncomplete)->oid = oid;
+#endif
+ c->is_read = true; // completion fields are filled in before the op is submitted
+ c->io = this;
+ // tracing: a span is only initialised when the caller supplied trace_info
+ ZTracer::Trace trace;
+ if (trace_info) {
+ ZTracer::Trace parent_trace("", nullptr, trace_info);
+ trace.init("rados operate read", &objecter->trace_endpoint, &parent_trace);
+ }
+
+ trace.event("init root span");
+ Objecter::Op *objecter_op = objecter->prepare_read_op(
+ oid, oloc,
+ *o, snap_seq, pbl, flags | extra_op_flags,
+ oncomplete, &c->objver, nullptr, 0, &trace);
+ objecter->op_submit(objecter_op, &c->tid); // &c->tid is passed so the op's tid can be stored in c -- presumably
+ trace.event("rados operate read submitted");
+
+ return 0; // always 0 from this function itself
+}
+
+int librados::IoCtxImpl::aio_operate(const object_t& oid,
+ ::ObjectOperation *o, AioCompletionImpl *c,
+ const SnapContext& snap_context, int flags,
+ const blkin_trace_info *trace_info)
+{ // asynchronous mutation; the outcome is delivered through completion c
+ FUNCTRACE(client->cct);
+ OID_EVENT_TRACE(oid.name.c_str(), "RADOS_WRITE_OP_BEGIN");
+ auto ut = ceph::real_clock::now(); // mtime passed to prepare_mutate_op below
+ /* can't write to a snapshot */
+ if (snap_seq != CEPH_NOSNAP)
+ return -EROFS; // returned before anything is allocated or queued
+
+ Context *oncomplete = new C_aio_Complete(c);
+#if defined(WITH_EVENTTRACE)
+ ((C_aio_Complete *) oncomplete)->oid = oid;
+#endif
+
+ c->io = this; // must be set first: queue_aio_write() asserts c->io == this
+ queue_aio_write(c); // track c as an in-flight write for the flush_aio_writes* functions
+
+ ZTracer::Trace trace;
+ if (trace_info) {
+ ZTracer::Trace parent_trace("", nullptr, trace_info);
+ trace.init("rados operate", &objecter->trace_endpoint, &parent_trace);
+ }
+
+ trace.event("init root span");
+ Objecter::Op *op = objecter->prepare_mutate_op(
+ oid, oloc, *o, snap_context, ut, flags | extra_op_flags,
+ oncomplete, &c->objver, osd_reqid_t(), &trace);
+ objecter->op_submit(op, &c->tid);
+ trace.event("rados operate op submitted");
+
+ return 0; // submission itself always reports 0
+}
+
+int librados::IoCtxImpl::aio_read(const object_t oid, AioCompletionImpl *c,
+ bufferlist *pbl, size_t len, uint64_t off,
+ uint64_t snapid, const blkin_trace_info *info)
+{ // asynchronous read of len bytes at off into *pbl, using the explicit snapid argument
+ FUNCTRACE(client->cct);
+ if (len > (size_t) INT_MAX) // lengths above INT_MAX are rejected up front
+ return -EDOM;
+
+ OID_EVENT_TRACE(oid.name.c_str(), "RADOS_READ_OP_BEGIN");
+ Context *oncomplete = new C_aio_Complete(c);
+
+#if defined(WITH_EVENTTRACE)
+ ((C_aio_Complete *) oncomplete)->oid = oid;
+#endif
+ c->is_read = true; // completion fields are filled in before the op is submitted
+ c->io = this;
+ c->blp = pbl;
+
+ ZTracer::Trace trace;
+ if (info)
+ trace.init("rados read", &objecter->trace_endpoint, info);
+
+ Objecter::Op *o = objecter->prepare_read_op(
+ oid, oloc,
+ off, len, snapid, pbl, extra_op_flags,
+ oncomplete, &c->objver, nullptr, 0, &trace);
+ objecter->op_submit(o, &c->tid);
+ return 0; // submission itself always reports 0
+}
+
+int librados::IoCtxImpl::aio_read(const object_t oid, AioCompletionImpl *c,
+ char *buf, size_t len, uint64_t off,
+ uint64_t snapid, const blkin_trace_info *info)
+{ // asynchronous read of len bytes at off, for callers supplying a raw char buffer
+ FUNCTRACE(client->cct);
+ if (len > (size_t) INT_MAX) // lengths above INT_MAX are rejected up front
+ return -EDOM;
+
+ OID_EVENT_TRACE(oid.name.c_str(), "RADOS_READ_OP_BEGIN");
+ Context *oncomplete = new C_aio_Complete(c);
+
+#if defined(WITH_EVENTTRACE)
+ ((C_aio_Complete *) oncomplete)->oid = oid;
+#endif
+ c->is_read = true; // completion fields are filled in before the op is submitted
+ c->io = this;
+ c->bl.clear();
+ c->bl.push_back(buffer::create_static(len, buf)); // presumably wraps buf without copying -- confirm create_static semantics
+ c->blp = &c->bl;
+ c->out_buf = buf; // the caller's buffer is also recorded on the completion
+
+ ZTracer::Trace trace;
+ if (info)
+ trace.init("rados read", &objecter->trace_endpoint, info);
+
+ Objecter::Op *o = objecter->prepare_read_op(
+ oid, oloc,
+ off, len, snapid, &c->bl, extra_op_flags,
+ oncomplete, &c->objver, nullptr, 0, &trace);
+ objecter->op_submit(o, &c->tid);
+ return 0; // submission itself always reports 0
+}
+
+// Context wrapper that carries an ObjectOperation (m_ops) alongside a nested
+// completion context. finish() simply forwards the result code to the nested
+// context's complete().
+class C_ObjectOperation : public Context {
+public:
+ // Operation storage; callers in this file populate it and pass it to the
+ // objecter while using this object as the op's completion context.
+ ::ObjectOperation m_ops;
+ explicit C_ObjectOperation(Context *c) : m_ctx(c) {}
+ void finish(int r) override {
+ m_ctx->complete(r);
+ }
+private:
+ // Nested context that receives the result code.
+ Context *m_ctx;
+};
+
+// Queue an asynchronous sparse read of `len` bytes at `off` from `oid` as of
+// snapshot `snapid`. The extent map is requested into `m` and the data into
+// `data_bl`.
+//
+// Returns -EDOM when `len` exceeds INT_MAX, otherwise 0 after submission.
+int librados::IoCtxImpl::aio_sparse_read(const object_t oid,
+                                         AioCompletionImpl *c,
+                                         std::map<uint64_t,uint64_t> *m,
+                                         bufferlist *data_bl, size_t len,
+                                         uint64_t off, uint64_t snapid)
+{
+  FUNCTRACE(client->cct);
+  if (len > static_cast<size_t>(INT_MAX)) {
+    return -EDOM;
+  }
+
+  // The wrapper owns the ObjectOperation and forwards the result to the
+  // regular aio completion context.
+  Context *aio_done = new C_aio_Complete(c);
+  C_ObjectOperation *wrapper = new C_ObjectOperation(aio_done);
+#if defined(WITH_EVENTTRACE)
+  static_cast<C_aio_Complete *>(aio_done)->oid = oid;
+#endif
+
+  c->is_read = true;
+  c->io = this;
+
+  wrapper->m_ops.sparse_read(off, len, m, data_bl, NULL);
+
+  Objecter::Op *read_op = objecter->prepare_read_op(
+    oid, oloc, wrapper->m_ops, snapid, nullptr, extra_op_flags,
+    wrapper, &c->objver);
+  objecter->op_submit(read_op, &c->tid);
+  return 0;
+}
+
+// Queue an asynchronous compare-extent op: compare the contents of `cmp_bl`
+// against `oid` starting at offset `off`, at the IoCtx's snap_seq.
+//
+// Returns -E2BIG when the compare buffer is longer than UINT_MAX/2,
+// otherwise 0 after submission.
+int librados::IoCtxImpl::aio_cmpext(const object_t& oid,
+                                    AioCompletionImpl *c,
+                                    uint64_t off,
+                                    bufferlist& cmp_bl)
+{
+  if (cmp_bl.length() > UINT_MAX/2) {
+    return -E2BIG;
+  }
+
+  Context *on_done = new C_aio_Complete(c);
+
+  c->is_read = true;
+  c->io = this;
+
+  Objecter::Op *cmp_op = objecter->prepare_cmpext_op(
+    oid, oloc, off, cmp_bl, snap_seq, extra_op_flags,
+    on_done, &c->objver);
+  objecter->op_submit(cmp_op, &c->tid);
+  return 0;
+}
+
+/* use m_ops.cmpext() + prepare_read_op() for non-bufferlist C API */
+// Queue an asynchronous compare-extent op using a raw buffer: compare
+// `cmp_len` bytes of `cmp_buf` against `oid` starting at offset `off`.
+//
+// Returns -E2BIG when `cmp_len` is larger than UINT_MAX/2, otherwise 0 after
+// submission; the outcome is reported through completion `c`.
+//
+// The compare data is handed to m_ops.cmpext() directly from `cmp_buf`, so
+// no intermediate bufferlist copy is made here.
+int librados::IoCtxImpl::aio_cmpext(const object_t& oid,
+                                    AioCompletionImpl *c,
+                                    const char *cmp_buf,
+                                    size_t cmp_len,
+                                    uint64_t off)
+{
+  if (cmp_len > UINT_MAX/2)
+    return -E2BIG;
+
+  // The wrapper owns the ObjectOperation and forwards the result to the
+  // regular aio completion context.
+  Context *nested = new C_aio_Complete(c);
+  C_ObjectOperation *onack = new C_ObjectOperation(nested);
+
+  c->is_read = true;
+  c->io = this;
+
+  onack->m_ops.cmpext(off, cmp_len, cmp_buf, NULL);
+
+  Objecter::Op *o = objecter->prepare_read_op(
+    oid, oloc, onack->m_ops, snap_seq, NULL, extra_op_flags, onack, &c->objver);
+  objecter->op_submit(o, &c->tid);
+  return 0;
+}
+
+// Queue an asynchronous write of `len` bytes from `bl` to `oid` at offset
+// `off`, using the IoCtx's snap context.
+//
+// Returns -E2BIG when `len` exceeds UINT_MAX/2 and -EROFS when the IoCtx is
+// set to a snapshot (snap_seq != CEPH_NOSNAP); otherwise 0 after submission.
+int librados::IoCtxImpl::aio_write(const object_t &oid, AioCompletionImpl *c,
+                                   const bufferlist& bl, size_t len,
+                                   uint64_t off, const blkin_trace_info *info)
+{
+  FUNCTRACE(client->cct);
+  auto now = ceph::real_clock::now();
+  ldout(client->cct, 20) << "aio_write " << oid << " " << off << "~" << len
+                         << " snapc=" << snapc
+                         << " snap_seq=" << snap_seq << dendl;
+  OID_EVENT_TRACE(oid.name.c_str(), "RADOS_WRITE_OP_BEGIN");
+
+  if (len > UINT_MAX/2) {
+    return -E2BIG;
+  }
+  /* can't write to a snapshot */
+  if (snap_seq != CEPH_NOSNAP) {
+    return -EROFS;
+  }
+
+  Context *on_done = new C_aio_Complete(c);
+#if defined(WITH_EVENTTRACE)
+  static_cast<C_aio_Complete *>(on_done)->oid = oid;
+#endif
+
+  ZTracer::Trace write_trace;
+  if (info) {
+    write_trace.init("rados write", &objecter->trace_endpoint, info);
+  }
+
+  // Register the completion with the IoCtx's aio write tracking before the
+  // op is submitted.
+  c->io = this;
+  queue_aio_write(c);
+
+  Objecter::Op *write_op = objecter->prepare_write_op(
+    oid, oloc, off, len, snapc, bl, now, extra_op_flags,
+    on_done, &c->objver, nullptr, 0, &write_trace);
+  objecter->op_submit(write_op, &c->tid);
+
+  return 0;
+}
+
+// Queue an asynchronous append of `len` bytes from `bl` to `oid`.
+//
+// Returns -E2BIG when `len` exceeds UINT_MAX/2 and -EROFS when the IoCtx is
+// set to a snapshot; otherwise 0 after submission.
+int librados::IoCtxImpl::aio_append(const object_t &oid, AioCompletionImpl *c,
+                                    const bufferlist& bl, size_t len)
+{
+  FUNCTRACE(client->cct);
+  auto now = ceph::real_clock::now();
+
+  if (len > UINT_MAX/2) {
+    return -E2BIG;
+  }
+  /* can't write to a snapshot */
+  if (snap_seq != CEPH_NOSNAP) {
+    return -EROFS;
+  }
+
+  Context *on_done = new C_aio_Complete(c);
+#if defined(WITH_EVENTTRACE)
+  static_cast<C_aio_Complete *>(on_done)->oid = oid;
+#endif
+
+  c->io = this;
+  queue_aio_write(c);
+
+  Objecter::Op *append_op = objecter->prepare_append_op(
+    oid, oloc, len, snapc, bl, now, extra_op_flags,
+    on_done, &c->objver);
+  objecter->op_submit(append_op, &c->tid);
+
+  return 0;
+}
+
+// Queue an asynchronous write-full op on `oid` carrying the contents of
+// `bl`.
+//
+// Returns -E2BIG when `bl` is longer than UINT_MAX/2 and -EROFS when the
+// IoCtx is set to a snapshot; otherwise 0 after submission.
+int librados::IoCtxImpl::aio_write_full(const object_t &oid,
+                                        AioCompletionImpl *c,
+                                        const bufferlist& bl)
+{
+  FUNCTRACE(client->cct);
+  auto now = ceph::real_clock::now();
+
+  if (bl.length() > UINT_MAX/2) {
+    return -E2BIG;
+  }
+  /* can't write to a snapshot */
+  if (snap_seq != CEPH_NOSNAP) {
+    return -EROFS;
+  }
+
+  Context *on_done = new C_aio_Complete(c);
+#if defined(WITH_EVENTTRACE)
+  static_cast<C_aio_Complete *>(on_done)->oid = oid;
+#endif
+
+  c->io = this;
+  queue_aio_write(c);
+
+  Objecter::Op *write_op = objecter->prepare_write_full_op(
+    oid, oloc, snapc, bl, now, extra_op_flags,
+    on_done, &c->objver);
+  objecter->op_submit(write_op, &c->tid);
+
+  return 0;
+}
+
+// Queue an asynchronous writesame op: `write_len` bytes at offset `off` of
+// `oid`, using `bl` as the data pattern.
+//
+// Returns:
+//   -E2BIG  if either `bl` or `write_len` exceeds UINT_MAX/2,
+//   -EINVAL if `bl` is empty or `write_len` is not a multiple of its length,
+//   -EROFS  if the IoCtx is set to a snapshot,
+//   0       after the op has been submitted.
+int librados::IoCtxImpl::aio_writesame(const object_t &oid,
+                                       AioCompletionImpl *c,
+                                       const bufferlist& bl,
+                                       size_t write_len,
+                                       uint64_t off)
+{
+  FUNCTRACE(client->cct);
+  auto now = ceph::real_clock::now();
+
+  if (bl.length() > UINT_MAX/2 || write_len > UINT_MAX/2) {
+    return -E2BIG;
+  }
+  // The zero-length check must precede the modulo below.
+  if (bl.length() == 0 || (write_len % bl.length()) != 0) {
+    return -EINVAL;
+  }
+  /* can't write to a snapshot */
+  if (snap_seq != CEPH_NOSNAP) {
+    return -EROFS;
+  }
+
+  Context *on_done = new C_aio_Complete(c);
+#if defined(WITH_EVENTTRACE)
+  static_cast<C_aio_Complete *>(on_done)->oid = oid;
+#endif
+
+  c->io = this;
+  queue_aio_write(c);
+
+  Objecter::Op *same_op = objecter->prepare_writesame_op(
+    oid, oloc, write_len, off, snapc, bl, now, extra_op_flags,
+    on_done, &c->objver);
+  objecter->op_submit(same_op, &c->tid);
+
+  return 0;
+}
+
+// Queue an asynchronous removal of `oid`. `flags` is OR-ed with the IoCtx's
+// extra_op_flags.
+//
+// Returns -EROFS when the IoCtx is set to a snapshot, otherwise 0 after
+// submission.
+int librados::IoCtxImpl::aio_remove(const object_t &oid, AioCompletionImpl *c, int flags)
+{
+  FUNCTRACE(client->cct);
+  auto now = ceph::real_clock::now();
+
+  /* can't write to a snapshot */
+  if (snap_seq != CEPH_NOSNAP) {
+    return -EROFS;
+  }
+
+  Context *on_done = new C_aio_Complete(c);
+#if defined(WITH_EVENTTRACE)
+  static_cast<C_aio_Complete *>(on_done)->oid = oid;
+#endif
+
+  c->io = this;
+  queue_aio_write(c);
+
+  Objecter::Op *remove_op = objecter->prepare_remove_op(
+    oid, oloc, snapc, now, flags | extra_op_flags,
+    on_done, &c->objver);
+  objecter->op_submit(remove_op, &c->tid);
+
+  return 0;
+}
+
+
+// Queue an asynchronous stat of `oid` at the IoCtx's snap_seq. The size is
+// requested into `psize`; the mtime is requested into the ack context's
+// `mtime` member, and `pmtime` is handed to the ack context.
+// Always returns 0 after submitting the op.
+int librados::IoCtxImpl::aio_stat(const object_t& oid, AioCompletionImpl *c,
+ uint64_t *psize, time_t *pmtime)
+{
+ // The ack must be constructed before c->io is set: the C_aio_stat_Ack
+ // constructor asserts that c->io is still null.
+ C_aio_stat_Ack *onack = new C_aio_stat_Ack(c, pmtime);
+ c->is_read = true;
+ c->io = this;
+ Objecter::Op *o = objecter->prepare_stat_op(
+ oid, oloc,
+ snap_seq, psize, &onack->mtime, extra_op_flags,
+ onack, &c->objver);
+ objecter->op_submit(o, &c->tid);
+ return 0;
+}
+
+// Variant of aio_stat that takes a `struct timespec` destination (`pts`)
+// for the mtime instead of a time_t.
+// Always returns 0 after submitting the op.
+int librados::IoCtxImpl::aio_stat2(const object_t& oid, AioCompletionImpl *c,
+ uint64_t *psize, struct timespec *pts)
+{
+ // The ack must be constructed before c->io is set: the C_aio_stat2_Ack
+ // constructor asserts that c->io is still null.
+ C_aio_stat2_Ack *onack = new C_aio_stat2_Ack(c, pts);
+ c->is_read = true;
+ c->io = this;
+ Objecter::Op *o = objecter->prepare_stat_op(
+ oid, oloc,
+ snap_seq, psize, &onack->mtime, extra_op_flags,
+ onack, &c->objver);
+ objecter->op_submit(o, &c->tid);
+ return 0;
+}
+
+// Asynchronously fetch xattr `name` of `oid` into `bl`.
+// Returns whatever aio_operate_read() returns.
+int librados::IoCtxImpl::aio_getxattr(const object_t& oid, AioCompletionImpl *c,
+                                      const char *name, bufferlist& bl)
+{
+  ::ObjectOperation read_op;
+  prepare_assert_ops(&read_op);
+  read_op.getxattr(name, &bl, NULL);
+  return aio_operate_read(oid, &read_op, c, 0, &bl);
+}
+
+// Asynchronously remove xattr `name` from `oid`, using the IoCtx's snap
+// context. Returns whatever aio_operate() returns.
+int librados::IoCtxImpl::aio_rmxattr(const object_t& oid, AioCompletionImpl *c,
+                                     const char *name)
+{
+  ::ObjectOperation write_op;
+  prepare_assert_ops(&write_op);
+  write_op.rmxattr(name);
+  const int rc = aio_operate(oid, &write_op, c, snapc, 0);
+  return rc;
+}
+
+// Asynchronously set xattr `name` on `oid` to the contents of `bl`, using
+// the IoCtx's snap context. Returns whatever aio_operate() returns.
+int librados::IoCtxImpl::aio_setxattr(const object_t& oid, AioCompletionImpl *c,
+                                      const char *name, bufferlist& bl)
+{
+  ::ObjectOperation write_op;
+  prepare_assert_ops(&write_op);
+  write_op.setxattr(name, bl);
+  const int rc = aio_operate(oid, &write_op, c, snapc, 0);
+  return rc;
+}
+
+namespace {
+// State carried across an asynchronous getxattrs call: the user's completion
+// (wrapped in CB_AioCompleteAndSafe), a private result map the op decodes
+// into, the user's destination map, and the client (used for logging in
+// aio_getxattrs_complete).
+struct AioGetxattrsData {
+ AioGetxattrsData(librados::AioCompletionImpl *c, map<string, bufferlist>* attrset,
+ librados::RadosClient *_client) :
+ user_completion(c), user_attrset(attrset), client(_client) {}
+ struct librados::CB_AioCompleteAndSafe user_completion;
+ // Filled by the getxattrs op; copied to user_attrset on success.
+ map<string, bufferlist> result_attrset;
+ // Caller-owned output map.
+ map<std::string, bufferlist>* user_attrset;
+ librados::RadosClient *client;
+};
+}
+
+// Completion callback for aio_getxattrs(). `arg` is the AioGetxattrsData
+// allocated by aio_getxattrs(); `c` is the internal completion.
+//
+// Clears the user's map, copies the fetched attributes into it when the op
+// succeeded, invokes the user's completion with the return value, then
+// drops the internal completion reference and frees the state.
+static void aio_getxattrs_complete(rados_completion_t c, void *arg) {
+  auto *state = reinterpret_cast<AioGetxattrsData*>(arg);
+  const int rc = rados_aio_get_return_value(c);
+
+  state->user_attrset->clear();
+  if (rc >= 0) {
+    for (auto& entry : state->result_attrset) {
+      ldout(state->client->cct, 10) << "IoCtxImpl::getxattrs: xattr=" << entry.first << dendl;
+      (*state->user_attrset)[entry.first] = entry.second;
+    }
+  }
+
+  state->user_completion(rc);
+  static_cast<librados::AioCompletionImpl*>(c)->put();
+  delete state;
+}
+
+// Asynchronously fetch all xattrs of `oid` into `attrset`.
+//
+// The op decodes into a private map held by a heap-allocated
+// AioGetxattrsData; an internal completion is created whose callback
+// (aio_getxattrs_complete) copies the result to `attrset`, fires the user's
+// completion `c`, and frees both the state and the internal completion ref.
+// Returns whatever aio_operate_read() returns.
+int librados::IoCtxImpl::aio_getxattrs(const object_t& oid, AioCompletionImpl *c,
+ map<std::string, bufferlist>& attrset)
+{
+ AioGetxattrsData *cdata = new AioGetxattrsData(c, &attrset, client);
+ ::ObjectOperation rd;
+ prepare_assert_ops(&rd);
+ rd.getxattrs(&cdata->result_attrset, NULL);
+ // Internal completion; released in aio_getxattrs_complete().
+ librados::AioCompletionImpl *comp = new librados::AioCompletionImpl;
+ comp->set_complete_callback(cdata, aio_getxattrs_complete);
+ return aio_operate_read(oid, &rd, comp, 0, NULL);
+}
+
+// Ask the objecter to cancel the op identified by c->tid with -ECANCELED.
+// Returns the objecter's op_cancel() result.
+int librados::IoCtxImpl::aio_cancel(AioCompletionImpl *c)
+{
+ return objecter->op_cancel(c->tid, -ECANCELED);
+}
+
+
+// Queue an asynchronous PG read that lists hit sets for the PG selected by
+// `hash` in this IoCtx's pool; results are requested into `pls`.
+// Always returns 0 after submitting the op.
+int librados::IoCtxImpl::hit_set_list(uint32_t hash, AioCompletionImpl *c,
+                                      std::list< std::pair<time_t, time_t> > *pls)
+{
+  Context *on_done = new C_aio_Complete(c);
+  c->is_read = true;
+  c->io = this;
+
+  ::ObjectOperation list_op;
+  list_op.hit_set_ls(pls, NULL);
+
+  // Pool-only locator (intentionally not the IoCtx's own oloc member).
+  object_locator_t pool_loc(poolid);
+  Objecter::Op *pg_op = objecter->prepare_pg_read_op(
+    hash, pool_loc, list_op, nullptr, extra_op_flags, on_done,
+    nullptr, nullptr);
+  objecter->op_submit(pg_op, &c->tid);
+  return 0;
+}
+
+// Queue an asynchronous PG read that fetches the hit set for time `stamp`
+// from the PG selected by `hash` in this IoCtx's pool into `pbl`.
+// Always returns 0 after submitting the op.
+int librados::IoCtxImpl::hit_set_get(uint32_t hash, AioCompletionImpl *c,
+                                     time_t stamp,
+                                     bufferlist *pbl)
+{
+  Context *on_done = new C_aio_Complete(c);
+  c->is_read = true;
+  c->io = this;
+
+  ::ObjectOperation get_op;
+  get_op.hit_set_get(ceph::real_clock::from_time_t(stamp), pbl, 0);
+
+  // Pool-only locator (intentionally not the IoCtx's own oloc member).
+  object_locator_t pool_loc(poolid);
+  Objecter::Op *pg_op = objecter->prepare_pg_read_op(
+    hash, pool_loc, get_op, nullptr, extra_op_flags, on_done,
+    nullptr, nullptr);
+  objecter->op_submit(pg_op, &c->tid);
+  return 0;
+}
+
+// Synchronously remove `oid`, passing librados::OPERATION_FULL_FORCE as the
+// operation flags. Returns the result of operate().
+int librados::IoCtxImpl::remove(const object_t& oid)
+{
+  ::ObjectOperation rm_op;
+  prepare_assert_ops(&rm_op);
+  rm_op.remove();
+  const int rc = operate(oid, &rm_op, nullptr, librados::OPERATION_FULL_FORCE);
+  return rc;
+}
+
+// Synchronously remove `oid` with caller-supplied operation `flags`.
+// Returns the result of operate().
+int librados::IoCtxImpl::remove(const object_t& oid, int flags)
+{
+  ::ObjectOperation rm_op;
+  prepare_assert_ops(&rm_op);
+  rm_op.remove();
+  const int rc = operate(oid, &rm_op, nullptr, flags);
+  return rc;
+}
+
+// Synchronously issue a truncate op on `oid` with the given `size`.
+// Returns the result of operate().
+int librados::IoCtxImpl::trunc(const object_t& oid, uint64_t size)
+{
+  ::ObjectOperation trunc_op;
+  prepare_assert_ops(&trunc_op);
+  trunc_op.truncate(size);
+  const int rc = operate(oid, &trunc_op, nullptr);
+  return rc;
+}
+
+// Queue an asynchronous scrub_ls PG op for `pg`, requesting up to
+// `max_to_get` inconsistent objects after `start_after` into `objects`,
+// with `interval` and the completion's rval passed as further outputs.
+// Always returns 0 after submitting the op.
+int librados::IoCtxImpl::get_inconsistent_objects(const pg_t& pg,
+                                                  const librados::object_id_t& start_after,
+                                                  uint64_t max_to_get,
+                                                  AioCompletionImpl *c,
+                                                  std::vector<inconsistent_obj_t>* objects,
+                                                  uint32_t* interval)
+{
+  Context *on_done = new C_aio_Complete(c);
+  c->is_read = true;
+  c->io = this;
+
+  ::ObjectOperation scrub_op;
+  scrub_op.scrub_ls(start_after, max_to_get, objects, interval, &c->rval);
+
+  // Locator built from the pool id and the PG's placement seed.
+  object_locator_t pg_loc{poolid, pg.ps()};
+  Objecter::Op *pg_op = objecter->prepare_pg_read_op(
+    pg_loc.hash, pg_loc, scrub_op, nullptr,
+    CEPH_OSD_FLAG_PGOP | extra_op_flags, on_done,
+    nullptr, nullptr);
+  objecter->op_submit(pg_op, &c->tid);
+  return 0;
+}
+
+// Queue an asynchronous scrub_ls PG op for `pg`, requesting up to
+// `max_to_get` inconsistent snapsets after `start_after` into `snapsets`,
+// with `interval` and the completion's rval passed as further outputs.
+// Always returns 0 after submitting the op.
+int librados::IoCtxImpl::get_inconsistent_snapsets(const pg_t& pg,
+                                                   const librados::object_id_t& start_after,
+                                                   uint64_t max_to_get,
+                                                   AioCompletionImpl *c,
+                                                   std::vector<inconsistent_snapset_t>* snapsets,
+                                                   uint32_t* interval)
+{
+  Context *on_done = new C_aio_Complete(c);
+  c->is_read = true;
+  c->io = this;
+
+  ::ObjectOperation scrub_op;
+  scrub_op.scrub_ls(start_after, max_to_get, snapsets, interval, &c->rval);
+
+  // Locator built from the pool id and the PG's placement seed.
+  object_locator_t pg_loc{poolid, pg.ps()};
+  Objecter::Op *pg_op = objecter->prepare_pg_read_op(
+    pg_loc.hash, pg_loc, scrub_op, nullptr,
+    CEPH_OSD_FLAG_PGOP | extra_op_flags, on_done,
+    nullptr, nullptr);
+  objecter->op_submit(pg_op, &c->tid);
+  return 0;
+}
+
+// Synchronously apply the tmap update commands in `cmdbl` to `oid`.
+// Returns the result of operate().
+int librados::IoCtxImpl::tmap_update(const object_t& oid, bufferlist& cmdbl)
+{
+  ::ObjectOperation update_op;
+  prepare_assert_ops(&update_op);
+  update_op.tmap_update(cmdbl);
+  const int rc = operate(oid, &update_op, nullptr);
+  return rc;
+}
+
+// Synchronously invoke object-class method `cls`.`method` on `oid` with
+// input `inbl`; `outbl` is passed to operate_read() as the output buffer.
+// Returns the result of operate_read().
+int librados::IoCtxImpl::exec(const object_t& oid,
+                              const char *cls, const char *method,
+                              bufferlist& inbl, bufferlist& outbl)
+{
+  ::ObjectOperation call_op;
+  prepare_assert_ops(&call_op);
+  call_op.call(cls, method, inbl);
+  const int rc = operate_read(oid, &call_op, &outbl);
+  return rc;
+}
+
+// Queue an asynchronous object-class call `cls`.`method` on `oid` (at the
+// IoCtx's snap_seq) with input `inbl`; `outbl` is passed to the objecter as
+// the output bufferlist.
+// Always returns 0 after submitting the op.
+int librados::IoCtxImpl::aio_exec(const object_t& oid, AioCompletionImpl *c,
+                                  const char *cls, const char *method,
+                                  bufferlist& inbl, bufferlist *outbl)
+{
+  FUNCTRACE(client->cct);
+  Context *on_done = new C_aio_Complete(c);
+#if defined(WITH_EVENTTRACE)
+  static_cast<C_aio_Complete *>(on_done)->oid = oid;
+#endif
+
+  c->is_read = true;
+  c->io = this;
+
+  ::ObjectOperation call_op;
+  prepare_assert_ops(&call_op);
+  call_op.call(cls, method, inbl);
+
+  Objecter::Op *exec_op = objecter->prepare_read_op(
+    oid, oloc, call_op, snap_seq, outbl, extra_op_flags,
+    on_done, &c->objver);
+  objecter->op_submit(exec_op, &c->tid);
+  return 0;
+}
+
+// Queue an asynchronous object-class call `cls`.`method` on `oid` with input
+// `inbl`, directing output to the caller's raw buffer `buf` of `out_len`
+// bytes.
+//
+// The completion's own bufferlist is reset and given a static buffer that
+// wraps `buf`; `buf` is also recorded in c->out_buf.
+// Always returns 0 after submitting the op.
+int librados::IoCtxImpl::aio_exec(const object_t& oid, AioCompletionImpl *c,
+                                  const char *cls, const char *method,
+                                  bufferlist& inbl, char *buf, size_t out_len)
+{
+  FUNCTRACE(client->cct);
+  Context *on_done = new C_aio_Complete(c);
+#if defined(WITH_EVENTTRACE)
+  static_cast<C_aio_Complete *>(on_done)->oid = oid;
+#endif
+
+  c->is_read = true;
+  c->io = this;
+
+  // Point the completion's bufferlist at the user buffer.
+  c->bl.clear();
+  c->bl.push_back(buffer::create_static(out_len, buf));
+  c->blp = &c->bl;
+  c->out_buf = buf;
+
+  ::ObjectOperation call_op;
+  prepare_assert_ops(&call_op);
+  call_op.call(cls, method, inbl);
+
+  Objecter::Op *exec_op = objecter->prepare_read_op(
+    oid, oloc, call_op, snap_seq, &c->bl, extra_op_flags,
+    on_done, &c->objver);
+  objecter->op_submit(exec_op, &c->tid);
+  return 0;
+}
+
+// Synchronously read up to `len` bytes at offset `off` of `oid` into `bl`.
+//
+// Returns -EDOM when `len` exceeds INT_MAX, a negative error from
+// operate_read() on failure, or bl.length() on success. A short read is
+// logged at debug level 10.
+int librados::IoCtxImpl::read(const object_t& oid,
+                              bufferlist& bl, size_t len, uint64_t off)
+{
+  if (len > static_cast<size_t>(INT_MAX)) {
+    return -EDOM;
+  }
+  OID_EVENT_TRACE(oid.name.c_str(), "RADOS_READ_OP_BEGIN");
+
+  ::ObjectOperation read_op;
+  prepare_assert_ops(&read_op);
+  read_op.read(off, len, &bl, NULL, NULL);
+
+  const int rc = operate_read(oid, &read_op, &bl);
+  if (rc < 0) {
+    return rc;
+  }
+
+  if (bl.length() < len) {
+    ldout(client->cct, 10) << "Returned length " << bl.length()
+                           << " less than original length "<< len << dendl;
+  }
+  return bl.length();
+}
+
+// Synchronously compare the contents of `cmp_bl` against `oid` starting at
+// offset `off`.
+// Returns -E2BIG when `cmp_bl` is longer than UINT_MAX/2, otherwise the
+// result of operate_read().
+int librados::IoCtxImpl::cmpext(const object_t& oid, uint64_t off,
+                                bufferlist& cmp_bl)
+{
+  if (cmp_bl.length() > UINT_MAX/2) {
+    return -E2BIG;
+  }
+
+  ::ObjectOperation cmp_op;
+  prepare_assert_ops(&cmp_op);
+  cmp_op.cmpext(off, cmp_bl, NULL);
+  const int rc = operate_read(oid, &cmp_op, nullptr);
+  return rc;
+}
+
+// Synchronously fetch the extent map of `oid` for the range [off, off+len)
+// at the IoCtx's snap_seq and decode it into `m`.
+//
+// Blocks on a local mutex/condition pair until the objecter completes the
+// request. Returns a negative error code on failure, otherwise the number
+// of entries decoded into `m`.
+int librados::IoCtxImpl::mapext(const object_t& oid,
+                                uint64_t off, size_t len,
+                                std::map<uint64_t,uint64_t>& m)
+{
+  bufferlist bl;
+
+  ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::read::mylock");
+  ceph::condition_variable cond;
+  // Both are handed to the completion by pointer and read back below; start
+  // them in a defined state rather than relying on the callback to
+  // initialize them.
+  bool done = false;
+  int r = 0;
+  Context *onack = new C_SafeCond(mylock, cond, &done, &r);
+
+  objecter->mapext(oid, oloc,
+                   off, len, snap_seq, &bl, extra_op_flags,
+                   onack);
+
+  {
+    unique_lock l{mylock};
+    cond.wait(l, [&done] { return done;});
+  }
+  ldout(client->cct, 10) << "Objecter returned from read r=" << r << dendl;
+
+  if (r < 0)
+    return r;
+
+  auto iter = bl.cbegin();
+  decode(m, iter);
+
+  return m.size();
+}
+
+// Synchronously sparse-read `len` bytes at `off` from `oid`; the extent map
+// is requested into `m` and the data into `data_bl`.
+//
+// Returns -EDOM when `len` exceeds INT_MAX, a negative error from
+// operate_read() on failure, or m.size() on success.
+int librados::IoCtxImpl::sparse_read(const object_t& oid,
+                                     std::map<uint64_t,uint64_t>& m,
+                                     bufferlist& data_bl, size_t len,
+                                     uint64_t off)
+{
+  if (len > static_cast<size_t>(INT_MAX)) {
+    return -EDOM;
+  }
+
+  ::ObjectOperation read_op;
+  prepare_assert_ops(&read_op);
+  read_op.sparse_read(off, len, &m, &data_bl, NULL);
+
+  const int rc = operate_read(oid, &read_op, nullptr);
+  if (rc < 0) {
+    return rc;
+  }
+  return m.size();
+}
+
+// Synchronously issue a checksum op of the given `type` over `len` bytes at
+// `off` of `oid`, with `init_value` and `chunk_size` forwarded to the op and
+// `pbl` as its output bufferlist.
+//
+// Returns -EDOM when `len` exceeds INT_MAX, a negative error from
+// operate_read() on failure, and 0 on success.
+int librados::IoCtxImpl::checksum(const object_t& oid, uint8_t type,
+                                  const bufferlist &init_value, size_t len,
+                                  uint64_t off, size_t chunk_size,
+                                  bufferlist *pbl)
+{
+  if (len > static_cast<size_t>(INT_MAX))
+    return -EDOM;
+
+  ::ObjectOperation csum_op;
+  prepare_assert_ops(&csum_op);
+  csum_op.checksum(type, init_value, off, len, chunk_size, pbl, nullptr, nullptr);
+
+  // Non-negative results from operate_read() are normalized to 0.
+  const int rc = operate_read(oid, &csum_op, nullptr);
+  return rc < 0 ? rc : 0;
+}
+
+// Synchronously stat `oid`. The size is written through `psize` when it is
+// non-null; on success (r >= 0) the mtime is converted to time_t and stored
+// in `*pmtime` when `pmtime` is non-null.
+// Returns the result of operate_read().
+int librados::IoCtxImpl::stat(const object_t& oid, uint64_t *psize, time_t *pmtime)
+{
+  // Scratch destination used when the caller does not want the size.
+  uint64_t unused_size;
+  real_time obj_mtime;
+
+  uint64_t *size_out = psize ? psize : &unused_size;
+
+  ::ObjectOperation stat_op;
+  prepare_assert_ops(&stat_op);
+  stat_op.stat(size_out, &obj_mtime, nullptr);
+
+  const int rc = operate_read(oid, &stat_op, nullptr);
+  if (pmtime && rc >= 0) {
+    *pmtime = real_clock::to_time_t(obj_mtime);
+  }
+  return rc;
+}
+
+// Synchronously stat `oid`, reporting the mtime as a struct timespec.
+// The size is written through `psize` when it is non-null; on success the
+// mtime is stored in `*pts` when `pts` is non-null.
+// Returns a negative error from operate_read() on failure, 0 on success.
+int librados::IoCtxImpl::stat2(const object_t& oid, uint64_t *psize, struct timespec *pts)
+{
+  // Scratch destination used when the caller does not want the size.
+  uint64_t unused_size;
+  ceph::real_time obj_mtime;
+
+  uint64_t *size_out = psize ? psize : &unused_size;
+
+  ::ObjectOperation stat_op;
+  prepare_assert_ops(&stat_op);
+  stat_op.stat(size_out, &obj_mtime, nullptr);
+
+  const int rc = operate_read(oid, &stat_op, nullptr);
+  if (rc < 0)
+    return rc;
+
+  if (pts)
+    *pts = ceph::real_clock::to_timespec(obj_mtime);
+  return 0;
+}
+
+// Synchronously fetch xattr `name` of `oid` into `bl`.
+// Returns a negative error from operate_read() on failure, otherwise
+// bl.length().
+int librados::IoCtxImpl::getxattr(const object_t& oid,
+                                  const char *name, bufferlist& bl)
+{
+  ::ObjectOperation read_op;
+  prepare_assert_ops(&read_op);
+  read_op.getxattr(name, &bl, NULL);
+
+  const int rc = operate_read(oid, &read_op, &bl);
+  if (rc < 0) {
+    return rc;
+  }
+  return bl.length();
+}
+
+// Synchronously remove xattr `name` from `oid`.
+// Returns the result of operate().
+int librados::IoCtxImpl::rmxattr(const object_t& oid, const char *name)
+{
+  ::ObjectOperation write_op;
+  prepare_assert_ops(&write_op);
+  write_op.rmxattr(name);
+  const int rc = operate(oid, &write_op, nullptr);
+  return rc;
+}
+
+// Synchronously set xattr `name` on `oid` to the contents of `bl`.
+// Returns the result of operate().
+int librados::IoCtxImpl::setxattr(const object_t& oid,
+                                  const char *name, bufferlist& bl)
+{
+  ::ObjectOperation write_op;
+  prepare_assert_ops(&write_op);
+  write_op.setxattr(name, bl);
+  const int rc = operate(oid, &write_op, nullptr);
+  return rc;
+}
+
+// Synchronously fetch all xattrs of `oid` into `attrset`.
+//
+// `attrset` is always cleared first; it is populated only when
+// operate_read() reports success (r >= 0). Returns the operate_read()
+// result.
+int librados::IoCtxImpl::getxattrs(const object_t& oid,
+                                   map<std::string, bufferlist>& attrset)
+{
+  map<string, bufferlist> aset;
+
+  ::ObjectOperation rd;
+  prepare_assert_ops(&rd);
+  rd.getxattrs(&aset, NULL);
+  int r = operate_read(oid, &rd, NULL);
+
+  attrset.clear();
+  if (r >= 0) {
+    for (const auto& entry : aset) {
+      ldout(client->cct, 10) << "IoCtxImpl::getxattrs: xattr=" << entry.first << dendl;
+      // Use the key as-is (same as aio_getxattrs_complete) instead of
+      // round-tripping it through c_str(), which re-measured and re-allocated
+      // the name and would cut it at an embedded NUL.
+      attrset[entry.first] = entry.second;
+    }
+  }
+
+  return r;
+}
+
+// Record `ver` as the IoCtx's last observed object version (last_objver).
+void librados::IoCtxImpl::set_sync_op_version(version_t ver)
+{
+ // The write below is unsynchronized; the annotation marks the race on
+ // last_objver as benign for race-detection tooling.
+ ANNOTATE_BENIGN_RACE_SIZED(&last_objver, sizeof(last_objver),
+ "IoCtxImpl last_objver");
+ last_objver = ver;
+}
+
+namespace librados {
+// Hooks that let boost::intrusive_ptr<IoCtxImpl> (used by WatchInfo below)
+// manage the IoCtxImpl reference count through get()/put().
+void intrusive_ptr_add_ref(IoCtxImpl *p) { p->get(); }
+void intrusive_ptr_release(IoCtxImpl *p) { p->put(); }
+}
+
+// Callable installed as a linger op's handle for watches. It holds a
+// reference to the IoCtx, the watched object id, and the user's watch
+// callbacks (legacy WatchCtx and/or WatchCtx2), and dispatches incoming
+// notifies and errors to them.
+struct WatchInfo {
+ boost::intrusive_ptr<librados::IoCtxImpl> ioctx;
+ object_t oid;
+ librados::WatchCtx *ctx;
+ librados::WatchCtx2 *ctx2;
+
+ WatchInfo(librados::IoCtxImpl *io, object_t o,
+ librados::WatchCtx *c, librados::WatchCtx2 *c2)
+ : ioctx(io), oid(o), ctx(c), ctx2(c2) {}
+
+ // Deliver a notify to whichever callbacks are set. For the legacy
+ // WatchCtx path the ack is sent from here with an empty payload.
+ void handle_notify(uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist& bl) {
+ ldout(ioctx->client->cct, 10) << __func__ << " " << notify_id
+ << " cookie " << cookie
+ << " notifier_id " << notifier_id
+ << " len " << bl.length()
+ << dendl;
+
+ if (ctx2)
+ ctx2->handle_notify(notify_id, cookie, notifier_id, bl);
+ if (ctx) {
+ ctx->notify(0, 0, bl);
+
+ // send ACK back to OSD if using legacy protocol
+ bufferlist empty;
+ ioctx->notify_ack(oid, notify_id, cookie, empty);
+ }
+ }
+ // Deliver a watch error; only WatchCtx2 has an error hook.
+ void handle_error(uint64_t cookie, int err) {
+ ldout(ioctx->client->cct, 10) << __func__ << " cookie " << cookie
+ << " err " << err
+ << dendl;
+ if (ctx2)
+ ctx2->handle_error(cookie, err);
+ }
+
+ // Entry point invoked through the linger op handle: a set error code is
+ // routed to handle_error (converted to an int error), anything else to
+ // handle_notify.
+ void operator()(bs::error_code ec,
+ uint64_t notify_id,
+ uint64_t cookie,
+ uint64_t notifier_id,
+ bufferlist&& bl) {
+ if (ec) {
+ handle_error(cookie, ceph::from_error_code(ec));
+ } else {
+ handle_notify(notify_id, cookie, notifier_id, bl);
+ }
+ }
+};
+
+// internal WatchInfo that owns the context memory
+// The unique_ptr members take ownership of the same pointers that the base
+// class stores as raw pointers, so the contexts are destroyed together with
+// this object. Note that these members have the same names as (and hide)
+// the base-class raw pointers.
+struct InternalWatchInfo : public WatchInfo {
+ std::unique_ptr<librados::WatchCtx> ctx;
+ std::unique_ptr<librados::WatchCtx2> ctx2;
+
+ InternalWatchInfo(librados::IoCtxImpl *io, object_t o,
+ librados::WatchCtx *c, librados::WatchCtx2 *c2)
+ : WatchInfo(io, o, c, c2), ctx(c), ctx2(c2) {}
+};
+
+// Convenience overload: forwards to the timeout-taking watch() with a
+// timeout of 0.
+int librados::IoCtxImpl::watch(const object_t& oid, uint64_t *handle,
+ librados::WatchCtx *ctx,
+ librados::WatchCtx2 *ctx2,
+ bool internal)
+{
+ return watch(oid, handle, ctx, ctx2, 0, internal);
+}
+
+// Synchronously establish a watch on `oid`.
+//
+// Registers a linger op, stores its cookie in `*handle`, installs a
+// WatchInfo (or an owning InternalWatchInfo when `internal` is true) as the
+// notify handler, and waits for the watch op to finish.
+//
+// On failure the linger op is cancelled and `*handle` is reset to 0.
+// Returns the result of the watch op.
+int librados::IoCtxImpl::watch(const object_t& oid, uint64_t *handle,
+                               librados::WatchCtx *ctx,
+                               librados::WatchCtx2 *ctx2,
+                               uint32_t timeout,
+                               bool internal)
+{
+  ::ObjectOperation wr;
+  // Initialized because it is read unconditionally below, including when
+  // the watch fails; matches how unwatch() initializes its version.
+  version_t objver = 0;
+  C_SaferCond onfinish;
+
+  Objecter::LingerOp *linger_op = objecter->linger_register(oid, oloc,
+                                                            extra_op_flags);
+  *handle = linger_op->get_cookie();
+  if (internal) {
+    linger_op->handle = InternalWatchInfo(this, oid, ctx, ctx2);
+  } else {
+    linger_op->handle = WatchInfo(this, oid, ctx, ctx2);
+  }
+  prepare_assert_ops(&wr);
+  wr.watch(*handle, CEPH_OSD_WATCH_OP_WATCH, timeout);
+  bufferlist bl;
+  objecter->linger_watch(linger_op, wr,
+                         snapc, ceph::real_clock::now(), bl,
+                         &onfinish,
+                         &objver);
+
+  int r = onfinish.wait();
+
+  set_sync_op_version(objver);
+
+  if (r < 0) {
+    objecter->linger_cancel(linger_op);
+    *handle = 0;
+  }
+
+  return r;
+}
+
+// Convenience overload: forwards to the timeout-taking aio_watch() with a
+// timeout of 0.
+int librados::IoCtxImpl::aio_watch(const object_t& oid,
+ AioCompletionImpl *c,
+ uint64_t *handle,
+ librados::WatchCtx *ctx,
+ librados::WatchCtx2 *ctx2,
+ bool internal) {
+ return aio_watch(oid, c, handle, ctx, ctx2, 0, internal);
+}
+
+// Asynchronously establish a watch on `oid`.
+//
+// Registers a linger op, stores its cookie in `*handle`, installs a
+// WatchInfo (or an owning InternalWatchInfo when `internal` is true) as the
+// notify handler, and issues the watch with a C_aio_linger_Complete context
+// bound to completion `c`. Always returns 0.
+int librados::IoCtxImpl::aio_watch(const object_t& oid,
+ AioCompletionImpl *c,
+ uint64_t *handle,
+ librados::WatchCtx *ctx,
+ librados::WatchCtx2 *ctx2,
+ uint32_t timeout,
+ bool internal)
+{
+ Objecter::LingerOp *linger_op = objecter->linger_register(oid, oloc,
+ extra_op_flags);
+ c->io = this;
+ Context *oncomplete = new C_aio_linger_Complete(c, linger_op, false);
+
+ ::ObjectOperation wr;
+ *handle = linger_op->get_cookie();
+ if (internal) {
+ linger_op->handle = InternalWatchInfo(this, oid, ctx, ctx2);
+ } else {
+ linger_op->handle = WatchInfo(this, oid, ctx, ctx2);
+ }
+
+ prepare_assert_ops(&wr);
+ wr.watch(*handle, CEPH_OSD_WATCH_OP_WATCH, timeout);
+ bufferlist bl;
+ objecter->linger_watch(linger_op, wr,
+ snapc, ceph::real_clock::now(), bl,
+ oncomplete, &c->objver);
+
+ return 0;
+}
+
+
+// Send a notify acknowledgement for (`notify_id`, `cookie`) on `oid` with
+// payload `bl`. The read op is issued with null completion arguments, so
+// nothing here waits for or observes its result. Always returns 0.
+int librados::IoCtxImpl::notify_ack(
+ const object_t& oid,
+ uint64_t notify_id,
+ uint64_t cookie,
+ bufferlist& bl)
+{
+ ::ObjectOperation rd;
+ prepare_assert_ops(&rd);
+ rd.notify_ack(notify_id, cookie, bl);
+ objecter->read(oid, oloc, rd, snap_seq, (bufferlist*)NULL, extra_op_flags, 0, 0);
+ return 0;
+}
+
+// Check the watch identified by `cookie` (the LingerOp pointer handed out
+// as the watch handle).
+//
+// When linger_check() yields a duration, returns 1 plus that duration in
+// milliseconds; otherwise returns the error converted by
+// ceph::from_error_code().
+int librados::IoCtxImpl::watch_check(uint64_t cookie)
+{
+  auto *linger_op = reinterpret_cast<Objecter::LingerOp*>(cookie);
+  auto checked = objecter->linger_check(linger_op);
+  if (!checked) {
+    return ceph::from_error_code(checked.error());
+  }
+
+  const auto elapsed_ms =
+    std::chrono::duration_cast<std::chrono::milliseconds>(*checked).count();
+  return 1 + elapsed_ms;
+}
+
+// Synchronously remove the watch identified by `cookie` (the LingerOp
+// pointer handed out as the watch handle).
+// Returns the result of the unwatch mutate op.
+int librados::IoCtxImpl::unwatch(uint64_t cookie)
+{
+ Objecter::LingerOp *linger_op = reinterpret_cast<Objecter::LingerOp*>(cookie);
+ C_SaferCond onfinish;
+ version_t ver = 0;
+
+ ::ObjectOperation wr;
+ prepare_assert_ops(&wr);
+ wr.watch(cookie, CEPH_OSD_WATCH_OP_UNWATCH);
+ objecter->mutate(linger_op->target.base_oid, oloc, wr,
+ snapc, ceph::real_clock::now(), extra_op_flags,
+ &onfinish, &ver);
+ // The linger op is cancelled right after the unwatch is issued, before
+ // waiting for the unwatch result.
+ objecter->linger_cancel(linger_op);
+
+ int r = onfinish.wait();
+ set_sync_op_version(ver);
+ return r;
+}
+
+// Asynchronously remove the watch identified by `cookie` (the LingerOp
+// pointer handed out as the watch handle). The unwatch mutate is issued
+// with a C_aio_linger_Complete context constructed with `true` as its last
+// argument (aio_watch passes `false`). Always returns 0.
+int librados::IoCtxImpl::aio_unwatch(uint64_t cookie, AioCompletionImpl *c)
+{
+ c->io = this;
+ Objecter::LingerOp *linger_op = reinterpret_cast<Objecter::LingerOp*>(cookie);
+ Context *oncomplete = new C_aio_linger_Complete(c, linger_op, true);
+
+ ::ObjectOperation wr;
+ prepare_assert_ops(&wr);
+ wr.watch(cookie, CEPH_OSD_WATCH_OP_UNWATCH);
+ objecter->mutate(linger_op->target.base_oid, oloc, wr,
+ snapc, ceph::real_clock::now(), extra_op_flags,
+ oncomplete, &c->objver);
+ return 0;
+}
+
+// Synchronously send a notify with payload `bl` to the watchers of `oid`
+// and wait for it to finish.
+//
+// `timeout_ms`, when non-zero, overrides the IoCtx's notify_timeout; it is
+// converted to whole seconds by integer division, so values below 1000
+// become 0. Reply data is delivered through `preply_bl` or
+// `preply_buf`/`preply_buf_len` by CB_notify_Finish.
+//
+// Returns the ack error if the notify could not be initiated, otherwise the
+// result reported by the notify-finish callback.
+int librados::IoCtxImpl::notify(const object_t& oid, bufferlist& bl,
+                                uint64_t timeout_ms,
+                                bufferlist *preply_bl,
+                                char **preply_buf, size_t *preply_buf_len)
+{
+  Objecter::LingerOp *linger_op = objecter->linger_register(oid, oloc,
+                                                            extra_op_flags);
+
+  C_SaferCond notify_finish_cond;
+  linger_op->on_notify_finish =
+    Objecter::LingerOp::OpComp::create(
+      objecter->service.get_executor(),
+      CB_notify_Finish(client->cct, &notify_finish_cond,
+                       objecter, linger_op, preply_bl,
+                       preply_buf, preply_buf_len));
+  uint32_t timeout = notify_timeout;
+  if (timeout_ms)
+    timeout = timeout_ms / 1000;
+
+  // Construct RADOS op
+  ::ObjectOperation rd;
+  prepare_assert_ops(&rd);
+  bufferlist inbl;
+  rd.notify(linger_op->get_cookie(), 1, timeout, bl, &inbl);
+
+  // Issue RADOS op
+  C_SaferCond onack;
+  // Initialized because it is read unconditionally at the end, including
+  // when the notify could not be initiated.
+  version_t objver = 0;
+  objecter->linger_notify(linger_op,
+                          rd, snap_seq, inbl, NULL,
+                          &onack, &objver);
+
+  ldout(client->cct, 10) << __func__ << " issued linger op " << linger_op << dendl;
+  int r = onack.wait();
+  ldout(client->cct, 10) << __func__ << " linger op " << linger_op
+                         << " acked (" << r << ")" << dendl;
+
+  if (r == 0) {
+    ldout(client->cct, 10) << __func__ << " waiting for watch_notify finish "
+                           << linger_op << dendl;
+    r = notify_finish_cond.wait();
+
+  } else {
+    ldout(client->cct, 10) << __func__ << " failed to initiate notify, r = "
+                           << r << dendl;
+    // Still wait for the finish callback, but keep the ack error as the
+    // return value.
+    notify_finish_cond.wait();
+  }
+
+  objecter->linger_cancel(linger_op);
+
+  set_sync_op_version(objver);
+  return r;
+}
+
+// Asynchronously send a notify with payload `bl` to the watchers of `oid`.
+//
+// A C_aio_notify_Complete bound to `c` is shared by two callbacks: the
+// notify-finish handler (CB_notify_Finish) and the ack context
+// (C_aio_notify_Ack). `timeout_ms`, when non-zero, overrides the IoCtx's
+// notify_timeout after integer division by 1000. Always returns 0.
+int librados::IoCtxImpl::aio_notify(const object_t& oid, AioCompletionImpl *c,
+ bufferlist& bl, uint64_t timeout_ms,
+ bufferlist *preply_bl, char **preply_buf,
+ size_t *preply_buf_len)
+{
+ Objecter::LingerOp *linger_op = objecter->linger_register(oid, oloc,
+ extra_op_flags);
+
+ c->io = this;
+
+ C_aio_notify_Complete *oncomplete = new C_aio_notify_Complete(c, linger_op);
+ linger_op->on_notify_finish =
+ Objecter::LingerOp::OpComp::create(
+ objecter->service.get_executor(),
+ CB_notify_Finish(client->cct, oncomplete,
+ objecter, linger_op,
+ preply_bl, preply_buf,
+ preply_buf_len));
+ Context *onack = new C_aio_notify_Ack(client->cct, oncomplete);
+
+ uint32_t timeout = notify_timeout;
+ if (timeout_ms)
+ timeout = timeout_ms / 1000;
+
+ // Construct RADOS op
+ ::ObjectOperation rd;
+ prepare_assert_ops(&rd);
+ bufferlist inbl;
+ rd.notify(linger_op->get_cookie(), 1, timeout, bl, &inbl);
+
+ // Issue RADOS op
+ objecter->linger_notify(linger_op,
+ rd, snap_seq, inbl, NULL,
+ onack, &c->objver);
+ return 0;
+}
+
+// Synchronously issue a set_alloc_hint op on `oid` with the given expected
+// object size, expected write size and `flags`.
+// Returns the result of operate().
+int librados::IoCtxImpl::set_alloc_hint(const object_t& oid,
+                                        uint64_t expected_object_size,
+                                        uint64_t expected_write_size,
+                                        uint32_t flags)
+{
+  ::ObjectOperation hint_op;
+  prepare_assert_ops(&hint_op);
+  hint_op.set_alloc_hint(expected_object_size, expected_write_size, flags);
+  const int rc = operate(oid, &hint_op, nullptr);
+  return rc;
+}
+
+// Return last_objver, the value most recently stored by
+// set_sync_op_version().
+version_t librados::IoCtxImpl::last_version()
+{
+ return last_objver;
+}
+
+// Store `ver` in assert_ver.
+// NOTE(review): presumably consumed by prepare_assert_ops(), which is not
+// visible in this part of the file -- confirm.
+void librados::IoCtxImpl::set_assert_version(uint64_t ver)
+{
+ assert_ver = ver;
+}
+
+// Set the default notify timeout used by notify()/aio_notify() when their
+// timeout_ms argument is 0.
+void librados::IoCtxImpl::set_notify_timeout(uint32_t timeout)
+{
+ notify_timeout = timeout;
+}
+
+// Synchronously issue a cache_pin op on `oid`.
+// Returns the result of operate().
+int librados::IoCtxImpl::cache_pin(const object_t& oid)
+{
+  ::ObjectOperation pin_op;
+  prepare_assert_ops(&pin_op);
+  pin_op.cache_pin();
+  const int rc = operate(oid, &pin_op, nullptr);
+  return rc;
+}
+
+// Synchronously issue a cache_unpin op on `oid`.
+// Returns the result of operate().
+int librados::IoCtxImpl::cache_unpin(const object_t& oid)
+{
+  ::ObjectOperation unpin_op;
+  prepare_assert_ops(&unpin_op);
+  unpin_op.cache_unpin();
+  const int rc = operate(oid, &unpin_op, nullptr);
+  return rc;
+}
+
+
+///////////////////////////// C_aio_stat_Ack ////////////////////////////
+
+// Bind the ack to completion `_c` and the caller's time_t destination `pm`.
+// Asserts that the completion has not been attached to an IoCtx yet, and
+// takes a reference on it (dropped in finish()).
+librados::IoCtxImpl::C_aio_stat_Ack::C_aio_stat_Ack(AioCompletionImpl *_c,
+ time_t *pm)
+ : c(_c), pmtime(pm)
+{
+ ceph_assert(!c->io);
+ c->get();
+}
+
+// Completion handler for aio_stat: records the result on the completion
+// under its lock, wakes waiters, converts the mtime for the caller on
+// success, schedules the user callback (if any) on the client's finish
+// strand, then drops the reference taken in the constructor.
+void librados::IoCtxImpl::C_aio_stat_Ack::finish(int r)
+{
+ c->lock.lock();
+ c->rval = r;
+ c->complete = true;
+ c->cond.notify_all();
+
+ if (r >= 0 && pmtime) {
+ *pmtime = real_clock::to_time_t(mtime);
+ }
+
+ if (c->callback_complete) {
+ boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c));
+ }
+
+ // Releases the lock acquired above together with the reference.
+ c->put_unlock();
+}
+
+///////////////////////////// C_aio_stat2_Ack ////////////////////////////
+
+// Bind the ack to completion `_c` and the caller's timespec destination
+// `pt`. Asserts that the completion has not been attached to an IoCtx yet,
+// and takes a reference on it (dropped in finish()).
+librados::IoCtxImpl::C_aio_stat2_Ack::C_aio_stat2_Ack(AioCompletionImpl *_c,
+ struct timespec *pt)
+ : c(_c), pts(pt)
+{
+ ceph_assert(!c->io);
+ c->get();
+}
+
+// Completion handler for aio_stat2: same sequence as C_aio_stat_Ack::finish
+// but the mtime is delivered as a struct timespec.
+void librados::IoCtxImpl::C_aio_stat2_Ack::finish(int r)
+{
+ c->lock.lock();
+ c->rval = r;
+ c->complete = true;
+ c->cond.notify_all();
+
+ if (r >= 0 && pts) {
+ *pts = real_clock::to_timespec(mtime);
+ }
+
+ if (c->callback_complete) {
+ boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c));
+ }
+
+ // Releases the lock acquired above together with the reference.
+ c->put_unlock();
+}
+
+//////////////////////////// C_aio_Complete ////////////////////////////////
+
+// Bind the context to completion `_c` and take a reference on it; the
+// reference is dropped by put_unlock() in finish().
+librados::IoCtxImpl::C_aio_Complete::C_aio_Complete(AioCompletionImpl *_c)
+ : c(_c)
+{
+ c->get();
+}
+
+// Generic aio completion handler. Under the completion's lock it:
+//  - stores a non-zero `r` as the return value (a zero `r` leaves any
+//    previously set rval untouched),
+//  - marks the completion complete and wakes waiters,
+//  - for successful ops with a non-empty result buffer, copies data into
+//    the user's raw buffer when needed and reports the byte count as rval,
+//  - schedules user callbacks on the client's finish strand,
+//  - notifies the IoCtx's aio write tracking when the op was queued as a
+//    write,
+// and finally drops the reference taken in the constructor.
+void librados::IoCtxImpl::C_aio_Complete::finish(int r)
+{
+ c->lock.lock();
+ // Leave an existing rval unless r != 0
+ if (r)
+ c->rval = r; // This clears the error set in C_ObjectOperation_scrub_ls::finish()
+ c->complete = true;
+ c->cond.notify_all();
+
+ if (r == 0 && c->blp && c->blp->length() > 0) {
+ // A raw output buffer was supplied but the result is not contiguous:
+ // report -ERANGE instead of copying.
+ if (c->out_buf && !c->blp->is_contiguous()) {
+ c->rval = -ERANGE;
+ } else {
+ // Copy only when the data did not already land in the user's buffer.
+ if (c->out_buf && !c->blp->is_provided_buffer(c->out_buf))
+ c->blp->begin().copy(c->blp->length(), c->out_buf);
+
+ c->rval = c->blp->length();
+ }
+ }
+
+ if (c->callback_complete ||
+ c->callback_safe) {
+ boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c));
+ }
+
+ // Non-zero only for completions that went through the aio write queue.
+ if (c->aio_write_seq) {
+ c->io->complete_aio_write(c);
+ }
+
+#if defined(WITH_EVENTTRACE)
+ OID_EVENT_TRACE(oid.name.c_str(), "RADOS_OP_COMPLETE");
+#endif
+ // Releases the lock acquired above together with the reference.
+ c->put_unlock();
+}
+
+// Compute slice `n` of `m` equal divisions of the listing range
+// [start, finish) and return its bounds in *split_start / *split_finish.
+//
+// The range is divided in bit-reversed hash space. If `start` is already
+// the max object, both outputs are set to max. The first slice keeps the
+// exact `start` and the last slice keeps the exact `finish`; interior
+// boundaries are synthetic hobject_t values built from the computed hash
+// and this IoCtx's pool id.
+//
+// NOTE(review): `m` is used as a divisor and is not checked here; callers
+// are presumably expected to pass m > 0 and n < m -- confirm.
+void librados::IoCtxImpl::object_list_slice(
+ const hobject_t start,
+ const hobject_t finish,
+ const size_t n,
+ const size_t m,
+ hobject_t *split_start,
+ hobject_t *split_finish)
+{
+ if (start.is_max()) {
+ *split_start = hobject_t::get_max();
+ *split_finish = hobject_t::get_max();
+ return;
+ }
+
+ // Work in 64 bits so that "one past the last 32-bit hash" (0x100000000)
+ // can stand in for a max `finish`.
+ uint64_t start_hash = hobject_t::_reverse_bits(start.get_hash());
+ uint64_t finish_hash =
+ finish.is_max() ? 0x100000000 :
+ hobject_t::_reverse_bits(finish.get_hash());
+
+ uint64_t diff = finish_hash - start_hash;
+ uint64_t rev_start = start_hash + (diff * n / m);
+ uint64_t rev_finish = start_hash + (diff * (n + 1) / m);
+ if (n == 0) {
+ *split_start = start;
+ } else {
+ *split_start = hobject_t(
+ object_t(), string(), CEPH_NOSNAP,
+ hobject_t::_reverse_bits(rev_start), poolid, string());
+ }
+
+ if (n == m - 1)
+ *split_finish = finish;
+ else if (rev_finish >= 0x100000000)
+ *split_finish = hobject_t::get_max();
+ else
+ *split_finish = hobject_t(
+ object_t(), string(), CEPH_NOSNAP,
+ hobject_t::_reverse_bits(rev_finish), poolid, string());
+}
+
+// Synchronous wrapper around application_enable_async(): issues the
+// request, waits for it, and on success waits for the latest OSD map.
+// Returns the async request's negative result on failure, otherwise the
+// result of wait_for_latest_osdmap().
+int librados::IoCtxImpl::application_enable(const std::string& app_name,
+ bool force)
+{
+ auto c = new PoolAsyncCompletionImpl();
+ application_enable_async(app_name, force, c);
+
+ int r = c->wait();
+ ceph_assert(r == 0);
+
+ r = c->get_return_value();
+ // Drop the completion created above.
+ c->release();
+ c->put();
+ if (r < 0) {
+ return r;
+ }
+
+ return client->wait_for_latest_osdmap();
+}
+
+// Asynchronously enable application `app_name` on this IoCtx's pool by
+// sending an "osd pool application enable" monitor command; completion `c`
+// is signalled through CB_PoolAsync_Safe.
+//
+// When the monitors do not advertise the Luminous feature, no command is
+// sent and the completion is finished with -EOPNOTSUPP on the client's
+// finish strand.
+//
+// NOTE(review): the pool and application names are inserted into the JSON
+// command text without escaping.
+void librados::IoCtxImpl::application_enable_async(const std::string& app_name,
+ bool force,
+ PoolAsyncCompletionImpl *c)
+{
+ // pre-Luminous clusters will return -EINVAL and application won't be
+ // preserved until Luminous is configured as minimum version.
+ if (!client->get_required_monitor_features().contains_all(
+ ceph::features::mon::FEATURE_LUMINOUS)) {
+ boost::asio::defer(client->finish_strand,
+ [cb = CB_PoolAsync_Safe(c)]() mutable {
+ cb(-EOPNOTSUPP);
+ });
+ return;
+ }
+
+ std::stringstream cmd;
+ cmd << "{"
+ << "\"prefix\": \"osd pool application enable\","
+ << "\"pool\": \"" << get_cached_pool_name() << "\","
+ << "\"app\": \"" << app_name << "\"";
+ if (force) {
+ cmd << ",\"yes_i_really_mean_it\": true";
+ }
+ cmd << "}";
+
+ std::vector<std::string> cmds;
+ cmds.push_back(cmd.str());
+ bufferlist inbl;
+ client->mon_command_async(cmds, inbl, nullptr, nullptr,
+ make_lambda_context(CB_PoolAsync_Safe(c)));
+}
+
+// Fill `*app_names` with the application names recorded in the OSD map for
+// this IoCtx's pool. The set is cleared first.
+// Returns 0 on success or -ENOENT if the pool is not in the OSD map.
+int librados::IoCtxImpl::application_list(std::set<std::string> *app_names)
+{
+  app_names->clear();
+
+  int rc = -ENOENT;
+  objecter->with_osdmap([&](const OSDMap& osdmap) {
+      const auto pool = osdmap.get_pg_pool(poolid);
+      if (pool != nullptr) {
+        for (const auto& app : pool->application_metadata) {
+          app_names->insert(app.first);
+        }
+        rc = 0;
+      }
+    });
+  return rc;
+}
+
+// Look up metadata `key` of application `app_name` for this IoCtx's pool in
+// the OSD map and copy it into `*value`.
+// Returns 0 on success, or -ENOENT if the pool, the application, or the key
+// is not present (in which case `*value` is left untouched).
+int librados::IoCtxImpl::application_metadata_get(const std::string& app_name,
+                                                  const std::string &key,
+                                                  std::string* value)
+{
+  // Assume "not found" until every lookup level has succeeded.
+  int rc = -ENOENT;
+  objecter->with_osdmap([&](const OSDMap& osdmap) {
+      const auto pool = osdmap.get_pg_pool(poolid);
+      if (pool == nullptr) {
+        return;
+      }
+
+      const auto app = pool->application_metadata.find(app_name);
+      if (app == pool->application_metadata.end()) {
+        return;
+      }
+
+      const auto entry = app->second.find(key);
+      if (entry == app->second.end()) {
+        return;
+      }
+
+      *value = entry->second;
+      rc = 0;
+    });
+  return rc;
+}
+
+// Set metadata `key` = `value` for application `app_name` on this IoCtx's
+// pool by sending an "osd pool application set" monitor command.
+// Returns the negative mon_command() result on failure, otherwise the
+// result of waiting for the latest OSD map.
+int librados::IoCtxImpl::application_metadata_set(const std::string& app_name,
+                                                  const std::string &key,
+                                                  const std::string& value)
+{
+  std::stringstream json;
+  json << "{";
+  json << "\"prefix\":\"osd pool application set\",";
+  json << "\"pool\":\"" << get_cached_pool_name() << "\",";
+  json << "\"app\":\"" << app_name << "\",";
+  json << "\"key\":\"" << key << "\",";
+  json << "\"value\":\"" << value << "\"";
+  json << "}";
+
+  std::vector<std::string> cmds{json.str()};
+  bufferlist inbl;
+  const int rc = client->mon_command(cmds, inbl, nullptr, nullptr);
+  if (rc < 0) {
+    return rc;
+  }
+
+  // ensure we have the latest osd map epoch before proceeding
+  return client->wait_for_latest_osdmap();
+}
+
+// Remove metadata `key` of application `app_name` on this IoCtx's pool by
+// sending an "osd pool application rm" monitor command.
+// Returns the negative mon_command() result on failure, otherwise the
+// result of waiting for the latest OSD map.
+int librados::IoCtxImpl::application_metadata_remove(const std::string& app_name,
+                                                     const std::string &key)
+{
+  std::stringstream json;
+  json << "{";
+  json << "\"prefix\":\"osd pool application rm\",";
+  json << "\"pool\":\"" << get_cached_pool_name() << "\",";
+  json << "\"app\":\"" << app_name << "\",";
+  json << "\"key\":\"" << key << "\"";
+  json << "}";
+
+  std::vector<std::string> cmds{json.str()};
+  bufferlist inbl;
+  const int rc = client->mon_command(cmds, inbl, nullptr, nullptr);
+  if (rc < 0) {
+    return rc;
+  }
+
+  // ensure we have the latest osd map epoch before proceeding
+  return client->wait_for_latest_osdmap();
+}
+
+// Copy all metadata key/value pairs of application `app_name` for this
+// IoCtx's pool from the OSD map into `*values`. The map is cleared first.
+// Returns 0 on success, or -ENOENT if the pool or the application is not
+// present.
+int librados::IoCtxImpl::application_metadata_list(const std::string& app_name,
+                                                   std::map<std::string, std::string> *values)
+{
+  values->clear();
+
+  // Assume "not found" until both lookups have succeeded.
+  int rc = -ENOENT;
+  objecter->with_osdmap([&](const OSDMap& osdmap) {
+      const auto pool = osdmap.get_pg_pool(poolid);
+      if (pool == nullptr) {
+        return;
+      }
+
+      const auto app = pool->application_metadata.find(app_name);
+      if (app == pool->application_metadata.end()) {
+        return;
+      }
+
+      *values = app->second;
+      rc = 0;
+    });
+  return rc;
+}
+
diff --git a/src/librados/IoCtxImpl.h b/src/librados/IoCtxImpl.h
new file mode 100644
index 000000000..afc5de6ff
--- /dev/null
+++ b/src/librados/IoCtxImpl.h
@@ -0,0 +1,299 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_LIBRADOS_IOCTXIMPL_H
+#define CEPH_LIBRADOS_IOCTXIMPL_H
+
+#include <atomic>
+
+#include "common/Cond.h"
+#include "common/ceph_mutex.h"
+#include "common/snap_types.h"
+#include "common/zipkin_trace.h"
+#include "include/types.h"
+#include "include/rados/librados.h"
+#include "include/rados/librados.hpp"
+#include "include/xlist.h"
+#include "osd/osd_types.h"
+#include "osdc/Objecter.h"
+
+class RadosClient;
+
+// Implementation object behind an I/O context: bundles the owning
+// RadosClient, the Objecter used to issue operations, the target pool id,
+// snapshot state and the bookkeeping for asynchronous writes.
+// Lifetime is managed through get()/put(); put() deletes the object when the
+// reference count reaches zero.
+struct librados::IoCtxImpl {
+ // Reference count manipulated by get()/put().
+ std::atomic<uint64_t> ref_cnt = { 0 };
+ RadosClient *client = nullptr;
+ int64_t poolid = 0;
+ // NOTE(review): presumably the snapshot id used for reads (see
+ // set_snap_read) and the snapshot context used for writes (see
+ // set_snap_write_context) -- confirm against IoCtxImpl.cc.
+ snapid_t snap_seq;
+ ::SnapContext snapc;
+ uint64_t assert_ver = 0;
+ version_t last_objver = 0;
+ uint32_t notify_timeout = 30;
+ object_locator_t oloc;
+ int extra_op_flags = 0;
+
+ // State for tracking asynchronous writes; none of these members are copied
+ // by dup().
+ ceph::mutex aio_write_list_lock =
+ ceph::make_mutex("librados::IoCtxImpl::aio_write_list_lock");
+ ceph_tid_t aio_write_seq = 0;
+ ceph::condition_variable aio_write_cond;
+ xlist<AioCompletionImpl*> aio_write_list;
+ map<ceph_tid_t, std::list<AioCompletionImpl*> > aio_write_waiters;
+
+ Objecter *objecter = nullptr;
+
+ IoCtxImpl();
+ IoCtxImpl(RadosClient *c, Objecter *objecter,
+ int64_t poolid, snapid_t s);
+
+ // Copy the configuration of rhs into this context.
+ void dup(const IoCtxImpl& rhs) {
+ // Copy the client/pool/snapshot/version/locator/flag settings and the
+ // objecter pointer. The ref count and the aio write tracking state
+ // (lock, sequence, condition, lists) are deliberately left untouched.
+ client = rhs.client;
+ poolid = rhs.poolid;
+ snap_seq = rhs.snap_seq;
+ snapc = rhs.snapc;
+ assert_ver = rhs.assert_ver;
+ last_objver = rhs.last_objver;
+ notify_timeout = rhs.notify_timeout;
+ oloc = rhs.oloc;
+ extra_op_flags = rhs.extra_op_flags;
+ objecter = rhs.objecter;
+ }
+
+ void set_snap_read(snapid_t s);
+ int set_snap_write_context(snapid_t seq, vector<snapid_t>& snaps);
+
+ // Take one reference.
+ void get() {
+ ref_cnt++;
+ }
+
+ // Drop one reference; the object deletes itself when the count hits zero,
+ // so the caller must not touch it afterwards.
+ void put() {
+ if (--ref_cnt == 0)
+ delete this;
+ }
+
+ // aio write ordering / flush helpers (defined in IoCtxImpl.cc)
+ void queue_aio_write(struct AioCompletionImpl *c);
+ void complete_aio_write(struct AioCompletionImpl *c);
+ void flush_aio_writes_async(AioCompletionImpl *c);
+ void flush_aio_writes();
+
+ // Returns the pool id this context is bound to.
+ int64_t get_id() {
+ return poolid;
+ }
+
+ string get_cached_pool_name();
+
+ int get_object_hash_position(const std::string& oid, uint32_t *hash_position);
+ int get_object_pg_hash_position(const std::string& oid, uint32_t *pg_hash_position);
+
+ ::ObjectOperation *prepare_assert_ops(::ObjectOperation *op);
+
+ // snaps
+ int snap_list(vector<uint64_t> *snaps);
+ int snap_lookup(const char *name, uint64_t *snapid);
+ int snap_get_name(uint64_t snapid, std::string *s);
+ int snap_get_stamp(uint64_t snapid, time_t *t);
+ int snap_create(const char* snapname);
+ int selfmanaged_snap_create(uint64_t *snapid);
+ void aio_selfmanaged_snap_create(uint64_t *snapid, AioCompletionImpl *c);
+ int snap_remove(const char* snapname);
+ int rollback(const object_t& oid, const char *snapName);
+ int selfmanaged_snap_remove(uint64_t snapid);
+ void aio_selfmanaged_snap_remove(uint64_t snapid, AioCompletionImpl *c);
+ int selfmanaged_snap_rollback_object(const object_t& oid,
+ ::SnapContext& snapc, uint64_t snapid);
+
+ // io
+ // object listing
+ int nlist(Objecter::NListContext *context, int max_entries);
+ uint32_t nlist_seek(Objecter::NListContext *context, uint32_t pos);
+ uint32_t nlist_seek(Objecter::NListContext *context, const rados_object_list_cursor& cursor);
+ rados_object_list_cursor nlist_get_cursor(Objecter::NListContext *context);
+ void object_list_slice(
+ const hobject_t start,
+ const hobject_t finish,
+ const size_t n,
+ const size_t m,
+ hobject_t *split_start,
+ hobject_t *split_finish);
+
+ // synchronous object operations
+ int create(const object_t& oid, bool exclusive);
+ int write(const object_t& oid, bufferlist& bl, size_t len, uint64_t off);
+ int append(const object_t& oid, bufferlist& bl, size_t len);
+ int write_full(const object_t& oid, bufferlist& bl);
+ int writesame(const object_t& oid, bufferlist& bl,
+ size_t write_len, uint64_t offset);
+ int read(const object_t& oid, bufferlist& bl, size_t len, uint64_t off);
+ int mapext(const object_t& oid, uint64_t off, size_t len,
+ std::map<uint64_t,uint64_t>& m);
+ int sparse_read(const object_t& oid, std::map<uint64_t,uint64_t>& m,
+ bufferlist& bl, size_t len, uint64_t off);
+ int checksum(const object_t& oid, uint8_t type, const bufferlist &init_value,
+ size_t len, uint64_t off, size_t chunk_size, bufferlist *pbl);
+ int remove(const object_t& oid);
+ int remove(const object_t& oid, int flags);
+ int stat(const object_t& oid, uint64_t *psize, time_t *pmtime);
+ int stat2(const object_t& oid, uint64_t *psize, struct timespec *pts);
+ int trunc(const object_t& oid, uint64_t size);
+ int cmpext(const object_t& oid, uint64_t off, bufferlist& cmp_bl);
+
+ int tmap_update(const object_t& oid, bufferlist& cmdbl);
+
+ int exec(const object_t& oid, const char *cls, const char *method, bufferlist& inbl, bufferlist& outbl);
+
+ int getxattr(const object_t& oid, const char *name, bufferlist& bl);
+ int setxattr(const object_t& oid, const char *name, bufferlist& bl);
+ int getxattrs(const object_t& oid, map<string, bufferlist>& attrset);
+ int rmxattr(const object_t& oid, const char *name);
+
+ // compound operations, synchronous and asynchronous
+ int operate(const object_t& oid, ::ObjectOperation *o, ceph::real_time *pmtime, int flags=0);
+ int operate_read(const object_t& oid, ::ObjectOperation *o, bufferlist *pbl, int flags=0);
+ int aio_operate(const object_t& oid, ::ObjectOperation *o,
+ AioCompletionImpl *c, const SnapContext& snap_context,
+ int flags, const blkin_trace_info *trace_info = nullptr);
+ int aio_operate_read(const object_t& oid, ::ObjectOperation *o,
+ AioCompletionImpl *c, int flags, bufferlist *pbl, const blkin_trace_info *trace_info = nullptr);
+
+ // Completion context for aio_stat: carries the completion, the caller's
+ // time_t output pointer and a real_time slot for the mtime.
+ struct C_aio_stat_Ack : public Context {
+ librados::AioCompletionImpl *c;
+ time_t *pmtime;
+ ceph::real_time mtime;
+ C_aio_stat_Ack(AioCompletionImpl *_c, time_t *pm);
+ void finish(int r) override;
+ };
+
+ // Completion context for aio_stat2: same as above but with a timespec
+ // output pointer.
+ struct C_aio_stat2_Ack : public Context {
+ librados::AioCompletionImpl *c;
+ struct timespec *pts;
+ ceph::real_time mtime;
+ C_aio_stat2_Ack(AioCompletionImpl *_c, struct timespec *pts);
+ void finish(int r) override;
+ };
+
+ // Generic completion context wrapping an AioCompletionImpl; the object id
+ // is only stored when event tracing is compiled in.
+ struct C_aio_Complete : public Context {
+#if defined(WITH_EVENTTRACE)
+ object_t oid;
+#endif
+ AioCompletionImpl *c;
+ explicit C_aio_Complete(AioCompletionImpl *_c);
+ void finish(int r) override;
+ };
+
+ // asynchronous object operations
+ int aio_read(const object_t oid, AioCompletionImpl *c,
+ bufferlist *pbl, size_t len, uint64_t off, uint64_t snapid,
+ const blkin_trace_info *info = nullptr);
+ int aio_read(object_t oid, AioCompletionImpl *c,
+ char *buf, size_t len, uint64_t off, uint64_t snapid,
+ const blkin_trace_info *info = nullptr);
+ int aio_sparse_read(const object_t oid, AioCompletionImpl *c,
+ std::map<uint64_t,uint64_t> *m, bufferlist *data_bl,
+ size_t len, uint64_t off, uint64_t snapid);
+ int aio_cmpext(const object_t& oid, AioCompletionImpl *c, uint64_t off,
+ bufferlist& cmp_bl);
+ int aio_cmpext(const object_t& oid, AioCompletionImpl *c,
+ const char *cmp_buf, size_t cmp_len, uint64_t off);
+ int aio_write(const object_t &oid, AioCompletionImpl *c,
+ const bufferlist& bl, size_t len, uint64_t off,
+ const blkin_trace_info *info = nullptr);
+ int aio_append(const object_t &oid, AioCompletionImpl *c,
+ const bufferlist& bl, size_t len);
+ int aio_write_full(const object_t &oid, AioCompletionImpl *c,
+ const bufferlist& bl);
+ int aio_writesame(const object_t &oid, AioCompletionImpl *c,
+ const bufferlist& bl, size_t write_len, uint64_t off);
+ int aio_remove(const object_t &oid, AioCompletionImpl *c, int flags=0);
+ int aio_exec(const object_t& oid, AioCompletionImpl *c, const char *cls,
+ const char *method, bufferlist& inbl, bufferlist *outbl);
+ int aio_exec(const object_t& oid, AioCompletionImpl *c, const char *cls,
+ const char *method, bufferlist& inbl, char *buf, size_t out_len);
+ int aio_stat(const object_t& oid, AioCompletionImpl *c, uint64_t *psize, time_t *pmtime);
+ int aio_stat2(const object_t& oid, AioCompletionImpl *c, uint64_t *psize, struct timespec *pts);
+ int aio_getxattr(const object_t& oid, AioCompletionImpl *c,
+ const char *name, bufferlist& bl);
+ int aio_setxattr(const object_t& oid, AioCompletionImpl *c,
+ const char *name, bufferlist& bl);
+ int aio_getxattrs(const object_t& oid, AioCompletionImpl *c,
+ map<string, bufferlist>& attrset);
+ int aio_rmxattr(const object_t& oid, AioCompletionImpl *c,
+ const char *name);
+ int aio_cancel(AioCompletionImpl *c);
+
+ // hit set queries
+ int hit_set_list(uint32_t hash, AioCompletionImpl *c,
+ std::list< std::pair<time_t, time_t> > *pls);
+ int hit_set_get(uint32_t hash, AioCompletionImpl *c, time_t stamp,
+ bufferlist *pbl);
+
+ // inconsistency reporting for a placement group
+ int get_inconsistent_objects(const pg_t& pg,
+ const librados::object_id_t& start_after,
+ uint64_t max_to_get,
+ AioCompletionImpl *c,
+ std::vector<inconsistent_obj_t>* objects,
+ uint32_t* interval);
+
+ int get_inconsistent_snapsets(const pg_t& pg,
+ const librados::object_id_t& start_after,
+ uint64_t max_to_get,
+ AioCompletionImpl *c,
+ std::vector<inconsistent_snapset_t>* snapsets,
+ uint32_t* interval);
+
+ // watch / notify
+ void set_sync_op_version(version_t ver);
+ int watch(const object_t& oid, uint64_t *cookie, librados::WatchCtx *ctx,
+ librados::WatchCtx2 *ctx2, bool internal = false);
+ int watch(const object_t& oid, uint64_t *cookie, librados::WatchCtx *ctx,
+ librados::WatchCtx2 *ctx2, uint32_t timeout, bool internal = false);
+ int aio_watch(const object_t& oid, AioCompletionImpl *c, uint64_t *cookie,
+ librados::WatchCtx *ctx, librados::WatchCtx2 *ctx2,
+ bool internal = false);
+ int aio_watch(const object_t& oid, AioCompletionImpl *c, uint64_t *cookie,
+ librados::WatchCtx *ctx, librados::WatchCtx2 *ctx2,
+ uint32_t timeout, bool internal = false);
+ int watch_check(uint64_t cookie);
+ int unwatch(uint64_t cookie);
+ int aio_unwatch(uint64_t cookie, AioCompletionImpl *c);
+ int notify(const object_t& oid, bufferlist& bl, uint64_t timeout_ms,
+ bufferlist *preplybl, char **preply_buf, size_t *preply_buf_len);
+ int notify_ack(const object_t& oid, uint64_t notify_id, uint64_t cookie,
+ bufferlist& bl);
+ int aio_notify(const object_t& oid, AioCompletionImpl *c, bufferlist& bl,
+ uint64_t timeout_ms, bufferlist *preplybl, char **preply_buf,
+ size_t *preply_buf_len);
+
+ int set_alloc_hint(const object_t& oid,
+ uint64_t expected_object_size,
+ uint64_t expected_write_size,
+ uint32_t flags);
+
+ version_t last_version();
+ void set_assert_version(uint64_t ver);
+ void set_notify_timeout(uint32_t timeout);
+
+ int cache_pin(const object_t& oid);
+ int cache_unpin(const object_t& oid);
+
+ // pool application tags and their metadata
+ int application_enable(const std::string& app_name, bool force);
+ void application_enable_async(const std::string& app_name, bool force,
+ PoolAsyncCompletionImpl *c);
+ int application_list(std::set<std::string> *app_names);
+ int application_metadata_get(const std::string& app_name,
+ const std::string &key,
+ std::string* value);
+ int application_metadata_set(const std::string& app_name,
+ const std::string &key,
+ const std::string& value);
+ int application_metadata_remove(const std::string& app_name,
+ const std::string &key);
+ int application_metadata_list(const std::string& app_name,
+ std::map<std::string, std::string> *values);
+
+};
+
+#endif
diff --git a/src/librados/ListObjectImpl.h b/src/librados/ListObjectImpl.h
new file mode 100644
index 000000000..7396c1210
--- /dev/null
+++ b/src/librados/ListObjectImpl.h
@@ -0,0 +1,81 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 David Zafman <dzafman@redhat.com>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_LIBRADOS_LISTOBJECTIMPL_H
+#define CEPH_LIBRADOS_LISTOBJECTIMPL_H
+
+#include <string>
+#include <utility>
+#include <include/rados/librados.hpp>
+
+#include "include/cmp.h"
+
+namespace librados {
+/// Value type naming one listed object by namespace, object id and locator.
+struct ListObjectImpl {
+  std::string nspace;
+  std::string oid;
+  std::string locator;
+
+  ListObjectImpl() {}
+  /// Build an entry from its three components. The arguments are taken by
+  /// value and moved into place, so rvalue arguments are not copied twice.
+  ListObjectImpl(std::string n, std::string o, std::string l):
+    nspace(std::move(n)), oid(std::move(o)), locator(std::move(l)) {}
+
+  /// @return the namespace component
+  const std::string& get_nspace() const { return nspace; }
+  /// @return the object id component
+  const std::string& get_oid() const { return oid; }
+  /// @return the locator component
+  const std::string& get_locator() const { return locator; }
+};
+// Operator-generating macros applied to ListObjectImpl over the fields
+// (nspace, oid, locator), in that order.
+// NOTE(review): presumably these expand to ==/!= and the ordering operators;
+// the macro definitions are expected in include/cmp.h -- confirm there.
+WRITE_EQ_OPERATORS_3(ListObjectImpl, nspace, oid, locator)
+WRITE_CMP_OPERATORS_3(ListObjectImpl, nspace, oid, locator)
+/// Print an entry as "[nspace/]oid[@locator]"; the namespace prefix and the
+/// locator suffix are only emitted when the respective field is non-empty.
+inline std::ostream& operator<<(std::ostream& out, const struct ListObjectImpl& lop) {
+  // Both decorations are always streamed as std::string (possibly empty), so
+  // the stream sees the same three insertions in every case.
+  std::string ns_prefix;
+  if (!lop.nspace.empty()) {
+    ns_prefix = lop.nspace + "/";
+  }
+  std::string loc_suffix;
+  if (!lop.locator.empty()) {
+    loc_suffix = "@" + lop.locator;
+  }
+  out << ns_prefix << lop.oid << loc_suffix;
+  return out;
+}
+
+// Implementation of the object-listing iterator. Holds a shared listing
+// context (ctx) and the entry the iterator currently points at (cur_obj).
+// Only declarations live here; the member functions are defined elsewhere.
+class NObjectIteratorImpl {
+ public:
+ NObjectIteratorImpl() {}
+ ~NObjectIteratorImpl();
+ NObjectIteratorImpl(const NObjectIteratorImpl &rhs);
+ NObjectIteratorImpl& operator=(const NObjectIteratorImpl& rhs);
+
+ bool operator==(const NObjectIteratorImpl& rhs) const;
+ bool operator!=(const NObjectIteratorImpl& rhs) const;
+ const ListObject& operator*() const;
+ const ListObject* operator->() const;
+ NObjectIteratorImpl &operator++(); // Preincrement
+ NObjectIteratorImpl operator++(int); // Postincrement
+ // Returns the address of the current entry stored in this iterator; the
+ // pointer refers to a member, so it is only usable while the iterator lives.
+ const ListObject *get_listobjectp() { return &cur_obj; }
+
+ /// get current hash position of the iterator, rounded to the current pg
+ uint32_t get_pg_hash_position() const;
+
+ /// move the iterator to a given hash position. this may (will!) be rounded to the nearest pg.
+ uint32_t seek(uint32_t pos);
+
+ /// move the iterator to a given cursor position
+ uint32_t seek(const librados::ObjectCursor& cursor);
+
+ /// get current cursor position
+ librados::ObjectCursor get_cursor();
+
+ void set_filter(const bufferlist &bl);
+
+ // NOTE(review): this constructor is not explicit, so an ObjListCtx* converts
+ // implicitly to an iterator; ctx is a shared_ptr, so presumably ownership of
+ // ctx_ is taken -- confirm in the definition.
+ NObjectIteratorImpl(ObjListCtx *ctx_);
+ void get_next();
+ std::shared_ptr < ObjListCtx > ctx;
+ ListObject cur_obj;
+};
+
+}
+#endif
diff --git a/src/librados/PoolAsyncCompletionImpl.h b/src/librados/PoolAsyncCompletionImpl.h
new file mode 100644
index 000000000..73420fe35
--- /dev/null
+++ b/src/librados/PoolAsyncCompletionImpl.h
@@ -0,0 +1,110 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_LIBRADOS_POOLASYNCCOMPLETIONIMPL_H
+#define CEPH_LIBRADOS_POOLASYNCCOMPLETIONIMPL_H
+
+#include "common/ceph_mutex.h"
+
+#include <boost/intrusive_ptr.hpp>
+
+#include "include/rados/librados.h"
+#include "include/rados/librados.hpp"
+
+namespace librados {
+ /// Completion state for an asynchronous pool operation: result code, done
+ /// flag, optional user callback and a manual reference count, all guarded
+ /// by `lock`. The object starts with one reference and deletes itself when
+ /// put() drops the count to zero.
+ struct PoolAsyncCompletionImpl {
+   ceph::mutex lock = ceph::make_mutex("PoolAsyncCompletionImpl lock");
+   ceph::condition_variable cond;
+   int ref = 1;
+   int rval = 0;
+   bool released = false;
+   bool done = false;
+
+   rados_callback_t callback = nullptr;
+   void *callback_arg = nullptr;
+
+   PoolAsyncCompletionImpl() = default;
+
+   /// Register the callback and its argument; always returns 0.
+   int set_callback(void *cb_arg, rados_callback_t cb) {
+     std::lock_guard guard{lock};
+     callback_arg = cb_arg;
+     callback = cb;
+     return 0;
+   }
+   /// Block until `done` has been set; always returns 0.
+   int wait() {
+     std::unique_lock guard{lock};
+     cond.wait(guard, [this] { return done; });
+     return 0;
+   }
+   /// @return non-zero once the operation has completed
+   int is_complete() {
+     std::lock_guard guard{lock};
+     return done;
+   }
+   /// @return the stored result code
+   int get_return_value() {
+     std::lock_guard guard{lock};
+     return rval;
+   }
+   /// Take one additional reference; the object must still be referenced.
+   void get() {
+     std::lock_guard guard{lock};
+     ceph_assert(ref > 0);
+     ++ref;
+   }
+   /// Mark the completion as released; may only be called once.
+   void release() {
+     std::lock_guard guard{lock};
+     ceph_assert(!released);
+     released = true;
+   }
+   /// Drop one reference and destroy the object when none remain. The lock
+   /// is released before the delete so the mutex is not destroyed while held.
+   void put() {
+     int remaining;
+     {
+       std::lock_guard guard{lock};
+       remaining = --ref;
+     }
+     if (remaining == 0)
+       delete this;
+   }
+ };
+
+ // Free functions found by boost::intrusive_ptr<PoolAsyncCompletionImpl>;
+ // they forward to the object's own get()/put() reference counting.
+ inline void intrusive_ptr_add_ref(PoolAsyncCompletionImpl* p) {
+ p->get();
+ }
+ // put() may delete *p when the last reference goes away.
+ inline void intrusive_ptr_release(PoolAsyncCompletionImpl* p) {
+ p->put();
+ }
+
+ // Callable that finishes a PoolAsyncCompletionImpl: stores the result,
+ // marks it done, wakes waiters and invokes the user callback (if any).
+ // It holds a counted reference to the completion until it has run.
+ class CB_PoolAsync_Safe {
+ boost::intrusive_ptr<PoolAsyncCompletionImpl> p;
+
+ public:
+ explicit CB_PoolAsync_Safe(boost::intrusive_ptr<PoolAsyncCompletionImpl> p)
+ : p(p) {}
+ ~CB_PoolAsync_Safe() = default;
+
+ // r is stored as the completion's return value.
+ void operator()(int r) {
+ // Move the reference into a local so it is dropped when this call ends.
+ // `c` is declared before `l`, so `l` (the lock) is destroyed first and the
+ // reference is released only after the mutex has been unlocked.
+ auto c(std::move(p));
+ std::unique_lock l(c->lock);
+ c->rval = r;
+ c->done = true;
+ c->cond.notify_all();
+
+ if (c->callback) {
+ // Snapshot the callback under the lock, then call it unlocked.
+ rados_callback_t cb = c->callback;
+ void *cb_arg = c->callback_arg;
+ l.unlock();
+ cb(c.get(), cb_arg);
+ l.lock();
+ }
+ }
+ };
+}
+#endif
diff --git a/src/librados/RadosClient.cc b/src/librados/RadosClient.cc
new file mode 100644
index 000000000..04ea14f31
--- /dev/null
+++ b/src/librados/RadosClient.cc
@@ -0,0 +1,1180 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+
+#include <iostream>
+#include <string>
+#include <sstream>
+#include <pthread.h>
+#include <errno.h>
+
+#include "common/ceph_context.h"
+#include "common/config.h"
+#include "common/common_init.h"
+#include "common/ceph_json.h"
+#include "common/errno.h"
+#include "common/ceph_json.h"
+#include "common/async/blocked_completion.h"
+#include "include/buffer.h"
+#include "include/stringify.h"
+#include "include/util.h"
+
+#include "msg/Messenger.h"
+
+// needed for static_cast
+#include "messages/MLog.h"
+
+#include "AioCompletionImpl.h"
+#include "IoCtxImpl.h"
+#include "PoolAsyncCompletionImpl.h"
+#include "RadosClient.h"
+
+#include "include/ceph_assert.h"
+#include "common/EventTrace.h"
+
+#define dout_subsys ceph_subsys_rados
+#undef dout_prefix
+#define dout_prefix *_dout << "librados: "
+
+namespace bc = boost::container;
+namespace bs = boost::system;
+namespace ca = ceph::async;
+namespace cb = ceph::buffer;
+
+/**
+ * Construct a client bound to cct_.
+ *
+ * Takes a reference on the context (cct_->get()) for the Dispatcher base and
+ * arranges for cct_deleter to put() it again, registers this object as a
+ * config observer and caches the "rados_mon_op_timeout" option.
+ */
+librados::RadosClient::RadosClient(CephContext *cct_)
+  : Dispatcher(cct_->get()),
+    cct_deleter{cct, [](CephContext *ctx) { ctx->put(); }}
+{
+  cct->_conf.add_observer(this);
+  rados_mon_op_timeout =
+    cct->_conf.get_val<std::chrono::seconds>("rados_mon_op_timeout");
+}
+
+/**
+ * Resolve a pool name to its id.
+ *
+ * If the current OSD map does not know the name, the latest map is fetched
+ * and the lookup is repeated once.
+ *
+ * @param name pool name
+ * @return the lookup result (-ENOENT if still unknown), or the negative
+ *         error from waiting for an OSD map
+ */
+int64_t librados::RadosClient::lookup_pool(const char *name)
+{
+  if (int r = wait_for_osdmap(); r < 0) {
+    return r;
+  }
+
+  const auto find_pool = [this, name]() -> int64_t {
+    return objecter->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name),
+                                 name);
+  };
+
+  const int64_t first_try = find_pool();
+  if (first_try != -ENOENT) {
+    return first_try;
+  }
+
+  // Make sure we have the latest map
+  if (int r = wait_for_latest_osdmap(); r < 0) {
+    return r;
+  }
+  return find_pool();
+}
+
+/**
+ * Convenience wrapper around pool_requires_alignment2().
+ *
+ * @param pool_id pool to query
+ * @return the alignment requirement reported by pool_requires_alignment2(),
+ *         or false when that call fails
+ */
+bool librados::RadosClient::pool_requires_alignment(int64_t pool_id)
+{
+  // Named `req` rather than `requires`: the latter is a reserved keyword
+  // from C++20 on and would stop this file from compiling there.
+  bool req = false;
+  int r = pool_requires_alignment2(pool_id, &req);
+  if (r < 0) {
+    // The real answer is unknown on error; this API can only report a bool,
+    // so it falls back to false. Callers that need to tell the two cases
+    // apart should use pool_requires_alignment2().
+    return false;
+  }
+
+  return req;
+}
+
+/**
+ * A safer version of pool_requires_alignment: reports errors separately
+ * from the answer.
+ *
+ * @param pool_id pool to query
+ * @param req     out: result of requires_aligned_append() for the pool.
+ *                (Named `req` because `requires` is a reserved keyword
+ *                from C++20 on.)
+ * @return 0 on success, -EINVAL if req is null, -ENOENT if the pool is not
+ *         in the OSD map, or the error from wait_for_osdmap()
+ */
+int librados::RadosClient::pool_requires_alignment2(int64_t pool_id,
+                                                    bool *req)
+{
+  if (!req)
+    return -EINVAL;
+
+  int r = wait_for_osdmap();
+  if (r < 0) {
+    return r;
+  }
+
+  return objecter->with_osdmap([req, pool_id](const OSDMap& o) {
+    if (!o.have_pg_pool(pool_id)) {
+      return -ENOENT;
+    }
+    *req = o.get_pg_pool(pool_id)->requires_aligned_append();
+    return 0;
+  });
+}
+
+/**
+ * Convenience wrapper around pool_required_alignment2().
+ *
+ * @param pool_id pool to query
+ * @return the alignment reported by pool_required_alignment2(), or 0 when
+ *         that call fails
+ */
+uint64_t librados::RadosClient::pool_required_alignment(int64_t pool_id)
+{
+  uint64_t align;
+  if (pool_required_alignment2(pool_id, &align) < 0) {
+    // errors are folded into "no alignment"
+    return 0;
+  }
+  return align;
+}
+
+/**
+ * A safer version of pool_required_alignment: reports errors separately
+ * from the answer.
+ *
+ * @param pool_id   pool to query
+ * @param alignment out: result of required_alignment() for the pool
+ * @return 0 on success, -EINVAL if alignment is null, -ENOENT if the pool is
+ *         not in the OSD map, or the error from wait_for_osdmap()
+ */
+int librados::RadosClient::pool_required_alignment2(int64_t pool_id,
+                                                    uint64_t *alignment)
+{
+  if (alignment == nullptr) {
+    return -EINVAL;
+  }
+
+  if (const int r = wait_for_osdmap(); r < 0) {
+    return r;
+  }
+
+  return objecter->with_osdmap([pool_id, alignment](const OSDMap &osdmap) {
+    if (osdmap.have_pg_pool(pool_id)) {
+      *alignment = osdmap.get_pg_pool(pool_id)->required_alignment();
+      return 0;
+    }
+    return -ENOENT;
+  });
+}
+
+/**
+ * Look up the name of a pool by id.
+ *
+ * @param pool_id         pool to look up
+ * @param s               out: pool name, written only on success
+ * @param wait_latest_map when true and the pool is unknown, fetch the latest
+ *                        OSD map and retry the lookup once
+ * @return 0 on success, -ENOENT if the pool is unknown, or the error from
+ *         waiting for an OSD map
+ */
+int librados::RadosClient::pool_get_name(uint64_t pool_id, std::string *s, bool wait_latest_map)
+{
+  int r = wait_for_osdmap();
+  if (r < 0)
+    return r;
+
+  for (;;) {
+    objecter->with_osdmap([&](const OSDMap& osdmap) {
+      if (osdmap.have_pg_pool(pool_id)) {
+        *s = osdmap.get_pool_name(pool_id);
+        r = 0;
+      } else {
+        r = -ENOENT;
+      }
+    });
+
+    if (r != -ENOENT || !wait_latest_map) {
+      break;
+    }
+
+    // Unknown pool: refresh the map and look again, but only once.
+    r = wait_for_latest_osdmap();
+    if (r < 0)
+      return r;
+    wait_latest_map = false;
+  }
+
+  return r;
+}
+
+/**
+ * Format the fsid reported by the monitor client as text.
+ *
+ * @param s out: textual fsid
+ * @return 0 on success, -EINVAL if s is null
+ */
+int librados::RadosClient::get_fsid(std::string *s)
+{
+  if (s == nullptr) {
+    return -EINVAL;
+  }
+
+  std::lock_guard guard(lock);
+  ostringstream fsid_text;
+  fsid_text << monclient.get_fsid();
+  *s = fsid_text.str();
+  return 0;
+}
+
+/**
+ * Ping a single monitor.
+ *
+ * @param mon_id id of the monitor to ping
+ * @param result passed through to MonClient::ping_monitor()
+ * @return the negative error from building the initial monmap, otherwise
+ *         the result of MonClient::ping_monitor()
+ */
+int librados::RadosClient::ping_monitor(const string mon_id, string *result)
+{
+  /* If we haven't yet connected, we have no way of telling whether we
+   * already built monc's initial monmap. If we are in CONNECTED state,
+   * then it is safe to assume that we went through connect(), which does
+   * build a monmap.
+   */
+  if (state != CONNECTED) {
+    ldout(cct, 10) << __func__ << " build monmap" << dendl;
+    const int build_err = monclient.build_initial_monmap();
+    if (build_err < 0) {
+      return build_err;
+    }
+  }
+
+  return monclient.ping_monitor(mon_id, result);
+}
+
+/**
+ * Bring the client from DISCONNECTED to CONNECTED.
+ *
+ * Steps, in order: bootstrap monmap and config, finish common init, start
+ * the I/O context pool, build the initial monmap, create the messenger and
+ * the objecter, wire up the dispatchers, initialise and authenticate the
+ * monitor client, set up the manager client and finally start the objecter.
+ *
+ * @return 0 on success; -EINPROGRESS if a connect is already under way;
+ *         -EISCONN if already connected; -ENOMEM if the messenger or
+ *         objecter cannot be created; otherwise the error of the failing
+ *         step. On every failure after the state check the state is reset
+ *         to DISCONNECTED so that connect() can be retried.
+ */
+int librados::RadosClient::connect()
+{
+  int err;
+
+  // already connected?
+  if (state == CONNECTING)
+    return -EINPROGRESS;
+  if (state == CONNECTED)
+    return -EISCONN;
+  state = CONNECTING;
+
+  if (!cct->_log->is_started()) {
+    cct->_log->start();
+  }
+
+  {
+    MonClient mc_bootstrap(cct, poolctx);
+    err = mc_bootstrap.get_monmap_and_config();
+    if (err < 0) {
+      // Undo the CONNECTING transition; otherwise every later connect()
+      // would be rejected with -EINPROGRESS.
+      state = DISCONNECTED;
+      return err;
+    }
+  }
+
+  common_init_finish(cct);
+
+  poolctx.start(cct->_conf.get_val<std::uint64_t>("librados_thread_count"));
+
+  // get monmap
+  err = monclient.build_initial_monmap();
+  if (err < 0)
+    goto out;
+
+  // both allocations below report failure as -ENOMEM
+  err = -ENOMEM;
+  messenger = Messenger::create_client_messenger(cct, "radosclient");
+  if (!messenger)
+    goto out;
+
+  // require OSDREPLYMUX feature.  this means we will fail to talk to
+  // old servers.  this is necessary because otherwise we won't know
+  // how to decompose the reply data into its constituent pieces.
+  messenger->set_default_policy(Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX));
+
+  ldout(cct, 1) << "starting msgr at " << messenger->get_myaddrs() << dendl;
+
+  ldout(cct, 1) << "starting objecter" << dendl;
+
+  objecter = new (std::nothrow) Objecter(cct, messenger, &monclient, poolctx);
+  if (!objecter)
+    goto out;
+  objecter->set_balanced_budget();
+
+  monclient.set_messenger(messenger);
+  mgrclient.set_messenger(messenger);
+
+  objecter->init();
+  messenger->add_dispatcher_head(&mgrclient);
+  messenger->add_dispatcher_tail(objecter);
+  messenger->add_dispatcher_tail(this);
+
+  messenger->start();
+
+  ldout(cct, 1) << "setting wanted keys" << dendl;
+  monclient.set_want_keys(
+      CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MGR);
+  ldout(cct, 1) << "calling monclient init" << dendl;
+  err = monclient.init();
+  if (err) {
+    ldout(cct, 0) << conf->name << " initialization error " << cpp_strerror(-err) << dendl;
+    shutdown();
+    goto out;
+  }
+
+  err = monclient.authenticate(std::chrono::duration<double>(conf.get_val<std::chrono::seconds>("client_mount_timeout")).count());
+  if (err) {
+    ldout(cct, 0) << conf->name << " authentication error " << cpp_strerror(-err) << dendl;
+    shutdown();
+    goto out;
+  }
+  messenger->set_myname(entity_name_t::CLIENT(monclient.get_global_id()));
+
+  // Detect older cluster, put mgrclient into compatible mode
+  mgrclient.set_mgr_optional(
+      !get_required_monitor_features().contains_all(
+        ceph::features::mon::FEATURE_LUMINOUS));
+
+  // MgrClient needs this (it doesn't have MonClient reference itself)
+  monclient.sub_want("mgrmap", 0, 0);
+  monclient.renew_subs();
+
+  if (service_daemon) {
+    ldout(cct, 10) << __func__ << " registering as " << service_name << "."
+                   << daemon_name << dendl;
+    mgrclient.service_daemon_register(service_name, daemon_name,
+                                      daemon_metadata);
+  }
+  mgrclient.init();
+
+  objecter->set_client_incarnation(0);
+  objecter->start();
+  lock.lock();
+
+  state = CONNECTED;
+  instance_id = monclient.get_global_id();
+
+  lock.unlock();
+
+  ldout(cct, 1) << "init done" << dendl;
+  err = 0;
+
+ out:
+  if (err) {
+    // failure: roll the state back and free whatever was created
+    state = DISCONNECTED;
+
+    if (objecter) {
+      delete objecter;
+      objecter = NULL;
+    }
+    if (messenger) {
+      delete messenger;
+      messenger = NULL;
+    }
+  }
+
+  return err;
+}
+
+/**
+ * Tear the client down: flush watch callbacks if connected, mark the client
+ * DISCONNECTED, then shut down the objecter (if initialised), manager
+ * client, monitor client, messenger and the I/O context pool.
+ * Does nothing when the client is already DISCONNECTED.
+ */
+void librados::RadosClient::shutdown()
+{
+  std::unique_lock l{lock};
+  if (state == DISCONNECTED) {
+    return;
+  }
+
+  const bool need_objecter = objecter && objecter->initialized;
+
+  if (need_objecter && state == CONNECTED) {
+    // make sure watch callbacks are flushed
+    watch_flush();
+  }
+
+  state = DISCONNECTED;
+  instance_id = 0;
+  l.unlock();
+
+  // everything below runs without holding the client lock
+  if (need_objecter) {
+    objecter->shutdown();
+  }
+  mgrclient.shutdown();
+  monclient.shutdown();
+
+  if (messenger) {
+    messenger->shutdown();
+    messenger->wait();
+  }
+  poolctx.stop();
+  ldout(cct, 1) << "shutdown" << dendl;
+}
+
+// Flush the objecter's linger (watch) callbacks via
+// Objecter::linger_callback_flush() with the ca::use_blocked completion
+// token, logging on entry and exit. Always returns 0.
+// NOTE(review): use_blocked presumably makes this call block until the flush
+// has completed -- confirm in common/async/blocked_completion.h.
+int librados::RadosClient::watch_flush()
+{
+ ldout(cct, 10) << __func__ << " enter" << dendl;
+ objecter->linger_callback_flush(ca::use_blocked);
+
+ ldout(cct, 10) << __func__ << " exit" << dendl;
+ return 0;
+}
+
+/// Move-only completion handler for async_watch_flush(). Construction takes
+/// a reference on the AioCompletionImpl; invoking the handler marks the
+/// completion finished with result 0, schedules the user callbacks (if any)
+/// on the client's finish strand and drops that reference again.
+struct CB_aio_watch_flush_Complete {
+  librados::RadosClient *client;
+  librados::AioCompletionImpl *c;
+
+  CB_aio_watch_flush_Complete(librados::RadosClient *_client, librados::AioCompletionImpl *_c)
+    : client(_client), c(_c) {
+    // held until operator() calls put_unlock()
+    c->get();
+  }
+
+  CB_aio_watch_flush_Complete(const CB_aio_watch_flush_Complete&) = delete;
+  CB_aio_watch_flush_Complete operator =(const CB_aio_watch_flush_Complete&) = delete;
+
+  // Moves just copy the two pointers; no additional reference is taken.
+  CB_aio_watch_flush_Complete(CB_aio_watch_flush_Complete&& rhs)
+    : client(rhs.client), c(rhs.c) {
+  }
+  CB_aio_watch_flush_Complete& operator =(CB_aio_watch_flush_Complete&& rhs) {
+    client = rhs.client;
+    c = rhs.c;
+    return *this;
+  }
+
+  void operator()() {
+    c->lock.lock();
+    c->rval = 0;
+    c->complete = true;
+    c->cond.notify_all();
+
+    const bool has_user_callback = c->callback_complete || c->callback_safe;
+    if (has_user_callback) {
+      boost::asio::defer(client->finish_strand, librados::CB_AioComplete(c));
+    }
+    // releases both the lock and the reference taken in the constructor
+    c->put_unlock();
+  }
+};
+
+// Asynchronous variant of watch_flush(): hands a
+// CB_aio_watch_flush_Complete handler wrapping `c` to
+// Objecter::linger_callback_flush() and returns 0 immediately after logging.
+// The handler completes `c` with result 0 when it is invoked.
+int librados::RadosClient::async_watch_flush(AioCompletionImpl *c)
+{
+ ldout(cct, 10) << __func__ << " enter" << dendl;
+ objecter->linger_callback_flush(CB_aio_watch_flush_Complete(this, c));
+ ldout(cct, 10) << __func__ << " exit" << dendl;
+ return 0;
+}
+
+// Returns instance_id, which connect() sets to the monitor client's global
+// id and shutdown() resets to 0.
+// NOTE(review): the value is read here without taking `lock`, although both
+// writers hold it -- confirm this is intentional.
+uint64_t librados::RadosClient::get_instance_id()
+{
+ return instance_id;
+}
+
+/**
+ * Report the OSD map's require_osd_release as an integer.
+ *
+ * @param require_osd_release out: release number from the OSD map
+ * @return 0 on success, or the error from wait_for_osdmap()
+ */
+int librados::RadosClient::get_min_compatible_osd(int8_t* require_osd_release)
+{
+  if (const int r = wait_for_osdmap(); r < 0) {
+    return r;
+  }
+
+  objecter->with_osdmap([&](const OSDMap& osdmap) {
+    *require_osd_release = to_integer<int8_t>(osdmap.require_osd_release);
+  });
+  return 0;
+}
+
+/**
+ * Report the client compatibility levels recorded in the OSD map.
+ *
+ * @param min_compat_client         out: OSDMap::get_min_compat_client()
+ * @param require_min_compat_client out: OSDMap::get_require_min_compat_client()
+ * @return 0 on success, or the error from wait_for_osdmap()
+ */
+int librados::RadosClient::get_min_compatible_client(int8_t* min_compat_client,
+                                                     int8_t* require_min_compat_client)
+{
+  if (const int r = wait_for_osdmap(); r < 0) {
+    return r;
+  }
+
+  objecter->with_osdmap([&](const OSDMap& osdmap) {
+    *min_compat_client = to_integer<int8_t>(osdmap.get_min_compat_client());
+    *require_min_compat_client =
+      to_integer<int8_t>(osdmap.get_require_min_compat_client());
+  });
+  return 0;
+}
+
+/// Unregister the config observer and free the messenger and objecter.
+librados::RadosClient::~RadosClient()
+{
+  cct->_conf.remove_observer(this);
+  // deleting a null pointer is a no-op, so no guards are needed
+  delete messenger;
+  delete objecter;
+  cct = nullptr;
+}
+
+/**
+ * Create an I/O context for the pool with the given name.
+ *
+ * @param name pool name
+ * @param io   out: newly allocated IoCtxImpl, written only on success
+ * @return 0 on success, otherwise the negative result of lookup_pool()
+ */
+int librados::RadosClient::create_ioctx(const char *name, IoCtxImpl **io)
+{
+  const int64_t poolid = lookup_pool(name);
+  if (poolid >= 0) {
+    *io = new librados::IoCtxImpl(this, objecter, poolid, CEPH_NOSNAP);
+    return 0;
+  }
+  return static_cast<int>(poolid);
+}
+
+/**
+ * Create an I/O context for the pool with the given id.
+ *
+ * @param pool_id pool id
+ * @param io      out: newly allocated IoCtxImpl, written only on success
+ * @return 0 on success, otherwise the negative result of pool_get_name()
+ */
+int librados::RadosClient::create_ioctx(int64_t pool_id, IoCtxImpl **io)
+{
+  // The name itself is not used; the lookup only verifies the pool exists.
+  std::string ignored_name;
+  if (const int r = pool_get_name(pool_id, &ignored_name, true); r < 0) {
+    return r;
+  }
+
+  *io = new librados::IoCtxImpl(this, objecter, pool_id, CEPH_NOSNAP);
+  return 0;
+}
+
+/**
+ * Dispatcher entry point for incoming messages.
+ *
+ * While DISCONNECTED the message is dropped (put) and reported as handled;
+ * otherwise it is forwarded to _dispatch() under the client lock.
+ *
+ * @return true if the message was consumed
+ */
+bool librados::RadosClient::ms_dispatch(Message *m)
+{
+  std::lock_guard guard(lock);
+  if (state != DISCONNECTED) {
+    return _dispatch(m);
+  }
+
+  ldout(cct, 10) << "disconnected, discarding " << *m << dendl;
+  m->put();
+  return true;
+}
+
+// Dispatcher hook; intentionally empty, this client takes no action here.
+void librados::RadosClient::ms_handle_connect(Connection *con)
+{
+}
+
+// Dispatcher hook; does nothing and unconditionally returns false.
+bool librados::RadosClient::ms_handle_reset(Connection *con)
+{
+ return false;
+}
+
+// Dispatcher hook; intentionally empty, this client takes no action here.
+void librados::RadosClient::ms_handle_remote_reset(Connection *con)
+{
+}
+
+// Dispatcher hook; does nothing and unconditionally returns false.
+bool librados::RadosClient::ms_handle_refused(Connection *con)
+{
+ return false;
+}
+
+/**
+ * Handle one message; the caller must hold the client lock.
+ *
+ * OSD map messages wake everything waiting on `cond` and are dropped, MDS
+ * map messages are dropped, log messages go to handle_log().
+ *
+ * @return true if the message type was handled here, false otherwise
+ */
+bool librados::RadosClient::_dispatch(Message *m)
+{
+  ceph_assert(ceph_mutex_is_locked(lock));
+
+  switch (m->get_type()) {
+  // OSD
+  case CEPH_MSG_OSD_MAP:
+    cond.notify_all();
+    m->put();
+    return true;
+
+  case CEPH_MSG_MDS_MAP:
+    m->put();
+    return true;
+
+  case MSG_LOG:
+    handle_log(static_cast<MLog *>(m));
+    return true;
+
+  default:
+    return false;
+  }
+}
+
+
+/**
+ * Block until the objecter holds an OSD map with a non-zero epoch.
+ *
+ * Must be called without the client lock held. The wait is bounded by
+ * rados_mon_op_timeout per wake-up; a timeout of zero waits indefinitely.
+ *
+ * @return 0 once a map is available, -ENOTCONN if the client is not
+ *         connected, -ETIMEDOUT if the wait timed out
+ */
+int librados::RadosClient::wait_for_osdmap()
+{
+  ceph_assert(ceph_mutex_is_not_locked_by_me(lock));
+
+  if (state != CONNECTED) {
+    return -ENOTCONN;
+  }
+
+  const auto map_missing = [this] {
+    return objecter->with_osdmap(std::mem_fn(&OSDMap::get_epoch)) == 0;
+  };
+
+  // fast path: a map is already there, no need to take the lock
+  if (!map_missing()) {
+    return 0;
+  }
+
+  std::unique_lock l(lock);
+  const ceph::timespan timeout = rados_mon_op_timeout;
+
+  // re-check under the lock before committing to a wait
+  if (map_missing()) {
+    ldout(cct, 10) << __func__ << " waiting" << dendl;
+    while (map_missing()) {
+      if (timeout == timeout.zero()) {
+        cond.wait(l);
+      } else if (cond.wait_for(l, timeout) == std::cv_status::timeout) {
+        lderr(cct) << "timed out waiting for first osdmap from monitors"
+                   << dendl;
+        return -ETIMEDOUT;
+      }
+    }
+    ldout(cct, 10) << __func__ << " done waiting" << dendl;
+  }
+  return 0;
+}
+
+
+// Ask the objecter for the latest OSD map using the ca::use_blocked
+// completion token, capturing the outcome in `ec`, and return it converted
+// to an integer by ceph::from_error_code().
+// NOTE(review): presumably blocks until the map has arrived and yields 0 or
+// a negative errno -- confirm against Objecter::wait_for_latest_osdmap.
+int librados::RadosClient::wait_for_latest_osdmap()
+{
+ bs::error_code ec;
+ objecter->wait_for_latest_osdmap(ca::use_blocked[ec]);
+ return ceph::from_error_code(ec);
+}
+
+/**
+ * Append (id, name) for every pool in the current OSD map to v.
+ *
+ * @param v list that receives the entries; existing content is kept
+ * @return 0 on success, or the error from wait_for_osdmap()
+ */
+int librados::RadosClient::pool_list(std::list<std::pair<int64_t, string> >& v)
+{
+  int r = wait_for_osdmap();
+  if (r < 0)
+    return r;
+
+  objecter->with_osdmap([&](const OSDMap& o) {
+    // Iterate by const reference: only the key is needed, and copying each
+    // map entry would also copy the pool description stored as its value.
+    for (const auto& p : o.get_pools())
+      v.emplace_back(p.first, o.get_pool_name(p.first));
+  });
+  return 0;
+}
+
+/**
+ * Fetch statistics for the named pools.
+ *
+ * @param pools     names of the pools to query
+ * @param result    if non-null, the returned statistics are inserted here
+ * @param pper_pool if non-null, receives the per-pool flag returned by the
+ *                  objecter (both true and false are stored)
+ * @return 0 on success, otherwise the converted error code of the request
+ */
+int librados::RadosClient::get_pool_stats(std::list<string>& pools,
+                                          map<string,::pool_stat_t> *result,
+                                          bool *pper_pool)
+{
+  bs::error_code ec;
+
+  std::vector<std::string> v(pools.begin(), pools.end());
+
+  auto [res, per_pool] = objecter->get_pool_stats(v, ca::use_blocked[ec]);
+  if (ec)
+    return ceph::from_error_code(ec);
+
+  // Guard on the output pointer, not on the flag value: testing the flag
+  // dereferenced a null pper_pool whenever it was true and never stored a
+  // false result.
+  if (pper_pool)
+    *pper_pool = per_pool;
+  if (result)
+    result->insert(res.begin(), res.end());
+
+  return 0;
+}
+
+/**
+ * Report whether the named pool is in unmanaged-snaps mode.
+ *
+ * @param pool pool name
+ * @return the pool's is_unmanaged_snaps_mode(), or false when the name is
+ *         not found in the current OSD map
+ */
+bool librados::RadosClient::get_pool_is_selfmanaged_snaps_mode(
+  const std::string& pool)
+{
+  bool selfmanaged = false;
+  objecter->with_osdmap([&pool, &selfmanaged](const OSDMap& osdmap) {
+    const int64_t id = osdmap.lookup_pg_pool_name(pool);
+    if (id < 0) {
+      return;
+    }
+    selfmanaged = osdmap.get_pg_pool(id)->is_unmanaged_snaps_mode();
+  });
+  return selfmanaged;
+}
+
+/**
+ * Synchronously fetch cluster-wide usage statistics.
+ *
+ * Issues Objecter::get_fs_stats() with a C_SafeCond completion and waits on
+ * a local condition variable until that completion reports done.
+ *
+ * @param stats out: filled in by the objecter
+ * @return the result code delivered through the completion
+ */
+int librados::RadosClient::get_fs_stats(ceph_statfs& stats)
+{
+  ceph::mutex mylock = ceph::make_mutex("RadosClient::get_fs_stats::mylock");
+  ceph::condition_variable cond;
+  // Initialised explicitly: the wait predicate below reads this flag, and
+  // relying on the completion's constructor to set it first is fragile.
+  bool done = false;
+  int ret = 0;
+  {
+    std::lock_guard l{mylock};
+    objecter->get_fs_stats(stats, boost::optional<int64_t> (),
+                           new C_SafeCond(mylock, cond, &done, &ret));
+  }
+  {
+    std::unique_lock l{mylock};
+    cond.wait(l, [&done] { return done;});
+  }
+  return ret;
+}
+
+/// Take one more reference on this client. Asserts, under `lock`, that the
+/// count is still positive before incrementing it.
+void librados::RadosClient::get() {
+  std::lock_guard guard{lock};
+  ceph_assert(refcnt > 0);
+  ++refcnt;
+}
+
+/// Drop one reference under `lock`.
+/// @return true when the count reached zero with this call
+bool librados::RadosClient::put() {
+  std::lock_guard guard{lock};
+  ceph_assert(refcnt > 0);
+  return --refcnt == 0;
+}
+
+/**
+ * Create a pool and wait for the objecter's completion.
+ *
+ * @param name        pool name; an empty name is rejected with -EINVAL
+ * @param crush_rule  forwarded unchanged to Objecter::create_pool()
+ * @return -EINVAL for an empty name, a negative value from
+ *         wait_for_osdmap(), or the result delivered to the completion
+ */
+int librados::RadosClient::pool_create(string& name,
+                                       int16_t crush_rule)
+{
+  if (name.empty())
+    return -EINVAL;
+
+  int r = wait_for_osdmap();
+  if (r < 0) {
+    return r;
+  }
+
+  ceph::mutex mylock = ceph::make_mutex("RadosClient::pool_create::mylock");
+  ceph::condition_variable cond;
+  // Explicit initial values: the wait predicate and the return statement
+  // must not depend on C_SafeCond initialising these for us.
+  int reply = 0;
+  bool done = false;
+  Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply);
+  objecter->create_pool(name, onfinish, crush_rule);
+
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done; });
+  return reply;
+}
+
+/// Asynchronous pool creation: once wait_for_osdmap() succeeds, hands the
+/// objecter a context built from CB_PoolAsync_Safe(c) and returns without
+/// waiting for it.
+/// @return the value returned by wait_for_osdmap()
+int librados::RadosClient::pool_create_async(string& name,
+                                             PoolAsyncCompletionImpl *c,
+                                             int16_t crush_rule)
+{
+  const int r = wait_for_osdmap();
+  if (r >= 0) {
+    Context *onfinish = make_lambda_context(CB_PoolAsync_Safe(c));
+    objecter->create_pool(name, onfinish, crush_rule);
+  }
+  return r;
+}
+
+/**
+ * Report the base tier of a pool: the pool's own id when its tier_of field
+ * is negative, otherwise tier_of.
+ *
+ * @param pool_id    pool to look up in the objecter's osdmap
+ * @param base_tier  receives the answer; written only when the pool exists
+ * @return 0 on success, -ENOENT when the osdmap has no such pool, or a
+ *         negative value from wait_for_osdmap()
+ */
+int librados::RadosClient::pool_get_base_tier(int64_t pool_id, int64_t* base_tier)
+{
+  int r = wait_for_osdmap();
+  if (r < 0)
+    return r;
+
+  objecter->with_osdmap([&](const OSDMap& o) {
+      const pg_pool_t* pool = o.get_pg_pool(pool_id);
+      if (!pool) {
+        r = -ENOENT;
+        return;
+      }
+      *base_tier = (pool->tier_of < 0) ? pool_id : pool->tier_of;
+      r = 0;
+    });
+  return r;
+}
+
+/**
+ * Delete a pool by name and wait for the objecter's completion.
+ *
+ * @param name  pool name, forwarded to Objecter::delete_pool()
+ * @return a negative value from wait_for_osdmap(), or the result delivered
+ *         to the completion context
+ */
+int librados::RadosClient::pool_delete(const char *name)
+{
+  int r = wait_for_osdmap();
+  if (r < 0) {
+    return r;
+  }
+
+  ceph::mutex mylock = ceph::make_mutex("RadosClient::pool_delete::mylock");
+  ceph::condition_variable cond;
+  // Explicit initial values: the wait predicate and the return statement
+  // must not depend on C_SafeCond initialising these for us.
+  bool done = false;
+  int ret = 0;
+  Context *onfinish = new C_SafeCond(mylock, cond, &done, &ret);
+  objecter->delete_pool(name, onfinish);
+
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done;});
+  return ret;
+}
+
+/// Asynchronous pool deletion: once wait_for_osdmap() succeeds, hands the
+/// objecter a context built from CB_PoolAsync_Safe(c) and returns without
+/// waiting for it.
+/// @return the value returned by wait_for_osdmap()
+int librados::RadosClient::pool_delete_async(const char *name, PoolAsyncCompletionImpl *c)
+{
+  const int r = wait_for_osdmap();
+  if (r >= 0) {
+    Context *onfinish = make_lambda_context(CB_PoolAsync_Safe(c));
+    objecter->delete_pool(name, onfinish);
+  }
+  return r;
+}
+
+/// Forward @p set to Objecter::blocklist_self() while holding `lock`.
+void librados::RadosClient::blocklist_self(bool set) {
+  std::lock_guard guard{lock};
+  objecter->blocklist_self(set);
+}
+
+/// Render the messenger's own addresses (get_myaddrs()) as a string.
+std::string librados::RadosClient::get_addrs() const {
+  CachedStackStringStream out;
+  *out << messenger->get_myaddrs();
+  return std::string(out->strv());
+}
+
+/**
+ * Ask the monitors to blocklist a client address.
+ *
+ * Sends "osd blocklist add"; when that is answered with -EINVAL the legacy
+ * "osd blacklist add" spelling is tried once. On success the call then
+ * waits for the latest osdmap.
+ *
+ * @param client_address  address text; must be accepted by
+ *                        entity_addr_t::parse()
+ * @param expire_seconds  expiry to send; 0 omits the "expire" field
+ * @return -EINVAL for an unparsable address, a negative mon_command()
+ *         result, or the result of wait_for_latest_osdmap()
+ */
+int librados::RadosClient::blocklist_add(const string& client_address,
+                                         uint32_t expire_seconds)
+{
+  entity_addr_t addr;
+  if (!addr.parse(client_address.c_str(), 0)) {
+    lderr(cct) << "unable to parse address " << client_address << dendl;
+    return -EINVAL;
+  }
+
+  // Single builder for both spellings of the command; `verb` is either
+  // "blocklist" or "blacklist" and appears in the prefix and the op key.
+  auto build_cmd = [&client_address, expire_seconds](const char *verb) {
+    std::stringstream cmd;
+    cmd << "{"
+        << "\"prefix\": \"osd " << verb << "\", "
+        << "\"" << verb << "op\": \"add\", "
+        << "\"addr\": \"" << client_address << "\"";
+    if (expire_seconds != 0) {
+      cmd << ", \"expire\": " << expire_seconds << ".0";
+    }
+    cmd << "}";
+    return cmd.str();
+  };
+
+  std::vector<std::string> cmds;
+  cmds.push_back(build_cmd("blocklist"));
+  bufferlist inbl;
+  int r = mon_command(cmds, inbl, nullptr, nullptr);
+  if (r == -EINVAL) {
+    // try legacy blacklist command
+    cmds.clear();
+    cmds.push_back(build_cmd("blacklist"));
+    r = mon_command(cmds, inbl, nullptr, nullptr);
+  }
+  if (r < 0) {
+    return r;
+  }
+
+  // ensure we have the latest osd map epoch before proceeding
+  return wait_for_latest_osdmap();
+}
+
+/// Synchronous monitor command: starts mon_command_async() with a local
+/// C_SaferCond as the completion and returns what waiting on it yields.
+int librados::RadosClient::mon_command(const vector<string>& cmd,
+                                       const bufferlist &inbl,
+                                       bufferlist *outbl, string *outs)
+{
+  C_SaferCond completion;
+  mon_command_async(cmd, inbl, outbl, outs, &completion);
+  const int rc = completion.wait();
+  return rc;
+}
+
+/**
+ * Start a monitor command under `lock` and return immediately.
+ *
+ * When the reply handler runs it moves the status string into @p outs and
+ * the payload into @p outbl (each only if non-null), then completes
+ * @p on_finish with the error code converted by ceph::from_error_code().
+ *
+ * NOTE(review): on_finish is wrapped in a std::unique_ptr captured by the
+ * handler, so it would be deleted if the handler were destroyed without
+ * running -- confirm this is safe for callers passing a stack object.
+ */
+void librados::RadosClient::mon_command_async(const vector<string>& cmd,
+                                              const bufferlist &inbl,
+                                              bufferlist *outbl, string *outs,
+                                              Context *on_finish)
+{
+  std::lock_guard guard{lock};
+  monclient.start_mon_command(
+    cmd, inbl,
+    [outs, outbl, on_finish = std::unique_ptr<Context>(on_finish)]
+    (bs::error_code ec, std::string&& status, ceph::bufferlist&& payload)
+    mutable {
+      if (outs) {
+        *outs = std::move(status);
+      }
+      if (outbl) {
+        *outbl = std::move(payload);
+      }
+      if (on_finish) {
+        // release() first: complete() takes over the context's lifetime
+        on_finish.release()->complete(ceph::from_error_code(ec));
+      }
+    });
+}
+
+/**
+ * Send a command to the manager and wait for the reply.
+ *
+ * The command is started while holding `lock`; the lock is released before
+ * waiting. When rados_mon_op_timeout is positive the wait is bounded by it.
+ *
+ * @return a negative value from MgrClient::start_command(), otherwise the
+ *         value produced by waiting on the completion
+ */
+int librados::RadosClient::mgr_command(const vector<string>& cmd,
+                                       const bufferlist &inbl,
+                                       bufferlist *outbl, string *outs)
+{
+  // unique_lock rather than lock_guard: the mutex is dropped before the
+  // wait, and the guard object itself should know that. The old code
+  // unlocked behind a lock_guard's back and re-locked only so the guard
+  // could unlock again on return.
+  std::unique_lock l(lock);
+
+  C_SaferCond cond;
+  int r = mgrclient.start_command(cmd, inbl, outbl, outs, &cond);
+  if (r < 0)
+    return r;
+
+  l.unlock();
+  if (rados_mon_op_timeout.count() > 0) {
+    r = cond.wait_for(rados_mon_op_timeout);
+  } else {
+    r = cond.wait();
+  }
+
+  return r;
+}
+
+/**
+ * Send a "tell" command to the named manager target and wait for the reply.
+ *
+ * The command is started while holding `lock`; the lock is released before
+ * waiting. When rados_mon_op_timeout is positive the wait is bounded by it.
+ *
+ * @param name  target passed to MgrClient::start_tell_command()
+ * @return a negative value from start_tell_command(), otherwise the value
+ *         produced by waiting on the completion
+ */
+int librados::RadosClient::mgr_command(
+  const string& name,
+  const vector<string>& cmd,
+  const bufferlist &inbl,
+  bufferlist *outbl, string *outs)
+{
+  // unique_lock rather than lock_guard: the mutex is dropped before the
+  // wait, and the guard object itself should know that. The old code
+  // unlocked behind a lock_guard's back and re-locked only so the guard
+  // could unlock again on return.
+  std::unique_lock l(lock);
+
+  C_SaferCond cond;
+  int r = mgrclient.start_tell_command(name, cmd, inbl, outbl, outs, &cond);
+  if (r < 0)
+    return r;
+
+  l.unlock();
+  if (rados_mon_op_timeout.count() > 0) {
+    r = cond.wait_for(rados_mon_op_timeout);
+  } else {
+    r = cond.wait();
+  }
+
+  return r;
+}
+
+
+/// Send a command to the monitor with the given rank using the
+/// ca::use_blocked token; the status string and payload are moved into
+/// @p outs / @p outbl when those are non-null.
+/// @return the error code converted with ceph::from_error_code()
+int librados::RadosClient::mon_command(int rank, const vector<string>& cmd,
+                                       const bufferlist &inbl,
+                                       bufferlist *outbl, string *outs)
+{
+  bs::error_code err;
+  auto&& [status, payload] = monclient.start_mon_command(rank, cmd, inbl,
+                                                         ca::use_blocked[err]);
+  if (outs) {
+    *outs = std::move(status);
+  }
+  if (outbl) {
+    *outbl = std::move(payload);
+  }
+  return ceph::from_error_code(err);
+}
+
+/// Send a command to the monitor with the given name using the
+/// ca::use_blocked token; the status string and payload are moved into
+/// @p outs / @p outbl when those are non-null.
+/// @return the error code converted with ceph::from_error_code()
+int librados::RadosClient::mon_command(string name, const vector<string>& cmd,
+                                       const bufferlist &inbl,
+                                       bufferlist *outbl, string *outs)
+{
+  bs::error_code err;
+  auto&& [status, payload] = monclient.start_mon_command(name, cmd, inbl,
+                                                         ca::use_blocked[err]);
+  if (outs) {
+    *outs = std::move(status);
+  }
+  if (outbl) {
+    *outbl = std::move(payload);
+  }
+  return ceph::from_error_code(err);
+}
+
+/**
+ * Send a command to one OSD through the objecter.
+ *
+ * @param osd     OSD id; negative ids are rejected with -EINVAL
+ * @param cmd     command words; moved into the objecter call
+ * @param inbl    input payload (copied into a cb::list for the call)
+ * @param poutbl  optional; receives the reply payload
+ * @param prs     optional; receives the reply status string
+ * @return -EINVAL for a negative id, otherwise the error code converted
+ *         with ceph::from_error_code()
+ */
+int librados::RadosClient::osd_command(int osd, vector<string>& cmd,
+                                       const bufferlist& inbl,
+                                       bufferlist *poutbl, string *prs)
+{
+  if (osd < 0) {
+    return -EINVAL;
+  }
+
+  // XXX do anything with tid?
+  ceph_tid_t tid;
+  bs::error_code err;
+  auto [status, reply] = objecter->osd_command(osd, std::move(cmd),
+                                               cb::list(inbl), &tid,
+                                               ca::use_blocked[err]);
+  if (poutbl) {
+    *poutbl = std::move(reply);
+  }
+  if (prs) {
+    *prs = std::move(status);
+  }
+  return ceph::from_error_code(err);
+}
+
+/// Send a command addressed to placement group @p pgid through the
+/// objecter; the reply payload and status string are moved into
+/// @p poutbl / @p prs when those are non-null.
+/// @return the error code converted with ceph::from_error_code()
+int librados::RadosClient::pg_command(pg_t pgid, vector<string>& cmd,
+                                      const bufferlist& inbl,
+                                      bufferlist *poutbl, string *prs)
+{
+  ceph_tid_t tid;
+  bs::error_code err;
+  auto [status, reply] = objecter->pg_command(pgid, std::move(cmd), inbl, &tid,
+                                              ca::use_blocked[err]);
+  if (poutbl) {
+    *poutbl = std::move(reply);
+  }
+  if (prs) {
+    *prs = std::move(status);
+  }
+  return ceph::from_error_code(err);
+}
+
+/**
+ * Install, replace or remove the cluster-log callbacks.
+ *
+ * With both callbacks null the current subscription is dropped and the
+ * stored callbacks are cleared. Otherwise @p level selects the log
+ * subscription, any existing subscription is dropped first, and the new
+ * callbacks and argument are stored.
+ *
+ * @param level  one of "debug", "info", "warn"/"warning", "err"/"error",
+ *               "sec"
+ * @return 0 on success, -ENOTCONN when the client is not CONNECTED,
+ *         -EINVAL for an unrecognised level
+ */
+int librados::RadosClient::monitor_log(const string& level,
+                                       rados_log_callback_t cb,
+                                       rados_log_callback2_t cb2,
+                                       void *arg)
+{
+  std::lock_guard l(lock);
+
+  if (state != CONNECTED) {
+    return -ENOTCONN;
+  }
+
+  if (cb == NULL && cb2 == NULL) {
+    // stop watch
+    ldout(cct, 10) << __func__ << " removing cb " << (void*)log_cb
+                   << " " << (void*)log_cb2 << dendl;
+    monclient.sub_unwant(log_watch);
+    log_watch.clear();
+    log_cb = NULL;
+    log_cb2 = NULL;
+    log_cb_arg = NULL;
+    return 0;
+  }
+
+  // accepted level names and the subscription each one maps to
+  static const struct {
+    const char *level;
+    const char *subscription;
+  } known_levels[] = {
+    {"debug",   "log-debug"},
+    {"info",    "log-info"},
+    {"warn",    "log-warn"},
+    {"warning", "log-warn"},
+    {"err",     "log-error"},
+    {"error",   "log-error"},
+    {"sec",     "log-sec"},
+  };
+
+  string watch_level;
+  for (const auto& entry : known_levels) {
+    if (level == entry.level) {
+      watch_level = entry.subscription;
+      break;
+    }
+  }
+  if (watch_level.empty()) {
+    ldout(cct, 10) << __func__ << " invalid level " << level << dendl;
+    return -EINVAL;
+  }
+
+  if (log_cb || log_cb2)
+    monclient.sub_unwant(log_watch);
+
+  // (re)start watch
+  ldout(cct, 10) << __func__ << " add cb " << (void*)cb << " " << (void*)cb2
+                 << " level " << level << dendl;
+  monclient.sub_want(watch_level, 0, 0);
+
+  monclient.renew_subs();
+  log_cb = cb;
+  log_cb2 = cb2;
+  log_cb_arg = arg;
+  log_watch = watch_level;
+  return 0;
+}
+
+/**
+ * Deliver the entries of a log message to the registered callbacks.
+ *
+ * Must be called with `lock` held (asserted). Messages whose version is
+ * not newer than log_last_version are ignored. The message reference is
+ * dropped with m->put() in every case.
+ *
+ * @param m  incoming log message
+ */
+void librados::RadosClient::handle_log(MLog *m)
+{
+  ceph_assert(ceph_mutex_is_locked(lock));
+  ldout(cct, 10) << __func__ << " version " << m->version << dendl;
+
+  if (log_last_version < m->version) {
+    log_last_version = m->version;
+
+    if (log_cb || log_cb2) {
+      // entries are only read here, so iterate by const reference instead
+      // of copying each LogEntry
+      for (const LogEntry& e : m->entries) {
+        ostringstream ss;
+        ss << e.stamp << " " << e.name << " " << e.prio << " " << e.msg;
+        const string line = ss.str();
+        const string who = stringify(e.rank) + " " + stringify(e.addrs);
+        const string name = stringify(e.name);
+        const string level = stringify(e.prio);
+        struct timespec stamp;
+        e.stamp.to_timespec(&stamp);
+
+        ldout(cct, 20) << __func__ << " delivering " << line << dendl;
+        if (log_cb)
+          log_cb(log_cb_arg, line.c_str(), who.c_str(),
+                 stamp.tv_sec, stamp.tv_nsec,
+                 e.seq, level.c_str(), e.msg.c_str());
+        if (log_cb2)
+          log_cb2(log_cb_arg, line.c_str(),
+                  e.channel.c_str(),
+                  who.c_str(), name.c_str(),
+                  stamp.tv_sec, stamp.tv_nsec,
+                  e.seq, level.c_str(), e.msg.c_str());
+      }
+    }
+
+    monclient.sub_got(log_watch, log_last_version);
+  }
+
+  m->put();
+}
+
+/**
+ * Record this client as a service daemon and, when already connected,
+ * register it with the manager client.
+ *
+ * @return -EEXIST if a daemon was already registered, -EINVAL for an empty
+ *         or reserved service name or an empty daemon name, -EBUSY when
+ *         the client is CONNECTING, 0 otherwise
+ */
+int librados::RadosClient::service_daemon_register(
+  const std::string& service,  ///< service name (e.g., 'rgw')
+  const std::string& name,     ///< daemon name (e.g., 'gwfoo')
+  const std::map<std::string,std::string>& metadata)
+{
+  if (service_daemon) {
+    return -EEXIST;
+  }
+
+  // normal ceph entity types are not allowed!
+  const bool reserved_type = (service == "osd" ||
+                              service == "mds" ||
+                              service == "client" ||
+                              service == "mon" ||
+                              service == "mgr");
+  if (reserved_type || service.empty() || name.empty()) {
+    return -EINVAL;
+  }
+
+  collect_sys_info(&daemon_metadata, cct);
+
+  ldout(cct,10) << __func__ << " " << service << "." << name << dendl;
+  service_daemon = true;
+  service_name = service;
+  daemon_name = name;
+  daemon_metadata.insert(metadata.begin(), metadata.end());
+
+  switch (state) {
+  case DISCONNECTED:
+    // nothing more to do in this state
+    return 0;
+  case CONNECTING:
+    return -EBUSY;
+  default:
+    break;
+  }
+
+  mgrclient.service_daemon_register(service_name, daemon_name,
+                                    daemon_metadata);
+  return 0;
+}
+
+/// Forward a status map to the manager client.
+/// @return -ENOTCONN unless the client is CONNECTED, otherwise the result
+///         of MgrClient::service_daemon_update_status()
+int librados::RadosClient::service_daemon_update_status(
+  std::map<std::string,std::string>&& status)
+{
+  if (state == CONNECTED) {
+    return mgrclient.service_daemon_update_status(std::move(status));
+  }
+  return -ENOTCONN;
+}
+
+/// Return MonMap::get_required_features() for the monitor client's monmap.
+mon_feature_t librados::RadosClient::get_required_monitor_features() const
+{
+  auto read_required = [](const MonMap &monmap) {
+    return monmap.get_required_features();
+  };
+  return monclient.with_monmap(read_required);
+}
+
+/**
+ * List the placement groups of a pool that are in the "inconsistent" state.
+ *
+ * Runs the manager command "pg ls" with JSON output and appends the "pgid"
+ * of every returned element to @p pgs. The reply is accepted either as a
+ * top-level array or as an object whose "pg_stats" member holds the array.
+ *
+ * @param pool_id  pool to query
+ * @param pgs      output vector; ids are appended
+ * @return 0 on success (including an empty reply or a missing "pg_stats"
+ *         member), -EINVAL if any JSON fails to parse, or the non-zero
+ *         result of mgr_command()
+ */
+int librados::RadosClient::get_inconsistent_pgs(int64_t pool_id,
+                                                std::vector<std::string>* pgs)
+{
+  vector<string> cmd = {
+    "{\"prefix\": \"pg ls\","
+    "\"pool\": " + std::to_string(pool_id) + ","
+    "\"states\": [\"inconsistent\"],"
+    "\"format\": \"json\"}"
+  };
+  bufferlist inbl, outbl;
+  string outstring;
+  if (auto ret = mgr_command(cmd, inbl, &outbl, &outstring); ret) {
+    return ret;
+  }
+  if (!outbl.length()) {
+    // no pg returned
+    return 0;
+  }
+  JSONParser parser;
+  if (!parser.parse(outbl.c_str(), outbl.length())) {
+    return -EINVAL;
+  }
+  vector<string> v;
+  if (!parser.is_array()) {
+    JSONObj *pgstat_obj = parser.find_obj("pg_stats");
+    if (!pgstat_obj)
+      return 0;
+    auto s = pgstat_obj->get_data();
+    JSONParser pg_stats;
+    if (!pg_stats.parse(s.c_str(), s.length())) {
+      return -EINVAL;
+    }
+    v = pg_stats.get_array_elements();
+  } else {
+    v = parser.get_array_elements();
+  }
+  // const reference: each element is only parsed, so copying the JSON
+  // text once per iteration was pure overhead
+  for (const auto& i : v) {
+    JSONParser pg_json;
+    if (!pg_json.parse(i.c_str(), i.length())) {
+      return -EINVAL;
+    }
+    string pgid;
+    JSONDecoder::decode_json("pgid", pgid, &pg_json);
+    pgs->emplace_back(std::move(pgid));
+  }
+  return 0;
+}
+
+/// Null-terminated list of the configuration keys this observer tracks;
+/// these are the keys handle_conf_change() looks for.
+const char** librados::RadosClient::get_tracked_conf_keys() const
+{
+  static const char *tracked_keys[] = {
+    "librados_thread_count",
+    "rados_mon_op_timeout",
+    nullptr
+  };
+  return tracked_keys;
+}
+
+/**
+ * React to configuration changes.
+ *
+ * A change of "librados_thread_count" stops the io_context pool and starts
+ * it again with the new count; a change of "rados_mon_op_timeout"
+ * refreshes the cached timeout.
+ *
+ * @param conf     configuration to read the new values from
+ * @param changed  names of the keys that changed
+ */
+void librados::RadosClient::handle_conf_change(const ConfigProxy& conf,
+                                               const std::set<std::string> &changed)
+{
+  const auto was_changed = [&changed](const char *key) {
+    return changed.count(key) != 0;
+  };
+
+  if (was_changed("librados_thread_count")) {
+    poolctx.stop();
+    poolctx.start(conf.get_val<std::uint64_t>("librados_thread_count"));
+  }
+  if (was_changed("rados_mon_op_timeout")) {
+    rados_mon_op_timeout =
+      conf.get_val<std::chrono::seconds>("rados_mon_op_timeout");
+  }
+}
diff --git a/src/librados/RadosClient.h b/src/librados/RadosClient.h
new file mode 100644
index 000000000..0db094b18
--- /dev/null
+++ b/src/librados/RadosClient.h
@@ -0,0 +1,198 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+#ifndef CEPH_LIBRADOS_RADOSCLIENT_H
+#define CEPH_LIBRADOS_RADOSCLIENT_H
+
+#include <functional>
+#include <memory>
+#include <string>
+
+#include "msg/Dispatcher.h"
+
+#include "common/async/context_pool.h"
+#include "common/config_fwd.h"
+#include "common/Cond.h"
+#include "common/ceph_mutex.h"
+#include "common/ceph_time.h"
+#include "common/config_obs.h"
+#include "include/common_fwd.h"
+#include "include/rados/librados.h"
+#include "include/rados/librados.hpp"
+#include "mon/MonClient.h"
+#include "mgr/MgrClient.h"
+
+#include "IoCtxImpl.h"
+
+struct Context;
+class Message;
+class MLog;
+class Messenger;
+class AioCompletionImpl;
+
+namespace neorados { namespace detail { class RadosClient; }}
+
+/**
+ * Client-side state behind a librados cluster handle.
+ *
+ * Acts as a message Dispatcher and as a configuration observer, and owns
+ * the monitor client, manager client and io_context pool declared below.
+ *
+ * Standard-library types are spelled with an explicit std:: prefix so this
+ * header does not depend on using-declarations leaked by other headers.
+ *
+ * NOTE: several members are initialised from earlier ones (conf from cct,
+ * monclient and finish_strand from poolctx, mgrclient from monclient), so
+ * the declaration order below must be kept.
+ */
+class librados::RadosClient : public Dispatcher,
+                              public md_config_obs_t
+{
+  friend neorados::detail::RadosClient;
+public:
+  using Dispatcher::cct;
+private:
+  /// Holds the CephContext together with the function used to dispose of it.
+  std::unique_ptr<CephContext,
+                  std::function<void(CephContext*)>> cct_deleter;
+
+public:
+  const ConfigProxy& conf{cct->_conf};
+  ceph::async::io_context_pool poolctx;
+private:
+  /// Connection state; checked by the service-daemon and log-watch calls.
+  enum {
+    DISCONNECTED,
+    CONNECTING,
+    CONNECTED,
+  } state{DISCONNECTED};
+
+  MonClient monclient{cct, poolctx};
+  MgrClient mgrclient{cct, nullptr, &monclient.monmap};
+  Messenger *messenger{nullptr};
+
+  uint64_t instance_id{0};
+
+  bool _dispatch(Message *m);
+  bool ms_dispatch(Message *m) override;
+
+  void ms_handle_connect(Connection *con) override;
+  bool ms_handle_reset(Connection *con) override;
+  void ms_handle_remote_reset(Connection *con) override;
+  bool ms_handle_refused(Connection *con) override;
+
+  Objecter *objecter{nullptr};
+
+  ceph::mutex lock = ceph::make_mutex("librados::RadosClient::lock");
+  ceph::condition_variable cond;
+  /// Reference count manipulated by get()/put(); starts at one.
+  int refcnt{1};
+
+  // cluster-log watch state, maintained by monitor_log()/handle_log()
+  version_t log_last_version{0};
+  rados_log_callback_t log_cb{nullptr};
+  rados_log_callback2_t log_cb2{nullptr};
+  void *log_cb_arg{nullptr};
+  std::string log_watch;
+
+  // service-daemon registration state, set by service_daemon_register()
+  bool service_daemon = false;
+  std::string daemon_name, service_name;
+  std::map<std::string, std::string> daemon_metadata;
+  /// Cached "rados_mon_op_timeout"; a zero value means wait without bound
+  /// in mgr_command().
+  ceph::timespan rados_mon_op_timeout{};
+
+  int wait_for_osdmap();
+
+public:
+  boost::asio::io_context::strand finish_strand{poolctx.get_io_context()};
+
+  explicit RadosClient(CephContext *cct);
+  ~RadosClient() override;
+  int ping_monitor(std::string mon_id, std::string *result);
+  int connect();
+  void shutdown();
+
+  int watch_flush();
+  int async_watch_flush(AioCompletionImpl *c);
+
+  uint64_t get_instance_id();
+
+  int get_min_compatible_osd(int8_t* require_osd_release);
+  int get_min_compatible_client(int8_t* min_compat_client,
+                                int8_t* require_min_compat_client);
+
+  int wait_for_latest_osdmap();
+
+  int create_ioctx(const char *name, IoCtxImpl **io);
+  int create_ioctx(int64_t, IoCtxImpl **io);
+
+  int get_fsid(std::string *s);
+  int64_t lookup_pool(const char *name);
+  bool pool_requires_alignment(int64_t pool_id);
+  int pool_requires_alignment2(int64_t pool_id, bool *requires);
+  uint64_t pool_required_alignment(int64_t pool_id);
+  int pool_required_alignment2(int64_t pool_id, uint64_t *alignment);
+  int pool_get_name(uint64_t pool_id, std::string *name,
+                    bool wait_latest_map = false);
+
+  int pool_list(std::list<std::pair<int64_t, std::string> >& ls);
+  int get_pool_stats(std::list<std::string>& ls,
+                     std::map<std::string,::pool_stat_t> *result,
+                     bool *per_pool);
+  int get_fs_stats(ceph_statfs& result);
+  bool get_pool_is_selfmanaged_snaps_mode(const std::string& pool);
+
+  /*
+  crush_rule defaults to -1, in which case the monitor picks the crush
+  rule in the following order:
+    a) osd pool default crush replicated ruleset
+    b) the first ruleset in the crush ruleset
+    c) error out if no rule is found
+  */
+  int pool_create(std::string& name, int16_t crush_rule=-1);
+  int pool_create_async(std::string& name, PoolAsyncCompletionImpl *c,
+                        int16_t crush_rule=-1);
+  int pool_get_base_tier(int64_t pool_id, int64_t* base_tier);
+  int pool_delete(const char *name);
+
+  int pool_delete_async(const char *name, PoolAsyncCompletionImpl *c);
+
+  int blocklist_add(const std::string& client_address,
+                    uint32_t expire_seconds);
+
+  int mon_command(const std::vector<std::string>& cmd, const bufferlist &inbl,
+                  bufferlist *outbl, std::string *outs);
+  void mon_command_async(const std::vector<std::string>& cmd,
+                         const bufferlist &inbl,
+                         bufferlist *outbl, std::string *outs,
+                         Context *on_finish);
+  int mon_command(int rank,
+                  const std::vector<std::string>& cmd, const bufferlist &inbl,
+                  bufferlist *outbl, std::string *outs);
+  int mon_command(std::string name,
+                  const std::vector<std::string>& cmd, const bufferlist &inbl,
+                  bufferlist *outbl, std::string *outs);
+  int mgr_command(const std::vector<std::string>& cmd, const bufferlist &inbl,
+                  bufferlist *outbl, std::string *outs);
+  int mgr_command(
+    const std::string& name,
+    const std::vector<std::string>& cmd, const bufferlist &inbl,
+    bufferlist *outbl, std::string *outs);
+  int osd_command(int osd, std::vector<std::string>& cmd,
+                  const bufferlist& inbl,
+                  bufferlist *poutbl, std::string *prs);
+  int pg_command(pg_t pgid, std::vector<std::string>& cmd,
+                 const bufferlist& inbl,
+                 bufferlist *poutbl, std::string *prs);
+
+  void handle_log(MLog *m);
+  int monitor_log(const std::string& level, rados_log_callback_t cb,
+                  rados_log_callback2_t cb2, void *arg);
+
+  void get();
+  bool put();
+  void blocklist_self(bool set);
+
+  std::string get_addrs() const;
+
+  int service_daemon_register(
+    const std::string& service,  ///< service name (e.g., 'rgw')
+    const std::string& name,     ///< daemon name (e.g., 'gwfoo')
+    const std::map<std::string,std::string>& metadata); ///< static metadata about daemon
+  int service_daemon_update_status(
+    std::map<std::string,std::string>&& status);
+
+  mon_feature_t get_required_monitor_features() const;
+
+  int get_inconsistent_pgs(int64_t pool_id, std::vector<std::string>* pgs);
+  const char** get_tracked_conf_keys() const override;
+  void handle_conf_change(const ConfigProxy& conf,
+                          const std::set <std::string> &changed) override;
+};
+
+#endif
diff --git a/src/librados/RadosXattrIter.cc b/src/librados/RadosXattrIter.cc
new file mode 100644
index 000000000..f4fb39dd5
--- /dev/null
+++ b/src/librados/RadosXattrIter.cc
@@ -0,0 +1,29 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <stdlib.h>
+
+#include "RadosXattrIter.h"
+
+/// Start with an empty attribute set, the cursor at its end, and no value
+/// buffer.
+librados::RadosXattrsIter::RadosXattrsIter()
+  : i(attrset.end()),
+    val(nullptr)
+{
+}
+
+/// Release the value buffer with free() and clear the pointer.
+librados::RadosXattrsIter::~RadosXattrsIter()
+{
+  free(val);
+  val = nullptr;
+}
diff --git a/src/librados/RadosXattrIter.h b/src/librados/RadosXattrIter.h
new file mode 100644
index 000000000..20a926140
--- /dev/null
+++ b/src/librados/RadosXattrIter.h
@@ -0,0 +1,38 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_LIBRADOS_XATTRITER_H
+#define CEPH_LIBRADOS_XATTRITER_H
+
+#include <string>
+#include <map>
+
+#include "include/buffer.h" // for bufferlist
+
+namespace librados {
+
+  /**
+   * iterator object used in implementation of the external
+   * attributes part of the C interface of librados
+   *
+   * The destructor passes `val` to free(), so whatever is stored there
+   * must be null or a pointer obtained from the malloc family.
+   *
+   * NOTE(review): the type is implicitly copyable although it frees `val`
+   * on destruction; a copy would free the same buffer twice. Confirm no
+   * caller copies it before deleting the copy operations.
+   */
+  struct RadosXattrsIter {
+    RadosXattrsIter();
+    ~RadosXattrsIter();
+    /// attribute name -> value set being iterated
+    std::map<std::string, bufferlist> attrset;
+    /// cursor into attrset; the constructor sets it to attrset.end()
+    std::map<std::string, bufferlist>::iterator i;
+    /// value buffer released with free() by the destructor
+    char *val;
+  };
+} // namespace librados
+
+#endif
diff --git a/src/librados/librados.map b/src/librados/librados.map
new file mode 100644
index 000000000..279a0ba06
--- /dev/null
+++ b/src/librados/librados.map
@@ -0,0 +1,41 @@
+LIBRADOS_PRIVATE {
+ global:
+ extern "C++" {
+ "guard variable for boost::asio::detail::call_stack<boost::asio::detail::strand_executor_service::strand_impl, unsigned char>::top_";
+ "guard variable for boost::asio::detail::call_stack<boost::asio::detail::strand_service::strand_impl, unsigned char>::top_";
+ "guard variable for boost::asio::detail::call_stack<boost::asio::detail::thread_context, boost::asio::detail::thread_info_base>::top_";
+ "boost::asio::detail::call_stack<boost::asio::detail::strand_executor_service::strand_impl, unsigned char>::top_";
+ "boost::asio::detail::call_stack<boost::asio::detail::strand_service::strand_impl, unsigned char>::top_";
+ "boost::asio::detail::call_stack<boost::asio::detail::thread_context, boost::asio::detail::thread_info_base>::top_";
+
+ };
+ local: *;
+};
+
+LIBRADOS_14.2.0 {
+ global:
+ extern "C++" {
+ ceph::buffer::v15_2_0::*;
+ librados::v14_2_0::*;
+
+ "typeinfo for librados::v14_2_0::ObjectOperation";
+ "typeinfo name for librados::v14_2_0::ObjectOperation";
+ "vtable for librados::v14_2_0::ObjectOperation";
+
+ "typeinfo for librados::v14_2_0::ObjectReadOperation";
+ "typeinfo name for librados::v14_2_0::ObjectReadOperation";
+ "vtable for librados::v14_2_0::ObjectReadOperation";
+
+ "typeinfo for librados::v14_2_0::ObjectWriteOperation";
+ "typeinfo name for librados::v14_2_0::ObjectWriteOperation";
+ "vtable for librados::v14_2_0::ObjectWriteOperation";
+
+ "typeinfo for librados::v14_2_0::WatchCtx";
+ "typeinfo name for librados::v14_2_0::WatchCtx";
+ "vtable for librados::v14_2_0::WatchCtx";
+
+ "typeinfo for librados::v14_2_0::WatchCtx2";
+ "typeinfo name for librados::v14_2_0::WatchCtx2";
+ "vtable for librados::v14_2_0::WatchCtx2";
+ };
+} LIBRADOS_PRIVATE;
diff --git a/src/librados/librados_asio.h b/src/librados/librados_asio.h
new file mode 100644
index 000000000..c9b5ffba7
--- /dev/null
+++ b/src/librados/librados_asio.h
@@ -0,0 +1,213 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2017 Red Hat, Inc.
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ */
+
+#ifndef LIBRADOS_ASIO_H
+#define LIBRADOS_ASIO_H
+
+#include "include/rados/librados.hpp"
+#include "common/async/completion.h"
+
+/// Defines asynchronous librados operations that satisfy all of the
+/// "Requirements on asynchronous operations" imposed by the C++ Networking TS
+/// in section 13.2.7. Many of the type and variable names below are taken
+/// directly from those requirements.
+///
+/// The current draft of the Networking TS (as of 2017-11-27) is available here:
+/// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/n4711.pdf
+///
+/// The boost::asio documentation duplicates these requirements here:
+/// http://www.boost.org/doc/libs/1_66_0/doc/html/boost_asio/reference/asynchronous_operations.html
+
+namespace librados {
+
+namespace detail {
+
+/// Deleter that disposes of an AioCompletion by calling its release()
+/// member, so that it can be held in a std::unique_ptr.
+struct AioCompletionDeleter {
+  void operator()(AioCompletion *completion) { completion->release(); }
+};
+/// Owning pointer to an AioCompletion that calls release() on destruction.
+using unique_aio_completion_ptr =
+    std::unique_ptr<AioCompletion, AioCompletionDeleter>;
+
+/// Carries the result of an operation and hands it to the completion
+/// handler. For a non-void Result the value is stored in `result` and is
+/// passed to the handler, after the error code, by dispatch().
+template <typename Result>
+struct Invoker {
+  using Signature = void(boost::system::error_code, Result);
+  Result result;
+
+  /// Dispatch the completion with the error code and the stored result;
+  /// both the completion and the result are moved from.
+  template <typename CompletionPtr>
+  void dispatch(CompletionPtr&& completion, boost::system::error_code ec) {
+    ceph::async::dispatch(std::move(completion), ec, std::move(result));
+  }
+};
+// Result=void specialization: there is nothing to store, so the handler
+// receives only the error code.
+template <>
+struct Invoker<void> {
+  using Signature = void(boost::system::error_code);
+
+  /// Dispatch the completion with just the error code.
+  template <typename CompletionPtr>
+  void dispatch(CompletionPtr&& completion, boost::system::error_code ec) {
+    ceph::async::dispatch(std::move(completion), ec);
+  }
+};
+
+/// State for one asynchronous librados operation: the AioCompletion that
+/// librados signals, plus (through Invoker) storage for the result.
+/// Instances live as the user_data of a ceph::async::Completion.
+template <typename Result>
+struct AsyncOp : Invoker<Result> {
+  unique_aio_completion_ptr aio_completion;
+
+  using Signature = typename Invoker<Result>::Signature;
+  using Completion = ceph::async::Completion<Signature, AsyncOp<Result>>;
+
+  /// Callback installed on the AioCompletion. @p arg is the Completion
+  /// pointer that create() registered.
+  static void aio_dispatch(completion_t cb, void *arg) {
+    // take back ownership of the Completion handed to librados
+    std::unique_ptr<Completion> owned{static_cast<Completion*>(arg)};
+    // pull the op state out first; the Completion's memory is released
+    // during dispatch
+    auto op = std::move(owned->user_data);
+    boost::system::error_code ec;
+    if (const int ret = op.aio_completion->get_return_value(); ret < 0) {
+      ec.assign(-ret, boost::system::system_category());
+    }
+    op.dispatch(std::move(owned), ec);
+  }
+
+  /// Create a Completion bound to @p ex1 and @p handler and attach a new
+  /// AioCompletion whose callback is aio_dispatch().
+  template <typename Executor1, typename CompletionHandler>
+  static auto create(const Executor1& ex1, CompletionHandler&& handler) {
+    auto completion = Completion::create(ex1, std::move(handler));
+    auto *aio = Rados::aio_create_completion(completion.get(), aio_dispatch);
+    completion->user_data.aio_completion.reset(aio);
+    return completion;
+  }
+};
+
+} // namespace detail
+
+
+/// Starts IoCtx::aio_read() and completes the handler produced from
+/// @p token with signature (boost::system::error_code, bufferlist).
+/// If aio_read() itself fails, the handler is posted with that error and
+/// an empty bufferlist.
+template <typename ExecutionContext, typename CompletionToken>
+auto async_read(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
+                size_t len, uint64_t off, CompletionToken&& token)
+{
+  using Op = detail::AsyncOp<bufferlist>;
+  using Signature = typename Op::Signature;
+  boost::asio::async_completion<CompletionToken, Signature> init(token);
+  auto completion = Op::create(ctx.get_executor(), init.completion_handler);
+  auto& op = completion->user_data;
+
+  const int ret = io.aio_read(oid, op.aio_completion.get(), &op.result,
+                              len, off);
+  if (ret >= 0) {
+    completion.release(); // release ownership until completion
+  } else {
+    auto ec = boost::system::error_code{-ret, boost::system::system_category()};
+    ceph::async::post(std::move(completion), ec, bufferlist{});
+  }
+  return init.result.get();
+}
+
+/// Starts IoCtx::aio_write() and completes the handler produced from
+/// @p token with signature (boost::system::error_code).
+/// If aio_write() itself fails, the handler is posted with that error.
+template <typename ExecutionContext, typename CompletionToken>
+auto async_write(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
+                 bufferlist &bl, size_t len, uint64_t off,
+                 CompletionToken&& token)
+{
+  using Op = detail::AsyncOp<void>;
+  using Signature = typename Op::Signature;
+  boost::asio::async_completion<CompletionToken, Signature> init(token);
+  auto completion = Op::create(ctx.get_executor(), init.completion_handler);
+  auto& op = completion->user_data;
+
+  const int ret = io.aio_write(oid, op.aio_completion.get(), bl, len, off);
+  if (ret >= 0) {
+    completion.release(); // release ownership until completion
+  } else {
+    auto ec = boost::system::error_code{-ret, boost::system::system_category()};
+    ceph::async::post(std::move(completion), ec);
+  }
+  return init.result.get();
+}
+
+/// Starts IoCtx::aio_operate() for a read operation and completes the
+/// handler produced from @p token with signature
+/// (boost::system::error_code, bufferlist).
+/// If aio_operate() itself fails, the handler is posted with that error
+/// and an empty bufferlist.
+template <typename ExecutionContext, typename CompletionToken>
+auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
+                   ObjectReadOperation *read_op, int flags,
+                   CompletionToken&& token)
+{
+  using Op = detail::AsyncOp<bufferlist>;
+  using Signature = typename Op::Signature;
+  boost::asio::async_completion<CompletionToken, Signature> init(token);
+  auto completion = Op::create(ctx.get_executor(), init.completion_handler);
+  auto& op = completion->user_data;
+
+  const int ret = io.aio_operate(oid, op.aio_completion.get(), read_op,
+                                 flags, &op.result);
+  if (ret >= 0) {
+    completion.release(); // release ownership until completion
+  } else {
+    auto ec = boost::system::error_code{-ret, boost::system::system_category()};
+    ceph::async::post(std::move(completion), ec, bufferlist{});
+  }
+  return init.result.get();
+}
+
+/// Starts IoCtx::aio_operate() for a write operation and completes the
+/// handler produced from @p token with signature
+/// (boost::system::error_code).
+/// If aio_operate() itself fails, the handler is posted with that error.
+template <typename ExecutionContext, typename CompletionToken>
+auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
+                   ObjectWriteOperation *write_op, int flags,
+                   CompletionToken &&token)
+{
+  using Op = detail::AsyncOp<void>;
+  using Signature = typename Op::Signature;
+  boost::asio::async_completion<CompletionToken, Signature> init(token);
+  auto completion = Op::create(ctx.get_executor(), init.completion_handler);
+  auto& op = completion->user_data;
+
+  const int ret = io.aio_operate(oid, op.aio_completion.get(), write_op,
+                                 flags);
+  if (ret >= 0) {
+    completion.release(); // release ownership until completion
+  } else {
+    auto ec = boost::system::error_code{-ret, boost::system::system_category()};
+    ceph::async::post(std::move(completion), ec);
+  }
+  return init.result.get();
+}
+
+/// Starts IoCtx::aio_notify() and completes the handler produced from
+/// @p token with signature (boost::system::error_code, bufferlist).
+/// If aio_notify() itself fails, the handler is posted with that error and
+/// an empty bufferlist.
+template <typename ExecutionContext, typename CompletionToken>
+auto async_notify(ExecutionContext& ctx, IoCtx& io, const std::string& oid,
+                  bufferlist& bl, uint64_t timeout_ms, CompletionToken &&token)
+{
+  using Op = detail::AsyncOp<bufferlist>;
+  using Signature = typename Op::Signature;
+  boost::asio::async_completion<CompletionToken, Signature> init(token);
+  auto completion = Op::create(ctx.get_executor(), init.completion_handler);
+  auto& op = completion->user_data;
+
+  const int ret = io.aio_notify(oid, op.aio_completion.get(),
+                                bl, timeout_ms, &op.result);
+  if (ret >= 0) {
+    completion.release(); // release ownership until completion
+  } else {
+    auto ec = boost::system::error_code{-ret, boost::system::system_category()};
+    ceph::async::post(std::move(completion), ec, bufferlist{});
+  }
+  return init.result.get();
+}
+
+} // namespace librados
+
+#endif // LIBRADOS_ASIO_H
diff --git a/src/librados/librados_c.cc b/src/librados/librados_c.cc
new file mode 100644
index 000000000..6448cd6a8
--- /dev/null
+++ b/src/librados/librados_c.cc
@@ -0,0 +1,4611 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <limits.h>
+
+#include "acconfig.h"
+#include "common/config.h"
+#include "common/errno.h"
+#include "common/ceph_argparse.h"
+#include "common/ceph_json.h"
+#include "common/common_init.h"
+#include "common/TracepointProvider.h"
+#include "common/hobject.h"
+#include "common/async/waiter.h"
+#include "include/rados/librados.h"
+#include "include/types.h"
+#include <include/stringify.h>
+
+#include "librados/librados_c.h"
+#include "librados/AioCompletionImpl.h"
+#include "librados/IoCtxImpl.h"
+#include "librados/PoolAsyncCompletionImpl.h"
+#include "librados/RadosClient.h"
+#include "librados/RadosXattrIter.h"
+#include "librados/ListObjectImpl.h"
+#include "librados/librados_util.h"
+#include <cls/lock/cls_lock_client.h>
+
+#include <string>
+#include <map>
+#include <set>
+#include <vector>
+#include <list>
+#include <stdexcept>
+
+#ifdef WITH_LTTNG
+#define TRACEPOINT_DEFINE
+#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE
+#include "tracing/librados.h"
+#undef TRACEPOINT_PROBE_DYNAMIC_LINKAGE
+#undef TRACEPOINT_DEFINE
+#else
+#define tracepoint(...)
+#endif
+
+#if defined(HAVE_ASM_SYMVER) || defined(HAVE_ATTR_SYMVER)
+// prefer __attribute__() over global asm(".symver"). because the latter
+// is not parsed by the compiler and is partitioned away by GCC if
+// lto-partitions is enabled, in other words, these asm() statements
+// are dropped by the -flto option by default. the way to address it is
+// to use __attribute__. so this information can be processed by the
+// C compiler, and be preserved after LTO partitions the code
+//
+// The three macros below are placed after a function definition and bind the
+// internal symbol to a versioned public name:
+//   LIBRADOS_C_API_BASE(fn)          _fn_base -> fn@
+//   LIBRADOS_C_API_BASE_DEFAULT(fn)  _fn      -> fn@@
+//   LIBRADOS_C_API_DEFAULT(fn, ver)  _fn      -> fn@@LIBRADOS_<ver>
+#ifdef HAVE_ATTR_SYMVER
+#define LIBRADOS_C_API_BASE(fn) \
+ extern __typeof (_##fn##_base) _##fn##_base __attribute__((__symver__ (#fn "@")))
+#define LIBRADOS_C_API_BASE_DEFAULT(fn) \
+ extern __typeof (_##fn) _##fn __attribute__((__symver__ (#fn "@@")))
+#define LIBRADOS_C_API_DEFAULT(fn, ver) \
+ extern __typeof (_##fn) _##fn __attribute__((__symver__ (#fn "@@LIBRADOS_" #ver)))
+#else
+#define LIBRADOS_C_API_BASE(fn) \
+ asm(".symver _" #fn "_base, " #fn "@")
+#define LIBRADOS_C_API_BASE_DEFAULT(fn) \
+ asm(".symver _" #fn ", " #fn "@@")
+#define LIBRADOS_C_API_DEFAULT(fn, ver) \
+ asm(".symver _" #fn ", " #fn "@@LIBRADOS_" #ver)
+#endif
+
+// Names under which the functions are actually defined when symbol versioning
+// is available: _fn_base for the base version, _fn for the default version.
+#define LIBRADOS_C_API_BASE_F(fn) _ ## fn ## _base
+#define LIBRADOS_C_API_DEFAULT_F(fn) _ ## fn
+
+#else
+// No symbol versioning support: the binding macros expand to nothing and the
+// default version of each function is defined directly under its public name.
+#define LIBRADOS_C_API_BASE(fn)
+#define LIBRADOS_C_API_BASE_DEFAULT(fn)
+#define LIBRADOS_C_API_DEFAULT(fn, ver)
+
+#define LIBRADOS_C_API_BASE_F(fn) _ ## fn ## _base
+// There shouldn't be multiple default versions of the same
+// function.
+#define LIBRADOS_C_API_DEFAULT_F(fn) fn
+#endif
+
+using std::string;
+using std::map;
+using std::set;
+using std::vector;
+using std::list;
+
+#define dout_subsys ceph_subsys_rados
+#undef dout_prefix
+#define dout_prefix *_dout << "librados: "
+
+#define RADOS_LIST_MAX_ENTRIES 1024
+
+static TracepointProvider::Traits tracepoint_traits("librados_tp.so", "rados_tracing");
+
+/*
+ * Structure of this file
+ *
+ * RadosClient and the related classes are the internal implementation of librados.
+ * Above that layer sits the C API, found in include/rados/librados.h, and
+ * the C++ API, found in include/rados/librados.hpp
+ *
+ * The C++ API sometimes implements things in terms of the C API.
+ * Both the C++ and C API rely on RadosClient.
+ *
+ * Visually:
+ * +--------------------------------------+
+ * | C++ API |
+ * +--------------------+ |
+ * | C API | |
+ * +--------------------+-----------------+
+ * | RadosClient |
+ * +--------------------------------------+
+ */
+
+///////////////////////////// C API //////////////////////////////
+
+// Builds the CephContext used by the rados_create*() entry points.
+//
+// clustername, when non-null, is stored as the configuration's cluster name.
+// iparams is forwarded to common_preinit(). The returned context is handed to
+// the caller, which releases it with put() once it is done with it.
+static CephContext *rados_create_cct(
+  const char * const clustername,
+  CephInitParameters *iparams)
+{
+  // missing things compared to global_init:
+  // g_ceph_context, g_conf, g_lockdep, signal handlers
+  CephContext *const context =
+    common_preinit(*iparams, CODE_ENVIRONMENT_LIBRARY, 0);
+  auto& conf = context->_conf;
+  if (clustername != nullptr) {
+    conf->cluster = clustername;
+  }
+  // environment variables override
+  conf.parse_env(context->get_module_type());
+  conf.apply_changes(nullptr);
+
+  TracepointProvider::initialize<tracepoint_traits>(context);
+  return context;
+}
+
+// Creates a RadosClient for entity "client.<id>" (or the default client name
+// when id is null) and stores the new handle in *pcluster. Always returns 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_create)(
+  rados_t *pcluster,
+  const char * const id)
+{
+  CephInitParameters init_params(CEPH_ENTITY_TYPE_CLIENT);
+  if (id != nullptr) {
+    init_params.name.set(CEPH_ENTITY_TYPE_CLIENT, id);
+  }
+  CephContext *const context = rados_create_cct("", &init_params);
+
+  tracepoint(librados, rados_create_enter, id);
+  auto *const client = new librados::RadosClient(context);
+  *pcluster = reinterpret_cast<rados_t>(client);
+  tracepoint(librados, rados_create_exit, 0, *pcluster);
+
+  // release the reference obtained from rados_create_cct()
+  context->put();
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_create);
+
+// as above, but
+// 1) don't assume 'client.'; name is a full type.id namestr
+// 2) allow setting clustername
+// 3) flags is for future expansion (maybe some of the global_init()
+// behavior is appropriate for some consumers of librados, for instance)
+//
+// Returns 0 on success, or -EINVAL when name is null or cannot be parsed.
+// *pcluster is written only on success.
+
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_create2)(
+  rados_t *pcluster,
+  const char *const clustername,
+  const char * const name,
+  uint64_t flags)
+{
+  // client is assumed, but from_str will override
+  int retval = 0;
+  CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT);
+  if (!name || !iparams.name.from_str(name)) {
+    retval = -EINVAL;
+  }
+
+  // The context is created even for a rejected name so that the tracepoint
+  // provider is initialized before the enter/exit events below.
+  CephContext *cct = rados_create_cct(clustername, &iparams);
+  tracepoint(librados, rados_create2_enter, clustername, name, flags);
+  rados_t created = nullptr;
+  if (retval == 0) {
+    created = reinterpret_cast<rados_t>(new librados::RadosClient(cct));
+    *pcluster = created;
+  }
+  // Trace the handle produced here instead of re-reading *pcluster: on the
+  // -EINVAL path nothing was stored there and the caller's variable may be
+  // uninitialized.
+  tracepoint(librados, rados_create2_exit, retval, created);
+
+  cct->put();
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_create2);
+
+/* Intended for Ceph daemons: they have already run global_init and want the
+ * cluster handle to use that existing configuration. The supplied context is
+ * passed straight to the new RadosClient; always returns 0.
+ */
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_create_with_context)(
+  rados_t *pcluster,
+  rados_config_t cct_)
+{
+  auto *const context = reinterpret_cast<CephContext *>(cct_);
+  TracepointProvider::initialize<tracepoint_traits>(context);
+
+  tracepoint(librados, rados_create_with_context_enter, cct_);
+  auto *const client = new librados::RadosClient(context);
+  *pcluster = reinterpret_cast<rados_t>(client);
+  tracepoint(librados, rados_create_with_context_exit, 0, *pcluster);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_create_with_context);
+
+// Returns the client's CephContext as an opaque rados_config_t handle.
+extern "C" rados_config_t LIBRADOS_C_API_DEFAULT_F(rados_cct)(rados_t cluster)
+{
+  tracepoint(librados, rados_cct_enter, cluster);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  rados_config_t handle = (rados_config_t)rados_client->cct;
+  tracepoint(librados, rados_cct_exit, handle);
+  return handle;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_cct);
+
+// Forwards to RadosClient::connect() and returns its result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_connect)(rados_t cluster)
+{
+  tracepoint(librados, rados_connect_enter, cluster);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  const int rc = rados_client->connect();
+  tracepoint(librados, rados_connect_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_connect);
+
+// Shuts the client down and deletes it; the handle must not be used afterwards.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_shutdown)(rados_t cluster)
+{
+  tracepoint(librados, rados_shutdown_enter, cluster);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  rados_client->shutdown();
+  delete rados_client;
+  tracepoint(librados, rados_shutdown_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_shutdown);
+
+// Returns the value of RadosClient::get_instance_id() for this handle.
+extern "C" uint64_t LIBRADOS_C_API_DEFAULT_F(rados_get_instance_id)(
+  rados_t cluster)
+{
+  tracepoint(librados, rados_get_instance_id_enter, cluster);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  const uint64_t instance_id = rados_client->get_instance_id();
+  tracepoint(librados, rados_get_instance_id_exit, instance_id);
+  return instance_id;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_get_instance_id);
+
+// Forwards to RadosClient::get_min_compatible_osd(), which reports through
+// require_osd_release; returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_get_min_compatible_osd)(
+  rados_t cluster,
+  int8_t* require_osd_release)
+{
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  const int rc = rados_client->get_min_compatible_osd(require_osd_release);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_get_min_compatible_osd);
+
+// Forwards to RadosClient::get_min_compatible_client(), which reports through
+// the two out-parameters; returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_get_min_compatible_client)(
+  rados_t cluster,
+  int8_t* min_compat_client,
+  int8_t* require_min_compat_client)
+{
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  const int rc = rados_client->get_min_compatible_client(
+    min_compat_client, require_min_compat_client);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_get_min_compatible_client);
+
+// Reports the librados version triple. Each out-parameter is optional: a null
+// pointer simply skips that component.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_version)(
+  int *major, int *minor, int *extra)
+{
+  tracepoint(librados, rados_version_enter, major, minor, extra);
+  if (major != nullptr) {
+    *major = LIBRADOS_VER_MAJOR;
+  }
+  if (minor != nullptr) {
+    *minor = LIBRADOS_VER_MINOR;
+  }
+  if (extra != nullptr) {
+    *extra = LIBRADOS_VER_EXTRA;
+  }
+  tracepoint(librados, rados_version_exit, LIBRADOS_VER_MAJOR, LIBRADOS_VER_MINOR, LIBRADOS_VER_EXTRA);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_version);
+
+
+// -- config --
+// Parses the configuration file(s) named by path_list into the client's
+// configuration. On success the environment is re-applied on top and the
+// changes are committed; on failure any collected warnings are logged.
+// Returns the result of parse_config_files().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_conf_read_file)(
+  rados_t cluster,
+  const char *path_list)
+{
+  tracepoint(librados, rados_conf_read_file_enter, cluster, path_list);
+  auto *const client = reinterpret_cast<librados::RadosClient *>(cluster);
+  auto& conf = client->cct->_conf;
+  std::ostringstream warnings;
+  const int ret = conf.parse_config_files(path_list, &warnings, 0);
+  if (ret == 0) {
+    // environment variables override
+    conf.parse_env(client->cct->get_module_type());
+    conf.apply_changes(nullptr);
+  } else if (warnings.tellp() > 0) {
+    lderr(client->cct) << warnings.str() << dendl;
+  }
+  // parse errors are reported on both the success and the failure path
+  conf.complain_about_parse_error(client->cct);
+  tracepoint(librados, rados_conf_read_file_exit, ret);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_conf_read_file);
+
+// Feeds argv[0..argc) to the configuration's argument parser and, when parsing
+// succeeds, applies the resulting changes. Returns the parser's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_conf_parse_argv)(
+  rados_t cluster,
+  int argc,
+  const char **argv)
+{
+  tracepoint(librados, rados_conf_parse_argv_enter, cluster, argc);
+  for (int idx = 0; idx < argc; ++idx) {
+    tracepoint(librados, rados_conf_parse_argv_arg, argv[idx]);
+  }
+  auto *const client = reinterpret_cast<librados::RadosClient *>(cluster);
+  auto& conf = client->cct->_conf;
+  std::vector<const char*> args;
+  argv_to_vec(argc, argv, args);
+  const int ret = conf.parse_argv(args);
+  if (ret == 0) {
+    conf.apply_changes(nullptr);
+  }
+  tracepoint(librados, rados_conf_parse_argv_exit, ret);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_conf_parse_argv);
+
+// Variant of rados_conf_parse_argv() that also hands back what the parser did
+// not consume. remargv must be allocated by the caller with room for at least
+// argc entries; on success it holds the n <= argc remaining pointers into the
+// original argv[], and every slot after them is set to NULL.
+
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_conf_parse_argv_remainder)(
+  rados_t cluster, int argc,
+  const char **argv,
+  const char **remargv)
+{
+  tracepoint(librados, rados_conf_parse_argv_remainder_enter, cluster, argc);
+  const unsigned int slots = (unsigned int) argc;
+  for (unsigned int n = 0; n < slots; ++n) {
+    tracepoint(librados, rados_conf_parse_argv_remainder_arg, argv[n]);
+  }
+  auto *const client = reinterpret_cast<librados::RadosClient *>(cluster);
+  auto& conf = client->cct->_conf;
+  std::vector<const char*> args;
+  for (int n = 0; n < argc; ++n) {
+    args.push_back(argv[n]);
+  }
+  const int ret = conf.parse_argv(args);
+  if (ret == 0) {
+    conf.apply_changes(nullptr);
+    // after parse_argv(), args holds only what was left over
+    ceph_assert(args.size() <= slots);
+    for (unsigned int n = 0; n < slots; ++n) {
+      remargv[n] = (n < args.size()) ? args[n] : (const char *)NULL;
+      tracepoint(librados, rados_conf_parse_argv_remainder_remarg, remargv[n]);
+    }
+  }
+  tracepoint(librados, rados_conf_parse_argv_remainder_exit, ret);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_conf_parse_argv_remainder);
+
+// Parses configuration from the environment (env is passed through to
+// parse_env()) and applies the changes. Always returns 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_conf_parse_env)(
+  rados_t cluster, const char *env)
+{
+  tracepoint(librados, rados_conf_parse_env_enter, cluster, env);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  auto& config = rados_client->cct->_conf;
+  config.parse_env(rados_client->cct->get_module_type(), env);
+  config.apply_changes(nullptr);
+  tracepoint(librados, rados_conf_parse_env_exit, 0);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_conf_parse_env);
+
+// Sets a single configuration option. The change is applied only when
+// set_val() succeeds; its result is returned either way.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_conf_set)(
+  rados_t cluster,
+  const char *option,
+  const char *value)
+{
+  tracepoint(librados, rados_conf_set_enter, cluster, option, value);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  auto& config = rados_client->cct->_conf;
+  const int rc = config.set_val(option, value);
+  if (rc == 0) {
+    config.apply_changes(nullptr);
+  }
+  tracepoint(librados, rados_conf_set_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_conf_set);
+
+/* cluster info */
+// Copies the statistics returned by RadosClient::get_fs_stats() into *result
+// and returns that call's result code. *result is written whether or not the
+// lookup succeeded.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_cluster_stat)(
+  rados_t cluster,
+  rados_cluster_stat_t *result)
+{
+  tracepoint(librados, rados_cluster_stat_enter, cluster);
+  librados::RadosClient *client = (librados::RadosClient *)cluster;
+
+  // Value-initialize: the fields are copied out below regardless of r, so a
+  // failed lookup must not hand indeterminate values to the caller.
+  // NOTE(review): assumes get_fs_stats() can leave stats untouched on error.
+  ceph_statfs stats{};
+  int r = client->get_fs_stats(stats);
+  result->kb = stats.kb;
+  result->kb_used = stats.kb_used;
+  result->kb_avail = stats.kb_avail;
+  result->num_objects = stats.num_objects;
+  tracepoint(librados, rados_cluster_stat_exit, r, result->kb, result->kb_used, result->kb_avail, result->num_objects);
+  return r;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_cluster_stat);
+
+// Reads a configuration option into the caller's buffer (buf, len) via
+// get_val() and returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_conf_get)(
+  rados_t cluster,
+  const char *option,
+  char *buf, size_t len)
+{
+  tracepoint(librados, rados_conf_get_enter, cluster, option, len);
+  // get_val() takes the destination as char**, so pass a local copy of buf
+  char *dest = buf;
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  const auto& config = rados_client->cct->_conf;
+  const int rc = config.get_val(option, &dest, len);
+  tracepoint(librados, rados_conf_get_exit, rc, rc ? "" : option);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_conf_get);
+
+// Returns the result of RadosClient::lookup_pool() for the given pool name.
+extern "C" int64_t LIBRADOS_C_API_DEFAULT_F(rados_pool_lookup)(
+  rados_t cluster,
+  const char *name)
+{
+  tracepoint(librados, rados_pool_lookup_enter, cluster, name);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  const int64_t pool_id = rados_client->lookup_pool(name);
+  tracepoint(librados, rados_pool_lookup_exit, pool_id);
+  return pool_id;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_pool_lookup);
+
+// Looks up the name of pool `id` and copies it, NUL-terminated, into buf.
+// Returns the name length on success, the error from pool_get_name() if the
+// lookup fails, or -ERANGE when the name plus terminator does not fit in
+// maxlen bytes.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_reverse_lookup)(
+  rados_t cluster,
+  int64_t id,
+  char *buf,
+  size_t maxlen)
+{
+  tracepoint(librados, rados_pool_reverse_lookup_enter, cluster, id, maxlen);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  std::string pool_name;
+  const int r = rados_client->pool_get_name(id, &pool_name, true);
+  if (r < 0) {
+    tracepoint(librados, rados_pool_reverse_lookup_exit, r, "");
+    return r;
+  }
+  const bool fits = pool_name.length() < maxlen;
+  if (!fits) {
+    tracepoint(librados, rados_pool_reverse_lookup_exit, -ERANGE, "");
+    return -ERANGE;
+  }
+  strcpy(buf, pool_name.c_str());
+  const int copied = pool_name.length();
+  tracepoint(librados, rados_pool_reverse_lookup_exit, copied, buf);
+  return copied;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_pool_reverse_lookup);
+
+// Copies the cluster fsid, NUL-terminated, into buf. Returns the fsid length,
+// or -ERANGE when the fsid plus terminator does not fit in maxlen bytes.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_cluster_fsid)(
+  rados_t cluster,
+  char *buf,
+  size_t maxlen)
+{
+  tracepoint(librados, rados_cluster_fsid_enter, cluster, maxlen);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  std::string cluster_fsid;
+  rados_client->get_fsid(&cluster_fsid);
+  const bool fits = cluster_fsid.length() < maxlen;
+  if (!fits) {
+    tracepoint(librados, rados_cluster_fsid_exit, -ERANGE, "");
+    return -ERANGE;
+  }
+  strcpy(buf, cluster_fsid.c_str());
+  const int copied = cluster_fsid.length();
+  tracepoint(librados, rados_cluster_fsid_exit, copied, buf);
+  return copied;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_cluster_fsid);
+
+// Forwards to RadosClient::wait_for_latest_osdmap() and returns its result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_wait_for_latest_osdmap)(
+  rados_t cluster)
+{
+  tracepoint(librados, rados_wait_for_latest_osdmap_enter, cluster);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  const int rc = rados_client->wait_for_latest_osdmap();
+  tracepoint(librados, rados_wait_for_latest_osdmap_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_wait_for_latest_osdmap);
+
+// Forwards to RadosClient::blocklist_add() with the given client address and
+// expiry (in seconds); returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_blocklist_add)(
+  rados_t cluster,
+  char *client_address,
+  uint32_t expire_seconds)
+{
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  const int rc = rados_client->blocklist_add(client_address, expire_seconds);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_blocklist_add);
+
+// Same operation under the other exported name: simply calls
+// rados_blocklist_add() with identical arguments.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_blacklist_add)(
+  rados_t cluster,
+  char *client_address,
+  uint32_t expire_seconds)
+{
+  const int rc = LIBRADOS_C_API_DEFAULT_F(rados_blocklist_add)(
+    cluster, client_address, expire_seconds);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_blacklist_add);
+
+// Stores a strdup()'ed copy of RadosClient::get_addrs() in *addrs.
+// Returns 0 on success, or -ENOMEM (with *addrs set to NULL) when the copy
+// cannot be allocated.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_getaddrs)(
+  rados_t cluster,
+  char** addrs)
+{
+  librados::RadosClient *radosp = (librados::RadosClient *)cluster;
+  auto s = radosp->get_addrs();
+  *addrs = strdup(s.c_str());
+  // strdup() returns NULL on allocation failure; do not report that as success
+  return *addrs ? 0 : -ENOMEM;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_getaddrs);
+
+// Sets CEPH_OSD_FLAG_FULL_TRY in the io context's extra op flags.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_set_osdmap_full_try)(
+  rados_ioctx_t io)
+{
+  auto *const ioctx = reinterpret_cast<librados::IoCtxImpl *>(io);
+  ioctx->extra_op_flags |= CEPH_OSD_FLAG_FULL_TRY;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_set_osdmap_full_try);
+
+// Clears CEPH_OSD_FLAG_FULL_TRY from the io context's extra op flags.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_unset_osdmap_full_try)(
+  rados_ioctx_t io)
+{
+  auto *const ioctx = reinterpret_cast<librados::IoCtxImpl *>(io);
+  ioctx->extra_op_flags &= ~CEPH_OSD_FLAG_FULL_TRY;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_unset_osdmap_full_try);
+
+// Sets CEPH_OSD_FLAG_FULL_TRY in the io context's extra op flags (the same
+// flag rados_set_osdmap_full_try() sets).
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_set_pool_full_try)(
+  rados_ioctx_t io)
+{
+  auto *const ioctx = reinterpret_cast<librados::IoCtxImpl *>(io);
+  ioctx->extra_op_flags |= CEPH_OSD_FLAG_FULL_TRY;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_set_pool_full_try);
+
+// Clears CEPH_OSD_FLAG_FULL_TRY from the io context's extra op flags (the
+// same flag rados_unset_osdmap_full_try() clears).
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_unset_pool_full_try)(
+  rados_ioctx_t io)
+{
+  auto *const ioctx = reinterpret_cast<librados::IoCtxImpl *>(io);
+  ioctx->extra_op_flags &= ~CEPH_OSD_FLAG_FULL_TRY;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_unset_pool_full_try);
+
+// Forwards to IoCtxImpl::application_enable(); any non-zero `force` is passed
+// on as true.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_application_enable)(
+  rados_ioctx_t io,
+  const char *app_name,
+  int force)
+{
+  auto *const ioctx = reinterpret_cast<librados::IoCtxImpl *>(io);
+  const bool forced = (force != 0);
+  return ioctx->application_enable(app_name, forced);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_application_enable);
+
+// Writes the pool's application names into `values` as consecutive
+// NUL-terminated strings.
+//
+// On entry *values_len is the capacity of `values` in bytes; on return (both
+// success and -ERANGE) it is the number of bytes the names occupy, i.e. the
+// sum of (name length + 1). Returns the error from application_list() if the
+// lookup fails, -ERANGE when the buffer is too small, 0 otherwise.
+//
+// When the buffer has room beyond the names, one extra NUL is appended after
+// the last entry so the list also ends in an empty string.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_application_list)(
+  rados_ioctx_t io,
+  char *values,
+  size_t *values_len)
+{
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  std::set<std::string> app_names;
+  int r = ctx->application_list(&app_names);
+  if (r < 0) {
+    return r;
+  }
+
+  size_t total_len = 0;
+  for (const auto& app_name : app_names) {
+    total_len += app_name.size() + 1;
+  }
+
+  // remember the caller's capacity before reporting the required size
+  const size_t capacity = *values_len;
+  *values_len = total_len;
+  if (capacity < total_len) {
+    return -ERANGE;
+  }
+
+  char *values_p = values;
+  for (const auto& app_name : app_names) {
+    size_t len = app_name.size() + 1;
+    strncpy(values_p, app_name.c_str(), len);
+    values_p += len;
+  }
+  // The trailing terminator lives at offset total_len, which is outside a
+  // buffer of exactly total_len bytes (the size -ERANGE tells callers to
+  // allocate), so only write it when there is spare room.
+  if (capacity > total_len) {
+    *values_p = '\0';
+  }
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_application_list);
+
+// Fetches one application metadata value and copies it, NUL-terminated, into
+// `value`. *value_len is the buffer capacity on entry and the required size
+// (value length + 1) on return. Returns the lookup error if it fails, -ERANGE
+// when the buffer is too small, 0 otherwise.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_application_metadata_get)(
+  rados_ioctx_t io,
+  const char *app_name,
+  const char *key,
+  char *value,
+  size_t *value_len)
+{
+  auto *const ioctx = reinterpret_cast<librados::IoCtxImpl *>(io);
+  std::string stored;
+  const int r = ioctx->application_metadata_get(app_name, key, &stored);
+  if (r < 0) {
+    return r;
+  }
+
+  const size_t needed = stored.size() + 1;
+  const bool fits = (*value_len >= needed);
+  if (fits) {
+    strncpy(value, stored.c_str(), needed);
+  }
+  *value_len = needed;
+  return fits ? 0 : -ERANGE;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_application_metadata_get);
+
+// Forwards to IoCtxImpl::application_metadata_set() and returns its result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_application_metadata_set)(
+  rados_ioctx_t io,
+  const char *app_name,
+  const char *key,
+  const char *value)
+{
+  auto *const ioctx = reinterpret_cast<librados::IoCtxImpl *>(io);
+  const int rc = ioctx->application_metadata_set(app_name, key, value);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_application_metadata_set);
+
+// Forwards to IoCtxImpl::application_metadata_remove() and returns its result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_application_metadata_remove)(
+  rados_ioctx_t io,
+  const char *app_name,
+  const char *key)
+{
+  auto *const ioctx = reinterpret_cast<librados::IoCtxImpl *>(io);
+  const int rc = ioctx->application_metadata_remove(app_name, key);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_application_metadata_remove);
+
+// Writes an application's metadata into two parallel buffers: `keys` receives
+// the keys and `values` the matching values, each as consecutive
+// NUL-terminated strings in the map's iteration order.
+//
+// On entry *keys_len / *vals_len are the buffer capacities in bytes; on return
+// (both success and -ERANGE) they hold the bytes the strings occupy, i.e. the
+// sum of (length + 1). Returns the lookup error if it fails, -ERANGE when
+// either buffer is too small, 0 otherwise.
+//
+// A buffer with room beyond its strings additionally gets one extra NUL after
+// the last entry.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_application_metadata_list)(
+  rados_ioctx_t io,
+  const char *app_name,
+  char *keys, size_t *keys_len,
+  char *values, size_t *vals_len)
+{
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  std::map<std::string, std::string> metadata;
+  int r = ctx->application_metadata_list(app_name, &metadata);
+  if (r < 0) {
+    return r;
+  }
+
+  size_t total_key_len = 0;
+  size_t total_val_len = 0;
+  for (const auto& pair : metadata) {
+    total_key_len += pair.first.size() + 1;
+    total_val_len += pair.second.size() + 1;
+  }
+
+  // remember the caller's capacities before reporting the required sizes
+  const size_t key_capacity = *keys_len;
+  const size_t val_capacity = *vals_len;
+  *keys_len = total_key_len;
+  *vals_len = total_val_len;
+  if (key_capacity < total_key_len || val_capacity < total_val_len) {
+    return -ERANGE;
+  }
+
+  char *keys_p = keys;
+  char *vals_p = values;
+  for (const auto& pair : metadata) {
+    size_t key_len = pair.first.size() + 1;
+    strncpy(keys_p, pair.first.c_str(), key_len);
+    keys_p += key_len;
+
+    size_t val_len = pair.second.size() + 1;
+    strncpy(vals_p, pair.second.c_str(), val_len);
+    vals_p += val_len;
+  }
+  // The trailing terminators sit at offsets total_key_len / total_val_len,
+  // which are outside exactly-sized buffers, so only write them when the
+  // respective buffer has spare room.
+  if (key_capacity > total_key_len) {
+    *keys_p = '\0';
+  }
+  if (val_capacity > total_val_len) {
+    *vals_p = '\0';
+  }
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_application_metadata_list);
+
+// Lists pool names into buf as consecutive NUL-terminated strings.
+//
+// buf (when non-null) is zeroed first, then names are copied in list order
+// until the first one that no longer fits in the remaining space; no later
+// name is copied after that. Returns the pool_list() error if it fails,
+// -EINVAL for a null buf with non-zero len, and otherwise the total size of
+// all names (each length + 1) plus one.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_list)(
+  rados_t cluster,
+  char *buf,
+  size_t len)
+{
+  tracepoint(librados, rados_pool_list_enter, cluster, len);
+  auto *const client = reinterpret_cast<librados::RadosClient *>(cluster);
+  std::list<std::pair<int64_t, std::string> > pools;
+  const int r = client->pool_list(pools);
+  if (r < 0) {
+    tracepoint(librados, rados_pool_list_exit, r);
+    return r;
+  }
+
+  if (len > 0 && buf == nullptr) {
+    tracepoint(librados, rados_pool_list_exit, -EINVAL);
+    return -EINVAL;
+  }
+
+  char *cursor = buf;
+  if (cursor != nullptr) {
+    // FIPS zeroization audit 20191116: this memset is not security related.
+    memset(cursor, 0, len);
+  }
+  int needed = 0;
+  bool out_of_room = false;  // latched by the first name that does not fit
+  for (const auto& entry : pools) {
+    const std::string& pool_name = entry.second;
+    const int rl = pool_name.length() + 1;
+    if (!out_of_room && len < (unsigned)rl) {
+      out_of_room = true;
+    }
+    if (!out_of_room) {
+      const char* pool = pool_name.c_str();
+      tracepoint(librados, rados_pool_list_pool, pool);
+      if (cursor != nullptr) {
+        strncat(cursor, pool, rl);
+        cursor += rl;
+      }
+      len -= rl;
+    }
+    // every name counts towards the required size, copied or not
+    needed += rl;
+  }
+  const int retval = needed + 1;
+  tracepoint(librados, rados_pool_list_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_pool_list);
+
+// Lists the inconsistent PGs of a pool into buf as consecutive NUL-terminated
+// strings.
+//
+// buf (when non-null) is zeroed first; each entry is copied only if it fits in
+// the space still remaining, and entries that do not fit are skipped. Returns
+// the get_inconsistent_pgs() error if it fails, -EINVAL for a null buf with
+// non-zero len, and otherwise the total size of all entries (each length + 1)
+// plus one.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_inconsistent_pg_list)(
+  rados_t cluster,
+  int64_t pool_id,
+  char *buf,
+  size_t len)
+{
+  tracepoint(librados, rados_inconsistent_pg_list_enter, cluster, pool_id, len);
+  auto *const client = reinterpret_cast<librados::RadosClient *>(cluster);
+  std::vector<std::string> pgs;
+  const int r = client->get_inconsistent_pgs(pool_id, &pgs);
+  if (r < 0) {
+    tracepoint(librados, rados_inconsistent_pg_list_exit, r);
+    return r;
+  }
+
+  if (len > 0 && buf == nullptr) {
+    tracepoint(librados, rados_inconsistent_pg_list_exit, -EINVAL);
+    return -EINVAL;
+  }
+
+  char *cursor = buf;
+  if (cursor != nullptr) {
+    // FIPS zeroization audit 20191116: this memset is not security related.
+    memset(cursor, 0, len);
+  }
+  int needed = 0;
+  for (auto it = pgs.cbegin(); it != pgs.cend(); ++it) {
+    const unsigned rl = it->length() + 1;
+    needed += rl;
+    if (cursor == nullptr || len < rl) {
+      continue;  // counted above, but not copied
+    }
+    tracepoint(librados, rados_inconsistent_pg_list_pg, it->c_str());
+    strncat(cursor, it->c_str(), rl);
+    cursor += rl;
+    len -= rl;
+  }
+  const int retval = needed + 1;
+  tracepoint(librados, rados_inconsistent_pg_list_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_inconsistent_pg_list);
+
+
+// Decodes a packed dictionary into *dict_map.
+//
+// `dict` is a sequence of NUL-terminated strings, alternating key and value,
+// that ends where a key would start with '\0' (i.e. an empty string in key
+// position). Every key must be followed by a value string. A later duplicate
+// key overwrites the earlier entry. A null `dict` is treated as empty.
+static void dict_to_map(const char *dict,
+                        std::map<std::string, std::string>* dict_map)
+{
+  if (dict == nullptr) {
+    return;  // nothing to decode
+  }
+  while (*dict != '\0') {
+    const char* key = dict;
+    dict += strlen(key) + 1;
+    const char* value = dict;
+    dict += strlen(value) + 1;
+    (*dict_map)[key] = value;
+  }
+}
+
+// Registers this client as a service daemon. metadata_dict is a packed
+// key/value dictionary that is decoded with dict_to_map() before being passed
+// to RadosClient::service_daemon_register().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_service_register)(
+  rados_t cluster,
+  const char *service,
+  const char *daemon,
+  const char *metadata_dict)
+{
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+
+  std::map<std::string, std::string> decoded;
+  dict_to_map(metadata_dict, &decoded);
+
+  const int rc = rados_client->service_daemon_register(service, daemon, decoded);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_service_register);
+
+// Updates the service daemon status. status_dict is a packed key/value
+// dictionary that is decoded with dict_to_map() and then moved into
+// RadosClient::service_daemon_update_status().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_service_update_status)(
+  rados_t cluster,
+  const char *status_dict)
+{
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+
+  std::map<std::string, std::string> decoded;
+  dict_to_map(status_dict, &decoded);
+
+  const int rc = rados_client->service_daemon_update_status(std::move(decoded));
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_service_update_status);
+
+// Copies the contents of a bufferlist out to C-style output parameters.
+//
+// outbuf, when non-null, receives a malloc()'ed copy of the data, or NULL if
+// the bufferlist is empty or the allocation fails. outbuflen, when non-null,
+// receives the data length (0 when the allocation failed). Either pointer may
+// be null, in which case that output is skipped.
+static void do_out_buffer(bufferlist& outbl, char **outbuf, size_t *outbuflen)
+{
+  size_t len = outbl.length();
+  if (outbuf) {
+    *outbuf = NULL;
+    if (len > 0) {
+      *outbuf = (char *)malloc(len);
+      if (*outbuf) {
+        memcpy(*outbuf, outbl.c_str(), len);
+      } else {
+        // allocation failed: report an empty result instead of copying
+        // through a null pointer
+        len = 0;
+      }
+    }
+  }
+  if (outbuflen)
+    *outbuflen = len;
+}
+
+// Copies the contents of a string out to C-style output parameters.
+//
+// outbuf, when non-null, receives a malloc()'ed copy of the bytes (without a
+// terminating NUL), or NULL if the string is empty or the allocation fails.
+// outbuflen, when non-null, receives the byte count (0 when the allocation
+// failed). Either pointer may be null, in which case that output is skipped.
+static void do_out_buffer(std::string& outbl, char **outbuf, size_t *outbuflen)
+{
+  size_t len = outbl.length();
+  if (outbuf) {
+    *outbuf = NULL;
+    if (len > 0) {
+      *outbuf = (char *)malloc(len);
+      if (*outbuf) {
+        memcpy(*outbuf, outbl.c_str(), len);
+      } else {
+        // allocation failed: report an empty result instead of copying
+        // through a null pointer
+        len = 0;
+      }
+    }
+  }
+  if (outbuflen)
+    *outbuflen = len;
+}
+
+// Pings the monitor named mon_id. Returns -EINVAL when mon_id is null,
+// otherwise the result of RadosClient::ping_monitor(). When that result is 0
+// the reply is copied out through outstr/outstrlen with do_out_buffer().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ping_monitor)(
+  rados_t cluster,
+  const char *mon_id,
+  char **outstr,
+  size_t *outstrlen)
+{
+  tracepoint(librados, rados_ping_monitor_enter, cluster, mon_id);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+  std::string reply;
+
+  if (mon_id == nullptr) {
+    tracepoint(librados, rados_ping_monitor_exit, -EINVAL, NULL, NULL);
+    return -EINVAL;
+  }
+
+  const int ret = rados_client->ping_monitor(mon_id, &reply);
+  if (ret == 0) {
+    do_out_buffer(reply, outstr, outstrlen);
+  }
+  tracepoint(librados, rados_ping_monitor_exit, ret, ret < 0 ? NULL : outstr, ret < 0 ? NULL : outstrlen);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ping_monitor);
+
+// Sends a command (cmd[0..cmdlen) plus the input buffer) through
+// RadosClient::mon_command(). The reply data and status string are copied to
+// outbuf/outbuflen and outs/outslen as malloc()'ed buffers (see
+// do_out_buffer()); the command's result code is returned.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_mon_command)(
+  rados_t cluster,
+  const char **cmd, size_t cmdlen,
+  const char *inbuf, size_t inbuflen,
+  char **outbuf, size_t *outbuflen,
+  char **outs, size_t *outslen)
+{
+  tracepoint(librados, rados_mon_command_enter, cluster, cmdlen, inbuf, inbuflen);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+
+  std::vector<std::string> cmdvec;
+  for (size_t n = 0; n < cmdlen; ++n) {
+    tracepoint(librados, rados_mon_command_cmd, cmd[n]);
+    cmdvec.emplace_back(cmd[n]);
+  }
+
+  bufferlist input;
+  input.append(inbuf, inbuflen);
+  bufferlist reply;
+  std::string status;
+  const int ret = rados_client->mon_command(cmdvec, input, &reply, &status);
+
+  do_out_buffer(reply, outbuf, outbuflen);
+  do_out_buffer(status, outs, outslen);
+  tracepoint(librados, rados_mon_command_exit, ret, outbuf, outbuflen, outs, outslen);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_mon_command);
+
+// Sends a command to one specific monitor. `name` is treated as a monitor rank
+// when it parses completely as a non-negative decimal number, and as a monitor
+// name otherwise. Outputs are returned as in rados_mon_command().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_mon_command_target)(
+  rados_t cluster,
+  const char *name,
+  const char **cmd, size_t cmdlen,
+  const char *inbuf, size_t inbuflen,
+  char **outbuf, size_t *outbuflen,
+  char **outs, size_t *outslen)
+{
+  tracepoint(librados, rados_mon_command_target_enter, cluster, name, cmdlen, inbuf, inbuflen);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+
+  // is this a numeric id?
+  char *endptr;
+  errno = 0;
+  long rank = strtol(name, &endptr, 10);
+  const bool out_of_range =
+    (errno == ERANGE && (rank == LONG_MAX || rank == LONG_MIN));
+  const bool conversion_error = (errno != 0 && rank == 0);
+  const bool no_digits = (endptr == name);
+  const bool extra_characters = (*endptr != '\0');
+  if (out_of_range || conversion_error || no_digits || extra_characters) {
+    rank = -1;
+  }
+
+  std::vector<std::string> cmdvec;
+  for (size_t n = 0; n < cmdlen; ++n) {
+    tracepoint(librados, rados_mon_command_target_cmd, cmd[n]);
+    cmdvec.emplace_back(cmd[n]);
+  }
+
+  bufferlist input;
+  input.append(inbuf, inbuflen);
+  bufferlist reply;
+  std::string status;
+  int ret;
+  if (rank < 0) {
+    ret = rados_client->mon_command(name, cmdvec, input, &reply, &status);
+  } else {
+    ret = rados_client->mon_command(rank, cmdvec, input, &reply, &status);
+  }
+
+  do_out_buffer(reply, outbuf, outbuflen);
+  do_out_buffer(status, outs, outslen);
+  tracepoint(librados, rados_mon_command_target_exit, ret, outbuf, outbuflen, outs, outslen);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_mon_command_target);
+
+// Sends a command to OSD `osdid` through RadosClient::osd_command(). The reply
+// data and status string are copied to outbuf/outbuflen and outs/outslen as
+// malloc()'ed buffers (see do_out_buffer()); the command's result code is
+// returned.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_osd_command)(
+  rados_t cluster, int osdid, const char **cmd,
+  size_t cmdlen,
+  const char *inbuf, size_t inbuflen,
+  char **outbuf, size_t *outbuflen,
+  char **outs, size_t *outslen)
+{
+  tracepoint(librados, rados_osd_command_enter, cluster, osdid, cmdlen, inbuf, inbuflen);
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+
+  std::vector<std::string> cmdvec;
+  for (size_t n = 0; n < cmdlen; ++n) {
+    tracepoint(librados, rados_osd_command_cmd, cmd[n]);
+    cmdvec.emplace_back(cmd[n]);
+  }
+
+  bufferlist input;
+  input.append(inbuf, inbuflen);
+  bufferlist reply;
+  std::string status;
+  const int ret = rados_client->osd_command(osdid, cmdvec, input, &reply, &status);
+
+  do_out_buffer(reply, outbuf, outbuflen);
+  do_out_buffer(status, outs, outslen);
+  tracepoint(librados, rados_osd_command_exit, ret, outbuf, outbuflen, outs, outslen);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_osd_command);
+
+// Sends a command through RadosClient::mgr_command(). The reply data and
+// status string are copied to outbuf/outbuflen and outs/outslen as malloc()'ed
+// buffers (see do_out_buffer()); the command's result code is returned.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_mgr_command)(
+  rados_t cluster, const char **cmd,
+  size_t cmdlen,
+  const char *inbuf, size_t inbuflen,
+  char **outbuf, size_t *outbuflen,
+  char **outs, size_t *outslen)
+{
+  tracepoint(librados, rados_mgr_command_enter, cluster, cmdlen, inbuf,
+             inbuflen);
+
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+
+  std::vector<std::string> cmdvec;
+  for (size_t n = 0; n < cmdlen; ++n) {
+    tracepoint(librados, rados_mgr_command_cmd, cmd[n]);
+    cmdvec.emplace_back(cmd[n]);
+  }
+
+  bufferlist input;
+  input.append(inbuf, inbuflen);
+  bufferlist reply;
+  std::string status;
+  const int ret = rados_client->mgr_command(cmdvec, input, &reply, &status);
+
+  do_out_buffer(reply, outbuf, outbuflen);
+  do_out_buffer(status, outs, outslen);
+  tracepoint(librados, rados_mgr_command_exit, ret, outbuf, outbuflen, outs,
+             outslen);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_mgr_command);
+
+// Like rados_mgr_command(), but passes `name` along to
+// RadosClient::mgr_command() to select the target. Outputs are returned the
+// same way.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_mgr_command_target)(
+  rados_t cluster,
+  const char *name,
+  const char **cmd,
+  size_t cmdlen,
+  const char *inbuf, size_t inbuflen,
+  char **outbuf, size_t *outbuflen,
+  char **outs, size_t *outslen)
+{
+  tracepoint(librados, rados_mgr_command_target_enter, cluster, name, cmdlen,
+             inbuf, inbuflen);
+
+  auto *const rados_client = reinterpret_cast<librados::RadosClient *>(cluster);
+
+  std::vector<std::string> cmdvec;
+  for (size_t n = 0; n < cmdlen; ++n) {
+    tracepoint(librados, rados_mgr_command_target_cmd, cmd[n]);
+    cmdvec.emplace_back(cmd[n]);
+  }
+
+  bufferlist input;
+  input.append(inbuf, inbuflen);
+  bufferlist reply;
+  std::string status;
+  const int ret = rados_client->mgr_command(name, cmdvec, input, &reply, &status);
+
+  do_out_buffer(reply, outbuf, outbuflen);
+  do_out_buffer(status, outs, outslen);
+  tracepoint(librados, rados_mgr_command_target_exit, ret, outbuf, outbuflen,
+             outs, outslen);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_mgr_command_target);
+
+// C API: send a command to the placement group named by pgstr.
+//
+// The command strings are traced and copied and inbuf is copied into a
+// bufferlist; only then is pgstr parsed into a pg_t.  If parsing fails the
+// function returns -EINVAL right away: no exit tracepoint fires and the out
+// parameters are left untouched.  Otherwise the reply data / reply string
+// are handed to do_out_buffer() (defined elsewhere in this file) and the
+// result of client->pg_command() is returned.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pg_command)(
+  rados_t cluster, const char *pgstr,
+  const char **cmd, size_t cmdlen,
+  const char *inbuf, size_t inbuflen,
+  char **outbuf, size_t *outbuflen,
+  char **outs, size_t *outslen)
+{
+  tracepoint(librados, rados_pg_command_enter, cluster, pgstr, cmdlen, inbuf, inbuflen);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+
+  vector<string> args;
+  for (size_t idx = 0; idx != cmdlen; ++idx) {
+    tracepoint(librados, rados_pg_command_cmd, cmd[idx]);
+    args.push_back(cmd[idx]);
+  }
+
+  bufferlist input;
+  input.append(inbuf, inbuflen);
+
+  pg_t target;
+  const bool parsed = target.parse(pgstr);
+  if (!parsed) {
+    return -EINVAL;
+  }
+
+  bufferlist reply_data;
+  string reply_status;
+  int ret = rados->pg_command(target, args, input, &reply_data, &reply_status);
+
+  do_out_buffer(reply_data, outbuf, outbuflen);
+  do_out_buffer(reply_status, outs, outslen);
+  tracepoint(librados, rados_pg_command_exit, ret, outbuf, outbuflen, outs, outslen);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_pg_command);
+
+// C API: release a buffer with free().  A NULL buf is accepted.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_buffer_free)(char *buf)
+{
+  tracepoint(librados, rados_buffer_free_enter, buf);
+  // free(NULL) is defined to do nothing, so no explicit null guard is needed
+  free(buf);
+  tracepoint(librados, rados_buffer_free_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_buffer_free);
+
+// C API: forward to client->monitor_log() with `cb` in the first callback
+// slot and nullptr in the second.  Returns monitor_log()'s result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_monitor_log)(
+  rados_t cluster,
+  const char *level,
+  rados_log_callback_t cb,
+  void *arg)
+{
+  tracepoint(librados, rados_monitor_log_enter, cluster, level, cb, arg);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+  const int rc = rados->monitor_log(level, cb, nullptr, arg);
+  tracepoint(librados, rados_monitor_log_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_monitor_log);
+
+// C API: forward to client->monitor_log() with nullptr in the first
+// callback slot and `cb` in the second.  Returns monitor_log()'s result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_monitor_log2)(
+  rados_t cluster,
+  const char *level,
+  rados_log_callback2_t cb,
+  void *arg)
+{
+  tracepoint(librados, rados_monitor_log2_enter, cluster, level, cb, arg);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+  const int rc = rados->monitor_log(level, nullptr, cb, arg);
+  tracepoint(librados, rados_monitor_log2_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_monitor_log2);
+
+// C API: create an I/O context for the pool called `name`.
+//
+// A negative result from client->create_ioctx() is returned unchanged and
+// *io is not written.  Otherwise the new IoCtxImpl is stored in *io, get()
+// is called on it once, and 0 is returned.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_create)(
+  rados_t cluster,
+  const char *name,
+  rados_ioctx_t *io)
+{
+  tracepoint(librados, rados_ioctx_create_enter, cluster, name);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+
+  librados::IoCtxImpl *impl;
+  const int rc = rados->create_ioctx(name, &impl);
+  if (rc < 0) {
+    tracepoint(librados, rados_ioctx_create_exit, rc, NULL);
+    return rc;
+  }
+
+  *io = impl;
+  impl->get();
+  tracepoint(librados, rados_ioctx_create_exit, 0, impl);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_create);
+
+// C API: create an I/O context for the pool with numeric id `pool_id`.
+//
+// A negative result from client->create_ioctx() is returned unchanged and
+// *io is not written.  Otherwise the new IoCtxImpl is stored in *io, get()
+// is called on it once, and 0 is returned.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_create2)(
+  rados_t cluster,
+  int64_t pool_id,
+  rados_ioctx_t *io)
+{
+  tracepoint(librados, rados_ioctx_create2_enter, cluster, pool_id);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+
+  librados::IoCtxImpl *impl;
+  const int rc = rados->create_ioctx(pool_id, &impl);
+  if (rc < 0) {
+    tracepoint(librados, rados_ioctx_create2_exit, rc, NULL);
+    return rc;
+  }
+
+  *io = impl;
+  impl->get();
+  tracepoint(librados, rados_ioctx_create2_exit, 0, impl);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_create2);
+
+// C API: call put() on the I/O context.  A null handle is accepted and
+// skipped.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_ioctx_destroy)(rados_ioctx_t io)
+{
+  tracepoint(librados, rados_ioctx_destroy_enter, io);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  if (impl != nullptr)
+    impl->put();
+  tracepoint(librados, rados_ioctx_destroy_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_destroy);
+
+// C API (default symbol version): fill *stats with statistics for the pool
+// behind io.
+//
+// The pool name is looked up from the ioctx's pool id, the raw
+// ::pool_stat_t is fetched with get_pool_stats(), and its fields are
+// converted into the public rados_pool_stat_t.  A nonzero result from
+// either lookup is returned as-is and *stats is not written.  Returns 0 on
+// success.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_stat)(
+  rados_ioctx_t io,
+  struct rados_pool_stat_t *stats)
+{
+  tracepoint(librados, rados_ioctx_pool_stat_enter, io);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+
+  std::string pool_name;
+  int err = ioctx->client->pool_get_name(ioctx->get_id(), &pool_name);
+  if (err) {
+    tracepoint(librados, rados_ioctx_pool_stat_exit, err, stats);
+    return err;
+  }
+
+  list<string> pools;
+  pools.push_back(pool_name);
+
+  map<string, ::pool_stat_t> raw;
+  bool per_pool = false;
+  err = ioctx->client->get_pool_stats(pools, &raw, &per_pool);
+  if (err) {
+    tracepoint(librados, rados_ioctx_pool_stat_exit, err, stats);
+    return err;
+  }
+
+  ::pool_stat_t& pst = raw[pool_name];
+  const auto& sum = pst.stats.sum;
+
+  uint64_t allocated_bytes = pst.get_allocated_data_bytes(per_pool) +
+    pst.get_allocated_omap_bytes(per_pool);
+  // FIXME: raw_used_rate is unknown hence use 1.0 here
+  // meaning we keep net amount aggregated over all replicas
+  // Not a big deal so far since this field isn't exposed
+  uint64_t user_bytes = pst.get_user_data_bytes(1.0, per_pool) +
+    pst.get_user_omap_bytes(1.0, per_pool);
+
+  // space usage
+  stats->num_kb = shift_round_up(allocated_bytes, 10);
+  stats->num_bytes = allocated_bytes;
+
+  // object counts
+  stats->num_objects = sum.num_objects;
+  stats->num_object_clones = sum.num_object_clones;
+  stats->num_object_copies = sum.num_object_copies;
+  stats->num_objects_missing_on_primary = sum.num_objects_missing_on_primary;
+  stats->num_objects_unfound = sum.num_objects_unfound;
+  // FIXME: this is imprecise (misplaced objects are counted as degraded)
+  stats->num_objects_degraded =
+    sum.num_objects_degraded + sum.num_objects_misplaced;
+
+  // I/O counters
+  stats->num_rd = sum.num_rd;
+  stats->num_rd_kb = sum.num_rd_kb;
+  stats->num_wr = sum.num_wr;
+  stats->num_wr_kb = sum.num_wr_kb;
+
+  // fields that only exist in this (newer) struct layout
+  stats->num_user_bytes = user_bytes;
+  stats->compressed_bytes_orig = pst.store_stats.data_compressed_original;
+  stats->compressed_bytes = pst.store_stats.data_compressed;
+  stats->compressed_bytes_alloc = pst.store_stats.data_compressed_allocated;
+
+  tracepoint(librados, rados_ioctx_pool_stat_exit, 0, stats);
+  return 0;
+}
+LIBRADOS_C_API_DEFAULT(rados_ioctx_pool_stat, 14.2.0);
+
+// C API (base symbol version): same query as the default version, but the
+// caller's struct is the __librados_base layout.
+//
+// Runs the default implementation into a local rados_pool_stat_t and copies
+// over the twelve fields assigned below.  A negative result is returned
+// before *stats is touched; otherwise returns 0.
+extern "C" int LIBRADOS_C_API_BASE_F(rados_ioctx_pool_stat)(
+  rados_ioctx_t io, struct __librados_base::rados_pool_stat_t *stats)
+{
+  struct rados_pool_stat_t full;
+  const int rc = LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_stat)(io, &full);
+  if (rc < 0)
+    return rc;
+
+  stats->num_bytes = full.num_bytes;
+  stats->num_kb = full.num_kb;
+  stats->num_objects = full.num_objects;
+  stats->num_object_clones = full.num_object_clones;
+  stats->num_object_copies = full.num_object_copies;
+  stats->num_objects_missing_on_primary = full.num_objects_missing_on_primary;
+  stats->num_objects_unfound = full.num_objects_unfound;
+  stats->num_objects_degraded = full.num_objects_degraded;
+  stats->num_rd = full.num_rd;
+  stats->num_rd_kb = full.num_rd_kb;
+  stats->num_wr = full.num_wr;
+  stats->num_wr_kb = full.num_wr_kb;
+  return 0;
+}
+LIBRADOS_C_API_BASE(rados_ioctx_pool_stat);
+
+// C API: return the `cct` member of the ioctx's client as an opaque
+// rados_config_t handle.
+extern "C" rados_config_t LIBRADOS_C_API_DEFAULT_F(rados_ioctx_cct)(
+  rados_ioctx_t io)
+{
+  tracepoint(librados, rados_ioctx_cct_enter, io);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  rados_config_t handle = (rados_config_t)impl->client->cct;
+  tracepoint(librados, rados_ioctx_cct_exit, handle);
+  return handle;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_cct);
+
+// C API: pass `seq`, converted to snapid_t, to the ioctx's set_snap_read().
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_set_read)(
+  rados_ioctx_t io,
+  rados_snap_t seq)
+{
+  tracepoint(librados, rados_ioctx_snap_set_read_enter, io, seq);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  impl->set_snap_read((snapid_t)seq);
+  tracepoint(librados, rados_ioctx_snap_set_read_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_set_read);
+
+// C API: install a self-managed snapshot write context on the ioctx.
+//
+// seq        passed to set_snap_write_context() as snapid_t
+// snaps      array of num_snaps snapshot ids, copied into a vector<snapid_t>
+// num_snaps  element count; must not be negative
+//
+// Returns -EINVAL for a negative num_snaps, otherwise the result of
+// set_snap_write_context().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_selfmanaged_snap_set_write_ctx)(
+  rados_ioctx_t io,
+  rados_snap_t seq,
+  rados_snap_t *snaps,
+  int num_snaps)
+{
+  tracepoint(librados, rados_ioctx_selfmanaged_snap_set_write_ctx_enter, io, seq, snaps, num_snaps);
+  if (num_snaps < 0) {
+    // A negative count would be converted to an enormous size_t when sizing
+    // the vector and make it throw out of this extern "C" entry point.
+    tracepoint(librados, rados_ioctx_selfmanaged_snap_set_write_ctx_exit, -EINVAL);
+    return -EINVAL;
+  }
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  vector<snapid_t> snv;
+  snv.resize(num_snaps);
+  for (int i = 0; i < num_snaps; i++) {
+    snv[i] = (snapid_t)snaps[i];
+  }
+  int retval = ctx->set_snap_write_context((snapid_t)seq, snv);
+  tracepoint(librados, rados_ioctx_selfmanaged_snap_set_write_ctx_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_selfmanaged_snap_set_write_ctx);
+
+// C API: write `len` bytes from `buf` to object `o` at offset `off`.
+//
+// Lengths above UINT_MAX/2 are rejected with -E2BIG before anything is
+// copied.  Otherwise the data is copied into a bufferlist and the result of
+// the ioctx's write() is returned.  Every return path emits the exit
+// tracepoint so that enter/exit events stay paired.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_write)(
+  rados_ioctx_t io,
+  const char *o,
+  const char *buf,
+  size_t len,
+  uint64_t off)
+{
+  tracepoint(librados, rados_write_enter, io, o, buf, len, off);
+  if (len > UINT_MAX/2) {
+    tracepoint(librados, rados_write_exit, -E2BIG);
+    return -E2BIG;
+  }
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  object_t oid(o);
+  bufferlist bl;
+  bl.append(buf, len);
+  int retval = ctx->write(oid, bl, len, off);
+  tracepoint(librados, rados_write_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write);
+
+// C API: append `len` bytes from `buf` to object `o`.
+//
+// Lengths above UINT_MAX/2 are rejected with -E2BIG before anything is
+// copied.  Otherwise the data is copied into a bufferlist and the result of
+// the ioctx's append() is returned.  Every return path emits the exit
+// tracepoint so that enter/exit events stay paired.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_append)(
+  rados_ioctx_t io,
+  const char *o,
+  const char *buf,
+  size_t len)
+{
+  tracepoint(librados, rados_append_enter, io, o, buf, len);
+  if (len > UINT_MAX/2) {
+    tracepoint(librados, rados_append_exit, -E2BIG);
+    return -E2BIG;
+  }
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  object_t oid(o);
+  bufferlist bl;
+  bl.append(buf, len);
+  int retval = ctx->append(oid, bl, len);
+  tracepoint(librados, rados_append_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_append);
+
+// C API: pass `len` bytes from `buf` to the ioctx's write_full() for
+// object `o`.
+//
+// Lengths above UINT_MAX/2 are rejected with -E2BIG before anything is
+// copied.  Otherwise the data is copied into a bufferlist and the result of
+// write_full() is returned.  Every return path emits the exit tracepoint so
+// that enter/exit events stay paired.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_write_full)(
+  rados_ioctx_t io,
+  const char *o,
+  const char *buf,
+  size_t len)
+{
+  tracepoint(librados, rados_write_full_enter, io, o, buf, len);
+  if (len > UINT_MAX/2) {
+    tracepoint(librados, rados_write_full_exit, -E2BIG);
+    return -E2BIG;
+  }
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  object_t oid(o);
+  bufferlist bl;
+  bl.append(buf, len);
+  int retval = ctx->write_full(oid, bl);
+  tracepoint(librados, rados_write_full_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_full);
+
+// C API: copy `data_len` bytes from `buf` into a bufferlist and pass it,
+// together with `write_len` and `off`, to the ioctx's writesame() for
+// object `o`.  Returns writesame()'s result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_writesame)(
+  rados_ioctx_t io,
+  const char *o,
+  const char *buf,
+  size_t data_len,
+  size_t write_len,
+  uint64_t off)
+{
+  tracepoint(librados, rados_writesame_enter, io, o, buf, data_len, write_len, off);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+
+  bufferlist pattern;
+  pattern.append(buf, data_len);
+
+  const int rc = impl->writesame(target, pattern, write_len, off);
+  tracepoint(librados, rados_writesame_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_writesame);
+
+// C API: forward to the ioctx's trunc() for object `o` with `size`.
+// Returns trunc()'s result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_trunc)(
+  rados_ioctx_t io,
+  const char *o,
+  uint64_t size)
+{
+  tracepoint(librados, rados_trunc_enter, io, o, size);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  const int rc = impl->trunc(target, size);
+  tracepoint(librados, rados_trunc_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_trunc);
+
+// C API: forward to the ioctx's remove() for object `o`.  Returns
+// remove()'s result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_remove)(
+  rados_ioctx_t io,
+  const char *o)
+{
+  tracepoint(librados, rados_remove_enter, io, o);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  const int rc = impl->remove(target);
+  tracepoint(librados, rados_remove_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_remove);
+
+// C API: read up to `len` bytes of object `o`, starting at `off`, into
+// `buf`.
+//
+// The caller's buffer is wrapped in a static bufferptr and handed to the
+// ioctx's read().  If the resulting bufferlist is not backed by `buf`, its
+// contents are copied into `buf` afterwards.
+//
+// Returns the bufferlist length on success, -ERANGE if that length exceeds
+// `len`, or the negative value returned by read().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_read)(
+  rados_ioctx_t io,
+  const char *o,
+  char *buf,
+  size_t len,
+  uint64_t off)
+{
+  tracepoint(librados, rados_read_enter, io, o, buf, len, off);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+
+  bufferlist data;
+  data.push_back(buffer::create_static(len, buf));
+
+  int ret = impl->read(target, data, len, off);
+  if (ret < 0) {
+    tracepoint(librados, rados_read_exit, ret, buf);
+    return ret;
+  }
+
+  if (data.length() > len) {
+    tracepoint(librados, rados_read_exit, -ERANGE, NULL);
+    return -ERANGE;
+  }
+  if (!data.is_provided_buffer(buf))
+    data.begin().copy(data.length(), buf);
+  // report the bufferlist length rather than read()'s own return value
+  ret = data.length();
+
+  tracepoint(librados, rados_read_exit, ret, buf);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read);
+
+// C API: request a checksum of object `o` through the ioctx's checksum().
+//
+// type                       mapped with get_checksum_op_type()
+// init_value/init_value_len  copied into a bufferlist and passed along
+// len, off, chunk_size       forwarded unchanged
+// pchecksum/checksum_len     caller buffer receiving the result bytes
+//
+// Returns checksum()'s result.  When that result is non-negative the
+// result bytes are copied to pchecksum, unless they are longer than
+// checksum_len, in which case -ERANGE is returned and nothing is copied.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_checksum)(
+  rados_ioctx_t io, const char *o,
+  rados_checksum_type_t type,
+  const char *init_value, size_t init_value_len,
+  size_t len, uint64_t off, size_t chunk_size,
+  char *pchecksum, size_t checksum_len)
+{
+  tracepoint(librados, rados_checksum_enter, io, o, type, init_value,
+             init_value_len, len, off, chunk_size);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+
+  bufferlist seed;
+  seed.append(init_value, init_value_len);
+
+  bufferlist result;
+  const int rc = impl->checksum(target, get_checksum_op_type(type), seed,
+                                len, off, chunk_size, &result);
+  if (rc < 0) {
+    tracepoint(librados, rados_checksum_exit, rc, pchecksum, checksum_len);
+    return rc;
+  }
+
+  if (result.length() > checksum_len) {
+    tracepoint(librados, rados_checksum_exit, -ERANGE, NULL, 0);
+    return -ERANGE;
+  }
+  result.begin().copy(result.length(), pchecksum);
+
+  tracepoint(librados, rados_checksum_exit, rc, pchecksum, checksum_len);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_checksum);
+
+// C API: return the value of the ioctx's last_version().
+extern "C" uint64_t LIBRADOS_C_API_DEFAULT_F(rados_get_last_version)(
+  rados_ioctx_t io)
+{
+  tracepoint(librados, rados_get_last_version_enter, io);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const uint64_t version = impl->last_version();
+  tracepoint(librados, rados_get_last_version_exit, version);
+  return version;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_get_last_version);
+
+// C API: create a pool called `name` via client->pool_create().  Returns
+// pool_create()'s result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_create)(
+  rados_t cluster,
+  const char *name)
+{
+  tracepoint(librados, rados_pool_create_enter, cluster, name);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+  string pool_name(name);
+  const int rc = rados->pool_create(pool_name);
+  tracepoint(librados, rados_pool_create_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_pool_create);
+
+// C API: create a pool called `name`, accepting only the default auid.
+//
+// Any auid other than CEPH_AUTH_UID_DEFAULT yields -EINVAL without calling
+// pool_create(); otherwise returns the result of client->pool_create().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_create_with_auid)(
+  rados_t cluster,
+  const char *name,
+  uint64_t auid)
+{
+  tracepoint(librados, rados_pool_create_with_auid_enter, cluster, name, auid);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+  string pool_name(name);
+
+  int rc = -EINVAL;
+  if (auid == CEPH_AUTH_UID_DEFAULT)
+    rc = rados->pool_create(pool_name);
+
+  tracepoint(librados, rados_pool_create_with_auid_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_pool_create_with_auid);
+
+// C API: create a pool called `name`, passing crush_rule_num as the second
+// argument of client->pool_create().  Returns pool_create()'s result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_create_with_crush_rule)(
+  rados_t cluster,
+  const char *name,
+  __u8 crush_rule_num)
+{
+  tracepoint(librados, rados_pool_create_with_crush_rule_enter, cluster, name, crush_rule_num);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+  string pool_name(name);
+  const int rc = rados->pool_create(pool_name, crush_rule_num);
+  tracepoint(librados, rados_pool_create_with_crush_rule_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_pool_create_with_crush_rule);
+
+// C API: create a pool called `name` with crush_rule_num, accepting only
+// the default auid.
+//
+// Any auid other than CEPH_AUTH_UID_DEFAULT yields -EINVAL without calling
+// pool_create(); otherwise returns the result of
+// client->pool_create(name, crush_rule_num).
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_create_with_all)(
+  rados_t cluster,
+  const char *name,
+  uint64_t auid,
+  __u8 crush_rule_num)
+{
+  tracepoint(librados, rados_pool_create_with_all_enter, cluster, name, auid, crush_rule_num);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+  string pool_name(name);
+
+  int rc = -EINVAL;
+  if (auid == CEPH_AUTH_UID_DEFAULT)
+    rc = rados->pool_create(pool_name, crush_rule_num);
+
+  tracepoint(librados, rados_pool_create_with_all_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_pool_create_with_all);
+
+// C API: forward to client->pool_get_base_tier(pool_id, base_tier) and
+// return its result.  The exit tracepoint reads *base_tier, so the pointer
+// is dereferenced here whatever the result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_get_base_tier)(
+  rados_t cluster,
+  int64_t pool_id,
+  int64_t* base_tier)
+{
+  tracepoint(librados, rados_pool_get_base_tier_enter, cluster, pool_id);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+  const int rc = rados->pool_get_base_tier(pool_id, base_tier);
+  tracepoint(librados, rados_pool_get_base_tier_exit, rc, *base_tier);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_pool_get_base_tier);
+
+// C API: forward to client->pool_delete(pool_name) and return its result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_delete)(
+  rados_t cluster,
+  const char *pool_name)
+{
+  tracepoint(librados, rados_pool_delete_enter, cluster, pool_name);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+  const int rc = rados->pool_delete(pool_name);
+  tracepoint(librados, rados_pool_delete_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_pool_delete);
+
+// C API: stub that performs no operation on the ioctx; it only emits the
+// enter/exit tracepoints and always returns -EOPNOTSUPP.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_set_auid)(
+  rados_ioctx_t io,
+  uint64_t auid)
+{
+  tracepoint(librados, rados_ioctx_pool_set_auid_enter, io, auid);
+  const int rc = -EOPNOTSUPP;
+  tracepoint(librados, rados_ioctx_pool_set_auid_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_pool_set_auid);
+
+// C API: stub that always returns -EOPNOTSUPP.  *auid is never written,
+// but the exit tracepoint still reads it, so `auid` is dereferenced.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_get_auid)(
+  rados_ioctx_t io,
+  uint64_t *auid)
+{
+  tracepoint(librados, rados_ioctx_pool_get_auid_enter, io);
+  const int rc = -EOPNOTSUPP;
+  tracepoint(librados, rados_ioctx_pool_get_auid_exit, rc, *auid);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_pool_get_auid);
+
+// C API: return the result of client->pool_requires_alignment() for the
+// ioctx's pool id.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_requires_alignment)(
+  rados_ioctx_t io)
+{
+  tracepoint(librados, rados_ioctx_pool_requires_alignment_enter, io);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const int rc = impl->client->pool_requires_alignment(impl->get_id());
+  tracepoint(librados, rados_ioctx_pool_requires_alignment_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_pool_requires_alignment);
+
+// C API: query whether the ioctx's pool requires aligned writes, with the
+// status and the answer reported separately.
+//
+// req  optional out parameter; when non-null it receives the flag written
+//      by client->pool_requires_alignment2(), or 0 if that call did not
+//      write one
+//
+// Returns the result of pool_requires_alignment2().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_requires_alignment2)(
+  rados_ioctx_t io,
+  int *req)
+{
+  tracepoint(librados, rados_ioctx_pool_requires_alignment_enter2, io);
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  // Start from a defined value: the flag is traced and copied to *req
+  // below regardless of the return code, so it must not be indeterminate
+  // if the call returns without writing it.
+  bool requires_alignment = false;
+  int retval = ctx->client->pool_requires_alignment2(ctx->get_id(),
+                                                     &requires_alignment);
+  tracepoint(librados, rados_ioctx_pool_requires_alignment_exit2, retval,
+             requires_alignment);
+  // `req` (not `requires`): the latter is a keyword from C++20 onwards
+  if (req)
+    *req = requires_alignment;
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_pool_requires_alignment2);
+
+// C API: return the value of client->pool_required_alignment() for the
+// ioctx's pool id.
+extern "C" uint64_t LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_required_alignment)(
+  rados_ioctx_t io)
+{
+  tracepoint(librados, rados_ioctx_pool_required_alignment_enter, io);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const uint64_t align = impl->client->pool_required_alignment(impl->get_id());
+  tracepoint(librados, rados_ioctx_pool_required_alignment_exit, align);
+  return align;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_pool_required_alignment);
+
+// C API: forward to client->pool_required_alignment2() for the ioctx's
+// pool id, passing `alignment` through as the out parameter.  Returns that
+// call's result.  The exit tracepoint reads *alignment, so the pointer is
+// dereferenced here whatever the result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_required_alignment2)(
+  rados_ioctx_t io,
+  uint64_t *alignment)
+{
+  tracepoint(librados, rados_ioctx_pool_required_alignment_enter2, io);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const int rc = impl->client->pool_required_alignment2(impl->get_id(),
+                                                        alignment);
+  tracepoint(librados, rados_ioctx_pool_required_alignment_exit2, rc,
+             *alignment);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_pool_required_alignment2);
+
+// C API: set the ioctx's oloc.key.  A NULL `key` stores the empty string.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_ioctx_locator_set_key)(
+  rados_ioctx_t io,
+  const char *key)
+{
+  tracepoint(librados, rados_ioctx_locator_set_key_enter, io, key);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  impl->oloc.key = key ? key : "";
+  tracepoint(librados, rados_ioctx_locator_set_key_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_locator_set_key);
+
+// C API: set the ioctx's oloc.nspace.  A NULL `nspace` stores the empty
+// string.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_ioctx_set_namespace)(
+  rados_ioctx_t io,
+  const char *nspace)
+{
+  tracepoint(librados, rados_ioctx_set_namespace_enter, io, nspace);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  impl->oloc.nspace = nspace ? nspace : "";
+  tracepoint(librados, rados_ioctx_set_namespace_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_set_namespace);
+
+// C API: copy the ioctx's oloc.nspace into `s` as a NUL-terminated string.
+//
+// maxlen  size of `s` in bytes; it must exceed the namespace length so the
+//         terminator fits
+//
+// Returns the namespace length (terminator excluded), or -ERANGE when the
+// buffer is too small, in which case `s` is not written.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_get_namespace)(
+  rados_ioctx_t io,
+  char *s,
+  unsigned maxlen)
+{
+  tracepoint(librados, rados_ioctx_get_namespace_enter, io, maxlen);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const auto &ns = impl->oloc.nspace;
+  auto ns_len = ns.length();
+  if (ns_len >= maxlen) {
+    tracepoint(librados, rados_ioctx_get_namespace_exit, -ERANGE, "");
+    return -ERANGE;
+  }
+  strcpy(s, ns.c_str());
+  int copied = (int)ns_len;
+  tracepoint(librados, rados_ioctx_get_namespace_exit, copied, s);
+  return copied;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_get_namespace);
+
+// C API: return the ioctx's `client` pointer as an opaque rados_t handle.
+extern "C" rados_t LIBRADOS_C_API_DEFAULT_F(rados_ioctx_get_cluster)(
+  rados_ioctx_t io)
+{
+  tracepoint(librados, rados_ioctx_get_cluster_enter, io);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  rados_t handle = (rados_t)impl->client;
+  tracepoint(librados, rados_ioctx_get_cluster_exit, handle);
+  return handle;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_get_cluster);
+
+// C API: return the value of the ioctx's get_id().
+extern "C" int64_t LIBRADOS_C_API_DEFAULT_F(rados_ioctx_get_id)(
+  rados_ioctx_t io)
+{
+  tracepoint(librados, rados_ioctx_get_id_enter, io);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const int64_t pool_id = impl->get_id();
+  tracepoint(librados, rados_ioctx_get_id_exit, pool_id);
+  return pool_id;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_get_id);
+
+// C API: copy the name of the ioctx's pool into `s` as a NUL-terminated
+// string.
+//
+// maxlen  size of `s` in bytes; it must exceed the name length so the
+//         terminator fits
+//
+// Returns the name length (terminator excluded); any nonzero result of
+// client->pool_get_name() unchanged; or -ERANGE when the buffer is too
+// small.  `s` is written only on success.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_get_pool_name)(
+  rados_ioctx_t io,
+  char *s,
+  unsigned maxlen)
+{
+  tracepoint(librados, rados_ioctx_get_pool_name_enter, io, maxlen);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+
+  std::string found;
+  const int err = impl->client->pool_get_name(impl->get_id(), &found);
+  if (err) {
+    tracepoint(librados, rados_ioctx_get_pool_name_exit, err, "");
+    return err;
+  }
+
+  if (found.length() >= maxlen) {
+    tracepoint(librados, rados_ioctx_get_pool_name_exit, -ERANGE, "");
+    return -ERANGE;
+  }
+
+  strcpy(s, found.c_str());
+  int copied = found.length();
+  tracepoint(librados, rados_ioctx_get_pool_name_exit, copied, s);
+  return copied;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_get_pool_name);
+
+// snaps
+
+// C API: forward to the ioctx's snap_create(snapname) and return its
+// result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_create)(
+  rados_ioctx_t io,
+  const char *snapname)
+{
+  tracepoint(librados, rados_ioctx_snap_create_enter, io, snapname);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const int rc = impl->snap_create(snapname);
+  tracepoint(librados, rados_ioctx_snap_create_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_create);
+
+// C API: forward to the ioctx's snap_remove(snapname) and return its
+// result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_remove)(
+  rados_ioctx_t io,
+  const char *snapname)
+{
+  tracepoint(librados, rados_ioctx_snap_remove_enter, io, snapname);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const int rc = impl->snap_remove(snapname);
+  tracepoint(librados, rados_ioctx_snap_remove_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_remove);
+
+// C API: forward to the ioctx's rollback(oid, snapname) and return its
+// result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_rollback)(
+  rados_ioctx_t io,
+  const char *oid,
+  const char *snapname)
+{
+  tracepoint(librados, rados_ioctx_snap_rollback_enter, io, oid, snapname);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const int rc = impl->rollback(oid, snapname);
+  tracepoint(librados, rados_ioctx_snap_rollback_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_rollback);
+
+// Deprecated alias retained for backward compatibility: delegates straight
+// to rados_ioctx_snap_rollback and emits no tracepoints of its own.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_rollback)(
+  rados_ioctx_t io,
+  const char *oid,
+  const char *snapname)
+{
+  const int rc =
+    LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_rollback)(io, oid, snapname);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_rollback);
+
+// C API: forward to the ioctx's selfmanaged_snap_create(snapid) and return
+// its result.  The exit tracepoint reads *snapid, so the pointer is
+// dereferenced here whatever the result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_selfmanaged_snap_create)(
+  rados_ioctx_t io,
+  uint64_t *snapid)
+{
+  tracepoint(librados, rados_ioctx_selfmanaged_snap_create_enter, io);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const int rc = impl->selfmanaged_snap_create(snapid);
+  tracepoint(librados, rados_ioctx_selfmanaged_snap_create_exit, rc, *snapid);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_selfmanaged_snap_create);
+
+// C API: hand `snapid` and the completion to the ioctx's
+// aio_selfmanaged_snap_create().  Nothing is returned to the caller here.
+//
+// Note: this entry point emits the tracepoints of the synchronous
+// rados_ioctx_selfmanaged_snap_create, with fixed exit values 0, 0.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_aio_ioctx_selfmanaged_snap_create)(
+  rados_ioctx_t io,
+  rados_snap_t *snapid,
+  rados_completion_t completion)
+{
+  tracepoint(librados, rados_ioctx_selfmanaged_snap_create_enter, io);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl *>(completion);
+  impl->aio_selfmanaged_snap_create(snapid, comp);
+  tracepoint(librados, rados_ioctx_selfmanaged_snap_create_exit, 0, 0);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_ioctx_selfmanaged_snap_create);
+
+// C API: forward to the ioctx's selfmanaged_snap_remove(snapid) and return
+// its result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_selfmanaged_snap_remove)(
+  rados_ioctx_t io,
+  uint64_t snapid)
+{
+  tracepoint(librados, rados_ioctx_selfmanaged_snap_remove_enter, io, snapid);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const int rc = impl->selfmanaged_snap_remove(snapid);
+  tracepoint(librados, rados_ioctx_selfmanaged_snap_remove_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_selfmanaged_snap_remove);
+
+// C API: hand `snapid` and the completion to the ioctx's
+// aio_selfmanaged_snap_remove().  Nothing is returned to the caller here.
+//
+// Note: this entry point emits the tracepoints of the synchronous
+// rados_ioctx_selfmanaged_snap_remove, with a fixed exit value of 0.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_aio_ioctx_selfmanaged_snap_remove)(
+  rados_ioctx_t io,
+  rados_snap_t snapid,
+  rados_completion_t completion)
+{
+  tracepoint(librados, rados_ioctx_selfmanaged_snap_remove_enter, io, snapid);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl *>(completion);
+  impl->aio_selfmanaged_snap_remove(snapid, comp);
+  tracepoint(librados, rados_ioctx_selfmanaged_snap_remove_exit, 0);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_ioctx_selfmanaged_snap_remove);
+
+// C API: forward to the ioctx's selfmanaged_snap_rollback_object(),
+// passing `oid`, the ioctx's own `snapc` member and `snapid`.  Returns
+// that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_selfmanaged_snap_rollback)(
+  rados_ioctx_t io,
+  const char *oid,
+  uint64_t snapid)
+{
+  tracepoint(librados, rados_ioctx_selfmanaged_snap_rollback_enter, io, oid, snapid);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const int rc = impl->selfmanaged_snap_rollback_object(oid, impl->snapc, snapid);
+  tracepoint(librados, rados_ioctx_selfmanaged_snap_rollback_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_selfmanaged_snap_rollback);
+
+// C API: copy the ids reported by the ioctx's snap_list() into `snaps`.
+//
+// maxlen  capacity of `snaps`, in elements
+//
+// Returns the number of ids copied; the negative result of snap_list()
+// unchanged; or -ERANGE when there are more ids than `maxlen`, in which
+// case nothing is copied.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_list)(
+  rados_ioctx_t io,
+  rados_snap_t *snaps,
+  int maxlen)
+{
+  tracepoint(librados, rados_ioctx_snap_list_enter, io, maxlen);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+
+  vector<uint64_t> ids;
+  const int rc = impl->snap_list(&ids);
+  if (rc < 0) {
+    tracepoint(librados, rados_ioctx_snap_list_exit, rc, snaps, 0);
+    return rc;
+  }
+
+  if ((int)ids.size() > maxlen) {
+    int retval = -ERANGE;
+    tracepoint(librados, rados_ioctx_snap_list_exit, retval, snaps, 0);
+    return retval;
+  }
+
+  for (unsigned idx = 0; idx != ids.size(); ++idx)
+    snaps[idx] = ids[idx];
+
+  int retval = ids.size();
+  tracepoint(librados, rados_ioctx_snap_list_exit, retval, snaps, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_list);
+
+// C API: forward to the ioctx's snap_lookup(name, id) and return its
+// result.  The exit tracepoint reads *id, so the pointer is dereferenced
+// here whatever the result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_lookup)(
+  rados_ioctx_t io,
+  const char *name,
+  rados_snap_t *id)
+{
+  tracepoint(librados, rados_ioctx_snap_lookup_enter, io, name);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const int rc = impl->snap_lookup(name, (uint64_t *)id);
+  tracepoint(librados, rados_ioctx_snap_lookup_exit, rc, *id);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_lookup);
+
+// C API: copy the name of snapshot `id` into `name`.
+//
+// maxlen  size of `name` in bytes; it must exceed the name length so the
+//         terminator fits
+//
+// Returns 0 on success; the negative result of the ioctx's
+// snap_get_name() unchanged; or -ERANGE when the buffer is too small.
+// `name` is written only on success.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_get_name)(
+  rados_ioctx_t io,
+  rados_snap_t id,
+  char *name,
+  int maxlen)
+{
+  tracepoint(librados, rados_ioctx_snap_get_name_enter, io, id, maxlen);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+
+  std::string found;
+  const int rc = impl->snap_get_name(id, &found);
+  if (rc < 0) {
+    tracepoint(librados, rados_ioctx_snap_get_name_exit, rc, "");
+    return rc;
+  }
+
+  if ((int)found.length() >= maxlen) {
+    int retval = -ERANGE;
+    tracepoint(librados, rados_ioctx_snap_get_name_exit, retval, "");
+    return retval;
+  }
+
+  // the length check above guarantees strncpy has room to NUL-pad the tail
+  strncpy(name, found.c_str(), maxlen);
+  tracepoint(librados, rados_ioctx_snap_get_name_exit, 0, name);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_get_name);
+
+// C API: forward to the ioctx's snap_get_stamp(id, t) and return its
+// result.  The exit tracepoint reads *t, so the pointer is dereferenced
+// here whatever the result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_get_stamp)(
+  rados_ioctx_t io,
+  rados_snap_t id,
+  time_t *t)
+{
+  tracepoint(librados, rados_ioctx_snap_get_stamp_enter, io, id);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  const int rc = impl->snap_get_stamp(id, t);
+  tracepoint(librados, rados_ioctx_snap_get_stamp_exit, rc, *t);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_get_stamp);
+
+// C API: copy `cmp_len` bytes from `cmp_buf` into a bufferlist and pass it
+// with `off` to the ioctx's cmpext() for object `o`.  Returns cmpext()'s
+// result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_cmpext)(
+  rados_ioctx_t io,
+  const char *o,
+  const char *cmp_buf,
+  size_t cmp_len,
+  uint64_t off)
+{
+  tracepoint(librados, rados_cmpext_enter, io, o, cmp_buf, cmp_len, off);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+
+  bufferlist expected;
+  expected.append(cmp_buf, cmp_len);
+
+  const int rc = impl->cmpext(target, off, expected);
+  tracepoint(librados, rados_cmpext_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_cmpext);
+
+// C API: fetch extended attribute `name` of object `o` into `buf`.
+//
+// The caller's buffer is wrapped in a static bufferptr and handed to the
+// ioctx's getxattr().  If the resulting bufferlist is not backed by `buf`,
+// its contents are copied into `buf` afterwards.
+//
+// Returns the bufferlist length on success, -ERANGE if that length exceeds
+// `len`, or the negative value returned by getxattr().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_getxattr)(
+  rados_ioctx_t io,
+  const char *o,
+  const char *name,
+  char *buf,
+  size_t len)
+{
+  tracepoint(librados, rados_getxattr_enter, io, o, name, len);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+
+  bufferlist value;
+  bufferptr wrapped = buffer::create_static(len, buf);
+  value.push_back(wrapped);
+
+  int ret = impl->getxattr(target, name, value);
+  if (ret < 0) {
+    tracepoint(librados, rados_getxattr_exit, ret, buf, ret);
+    return ret;
+  }
+
+  if (value.length() > len) {
+    tracepoint(librados, rados_getxattr_exit, -ERANGE, buf, 0);
+    return -ERANGE;
+  }
+  if (!value.is_provided_buffer(buf))
+    value.begin().copy(value.length(), buf);
+  ret = value.length();
+
+  tracepoint(librados, rados_getxattr_exit, ret, buf, ret);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_getxattr);
+
+// C API: start iterating over the extended attributes of object `oid`.
+//
+// Allocates a RadosXattrsIter, fills its attrset with the ioctx's
+// getxattrs(), positions it at the first entry and stores it in *iter.
+//
+// Returns 0 on success.  Any nonzero result of getxattrs() is returned
+// unchanged after the iterator has been deleted; *iter is not written in
+// that case.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_getxattrs)(
+  rados_ioctx_t io,
+  const char *oid,
+  rados_xattrs_iter_t *iter)
+{
+  tracepoint(librados, rados_getxattrs_enter, io, oid);
+  // A plain new-expression reports allocation failure by throwing, so this
+  // null check is defensive only.
+  librados::RadosXattrsIter *cursor = new librados::RadosXattrsIter();
+  if (cursor == nullptr) {
+    tracepoint(librados, rados_getxattrs_exit, -ENOMEM, NULL);
+    return -ENOMEM;
+  }
+
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(oid);
+  const int rc = impl->getxattrs(target, cursor->attrset);
+  if (rc) {
+    delete cursor;
+    tracepoint(librados, rados_getxattrs_exit, rc, NULL);
+    return rc;
+  }
+
+  cursor->i = cursor->attrset.begin();
+  *iter = cursor;
+  tracepoint(librados, rados_getxattrs_exit, 0, *iter);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_getxattrs);
+
+// C API: fetch the next (name, value) pair from an xattr iterator.
+//
+// The value buffer handed out by the previous call is freed first.  At the
+// end of the set, *name and *val are set to NULL, *len to 0, and 0 is
+// returned.  Otherwise:
+//   *name  points at the key string stored in the iterator's attrset
+//   *val   points at a malloc'd copy of the value, kept in it->val (NULL
+//          for an empty value)
+//   *len   is the value length
+// and the iterator advances.
+//
+// Returns 0, or -ENOMEM if the value copy cannot be allocated; in that
+// case *name has been set, *val and *len are not written, and the iterator
+// does not advance.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_getxattrs_next)(
+  rados_xattrs_iter_t iter,
+  const char **name,
+  const char **val,
+  size_t *len)
+{
+  tracepoint(librados, rados_getxattrs_next_enter, iter);
+  auto *cursor = static_cast<librados::RadosXattrsIter*>(iter);
+
+  // drop the previous value copy; free(NULL) is a no-op
+  free(cursor->val);
+  cursor->val = NULL;
+
+  if (cursor->i == cursor->attrset.end()) {
+    *name = NULL;
+    *val = NULL;
+    *len = 0;
+    tracepoint(librados, rados_getxattrs_next_exit, 0, NULL, NULL, 0);
+    return 0;
+  }
+
+  *name = cursor->i->first.c_str();
+
+  bufferlist &payload(cursor->i->second);
+  const size_t payload_len = payload.length();
+  if (payload_len == 0) {
+    // malloc(0) is not guaranteed to return a valid pointer
+    *val = (char *)NULL;
+  } else {
+    cursor->val = (char*)malloc(payload_len);
+    if (cursor->val == NULL) {
+      tracepoint(librados, rados_getxattrs_next_exit, -ENOMEM, *name, NULL, 0);
+      return -ENOMEM;
+    }
+    memcpy(cursor->val, payload.c_str(), payload_len);
+    *val = cursor->val;
+  }
+
+  *len = payload_len;
+  ++cursor->i;
+  tracepoint(librados, rados_getxattrs_next_exit, 0, *name, *val, *len);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_getxattrs_next);
+
+// Destroy an xattr iterator previously produced for the caller.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_getxattrs_end)(
+  rados_xattrs_iter_t iter)
+{
+  tracepoint(librados, rados_getxattrs_end_enter, iter);
+  delete static_cast<librados::RadosXattrsIter*>(iter);
+  tracepoint(librados, rados_getxattrs_end_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_getxattrs_end);
+
+// Set xattr `name` on object `o` to the `len` bytes at `buf`.
+// Returns the result of IoCtxImpl::setxattr().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_setxattr)(
+  rados_ioctx_t io,
+  const char *o,
+  const char *name,
+  const char *buf,
+  size_t len)
+{
+  tracepoint(librados, rados_setxattr_enter, io, o, name, buf, len);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  // Copy the caller's bytes into a bufferlist for the IoCtx call.
+  bufferlist value;
+  value.append(buf, len);
+  const int rc = ioctx->setxattr(target, name, value);
+  tracepoint(librados, rados_setxattr_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_setxattr);
+
+// Remove xattr `name` from object `o`; returns the result of
+// IoCtxImpl::rmxattr().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_rmxattr)(
+  rados_ioctx_t io,
+  const char *o,
+  const char *name)
+{
+  tracepoint(librados, rados_rmxattr_enter, io, o, name);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  const int rc = ioctx->rmxattr(target, name);
+  tracepoint(librados, rados_rmxattr_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_rmxattr);
+
+// Stat object `o`, forwarding the `psize` and `pmtime` out-pointers to
+// IoCtxImpl::stat(); returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_stat)(
+  rados_ioctx_t io,
+  const char *o,
+  uint64_t *psize,
+  time_t *pmtime)
+{
+  tracepoint(librados, rados_stat_enter, io, o);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  const int rc = ioctx->stat(target, psize, pmtime);
+  tracepoint(librados, rados_stat_exit, rc, psize, pmtime);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_stat);
+
+// Base-version rados_tmap_update: wraps the `cmdbuflen` bytes at `cmdbuf` in a
+// bufferlist and passes them to IoCtxImpl::tmap_update() for object `o`.
+extern "C" int LIBRADOS_C_API_BASE_F(rados_tmap_update)(
+  rados_ioctx_t io,
+  const char *o,
+  const char *cmdbuf,
+  size_t cmdbuflen)
+{
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  bufferlist commands;
+  commands.append(cmdbuf, cmdbuflen);
+  return ioctx->tmap_update(target, commands);
+}
+LIBRADOS_C_API_BASE(rados_tmap_update);
+
+// Default-version rados_tmap_update (registered with version tag 14.2.0):
+// a stub that ignores every argument and always returns -ENOTSUP.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_tmap_update)(
+ rados_ioctx_t io,
+ const char *o,
+ const char *cmdbuf,
+ size_t cmdbuflen)
+{
+ return -ENOTSUP;
+}
+LIBRADOS_C_API_DEFAULT(rados_tmap_update, 14.2.0);
+
+// Base-version rados_tmap_put: decodes `buf` as a header bufferlist followed
+// by a string->bufferlist map, re-encodes both, and writes the result to
+// object `o` with rados_write_full.
+// Returns -EINVAL if the input cannot be decoded, otherwise the result of
+// the write.
+extern "C" int LIBRADOS_C_API_BASE_F(rados_tmap_put)(
+  rados_ioctx_t io,
+  const char *o,
+  const char *buf,
+  size_t buflen)
+{
+  bufferlist bl;
+  bl.append(buf, buflen);
+
+  bufferlist header;
+  std::map<std::string, bufferlist> m;
+  try {
+    bufferlist::const_iterator bl_it = bl.begin();
+    decode(header, bl_it);
+    decode(m, bl_it);
+  } catch (const buffer::error&) {
+    // The bytes come straight from the caller; a malformed payload must not
+    // let a C++ exception escape through this extern "C" entry point.
+    return -EINVAL;
+  }
+
+  bufferlist out_bl;
+  encode(header, out_bl);
+  encode(m, out_bl);
+
+  return LIBRADOS_C_API_DEFAULT_F(rados_write_full)(
+    io, o, out_bl.c_str(), out_bl.length());
+}
+LIBRADOS_C_API_BASE(rados_tmap_put);
+
+// Default-version rados_tmap_put (registered with version tag 14.2.0):
+// a stub that ignores every argument and always returns -EOPNOTSUPP.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_tmap_put)(
+ rados_ioctx_t io,
+ const char *o,
+ const char *buf,
+ size_t buflen)
+{
+ return -EOPNOTSUPP;
+}
+LIBRADOS_C_API_DEFAULT(rados_tmap_put, 14.2.0);
+
+// Base-version rados_tmap_get: implemented as a plain rados_read of up to
+// `buflen` bytes from offset 0 of object `o` into `buf`; returns that call's
+// result.
+extern "C" int LIBRADOS_C_API_BASE_F(rados_tmap_get)(
+ rados_ioctx_t io,
+ const char *o,
+ char *buf,
+ size_t buflen)
+{
+ return LIBRADOS_C_API_DEFAULT_F(rados_read)(io, o, buf, buflen, 0);
+}
+LIBRADOS_C_API_BASE(rados_tmap_get);
+
+// Default-version rados_tmap_get (registered with version tag 14.2.0):
+// a stub that ignores every argument and always returns -EOPNOTSUPP.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_tmap_get)(
+ rados_ioctx_t io,
+ const char *o,
+ char *buf,
+ size_t buflen)
+{
+ return -EOPNOTSUPP;
+}
+LIBRADOS_C_API_DEFAULT(rados_tmap_get, 14.2.0);
+
+// Invoke method `method` of object class `cls` on object `o`, passing the
+// `in_len` bytes at `inbuf` as input.
+// Any output is copied into `buf` (capacity `out_len`).
+// Returns:
+//   - the number of output bytes when the call succeeds and produced output,
+//   - the (non-negative) result of IoCtxImpl::exec() when there is no output,
+//   - -ERANGE when the output does not fit in `buf`,
+//   - otherwise the negative result of IoCtxImpl::exec().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_exec)(
+  rados_ioctx_t io,
+  const char *o,
+  const char *cls,
+  const char *method,
+  const char *inbuf,
+  size_t in_len,
+  char *buf,
+  size_t out_len)
+{
+  tracepoint(librados, rados_exec_enter, io, o, cls, method, inbuf, in_len, out_len);
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  object_t oid(o);
+  bufferlist inbl, outbl;
+  int ret;
+  // Number of bytes actually written to buf; this is what the exit tracepoint
+  // is told about, so it never receives a negative or unrelated length.
+  size_t copied = 0;
+  inbl.append(inbuf, in_len);
+  ret = ctx->exec(oid, cls, method, inbl, outbl);
+  if (ret >= 0) {
+    if (outbl.length()) {
+      if (outbl.length() > out_len) {
+        tracepoint(librados, rados_exec_exit, -ERANGE, buf, 0);
+        return -ERANGE;
+      }
+      outbl.begin().copy(outbl.length(), buf);
+      copied = outbl.length();
+      ret = outbl.length(); // hrm :/
+    }
+  }
+  tracepoint(librados, rados_exec_exit, ret, buf, copied);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_exec);
+
+// Return a heap-allocated cursor (an hobject_t) initialised from
+// Objecter::enumerate_objects_begin(). The caller frees it with
+// rados_object_list_cursor_free().
+extern "C" rados_object_list_cursor LIBRADOS_C_API_DEFAULT_F(rados_object_list_begin)(
+  rados_ioctx_t io)
+{
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+
+  hobject_t *cursor = new hobject_t(ioctx->objecter->enumerate_objects_begin());
+  return (rados_object_list_cursor)cursor;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_begin);
+
+// Return a heap-allocated cursor (an hobject_t) initialised from
+// Objecter::enumerate_objects_end(). The caller frees it with
+// rados_object_list_cursor_free().
+extern "C" rados_object_list_cursor LIBRADOS_C_API_DEFAULT_F(rados_object_list_end)(
+  rados_ioctx_t io)
+{
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+
+  hobject_t *cursor = new hobject_t(ioctx->objecter->enumerate_objects_end());
+  return (rados_object_list_cursor)cursor;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_end);
+
+// Report whether cursor `cur` is at the end position, i.e. whether the
+// underlying hobject_t answers is_max(). `io` is unused.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_object_list_is_end)(
+  rados_ioctx_t io,
+  rados_object_list_cursor cur)
+{
+  const auto *position = static_cast<hobject_t*>(cur);
+  return position->is_max();
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_is_end);
+
+// Delete the hobject_t behind cursor `cur`. `io` is unused.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_object_list_cursor_free)(
+  rados_ioctx_t io,
+  rados_object_list_cursor cur)
+{
+  delete static_cast<hobject_t*>(cur);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_cursor_free);
+
+// Compare two cursors by applying cmp() to the hobject_t each one points to
+// and return its result. `io` is unused.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_object_list_cursor_cmp)(
+  rados_ioctx_t io,
+  rados_object_list_cursor lhs_cur,
+  rados_object_list_cursor rhs_cur)
+{
+  hobject_t &left = *static_cast<hobject_t*>(lhs_cur);
+  hobject_t &right = *static_cast<hobject_t*>(rhs_cur);
+  return cmp(left, right);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_cursor_cmp);
+
+// Enumerate up to `result_item_count` objects between cursors `start` and
+// `finish`, optionally filtered by the `filter_buf_len` bytes at `filter_buf`.
+// The entries are written to `result_items` (zeroed first) and the cursor that
+// *next points to is updated in place.
+// Returns the number of items produced. On failure the cursor behind *next is
+// set to hobject_t::get_max() and the error code converted by
+// ceph::from_error_code() is returned.
+// `next` and `*next` must be non-null (asserted).
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_object_list)(rados_ioctx_t io,
+  const rados_object_list_cursor start,
+  const rados_object_list_cursor finish,
+  const size_t result_item_count,
+  const char *filter_buf,
+  const size_t filter_buf_len,
+  rados_object_list_item *result_items,
+  rados_object_list_cursor *next)
+{
+  ceph_assert(next);
+
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+
+  // Zero out items so that they will be safe to free later
+  // FIPS zeroization audit 20191116: this memset is not security related.
+  memset(result_items, 0, sizeof(rados_object_list_item) * result_item_count);
+
+  bufferlist filter;
+  if (filter_buf != nullptr) {
+    filter.append(filter_buf, filter_buf_len);
+  }
+
+  // Issue the enumeration and block on the waiter for its outcome.
+  ceph::async::waiter<boost::system::error_code,
+                      std::vector<librados::ListObjectImpl>,
+                      hobject_t> w;
+  ioctx->objecter->enumerate_objects<librados::ListObjectImpl>(
+    ioctx->poolid,
+    ioctx->oloc.nspace,
+    *((hobject_t*)start),
+    *((hobject_t*)finish),
+    result_item_count,
+    filter,
+    w);
+
+  hobject_t *next_cursor = (hobject_t*)(*next);
+  ceph_assert(next_cursor);
+
+  auto [ec, result, next_hash] = w.wait();
+
+  if (ec) {
+    *next_cursor = hobject_t::get_max();
+    return ceph::from_error_code(ec);
+  }
+
+  ceph_assert(result.size() <= result_item_count); // Don't overflow!
+
+  // Copy each listed object's oid, namespace and locator into its slot.
+  size_t slot = 0;
+  for (auto &entry : result) {
+    rados_object_list_item &item = result_items[slot++];
+    do_out_buffer(entry.oid, &item.oid, &item.oid_length);
+    do_out_buffer(entry.nspace, &item.nspace, &item.nspace_length);
+    do_out_buffer(entry.locator, &item.locator, &item.locator_length);
+  }
+
+  *next_cursor = next_hash;
+
+  return result.size();
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_object_list);
+
+// Free the oid, locator and nspace buffers of the first `result_size` entries
+// of `results` via rados_buffer_free. The array itself is not freed.
+// `results` must be non-null (asserted).
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_object_list_free)(
+  const size_t result_size,
+  rados_object_list_item *results)
+{
+  ceph_assert(results);
+
+  // Index with size_t so the counter has the same width as the bound; a
+  // narrower unsigned counter could wrap before reaching a large result_size.
+  for (size_t i = 0; i < result_size; ++i) {
+    LIBRADOS_C_API_DEFAULT_F(rados_buffer_free)(results[i].oid);
+    LIBRADOS_C_API_DEFAULT_F(rados_buffer_free)(results[i].locator);
+    LIBRADOS_C_API_DEFAULT_F(rados_buffer_free)(results[i].nspace);
+  }
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_free);
+
+/* list objects */
+
+// Open an object listing on `io`: allocates an Objecter::NListContext seeded
+// with the ioctx's pool id, snap sequence and namespace, wraps it in an
+// ObjListCtx and stores that handle in *listh. Always returns 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_open)(
+  rados_ioctx_t io,
+  rados_list_ctx_t *listh)
+{
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+
+  tracepoint(librados, rados_nobjects_list_open_enter, io);
+
+  auto *nlc = new Objecter::NListContext;
+  nlc->pool_id = ioctx->poolid;
+  nlc->pool_snap_seq = ioctx->snap_seq;
+  nlc->nspace = ioctx->oloc.nspace; // After dropping compatibility need nspace
+  *listh = (void *)new librados::ObjListCtx(ioctx, nlc);
+  tracepoint(librados, rados_nobjects_list_open_exit, 0, *listh);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_open);
+
+// Destroy the ObjListCtx behind listing handle `h`.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_close)(
+  rados_list_ctx_t h)
+{
+  tracepoint(librados, rados_nobjects_list_close_enter, h);
+  delete static_cast<librados::ObjListCtx *>(h);
+  tracepoint(librados, rados_nobjects_list_close_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_close);
+
+// Seek the listing to hash position `pos` and return whatever
+// IoCtxImpl::nlist_seek() reports.
+extern "C" uint32_t LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_seek)(
+  rados_list_ctx_t listctx,
+  uint32_t pos)
+{
+  auto *listing = static_cast<librados::ObjListCtx *>(listctx);
+  tracepoint(librados, rados_nobjects_list_seek_enter, listctx, pos);
+  const uint32_t where = listing->ctx->nlist_seek(listing->nlc, pos);
+  tracepoint(librados, rados_nobjects_list_seek_exit, where);
+  return where;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_seek);
+
+// Seek the listing to `cursor` and return whatever IoCtxImpl::nlist_seek()
+// reports.
+extern "C" uint32_t LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_seek_cursor)(
+  rados_list_ctx_t listctx,
+  rados_object_list_cursor cursor)
+{
+  auto *listing = static_cast<librados::ObjListCtx *>(listctx);
+
+  tracepoint(librados, rados_nobjects_list_seek_cursor_enter, listctx);
+  const uint32_t where = listing->ctx->nlist_seek(listing->nlc, cursor);
+  tracepoint(librados, rados_nobjects_list_seek_cursor_exit, where);
+  return where;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_seek_cursor);
+
+// Store the listing's current cursor, as returned by
+// IoCtxImpl::nlist_get_cursor(), in *cursor. Always returns 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_get_cursor)(
+  rados_list_ctx_t listctx,
+  rados_object_list_cursor *cursor)
+{
+  auto *listing = static_cast<librados::ObjListCtx *>(listctx);
+
+  tracepoint(librados, rados_nobjects_list_get_cursor_enter, listctx);
+  *cursor = listing->ctx->nlist_get_cursor(listing->nlc);
+  tracepoint(librados, rados_nobjects_list_get_cursor_exit, 0);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_get_cursor);
+
+// Return the listing context's current PG hash position.
+extern "C" uint32_t LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_get_pg_hash_position)(
+  rados_list_ctx_t listctx)
+{
+  auto *listing = static_cast<librados::ObjListCtx *>(listctx);
+  tracepoint(librados, rados_nobjects_list_get_pg_hash_position_enter, listctx);
+  const uint32_t position = listing->nlc->get_pg_hash_position();
+  tracepoint(librados, rados_nobjects_list_get_pg_hash_position_exit, position);
+  return position;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_get_pg_hash_position);
+
+// Fetch the next listed object. Thin wrapper over rados_nobjects_list_next2()
+// that does not request the size outputs.
+// Returns that call's result (0 on success, negative error otherwise).
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_next)(
+  rados_list_ctx_t listctx,
+  const char **entry,
+  const char **key,
+  const char **nspace)
+{
+  tracepoint(librados, rados_nobjects_list_next_enter, listctx);
+  // Keep the result as int: next2 reports failures as negative values.
+  int retval = rados_nobjects_list_next2(listctx, entry, key, nspace, NULL, NULL, NULL);
+  // *entry is only assigned by next2 on success, so don't read it on failure,
+  // and record the real return value rather than a constant 0.
+  tracepoint(librados, rados_nobjects_list_next_exit, retval,
+             retval == 0 ? *entry : nullptr, key, nspace);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_next);
+
+// Advance the listing by one object and expose it through the out-pointers.
+//   entry       - receives the object name (required)
+//   key         - optional; receives the locator, or NULL when it is empty
+//   nspace      - optional; receives the namespace
+//   *_size      - optional; receive the corresponding string lengths
+// The returned strings point into the listing context's current front entry,
+// which is popped by the following call.
+// Returns 0 on success, -ENOENT when no entries remain, or the negative
+// result of IoCtxImpl::nlist().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_next2)(
+  rados_list_ctx_t listctx,
+  const char **entry,
+  const char **key,
+  const char **nspace,
+  size_t *entry_size,
+  size_t *key_size,
+  size_t *nspace_size)
+{
+  tracepoint(librados, rados_nobjects_list_next2_enter, listctx);
+  auto *listing = static_cast<librados::ObjListCtx *>(listctx);
+  Objecter::NListContext *nlc = listing->nlc;
+
+  // A non-empty list means an earlier call already returned its front
+  // element; discard that one now.
+  if (!nlc->list.empty())
+    nlc->list.pop_front();
+
+  // Refill from the cluster once the local batch is exhausted.
+  if (nlc->list.empty()) {
+    const int fetched = listing->ctx->nlist(listing->nlc, RADOS_LIST_MAX_ENTRIES);
+    if (fetched < 0) {
+      tracepoint(librados, rados_nobjects_list_next2_exit, fetched, NULL, NULL, NULL, NULL, NULL, NULL);
+      return fetched;
+    }
+    if (nlc->list.empty()) {
+      tracepoint(librados, rados_nobjects_list_next2_exit, -ENOENT, NULL, NULL, NULL, NULL, NULL, NULL);
+      return -ENOENT;
+    }
+  }
+
+  const auto &head = nlc->list.front();
+
+  *entry = head.oid.c_str();
+
+  if (key) {
+    *key = head.locator.size() ? head.locator.c_str() : NULL;
+  }
+  if (nspace)
+    *nspace = head.nspace.c_str();
+
+  if (entry_size)
+    *entry_size = head.oid.size();
+  if (key_size)
+    *key_size = head.locator.size();
+  if (nspace_size)
+    *nspace_size = head.nspace.size();
+
+  tracepoint(librados, rados_nobjects_list_next2_exit, 0, entry, key, nspace,
+             entry_size, key_size, nspace_size);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_next2);
+
+
+/*
+ * removed legacy v2 list objects stubs
+ *
+ * these return -ENOTSUP where possible.
+ */
+// Legacy list-open stub: ignores its arguments, does not write *ctx, and
+// always returns -ENOTSUP.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_objects_list_open)(
+ rados_ioctx_t io,
+ rados_list_ctx_t *ctx)
+{
+ return -ENOTSUP;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_objects_list_open);
+
+// Legacy stub: the unsigned return type cannot carry an error code, so this
+// ignores `ctx` and always returns 0.
+extern "C" uint32_t LIBRADOS_C_API_DEFAULT_F(rados_objects_list_get_pg_hash_position)(
+ rados_list_ctx_t ctx)
+{
+ return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_objects_list_get_pg_hash_position);
+
+// Legacy stub: performs no seek, ignores both arguments and always returns 0.
+extern "C" uint32_t LIBRADOS_C_API_DEFAULT_F(rados_objects_list_seek)(
+ rados_list_ctx_t ctx,
+ uint32_t pos)
+{
+ return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_objects_list_seek);
+
+// Legacy stub: never writes *entry or *key and always returns -ENOTSUP.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_objects_list_next)(
+ rados_list_ctx_t ctx,
+ const char **entry,
+ const char **key)
+{
+ return -ENOTSUP;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_objects_list_next);
+
+// Legacy stub: intentionally a no-op; `ctx` is not touched or freed.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_objects_list_close)(
+ rados_list_ctx_t ctx)
+{
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_objects_list_close);
+
+
+// -------------------------
+// aio
+
+// Allocate an AioCompletionImpl, attach whichever of the complete/safe
+// callbacks is non-null (both receive `cb_arg`), and return it through *pc.
+// Always returns 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_create_completion)(
+  void *cb_arg,
+  rados_callback_t cb_complete,
+  rados_callback_t cb_safe,
+  rados_completion_t *pc)
+{
+  tracepoint(librados, rados_aio_create_completion_enter, cb_arg, cb_complete, cb_safe);
+  auto *comp = new librados::AioCompletionImpl;
+  if (cb_complete) {
+    comp->set_complete_callback(cb_arg, cb_complete);
+  }
+  if (cb_safe) {
+    comp->set_safe_callback(cb_arg, cb_safe);
+  }
+  *pc = comp;
+  tracepoint(librados, rados_aio_create_completion_exit, 0, *pc);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_create_completion);
+
+// Allocate an AioCompletionImpl with an optional complete callback (invoked
+// with `cb_arg`) and return it through *pc. Always returns 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_create_completion2)(
+  void *cb_arg,
+  rados_callback_t cb_complete,
+  rados_completion_t *pc)
+{
+  tracepoint(librados, rados_aio_create_completion2_enter, cb_arg, cb_complete);
+  auto *comp = new librados::AioCompletionImpl;
+  if (cb_complete) {
+    comp->set_complete_callback(cb_arg, cb_complete);
+  }
+  *pc = comp;
+  tracepoint(librados, rados_aio_create_completion2_exit, 0, *pc);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_create_completion2);
+
+// Forward to AioCompletionImpl::wait_for_complete() on completion `c` and
+// return its result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_wait_for_complete)(
+  rados_completion_t c)
+{
+  tracepoint(librados, rados_aio_wait_for_complete_enter, c);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(c);
+  const int rc = comp->wait_for_complete();
+  tracepoint(librados, rados_aio_wait_for_complete_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_wait_for_complete);
+
+// Wait on completion `c`. Note that this calls
+// AioCompletionImpl::wait_for_complete(), the same call
+// rados_aio_wait_for_complete makes; only the tracepoints differ.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_wait_for_safe)(
+  rados_completion_t c)
+{
+  tracepoint(librados, rados_aio_wait_for_safe_enter, c);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(c);
+  const int rc = comp->wait_for_complete();
+  tracepoint(librados, rados_aio_wait_for_safe_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_wait_for_safe);
+
+// Return AioCompletionImpl::is_complete() for completion `c`.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_is_complete)(
+  rados_completion_t c)
+{
+  tracepoint(librados, rados_aio_is_complete_enter, c);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(c);
+  const int state = comp->is_complete();
+  tracepoint(librados, rados_aio_is_complete_exit, state);
+  return state;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_is_complete);
+
+// Return AioCompletionImpl::is_safe() for completion `c`.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_is_safe)(
+  rados_completion_t c)
+{
+  tracepoint(librados, rados_aio_is_safe_enter, c);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(c);
+  const int state = comp->is_safe();
+  tracepoint(librados, rados_aio_is_safe_exit, state);
+  return state;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_is_safe);
+
+// Forward to AioCompletionImpl::wait_for_complete_and_cb() on completion `c`
+// and return its result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_wait_for_complete_and_cb)(
+  rados_completion_t c)
+{
+  tracepoint(librados, rados_aio_wait_for_complete_and_cb_enter, c);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(c);
+  const int rc = comp->wait_for_complete_and_cb();
+  tracepoint(librados, rados_aio_wait_for_complete_and_cb_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_wait_for_complete_and_cb);
+
+// Forward to AioCompletionImpl::wait_for_safe_and_cb() on completion `c` and
+// return its result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_wait_for_safe_and_cb)(
+  rados_completion_t c)
+{
+  tracepoint(librados, rados_aio_wait_for_safe_and_cb_enter, c);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(c);
+  const int rc = comp->wait_for_safe_and_cb();
+  tracepoint(librados, rados_aio_wait_for_safe_and_cb_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_wait_for_safe_and_cb);
+
+// Return AioCompletionImpl::is_complete_and_cb() for completion `c`.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_is_complete_and_cb)(
+  rados_completion_t c)
+{
+  tracepoint(librados, rados_aio_is_complete_and_cb_enter, c);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(c);
+  const int state = comp->is_complete_and_cb();
+  tracepoint(librados, rados_aio_is_complete_and_cb_exit, state);
+  return state;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_is_complete_and_cb);
+
+// Return AioCompletionImpl::is_safe_and_cb() for completion `c`.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_is_safe_and_cb)(
+  rados_completion_t c)
+{
+  tracepoint(librados, rados_aio_is_safe_and_cb_enter, c);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(c);
+  const int state = comp->is_safe_and_cb();
+  tracepoint(librados, rados_aio_is_safe_and_cb_exit, state);
+  return state;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_is_safe_and_cb);
+
+// Return the value reported by AioCompletionImpl::get_return_value() for
+// completion `c`.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_get_return_value)(
+  rados_completion_t c)
+{
+  tracepoint(librados, rados_aio_get_return_value_enter, c);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(c);
+  const int rc = comp->get_return_value();
+  tracepoint(librados, rados_aio_get_return_value_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_get_return_value);
+
+// Return the value reported by AioCompletionImpl::get_version() for
+// completion `c`.
+extern "C" uint64_t LIBRADOS_C_API_DEFAULT_F(rados_aio_get_version)(
+  rados_completion_t c)
+{
+  tracepoint(librados, rados_aio_get_version_enter, c);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(c);
+  const uint64_t version = comp->get_version();
+  tracepoint(librados, rados_aio_get_version_exit, version);
+  return version;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_get_version);
+
+// Give up the caller's hold on completion `c` by calling put() on it.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_aio_release)(
+  rados_completion_t c)
+{
+  tracepoint(librados, rados_aio_release_enter, c);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(c);
+  comp->put();
+  tracepoint(librados, rados_aio_release_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_release);
+
+// Start an asynchronous read of up to `len` bytes at offset `off` of object
+// `o` into `buf`, at the ioctx's current snap_seq.
+// Returns the result of IoCtxImpl::aio_read().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_read)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  char *buf, size_t len, uint64_t off)
+{
+  tracepoint(librados, rados_aio_read_enter, io, o, completion, len, off);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(completion);
+  object_t target(o);
+  const int rc = ioctx->aio_read(target, comp, buf, len, off, ioctx->snap_seq);
+  tracepoint(librados, rados_aio_read_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_read);
+
+#ifdef WITH_BLKIN
+// Same as rados_aio_read, but additionally hands the blkin trace `info` to
+// IoCtxImpl::aio_read(). Reuses the rados_aio_read enter/exit tracepoints.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_read_traced)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  char *buf, size_t len, uint64_t off,
+  struct blkin_trace_info *info)
+{
+  tracepoint(librados, rados_aio_read_enter, io, o, completion, len, off);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(completion);
+  object_t target(o);
+  const int rc = ioctx->aio_read(target, comp, buf, len, off,
+                                 ioctx->snap_seq, info);
+  tracepoint(librados, rados_aio_read_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_read_traced);
+#endif
+
+// Start an asynchronous write of the `len` bytes at `buf` to offset `off` of
+// object `o`, reporting through `completion`.
+// Returns -E2BIG when len exceeds UINT_MAX/2, otherwise the result of
+// IoCtxImpl::aio_write().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_write)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  const char *buf, size_t len, uint64_t off)
+{
+  tracepoint(librados, rados_aio_write_enter, io, o, completion, buf, len, off);
+  if (len > UINT_MAX/2) {
+    // Emit the exit record on the early return too, so every enter
+    // tracepoint has a matching exit.
+    tracepoint(librados, rados_aio_write_exit, -E2BIG);
+    return -E2BIG;
+  }
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  object_t oid(o);
+  // The data is copied into a bufferlist before the request is queued.
+  bufferlist bl;
+  bl.append(buf, len);
+  int retval = ctx->aio_write(oid, (librados::AioCompletionImpl*)completion,
+                              bl, len, off);
+  tracepoint(librados, rados_aio_write_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_write);
+
+#ifdef WITH_BLKIN
+// Same as rados_aio_write, but additionally hands the blkin trace `info` to
+// IoCtxImpl::aio_write(). Reuses the rados_aio_write enter/exit tracepoints.
+// Returns -E2BIG when len exceeds UINT_MAX/2.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_write_traced)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  const char *buf, size_t len, uint64_t off,
+  struct blkin_trace_info *info)
+{
+  tracepoint(librados, rados_aio_write_enter, io, o, completion, buf, len, off);
+  if (len > UINT_MAX/2) {
+    // Emit the exit record on the early return too, so every enter
+    // tracepoint has a matching exit.
+    tracepoint(librados, rados_aio_write_exit, -E2BIG);
+    return -E2BIG;
+  }
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  object_t oid(o);
+  bufferlist bl;
+  bl.append(buf, len);
+  int retval = ctx->aio_write(oid, (librados::AioCompletionImpl*)completion,
+                              bl, len, off, info);
+  tracepoint(librados, rados_aio_write_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_write_traced);
+#endif
+
+// Start an asynchronous append of the `len` bytes at `buf` to object `o`,
+// reporting through `completion`.
+// Returns -E2BIG when len exceeds UINT_MAX/2, otherwise the result of
+// IoCtxImpl::aio_append().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_append)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  const char *buf, size_t len)
+{
+  tracepoint(librados, rados_aio_append_enter, io, o, completion, buf, len);
+  if (len > UINT_MAX/2) {
+    // Emit the exit record on the early return too, so every enter
+    // tracepoint has a matching exit.
+    tracepoint(librados, rados_aio_append_exit, -E2BIG);
+    return -E2BIG;
+  }
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  object_t oid(o);
+  bufferlist bl;
+  bl.append(buf, len);
+  int retval = ctx->aio_append(oid, (librados::AioCompletionImpl*)completion,
+                               bl, len);
+  tracepoint(librados, rados_aio_append_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_append);
+
+// Start an asynchronous full-object write of the `len` bytes at `buf` to
+// object `o`, reporting through `completion`.
+// Returns -E2BIG when len exceeds UINT_MAX/2, otherwise the result of
+// IoCtxImpl::aio_write_full().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_write_full)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  const char *buf, size_t len)
+{
+  tracepoint(librados, rados_aio_write_full_enter, io, o, completion, buf, len);
+  if (len > UINT_MAX/2) {
+    // Emit the exit record on the early return too, so every enter
+    // tracepoint has a matching exit.
+    tracepoint(librados, rados_aio_write_full_exit, -E2BIG);
+    return -E2BIG;
+  }
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  object_t oid(o);
+  bufferlist bl;
+  bl.append(buf, len);
+  int retval = ctx->aio_write_full(oid, (librados::AioCompletionImpl*)completion, bl);
+  tracepoint(librados, rados_aio_write_full_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_write_full);
+
+// Start an asynchronous writesame on object `o`: the `data_len` bytes at
+// `buf` are passed as the data, with `write_len` and `off` forwarded to
+// IoCtxImpl::aio_writesame(). Returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_writesame)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  const char *buf, size_t data_len,
+  size_t write_len, uint64_t off)
+{
+  tracepoint(librados, rados_aio_writesame_enter, io, o, completion, buf,
+             data_len, write_len, off);
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  object_t oid(o);
+  bufferlist bl;
+  bl.append(buf, data_len);
+  // Pass the object_t built above, as the sibling aio wrappers do, instead of
+  // relying on an implicit conversion from the raw name.
+  int retval = ctx->aio_writesame(oid, (librados::AioCompletionImpl*)completion,
+                                  bl, write_len, off);
+  tracepoint(librados, rados_aio_writesame_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_writesame);
+
+// Start an asynchronous removal of object `o`; returns the result of
+// IoCtxImpl::aio_remove().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_remove)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion)
+{
+  tracepoint(librados, rados_aio_remove_enter, io, o, completion);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(completion);
+  object_t target(o);
+  const int rc = ioctx->aio_remove(target, comp);
+  tracepoint(librados, rados_aio_remove_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_remove);
+
+// Hand `completion` to IoCtxImpl::flush_aio_writes_async(). Always returns 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_flush_async)(
+  rados_ioctx_t io,
+  rados_completion_t completion)
+{
+  tracepoint(librados, rados_aio_flush_async_enter, io, completion);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(completion);
+  ioctx->flush_aio_writes_async(comp);
+  tracepoint(librados, rados_aio_flush_async_exit, 0);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_flush_async);
+
+// Call IoCtxImpl::flush_aio_writes() on `io`. Always returns 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_flush)(rados_ioctx_t io)
+{
+  tracepoint(librados, rados_aio_flush_enter, io);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  ioctx->flush_aio_writes();
+  tracepoint(librados, rados_aio_flush_exit, 0);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_flush);
+
+// State carried from rados_aio_getxattr() to its completion callback.
+struct AioGetxattrData {
+  AioGetxattrData(char* dest, rados_completion_t c, size_t capacity) :
+    user_buf(dest),
+    len(capacity),
+    user_completion(static_cast<librados::AioCompletionImpl*>(c)) {}
+  bufferlist bl;      // receives the attribute value
+  char* user_buf;     // caller's destination buffer
+  size_t len;         // capacity of user_buf
+  // functor invoked with the final result for the caller's completion
+  struct librados::CB_AioCompleteAndSafe user_completion;
+};
+
+// Completion callback for rados_aio_getxattr(). `arg` is the AioGetxattrData
+// created by that call. Translates the internal result into the caller-facing
+// one (-ERANGE if the value exceeds the caller's buffer, else the value
+// length), fires the user's completion, then releases the internal completion
+// `c` and the data object.
+static void rados_aio_getxattr_complete(rados_completion_t c, void *arg) {
+  auto *data = reinterpret_cast<AioGetxattrData*>(arg);
+  int result = LIBRADOS_C_API_DEFAULT_F(rados_aio_get_return_value)(c);
+  if (result >= 0) {
+    const auto value_len = data->bl.length();
+    if (value_len > data->len) {
+      result = -ERANGE;
+    } else {
+      // Copy only if the value did not already land in the caller's buffer.
+      if (!data->bl.is_provided_buffer(data->user_buf)) {
+        data->bl.begin().copy(value_len, data->user_buf);
+      }
+      result = value_len;
+    }
+  }
+  data->user_completion(result);
+  reinterpret_cast<librados::AioCompletionImpl*>(c)->put();
+  delete data;
+}
+
+// Start an asynchronous read of xattr `name` of object `o` into `buf`
+// (capacity `len`). An internal completion with
+// rados_aio_getxattr_complete as its callback is used for the request; that
+// callback signals the caller's `completion`.
+// Returns the result of IoCtxImpl::aio_getxattr().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_getxattr)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  const char *name, char *buf, size_t len)
+{
+  tracepoint(librados, rados_aio_getxattr_enter, io, o, completion, name, len);
+  // state handed to the async callback
+  auto *state = new AioGetxattrData(buf, completion, len);
+  if (state == nullptr) {
+    tracepoint(librados, rados_aio_getxattr_exit, -ENOMEM, NULL, 0);
+    return -ENOMEM;
+  }
+  // Back the bufferlist with the caller's buffer.
+  state->bl.push_back(buffer::create_static(len, buf));
+  // internal completion whose callback finishes the request
+  auto *internal = new librados::AioCompletionImpl;
+  internal->set_complete_callback(state, rados_aio_getxattr_complete);
+  // issue the async getxattr on the IoCtx
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  const int rc = ioctx->aio_getxattr(target, internal, name, state->bl);
+  tracepoint(librados, rados_aio_getxattr_exit, rc, buf, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_getxattr);
+
+namespace {
+// State carried from rados_aio_getxattrs() to its completion callback.
+// Owns `it` until the callback hands it to the caller and nulls the member.
+struct AioGetxattrsData {
+  AioGetxattrsData(rados_completion_t c, rados_xattrs_iter_t *out_iter) :
+    iter(out_iter),
+    user_completion(static_cast<librados::AioCompletionImpl*>(c)) {
+    it = new librados::RadosXattrsIter();
+  }
+  ~AioGetxattrsData() {
+    // deleting a null pointer is a no-op, so no check is needed
+    delete it;
+  }
+  librados::RadosXattrsIter *it;   // iterator being filled
+  rados_xattrs_iter_t *iter;       // where the caller wants the iterator
+  // functor invoked with the final result for the caller's completion
+  struct librados::CB_AioCompleteAndSafe user_completion;
+};
+}
+
+// Completion callback for rados_aio_getxattrs(). `arg` is the
+// AioGetxattrsData created by that call. On success (return value 0) the
+// filled iterator is positioned at its first attribute and handed to the
+// caller; on any non-zero result it stays with the data object and is
+// destroyed with it. The user's completion is fired either way.
+static void rados_aio_getxattrs_complete(rados_completion_t c, void *arg) {
+  auto *data = reinterpret_cast<AioGetxattrsData*>(arg);
+  const int result = LIBRADOS_C_API_DEFAULT_F(rados_aio_get_return_value)(c);
+  if (result == 0) {
+    data->it->i = data->it->attrset.begin();
+    *data->iter = data->it;
+    // Ownership has moved to the caller; keep the destructor from freeing it.
+    data->it = nullptr;
+    data->user_completion(0);
+  } else {
+    data->user_completion(result);
+  }
+  reinterpret_cast<librados::AioCompletionImpl*>(c)->put();
+  delete data;
+}
+
+// Start an asynchronous fetch of all xattrs of object `oid`. An internal
+// completion with rados_aio_getxattrs_complete as its callback is used for
+// the request; on success that callback stores the resulting iterator in
+// *iter and signals the caller's `completion`.
+// Returns the result of IoCtxImpl::aio_getxattrs().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_getxattrs)(
+  rados_ioctx_t io, const char *oid,
+  rados_completion_t completion,
+  rados_xattrs_iter_t *iter)
+{
+  tracepoint(librados, rados_aio_getxattrs_enter, io, oid, completion);
+  // create data object to be passed to async callback
+  AioGetxattrsData *cdata = new AioGetxattrsData(completion, iter);
+  if (!cdata) {
+    // Use this function's own exit tracepoint, not the synchronous one.
+    tracepoint(librados, rados_aio_getxattrs_exit, -ENOMEM, NULL);
+    return -ENOMEM;
+  }
+  // Grab the iterator pointer up front: the completion callback deletes
+  // cdata, and it may already have run by the time the submit call returns,
+  // so cdata must not be touched after submission.
+  librados::RadosXattrsIter *it = cdata->it;
+  // create completion callback
+  librados::AioCompletionImpl *c = new librados::AioCompletionImpl;
+  c->set_complete_callback(cdata, rados_aio_getxattrs_complete);
+  // call async getxattrs of IoCtx
+  librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+  object_t obj(oid);
+  int ret = ctx->aio_getxattrs(obj, c, it->attrset);
+  // Only the pointer value is passed on here.
+  tracepoint(librados, rados_aio_getxattrs_exit, ret, it);
+  return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_getxattrs);
+
+// Start an asynchronous set of xattr `name` on object `o` to the `len` bytes
+// at `buf`; returns the result of IoCtxImpl::aio_setxattr().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_setxattr)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  const char *name, const char *buf, size_t len)
+{
+  tracepoint(librados, rados_aio_setxattr_enter, io, o, completion, name, buf, len);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(completion);
+  object_t target(o);
+  bufferlist value;
+  value.append(buf, len);
+  const int rc = ioctx->aio_setxattr(target, comp, name, value);
+  tracepoint(librados, rados_aio_setxattr_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_setxattr);
+
+// Start an asynchronous removal of xattr `name` from object `o`; returns the
+// result of IoCtxImpl::aio_rmxattr().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_rmxattr)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  const char *name)
+{
+  tracepoint(librados, rados_aio_rmxattr_enter, io, o, completion, name);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(completion);
+  object_t target(o);
+  const int rc = ioctx->aio_rmxattr(target, comp, name);
+  tracepoint(librados, rados_aio_rmxattr_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_rmxattr);
+
+// Start an asynchronous stat of object `o`, forwarding the `psize` and
+// `pmtime` out-pointers to IoCtxImpl::aio_stat(); returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_stat)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  uint64_t *psize, time_t *pmtime)
+{
+  tracepoint(librados, rados_aio_stat_enter, io, o, completion);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(completion);
+  object_t target(o);
+  const int rc = ioctx->aio_stat(target, comp, psize, pmtime);
+  tracepoint(librados, rados_aio_stat_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_stat);
+
+// Start an asynchronous compare-extent of object `o` at offset `off` against
+// the `cmp_len` bytes at `cmp_buf`; returns the result of
+// IoCtxImpl::aio_cmpext().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_cmpext)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion, const char *cmp_buf,
+  size_t cmp_len, uint64_t off)
+{
+  tracepoint(librados, rados_aio_cmpext_enter, io, o, completion, cmp_buf,
+             cmp_len, off);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(completion);
+  object_t target(o);
+  const int rc = ioctx->aio_cmpext(target, comp, cmp_buf, cmp_len, off);
+  tracepoint(librados, rados_aio_cmpext_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_cmpext);
+
+// Pass `completion` to IoCtxImpl::aio_cancel() and return its result.
+// No tracepoints are emitted here.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_cancel)(
+  rados_ioctx_t io,
+  rados_completion_t completion)
+{
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(completion);
+  return ioctx->aio_cancel(comp);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_cancel);
+
+// Start an asynchronous call of method `method` of class `cls` on object `o`
+// with the `in_len` bytes at `inbuf` as input; `buf` and `out_len` are
+// forwarded to IoCtxImpl::aio_exec() for the output.
+// Returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_exec)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  const char *cls, const char *method,
+  const char *inbuf, size_t in_len,
+  char *buf, size_t out_len)
+{
+  tracepoint(librados, rados_aio_exec_enter, io, o, completion);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl*>(completion);
+  object_t target(o);
+  bufferlist input;
+  input.append(inbuf, in_len);
+  const int rc = ioctx->aio_exec(target, comp, cls, method, input, buf, out_len);
+  tracepoint(librados, rados_aio_exec_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_exec);
+
+// Adapts a C rados_watchcb_t callback to the librados::WatchCtx interface.
+struct C_WatchCB : public librados::WatchCtx {
+  rados_watchcb_t wcb;  // user callback
+  void *arg;            // opaque pointer passed back to the callback
+  C_WatchCB(rados_watchcb_t callback, void *callback_arg)
+    : wcb(callback), arg(callback_arg) {}
+  // The notification payload `bl` is not forwarded to the C callback.
+  void notify(uint8_t opcode, uint64_t ver, bufferlist& bl) override {
+    wcb(opcode, ver, arg);
+  }
+};
+
+// Register a watch that reports notifications through the
+// rados_watchcb_t (opcode, ver, arg) callback.
+//
+// A null callback, object name or handle is rejected with -EINVAL, the same
+// validation rados_watch3 applies; previously a null handle was dereferenced
+// by the exit tracepoint.  `ver` is only recorded in the enter tracepoint.
+//
+// Returns -EINVAL on bad arguments, otherwise the result of
+// IoCtxImpl::watch().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_watch)(
+  rados_ioctx_t io, const char *o, uint64_t ver,
+  uint64_t *handle,
+  rados_watchcb_t watchcb, void *arg)
+{
+  tracepoint(librados, rados_watch_enter, io, o, ver, watchcb, arg);
+  int retval;
+  if (!watchcb || !o || !handle) {
+    retval = -EINVAL;
+  } else {
+    uint64_t *cookie = handle;
+    librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+    object_t oid(o);
+    C_WatchCB *wc = new C_WatchCB(watchcb, arg);
+    retval = ctx->watch(oid, cookie, wc, NULL, true);
+  }
+  // Guard the dereference: handle may be null on the -EINVAL path.
+  tracepoint(librados, rados_watch_exit, retval, handle ? *handle : 0);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_watch);
+
+// Adapter that routes librados::WatchCtx2 events to a pair of C callbacks.
+struct C_WatchCB2 : public librados::WatchCtx2 {
+  rados_watchcb2_t wcb;      // called from handle_notify()
+  rados_watcherrcb_t errcb;  // called from handle_error(); may be null
+  void *arg;                 // opaque pointer passed to both callbacks
+
+  C_WatchCB2(rados_watchcb2_t notify_cb,
+             rados_watcherrcb_t error_cb,
+             void *cb_arg)
+    : wcb(notify_cb), errcb(error_cb), arg(cb_arg)
+  {}
+
+  // Hands the payload to the C callback as a (pointer, length) pair.
+  void handle_notify(uint64_t notify_id,
+                     uint64_t cookie,
+                     uint64_t notifier_gid,
+                     bufferlist& bl) override
+  {
+    wcb(arg, notify_id, cookie, notifier_gid, bl.c_str(), bl.length());
+  }
+
+  // Errors are dropped when no error callback was supplied.
+  void handle_error(uint64_t cookie, int err) override
+  {
+    if (!errcb) {
+      return;
+    }
+    errcb(arg, cookie, err);
+  }
+};
+
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_watch3)(
+ rados_ioctx_t io, const char *o, uint64_t *handle,
+ rados_watchcb2_t watchcb,
+ rados_watcherrcb_t watcherrcb,
+ uint32_t timeout,
+ void *arg)
+{
+ tracepoint(librados, rados_watch3_enter, io, o, handle, watchcb, timeout, arg);
+ int ret;
+ if (!watchcb || !o || !handle) {
+ ret = -EINVAL;
+ } else {
+ uint64_t *cookie = handle;
+ librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+ object_t oid(o);
+ C_WatchCB2 *wc = new C_WatchCB2(watchcb, watcherrcb, arg);
+ ret = ctx->watch(oid, cookie, NULL, wc, timeout, true);
+ }
+ tracepoint(librados, rados_watch3_exit, ret, handle ? *handle : 0);
+ return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_watch3);
+
+// Delegates to rados_watch3 with the timeout argument fixed at 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_watch2)(
+  rados_ioctx_t io, const char *o, uint64_t *handle,
+  rados_watchcb2_t watchcb,
+  rados_watcherrcb_t watcherrcb,
+  void *arg)
+{
+  const uint32_t timeout_arg = 0;
+  return LIBRADOS_C_API_DEFAULT_F(rados_watch3)(
+    io, o, handle, watchcb, watcherrcb, timeout_arg, arg);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_watch2);
+
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_watch2)(
+ rados_ioctx_t io, const char *o,
+ rados_completion_t completion,
+ uint64_t *handle,
+ rados_watchcb2_t watchcb,
+ rados_watcherrcb_t watcherrcb,
+ uint32_t timeout, void *arg)
+{
+ tracepoint(librados, rados_aio_watch2_enter, io, o, completion, handle, watchcb, timeout, arg);
+ int ret;
+ if (!completion || !watchcb || !o || !handle) {
+ ret = -EINVAL;
+ } else {
+ uint64_t *cookie = handle;
+ librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io;
+ object_t oid(o);
+ librados::AioCompletionImpl *c =
+ reinterpret_cast<librados::AioCompletionImpl*>(completion);
+ C_WatchCB2 *wc = new C_WatchCB2(watchcb, watcherrcb, arg);
+ ret = ctx->aio_watch(oid, c, cookie, NULL, wc, timeout, true);
+ }
+ tracepoint(librados, rados_aio_watch2_exit, ret, handle ? *handle : 0);
+ return ret;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_watch2);
+
+// Delegates to rados_aio_watch2 with the timeout argument fixed at 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_watch)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  uint64_t *handle,
+  rados_watchcb2_t watchcb,
+  rados_watcherrcb_t watcherrcb, void *arg)
+{
+  const uint32_t timeout_arg = 0;
+  return LIBRADOS_C_API_DEFAULT_F(rados_aio_watch2)(
+    io, o, completion, handle, watchcb, watcherrcb, timeout_arg, arg);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_watch);
+
+// Drops the watch identified by `handle` via IoCtxImpl::unwatch().
+// The object name `o` is only recorded in the enter tracepoint.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_unwatch)(
+  rados_ioctx_t io,
+  const char *o,
+  uint64_t handle)
+{
+  tracepoint(librados, rados_unwatch_enter, io, o, handle);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  int rc = impl->unwatch(handle);
+  tracepoint(librados, rados_unwatch_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_unwatch);
+
+// Drops the watch identified by `handle` via IoCtxImpl::unwatch() and
+// returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_unwatch2)(
+  rados_ioctx_t io,
+  uint64_t handle)
+{
+  tracepoint(librados, rados_unwatch2_enter, io, handle);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  int rc = impl->unwatch(handle);
+  tracepoint(librados, rados_unwatch2_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_unwatch2);
+
+// Asynchronous unwatch: forwards the handle and completion to
+// IoCtxImpl::aio_unwatch() and returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_unwatch)(
+  rados_ioctx_t io, uint64_t handle,
+  rados_completion_t completion)
+{
+  tracepoint(librados, rados_aio_unwatch_enter, io, handle, completion);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = reinterpret_cast<librados::AioCompletionImpl*>(completion);
+  int rc = impl->aio_unwatch(handle, comp);
+  tracepoint(librados, rados_aio_unwatch_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_unwatch);
+
+// Returns the result of IoCtxImpl::watch_check() for the given watch handle.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_watch_check)(
+  rados_ioctx_t io,
+  uint64_t handle)
+{
+  tracepoint(librados, rados_watch_check_enter, io, handle);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  int rc = impl->watch_check(handle);
+  tracepoint(librados, rados_watch_check_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_watch_check);
+
+// Sends a notify on object `o` with an optional payload.
+//
+// When `buf` is non-null, buf_len bytes are copied into the outgoing
+// bufferlist; otherwise the payload is empty.  `ver` is only recorded in the
+// enter tracepoint.  The timeout passed on is 0 and no reply is collected.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_notify)(
+  rados_ioctx_t io, const char *o,
+  uint64_t ver, const char *buf, int buf_len)
+{
+  tracepoint(librados, rados_notify_enter, io, o, ver, buf, buf_len);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  bufferlist payload;
+  if (buf) {
+    bufferptr chunk = buffer::create(buf_len);
+    memcpy(chunk.c_str(), buf, buf_len);
+    payload.push_back(chunk);
+  }
+  int rc = impl->notify(target, payload, 0, NULL, NULL, NULL);
+  tracepoint(librados, rados_notify_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_notify);
+
+// Sends a notify on object `o` with an optional payload and a timeout, and
+// passes the caller's reply buffer/length out-pointers through to
+// IoCtxImpl::notify().  Returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_notify2)(
+  rados_ioctx_t io, const char *o,
+  const char *buf, int buf_len,
+  uint64_t timeout_ms,
+  char **reply_buffer,
+  size_t *reply_buffer_len)
+{
+  tracepoint(librados, rados_notify2_enter, io, o, buf, buf_len, timeout_ms);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  bufferlist payload;
+  if (buf) {
+    // Copy the caller's bytes into a freshly created buffer.
+    bufferptr chunk = buffer::create(buf_len);
+    memcpy(chunk.c_str(), buf, buf_len);
+    payload.push_back(chunk);
+  }
+  int rc = impl->notify(target, payload, timeout_ms, NULL, reply_buffer, reply_buffer_len);
+  tracepoint(librados, rados_notify2_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_notify2);
+
+// Decode a notify reply blob into two C arrays: one entry per acknowledgement
+// (with a malloc()ed copy of its payload) and one entry per timed-out watcher.
+//
+// On success the arrays are allocated with new[] and must be released with
+// rados_free_notify_response().
+//
+// Returns:
+//   0        on success
+//   -EINVAL  if any argument is null, the buffer is empty, or the blob
+//            cannot be decoded
+//   -ENOMEM  if a payload copy cannot be allocated
+//
+// Once the arguments have been validated the outputs are always left in a
+// consistent state: on failure *acks and *timeouts are null and both counts
+// are 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_decode_notify_response)(
+  char *reply_buffer, size_t reply_buffer_len,
+  struct notify_ack_t **acks, size_t *nr_acks,
+  struct notify_timeout_t **timeouts, size_t *nr_timeouts)
+{
+  if (!reply_buffer || !reply_buffer_len ||
+      !acks || !nr_acks || !timeouts || !nr_timeouts) {
+    return -EINVAL;
+  }
+
+  *acks = nullptr;
+  *nr_acks = 0;
+  *timeouts = nullptr;
+  *nr_timeouts = 0;
+
+  bufferlist bl;
+  bl.append(reply_buffer, reply_buffer_len);
+
+  map<pair<uint64_t,uint64_t>,bufferlist> acked;
+  set<pair<uint64_t,uint64_t>> missed;
+  try {
+    auto iter = bl.cbegin();
+    decode(acked, iter);
+    decode(missed, iter);
+  } catch (const buffer::error&) {
+    // A truncated or corrupt blob must not unwind through the C ABI.
+    return -EINVAL;
+  }
+
+  const size_t ack_count = acked.size();
+  if (ack_count) {
+    // Value-initialise so every payload pointer starts out null; that lets
+    // the failure path free the whole array without tracking progress.
+    struct notify_ack_t *ack_array = new notify_ack_t[ack_count]();
+    struct notify_ack_t *ack = ack_array;
+    for (auto &[who, payload] : acked) {
+      ack->notifier_id = who.first;
+      ack->cookie = who.second;
+      ack->payload = nullptr;
+      ack->payload_len = payload.length();
+      if (ack->payload_len) {
+        ack->payload = (char *)malloc(ack->payload_len);
+        if (!ack->payload) {
+          for (size_t n = 0; n < ack_count; ++n) {
+            free(ack_array[n].payload);
+          }
+          delete[] ack_array;
+          return -ENOMEM;
+        }
+        memcpy(ack->payload, payload.c_str(), ack->payload_len);
+      }
+
+      ack++;
+    }
+    *acks = ack_array;
+    *nr_acks = ack_count;
+  }
+
+  const size_t timeout_count = missed.size();
+  if (timeout_count) {
+    struct notify_timeout_t *timeout_array = new notify_timeout_t[timeout_count];
+    struct notify_timeout_t *timeout = timeout_array;
+    for (auto &[notifier_id, cookie] : missed) {
+      timeout->notifier_id = notifier_id;
+      timeout->cookie = cookie;
+      timeout++;
+    }
+    *timeouts = timeout_array;
+    *nr_timeouts = timeout_count;
+  }
+
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_decode_notify_response);
+
+
+// Releases arrays handed out by rados_decode_notify_response: each ack
+// payload is free()d, then both arrays are delete[]d.  Null arrays are
+// accepted as long as nr_acks is 0.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_free_notify_response)(
+  struct notify_ack_t *acks, size_t nr_acks,
+  struct notify_timeout_t *timeouts)
+{
+  if (nr_acks > 0) {
+    assert(acks);
+  }
+  for (uint64_t idx = 0; idx < nr_acks; ++idx) {
+    // free() on a null payload is a no-op.
+    free(acks[idx].payload);
+  }
+  // delete[] on a null pointer is a no-op.
+  delete[] acks;
+  delete[] timeouts;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_free_notify_response);
+
+// Asynchronous notify: builds the payload from `buf` (when non-null) and
+// forwards it, with the completion, timeout and reply out-pointers, to
+// IoCtxImpl::aio_notify().  Returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_notify)(
+  rados_ioctx_t io, const char *o,
+  rados_completion_t completion,
+  const char *buf, int buf_len,
+  uint64_t timeout_ms, char **reply_buffer,
+  size_t *reply_buffer_len)
+{
+  tracepoint(librados, rados_aio_notify_enter, io, o, completion, buf, buf_len,
+             timeout_ms);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  bufferlist payload;
+  if (buf) {
+    payload.push_back(buffer::copy(buf, buf_len));
+  }
+  auto *comp = reinterpret_cast<librados::AioCompletionImpl*>(completion);
+  int rc = impl->aio_notify(target, comp, payload, timeout_ms, NULL,
+                            reply_buffer, reply_buffer_len);
+  tracepoint(librados, rados_aio_notify_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_notify);
+
+// Acknowledges a notify with an optional reply payload.
+//
+// The result of IoCtxImpl::notify_ack() is not inspected: this wrapper
+// always traces and returns 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_notify_ack)(
+  rados_ioctx_t io, const char *o,
+  uint64_t notify_id, uint64_t handle,
+  const char *buf, int buf_len)
+{
+  tracepoint(librados, rados_notify_ack_enter, io, o, notify_id, handle, buf, buf_len);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  bufferlist reply;
+  if (buf) {
+    bufferptr chunk = buffer::create(buf_len);
+    memcpy(chunk.c_str(), buf, buf_len);
+    reply.push_back(chunk);
+  }
+  impl->notify_ack(target, notify_id, handle, reply);
+  tracepoint(librados, rados_notify_ack_exit, 0);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_notify_ack);
+
+// Returns the result of RadosClient::watch_flush() for the given cluster.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_watch_flush)(rados_t cluster)
+{
+  tracepoint(librados, rados_watch_flush_enter, cluster);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+  int rc = rados->watch_flush();
+  tracepoint(librados, rados_watch_flush_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_watch_flush);
+
+// Asynchronous counterpart of rados_watch_flush: forwards the completion to
+// RadosClient::async_watch_flush() and returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_watch_flush)(
+  rados_t cluster,
+  rados_completion_t completion)
+{
+  tracepoint(librados, rados_aio_watch_flush_enter, cluster, completion);
+  auto *rados = static_cast<librados::RadosClient *>(cluster);
+  auto *comp = static_cast<librados::AioCompletionImpl *>(completion);
+  int rc = rados->async_watch_flush(comp);
+  tracepoint(librados, rados_aio_watch_flush_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_watch_flush);
+
+// Forwards the two size hints to IoCtxImpl::set_alloc_hint() with a flags
+// value of 0 and returns that call's result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_set_alloc_hint)(
+  rados_ioctx_t io, const char *o,
+  uint64_t expected_object_size,
+  uint64_t expected_write_size)
+{
+  tracepoint(librados, rados_set_alloc_hint_enter, io, o, expected_object_size, expected_write_size);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  int rc = impl->set_alloc_hint(target, expected_object_size,
+                                expected_write_size, 0);
+  tracepoint(librados, rados_set_alloc_hint_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_set_alloc_hint);
+
+// Same as rados_set_alloc_hint, but with caller-supplied flags passed
+// through to IoCtxImpl::set_alloc_hint().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_set_alloc_hint2)(
+  rados_ioctx_t io, const char *o,
+  uint64_t expected_object_size,
+  uint64_t expected_write_size,
+  uint32_t flags)
+{
+  tracepoint(librados, rados_set_alloc_hint2_enter, io, o, expected_object_size, expected_write_size, flags);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  int rc = impl->set_alloc_hint(target, expected_object_size,
+                                expected_write_size, flags);
+  tracepoint(librados, rados_set_alloc_hint2_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_set_alloc_hint2);
+
+// Wraps the C ioctx handle in a librados::IoCtx and returns the result of
+// IoCtx::lock_exclusive() with the caller's arguments.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_lock_exclusive)(
+  rados_ioctx_t io, const char * o,
+  const char * name, const char * cookie,
+  const char * desc,
+  struct timeval * duration, uint8_t flags)
+{
+  tracepoint(librados, rados_lock_exclusive_enter, io, o, name, cookie, desc, duration, flags);
+  librados::IoCtx ioctx;
+  librados::IoCtx::from_rados_ioctx_t(io, ioctx);
+
+  int rc = ioctx.lock_exclusive(o, name, cookie, desc, duration, flags);
+  tracepoint(librados, rados_lock_exclusive_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_lock_exclusive);
+
+// Wraps the C ioctx handle in a librados::IoCtx and returns the result of
+// IoCtx::lock_shared() with the caller's arguments.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_lock_shared)(
+  rados_ioctx_t io, const char * o,
+  const char * name, const char * cookie,
+  const char * tag, const char * desc,
+  struct timeval * duration, uint8_t flags)
+{
+  tracepoint(librados, rados_lock_shared_enter, io, o, name, cookie, tag, desc, duration, flags);
+  librados::IoCtx ioctx;
+  librados::IoCtx::from_rados_ioctx_t(io, ioctx);
+
+  int rc = ioctx.lock_shared(o, name, cookie, tag, desc, duration, flags);
+  tracepoint(librados, rados_lock_shared_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_lock_shared);
+
+// Wraps the C ioctx handle in a librados::IoCtx and returns the result of
+// IoCtx::unlock() for the named lock and cookie.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_unlock)(
+  rados_ioctx_t io, const char *o, const char *name,
+  const char *cookie)
+{
+  tracepoint(librados, rados_unlock_enter, io, o, name, cookie);
+  librados::IoCtx ioctx;
+  librados::IoCtx::from_rados_ioctx_t(io, ioctx);
+
+  int rc = ioctx.unlock(o, name, cookie);
+  tracepoint(librados, rados_unlock_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_unlock);
+
+// Asynchronous unlock through the C++ IoCtx API.
+//
+// The C completion is wrapped in a stack-allocated librados::AioCompletion
+// for the duration of the call; an extra reference is taken on it first
+// (presumably balanced when the wrapper goes out of scope -- confirm against
+// AioCompletion's destructor).
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_unlock)(
+  rados_ioctx_t io, const char *o, const char *name,
+  const char *cookie, rados_completion_t completion)
+{
+  tracepoint(librados, rados_aio_unlock_enter, io, o, name, cookie, completion);
+  librados::IoCtx ioctx;
+  librados::IoCtx::from_rados_ioctx_t(io, ioctx);
+  auto *impl = static_cast<librados::AioCompletionImpl *>(completion);
+  impl->get();
+  librados::AioCompletion wrapper(impl);
+  int rc = ioctx.aio_unlock(o, name, cookie, &wrapper);
+  tracepoint(librados, rados_aio_unlock_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_unlock);
+
+// Lists the holders of a lock into caller-provided buffers.
+//
+// Each of clients/cookies/addrs receives the corresponding strings packed
+// back to back, every one NUL-terminated; `tag` receives the lock tag.  The
+// *_len arguments are in/out: on entry they give the buffer capacities, and
+// once IoCtx::list_lockers() has succeeded they are overwritten with the
+// sizes actually required (including terminators).
+//
+// Returns:
+//   the number of lockers on success (*exclusive is set to 1 or 0)
+//   -ERANGE if any buffer is too small (the *_len values tell how much is
+//           needed; no strings are written)
+//   the negative value from IoCtx::list_lockers() if that call fails
+extern "C" ssize_t LIBRADOS_C_API_DEFAULT_F(rados_list_lockers)(
+  rados_ioctx_t io, const char *o,
+  const char *name, int *exclusive,
+  char *tag, size_t *tag_len,
+  char *clients, size_t *clients_len,
+  char *cookies, size_t *cookies_len,
+  char *addrs, size_t *addrs_len)
+{
+  tracepoint(librados, rados_list_lockers_enter, io, o, name, *tag_len, *clients_len, *cookies_len, *addrs_len);
+  librados::IoCtx ioctx;
+  librados::IoCtx::from_rados_ioctx_t(io, ioctx);
+  std::string lock_name = name;
+  std::string object_name = o;
+  std::string lock_tag;
+  int is_exclusive;
+  std::list<librados::locker_t> lockers;
+  int r = ioctx.list_lockers(object_name, lock_name, &is_exclusive, &lock_tag, &lockers);
+  if (r < 0) {
+    tracepoint(librados, rados_list_lockers_exit, r, *exclusive, "", *tag_len, *clients_len, *cookies_len, *addrs_len);
+    return r;
+  }
+
+  // First pass: how many bytes does each packed output need?
+  size_t clients_need = 0;
+  size_t cookies_need = 0;
+  size_t addrs_need = 0;
+  for (const auto &locker : lockers) {
+    clients_need += locker.client.length() + 1;
+    cookies_need += locker.cookie.length() + 1;
+    addrs_need += locker.address.length() + 1;
+  }
+  const size_t tag_need = lock_tag.length() + 1;
+
+  // Compare against the capacities before they are overwritten below.
+  const bool fits = (clients_need <= *clients_len) &&
+                    (cookies_need <= *cookies_len) &&
+                    (addrs_need <= *addrs_len) &&
+                    (tag_need <= *tag_len);
+  *clients_len = clients_need;
+  *cookies_len = cookies_need;
+  *addrs_len = addrs_need;
+  *tag_len = tag_need;
+  if (!fits) {
+    tracepoint(librados, rados_list_lockers_exit, -ERANGE, *exclusive, "", *tag_len, *clients_len, *cookies_len, *addrs_len);
+    return -ERANGE;
+  }
+
+  // Second pass: copy the strings out, advancing past each terminator.
+  strcpy(tag, lock_tag.c_str());
+  char *client_out = clients;
+  char *cookie_out = cookies;
+  char *addr_out = addrs;
+  for (const auto &locker : lockers) {
+    strcpy(client_out, locker.client.c_str());
+    strcpy(cookie_out, locker.cookie.c_str());
+    strcpy(addr_out, locker.address.c_str());
+    tracepoint(librados, rados_list_lockers_locker, client_out, cookie_out, addr_out);
+    client_out += locker.client.length() + 1;
+    cookie_out += locker.cookie.length() + 1;
+    addr_out += locker.address.length() + 1;
+  }
+  *exclusive = is_exclusive ? 1 : 0;
+
+  int count = lockers.size();
+  tracepoint(librados, rados_list_lockers_exit, count, *exclusive, tag, *tag_len, *clients_len, *cookies_len, *addrs_len);
+  return count;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_list_lockers);
+
+// Wraps the C ioctx handle in a librados::IoCtx and returns the result of
+// IoCtx::break_lock() for the given lock, client and cookie.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_break_lock)(
+  rados_ioctx_t io, const char *o,
+  const char *name, const char *client,
+  const char *cookie)
+{
+  tracepoint(librados, rados_break_lock_enter, io, o, name, client, cookie);
+  librados::IoCtx ioctx;
+  librados::IoCtx::from_rados_ioctx_t(io, ioctx);
+
+  int rc = ioctx.break_lock(o, name, client, cookie);
+  tracepoint(librados, rados_break_lock_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_break_lock);
+
+// Allocates a new ObjectOperation and returns it as an opaque write-op
+// handle.  Uses nothrow new, so the handle is null if allocation fails.
+extern "C" rados_write_op_t LIBRADOS_C_API_DEFAULT_F(rados_create_write_op)()
+{
+  tracepoint(librados, rados_create_write_op_enter);
+  rados_write_op_t handle = new (std::nothrow) ::ObjectOperation;
+  tracepoint(librados, rados_create_write_op_exit, handle);
+  return handle;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_create_write_op);
+
+// Deletes the ObjectOperation behind a write-op handle.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_release_write_op)(
+  rados_write_op_t write_op)
+{
+  tracepoint(librados, rados_release_write_op_enter, write_op);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  delete op;
+  tracepoint(librados, rados_release_write_op_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_release_write_op);
+
+// Converts `flags` with get_op_flags() and applies the result through
+// ObjectOperation::set_last_op_flags().
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_set_flags)(
+  rados_write_op_t write_op,
+  int flags)
+{
+  tracepoint(librados, rados_write_op_set_flags_enter, write_op, flags);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  op->set_last_op_flags(get_op_flags(flags));
+  tracepoint(librados, rados_write_op_set_flags_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_set_flags);
+
+// Adds a version assertion to the write op via
+// ObjectOperation::assert_version().
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_assert_version)(
+  rados_write_op_t write_op,
+  uint64_t ver)
+{
+  tracepoint(librados, rados_write_op_assert_version_enter, write_op, ver);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  op->assert_version(ver);
+  tracepoint(librados, rados_write_op_assert_version_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_assert_version);
+
+// Implements "assert exists" by queueing a stat on the write op with all
+// three output arguments null.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_assert_exists)(
+  rados_write_op_t write_op)
+{
+  tracepoint(librados, rados_write_op_assert_exists_enter, write_op);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  op->stat(nullptr, nullptr, nullptr);
+  tracepoint(librados, rados_write_op_assert_exists_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_assert_exists);
+
+// Queues a cmpext on the write op.  Note the argument order expected by
+// ObjectOperation::cmpext(): offset, length, buffer, result pointer.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_cmpext)(
+  rados_write_op_t write_op,
+  const char *cmp_buf,
+  size_t cmp_len,
+  uint64_t off,
+  int *prval)
+{
+  tracepoint(librados, rados_write_op_cmpext_enter, write_op, cmp_buf,
+             cmp_len, off, prval);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  op->cmpext(off, cmp_len, cmp_buf, prval);
+  tracepoint(librados, rados_write_op_cmpext_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_cmpext);
+
+// Queues an xattr comparison on the write op.  The value is compared in
+// CEPH_OSD_CMPXATTR_MODE_STRING mode using the caller's operator.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_cmpxattr)(
+  rados_write_op_t write_op,
+  const char *name,
+  uint8_t comparison_operator,
+  const char *value,
+  size_t value_len)
+{
+  tracepoint(librados, rados_write_op_cmpxattr_enter, write_op, name, comparison_operator, value, value_len);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  bufferlist expected;
+  expected.append(value, value_len);
+  op->cmpxattr(name,
+               comparison_operator,
+               CEPH_OSD_CMPXATTR_MODE_STRING,
+               expected);
+  tracepoint(librados, rados_write_op_cmpxattr_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_cmpxattr);
+
+// Shared helper for the omap_cmp wrappers: builds a single-entry assertion
+// map (key -> (value, operator)) and queues it with
+// ObjectOperation::omap_cmp().  The key is taken as key_len bytes, so it may
+// contain embedded NULs.
+static void rados_c_omap_cmp(ObjectOperation *op,
+                             const char *key,
+                             uint8_t comparison_operator,
+                             const char *val,
+                             size_t key_len,
+                             size_t val_len,
+                             int *prval)
+{
+  bufferlist expected;
+  expected.append(val, val_len);
+
+  std::map<std::string, pair<bufferlist, int> > assertions;
+  assertions[string(key, key_len)] = std::make_pair(expected, comparison_operator);
+  op->omap_cmp(assertions, prval);
+}
+
+// Queues an omap comparison on the write op.  The key is treated as a
+// NUL-terminated string (its length is taken with strlen).
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_cmp)(
+  rados_write_op_t write_op,
+  const char *key,
+  uint8_t comparison_operator,
+  const char *val,
+  size_t val_len,
+  int *prval)
+{
+  tracepoint(librados, rados_write_op_omap_cmp_enter, write_op, key, comparison_operator, val, val_len, prval);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  rados_c_omap_cmp(op, key, comparison_operator,
+                   val, strlen(key), val_len, prval);
+  tracepoint(librados, rados_write_op_omap_cmp_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_cmp);
+
+// Variant of rados_write_op_omap_cmp that takes an explicit key length.
+// It emits the rados_write_op_omap_cmp_enter/_exit tracepoints (there is no
+// "_cmp2" tracepoint used here).
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_cmp2)(
+  rados_write_op_t write_op,
+  const char *key,
+  uint8_t comparison_operator,
+  const char *val,
+  size_t key_len,
+  size_t val_len,
+  int *prval)
+{
+  tracepoint(librados, rados_write_op_omap_cmp_enter, write_op, key, comparison_operator, val, val_len, prval);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  rados_c_omap_cmp(op, key, comparison_operator,
+                   val, key_len, val_len, prval);
+  tracepoint(librados, rados_write_op_omap_cmp_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_cmp2);
+
+// Queues a setxattr on the write op with value_len bytes of `value`.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_setxattr)(
+  rados_write_op_t write_op,
+  const char *name,
+  const char *value,
+  size_t value_len)
+{
+  tracepoint(librados, rados_write_op_setxattr_enter, write_op, name, value, value_len);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  bufferlist data;
+  data.append(value, value_len);
+  op->setxattr(name, data);
+  tracepoint(librados, rados_write_op_setxattr_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_setxattr);
+
+// Queues removal of the named xattr on the write op.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_rmxattr)(
+  rados_write_op_t write_op,
+  const char *name)
+{
+  tracepoint(librados, rados_write_op_rmxattr_enter, write_op, name);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  op->rmxattr(name);
+  tracepoint(librados, rados_write_op_rmxattr_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_rmxattr);
+
+// Queues an object create on the write op.  Any non-zero `exclusive` is
+// passed on as true; `category` is accepted but ignored.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_create)(
+  rados_write_op_t write_op,
+  int exclusive,
+  const char* category) // unused
+{
+  tracepoint(librados, rados_write_op_create_enter, write_op, exclusive);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  const bool excl = (exclusive != 0);
+  op->create(excl);
+  tracepoint(librados, rados_write_op_create_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_create);
+
+// Queues a write of `len` bytes from `buffer` at `offset` on the write op.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_write)(
+  rados_write_op_t write_op,
+  const char *buffer,
+  size_t len,
+  uint64_t offset)
+{
+  tracepoint(librados, rados_write_op_write_enter, write_op, buffer, len, offset);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  bufferlist data;
+  data.append(buffer, len);
+  op->write(offset, data);
+  tracepoint(librados, rados_write_op_write_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_write);
+
+// Queues a write_full of `len` bytes from `buffer` on the write op.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_write_full)(
+  rados_write_op_t write_op,
+  const char *buffer,
+  size_t len)
+{
+  tracepoint(librados, rados_write_op_write_full_enter, write_op, buffer, len);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  bufferlist data;
+  data.append(buffer, len);
+  op->write_full(data);
+  tracepoint(librados, rados_write_op_write_full_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_write_full);
+
+// Queues a writesame on the write op: data_len bytes of `buffer` are the
+// pattern, and offset/write_len are passed to ObjectOperation::writesame().
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_writesame)(
+  rados_write_op_t write_op,
+  const char *buffer,
+  size_t data_len,
+  size_t write_len,
+  uint64_t offset)
+{
+  tracepoint(librados, rados_write_op_writesame_enter, write_op, buffer, data_len, write_len, offset);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  bufferlist pattern;
+  pattern.append(buffer, data_len);
+  op->writesame(offset, write_len, pattern);
+  tracepoint(librados, rados_write_op_writesame_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_writesame);
+
+// Queues an append of `len` bytes from `buffer` on the write op.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_append)(
+  rados_write_op_t write_op,
+  const char *buffer,
+  size_t len)
+{
+  tracepoint(librados, rados_write_op_append_enter, write_op, buffer, len);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  bufferlist data;
+  data.append(buffer, len);
+  op->append(data);
+  tracepoint(librados, rados_write_op_append_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_append);
+
+// Queues an object remove on the write op.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_remove)(
+  rados_write_op_t write_op)
+{
+  tracepoint(librados, rados_write_op_remove_enter, write_op);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  op->remove();
+  tracepoint(librados, rados_write_op_remove_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_remove);
+
+// Queues a truncate to `offset` on the write op.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_truncate)(
+  rados_write_op_t write_op,
+  uint64_t offset)
+{
+  tracepoint(librados, rados_write_op_truncate_enter, write_op, offset);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  op->truncate(offset);
+  tracepoint(librados, rados_write_op_truncate_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_truncate);
+
+// Queues a zero of `len` bytes starting at `offset` on the write op.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_zero)(
+  rados_write_op_t write_op,
+  uint64_t offset,
+  uint64_t len)
+{
+  tracepoint(librados, rados_write_op_zero_enter, write_op, offset, len);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  op->zero(offset, len);
+  tracepoint(librados, rados_write_op_zero_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_zero);
+
+// Queues a cls/method call on the write op with in_len bytes of input.
+// No output buffer or context is supplied (both passed as NULL); only the
+// per-op result pointer `prval` is forwarded.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_exec)(
+  rados_write_op_t write_op,
+  const char *cls,
+  const char *method,
+  const char *in_buf,
+  size_t in_len,
+  int *prval)
+{
+  tracepoint(librados, rados_write_op_exec_enter, write_op, cls, method, in_buf, in_len, prval);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  bufferlist input;
+  input.append(in_buf, in_len);
+  op->call(cls, method, input, NULL, NULL, prval);
+  tracepoint(librados, rados_write_op_exec_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_exec);
+
+// Queues an omap_set of `num` key/value pairs on the write op.  Keys are
+// NUL-terminated strings; value i is lens[i] bytes of vals[i].  If a key
+// appears more than once, the last value wins.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_set)(
+  rados_write_op_t write_op,
+  char const* const* keys,
+  char const* const* vals,
+  const size_t *lens,
+  size_t num)
+{
+  tracepoint(librados, rados_write_op_omap_set_enter, write_op, num);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  std::map<std::string, bufferlist> kv;
+  for (size_t idx = 0; idx < num; ++idx) {
+    tracepoint(librados, rados_write_op_omap_set_entry, keys[idx], vals[idx], lens[idx]);
+    bufferlist value(lens[idx]);
+    value.append(vals[idx], lens[idx]);
+    kv[keys[idx]] = value;
+  }
+  op->omap_set(kv);
+  tracepoint(librados, rados_write_op_omap_set_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_set);
+
+// Variant of rados_write_op_omap_set with explicit key lengths, so keys may
+// contain embedded NULs.  It emits the rados_write_op_omap_set_enter/_exit
+// tracepoints and no per-entry tracepoint.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_set2)(
+  rados_write_op_t write_op,
+  char const* const* keys,
+  char const* const* vals,
+  const size_t *key_lens,
+  const size_t *val_lens,
+  size_t num)
+{
+  tracepoint(librados, rados_write_op_omap_set_enter, write_op, num);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  std::map<std::string, bufferlist> kv;
+  for (size_t idx = 0; idx < num; ++idx) {
+    bufferlist value(val_lens[idx]);
+    value.append(vals[idx], val_lens[idx]);
+    string name(keys[idx], key_lens[idx]);
+    kv[name] = value;
+  }
+  op->omap_set(kv);
+  tracepoint(librados, rados_write_op_omap_set_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_set2);
+
+// Queues removal of `keys_len` NUL-terminated omap keys on the write op.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_rm_keys)(
+  rados_write_op_t write_op,
+  char const* const* keys,
+  size_t keys_len)
+{
+  tracepoint(librados, rados_write_op_omap_rm_keys_enter, write_op, keys_len);
+  // This loop exists only to trace each key.
+  for (size_t idx = 0; idx < keys_len; ++idx) {
+    tracepoint(librados, rados_write_op_omap_rm_keys_entry, keys[idx]);
+  }
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  std::set<std::string> doomed(keys, keys + keys_len);
+  op->omap_rm_keys(doomed);
+  tracepoint(librados, rados_write_op_omap_rm_keys_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_rm_keys);
+
+// Variant of rados_write_op_omap_rm_keys with explicit key lengths, so keys
+// may contain embedded NULs.  It emits the rados_write_op_omap_rm_keys
+// enter/exit tracepoints and no per-key tracepoint.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_rm_keys2)(
+  rados_write_op_t write_op,
+  char const* const* keys,
+  const size_t* key_lens,
+  size_t keys_len)
+{
+  tracepoint(librados, rados_write_op_omap_rm_keys_enter, write_op, keys_len);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  std::set<std::string> doomed;
+  for (size_t idx = 0; idx < keys_len; ++idx) {
+    doomed.emplace(keys[idx], key_lens[idx]);
+  }
+  op->omap_rm_keys(doomed);
+  tracepoint(librados, rados_write_op_omap_rm_keys_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_rm_keys2);
+
+// Queues removal of a range of omap keys on the write op.
+//
+// Both bounds are passed to ObjectOperation::omap_rm_range() as
+// (pointer, length) pairs, so they need not be NUL-terminated.  Whether each
+// bound is inclusive or exclusive is decided by omap_rm_range() and is not
+// visible here.  The enter tracepoint records only the two key pointers,
+// not their lengths.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_rm_range2)(
+ rados_write_op_t write_op,
+ const char *key_begin,
+ size_t key_begin_len,
+ const char *key_end,
+ size_t key_end_len)
+{
+ tracepoint(librados, rados_write_op_omap_rm_range_enter,
+ write_op, key_begin, key_end);
+ ((::ObjectOperation *)write_op)->omap_rm_range({key_begin, key_begin_len},
+ {key_end, key_end_len});
+ tracepoint(librados, rados_write_op_omap_rm_range_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_rm_range2);
+
+// Queues an omap_clear on the write op.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_clear)(
+  rados_write_op_t write_op)
+{
+  tracepoint(librados, rados_write_op_omap_clear_enter, write_op);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  op->omap_clear();
+  tracepoint(librados, rados_write_op_omap_clear_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_clear);
+
+// Queues a set_alloc_hint on the write op with a flags value of 0.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_set_alloc_hint)(
+  rados_write_op_t write_op,
+  uint64_t expected_object_size,
+  uint64_t expected_write_size)
+{
+  tracepoint(librados, rados_write_op_set_alloc_hint_enter, write_op, expected_object_size, expected_write_size);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  op->set_alloc_hint(expected_object_size, expected_write_size, 0);
+  tracepoint(librados, rados_write_op_set_alloc_hint_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_set_alloc_hint);
+
+// Queues a set_alloc_hint on the write op with caller-supplied flags.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_set_alloc_hint2)(
+  rados_write_op_t write_op,
+  uint64_t expected_object_size,
+  uint64_t expected_write_size,
+  uint32_t flags)
+{
+  tracepoint(librados, rados_write_op_set_alloc_hint2_enter, write_op, expected_object_size, expected_write_size, flags);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  op->set_alloc_hint(expected_object_size, expected_write_size, flags);
+  tracepoint(librados, rados_write_op_set_alloc_hint2_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_set_alloc_hint2);
+
+// Executes the write op synchronously against object `oid`.
+//
+// When `mtime` is non-null it is converted with real_clock::from_time_t()
+// and passed on; otherwise a null time pointer is passed.  `flags` go
+// through translate_flags().  Returns the result of IoCtxImpl::operate().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_write_op_operate)(
+  rados_write_op_t write_op,
+  rados_ioctx_t io,
+  const char *oid,
+  time_t *mtime,
+  int flags)
+{
+  tracepoint(librados, rados_write_op_operate_enter, write_op, io, oid, mtime, flags);
+  object_t target(oid);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+
+  ceph::real_time stamp;
+  ceph::real_time *stamp_ptr = NULL;
+  if (mtime) {
+    stamp = ceph::real_clock::from_time_t(*mtime);
+    stamp_ptr = &stamp;
+  }
+
+  int rc = impl->operate(target, op, stamp_ptr, translate_flags(flags));
+  tracepoint(librados, rados_write_op_operate_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_operate);
+
+// Same as rados_write_op_operate, but the optional modification time is a
+// struct timespec converted with real_clock::from_timespec().
+//
+// Note: the exit tracepoint emitted here is rados_write_op_operate_exit,
+// i.e. the one belonging to the time_t variant.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_write_op_operate2)(
+  rados_write_op_t write_op,
+  rados_ioctx_t io,
+  const char *oid,
+  struct timespec *ts,
+  int flags)
+{
+  tracepoint(librados, rados_write_op_operate2_enter, write_op, io, oid, ts, flags);
+  object_t target(oid);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+
+  ceph::real_time stamp;
+  ceph::real_time *stamp_ptr = NULL;
+  if (ts) {
+    stamp = ceph::real_clock::from_timespec(*ts);
+    stamp_ptr = &stamp;
+  }
+
+  int rc = impl->operate(target, op, stamp_ptr, translate_flags(flags));
+  tracepoint(librados, rados_write_op_operate_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_operate2);
+
+// Executes the write op asynchronously against object `oid`, using the io
+// context's own snap context and flags run through translate_flags().
+// Returns the result of IoCtxImpl::aio_operate().
+//
+// NOTE(review): `mtime` is recorded in the enter tracepoint but is not
+// forwarded to aio_operate() by this wrapper, unlike the synchronous
+// rados_write_op_operate -- confirm whether that is intentional.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_write_op_operate)(
+  rados_write_op_t write_op,
+  rados_ioctx_t io,
+  rados_completion_t completion,
+  const char *oid,
+  time_t *mtime,
+  int flags)
+{
+  tracepoint(librados, rados_aio_write_op_operate_enter, write_op, io, completion, oid, mtime, flags);
+  object_t target(oid);
+  auto *op = static_cast< ::ObjectOperation *>(write_op);
+  auto *impl = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl *>(completion);
+  int rc = impl->aio_operate(target, op, comp, impl->snapc, translate_flags(flags));
+  tracepoint(librados, rados_aio_write_op_operate_exit, rc);
+  return rc;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_write_op_operate);
+
+// Allocate an empty ::ObjectOperation to be used as a read operation handle.
+// Allocation uses std::nothrow, so the returned handle is NULL on failure.
+extern "C" rados_read_op_t LIBRADOS_C_API_DEFAULT_F(rados_create_read_op)()
+{
+  tracepoint(librados, rados_create_read_op_enter);
+  ::ObjectOperation *op = new (std::nothrow) ::ObjectOperation;
+  rados_read_op_t retval = op;
+  tracepoint(librados, rados_create_read_op_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_create_read_op);
+
+// Destroy a read operation handle; the handle is deleted as an
+// ::ObjectOperation.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_release_read_op)(
+  rados_read_op_t read_op)
+{
+  tracepoint(librados, rados_release_read_op_enter, read_op);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  delete op;
+  tracepoint(librados, rados_release_read_op_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_release_read_op);
+
+// Translate `flags` with get_op_flags() and pass the result to
+// ObjectOperation::set_last_op_flags() on the read operation.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_set_flags)(
+  rados_read_op_t read_op,
+  int flags)
+{
+  tracepoint(librados, rados_read_op_set_flags_enter, read_op, flags);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  op->set_last_op_flags(get_op_flags(flags));
+  tracepoint(librados, rados_read_op_set_flags_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_set_flags);
+
+// Add an assert_version(ver) step to the read operation.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_assert_version)(
+  rados_read_op_t read_op,
+  uint64_t ver)
+{
+  tracepoint(librados, rados_read_op_assert_version_enter, read_op, ver);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  op->assert_version(ver);
+  tracepoint(librados, rados_read_op_assert_version_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_assert_version);
+
+// Existence check for the read operation: implemented by queueing a stat
+// whose size, mtime and result outputs are all null.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_assert_exists)(
+  rados_read_op_t read_op)
+{
+  tracepoint(librados, rados_read_op_assert_exists_enter, read_op);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  op->stat(nullptr, nullptr, nullptr);
+  tracepoint(librados, rados_read_op_assert_exists_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_assert_exists);
+
+// Queue an extent comparison: `cmp_len` bytes of `cmp_buf` are compared at
+// object offset `off`; `prval` is handed through to ObjectOperation::cmpext().
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_cmpext)(
+  rados_read_op_t read_op,
+  const char *cmp_buf,
+  size_t cmp_len,
+  uint64_t off,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_cmpext_enter, read_op, cmp_buf,
+             cmp_len, off, prval);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  op->cmpext(off, cmp_len, cmp_buf, prval);
+  tracepoint(librados, rados_read_op_cmpext_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_cmpext);
+
+// Queue an xattr comparison in string mode.  The `value_len` bytes at `value`
+// are copied into a bufferlist before being handed to the operation.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_cmpxattr)(
+  rados_read_op_t read_op,
+  const char *name,
+  uint8_t comparison_operator,
+  const char *value,
+  size_t value_len)
+{
+  tracepoint(librados, rados_read_op_cmpxattr_enter, read_op, name, comparison_operator, value, value_len);
+  bufferlist expected;
+  expected.append(value, value_len);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  op->cmpxattr(name, comparison_operator, CEPH_OSD_CMPXATTR_MODE_STRING,
+               expected);
+  tracepoint(librados, rados_read_op_cmpxattr_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_cmpxattr);
+
+// Queue an omap comparison for a NUL-terminated key: the key length is taken
+// with strlen(key) and everything is forwarded to rados_c_omap_cmp().
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_cmp)(
+  rados_read_op_t read_op,
+  const char *key,
+  uint8_t comparison_operator,
+  const char *val,
+  size_t val_len,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_omap_cmp_enter, read_op, key, comparison_operator, val, val_len, prval);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  rados_c_omap_cmp(op, key, comparison_operator, val, strlen(key), val_len,
+                   prval);
+  tracepoint(librados, rados_read_op_omap_cmp_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_cmp);
+
+// Variant of rados_read_op_omap_cmp() that takes an explicit key length
+// instead of calling strlen() on the key.  It emits the same
+// rados_read_op_omap_cmp_{enter,exit} tracepoints as the strlen variant.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_cmp2)(
+  rados_read_op_t read_op,
+  const char *key,
+  uint8_t comparison_operator,
+  const char *val,
+  size_t key_len,
+  size_t val_len,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_omap_cmp_enter, read_op, key, comparison_operator, val, val_len, prval);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  rados_c_omap_cmp(op, key, comparison_operator, val, key_len, val_len,
+                   prval);
+  tracepoint(librados, rados_read_op_omap_cmp_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_cmp2);
+
+// Queue a stat on the read operation; psize, pmtime and prval are forwarded
+// unchanged to ObjectOperation::stat().
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_stat)(
+  rados_read_op_t read_op,
+  uint64_t *psize,
+  time_t *pmtime,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_stat_enter, read_op, psize, pmtime, prval);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  op->stat(psize, pmtime, prval);
+  tracepoint(librados, rados_read_op_stat_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_stat);
+
+// Completion context that moves the contents of `out_bl` into a
+// caller-supplied buffer of `out_len` bytes.
+//
+// On completion:
+//  - if out_bl holds more than out_len bytes, *prval is set to -ERANGE and
+//    *bytes_read to 0 (each only when non-null) and nothing is copied;
+//  - otherwise *bytes_read (if non-null) receives the length, and the data is
+//    copied into out_buf unless out_buf is null or out_bl reports that it
+//    already is the provided buffer.
+class C_bl_to_buf : public Context {
+  char *out_buf;
+  size_t out_len;
+  size_t *bytes_read;
+  int *prval;
+public:
+  bufferlist out_bl;
+  C_bl_to_buf(char *out_buf, size_t out_len, size_t *bytes_read, int *prval)
+    : out_buf(out_buf),
+      out_len(out_len),
+      bytes_read(bytes_read),
+      prval(prval) {}
+  void finish(int r) override {
+    const bool fits = !(out_bl.length() > out_len);
+    if (!fits) {
+      if (prval) {
+        *prval = -ERANGE;
+      }
+      if (bytes_read) {
+        *bytes_read = 0;
+      }
+      return;
+    }
+    if (bytes_read) {
+      *bytes_read = out_bl.length();
+    }
+    // Skip the copy when the data already lives in the caller's buffer.
+    if (out_buf && !out_bl.is_provided_buffer(out_buf)) {
+      out_bl.begin().copy(out_bl.length(), out_buf);
+    }
+  }
+};
+
+// Queue a read of `len` bytes at `offset` into the caller's buffer `buf`.
+// The output bufferlist is seeded with a static buffer wrapping `buf`, and a
+// C_bl_to_buf completion reports the byte count through `bytes_read`.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_read)(
+  rados_read_op_t read_op,
+  uint64_t offset,
+  size_t len,
+  char *buf,
+  size_t *bytes_read,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_read_enter, read_op, offset, len, buf, bytes_read, prval);
+  auto *on_done = new C_bl_to_buf(buf, len, bytes_read, prval);
+  on_done->out_bl.push_back(buffer::create_static(len, buf));
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  op->read(offset, len, &on_done->out_bl, prval, on_done);
+  tracepoint(librados, rados_read_op_read_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_read);
+
+// Queue a checksum computation on the read operation.
+//
+// `init_value` is copied into a bufferlist.  A C_bl_to_buf completion (and
+// its output bufferlist) is only created when `pchecksum` is non-null;
+// otherwise null is passed for both the output list and the completion.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_checksum)(
+  rados_read_op_t read_op,
+  rados_checksum_type_t type,
+  const char *init_value,
+  size_t init_value_len,
+  uint64_t offset, size_t len,
+  size_t chunk_size, char *pchecksum,
+  size_t checksum_len, int *prval)
+{
+  tracepoint(librados, rados_read_op_checksum_enter, read_op, type, init_value,
+             init_value_len, offset, len, chunk_size);
+  bufferlist init_value_bl;
+  init_value_bl.append(init_value, init_value_len);
+
+  C_bl_to_buf *ctx =
+    pchecksum ? new C_bl_to_buf(pchecksum, checksum_len, nullptr, prval)
+              : nullptr;
+  bufferlist *out = ctx ? &ctx->out_bl : nullptr;
+
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  op->checksum(get_checksum_op_type(type), init_value_bl, offset, len,
+               chunk_size, out, prval, ctx);
+  tracepoint(librados, rados_read_op_checksum_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_checksum);
+
+// Completion context that passes `out_bl` to do_out_buffer() together with
+// the caller's output pointer and length slots.
+class C_out_buffer : public Context {
+  char **out_buf;
+  size_t *out_len;
+public:
+  bufferlist out_bl;
+  C_out_buffer(char **out_buf, size_t *out_len)
+    : out_buf(out_buf), out_len(out_len) {}
+  void finish(int r) override {
+    // The result code is deliberately not inspected: return values of custom
+    // class methods have no meaning known at this layer.
+    do_out_buffer(out_bl, out_buf, out_len);
+  }
+};
+
+// Queue a class-method call (`cls`.`method`) on the read operation.  The
+// input bytes are copied into a bufferlist; the output is delivered through
+// a C_out_buffer completion into *out_buf / *out_len.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_exec)(
+  rados_read_op_t read_op,
+  const char *cls,
+  const char *method,
+  const char *in_buf,
+  size_t in_len,
+  char **out_buf,
+  size_t *out_len,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_exec_enter, read_op, cls, method, in_buf, in_len, out_buf, out_len, prval);
+  bufferlist input;
+  input.append(in_buf, in_len);
+  auto *on_done = new C_out_buffer(out_buf, out_len);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  op->call(cls, method, input, &on_done->out_bl, on_done, prval);
+  tracepoint(librados, rados_read_op_exec_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_exec);
+
+// Like rados_read_op_exec(), but the result is copied into a caller-owned
+// buffer of `out_len` bytes via C_bl_to_buf, which reports the used length
+// through `used_len`.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_exec_user_buf)(
+  rados_read_op_t read_op,
+  const char *cls,
+  const char *method,
+  const char *in_buf,
+  size_t in_len,
+  char *out_buf,
+  size_t out_len,
+  size_t *used_len,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_exec_user_buf_enter, read_op, cls, method, in_buf, in_len, out_buf, out_len, used_len, prval);
+  auto *on_done = new C_bl_to_buf(out_buf, out_len, used_len, prval);
+  bufferlist input;
+  input.append(in_buf, in_len);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  op->call(cls, method, input, &on_done->out_bl, on_done, prval);
+  tracepoint(librados, rados_read_op_exec_user_buf_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_exec_user_buf);
+
+struct RadosOmapIter { // state behind a rados_omap_iter_t handle: fetched entries plus a cursor
+ std::map<std::string, bufferlist> values; // key -> value entries filled in by the omap read ops
+ std::map<std::string, bufferlist>::iterator i; // next entry to return; set to values.begin() by the completion contexts
+};
+
+// Completion context that points a RadosOmapIter's cursor at the first
+// entry of its `values` map.
+class C_OmapIter : public Context {
+  RadosOmapIter *omap_iter;
+public:
+  explicit C_OmapIter(RadosOmapIter *iter) : omap_iter(iter) {}
+  void finish(int r) override {
+    // The result code is not inspected; the cursor is reset unconditionally.
+    omap_iter->i = omap_iter->values.begin();
+  }
+};
+
+// Completion context that points a RadosXattrsIter's cursor at the first
+// entry of its `attrset`.
+class C_XattrsIter : public Context {
+  librados::RadosXattrsIter *xattrs_iter;
+public:
+  explicit C_XattrsIter(librados::RadosXattrsIter *iter)
+    : xattrs_iter(iter) {}
+  void finish(int r) override {
+    // The result code is not inspected; the cursor is reset unconditionally.
+    xattrs_iter->i = xattrs_iter->attrset.begin();
+  }
+};
+
+// Queue a getxattrs on the read operation.  A new RadosXattrsIter is
+// allocated to receive the attributes and returned through *iter; a
+// C_XattrsIter handler is attached to reset its cursor on completion.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_getxattrs)(
+  rados_read_op_t read_op,
+  rados_xattrs_iter_t *iter,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_getxattrs_enter, read_op, prval);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  auto *result = new librados::RadosXattrsIter;
+  op->getxattrs(&result->attrset, prval);
+  op->set_handler(new C_XattrsIter(result));
+  *iter = result;
+  tracepoint(librados, rados_read_op_getxattrs_exit, *iter);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_getxattrs);
+
+// Queue an omap_get_vals on the read operation.  Null `start_after` and
+// `filter_prefix` are treated as empty strings.  Results land in a freshly
+// allocated RadosOmapIter returned through *iter; no "more" flag is
+// requested (null is passed for it).
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_get_vals)(
+  rados_read_op_t read_op,
+  const char *start_after,
+  const char *filter_prefix,
+  uint64_t max_return,
+  rados_omap_iter_t *iter,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_omap_get_vals_enter, read_op, start_after, filter_prefix, max_return, prval);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  auto *result = new RadosOmapIter;
+  const char *start = "";
+  if (start_after) {
+    start = start_after;
+  }
+  const char *filter = "";
+  if (filter_prefix) {
+    filter = filter_prefix;
+  }
+  op->omap_get_vals(start, filter, max_return, &result->values, nullptr,
+                    prval);
+  op->set_handler(new C_OmapIter(result));
+  *iter = result;
+  tracepoint(librados, rados_read_op_omap_get_vals_exit, *iter);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_get_vals);
+
+// Same as rados_read_op_omap_get_vals(), but additionally passes `pmore`
+// (reinterpreted as bool*) to ObjectOperation::omap_get_vals().  It emits the
+// same rados_read_op_omap_get_vals_{enter,exit} tracepoints.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_get_vals2)(
+  rados_read_op_t read_op,
+  const char *start_after,
+  const char *filter_prefix,
+  uint64_t max_return,
+  rados_omap_iter_t *iter,
+  unsigned char *pmore,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_omap_get_vals_enter, read_op, start_after, filter_prefix, max_return, prval);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  auto *result = new RadosOmapIter;
+  const char *start = "";
+  if (start_after) {
+    start = start_after;
+  }
+  const char *filter = "";
+  if (filter_prefix) {
+    filter = filter_prefix;
+  }
+  op->omap_get_vals(start, filter, max_return, &result->values,
+                    (bool*)pmore, prval);
+  op->set_handler(new C_OmapIter(result));
+  *iter = result;
+  tracepoint(librados, rados_read_op_omap_get_vals_exit, *iter);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_get_vals2);
+
+// Completion context for key-only omap listings.  The fetched keys are
+// collected in `keys`; on completion each key is inserted into the
+// iterator's `values` map with a default-constructed (empty) bufferlist and
+// the cursor is set to the first entry.
+struct C_OmapKeysIter : public Context {
+  RadosOmapIter *iter;
+  std::set<std::string> keys;
+  explicit C_OmapKeysIter(RadosOmapIter *iter) : iter(iter) {}
+  void finish(int r) override {
+    for (const std::string& key : keys) {
+      // operator[] creates the entry with an empty value if it is absent.
+      iter->values[key];
+    }
+    iter->i = iter->values.begin();
+  }
+};
+
+// Queue an omap_get_keys on the read operation.  A null `start_after` is
+// treated as "".  Keys are gathered by a C_OmapKeysIter handler, which
+// populates the RadosOmapIter returned through *iter on completion.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_get_keys)(
+  rados_read_op_t read_op,
+  const char *start_after,
+  uint64_t max_return,
+  rados_omap_iter_t *iter,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_omap_get_keys_enter, read_op, start_after, max_return, prval);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  auto *result = new RadosOmapIter;
+  auto *on_done = new C_OmapKeysIter(result);
+  op->omap_get_keys(start_after ? start_after : "", max_return,
+                    &on_done->keys, nullptr, prval);
+  op->set_handler(on_done);
+  *iter = result;
+  tracepoint(librados, rados_read_op_omap_get_keys_exit, *iter);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_get_keys);
+
+// Same as rados_read_op_omap_get_keys(), but additionally passes `pmore`
+// (reinterpreted as bool*) to ObjectOperation::omap_get_keys().  It emits the
+// same rados_read_op_omap_get_keys_{enter,exit} tracepoints.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_get_keys2)(
+  rados_read_op_t read_op,
+  const char *start_after,
+  uint64_t max_return,
+  rados_omap_iter_t *iter,
+  unsigned char *pmore,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_omap_get_keys_enter, read_op, start_after, max_return, prval);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  auto *result = new RadosOmapIter;
+  auto *on_done = new C_OmapKeysIter(result);
+  op->omap_get_keys(start_after ? start_after : "", max_return,
+                    &on_done->keys, (bool*)pmore, prval);
+  op->set_handler(on_done);
+  *iter = result;
+  tracepoint(librados, rados_read_op_omap_get_keys_exit, *iter);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_get_keys2);
+
+// Shared helper for the omap_get_vals_by_keys entry points: queues the
+// lookup of `to_get` on the read operation, attaches a C_OmapIter handler
+// and returns the newly allocated RadosOmapIter through *iter.
+static void internal_rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op,
+                                                         set<string>& to_get,
+                                                         rados_omap_iter_t *iter,
+                                                         int *prval)
+{
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  auto *result = new RadosOmapIter;
+  op->omap_get_vals_by_keys(to_get, &result->values, prval);
+  op->set_handler(new C_OmapIter(result));
+  *iter = result;
+}
+
+// Queue a lookup of specific omap keys.  `keys` is an array of `keys_len`
+// C strings; each is converted to a std::string (so it is read up to its
+// NUL terminator) and collected into a set before the lookup is queued.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_get_vals_by_keys)(
+  rados_read_op_t read_op,
+  char const* const* keys,
+  size_t keys_len,
+  rados_omap_iter_t *iter,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_omap_get_vals_by_keys_enter, read_op, keys, keys_len, iter, prval);
+  char const* const* const keys_end = keys + keys_len;
+  std::set<std::string> wanted(keys, keys_end);
+  internal_rados_read_op_omap_get_vals_by_keys(read_op, wanted, iter, prval);
+  tracepoint(librados, rados_read_op_omap_get_vals_by_keys_exit, *iter);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_get_vals_by_keys);
+
+// Variant of rados_read_op_omap_get_vals_by_keys() with explicit key
+// lengths: key i is built from keys[i] and key_lens[i] bytes.  It emits the
+// same enter/exit tracepoints as the NUL-terminated variant.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_get_vals_by_keys2)(
+  rados_read_op_t read_op,
+  char const* const* keys,
+  size_t num_keys,
+  const size_t* key_lens,
+  rados_omap_iter_t *iter,
+  int *prval)
+{
+  tracepoint(librados, rados_read_op_omap_get_vals_by_keys_enter, read_op, keys, num_keys, iter, prval);
+  std::set<std::string> wanted;
+  size_t idx = 0;
+  while (idx < num_keys) {
+    wanted.emplace(keys[idx], key_lens[idx]);
+    ++idx;
+  }
+  internal_rados_read_op_omap_get_vals_by_keys(read_op, wanted, iter, prval);
+  tracepoint(librados, rados_read_op_omap_get_vals_by_keys_exit, *iter);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_get_vals_by_keys2);
+
+// Return the entry under the iterator's cursor and advance the cursor.
+//
+// Each output pointer is optional.  At the end of the map the pointer
+// outputs are set to NULL and the length outputs to 0.  Otherwise *key and
+// *val point into storage owned by the iterator's `values` map.  Always
+// returns 0.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_omap_get_next2)(
+  rados_omap_iter_t iter,
+  char **key,
+  char **val,
+  size_t *key_len,
+  size_t *val_len)
+{
+  tracepoint(librados, rados_omap_get_next_enter, iter);
+  RadosOmapIter *it = static_cast<RadosOmapIter *>(iter);
+  const bool exhausted = (it->i == it->values.end());
+  if (exhausted) {
+    if (key) {
+      *key = NULL;
+    }
+    if (val) {
+      *val = NULL;
+    }
+    if (key_len) {
+      *key_len = 0;
+    }
+    if (val_len) {
+      *val_len = 0;
+    }
+  } else {
+    auto& entry = *it->i;
+    if (key) {
+      *key = (char*)entry.first.c_str();
+    }
+    if (val) {
+      *val = entry.second.c_str();
+    }
+    if (key_len) {
+      *key_len = entry.first.length();
+    }
+    if (val_len) {
+      *val_len = entry.second.length();
+    }
+    ++it->i;
+  }
+  tracepoint(librados, rados_omap_get_next_exit, 0, key, val, val_len);
+  return 0;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_omap_get_next2);
+
+// Convenience form of rados_omap_get_next2() that does not report the key
+// length: it forwards with a null key_len and `len` as the value length.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_omap_get_next)(
+  rados_omap_iter_t iter,
+  char **key,
+  char **val,
+  size_t *len)
+{
+  size_t *const no_key_len = nullptr;
+  return LIBRADOS_C_API_DEFAULT_F(rados_omap_get_next2)(iter, key, val,
+                                                        no_key_len, len);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_omap_get_next);
+
+// Number of entries held by the iterator's `values` map.  The map's size is
+// converted to unsigned int for the return value.
+extern "C" unsigned int LIBRADOS_C_API_DEFAULT_F(rados_omap_iter_size)(
+  rados_omap_iter_t iter)
+{
+  RadosOmapIter *omap_iter = static_cast<RadosOmapIter *>(iter);
+  const auto count = omap_iter->values.size();
+  return count;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_omap_iter_size);
+
+// Destroy an omap iterator handle (deletes the underlying RadosOmapIter).
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_omap_get_end)(
+  rados_omap_iter_t iter)
+{
+  tracepoint(librados, rados_omap_get_end_enter, iter);
+  delete static_cast<RadosOmapIter *>(iter);
+  tracepoint(librados, rados_omap_get_end_exit);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_omap_get_end);
+
+// Synchronously run a compound read operation against the object `oid`.
+// A null output bufferlist is passed to IoCtxImpl::operate_read(); its
+// return value is returned unchanged.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_read_op_operate)(
+  rados_read_op_t read_op,
+  rados_ioctx_t io,
+  const char *oid,
+  int flags)
+{
+  tracepoint(librados, rados_read_op_operate_enter, read_op, io, oid, flags);
+  object_t obj(oid);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  int retval = ioctx->operate_read(obj, op, NULL, translate_flags(flags));
+  tracepoint(librados, rados_read_op_operate_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_operate);
+
+// Asynchronously run a compound read operation against the object `oid`,
+// signalling `completion`.  A null output bufferlist is passed to
+// IoCtxImpl::aio_operate_read().
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_read_op_operate)(
+  rados_read_op_t read_op,
+  rados_ioctx_t io,
+  rados_completion_t completion,
+  const char *oid,
+  int flags)
+{
+  tracepoint(librados, rados_aio_read_op_operate_enter, read_op, io, completion, oid, flags);
+  object_t obj(oid);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  auto *comp = static_cast<librados::AioCompletionImpl *>(completion);
+  auto *op = static_cast< ::ObjectOperation *>(read_op);
+  int retval = ioctx->aio_operate_read(obj, op, comp, translate_flags(flags),
+                                       NULL);
+  tracepoint(librados, rados_aio_read_op_operate_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_aio_read_op_operate);
+
+// Forward a cache-pin request for object `o` to IoCtxImpl::cache_pin() and
+// return its result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_cache_pin)(
+  rados_ioctx_t io,
+  const char *o)
+{
+  tracepoint(librados, rados_cache_pin_enter, io, o);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  int retval = ioctx->cache_pin(target);
+  tracepoint(librados, rados_cache_pin_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_cache_pin);
+
+// Forward a cache-unpin request for object `o` to IoCtxImpl::cache_unpin()
+// and return its result.
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_cache_unpin)(
+  rados_ioctx_t io,
+  const char *o)
+{
+  tracepoint(librados, rados_cache_unpin_enter, io, o);
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+  object_t target(o);
+  int retval = ioctx->cache_unpin(target);
+  tracepoint(librados, rados_cache_unpin_exit, retval);
+  return retval;
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_cache_unpin);
+
+// Compute slice `n` of `m` of the cursor range [start, finish) by delegating
+// to IoCtxImpl::object_list_slice().
+//
+// The cursors are treated as hobject_t pointers.  `split_start` and
+// `split_finish`, and the cursors they point at, must be non-null (asserted);
+// the result is written into those existing cursor objects.
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_object_list_slice)(
+  rados_ioctx_t io,
+  const rados_object_list_cursor start,
+  const rados_object_list_cursor finish,
+  const size_t n,
+  const size_t m,
+  rados_object_list_cursor *split_start,
+  rados_object_list_cursor *split_finish)
+{
+  auto *ioctx = static_cast<librados::IoCtxImpl *>(io);
+
+  ceph_assert(split_start);
+  ceph_assert(split_finish);
+  auto *out_begin = static_cast<hobject_t *>(*split_start);
+  auto *out_end = static_cast<hobject_t *>(*split_finish);
+  ceph_assert(out_begin);
+  ceph_assert(out_end);
+
+  auto *range_begin = static_cast<hobject_t *>(start);
+  auto *range_end = static_cast<hobject_t *>(finish);
+
+  ioctx->object_list_slice(*range_begin, *range_end, n, m,
+                           out_begin, out_end);
+}
+LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_slice);
diff --git a/src/librados/librados_c.h b/src/librados/librados_c.h
new file mode 100644
index 000000000..33381d518
--- /dev/null
+++ b/src/librados/librados_c.h
@@ -0,0 +1,29 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef LIBRADOS_C_H
+#define LIBRADOS_C_H
+
+#include "include/types.h"
+#include "include/rados/librados.h"
+
+namespace __librados_base { // NOTE(review): presumably holds a base/legacy layout of the public struct for ABI compatibility -- confirm
+
+struct rados_pool_stat_t { // twelve uint64_t pool counters; field order defines the layout, do not reorder
+ uint64_t num_bytes;
+ uint64_t num_kb;
+ uint64_t num_objects;
+ uint64_t num_object_clones;
+ uint64_t num_object_copies;
+ uint64_t num_objects_missing_on_primary;
+ uint64_t num_objects_unfound;
+ uint64_t num_objects_degraded;
+ uint64_t num_rd; // presumably read-op count -- TODO confirm
+ uint64_t num_rd_kb;
+ uint64_t num_wr; // presumably write-op count -- TODO confirm
+ uint64_t num_wr_kb;
+};
+
+} // namespace __librados_base
+
+#endif // LIBRADOS_C_H
diff --git a/src/librados/librados_cxx.cc b/src/librados/librados_cxx.cc
new file mode 100644
index 000000000..8c9ac3c91
--- /dev/null
+++ b/src/librados/librados_cxx.cc
@@ -0,0 +1,3177 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <limits.h>
+
+#include "common/config.h"
+#include "common/errno.h"
+#include "common/ceph_argparse.h"
+#include "common/ceph_json.h"
+#include "common/common_init.h"
+#include "common/TracepointProvider.h"
+#include "common/hobject.h"
+#include "common/async/waiter.h"
+#include "include/rados/librados.h"
+#include "include/rados/librados.hpp"
+#include "include/types.h"
+#include <include/stringify.h>
+
+#include "librados/AioCompletionImpl.h"
+#include "librados/IoCtxImpl.h"
+#include "librados/PoolAsyncCompletionImpl.h"
+#include "librados/RadosClient.h"
+#include "librados/RadosXattrIter.h"
+#include "librados/ListObjectImpl.h"
+#include "librados/librados_util.h"
+#include "cls/lock/cls_lock_client.h"
+
+#include <string>
+#include <map>
+#include <set>
+#include <vector>
+#include <list>
+#include <stdexcept>
+#include <system_error>
+
+#ifdef WITH_LTTNG
+#define TRACEPOINT_DEFINE
+#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE
+#include "tracing/librados.h"
+#undef TRACEPOINT_PROBE_DYNAMIC_LINKAGE
+#undef TRACEPOINT_DEFINE
+#else
+#define tracepoint(...)
+#endif
+
+using std::string;
+using std::map;
+using std::set;
+using std::vector;
+using std::list;
+
+#define dout_subsys ceph_subsys_rados
+#undef dout_prefix
+#define dout_prefix *_dout << "librados: "
+
+static TracepointProvider::Traits tracepoint_traits("librados_tp.so", "rados_tracing");
+
+/*
+ * Structure of this file
+ *
+ * RadosClient and the related classes are the internal implementation of librados.
+ * Above that layer sits the C API, found in include/rados/librados.h, and
+ * the C++ API, found in include/rados/librados.hpp
+ *
+ * The C++ API sometimes implements things in terms of the C API.
+ * Both the C++ and C API rely on RadosClient.
+ *
+ * Visually:
+ * +--------------------------------------+
+ * | C++ API |
+ * +--------------------+ |
+ * | C API | |
+ * +--------------------+-----------------+
+ * | RadosClient |
+ * +--------------------------------------+
+ */
+
+namespace librados {
+
+// Private state behind librados::ObjectOperation: the wrapped
+// ::ObjectOperation plus an optional modification time.
+struct ObjectOperationImpl {
+  ::ObjectOperation o;
+  real_time rt;    // storage for the timestamp, when one has been set
+  real_time *prt;  // null until a timestamp is set, then points at `rt`
+
+  ObjectOperationImpl()
+    : prt(NULL)
+  {}
+};
+
+} // namespace librados
+
+// Number of ops queued on the wrapped ::ObjectOperation, or 0 when this
+// object has no implementation attached.
+//
+// The guard tests `impl` itself.  Testing the address of the member
+// (`&impl->o`) can never legitimately yield null and relies on undefined
+// behaviour when `impl` is null.
+size_t librados::ObjectOperation::size()
+{
+  if (!impl)
+    return 0;
+  return impl->o.size();
+}
+
+// Deprecated: forwards to set_op_flags2() with the flags converted to int.
+void librados::ObjectOperation::set_op_flags(ObjectOperationFlags flags)
+{
+  const int raw_flags = (int)flags;
+  set_op_flags2(raw_flags);
+}
+
+// Translate `flags` with get_op_flags() and pass the result to
+// set_last_op_flags() on the wrapped operation.
+void librados::ObjectOperation::set_op_flags2(int flags)
+{
+  ceph_assert(impl);
+  ::ObjectOperation &op = impl->o;
+  op.set_last_op_flags(get_op_flags(flags));
+}
+
+// Queue an extent comparison at offset `off` against the contents of
+// `cmp_bl`.  A local copy of the bufferlist is handed to the wrapped
+// operation.
+void librados::ObjectOperation::cmpext(uint64_t off,
+                                       const bufferlist &cmp_bl,
+                                       int *prval)
+{
+  ceph_assert(impl);
+  bufferlist expected = cmp_bl;
+  impl->o.cmpext(off, expected, prval);
+}
+
+// Queue an xattr comparison in string mode against the bufferlist `v`.
+void librados::ObjectOperation::cmpxattr(const char *name, uint8_t op, const bufferlist& v)
+{
+  ceph_assert(impl);
+  impl->o.cmpxattr(name, op, CEPH_OSD_CMPXATTR_MODE_STRING, v);
+}
+
+// Queue an xattr comparison in u64 mode; `v` is encoded into a bufferlist
+// before being handed to the wrapped operation.
+void librados::ObjectOperation::cmpxattr(const char *name, uint8_t op, uint64_t v)
+{
+  ceph_assert(impl);
+  bufferlist encoded;
+  encode(v, encoded);
+  impl->o.cmpxattr(name, op, CEPH_OSD_CMPXATTR_MODE_U64, encoded);
+}
+
+// Add an assert_version(ver) step to the wrapped operation.
+void librados::ObjectOperation::assert_version(uint64_t ver)
+{
+  ceph_assert(impl);
+  impl->o.assert_version(ver);
+}
+
+// Existence check: implemented by queueing a stat with all outputs null.
+void librados::ObjectOperation::assert_exists()
+{
+  ceph_assert(impl);
+  impl->o.stat(nullptr, nullptr, nullptr);
+}
+
+// Queue a class-method call whose output is not collected.
+void librados::ObjectOperation::exec(const char *cls, const char *method,
+                                     bufferlist& inbl)
+{
+  ceph_assert(impl);
+  impl->o.call(cls, method, inbl);
+}
+
+// Queue a class-method call; `outbl` and `prval` are forwarded to
+// ObjectOperation::call(), with no completion context attached.
+void librados::ObjectOperation::exec(const char *cls, const char *method, bufferlist& inbl, bufferlist *outbl, int *prval)
+{
+  ceph_assert(impl);
+  ::ObjectOperation &op = impl->o;
+  op.call(cls, method, inbl, outbl, NULL, prval);
+}
+
+// Adapter from Context to librados::ObjectOperationCompletion.  It owns the
+// output bufferlist of the call and, on completion, passes the result code
+// and that bufferlist to the user's completion, which it then deletes.
+class ObjectOpCompletionCtx : public Context {
+  librados::ObjectOperationCompletion *completion;
+  bufferlist bl;
+public:
+  explicit ObjectOpCompletionCtx(librados::ObjectOperationCompletion *c)
+    : completion(c)
+  {}
+
+  // Bufferlist into which the operation's output should be written.
+  bufferlist *outbl() {
+    return &bl;
+  }
+
+  void finish(int r) override {
+    completion->handle_completion(r, bl);
+    delete completion;
+  }
+};
+
+// Queue a class-method call whose result is delivered to `completion`
+// through an ObjectOpCompletionCtx, which deletes `completion` after
+// invoking it.
+void librados::ObjectOperation::exec(const char *cls, const char *method, bufferlist& inbl, librados::ObjectOperationCompletion *completion)
+{
+  ceph_assert(impl);
+  auto *on_done = new ObjectOpCompletionCtx(completion);
+  impl->o.call(cls, method, inbl, on_done->outbl(), on_done, NULL);
+}
+
+// Queue a stat reporting the modification time as time_t.
+void librados::ObjectReadOperation::stat(uint64_t *psize, time_t *pmtime, int *prval)
+{
+  ceph_assert(impl);
+  impl->o.stat(psize, pmtime, prval);
+}
+
+// Queue a stat reporting the modification time as struct timespec.
+void librados::ObjectReadOperation::stat2(uint64_t *psize, struct timespec *pts, int *prval)
+{
+  ceph_assert(impl);
+  impl->o.stat(psize, pts, prval);
+}
+
+// Queue a read of `len` bytes at `off` into `pbl`, with no completion
+// context attached.
+void librados::ObjectReadOperation::read(size_t off, uint64_t len, bufferlist *pbl, int *prval)
+{
+  ceph_assert(impl);
+  ::ObjectOperation &op = impl->o;
+  op.read(off, len, pbl, prval, NULL);
+}
+
+// Queue a sparse read; the extent map `m` and data `data_bl` are forwarded
+// to ObjectOperation::sparse_read().
+void librados::ObjectReadOperation::sparse_read(uint64_t off, uint64_t len,
+                                                std::map<uint64_t,uint64_t> *m,
+                                                bufferlist *data_bl, int *prval)
+{
+  ceph_assert(impl);
+  impl->o.sparse_read(off, len, m, data_bl, prval);
+}
+
+// Queue a checksum computation; the public checksum type is mapped with
+// get_checksum_op_type() and no completion context is attached.
+void librados::ObjectReadOperation::checksum(rados_checksum_type_t type,
+                                             const bufferlist &init_value_bl,
+                                             uint64_t off, size_t len,
+                                             size_t chunk_size, bufferlist *pbl,
+                                             int *prval)
+{
+  ceph_assert(impl);
+  ::ObjectOperation &op = impl->o;
+  op.checksum(get_checksum_op_type(type), init_value_bl, off, len,
+              chunk_size, pbl, prval, nullptr);
+}
+
+// Queue a fetch of the xattr `name` into `pbl`.
+void librados::ObjectReadOperation::getxattr(const char *name, bufferlist *pbl, int *prval)
+{
+  ceph_assert(impl);
+  impl->o.getxattr(name, pbl, prval);
+}
+
+// Queue an omap value listing with a key prefix filter.  No "more" flag is
+// requested (null is passed for it).
+void librados::ObjectReadOperation::omap_get_vals(
+  const std::string &start_after,
+  const std::string &filter_prefix,
+  uint64_t max_return,
+  std::map<std::string, bufferlist> *out_vals,
+  int *prval)
+{
+  ceph_assert(impl);
+  ::ObjectOperation &op = impl->o;
+  op.omap_get_vals(start_after, filter_prefix, max_return, out_vals,
+                   nullptr, prval);
+}
+
+// Queue an omap value listing with a key prefix filter, forwarding `pmore`
+// to ObjectOperation::omap_get_vals().
+void librados::ObjectReadOperation::omap_get_vals2(
+  const std::string &start_after,
+  const std::string &filter_prefix,
+  uint64_t max_return,
+  std::map<std::string, bufferlist> *out_vals,
+  bool *pmore,
+  int *prval)
+{
+  ceph_assert(impl);
+  ::ObjectOperation &op = impl->o;
+  op.omap_get_vals(start_after, filter_prefix, max_return, out_vals,
+                   pmore, prval);
+}
+
+// Queue an omap value listing with an empty prefix filter and no "more"
+// flag.
+void librados::ObjectReadOperation::omap_get_vals(
+  const std::string &start_after,
+  uint64_t max_return,
+  std::map<std::string, bufferlist> *out_vals,
+  int *prval)
+{
+  ceph_assert(impl);
+  impl->o.omap_get_vals(start_after, "", max_return, out_vals, nullptr, prval);
+}
+
+// Queue an omap value listing with an empty prefix filter, forwarding
+// `pmore` to ObjectOperation::omap_get_vals().
+void librados::ObjectReadOperation::omap_get_vals2(
+  const std::string &start_after,
+  uint64_t max_return,
+  std::map<std::string, bufferlist> *out_vals,
+  bool *pmore,
+  int *prval)
+{
+  ceph_assert(impl);
+  impl->o.omap_get_vals(start_after, "", max_return, out_vals, pmore, prval);
+}
+
+// Queue an omap key listing; no "more" flag is requested.
+void librados::ObjectReadOperation::omap_get_keys(
+  const std::string &start_after,
+  uint64_t max_return,
+  std::set<std::string> *out_keys,
+  int *prval)
+{
+  ceph_assert(impl);
+  impl->o.omap_get_keys(start_after, max_return, out_keys, nullptr, prval);
+}
+
+// Queue an omap key listing, forwarding `pmore` to
+// ObjectOperation::omap_get_keys().
+void librados::ObjectReadOperation::omap_get_keys2(
+  const std::string &start_after,
+  uint64_t max_return,
+  std::set<std::string> *out_keys,
+  bool *pmore,
+  int *prval)
+{
+  ceph_assert(impl);
+  impl->o.omap_get_keys(start_after, max_return, out_keys, pmore, prval);
+}
+
+// Queue a fetch of the omap header into `bl`.
+void librados::ObjectReadOperation::omap_get_header(bufferlist *bl, int *prval)
+{
+  ceph_assert(impl);
+  impl->o.omap_get_header(bl, prval);
+}
+
+// Queue a lookup of the omap values for the given set of keys.
+void librados::ObjectReadOperation::omap_get_vals_by_keys(
+  const std::set<std::string> &keys,
+  std::map<std::string, bufferlist> *map,
+  int *prval)
+{
+  ceph_assert(impl);
+  impl->o.omap_get_vals_by_keys(keys, map, prval);
+}
+
+// Queue omap comparisons; `assertions` maps each key to a
+// (value, comparison) pair and is forwarded unchanged.
+void librados::ObjectOperation::omap_cmp(
+  const std::map<std::string, pair<bufferlist, int> > &assertions,
+  int *prval)
+{
+  ceph_assert(impl);
+  impl->o.omap_cmp(assertions, prval);
+}
+
+// Queue a request for the object's watcher list into `out_watchers`.
+void librados::ObjectReadOperation::list_watchers(
+  list<obj_watch_t> *out_watchers,
+  int *prval)
+{
+  ceph_assert(impl);
+  impl->o.list_watchers(out_watchers, prval);
+}
+
+// Queue a request for the object's snapshot set into `out_snaps`.
+void librados::ObjectReadOperation::list_snaps(
+  snap_set_t *out_snaps,
+  int *prval)
+{
+  ceph_assert(impl);
+  impl->o.list_snaps(out_snaps, prval);
+}
+
+// Queue a dirty-state query; the answer is reported through `is_dirty`.
+void librados::ObjectReadOperation::is_dirty(bool *is_dirty, int *prval)
+{
+  ceph_assert(impl);
+  impl->o.is_dirty(is_dirty, prval);
+}
+
+// Fetch up to `max_return` omap entries of `oid`, issuing as many read
+// operations as needed while the "more" flag stays set.
+//
+// Returns the first negative result from operate(), -EINVAL if a batch is
+// empty while "more" is still reported, and 0 otherwise.  The first batch
+// replaces the contents of *out_vals; later batches are inserted into it.
+int librados::IoCtx::omap_get_vals(const std::string& oid,
+                                   const std::string& orig_start_after,
+                                   const std::string& filter_prefix,
+                                   uint64_t max_return,
+                                   std::map<std::string, bufferlist> *out_vals)
+{
+  string cursor = orig_start_after;
+  bool have_more = true;
+  bool first_batch = true;
+  while (max_return > 0 && have_more) {
+    std::map<std::string,bufferlist> batch;
+    ObjectReadOperation op;
+    op.omap_get_vals2(cursor, filter_prefix, max_return, &batch, &have_more,
+                      nullptr);
+    bufferlist bl;
+    const int ret = operate(oid, &op, &bl);
+    if (ret < 0) {
+      return ret;
+    }
+    if (have_more) {
+      // "more" with nothing returned would never make progress.
+      if (batch.empty()) {
+        return -EINVAL;
+      }
+      // Resume after the last key received.
+      cursor = batch.rbegin()->first;
+    }
+    // Shrink the remaining budget, clamping at zero.
+    const uint64_t received = batch.size();
+    max_return = (received <= max_return) ? (max_return - received) : 0;
+    if (first_batch) {
+      out_vals->swap(batch);
+      first_batch = false;
+    } else {
+      out_vals->insert(batch.begin(), batch.end());
+    }
+  }
+  return 0;
+}
+
+// Fetch a single batch of omap entries of `oid`, reporting through `pmore`
+// whether further entries remain.
+//
+// Returns the negative result of operate() if the operation as a whole
+// failed, otherwise the per-op result code written for the omap_get_vals2
+// step.
+int librados::IoCtx::omap_get_vals2(
+  const std::string& oid,
+  const std::string& start_after,
+  const std::string& filter_prefix,
+  uint64_t max_return,
+  std::map<std::string, bufferlist> *out_vals,
+  bool *pmore)
+{
+  ObjectReadOperation op;
+  // Start from 0 so that an indeterminate value is never returned should the
+  // per-op result slot be left unwritten.
+  int r = 0;
+  op.omap_get_vals2(start_after, filter_prefix, max_return, out_vals, pmore, &r);
+  bufferlist bl;
+  int ret = operate(oid, &op, &bl);
+  if (ret < 0)
+    return ret;
+  return r;
+}
+
+// Queue a fetch of all xattrs into `pattrs`.
+void librados::ObjectReadOperation::getxattrs(map<string, bufferlist> *pattrs, int *prval)
+{
+  ceph_assert(impl);
+  impl->o.getxattrs(pattrs, prval);
+}
+
+// Record a modification time (given as time_t) on the write operation.
+// A null `pt` leaves any previously stored timestamp untouched.
+void librados::ObjectWriteOperation::mtime(time_t *pt)
+{
+  ceph_assert(impl);
+  if (!pt)
+    return;
+  impl->rt = ceph::real_clock::from_time_t(*pt);
+  impl->prt = &impl->rt;
+}
+
+// Record a modification time (given as struct timespec) on the write
+// operation.  A null `pts` leaves any previously stored timestamp untouched.
+void librados::ObjectWriteOperation::mtime2(struct timespec *pts)
+{
+  ceph_assert(impl);
+  if (!pts)
+    return;
+  impl->rt = ceph::real_clock::from_timespec(*pts);
+  impl->prt = &impl->rt;
+}
+
+// Queue an object create; `exclusive` is forwarded unchanged.
+void librados::ObjectWriteOperation::create(bool exclusive)
+{
+  ceph_assert(impl);
+  impl->o.create(exclusive);
+}
+
+// Same as create(bool); the category argument is accepted but ignored.
+void librados::ObjectWriteOperation::create(bool exclusive,
+                                            const std::string& category) // unused
+{
+  ceph_assert(impl);
+  impl->o.create(exclusive);
+}
+
+// Queues a write of `bl` at offset `off`.  A local copy of the const buffer
+// list is what gets handed to the wrapped operation.
+void librados::ObjectWriteOperation::write(uint64_t off, const bufferlist& bl)
+{
+  ceph_assert(impl);
+  bufferlist payload(bl);
+  impl->o.write(off, payload);
+}
+
+// Queues a write_full step using a local copy of `bl`.
+void librados::ObjectWriteOperation::write_full(const bufferlist& bl)
+{
+  ceph_assert(impl);
+  bufferlist payload(bl);
+  impl->o.write_full(payload);
+}
+
+// Queues a writesame step (offset, total length, pattern) using a local copy
+// of `bl`.
+void librados::ObjectWriteOperation::writesame(uint64_t off, uint64_t write_len,
+                                               const bufferlist& bl)
+{
+  ceph_assert(impl);
+  bufferlist pattern(bl);
+  impl->o.writesame(off, write_len, pattern);
+}
+
+// Queues an append step using a local copy of `bl`.
+void librados::ObjectWriteOperation::append(const bufferlist& bl)
+{
+  ceph_assert(impl);
+  bufferlist payload(bl);
+  impl->o.append(payload);
+}
+
+// Queues a remove step on the wrapped ObjectOperation.
+void librados::ObjectWriteOperation::remove()
+{
+  ceph_assert(impl);
+  impl->o.remove();
+}
+
+// Queues a truncate step at offset `off`.
+void librados::ObjectWriteOperation::truncate(uint64_t off)
+{
+  ceph_assert(impl);
+  impl->o.truncate(off);
+}
+
+// Queues a zero step covering [off, off + len).
+void librados::ObjectWriteOperation::zero(uint64_t off, uint64_t len)
+{
+  ceph_assert(impl);
+  impl->o.zero(off, len);
+}
+
+// Queues removal of the extended attribute `name`.
+void librados::ObjectWriteOperation::rmxattr(const char *name)
+{
+  ceph_assert(impl);
+  impl->o.rmxattr(name);
+}
+
+// Queues setting the extended attribute `name` to `v`.
+void librados::ObjectWriteOperation::setxattr(const char *name, const bufferlist& v)
+{
+  ceph_assert(impl);
+  impl->o.setxattr(name, v);
+}
+
+// Rvalue overload of setxattr(); forwards `v` with std::move.
+void librados::ObjectWriteOperation::setxattr(const char *name,
+                                              const buffer::list&& v)
+{
+  ceph_assert(impl);
+  impl->o.setxattr(name, std::move(v));
+}
+
+// Queues an omap_set step with the given key/value pairs.
+void librados::ObjectWriteOperation::omap_set(
+  const map<string, bufferlist> &map)
+{
+  ceph_assert(impl);
+  impl->o.omap_set(map);
+}
+
+// Queues an omap_set_header step using a local copy of `bl`.
+void librados::ObjectWriteOperation::omap_set_header(const bufferlist &bl)
+{
+  ceph_assert(impl);
+  bufferlist header(bl);
+  impl->o.omap_set_header(header);
+}
+
+// Queues an omap_clear step on the wrapped ObjectOperation.
+void librados::ObjectWriteOperation::omap_clear()
+{
+  ceph_assert(impl);
+  impl->o.omap_clear();
+}
+
+// Queues removal of the omap keys listed in `to_rm`.
+void librados::ObjectWriteOperation::omap_rm_keys(
+  const std::set<std::string> &to_rm)
+{
+  ceph_assert(impl);
+  impl->o.omap_rm_keys(to_rm);
+}
+
+// Queues a copy_from step.  The source snap sequence and object locator are
+// read from src_ioctx's impl; the fifth argument is passed as 0.
+void librados::ObjectWriteOperation::copy_from(const std::string& src,
+                                               const IoCtx& src_ioctx,
+                                               uint64_t src_version,
+                                               uint32_t src_fadvise_flags)
+{
+  ceph_assert(impl);
+  auto *src_impl = src_ioctx.io_ctx_impl;
+  impl->o.copy_from(object_t(src), src_impl->snap_seq, src_impl->oloc,
+                    src_version, 0, src_fadvise_flags);
+}
+
+// Like copy_from(), additionally forwarding truncate_seq and truncate_size.
+void librados::ObjectWriteOperation::copy_from2(const std::string& src,
+                                                const IoCtx& src_ioctx,
+                                                uint64_t src_version,
+                                                uint32_t truncate_seq,
+                                                uint64_t truncate_size,
+                                                uint32_t src_fadvise_flags)
+{
+  ceph_assert(impl);
+  auto *src_impl = src_ioctx.io_ctx_impl;
+  impl->o.copy_from2(object_t(src), src_impl->snap_seq, src_impl->oloc,
+                     src_version, 0,
+                     truncate_seq, truncate_size, src_fadvise_flags);
+}
+
+// Queues an undirty step on the wrapped ObjectOperation.
+void librados::ObjectWriteOperation::undirty()
+{
+  ceph_assert(impl);
+  impl->o.undirty();
+}
+
+// Queues a cache_flush step on the wrapped ObjectOperation.
+void librados::ObjectReadOperation::cache_flush()
+{
+  ceph_assert(impl);
+  impl->o.cache_flush();
+}
+
+// Queues a cache_try_flush step on the wrapped ObjectOperation.
+void librados::ObjectReadOperation::cache_try_flush()
+{
+  ceph_assert(impl);
+  impl->o.cache_try_flush();
+}
+
+// Queues a cache_evict step on the wrapped ObjectOperation.
+void librados::ObjectReadOperation::cache_evict()
+{
+  ceph_assert(impl);
+  impl->o.cache_evict();
+}
+
+// Queues a tier_flush step on the wrapped ObjectOperation.
+void librados::ObjectReadOperation::tier_flush()
+{
+  ceph_assert(impl);
+  impl->o.tier_flush();
+}
+
+// Queues a tier_evict step on the wrapped ObjectOperation.
+void librados::ObjectReadOperation::tier_evict()
+{
+  ceph_assert(impl);
+  impl->o.tier_evict();
+}
+
+// Queues a set_redirect step.  The target snap sequence and object locator
+// are read from tgt_ioctx's impl.
+void librados::ObjectWriteOperation::set_redirect(const std::string& tgt_obj,
+                                                  const IoCtx& tgt_ioctx,
+                                                  uint64_t tgt_version,
+                                                  int flag)
+{
+  ceph_assert(impl);
+  auto *tgt_impl = tgt_ioctx.io_ctx_impl;
+  impl->o.set_redirect(object_t(tgt_obj), tgt_impl->snap_seq,
+                       tgt_impl->oloc, tgt_version, flag);
+}
+
+// Queues a set_chunk step mapping [src_offset, src_offset + src_length) to
+// tgt_oid at tgt_offset, using the object locator of tgt_ioctx's impl.
+void librados::ObjectReadOperation::set_chunk(uint64_t src_offset,
+                                              uint64_t src_length,
+                                              const IoCtx& tgt_ioctx,
+                                              string tgt_oid,
+                                              uint64_t tgt_offset,
+                                              int flag)
+{
+  ceph_assert(impl);
+  impl->o.set_chunk(src_offset, src_length,
+                    tgt_ioctx.io_ctx_impl->oloc, object_t(tgt_oid),
+                    tgt_offset, flag);
+}
+
+// Queues a tier_promote step on the wrapped ObjectOperation.
+void librados::ObjectWriteOperation::tier_promote()
+{
+  ceph_assert(impl);
+  impl->o.tier_promote();
+}
+
+// Queues an unset_manifest step on the wrapped ObjectOperation.
+void librados::ObjectWriteOperation::unset_manifest()
+{
+  ceph_assert(impl);
+  impl->o.unset_manifest();
+}
+
+// Queues a tmap_update step using a local copy of the command buffer.
+void librados::ObjectWriteOperation::tmap_update(const bufferlist& cmdbl)
+{
+  ceph_assert(impl);
+  bufferlist commands(cmdbl);
+  impl->o.tmap_update(commands);
+}
+
+// Queues a rollback step to snapshot id `snapid`.
+void librados::ObjectWriteOperation::selfmanaged_snap_rollback(snap_t snapid)
+{
+  ceph_assert(impl);
+  impl->o.rollback(snapid);
+}
+
+// Queues a rollback step to snapshot id `snapid`.  Note that this takes the
+// snapshot id, not the snapshot name normally used with pool snapshots.
+void librados::ObjectWriteOperation::snap_rollback(snap_t snapid)
+{
+  ceph_assert(impl);
+  impl->o.rollback(snapid);
+}
+
+// Queues a set_alloc_hint step with the flags argument fixed to 0.
+void librados::ObjectWriteOperation::set_alloc_hint(
+  uint64_t expected_object_size,
+  uint64_t expected_write_size)
+{
+  ceph_assert(impl);
+  impl->o.set_alloc_hint(expected_object_size, expected_write_size, 0);
+}
+// Queues a set_alloc_hint step with caller-supplied flags.
+void librados::ObjectWriteOperation::set_alloc_hint2(
+  uint64_t expected_object_size,
+  uint64_t expected_write_size,
+  uint32_t flags)
+{
+  ceph_assert(impl);
+  impl->o.set_alloc_hint(expected_object_size, expected_write_size, flags);
+}
+
+// Queues a cache_pin step on the wrapped ObjectOperation.
+void librados::ObjectWriteOperation::cache_pin()
+{
+  ceph_assert(impl);
+  impl->o.cache_pin();
+}
+
+// Queues a cache_unpin step on the wrapped ObjectOperation.
+void librados::ObjectWriteOperation::cache_unpin()
+{
+  ceph_assert(impl);
+  impl->o.cache_unpin();
+}
+
+// Out-of-line destructor definition; nothing to release.
+librados::WatchCtx::~WatchCtx() = default;
+
+// Out-of-line destructor definition; nothing to release.
+librados::WatchCtx2::~WatchCtx2() = default;
+
+///////////////////////////// NObjectIteratorImpl /////////////////////////////
+// Wraps the given listing context; `ctx` is initialised from ctx_.
+librados::NObjectIteratorImpl::NObjectIteratorImpl(ObjListCtx *ctx_) : ctx(ctx_) {}
+
+// Explicitly resets the listing context on destruction.
+librados::NObjectIteratorImpl::~NObjectIteratorImpl()
+{
+  ctx.reset();
+}
+
+// Copy construction is implemented in terms of copy assignment.
+librados::NObjectIteratorImpl::NObjectIteratorImpl(const NObjectIteratorImpl &rhs)
+{
+  operator=(rhs);
+}
+
+// Copy assignment.  Self-assignment is a no-op.  If rhs has no context this
+// iterator's context is reset; otherwise a new ObjListCtx is built around a
+// copy of rhs's NListContext and the current object is copied too.
+librados::NObjectIteratorImpl& librados::NObjectIteratorImpl::operator=(const librados::NObjectIteratorImpl &rhs)
+{
+  if (this == &rhs)
+    return *this;
+  if (rhs.ctx.get() == nullptr) {
+    ctx.reset();
+  } else {
+    Objecter::NListContext *cloned_nlc =
+      new Objecter::NListContext(*rhs.ctx->nlc);
+    ctx.reset(new ObjListCtx(rhs.ctx->ctx, cloned_nlc));
+    cur_obj = rhs.cur_obj;
+  }
+  return *this;
+}
+
+// Equality: two context-less iterators are equal; a context-less iterator
+// equals one whose listing is at_end(); otherwise the iterators are equal
+// only when they hold the very same context pointer.
+bool librados::NObjectIteratorImpl::operator==(const librados::NObjectIteratorImpl& rhs) const {
+  const bool lhs_empty = (ctx.get() == nullptr);
+  const bool rhs_empty = (rhs.ctx.get() == nullptr);
+  if (lhs_empty && rhs_empty)
+    return true;
+  if (lhs_empty)
+    return rhs.ctx->nlc->at_end();
+  if (rhs_empty)
+    return ctx->nlc->at_end();
+  return ctx.get() == rhs.ctx.get();
+}
+
+// Negation of operator==.
+bool librados::NObjectIteratorImpl::operator!=(const librados::NObjectIteratorImpl& rhs) const {
+  const bool same = (*this == rhs);
+  return !same;
+}
+
+// Returns a reference to the current object.
+const librados::ListObject& librados::NObjectIteratorImpl::operator*() const
+{
+  return cur_obj;
+}
+
+// Returns the address of the current object.
+const librados::ListObject* librados::NObjectIteratorImpl::operator->() const
+{
+  return &cur_obj;
+}
+
+// Pre-increment: fetch the next entry, then return this iterator.
+librados::NObjectIteratorImpl& librados::NObjectIteratorImpl::operator++()
+{
+  this->get_next();
+  return *this;
+}
+
+// Post-increment: returns a copy taken before fetching the next entry.
+librados::NObjectIteratorImpl librados::NObjectIteratorImpl::operator++(int)
+{
+  librados::NObjectIteratorImpl before(*this);
+  this->get_next();
+  return before;
+}
+
+// Seeks the listing to `pos` via rados_nobjects_list_seek(), loads the entry
+// at the new position and returns the value the seek call produced.
+uint32_t librados::NObjectIteratorImpl::seek(uint32_t pos)
+{
+  const uint32_t seek_result = rados_nobjects_list_seek(ctx.get(), pos);
+  get_next();
+  return seek_result;
+}
+
+// Seeks the listing to `cursor` via rados_nobjects_list_seek_cursor(), loads
+// the entry at the new position and returns the value the seek call produced.
+uint32_t librados::NObjectIteratorImpl::seek(const ObjectCursor& cursor)
+{
+  const uint32_t seek_result = rados_nobjects_list_seek_cursor(
+    ctx.get(), (rados_object_list_cursor)cursor.c_cursor);
+  get_next();
+  return seek_result;
+}
+
+// Builds an ObjectCursor from the value nlist_get_cursor() returns for this
+// iterator's listing context.
+librados::ObjectCursor librados::NObjectIteratorImpl::get_cursor()
+{
+  auto *list_handle = static_cast<librados::ObjListCtx *>(ctx.get());
+  librados::ObjectCursor cursor;
+  cursor.set(list_handle->ctx->nlist_get_cursor(list_handle->nlc));
+  return cursor;
+}
+
+// Stores `bl` as the filter of the listing context; requires a context.
+void librados::NObjectIteratorImpl::set_filter(const bufferlist &bl)
+{
+  ceph_assert(ctx);
+  auto *list_state = ctx->nlc;
+  list_state->filter = bl;
+}
+
+// Advances to the next listed object.  Does nothing when the listing is
+// already at_end() or when rados_nobjects_list_next2() returns -ENOENT.
+// Any other non-zero result is thrown as std::system_error.  On success the
+// namespace, oid and locator of cur_obj are overwritten (cur_obj.impl is
+// allocated on first use).
+void librados::NObjectIteratorImpl::get_next()
+{
+  if (ctx->nlc->at_end())
+    return;
+
+  const char *entry, *key, *nspace;
+  size_t entry_size, key_size, nspace_size;
+  const int ret = rados_nobjects_list_next2(ctx.get(), &entry, &key, &nspace,
+                                            &entry_size, &key_size,
+                                            &nspace_size);
+  if (ret == -ENOENT)
+    return;
+  if (ret != 0) {
+    throw std::system_error(-ret, std::system_category(),
+                            "rados_nobjects_list_next2");
+  }
+
+  if (cur_obj.impl == nullptr)
+    cur_obj.impl = new ListObjectImpl();
+  cur_obj.impl->nspace = string{nspace, nspace_size};
+  cur_obj.impl->oid = string{entry, entry_size};
+  // a null key means the object has no locator
+  cur_obj.impl->locator = key ? string(key, key_size) : string();
+}
+
+// Returns the pg hash position reported by the listing context.
+uint32_t librados::NObjectIteratorImpl::get_pg_hash_position() const
+{
+  const uint32_t position = ctx->nlc->get_pg_hash_position();
+  return position;
+}
+
+///////////////////////////// NObjectIterator /////////////////////////////
+// Creates the implementation object around the given listing context.
+librados::NObjectIterator::NObjectIterator(ObjListCtx *ctx_)
+  : impl(new NObjectIteratorImpl(ctx_))
+{
+}
+
+// Destroys the owned implementation object (may be null).
+librados::NObjectIterator::~NObjectIterator()
+{
+  delete impl;
+}
+
+// Copy constructor: a null rhs.impl yields a null impl, otherwise a fresh
+// implementation object is created and assigned from rhs's.
+librados::NObjectIterator::NObjectIterator(const NObjectIterator &rhs)
+  : impl(rhs.impl ? new NObjectIteratorImpl() : nullptr)
+{
+  if (impl)
+    *impl = *(rhs.impl);
+}
+
+// Copy assignment: a null rhs.impl deletes and clears our impl; otherwise
+// our impl is created if missing and assigned from rhs's.
+librados::NObjectIterator& librados::NObjectIterator::operator=(const librados::NObjectIterator &rhs)
+{
+  if (rhs.impl != nullptr) {
+    if (impl == nullptr)
+      impl = new NObjectIteratorImpl();
+    *impl = *(rhs.impl);
+  } else {
+    delete impl;
+    impl = nullptr;
+  }
+  return *this;
+}
+
+// Compares the implementation objects when both exist; otherwise compares
+// the impl pointers themselves.
+bool librados::NObjectIterator::operator==(const librados::NObjectIterator& rhs) const
+{
+  if (!impl || !rhs.impl)
+    return impl == rhs.impl;
+  return *impl == *(rhs.impl);
+}
+
+// Negation of operator==.
+bool librados::NObjectIterator::operator!=(const librados::NObjectIterator& rhs) const
+{
+  const bool same = (*this == rhs);
+  return !same;
+}
+
+// Dereferences the pointer returned by the impl's get_listobjectp().
+const librados::ListObject& librados::NObjectIterator::operator*() const
+{
+  ceph_assert(impl);
+  const librados::ListObject *current = impl->get_listobjectp();
+  return *current;
+}
+
+// Returns the pointer produced by the impl's get_listobjectp().
+const librados::ListObject* librados::NObjectIterator::operator->() const
+{
+  ceph_assert(impl);
+  const librados::ListObject *current = impl->get_listobjectp();
+  return current;
+}
+
+// Pre-increment: advance the impl, then return this iterator.
+librados::NObjectIterator& librados::NObjectIterator::operator++()
+{
+  ceph_assert(impl);
+  NObjectIteratorImpl& it = *impl;
+  it.get_next();
+  return *this;
+}
+
+// Post-increment: returns a copy taken before advancing.  Asserts that the
+// iterator has an implementation object, like the pre-increment operator
+// does, instead of dereferencing a null impl.
+librados::NObjectIterator librados::NObjectIterator::operator++(int)
+{
+  ceph_assert(impl);
+  librados::NObjectIterator ret(*this);
+  impl->get_next();
+  return ret;
+}
+
+// Delegates to the impl's seek(pos).
+uint32_t librados::NObjectIterator::seek(uint32_t pos)
+{
+  ceph_assert(impl);
+  const uint32_t seek_result = impl->seek(pos);
+  return seek_result;
+}
+
+// Delegates to the impl's seek(cursor).
+uint32_t librados::NObjectIterator::seek(const ObjectCursor& cursor)
+{
+  ceph_assert(impl);
+  const uint32_t seek_result = impl->seek(cursor);
+  return seek_result;
+}
+
+// Delegates to the impl's get_cursor().
+librados::ObjectCursor librados::NObjectIterator::get_cursor()
+{
+  ceph_assert(impl);
+  NObjectIteratorImpl& it = *impl;
+  return it.get_cursor();
+}
+
+// Delegates to the impl's set_filter().  Asserts that the iterator has an
+// implementation object, as the other accessors of this class do, instead of
+// dereferencing a null impl.
+void librados::NObjectIterator::set_filter(const bufferlist &bl)
+{
+  ceph_assert(impl);
+  impl->set_filter(bl);
+}
+
+// Delegates to the impl's get_next().
+void librados::NObjectIterator::get_next()
+{
+  ceph_assert(impl);
+  NObjectIteratorImpl& it = *impl;
+  it.get_next();
+}
+
+// Delegates to the impl's get_pg_hash_position().
+uint32_t librados::NObjectIterator::get_pg_hash_position() const
+{
+  ceph_assert(impl);
+  const uint32_t position = impl->get_pg_hash_position();
+  return position;
+}
+
+// Static end-iterator instance, constructed from a null listing context.
+const librados::NObjectIterator librados::NObjectIterator::__EndObjectIterator(nullptr);
+
+///////////////////////////// PoolAsyncCompletion //////////////////////////////
+// Calls release() on the underlying PoolAsyncCompletionImpl.
+librados::PoolAsyncCompletion::PoolAsyncCompletion::~PoolAsyncCompletion()
+{
+  reinterpret_cast<PoolAsyncCompletionImpl *>(pc)->release();
+}
+
+// Forwards the callback and its argument to the underlying implementation.
+int librados::PoolAsyncCompletion::PoolAsyncCompletion::set_callback(void *cb_arg,
+                                                                     rados_callback_t cb)
+{
+  auto completion = reinterpret_cast<PoolAsyncCompletionImpl *>(pc);
+  return completion->set_callback(cb_arg, cb);
+}
+
+// Forwards to the implementation's wait().
+int librados::PoolAsyncCompletion::PoolAsyncCompletion::wait()
+{
+  auto completion = reinterpret_cast<PoolAsyncCompletionImpl *>(pc);
+  return completion->wait();
+}
+
+// Forwards to the implementation's is_complete().
+bool librados::PoolAsyncCompletion::PoolAsyncCompletion::is_complete()
+{
+  auto completion = reinterpret_cast<PoolAsyncCompletionImpl *>(pc);
+  return completion->is_complete();
+}
+
+// Forwards to the implementation's get_return_value().
+int librados::PoolAsyncCompletion::PoolAsyncCompletion::get_return_value()
+{
+  auto completion = reinterpret_cast<PoolAsyncCompletionImpl *>(pc);
+  return completion->get_return_value();
+}
+
+// Destroys this wrapper object; it must not be used afterwards.
+void librados::PoolAsyncCompletion::PoolAsyncCompletion::release()
+{
+  delete this;
+}
+
+///////////////////////////// AioCompletion //////////////////////////////
+// Calls release() on the underlying AioCompletionImpl.
+librados::AioCompletion::AioCompletion::~AioCompletion()
+{
+  reinterpret_cast<AioCompletionImpl *>(pc)->release();
+}
+
+// Forwards to the implementation's set_complete_callback().
+int librados::AioCompletion::AioCompletion::set_complete_callback(void *cb_arg, rados_callback_t cb)
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->set_complete_callback(cb_arg, cb);
+}
+
+// Forwards to the implementation's set_safe_callback().
+int librados::AioCompletion::AioCompletion::set_safe_callback(void *cb_arg, rados_callback_t cb)
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->set_safe_callback(cb_arg, cb);
+}
+
+// Forwards to the implementation's wait_for_complete().
+int librados::AioCompletion::AioCompletion::wait_for_complete()
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->wait_for_complete();
+}
+
+// Implemented by calling the implementation's wait_for_complete().
+int librados::AioCompletion::AioCompletion::wait_for_safe()
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->wait_for_complete();
+}
+
+// Forwards to the implementation's is_complete().
+bool librados::AioCompletion::AioCompletion::is_complete()
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->is_complete();
+}
+
+// Forwards to the implementation's is_safe().
+bool librados::AioCompletion::AioCompletion::is_safe()
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->is_safe();
+}
+
+// Forwards to the implementation's wait_for_complete_and_cb().
+int librados::AioCompletion::AioCompletion::wait_for_complete_and_cb()
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->wait_for_complete_and_cb();
+}
+
+// Forwards to the implementation's wait_for_safe_and_cb().
+int librados::AioCompletion::AioCompletion::wait_for_safe_and_cb()
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->wait_for_safe_and_cb();
+}
+
+// Forwards to the implementation's is_complete_and_cb().
+bool librados::AioCompletion::AioCompletion::is_complete_and_cb()
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->is_complete_and_cb();
+}
+
+// Forwards to the implementation's is_safe_and_cb().
+bool librados::AioCompletion::AioCompletion::is_safe_and_cb()
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->is_safe_and_cb();
+}
+
+// Forwards to the implementation's get_return_value().
+int librados::AioCompletion::AioCompletion::get_return_value()
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->get_return_value();
+}
+
+// Returns the implementation's get_version() converted to int.
+int librados::AioCompletion::AioCompletion::get_version()
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->get_version();
+}
+
+// Returns the implementation's get_version() as a 64-bit value.
+uint64_t librados::AioCompletion::AioCompletion::get_version64()
+{
+  auto completion = reinterpret_cast<AioCompletionImpl *>(pc);
+  return completion->get_version();
+}
+
+// Destroys this wrapper object; it must not be used afterwards.
+void librados::AioCompletion::AioCompletion::release()
+{
+  delete this;
+}
+
+///////////////////////////// IoCtx //////////////////////////////
+// Default-constructs an IoCtx with no implementation attached.
+librados::IoCtx::IoCtx()
+  : io_ctx_impl(nullptr)
+{
+}
+
+// Makes `io` refer to the IoCtxImpl behind the C handle `p`, taking a
+// reference on it when it is non-null.  Any implementation `io` referred to
+// before has its reference released; previously that reference was simply
+// overwritten and leaked.
+void librados::IoCtx::from_rados_ioctx_t(rados_ioctx_t p, IoCtx &io)
+{
+  IoCtxImpl *incoming = (IoCtxImpl*)p;
+
+  // take the new reference first so re-attaching the same impl is safe
+  if (incoming) {
+    incoming->get();
+  }
+  if (io.io_ctx_impl) {
+    io.io_ctx_impl->put();
+  }
+  io.io_ctx_impl = incoming;
+}
+
+// Copy constructor: shares rhs's implementation and takes a reference on it
+// when it is non-null.
+librados::IoCtx::IoCtx(const IoCtx& rhs)
+  : io_ctx_impl(rhs.io_ctx_impl)
+{
+  if (io_ctx_impl != nullptr)
+    io_ctx_impl->get();
+}
+
+// Copy assignment: shares rhs's implementation.  The new reference is taken
+// before the old one is dropped, so self-assignment cannot release the impl
+// while it is still needed, and a null rhs.io_ctx_impl is handled instead of
+// being dereferenced.
+librados::IoCtx& librados::IoCtx::operator=(const IoCtx& rhs)
+{
+  IoCtxImpl *incoming = rhs.io_ctx_impl;
+  if (incoming)
+    incoming->get();
+  if (io_ctx_impl)
+    io_ctx_impl->put();
+  io_ctx_impl = incoming;
+  return *this;
+}
+
+// Move constructor: takes over rhs's implementation pointer and leaves rhs
+// with a null one.
+librados::IoCtx::IoCtx(IoCtx&& rhs) noexcept
+  : io_ctx_impl(std::exchange(rhs.io_ctx_impl, nullptr)) {}
+
+// Move assignment: releases the current implementation (if any) and takes
+// over rhs's pointer, leaving rhs with a null one.  Self-move is a no-op;
+// without the guard it would drop the reference yet keep the pointer.
+librados::IoCtx& librados::IoCtx::operator=(IoCtx&& rhs) noexcept
+{
+  if (this != &rhs) {
+    if (io_ctx_impl)
+      io_ctx_impl->put();
+    io_ctx_impl = std::exchange(rhs.io_ctx_impl, nullptr);
+  }
+  return *this;
+}
+
+// Destructor: delegates to close().
+librados::IoCtx::~IoCtx()
+{
+  this->close();
+}
+
+// True when an implementation object is attached.
+bool librados::IoCtx::is_valid() const
+{
+  return io_ctx_impl ? true : false;
+}
+
+// Drops the reference on the implementation (if any) and clears the pointer.
+void librados::IoCtx::close()
+{
+  if (io_ctx_impl != nullptr)
+    io_ctx_impl->put();
+  io_ctx_impl = nullptr;
+}
+
+// Replaces this context's implementation with a newly allocated IoCtxImpl
+// that is filled from rhs's via IoCtxImpl::dup().  The copy is completed
+// before the previous implementation is released, so dup() of a context
+// onto itself copies the real state instead of reading from an impl that was
+// already dropped and replaced.
+void librados::IoCtx::dup(const IoCtx& rhs)
+{
+  IoCtxImpl *fresh = new IoCtxImpl();
+  fresh->get();
+  fresh->dup(*rhs.io_ctx_impl);
+  if (io_ctx_impl)
+    io_ctx_impl->put();
+  io_ctx_impl = fresh;
+}
+
+// Not supported: always returns -EOPNOTSUPP; the argument is ignored.
+int librados::IoCtx::set_auid(uint64_t auid_)
+{
+  const int unsupported = -EOPNOTSUPP;
+  return unsupported;
+}
+
+// Not supported: always returns -EOPNOTSUPP; both arguments are ignored.
+int librados::IoCtx::set_auid_async(uint64_t auid_, PoolAsyncCompletion *c)
+{
+  const int unsupported = -EOPNOTSUPP;
+  return unsupported;
+}
+
+// Not supported: always returns -EOPNOTSUPP; *auid_ is not written.
+int librados::IoCtx::get_auid(uint64_t *auid_)
+{
+  const int unsupported = -EOPNOTSUPP;
+  return unsupported;
+}
+
+// Asks the client whether this context's pool requires alignment.
+bool librados::IoCtx::pool_requires_alignment()
+{
+  const auto pool_id = get_id();
+  return io_ctx_impl->client->pool_requires_alignment(pool_id);
+}
+
+// Forwards to the client's pool_requires_alignment2() for this pool; the
+// answer is delivered through `requires`.
+int librados::IoCtx::pool_requires_alignment2(bool *requires)
+{
+  const auto pool_id = get_id();
+  return io_ctx_impl->client->pool_requires_alignment2(pool_id, requires);
+}
+
+// Returns the client's pool_required_alignment() for this context's pool.
+uint64_t librados::IoCtx::pool_required_alignment()
+{
+  const auto pool_id = get_id();
+  return io_ctx_impl->client->pool_required_alignment(pool_id);
+}
+
+// Forwards to the client's pool_required_alignment2() for this pool; the
+// value is delivered through `alignment`.
+int librados::IoCtx::pool_required_alignment2(uint64_t *alignment)
+{
+  const auto pool_id = get_id();
+  return io_ctx_impl->client->pool_required_alignment2(pool_id, alignment);
+}
+
+// Looks up the pool name through the client.  The lookup's return value is
+// not checked; the string is returned as pool_get_name() left it.
+std::string librados::IoCtx::get_pool_name()
+{
+  std::string pool_name;
+  const auto pool_id = get_id();
+  io_ctx_impl->client->pool_get_name(pool_id, &pool_name);
+  return pool_name;
+}
+
+// Const overload: returns the pool name cached by the implementation.
+std::string librados::IoCtx::get_pool_name() const
+{
+  std::string cached = io_ctx_impl->get_cached_pool_name();
+  return cached;
+}
+
+// Returns the instance id reported by the owning client.
+uint64_t librados::IoCtx::get_instance_id() const
+{
+  const uint64_t instance_id = io_ctx_impl->client->get_instance_id();
+  return instance_id;
+}
+
+// Synchronous create: wraps oid in an object_t and forwards to the impl.
+int librados::IoCtx::create(const std::string& oid, bool exclusive)
+{
+  object_t target{oid};
+  return io_ctx_impl->create(target, exclusive);
+}
+
+// Same as create(oid, exclusive); the category argument is ignored.
+int librados::IoCtx::create(const std::string& oid, bool exclusive,
+                            const std::string& category) // unused
+{
+  object_t target{oid};
+  return io_ctx_impl->create(target, exclusive);
+}
+
+// Synchronous write; forwards (object, bl, len, off) to the impl.
+int librados::IoCtx::write(const std::string& oid, bufferlist& bl, size_t len, uint64_t off)
+{
+  object_t target{oid};
+  return io_ctx_impl->write(target, bl, len, off);
+}
+
+// Synchronous append; forwards (object, bl, len) to the impl.
+int librados::IoCtx::append(const std::string& oid, bufferlist& bl, size_t len)
+{
+  object_t target{oid};
+  return io_ctx_impl->append(target, bl, len);
+}
+
+// Synchronous write_full; forwards (object, bl) to the impl.
+int librados::IoCtx::write_full(const std::string& oid, bufferlist& bl)
+{
+  object_t target{oid};
+  return io_ctx_impl->write_full(target, bl);
+}
+
+// Synchronous writesame; forwards (object, bl, write_len, off) to the impl.
+int librados::IoCtx::writesame(const std::string& oid, bufferlist& bl,
+                               size_t write_len, uint64_t off)
+{
+  object_t target{oid};
+  return io_ctx_impl->writesame(target, bl, write_len, off);
+}
+
+
+// Synchronous read; forwards (object, bl, len, off) to the impl.
+int librados::IoCtx::read(const std::string& oid, bufferlist& bl, size_t len, uint64_t off)
+{
+  object_t target{oid};
+  return io_ctx_impl->read(target, bl, len, off);
+}
+
+// Synchronous checksum.  The public checksum type is mapped through
+// get_checksum_op_type() before the request is forwarded to the impl.
+int librados::IoCtx::checksum(const std::string& oid,
+                              rados_checksum_type_t type,
+                              const bufferlist &init_value_bl, size_t len,
+                              uint64_t off, size_t chunk_size, bufferlist *pbl)
+{
+  object_t target{oid};
+  return io_ctx_impl->checksum(target, get_checksum_op_type(type),
+                               init_value_bl, len, off, chunk_size, pbl);
+}
+
+// Synchronous remove of the named object.
+int librados::IoCtx::remove(const std::string& oid)
+{
+  object_t target{oid};
+  return io_ctx_impl->remove(target);
+}
+
+// Synchronous remove of the named object; `flags` is passed through as is.
+int librados::IoCtx::remove(const std::string& oid, int flags)
+{
+  object_t target{oid};
+  return io_ctx_impl->remove(target, flags);
+}
+
+// Synchronous truncate of the named object to `size`.
+int librados::IoCtx::trunc(const std::string& oid, uint64_t size)
+{
+  object_t target{oid};
+  return io_ctx_impl->trunc(target, size);
+}
+
+// Synchronous mapext; forwards (object, off, len, m) to the impl.
+int librados::IoCtx::mapext(const std::string& oid, uint64_t off, size_t len,
+                            std::map<uint64_t,uint64_t>& m)
+{
+  object_t target{oid};
+  return io_ctx_impl->mapext(target, off, len, m);
+}
+
+// Synchronous cmpext; forwards (object, off, cmp_bl) to the impl.
+int librados::IoCtx::cmpext(const std::string& oid, uint64_t off, bufferlist& cmp_bl)
+{
+  object_t target{oid};
+  return io_ctx_impl->cmpext(target, off, cmp_bl);
+}
+
+// Synchronous sparse_read; forwards (object, m, bl, len, off) to the impl.
+int librados::IoCtx::sparse_read(const std::string& oid, std::map<uint64_t,uint64_t>& m,
+                                 bufferlist& bl, size_t len, uint64_t off)
+{
+  object_t target{oid};
+  return io_ctx_impl->sparse_read(target, m, bl, len, off);
+}
+
+// Synchronous getxattr; forwards (object, name, bl) to the impl.
+int librados::IoCtx::getxattr(const std::string& oid, const char *name, bufferlist& bl)
+{
+  object_t target{oid};
+  return io_ctx_impl->getxattr(target, name, bl);
+}
+
+// Synchronous getxattrs; forwards (object, attrset) to the impl.
+int librados::IoCtx::getxattrs(const std::string& oid, map<std::string, bufferlist>& attrset)
+{
+  object_t target{oid};
+  return io_ctx_impl->getxattrs(target, attrset);
+}
+
+// Synchronous setxattr; forwards (object, name, bl) to the impl.
+int librados::IoCtx::setxattr(const std::string& oid, const char *name, bufferlist& bl)
+{
+  object_t target{oid};
+  return io_ctx_impl->setxattr(target, name, bl);
+}
+
+// Synchronous rmxattr; forwards (object, name) to the impl.
+int librados::IoCtx::rmxattr(const std::string& oid, const char *name)
+{
+  object_t target{oid};
+  return io_ctx_impl->rmxattr(target, name);
+}
+
+// Synchronous stat; forwards (object, psize, pmtime) to the impl.
+int librados::IoCtx::stat(const std::string& oid, uint64_t *psize, time_t *pmtime)
+{
+  object_t target{oid};
+  return io_ctx_impl->stat(target, psize, pmtime);
+}
+
+// Synchronous stat2 (timespec flavour); forwards (object, psize, pts).
+int librados::IoCtx::stat2(const std::string& oid, uint64_t *psize, struct timespec *pts)
+{
+  object_t target{oid};
+  return io_ctx_impl->stat2(target, psize, pts);
+}
+
+// Synchronous exec; forwards (object, cls, method, inbl, outbl) to the impl.
+int librados::IoCtx::exec(const std::string& oid, const char *cls, const char *method,
+                          bufferlist& inbl, bufferlist& outbl)
+{
+  object_t target{oid};
+  return io_ctx_impl->exec(target, cls, method, inbl, outbl);
+}
+
+// Synchronous tmap_update; forwards (object, cmdbl) to the impl.
+int librados::IoCtx::tmap_update(const std::string& oid, bufferlist& cmdbl)
+{
+  object_t target{oid};
+  return io_ctx_impl->tmap_update(target, cmdbl);
+}
+
+// Unfiltered variant: calls the prefix-filtering overload with an empty
+// filter prefix.
+int librados::IoCtx::omap_get_vals(const std::string& oid,
+                                   const std::string& start_after,
+                                   uint64_t max_return,
+                                   std::map<std::string, bufferlist> *out_vals)
+{
+  const std::string no_prefix;
+  return omap_get_vals(oid, start_after, no_prefix, max_return, out_vals);
+}
+
+// Single synchronous omap value listing without a prefix filter.  Returns
+// the result of operate() when it is negative, otherwise the per-op result
+// written by the omap_get_vals2 step.
+int librados::IoCtx::omap_get_vals2(
+  const std::string& oid,
+  const std::string& start_after,
+  uint64_t max_return,
+  std::map<std::string, bufferlist> *out_vals,
+  bool *pmore)
+{
+  ObjectReadOperation read_op;
+  int op_rval;
+  read_op.omap_get_vals2(start_after, max_return, out_vals, pmore, &op_rval);
+  bufferlist unused_bl;
+  const int ret = operate(oid, &read_op, &unused_bl);
+  return (ret < 0) ? ret : op_rval;
+}
+
+// Collects up to max_return omap keys of `oid` that follow
+// orig_start_after.  Issues omap_get_keys2 read ops repeatedly, resuming
+// after the last key of each batch, until the op reports no more entries or
+// the budget is used up.  Returns 0 on success, the negative result of
+// operate() on failure, or -EINVAL if a batch is empty although more entries
+// were reported.
+int librados::IoCtx::omap_get_keys(const std::string& oid,
+                                   const std::string& orig_start_after,
+                                   uint64_t max_return,
+                                   std::set<std::string> *out_keys)
+{
+  string cursor = orig_start_after;
+  bool have_results = false;
+  bool more = true;
+  while (more && max_return > 0) {
+    std::set<std::string> batch;
+    ObjectReadOperation op;
+    op.omap_get_keys2(cursor, max_return, &batch, &more, nullptr);
+    bufferlist bl;
+    const int ret = operate(oid, &op, &bl);
+    if (ret < 0)
+      return ret;
+    if (more) {
+      // "more" with an empty batch would make the loop spin forever
+      if (batch.empty())
+        return -EINVAL; // wth
+      cursor = *batch.rbegin();
+    }
+    // never let the remaining budget underflow
+    max_return = (batch.size() <= max_return) ? max_return - batch.size() : 0;
+    if (!have_results) {
+      // first batch replaces whatever the caller passed in
+      out_keys->swap(batch);
+      have_results = true;
+    } else {
+      out_keys->insert(batch.begin(), batch.end());
+    }
+  }
+  return 0;
+}
+
+// Single synchronous omap key listing.  Returns the result of operate()
+// when it is negative, otherwise the per-op result written by the
+// omap_get_keys2 step.
+int librados::IoCtx::omap_get_keys2(
+  const std::string& oid,
+  const std::string& start_after,
+  uint64_t max_return,
+  std::set<std::string> *out_keys,
+  bool *pmore)
+{
+  ObjectReadOperation read_op;
+  int op_rval;
+  read_op.omap_get_keys2(start_after, max_return, out_keys, pmore, &op_rval);
+  bufferlist unused_bl;
+  const int ret = operate(oid, &read_op, &unused_bl);
+  return (ret < 0) ? ret : op_rval;
+}
+
+// Synchronously reads the omap header of `oid` into *bl.  Returns the result
+// of operate() when it is negative, otherwise the per-op result.
+int librados::IoCtx::omap_get_header(const std::string& oid,
+                                     bufferlist *bl)
+{
+  ObjectReadOperation read_op;
+  int op_rval;
+  read_op.omap_get_header(bl, &op_rval);
+  bufferlist unused_bl;
+  const int ret = operate(oid, &read_op, &unused_bl);
+  return (ret < 0) ? ret : op_rval;
+}
+
+// Synchronously fetches the omap values for the given keys into *vals.
+// Returns the result of operate() when it is negative, otherwise the per-op
+// result.
+int librados::IoCtx::omap_get_vals_by_keys(const std::string& oid,
+                                           const std::set<std::string>& keys,
+                                           std::map<std::string, bufferlist> *vals)
+{
+  ObjectReadOperation read_op;
+  int op_rval;
+  bufferlist unused_bl;
+  read_op.omap_get_vals_by_keys(keys, vals, &op_rval);
+  const int ret = operate(oid, &read_op, &unused_bl);
+  return (ret < 0) ? ret : op_rval;
+}
+
+// Synchronous omap_set: runs a write op containing a single omap_set step.
+int librados::IoCtx::omap_set(const std::string& oid,
+                              const map<string, bufferlist>& m)
+{
+  ObjectWriteOperation write_op;
+  write_op.omap_set(m);
+  return operate(oid, &write_op);
+}
+
+// Synchronous omap_set_header: runs a write op with that single step.
+int librados::IoCtx::omap_set_header(const std::string& oid,
+                                     const bufferlist& bl)
+{
+  ObjectWriteOperation write_op;
+  write_op.omap_set_header(bl);
+  return operate(oid, &write_op);
+}
+
+// Synchronous omap_clear: runs a write op with that single step.
+int librados::IoCtx::omap_clear(const std::string& oid)
+{
+  ObjectWriteOperation write_op;
+  write_op.omap_clear();
+  return operate(oid, &write_op);
+}
+
+// Synchronous omap_rm_keys: runs a write op with that single step.
+int librados::IoCtx::omap_rm_keys(const std::string& oid,
+                                  const std::set<std::string>& keys)
+{
+  ObjectWriteOperation write_op;
+  write_op.omap_rm_keys(keys);
+  return operate(oid, &write_op);
+}
+
+// Runs a write operation synchronously, passing along the op's stored mtime
+// pointer.  Returns -EINVAL when the operation has no implementation object.
+int librados::IoCtx::operate(const std::string& oid, librados::ObjectWriteOperation *o)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+  ceph::real_time *mtime_ptr = (ceph::real_time *)o->impl->prt;
+  return io_ctx_impl->operate(target, &o->impl->o, mtime_ptr);
+}
+
+// Runs a write operation synchronously with flags mapped through
+// translate_flags().  Returns -EINVAL when the operation has no
+// implementation object.
+int librados::IoCtx::operate(const std::string& oid, librados::ObjectWriteOperation *o, int flags)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+  ceph::real_time *mtime_ptr = (ceph::real_time *)o->impl->prt;
+  return io_ctx_impl->operate(target, &o->impl->o, mtime_ptr,
+                              translate_flags(flags));
+}
+
+// Runs a read operation synchronously.  Returns -EINVAL when the operation
+// has no implementation object.
+int librados::IoCtx::operate(const std::string& oid, librados::ObjectReadOperation *o, bufferlist *pbl)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+  return io_ctx_impl->operate_read(target, &o->impl->o, pbl);
+}
+
+// Runs a read operation synchronously with flags mapped through
+// translate_flags().  Returns -EINVAL when the operation has no
+// implementation object.
+int librados::IoCtx::operate(const std::string& oid, librados::ObjectReadOperation *o, bufferlist *pbl, int flags)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+  return io_ctx_impl->operate_read(target, &o->impl->o, pbl,
+                                   translate_flags(flags));
+}
+
+// Submits a write operation asynchronously using the context's own snap
+// context and flags 0.  Returns -EINVAL when the operation has no
+// implementation object.
+int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
+                                 librados::ObjectWriteOperation *o)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+  return io_ctx_impl->aio_operate(target, &o->impl->o, c->pc,
+                                  io_ctx_impl->snapc, 0);
+}
+// Submits a write operation asynchronously using the context's own snap
+// context and flags mapped through translate_flags().  Returns -EINVAL when
+// the operation has no implementation object.
+int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
+                                 ObjectWriteOperation *o, int flags)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+  return io_ctx_impl->aio_operate(target, &o->impl->o, c->pc,
+                                  io_ctx_impl->snapc,
+                                  translate_flags(flags));
+}
+
+// Submits a write operation asynchronously with a caller-supplied snap
+// context (snap_seq plus the ids in `snaps`) and flags 0.  Returns -EINVAL
+// when the operation has no implementation object.
+int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
+                                 librados::ObjectWriteOperation *o,
+                                 snap_t snap_seq, std::vector<snap_t>& snaps)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+  // convert the public snap ids element by element
+  std::vector<snapid_t> snap_ids(snaps.begin(), snaps.end());
+  SnapContext snapc(snap_seq, snap_ids);
+  return io_ctx_impl->aio_operate(target, &o->impl->o, c->pc,
+                                  snapc, 0);
+}
+
+// Submits a write operation asynchronously with a caller-supplied snap
+// context, flags 0 and the given trace info.  Returns -EINVAL when the
+// operation has no implementation object.
+int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
+                                 librados::ObjectWriteOperation *o,
+                                 snap_t snap_seq, std::vector<snap_t>& snaps,
+                                 const blkin_trace_info *trace_info)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+  // convert the public snap ids element by element
+  std::vector<snapid_t> snap_ids(snaps.begin(), snaps.end());
+  SnapContext snapc(snap_seq, snap_ids);
+  return io_ctx_impl->aio_operate(target, &o->impl->o, c->pc,
+                                  snapc, 0, trace_info);
+}
+
+// Submits a write operation asynchronously with a caller-supplied snap
+// context, flags mapped through translate_flags() and the given trace info.
+// Returns -EINVAL when the operation has no implementation object.
+int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
+                                 librados::ObjectWriteOperation *o,
+                                 snap_t snap_seq, std::vector<snap_t>& snaps, int flags,
+                                 const blkin_trace_info *trace_info)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+  // convert the public snap ids element by element
+  std::vector<snapid_t> snap_ids(snaps.begin(), snaps.end());
+  SnapContext snapc(snap_seq, snap_ids);
+  return io_ctx_impl->aio_operate(target, &o->impl->o, c->pc, snapc,
+                                  translate_flags(flags), trace_info);
+}
+
+// Submits a read operation asynchronously with flags 0.  Returns -EINVAL
+// when the operation has no implementation object.
+int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
+                                 librados::ObjectReadOperation *o,
+                                 bufferlist *pbl)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+  return io_ctx_impl->aio_operate_read(target, &o->impl->o, c->pc,
+                                       0, pbl);
+}
+
+// deprecated
+// Asynchronous read taking an ignored snap id.  Only the BALANCE_READS,
+// LOCALIZE_READS and ORDER_READS_WRITES operation flags are mapped to their
+// CEPH_OSD_FLAG_* counterparts here.  Returns -EINVAL when the operation has
+// no implementation object.
+int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
+                                 librados::ObjectReadOperation *o,
+                                 snap_t snapid_unused_deprecated,
+                                 int flags, bufferlist *pbl)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+
+  int op_flags = 0;
+  op_flags |= (flags & OPERATION_BALANCE_READS) ? CEPH_OSD_FLAG_BALANCE_READS : 0;
+  op_flags |= (flags & OPERATION_LOCALIZE_READS) ? CEPH_OSD_FLAG_LOCALIZE_READS : 0;
+  op_flags |= (flags & OPERATION_ORDER_READS_WRITES) ? CEPH_OSD_FLAG_RWORDERED : 0;
+
+  return io_ctx_impl->aio_operate_read(target, &o->impl->o, c->pc,
+                                       op_flags, pbl);
+}
+
+// Submits a read operation asynchronously with flags mapped through
+// translate_flags().  Returns -EINVAL when the operation has no
+// implementation object.
+int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
+                                 librados::ObjectReadOperation *o,
+                                 int flags, bufferlist *pbl)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+  return io_ctx_impl->aio_operate_read(target, &o->impl->o, c->pc,
+                                       translate_flags(flags), pbl);
+}
+
+// Submits a read operation asynchronously with flags mapped through
+// translate_flags() and the given trace info.  Returns -EINVAL when the
+// operation has no implementation object.
+int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c,
+                                 librados::ObjectReadOperation *o,
+                                 int flags, bufferlist *pbl, const blkin_trace_info *trace_info)
+{
+  if (unlikely(!o->impl))
+    return -EINVAL;
+  object_t target{oid};
+  return io_ctx_impl->aio_operate_read(target, &o->impl->o, c->pc,
+                                       translate_flags(flags), pbl,
+                                       trace_info);
+}
+
+// Forwards `seq` to the impl's set_snap_read().
+void librados::IoCtx::snap_set_read(snap_t seq)
+{
+  IoCtxImpl& ctx_impl = *io_ctx_impl;
+  ctx_impl.set_snap_read(seq);
+}
+
+// Converts the public snap ids to snapid_t and installs them, together with
+// `seq`, as the impl's snap write context.
+int librados::IoCtx::selfmanaged_snap_set_write_ctx(snap_t seq, vector<snap_t>& snaps)
+{
+  std::vector<snapid_t> snap_ids(snaps.begin(), snaps.end());
+  return io_ctx_impl->set_snap_write_context(seq, snap_ids);
+}
+
+// Forwards to the impl's snap_create().
+int librados::IoCtx::snap_create(const char *snapname)
+{
+  const int ret = io_ctx_impl->snap_create(snapname);
+  return ret;
+}
+
+// Forwards to the impl's snap_lookup(); the id is delivered via `snapid`.
+int librados::IoCtx::snap_lookup(const char *name, snap_t *snapid)
+{
+  const int ret = io_ctx_impl->snap_lookup(name, snapid);
+  return ret;
+}
+
+// Forwards to the impl's snap_get_stamp(); the time is delivered via `t`.
+int librados::IoCtx::snap_get_stamp(snap_t snapid, time_t *t)
+{
+  const int ret = io_ctx_impl->snap_get_stamp(snapid, t);
+  return ret;
+}
+
+// Forwards to the impl's snap_get_name(); the name is delivered via `s`.
+int librados::IoCtx::snap_get_name(snap_t snapid, std::string *s)
+{
+  const int ret = io_ctx_impl->snap_get_name(snapid, s);
+  return ret;
+}
+
+// Forwards to the impl's snap_remove().
+int librados::IoCtx::snap_remove(const char *snapname)
+{
+  const int ret = io_ctx_impl->snap_remove(snapname);
+  return ret;
+}
+
+// Forwards to the impl's snap_list(); the ids are delivered via `snaps`.
+int librados::IoCtx::snap_list(std::vector<snap_t> *snaps)
+{
+  const int ret = io_ctx_impl->snap_list(snaps);
+  return ret;
+}
+
+// Forwards (oid, snapname) to the impl's rollback().
+int librados::IoCtx::snap_rollback(const std::string& oid, const char *snapname)
+{
+  const int ret = io_ctx_impl->rollback(oid, snapname);
+  return ret;
+}
+
+// Deprecated name kept for backward compatibility; simply calls
+// snap_rollback() with the same arguments.
+int librados::IoCtx::rollback(const std::string& oid, const char *snapname)
+{
+  const int ret = snap_rollback(oid, snapname);
+  return ret;
+}
+
+// Forwards to the impl's selfmanaged_snap_create(); the new id is delivered
+// via `snapid`.
+int librados::IoCtx::selfmanaged_snap_create(uint64_t *snapid)
+{
+  const int ret = io_ctx_impl->selfmanaged_snap_create(snapid);
+  return ret;
+}
+
+// Asynchronous variant: hands `snapid` and the completion's impl to the
+// IoCtxImpl.
+void librados::IoCtx::aio_selfmanaged_snap_create(uint64_t *snapid,
+                                                  AioCompletion *c)
+{
+  AioCompletionImpl *completion = c->pc;
+  io_ctx_impl->aio_selfmanaged_snap_create(snapid, completion);
+}
+
+// Forwards to the impl's selfmanaged_snap_remove().
+int librados::IoCtx::selfmanaged_snap_remove(uint64_t snapid)
+{
+  const int ret = io_ctx_impl->selfmanaged_snap_remove(snapid);
+  return ret;
+}
+
+// Asynchronous variant: hands `snapid` and the completion's impl to the
+// IoCtxImpl.
+void librados::IoCtx::aio_selfmanaged_snap_remove(uint64_t snapid,
+                                                  AioCompletion *c)
+{
+  AioCompletionImpl *completion = c->pc;
+  io_ctx_impl->aio_selfmanaged_snap_remove(snapid, completion);
+}
+
+// Rolls the object back to `snapid`, passing the context's own snap context
+// to the impl's selfmanaged_snap_rollback_object().
+int librados::IoCtx::selfmanaged_snap_rollback(const std::string& oid, uint64_t snapid)
+{
+  IoCtxImpl *ctx_impl = io_ctx_impl;
+  return ctx_impl->selfmanaged_snap_rollback_object(oid, ctx_impl->snapc,
+                                                    snapid);
+}
+
+// Takes an exclusive cls lock on `oid`.  The lock duration is a
+// value-initialised utime_t unless `duration` is given; the tag argument is
+// passed as an empty string.
+int librados::IoCtx::lock_exclusive(const std::string &oid, const std::string &name,
+                                    const std::string &cookie,
+                                    const std::string &description,
+                                    struct timeval * duration, uint8_t flags)
+{
+  utime_t lock_duration{};
+  if (duration != nullptr)
+    lock_duration.set_from_timeval(duration);
+
+  return rados::cls::lock::lock(this, oid, name, ClsLockType::EXCLUSIVE,
+                                cookie, "", description, lock_duration,
+                                flags);
+}
+
+int librados::IoCtx::lock_shared(const std::string &oid, const std::string &name,
+                                 const std::string &cookie, const std::string &tag,
+                                 const std::string &description,
+                                 struct timeval * duration, uint8_t flags)
+{
+  // A null duration leaves the value-initialised utime_t untouched.
+  utime_t lock_duration{};
+  if (duration != nullptr) {
+    lock_duration.set_from_timeval(duration);
+  }
+
+  // Shared locks carry the caller-supplied tag.
+  return rados::cls::lock::lock(this, oid, name, ClsLockType::SHARED, cookie, tag,
+                                description, lock_duration, flags);
+}
+
+int librados::IoCtx::unlock(const std::string &oid, const std::string &name,
+                            const std::string &cookie)
+{
+  // Delegates to the cls lock client, passing this IoCtx as the target.
+  const int rc = rados::cls::lock::unlock(this, oid, name, cookie);
+  return rc;
+}
+
+// Completion adapter for an unlock operation. It takes a reference on the
+// caller's AioCompletionImpl at construction and, when the operation
+// completes, invokes the user's completion callback (if any), clears it,
+// wakes waiters on the completion's condition variable and drops the
+// reference.
+struct AioUnlockCompletion : public librados::ObjectOperationCompletion {
+  librados::AioCompletionImpl *completion;
+  AioUnlockCompletion(librados::AioCompletion *c) : completion(c->pc) {
+    // Keep the completion alive until handle_completion() has run.
+    completion->get();
+  }
+  void handle_completion(int r, bufferlist& outbl) override {
+    rados_callback_t cb = completion->callback_complete;
+    void *cb_arg = completion->callback_complete_arg;
+    // Completions can be created without a callback; calling a null
+    // function pointer would crash, so only invoke it when one is set.
+    if (cb) {
+      cb(completion, cb_arg);
+    }
+    completion->lock.lock();
+    completion->callback_complete = NULL;
+    completion->cond.notify_all();
+    // NOTE(review): put_unlock() presumably releases the lock taken above
+    // together with the reference -- confirm against AioCompletionImpl.
+    completion->put_unlock();
+  }
+};
+
+int librados::IoCtx::aio_unlock(const std::string &oid, const std::string &name,
+                                const std::string &cookie, AioCompletion *c)
+{
+  // Delegates to the cls lock client's asynchronous unlock.
+  const int rc = rados::cls::lock::aio_unlock(this, oid, name, cookie, c);
+  return rc;
+}
+
+int librados::IoCtx::break_lock(const std::string &oid, const std::string &name,
+                                const std::string &client, const std::string &cookie)
+{
+  // The client string must parse as an entity name; otherwise -EINVAL.
+  entity_name_t locker;
+  const bool parsed = locker.parse(client);
+  if (parsed) {
+    return rados::cls::lock::break_lock(this, oid, name, cookie, locker);
+  }
+  return -EINVAL;
+}
+
+int librados::IoCtx::list_lockers(const std::string &oid, const std::string &name,
+                                  int *exclusive,
+                                  std::string *tag,
+                                  std::list<librados::locker_t> *lockers)
+{
+  // Query the raw lock state first; on failure no out-parameter is written.
+  map<rados::cls::lock::locker_id_t, rados::cls::lock::locker_info_t> raw_lockers;
+  std::string lock_tag;
+  ClsLockType lock_type;
+  const int rc = rados::cls::lock::get_lock_info(this, oid, name, &raw_lockers, &lock_type, &lock_tag);
+  if (rc < 0) {
+    return rc;
+  }
+
+  // Convert each (id, info) pair into the public locker_t representation.
+  std::list<librados::locker_t> converted;
+  for (const auto& entry : raw_lockers) {
+    librados::locker_t item;
+    item.client = stringify(entry.first.locker);
+    item.cookie = entry.first.cookie;
+    item.address = stringify(entry.second.addr);
+    converted.push_back(item);
+  }
+
+  // Every out-parameter is optional.
+  if (lockers) {
+    *lockers = converted;
+  }
+  if (tag) {
+    *tag = lock_tag;
+  }
+  if (exclusive) {
+    *exclusive = (lock_type == ClsLockType::EXCLUSIVE) ? 1 : 0;
+  }
+
+  // On success the return value is the number of lockers found.
+  return converted.size();
+}
+
+librados::NObjectIterator librados::IoCtx::nobjects_begin(
+    const bufferlist &filter)
+{
+  // Open a listing context and wrap it in an iterator.
+  rados_list_ctx_t handle;
+  rados_nobjects_list_open(io_ctx_impl, &handle);
+  NObjectIterator it((ObjListCtx*)handle);
+  // An empty filter buffer means "no filter".
+  const bool has_filter = filter.length() > 0;
+  if (has_filter) {
+    it.set_filter(filter);
+  }
+  it.get_next();
+  return it;
+}
+
+librados::NObjectIterator librados::IoCtx::nobjects_begin(
+    uint32_t pos, const bufferlist &filter)
+{
+  // Open a listing context, wrap it in an iterator and seek to pos.
+  rados_list_ctx_t handle;
+  rados_nobjects_list_open(io_ctx_impl, &handle);
+  NObjectIterator it((ObjListCtx*)handle);
+  // An empty filter buffer means "no filter".
+  const bool has_filter = filter.length() > 0;
+  if (has_filter) {
+    it.set_filter(filter);
+  }
+  it.seek(pos);
+  return it;
+}
+
+librados::NObjectIterator librados::IoCtx::nobjects_begin(
+    const ObjectCursor& cursor, const bufferlist &filter)
+{
+  // Open a listing context, wrap it in an iterator and seek to the cursor.
+  rados_list_ctx_t handle;
+  rados_nobjects_list_open(io_ctx_impl, &handle);
+  NObjectIterator it((ObjListCtx*)handle);
+  // An empty filter buffer means "no filter".
+  const bool has_filter = filter.length() > 0;
+  if (has_filter) {
+    it.set_filter(filter);
+  }
+  it.seek(cursor);
+  return it;
+}
+
+const librados::NObjectIterator& librados::IoCtx::nobjects_end() const
+{
+  // All listings share the single static end iterator.
+  const NObjectIterator& sentinel = NObjectIterator::__EndObjectIterator;
+  return sentinel;
+}
+
+int librados::IoCtx::hit_set_list(uint32_t hash, AioCompletion *c,
+                                  std::list< std::pair<time_t, time_t> > *pls)
+{
+  // Forwards to IoCtxImpl::hit_set_list using the completion's pc member.
+  const int rc = io_ctx_impl->hit_set_list(hash, c->pc, pls);
+  return rc;
+}
+
+int librados::IoCtx::hit_set_get(uint32_t hash, AioCompletion *c, time_t stamp,
+                                 bufferlist *pbl)
+{
+  // Forwards to IoCtxImpl::hit_set_get using the completion's pc member.
+  const int rc = io_ctx_impl->hit_set_get(hash, c->pc, stamp, pbl);
+  return rc;
+}
+
+
+
+uint64_t librados::IoCtx::get_last_version()
+{
+  // Returns whatever IoCtxImpl::last_version() reports.
+  const uint64_t version = io_ctx_impl->last_version();
+  return version;
+}
+
+int librados::IoCtx::aio_read(const std::string& oid, librados::AioCompletion *c,
+                              bufferlist *pbl, size_t len, uint64_t off)
+{
+  // No snapshot given: read at the snap_seq currently set on this IoCtx.
+  auto& read_snap = io_ctx_impl->snap_seq;
+  return io_ctx_impl->aio_read(oid, c->pc, pbl, len, off, read_snap);
+}
+
+int librados::IoCtx::aio_read(const std::string& oid, librados::AioCompletion *c,
+                              bufferlist *pbl, size_t len, uint64_t off,
+                              uint64_t snapid)
+{
+  // Same as the overload above, but with an explicit snapshot id.
+  const int rc = io_ctx_impl->aio_read(oid, c->pc, pbl, len, off, snapid);
+  return rc;
+}
+
+int librados::IoCtx::aio_exec(const std::string& oid,
+                              librados::AioCompletion *c, const char *cls,
+                              const char *method, bufferlist& inbl,
+                              bufferlist *outbl)
+{
+  // Wrap the object name in an object_t before calling the implementation.
+  object_t target(oid);
+  const int rc = io_ctx_impl->aio_exec(target, c->pc, cls, method, inbl, outbl);
+  return rc;
+}
+
+int librados::IoCtx::aio_cmpext(const std::string& oid,
+                                librados::AioCompletion *c,
+                                uint64_t off,
+                                bufferlist& cmp_bl)
+{
+  // Thin forwarder to IoCtxImpl::aio_cmpext.
+  const int rc = io_ctx_impl->aio_cmpext(oid, c->pc, off, cmp_bl);
+  return rc;
+}
+
+int librados::IoCtx::aio_sparse_read(const std::string& oid, librados::AioCompletion *c,
+                                     std::map<uint64_t,uint64_t> *m, bufferlist *data_bl,
+                                     size_t len, uint64_t off)
+{
+  // No snapshot given: read at the snap_seq currently set on this IoCtx.
+  auto& read_snap = io_ctx_impl->snap_seq;
+  return io_ctx_impl->aio_sparse_read(oid, c->pc, m, data_bl, len, off, read_snap);
+}
+
+int librados::IoCtx::aio_sparse_read(const std::string& oid, librados::AioCompletion *c,
+                                     std::map<uint64_t,uint64_t> *m, bufferlist *data_bl,
+                                     size_t len, uint64_t off, uint64_t snapid)
+{
+  // Same as the overload above, but with an explicit snapshot id.
+  const int rc = io_ctx_impl->aio_sparse_read(oid, c->pc, m, data_bl, len, off, snapid);
+  return rc;
+}
+
+int librados::IoCtx::aio_write(const std::string& oid, librados::AioCompletion *c,
+                               const bufferlist& bl, size_t len, uint64_t off)
+{
+  // Thin forwarder to IoCtxImpl::aio_write.
+  const int rc = io_ctx_impl->aio_write(oid, c->pc, bl, len, off);
+  return rc;
+}
+
+int librados::IoCtx::aio_append(const std::string& oid, librados::AioCompletion *c,
+                                const bufferlist& bl, size_t len)
+{
+  // Thin forwarder to IoCtxImpl::aio_append.
+  const int rc = io_ctx_impl->aio_append(oid, c->pc, bl, len);
+  return rc;
+}
+
+int librados::IoCtx::aio_write_full(const std::string& oid, librados::AioCompletion *c,
+                                    const bufferlist& bl)
+{
+  // Wrap the object name in an object_t before calling the implementation.
+  object_t target(oid);
+  const int rc = io_ctx_impl->aio_write_full(target, c->pc, bl);
+  return rc;
+}
+
+int librados::IoCtx::aio_writesame(const std::string& oid, librados::AioCompletion *c,
+                                   const bufferlist& bl, size_t write_len,
+                                   uint64_t off)
+{
+  // Thin forwarder to IoCtxImpl::aio_writesame.
+  const int rc = io_ctx_impl->aio_writesame(oid, c->pc, bl, write_len, off);
+  return rc;
+}
+
+
+int librados::IoCtx::aio_remove(const std::string& oid, librados::AioCompletion *c)
+{
+  // Thin forwarder to the two-argument IoCtxImpl::aio_remove.
+  const int rc = io_ctx_impl->aio_remove(oid, c->pc);
+  return rc;
+}
+
+int librados::IoCtx::aio_remove(const std::string& oid, librados::AioCompletion *c, int flags)
+{
+  // Same as the overload above, additionally passing the caller's flags.
+  const int rc = io_ctx_impl->aio_remove(oid, c->pc, flags);
+  return rc;
+}
+
+int librados::IoCtx::aio_flush_async(librados::AioCompletion *c)
+{
+  // Hands the completion to the implementation; this wrapper always returns 0.
+  auto *const completion = c->pc;
+  io_ctx_impl->flush_aio_writes_async(completion);
+  return 0;
+}
+
+int librados::IoCtx::aio_flush()
+{
+  // Calls IoCtxImpl::flush_aio_writes(); this wrapper always returns 0.
+  IoCtxImpl *const impl = io_ctx_impl;
+  impl->flush_aio_writes();
+  return 0;
+}
+
+// State handed to rados_aio_getxattr_completepp(): the caller's output
+// buffer and a CB_AioCompleteAndSafe built from the caller's completion.
+struct AioGetxattrDataPP {
+  bufferlist *bl;
+  struct librados::CB_AioCompleteAndSafe completion;
+
+  AioGetxattrDataPP(librados::AioCompletionImpl *c, bufferlist *_bl)
+    : bl(_bl), completion(c)
+  {}
+};
+
+// Completion callback for IoCtx::aio_getxattr: translates the result,
+// completes the caller's completion and frees the callback state.
+static void rados_aio_getxattr_completepp(rados_completion_t c, void *arg) {
+  AioGetxattrDataPP *payload = reinterpret_cast<AioGetxattrDataPP*>(arg);
+  int result = rados_aio_get_return_value(c);
+  // On success, report the length of the caller's buffer instead.
+  if (result >= 0) {
+    result = payload->bl->length();
+  }
+  payload->completion(result);
+  delete payload;
+}
+
+int librados::IoCtx::aio_getxattr(const std::string& oid, librados::AioCompletion *c,
+                                  const char *name, bufferlist& bl)
+{
+  // State for the async callback: caller's completion plus output buffer.
+  AioGetxattrDataPP *callback_state = new AioGetxattrDataPP(c->pc, &bl);
+  if (callback_state == nullptr) {
+    return -ENOMEM;
+  }
+  // Internal completion whose callback finishes the caller's completion.
+  librados::AioCompletionImpl *inner = new librados::AioCompletionImpl;
+  inner->set_complete_callback(callback_state, rados_aio_getxattr_completepp);
+  // Issue the actual getxattr through IoCtxImpl.
+  object_t target(oid);
+  const int rc = io_ctx_impl->aio_getxattr(target, inner, name, bl);
+  return rc;
+}
+
+int librados::IoCtx::aio_getxattrs(const std::string& oid, AioCompletion *c,
+                                   map<std::string, bufferlist>& attrset)
+{
+  // Wrap the object name in an object_t before calling the implementation.
+  object_t target(oid);
+  const int rc = io_ctx_impl->aio_getxattrs(target, c->pc, attrset);
+  return rc;
+}
+
+int librados::IoCtx::aio_setxattr(const std::string& oid, AioCompletion *c,
+                                  const char *name, bufferlist& bl)
+{
+  // Wrap the object name in an object_t before calling the implementation.
+  object_t target(oid);
+  const int rc = io_ctx_impl->aio_setxattr(target, c->pc, name, bl);
+  return rc;
+}
+
+int librados::IoCtx::aio_rmxattr(const std::string& oid, AioCompletion *c,
+                                 const char *name)
+{
+  // Wrap the object name in an object_t before calling the implementation.
+  object_t target(oid);
+  const int rc = io_ctx_impl->aio_rmxattr(target, c->pc, name);
+  return rc;
+}
+
+int librados::IoCtx::aio_stat(const std::string& oid, librados::AioCompletion *c,
+                              uint64_t *psize, time_t *pmtime)
+{
+  // Wrap the object name in an object_t before calling the implementation.
+  object_t target(oid);
+  const int rc = io_ctx_impl->aio_stat(target, c->pc, psize, pmtime);
+  return rc;
+}
+
+int librados::IoCtx::aio_cancel(librados::AioCompletion *c)
+{
+  // Thin forwarder to IoCtxImpl::aio_cancel.
+  const int rc = io_ctx_impl->aio_cancel(c->pc);
+  return rc;
+}
+
+int librados::IoCtx::watch(const string& oid, uint64_t ver, uint64_t *cookie,
+                           librados::WatchCtx *ctx)
+{
+  // ver is not used here; the WatchCtx2 slot is passed as NULL.
+  object_t target(oid);
+  const int rc = io_ctx_impl->watch(target, cookie, ctx, NULL);
+  return rc;
+}
+
+int librados::IoCtx::watch2(const string& oid, uint64_t *cookie,
+                            librados::WatchCtx2 *ctx2)
+{
+  // The legacy WatchCtx slot is passed as NULL; only ctx2 is supplied.
+  object_t target(oid);
+  const int rc = io_ctx_impl->watch(target, cookie, NULL, ctx2);
+  return rc;
+}
+
+int librados::IoCtx::watch3(const string& oid, uint64_t *cookie,
+                            librados::WatchCtx2 *ctx2, uint32_t timeout)
+{
+  // Like watch2(), additionally passing the caller's timeout.
+  object_t target(oid);
+  const int rc = io_ctx_impl->watch(target, cookie, NULL, ctx2, timeout);
+  return rc;
+}
+
+int librados::IoCtx::aio_watch(const string& oid, AioCompletion *c,
+                               uint64_t *cookie,
+                               librados::WatchCtx2 *ctx2)
+{
+  // The legacy WatchCtx slot is passed as NULL; only ctx2 is supplied.
+  object_t target(oid);
+  const int rc = io_ctx_impl->aio_watch(target, c->pc, cookie, NULL, ctx2);
+  return rc;
+}
+
+int librados::IoCtx::aio_watch2(const string& oid, AioCompletion *c,
+                                uint64_t *cookie,
+                                librados::WatchCtx2 *ctx2,
+                                uint32_t timeout)
+{
+  // Like aio_watch(), additionally passing the caller's timeout.
+  object_t target(oid);
+  const int rc = io_ctx_impl->aio_watch(target, c->pc, cookie, NULL, ctx2, timeout);
+  return rc;
+}
+
+int librados::IoCtx::unwatch(const string& oid, uint64_t handle)
+{
+  // oid is not used; only the watch handle is passed on.
+  const int rc = io_ctx_impl->unwatch(handle);
+  return rc;
+}
+
+int librados::IoCtx::unwatch2(uint64_t handle)
+{
+  // Thin forwarder to IoCtxImpl::unwatch.
+  const int rc = io_ctx_impl->unwatch(handle);
+  return rc;
+}
+
+int librados::IoCtx::aio_unwatch(uint64_t handle, AioCompletion *c)
+{
+  // Thin forwarder to IoCtxImpl::aio_unwatch.
+  const int rc = io_ctx_impl->aio_unwatch(handle, c->pc);
+  return rc;
+}
+
+int librados::IoCtx::watch_check(uint64_t handle)
+{
+  // Thin forwarder to IoCtxImpl::watch_check.
+  const int rc = io_ctx_impl->watch_check(handle);
+  return rc;
+}
+
+int librados::IoCtx::notify(const string& oid, uint64_t ver, bufferlist& bl)
+{
+  // ver is not used; timeout 0 and no reply outputs are requested.
+  object_t target(oid);
+  const int rc = io_ctx_impl->notify(target, bl, 0, NULL, NULL, NULL);
+  return rc;
+}
+
+int librados::IoCtx::notify2(const string& oid, bufferlist& bl,
+                             uint64_t timeout_ms, bufferlist *preplybl)
+{
+  // Passes the caller's timeout and reply buffer; the last two slots are NULL.
+  object_t target(oid);
+  const int rc = io_ctx_impl->notify(target, bl, timeout_ms, preplybl, NULL, NULL);
+  return rc;
+}
+
+int librados::IoCtx::aio_notify(const string& oid, AioCompletion *c,
+                                bufferlist& bl, uint64_t timeout_ms,
+                                bufferlist *preplybl)
+{
+  // Asynchronous counterpart of notify2(); the last two slots are NULL.
+  object_t target(oid);
+  const int rc = io_ctx_impl->aio_notify(target, c->pc, bl, timeout_ms, preplybl,
+                                         NULL, NULL);
+  return rc;
+}
+
+void librados::IoCtx::decode_notify_response(bufferlist &bl,
+                                             std::vector<librados::notify_ack_t> *acks,
+                                             std::vector<librados::notify_timeout_t> *timeouts)
+{
+  // The buffer encodes two containers back to back: a map of
+  // (id pair -> payload) for acks, then a set of id pairs that were missed.
+  map<pair<uint64_t,uint64_t>,bufferlist> acked;
+  set<pair<uint64_t,uint64_t>> missed;
+
+  auto cursor = bl.cbegin();
+  decode(acked, cursor);
+  decode(missed, cursor);
+
+  // Append one notify_ack_t per acked entry.
+  for (auto& entry : acked) {
+    auto& ids = entry.first;
+    acks->emplace_back(librados::notify_ack_t{ids.first, ids.second, entry.second});
+  }
+  // Append one notify_timeout_t per missed entry.
+  for (auto& ids : missed) {
+    timeouts->emplace_back(librados::notify_timeout_t{ids.first, ids.second});
+  }
+}
+
+void librados::IoCtx::notify_ack(const std::string& o,
+                                 uint64_t notify_id, uint64_t handle,
+                                 bufferlist& bl)
+{
+  // Thin forwarder to IoCtxImpl::notify_ack.
+  IoCtxImpl *const impl = io_ctx_impl;
+  impl->notify_ack(o, notify_id, handle, bl);
+}
+
+int librados::IoCtx::list_watchers(const std::string& oid,
+                                   std::list<obj_watch_t> *out_watchers)
+{
+  // Build a one-step read operation and run it synchronously.
+  ObjectReadOperation op;
+  int op_rval;
+  op.list_watchers(out_watchers, &op_rval);
+  bufferlist scratch;
+  const int rc = operate(oid, &op, &scratch);
+  // A failure of operate() wins; otherwise report the step's own result.
+  return (rc < 0) ? rc : op_rval;
+}
+
+int librados::IoCtx::list_snaps(const std::string& oid,
+                                snap_set_t *out_snaps)
+{
+  // Only valid while the read snapshot is set to CEPH_SNAPDIR.
+  if (io_ctx_impl->snap_seq != CEPH_SNAPDIR) {
+    return -EINVAL;
+  }
+
+  // Build a one-step read operation and run it synchronously.
+  ObjectReadOperation op;
+  int op_rval;
+  op.list_snaps(out_snaps, &op_rval);
+  bufferlist scratch;
+  const int rc = operate(oid, &op, &scratch);
+  // A failure of operate() wins; otherwise report the step's own result.
+  return (rc < 0) ? rc : op_rval;
+}
+
+void librados::IoCtx::set_notify_timeout(uint32_t timeout)
+{
+  // Thin forwarder to IoCtxImpl::set_notify_timeout.
+  IoCtxImpl *const impl = io_ctx_impl;
+  impl->set_notify_timeout(timeout);
+}
+
+int librados::IoCtx::set_alloc_hint(const std::string& o,
+                                    uint64_t expected_object_size,
+                                    uint64_t expected_write_size)
+{
+  // Flag-less variant: the flags argument is fixed at 0.
+  object_t target(o);
+  const int rc = io_ctx_impl->set_alloc_hint(target, expected_object_size,
+                                             expected_write_size, 0);
+  return rc;
+}
+
+int librados::IoCtx::set_alloc_hint2(const std::string& o,
+                                     uint64_t expected_object_size,
+                                     uint64_t expected_write_size,
+                                     uint32_t flags)
+{
+  // Same as set_alloc_hint(), but with caller-supplied flags.
+  object_t target(o);
+  const int rc = io_ctx_impl->set_alloc_hint(target, expected_object_size,
+                                             expected_write_size, flags);
+  return rc;
+}
+
+void librados::IoCtx::set_assert_version(uint64_t ver)
+{
+  // Thin forwarder to IoCtxImpl::set_assert_version.
+  IoCtxImpl *const impl = io_ctx_impl;
+  impl->set_assert_version(ver);
+}
+
+void librados::IoCtx::locator_set_key(const string& key)
+{
+  // Stores the key on this context's object locator.
+  auto& locator = io_ctx_impl->oloc;
+  locator.key = key;
+}
+
+void librados::IoCtx::set_namespace(const string& nspace)
+{
+  // Stores the namespace on this context's object locator.
+  auto& locator = io_ctx_impl->oloc;
+  locator.nspace = nspace;
+}
+
+std::string librados::IoCtx::get_namespace() const
+{
+  // Returns a copy of the namespace held by the object locator.
+  const auto& locator = io_ctx_impl->oloc;
+  return locator.nspace;
+}
+
+int64_t librados::IoCtx::get_id()
+{
+  // Returns whatever IoCtxImpl::get_id() reports.
+  const int64_t id = io_ctx_impl->get_id();
+  return id;
+}
+
+uint32_t librados::IoCtx::get_object_hash_position(const std::string& oid)
+{
+  // This legacy form cannot report errors: a failed lookup yields 0.
+  uint32_t position;
+  const int rc = io_ctx_impl->get_object_hash_position(oid, &position);
+  return (rc < 0) ? 0 : position;
+}
+
+uint32_t librados::IoCtx::get_object_pg_hash_position(const std::string& oid)
+{
+  // This legacy form cannot report errors: a failed lookup yields 0.
+  uint32_t position;
+  const int rc = io_ctx_impl->get_object_pg_hash_position(oid, &position);
+  return (rc < 0) ? 0 : position;
+}
+
+int librados::IoCtx::get_object_hash_position2(
+    const std::string& oid, uint32_t *hash_position)
+{
+  // Error-reporting variant: the status is returned, the hash goes to the out-param.
+  const int rc = io_ctx_impl->get_object_hash_position(oid, hash_position);
+  return rc;
+}
+
+int librados::IoCtx::get_object_pg_hash_position2(
+    const std::string& oid, uint32_t *pg_hash_position)
+{
+  // Error-reporting variant: the status is returned, the hash goes to the out-param.
+  const int rc = io_ctx_impl->get_object_pg_hash_position(oid, pg_hash_position);
+  return rc;
+}
+
+librados::config_t librados::IoCtx::cct()
+{
+  // Exposes the owning client's cct member as an opaque config_t.
+  auto *const owner = io_ctx_impl->client;
+  return (config_t)owner->cct;
+}
+
+// Wraps an existing implementation pointer; nothing else is done here.
+librados::IoCtx::IoCtx(IoCtxImpl *io_ctx_impl_)
+  : io_ctx_impl(io_ctx_impl_) {}
+
+void librados::IoCtx::set_osdmap_full_try()
+{
+  // Sets the same FULL_TRY bit as set_pool_full_try().
+  auto& op_flags = io_ctx_impl->extra_op_flags;
+  op_flags |= CEPH_OSD_FLAG_FULL_TRY;
+}
+
+void librados::IoCtx::unset_osdmap_full_try()
+{
+  // Clears the same FULL_TRY bit as unset_pool_full_try().
+  auto& op_flags = io_ctx_impl->extra_op_flags;
+  op_flags &= ~CEPH_OSD_FLAG_FULL_TRY;
+}
+
+bool librados::IoCtx::get_pool_full_try()
+{
+  // True when the FULL_TRY bit is present in the extra op flags.
+  const auto masked = io_ctx_impl->extra_op_flags & CEPH_OSD_FLAG_FULL_TRY;
+  return masked != 0;
+}
+
+void librados::IoCtx::set_pool_full_try()
+{
+  // Adds the FULL_TRY bit to the extra op flags.
+  auto& op_flags = io_ctx_impl->extra_op_flags;
+  op_flags |= CEPH_OSD_FLAG_FULL_TRY;
+}
+
+void librados::IoCtx::unset_pool_full_try()
+{
+  // Removes the FULL_TRY bit from the extra op flags.
+  auto& op_flags = io_ctx_impl->extra_op_flags;
+  op_flags &= ~CEPH_OSD_FLAG_FULL_TRY;
+}
+
+///////////////////////////// Rados //////////////////////////////
+void librados::Rados::version(int *major, int *minor, int *extra)
+{
+  // Same out-parameters as the C API call this wraps.
+  rados_version(major, minor, extra);
+  return;
+}
+
+// A default-constructed handle holds no client until one of the init calls runs.
+librados::Rados::Rados()
+  : client(nullptr) {}
+
+// Builds a handle that shares the client behind an existing IoCtx,
+// taking an additional reference on it.
+librados::Rados::Rados(IoCtx &ioctx)
+  : client(ioctx.io_ctx_impl->client)
+{
+  ceph_assert(client != NULL);
+  client->get();
+}
+
+librados::Rados::~Rados()
+{
+  // Destruction releases the client exactly like an explicit shutdown().
+  this->shutdown();
+}
+
+void librados::Rados::from_rados_t(rados_t cluster, Rados &rados) {
+  // Drop the reference on any client held before adopting the new one.
+  if (rados.client != nullptr) {
+    rados.client->put();
+  }
+  RadosClient *const adopted = static_cast<RadosClient*>(cluster);
+  rados.client = adopted;
+  // A null cluster handle is accepted and leaves the wrapper empty.
+  if (adopted != nullptr) {
+    adopted->get();
+  }
+}
+
+int librados::Rados::init(const char * const id)
+{
+  // rados_create() writes the new handle straight into our client pointer.
+  rados_t *const slot = (rados_t *)&client;
+  return rados_create(slot, id);
+}
+
+int librados::Rados::init2(const char * const name,
+                           const char * const clustername, uint64_t flags)
+{
+  // Note the argument order of the C call: cluster name first, then name.
+  rados_t *const slot = (rados_t *)&client;
+  return rados_create2(slot, clustername, name, flags);
+}
+
+int librados::Rados::init_with_context(config_t cct_)
+{
+  // Passes the caller's context handle to the C constructor.
+  rados_t *const slot = (rados_t *)&client;
+  return rados_create_with_context(slot, (rados_config_t)cct_);
+}
+
+int librados::Rados::connect()
+{
+  // Thin forwarder to RadosClient::connect; client is not null-checked here.
+  const int rc = client->connect();
+  return rc;
+}
+
+librados::config_t librados::Rados::cct()
+{
+  // Exposes the client's cct member as an opaque config_t.
+  RadosClient *const owner = client;
+  return (config_t)owner->cct;
+}
+
+int librados::Rados::watch_flush()
+{
+  // Without a client there is nothing to flush: report -EINVAL.
+  return client ? client->watch_flush() : -EINVAL;
+}
+
+int librados::Rados::aio_watch_flush(AioCompletion *c)
+{
+  // Without a client there is nothing to flush: report -EINVAL.
+  return client ? client->async_watch_flush(c->pc) : -EINVAL;
+}
+
+// Releases this handle's reference on the client. If that was the last
+// reference (put() returns true), the client is shut down and destroyed.
+// Safe to call repeatedly and from the destructor.
+void librados::Rados::shutdown()
+{
+  if (!client)
+    return;
+  // Detach first so this handle can never drop the same reference twice:
+  // previously, when put() returned false the pointer was kept, and a later
+  // shutdown()/destructor call would release a reference owned by another
+  // handle.
+  RadosClient *const released = client;
+  client = NULL;
+  if (released->put()) {
+    released->shutdown();
+    delete released;
+  }
+}
+
+uint64_t librados::Rados::get_instance_id()
+{
+  // Returns whatever RadosClient::get_instance_id() reports.
+  const uint64_t id = client->get_instance_id();
+  return id;
+}
+
+int librados::Rados::get_min_compatible_osd(int8_t* require_osd_release)
+{
+  // Thin forwarder; the out-param is passed to the client unchanged.
+  const int rc = client->get_min_compatible_osd(require_osd_release);
+  return rc;
+}
+
+int librados::Rados::get_min_compatible_client(int8_t* min_compat_client,
+                                               int8_t* require_min_compat_client)
+{
+  // Thin forwarder; both out-params are passed to the client unchanged.
+  const int rc = client->get_min_compatible_client(min_compat_client,
+                                                   require_min_compat_client);
+  return rc;
+}
+
+int librados::Rados::conf_read_file(const char * const path) const
+{
+  // Implemented via the C API on the same client handle.
+  rados_t cluster = (rados_t)client;
+  return rados_conf_read_file(cluster, path);
+}
+
+int librados::Rados::conf_parse_argv(int argc, const char ** argv) const
+{
+  // Implemented via the C API on the same client handle.
+  rados_t cluster = (rados_t)client;
+  return rados_conf_parse_argv(cluster, argc, argv);
+}
+
+int librados::Rados::conf_parse_argv_remainder(int argc, const char ** argv,
+                                               const char ** remargv) const
+{
+  // Implemented via the C API on the same client handle.
+  rados_t cluster = (rados_t)client;
+  return rados_conf_parse_argv_remainder(cluster, argc, argv, remargv);
+}
+
+int librados::Rados::conf_parse_env(const char *name) const
+{
+  // Implemented via the C API on the same client handle.
+  rados_t cluster = (rados_t)client;
+  return rados_conf_parse_env(cluster, name);
+}
+
+int librados::Rados::conf_set(const char *option, const char *value)
+{
+  // Implemented via the C API on the same client handle.
+  rados_t cluster = (rados_t)client;
+  return rados_conf_set(cluster, option, value);
+}
+
+int librados::Rados::conf_get(const char *option, std::string &val)
+{
+  // get_val() hands back a malloc-style buffer that we free on every path.
+  char *raw = NULL;
+  const auto& conf = client->cct->_conf;
+  const int rc = conf.get_val(option, &raw, -1);
+  // val is only overwritten when the lookup succeeded.
+  if (rc == 0) {
+    val = raw;
+  }
+  free(raw);
+  return rc;
+}
+
+int librados::Rados::service_daemon_register(
+    const std::string& service, ///< service name (e.g., 'rgw')
+    const std::string& name, ///< daemon name (e.g., 'gwfoo')
+    const std::map<std::string,std::string>& metadata) ///< static metadata about daemon
+{
+  // Thin forwarder to RadosClient::service_daemon_register.
+  const int rc = client->service_daemon_register(service, name, metadata);
+  return rc;
+}
+
+int librados::Rados::service_daemon_update_status(
+    std::map<std::string,std::string>&& status)
+{
+  // The status map is moved on to the client rather than copied.
+  const int rc = client->service_daemon_update_status(std::move(status));
+  return rc;
+}
+
+int librados::Rados::pool_create(const char *name)
+{
+  // The client API takes the pool name as a string object.
+  string pool_name(name);
+  const int rc = client->pool_create(pool_name);
+  return rc;
+}
+
+int librados::Rados::pool_create(const char *name, uint64_t auid)
+{
+  // Only the default auid is accepted; anything else is rejected up front.
+  if (auid == CEPH_AUTH_UID_DEFAULT) {
+    string pool_name(name);
+    return client->pool_create(pool_name);
+  }
+  return -EINVAL;
+}
+
+int librados::Rados::pool_create(const char *name, uint64_t auid, __u8 crush_rule)
+{
+  // Only the default auid is accepted; anything else is rejected up front.
+  if (auid == CEPH_AUTH_UID_DEFAULT) {
+    string pool_name(name);
+    return client->pool_create(pool_name, crush_rule);
+  }
+  return -EINVAL;
+}
+
+int librados::Rados::pool_create_with_rule(const char *name, __u8 crush_rule)
+{
+  // Same as the three-argument pool_create() without the auid check.
+  string pool_name(name);
+  const int rc = client->pool_create(pool_name, crush_rule);
+  return rc;
+}
+
+int librados::Rados::pool_create_async(const char *name, PoolAsyncCompletion *c)
+{
+  // The client API takes the pool name as a string object.
+  string pool_name(name);
+  const int rc = client->pool_create_async(pool_name, c->pc);
+  return rc;
+}
+
+int librados::Rados::pool_create_async(const char *name, uint64_t auid, PoolAsyncCompletion *c)
+{
+  // Only the default auid is accepted; anything else is rejected up front.
+  if (auid == CEPH_AUTH_UID_DEFAULT) {
+    string pool_name(name);
+    return client->pool_create_async(pool_name, c->pc);
+  }
+  return -EINVAL;
+}
+
+int librados::Rados::pool_create_async(const char *name, uint64_t auid, __u8 crush_rule,
+                                       PoolAsyncCompletion *c)
+{
+  // Only the default auid is accepted; anything else is rejected up front.
+  if (auid == CEPH_AUTH_UID_DEFAULT) {
+    string pool_name(name);
+    return client->pool_create_async(pool_name, c->pc, crush_rule);
+  }
+  return -EINVAL;
+}
+
+int librados::Rados::pool_create_with_rule_async(
+    const char *name, __u8 crush_rule,
+    PoolAsyncCompletion *c)
+{
+  // Same as the four-argument pool_create_async() without the auid check.
+  string pool_name(name);
+  const int rc = client->pool_create_async(pool_name, c->pc, crush_rule);
+  return rc;
+}
+
+int librados::Rados::pool_get_base_tier(int64_t pool_id, int64_t* base_tier)
+{
+  // The client call is bracketed by enter/exit tracepoints; the exit
+  // tracepoint reads *base_tier regardless of the outcome.
+  tracepoint(librados, rados_pool_get_base_tier_enter, (rados_t)client, pool_id);
+  const int rc = client->pool_get_base_tier(pool_id, base_tier);
+  tracepoint(librados, rados_pool_get_base_tier_exit, rc, *base_tier);
+  return rc;
+}
+
+int librados::Rados::pool_delete(const char *name)
+{
+  // Thin forwarder to RadosClient::pool_delete.
+  const int rc = client->pool_delete(name);
+  return rc;
+}
+
+int librados::Rados::pool_delete_async(const char *name, PoolAsyncCompletion *c)
+{
+  // Thin forwarder to RadosClient::pool_delete_async.
+  const int rc = client->pool_delete_async(name, c->pc);
+  return rc;
+}
+
+int librados::Rados::pool_list(std::list<std::string>& v)
+{
+  // The client reports (id, name) pairs; this overload keeps only the names.
+  std::list<std::pair<int64_t, std::string> > id_name_pairs;
+  const int rc = client->pool_list(id_name_pairs);
+  if (rc < 0) {
+    return rc;  // v is left untouched on failure
+  }
+
+  v.clear();
+  for (const auto& entry : id_name_pairs) {
+    v.push_back(entry.second);
+  }
+  return 0;
+}
+
+int librados::Rados::pool_list2(std::list<std::pair<int64_t, std::string> >& v)
+{
+  // Passes the caller's (id, name) list straight to the client.
+  const int rc = client->pool_list(v);
+  return rc;
+}
+
+int64_t librados::Rados::pool_lookup(const char *name)
+{
+  // Returns whatever RadosClient::lookup_pool() reports for the name.
+  const int64_t pool_id = client->lookup_pool(name);
+  return pool_id;
+}
+
+int librados::Rados::pool_reverse_lookup(int64_t id, std::string *name)
+{
+  // Calls pool_get_name() with its third argument set to true.
+  const int rc = client->pool_get_name(id, name, true);
+  return rc;
+}
+
+int librados::Rados::mon_command(string cmd, const bufferlist& inbl,
+                                 bufferlist *outbl, string *outs)
+{
+  // The client API takes a vector of commands; wrap the single one.
+  vector<string> cmdvec(1, cmd);
+  const int rc = client->mon_command(cmdvec, inbl, outbl, outs);
+  return rc;
+}
+
+int librados::Rados::osd_command(int osdid, std::string cmd, const bufferlist& inbl,
+                                 bufferlist *outbl, std::string *outs)
+{
+  // The client API takes a vector of commands; wrap the single one.
+  vector<string> cmdvec(1, cmd);
+  const int rc = client->osd_command(osdid, cmdvec, inbl, outbl, outs);
+  return rc;
+}
+
+int librados::Rados::mgr_command(std::string cmd, const bufferlist& inbl,
+                                 bufferlist *outbl, std::string *outs)
+{
+  // The client API takes a vector of commands; wrap the single one.
+  vector<string> cmdvec(1, cmd);
+  const int rc = client->mgr_command(cmdvec, inbl, outbl, outs);
+  return rc;
+}
+
+
+
+int librados::Rados::pg_command(const char *pgstr, std::string cmd, const bufferlist& inbl,
+                                bufferlist *outbl, std::string *outs)
+{
+  // Reject a placement-group string that does not parse.
+  pg_t pgid;
+  if (!pgid.parse(pgstr)) {
+    return -EINVAL;
+  }
+
+  // The client API takes a vector of commands; wrap the single one.
+  vector<string> cmdvec(1, cmd);
+  return client->pg_command(pgid, cmdvec, inbl, outbl, outs);
+}
+
+int librados::Rados::ioctx_create(const char *name, IoCtx &io)
+{
+  rados_ioctx_t handle;
+  const int rc = rados_ioctx_create((rados_t)client, name, &handle);
+  if (rc == 0) {
+    // Close whatever io referred to before adopting the new implementation.
+    io.close();
+    io.io_ctx_impl = (IoCtxImpl*)handle;
+  }
+  return rc;
+}
+
+int librados::Rados::ioctx_create2(int64_t pool_id, IoCtx &io)
+{
+  rados_ioctx_t handle;
+  const int rc = rados_ioctx_create2((rados_t)client, pool_id, &handle);
+  if (rc == 0) {
+    // Close whatever io referred to before adopting the new implementation.
+    io.close();
+    io.io_ctx_impl = (IoCtxImpl*)handle;
+  }
+  return rc;
+}
+
+void librados::Rados::test_blocklist_self(bool set)
+{
+  // Thin forwarder to RadosClient::blocklist_self.
+  RadosClient *const target = client;
+  target->blocklist_self(set);
+}
+
+int librados::Rados::get_pool_stats(std::list<string>& v,
+                                    stats_map& result)
+{
+  // Fetch the internal per-pool statistics, then translate every entry
+  // into the public pool_stat_t layout.
+  map<string,::pool_stat_t> raw_stats;
+  bool per_pool = false;
+  const int rc = client->get_pool_stats(v, &raw_stats, &per_pool);
+  for (auto& entry : raw_stats) {
+    pool_stat_t& out = result[entry.first];
+    auto& pstat = entry.second;
+    store_statfs_t &statfs = pstat.store_stats;
+    const uint64_t allocated_bytes = pstat.get_allocated_data_bytes(per_pool) +
+      pstat.get_allocated_omap_bytes(per_pool);
+    // FIXME: raw_used_rate is unknown hence use 1.0 here
+    // meaning we keep net amount aggregated over all replicas
+    // Not a big deal so far since this field isn't exposed
+    const uint64_t user_bytes = pstat.get_user_data_bytes(1.0, per_pool) +
+      pstat.get_user_omap_bytes(1.0, per_pool);
+
+    const object_stat_sum_t& sum = pstat.stats.sum;
+    // Space usage, derived from the allocated byte count.
+    out.num_kb = shift_round_up(allocated_bytes, 10);
+    out.num_bytes = allocated_bytes;
+    // Object counters copied from the summed object stats.
+    out.num_objects = sum.num_objects;
+    out.num_object_clones = sum.num_object_clones;
+    out.num_object_copies = sum.num_object_copies;
+    out.num_objects_missing_on_primary = sum.num_objects_missing_on_primary;
+    out.num_objects_unfound = sum.num_objects_unfound;
+    out.num_objects_degraded = sum.num_objects_degraded;
+    // Read/write counters.
+    out.num_rd = sum.num_rd;
+    out.num_rd_kb = sum.num_rd_kb;
+    out.num_wr = sum.num_wr;
+    out.num_wr_kb = sum.num_wr_kb;
+    out.num_user_bytes = user_bytes;
+    // Compression figures come from the store-level statfs.
+    out.compressed_bytes_orig = statfs.data_compressed_original;
+    out.compressed_bytes = statfs.data_compressed;
+    out.compressed_bytes_alloc = statfs.data_compressed_allocated;
+  }
+  return rc;
+}
+
+int librados::Rados::get_pool_stats(std::list<string>& v,
+                                    std::map<string, stats_map>& result)
+{
+  // Legacy shape: each pool's stats are stored under the empty-string key.
+  stats_map flat;
+  const int rc = get_pool_stats(v, flat);
+  if (rc < 0) {
+    return rc;
+  }
+  for (auto& entry : flat) {
+    result[entry.first][string()] = entry.second;
+  }
+  return rc;
+}
+
+// Category-based overload: no arguments are consulted and the call always
+// fails with -EOPNOTSUPP.
+int librados::Rados::get_pool_stats(std::list<string>& v,
+ string& category, // unused
+ std::map<string, stats_map>& result)
+{
+ return -EOPNOTSUPP;
+}
+
+bool librados::Rados::get_pool_is_selfmanaged_snaps_mode(const std::string& pool)
+{
+  // Returns the client's answer for the named pool unchanged.
+  const bool selfmanaged = client->get_pool_is_selfmanaged_snaps_mode(pool);
+  return selfmanaged;
+}
+
+// Fills `result` with cluster-wide usage figures obtained from the client.
+// Returns the status of RadosClient::get_fs_stats(); on a negative status
+// `result` is left untouched.
+int librados::Rados::cluster_stat(cluster_stat_t& result)
+{
+  ceph_statfs stats;
+  int r = client->get_fs_stats(stats);
+  if (r < 0) {
+    // `stats` has no initializer and may not have been filled in; copying
+    // it would hand indeterminate values to the caller.
+    return r;
+  }
+  result.kb = stats.kb;
+  result.kb_used = stats.kb_used;
+  result.kb_avail = stats.kb_avail;
+  result.num_objects = stats.num_objects;
+  return r;
+}
+
+int librados::Rados::cluster_fsid(string *fsid)
+{
+  // Thin forwarder to RadosClient::get_fsid.
+  const int rc = client->get_fsid(fsid);
+  return rc;
+}
+
+namespace librados {
+  // Private state behind PlacementGroup: just the parsed pg id.
+  struct PlacementGroupImpl {
+    pg_t pgid;
+  };
+
+  // Starts out with a default-constructed pg id.
+  PlacementGroup::PlacementGroup()
+    : impl{new PlacementGroupImpl}
+  {}
+
+  // Deep copy: the new object gets its own impl holding the same pg id.
+  PlacementGroup::PlacementGroup(const PlacementGroup& pg)
+    : impl{new PlacementGroupImpl{pg.impl->pgid}}
+  {}
+
+  // Defined here, where PlacementGroupImpl is a complete type.
+  PlacementGroup::~PlacementGroup() = default;
+
+  // Parses s into the stored pg id; returns the parser's verdict.
+  bool PlacementGroup::parse(const char* s)
+  {
+    const bool ok = impl->pgid.parse(s);
+    return ok;
+  }
+}
+
+std::ostream& librados::operator<<(std::ostream& out,
+                                   const librados::PlacementGroup& pg)
+{
+  // Prints the wrapped pg id using its own stream operator.
+  const auto& pgid = pg.impl->pgid;
+  return out << pgid;
+}
+
+int librados::Rados::get_inconsistent_pgs(int64_t pool_id,
+                                          std::vector<PlacementGroup>* pgs)
+{
+  // The client reports pg ids as strings; parse each into a PlacementGroup.
+  std::vector<string> pgids;
+  const auto ret = client->get_inconsistent_pgs(pool_id, &pgids);
+  if (ret) {
+    return ret;
+  }
+  for (const auto& text : pgids) {
+    librados::PlacementGroup parsed;
+    // Stop at the first unparsable id; entries appended so far remain in pgs.
+    if (!parsed.parse(text.c_str())) {
+      return -EINVAL;
+    }
+    pgs->emplace_back(parsed);
+  }
+  return 0;
+}
+
+int librados::Rados::get_inconsistent_objects(const PlacementGroup& pg,
+                                              const object_id_t &start_after,
+                                              unsigned max_return,
+                                              AioCompletion *c,
+                                              std::vector<inconsistent_obj_t>* objects,
+                                              uint32_t* interval)
+{
+  // Open a temporary IoCtx on the pool that owns the placement group.
+  const pg_t pgid = pg.impl->pgid;
+  IoCtx pool_ctx;
+  const int rc = ioctx_create2(pgid.pool(), pool_ctx);
+  if (rc < 0) {
+    return rc;
+  }
+
+  // The query itself is issued through that context's implementation.
+  IoCtxImpl *const impl = pool_ctx.io_ctx_impl;
+  return impl->get_inconsistent_objects(pgid, start_after, max_return,
+                                        c->pc, objects, interval);
+}
+
+int librados::Rados::get_inconsistent_snapsets(const PlacementGroup& pg,
+                                               const object_id_t &start_after,
+                                               unsigned max_return,
+                                               AioCompletion *c,
+                                               std::vector<inconsistent_snapset_t>* snapsets,
+                                               uint32_t* interval)
+{
+  // Open a temporary IoCtx on the pool that owns the placement group.
+  const pg_t pgid = pg.impl->pgid;
+  IoCtx pool_ctx;
+  const int rc = ioctx_create2(pgid.pool(), pool_ctx);
+  if (rc < 0) {
+    return rc;
+  }
+
+  // The query itself is issued through that context's implementation.
+  IoCtxImpl *const impl = pool_ctx.io_ctx_impl;
+  return impl->get_inconsistent_snapsets(pgid, start_after, max_return,
+                                         c->pc, snapsets, interval);
+}
+
+int librados::Rados::wait_for_latest_osdmap()
+{
+  // Thin forwarder to RadosClient::wait_for_latest_osdmap.
+  const int rc = client->wait_for_latest_osdmap();
+  return rc;
+}
+
+int librados::Rados::blocklist_add(const std::string& client_address,
+                                   uint32_t expire_seconds)
+{
+  // Thin forwarder to RadosClient::blocklist_add.
+  const int rc = client->blocklist_add(client_address, expire_seconds);
+  return rc;
+}
+
+std::string librados::Rados::get_addrs() const {
+  // Returns the address string reported by the client.
+  std::string addrs = client->get_addrs();
+  return addrs;
+}
+
+librados::PoolAsyncCompletion *librados::Rados::pool_async_create_completion()
+{
+  // Allocate the implementation first, then the public wrapper around it.
+  auto *impl = new PoolAsyncCompletionImpl;
+  return new PoolAsyncCompletion(impl);
+}
+
+librados::AioCompletion *librados::Rados::aio_create_completion()
+{
+  // Callback-less completion: allocate the implementation, then wrap it.
+  auto *impl = new AioCompletionImpl;
+  return new AioCompletion(impl);
+}
+
+librados::AioCompletion *librados::Rados::aio_create_completion(void *cb_arg,
+                                                                callback_t cb_complete,
+                                                                callback_t cb_safe)
+{
+  // The C API builds the implementation object; creation is asserted to succeed.
+  AioCompletionImpl *impl;
+  const int rc = rados_aio_create_completion(cb_arg, cb_complete, cb_safe, (void**)&impl);
+  ceph_assert(rc == 0);
+  return new AioCompletion(impl);
+}
+
+librados::AioCompletion *librados::Rados::aio_create_completion(void *cb_arg,
+                                                                callback_t cb_complete)
+{
+  // The C API builds the implementation object; creation is asserted to succeed.
+  AioCompletionImpl *impl;
+  const int rc = rados_aio_create_completion2(cb_arg, cb_complete, (void**)&impl);
+  ceph_assert(rc == 0);
+  return new AioCompletion(impl);
+}
+
+// Every operation owns a freshly allocated implementation object.
+librados::ObjectOperation::ObjectOperation()
+  : impl(new ObjectOperationImpl)
+{
+}
+
+librados::ObjectOperation::ObjectOperation(ObjectOperation&& rhs)
+  : impl(nullptr) {
+  // Take over the source's implementation and leave the source empty.
+  impl = rhs.impl;
+  rhs.impl = nullptr;
+}
+
+// Move assignment: releases the current implementation and takes over the
+// one owned by rhs, leaving rhs empty.
+librados::ObjectOperation&
+librados::ObjectOperation::operator =(ObjectOperation&& rhs) {
+  // Self-move must be a no-op: otherwise impl would be deleted and the
+  // dangling pointer copied back onto itself before being nulled.
+  if (this != &rhs) {
+    delete impl;
+    impl = rhs.impl;
+    rhs.impl = nullptr;
+  }
+  return *this;
+}
+
+librados::ObjectOperation::~ObjectOperation()
+{
+  // impl may be null after a move; deleting a null pointer is a no-op.
+  delete impl;
+}
+
+///////////////////////////// ListObject //////////////////////////////
+// An empty ListObject has no implementation attached.
+librados::ListObject::ListObject()
+  : impl(nullptr) {}
+
+// Adopts the given implementation pointer; the destructor deletes it.
+librados::ListObject::ListObject(librados::ListObjectImpl *i)
+  : impl(i) {}
+
+// Deep copy: a non-empty source gets its implementation duplicated,
+// an empty source yields an empty copy.
+librados::ListObject::ListObject(const ListObject& rhs)
+  : impl(rhs.impl ? new ListObjectImpl() : NULL)
+{
+  if (impl != NULL) {
+    *impl = *(rhs.impl);
+  }
+}
+
+librados::ListObject& librados::ListObject::operator=(const ListObject& rhs)
+{
+  if (rhs.impl != NULL) {
+    // Reuse our implementation when present, otherwise allocate one.
+    if (impl == NULL) {
+      impl = new ListObjectImpl();
+    }
+    *impl = *(rhs.impl);
+  } else {
+    // Assigning from an empty object empties this one as well.
+    delete impl;
+    impl = NULL;
+  }
+  return *this;
+}
+
+librados::ListObject::~ListObject()
+{
+  // delete tolerates a null pointer, so no explicit check is needed.
+  delete impl;
+  impl = NULL;
+}
+
+const std::string& librados::ListObject::get_nspace() const
+{
+  // Forwards to the implementation; impl is not null-checked here.
+  const librados::ListObjectImpl *const item = impl;
+  return item->get_nspace();
+}
+
+const std::string& librados::ListObject::get_oid() const
+{
+  // Forwards to the implementation; impl is not null-checked here.
+  const librados::ListObjectImpl *const item = impl;
+  return item->get_oid();
+}
+
+const std::string& librados::ListObject::get_locator() const
+{
+  // Forwards to the implementation; impl is not null-checked here.
+  const librados::ListObjectImpl *const item = impl;
+  return item->get_locator();
+}
+
+std::ostream& librados::operator<<(std::ostream& out, const librados::ListObject& lop)
+{
+  // Printing is delegated to the implementation's own stream operator.
+  const librados::ListObjectImpl& item = *(lop.impl);
+  out << item;
+  return out;
+}
+
+librados::ObjectCursor::ObjectCursor()
+{
+  // The cursor is an opaque handle to a heap-allocated hobject_t.
+  hobject_t *const fresh = new hobject_t();
+  c_cursor = (rados_object_list_cursor)fresh;
+}
+
+librados::ObjectCursor::~ObjectCursor()
+{
+  // Release the hobject_t behind the opaque handle (null is fine).
+  delete (hobject_t *)c_cursor;
+}
+
+librados::ObjectCursor::ObjectCursor(rados_object_list_cursor c)
+{
+  // Copies the hobject_t behind c; a null handle yields a null cursor.
+  c_cursor = c ? (rados_object_list_cursor)new hobject_t(*(hobject_t *)c)
+               : nullptr;
+}
+
+// Copy assignment: replaces this cursor's hobject_t with a deep copy of
+// rhs's (or null when rhs is null) and frees the one previously owned.
+librados::ObjectCursor& librados::ObjectCursor::operator=(const librados::ObjectCursor& rhs)
+{
+  if (this != &rhs) {
+    // Copy first, then release, so a throwing copy leaves *this intact.
+    hobject_t *copy = nullptr;
+    if (rhs.c_cursor != nullptr) {
+      copy = new hobject_t(*(hobject_t*)rhs.c_cursor);
+    }
+    // The old object used to be leaked here.
+    delete (hobject_t*)c_cursor;
+    c_cursor = (rados_object_list_cursor)copy;
+  }
+  return *this;
+}
+
+// Ordering on the underlying hobject_t; a null cursor compares like a
+// default-constructed hobject_t.
+bool librados::ObjectCursor::operator<(const librados::ObjectCursor &rhs) const
+{
+  const hobject_t lhs_hobj = (c_cursor == nullptr) ? hobject_t() : *((hobject_t*)c_cursor);
+  const hobject_t rhs_hobj = (rhs.c_cursor == nullptr) ? hobject_t() : *((hobject_t*)(rhs.c_cursor));
+  return lhs_hobj < rhs_hobj;
+}
+
+// Equality on the underlying hobject_t; a null cursor compares like a
+// default-constructed hobject_t.
+bool librados::ObjectCursor::operator==(const librados::ObjectCursor &rhs) const
+{
+  const hobject_t lhs_hobj = (c_cursor == nullptr) ? hobject_t() : *((hobject_t*)c_cursor);
+  const hobject_t rhs_hobj = (rhs.c_cursor == nullptr) ? hobject_t() : *((hobject_t*)(rhs.c_cursor));
+  return cmp(lhs_hobj, rhs_hobj) == 0;
+}
+
+// Copy construction deep-copies directly. It must not go through
+// operator=, which now deletes the (here still uninitialised) old cursor.
+librados::ObjectCursor::ObjectCursor(const librados::ObjectCursor &rhs)
+{
+  if (rhs.c_cursor != nullptr) {
+    c_cursor = (rados_object_list_cursor)(new hobject_t(*(hobject_t*)rhs.c_cursor));
+  } else {
+    c_cursor = nullptr;
+  }
+}
+
+librados::ObjectCursor librados::IoCtx::object_list_begin()
+{
+  // Ask the objecter for the start position and hand ownership of the
+  // heap copy to the returned cursor via set().
+  hobject_t *const start = new hobject_t(io_ctx_impl->objecter->enumerate_objects_begin());
+  ObjectCursor cursor;
+  cursor.set((rados_object_list_cursor)start);
+  return cursor;
+}
+
+
+/// Return a cursor positioned at the value the objecter reports as the end
+/// of object enumeration.
+librados::ObjectCursor librados::IoCtx::object_list_end()
+{
+  hobject_t *last = new hobject_t(io_ctx_impl->objecter->enumerate_objects_end());
+  librados::ObjectCursor cursor;
+  // set() takes ownership of `last` and frees the cursor's default position
+  cursor.set((rados_object_list_cursor)last);
+  return cursor;
+}
+
+
+/// Take ownership of the hobject_t behind @p c, freeing the one held so far.
+/// Passing the handle the cursor already holds is a no-op; previously that
+/// freed the object and left the cursor pointing at it.
+void librados::ObjectCursor::set(rados_object_list_cursor c)
+{
+  if (c == c_cursor) {
+    return;
+  }
+  delete (hobject_t*)c_cursor;
+  c_cursor = c;
+}
+
+/// Render the cursor position as a string. A null cursor is rendered as a
+/// default-constructed hobject_t, matching operator<< for ObjectCursor,
+/// instead of dereferencing a null pointer.
+string librados::ObjectCursor::to_str() const
+{
+  stringstream ss;
+  if (c_cursor != nullptr) {
+    ss << *(hobject_t *)c_cursor;
+  } else {
+    ss << hobject_t();
+  }
+  return ss.str();
+}
+
+/// Set the cursor position from its string form.
+/// An empty string resets the position to a default-constructed hobject_t.
+/// @return true on success, otherwise the result of hobject_t::parse().
+bool librados::ObjectCursor::from_str(const string& s)
+{
+  // a cursor built from a null handle has no storage yet; allocate it rather
+  // than writing through a null pointer
+  if (c_cursor == nullptr) {
+    c_cursor = (rados_object_list_cursor)new hobject_t();
+  }
+  hobject_t *h = (hobject_t *)c_cursor;
+  if (s.empty()) {
+    *h = hobject_t();
+    return true;
+  }
+  return h->parse(s);
+}
+
+/// Stream a cursor position; a null cursor prints as a default hobject_t.
+CEPH_RADOS_API std::ostream& librados::operator<<(std::ostream& os, const librados::ObjectCursor& oc)
+{
+  const hobject_t *position = (hobject_t *)oc.c_cursor;
+  if (position == nullptr) {
+    os << hobject_t();
+  } else {
+    os << *position;
+  }
+  return os;
+}
+
+/// Report whether @p oc is at the maximum position (hobject_t::is_max()).
+/// A null cursor is evaluated as a default-constructed hobject_t, which is
+/// how the ObjectCursor comparison operators treat it, instead of being
+/// dereferenced.
+bool librados::IoCtx::object_list_is_end(const ObjectCursor &oc)
+{
+  const hobject_t *h = (hobject_t *)oc.c_cursor;
+  if (h == nullptr) {
+    return hobject_t().is_max();
+  }
+  return h->is_max();
+}
+
+/// Enumerate objects between two cursors and wait for the result.
+///
+/// @param start             first position of the range
+/// @param finish            end position of the range
+/// @param result_item_count limit handed to the objecter
+/// @param filter            filter handed to the objecter
+/// @param result            cleared, then filled with the returned entries (must not be null)
+/// @param next              set to the position to resume from, or to the
+///                          maximum position on error (must not be null)
+/// @return number of entries returned, or the converted error code on failure
+///
+/// NOTE(review): start.c_cursor and finish.c_cursor are dereferenced without
+/// a null check.
+int librados::IoCtx::object_list(const ObjectCursor &start,
+                                 const ObjectCursor &finish,
+                                 const size_t result_item_count,
+                                 const bufferlist &filter,
+                                 std::vector<ObjectItem> *result,
+                                 ObjectCursor *next)
+{
+  ceph_assert(result != nullptr);
+  ceph_assert(next != nullptr);
+  result->clear();
+
+  ceph::async::waiter<boost::system::error_code,
+                      std::vector<librados::ListObjectImpl>,
+                      hobject_t> w;
+  io_ctx_impl->objecter->enumerate_objects<librados::ListObjectImpl>(
+    io_ctx_impl->poolid,
+    io_ctx_impl->oloc.nspace,
+    *((hobject_t*)start.c_cursor),
+    *((hobject_t*)finish.c_cursor),
+    result_item_count,
+    filter,
+    w);
+
+  // block until the enumeration has finished
+  auto [ec, obj_result, next_hash] = w.wait();
+  if (ec) {
+    next->set((rados_object_list_cursor)(new hobject_t(hobject_t::get_max())));
+    return ceph::from_error_code(ec);
+  }
+
+  next->set((rados_object_list_cursor)(new hobject_t(next_hash)));
+
+  // translate the internal entries into the public ObjectItem type
+  for (const auto& entry : obj_result) {
+    ObjectItem item;
+    item.oid = entry.oid;
+    item.nspace = entry.nspace;
+    item.locator = entry.locator;
+    result->push_back(item);
+  }
+
+  return obj_result.size();
+}
+
+/// Forward a slice request to IoCtxImpl::object_list_slice().
+///
+/// start and finish are passed through as hobject_t values; n and m are
+/// passed through unchanged. The hobject_t objects already held by
+/// split_start and split_finish are handed to the implementation as output
+/// locations, so both pointers must be non-null (asserted).
+/// NOTE(review): none of the four c_cursor members is checked for null
+/// before being dereferenced or passed on.
+void librados::IoCtx::object_list_slice(
+ const ObjectCursor start,
+ const ObjectCursor finish,
+ const size_t n,
+ const size_t m,
+ ObjectCursor *split_start,
+ ObjectCursor *split_finish)
+{
+ ceph_assert(split_start != nullptr);
+ ceph_assert(split_finish != nullptr);
+
+ io_ctx_impl->object_list_slice(
+ *((hobject_t*)(start.c_cursor)),
+ *((hobject_t*)(finish.c_cursor)),
+ n,
+ m,
+ (hobject_t*)(split_start->c_cursor),
+ (hobject_t*)(split_finish->c_cursor));
+}
+
+/// Forward to IoCtxImpl::application_enable() and return its result.
+int librados::IoCtx::application_enable(const std::string& app_name,
+ bool force)
+{
+ return io_ctx_impl->application_enable(app_name, force);
+}
+
+/// Forward to IoCtxImpl::application_enable_async(), passing the completion's
+/// implementation object (c->pc). This function itself always returns 0.
+/// NOTE(review): c is dereferenced without a null check.
+int librados::IoCtx::application_enable_async(const std::string& app_name,
+ bool force,
+ PoolAsyncCompletion *c)
+{
+ io_ctx_impl->application_enable_async(app_name, force, c->pc);
+ return 0;
+}
+
+/// Forward to IoCtxImpl::application_list() and return its result.
+int librados::IoCtx::application_list(std::set<std::string> *app_names)
+{
+ return io_ctx_impl->application_list(app_names);
+}
+
+/// Forward to IoCtxImpl::application_metadata_get() and return its result.
+int librados::IoCtx::application_metadata_get(const std::string& app_name,
+ const std::string &key,
+ std::string* value)
+{
+ return io_ctx_impl->application_metadata_get(app_name, key, value);
+}
+
+/// Forward to IoCtxImpl::application_metadata_set() and return its result.
+int librados::IoCtx::application_metadata_set(const std::string& app_name,
+ const std::string &key,
+ const std::string& value)
+{
+ return io_ctx_impl->application_metadata_set(app_name, key, value);
+}
+
+/// Forward to IoCtxImpl::application_metadata_remove() and return its result.
+int librados::IoCtx::application_metadata_remove(const std::string& app_name,
+ const std::string &key)
+{
+ return io_ctx_impl->application_metadata_remove(app_name, key);
+}
+
+/// Forward to IoCtxImpl::application_metadata_list() and return its result.
+int librados::IoCtx::application_metadata_list(const std::string& app_name,
+ std::map<std::string, std::string> *values)
+{
+ return io_ctx_impl->application_metadata_list(app_name, values);
+}
diff --git a/src/librados/librados_tp.cc b/src/librados/librados_tp.cc
new file mode 100644
index 000000000..b696de871
--- /dev/null
+++ b/src/librados/librados_tp.cc
@@ -0,0 +1,9 @@
+#include "acconfig.h"
+
+#ifdef WITH_LTTNG
+#define TRACEPOINT_DEFINE
+#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE
+#include "tracing/librados.h"
+#undef TRACEPOINT_PROBE_DYNAMIC_LINKAGE
+#undef TRACEPOINT_DEFINE
+#endif
diff --git a/src/librados/librados_util.cc b/src/librados/librados_util.cc
new file mode 100644
index 000000000..72cd96947
--- /dev/null
+++ b/src/librados/librados_util.cc
@@ -0,0 +1,63 @@
+#include "librados_util.h"
+
+/// Map a librados checksum type onto the matching OSD checksum op type.
+/// Any other value yields -1 converted to uint8_t.
+uint8_t get_checksum_op_type(rados_checksum_type_t type) {
+  if (type == LIBRADOS_CHECKSUM_TYPE_XXHASH32) {
+    return CEPH_OSD_CHECKSUM_OP_TYPE_XXHASH32;
+  }
+  if (type == LIBRADOS_CHECKSUM_TYPE_XXHASH64) {
+    return CEPH_OSD_CHECKSUM_OP_TYPE_XXHASH64;
+  }
+  if (type == LIBRADOS_CHECKSUM_TYPE_CRC32C) {
+    return CEPH_OSD_CHECKSUM_OP_TYPE_CRC32C;
+  }
+  // unknown type: the return type is unsigned, so callers see the wrapped value
+  return -1;
+}
+
+/// Translate a mask of LIBRADOS_OP_FLAG_* bits into the corresponding
+/// CEPH_OSD_OP_FLAG_* mask. Bits without a mapping are dropped.
+int get_op_flags(int flags)
+{
+  int rados_flags = 0;
+  // set `osd_bit` in the result whenever `api_bit` is present in the input
+  auto carry_over = [&](int api_bit, int osd_bit) {
+    if (flags & api_bit)
+      rados_flags |= osd_bit;
+  };
+  carry_over(LIBRADOS_OP_FLAG_EXCL, CEPH_OSD_OP_FLAG_EXCL);
+  carry_over(LIBRADOS_OP_FLAG_FAILOK, CEPH_OSD_OP_FLAG_FAILOK);
+  carry_over(LIBRADOS_OP_FLAG_FADVISE_RANDOM, CEPH_OSD_OP_FLAG_FADVISE_RANDOM);
+  carry_over(LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL, CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL);
+  carry_over(LIBRADOS_OP_FLAG_FADVISE_WILLNEED, CEPH_OSD_OP_FLAG_FADVISE_WILLNEED);
+  carry_over(LIBRADOS_OP_FLAG_FADVISE_DONTNEED, CEPH_OSD_OP_FLAG_FADVISE_DONTNEED);
+  carry_over(LIBRADOS_OP_FLAG_FADVISE_NOCACHE, CEPH_OSD_OP_FLAG_FADVISE_NOCACHE);
+  return rados_flags;
+}
+
+/// Translate a mask of librados::OPERATION_* bits into the corresponding
+/// CEPH_OSD_FLAG_* mask. Bits without a mapping are dropped.
+int translate_flags(int flags)
+{
+  int op_flags = 0;
+  // set `osd_bit` in the result whenever `api_bit` is present in the input
+  auto carry_over = [&](int api_bit, int osd_bit) {
+    if (flags & api_bit)
+      op_flags |= osd_bit;
+  };
+  carry_over(librados::OPERATION_BALANCE_READS, CEPH_OSD_FLAG_BALANCE_READS);
+  carry_over(librados::OPERATION_LOCALIZE_READS, CEPH_OSD_FLAG_LOCALIZE_READS);
+  carry_over(librados::OPERATION_ORDER_READS_WRITES, CEPH_OSD_FLAG_RWORDERED);
+  carry_over(librados::OPERATION_IGNORE_CACHE, CEPH_OSD_FLAG_IGNORE_CACHE);
+  carry_over(librados::OPERATION_SKIPRWLOCKS, CEPH_OSD_FLAG_SKIPRWLOCKS);
+  carry_over(librados::OPERATION_IGNORE_OVERLAY, CEPH_OSD_FLAG_IGNORE_OVERLAY);
+  carry_over(librados::OPERATION_FULL_TRY, CEPH_OSD_FLAG_FULL_TRY);
+  carry_over(librados::OPERATION_FULL_FORCE, CEPH_OSD_FLAG_FULL_FORCE);
+  carry_over(librados::OPERATION_IGNORE_REDIRECT, CEPH_OSD_FLAG_IGNORE_REDIRECT);
+  carry_over(librados::OPERATION_ORDERSNAP, CEPH_OSD_FLAG_ORDERSNAP);
+  carry_over(librados::OPERATION_RETURNVEC, CEPH_OSD_FLAG_RETURNVEC);
+
+  return op_flags;
+}
diff --git a/src/librados/librados_util.h b/src/librados/librados_util.h
new file mode 100644
index 000000000..ab9c461f4
--- /dev/null
+++ b/src/librados/librados_util.h
@@ -0,0 +1,34 @@
+#include <cstdint>
+#include "acconfig.h"
+#include "include/rados/librados.h"
+#include "IoCtxImpl.h"
+
+#ifdef WITH_LTTNG
+#include "tracing/librados.h"
+#else
+#define tracepoint(...)
+#endif
+
+uint8_t get_checksum_op_type(rados_checksum_type_t type);
+int get_op_flags(int flags);
+int translate_flags(int flags);
+
+/// State kept for one object listing: a private duplicate of the caller's
+/// IoCtxImpl plus the objecter listing context, which this struct deletes
+/// in its destructor.
+struct librados::ObjListCtx {
+  librados::IoCtxImpl dupctx;       ///< private copy of the caller's context
+  librados::IoCtxImpl *ctx;         ///< always points at dupctx
+  Objecter::NListContext *nlc;      ///< deleted by the destructor
+  bool legacy_list_api;
+
+  ObjListCtx(IoCtxImpl *c, Objecter::NListContext *nl, bool legacy=false)
+    : ctx(&dupctx),
+      nlc(nl),
+      legacy_list_api(legacy) {
+    // Work on a private IoCtxImpl so that a namespace change made by the
+    // caller between uses does not affect this listing.
+    dupctx.dup(*c);
+  }
+  ~ObjListCtx() {
+    ctx = nullptr;
+    delete nlc;
+  }
+};
diff --git a/src/librados/snap_set_diff.cc b/src/librados/snap_set_diff.cc
new file mode 100644
index 000000000..b42ad9bcd
--- /dev/null
+++ b/src/librados/snap_set_diff.cc
@@ -0,0 +1,116 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#include <vector>
+
+#include "snap_set_diff.h"
+#include "common/ceph_context.h"
+#include "include/rados/librados.hpp"
+#include "include/interval_set.h"
+#include "common/debug.h"
+
+#define dout_subsys ceph_subsys_rados
+
+/**
+ * calculate intervals/extents that vary between two snapshots
+ *
+ * Walks snap_set.clones in order. Each clone is mapped to a snap interval
+ * [a,b]; clones whose interval ends before start are skipped, and the walk
+ * stops at the first clone whose interval reaches or passes end.
+ *
+ * @param cct               context used for debug logging only
+ * @param snap_set          clone list (and seq) of the object
+ * @param start             snap id the diff starts from
+ * @param end               snap id the diff ends at
+ * @param diff              [out] cleared, then filled with the differing extents
+ * @param end_size          [out] size of the last clone that was examined
+ * @param end_exists        [out] true if a clone covering end was found
+ * @param clone_end_snap_id [out] upper bound b of the clone covering end, else 0
+ * @param whole_object      [out] true if a non-HEAD clone has an empty snaps
+ *                          list; diff is then empty and the function returns
+ *                          early
+ */
+void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set,
+ librados::snap_t start, librados::snap_t end,
+ interval_set<uint64_t> *diff, uint64_t *end_size,
+ bool *end_exists, librados::snap_t *clone_end_snap_id,
+ bool *whole_object)
+{
+ ldout(cct, 10) << "calc_snap_set_diff start " << start << " end " << end
+ << ", snap_set seq " << snap_set.seq << dendl;
+ bool saw_start = false;
+ uint64_t start_size = 0;
+ diff->clear();
+ *end_size = 0;
+ *end_exists = false;
+ *clone_end_snap_id = 0;
+ *whole_object = false;
+
+ // note: the iterator is advanced inside the body, not in the for header
+ for (vector<librados::clone_info_t>::const_iterator r = snap_set.clones.begin();
+ r != snap_set.clones.end();
+ ) {
+ // make an interval, and hide the fact that the HEAD doesn't
+ // include itself in the snaps list
+ librados::snap_t a, b;
+ if (r->cloneid == librados::SNAP_HEAD) {
+ // head is valid starting from right after the last seen seq
+ a = snap_set.seq + 1;
+ b = librados::SNAP_HEAD;
+ } else if (r->snaps.empty()) {
+ ldout(cct, 1) << "clone " << r->cloneid
+ << ": empty snaps, return whole object" << dendl;
+ diff->clear();
+ *whole_object = true;
+ return;
+ } else {
+ a = r->snaps[0];
+ // note: b might be < r->cloneid if a snap has been trimmed.
+ b = r->snaps[r->snaps.size()-1];
+ }
+ ldout(cct, 20) << " clone " << r->cloneid << " snaps " << r->snaps
+ << " -> [" << a << "," << b << "]"
+ << " size " << r->size << " overlap to next " << r->overlap << dendl;
+
+ if (b < start) {
+ // this is before start
+ ++r;
+ continue;
+ }
+
+ // first clone whose interval is not entirely before start
+ if (!saw_start) {
+ if (start < a) {
+ ldout(cct, 20) << " start, after " << start << dendl;
+ // this means the object didn't exist at start
+ if (r->size)
+ diff->insert(0, r->size);
+ start_size = 0;
+ } else {
+ ldout(cct, 20) << " start" << dendl;
+ start_size = r->size;
+ }
+ saw_start = true;
+ }
+
+ *end_size = r->size;
+ if (end < a) {
+ ldout(cct, 20) << " past end " << end << ", end object does not exist" << dendl;
+ *end_exists = false;
+ // the accumulated diff is replaced by the whole extent seen at start
+ diff->clear();
+ if (start_size) {
+ diff->insert(0, start_size);
+ }
+ break;
+ }
+ if (end <= b) {
+ ldout(cct, 20) << " end" << dendl;
+ *end_exists = true;
+ *clone_end_snap_id = b;
+ break;
+ }
+
+ // start with the max(this size, next size), and subtract off any
+ // overlap
+ // (the overlap pointer is taken before r is advanced to the next clone)
+ const vector<pair<uint64_t, uint64_t> > *overlap = &r->overlap;
+ interval_set<uint64_t> diff_to_next;
+ uint64_t max_size = r->size;
+ ++r;
+ if (r != snap_set.clones.end()) {
+ if (r->size > max_size)
+ max_size = r->size;
+ }
+ if (max_size)
+ diff_to_next.insert(0, max_size);
+ for (vector<pair<uint64_t, uint64_t> >::const_iterator p = overlap->begin();
+ p != overlap->end();
+ ++p) {
+ diff_to_next.erase(p->first, p->second);
+ }
+ ldout(cct, 20) << " diff_to_next " << diff_to_next << dendl;
+ diff->union_of(diff_to_next);
+ ldout(cct, 20) << " diff now " << *diff << dendl;
+ }
+}
diff --git a/src/librados/snap_set_diff.h b/src/librados/snap_set_diff.h
new file mode 100644
index 000000000..33deeb3ae
--- /dev/null
+++ b/src/librados/snap_set_diff.h
@@ -0,0 +1,18 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+
+#ifndef __CEPH_OSDC_SNAP_SET_DIFF_H
+#define __CEPH_OSDC_SNAP_SET_DIFF_H
+
+#include "include/common_fwd.h"
+#include "include/rados/rados_types.hpp"
+#include "include/interval_set.h"
+
+/// Calculate the extents of an object that differ between snapshots start
+/// and end; all pointer parameters are outputs written by the function.
+void calc_snap_set_diff(CephContext *cct,
+ const librados::snap_set_t& snap_set,
+ librados::snap_t start, librados::snap_t end,
+ interval_set<uint64_t> *diff, uint64_t *end_size,
+ bool *end_exists, librados::snap_t *clone_end_snap_id,
+ bool *whole_object);
+
+#endif
diff --git a/src/libradosstriper/CMakeLists.txt b/src/libradosstriper/CMakeLists.txt
new file mode 100644
index 000000000..a69192465
--- /dev/null
+++ b/src/libradosstriper/CMakeLists.txt
@@ -0,0 +1,17 @@
+# Shared library implementing the rados striper API on top of librados.
+set(libradosstriper_srcs
+  libradosstriper.cc
+  RadosStriperImpl.cc
+  MultiAioCompletionImpl.cc)
+add_library(radosstriper ${CEPH_SHARED}
+  ${libradosstriper_srcs})
+target_link_libraries(radosstriper
+  PRIVATE
+    librados
+    librados_impl cls_lock_client osdc ceph-common
+    pthread ${CRYPTO_LIBS} ${EXTRALIBS})
+# The property name used to be misspelled "OUPUT_NAME", which CMake stores as
+# an unrelated user property, so the intended OUTPUT_NAME was never set.
+set_target_properties(radosstriper PROPERTIES
+  OUTPUT_NAME radosstriper
+  VERSION 1.0.0
+  SOVERSION 1)
+
+install(TARGETS radosstriper DESTINATION ${CMAKE_INSTALL_LIBDIR})
diff --git a/src/libradosstriper/MultiAioCompletionImpl.cc b/src/libradosstriper/MultiAioCompletionImpl.cc
new file mode 100644
index 000000000..acf9e0b6b
--- /dev/null
+++ b/src/libradosstriper/MultiAioCompletionImpl.cc
@@ -0,0 +1,60 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include "common/dout.h"
+
+#include "libradosstriper/MultiAioCompletionImpl.h"
+
+/// Record the completion of one sub-request.
+/// While no error has been recorded, a negative r other than -EEXIST becomes
+/// the overall result and a positive r is added to it. Fires complete() once
+/// the last pending request has finished and building is over, then drops
+/// one reference.
+void libradosstriper::MultiAioCompletionImpl::complete_request(ssize_t r)
+{
+  // locked by hand: put_unlock() releases the mutex and may delete this
+  lock.lock();
+  const bool is_error = (r < 0 && r != -EEXIST);
+  if (rval >= 0) {
+    if (is_error) {
+      rval = r;
+    } else if (r > 0) {
+      rval += r;
+    }
+  }
+  ceph_assert(pending_complete);
+  --pending_complete;
+  if (pending_complete == 0 && !building) {
+    complete();
+  }
+  put_unlock();
+}
+
+/// Record that one sub-request has become safe.
+/// While no error has been recorded, a negative r other than -EEXIST becomes
+/// the overall result. Fires safe() once the last pending request is safe
+/// and building is over, then drops one reference.
+void libradosstriper::MultiAioCompletionImpl::safe_request(ssize_t r)
+{
+  // locked by hand: put_unlock() releases the mutex and may delete this
+  lock.lock();
+  const bool is_error = (r < 0 && r != -EEXIST);
+  if (rval >= 0 && is_error) {
+    rval = r;
+  }
+  ceph_assert(pending_safe);
+  --pending_safe;
+  if (pending_safe == 0 && !building) {
+    safe();
+  }
+  put_unlock();
+}
+
+/// Mark the completion as fully built. If nothing is pending any more, the
+/// complete and/or safe notifications are delivered right away.
+void libradosstriper::MultiAioCompletionImpl::finish_adding_requests()
+{
+  std::scoped_lock guard{lock};
+  ceph_assert(building);
+  building = false;
+  if (pending_complete == 0) {
+    complete();
+  }
+  if (pending_safe == 0) {
+    safe();
+  }
+}
diff --git a/src/libradosstriper/MultiAioCompletionImpl.h b/src/libradosstriper/MultiAioCompletionImpl.h
new file mode 100644
index 000000000..3ac3aae44
--- /dev/null
+++ b/src/libradosstriper/MultiAioCompletionImpl.h
@@ -0,0 +1,169 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_LIBRADOSSTRIPERSTRIPER_MULTIAIOCOMPLETIONIMPL_H
+#define CEPH_LIBRADOSSTRIPERSTRIPER_MULTIAIOCOMPLETIONIMPL_H
+
+#include <list>
+#include <mutex>
+#include "common/ceph_mutex.h"
+#include "include/radosstriper/libradosstriper.hpp"
+
+namespace libradosstriper {
+
+/**
+ * Completion object aggregating several asynchronous sub-requests.
+ *
+ * Keeps counters of sub-requests still pending completion and safety, one
+ * overall return value, optional complete/safe callbacks and a manual
+ * reference count. All state is protected by `lock`; the object deletes
+ * itself when the reference count drops to zero (see put_unlock()).
+ */
+struct MultiAioCompletionImpl {
+
+ ceph::mutex lock = ceph::make_mutex("MultiAioCompletionImpl lock", false);
+ ceph::condition_variable cond;
+ int ref, rval;
+ int pending_complete, pending_safe;
+ rados_callback_t callback_complete, callback_safe;
+ void *callback_complete_arg, *callback_safe_arg;
+ bool building; ///< true if we are still building this completion
+ bufferlist bl; /// only used for read case in C api of rados striper
+ std::list<bufferlist*> bllist; /// keep temporary buffer lists used for destriping
+
+ /// starts with one reference, no pending requests and building == true
+ MultiAioCompletionImpl()
+ : ref(1), rval(0),
+ pending_complete(0), pending_safe(0),
+ callback_complete(0), callback_safe(0),
+ callback_complete_arg(0), callback_safe_arg(0),
+ building(true) {};
+
+ ~MultiAioCompletionImpl() {
+ // deallocate temporary buffer lists
+ for (std::list<bufferlist*>::iterator it = bllist.begin();
+ it != bllist.end();
+ it++) {
+ delete *it;
+ }
+ bllist.clear();
+ }
+
+ /// install the callback invoked by complete(); always returns 0
+ int set_complete_callback(void *cb_arg, rados_callback_t cb) {
+ std::scoped_lock l{lock};
+ callback_complete = cb;
+ callback_complete_arg = cb_arg;
+ return 0;
+ }
+ /// install the callback invoked by safe(); always returns 0
+ int set_safe_callback(void *cb_arg, rados_callback_t cb) {
+ std::scoped_lock l{lock};
+ callback_safe = cb;
+ callback_safe_arg = cb_arg;
+ return 0;
+ }
+ /// block until no request is pending completion; always returns 0
+ int wait_for_complete() {
+ std::unique_lock l{lock};
+ cond.wait(l, [this] { return !pending_complete; });
+ return 0;
+ }
+ /// block until no request is pending safety; always returns 0
+ int wait_for_safe() {
+ std::unique_lock l{lock};
+ cond.wait(l, [this] { return !pending_safe; });
+ return 0;
+ }
+ bool is_complete() {
+ std::scoped_lock l{lock};
+ return pending_complete == 0;
+ }
+ bool is_safe() {
+ std::scoped_lock l{lock};
+ return pending_safe == 0;
+ }
+ /// like wait_for_complete(), but also waits until the complete callback
+ /// has been cleared (complete() clears it after invoking it)
+ void wait_for_complete_and_cb() {
+ std::unique_lock l{lock};
+ cond.wait(l, [this] { return !pending_complete && !callback_complete; });
+ }
+ /// like wait_for_safe(), but also waits until the safe callback has been
+ /// cleared (safe() clears it after invoking it)
+ void wait_for_safe_and_cb() {
+ std::unique_lock l{lock};
+ cond.wait(l, [this] { return !pending_safe && !callback_safe; });
+ }
+ bool is_complete_and_cb() {
+ std::scoped_lock l{lock};
+ return ((0 == pending_complete) && !callback_complete);
+ }
+ bool is_safe_and_cb() {
+ std::scoped_lock l{lock};
+ return ((0 == pending_safe) && !callback_safe);
+ }
+ int get_return_value() {
+ std::scoped_lock l{lock};
+ return rval;
+ }
+ /// take a reference (acquires the lock)
+ void get() {
+ std::scoped_lock l{lock};
+ _get();
+ }
+ /// take a reference; the caller must already hold the lock
+ void _get() {
+ ceph_assert(ceph_mutex_is_locked(lock));
+ ceph_assert(ref > 0);
+ ++ref;
+ }
+ /// drop a reference (acquires the lock, released by put_unlock())
+ void put() {
+ lock.lock();
+ put_unlock();
+ }
+ /// drop a reference and release the lock, which the caller must hold;
+ /// the object is deleted after unlocking when the count reaches zero
+ void put_unlock() {
+ ceph_assert(ref > 0);
+ int n = --ref;
+ lock.unlock();
+ if (!n)
+ delete this;
+ }
+ /// register one sub-request that will report both completion and safety;
+ /// takes one reference for each of the two notifications
+ void add_request() {
+ std::scoped_lock l{lock};
+ pending_complete++;
+ _get();
+ pending_safe++;
+ _get();
+ }
+ /// register one sub-request and take one reference.
+ /// NOTE(review): despite the name this increments pending_complete, not
+ /// pending_safe, so it pairs with complete_request() - confirm against
+ /// the callers before changing it.
+ void add_safe_request() {
+ std::scoped_lock l{lock};
+ pending_complete++;
+ _get();
+ }
+ /// invoke the complete callback (at most once, it is cleared afterwards)
+ /// and wake all waiters; the lock must be held and stays held during the
+ /// callback
+ void complete() {
+ ceph_assert(ceph_mutex_is_locked(lock));
+ if (callback_complete) {
+ callback_complete(this, callback_complete_arg);
+ callback_complete = 0;
+ }
+ cond.notify_all();
+ }
+ /// invoke the safe callback (at most once, it is cleared afterwards) and
+ /// wake all waiters; the lock must be held and stays held during the
+ /// callback
+ void safe() {
+ ceph_assert(ceph_mutex_is_locked(lock));
+ if (callback_safe) {
+ callback_safe(this, callback_safe_arg);
+ callback_safe = 0;
+ }
+ cond.notify_all();
+ };
+
+ void complete_request(ssize_t r);
+ void safe_request(ssize_t r);
+ void finish_adding_requests();
+};
+
+/// Reference-count hook: takes one reference on the completion via get().
+/// presumably found by boost::intrusive_ptr through ADL - TODO confirm
+inline void intrusive_ptr_add_ref(MultiAioCompletionImpl* ptr)
+{
+ ptr->get();
+}
+
+/// Reference-count hook: drops one reference via put(), which deletes the
+/// completion when it was the last one.
+inline void intrusive_ptr_release(MultiAioCompletionImpl* ptr)
+{
+ ptr->put();
+}
+}
+
+#endif // CEPH_LIBRADOSSTRIPERSTRIPER_MULTIAIOCOMPLETIONIMPL_H
diff --git a/src/libradosstriper/RadosStriperImpl.cc b/src/libradosstriper/RadosStriperImpl.cc
new file mode 100644
index 000000000..60fafd463
--- /dev/null
+++ b/src/libradosstriper/RadosStriperImpl.cc
@@ -0,0 +1,1606 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <boost/algorithm/string/replace.hpp>
+
+#include "libradosstriper/RadosStriperImpl.h"
+
+#include <errno.h>
+
+#include <sstream>
+#include <iomanip>
+#include <algorithm>
+
+#include "include/types.h"
+#include "include/uuid.h"
+#include "include/ceph_fs.h"
+#include "common/dout.h"
+#include "common/strtol.h"
+#include "common/RefCountedObj.h"
+#include "osdc/Striper.h"
+#include "librados/AioCompletionImpl.h"
+#include <cls/lock/cls_lock_client.h>
+
+/*
+ * This file contains the actual implementation of the rados striped objects interface.
+ *
+ * Striped objects are stored in rados in a set of regular rados objects, after their
+ * content has been striped using the osdc/Striper interface.
+ *
+ * The external attributes of the striped object are mapped to the attributes of the
+ * first underlying object. This first object has a set of extra external attributes
+ * storing the layout of the striped object for future read back. These attributes are :
+ * - striper.layout.object_size : the size of rados objects used.
+ * Must be a multiple of striper.layout.stripe_unit
+ * - striper.layout.stripe_unit : the size of a stripe unit
+ * - striper.layout.stripe_count : the number of stripes used
+ * - striper.size : total striped object size
+ *
+ * In general operations on striped objects are not atomic.
+ * However, a certain number of safety guards have been put to make the interface closer
+ * to atomicity :
+ * - each data operation takes a shared lock on the first rados object for the
+ * whole time of the operation
+ * - the remove and trunc operations take an exclusive lock on the first rados object
+ * for the whole time of the operation
+ * This makes sure that no removal/truncation of a striped object occurs while
+ * data operations are happening and vice versa. It thus makes sure that the layout
+ * of a striped object does not change during data operation, which is essential for
+ * data consistency.
+ *
+ * Still the writing to a striped object is not atomic. This means in particular that
+ * the size of an object may not be in sync with its content at all times.
+ * As the size is always guaranteed to be updated first and in an atomic way, and as
+ * sparse striped objects are supported (see below), what will typically happen is
+ * that a reader that comes too soon after a write will read 0s instead of the actual
+ * data.
+ *
+ * Note that remove handles the pieces of the striped object in reverse order,
+ * so that the head object is removed last, making the completion of the deletion atomic.
+ *
+ * Striped objects can be sparse, typically in case data was written at the end of the
+ * striped object only. In such a case, some rados objects constituting the striped object
+ * may be missing. Others can be partial (only the beginning will have data).
+ * When dealing with such sparse striped files, missing objects are detected and
+ * considered as full of 0s. They are however not created until real data is written
+ * to them.
+ *
+ * There are a number of missing features/improvements that could be implemented.
+ * Here are some ideas :
+ * - implementation of missing entry points (compared to rados)
+ * In particular : clone_range, sparse_read, exec, aio_flush_async, tmaps, omaps, ...
+ *
+ */
+
+#define dout_subsys ceph_subsys_rados
+#undef dout_prefix
+#define dout_prefix *_dout << "libradosstriper: "
+
+/// size of xattr buffer
+#define XATTR_BUFFER_SIZE 32
+
+/// names of the different xattr entries
+#define XATTR_LAYOUT_STRIPE_UNIT "striper.layout.stripe_unit"
+#define XATTR_LAYOUT_STRIPE_COUNT "striper.layout.stripe_count"
+#define XATTR_LAYOUT_OBJECT_SIZE "striper.layout.object_size"
+#define XATTR_SIZE "striper.size"
+#define LOCK_PREFIX "lock."
+
+/// name of the lock used on objects to ensure layout stability during IO
+#define RADOS_LOCK_NAME "striper.lock"
+
+/// format of the extension of rados objects created for a given striped object
+#define RADOS_OBJECT_EXTENSION_FORMAT ".%016llx"
+
+/// default object layout: stripe unit and object size of 1<<22 bytes and a
+/// single stripe; the remaining fields are set to 0 or -1 as noted per line
+struct ceph_file_layout default_file_layout = {
+ init_le32(1<<22), // fl_stripe_unit
+ init_le32(1), // fl_stripe_count
+ init_le32(1<<22), // fl_object_size
+ init_le32(0), // fl_cas_hash
+ init_le32(0), // fl_object_stripe_unit
+ init_le32(-1), // fl_unused
+ init_le32(-1), // fl_pg_pool
+};
+
+using libradosstriper::MultiAioCompletionImplPtr;
+
+namespace {
+
+///////////////////////// CompletionData /////////////////////////////
+
+/**
+ * struct handling the data needed to pass to the call back
+ * function in asynchronous operations
+ *
+ * Holds a reference on the striper for its whole lifetime and owns m_ack,
+ * which is only created when a user completion is supplied.
+ */
+struct CompletionData : RefCountedObject {
+ /// complete method: forwards r to m_ack when there is one
+ void complete(int r);
+ /// striper to be used to handle the write completion
+ libradosstriper::RadosStriperImpl *m_striper;
+ /// striped object concerned by the write operation
+ std::string m_soid;
+ /// shared lock to be released at completion
+ std::string m_lockCookie;
+ /// completion handler (null when no user completion was given)
+ librados::IoCtxImpl::C_aio_Complete *m_ack;
+protected:
+ // constructor and destructor are protected: only derived types are built
+ CompletionData(libradosstriper::RadosStriperImpl * striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion = 0);
+ ~CompletionData() override;
+
+};
+
+/// Takes a reference on the striper. When a user completion is supplied, it
+/// is wrapped in a C_aio_Complete handler and bound to the striper's IoCtxImpl.
+CompletionData::CompletionData
+(libradosstriper::RadosStriperImpl* striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion) :
+  RefCountedObject(striper->cct()),
+  m_striper(striper),
+  m_soid(soid),
+  m_lockCookie(lockCookie),
+  m_ack(nullptr) {
+  m_striper->get();
+  if (userCompletion == nullptr) {
+    return;
+  }
+  m_ack = new librados::IoCtxImpl::C_aio_Complete(userCompletion);
+  userCompletion->io = striper->m_ioCtxImpl;
+}
+
+/// Frees the ack handler (if any) and drops the striper reference taken by
+/// the constructor.
+CompletionData::~CompletionData() {
+  // deleting a null pointer is a no-op
+  delete m_ack;
+  m_striper->put();
+}
+
+/// Hand the result r to the ack handler; does nothing when there is none.
+void CompletionData::complete(int r) {
+  if (m_ack == nullptr) {
+    return;
+  }
+  m_ack->finish(r);
+}
+
+/**
+ * struct handling the data needed to pass to the call back
+ * function in asynchronous read operations
+ *
+ * The destructor deletes m_extents and m_resultbl and releases
+ * m_unlockCompletion; m_bl is not freed by this struct.
+ */
+struct ReadCompletionData : CompletionData {
+ /// bufferlist containing final result
+ bufferlist* m_bl;
+ /// extents that will be read
+ std::vector<ObjectExtent>* m_extents;
+ /// intermediate results
+ std::vector<bufferlist>* m_resultbl;
+ /// return code of read completion, to be remembered until unlocking happened
+ int m_readRc;
+ /// completion object for the unlocking of the striped object at the end of the read
+ /// (null after construction)
+ librados::AioCompletion *m_unlockCompletion;
+ /// complete method for when reading is over
+ void complete_read(int r);
+ /// complete method for when object is unlocked
+ void complete_unlock(int r);
+
+private:
+ FRIEND_MAKE_REF(ReadCompletionData);
+ ReadCompletionData(libradosstriper::RadosStriperImpl * striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion,
+ bufferlist* bl,
+ std::vector<ObjectExtent>* extents,
+ std::vector<bufferlist>* resultbl);
+ ~ReadCompletionData() override;
+};
+
+/// Stores the destination buffer, the extents and the per-extent result
+/// buffers; the read return code starts at 0 and there is no unlock
+/// completion yet.
+ReadCompletionData::ReadCompletionData
+(libradosstriper::RadosStriperImpl* striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion,
+ bufferlist* bl,
+ std::vector<ObjectExtent>* extents,
+ std::vector<bufferlist>* resultbl) :
+  CompletionData(striper, soid, lockCookie, userCompletion),
+  m_bl(bl),
+  m_extents(extents),
+  m_resultbl(resultbl),
+  m_readRc(0),
+  m_unlockCompletion(nullptr)
+{
+}
+
+/// Releases the unlock completion (when one was set) and frees the extents
+/// and intermediate result buffers owned by this object.
+ReadCompletionData::~ReadCompletionData() {
+  // the constructor leaves m_unlockCompletion null; guard so that destroying
+  // an object whose unlock completion was never set does not dereference it
+  if (m_unlockCompletion) {
+    m_unlockCompletion->release();
+  }
+  delete m_extents;
+  delete m_resultbl;
+}
+
+/// Assemble the per-extent buffers into the final bufferlist and remember r
+/// as the read return code.
+/// NOTE(review): walks m_extents and m_resultbl in lock-step and relies on
+/// both having the same number of elements.
+void ReadCompletionData::complete_read(int r) {
+  Striper::StripedReadResult assembled;
+  auto chunk = m_resultbl->begin();
+  for (auto extent = m_extents->begin();
+       extent != m_extents->end();
+       ++extent, ++chunk) {
+    assembled.add_partial_result(m_striper->cct(), *chunk, extent->buffer_extents);
+  }
+  // replace whatever the destination held with the assembled data
+  m_bl->clear();
+  assembled.assemble_result(m_striper->cct(), *m_bl, true);
+  m_readRc = r;
+}
+
+/// Called once the object is unlocked: completes the operation with the
+/// remembered read return code, or with the number of bytes read when that
+/// code is 0. The unlock result r is deliberately ignored, as nothing useful
+/// can be done about it.
+void ReadCompletionData::complete_unlock(int r) {
+  int result = m_readRc;
+  if (result == 0) {
+    result = m_bl->length();
+  }
+  CompletionData::complete(result);
+}
+
+/**
+ * struct handling the data needed to pass to the call back
+ * function in asynchronous write operations
+ *
+ * Owns m_safe (created from the same user completion as the inherited
+ * m_ack) and releases m_unlockCompletion in its destructor.
+ */
+struct WriteCompletionData : CompletionData {
+ /// safe completion handler (null when no user completion was given)
+ librados::IoCtxImpl::C_aio_Complete *m_safe;
+ /// completion object for the unlocking of the striped object at the end of the write
+ /// (null after construction)
+ librados::AioCompletion *m_unlockCompletion;
+ /// return code of write completion, to be remembered until unlocking happened
+ int m_writeRc;
+ /// complete method for when writing is over
+ void complete_write(int r);
+ /// complete method for when object is unlocked
+ void complete_unlock(int r);
+ /// safe method
+ void safe(int r);
+private:
+ FRIEND_MAKE_REF(WriteCompletionData);
+ /// constructor
+ WriteCompletionData(libradosstriper::RadosStriperImpl * striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion);
+ /// destructor
+ ~WriteCompletionData() override;
+};
+
+/// Besides the base-class setup, creates the safe handler when a user
+/// completion is supplied; the write return code starts at 0 and there is
+/// no unlock completion yet.
+WriteCompletionData::WriteCompletionData
+(libradosstriper::RadosStriperImpl* striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion) :
+  CompletionData(striper, soid, lockCookie, userCompletion),
+  m_safe(nullptr),
+  m_unlockCompletion(nullptr),
+  m_writeRc(0)
+{
+  if (userCompletion != nullptr) {
+    m_safe = new librados::IoCtxImpl::C_aio_Complete(userCompletion);
+  }
+}
+
+/// Releases the unlock completion (when one was set) and frees the safe
+/// handler.
+WriteCompletionData::~WriteCompletionData() {
+  // the constructor leaves m_unlockCompletion null; guard so that destroying
+  // an object whose unlock completion was never set does not dereference it
+  if (m_unlockCompletion) {
+    m_unlockCompletion->release();
+  }
+  // deleting a null pointer is a no-op
+  delete m_safe;
+}
+
+/// Called once the object is unlocked: completes the operation with the
+/// remembered write return code. The unlock result r is deliberately
+/// ignored, as nothing useful can be done about it.
+void WriteCompletionData::complete_unlock(int r) {
+  const int result = m_writeRc;
+  CompletionData::complete(result);
+}
+
+/// Called when writing is over: only stores r, to be reported later by
+/// complete_unlock().
+void WriteCompletionData::complete_write(int r) {
+  this->m_writeRc = r;
+}
+
+/// Hand the result r to the safe handler; does nothing when there is none.
+void WriteCompletionData::safe(int r) {
+  if (m_safe == nullptr) {
+    return;
+  }
+  m_safe->finish(r);
+}
+
+/**
+ * struct handling the data needed to pass to the call back
+ * function in asynchronous remove operations: the common completion data
+ * plus the removal flags
+ */
+struct RemoveCompletionData : CompletionData {
+ /// removal flags
+ int flags;
+
+private:
+ FRIEND_MAKE_REF(RemoveCompletionData);
+ /**
+ * constructor
+ * note that the constructed object will take ownership of the lock
+ */
+ RemoveCompletionData(libradosstriper::RadosStriperImpl * striper,
+ const std::string& soid,
+ const std::string& lockCookie,
+ librados::AioCompletionImpl *userCompletion,
+ int flags = 0) :
+ CompletionData(striper, soid, lockCookie, userCompletion), flags(flags) {}
+};
+
+/**
+ * struct handling the data needed to pass to the call back
+ * function in asynchronous truncate operations
+ *
+ * Holds a reference on the striper from construction to destruction.
+ */
+struct TruncateCompletionData : RefCountedObject {
+ /// striper to be used
+ libradosstriper::RadosStriperImpl *m_striper;
+ /// striped object concerned by the truncate operation
+ std::string m_soid;
+ /// the final size of the truncated object
+ uint64_t m_size;
+
+private:
+ FRIEND_MAKE_REF(TruncateCompletionData);
+ /// constructor: takes a reference on the striper
+ TruncateCompletionData(libradosstriper::RadosStriperImpl* striper,
+ const std::string& soid,
+ uint64_t size) :
+ RefCountedObject(striper->cct()),
+ m_striper(striper), m_soid(soid), m_size(size) {
+ m_striper->get();
+ }
+ /// destructor: drops the striper reference taken by the constructor
+ ~TruncateCompletionData() override {
+ m_striper->put();
+ }
+};
+
+/**
+ * struct handling the data needed to pass to the call back
+ * function in asynchronous read operations of a Rados File
+ *
+ * Keeps the multi completion alive through its smart pointer member; the
+ * bufferlist is only referenced, nothing in this struct frees it.
+ */
+struct RadosReadCompletionData : RefCountedObject {
+ /// the multi asynch io completion object to be used
+ MultiAioCompletionImplPtr m_multiAioCompl;
+ /// the expected number of bytes
+ uint64_t m_expectedBytes;
+ /// the bufferlist object where data have been written
+ bufferlist *m_bl;
+
+private:
+ FRIEND_MAKE_REF(RadosReadCompletionData);
+ /// constructor
+ RadosReadCompletionData(MultiAioCompletionImplPtr multiAioCompl,
+ uint64_t expectedBytes,
+ bufferlist *bl,
+ CephContext *context) :
+ RefCountedObject(context),
+ m_multiAioCompl(multiAioCompl), m_expectedBytes(expectedBytes), m_bl(bl) {}
+};
+
+/**
+ * struct handling (most of) the data needed to pass to the call back
+ * function in asynchronous stat operations.
+ * Inherited by the actual type for adding time information in different
+ * versions (time_t or struct timespec)
+ */
+struct BasicStatCompletionData : CompletionData {
+  // MultiAioCompletionImpl used to handle the double async
+  // call in the back (stat + getxattr)
+  libradosstriper::MultiAioCompletionImpl *m_multiCompletion;
+  // where to store the size of the first object
+  // this will be ignored but we need a place to store it when
+  // async stat is called
+  uint64_t m_objectSize;
+  // where to store the file size
+  uint64_t *m_psize;
+  /// the bufferlist object used for the getxattr call
+  bufferlist m_bl;
+  /// return code of the stat
+  int m_statRC;
+  /// return code of the getxattr
+  int m_getxattrRC;
+
+protected:
+  /// constructor; the lock cookie of the base class is left empty.
+  /// m_objectSize is zero-initialised so that it never holds an
+  /// indeterminate value before the stat call fills it in.
+  BasicStatCompletionData(libradosstriper::RadosStriperImpl* striper,
+                          const std::string& soid,
+                          librados::AioCompletionImpl *userCompletion,
+                          libradosstriper::MultiAioCompletionImpl *multiCompletion,
+                          uint64_t *psize) :
+    CompletionData(striper, soid, "", userCompletion),
+    m_multiCompletion(multiCompletion), m_objectSize(0), m_psize(psize),
+    m_statRC(0), m_getxattrRC(0) {}
+
+};
+
+/**
+ * struct handling the data needed to pass to the call back
+ * function in asynchronous stat operations.
+ * Simple templated extension of BasicStatCompletionData.
+ * The template parameter is the type of the time information
+ * (used with time_t for stat and struct timespec for stat2)
+ */
+template<class TimeType>
+struct StatCompletionData : BasicStatCompletionData {
+  /// where to store the file time
+  TimeType *m_pmtime;
+private:
+  FRIEND_MAKE_REF(StatCompletionData);
+  /// constructor: forwards everything but `pmtime` to
+  /// BasicStatCompletionData.
+  /// Declared with the injected class name rather than a template-id
+  /// (`StatCompletionData<TimeType>(...)`), as the latter is no longer
+  /// accepted for constructors as of C++20.
+  StatCompletionData(libradosstriper::RadosStriperImpl* striper,
+                     const std::string& soid,
+                     librados::AioCompletionImpl *userCompletion,
+                     libradosstriper::MultiAioCompletionImpl *multiCompletion,
+                     uint64_t *psize,
+                     TimeType *pmtime) :
+    BasicStatCompletionData(striper, soid, userCompletion, multiCompletion, psize),
+    m_pmtime(pmtime) {}
+};
+
+/**
+ * Context passed to the completion callback of each per-object
+ * asynchronous remove issued while removing a Rados File
+ */
+struct RadosRemoveCompletionData : RefCountedObject {
+  /// multi completion that the per-object remove reports to
+  MultiAioCompletionImplPtr m_multiAioCompl;
+
+private:
+  FRIEND_MAKE_REF(RadosRemoveCompletionData);
+
+  /// constructor; `context` is forwarded to RefCountedObject
+  RadosRemoveCompletionData(MultiAioCompletionImplPtr multiAioCompl,
+                            CephContext *context)
+    : RefCountedObject(context),
+      m_multiAioCompl(multiAioCompl)
+  {}
+};
+
+
+} // namespace {
+
+///////////////////////// constructor /////////////////////////////
+
+// Builds a striper on top of the given io context. The reference
+// counter starts at 0 and the layout starts as default_file_layout.
+libradosstriper::RadosStriperImpl::RadosStriperImpl(librados::IoCtx& ioctx,
+                                                    librados::IoCtxImpl *ioctx_impl)
+  : m_refCnt(0),
+    m_radosCluster(ioctx),
+    m_ioCtx(ioctx),
+    m_ioCtxImpl(ioctx_impl),
+    m_layout(default_file_layout)
+{
+}
+
+///////////////////////// layout /////////////////////////////
+
+// Sets the stripe unit of the layout used by this striper.
+// Returns 0 on success, -EINVAL when the value is rejected.
+int libradosstriper::RadosStriperImpl::setObjectLayoutStripeUnit
+(unsigned int stripe_unit)
+{
+  // reject zero, and any value having bits set below
+  // CEPH_MIN_STRIPE_UNIT (i.e. not a 64k increment)
+  const bool is_zero = !stripe_unit;
+  if (is_zero || (stripe_unit & (CEPH_MIN_STRIPE_UNIT-1))) {
+    return -EINVAL;
+  }
+  m_layout.fl_stripe_unit = stripe_unit;
+  return 0;
+}
+
+// Sets the stripe count of the layout used by this striper.
+// Returns 0 on success, -EINVAL for a zero count.
+int libradosstriper::RadosStriperImpl::setObjectLayoutStripeCount
+(unsigned int stripe_count)
+{
+  if (stripe_count == 0) {
+    // a layout without any stripe makes no sense
+    return -EINVAL;
+  }
+  m_layout.fl_stripe_count = stripe_count;
+  return 0;
+}
+
+// Sets the object size of the layout used by this striper.
+// The value is checked against the stripe unit currently stored in the
+// layout. Returns 0 on success, -EINVAL when the value is rejected.
+int libradosstriper::RadosStriperImpl::setObjectLayoutObjectSize
+(unsigned int object_size)
+{
+  // reject zero, and any value having bits set below
+  // CEPH_MIN_STRIPE_UNIT (i.e. not a 64k increment)
+  if (!object_size || (object_size & (CEPH_MIN_STRIPE_UNIT-1))) {
+    return -EINVAL;
+  }
+  // the object must hold a whole number of stripe units, at least one
+  if (object_size < m_layout.fl_stripe_unit) {
+    return -EINVAL;
+  }
+  if (object_size % m_layout.fl_stripe_unit) {
+    return -EINVAL;
+  }
+  m_layout.fl_object_size = object_size;
+  return 0;
+}
+
+///////////////////////// xattrs /////////////////////////////
+
+// Reads extended attribute `name` of striped object `soid` into `bl`.
+// The attribute is looked up on the first rados object (number 0) of
+// the striped object; the return code of IoCtx::getxattr is returned.
+int libradosstriper::RadosStriperImpl::getxattr(const object_t& soid,
+                                                const char *name,
+                                                bufferlist& bl)
+{
+  return m_ioCtx.getxattr(getObjectId(soid, 0), name, bl);
+}
+
+// Sets extended attribute `name` of striped object `soid` to `bl`.
+// The attribute is written on the first rados object (number 0) of
+// the striped object; the return code of IoCtx::setxattr is returned.
+int libradosstriper::RadosStriperImpl::setxattr(const object_t& soid,
+                                                const char *name,
+                                                bufferlist& bl)
+{
+  return m_ioCtx.setxattr(getObjectId(soid, 0), name, bl);
+}
+
+// Fills `attrset` with the extended attributes of striped object `soid`,
+// as found on its first rados object (number 0). Attributes used
+// internally for striping and locking are removed from the result.
+// Returns the non-zero return code of IoCtx::getxattrs on failure.
+int libradosstriper::RadosStriperImpl::getxattrs(const object_t& soid,
+                                                 map<string, bufferlist>& attrset)
+{
+  const int rc = m_ioCtx.getxattrs(getObjectId(soid, 0), attrset);
+  if (rc != 0) {
+    return rc;
+  }
+  // hide the layout description ...
+  attrset.erase(XATTR_LAYOUT_STRIPE_UNIT);
+  attrset.erase(XATTR_LAYOUT_STRIPE_COUNT);
+  attrset.erase(XATTR_LAYOUT_OBJECT_SIZE);
+  // ... the size of the striped object ...
+  attrset.erase(XATTR_SIZE);
+  // ... and the attribute backing the lock
+  attrset.erase(std::string(LOCK_PREFIX) + RADOS_LOCK_NAME);
+  return rc;
+}
+
+// Removes extended attribute `name` of striped object `soid`.
+// The attribute is removed from the first rados object (number 0) of
+// the striped object; the return code of IoCtx::rmxattr is returned.
+int libradosstriper::RadosStriperImpl::rmxattr(const object_t& soid,
+                                               const char *name)
+{
+  return m_ioCtx.rmxattr(getObjectId(soid, 0), name);
+}
+
+///////////////////////// io /////////////////////////////
+
+// Synchronously writes `len` bytes of `bl` at offset `off` of striped
+// object `soid`. Returns the non-zero code of the open step if it
+// failed, else the result of write_in_open_object.
+int libradosstriper::RadosStriperImpl::write(const std::string& soid,
+                                             const bufferlist& bl,
+                                             size_t len,
+                                             uint64_t off)
+{
+  ceph_file_layout layout;
+  std::string lockCookie;
+  // Opening creates the object if needed, retrieves its layout and
+  // size, and takes a shared lock on it.
+  const int open_rc =
+    createAndOpenStripedObject(soid, &layout, len+off, &lockCookie, true);
+  if (open_rc != 0) {
+    return open_rc;
+  }
+  return write_in_open_object(soid, layout, lockCookie, bl, len, off);
+}
+
+// Synchronously appends `len` bytes of `bl` to striped object `soid`.
+// Returns the non-zero code of the open step if it failed, else the
+// result of write_in_open_object.
+int libradosstriper::RadosStriperImpl::append(const std::string& soid,
+                                              const bufferlist& bl,
+                                              size_t len)
+{
+  ceph_file_layout layout;
+  std::string lockCookie;
+  // `size` is handed to the open step holding `len`; the value found in
+  // it afterwards is used as the offset of the write
+  uint64_t size = len;
+  // Opening creates the object if needed, retrieves its layout and
+  // size, and takes a shared lock on it.
+  const int open_rc =
+    openStripedObjectForWrite(soid, &layout, &size, &lockCookie, false);
+  if (open_rc != 0) {
+    return open_rc;
+  }
+  return write_in_open_object(soid, layout, lockCookie, bl, len, size);
+}
+
+// Synchronously replaces the content of striped object `soid` by `bl`:
+// the object is truncated to 0 and `bl` is then written at offset 0.
+int libradosstriper::RadosStriperImpl::write_full(const std::string& soid,
+                                                  const bufferlist& bl)
+{
+  const int trunc_rc = trunc(soid, 0);
+  // a missing object (-ENOENT) is fine, any other failure is fatal
+  const bool trunc_failed = (trunc_rc != 0) && (trunc_rc != -ENOENT);
+  if (trunc_failed) {
+    return trunc_rc;
+  }
+  return write(soid, bl, bl.length(), 0);
+}
+
+// Synchronously reads up to `len` bytes at offset `off` of striped
+// object `soid` into `bl`, by running aio_read on a local completion
+// and waiting for it. Returns the non-zero code of aio_read if it could
+// not be started, else the return value of the completion.
+int libradosstriper::RadosStriperImpl::read(const std::string& soid,
+                                            bufferlist* bl,
+                                            size_t len,
+                                            uint64_t off)
+{
+  librados::AioCompletionImpl completion;
+  const int start_rc = aio_read(soid, &completion, bl, len, off);
+  if (start_rc != 0) {
+    return start_rc;
+  }
+  // the read is in flight: block until it is complete
+  completion.wait_for_complete_and_cb();
+  return completion.get_return_value();
+}
+
+///////////////////////// asynchronous io /////////////////////////////
+
+// Asynchronously writes `len` bytes of `bl` at offset `off` of striped
+// object `soid`, with `c` as user completion. The open step is done
+// synchronously; its non-zero return code is returned on failure.
+int libradosstriper::RadosStriperImpl::aio_write(const std::string& soid,
+                                                 librados::AioCompletionImpl *c,
+                                                 const bufferlist& bl,
+                                                 size_t len,
+                                                 uint64_t off)
+{
+  ceph_file_layout layout;
+  std::string lockCookie;
+  const int open_rc =
+    createAndOpenStripedObject(soid, &layout, len+off, &lockCookie, true);
+  if (open_rc != 0) {
+    return open_rc;
+  }
+  return aio_write_in_open_object(soid, c, layout, lockCookie, bl, len, off);
+}
+
+// Asynchronously appends `len` bytes of `bl` to striped object `soid`,
+// with `c` as user completion. The open step is done synchronously;
+// its non-zero return code is returned on failure.
+int libradosstriper::RadosStriperImpl::aio_append(const std::string& soid,
+                                                  librados::AioCompletionImpl *c,
+                                                  const bufferlist& bl,
+                                                  size_t len)
+{
+  ceph_file_layout layout;
+  std::string lockCookie;
+  // `size` is handed to the open step holding `len`; the value found in
+  // it afterwards is used as the offset of the write
+  uint64_t size = len;
+  const int open_rc =
+    openStripedObjectForWrite(soid, &layout, &size, &lockCookie, false);
+  if (open_rc != 0) {
+    return open_rc;
+  }
+  return aio_write_in_open_object(soid, c, layout, lockCookie, bl, len, size);
+}
+
+/**
+ * Asynchronously replaces the content of striped object `soid` by `bl`.
+ * The object is first truncated to 0 (synchronously), then the write of
+ * `bl` at offset 0 is started through aio_write with `c` as user
+ * completion.
+ * As in the synchronous write_full, a missing object is not an error:
+ * -ENOENT from trunc only means there was nothing to truncate.
+ * @return the error code of trunc if truncation failed for any other
+ *         reason, else the return code of aio_write
+ */
+int libradosstriper::RadosStriperImpl::aio_write_full(const std::string& soid,
+                                                      librados::AioCompletionImpl *c,
+                                                      const bufferlist& bl)
+{
+  int rc = trunc(soid, 0);
+  if (rc && rc != -ENOENT) return rc; // ENOENT is ok, same as write_full
+  return aio_write(soid, c, bl, bl.length(), 0);
+}
+
+// Callback run when the unlocking that follows a striped read is over:
+// reports the return value of the completion to the ReadCompletionData.
+static void rados_read_aio_unlock_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  auto *completion = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  // `arg` carries the reference created in aio_read for this callback;
+  // the ref_t is built with `false` and goes away when we return
+  ceph::ref_t<ReadCompletionData> cdata(static_cast<ReadCompletionData*>(arg), false);
+  cdata->complete_unlock(completion->rval);
+}
+
+// Callback run by the multi completion once all per-object reads of a
+// striped read are complete.
+// `arg` is the ReadCompletionData registered in aio_read. It is used as a
+// plain pointer here: unlike the unlock callback, this one does not wrap
+// it in a ref_t, matching the cdata.get() (no extra reference) used at
+// registration.
+// NOTE(review): the data is still used after the unlock was launched;
+// presumably something else keeps it alive until complete_read returns -
+// confirm.
+static void striper_read_aio_req_complete(rados_striper_multi_completion_t c, void *arg)
+{
+ auto cdata = static_cast<ReadCompletionData*>(arg);
+ // launch the async unlocking of the object
+ cdata->m_striper->aio_unlockObject(cdata->m_soid, cdata->m_lockCookie, cdata->m_unlockCompletion);
+ // complete the read part in parallel
+ libradosstriper::MultiAioCompletionImpl *comp =
+ reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+ cdata->complete_read(comp->rval);
+}
+
+/**
+ * Callback run when the asynchronous read of one rados object, issued by
+ * aio_read, is complete.
+ * Sparse files are handled here: a missing object counts as 0 bytes read,
+ * and a short read is padded with zeros up to the expected length.
+ * The outcome is then reported to the multi completion.
+ * @param c the rados completion of the read
+ * @param arg the RadosReadCompletionData detached by aio_read. This
+ *        callback takes over that reference and releases it on return;
+ *        it was previously never released.
+ */
+static void rados_req_read_complete(rados_completion_t c, void *arg)
+{
+  // adopt (add_ref = false) the reference handed over via data.detach()
+  auto data = ceph::ref_t<RadosReadCompletionData>(static_cast<RadosReadCompletionData*>(arg), false);
+  int rc = rados_aio_get_return_value(c);
+  // We need to handle the case of sparse files here
+  if (rc == -ENOENT) {
+    // the object did not exist at all. This can happen for sparse files.
+    // we consider we've read 0 bytes and it will fall into next case
+    rc = 0;
+  }
+  ssize_t nread = rc;
+  if (rc >= 0 && (((uint64_t)rc) < data->m_expectedBytes)) {
+    // only partial data were present in the object (or the object did not
+    // even exist if we've gone through previous case).
+    // This is typical of sparse file and we need to complete with 0s.
+    unsigned int lenOfZeros = data->m_expectedBytes-rc;
+    // zero what already exists in the bufferlist past the data read ...
+    unsigned int existingDataToZero = min(data->m_bl->length()-rc, lenOfZeros);
+    if (existingDataToZero > 0) {
+      data->m_bl->zero(rc, existingDataToZero);
+    }
+    // ... and append zeros for whatever is still missing
+    if (lenOfZeros > existingDataToZero) {
+      ceph::bufferptr zeros(ceph::buffer::create(lenOfZeros-existingDataToZero));
+      zeros.zero();
+      data->m_bl->push_back(zeros);
+    }
+    nread = data->m_expectedBytes;
+  }
+  // own copy of the multi completion pointer, used for both notifications
+  auto multi_aio_comp = data->m_multiAioCompl;
+  multi_aio_comp->complete_request(nread);
+  multi_aio_comp->safe_request(rc);
+}
+
+// Asynchronously reads up to `len` bytes at offset `off` of striped object
+// `soid` into `bl`, with `c` as user completion.
+// The striped object is opened (and locked) synchronously, the byte range
+// is clamped to the size of the object and split into per-object extents,
+// and one rados aio_read is issued per extent. A multi completion joins
+// these reads; its callback launches the unlocking of the object.
+// Returns the non-zero code of the open step if it failed, else the return
+// code of the last rados aio_read issued (0 if none was needed).
+// NOTE(review): c->io is set without m_ioCtxImpl->get(), unlike in
+// aio_write_in_open_object - confirm this asymmetry is intended.
+int libradosstriper::RadosStriperImpl::aio_read(const std::string& soid,
+ librados::AioCompletionImpl *c,
+ bufferlist* bl,
+ size_t len,
+ uint64_t off)
+{
+ // open the object. This will retrieve its layout and size
+ // and take a shared lock on it
+ ceph_file_layout layout;
+ uint64_t size;
+ std::string lockCookie;
+ int rc = openStripedObjectForRead(soid, &layout, &size, &lockCookie);
+ if (rc) return rc;
+ // find out the actual number of bytes we can read
+ uint64_t read_len;
+ if (off >= size) {
+ // nothing to read ! We are done.
+ read_len = 0;
+ } else {
+ read_len = min(len, (size_t)(size-off));
+ }
+ // get list of extents to be read from
+ vector<ObjectExtent> *extents = new vector<ObjectExtent>();
+ if (read_len > 0) {
+ // the object name is used as a format string: escape any '%' it contains
+ std::string format = soid;
+ boost::replace_all(format, "%", "%%");
+ format += RADOS_OBJECT_EXTENSION_FORMAT;
+ file_layout_t l;
+ l.from_legacy(layout);
+ Striper::file_to_extents(cct(), format.c_str(), &l, off, read_len,
+ 0, *extents);
+ }
+
+ // create a completion object and transfer ownership of extents and resultbl
+ vector<bufferlist> *resultbl = new vector<bufferlist>(extents->size());
+ auto cdata = ceph::make_ref<ReadCompletionData>(this, soid, lockCookie, c, bl, extents, resultbl);
+ c->is_read = true;
+ c->io = m_ioCtxImpl;
+ // create a completion for the unlocking of the striped object at the end of the read
+ librados::AioCompletion *unlock_completion =
+ librados::Rados::aio_create_completion(cdata->get() /* create ref! */, rados_read_aio_unlock_complete);
+ cdata->m_unlockCompletion = unlock_completion;
+ // create the multiCompletion object handling the reads
+ MultiAioCompletionImplPtr nc{new libradosstriper::MultiAioCompletionImpl,
+ false};
+ // note: cdata.get() is the smart pointer accessor here, so no extra
+ // reference is created for this callback
+ nc->set_complete_callback(cdata.get(), striper_read_aio_req_complete);
+ // go through the extents
+ int r = 0, i = 0;
+ for (vector<ObjectExtent>::iterator p = extents->begin(); p != extents->end(); ++p) {
+ // create a buffer list describing where to place data read from current extent
+ bufferlist *oid_bl = &((*resultbl)[i++]);
+ for (vector<pair<uint64_t,uint64_t> >::iterator q = p->buffer_extents.begin();
+ q != p->buffer_extents.end();
+ ++q) {
+ bufferlist buffer_bl;
+ buffer_bl.substr_of(*bl, q->first, q->second);
+ oid_bl->append(buffer_bl);
+ }
+ // read all extents of a given object in one go
+ nc->add_request();
+ // the single reference on data is detached and handed over to
+ // rados_req_read_complete through the completion argument
+ auto data = ceph::make_ref<RadosReadCompletionData>(nc, p->length, oid_bl, cct());
+ librados::AioCompletion *rados_completion =
+ librados::Rados::aio_create_completion(data.detach(), rados_req_read_complete);
+ r = m_ioCtx.aio_read(p->oid.name, rados_completion, oid_bl, p->length, p->offset);
+ rados_completion->release();
+ // stop issuing reads at the first one that could not be started
+ if (r < 0)
+ break;
+ }
+ nc->finish_adding_requests();
+ return r;
+}
+
+// Asynchronously reads up to `len` bytes at offset `off` of striped
+// object `soid` into the raw buffer `buf`, with `c` as user completion.
+// The buffer is wrapped into the bufferlist owned by the completion and
+// the bufferlist flavour of aio_read does the actual work.
+int libradosstriper::RadosStriperImpl::aio_read(const std::string& soid,
+                                                librados::AioCompletionImpl *c,
+                                                char* buf,
+                                                size_t len,
+                                                uint64_t off)
+{
+  bufferlist& target = c->bl;
+  // start from an empty list made of the caller's memory only
+  target.clear();
+  target.push_back(buffer::create_static(len, buf));
+  return aio_read(soid, c, &target, len, off);
+}
+
+// Flushes pending asynchronous operations: first at the rados level,
+// then waits until all CompletionData are released (m_refCnt <= 1).
+// Returns the negative code of IoCtx::aio_flush on failure, else its
+// return value once the wait is over.
+int libradosstriper::RadosStriperImpl::aio_flush()
+{
+  const int ret = m_ioCtx.aio_flush();
+  if (ret < 0) {
+    return ret;
+  }
+  const auto all_released = [this] { return m_refCnt <= 1; };
+  std::unique_lock l{lock};
+  cond.wait(l, all_released);
+  return ret;
+}
+
+///////////////////////// stat and deletion /////////////////////////////
+
+// Synchronous stat of striped object `soid`: runs aio_stat on a local
+// completion and waits for it. Returns the non-zero code of aio_stat if
+// it could not be started, else the return value of the completion.
+int libradosstriper::RadosStriperImpl::stat(const std::string& soid, uint64_t *psize, time_t *pmtime)
+{
+  librados::AioCompletionImpl completion;
+  const int start_rc = aio_stat(soid, &completion, psize, pmtime);
+  if (start_rc != 0) {
+    return start_rc;
+  }
+  // the stat is in flight: block until it is complete
+  completion.wait_for_complete();
+  return completion.get_return_value();
+}
+
+/**
+ * Callback run when the asynchronous stat issued by aio_generic_stat is
+ * complete.
+ * Any failure of the stat is recorded in m_statRC so that
+ * striper_stat_aio_req_complete reports it to the user; previously only
+ * -ENOENT was recorded and other errors were silently dropped.
+ * @param c the rados completion of the stat
+ * @param arg the BasicStatCompletionData, carrying the reference created
+ *        for this callback (taken over by the ref_t built with `false`)
+ */
+static void striper_stat_aio_stat_complete(rados_completion_t c, void *arg) {
+  auto data = ceph::ref_t<BasicStatCompletionData>(static_cast<BasicStatCompletionData*>(arg), false);
+  int rc = rados_aio_get_return_value(c);
+  if (rc < 0) {
+    // remember this has failed
+    data->m_statRC = rc;
+  }
+  data->m_multiCompletion->complete_request(rc);
+}
+
+// Callback run when the asynchronous getxattr issued by aio_generic_stat
+// is complete. On success, the attribute value is parsed as a decimal
+// number and stored where the user asked for the file size.
+static void striper_stat_aio_getxattr_complete(rados_completion_t c, void *arg) {
+  auto data = ceph::ref_t<BasicStatCompletionData>(static_cast<BasicStatCompletionData*>(arg), false);
+  const int getxattr_rc = rados_aio_get_return_value(c);
+  if (getxattr_rc < 0) {
+    // remember the failure and report it as is
+    data->m_getxattrRC = getxattr_rc;
+    data->m_multiCompletion->complete_request(getxattr_rc);
+    return;
+  }
+  // copy into a std::string to get a null terminator before parsing
+  const std::string size_text(data->m_bl.c_str(), data->m_bl.length());
+  std::string parse_error;
+  *data->m_psize = strict_strtoll(size_text.c_str(), 10, &parse_error);
+  if (!parse_error.empty()) {
+    lderr(data->m_striper->cct()) << XATTR_SIZE << " : " << parse_error << dendl;
+    data->m_getxattrRC = -EINVAL;
+  }
+  // the getxattr itself went fine, even when the parsing did not
+  data->m_multiCompletion->complete_request(0);
+}
+
+// Callback run by the multi completion once both the stat and the
+// getxattr of an asynchronous stat are over. Completes the user request
+// with the stat error if any, else with the getxattr error if any,
+// else with 0.
+static void striper_stat_aio_req_complete(rados_striper_multi_completion_t c,
+                                          void *arg) {
+  auto data = ceph::ref_t<BasicStatCompletionData>(static_cast<BasicStatCompletionData*>(arg), false);
+  int final_rc = 0;
+  if (data->m_statRC) {
+    final_rc = data->m_statRC;
+  } else if (data->m_getxattrRC < 0) {
+    final_rc = data->m_getxattrRC;
+  }
+  data->complete(final_rc);
+}
+
+// Common implementation of aio_stat and aio_stat2.
+// Issues two asynchronous calls on the first rados object of striped
+// object `soid`: a stat (through `statFunction`, a member function of
+// IoCtxImpl) and a getxattr of XATTR_SIZE. A multi completion joins them
+// and completes the user completion `c` from its callback.
+// `psize` receives the size parsed from the attribute and `pmtime` is
+// handed to the stat call; TimeType is the type of that time information.
+// Returns 0 once both calls are started, else the negative code of the
+// call that could not be started.
+template<class TimeType>
+int libradosstriper::RadosStriperImpl::aio_generic_stat
+(const std::string& soid,
+ librados::AioCompletionImpl *c,
+ uint64_t *psize,
+ TimeType *pmtime,
+ typename libradosstriper::RadosStriperImpl::StatFunction<TimeType>::Type statFunction)
+{
+ // use a MultiAioCompletion object for dealing with the fact
+ // that we'll do 2 asynchronous calls in parallel
+ MultiAioCompletionImplPtr multi_completion{
+ new libradosstriper::MultiAioCompletionImpl, false};
+ // Data object used for passing context to asynchronous calls
+ std::string firstObjOid = getObjectId(soid, 0);
+ auto cdata = ceph::make_ref<StatCompletionData<TimeType>>(this, firstObjOid, c, multi_completion.get(), psize, pmtime);
+ multi_completion->set_complete_callback(cdata->get() /* create ref! */, striper_stat_aio_req_complete);
+ // use a regular AioCompletion for the stat async call
+ librados::AioCompletion *stat_completion =
+ librados::Rados::aio_create_completion(cdata->get() /* create ref! */, striper_stat_aio_stat_complete);
+ multi_completion->add_safe_request();
+ object_t obj(firstObjOid);
+ // the object size reported by the stat lands in cdata->m_objectSize
+ int rc = (m_ioCtxImpl->*statFunction)(obj, stat_completion->pc,
+ &cdata->m_objectSize, cdata->m_pmtime);
+ stat_completion->release();
+ if (rc < 0) {
+ // nothing is really started so cancel everything
+ // NOTE(review): cdata is deleted here although two extra references
+ // were created above through cdata->get() - confirm this is safe with
+ // respect to RefCountedObject's expectations.
+ delete cdata.detach();
+ return rc;
+ }
+ // use a regular AioCompletion for the getxattr async call
+ librados::AioCompletion *getxattr_completion =
+ librados::Rados::aio_create_completion(cdata->get() /* create ref! */, striper_stat_aio_getxattr_complete);
+ multi_completion->add_safe_request();
+ // in parallel, get the size attribute from the first object asynchronously
+ rc = m_ioCtxImpl->aio_getxattr(obj, getxattr_completion->pc,
+ XATTR_SIZE, cdata->m_bl);
+ getxattr_completion->release();
+ multi_completion->finish_adding_requests();
+ if (rc < 0) {
+ // the async stat is ongoing, so we need to go on
+ // we mark the getxattr as failed in the data object
+ cdata->m_getxattrRC = rc;
+ // the getxattr callback will not run: account for its request here
+ multi_completion->complete_request(rc);
+ return rc;
+ }
+ return 0;
+}
+
+// Asynchronous stat with the modification time given as a time_t:
+// aio_generic_stat driven by IoCtxImpl::aio_stat.
+int libradosstriper::RadosStriperImpl::aio_stat(const std::string& soid,
+                                                librados::AioCompletionImpl *c,
+                                                uint64_t *psize,
+                                                time_t *pmtime)
+{
+  const libradosstriper::RadosStriperImpl::StatFunction<time_t>::Type statFunction =
+    &librados::IoCtxImpl::aio_stat;
+  return aio_generic_stat<time_t>(soid, c, psize, pmtime, statFunction);
+}
+
+// Synchronous stat of striped object `soid` with the time given as a
+// struct timespec: runs aio_stat2 on a local completion and waits for
+// it. Returns the non-zero code of aio_stat2 if it could not be started,
+// else the return value of the completion.
+int libradosstriper::RadosStriperImpl::stat2(const std::string& soid, uint64_t *psize, struct timespec *pts)
+{
+  librados::AioCompletionImpl completion;
+  const int start_rc = aio_stat2(soid, &completion, psize, pts);
+  if (start_rc != 0) {
+    return start_rc;
+  }
+  // the stat is in flight: block until it is complete
+  completion.wait_for_complete_and_cb();
+  return completion.get_return_value();
+}
+
+// Asynchronous stat with the time given as a struct timespec:
+// aio_generic_stat driven by IoCtxImpl::aio_stat2.
+int libradosstriper::RadosStriperImpl::aio_stat2(const std::string& soid,
+                                                 librados::AioCompletionImpl *c,
+                                                 uint64_t *psize,
+                                                 struct timespec *pts)
+{
+  const libradosstriper::RadosStriperImpl::StatFunction<struct timespec>::Type statFunction =
+    &librados::IoCtxImpl::aio_stat2;
+  return aio_generic_stat<struct timespec>(soid, c, psize, pts, statFunction);
+}
+
+/**
+ * Callback run when the asynchronous removal of one rados object, issued
+ * by internal_aio_remove, is complete. Reports the outcome to the multi
+ * completion, a missing object being counted as a success.
+ * @param c the rados completion of the removal
+ * @param arg the RadosRemoveCompletionData, carrying the reference
+ *        created for this callback by internal_aio_remove
+ *        (`data->get()`). This callback takes over that reference and
+ *        releases it on return; it was previously never released.
+ */
+static void rados_req_remove_complete(rados_completion_t c, void *arg)
+{
+  // adopt (add_ref = false) the reference created for this callback
+  auto cdata = ceph::ref_t<RadosRemoveCompletionData>(static_cast<RadosRemoveCompletionData*>(arg), false);
+  int rc = rados_aio_get_return_value(c);
+  // in case the object did not exist, it means we had a sparse file, all is fine
+  if (rc == -ENOENT) {
+    rc = 0;
+  }
+  // own copy of the multi completion pointer, used for both notifications
+  auto multi_aio_comp = cdata->m_multiAioCompl;
+  multi_aio_comp->complete_request(rc);
+  multi_aio_comp->safe_request(rc);
+}
+
+// Callback run by the multi completion once the removal of all rados
+// objects but the first one is over. When everything went fine the
+// first object is removed too (synchronously); otherwise the striped
+// object is left in place and the failure is logged. The user request
+// is completed with the resulting code.
+static void striper_remove_aio_req_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  auto *completion = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  auto cdata = ceph::ref_t<RemoveCompletionData>(static_cast<RemoveCompletionData*>(arg), false);
+  ldout(cdata->m_striper->cct(), 10)
+    << "RadosStriperImpl : striper_remove_aio_req_complete called for "
+    << cdata->m_soid << dendl;
+  int rc = completion->rval;
+  if (rc != 0) {
+    lderr(cdata->m_striper->cct())
+      << "RadosStriperImpl : deletion/truncation incomplete for " << cdata->m_soid
+      << ", as errors were encountered. The file is left present but it's content "
+      << " has been partially removed"
+      << dendl;
+  } else {
+    // every other object is gone: drop the first one, using the flags
+    // given by the user
+    rc = cdata->m_striper->m_ioCtx.remove(cdata->m_striper->getObjectId(cdata->m_soid, 0),
+                                          cdata->flags);
+  }
+  cdata->complete(rc);
+}
+
+// Synchronous removal of striped object `soid`: runs aio_remove on a
+// local completion and waits for it. Returns the non-zero code of
+// aio_remove if it could not be started, else the return value of the
+// completion.
+int libradosstriper::RadosStriperImpl::remove(const std::string& soid, int flags)
+{
+  librados::AioCompletionImpl completion;
+  const int start_rc = aio_remove(soid, &completion, flags);
+  if (start_rc != 0) {
+    return start_rc;
+  }
+  // the removal is in flight: block until it is complete
+  completion.wait_for_complete_and_cb();
+  return completion.get_return_value();
+}
+
+/**
+ * Asynchronously removes striped object `soid`, with `c` as user
+ * completion.
+ * An exclusive lock is taken (synchronously) on the first rados object,
+ * then the removal of the other rados objects is started through
+ * internal_aio_remove; the first object is removed from the callback of
+ * the multi completion once the others are gone.
+ * `flags` is now forwarded to internal_aio_remove so that it applies to
+ * every rados object of the striped object, not only to the first one.
+ * @return the non-zero code of the locking if it failed, else the return
+ *         code of internal_aio_remove
+ */
+int libradosstriper::RadosStriperImpl::aio_remove(const std::string& soid,
+                                                  librados::AioCompletionImpl *c,
+                                                  int flags)
+{
+  // the RemoveCompletionData object will lock the given soid for the duration
+  // of the removal
+  std::string lockCookie = getUUID();
+  int rc = m_ioCtx.lock_exclusive(getObjectId(soid, 0), RADOS_LOCK_NAME, lockCookie, "", 0, 0);
+  if (rc) return rc;
+  // create CompletionData for the async remove call
+  auto cdata = ceph::make_ref<RemoveCompletionData>(this, soid, lockCookie, c, flags);
+  MultiAioCompletionImplPtr multi_completion{
+    new libradosstriper::MultiAioCompletionImpl, false};
+  multi_completion->set_complete_callback(cdata->get() /* create ref! */, striper_remove_aio_req_complete);
+  // call asynchronous internal version of remove
+  ldout(cct(), 10)
+    << "RadosStriperImpl : Aio_remove starting for "
+    << soid << dendl;
+  rc = internal_aio_remove(soid, multi_completion, flags);
+  return rc;
+}
+
+/**
+ * Starts the asynchronous removal of all rados objects of striped object
+ * `soid` except the first one, each removal being registered as a request
+ * of `multi_completion`.
+ * The number of rados objects is derived from the XATTR_SIZE attribute
+ * and the layout of this striper or, when the attribute cannot be read,
+ * found by probing objects one by one until a stat fails.
+ * @param soid the striped object to remove
+ * @param multi_completion completion joining the per-object removals
+ * @param flags passed to IoCtx::aio_remove when non zero
+ * @return the return code of the last removal issued (0 if none was
+ *         needed), -EINVAL if the size attribute could not be parsed, or
+ *         the code of a caught ErrorCode
+ * NOTE(review): the object count relies on m_layout rather than on the
+ * layout stored with the object - confirm they always agree.
+ * NOTE(review): on the -EINVAL path finish_adding_requests is never
+ * called, so the callback of `multi_completion` presumably never runs -
+ * confirm callers cope with that.
+ */
+int libradosstriper::RadosStriperImpl::internal_aio_remove(
+  const std::string& soid,
+  MultiAioCompletionImplPtr multi_completion,
+  int flags)
+{
+  try {
+    // check size and get number of rados objects to delete
+    uint64_t nb_objects = 0;
+    bufferlist bl2;
+    int rc = getxattr(soid, XATTR_SIZE, bl2);
+    if (rc < 0) {
+      // no object size (or not able to get it)
+      // try to find the number of object "by hand"
+      uint64_t psize;
+      time_t pmtime;
+      while (!m_ioCtx.stat(getObjectId(soid, nb_objects), &psize, &pmtime)) {
+        nb_objects++;
+      }
+    } else {
+      // count total number of rados objects in the striped object
+      std::string err;
+      // this intermediate string allows to add a null terminator before calling strtol
+      std::string strsize(bl2.c_str(), bl2.length());
+      uint64_t size = strict_strtoll(strsize.c_str(), 10, &err);
+      if (!err.empty()) {
+        lderr(cct()) << XATTR_SIZE << " : " << err << dendl;
+
+        return -EINVAL;
+      }
+      uint64_t object_size = m_layout.fl_object_size;
+      uint64_t su = m_layout.fl_stripe_unit;
+      uint64_t stripe_count = m_layout.fl_stripe_count;
+      // full object sets, plus the objects touched in the last partial set
+      uint64_t nb_complete_sets = size / (object_size*stripe_count);
+      uint64_t remaining_data = size % (object_size*stripe_count);
+      uint64_t remaining_stripe_units = (remaining_data + su -1) / su;
+      uint64_t remaining_objects = std::min(remaining_stripe_units, stripe_count);
+      nb_objects = nb_complete_sets * stripe_count + remaining_objects;
+    }
+    // delete rados objects in reverse order
+    // Note that we do not drop the first object. This one will only be dropped
+    // if all other removals have been successful, and this is done in the
+    // callback of the multi_completion object
+    int rcr = 0;
+    // 64 bit index: the count is not narrowed to int; the body sees
+    // nb_objects-1 down to 1 and never runs when nb_objects <= 1
+    for (uint64_t i = nb_objects; i-- > 1; ) {
+      multi_completion->add_request();
+      auto data = ceph::make_ref<RadosRemoveCompletionData>(multi_completion, cct());
+      librados::AioCompletion *rados_completion =
+        librados::Rados::aio_create_completion(data->get() /* create ref! */,
+                                               rados_req_remove_complete);
+      if (flags == 0) {
+        rcr = m_ioCtx.aio_remove(getObjectId(soid, i), rados_completion);
+      } else {
+        rcr = m_ioCtx.aio_remove(getObjectId(soid, i), rados_completion, flags);
+      }
+      rados_completion->release();
+      if (rcr < 0 and -ENOENT != rcr) {
+        // report the code of the failed removal (rcr), not the one of the
+        // earlier getxattr
+        lderr(cct()) << "RadosStriperImpl::remove : deletion incomplete for " << soid
+                     << ", as " << getObjectId(soid, i) << " could not be deleted (rc=" << rcr << ")"
+                     << dendl;
+        break;
+      }
+    }
+    // we are over adding requests to the multi_completion object
+    multi_completion->finish_adding_requests();
+    // return
+    return rcr;
+  } catch (ErrorCode &e) {
+    // error caught when trying to take the exclusive lock
+    return e.m_code;
+  }
+
+}
+
+// Synchronously resizes striped object `soid` to `size`, shrinking or
+// growing it as needed, under an exclusive lock on its first rados
+// object. Returns the non-zero code of the locking operation if it
+// failed, else the code of the layout/size loading or of the resize.
+int libradosstriper::RadosStriperImpl::trunc(const std::string& soid, uint64_t size)
+{
+  const std::string firstObjOid = getObjectId(soid, 0);
+  // existence check and exclusive locking go in one single operation
+  librados::ObjectWriteOperation lock_op;
+  lock_op.assert_exists();
+  const std::string lockCookie = RadosStriperImpl::getUUID();
+  const utime_t duration = utime_t();
+  rados::cls::lock::lock(&lock_op, RADOS_LOCK_NAME, ClsLockType::EXCLUSIVE, lockCookie, "", "", duration, 0);
+  int rc = m_ioCtx.operate(firstObjOid, &lock_op);
+  if (rc != 0) {
+    return rc;
+  }
+  // load layout and current size, then resize if they differ
+  ceph_file_layout layout;
+  uint64_t original_size;
+  rc = internal_get_layout_and_size(firstObjOid, &layout, &original_size);
+  if (rc == 0 && size != original_size) {
+    rc = (size < original_size)
+      ? truncate(soid, original_size, size, layout)
+      : grow(soid, original_size, size, layout);
+  }
+  // the outcome of the unlocking is ignored, not much can be done about it
+  m_ioCtx.unlock(firstObjOid, RADOS_LOCK_NAME, lockCookie);
+  return rc;
+}
+
+
+///////////////////////// private helpers /////////////////////////////
+
+// Builds the name of rados object number `objectno` of striped object
+// `soid`: the striped object name, a dot, then the number printed in
+// hexadecimal and left-padded with zeros to 16 characters.
+std::string libradosstriper::RadosStriperImpl::getObjectId(const object_t& soid,
+                                                           long long unsigned objectno)
+{
+  std::ostringstream name;
+  name << soid << '.';
+  name << std::setfill ('0') << std::setw(16) << std::hex << objectno;
+  return name.str();
+}
+
+// Synchronously releases the lock held with `lockCookie` on the first
+// rados object of striped object `soid`. The outcome of the unlocking
+// is not reported.
+void libradosstriper::RadosStriperImpl::unlockObject(const std::string& soid,
+                                                     const std::string& lockCookie)
+{
+  m_ioCtx.unlock(getObjectId(soid, 0), RADOS_LOCK_NAME, lockCookie);
+}
+
+// Asynchronously releases the lock held with `lockCookie` on the first
+// rados object of striped object `soid`; `c` is the completion given to
+// IoCtx::aio_unlock.
+void libradosstriper::RadosStriperImpl::aio_unlockObject(const std::string& soid,
+                                                         const std::string& lockCookie,
+                                                         librados::AioCompletion *c)
+{
+  m_ioCtx.aio_unlock(getObjectId(soid, 0), RADOS_LOCK_NAME, lockCookie, c);
+}
+
+// Callback run when the unlocking that follows a striped write is over:
+// reports the return value of the completion to the WriteCompletionData.
+static void rados_write_aio_unlock_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  auto *completion = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  // `arg` carries the reference created for this callback; the ref_t is
+  // built with `false` and goes away when we return
+  ceph::ref_t<WriteCompletionData> cdata(static_cast<WriteCompletionData*>(arg), false);
+  cdata->complete_unlock(completion->rval);
+}
+
+// Callback run by the multi completion once all per-object writes of a
+// striped write are complete: launches the unlocking of the striped
+// object, then reports the completion of the write itself.
+static void striper_write_aio_req_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  // `arg` carries the reference created for this callback; the ref_t is
+  // built with `false` and goes away when we return
+  ceph::ref_t<WriteCompletionData> cdata(static_cast<WriteCompletionData*>(arg), false);
+  auto *completion = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  // the unlocking is asynchronous ...
+  cdata->m_striper->aio_unlockObject(cdata->m_soid, cdata->m_lockCookie, cdata->m_unlockCompletion);
+  // ... so the write part gets completed in parallel
+  cdata->complete_write(completion->rval);
+}
+
+// Callback run by the multi completion once all per-object writes of a
+// striped write are safe: forwards the return value of the completion
+// to the WriteCompletionData.
+static void striper_write_aio_req_safe(rados_striper_multi_completion_t c, void *arg)
+{
+  auto *completion = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  // `arg` carries the reference created for this callback; the ref_t is
+  // built with `false` and goes away when we return
+  ceph::ref_t<WriteCompletionData> cdata(static_cast<WriteCompletionData*>(arg), false);
+  cdata->safe(completion->rval);
+}
+
+// Synchronously writes `len` bytes of `bl` at offset `off` of striped
+// object `soid`, which must already be open: `layout` is its layout and
+// `lockCookie` the cookie of the lock held on it.
+// The write goes through internal_aio_write; when it could be started,
+// this method waits for its completion, its safety and the unlocking of
+// the object, then returns the return value of the multi completion.
+// Otherwise the code of internal_aio_write is returned right away.
+int libradosstriper::RadosStriperImpl::write_in_open_object(const std::string& soid,
+ const ceph_file_layout& layout,
+ const std::string& lockCookie,
+ const bufferlist& bl,
+ size_t len,
+ uint64_t off) {
+ // create a completion object to be passed to the callbacks of the multicompletion
+ // three extra references are created on it below (cdata->get()), one for
+ // each of the unlock, complete and safe callbacks
+ // there is no user completion in the synchronous case, hence the nullptr
+ auto cdata = ceph::make_ref<WriteCompletionData>(this, soid, lockCookie, nullptr);
+ // create a completion object for the unlocking of the striped object at the end of the write
+ librados::AioCompletion *unlock_completion =
+ librados::Rados::aio_create_completion(cdata->get() /* create ref! */, rados_write_aio_unlock_complete);
+ cdata->m_unlockCompletion = unlock_completion;
+ // create the multicompletion that will handle the write completion
+ MultiAioCompletionImplPtr c{new libradosstriper::MultiAioCompletionImpl,
+ false};
+ c->set_complete_callback(cdata->get() /* create ref! */, striper_write_aio_req_complete);
+ c->set_safe_callback(cdata->get() /* create ref! */, striper_write_aio_req_safe);
+ // call the asynchronous API
+ int rc = internal_aio_write(soid, c, bl, len, off, layout);
+ if (!rc) {
+ // wait for completion and safety of data
+ c->wait_for_complete_and_cb();
+ c->wait_for_safe_and_cb();
+ // wait for the unlocking
+ unlock_completion->wait_for_complete();
+ // return result
+ rc = c->get_return_value();
+ }
+ return rc;
+}
+
+// Asynchronously writes `len` bytes of `bl` at offset `off` of striped
+// object `soid`, which must already be open: `layout` is its layout and
+// `lockCookie` the cookie of the lock held on it. `c` is the user
+// completion.
+// Returns the return code of internal_aio_write; nothing is waited for.
+int libradosstriper::RadosStriperImpl::aio_write_in_open_object(const std::string& soid,
+ librados::AioCompletionImpl *c,
+ const ceph_file_layout& layout,
+ const std::string& lockCookie,
+ const bufferlist& bl,
+ size_t len,
+ uint64_t off) {
+ // create a completion object to be passed to the callbacks of the multicompletion
+ // three extra references are created on it below (cdata->get()), one for
+ // each of the unlock, complete and safe callbacks
+ auto cdata = ceph::make_ref<WriteCompletionData>(this, soid, lockCookie, c);
+ // the user completion gets attached to our io context, on which a
+ // reference is taken first
+ m_ioCtxImpl->get();
+ c->io = m_ioCtxImpl;
+ // create a completion object for the unlocking of the striped object at the end of the write
+ librados::AioCompletion *unlock_completion =
+ librados::Rados::aio_create_completion(cdata->get() /* create ref! */, rados_write_aio_unlock_complete);
+ cdata->m_unlockCompletion = unlock_completion;
+ // create the multicompletion that will handle the write completion
+ libradosstriper::MultiAioCompletionImplPtr nc{
+ new libradosstriper::MultiAioCompletionImpl, false};
+ nc->set_complete_callback(cdata->get() /* create ref! */, striper_write_aio_req_complete);
+ nc->set_safe_callback(cdata->get() /* create ref! */, striper_write_aio_req_safe);
+ // internal asynchronous API
+ int rc = internal_aio_write(soid, nc, bl, len, off, layout);
+ return rc;
+}
+
+// Callback run when the asynchronous write of one rados object, issued
+// by internal_aio_write, is complete: reports the return value of the
+// write to the multi completion given as `arg`, first as complete and
+// then as safe.
+static void rados_req_write_complete(rados_completion_t c, void *arg)
+{
+  auto *multi = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(arg);
+  multi->complete_request(rados_aio_get_return_value(c));
+  multi->safe_request(rados_aio_get_return_value(c));
+}
+
+// Splits the write of `len` bytes of `bl` at offset `off` of striped
+// object `soid` into per-object extents according to `layout`, and
+// issues one asynchronous rados write per extent, each registered as a
+// request of multi completion `c`.
+// Returns the return code of the last rados write issued, or 0 when
+// there was nothing to write.
+int
+libradosstriper::RadosStriperImpl::internal_aio_write(const std::string& soid,
+                                                      libradosstriper::MultiAioCompletionImplPtr c,
+                                                      const bufferlist& bl,
+                                                      size_t len,
+                                                      uint64_t off,
+                                                      const ceph_file_layout& layout)
+{
+  // Nothing is attempted for an empty buffer, as file_to_extents would
+  // raise an exception
+  if (len == 0) {
+    c->finish_adding_requests();
+    return 0;
+  }
+  // the object name is used as a format string: escape any '%' it contains
+  std::string name_format = soid;
+  boost::replace_all(name_format, "%", "%%");
+  name_format += RADOS_OBJECT_EXTENSION_FORMAT;
+  file_layout_t converted_layout;
+  converted_layout.from_legacy(layout);
+  // compute the list of extents to be written to
+  vector<ObjectExtent> extents;
+  Striper::file_to_extents(cct(), name_format.c_str(), &converted_layout, off, len, 0, extents);
+  int r = 0;
+  for (auto& extent : extents) {
+    // gather the pieces going to this object into one single buffer list
+    bufferlist object_data;
+    for (auto& piece : extent.buffer_extents) {
+      bufferlist piece_bl;
+      piece_bl.substr_of(bl, piece.first, piece.second);
+      object_data.append(piece_bl);
+    }
+    // then write them in one go
+    c->add_request();
+    librados::AioCompletion *rados_completion =
+      librados::Rados::aio_create_completion(c.get(),
+                                             rados_req_write_complete);
+    r = m_ioCtx.aio_write(extent.oid.name, rados_completion, object_data,
+                          extent.length, extent.offset);
+    rados_completion->release();
+    // stop at the first write that could not be started
+    if (r < 0) {
+      break;
+    }
+  }
+  c->finish_adding_requests();
+  return r;
+}
+
+// Looks up `key` in `attrs` and parses its value as a decimal number
+// into `*value`.
+// Returns 0 on success, -ENOENT when the key is absent and -EINVAL
+// (after logging) when the value could not be parsed.
+int libradosstriper::RadosStriperImpl::extract_uint32_attr
+(std::map<std::string, bufferlist> &attrs,
+ const std::string& key,
+ ceph_le32 *value)
+{
+  const auto found = attrs.find(key);
+  if (found == attrs.end()) {
+    return -ENOENT;
+  }
+  // copy into a std::string to get a null terminator before parsing
+  const std::string text(found->second.c_str(), found->second.length());
+  std::string parse_error;
+  *value = strict_strtol(text.c_str(), 10, &parse_error);
+  if (parse_error.empty()) {
+    return 0;
+  }
+  lderr(cct()) << key << " : " << parse_error << dendl;
+  return -EINVAL;
+}
+
+// Looks up `key` in `attrs` and parses its value as a decimal number
+// into `*value`.
+// Returns 0 on success, -ENOENT when the key is absent and -EINVAL
+// (after logging) when the value could not be parsed.
+int libradosstriper::RadosStriperImpl::extract_sizet_attr
+(std::map<std::string, bufferlist> &attrs,
+ const std::string& key,
+ size_t *value)
+{
+  const auto found = attrs.find(key);
+  if (found == attrs.end()) {
+    return -ENOENT;
+  }
+  // copy into a std::string to get a null terminator before parsing
+  const std::string text(found->second.c_str(), found->second.length());
+  std::string parse_error;
+  *value = strict_strtoll(text.c_str(), 10, &parse_error);
+  if (parse_error.empty()) {
+    return 0;
+  }
+  lderr(cct()) << key << " : " << parse_error << dendl;
+  return -EINVAL;
+}
+
+/**
+ * loads the layout and the size of a striped object from the external
+ * attributes of the given rados object
+ * @param oid id of the rados object holding the attributes
+ * @param layout filled with stripe unit, stripe count and object size
+ * @param size filled with the size attribute
+ * @return 0 on success, otherwise the first non zero code returned by
+ * getxattrs or by one of the attribute extractions
+ */
+int libradosstriper::RadosStriperImpl::internal_get_layout_and_size(
+  const std::string& oid,
+  ceph_file_layout *layout,
+  uint64_t *size)
+{
+  // all attributes are fetched at once, then parsed one by one ;
+  // each step only runs if every previous one succeeded
+  std::map<std::string, bufferlist> xattrs;
+  int rc = m_ioCtx.getxattrs(oid, xattrs);
+  if (0 == rc)
+    rc = extract_uint32_attr(xattrs, XATTR_LAYOUT_STRIPE_UNIT, &layout->fl_stripe_unit);
+  if (0 == rc)
+    rc = extract_uint32_attr(xattrs, XATTR_LAYOUT_STRIPE_COUNT, &layout->fl_stripe_count);
+  if (0 == rc)
+    rc = extract_uint32_attr(xattrs, XATTR_LAYOUT_OBJECT_SIZE, &layout->fl_object_size);
+  size_t parsedSize;
+  if (0 == rc)
+    rc = extract_sizet_attr(xattrs, XATTR_SIZE, &parsedSize);
+  if (0 != rc)
+    return rc;
+  *size = parsedSize;
+  // fl_pg_pool is not used, it is only set to keep valgrind quiet
+  layout->fl_pg_pool = 0;
+  return 0;
+}
+
+// Takes a shared lock on the first rados object of the striped object and
+// loads its layout and size. On failure the error code is returned and the
+// lock, if it had been taken, is released again.
+int libradosstriper::RadosStriperImpl::openStripedObjectForRead(
+  const std::string& soid,
+  ceph_file_layout *layout,
+  uint64_t *size,
+  std::string *lockCookie)
+{
+  *lockCookie = getUUID();
+  const std::string firstObjOid = getObjectId(soid, 0);
+  // existence check and locking have to be atomic,
+  // so both are packed into one single operation
+  librados::ObjectWriteOperation lockOp;
+  lockOp.assert_exists();
+  utime_t duration = utime_t();
+  rados::cls::lock::lock(&lockOp, RADOS_LOCK_NAME, ClsLockType::SHARED, *lockCookie, "Tag", "", duration, 0);
+  int rc = m_ioCtx.operate(firstObjOid, &lockOp);
+  if (0 != rc) {
+    // nothing was locked (this includes the -ENOENT case)
+    return rc;
+  }
+  rc = internal_get_layout_and_size(firstObjOid, layout, size);
+  if (0 == rc) {
+    return 0;
+  }
+  // the lock was taken but the object is unusable : give the lock back
+  unlockObject(soid, *lockCookie);
+  lderr(cct()) << "RadosStriperImpl::openStripedObjectForRead : "
+               << "could not load layout and size for "
+               << soid << " : rc = " << rc << dendl;
+  return rc;
+}
+
+// Takes a shared lock on the first rados object of the striped object, loads
+// its layout and raises the size attribute to the requested size when that
+// one is bigger. A missing object is created via createAndOpenStripedObject.
+// On success *size holds the size the object had before the call.
+int libradosstriper::RadosStriperImpl::openStripedObjectForWrite(const std::string& soid,
+                                                                  ceph_file_layout *layout,
+                                                                  uint64_t *size,
+                                                                  std::string *lockCookie,
+                                                                  bool isFileSizeAbsolute)
+{
+  *lockCookie = getUUID();
+  const std::string firstObjOid = getObjectId(soid, 0);
+  // existence check and locking have to be atomic,
+  // so both are packed into one single operation
+  librados::ObjectWriteOperation lockOp;
+  lockOp.assert_exists();
+  utime_t duration = utime_t();
+  rados::cls::lock::lock(&lockOp, RADOS_LOCK_NAME, ClsLockType::SHARED, *lockCookie, "Tag", "", duration, 0);
+  int rc = m_ioCtx.operate(firstObjOid, &lockOp);
+  if (-ENOENT == rc) {
+    // no such object yet : create it, the creation path opens it as well
+    const int createRc = createAndOpenStripedObject(soid, layout, *size, lockCookie, isFileSizeAbsolute);
+    // the object did not exist, so its size before the call was 0
+    *size = 0;
+    return createRc;
+  }
+  if (0 != rc) {
+    return rc;
+  }
+  // the lock is held from here on, every error path has to release it
+  uint64_t previousSize;
+  rc = internal_get_layout_and_size(firstObjOid, layout, &previousSize);
+  if (0 != rc) {
+    unlockObject(soid, *lockCookie);
+    lderr(cct()) << "RadosStriperImpl::openStripedObjectForWrite : "
+                 << "could not load layout and size for "
+                 << soid << " : rc = " << rc << dendl;
+    return rc;
+  }
+  // a relative size is an increment on top of the current size
+  if (!isFileSizeAbsolute) {
+    *size += previousSize;
+  }
+  // comparison and update of the size attribute travel in one operation
+  librados::ObjectWriteOperation sizeOp;
+  sizeOp.cmpxattr(XATTR_SIZE, LIBRADOS_CMPXATTR_OP_GT, *size);
+  bufferlist sizeBl;
+  sizeBl.append(std::to_string(*size));
+  sizeOp.setxattr(XATTR_SIZE, sizeBl);
+  rc = m_ioCtx.operate(firstObjOid, &sizeOp);
+  // hand the previous size back to the caller
+  *size = previousSize;
+  // -ECANCELED means that the comparison failed, this is not an error
+  if (-ECANCELED == rc) {
+    rc = 0;
+  }
+  if (0 != rc) {
+    unlockObject(soid, *lockCookie);
+    lderr(cct()) << "RadosStriperImpl::openStripedObjectForWrite : "
+                 << "could not set new size for "
+                 << soid << " : rc = " << rc << dendl;
+  }
+  return rc;
+}
+
+// Creates the first rados object of a striped object, with the default layout
+// and the initial size stored as external attributes, then opens it through
+// openStripedObjectForWrite. An already existing object (-EEXIST) is opened
+// all the same.
+int libradosstriper::RadosStriperImpl::createAndOpenStripedObject(const std::string& soid,
+                                                                   ceph_file_layout *layout,
+                                                                   uint64_t size,
+                                                                   std::string *lockCookie,
+                                                                   bool isFileSizeAbsolute)
+{
+  // creation and attribute settings are grouped into one single operation
+  librados::ObjectWriteOperation createOp;
+  createOp.create(true);
+  // attributes are stored as text : stream the value and queue the setxattr
+  auto queueXattr = [&createOp](const char *name, auto value) {
+    std::ostringstream oss;
+    oss << value;
+    bufferlist bl;
+    bl.append(oss.str());
+    createOp.setxattr(name, bl);
+  };
+  queueXattr(XATTR_LAYOUT_OBJECT_SIZE, m_layout.fl_object_size);
+  queueXattr(XATTR_LAYOUT_STRIPE_UNIT, m_layout.fl_stripe_unit);
+  queueXattr(XATTR_LAYOUT_STRIPE_COUNT, m_layout.fl_stripe_count);
+  // a relative size starts from an empty object
+  const uint64_t initialSize = isFileSizeAbsolute ? size : 0;
+  queueXattr(XATTR_SIZE, initialSize);
+  // run the operation on the first rados object
+  const std::string firstObjOid = getObjectId(soid, 0);
+  const int rc = m_ioCtx.operate(firstObjOid, &createOp);
+  // -EEXIST only tells that the object was there already, go on in that case
+  if (0 != rc && -EEXIST != rc) {
+    return rc;
+  }
+  uint64_t fileSize = size;
+  return openStripedObjectForWrite(soid, layout, &fileSize, lockCookie, isFileSizeAbsolute);
+}
+
+// Completion callback of a truncation : when the multi completion reports
+// success (rval == 0), stores the new size in the size attribute of the
+// striped object. The result of that setxattr call is not checked.
+static void striper_truncate_aio_req_complete(rados_striper_multi_completion_t c, void *arg)
+{
+  // the smart pointer is built with add_ref = false : it takes over the
+  // reference carried by arg rather than adding one
+  ceph::ref_t<TruncateCompletionData> completionData(static_cast<TruncateCompletionData*>(arg), false);
+  auto *multiCompletion = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  if (0 != multiCompletion->rval) {
+    return;
+  }
+  std::ostringstream sizeText;
+  sizeText << completionData->m_size;
+  bufferlist sizeBl;
+  sizeBl.append(sizeText.str());
+  completionData->m_striper->setxattr(completionData->m_soid, XATTR_SIZE, sizeBl);
+}
+
+// Synchronous truncation : runs aio_truncate and waits for its completion
+// and for the completion callback. Returns the code of aio_truncate if it is
+// not 0, the return value of the multi completion otherwise.
+int libradosstriper::RadosStriperImpl::truncate(const std::string& soid,
+                                                uint64_t original_size,
+                                                uint64_t size,
+                                                ceph_file_layout &layout)
+{
+  auto completionData = ceph::make_ref<TruncateCompletionData>(this, soid, size);
+  libradosstriper::MultiAioCompletionImplPtr multi_completion{
+    new libradosstriper::MultiAioCompletionImpl, false};
+  multi_completion->set_complete_callback(completionData->get() /* create ref! */, striper_truncate_aio_req_complete);
+  // launch the asynchronous truncation
+  const int submitRc = aio_truncate(soid, multi_completion, original_size, size, layout);
+  // no more request will be added, wait for the ones in flight
+  multi_completion->finish_adding_requests();
+  multi_completion->wait_for_complete_and_cb();
+  // a failure of the submission wins over the result of the completion
+  return (0 != submitRc) ? submitRc : multi_completion->get_return_value();
+}
+
+/**
+ * truncates the rados objects of a striped object from original_size to size.
+ * Removals are asynchronous and registered on multi_completion, the
+ * truncation of a rados object is synchronous.
+ * @param soid name of the striped object
+ * @param multi_completion completion on which asynchronous removals are registered
+ * @param original_size size of the striped object before the truncation
+ * @param size size of the striped object after the truncation
+ * @param layout layout of the striped object
+ * @return 0 on success, -EINVAL if the object size or the stripe count of the
+ * layout is 0, otherwise the first error (other than -ENOENT) returned when
+ * truncating or when submitting a removal
+ */
+int libradosstriper::RadosStriperImpl::aio_truncate
+(const std::string& soid,
+ libradosstriper::MultiAioCompletionImplPtr multi_completion,
+ uint64_t original_size,
+ uint64_t size,
+ ceph_file_layout &layout)
+{
+  // handle the underlying rados objects. 3 cases here :
+  //  -- the objects belonging to object sets entirely located
+  //     before the truncation are unchanged
+  //  -- the objects belonging to the object set where the
+  //     truncation took place are truncated or removed
+  //  -- the objects belonging to object sets entirely located
+  //     after the truncation are removed
+  // Note that we do it backward and that we change the size in
+  // the external attributes only at the end. This makes sure that
+  // no rados object stays behind if we remove the striped object
+  // after a truncation has failed
+  const uint64_t stripe_count = layout.fl_stripe_count;
+  const uint64_t stripe_unit = layout.fl_stripe_unit;
+  const uint64_t object_size = layout.fl_object_size;
+  // both values are used as divisors below
+  if (0 == stripe_count || 0 == object_size) {
+    return -EINVAL;
+  }
+  // offset, within the striped object, of the first byte stored in the given
+  // rados object : start of its object set plus its stripe unit rank in the set
+  auto object_start_offset = [&](uint64_t objectno) {
+    const uint64_t objectsetno = objectno / stripe_count;
+    const uint64_t index_in_set = objectno % stripe_count;
+    return objectsetno * object_size * stripe_count + index_in_set * stripe_unit;
+  };
+  // registers and submits the asynchronous removal of the given rados object
+  auto remove_async = [&](uint64_t objectno) {
+    multi_completion->add_request();
+    auto data = ceph::make_ref<RadosRemoveCompletionData>(multi_completion, cct());
+    librados::AioCompletion *rados_completion =
+      librados::Rados::aio_create_completion(data->get() /* create ref! */,
+                                             rados_req_remove_complete);
+    const int rc = m_ioCtx.aio_remove(getObjectId(soid, objectno), rados_completion);
+    rados_completion->release();
+    return rc;
+  };
+  const uint64_t trunc_objectsetno = size / object_size / stripe_count;
+  const uint64_t last_objectsetno = original_size / object_size / stripe_count;
+  // objects are visited backward : once one of them is known to hold data,
+  // all the following (lower) ones do too
+  bool exists = false;
+  // object sets entirely located after the truncation : remove everything
+  for (int64_t objectno = (last_objectsetno + 1) * stripe_count - 1;
+       objectno >= static_cast<int64_t>((trunc_objectsetno + 1) * stripe_count);
+       objectno--) {
+    if (!exists) {
+      exists = (original_size > object_start_offset(objectno));
+    }
+    if (exists) {
+      const int rc = remove_async(objectno);
+      // in case the object did not exist, it means we had a sparse file, all is fine
+      if (rc && rc != -ENOENT) return rc;
+    }
+  }
+  // object set where the truncation takes place : truncate or remove
+  file_layout_t l;
+  l.from_legacy(layout);
+  for (int64_t objectno = ((trunc_objectsetno + 1) * stripe_count) - 1;
+       objectno >= static_cast<int64_t>(trunc_objectsetno * stripe_count);
+       objectno--) {
+    if (!exists) {
+      exists = (original_size > object_start_offset(objectno));
+    }
+    if (exists) {
+      const uint64_t new_object_size = Striper::object_truncate_size(cct(), &l, objectno, size);
+      int rc;
+      if (new_object_size > 0 || 0 == objectno) {
+        // the very first object is never removed, it is truncated even to 0.
+        // trunc is synchronous as there is no async version
+        // but note that only a single object will be truncated
+        // reducing the overhead to a fixed amount
+        rc = m_ioCtx.trunc(getObjectId(soid, objectno), new_object_size);
+      } else {
+        // removes are asynchronous in order to speed up truncations of big files
+        rc = remove_async(objectno);
+      }
+      // in case the object did not exist, it means we had a sparse file, all is fine
+      if (rc && rc != -ENOENT) return rc;
+    }
+  }
+  return 0;
+}
+
+// Grows a striped object. As sparse objects are supported, no rados object
+// has to be touched : only the size attribute held by the first rados object
+// is rewritten. original_size and layout are not used.
+int libradosstriper::RadosStriperImpl::grow(const std::string& soid,
+                                            uint64_t original_size,
+                                            uint64_t size,
+                                            ceph_file_layout &layout)
+{
+  bufferlist sizeBl;
+  sizeBl.append(std::to_string(size));
+  return m_ioCtx.setxattr(getObjectId(soid, 0), XATTR_SIZE, sizeBl);
+}
+
+// Returns the textual form of a freshly generated random uuid.
+std::string libradosstriper::RadosStriperImpl::getUUID()
+{
+  uuid_d generated;
+  generated.generate_random();
+  // presumably 36 characters of uuid text plus the null terminator
+  char text[37];
+  generated.print(text);
+  return std::string(text);
+}
diff --git a/src/libradosstriper/RadosStriperImpl.h b/src/libradosstriper/RadosStriperImpl.h
new file mode 100644
index 000000000..8226a9ba2
--- /dev/null
+++ b/src/libradosstriper/RadosStriperImpl.h
@@ -0,0 +1,276 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#ifndef CEPH_LIBRADOSSTRIPER_RADOSSTRIPERIMPL_H
+#define CEPH_LIBRADOSSTRIPER_RADOSSTRIPERIMPL_H
+
+#include <string>
+
+#include <boost/intrusive_ptr.hpp>
+
+#include "include/rados/librados.h"
+#include "include/rados/librados.hpp"
+#include "include/radosstriper/libradosstriper.h"
+#include "include/radosstriper/libradosstriper.hpp"
+#include "MultiAioCompletionImpl.h"
+
+#include "librados/IoCtxImpl.h"
+#include "librados/AioCompletionImpl.h"
+#include "common/RefCountedObj.h"
+#include "common/ceph_context.h"
+
+namespace libradosstriper {
+
+using MultiAioCompletionImplPtr =
+ boost::intrusive_ptr<MultiAioCompletionImpl>;
+
+struct RadosStriperImpl {
+
+  /**
+   * exception wrapper around an error code
+   */
+  struct ErrorCode {
+    ErrorCode(int error) : m_code(error) {}
+    int m_code;
+  };
+
+  /**
+   * Constructor
+   * @param ioctx io context handed over by the caller
+   * @param ioctx_impl implementation object of an io context
+   * NOTE(review): presumably these end up in m_ioCtx and m_ioCtxImpl ;
+   * the definition is not part of this header, to be confirmed there
+   */
+  RadosStriperImpl(librados::IoCtx& ioctx, librados::IoCtxImpl *ioctx_impl);
+  /// Destructor
+  ~RadosStriperImpl() {}
+
+  // configuration
+  int setObjectLayoutStripeUnit(unsigned int stripe_unit);
+  int setObjectLayoutStripeCount(unsigned int stripe_count);
+  int setObjectLayoutObjectSize(unsigned int object_size);
+
+  // xattrs
+  int getxattr(const object_t& soid, const char *name, bufferlist& bl);
+  int setxattr(const object_t& soid, const char *name, bufferlist& bl);
+  int getxattrs(const object_t& soid, std::map<std::string, bufferlist>& attrset);
+  int rmxattr(const object_t& soid, const char *name);
+
+  // io
+  int write(const std::string& soid, const bufferlist& bl, size_t len, uint64_t off);
+  int append(const std::string& soid, const bufferlist& bl, size_t len);
+  int write_full(const std::string& soid, const bufferlist& bl);
+  int read(const std::string& soid, bufferlist* pbl, size_t len, uint64_t off);
+
+  // asynchronous io
+  int aio_write(const std::string& soid, librados::AioCompletionImpl *c,
+                const bufferlist& bl, size_t len, uint64_t off);
+  int aio_append(const std::string& soid, librados::AioCompletionImpl *c,
+                 const bufferlist& bl, size_t len);
+  int aio_write_full(const std::string& soid, librados::AioCompletionImpl *c,
+                     const bufferlist& bl);
+  int aio_read(const std::string& soid, librados::AioCompletionImpl *c,
+               bufferlist* pbl, size_t len, uint64_t off);
+  int aio_read(const std::string& soid, librados::AioCompletionImpl *c,
+               char* buf, size_t len, uint64_t off);
+  int aio_flush();
+
+  // stat, deletion and truncation
+  int stat(const std::string& soid, uint64_t *psize, time_t *pmtime);
+  int stat2(const std::string& soid, uint64_t *psize, struct timespec *pts);
+  // type of the IoCtxImpl member function used by aio_generic_stat,
+  // parametrized by the type in which the modification time is returned
+  template<class TimeType>
+  struct StatFunction {
+    typedef int (librados::IoCtxImpl::*Type) (const object_t& oid,
+                                              librados::AioCompletionImpl *c,
+                                              uint64_t *psize, TimeType *pmtime);
+  };
+  template<class TimeType>
+  int aio_generic_stat(const std::string& soid, librados::AioCompletionImpl *c,
+                       uint64_t *psize, TimeType *pmtime,
+                       typename StatFunction<TimeType>::Type statFunction);
+  int aio_stat(const std::string& soid, librados::AioCompletionImpl *c,
+               uint64_t *psize, time_t *pmtime);
+  int aio_stat2(const std::string& soid, librados::AioCompletionImpl *c,
+                uint64_t *psize, struct timespec *pts);
+  int remove(const std::string& soid, int flags=0);
+  int trunc(const std::string& soid, uint64_t size);
+
+  // asynchronous remove. Note that the removal is not 100% parallelized :
+  // the removal of the first rados object of the striped object will be
+  // done via a synchronous call after the completion of all other removals.
+  // These are done asynchronously and in parallel
+  int aio_remove(const std::string& soid, librados::AioCompletionImpl *c, int flags=0);
+
+  // reference counting
+  void get() {
+    std::lock_guard l{lock};
+    m_refCnt ++ ;
+  }
+  void put() {
+    bool deleteme = false;
+    {
+      std::lock_guard l{lock};
+      m_refCnt --;
+      if (m_refCnt == 0)
+        deleteme = true;
+      cond.notify_all();
+    }
+    // the mutex is a member of this object : it has to be
+    // released before the object destroys itself
+    if (deleteme)
+      delete this;
+  }
+
+  // objectid manipulation
+  std::string getObjectId(const object_t& soid, long long unsigned objectno);
+
+  // opening and closing of striped objects
+  void unlockObject(const std::string& soid,
+                    const std::string& lockCookie);
+  void aio_unlockObject(const std::string& soid,
+                        const std::string& lockCookie,
+                        librados::AioCompletion *c);
+
+  // internal versions of IO method
+  int write_in_open_object(const std::string& soid,
+                           const ceph_file_layout& layout,
+                           const std::string& lockCookie,
+                           const bufferlist& bl,
+                           size_t len,
+                           uint64_t off);
+  int aio_write_in_open_object(const std::string& soid,
+                               librados::AioCompletionImpl *c,
+                               const ceph_file_layout& layout,
+                               const std::string& lockCookie,
+                               const bufferlist& bl,
+                               size_t len,
+                               uint64_t off);
+  int internal_aio_write(const std::string& soid,
+                         MultiAioCompletionImplPtr c,
+                         const bufferlist& bl,
+                         size_t len,
+                         uint64_t off,
+                         const ceph_file_layout& layout);
+
+  int extract_uint32_attr(std::map<std::string, bufferlist> &attrs,
+                          const std::string& key,
+                          ceph_le32 *value);
+
+  int extract_sizet_attr(std::map<std::string, bufferlist> &attrs,
+                         const std::string& key,
+                         size_t *value);
+
+  int internal_get_layout_and_size(const std::string& oid,
+                                   ceph_file_layout *layout,
+                                   uint64_t *size);
+
+  int internal_aio_remove(const std::string& soid,
+                          MultiAioCompletionImplPtr multi_completion,
+                          int flags=0);
+
+  /**
+   * opens an existing striped object and takes a shared lock on it
+   * @return 0 if everything is ok and the lock was taken. -errcode otherwise
+   * In particular, if the striped object does not exist, -ENOENT is returned
+   * In case the return code is not 0, no lock is taken
+   */
+  int openStripedObjectForRead(const std::string& soid,
+                               ceph_file_layout *layout,
+                               uint64_t *size,
+                               std::string *lockCookie);
+
+  /**
+   * opens an existing striped object, takes a shared lock on it
+   * and sets its size to the size it will have after the write.
+   * In case the striped object does not exist, it will create it by
+   * calling createAndOpenStripedObject.
+   * @param layout this is filled with the layout of the file
+   * @param size new size of the file (together with isFileSizeAbsolute)
+   * In case of success, this is filled with the size of the file before the opening
+   * @param isFileSizeAbsolute if false, this means that the given size should
+   * be added to the current file size (append mode)
+   * @return 0 if everything is ok and the lock was taken. -errcode otherwise
+   * In case the return code is not 0, no lock is taken
+   */
+  int openStripedObjectForWrite(const std::string& soid,
+                                ceph_file_layout *layout,
+                                uint64_t *size,
+                                std::string *lockCookie,
+                                bool isFileSizeAbsolute);
+  /**
+   * creates an empty striped object with the given size and opens it calling
+   * openStripedObjectForWrite, which implies taking a shared lock on it
+   * Also deals with the cases where the object was created in the mean time
+   * @param isFileSizeAbsolute if false, this means that the given size should
+   * be added to the current file size (append mode). This of course only makes
+   * sense in case the striped object already exists
+   * @return 0 if everything is ok and the lock was taken. -errcode otherwise
+   * In case the return code is not 0, no lock is taken
+   */
+  int createAndOpenStripedObject(const std::string& soid,
+                                 ceph_file_layout *layout,
+                                 uint64_t size,
+                                 std::string *lockCookie,
+                                 bool isFileSizeAbsolute);
+
+  /**
+   * truncates an object synchronously. Should only be called with size < original_size
+   */
+  int truncate(const std::string& soid,
+               uint64_t original_size,
+               uint64_t size,
+               ceph_file_layout &layout);
+
+  /**
+   * truncates an object asynchronously. Should only be called with size < original_size
+   * note that the method is not 100% asynchronous, only the removal of rados objects
+   * is, the (potential) truncation of the rados object residing just at the truncation
+   * point is synchronous for lack of asynchronous truncation in the rados layer
+   */
+  int aio_truncate(const std::string& soid,
+                   MultiAioCompletionImplPtr c,
+                   uint64_t original_size,
+                   uint64_t size,
+                   ceph_file_layout &layout);
+
+  /**
+   * grows an object (adding 0s). Should only be called with size > original_size
+   */
+  int grow(const std::string& soid,
+           uint64_t original_size,
+           uint64_t size,
+           ceph_file_layout &layout);
+
+  /**
+   * creates a unique identifier
+   */
+  static std::string getUUID();
+
+  /// context of the rados cluster, as exposed by m_radosCluster
+  CephContext *cct() {
+    return static_cast<CephContext*>(m_radosCluster.cct());
+  }
+
+  // reference counting : m_refCnt is only touched with lock held (see get
+  // and put), cond is notified on every put
+  std::condition_variable cond;
+  int m_refCnt;
+  std::mutex lock;
+
+
+  // Context
+  librados::Rados m_radosCluster;
+  librados::IoCtx m_ioCtx;
+  librados::IoCtxImpl *m_ioCtxImpl;
+
+  // Default layout
+  ceph_file_layout m_layout;
+};
+}
+#endif
diff --git a/src/libradosstriper/libradosstriper.cc b/src/libradosstriper/libradosstriper.cc
new file mode 100644
index 000000000..e98dfc179
--- /dev/null
+++ b/src/libradosstriper/libradosstriper.cc
@@ -0,0 +1,669 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ *
+ */
+
+#include <errno.h>
+
+#include "libradosstriper/RadosStriperImpl.h"
+#include "libradosstriper/MultiAioCompletionImpl.h"
+
+#include "include/types.h"
+
+#include "include/radosstriper/libradosstriper.h"
+#include "include/radosstriper/libradosstriper.hpp"
+#include "librados/RadosXattrIter.h"
+
+/*
+ * This file implements the rados striper API.
+ * There are 2 flavours of it :
+ * - the C API, found in include/radosstriper/libradosstriper.h
+ * - the C++ API, found in include/radosstriper/libradosstriper.hpp
+ */
+
+///////////////////////////// C++ API //////////////////////////////
+
+// Drops the reference held on the implementation object. The assertion
+// requires that reference to be the only one left at that point.
+libradosstriper::MultiAioCompletion::~MultiAioCompletion()
+{
+  ceph_assert(pc->ref == 1);
+  pc->put();
+}
+
+/// Forwards the callback and its argument to set_complete_callback of the implementation object.
+int libradosstriper::MultiAioCompletion::set_complete_callback
+(void *cb_arg, rados_callback_t cb)
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  return impl->set_complete_callback(cb_arg, cb);
+}
+
+/// Forwards the callback and its argument to set_safe_callback of the implementation object.
+int libradosstriper::MultiAioCompletion::set_safe_callback
+(void *cb_arg, rados_callback_t cb)
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  return impl->set_safe_callback(cb_arg, cb);
+}
+
+/// Forwards to wait_for_complete of the implementation object.
+void libradosstriper::MultiAioCompletion::wait_for_complete()
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  impl->wait_for_complete();
+}
+
+/// Forwards to wait_for_safe of the implementation object.
+void libradosstriper::MultiAioCompletion::wait_for_safe()
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  impl->wait_for_safe();
+}
+
+/// Returns what is_complete of the implementation object returns.
+bool libradosstriper::MultiAioCompletion::is_complete()
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  return impl->is_complete();
+}
+
+/// Returns what is_safe of the implementation object returns.
+bool libradosstriper::MultiAioCompletion::is_safe()
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  return impl->is_safe();
+}
+
+/// Forwards to wait_for_complete_and_cb of the implementation object.
+void libradosstriper::MultiAioCompletion::wait_for_complete_and_cb()
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  impl->wait_for_complete_and_cb();
+}
+
+/// Forwards to wait_for_safe_and_cb of the implementation object.
+void libradosstriper::MultiAioCompletion::wait_for_safe_and_cb()
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  impl->wait_for_safe_and_cb();
+}
+
+/// Returns what is_complete_and_cb of the implementation object returns.
+bool libradosstriper::MultiAioCompletion::is_complete_and_cb()
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  return impl->is_complete_and_cb();
+}
+
+/// Returns what is_safe_and_cb of the implementation object returns.
+bool libradosstriper::MultiAioCompletion::is_safe_and_cb()
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  return impl->is_safe_and_cb();
+}
+
+/// Returns what get_return_value of the implementation object returns.
+int libradosstriper::MultiAioCompletion::get_return_value()
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  return impl->get_return_value();
+}
+
+// Drops one reference on the implementation object, then destroys this
+// wrapper. The wrapper must not be used after this call.
+// NOTE(review): the destructor run by `delete this` calls put() on the
+// implementation object once more - confirm the reference counts add up.
+void libradosstriper::MultiAioCompletion::release()
+{
+  auto *impl = static_cast<MultiAioCompletionImpl *>(pc);
+  impl->put();
+  delete this;
+}
+
+/// Builds a striper that is not bound to any implementation object yet.
+libradosstriper::RadosStriper::RadosStriper()
+  : rados_striper_impl(nullptr)
+{
+}
+
+/**
+ * exports the implementation object of a striper as a C handle
+ * @param striper the striper to export
+ * @param s filled with the handle. A reference is taken on the implementation
+ * object on behalf of that handle. A striper that has no implementation
+ * object yields a null handle
+ */
+void libradosstriper::RadosStriper::to_rados_striper_t(RadosStriper &striper, rados_striper_t *s)
+{
+  auto *impl = striper.rados_striper_impl;
+  *s = static_cast<rados_striper_t>(impl);
+  // same guard as in the copy constructor : an unbound striper has no impl
+  if (impl) {
+    impl->get();
+  }
+}
+
+/// Shares the implementation object of rs, taking one more reference on it if there is one.
+libradosstriper::RadosStriper::RadosStriper(const RadosStriper& rs)
+  : rados_striper_impl(rs.rados_striper_impl)
+{
+  if (nullptr == rados_striper_impl) {
+    return;
+  }
+  rados_striper_impl->get();
+}
+
+/**
+ * shares the implementation object of rs, if any, and drops the reference
+ * held on the previous one, if any. Self assignment and assignment from a
+ * striper that has no implementation object are both supported
+ * @return a reference to this striper
+ */
+libradosstriper::RadosStriper& libradosstriper::RadosStriper::operator=(const RadosStriper& rs)
+{
+  auto *incoming = rs.rados_striper_impl;
+  // take the new reference first : when both sides share the same object,
+  // dropping the old reference first could destroy it before it is retaken
+  if (incoming)
+    incoming->get();
+  if (rados_striper_impl)
+    rados_striper_impl->put();
+  rados_striper_impl = incoming;
+  return *this;
+}
+
+/// Drops the reference held on the implementation object, if any.
+libradosstriper::RadosStriper::~RadosStriper()
+{
+  if (nullptr != rados_striper_impl) {
+    rados_striper_impl->put();
+  }
+  rados_striper_impl = nullptr;
+}
+
+/**
+ * binds a striper to a newly created implementation object
+ * @param ioctx io context given to the implementation object
+ * @param striper the striper to bind. The reference it may hold on a
+ * previous implementation object is dropped
+ * @return 0 on success, otherwise the int thrown during the creation
+ */
+int libradosstriper::RadosStriper::striper_create(librados::IoCtx& ioctx,
+                                                  RadosStriper *striper)
+{
+  try {
+    auto *created = new libradosstriper::RadosStriperImpl(ioctx, ioctx.io_ctx_impl);
+    created->get();
+    // do not leak the reference on the implementation object held so far
+    if (striper->rados_striper_impl) {
+      striper->rados_striper_impl->put();
+    }
+    striper->rados_striper_impl = created;
+  } catch (int rc) {
+    return rc;
+  }
+  return 0;
+}
+
+/// Forwards to setObjectLayoutStripeUnit of the implementation object.
+int libradosstriper::RadosStriper::set_object_layout_stripe_unit(unsigned int stripe_unit)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->setObjectLayoutStripeUnit(stripe_unit);
+}
+
+/// Forwards to setObjectLayoutStripeCount of the implementation object.
+int libradosstriper::RadosStriper::set_object_layout_stripe_count(unsigned int stripe_count)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->setObjectLayoutStripeCount(stripe_count);
+}
+
+/// Forwards to setObjectLayoutObjectSize of the implementation object.
+int libradosstriper::RadosStriper::set_object_layout_object_size(unsigned int object_size)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->setObjectLayoutObjectSize(object_size);
+}
+
+/// Forwards to getxattr of the implementation object.
+int libradosstriper::RadosStriper::getxattr(const std::string& oid,
+                                            const char *name,
+                                            bufferlist& bl)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->getxattr(oid, name, bl);
+}
+
+/// Forwards to setxattr of the implementation object.
+int libradosstriper::RadosStriper::setxattr(const std::string& oid,
+                                            const char *name,
+                                            bufferlist& bl)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->setxattr(oid, name, bl);
+}
+
+/// Forwards to rmxattr of the implementation object.
+int libradosstriper::RadosStriper::rmxattr(const std::string& oid,
+                                           const char *name)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->rmxattr(oid, name);
+}
+
+/// Forwards to getxattrs of the implementation object.
+int libradosstriper::RadosStriper::getxattrs(const std::string& oid,
+                                             std::map<std::string, bufferlist>& attrset)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->getxattrs(oid, attrset);
+}
+
+/// Forwards to write of the implementation object.
+int libradosstriper::RadosStriper::write(const std::string& soid,
+                                         const bufferlist& bl,
+                                         size_t len,
+                                         uint64_t off)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->write(soid, bl, len, off);
+}
+
+/// Forwards to write_full of the implementation object.
+int libradosstriper::RadosStriper::write_full(const std::string& soid,
+                                              const bufferlist& bl)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->write_full(soid, bl);
+}
+
+/// Forwards to append of the implementation object.
+int libradosstriper::RadosStriper::append(const std::string& soid,
+                                          const bufferlist& bl,
+                                          size_t len)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->append(soid, bl, len);
+}
+
+/// Forwards to aio_write of the implementation object, handing over the pc member of the completion.
+int libradosstriper::RadosStriper::aio_write(const std::string& soid,
+                                             librados::AioCompletion *c,
+                                             const bufferlist& bl,
+                                             size_t len,
+                                             uint64_t off)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->aio_write(soid, c->pc, bl, len, off);
+}
+
+/// Forwards to aio_write_full of the implementation object, handing over the pc member of the completion.
+int libradosstriper::RadosStriper::aio_write_full(const std::string& soid,
+                                                  librados::AioCompletion *c,
+                                                  const bufferlist& bl)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->aio_write_full(soid, c->pc, bl);
+}
+
+/// Forwards to aio_append of the implementation object, handing over the pc member of the completion.
+int libradosstriper::RadosStriper::aio_append(const std::string& soid,
+                                              librados::AioCompletion *c,
+                                              const bufferlist& bl,
+                                              size_t len)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->aio_append(soid, c->pc, bl, len);
+}
+
+// Empties bl, gives it one buffer of len bytes, then forwards to read of
+// the implementation object.
+int libradosstriper::RadosStriper::read(const std::string& soid,
+                                        bufferlist* bl,
+                                        size_t len,
+                                        uint64_t off)
+{
+  auto *const impl = rados_striper_impl;
+  // whatever the caller left in the list is dropped
+  bl->clear();
+  bl->push_back(buffer::create(len));
+  return impl->read(soid, bl, len, off);
+}
+
+// Empties bl, gives it one buffer of len bytes, then forwards to aio_read
+// of the implementation object, handing over the pc member of the completion.
+int libradosstriper::RadosStriper::aio_read(const std::string& soid,
+                                            librados::AioCompletion *c,
+                                            bufferlist* bl,
+                                            size_t len,
+                                            uint64_t off)
+{
+  auto *const impl = rados_striper_impl;
+  // whatever the caller left in the list is dropped
+  bl->clear();
+  bl->push_back(buffer::create(len));
+  return impl->aio_read(soid, c->pc, bl, len, off);
+}
+
+/// Forwards to stat of the implementation object.
+int libradosstriper::RadosStriper::stat(const std::string& soid,
+                                        uint64_t *psize,
+                                        time_t *pmtime)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->stat(soid, psize, pmtime);
+}
+
+/// Forwards to aio_stat of the implementation object, handing over the pc member of the completion.
+int libradosstriper::RadosStriper::aio_stat(const std::string& soid,
+                                            librados::AioCompletion *c,
+                                            uint64_t *psize,
+                                            time_t *pmtime)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->aio_stat(soid, c->pc, psize, pmtime);
+}
+
+/// Forwards to stat2 of the implementation object.
+int libradosstriper::RadosStriper::stat2(const std::string& soid,
+                                         uint64_t *psize,
+                                         struct timespec *pts)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->stat2(soid, psize, pts);
+}
+
+/// Forwards to aio_stat2 of the implementation object, handing over the pc member of the completion.
+int libradosstriper::RadosStriper::aio_stat2(const std::string& soid,
+                                             librados::AioCompletion *c,
+                                             uint64_t *psize,
+                                             struct timespec *pts)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->aio_stat2(soid, c->pc, psize, pts);
+}
+
+/// Forwards to remove of the implementation object, leaving flags to its default.
+int libradosstriper::RadosStriper::remove(const std::string& soid)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->remove(soid);
+}
+
+/// Forwards to aio_remove of the implementation object, leaving flags to its default.
+int libradosstriper::RadosStriper::aio_remove(const std::string& soid,
+                                              librados::AioCompletion *c)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->aio_remove(soid, c->pc);
+}
+
+/// Forwards to remove of the implementation object with the given flags.
+int libradosstriper::RadosStriper::remove(const std::string& soid,
+                                          int flags)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->remove(soid, flags);
+}
+
+/// Forwards to aio_remove of the implementation object with the given flags.
+int libradosstriper::RadosStriper::aio_remove(const std::string& soid,
+                                              librados::AioCompletion *c,
+                                              int flags)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->aio_remove(soid, c->pc, flags);
+}
+
+/// Forwards to trunc of the implementation object.
+int libradosstriper::RadosStriper::trunc(const std::string& soid,
+                                         uint64_t size)
+{
+  auto *const impl = rados_striper_impl;
+  return impl->trunc(soid, size);
+}
+
+/// Forwards to aio_flush of the implementation object.
+int libradosstriper::RadosStriper::aio_flush()
+{
+  auto *const impl = rados_striper_impl;
+  return impl->aio_flush();
+}
+
+/// Allocates a completion wrapping a newly allocated implementation object.
+libradosstriper::MultiAioCompletion* libradosstriper::RadosStriper::multi_aio_create_completion()
+{
+  return new MultiAioCompletion(new MultiAioCompletionImpl);
+}
+
+// Allocates a completion wrapping the implementation object obtained from
+// rados_striper_multi_aio_create_completion, which is given the callbacks
+// and their argument. That call is asserted to return 0.
+libradosstriper::MultiAioCompletion*
+libradosstriper::RadosStriper::multi_aio_create_completion(void *cb_arg,
+                                                           librados::callback_t cb_complete,
+                                                           librados::callback_t cb_safe)
+{
+  MultiAioCompletionImpl *impl = nullptr;
+  int r = rados_striper_multi_aio_create_completion(cb_arg, cb_complete, cb_safe,
+                                                    (void**)&impl);
+  ceph_assert(r == 0);
+  return new MultiAioCompletion(impl);
+}
+
+///////////////////////////// C API //////////////////////////////
+
+// Creates a striper on top of the given io context. On success *striper is
+// filled with a handle on its implementation object, on failure the code
+// returned by striper_create is passed on.
+extern "C" int rados_striper_create(rados_ioctx_t ioctx,
+                                    rados_striper_t *striper)
+{
+  librados::IoCtx ctx;
+  librados::IoCtx::from_rados_ioctx_t(ioctx, ctx);
+  libradosstriper::RadosStriper striperp;
+  const int rc = libradosstriper::RadosStriper::striper_create(ctx, &striperp);
+  if (0 != rc) {
+    return rc;
+  }
+  libradosstriper::RadosStriper::to_rados_striper_t(striperp, striper);
+  return rc;
+}
+
+/// Drops one reference on the implementation object behind the handle.
+extern "C" void rados_striper_destroy(rados_striper_t striper)
+{
+  auto *impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  impl->put();
+}
+
+/// Forwards to setObjectLayoutStripeUnit of the implementation object behind the handle.
+extern "C" int rados_striper_set_object_layout_stripe_unit(rados_striper_t striper,
+                                                           unsigned int stripe_unit)
+{
+  auto *impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return impl->setObjectLayoutStripeUnit(stripe_unit);
+}
+
+/// Forwards to setObjectLayoutStripeCount of the implementation object behind the handle.
+extern "C" int rados_striper_set_object_layout_stripe_count(rados_striper_t striper,
+                                                            unsigned int stripe_count)
+{
+  auto *impl = static_cast<libradosstriper::RadosStriperImpl *>(striper);
+  return impl->setObjectLayoutStripeCount(stripe_count);
+}
+
+// C entry point: forwards object_size to
+// RadosStriperImpl::setObjectLayoutObjectSize() and returns its result.
+extern "C" int rados_striper_set_object_layout_object_size(rados_striper_t striper,
+                                                           unsigned int object_size)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  const int rc = self->setObjectLayoutObjectSize(object_size);
+  return rc;
+}
+
+// C entry point: appends `len` bytes from `buf` to a temporary bufferlist
+// and passes it, with `len` and `off`, to RadosStriperImpl::write().
+// Returns the implementation's result.
+extern "C" int rados_striper_write(rados_striper_t striper,
+                                   const char *soid,
+                                   const char *buf,
+                                   size_t len,
+                                   uint64_t off)
+{
+  bufferlist payload;
+  payload.append(buf, len);
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  return self->write(soid, payload, len, off);
+}
+
+// C entry point: appends `len` bytes from `buf` to a temporary bufferlist
+// and passes it to RadosStriperImpl::write_full(). Returns its result.
+extern "C" int rados_striper_write_full(rados_striper_t striper,
+                                        const char *soid,
+                                        const char *buf,
+                                        size_t len)
+{
+  bufferlist payload;
+  payload.append(buf, len);
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  return self->write_full(soid, payload);
+}
+
+
+// C entry point: appends `len` bytes from `buf` to a temporary bufferlist
+// and passes it, with `len`, to RadosStriperImpl::append().
+// Returns the implementation's result.
+extern "C" int rados_striper_append(rados_striper_t striper,
+                                    const char *soid,
+                                    const char *buf,
+                                    size_t len)
+{
+  bufferlist payload;
+  payload.append(buf, len);
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  return self->append(soid, payload, len);
+}
+
+// C entry point: reads through RadosStriperImpl::read() into `buf`.
+//
+// The caller's buffer is wrapped with buffer::create_static() and pushed
+// onto the bufferlist handed to the implementation.
+//
+// Returns:
+//   - the implementation's negative result, unchanged, on failure;
+//   - -ERANGE when the resulting bufferlist is longer than `len`;
+//   - otherwise the length of the resulting bufferlist.
+extern "C" int rados_striper_read(rados_striper_t striper,
+                                  const char *soid,
+                                  char *buf,
+                                  size_t len,
+                                  uint64_t off)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  bufferlist out;
+  bufferptr wrapped = buffer::create_static(len, buf);
+  out.push_back(wrapped);
+  const int rc = self->read(soid, &out, len, off);
+  if (rc < 0)
+    return rc;
+  if (out.length() > len)
+    return -ERANGE;
+  // Copy only when is_provided_buffer() says the list does not use `buf`.
+  if (!out.is_provided_buffer(buf))
+    out.begin().copy(out.length(), buf);
+  return out.length(); // hrm :/
+}
+
+// C entry point: forwards `soid` to RadosStriperImpl::remove() and returns
+// its result.
+extern "C" int rados_striper_remove(rados_striper_t striper, const char* soid)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  const int rc = self->remove(soid);
+  return rc;
+}
+
+// C entry point: forwards `soid` and `size` to RadosStriperImpl::trunc()
+// and returns its result.
+extern "C" int rados_striper_trunc(rados_striper_t striper, const char* soid, uint64_t size)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  const int rc = self->trunc(soid, size);
+  return rc;
+}
+
+// C entry point: fetches the extended attribute `name` of object `oid`
+// through RadosStriperImpl::getxattr() and copies the value into `buf`.
+//
+// Returns:
+//   - the implementation's negative result, unchanged, on failure;
+//   - -ERANGE when the value is longer than `len` (nothing is copied);
+//   - otherwise the number of bytes copied into `buf`.
+extern "C" int rados_striper_getxattr(rados_striper_t striper,
+                                      const char *oid,
+                                      const char *name,
+                                      char *buf,
+                                      size_t len)
+{
+  libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper;
+  object_t obj(oid);
+  bufferlist bl;
+  // Pass the object_t built above instead of the raw C string, which would
+  // construct a second, implicit temporary for the same name.
+  int ret = impl->getxattr(obj, name, bl);
+  if (ret >= 0) {
+    if (bl.length() > len)
+      return -ERANGE;
+    bl.begin().copy(bl.length(), buf);
+    ret = bl.length();
+  }
+  return ret;
+}
+
+// C entry point: appends `len` bytes from `buf` to a temporary bufferlist
+// and passes it, with an object_t built from `oid` and the attribute
+// `name`, to RadosStriperImpl::setxattr(). Returns its result.
+extern "C" int rados_striper_setxattr(rados_striper_t striper,
+                                      const char *oid,
+                                      const char *name,
+                                      const char *buf,
+                                      size_t len)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  object_t target(oid);
+  bufferlist value;
+  value.append(buf, len);
+  const int rc = self->setxattr(target, name, value);
+  return rc;
+}
+
+// C entry point: forwards an object_t built from `oid` and the attribute
+// `name` to RadosStriperImpl::rmxattr() and returns its result.
+extern "C" int rados_striper_rmxattr(rados_striper_t striper,
+                                     const char *oid,
+                                     const char *name)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  object_t target(oid);
+  const int rc = self->rmxattr(target, name);
+  return rc;
+}
+
+// C entry point: allocates a RadosXattrsIter, fills its attrset through
+// RadosStriperImpl::getxattrs() and hands the iterator back via *iter.
+//
+// Returns 0 on success with *iter set and positioned at attrset.begin().
+// If getxattrs() returns non-zero, that value is returned, the iterator is
+// deleted and *iter is left untouched.
+extern "C" int rados_striper_getxattrs(rados_striper_t striper,
+                                       const char *oid,
+                                       rados_xattrs_iter_t *iter)
+{
+  libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper;
+  object_t obj(oid);
+  // A plain new-expression signals failure by throwing, never by returning
+  // null, so the former "if (!it) return -ENOMEM;" could not be reached.
+  librados::RadosXattrsIter *it = new librados::RadosXattrsIter();
+  int ret = impl->getxattrs(obj, it->attrset);
+  if (ret) {
+    delete it;
+    return ret;
+  }
+  it->i = it->attrset.begin();
+  *iter = it;
+  return 0;
+}
+
+// C entry point: thin alias that forwards all arguments to
+// rados_getxattrs_next() and returns its result.
+extern "C" int rados_striper_getxattrs_next(rados_xattrs_iter_t iter,
+                                            const char **name,
+                                            const char **val,
+                                            size_t *len)
+{
+  const int rc = rados_getxattrs_next(iter, name, val, len);
+  return rc;
+}
+
+// C entry point: thin alias that forwards the iterator to
+// rados_getxattrs_end().
+extern "C" void rados_striper_getxattrs_end(rados_xattrs_iter_t iter)
+{
+  rados_getxattrs_end(iter);
+}
+
+// C entry point: forwards `soid` and the two output pointers to
+// RadosStriperImpl::stat() and returns its result.
+extern "C" int rados_striper_stat(rados_striper_t striper,
+                                  const char* soid,
+                                  uint64_t *psize,
+                                  time_t *pmtime)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  const int rc = self->stat(soid, psize, pmtime);
+  return rc;
+}
+
+// C entry point: allocates a MultiAioCompletionImpl, registers each
+// callback that is non-null (both with `cb_arg`) and stores the new object
+// in *pc. Always returns 0.
+extern "C" int rados_striper_multi_aio_create_completion(void *cb_arg,
+                                                         rados_callback_t cb_complete,
+                                                         rados_callback_t cb_safe,
+                                                         rados_striper_multi_completion_t *pc)
+{
+  auto *completion = new libradosstriper::MultiAioCompletionImpl;
+  if (cb_complete) {
+    completion->set_complete_callback(cb_arg, cb_complete);
+  }
+  if (cb_safe) {
+    completion->set_safe_callback(cb_arg, cb_safe);
+  }
+  *pc = completion;
+  return 0;
+}
+
+// C entry point: calls wait_for_complete() on the MultiAioCompletionImpl
+// behind the handle.
+extern "C" void rados_striper_multi_aio_wait_for_complete(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  completion->wait_for_complete();
+}
+
+// C entry point: calls wait_for_safe() on the MultiAioCompletionImpl
+// behind the handle.
+extern "C" void rados_striper_multi_aio_wait_for_safe(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  completion->wait_for_safe();
+}
+
+// C entry point: returns is_complete() of the MultiAioCompletionImpl
+// behind the handle.
+extern "C" int rados_striper_multi_aio_is_complete(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  return completion->is_complete();
+}
+
+// C entry point: returns is_safe() of the MultiAioCompletionImpl behind
+// the handle.
+extern "C" int rados_striper_multi_aio_is_safe(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  return completion->is_safe();
+}
+
+// C entry point: calls wait_for_complete_and_cb() on the
+// MultiAioCompletionImpl behind the handle.
+extern "C" void rados_striper_multi_aio_wait_for_complete_and_cb(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  completion->wait_for_complete_and_cb();
+}
+
+// C entry point: calls wait_for_safe_and_cb() on the
+// MultiAioCompletionImpl behind the handle.
+extern "C" void rados_striper_multi_aio_wait_for_safe_and_cb(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  completion->wait_for_safe_and_cb();
+}
+
+// C entry point: returns is_complete_and_cb() of the
+// MultiAioCompletionImpl behind the handle.
+extern "C" int rados_striper_multi_aio_is_complete_and_cb(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  return completion->is_complete_and_cb();
+}
+
+// C entry point: returns is_safe_and_cb() of the MultiAioCompletionImpl
+// behind the handle.
+extern "C" int rados_striper_multi_aio_is_safe_and_cb(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  return completion->is_safe_and_cb();
+}
+
+// C entry point: returns get_return_value() of the MultiAioCompletionImpl
+// behind the handle.
+extern "C" int rados_striper_multi_aio_get_return_value(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  return completion->get_return_value();
+}
+
+// C entry point: calls put() on the MultiAioCompletionImpl behind the
+// handle.
+extern "C" void rados_striper_multi_aio_release(rados_striper_multi_completion_t c)
+{
+  auto *completion = static_cast<libradosstriper::MultiAioCompletionImpl*>(c);
+  completion->put();
+}
+
+// C entry point: appends `len` bytes from `buf` to a temporary bufferlist
+// and passes it, with the completion handle cast to
+// librados::AioCompletionImpl*, `len` and `off`, to
+// RadosStriperImpl::aio_write(). Returns its result.
+extern "C" int rados_striper_aio_write(rados_striper_t striper,
+                                       const char* soid,
+                                       rados_completion_t completion,
+                                       const char *buf,
+                                       size_t len,
+                                       uint64_t off)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  auto *aio = static_cast<librados::AioCompletionImpl*>(completion);
+  bufferlist payload;
+  payload.append(buf, len);
+  return self->aio_write(soid, aio, payload, len, off);
+}
+
+// C entry point: appends `len` bytes from `buf` to a temporary bufferlist
+// and passes it, with the completion handle cast to
+// librados::AioCompletionImpl* and `len`, to
+// RadosStriperImpl::aio_append(). Returns its result.
+extern "C" int rados_striper_aio_append(rados_striper_t striper,
+                                        const char* soid,
+                                        rados_completion_t completion,
+                                        const char *buf,
+                                        size_t len)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  auto *aio = static_cast<librados::AioCompletionImpl*>(completion);
+  bufferlist payload;
+  payload.append(buf, len);
+  return self->aio_append(soid, aio, payload, len);
+}
+
+// C entry point: appends `len` bytes from `buf` to a temporary bufferlist
+// and passes it, with the completion handle cast to
+// librados::AioCompletionImpl*, to RadosStriperImpl::aio_write_full().
+// Returns its result.
+extern "C" int rados_striper_aio_write_full(rados_striper_t striper,
+                                            const char* soid,
+                                            rados_completion_t completion,
+                                            const char *buf,
+                                            size_t len)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  auto *aio = static_cast<librados::AioCompletionImpl*>(completion);
+  bufferlist payload;
+  payload.append(buf, len);
+  return self->aio_write_full(soid, aio, payload);
+}
+
+// C entry point: forwards `soid`, the completion handle (cast to
+// librados::AioCompletionImpl*), the caller's `buf`, `len` and `off` to
+// RadosStriperImpl::aio_read(). Returns its result.
+extern "C" int rados_striper_aio_read(rados_striper_t striper,
+                                      const char *soid,
+                                      rados_completion_t completion,
+                                      char *buf,
+                                      size_t len,
+                                      uint64_t off)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  auto *aio = static_cast<librados::AioCompletionImpl*>(completion);
+  return self->aio_read(soid, aio, buf, len, off);
+}
+
+// C entry point: forwards `soid` and the completion handle (cast to
+// librados::AioCompletionImpl*) to RadosStriperImpl::aio_remove().
+// Returns its result.
+extern "C" int rados_striper_aio_remove(rados_striper_t striper,
+                                        const char* soid,
+                                        rados_completion_t completion)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  auto *aio = static_cast<librados::AioCompletionImpl*>(completion);
+  return self->aio_remove(soid, aio);
+}
+
+// C entry point: calls RadosStriperImpl::aio_flush() on the handle.
+// The implementation's return value is not reported to the caller.
+extern "C" void rados_striper_aio_flush(rados_striper_t striper)
+{
+  static_cast<libradosstriper::RadosStriperImpl*>(striper)->aio_flush();
+}
+
+// C entry point: forwards `soid`, the completion handle (cast to
+// librados::AioCompletionImpl*) and the two output pointers to
+// RadosStriperImpl::aio_stat(). Returns its result.
+extern "C" int rados_striper_aio_stat(rados_striper_t striper,
+                                      const char* soid,
+                                      rados_completion_t completion,
+                                      uint64_t *psize,
+                                      time_t *pmtime)
+{
+  auto *self = static_cast<libradosstriper::RadosStriperImpl*>(striper);
+  auto *aio = static_cast<librados::AioCompletionImpl*>(completion);
+  return self->aio_stat(soid, aio, psize, pmtime);
+}