diff options
Diffstat (limited to '')
27 files changed, 15569 insertions, 0 deletions
diff --git a/src/librados-config.cc b/src/librados-config.cc new file mode 100644 index 000000000..7948598b1 --- /dev/null +++ b/src/librados-config.cc @@ -0,0 +1,59 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2006 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License version 2, as published by the Free Software + * Foundation. See file COPYING. + * + */ +#include <iostream> + +#include <boost/program_options/cmdline.hpp> +#include <boost/program_options/option.hpp> +#include <boost/program_options/options_description.hpp> +#include <boost/program_options/parsers.hpp> +#include <boost/program_options/variables_map.hpp> + +#include "include/rados/librados.h" +#include "ceph_ver.h" + +namespace po = boost::program_options; + +int main(int argc, const char **argv) +{ + po::options_description desc{"usage: librados-config [option]"}; + desc.add_options() + ("help,h", "print this help message") + ("version", "library version") + ("vernum", "library version code") + ("release", "print release name"); + + po::parsed_options parsed = + po::command_line_parser(argc, argv).options(desc).run(); + po::variables_map vm; + po::store(parsed, vm); + po::notify(vm); + + if (vm.count("help")) { + std::cout << desc << std::endl; + } else if (vm.count("version")) { + int maj, min, ext; + rados_version(&maj, &min, &ext); + std::cout << maj << "." << min << "." 
<< ext << std::endl; + } else if (vm.count("vernum")) { + std::cout << std::hex << LIBRADOS_VERSION_CODE << std::dec << std::endl; + } else if (vm.count("release")) { + std::cout << CEPH_RELEASE_NAME << ' ' + << '(' << CEPH_RELEASE_TYPE << ')' + << std::endl; + } else { + std::cerr << argv[0] << ": -h or --help for usage" << std::endl; + return 1; + } +} + diff --git a/src/librados/AioCompletionImpl.h b/src/librados/AioCompletionImpl.h new file mode 100644 index 000000000..6f7e1b628 --- /dev/null +++ b/src/librados/AioCompletionImpl.h @@ -0,0 +1,208 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_LIBRADOS_AIOCOMPLETIONIMPL_H +#define CEPH_LIBRADOS_AIOCOMPLETIONIMPL_H + +#include "common/ceph_mutex.h" +#include "include/buffer.h" +#include "include/xlist.h" +#include "osd/osd_types.h" + +class IoCtxImpl; + +struct librados::AioCompletionImpl { + ceph::mutex lock = ceph::make_mutex("AioCompletionImpl lock", false); + ceph::condition_variable cond; + int ref = 1, rval = 0; + bool released = false; + bool complete = false; + version_t objver = 0; + ceph_tid_t tid = 0; + + rados_callback_t callback_complete = nullptr, callback_safe = nullptr; + void *callback_complete_arg = nullptr, *callback_safe_arg = nullptr; + + // for read + bool is_read = false; + bufferlist bl; + bufferlist *blp = nullptr; + char *out_buf = nullptr; + + IoCtxImpl *io = nullptr; + ceph_tid_t aio_write_seq = 0; + xlist<AioCompletionImpl*>::item aio_write_list_item; + + AioCompletionImpl() : aio_write_list_item(this) { } + + int set_complete_callback(void *cb_arg, rados_callback_t cb) { + 
std::scoped_lock l{lock}; + callback_complete = cb; + callback_complete_arg = cb_arg; + return 0; + } + int set_safe_callback(void *cb_arg, rados_callback_t cb) { + std::scoped_lock l{lock}; + callback_safe = cb; + callback_safe_arg = cb_arg; + return 0; + } + int wait_for_complete() { + std::unique_lock l{lock}; + cond.wait(l, [this] { return complete; }); + return 0; + } + int wait_for_safe() { + return wait_for_complete(); + } + int is_complete() { + std::scoped_lock l{lock}; + return complete; + } + int is_safe() { + return is_complete(); + } + int wait_for_complete_and_cb() { + std::unique_lock l{lock}; + cond.wait(l, [this] { return complete && !callback_complete && !callback_safe; }); + return 0; + } + int wait_for_safe_and_cb() { + return wait_for_complete_and_cb(); + } + int is_complete_and_cb() { + std::scoped_lock l{lock}; + return complete && !callback_complete && !callback_safe; + } + int is_safe_and_cb() { + return is_complete_and_cb(); + } + int get_return_value() { + std::scoped_lock l{lock}; + return rval; + } + uint64_t get_version() { + std::scoped_lock l{lock}; + return objver; + } + + void get() { + std::scoped_lock l{lock}; + _get(); + } + void _get() { + ceph_assert(ceph_mutex_is_locked(lock)); + ceph_assert(ref > 0); + ++ref; + } + void release() { + lock.lock(); + ceph_assert(!released); + released = true; + put_unlock(); + } + void put() { + lock.lock(); + put_unlock(); + } + void put_unlock() { + ceph_assert(ref > 0); + int n = --ref; + lock.unlock(); + if (!n) + delete this; + } +}; + +namespace librados { +struct CB_AioComplete { + AioCompletionImpl *c; + + explicit CB_AioComplete(AioCompletionImpl *cc) : c(cc) { + c->_get(); + } + + void operator()() { + rados_callback_t cb_complete = c->callback_complete; + void *cb_complete_arg = c->callback_complete_arg; + if (cb_complete) + cb_complete(c, cb_complete_arg); + + rados_callback_t cb_safe = c->callback_safe; + void *cb_safe_arg = c->callback_safe_arg; + if (cb_safe) + cb_safe(c, 
cb_safe_arg); + + c->lock.lock(); + c->callback_complete = NULL; + c->callback_safe = NULL; + c->cond.notify_all(); + c->put_unlock(); + } +}; + +/** + * Fills in all completed request data, and calls both + * complete and safe callbacks if they exist. + * + * Not useful for usual I/O, but for special things like + * flush where we only want to wait for things to be safe, + * but allow users to specify any of the callbacks. + */ +struct CB_AioCompleteAndSafe { + AioCompletionImpl *c; + + + explicit CB_AioCompleteAndSafe(AioCompletionImpl *cc) : c(cc) { + c->get(); + } + + CB_AioCompleteAndSafe(const CB_AioCompleteAndSafe&) = delete; + CB_AioCompleteAndSafe& operator =(const CB_AioCompleteAndSafe&) = delete; + CB_AioCompleteAndSafe(CB_AioCompleteAndSafe&& rhs) { + c = rhs.c; + rhs.c = nullptr; + } + CB_AioCompleteAndSafe& operator =(CB_AioCompleteAndSafe&& rhs) { + c = rhs.c; + rhs.c = nullptr; + return *this; + } + + void operator()(int r = 0) { + c->lock.lock(); + c->rval = r; + c->complete = true; + c->lock.unlock(); + + rados_callback_t cb_complete = c->callback_complete; + void *cb_complete_arg = c->callback_complete_arg; + if (cb_complete) + cb_complete(c, cb_complete_arg); + + rados_callback_t cb_safe = c->callback_safe; + void *cb_safe_arg = c->callback_safe_arg; + if (cb_safe) + cb_safe(c, cb_safe_arg); + + c->lock.lock(); + c->callback_complete = NULL; + c->callback_safe = NULL; + c->cond.notify_all(); + c->put_unlock(); + } +}; +} + +#endif diff --git a/src/librados/CMakeLists.txt b/src/librados/CMakeLists.txt new file mode 100644 index 000000000..9e469eb17 --- /dev/null +++ b/src/librados/CMakeLists.txt @@ -0,0 +1,42 @@ +add_library(librados_impl STATIC + IoCtxImpl.cc + RadosXattrIter.cc + RadosClient.cc + librados_util.cc + librados_tp.cc) + +# C/C++ API +add_library(librados ${CEPH_SHARED} + librados_c.cc + librados_cxx.cc + $<TARGET_OBJECTS:common_buffer_obj>) +if(ENABLE_SHARED) + set_target_properties(librados PROPERTIES + OUTPUT_NAME rados + VERSION 
2.0.0 + SOVERSION 2 + VISIBILITY_INLINES_HIDDEN ON) + if(HAVE_LINK_EXCLUDE_LIBS AND NOT WIN32) + set_property(TARGET librados APPEND_STRING PROPERTY + LINK_FLAGS " -Wl,--exclude-libs,ALL") + endif() + if(HAVE_LINK_VERSION_SCRIPT AND NOT WIN32) + set_property(TARGET librados APPEND_STRING PROPERTY + LINK_FLAGS " -Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/librados.map") + endif() + if(WITH_STATIC_LIBSTDCXX) + set_property(TARGET librados APPEND_STRING PROPERTY + LINK_FLAGS " -static-libstdc++ -static-libgcc") + endif() +endif() +target_link_libraries(librados PRIVATE + librados_impl osdc ceph-common cls_lock_client + ${BLKID_LIBRARIES} ${CRYPTO_LIBS} ${EXTRALIBS} ${GSSAPI_LIBRARIES}) +install(TARGETS librados DESTINATION ${CMAKE_INSTALL_LIBDIR}) + +if(WITH_LTTNG) + add_dependencies(librados_impl librados-tp) + if(WITH_EVENTTRACE) + add_dependencies(librados_impl eventtrace_tp) + endif() +endif() diff --git a/src/librados/IoCtxImpl.cc b/src/librados/IoCtxImpl.cc new file mode 100644 index 000000000..e54dd062b --- /dev/null +++ b/src/librados/IoCtxImpl.cc @@ -0,0 +1,2217 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include <limits.h> + +#include "IoCtxImpl.h" + +#include "librados/librados_c.h" +#include "librados/AioCompletionImpl.h" +#include "librados/PoolAsyncCompletionImpl.h" +#include "librados/RadosClient.h" +#include "include/ceph_assert.h" +#include "common/valgrind.h" +#include "common/EventTrace.h" + +#define dout_subsys ceph_subsys_rados +#undef dout_prefix +#define dout_prefix *_dout << "librados: " + +namespace bs = boost::system; +namespace ca = ceph::async; +namespace cb = ceph::buffer; + +namespace librados { +namespace { + +struct CB_notify_Finish { + CephContext *cct; + Context *ctx; + Objecter *objecter; + Objecter::LingerOp *linger_op; + bufferlist *preply_bl; + char **preply_buf; + size_t *preply_buf_len; + + CB_notify_Finish(CephContext *_cct, Context *_ctx, Objecter *_objecter, + Objecter::LingerOp *_linger_op, bufferlist *_preply_bl, + char **_preply_buf, size_t *_preply_buf_len) + : cct(_cct), ctx(_ctx), objecter(_objecter), linger_op(_linger_op), + preply_bl(_preply_bl), preply_buf(_preply_buf), + preply_buf_len(_preply_buf_len) {} + + + // move-only + CB_notify_Finish(const CB_notify_Finish&) = delete; + CB_notify_Finish& operator =(const CB_notify_Finish&) = delete; + CB_notify_Finish(CB_notify_Finish&&) = default; + CB_notify_Finish& operator =(CB_notify_Finish&&) = default; + + void operator()(bs::error_code ec, bufferlist&& reply_bl) { + ldout(cct, 10) << __func__ << " completed notify (linger op " + << linger_op << "), ec = " << ec << dendl; + + // pass result back to user + // NOTE: we do this regardless of what error code we return + if (preply_buf) { + if (reply_bl.length()) { + *preply_buf = (char*)malloc(reply_bl.length()); + memcpy(*preply_buf, reply_bl.c_str(), reply_bl.length()); + } else { + *preply_buf = NULL; + } + } + if (preply_buf_len) + *preply_buf_len = reply_bl.length(); + if (preply_bl) + *preply_bl = std::move(reply_bl); + + ctx->complete(ceph::from_error_code(ec)); + } +}; + +struct CB_aio_linger_cancel { + 
Objecter *objecter; + Objecter::LingerOp *linger_op; + + CB_aio_linger_cancel(Objecter *_objecter, Objecter::LingerOp *_linger_op) + : objecter(_objecter), linger_op(_linger_op) + { + } + + void operator()() { + objecter->linger_cancel(linger_op); + } +}; + +struct C_aio_linger_Complete : public Context { + AioCompletionImpl *c; + Objecter::LingerOp *linger_op; + bool cancel; + + C_aio_linger_Complete(AioCompletionImpl *_c, Objecter::LingerOp *_linger_op, bool _cancel) + : c(_c), linger_op(_linger_op), cancel(_cancel) + { + c->get(); + } + + void finish(int r) override { + if (cancel || r < 0) + boost::asio::defer(c->io->client->finish_strand, + CB_aio_linger_cancel(c->io->objecter, + linger_op)); + + c->lock.lock(); + c->rval = r; + c->complete = true; + c->cond.notify_all(); + + if (c->callback_complete || + c->callback_safe) { + boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c)); + } + c->put_unlock(); + } +}; + +struct C_aio_notify_Complete : public C_aio_linger_Complete { + ceph::mutex lock = ceph::make_mutex("C_aio_notify_Complete::lock"); + bool acked = false; + bool finished = false; + int ret_val = 0; + + C_aio_notify_Complete(AioCompletionImpl *_c, Objecter::LingerOp *_linger_op) + : C_aio_linger_Complete(_c, _linger_op, false) { + } + + void handle_ack(int r) { + // invoked by C_aio_notify_Ack + lock.lock(); + acked = true; + complete_unlock(r); + } + + void complete(int r) override { + // invoked by C_notify_Finish + lock.lock(); + finished = true; + complete_unlock(r); + } + + void complete_unlock(int r) { + if (ret_val == 0 && r < 0) { + ret_val = r; + } + + if (acked && finished) { + lock.unlock(); + cancel = true; + C_aio_linger_Complete::complete(ret_val); + } else { + lock.unlock(); + } + } +}; + +struct C_aio_notify_Ack : public Context { + CephContext *cct; + C_aio_notify_Complete *oncomplete; + + C_aio_notify_Ack(CephContext *_cct, + C_aio_notify_Complete *_oncomplete) + : cct(_cct), oncomplete(_oncomplete) + { + } + + void 
finish(int r) override + { + ldout(cct, 10) << __func__ << " linger op " << oncomplete->linger_op << " " + << "acked (" << r << ")" << dendl; + oncomplete->handle_ack(r); + } +}; + +struct C_aio_selfmanaged_snap_op_Complete : public Context { + librados::RadosClient *client; + librados::AioCompletionImpl *c; + + C_aio_selfmanaged_snap_op_Complete(librados::RadosClient *client, + librados::AioCompletionImpl *c) + : client(client), c(c) { + c->get(); + } + + void finish(int r) override { + c->lock.lock(); + c->rval = r; + c->complete = true; + c->cond.notify_all(); + + if (c->callback_complete || c->callback_safe) { + boost::asio::defer(client->finish_strand, librados::CB_AioComplete(c)); + } + c->put_unlock(); + } +}; + +struct C_aio_selfmanaged_snap_create_Complete : public C_aio_selfmanaged_snap_op_Complete { + snapid_t snapid; + uint64_t *dest_snapid; + + C_aio_selfmanaged_snap_create_Complete(librados::RadosClient *client, + librados::AioCompletionImpl *c, + uint64_t *dest_snapid) + : C_aio_selfmanaged_snap_op_Complete(client, c), + dest_snapid(dest_snapid) { + } + + void finish(int r) override { + if (r >= 0) { + *dest_snapid = snapid; + } + C_aio_selfmanaged_snap_op_Complete::finish(r); + } +}; + +} // anonymous namespace +} // namespace librados + +librados::IoCtxImpl::IoCtxImpl() = default; + +librados::IoCtxImpl::IoCtxImpl(RadosClient *c, Objecter *objecter, + int64_t poolid, snapid_t s) + : client(c), poolid(poolid), snap_seq(s), + notify_timeout(c->cct->_conf->client_notify_timeout), + oloc(poolid), + aio_write_seq(0), objecter(objecter) +{ +} + +void librados::IoCtxImpl::set_snap_read(snapid_t s) +{ + if (!s) + s = CEPH_NOSNAP; + ldout(client->cct, 10) << "set snap read " << snap_seq << " -> " << s << dendl; + snap_seq = s; +} + +int librados::IoCtxImpl::set_snap_write_context(snapid_t seq, vector<snapid_t>& snaps) +{ + ::SnapContext n; + ldout(client->cct, 10) << "set snap write context: seq = " << seq + << " and snaps = " << snaps << dendl; + n.seq = 
seq; + n.snaps = snaps; + if (!n.is_valid()) + return -EINVAL; + snapc = n; + return 0; +} + +int librados::IoCtxImpl::get_object_hash_position( + const std::string& oid, uint32_t *hash_position) +{ + int64_t r = objecter->get_object_hash_position(poolid, oid, oloc.nspace); + if (r < 0) + return r; + *hash_position = (uint32_t)r; + return 0; +} + +int librados::IoCtxImpl::get_object_pg_hash_position( + const std::string& oid, uint32_t *pg_hash_position) +{ + int64_t r = objecter->get_object_pg_hash_position(poolid, oid, oloc.nspace); + if (r < 0) + return r; + *pg_hash_position = (uint32_t)r; + return 0; +} + +void librados::IoCtxImpl::queue_aio_write(AioCompletionImpl *c) +{ + get(); + std::scoped_lock l{aio_write_list_lock}; + ceph_assert(c->io == this); + c->aio_write_seq = ++aio_write_seq; + ldout(client->cct, 20) << "queue_aio_write " << this << " completion " << c + << " write_seq " << aio_write_seq << dendl; + aio_write_list.push_back(&c->aio_write_list_item); +} + +void librados::IoCtxImpl::complete_aio_write(AioCompletionImpl *c) +{ + ldout(client->cct, 20) << "complete_aio_write " << c << dendl; + aio_write_list_lock.lock(); + ceph_assert(c->io == this); + c->aio_write_list_item.remove_myself(); + + map<ceph_tid_t, std::list<AioCompletionImpl*> >::iterator waiters = aio_write_waiters.begin(); + while (waiters != aio_write_waiters.end()) { + if (!aio_write_list.empty() && + aio_write_list.front()->aio_write_seq <= waiters->first) { + ldout(client->cct, 20) << " next outstanding write is " << aio_write_list.front()->aio_write_seq + << " <= waiter " << waiters->first + << ", stopping" << dendl; + break; + } + ldout(client->cct, 20) << " waking waiters on seq " << waiters->first << dendl; + for (std::list<AioCompletionImpl*>::iterator it = waiters->second.begin(); + it != waiters->second.end(); ++it) { + boost::asio::defer(client->finish_strand, CB_AioCompleteAndSafe(*it)); + (*it)->put(); + } + aio_write_waiters.erase(waiters++); + } + + 
aio_write_cond.notify_all(); + aio_write_list_lock.unlock(); + put(); +} + +void librados::IoCtxImpl::flush_aio_writes_async(AioCompletionImpl *c) +{ + ldout(client->cct, 20) << "flush_aio_writes_async " << this + << " completion " << c << dendl; + std::lock_guard l(aio_write_list_lock); + ceph_tid_t seq = aio_write_seq; + if (aio_write_list.empty()) { + ldout(client->cct, 20) << "flush_aio_writes_async no writes. (tid " + << seq << ")" << dendl; + boost::asio::defer(client->finish_strand, CB_AioCompleteAndSafe(c)); + } else { + ldout(client->cct, 20) << "flush_aio_writes_async " << aio_write_list.size() + << " writes in flight; waiting on tid " << seq << dendl; + c->get(); + aio_write_waiters[seq].push_back(c); + } +} + +void librados::IoCtxImpl::flush_aio_writes() +{ + ldout(client->cct, 20) << "flush_aio_writes" << dendl; + std::unique_lock l{aio_write_list_lock}; + aio_write_cond.wait(l, [seq=aio_write_seq, this] { + return (aio_write_list.empty() || + aio_write_list.front()->aio_write_seq > seq); + }); +} + +string librados::IoCtxImpl::get_cached_pool_name() +{ + std::string pn; + client->pool_get_name(get_id(), &pn); + return pn; +} + +// SNAPS + +int librados::IoCtxImpl::snap_create(const char *snapName) +{ + int reply; + string sName(snapName); + + ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::snap_create::mylock"); + ceph::condition_variable cond; + bool done; + Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply); + objecter->create_pool_snap(poolid, sName, onfinish); + + std::unique_lock l{mylock}; + cond.wait(l, [&done] { return done; }); + return reply; +} + +int librados::IoCtxImpl::selfmanaged_snap_create(uint64_t *psnapid) +{ + int reply; + + ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::selfmanaged_snap_create::mylock"); + ceph::condition_variable cond; + bool done; + Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply); + snapid_t snapid; + objecter->allocate_selfmanaged_snap(poolid, &snapid, onfinish); + + { + 
std::unique_lock l{mylock}; + cond.wait(l, [&done] { return done; }); + } + if (reply == 0) + *psnapid = snapid; + return reply; +} + +void librados::IoCtxImpl::aio_selfmanaged_snap_create(uint64_t *snapid, + AioCompletionImpl *c) +{ + C_aio_selfmanaged_snap_create_Complete *onfinish = + new C_aio_selfmanaged_snap_create_Complete(client, c, snapid); + objecter->allocate_selfmanaged_snap(poolid, &onfinish->snapid, + onfinish); +} + +int librados::IoCtxImpl::snap_remove(const char *snapName) +{ + int reply; + string sName(snapName); + + ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::snap_remove::mylock"); + ceph::condition_variable cond; + bool done; + Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply); + objecter->delete_pool_snap(poolid, sName, onfinish); + unique_lock l{mylock}; + cond.wait(l, [&done] { return done; }); + return reply; +} + +int librados::IoCtxImpl::selfmanaged_snap_rollback_object(const object_t& oid, + ::SnapContext& snapc, + uint64_t snapid) +{ + int reply; + + ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::snap_rollback::mylock"); + ceph::condition_variable cond; + bool done; + Context *onack = new C_SafeCond(mylock, cond, &done, &reply); + + ::ObjectOperation op; + prepare_assert_ops(&op); + op.rollback(snapid); + objecter->mutate(oid, oloc, + op, snapc, ceph::real_clock::now(), + extra_op_flags, + onack, NULL); + + std::unique_lock l{mylock}; + cond.wait(l, [&done] { return done; }); + return reply; +} + +int librados::IoCtxImpl::rollback(const object_t& oid, const char *snapName) +{ + snapid_t snap; + + int r = objecter->pool_snap_by_name(poolid, snapName, &snap); + if (r < 0) { + return r; + } + + return selfmanaged_snap_rollback_object(oid, snapc, snap); +} + +int librados::IoCtxImpl::selfmanaged_snap_remove(uint64_t snapid) +{ + int reply; + + ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::selfmanaged_snap_remove::mylock"); + ceph::condition_variable cond; + bool done; + objecter->delete_selfmanaged_snap(poolid, 
snapid_t(snapid), + new C_SafeCond(mylock, cond, &done, &reply)); + + std::unique_lock l{mylock}; + cond.wait(l, [&done] { return done; }); + return (int)reply; +} + +void librados::IoCtxImpl::aio_selfmanaged_snap_remove(uint64_t snapid, + AioCompletionImpl *c) +{ + Context *onfinish = new C_aio_selfmanaged_snap_op_Complete(client, c); + objecter->delete_selfmanaged_snap(poolid, snapid, onfinish); +} + +int librados::IoCtxImpl::snap_list(vector<uint64_t> *snaps) +{ + return objecter->pool_snap_list(poolid, snaps); +} + +int librados::IoCtxImpl::snap_lookup(const char *name, uint64_t *snapid) +{ + return objecter->pool_snap_by_name(poolid, name, (snapid_t *)snapid); +} + +int librados::IoCtxImpl::snap_get_name(uint64_t snapid, std::string *s) +{ + pool_snap_info_t info; + int ret = objecter->pool_snap_get_info(poolid, snapid, &info); + if (ret < 0) { + return ret; + } + *s = info.name.c_str(); + return 0; +} + +int librados::IoCtxImpl::snap_get_stamp(uint64_t snapid, time_t *t) +{ + pool_snap_info_t info; + int ret = objecter->pool_snap_get_info(poolid, snapid, &info); + if (ret < 0) { + return ret; + } + *t = info.stamp.sec(); + return 0; +} + + +// IO + +int librados::IoCtxImpl::nlist(Objecter::NListContext *context, int max_entries) +{ + bool done; + int r = 0; + ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::nlist::mylock"); + ceph::condition_variable cond; + + if (context->at_end()) + return 0; + + context->max_entries = max_entries; + context->nspace = oloc.nspace; + + objecter->list_nobjects(context, new C_SafeCond(mylock, cond, &done, &r)); + + std::unique_lock l{mylock}; + cond.wait(l, [&done] { return done; }); + return r; +} + +uint32_t librados::IoCtxImpl::nlist_seek(Objecter::NListContext *context, + uint32_t pos) +{ + context->list.clear(); + return objecter->list_nobjects_seek(context, pos); +} + +uint32_t librados::IoCtxImpl::nlist_seek(Objecter::NListContext *context, + const rados_object_list_cursor& cursor) +{ + context->list.clear(); + return 
objecter->list_nobjects_seek(context, *(const hobject_t *)cursor); +} + +rados_object_list_cursor librados::IoCtxImpl::nlist_get_cursor(Objecter::NListContext *context) +{ + hobject_t *c = new hobject_t; + + objecter->list_nobjects_get_cursor(context, c); + return (rados_object_list_cursor)c; +} + +int librados::IoCtxImpl::create(const object_t& oid, bool exclusive) +{ + ::ObjectOperation op; + prepare_assert_ops(&op); + op.create(exclusive); + return operate(oid, &op, NULL); +} + +/* + * add any version assert operations that are appropriate given the + * stat in the IoCtx, either the target version assert or any src + * object asserts. these affect a single ioctx operation, so clear + * the ioctx state when we're doing. + * + * return a pointer to the ObjectOperation if we added any events; + * this is convenient for passing the extra_ops argument into Objecter + * methods. + */ +::ObjectOperation *librados::IoCtxImpl::prepare_assert_ops(::ObjectOperation *op) +{ + ::ObjectOperation *pop = NULL; + if (assert_ver) { + op->assert_version(assert_ver); + assert_ver = 0; + pop = op; + } + return pop; +} + +int librados::IoCtxImpl::write(const object_t& oid, bufferlist& bl, + size_t len, uint64_t off) +{ + if (len > UINT_MAX/2) + return -E2BIG; + ::ObjectOperation op; + prepare_assert_ops(&op); + bufferlist mybl; + mybl.substr_of(bl, 0, len); + op.write(off, mybl); + return operate(oid, &op, NULL); +} + +int librados::IoCtxImpl::append(const object_t& oid, bufferlist& bl, size_t len) +{ + if (len > UINT_MAX/2) + return -E2BIG; + ::ObjectOperation op; + prepare_assert_ops(&op); + bufferlist mybl; + mybl.substr_of(bl, 0, len); + op.append(mybl); + return operate(oid, &op, NULL); +} + +int librados::IoCtxImpl::write_full(const object_t& oid, bufferlist& bl) +{ + if (bl.length() > UINT_MAX/2) + return -E2BIG; + ::ObjectOperation op; + prepare_assert_ops(&op); + op.write_full(bl); + return operate(oid, &op, NULL); +} + +int librados::IoCtxImpl::writesame(const object_t& 
oid, bufferlist& bl, + size_t write_len, uint64_t off) +{ + if ((bl.length() > UINT_MAX/2) || (write_len > UINT_MAX/2)) + return -E2BIG; + if ((bl.length() == 0) || (write_len % bl.length())) + return -EINVAL; + ::ObjectOperation op; + prepare_assert_ops(&op); + bufferlist mybl; + mybl.substr_of(bl, 0, bl.length()); + op.writesame(off, write_len, mybl); + return operate(oid, &op, NULL); +} + +int librados::IoCtxImpl::operate(const object_t& oid, ::ObjectOperation *o, + ceph::real_time *pmtime, int flags) +{ + ceph::real_time ut = (pmtime ? *pmtime : + ceph::real_clock::now()); + + /* can't write to a snapshot */ + if (snap_seq != CEPH_NOSNAP) + return -EROFS; + + if (!o->size()) + return 0; + + ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::operate::mylock"); + ceph::condition_variable cond; + bool done; + int r; + version_t ver; + + Context *oncommit = new C_SafeCond(mylock, cond, &done, &r); + + int op = o->ops[0].op.op; + ldout(client->cct, 10) << ceph_osd_op_name(op) << " oid=" << oid + << " nspace=" << oloc.nspace << dendl; + Objecter::Op *objecter_op = objecter->prepare_mutate_op( + oid, oloc, + *o, snapc, ut, + flags | extra_op_flags, + oncommit, &ver); + objecter->op_submit(objecter_op); + + { + std::unique_lock l{mylock}; + cond.wait(l, [&done] { return done;}); + } + ldout(client->cct, 10) << "Objecter returned from " + << ceph_osd_op_name(op) << " r=" << r << dendl; + + set_sync_op_version(ver); + + return r; +} + +int librados::IoCtxImpl::operate_read(const object_t& oid, + ::ObjectOperation *o, + bufferlist *pbl, + int flags) +{ + if (!o->size()) + return 0; + + ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::operate_read::mylock"); + ceph::condition_variable cond; + bool done; + int r; + version_t ver; + + Context *onack = new C_SafeCond(mylock, cond, &done, &r); + + int op = o->ops[0].op.op; + ldout(client->cct, 10) << ceph_osd_op_name(op) << " oid=" << oid << " nspace=" << oloc.nspace << dendl; + Objecter::Op *objecter_op = 
objecter->prepare_read_op( + oid, oloc, + *o, snap_seq, pbl, + flags | extra_op_flags, + onack, &ver); + objecter->op_submit(objecter_op); + + { + std::unique_lock l{mylock}; + cond.wait(l, [&done] { return done; }); + } + ldout(client->cct, 10) << "Objecter returned from " + << ceph_osd_op_name(op) << " r=" << r << dendl; + + set_sync_op_version(ver); + + return r; +} + +int librados::IoCtxImpl::aio_operate_read(const object_t &oid, + ::ObjectOperation *o, + AioCompletionImpl *c, + int flags, + bufferlist *pbl, + const blkin_trace_info *trace_info) +{ + FUNCTRACE(client->cct); + Context *oncomplete = new C_aio_Complete(c); + +#if defined(WITH_EVENTTRACE) + ((C_aio_Complete *) oncomplete)->oid = oid; +#endif + c->is_read = true; + c->io = this; + + ZTracer::Trace trace; + if (trace_info) { + ZTracer::Trace parent_trace("", nullptr, trace_info); + trace.init("rados operate read", &objecter->trace_endpoint, &parent_trace); + } + + trace.event("init root span"); + Objecter::Op *objecter_op = objecter->prepare_read_op( + oid, oloc, + *o, snap_seq, pbl, flags | extra_op_flags, + oncomplete, &c->objver, nullptr, 0, &trace); + objecter->op_submit(objecter_op, &c->tid); + trace.event("rados operate read submitted"); + + return 0; +} + +int librados::IoCtxImpl::aio_operate(const object_t& oid, + ::ObjectOperation *o, AioCompletionImpl *c, + const SnapContext& snap_context, int flags, + const blkin_trace_info *trace_info) +{ + FUNCTRACE(client->cct); + OID_EVENT_TRACE(oid.name.c_str(), "RADOS_WRITE_OP_BEGIN"); + auto ut = ceph::real_clock::now(); + /* can't write to a snapshot */ + if (snap_seq != CEPH_NOSNAP) + return -EROFS; + + Context *oncomplete = new C_aio_Complete(c); +#if defined(WITH_EVENTTRACE) + ((C_aio_Complete *) oncomplete)->oid = oid; +#endif + + c->io = this; + queue_aio_write(c); + + ZTracer::Trace trace; + if (trace_info) { + ZTracer::Trace parent_trace("", nullptr, trace_info); + trace.init("rados operate", &objecter->trace_endpoint, &parent_trace); + } + 
+ trace.event("init root span"); + Objecter::Op *op = objecter->prepare_mutate_op( + oid, oloc, *o, snap_context, ut, flags | extra_op_flags, + oncomplete, &c->objver, osd_reqid_t(), &trace); + objecter->op_submit(op, &c->tid); + trace.event("rados operate op submitted"); + + return 0; +} + +int librados::IoCtxImpl::aio_read(const object_t oid, AioCompletionImpl *c, + bufferlist *pbl, size_t len, uint64_t off, + uint64_t snapid, const blkin_trace_info *info) +{ + FUNCTRACE(client->cct); + if (len > (size_t) INT_MAX) + return -EDOM; + + OID_EVENT_TRACE(oid.name.c_str(), "RADOS_READ_OP_BEGIN"); + Context *oncomplete = new C_aio_Complete(c); + +#if defined(WITH_EVENTTRACE) + ((C_aio_Complete *) oncomplete)->oid = oid; +#endif + c->is_read = true; + c->io = this; + c->blp = pbl; + + ZTracer::Trace trace; + if (info) + trace.init("rados read", &objecter->trace_endpoint, info); + + Objecter::Op *o = objecter->prepare_read_op( + oid, oloc, + off, len, snapid, pbl, extra_op_flags, + oncomplete, &c->objver, nullptr, 0, &trace); + objecter->op_submit(o, &c->tid); + return 0; +} + +int librados::IoCtxImpl::aio_read(const object_t oid, AioCompletionImpl *c, + char *buf, size_t len, uint64_t off, + uint64_t snapid, const blkin_trace_info *info) +{ + FUNCTRACE(client->cct); + if (len > (size_t) INT_MAX) + return -EDOM; + + OID_EVENT_TRACE(oid.name.c_str(), "RADOS_READ_OP_BEGIN"); + Context *oncomplete = new C_aio_Complete(c); + +#if defined(WITH_EVENTTRACE) + ((C_aio_Complete *) oncomplete)->oid = oid; +#endif + c->is_read = true; + c->io = this; + c->bl.clear(); + c->bl.push_back(buffer::create_static(len, buf)); + c->blp = &c->bl; + c->out_buf = buf; + + ZTracer::Trace trace; + if (info) + trace.init("rados read", &objecter->trace_endpoint, info); + + Objecter::Op *o = objecter->prepare_read_op( + oid, oloc, + off, len, snapid, &c->bl, extra_op_flags, + oncomplete, &c->objver, nullptr, 0, &trace); + objecter->op_submit(o, &c->tid); + return 0; +} + +class C_ObjectOperation : 
public Context { +public: + ::ObjectOperation m_ops; + explicit C_ObjectOperation(Context *c) : m_ctx(c) {} + void finish(int r) override { + m_ctx->complete(r); + } +private: + Context *m_ctx; +}; + +int librados::IoCtxImpl::aio_sparse_read(const object_t oid, + AioCompletionImpl *c, + std::map<uint64_t,uint64_t> *m, + bufferlist *data_bl, size_t len, + uint64_t off, uint64_t snapid) +{ + FUNCTRACE(client->cct); + if (len > (size_t) INT_MAX) + return -EDOM; + + Context *nested = new C_aio_Complete(c); + C_ObjectOperation *onack = new C_ObjectOperation(nested); + +#if defined(WITH_EVENTTRACE) + ((C_aio_Complete *) nested)->oid = oid; +#endif + c->is_read = true; + c->io = this; + + onack->m_ops.sparse_read(off, len, m, data_bl, NULL); + + Objecter::Op *o = objecter->prepare_read_op( + oid, oloc, + onack->m_ops, snapid, NULL, extra_op_flags, + onack, &c->objver); + objecter->op_submit(o, &c->tid); + return 0; +} + +int librados::IoCtxImpl::aio_cmpext(const object_t& oid, + AioCompletionImpl *c, + uint64_t off, + bufferlist& cmp_bl) +{ + if (cmp_bl.length() > UINT_MAX/2) + return -E2BIG; + + Context *onack = new C_aio_Complete(c); + + c->is_read = true; + c->io = this; + + Objecter::Op *o = objecter->prepare_cmpext_op( + oid, oloc, off, cmp_bl, snap_seq, extra_op_flags, + onack, &c->objver); + objecter->op_submit(o, &c->tid); + + return 0; +} + +/* use m_ops.cmpext() + prepare_read_op() for non-bufferlist C API */ +int librados::IoCtxImpl::aio_cmpext(const object_t& oid, + AioCompletionImpl *c, + const char *cmp_buf, + size_t cmp_len, + uint64_t off) +{ + if (cmp_len > UINT_MAX/2) + return -E2BIG; + + bufferlist cmp_bl; + cmp_bl.append(cmp_buf, cmp_len); + + Context *nested = new C_aio_Complete(c); + C_ObjectOperation *onack = new C_ObjectOperation(nested); + + c->is_read = true; + c->io = this; + + onack->m_ops.cmpext(off, cmp_len, cmp_buf, NULL); + + Objecter::Op *o = objecter->prepare_read_op( + oid, oloc, onack->m_ops, snap_seq, NULL, extra_op_flags, onack, 
&c->objver); + objecter->op_submit(o, &c->tid); + return 0; +} + +int librados::IoCtxImpl::aio_write(const object_t &oid, AioCompletionImpl *c, + const bufferlist& bl, size_t len, + uint64_t off, const blkin_trace_info *info) +{ + FUNCTRACE(client->cct); + auto ut = ceph::real_clock::now(); + ldout(client->cct, 20) << "aio_write " << oid << " " << off << "~" << len << " snapc=" << snapc << " snap_seq=" << snap_seq << dendl; + OID_EVENT_TRACE(oid.name.c_str(), "RADOS_WRITE_OP_BEGIN"); + + if (len > UINT_MAX/2) + return -E2BIG; + /* can't write to a snapshot */ + if (snap_seq != CEPH_NOSNAP) + return -EROFS; + + Context *oncomplete = new C_aio_Complete(c); + +#if defined(WITH_EVENTTRACE) + ((C_aio_Complete *) oncomplete)->oid = oid; +#endif + ZTracer::Trace trace; + if (info) + trace.init("rados write", &objecter->trace_endpoint, info); + + c->io = this; + queue_aio_write(c); + + Objecter::Op *o = objecter->prepare_write_op( + oid, oloc, + off, len, snapc, bl, ut, extra_op_flags, + oncomplete, &c->objver, nullptr, 0, &trace); + objecter->op_submit(o, &c->tid); + + return 0; +} + +int librados::IoCtxImpl::aio_append(const object_t &oid, AioCompletionImpl *c, + const bufferlist& bl, size_t len) +{ + FUNCTRACE(client->cct); + auto ut = ceph::real_clock::now(); + + if (len > UINT_MAX/2) + return -E2BIG; + /* can't write to a snapshot */ + if (snap_seq != CEPH_NOSNAP) + return -EROFS; + + Context *oncomplete = new C_aio_Complete(c); +#if defined(WITH_EVENTTRACE) + ((C_aio_Complete *) oncomplete)->oid = oid; +#endif + + c->io = this; + queue_aio_write(c); + + Objecter::Op *o = objecter->prepare_append_op( + oid, oloc, + len, snapc, bl, ut, extra_op_flags, + oncomplete, &c->objver); + objecter->op_submit(o, &c->tid); + + return 0; +} + +int librados::IoCtxImpl::aio_write_full(const object_t &oid, + AioCompletionImpl *c, + const bufferlist& bl) +{ + FUNCTRACE(client->cct); + auto ut = ceph::real_clock::now(); + + if (bl.length() > UINT_MAX/2) + return -E2BIG; + /* can't 
write to a snapshot */ + if (snap_seq != CEPH_NOSNAP) + return -EROFS; + + Context *oncomplete = new C_aio_Complete(c); +#if defined(WITH_EVENTTRACE) + ((C_aio_Complete *) oncomplete)->oid = oid; +#endif + + c->io = this; + queue_aio_write(c); + + Objecter::Op *o = objecter->prepare_write_full_op( + oid, oloc, + snapc, bl, ut, extra_op_flags, + oncomplete, &c->objver); + objecter->op_submit(o, &c->tid); + + return 0; +} + +int librados::IoCtxImpl::aio_writesame(const object_t &oid, + AioCompletionImpl *c, + const bufferlist& bl, + size_t write_len, + uint64_t off) +{ + FUNCTRACE(client->cct); + auto ut = ceph::real_clock::now(); + + if ((bl.length() > UINT_MAX/2) || (write_len > UINT_MAX/2)) + return -E2BIG; + if ((bl.length() == 0) || (write_len % bl.length())) + return -EINVAL; + /* can't write to a snapshot */ + if (snap_seq != CEPH_NOSNAP) + return -EROFS; + + Context *oncomplete = new C_aio_Complete(c); + +#if defined(WITH_EVENTTRACE) + ((C_aio_Complete *) oncomplete)->oid = oid; +#endif + c->io = this; + queue_aio_write(c); + + Objecter::Op *o = objecter->prepare_writesame_op( + oid, oloc, + write_len, off, + snapc, bl, ut, extra_op_flags, + oncomplete, &c->objver); + objecter->op_submit(o, &c->tid); + + return 0; +} + +int librados::IoCtxImpl::aio_remove(const object_t &oid, AioCompletionImpl *c, int flags) +{ + FUNCTRACE(client->cct); + auto ut = ceph::real_clock::now(); + + /* can't write to a snapshot */ + if (snap_seq != CEPH_NOSNAP) + return -EROFS; + + Context *oncomplete = new C_aio_Complete(c); + +#if defined(WITH_EVENTTRACE) + ((C_aio_Complete *) oncomplete)->oid = oid; +#endif + c->io = this; + queue_aio_write(c); + + Objecter::Op *o = objecter->prepare_remove_op( + oid, oloc, + snapc, ut, flags | extra_op_flags, + oncomplete, &c->objver); + objecter->op_submit(o, &c->tid); + + return 0; +} + + +int librados::IoCtxImpl::aio_stat(const object_t& oid, AioCompletionImpl *c, + uint64_t *psize, time_t *pmtime) +{ + C_aio_stat_Ack *onack = new 
C_aio_stat_Ack(c, pmtime); + c->is_read = true; + c->io = this; + Objecter::Op *o = objecter->prepare_stat_op( + oid, oloc, + snap_seq, psize, &onack->mtime, extra_op_flags, + onack, &c->objver); + objecter->op_submit(o, &c->tid); + return 0; +} + +int librados::IoCtxImpl::aio_stat2(const object_t& oid, AioCompletionImpl *c, + uint64_t *psize, struct timespec *pts) +{ + C_aio_stat2_Ack *onack = new C_aio_stat2_Ack(c, pts); + c->is_read = true; + c->io = this; + Objecter::Op *o = objecter->prepare_stat_op( + oid, oloc, + snap_seq, psize, &onack->mtime, extra_op_flags, + onack, &c->objver); + objecter->op_submit(o, &c->tid); + return 0; +} + +int librados::IoCtxImpl::aio_getxattr(const object_t& oid, AioCompletionImpl *c, + const char *name, bufferlist& bl) +{ + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.getxattr(name, &bl, NULL); + int r = aio_operate_read(oid, &rd, c, 0, &bl); + return r; +} + +int librados::IoCtxImpl::aio_rmxattr(const object_t& oid, AioCompletionImpl *c, + const char *name) +{ + ::ObjectOperation op; + prepare_assert_ops(&op); + op.rmxattr(name); + return aio_operate(oid, &op, c, snapc, 0); +} + +int librados::IoCtxImpl::aio_setxattr(const object_t& oid, AioCompletionImpl *c, + const char *name, bufferlist& bl) +{ + ::ObjectOperation op; + prepare_assert_ops(&op); + op.setxattr(name, bl); + return aio_operate(oid, &op, c, snapc, 0); +} + +namespace { +struct AioGetxattrsData { + AioGetxattrsData(librados::AioCompletionImpl *c, map<string, bufferlist>* attrset, + librados::RadosClient *_client) : + user_completion(c), user_attrset(attrset), client(_client) {} + struct librados::CB_AioCompleteAndSafe user_completion; + map<string, bufferlist> result_attrset; + map<std::string, bufferlist>* user_attrset; + librados::RadosClient *client; +}; +} + +static void aio_getxattrs_complete(rados_completion_t c, void *arg) { + AioGetxattrsData *cdata = reinterpret_cast<AioGetxattrsData*>(arg); + int rc = rados_aio_get_return_value(c); + 
cdata->user_attrset->clear(); + if (rc >= 0) { + for (map<string,bufferlist>::iterator p = cdata->result_attrset.begin(); + p != cdata->result_attrset.end(); + ++p) { + ldout(cdata->client->cct, 10) << "IoCtxImpl::getxattrs: xattr=" << p->first << dendl; + (*cdata->user_attrset)[p->first] = p->second; + } + } + cdata->user_completion(rc); + ((librados::AioCompletionImpl*)c)->put(); + delete cdata; +} + +int librados::IoCtxImpl::aio_getxattrs(const object_t& oid, AioCompletionImpl *c, + map<std::string, bufferlist>& attrset) +{ + AioGetxattrsData *cdata = new AioGetxattrsData(c, &attrset, client); + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.getxattrs(&cdata->result_attrset, NULL); + librados::AioCompletionImpl *comp = new librados::AioCompletionImpl; + comp->set_complete_callback(cdata, aio_getxattrs_complete); + return aio_operate_read(oid, &rd, comp, 0, NULL); +} + +int librados::IoCtxImpl::aio_cancel(AioCompletionImpl *c) +{ + return objecter->op_cancel(c->tid, -ECANCELED); +} + + +int librados::IoCtxImpl::hit_set_list(uint32_t hash, AioCompletionImpl *c, + std::list< std::pair<time_t, time_t> > *pls) +{ + Context *oncomplete = new C_aio_Complete(c); + c->is_read = true; + c->io = this; + + ::ObjectOperation rd; + rd.hit_set_ls(pls, NULL); + object_locator_t oloc(poolid); + Objecter::Op *o = objecter->prepare_pg_read_op( + hash, oloc, rd, NULL, extra_op_flags, oncomplete, NULL, NULL); + objecter->op_submit(o, &c->tid); + return 0; +} + +int librados::IoCtxImpl::hit_set_get(uint32_t hash, AioCompletionImpl *c, + time_t stamp, + bufferlist *pbl) +{ + Context *oncomplete = new C_aio_Complete(c); + c->is_read = true; + c->io = this; + + ::ObjectOperation rd; + rd.hit_set_get(ceph::real_clock::from_time_t(stamp), pbl, 0); + object_locator_t oloc(poolid); + Objecter::Op *o = objecter->prepare_pg_read_op( + hash, oloc, rd, NULL, extra_op_flags, oncomplete, NULL, NULL); + objecter->op_submit(o, &c->tid); + return 0; +} + +int librados::IoCtxImpl::remove(const 
object_t& oid) +{ + ::ObjectOperation op; + prepare_assert_ops(&op); + op.remove(); + return operate(oid, &op, nullptr, librados::OPERATION_FULL_FORCE); +} + +int librados::IoCtxImpl::remove(const object_t& oid, int flags) +{ + ::ObjectOperation op; + prepare_assert_ops(&op); + op.remove(); + return operate(oid, &op, NULL, flags); +} + +int librados::IoCtxImpl::trunc(const object_t& oid, uint64_t size) +{ + ::ObjectOperation op; + prepare_assert_ops(&op); + op.truncate(size); + return operate(oid, &op, NULL); +} + +int librados::IoCtxImpl::get_inconsistent_objects(const pg_t& pg, + const librados::object_id_t& start_after, + uint64_t max_to_get, + AioCompletionImpl *c, + std::vector<inconsistent_obj_t>* objects, + uint32_t* interval) +{ + Context *oncomplete = new C_aio_Complete(c); + c->is_read = true; + c->io = this; + + ::ObjectOperation op; + op.scrub_ls(start_after, max_to_get, objects, interval, &c->rval); + object_locator_t oloc{poolid, pg.ps()}; + Objecter::Op *o = objecter->prepare_pg_read_op( + oloc.hash, oloc, op, nullptr, CEPH_OSD_FLAG_PGOP | extra_op_flags, oncomplete, + nullptr, nullptr); + objecter->op_submit(o, &c->tid); + return 0; +} + +int librados::IoCtxImpl::get_inconsistent_snapsets(const pg_t& pg, + const librados::object_id_t& start_after, + uint64_t max_to_get, + AioCompletionImpl *c, + std::vector<inconsistent_snapset_t>* snapsets, + uint32_t* interval) +{ + Context *oncomplete = new C_aio_Complete(c); + c->is_read = true; + c->io = this; + + ::ObjectOperation op; + op.scrub_ls(start_after, max_to_get, snapsets, interval, &c->rval); + object_locator_t oloc{poolid, pg.ps()}; + Objecter::Op *o = objecter->prepare_pg_read_op( + oloc.hash, oloc, op, nullptr, CEPH_OSD_FLAG_PGOP | extra_op_flags, oncomplete, + nullptr, nullptr); + objecter->op_submit(o, &c->tid); + return 0; +} + +int librados::IoCtxImpl::tmap_update(const object_t& oid, bufferlist& cmdbl) +{ + ::ObjectOperation wr; + prepare_assert_ops(&wr); + wr.tmap_update(cmdbl); + return 
operate(oid, &wr, NULL); +} + +int librados::IoCtxImpl::exec(const object_t& oid, + const char *cls, const char *method, + bufferlist& inbl, bufferlist& outbl) +{ + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.call(cls, method, inbl); + return operate_read(oid, &rd, &outbl); +} + +int librados::IoCtxImpl::aio_exec(const object_t& oid, AioCompletionImpl *c, + const char *cls, const char *method, + bufferlist& inbl, bufferlist *outbl) +{ + FUNCTRACE(client->cct); + Context *oncomplete = new C_aio_Complete(c); + +#if defined(WITH_EVENTTRACE) + ((C_aio_Complete *) oncomplete)->oid = oid; +#endif + c->is_read = true; + c->io = this; + + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.call(cls, method, inbl); + Objecter::Op *o = objecter->prepare_read_op( + oid, oloc, rd, snap_seq, outbl, extra_op_flags, oncomplete, &c->objver); + objecter->op_submit(o, &c->tid); + return 0; +} + +int librados::IoCtxImpl::aio_exec(const object_t& oid, AioCompletionImpl *c, + const char *cls, const char *method, + bufferlist& inbl, char *buf, size_t out_len) +{ + FUNCTRACE(client->cct); + Context *oncomplete = new C_aio_Complete(c); + +#if defined(WITH_EVENTTRACE) + ((C_aio_Complete *) oncomplete)->oid = oid; +#endif + c->is_read = true; + c->io = this; + c->bl.clear(); + c->bl.push_back(buffer::create_static(out_len, buf)); + c->blp = &c->bl; + c->out_buf = buf; + + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.call(cls, method, inbl); + Objecter::Op *o = objecter->prepare_read_op( + oid, oloc, rd, snap_seq, &c->bl, extra_op_flags, oncomplete, &c->objver); + objecter->op_submit(o, &c->tid); + return 0; +} + +int librados::IoCtxImpl::read(const object_t& oid, + bufferlist& bl, size_t len, uint64_t off) +{ + if (len > (size_t) INT_MAX) + return -EDOM; + OID_EVENT_TRACE(oid.name.c_str(), "RADOS_READ_OP_BEGIN"); + + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.read(off, len, &bl, NULL, NULL); + int r = operate_read(oid, &rd, &bl); + if (r < 0) + return r; + + 
if (bl.length() < len) { + ldout(client->cct, 10) << "Returned length " << bl.length() + << " less than original length "<< len << dendl; + } + + return bl.length(); +} + +int librados::IoCtxImpl::cmpext(const object_t& oid, uint64_t off, + bufferlist& cmp_bl) +{ + if (cmp_bl.length() > UINT_MAX/2) + return -E2BIG; + + ::ObjectOperation op; + prepare_assert_ops(&op); + op.cmpext(off, cmp_bl, NULL); + return operate_read(oid, &op, NULL); +} + +int librados::IoCtxImpl::mapext(const object_t& oid, + uint64_t off, size_t len, + std::map<uint64_t,uint64_t>& m) +{ + bufferlist bl; + + ceph::mutex mylock = ceph::make_mutex("IoCtxImpl::read::mylock"); + ceph::condition_variable cond; + bool done; + int r; + Context *onack = new C_SafeCond(mylock, cond, &done, &r); + + objecter->mapext(oid, oloc, + off, len, snap_seq, &bl, extra_op_flags, + onack); + + { + unique_lock l{mylock}; + cond.wait(l, [&done] { return done;}); + } + ldout(client->cct, 10) << "Objecter returned from read r=" << r << dendl; + + if (r < 0) + return r; + + auto iter = bl.cbegin(); + decode(m, iter); + + return m.size(); +} + +int librados::IoCtxImpl::sparse_read(const object_t& oid, + std::map<uint64_t,uint64_t>& m, + bufferlist& data_bl, size_t len, + uint64_t off) +{ + if (len > (size_t) INT_MAX) + return -EDOM; + + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.sparse_read(off, len, &m, &data_bl, NULL); + + int r = operate_read(oid, &rd, NULL); + if (r < 0) + return r; + + return m.size(); +} + +int librados::IoCtxImpl::checksum(const object_t& oid, uint8_t type, + const bufferlist &init_value, size_t len, + uint64_t off, size_t chunk_size, + bufferlist *pbl) +{ + if (len > (size_t) INT_MAX) { + return -EDOM; + } + + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.checksum(type, init_value, off, len, chunk_size, pbl, nullptr, nullptr); + + int r = operate_read(oid, &rd, nullptr); + if (r < 0) { + return r; + } + + return 0; +} + +int librados::IoCtxImpl::stat(const object_t& oid, 
uint64_t *psize, time_t *pmtime) +{ + uint64_t size; + real_time mtime; + + if (!psize) + psize = &size; + + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.stat(psize, &mtime, nullptr); + int r = operate_read(oid, &rd, NULL); + + if (r >= 0 && pmtime) { + *pmtime = real_clock::to_time_t(mtime); + } + + return r; +} + +int librados::IoCtxImpl::stat2(const object_t& oid, uint64_t *psize, struct timespec *pts) +{ + uint64_t size; + ceph::real_time mtime; + + if (!psize) + psize = &size; + + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.stat(psize, &mtime, nullptr); + int r = operate_read(oid, &rd, NULL); + if (r < 0) { + return r; + } + + if (pts) { + *pts = ceph::real_clock::to_timespec(mtime); + } + + return 0; +} + +int librados::IoCtxImpl::getxattr(const object_t& oid, + const char *name, bufferlist& bl) +{ + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.getxattr(name, &bl, NULL); + int r = operate_read(oid, &rd, &bl); + if (r < 0) + return r; + + return bl.length(); +} + +int librados::IoCtxImpl::rmxattr(const object_t& oid, const char *name) +{ + ::ObjectOperation op; + prepare_assert_ops(&op); + op.rmxattr(name); + return operate(oid, &op, NULL); +} + +int librados::IoCtxImpl::setxattr(const object_t& oid, + const char *name, bufferlist& bl) +{ + ::ObjectOperation op; + prepare_assert_ops(&op); + op.setxattr(name, bl); + return operate(oid, &op, NULL); +} + +int librados::IoCtxImpl::getxattrs(const object_t& oid, + map<std::string, bufferlist>& attrset) +{ + map<string, bufferlist> aset; + + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.getxattrs(&aset, NULL); + int r = operate_read(oid, &rd, NULL); + + attrset.clear(); + if (r >= 0) { + for (map<string,bufferlist>::iterator p = aset.begin(); p != aset.end(); ++p) { + ldout(client->cct, 10) << "IoCtxImpl::getxattrs: xattr=" << p->first << dendl; + attrset[p->first.c_str()] = p->second; + } + } + + return r; +} + +void librados::IoCtxImpl::set_sync_op_version(version_t ver) +{ + 
ANNOTATE_BENIGN_RACE_SIZED(&last_objver, sizeof(last_objver), + "IoCtxImpl last_objver"); + last_objver = ver; +} + +namespace librados { +void intrusive_ptr_add_ref(IoCtxImpl *p) { p->get(); } +void intrusive_ptr_release(IoCtxImpl *p) { p->put(); } +} + +struct WatchInfo { + boost::intrusive_ptr<librados::IoCtxImpl> ioctx; + object_t oid; + librados::WatchCtx *ctx; + librados::WatchCtx2 *ctx2; + + WatchInfo(librados::IoCtxImpl *io, object_t o, + librados::WatchCtx *c, librados::WatchCtx2 *c2) + : ioctx(io), oid(o), ctx(c), ctx2(c2) {} + + void handle_notify(uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist& bl) { + ldout(ioctx->client->cct, 10) << __func__ << " " << notify_id + << " cookie " << cookie + << " notifier_id " << notifier_id + << " len " << bl.length() + << dendl; + + if (ctx2) + ctx2->handle_notify(notify_id, cookie, notifier_id, bl); + if (ctx) { + ctx->notify(0, 0, bl); + + // send ACK back to OSD if using legacy protocol + bufferlist empty; + ioctx->notify_ack(oid, notify_id, cookie, empty); + } + } + void handle_error(uint64_t cookie, int err) { + ldout(ioctx->client->cct, 10) << __func__ << " cookie " << cookie + << " err " << err + << dendl; + if (ctx2) + ctx2->handle_error(cookie, err); + } + + void operator()(bs::error_code ec, + uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_id, + bufferlist&& bl) { + if (ec) { + handle_error(cookie, ceph::from_error_code(ec)); + } else { + handle_notify(notify_id, cookie, notifier_id, bl); + } + } +}; + +// internal WatchInfo that owns the context memory +struct InternalWatchInfo : public WatchInfo { + std::unique_ptr<librados::WatchCtx> ctx; + std::unique_ptr<librados::WatchCtx2> ctx2; + + InternalWatchInfo(librados::IoCtxImpl *io, object_t o, + librados::WatchCtx *c, librados::WatchCtx2 *c2) + : WatchInfo(io, o, c, c2), ctx(c), ctx2(c2) {} +}; + +int librados::IoCtxImpl::watch(const object_t& oid, uint64_t *handle, + librados::WatchCtx *ctx, + librados::WatchCtx2 
*ctx2, + bool internal) +{ + return watch(oid, handle, ctx, ctx2, 0, internal); +} + +int librados::IoCtxImpl::watch(const object_t& oid, uint64_t *handle, + librados::WatchCtx *ctx, + librados::WatchCtx2 *ctx2, + uint32_t timeout, + bool internal) +{ + ::ObjectOperation wr; + version_t objver; + C_SaferCond onfinish; + + Objecter::LingerOp *linger_op = objecter->linger_register(oid, oloc, + extra_op_flags); + *handle = linger_op->get_cookie(); + if (internal) { + linger_op->handle = InternalWatchInfo(this, oid, ctx, ctx2); + } else { + linger_op->handle = WatchInfo(this, oid, ctx, ctx2); + } + prepare_assert_ops(&wr); + wr.watch(*handle, CEPH_OSD_WATCH_OP_WATCH, timeout); + bufferlist bl; + objecter->linger_watch(linger_op, wr, + snapc, ceph::real_clock::now(), bl, + &onfinish, + &objver); + + int r = onfinish.wait(); + + set_sync_op_version(objver); + + if (r < 0) { + objecter->linger_cancel(linger_op); + *handle = 0; + } + + return r; +} + +int librados::IoCtxImpl::aio_watch(const object_t& oid, + AioCompletionImpl *c, + uint64_t *handle, + librados::WatchCtx *ctx, + librados::WatchCtx2 *ctx2, + bool internal) { + return aio_watch(oid, c, handle, ctx, ctx2, 0, internal); +} + +int librados::IoCtxImpl::aio_watch(const object_t& oid, + AioCompletionImpl *c, + uint64_t *handle, + librados::WatchCtx *ctx, + librados::WatchCtx2 *ctx2, + uint32_t timeout, + bool internal) +{ + Objecter::LingerOp *linger_op = objecter->linger_register(oid, oloc, + extra_op_flags); + c->io = this; + Context *oncomplete = new C_aio_linger_Complete(c, linger_op, false); + + ::ObjectOperation wr; + *handle = linger_op->get_cookie(); + if (internal) { + linger_op->handle = InternalWatchInfo(this, oid, ctx, ctx2); + } else { + linger_op->handle = WatchInfo(this, oid, ctx, ctx2); + } + + prepare_assert_ops(&wr); + wr.watch(*handle, CEPH_OSD_WATCH_OP_WATCH, timeout); + bufferlist bl; + objecter->linger_watch(linger_op, wr, + snapc, ceph::real_clock::now(), bl, + oncomplete, &c->objver); + + 
return 0; +} + + +int librados::IoCtxImpl::notify_ack( + const object_t& oid, + uint64_t notify_id, + uint64_t cookie, + bufferlist& bl) +{ + ::ObjectOperation rd; + prepare_assert_ops(&rd); + rd.notify_ack(notify_id, cookie, bl); + objecter->read(oid, oloc, rd, snap_seq, (bufferlist*)NULL, extra_op_flags, 0, 0); + return 0; +} + +int librados::IoCtxImpl::watch_check(uint64_t cookie) +{ + auto linger_op = reinterpret_cast<Objecter::LingerOp*>(cookie); + auto r = objecter->linger_check(linger_op); + if (r) + return 1 + std::chrono::duration_cast< + std::chrono::milliseconds>(*r).count(); + else + return ceph::from_error_code(r.error()); +} + +int librados::IoCtxImpl::unwatch(uint64_t cookie) +{ + Objecter::LingerOp *linger_op = reinterpret_cast<Objecter::LingerOp*>(cookie); + C_SaferCond onfinish; + version_t ver = 0; + + ::ObjectOperation wr; + prepare_assert_ops(&wr); + wr.watch(cookie, CEPH_OSD_WATCH_OP_UNWATCH); + objecter->mutate(linger_op->target.base_oid, oloc, wr, + snapc, ceph::real_clock::now(), extra_op_flags, + &onfinish, &ver); + objecter->linger_cancel(linger_op); + + int r = onfinish.wait(); + set_sync_op_version(ver); + return r; +} + +int librados::IoCtxImpl::aio_unwatch(uint64_t cookie, AioCompletionImpl *c) +{ + c->io = this; + Objecter::LingerOp *linger_op = reinterpret_cast<Objecter::LingerOp*>(cookie); + Context *oncomplete = new C_aio_linger_Complete(c, linger_op, true); + + ::ObjectOperation wr; + prepare_assert_ops(&wr); + wr.watch(cookie, CEPH_OSD_WATCH_OP_UNWATCH); + objecter->mutate(linger_op->target.base_oid, oloc, wr, + snapc, ceph::real_clock::now(), extra_op_flags, + oncomplete, &c->objver); + return 0; +} + +int librados::IoCtxImpl::notify(const object_t& oid, bufferlist& bl, + uint64_t timeout_ms, + bufferlist *preply_bl, + char **preply_buf, size_t *preply_buf_len) +{ + Objecter::LingerOp *linger_op = objecter->linger_register(oid, oloc, + extra_op_flags); + + C_SaferCond notify_finish_cond; + linger_op->on_notify_finish = + 
Objecter::LingerOp::OpComp::create( + objecter->service.get_executor(), + CB_notify_Finish(client->cct, &notify_finish_cond, + objecter, linger_op, preply_bl, + preply_buf, preply_buf_len)); + uint32_t timeout = notify_timeout; + if (timeout_ms) + timeout = timeout_ms / 1000; + + // Construct RADOS op + ::ObjectOperation rd; + prepare_assert_ops(&rd); + bufferlist inbl; + rd.notify(linger_op->get_cookie(), 1, timeout, bl, &inbl); + + // Issue RADOS op + C_SaferCond onack; + version_t objver; + objecter->linger_notify(linger_op, + rd, snap_seq, inbl, NULL, + &onack, &objver); + + ldout(client->cct, 10) << __func__ << " issued linger op " << linger_op << dendl; + int r = onack.wait(); + ldout(client->cct, 10) << __func__ << " linger op " << linger_op + << " acked (" << r << ")" << dendl; + + if (r == 0) { + ldout(client->cct, 10) << __func__ << " waiting for watch_notify finish " + << linger_op << dendl; + r = notify_finish_cond.wait(); + + } else { + ldout(client->cct, 10) << __func__ << " failed to initiate notify, r = " + << r << dendl; + notify_finish_cond.wait(); + } + + objecter->linger_cancel(linger_op); + + set_sync_op_version(objver); + return r; +} + +int librados::IoCtxImpl::aio_notify(const object_t& oid, AioCompletionImpl *c, + bufferlist& bl, uint64_t timeout_ms, + bufferlist *preply_bl, char **preply_buf, + size_t *preply_buf_len) +{ + Objecter::LingerOp *linger_op = objecter->linger_register(oid, oloc, + extra_op_flags); + + c->io = this; + + C_aio_notify_Complete *oncomplete = new C_aio_notify_Complete(c, linger_op); + linger_op->on_notify_finish = + Objecter::LingerOp::OpComp::create( + objecter->service.get_executor(), + CB_notify_Finish(client->cct, oncomplete, + objecter, linger_op, + preply_bl, preply_buf, + preply_buf_len)); + Context *onack = new C_aio_notify_Ack(client->cct, oncomplete); + + uint32_t timeout = notify_timeout; + if (timeout_ms) + timeout = timeout_ms / 1000; + + // Construct RADOS op + ::ObjectOperation rd; +
prepare_assert_ops(&rd); + bufferlist inbl; + rd.notify(linger_op->get_cookie(), 1, timeout, bl, &inbl); + + // Issue RADOS op + objecter->linger_notify(linger_op, + rd, snap_seq, inbl, NULL, + onack, &c->objver); + return 0; +} + +int librados::IoCtxImpl::set_alloc_hint(const object_t& oid, + uint64_t expected_object_size, + uint64_t expected_write_size, + uint32_t flags) +{ + ::ObjectOperation wr; + prepare_assert_ops(&wr); + wr.set_alloc_hint(expected_object_size, expected_write_size, flags); + return operate(oid, &wr, NULL); +} + +version_t librados::IoCtxImpl::last_version() +{ + return last_objver; +} + +void librados::IoCtxImpl::set_assert_version(uint64_t ver) +{ + assert_ver = ver; +} + +void librados::IoCtxImpl::set_notify_timeout(uint32_t timeout) +{ + notify_timeout = timeout; +} + +int librados::IoCtxImpl::cache_pin(const object_t& oid) +{ + ::ObjectOperation wr; + prepare_assert_ops(&wr); + wr.cache_pin(); + return operate(oid, &wr, NULL); +} + +int librados::IoCtxImpl::cache_unpin(const object_t& oid) +{ + ::ObjectOperation wr; + prepare_assert_ops(&wr); + wr.cache_unpin(); + return operate(oid, &wr, NULL); +} + + +///////////////////////////// C_aio_stat_Ack //////////////////////////// + +librados::IoCtxImpl::C_aio_stat_Ack::C_aio_stat_Ack(AioCompletionImpl *_c, + time_t *pm) + : c(_c), pmtime(pm) +{ + ceph_assert(!c->io); + c->get(); +} + +void librados::IoCtxImpl::C_aio_stat_Ack::finish(int r) +{ + c->lock.lock(); + c->rval = r; + c->complete = true; + c->cond.notify_all(); + + if (r >= 0 && pmtime) { + *pmtime = real_clock::to_time_t(mtime); + } + + if (c->callback_complete) { + boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c)); + } + + c->put_unlock(); +} + +///////////////////////////// C_aio_stat2_Ack //////////////////////////// + +librados::IoCtxImpl::C_aio_stat2_Ack::C_aio_stat2_Ack(AioCompletionImpl *_c, + struct timespec *pt) + : c(_c), pts(pt) +{ + ceph_assert(!c->io); + c->get(); +} + +void 
librados::IoCtxImpl::C_aio_stat2_Ack::finish(int r) +{ + c->lock.lock(); + c->rval = r; + c->complete = true; + c->cond.notify_all(); + + if (r >= 0 && pts) { + *pts = real_clock::to_timespec(mtime); + } + + if (c->callback_complete) { + boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c)); + } + + c->put_unlock(); +} + +//////////////////////////// C_aio_Complete //////////////////////////////// + +librados::IoCtxImpl::C_aio_Complete::C_aio_Complete(AioCompletionImpl *_c) + : c(_c) +{ + c->get(); +} + +void librados::IoCtxImpl::C_aio_Complete::finish(int r) +{ + c->lock.lock(); + // Leave an existing rval unless r != 0 + if (r) + c->rval = r; // This clears the error set in C_ObjectOperation_scrub_ls::finish() + c->complete = true; + c->cond.notify_all(); + + if (r == 0 && c->blp && c->blp->length() > 0) { + if (c->out_buf && !c->blp->is_contiguous()) { + c->rval = -ERANGE; + } else { + if (c->out_buf && !c->blp->is_provided_buffer(c->out_buf)) + c->blp->begin().copy(c->blp->length(), c->out_buf); + + c->rval = c->blp->length(); + } + } + + if (c->callback_complete || + c->callback_safe) { + boost::asio::defer(c->io->client->finish_strand, CB_AioComplete(c)); + } + + if (c->aio_write_seq) { + c->io->complete_aio_write(c); + } + +#if defined(WITH_EVENTTRACE) + OID_EVENT_TRACE(oid.name.c_str(), "RADOS_OP_COMPLETE"); +#endif + c->put_unlock(); +} + +void librados::IoCtxImpl::object_list_slice( + const hobject_t start, + const hobject_t finish, + const size_t n, + const size_t m, + hobject_t *split_start, + hobject_t *split_finish) +{ + if (start.is_max()) { + *split_start = hobject_t::get_max(); + *split_finish = hobject_t::get_max(); + return; + } + + uint64_t start_hash = hobject_t::_reverse_bits(start.get_hash()); + uint64_t finish_hash = + finish.is_max() ? 
0x100000000 : + hobject_t::_reverse_bits(finish.get_hash()); + + uint64_t diff = finish_hash - start_hash; + uint64_t rev_start = start_hash + (diff * n / m); + uint64_t rev_finish = start_hash + (diff * (n + 1) / m); + if (n == 0) { + *split_start = start; + } else { + *split_start = hobject_t( + object_t(), string(), CEPH_NOSNAP, + hobject_t::_reverse_bits(rev_start), poolid, string()); + } + + if (n == m - 1) + *split_finish = finish; + else if (rev_finish >= 0x100000000) + *split_finish = hobject_t::get_max(); + else + *split_finish = hobject_t( + object_t(), string(), CEPH_NOSNAP, + hobject_t::_reverse_bits(rev_finish), poolid, string()); +} + +int librados::IoCtxImpl::application_enable(const std::string& app_name, + bool force) +{ + auto c = new PoolAsyncCompletionImpl(); + application_enable_async(app_name, force, c); + + int r = c->wait(); + ceph_assert(r == 0); + + r = c->get_return_value(); + c->release(); + c->put(); + if (r < 0) { + return r; + } + + return client->wait_for_latest_osdmap(); +} + +void librados::IoCtxImpl::application_enable_async(const std::string& app_name, + bool force, + PoolAsyncCompletionImpl *c) +{ + // pre-Luminous clusters will return -EINVAL and application won't be + // preserved until Luminous is configured as minimum version.
+ if (!client->get_required_monitor_features().contains_all( + ceph::features::mon::FEATURE_LUMINOUS)) { + boost::asio::defer(client->finish_strand, + [cb = CB_PoolAsync_Safe(c)]() mutable { + cb(-EOPNOTSUPP); + }); + return; + } + + std::stringstream cmd; + cmd << "{" + << "\"prefix\": \"osd pool application enable\"," + << "\"pool\": \"" << get_cached_pool_name() << "\"," + << "\"app\": \"" << app_name << "\""; + if (force) { + cmd << ",\"yes_i_really_mean_it\": true"; + } + cmd << "}"; + + std::vector<std::string> cmds; + cmds.push_back(cmd.str()); + bufferlist inbl; + client->mon_command_async(cmds, inbl, nullptr, nullptr, + make_lambda_context(CB_PoolAsync_Safe(c))); +} + +int librados::IoCtxImpl::application_list(std::set<std::string> *app_names) +{ + int r = 0; + app_names->clear(); + objecter->with_osdmap([&](const OSDMap& o) { + auto pg_pool = o.get_pg_pool(poolid); + if (pg_pool == nullptr) { + r = -ENOENT; + return; + } + + for (auto &pair : pg_pool->application_metadata) { + app_names->insert(pair.first); + } + }); + return r; +} + +int librados::IoCtxImpl::application_metadata_get(const std::string& app_name, + const std::string &key, + std::string* value) +{ + int r = 0; + objecter->with_osdmap([&](const OSDMap& o) { + auto pg_pool = o.get_pg_pool(poolid); + if (pg_pool == nullptr) { + r = -ENOENT; + return; + } + + auto app_it = pg_pool->application_metadata.find(app_name); + if (app_it == pg_pool->application_metadata.end()) { + r = -ENOENT; + return; + } + + auto it = app_it->second.find(key); + if (it == app_it->second.end()) { + r = -ENOENT; + return; + } + + *value = it->second; + }); + return r; +} + +int librados::IoCtxImpl::application_metadata_set(const std::string& app_name, + const std::string &key, + const std::string& value) +{ + std::stringstream cmd; + cmd << "{" + << "\"prefix\":\"osd pool application set\"," + << "\"pool\":\"" << get_cached_pool_name() << "\"," + << "\"app\":\"" << app_name << "\"," + << "\"key\":\"" << key << "\"," 
+ << "\"value\":\"" << value << "\"" + << "}"; + + std::vector<std::string> cmds; + cmds.push_back(cmd.str()); + bufferlist inbl; + int r = client->mon_command(cmds, inbl, nullptr, nullptr); + if (r < 0) { + return r; + } + + // ensure we have the latest osd map epoch before proceeding + return client->wait_for_latest_osdmap(); +} + +int librados::IoCtxImpl::application_metadata_remove(const std::string& app_name, + const std::string &key) +{ + std::stringstream cmd; + cmd << "{" + << "\"prefix\":\"osd pool application rm\"," + << "\"pool\":\"" << get_cached_pool_name() << "\"," + << "\"app\":\"" << app_name << "\"," + << "\"key\":\"" << key << "\"" + << "}"; + + std::vector<std::string> cmds; + cmds.push_back(cmd.str()); + bufferlist inbl; + int r = client->mon_command(cmds, inbl, nullptr, nullptr); + if (r < 0) { + return r; + } + + // ensure we have the latest osd map epoch before proceeding + return client->wait_for_latest_osdmap(); +} + +int librados::IoCtxImpl::application_metadata_list(const std::string& app_name, + std::map<std::string, std::string> *values) +{ + int r = 0; + values->clear(); + objecter->with_osdmap([&](const OSDMap& o) { + auto pg_pool = o.get_pg_pool(poolid); + if (pg_pool == nullptr) { + r = -ENOENT; + return; + } + + auto it = pg_pool->application_metadata.find(app_name); + if (it == pg_pool->application_metadata.end()) { + r = -ENOENT; + return; + } + + *values = it->second; + }); + return r; +} + diff --git a/src/librados/IoCtxImpl.h b/src/librados/IoCtxImpl.h new file mode 100644 index 000000000..afc5de6ff --- /dev/null +++ b/src/librados/IoCtxImpl.h @@ -0,0 +1,299 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published 
by the Free Software + * Foundation. See file COPYING. + * + */ + +#ifndef CEPH_LIBRADOS_IOCTXIMPL_H +#define CEPH_LIBRADOS_IOCTXIMPL_H + +#include <atomic> + +#include "common/Cond.h" +#include "common/ceph_mutex.h" +#include "common/snap_types.h" +#include "common/zipkin_trace.h" +#include "include/types.h" +#include "include/rados/librados.h" +#include "include/rados/librados.hpp" +#include "include/xlist.h" +#include "osd/osd_types.h" +#include "osdc/Objecter.h" + +class RadosClient; + +struct librados::IoCtxImpl { + std::atomic<uint64_t> ref_cnt = { 0 }; + RadosClient *client = nullptr; + int64_t poolid = 0; + snapid_t snap_seq; + ::SnapContext snapc; + uint64_t assert_ver = 0; + version_t last_objver = 0; + uint32_t notify_timeout = 30; + object_locator_t oloc; + int extra_op_flags = 0; + + ceph::mutex aio_write_list_lock = + ceph::make_mutex("librados::IoCtxImpl::aio_write_list_lock"); + ceph_tid_t aio_write_seq = 0; + ceph::condition_variable aio_write_cond; + xlist<AioCompletionImpl*> aio_write_list; + map<ceph_tid_t, std::list<AioCompletionImpl*> > aio_write_waiters; + + Objecter *objecter = nullptr; + + IoCtxImpl(); + IoCtxImpl(RadosClient *c, Objecter *objecter, + int64_t poolid, snapid_t s); + + void dup(const IoCtxImpl& rhs) { + // Copy everything except the ref count + client = rhs.client; + poolid = rhs.poolid; + snap_seq = rhs.snap_seq; + snapc = rhs.snapc; + assert_ver = rhs.assert_ver; + last_objver = rhs.last_objver; + notify_timeout = rhs.notify_timeout; + oloc = rhs.oloc; + extra_op_flags = rhs.extra_op_flags; + objecter = rhs.objecter; + } + + void set_snap_read(snapid_t s); + int set_snap_write_context(snapid_t seq, vector<snapid_t>& snaps); + + void get() { + ref_cnt++; + } + + void put() { + if (--ref_cnt == 0) + delete this; + } + + void queue_aio_write(struct AioCompletionImpl *c); + void complete_aio_write(struct AioCompletionImpl *c); + void flush_aio_writes_async(AioCompletionImpl *c); + void flush_aio_writes(); + + int64_t get_id() { 
+ return poolid; + } + + string get_cached_pool_name(); + + int get_object_hash_position(const std::string& oid, uint32_t *hash_position); + int get_object_pg_hash_position(const std::string& oid, uint32_t *pg_hash_position); + + ::ObjectOperation *prepare_assert_ops(::ObjectOperation *op); + + // snaps + int snap_list(vector<uint64_t> *snaps); + int snap_lookup(const char *name, uint64_t *snapid); + int snap_get_name(uint64_t snapid, std::string *s); + int snap_get_stamp(uint64_t snapid, time_t *t); + int snap_create(const char* snapname); + int selfmanaged_snap_create(uint64_t *snapid); + void aio_selfmanaged_snap_create(uint64_t *snapid, AioCompletionImpl *c); + int snap_remove(const char* snapname); + int rollback(const object_t& oid, const char *snapName); + int selfmanaged_snap_remove(uint64_t snapid); + void aio_selfmanaged_snap_remove(uint64_t snapid, AioCompletionImpl *c); + int selfmanaged_snap_rollback_object(const object_t& oid, + ::SnapContext& snapc, uint64_t snapid); + + // io + int nlist(Objecter::NListContext *context, int max_entries); + uint32_t nlist_seek(Objecter::NListContext *context, uint32_t pos); + uint32_t nlist_seek(Objecter::NListContext *context, const rados_object_list_cursor& cursor); + rados_object_list_cursor nlist_get_cursor(Objecter::NListContext *context); + void object_list_slice( + const hobject_t start, + const hobject_t finish, + const size_t n, + const size_t m, + hobject_t *split_start, + hobject_t *split_finish); + + int create(const object_t& oid, bool exclusive); + int write(const object_t& oid, bufferlist& bl, size_t len, uint64_t off); + int append(const object_t& oid, bufferlist& bl, size_t len); + int write_full(const object_t& oid, bufferlist& bl); + int writesame(const object_t& oid, bufferlist& bl, + size_t write_len, uint64_t offset); + int read(const object_t& oid, bufferlist& bl, size_t len, uint64_t off); + int mapext(const object_t& oid, uint64_t off, size_t len, + std::map<uint64_t,uint64_t>& m); + int 
sparse_read(const object_t& oid, std::map<uint64_t,uint64_t>& m, + bufferlist& bl, size_t len, uint64_t off); + int checksum(const object_t& oid, uint8_t type, const bufferlist &init_value, + size_t len, uint64_t off, size_t chunk_size, bufferlist *pbl); + int remove(const object_t& oid); + int remove(const object_t& oid, int flags); + int stat(const object_t& oid, uint64_t *psize, time_t *pmtime); + int stat2(const object_t& oid, uint64_t *psize, struct timespec *pts); + int trunc(const object_t& oid, uint64_t size); + int cmpext(const object_t& oid, uint64_t off, bufferlist& cmp_bl); + + int tmap_update(const object_t& oid, bufferlist& cmdbl); + + int exec(const object_t& oid, const char *cls, const char *method, bufferlist& inbl, bufferlist& outbl); + + int getxattr(const object_t& oid, const char *name, bufferlist& bl); + int setxattr(const object_t& oid, const char *name, bufferlist& bl); + int getxattrs(const object_t& oid, map<string, bufferlist>& attrset); + int rmxattr(const object_t& oid, const char *name); + + int operate(const object_t& oid, ::ObjectOperation *o, ceph::real_time *pmtime, int flags=0); + int operate_read(const object_t& oid, ::ObjectOperation *o, bufferlist *pbl, int flags=0); + int aio_operate(const object_t& oid, ::ObjectOperation *o, + AioCompletionImpl *c, const SnapContext& snap_context, + int flags, const blkin_trace_info *trace_info = nullptr); + int aio_operate_read(const object_t& oid, ::ObjectOperation *o, + AioCompletionImpl *c, int flags, bufferlist *pbl, const blkin_trace_info *trace_info = nullptr); + + struct C_aio_stat_Ack : public Context { + librados::AioCompletionImpl *c; + time_t *pmtime; + ceph::real_time mtime; + C_aio_stat_Ack(AioCompletionImpl *_c, time_t *pm); + void finish(int r) override; + }; + + struct C_aio_stat2_Ack : public Context { + librados::AioCompletionImpl *c; + struct timespec *pts; + ceph::real_time mtime; + C_aio_stat2_Ack(AioCompletionImpl *_c, struct timespec *pts); + void finish(int r) 
override; + }; + + struct C_aio_Complete : public Context { +#if defined(WITH_EVENTTRACE) + object_t oid; +#endif + AioCompletionImpl *c; + explicit C_aio_Complete(AioCompletionImpl *_c); + void finish(int r) override; + }; + + int aio_read(const object_t oid, AioCompletionImpl *c, + bufferlist *pbl, size_t len, uint64_t off, uint64_t snapid, + const blkin_trace_info *info = nullptr); + int aio_read(object_t oid, AioCompletionImpl *c, + char *buf, size_t len, uint64_t off, uint64_t snapid, + const blkin_trace_info *info = nullptr); + int aio_sparse_read(const object_t oid, AioCompletionImpl *c, + std::map<uint64_t,uint64_t> *m, bufferlist *data_bl, + size_t len, uint64_t off, uint64_t snapid); + int aio_cmpext(const object_t& oid, AioCompletionImpl *c, uint64_t off, + bufferlist& cmp_bl); + int aio_cmpext(const object_t& oid, AioCompletionImpl *c, + const char *cmp_buf, size_t cmp_len, uint64_t off); + int aio_write(const object_t &oid, AioCompletionImpl *c, + const bufferlist& bl, size_t len, uint64_t off, + const blkin_trace_info *info = nullptr); + int aio_append(const object_t &oid, AioCompletionImpl *c, + const bufferlist& bl, size_t len); + int aio_write_full(const object_t &oid, AioCompletionImpl *c, + const bufferlist& bl); + int aio_writesame(const object_t &oid, AioCompletionImpl *c, + const bufferlist& bl, size_t write_len, uint64_t off); + int aio_remove(const object_t &oid, AioCompletionImpl *c, int flags=0); + int aio_exec(const object_t& oid, AioCompletionImpl *c, const char *cls, + const char *method, bufferlist& inbl, bufferlist *outbl); + int aio_exec(const object_t& oid, AioCompletionImpl *c, const char *cls, + const char *method, bufferlist& inbl, char *buf, size_t out_len); + int aio_stat(const object_t& oid, AioCompletionImpl *c, uint64_t *psize, time_t *pmtime); + int aio_stat2(const object_t& oid, AioCompletionImpl *c, uint64_t *psize, struct timespec *pts); + int aio_getxattr(const object_t& oid, AioCompletionImpl *c, + const char *name, 
bufferlist& bl); + int aio_setxattr(const object_t& oid, AioCompletionImpl *c, + const char *name, bufferlist& bl); + int aio_getxattrs(const object_t& oid, AioCompletionImpl *c, + map<string, bufferlist>& attrset); + int aio_rmxattr(const object_t& oid, AioCompletionImpl *c, + const char *name); + int aio_cancel(AioCompletionImpl *c); + + int hit_set_list(uint32_t hash, AioCompletionImpl *c, + std::list< std::pair<time_t, time_t> > *pls); + int hit_set_get(uint32_t hash, AioCompletionImpl *c, time_t stamp, + bufferlist *pbl); + + int get_inconsistent_objects(const pg_t& pg, + const librados::object_id_t& start_after, + uint64_t max_to_get, + AioCompletionImpl *c, + std::vector<inconsistent_obj_t>* objects, + uint32_t* interval); + + int get_inconsistent_snapsets(const pg_t& pg, + const librados::object_id_t& start_after, + uint64_t max_to_get, + AioCompletionImpl *c, + std::vector<inconsistent_snapset_t>* snapsets, + uint32_t* interval); + + void set_sync_op_version(version_t ver); + int watch(const object_t& oid, uint64_t *cookie, librados::WatchCtx *ctx, + librados::WatchCtx2 *ctx2, bool internal = false); + int watch(const object_t& oid, uint64_t *cookie, librados::WatchCtx *ctx, + librados::WatchCtx2 *ctx2, uint32_t timeout, bool internal = false); + int aio_watch(const object_t& oid, AioCompletionImpl *c, uint64_t *cookie, + librados::WatchCtx *ctx, librados::WatchCtx2 *ctx2, + bool internal = false); + int aio_watch(const object_t& oid, AioCompletionImpl *c, uint64_t *cookie, + librados::WatchCtx *ctx, librados::WatchCtx2 *ctx2, + uint32_t timeout, bool internal = false); + int watch_check(uint64_t cookie); + int unwatch(uint64_t cookie); + int aio_unwatch(uint64_t cookie, AioCompletionImpl *c); + int notify(const object_t& oid, bufferlist& bl, uint64_t timeout_ms, + bufferlist *preplybl, char **preply_buf, size_t *preply_buf_len); + int notify_ack(const object_t& oid, uint64_t notify_id, uint64_t cookie, + bufferlist& bl); + int aio_notify(const object_t& 
oid, AioCompletionImpl *c, bufferlist& bl, + uint64_t timeout_ms, bufferlist *preplybl, char **preply_buf, + size_t *preply_buf_len); + + int set_alloc_hint(const object_t& oid, + uint64_t expected_object_size, + uint64_t expected_write_size, + uint32_t flags); + + version_t last_version(); + void set_assert_version(uint64_t ver); + void set_notify_timeout(uint32_t timeout); + + int cache_pin(const object_t& oid); + int cache_unpin(const object_t& oid); + + int application_enable(const std::string& app_name, bool force); + void application_enable_async(const std::string& app_name, bool force, + PoolAsyncCompletionImpl *c); + int application_list(std::set<std::string> *app_names); + int application_metadata_get(const std::string& app_name, + const std::string &key, + std::string* value); + int application_metadata_set(const std::string& app_name, + const std::string &key, + const std::string& value); + int application_metadata_remove(const std::string& app_name, + const std::string &key); + int application_metadata_list(const std::string& app_name, + std::map<std::string, std::string> *values); + +}; + +#endif diff --git a/src/librados/ListObjectImpl.h b/src/librados/ListObjectImpl.h new file mode 100644 index 000000000..7396c1210 --- /dev/null +++ b/src/librados/ListObjectImpl.h @@ -0,0 +1,81 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 David Zafman <dzafman@redhat.com> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#ifndef CEPH_LIBRADOS_LISTOBJECTIMPL_H +#define CEPH_LIBRADOS_LISTOBJECTIMPL_H + +#include <string> +#include <include/rados/librados.hpp> + +#include "include/cmp.h" + +namespace librados { +struct ListObjectImpl { + std::string nspace; + std::string oid; + std::string locator; + + ListObjectImpl() {} + ListObjectImpl(std::string n, std::string o, std::string l): + nspace(n), oid(o), locator(l) {} + + const std::string& get_nspace() const { return nspace; } + const std::string& get_oid() const { return oid; } + const std::string& get_locator() const { return locator; } +}; +WRITE_EQ_OPERATORS_3(ListObjectImpl, nspace, oid, locator) +WRITE_CMP_OPERATORS_3(ListObjectImpl, nspace, oid, locator) +inline std::ostream& operator<<(std::ostream& out, const struct ListObjectImpl& lop) { + out << (lop.nspace.size() ? lop.nspace + "/" : "") << lop.oid + << (lop.locator.size() ? "@" + lop.locator : ""); + return out; +} + +class NObjectIteratorImpl { + public: + NObjectIteratorImpl() {} + ~NObjectIteratorImpl(); + NObjectIteratorImpl(const NObjectIteratorImpl &rhs); + NObjectIteratorImpl& operator=(const NObjectIteratorImpl& rhs); + + bool operator==(const NObjectIteratorImpl& rhs) const; + bool operator!=(const NObjectIteratorImpl& rhs) const; + const ListObject& operator*() const; + const ListObject* operator->() const; + NObjectIteratorImpl &operator++(); // Preincrement + NObjectIteratorImpl operator++(int); // Postincrement + const ListObject *get_listobjectp() { return &cur_obj; } + + /// get current hash position of the iterator, rounded to the current pg + uint32_t get_pg_hash_position() const; + + /// move the iterator to a given hash position. this may (will!) be rounded to the nearest pg. 
+ uint32_t seek(uint32_t pos); + + /// move the iterator to a given cursor position + uint32_t seek(const librados::ObjectCursor& cursor); + + /// get current cursor position + librados::ObjectCursor get_cursor(); + + void set_filter(const bufferlist &bl); + + NObjectIteratorImpl(ObjListCtx *ctx_); + void get_next(); + std::shared_ptr < ObjListCtx > ctx; + ListObject cur_obj; +}; + +} +#endif diff --git a/src/librados/PoolAsyncCompletionImpl.h b/src/librados/PoolAsyncCompletionImpl.h new file mode 100644 index 000000000..73420fe35 --- /dev/null +++ b/src/librados/PoolAsyncCompletionImpl.h @@ -0,0 +1,110 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#ifndef CEPH_LIBRADOS_POOLASYNCCOMPLETIONIMPL_H +#define CEPH_LIBRADOS_POOLASYNCCOMPLETIONIMPL_H + +#include "common/ceph_mutex.h" + +#include <boost/intrusive_ptr.hpp> + +#include "include/rados/librados.h" +#include "include/rados/librados.hpp" + +namespace librados { + struct PoolAsyncCompletionImpl { + ceph::mutex lock = ceph::make_mutex("PoolAsyncCompletionImpl lock"); + ceph::condition_variable cond; + int ref = 1; + int rval = 0; + bool released = false; + bool done = false; + + rados_callback_t callback = nullptr; + void *callback_arg = nullptr; + + PoolAsyncCompletionImpl() = default; + + int set_callback(void *cb_arg, rados_callback_t cb) { + std::scoped_lock l(lock); + callback = cb; + callback_arg = cb_arg; + return 0; + } + int wait() { + std::unique_lock l(lock); + while (!done) + cond.wait(l); + return 0; + } + int is_complete() { + std::scoped_lock l(lock); + return done; + } + int get_return_value() { + std::scoped_lock l(lock); + return rval; + } + void get() { + std::scoped_lock l(lock); + ceph_assert(ref > 0); + ref++; + } + void release() { + std::scoped_lock l(lock); + ceph_assert(!released); + released = true; + } + void put() { + std::unique_lock l(lock); + int n = --ref; + l.unlock(); + if (!n) + delete this; + } + }; + + inline void intrusive_ptr_add_ref(PoolAsyncCompletionImpl* p) { + p->get(); + } + inline void intrusive_ptr_release(PoolAsyncCompletionImpl* p) { + p->put(); + } + + class CB_PoolAsync_Safe { + boost::intrusive_ptr<PoolAsyncCompletionImpl> p; + + public: + explicit CB_PoolAsync_Safe(boost::intrusive_ptr<PoolAsyncCompletionImpl> p) + : p(p) {} + ~CB_PoolAsync_Safe() = default; + + void operator()(int r) { + auto c(std::move(p)); + std::unique_lock l(c->lock); + c->rval = r; + c->done = true; + c->cond.notify_all(); + + if (c->callback) { + rados_callback_t cb = c->callback; + void *cb_arg = c->callback_arg; + l.unlock(); + cb(c.get(), cb_arg); + l.lock(); + } + } + }; +} +#endif diff --git 
a/src/librados/RadosClient.cc b/src/librados/RadosClient.cc new file mode 100644 index 000000000..04ea14f31 --- /dev/null +++ b/src/librados/RadosClient.cc @@ -0,0 +1,1180 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> + +#include <iostream> +#include <string> +#include <sstream> +#include <pthread.h> +#include <errno.h> + +#include "common/ceph_context.h" +#include "common/config.h" +#include "common/common_init.h" +#include "common/ceph_json.h" +#include "common/errno.h" +#include "common/ceph_json.h" +#include "common/async/blocked_completion.h" +#include "include/buffer.h" +#include "include/stringify.h" +#include "include/util.h" + +#include "msg/Messenger.h" + +// needed for static_cast +#include "messages/MLog.h" + +#include "AioCompletionImpl.h" +#include "IoCtxImpl.h" +#include "PoolAsyncCompletionImpl.h" +#include "RadosClient.h" + +#include "include/ceph_assert.h" +#include "common/EventTrace.h" + +#define dout_subsys ceph_subsys_rados +#undef dout_prefix +#define dout_prefix *_dout << "librados: " + +namespace bc = boost::container; +namespace bs = boost::system; +namespace ca = ceph::async; +namespace cb = ceph::buffer; + +librados::RadosClient::RadosClient(CephContext *cct_) + : Dispatcher(cct_->get()), + cct_deleter{cct, [](CephContext *p) {p->put();}} +{ + auto& conf = cct->_conf; + conf.add_observer(this); + rados_mon_op_timeout = conf.get_val<std::chrono::seconds>("rados_mon_op_timeout"); +} + +int64_t librados::RadosClient::lookup_pool(const char *name) +{ + int r = wait_for_osdmap(); + if (r 
< 0) { + return r; + } + + int64_t ret = objecter->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name), + name); + if (-ENOENT == ret) { + // Make sure we have the latest map + int r = wait_for_latest_osdmap(); + if (r < 0) + return r; + ret = objecter->with_osdmap(std::mem_fn(&OSDMap::lookup_pg_pool_name), + name); + } + + return ret; +} + +bool librados::RadosClient::pool_requires_alignment(int64_t pool_id) +{ + bool requires; + int r = pool_requires_alignment2(pool_id, &requires); + if (r < 0) { + // Cast answer to false, this is a little bit problematic + // since we really don't know the answer yet, say. + return false; + } + + return requires; +} + +// a safer version of pool_requires_alignment +int librados::RadosClient::pool_requires_alignment2(int64_t pool_id, + bool *requires) +{ + if (!requires) + return -EINVAL; + + int r = wait_for_osdmap(); + if (r < 0) { + return r; + } + + return objecter->with_osdmap([requires, pool_id](const OSDMap& o) { + if (!o.have_pg_pool(pool_id)) { + return -ENOENT; + } + *requires = o.get_pg_pool(pool_id)->requires_aligned_append(); + return 0; + }); +} + +uint64_t librados::RadosClient::pool_required_alignment(int64_t pool_id) +{ + uint64_t alignment; + int r = pool_required_alignment2(pool_id, &alignment); + if (r < 0) { + return 0; + } + + return alignment; +} + +// a safer version of pool_required_alignment +int librados::RadosClient::pool_required_alignment2(int64_t pool_id, + uint64_t *alignment) +{ + if (!alignment) + return -EINVAL; + + int r = wait_for_osdmap(); + if (r < 0) { + return r; + } + + return objecter->with_osdmap([alignment, pool_id](const OSDMap &o) { + if (!o.have_pg_pool(pool_id)) { + return -ENOENT; + } + *alignment = o.get_pg_pool(pool_id)->required_alignment(); + return 0; + }); +} + +int librados::RadosClient::pool_get_name(uint64_t pool_id, std::string *s, bool wait_latest_map) +{ + int r = wait_for_osdmap(); + if (r < 0) + return r; + retry: + objecter->with_osdmap([&](const OSDMap& o) { + if 
(!o.have_pg_pool(pool_id)) { + r = -ENOENT; + } else { + r = 0; + *s = o.get_pool_name(pool_id); + } + }); + if (r == -ENOENT && wait_latest_map) { + r = wait_for_latest_osdmap(); + if (r < 0) + return r; + wait_latest_map = false; + goto retry; + } + + return r; +} + +int librados::RadosClient::get_fsid(std::string *s) +{ + if (!s) + return -EINVAL; + std::lock_guard l(lock); + ostringstream oss; + oss << monclient.get_fsid(); + *s = oss.str(); + return 0; +} + +int librados::RadosClient::ping_monitor(const string mon_id, string *result) +{ + int err = 0; + /* If we haven't yet connected, we have no way of telling whether we + * already built monc's initial monmap. IF we are in CONNECTED state, + * then it is safe to assume that we went through connect(), which does + * build a monmap. + */ + if (state != CONNECTED) { + ldout(cct, 10) << __func__ << " build monmap" << dendl; + err = monclient.build_initial_monmap(); + } + if (err < 0) { + return err; + } + + err = monclient.ping_monitor(mon_id, result); + return err; +} + +int librados::RadosClient::connect() +{ + int err; + + // already connected? + if (state == CONNECTING) + return -EINPROGRESS; + if (state == CONNECTED) + return -EISCONN; + state = CONNECTING; + + if (!cct->_log->is_started()) { + cct->_log->start(); + } + + { + MonClient mc_bootstrap(cct, poolctx); + err = mc_bootstrap.get_monmap_and_config(); + if (err < 0) + return err; + } + + common_init_finish(cct); + + poolctx.start(cct->_conf.get_val<std::uint64_t>("librados_thread_count")); + + // get monmap + err = monclient.build_initial_monmap(); + if (err < 0) + goto out; + + err = -ENOMEM; + messenger = Messenger::create_client_messenger(cct, "radosclient"); + if (!messenger) + goto out; + + // require OSDREPLYMUX feature. this means we will fail to talk to + // old servers. this is necessary because otherwise we won't know + // how to decompose the reply data into its constituent pieces. 
+ messenger->set_default_policy(Messenger::Policy::lossy_client(CEPH_FEATURE_OSDREPLYMUX)); + + ldout(cct, 1) << "starting msgr at " << messenger->get_myaddrs() << dendl; + + ldout(cct, 1) << "starting objecter" << dendl; + + objecter = new (std::nothrow) Objecter(cct, messenger, &monclient, poolctx); + if (!objecter) + goto out; + objecter->set_balanced_budget(); + + monclient.set_messenger(messenger); + mgrclient.set_messenger(messenger); + + objecter->init(); + messenger->add_dispatcher_head(&mgrclient); + messenger->add_dispatcher_tail(objecter); + messenger->add_dispatcher_tail(this); + + messenger->start(); + + ldout(cct, 1) << "setting wanted keys" << dendl; + monclient.set_want_keys( + CEPH_ENTITY_TYPE_MON | CEPH_ENTITY_TYPE_OSD | CEPH_ENTITY_TYPE_MGR); + ldout(cct, 1) << "calling monclient init" << dendl; + err = monclient.init(); + if (err) { + ldout(cct, 0) << conf->name << " initialization error " << cpp_strerror(-err) << dendl; + shutdown(); + goto out; + } + + err = monclient.authenticate(std::chrono::duration<double>(conf.get_val<std::chrono::seconds>("client_mount_timeout")).count()); + if (err) { + ldout(cct, 0) << conf->name << " authentication error " << cpp_strerror(-err) << dendl; + shutdown(); + goto out; + } + messenger->set_myname(entity_name_t::CLIENT(monclient.get_global_id())); + + // Detect older cluster, put mgrclient into compatible mode + mgrclient.set_mgr_optional( + !get_required_monitor_features().contains_all( + ceph::features::mon::FEATURE_LUMINOUS)); + + // MgrClient needs this (it doesn't have MonClient reference itself) + monclient.sub_want("mgrmap", 0, 0); + monclient.renew_subs(); + + if (service_daemon) { + ldout(cct, 10) << __func__ << " registering as " << service_name << "." 
+ << daemon_name << dendl; + mgrclient.service_daemon_register(service_name, daemon_name, + daemon_metadata); + } + mgrclient.init(); + + objecter->set_client_incarnation(0); + objecter->start(); + lock.lock(); + + state = CONNECTED; + instance_id = monclient.get_global_id(); + + lock.unlock(); + + ldout(cct, 1) << "init done" << dendl; + err = 0; + + out: + if (err) { + state = DISCONNECTED; + + if (objecter) { + delete objecter; + objecter = NULL; + } + if (messenger) { + delete messenger; + messenger = NULL; + } + } + + return err; +} + +void librados::RadosClient::shutdown() +{ + std::unique_lock l{lock}; + if (state == DISCONNECTED) { + return; + } + + bool need_objecter = false; + if (objecter && objecter->initialized) { + need_objecter = true; + } + + if (state == CONNECTED) { + if (need_objecter) { + // make sure watch callbacks are flushed + watch_flush(); + } + } + state = DISCONNECTED; + instance_id = 0; + l.unlock(); + if (need_objecter) { + objecter->shutdown(); + } + mgrclient.shutdown(); + + monclient.shutdown(); + if (messenger) { + messenger->shutdown(); + messenger->wait(); + } + poolctx.stop(); + ldout(cct, 1) << "shutdown" << dendl; +} + +int librados::RadosClient::watch_flush() +{ + ldout(cct, 10) << __func__ << " enter" << dendl; + objecter->linger_callback_flush(ca::use_blocked); + + ldout(cct, 10) << __func__ << " exit" << dendl; + return 0; +} + +struct CB_aio_watch_flush_Complete { + librados::RadosClient *client; + librados::AioCompletionImpl *c; + + CB_aio_watch_flush_Complete(librados::RadosClient *_client, librados::AioCompletionImpl *_c) + : client(_client), c(_c) { + c->get(); + } + + CB_aio_watch_flush_Complete(const CB_aio_watch_flush_Complete&) = delete; + CB_aio_watch_flush_Complete operator =(const CB_aio_watch_flush_Complete&) = delete; + CB_aio_watch_flush_Complete(CB_aio_watch_flush_Complete&& rhs) { + client = rhs.client; + c = rhs.c; + } + CB_aio_watch_flush_Complete& operator =(CB_aio_watch_flush_Complete&& rhs) { + client 
= rhs.client; + c = rhs.c; + return *this; + } + + void operator()() { + c->lock.lock(); + c->rval = 0; + c->complete = true; + c->cond.notify_all(); + + if (c->callback_complete || + c->callback_safe) { + boost::asio::defer(client->finish_strand, librados::CB_AioComplete(c)); + } + c->put_unlock(); + } +}; + +int librados::RadosClient::async_watch_flush(AioCompletionImpl *c) +{ + ldout(cct, 10) << __func__ << " enter" << dendl; + objecter->linger_callback_flush(CB_aio_watch_flush_Complete(this, c)); + ldout(cct, 10) << __func__ << " exit" << dendl; + return 0; +} + +uint64_t librados::RadosClient::get_instance_id() +{ + return instance_id; +} + +int librados::RadosClient::get_min_compatible_osd(int8_t* require_osd_release) +{ + int r = wait_for_osdmap(); + if (r < 0) { + return r; + } + + objecter->with_osdmap( + [require_osd_release](const OSDMap& o) { + *require_osd_release = to_integer<int8_t>(o.require_osd_release); + }); + return 0; +} + +int librados::RadosClient::get_min_compatible_client(int8_t* min_compat_client, + int8_t* require_min_compat_client) +{ + int r = wait_for_osdmap(); + if (r < 0) { + return r; + } + + objecter->with_osdmap( + [min_compat_client, require_min_compat_client](const OSDMap& o) { + *min_compat_client = to_integer<int8_t>(o.get_min_compat_client()); + *require_min_compat_client = + to_integer<int8_t>(o.get_require_min_compat_client()); + }); + return 0; +} + +librados::RadosClient::~RadosClient() +{ + cct->_conf.remove_observer(this); + if (messenger) + delete messenger; + if (objecter) + delete objecter; + cct = NULL; +} + +int librados::RadosClient::create_ioctx(const char *name, IoCtxImpl **io) +{ + int64_t poolid = lookup_pool(name); + if (poolid < 0) { + return (int)poolid; + } + + *io = new librados::IoCtxImpl(this, objecter, poolid, CEPH_NOSNAP); + return 0; +} + +int librados::RadosClient::create_ioctx(int64_t pool_id, IoCtxImpl **io) +{ + std::string pool_name; + int r = pool_get_name(pool_id, &pool_name, true); + if (r < 
0) + return r; + *io = new librados::IoCtxImpl(this, objecter, pool_id, CEPH_NOSNAP); + return 0; +} + +bool librados::RadosClient::ms_dispatch(Message *m) +{ + bool ret; + + std::lock_guard l(lock); + if (state == DISCONNECTED) { + ldout(cct, 10) << "disconnected, discarding " << *m << dendl; + m->put(); + ret = true; + } else { + ret = _dispatch(m); + } + return ret; +} + +void librados::RadosClient::ms_handle_connect(Connection *con) +{ +} + +bool librados::RadosClient::ms_handle_reset(Connection *con) +{ + return false; +} + +void librados::RadosClient::ms_handle_remote_reset(Connection *con) +{ +} + +bool librados::RadosClient::ms_handle_refused(Connection *con) +{ + return false; +} + +bool librados::RadosClient::_dispatch(Message *m) +{ + ceph_assert(ceph_mutex_is_locked(lock)); + switch (m->get_type()) { + // OSD + case CEPH_MSG_OSD_MAP: + cond.notify_all(); + m->put(); + break; + + case CEPH_MSG_MDS_MAP: + m->put(); + break; + + case MSG_LOG: + handle_log(static_cast<MLog *>(m)); + break; + + default: + return false; + } + + return true; +} + + +int librados::RadosClient::wait_for_osdmap() +{ + ceph_assert(ceph_mutex_is_not_locked_by_me(lock)); + + if (state != CONNECTED) { + return -ENOTCONN; + } + + bool need_map = false; + objecter->with_osdmap([&](const OSDMap& o) { + if (o.get_epoch() == 0) { + need_map = true; + } + }); + + if (need_map) { + std::unique_lock l(lock); + + ceph::timespan timeout = rados_mon_op_timeout; + if (objecter->with_osdmap(std::mem_fn(&OSDMap::get_epoch)) == 0) { + ldout(cct, 10) << __func__ << " waiting" << dendl; + while (objecter->with_osdmap(std::mem_fn(&OSDMap::get_epoch)) == 0) { + if (timeout == timeout.zero()) { + cond.wait(l); + } else { + if (cond.wait_for(l, timeout) == std::cv_status::timeout) { + lderr(cct) << "timed out waiting for first osdmap from monitors" + << dendl; + return -ETIMEDOUT; + } + } + } + ldout(cct, 10) << __func__ << " done waiting" << dendl; + } + return 0; + } else { + return 0; + } +} + + +int 
librados::RadosClient::wait_for_latest_osdmap()
+{
+  // Blocks on the objecter until it reports completion; the resulting
+  // error_code is converted to an int return value.
+  bs::error_code ec;
+  objecter->wait_for_latest_osdmap(ca::use_blocked[ec]);
+  return ceph::from_error_code(ec);
+}
+
+// Appends one (pool id, pool name) entry to `v` for every pool in the
+// current OSDMap. Returns the error from wait_for_osdmap() if it fails,
+// 0 otherwise.
+int librados::RadosClient::pool_list(std::list<std::pair<int64_t, string> >& v)
+{
+  int r = wait_for_osdmap();
+  if (r < 0)
+    return r;
+
+  objecter->with_osdmap([&](const OSDMap& o) {
+      // iterate by const reference: the mapped pool objects must not be
+      // copied just to read the key
+      for (const auto& p : o.get_pools())
+        v.emplace_back(p.first, o.get_pool_name(p.first));
+    });
+  return 0;
+}
+
+// Fetches statistics for the named pools.
+//
+// `result`    if non-null, receives the per-pool statistics (inserted into
+//             the existing map)
+// `pper_pool` if non-null, receives the per_pool flag returned by the
+//             objecter
+// Returns 0 on success or the converted error code from the objecter.
+int librados::RadosClient::get_pool_stats(std::list<string>& pools,
+                                          map<string,::pool_stat_t> *result,
+                                          bool *pper_pool)
+{
+  bs::error_code ec;
+
+  std::vector<std::string> v(pools.begin(), pools.end());
+
+  auto [res, per_pool] = objecter->get_pool_stats(v, ca::use_blocked[ec]);
+  if (ec)
+    return ceph::from_error_code(ec);
+
+  // Guard on the output pointer, not on the returned flag: a null
+  // `pper_pool` must never be dereferenced, and a `false` flag still has to
+  // be written back to the caller.
+  if (pper_pool)
+    *pper_pool = per_pool;
+  if (result)
+    result->insert(res.begin(), res.end());
+
+  return 0;
+}
+
+// Returns true if the named pool exists in the current OSDMap and is in
+// unmanaged-snaps mode; false otherwise (including an unknown pool name).
+bool librados::RadosClient::get_pool_is_selfmanaged_snaps_mode(
+  const std::string& pool)
+{
+  bool ret = false;
+  objecter->with_osdmap([&](const OSDMap& osdmap) {
+      int64_t poolid = osdmap.lookup_pg_pool_name(pool);
+      if (poolid >= 0)
+        ret = osdmap.get_pg_pool(poolid)->is_unmanaged_snaps_mode();
+    });
+  return ret;
+}
+
+// Synchronously fetches cluster-wide usage into `stats`: starts the
+// objecter request with a C_SafeCond completion and blocks until it has
+// set `done`. Returns the completion's result code.
+int librados::RadosClient::get_fs_stats(ceph_statfs& stats)
+{
+  ceph::mutex mylock = ceph::make_mutex("RadosClient::get_fs_stats::mylock");
+  ceph::condition_variable cond;
+  bool done;  // presumably initialised by C_SafeCond's constructor -- TODO confirm
+  int ret = 0;
+  {
+    std::lock_guard l{mylock};
+    objecter->get_fs_stats(stats, boost::optional<int64_t> (),
+                           new C_SafeCond(mylock, cond, &done, &ret));
+  }
+  {
+    std::unique_lock l{mylock};
+    cond.wait(l, [&done] { return done;});
+  }
+  return ret;
+}
+
+// Takes an additional reference on this client.
+void librados::RadosClient::get() {
+  std::lock_guard l(lock);
+  ceph_assert(refcnt > 0);
+  refcnt++;
+}
+
+// Drops one reference; returns true when that was the last one.
+bool librados::RadosClient::put() {
+  std::lock_guard l(lock);
+  ceph_assert(refcnt > 0);
+  refcnt--;
+  return (refcnt == 0);
+}
+
+int librados::RadosClient::pool_create(string& name,
+
int16_t crush_rule)
+{
+  // Synchronous pool creation: rejects an empty name with -EINVAL, waits
+  // for an OSDMap, then blocks until the create request completes and
+  // returns its result code.
+  if (!name.length())
+    return -EINVAL;
+
+  int r = wait_for_osdmap();
+  if (r < 0) {
+    return r;
+  }
+
+  ceph::mutex mylock = ceph::make_mutex("RadosClient::pool_create::mylock");
+  int reply;  // written through the pointer handed to C_SafeCond below
+  ceph::condition_variable cond;
+  bool done;  // presumably initialised by C_SafeCond's constructor -- TODO confirm
+  Context *onfinish = new C_SafeCond(mylock, cond, &done, &reply);
+  objecter->create_pool(name, onfinish, crush_rule);
+
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done; });
+  return reply;
+}
+
+// Asynchronous pool creation: the outcome is delivered through `c` via
+// CB_PoolAsync_Safe. Returns the wait_for_osdmap() error if that fails,
+// otherwise its (non-negative) result once the request has been queued.
+// Unlike pool_create(), the name is not checked for emptiness here.
+int librados::RadosClient::pool_create_async(string& name,
+                                             PoolAsyncCompletionImpl *c,
+                                             int16_t crush_rule)
+{
+  int r = wait_for_osdmap();
+  if (r < 0)
+    return r;
+
+  Context *onfinish = make_lambda_context(CB_PoolAsync_Safe(c));
+  objecter->create_pool(name, onfinish, crush_rule);
+  return r;
+}
+
+// Looks up the base tier of `pool_id` in the current OSDMap.
+// `*base_tier` is set to `pool_id` itself when the pool's tier_of is
+// negative, otherwise to tier_of. Returns -ENOENT if the pool is unknown.
+int librados::RadosClient::pool_get_base_tier(int64_t pool_id, int64_t* base_tier)
+{
+  int r = wait_for_osdmap();
+  if (r < 0) {
+    return r;
+  }
+
+  objecter->with_osdmap([&](const OSDMap& o) {
+      const pg_pool_t* pool = o.get_pg_pool(pool_id);
+      if (pool) {
+        if (pool->tier_of < 0) {
+          *base_tier = pool_id;
+        } else {
+          *base_tier = pool->tier_of;
+        }
+        r = 0;
+      } else {
+        r = -ENOENT;
+      }
+    });
+  return r;
+}
+
+// Synchronous pool deletion: waits for an OSDMap, issues the delete and
+// blocks until the completion has set `done`; returns its result code.
+int librados::RadosClient::pool_delete(const char *name)
+{
+  int r = wait_for_osdmap();
+  if (r < 0) {
+    return r;
+  }
+
+  ceph::mutex mylock = ceph::make_mutex("RadosClient::pool_delete::mylock");
+  ceph::condition_variable cond;
+  bool done;  // presumably initialised by C_SafeCond's constructor -- TODO confirm
+  int ret;    // written through the pointer handed to C_SafeCond below
+  Context *onfinish = new C_SafeCond(mylock, cond, &done, &ret);
+  objecter->delete_pool(name, onfinish);
+
+  std::unique_lock l{mylock};
+  cond.wait(l, [&done] { return done;});
+  return ret;
+}
+
+// Asynchronous pool deletion; the outcome is delivered through `c`.
+int librados::RadosClient::pool_delete_async(const char *name, PoolAsyncCompletionImpl *c)
+{
+  int r = wait_for_osdmap();
+  if (r < 0)
+    return r;
+
+  Context *onfinish = make_lambda_context(CB_PoolAsync_Safe(c));
+  objecter->delete_pool(name, onfinish);
+  return r;
+}
+
+void librados::RadosClient::blocklist_self(bool set) {
+
std::lock_guard l(lock);
+  objecter->blocklist_self(set);
+}
+
+// Returns the messenger's own addresses rendered as a string.
+std::string librados::RadosClient::get_addrs() const {
+  CachedStackStringStream cos;
+  *cos << messenger->get_myaddrs();
+  return std::string(cos->strv());
+}
+
+// Adds `client_address` to the OSD blocklist via a monitor command.
+//
+// `expire_seconds` of 0 omits the "expire" field from the command.
+// If the monitor answers -EINVAL to the "osd blocklist" command, the legacy
+// "osd blacklist" spelling is tried once. On success the call waits for the
+// latest OSDMap and returns that wait's result.
+// Returns -EINVAL if the address cannot be parsed, otherwise the monitor
+// command's error or the result of wait_for_latest_osdmap().
+int librados::RadosClient::blocklist_add(const string& client_address,
+                                         uint32_t expire_seconds)
+{
+  entity_addr_t addr;
+  if (!addr.parse(client_address.c_str(), 0)) {
+    lderr(cct) << "unable to parse address " << client_address << dendl;
+    return -EINVAL;
+  }
+
+  // Both spellings of the command share one layout; only the prefix and the
+  // name of the operation key differ.
+  auto build_cmd = [&](const char *prefix, const char *op_key) {
+    std::stringstream cmd;
+    cmd << "{"
+        << "\"prefix\": \"" << prefix << "\", "
+        << "\"" << op_key << "\": \"add\", "
+        << "\"addr\": \"" << client_address << "\"";
+    if (expire_seconds != 0) {
+      cmd << ", \"expire\": " << expire_seconds << ".0";
+    }
+    cmd << "}";
+    return cmd.str();
+  };
+
+  std::vector<std::string> cmds;
+  cmds.push_back(build_cmd("osd blocklist", "blocklistop"));
+  bufferlist inbl;
+  int r = mon_command(cmds, inbl, nullptr, nullptr);
+  if (r == -EINVAL) {
+    // try legacy blacklist command
+    cmds.clear();
+    cmds.push_back(build_cmd("osd blacklist", "blacklistop"));
+    r = mon_command(cmds, inbl, nullptr, nullptr);
+  }
+  if (r < 0) {
+    return r;
+  }
+
+  // ensure we have the latest osd map epoch before proceeding
+  r = wait_for_latest_osdmap();
+  return r;
+}
+
+// Synchronous monitor command: runs mon_command_async() with a stack
+// C_SaferCond and blocks until it completes.
+int librados::RadosClient::mon_command(const vector<string>& cmd,
+                                       const bufferlist &inbl,
+                                       bufferlist *outbl, string *outs)
+{
+  C_SaferCond ctx;
+  mon_command_async(cmd, inbl, outbl, outs, &ctx);
+  return ctx.wait();
+}
+
+// Starts a monitor command under `lock`. When the reply arrives, `outs` and
+// `outbl` (each optional) receive the status string and payload, and
+// `on_finish` is completed with the converted error code.
+// NOTE(review): `on_finish` is held in a unique_ptr inside the handler; if
+// the handler were ever destroyed without running, a stack-allocated
+// context such as the one in mon_command() above would be deleted --
+// confirm MonClient always invokes it.
+void librados::RadosClient::mon_command_async(const vector<string>& cmd,
+                                              const bufferlist &inbl,
+                                              bufferlist *outbl, string *outs,
+                                              Context *on_finish)
+{
+  std::lock_guard l{lock};
+  monclient.start_mon_command(cmd, inbl,
+                              [outs, outbl,
+                               on_finish = std::unique_ptr<Context>(on_finish)]
+                              (bs::error_code e,
+                               std::string&& s,
+
ceph::bufferlist&& b) mutable {
+                                if (outs)
+                                  *outs = std::move(s);
+                                if (outbl)
+                                  *outbl = std::move(b);
+                                if (on_finish)
+                                  on_finish.release()->complete(
+                                    ceph::from_error_code(e));
+                              });
+}
+
+// Sends a command to the manager and blocks for the reply.
+//
+// The command is started while `lock` is held; the lock is released before
+// blocking. If rados_mon_op_timeout is positive the wait is bounded by it,
+// otherwise the wait has no deadline. Returns the start error if the
+// command could not be started, else the result of the wait.
+// NOTE(review): `cond` lives on this stack frame; after a timed-out wait it
+// goes out of scope while the command may still be pending -- confirm
+// MgrClient cannot complete it later.
+int librados::RadosClient::mgr_command(const vector<string>& cmd,
+                                       const bufferlist &inbl,
+                                       bufferlist *outbl, string *outs)
+{
+  // unique_lock (not lock_guard) so the guard itself knows the mutex has
+  // been released and does not need it re-acquired before returning
+  std::unique_lock l(lock);
+
+  C_SaferCond cond;
+  int r = mgrclient.start_command(cmd, inbl, outbl, outs, &cond);
+  if (r < 0)
+    return r;
+
+  // snapshot the timeout while `lock` is still held
+  const ceph::timespan timeout = rados_mon_op_timeout;
+  l.unlock();
+
+  if (timeout.count() > 0) {
+    return cond.wait_for(timeout);
+  }
+  return cond.wait();
+}
+
+// Same as above, but addressed to the named manager daemon via
+// start_tell_command().
+int librados::RadosClient::mgr_command(
+  const string& name,
+  const vector<string>& cmd,
+  const bufferlist &inbl,
+  bufferlist *outbl, string *outs)
+{
+  std::unique_lock l(lock);
+
+  C_SaferCond cond;
+  int r = mgrclient.start_tell_command(name, cmd, inbl, outbl, outs, &cond);
+  if (r < 0)
+    return r;
+
+  // snapshot the timeout while `lock` is still held
+  const ceph::timespan timeout = rados_mon_op_timeout;
+  l.unlock();
+
+  if (timeout.count() > 0) {
+    return cond.wait_for(timeout);
+  }
+  return cond.wait();
+}
+
+
+// Blocking monitor command addressed to the monitor with the given rank.
+// `outs` / `outbl` are optional and receive the status string / payload.
+int librados::RadosClient::mon_command(int rank, const vector<string>& cmd,
+                                       const bufferlist &inbl,
+                                       bufferlist *outbl, string *outs)
+{
+  bs::error_code ec;
+  auto&& [s, bl] = monclient.start_mon_command(rank, cmd, inbl,
+                                               ca::use_blocked[ec]);
+  if (outs)
+    *outs = std::move(s);
+  if (outbl)
+    *outbl = std::move(bl);
+
+  return ceph::from_error_code(ec);
+}
+
+// Blocking monitor command addressed to the monitor with the given name.
+int librados::RadosClient::mon_command(string name, const vector<string>& cmd,
+                                       const bufferlist &inbl,
+                                       bufferlist *outbl, string *outs)
+{
+  bs::error_code ec;
+  auto&& [s, bl] = monclient.start_mon_command(name, cmd, inbl,
+                                               ca::use_blocked[ec]);
+  if (outs)
+    *outs = std::move(s);
+  if (outbl)
+    *outbl = std::move(bl);
+
+  return ceph::from_error_code(ec);
+}
+
+int librados::RadosClient::osd_command(int osd, vector<string>& cmd,
+                                       const bufferlist& inbl,
+                                       bufferlist *poutbl, string *prs)
+{
+
ceph_tid_t tid;
+
+  // a negative OSD id is rejected up front
+  if (osd < 0)
+    return -EINVAL;
+
+
+  // XXX do anything with tid?
+  bs::error_code ec;
+  // note: `cmd` is moved from, so the caller's vector is left in a
+  // moved-from state after this call
+  auto [s, bl] = objecter->osd_command(osd, std::move(cmd), cb::list(inbl),
+                                       &tid, ca::use_blocked[ec]);
+  if (poutbl)
+    *poutbl = std::move(bl);
+  if (prs)
+    *prs = std::move(s);
+  return ceph::from_error_code(ec);
+}
+
+// Blocking command addressed to placement group `pgid`. `poutbl` / `prs`
+// are optional and receive the payload / status string. `cmd` is moved
+// from, as in osd_command() above.
+int librados::RadosClient::pg_command(pg_t pgid, vector<string>& cmd,
+                                      const bufferlist& inbl,
+                                      bufferlist *poutbl, string *prs)
+{
+  ceph_tid_t tid;
+  bs::error_code ec;
+  auto [s, bl] = objecter->pg_command(pgid, std::move(cmd), inbl, &tid,
+                                      ca::use_blocked[ec]);
+  if (poutbl)
+    *poutbl = std::move(bl);
+  if (prs)
+    *prs = std::move(s);
+  return ceph::from_error_code(ec);
+}
+
+// Installs, replaces or removes the cluster-log callbacks.
+//
+// Passing both `cb` and `cb2` as null removes the current subscription.
+// Otherwise `level` selects the subscription name ("debug", "info",
+// "warn"/"warning", "err"/"error", "sec"); any other value gives -EINVAL.
+// Returns -ENOTCONN unless the client is CONNECTED.
+int librados::RadosClient::monitor_log(const string& level,
+                                       rados_log_callback_t cb,
+                                       rados_log_callback2_t cb2,
+                                       void *arg)
+{
+  std::lock_guard l(lock);
+
+  if (state != CONNECTED) {
+    return -ENOTCONN;
+  }
+
+  if (cb == NULL && cb2 == NULL) {
+    // stop watch
+    ldout(cct, 10) << __func__ << " removing cb " << (void*)log_cb
+                   << " " << (void*)log_cb2 << dendl;
+    monclient.sub_unwant(log_watch);
+    log_watch.clear();
+    log_cb = NULL;
+    log_cb2 = NULL;
+    log_cb_arg = NULL;
+    return 0;
+  }
+
+  string watch_level;
+  if (level == "debug") {
+    watch_level = "log-debug";
+  } else if (level == "info") {
+    watch_level = "log-info";
+  } else if (level == "warn" || level == "warning") {
+    watch_level = "log-warn";
+  } else if (level == "err" || level == "error") {
+    watch_level = "log-error";
+  } else if (level == "sec") {
+    watch_level = "log-sec";
+  } else {
+    ldout(cct, 10) << __func__ << " invalid level " << level << dendl;
+    return -EINVAL;
+  }
+
+  // drop the previous subscription before installing the new one
+  if (log_cb || log_cb2)
+    monclient.sub_unwant(log_watch);
+
+  // (re)start watch
+  ldout(cct, 10) << __func__ << " add cb " << (void*)cb << " " << (void*)cb2
+                 << " level " << level << dendl;
+  monclient.sub_want(watch_level, 0, 0);
+
+  monclient.renew_subs();
+  log_cb = cb;
+  log_cb2 = cb2;
+  log_cb_arg = arg;
+
log_watch = watch_level;
+  return 0;
+}
+
+// Delivers the entries of a cluster-log message to the registered
+// callbacks.
+//
+// `lock` must be held by the caller (asserted). Messages whose version is
+// not newer than `log_last_version` are ignored. The message reference is
+// always released before returning.
+void librados::RadosClient::handle_log(MLog *m)
+{
+  ceph_assert(ceph_mutex_is_locked(lock));
+  ldout(cct, 10) << __func__ << " version " << m->version << dendl;
+
+  if (log_last_version < m->version) {
+    log_last_version = m->version;
+
+    if (log_cb || log_cb2) {
+      // entries are only read, so iterate by const reference instead of
+      // copying each LogEntry
+      for (const auto& e : m->entries) {
+        ostringstream ss;
+        ss << e.stamp << " " << e.name << " " << e.prio << " " << e.msg;
+        const string line = ss.str();
+        const string who = stringify(e.rank) + " " + stringify(e.addrs);
+        const string name = stringify(e.name);
+        const string level = stringify(e.prio);
+        struct timespec stamp;
+        e.stamp.to_timespec(&stamp);
+
+        ldout(cct, 20) << __func__ << " delivering " << line << dendl;
+        if (log_cb)
+          log_cb(log_cb_arg, line.c_str(), who.c_str(),
+                 stamp.tv_sec, stamp.tv_nsec,
+                 e.seq, level.c_str(), e.msg.c_str());
+        if (log_cb2)
+          log_cb2(log_cb_arg, line.c_str(),
+                  e.channel.c_str(),
+                  who.c_str(), name.c_str(),
+                  stamp.tv_sec, stamp.tv_nsec,
+                  e.seq, level.c_str(), e.msg.c_str());
+      }
+    }
+
+    // report the newest version seen for this subscription to the MonClient
+    monclient.sub_got(log_watch, log_last_version);
+  }
+
+  m->put();
+}
+
+// Registers this client as a service daemon with the manager.
+//
+// Returns -EEXIST if a registration was already recorded, and -EINVAL for
+// an empty service/daemon name or for a service name that collides with a
+// regular Ceph entity type.
+int librados::RadosClient::service_daemon_register(
+  const std::string& service,  ///< service name (e.g., 'rgw')
+  const std::string& name,     ///< daemon name (e.g., 'gwfoo')
+  const std::map<std::string,std::string>& metadata)
+{
+  if (service_daemon) {
+    return -EEXIST;
+  }
+  if (service == "osd" ||
+      service == "mds" ||
+      service == "client" ||
+      service == "mon" ||
+      service == "mgr") {
+    // normal ceph entity types are not allowed!
+    return -EINVAL;
+  }
+  if (service.empty() || name.empty()) {
+    return -EINVAL;
+  }
+
+  collect_sys_info(&daemon_metadata, cct);
+
+  ldout(cct,10) << __func__ << " " << service << "."
<< name << dendl; + service_daemon = true; + service_name = service; + daemon_name = name; + daemon_metadata.insert(metadata.begin(), metadata.end()); + + if (state == DISCONNECTED) { + return 0; + } + if (state == CONNECTING) { + return -EBUSY; + } + mgrclient.service_daemon_register(service_name, daemon_name, + daemon_metadata); + return 0; +} + +int librados::RadosClient::service_daemon_update_status( + std::map<std::string,std::string>&& status) +{ + if (state != CONNECTED) { + return -ENOTCONN; + } + return mgrclient.service_daemon_update_status(std::move(status)); +} + +mon_feature_t librados::RadosClient::get_required_monitor_features() const +{ + return monclient.with_monmap([](const MonMap &monmap) { + return monmap.get_required_features(); } ); +} + +int librados::RadosClient::get_inconsistent_pgs(int64_t pool_id, + std::vector<std::string>* pgs) +{ + vector<string> cmd = { + "{\"prefix\": \"pg ls\"," + "\"pool\": " + std::to_string(pool_id) + "," + "\"states\": [\"inconsistent\"]," + "\"format\": \"json\"}" + }; + bufferlist inbl, outbl; + string outstring; + if (auto ret = mgr_command(cmd, inbl, &outbl, &outstring); ret) { + return ret; + } + if (!outbl.length()) { + // no pg returned + return 0; + } + JSONParser parser; + if (!parser.parse(outbl.c_str(), outbl.length())) { + return -EINVAL; + } + vector<string> v; + if (!parser.is_array()) { + JSONObj *pgstat_obj = parser.find_obj("pg_stats"); + if (!pgstat_obj) + return 0; + auto s = pgstat_obj->get_data(); + JSONParser pg_stats; + if (!pg_stats.parse(s.c_str(), s.length())) { + return -EINVAL; + } + v = pg_stats.get_array_elements(); + } else { + v = parser.get_array_elements(); + } + for (auto i : v) { + JSONParser pg_json; + if (!pg_json.parse(i.c_str(), i.length())) { + return -EINVAL; + } + string pgid; + JSONDecoder::decode_json("pgid", pgid, &pg_json); + pgs->emplace_back(std::move(pgid)); + } + return 0; +} + +const char** librados::RadosClient::get_tracked_conf_keys() const +{ + static const 
char *config_keys[] = {
+    // nullptr-terminated list of the options handle_conf_change() reacts to
+    "librados_thread_count",
+    "rados_mon_op_timeout",
+    nullptr
+  };
+  return config_keys;
+}
+
+// Applies runtime changes of the tracked options: a changed thread count
+// stops and restarts `poolctx` with the new size, a changed timeout
+// refreshes the cached `rados_mon_op_timeout`.
+// NOTE(review): `rados_mon_op_timeout` is written here without taking
+// `lock` -- confirm how readers are synchronised.
+void librados::RadosClient::handle_conf_change(const ConfigProxy& conf,
+                                               const std::set<std::string> &changed)
+{
+  if (changed.count("librados_thread_count")) {
+    poolctx.stop();
+    poolctx.start(conf.get_val<std::uint64_t>("librados_thread_count"));
+  }
+  if (changed.count("rados_mon_op_timeout")) {
+    rados_mon_op_timeout = conf.get_val<std::chrono::seconds>("rados_mon_op_timeout");
+  }
+}
diff --git a/src/librados/RadosClient.h b/src/librados/RadosClient.h
new file mode 100644
index 000000000..0db094b18
--- /dev/null
+++ b/src/librados/RadosClient.h
@@ -0,0 +1,198 @@
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*-
+// vim: ts=8 sw=2 smarttab
+/*
+ * Ceph - scalable distributed file system
+ *
+ * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net>
+ *
+ * This is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License version 2.1, as published by the Free Software
+ * Foundation. See file COPYING.
+ * + */ +#ifndef CEPH_LIBRADOS_RADOSCLIENT_H +#define CEPH_LIBRADOS_RADOSCLIENT_H + +#include <functional> +#include <memory> +#include <string> + +#include "msg/Dispatcher.h" + +#include "common/async/context_pool.h" +#include "common/config_fwd.h" +#include "common/Cond.h" +#include "common/ceph_mutex.h" +#include "common/ceph_time.h" +#include "common/config_obs.h" +#include "include/common_fwd.h" +#include "include/rados/librados.h" +#include "include/rados/librados.hpp" +#include "mon/MonClient.h" +#include "mgr/MgrClient.h" + +#include "IoCtxImpl.h" + +struct Context; +class Message; +class MLog; +class Messenger; +class AioCompletionImpl; + +namespace neorados { namespace detail { class RadosClient; }} + +class librados::RadosClient : public Dispatcher, + public md_config_obs_t +{ + friend neorados::detail::RadosClient; +public: + using Dispatcher::cct; +private: + std::unique_ptr<CephContext, + std::function<void(CephContext*)>> cct_deleter; + +public: + const ConfigProxy& conf{cct->_conf}; + ceph::async::io_context_pool poolctx; +private: + enum { + DISCONNECTED, + CONNECTING, + CONNECTED, + } state{DISCONNECTED}; + + MonClient monclient{cct, poolctx}; + MgrClient mgrclient{cct, nullptr, &monclient.monmap}; + Messenger *messenger{nullptr}; + + uint64_t instance_id{0}; + + bool _dispatch(Message *m); + bool ms_dispatch(Message *m) override; + + void ms_handle_connect(Connection *con) override; + bool ms_handle_reset(Connection *con) override; + void ms_handle_remote_reset(Connection *con) override; + bool ms_handle_refused(Connection *con) override; + + Objecter *objecter{nullptr}; + + ceph::mutex lock = ceph::make_mutex("librados::RadosClient::lock"); + ceph::condition_variable cond; + int refcnt{1}; + + version_t log_last_version{0}; + rados_log_callback_t log_cb{nullptr}; + rados_log_callback2_t log_cb2{nullptr}; + void *log_cb_arg{nullptr}; + string log_watch; + + bool service_daemon = false; + string daemon_name, service_name; + map<string,string> 
daemon_metadata; + ceph::timespan rados_mon_op_timeout{}; + + int wait_for_osdmap(); + +public: + boost::asio::io_context::strand finish_strand{poolctx.get_io_context()}; + + explicit RadosClient(CephContext *cct); + ~RadosClient() override; + int ping_monitor(std::string mon_id, std::string *result); + int connect(); + void shutdown(); + + int watch_flush(); + int async_watch_flush(AioCompletionImpl *c); + + uint64_t get_instance_id(); + + int get_min_compatible_osd(int8_t* require_osd_release); + int get_min_compatible_client(int8_t* min_compat_client, + int8_t* require_min_compat_client); + + int wait_for_latest_osdmap(); + + int create_ioctx(const char *name, IoCtxImpl **io); + int create_ioctx(int64_t, IoCtxImpl **io); + + int get_fsid(std::string *s); + int64_t lookup_pool(const char *name); + bool pool_requires_alignment(int64_t pool_id); + int pool_requires_alignment2(int64_t pool_id, bool *requires); + uint64_t pool_required_alignment(int64_t pool_id); + int pool_required_alignment2(int64_t pool_id, uint64_t *alignment); + int pool_get_name(uint64_t pool_id, std::string *name, + bool wait_latest_map = false); + + int pool_list(std::list<std::pair<int64_t, string> >& ls); + int get_pool_stats(std::list<string>& ls, map<string,::pool_stat_t> *result, + bool *per_pool); + int get_fs_stats(ceph_statfs& result); + bool get_pool_is_selfmanaged_snaps_mode(const std::string& pool); + + /* + -1 was set as the default value and monitor will pickup the right crush rule with below order: + a) osd pool default crush replicated ruleset + b) the first ruleset in crush ruleset + c) error out if no value find + */ + int pool_create(string& name, int16_t crush_rule=-1); + int pool_create_async(string& name, PoolAsyncCompletionImpl *c, + int16_t crush_rule=-1); + int pool_get_base_tier(int64_t pool_id, int64_t* base_tier); + int pool_delete(const char *name); + + int pool_delete_async(const char *name, PoolAsyncCompletionImpl *c); + + int blocklist_add(const string& 
client_address, uint32_t expire_seconds); + + int mon_command(const vector<string>& cmd, const bufferlist &inbl, + bufferlist *outbl, string *outs); + void mon_command_async(const vector<string>& cmd, const bufferlist &inbl, + bufferlist *outbl, string *outs, Context *on_finish); + int mon_command(int rank, + const vector<string>& cmd, const bufferlist &inbl, + bufferlist *outbl, string *outs); + int mon_command(string name, + const vector<string>& cmd, const bufferlist &inbl, + bufferlist *outbl, string *outs); + int mgr_command(const vector<string>& cmd, const bufferlist &inbl, + bufferlist *outbl, string *outs); + int mgr_command( + const string& name, + const vector<string>& cmd, const bufferlist &inbl, + bufferlist *outbl, string *outs); + int osd_command(int osd, vector<string>& cmd, const bufferlist& inbl, + bufferlist *poutbl, string *prs); + int pg_command(pg_t pgid, vector<string>& cmd, const bufferlist& inbl, + bufferlist *poutbl, string *prs); + + void handle_log(MLog *m); + int monitor_log(const string& level, rados_log_callback_t cb, + rados_log_callback2_t cb2, void *arg); + + void get(); + bool put(); + void blocklist_self(bool set); + + std::string get_addrs() const; + + int service_daemon_register( + const std::string& service, ///< service name (e.g., 'rgw') + const std::string& name, ///< daemon name (e.g., 'gwfoo') + const std::map<std::string,std::string>& metadata); ///< static metadata about daemon + int service_daemon_update_status( + std::map<std::string,std::string>&& status); + + mon_feature_t get_required_monitor_features() const; + + int get_inconsistent_pgs(int64_t pool_id, std::vector<std::string>* pgs); + const char** get_tracked_conf_keys() const override; + void handle_conf_change(const ConfigProxy& conf, + const std::set <std::string> &changed) override; +}; + +#endif diff --git a/src/librados/RadosXattrIter.cc b/src/librados/RadosXattrIter.cc new file mode 100644 index 000000000..f4fb39dd5 --- /dev/null +++ 
b/src/librados/RadosXattrIter.cc @@ -0,0 +1,29 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <stdlib.h> + +#include "RadosXattrIter.h" + +librados::RadosXattrsIter::RadosXattrsIter() + : val(NULL) +{ + i = attrset.end(); +} + +librados::RadosXattrsIter::~RadosXattrsIter() +{ + free(val); + val = NULL; +} diff --git a/src/librados/RadosXattrIter.h b/src/librados/RadosXattrIter.h new file mode 100644 index 000000000..20a926140 --- /dev/null +++ b/src/librados/RadosXattrIter.h @@ -0,0 +1,38 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#ifndef CEPH_LIBRADOS_XATTRITER_H +#define CEPH_LIBRADOS_XATTRITER_H + +#include <string> +#include <map> + +#include "include/buffer.h" // for bufferlist + +namespace librados { + + /** + * iterator object used in implementation of the external + * attributes part of the C interface of librados + */ + struct RadosXattrsIter { + RadosXattrsIter(); + ~RadosXattrsIter(); + std::map<std::string, bufferlist> attrset; + std::map<std::string, bufferlist>::iterator i; + char *val; + }; +}; + +#endif diff --git a/src/librados/librados.map b/src/librados/librados.map new file mode 100644 index 000000000..279a0ba06 --- /dev/null +++ b/src/librados/librados.map @@ -0,0 +1,41 @@ +LIBRADOS_PRIVATE { + global: + extern "C++" { + "guard variable for boost::asio::detail::call_stack<boost::asio::detail::strand_executor_service::strand_impl, unsigned char>::top_"; + "guard variable for boost::asio::detail::call_stack<boost::asio::detail::strand_service::strand_impl, unsigned char>::top_"; + "guard variable for boost::asio::detail::call_stack<boost::asio::detail::thread_context, boost::asio::detail::thread_info_base>::top_"; + "boost::asio::detail::call_stack<boost::asio::detail::strand_executor_service::strand_impl, unsigned char>::top_"; + "boost::asio::detail::call_stack<boost::asio::detail::strand_service::strand_impl, unsigned char>::top_"; + "boost::asio::detail::call_stack<boost::asio::detail::thread_context, boost::asio::detail::thread_info_base>::top_"; + + }; + local: *; +}; + +LIBRADOS_14.2.0 { + global: + extern "C++" { + ceph::buffer::v15_2_0::*; + librados::v14_2_0::*; + + "typeinfo for librados::v14_2_0::ObjectOperation"; + "typeinfo name for librados::v14_2_0::ObjectOperation"; + "vtable for librados::v14_2_0::ObjectOperation"; + + "typeinfo for librados::v14_2_0::ObjectReadOperation"; + "typeinfo name for librados::v14_2_0::ObjectReadOperation"; + "vtable for librados::v14_2_0::ObjectReadOperation"; + + "typeinfo for 
librados::v14_2_0::ObjectWriteOperation"; + "typeinfo name for librados::v14_2_0::ObjectWriteOperation"; + "vtable for librados::v14_2_0::ObjectWriteOperation"; + + "typeinfo for librados::v14_2_0::WatchCtx"; + "typeinfo name for librados::v14_2_0::WatchCtx"; + "vtable for librados::v14_2_0::WatchCtx"; + + "typeinfo for librados::v14_2_0::WatchCtx2"; + "typeinfo name for librados::v14_2_0::WatchCtx2"; + "vtable for librados::v14_2_0::WatchCtx2"; + }; +} LIBRADOS_PRIVATE; diff --git a/src/librados/librados_asio.h b/src/librados/librados_asio.h new file mode 100644 index 000000000..c9b5ffba7 --- /dev/null +++ b/src/librados/librados_asio.h @@ -0,0 +1,213 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2017 Red Hat, Inc. + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + */ + +#ifndef LIBRADOS_ASIO_H +#define LIBRADOS_ASIO_H + +#include "include/rados/librados.hpp" +#include "common/async/completion.h" + +/// Defines asynchronous librados operations that satisfy all of the +/// "Requirements on asynchronous operations" imposed by the C++ Networking TS +/// in section 13.2.7. Many of the type and variable names below are taken +/// directly from those requirements. 
+/// +/// The current draft of the Networking TS (as of 2017-11-27) is available here: +/// http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2017/n4711.pdf +/// +/// The boost::asio documentation duplicates these requirements here: +/// http://www.boost.org/doc/libs/1_66_0/doc/html/boost_asio/reference/asynchronous_operations.html + +namespace librados { + +namespace detail { + +/// unique_ptr with custom deleter for AioCompletion +struct AioCompletionDeleter { + void operator()(AioCompletion *c) { c->release(); } +}; +using unique_aio_completion_ptr = + std::unique_ptr<AioCompletion, AioCompletionDeleter>; + +/// Invokes the given completion handler. When the type of Result is not void, +/// storage is provided for it and that result is passed as an additional +/// argument to the handler. +template <typename Result> +struct Invoker { + using Signature = void(boost::system::error_code, Result); + Result result; + template <typename Completion> + void dispatch(Completion&& completion, boost::system::error_code ec) { + ceph::async::dispatch(std::move(completion), ec, std::move(result)); + } +}; +// specialization for Result=void +template <> +struct Invoker<void> { + using Signature = void(boost::system::error_code); + template <typename Completion> + void dispatch(Completion&& completion, boost::system::error_code ec) { + ceph::async::dispatch(std::move(completion), ec); + } +}; + +template <typename Result> +struct AsyncOp : Invoker<Result> { + unique_aio_completion_ptr aio_completion; + + using Signature = typename Invoker<Result>::Signature; + using Completion = ceph::async::Completion<Signature, AsyncOp<Result>>; + + static void aio_dispatch(completion_t cb, void *arg) { + // reclaim ownership of the completion + auto p = std::unique_ptr<Completion>{static_cast<Completion*>(arg)}; + // move result out of Completion memory being freed + auto op = std::move(p->user_data); + const int ret = op.aio_completion->get_return_value(); + boost::system::error_code ec; + 
if (ret < 0) { + ec.assign(-ret, boost::system::system_category()); + } + op.dispatch(std::move(p), ec); + } + + template <typename Executor1, typename CompletionHandler> + static auto create(const Executor1& ex1, CompletionHandler&& handler) { + auto p = Completion::create(ex1, std::move(handler)); + p->user_data.aio_completion.reset( + Rados::aio_create_completion(p.get(), aio_dispatch)); + return p; + } +}; + +} // namespace detail + + +/// Calls IoCtx::aio_read() and arranges for the AioCompletion to call a +/// given handler with signature (boost::system::error_code, bufferlist). +template <typename ExecutionContext, typename CompletionToken> +auto async_read(ExecutionContext& ctx, IoCtx& io, const std::string& oid, + size_t len, uint64_t off, CompletionToken&& token) +{ + using Op = detail::AsyncOp<bufferlist>; + using Signature = typename Op::Signature; + boost::asio::async_completion<CompletionToken, Signature> init(token); + auto p = Op::create(ctx.get_executor(), init.completion_handler); + auto& op = p->user_data; + + int ret = io.aio_read(oid, op.aio_completion.get(), &op.result, len, off); + if (ret < 0) { + auto ec = boost::system::error_code{-ret, boost::system::system_category()}; + ceph::async::post(std::move(p), ec, bufferlist{}); + } else { + p.release(); // release ownership until completion + } + return init.result.get(); +} + +/// Calls IoCtx::aio_write() and arranges for the AioCompletion to call a +/// given handler with signature (boost::system::error_code). 
+template <typename ExecutionContext, typename CompletionToken> +auto async_write(ExecutionContext& ctx, IoCtx& io, const std::string& oid, + bufferlist &bl, size_t len, uint64_t off, + CompletionToken&& token) +{ + using Op = detail::AsyncOp<void>; + using Signature = typename Op::Signature; + boost::asio::async_completion<CompletionToken, Signature> init(token); + auto p = Op::create(ctx.get_executor(), init.completion_handler); + auto& op = p->user_data; + + int ret = io.aio_write(oid, op.aio_completion.get(), bl, len, off); + if (ret < 0) { + auto ec = boost::system::error_code{-ret, boost::system::system_category()}; + ceph::async::post(std::move(p), ec); + } else { + p.release(); // release ownership until completion + } + return init.result.get(); +} + +/// Calls IoCtx::aio_operate() and arranges for the AioCompletion to call a +/// given handler with signature (boost::system::error_code, bufferlist). +template <typename ExecutionContext, typename CompletionToken> +auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid, + ObjectReadOperation *read_op, int flags, + CompletionToken&& token) +{ + using Op = detail::AsyncOp<bufferlist>; + using Signature = typename Op::Signature; + boost::asio::async_completion<CompletionToken, Signature> init(token); + auto p = Op::create(ctx.get_executor(), init.completion_handler); + auto& op = p->user_data; + + int ret = io.aio_operate(oid, op.aio_completion.get(), read_op, + flags, &op.result); + if (ret < 0) { + auto ec = boost::system::error_code{-ret, boost::system::system_category()}; + ceph::async::post(std::move(p), ec, bufferlist{}); + } else { + p.release(); // release ownership until completion + } + return init.result.get(); +} + +/// Calls IoCtx::aio_operate() and arranges for the AioCompletion to call a +/// given handler with signature (boost::system::error_code). 
+template <typename ExecutionContext, typename CompletionToken> +auto async_operate(ExecutionContext& ctx, IoCtx& io, const std::string& oid, + ObjectWriteOperation *write_op, int flags, + CompletionToken &&token) +{ + using Op = detail::AsyncOp<void>; + using Signature = typename Op::Signature; + boost::asio::async_completion<CompletionToken, Signature> init(token); + auto p = Op::create(ctx.get_executor(), init.completion_handler); + auto& op = p->user_data; + + int ret = io.aio_operate(oid, op.aio_completion.get(), write_op, flags); + if (ret < 0) { + auto ec = boost::system::error_code{-ret, boost::system::system_category()}; + ceph::async::post(std::move(p), ec); + } else { + p.release(); // release ownership until completion + } + return init.result.get(); +} + +/// Calls IoCtx::aio_notify() and arranges for the AioCompletion to call a +/// given handler with signature (boost::system::error_code, bufferlist). +template <typename ExecutionContext, typename CompletionToken> +auto async_notify(ExecutionContext& ctx, IoCtx& io, const std::string& oid, + bufferlist& bl, uint64_t timeout_ms, CompletionToken &&token) +{ + using Op = detail::AsyncOp<bufferlist>; + using Signature = typename Op::Signature; + boost::asio::async_completion<CompletionToken, Signature> init(token); + auto p = Op::create(ctx.get_executor(), init.completion_handler); + auto& op = p->user_data; + + int ret = io.aio_notify(oid, op.aio_completion.get(), + bl, timeout_ms, &op.result); + if (ret < 0) { + auto ec = boost::system::error_code{-ret, boost::system::system_category()}; + ceph::async::post(std::move(p), ec, bufferlist{}); + } else { + p.release(); // release ownership until completion + } + return init.result.get(); +} + +} // namespace librados + +#endif // LIBRADOS_ASIO_H diff --git a/src/librados/librados_c.cc b/src/librados/librados_c.cc new file mode 100644 index 000000000..6448cd6a8 --- /dev/null +++ b/src/librados/librados_c.cc @@ -0,0 +1,4611 @@ +// -*- mode:C++; tab-width:8; 
c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <limits.h> + +#include "acconfig.h" +#include "common/config.h" +#include "common/errno.h" +#include "common/ceph_argparse.h" +#include "common/ceph_json.h" +#include "common/common_init.h" +#include "common/TracepointProvider.h" +#include "common/hobject.h" +#include "common/async/waiter.h" +#include "include/rados/librados.h" +#include "include/types.h" +#include <include/stringify.h> + +#include "librados/librados_c.h" +#include "librados/AioCompletionImpl.h" +#include "librados/IoCtxImpl.h" +#include "librados/PoolAsyncCompletionImpl.h" +#include "librados/RadosClient.h" +#include "librados/RadosXattrIter.h" +#include "librados/ListObjectImpl.h" +#include "librados/librados_util.h" +#include <cls/lock/cls_lock_client.h> + +#include <string> +#include <map> +#include <set> +#include <vector> +#include <list> +#include <stdexcept> + +#ifdef WITH_LTTNG +#define TRACEPOINT_DEFINE +#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#include "tracing/librados.h" +#undef TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#undef TRACEPOINT_DEFINE +#else +#define tracepoint(...) +#endif + +#if defined(HAVE_ASM_SYMVER) || defined(HAVE_ATTR_SYMVER) +// prefer __attribute__() over global asm(".symver"). because the latter +// is not parsed by the compiler and is partitioned away by GCC if +// lto-partitions is enabled, in other words, these asm() statements +// are dropped by the -flto option by default. the way to address it is +// to use __attribute__. 
so this information can be processed by the +// C compiler, and be preserved after LTO partitions the code +#ifdef HAVE_ATTR_SYMVER +#define LIBRADOS_C_API_BASE(fn) \ + extern __typeof (_##fn##_base) _##fn##_base __attribute__((__symver__ (#fn "@"))) +#define LIBRADOS_C_API_BASE_DEFAULT(fn) \ + extern __typeof (_##fn) _##fn __attribute__((__symver__ (#fn "@@"))) +#define LIBRADOS_C_API_DEFAULT(fn, ver) \ + extern __typeof (_##fn) _##fn __attribute__((__symver__ (#fn "@@LIBRADOS_" #ver))) +#else +#define LIBRADOS_C_API_BASE(fn) \ + asm(".symver _" #fn "_base, " #fn "@") +#define LIBRADOS_C_API_BASE_DEFAULT(fn) \ + asm(".symver _" #fn ", " #fn "@@") +#define LIBRADOS_C_API_DEFAULT(fn, ver) \ + asm(".symver _" #fn ", " #fn "@@LIBRADOS_" #ver) +#endif + +#define LIBRADOS_C_API_BASE_F(fn) _ ## fn ## _base +#define LIBRADOS_C_API_DEFAULT_F(fn) _ ## fn + +#else +#define LIBRADOS_C_API_BASE(fn) +#define LIBRADOS_C_API_BASE_DEFAULT(fn) +#define LIBRADOS_C_API_DEFAULT(fn, ver) + +#define LIBRADOS_C_API_BASE_F(fn) _ ## fn ## _base +// There shouldn't be multiple default versions of the same +// function. +#define LIBRADOS_C_API_DEFAULT_F(fn) fn +#endif + +using std::string; +using std::map; +using std::set; +using std::vector; +using std::list; + +#define dout_subsys ceph_subsys_rados +#undef dout_prefix +#define dout_prefix *_dout << "librados: " + +#define RADOS_LIST_MAX_ENTRIES 1024 + +static TracepointProvider::Traits tracepoint_traits("librados_tp.so", "rados_tracing"); + +/* + * Structure of this file + * + * RadosClient and the related classes are the internal implementation of librados. + * Above that layer sits the C API, found in include/rados/librados.h, and + * the C++ API, found in include/rados/librados.hpp + * + * The C++ API sometimes implements things in terms of the C API. + * Both the C++ and C API rely on RadosClient. 
+ * + * Visually: + * +--------------------------------------+ + * | C++ API | + * +--------------------+ | + * | C API | | + * +--------------------+-----------------+ + * | RadosClient | + * +--------------------------------------+ + */ + +///////////////////////////// C API ////////////////////////////// + +static CephContext *rados_create_cct( + const char * const clustername, + CephInitParameters *iparams) +{ + // missing things compared to global_init: + // g_ceph_context, g_conf, g_lockdep, signal handlers + CephContext *cct = common_preinit(*iparams, CODE_ENVIRONMENT_LIBRARY, 0); + if (clustername) + cct->_conf->cluster = clustername; + cct->_conf.parse_env(cct->get_module_type()); // environment variables override + cct->_conf.apply_changes(nullptr); + + TracepointProvider::initialize<tracepoint_traits>(cct); + return cct; +} + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_create)( + rados_t *pcluster, + const char * const id) +{ + CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT); + if (id) { + iparams.name.set(CEPH_ENTITY_TYPE_CLIENT, id); + } + CephContext *cct = rados_create_cct("", &iparams); + + tracepoint(librados, rados_create_enter, id); + *pcluster = reinterpret_cast<rados_t>(new librados::RadosClient(cct)); + tracepoint(librados, rados_create_exit, 0, *pcluster); + + cct->put(); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_create); + +// as above, but +// 1) don't assume 'client.'; name is a full type.id namestr +// 2) allow setting clustername +// 3) flags is for future expansion (maybe some of the global_init() +// behavior is appropriate for some consumers of librados, for instance) + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_create2)( + rados_t *pcluster, + const char *const clustername, + const char * const name, + uint64_t flags) +{ + // client is assumed, but from_str will override + int retval = 0; + CephInitParameters iparams(CEPH_ENTITY_TYPE_CLIENT); + if (!name || !iparams.name.from_str(name)) { + retval = -EINVAL; + } + 
+ CephContext *cct = rados_create_cct(clustername, &iparams); + tracepoint(librados, rados_create2_enter, clustername, name, flags); + if (retval == 0) { + *pcluster = reinterpret_cast<rados_t>(new librados::RadosClient(cct)); + } + tracepoint(librados, rados_create2_exit, retval, *pcluster); + + cct->put(); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_create2); + +/* This function is intended for use by Ceph daemons. These daemons have + * already called global_init and want to use that particular configuration for + * their cluster. + */ +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_create_with_context)( + rados_t *pcluster, + rados_config_t cct_) +{ + CephContext *cct = (CephContext *)cct_; + TracepointProvider::initialize<tracepoint_traits>(cct); + + tracepoint(librados, rados_create_with_context_enter, cct_); + librados::RadosClient *radosp = new librados::RadosClient(cct); + *pcluster = (void *)radosp; + tracepoint(librados, rados_create_with_context_exit, 0, *pcluster); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_create_with_context); + +extern "C" rados_config_t LIBRADOS_C_API_DEFAULT_F(rados_cct)(rados_t cluster) +{ + tracepoint(librados, rados_cct_enter, cluster); + librados::RadosClient *client = (librados::RadosClient *)cluster; + rados_config_t retval = (rados_config_t)client->cct; + tracepoint(librados, rados_cct_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_cct); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_connect)(rados_t cluster) +{ + tracepoint(librados, rados_connect_enter, cluster); + librados::RadosClient *client = (librados::RadosClient *)cluster; + int retval = client->connect(); + tracepoint(librados, rados_connect_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_connect); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_shutdown)(rados_t cluster) +{ + tracepoint(librados, rados_shutdown_enter, cluster); + librados::RadosClient *radosp = (librados::RadosClient *)cluster; + 
radosp->shutdown(); + delete radosp; + tracepoint(librados, rados_shutdown_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_shutdown); + +extern "C" uint64_t LIBRADOS_C_API_DEFAULT_F(rados_get_instance_id)( + rados_t cluster) +{ + tracepoint(librados, rados_get_instance_id_enter, cluster); + librados::RadosClient *client = (librados::RadosClient *)cluster; + uint64_t retval = client->get_instance_id(); + tracepoint(librados, rados_get_instance_id_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_get_instance_id); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_get_min_compatible_osd)( + rados_t cluster, + int8_t* require_osd_release) +{ + librados::RadosClient *client = (librados::RadosClient *)cluster; + return client->get_min_compatible_osd(require_osd_release); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_get_min_compatible_osd); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_get_min_compatible_client)( + rados_t cluster, + int8_t* min_compat_client, + int8_t* require_min_compat_client) +{ + librados::RadosClient *client = (librados::RadosClient *)cluster; + return client->get_min_compatible_client(min_compat_client, + require_min_compat_client); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_get_min_compatible_client); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_version)( + int *major, int *minor, int *extra) +{ + tracepoint(librados, rados_version_enter, major, minor, extra); + if (major) + *major = LIBRADOS_VER_MAJOR; + if (minor) + *minor = LIBRADOS_VER_MINOR; + if (extra) + *extra = LIBRADOS_VER_EXTRA; + tracepoint(librados, rados_version_exit, LIBRADOS_VER_MAJOR, LIBRADOS_VER_MINOR, LIBRADOS_VER_EXTRA); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_version); + + +// -- config -- +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_conf_read_file)( + rados_t cluster, + const char *path_list) +{ + tracepoint(librados, rados_conf_read_file_enter, cluster, path_list); + librados::RadosClient *client = (librados::RadosClient *)cluster; + auto& conf = 
client->cct->_conf; + ostringstream warnings; + int ret = conf.parse_config_files(path_list, &warnings, 0); + if (ret) { + if (warnings.tellp() > 0) + lderr(client->cct) << warnings.str() << dendl; + client->cct->_conf.complain_about_parse_error(client->cct); + tracepoint(librados, rados_conf_read_file_exit, ret); + return ret; + } + conf.parse_env(client->cct->get_module_type()); // environment variables override + + conf.apply_changes(nullptr); + client->cct->_conf.complain_about_parse_error(client->cct); + tracepoint(librados, rados_conf_read_file_exit, 0); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_conf_read_file); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_conf_parse_argv)( + rados_t cluster, + int argc, + const char **argv) +{ + tracepoint(librados, rados_conf_parse_argv_enter, cluster, argc); + int i; + for(i = 0; i < argc; i++) { + tracepoint(librados, rados_conf_parse_argv_arg, argv[i]); + } + librados::RadosClient *client = (librados::RadosClient *)cluster; + auto& conf = client->cct->_conf; + vector<const char*> args; + argv_to_vec(argc, argv, args); + int ret = conf.parse_argv(args); + if (ret) { + tracepoint(librados, rados_conf_parse_argv_exit, ret); + return ret; + } + conf.apply_changes(nullptr); + tracepoint(librados, rados_conf_parse_argv_exit, 0); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_conf_parse_argv); + +// like above, but return the remainder of argv to contain remaining +// unparsed args. Must be allocated to at least argc by caller. 
+// remargv will contain n <= argc pointers to original argv[], the end +// of which may be NULL + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_conf_parse_argv_remainder)( + rados_t cluster, int argc, + const char **argv, + const char **remargv) +{ + tracepoint(librados, rados_conf_parse_argv_remainder_enter, cluster, argc); + unsigned int i; + for(i = 0; i < (unsigned int) argc; i++) { + tracepoint(librados, rados_conf_parse_argv_remainder_arg, argv[i]); + } + librados::RadosClient *client = (librados::RadosClient *)cluster; + auto& conf = client->cct->_conf; + vector<const char*> args; + for (int i=0; i<argc; i++) + args.push_back(argv[i]); + int ret = conf.parse_argv(args); + if (ret) { + tracepoint(librados, rados_conf_parse_argv_remainder_exit, ret); + return ret; + } + conf.apply_changes(NULL); + ceph_assert(args.size() <= (unsigned int)argc); + for (i = 0; i < (unsigned int)argc; ++i) { + if (i < args.size()) + remargv[i] = args[i]; + else + remargv[i] = (const char *)NULL; + tracepoint(librados, rados_conf_parse_argv_remainder_remarg, remargv[i]); + } + tracepoint(librados, rados_conf_parse_argv_remainder_exit, 0); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_conf_parse_argv_remainder); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_conf_parse_env)( + rados_t cluster, const char *env) +{ + tracepoint(librados, rados_conf_parse_env_enter, cluster, env); + librados::RadosClient *client = (librados::RadosClient *)cluster; + auto& conf = client->cct->_conf; + conf.parse_env(client->cct->get_module_type(), env); + conf.apply_changes(nullptr); + tracepoint(librados, rados_conf_parse_env_exit, 0); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_conf_parse_env); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_conf_set)( + rados_t cluster, + const char *option, + const char *value) +{ + tracepoint(librados, rados_conf_set_enter, cluster, option, value); + librados::RadosClient *client = (librados::RadosClient *)cluster; + auto& conf = client->cct->_conf; 
+ int ret = conf.set_val(option, value); + if (ret) { + tracepoint(librados, rados_conf_set_exit, ret); + return ret; + } + conf.apply_changes(nullptr); + tracepoint(librados, rados_conf_set_exit, 0); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_conf_set); + +/* cluster info */ +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_cluster_stat)( + rados_t cluster, + rados_cluster_stat_t *result) +{ + tracepoint(librados, rados_cluster_stat_enter, cluster); + librados::RadosClient *client = (librados::RadosClient *)cluster; + + ceph_statfs stats; + int r = client->get_fs_stats(stats); + result->kb = stats.kb; + result->kb_used = stats.kb_used; + result->kb_avail = stats.kb_avail; + result->num_objects = stats.num_objects; + tracepoint(librados, rados_cluster_stat_exit, r, result->kb, result->kb_used, result->kb_avail, result->num_objects); + return r; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_cluster_stat); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_conf_get)( + rados_t cluster, + const char *option, + char *buf, size_t len) +{ + tracepoint(librados, rados_conf_get_enter, cluster, option, len); + char *tmp = buf; + librados::RadosClient *client = (librados::RadosClient *)cluster; + const auto& conf = client->cct->_conf; + int retval = conf.get_val(option, &tmp, len); + tracepoint(librados, rados_conf_get_exit, retval, retval ? 
"" : option); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_conf_get); + +extern "C" int64_t LIBRADOS_C_API_DEFAULT_F(rados_pool_lookup)( + rados_t cluster, + const char *name) +{ + tracepoint(librados, rados_pool_lookup_enter, cluster, name); + librados::RadosClient *radosp = (librados::RadosClient *)cluster; + int64_t retval = radosp->lookup_pool(name); + tracepoint(librados, rados_pool_lookup_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_pool_lookup); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_reverse_lookup)( + rados_t cluster, + int64_t id, + char *buf, + size_t maxlen) +{ + tracepoint(librados, rados_pool_reverse_lookup_enter, cluster, id, maxlen); + librados::RadosClient *radosp = (librados::RadosClient *)cluster; + std::string name; + int r = radosp->pool_get_name(id, &name, true); + if (r < 0) { + tracepoint(librados, rados_pool_reverse_lookup_exit, r, ""); + return r; + } + if (name.length() >= maxlen) { + tracepoint(librados, rados_pool_reverse_lookup_exit, -ERANGE, ""); + return -ERANGE; + } + strcpy(buf, name.c_str()); + int retval = name.length(); + tracepoint(librados, rados_pool_reverse_lookup_exit, retval, buf); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_pool_reverse_lookup); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_cluster_fsid)( + rados_t cluster, + char *buf, + size_t maxlen) +{ + tracepoint(librados, rados_cluster_fsid_enter, cluster, maxlen); + librados::RadosClient *radosp = (librados::RadosClient *)cluster; + std::string fsid; + radosp->get_fsid(&fsid); + if (fsid.length() >= maxlen) { + tracepoint(librados, rados_cluster_fsid_exit, -ERANGE, ""); + return -ERANGE; + } + strcpy(buf, fsid.c_str()); + int retval = fsid.length(); + tracepoint(librados, rados_cluster_fsid_exit, retval, buf); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_cluster_fsid); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_wait_for_latest_osdmap)( + rados_t cluster) +{ + tracepoint(librados, 
rados_wait_for_latest_osdmap_enter, cluster); + librados::RadosClient *radosp = (librados::RadosClient *)cluster; + int retval = radosp->wait_for_latest_osdmap(); + tracepoint(librados, rados_wait_for_latest_osdmap_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_wait_for_latest_osdmap); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_blocklist_add)( + rados_t cluster, + char *client_address, + uint32_t expire_seconds) +{ + librados::RadosClient *radosp = (librados::RadosClient *)cluster; + return radosp->blocklist_add(client_address, expire_seconds); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_blocklist_add); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_blacklist_add)( + rados_t cluster, + char *client_address, + uint32_t expire_seconds) +{ + return LIBRADOS_C_API_DEFAULT_F(rados_blocklist_add)( + cluster, client_address, expire_seconds); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_blacklist_add); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_getaddrs)( + rados_t cluster, + char** addrs) +{ + librados::RadosClient *radosp = (librados::RadosClient *)cluster; + auto s = radosp->get_addrs(); + *addrs = strdup(s.c_str()); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_getaddrs); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_set_osdmap_full_try)( + rados_ioctx_t io) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + ctx->extra_op_flags |= CEPH_OSD_FLAG_FULL_TRY; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_set_osdmap_full_try); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_unset_osdmap_full_try)( + rados_ioctx_t io) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + ctx->extra_op_flags &= ~CEPH_OSD_FLAG_FULL_TRY; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_unset_osdmap_full_try); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_set_pool_full_try)( + rados_ioctx_t io) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + ctx->extra_op_flags |= CEPH_OSD_FLAG_FULL_TRY; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_set_pool_full_try); + 
+extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_unset_pool_full_try)( + rados_ioctx_t io) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + ctx->extra_op_flags &= ~CEPH_OSD_FLAG_FULL_TRY; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_unset_pool_full_try); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_application_enable)( + rados_ioctx_t io, + const char *app_name, + int force) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + return ctx->application_enable(app_name, force != 0); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_application_enable); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_application_list)( + rados_ioctx_t io, + char *values, + size_t *values_len) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + std::set<std::string> app_names; + int r = ctx->application_list(&app_names); + if (r < 0) { + return r; + } + + size_t total_len = 0; + for (auto app_name : app_names) { + total_len += app_name.size() + 1; + } + + if (*values_len < total_len) { + *values_len = total_len; + return -ERANGE; + } + + char *values_p = values; + for (auto app_name : app_names) { + size_t len = app_name.size() + 1; + strncpy(values_p, app_name.c_str(), len); + values_p += len; + } + *values_p = '\0'; + *values_len = total_len; + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_application_list); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_application_metadata_get)( + rados_ioctx_t io, + const char *app_name, + const char *key, + char *value, + size_t *value_len) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + std::string value_str; + int r = ctx->application_metadata_get(app_name, key, &value_str); + if (r < 0) { + return r; + } + + size_t len = value_str.size() + 1; + if (*value_len < len) { + *value_len = len; + return -ERANGE; + } + + strncpy(value, value_str.c_str(), len); + *value_len = len; + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_application_metadata_get); + +extern "C" int 
LIBRADOS_C_API_DEFAULT_F(rados_application_metadata_set)( + rados_ioctx_t io, + const char *app_name, + const char *key, + const char *value) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + return ctx->application_metadata_set(app_name, key, value); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_application_metadata_set); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_application_metadata_remove)( + rados_ioctx_t io, + const char *app_name, + const char *key) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + return ctx->application_metadata_remove(app_name, key); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_application_metadata_remove); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_application_metadata_list)( + rados_ioctx_t io, + const char *app_name, + char *keys, size_t *keys_len, + char *values, size_t *vals_len) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + std::map<std::string, std::string> metadata; + int r = ctx->application_metadata_list(app_name, &metadata); + if (r < 0) { + return r; + } + + size_t total_key_len = 0; + size_t total_val_len = 0; + for (auto pair : metadata) { + total_key_len += pair.first.size() + 1; + total_val_len += pair.second.size() + 1; + } + + if (*keys_len < total_key_len || *vals_len < total_val_len) { + *keys_len = total_key_len; + *vals_len = total_val_len; + return -ERANGE; + } + + char *keys_p = keys; + char *vals_p = values; + for (auto pair : metadata) { + size_t key_len = pair.first.size() + 1; + strncpy(keys_p, pair.first.c_str(), key_len); + keys_p += key_len; + + size_t val_len = pair.second.size() + 1; + strncpy(vals_p, pair.second.c_str(), val_len); + vals_p += val_len; + } + *keys_p = '\0'; + *keys_len = total_key_len; + + *vals_p = '\0'; + *vals_len = total_val_len; + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_application_metadata_list); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_list)( + rados_t cluster, + char *buf, + size_t len) +{ + tracepoint(librados, 
rados_pool_list_enter, cluster, len); + librados::RadosClient *client = (librados::RadosClient *)cluster; + std::list<std::pair<int64_t, std::string> > pools; + int r = client->pool_list(pools); + if (r < 0) { + tracepoint(librados, rados_pool_list_exit, r); + return r; + } + + if (len > 0 && !buf) { + tracepoint(librados, rados_pool_list_exit, -EINVAL); + return -EINVAL; + } + + char *b = buf; + if (b) { + // FIPS zeroization audit 20191116: this memset is not security related. + memset(b, 0, len); + } + int needed = 0; + std::list<std::pair<int64_t, std::string> >::const_iterator i = pools.begin(); + std::list<std::pair<int64_t, std::string> >::const_iterator p_end = + pools.end(); + for (; i != p_end; ++i) { + int rl = i->second.length() + 1; + if (len < (unsigned)rl) + break; + const char* pool = i->second.c_str(); + tracepoint(librados, rados_pool_list_pool, pool); + if (b) { + strncat(b, pool, rl); + b += rl; + } + needed += rl; + len -= rl; + } + for (; i != p_end; ++i) { + int rl = i->second.length() + 1; + needed += rl; + } + int retval = needed + 1; + tracepoint(librados, rados_pool_list_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_pool_list); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_inconsistent_pg_list)( + rados_t cluster, + int64_t pool_id, + char *buf, + size_t len) +{ + tracepoint(librados, rados_inconsistent_pg_list_enter, cluster, pool_id, len); + librados::RadosClient *client = (librados::RadosClient *)cluster; + std::vector<std::string> pgs; + if (int r = client->get_inconsistent_pgs(pool_id, &pgs); r < 0) { + tracepoint(librados, rados_inconsistent_pg_list_exit, r); + return r; + } + + if (len > 0 && !buf) { + tracepoint(librados, rados_inconsistent_pg_list_exit, -EINVAL); + return -EINVAL; + } + + char *b = buf; + if (b) { + // FIPS zeroization audit 20191116: this memset is not security related. 
+ memset(b, 0, len); + } + int needed = 0; + for (const auto& s : pgs) { + unsigned rl = s.length() + 1; + if (b && len >= rl) { + tracepoint(librados, rados_inconsistent_pg_list_pg, s.c_str()); + strncat(b, s.c_str(), rl); + b += rl; + len -= rl; + } + needed += rl; + } + int retval = needed + 1; + tracepoint(librados, rados_inconsistent_pg_list_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_inconsistent_pg_list); + + +static void dict_to_map(const char *dict, + std::map<std::string, std::string>* dict_map) +{ + while (*dict != '\0') { + const char* key = dict; + dict += strlen(key) + 1; + const char* value = dict; + dict += strlen(value) + 1; + (*dict_map)[key] = value; + } +} + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_service_register)( + rados_t cluster, + const char *service, + const char *daemon, + const char *metadata_dict) +{ + librados::RadosClient *client = (librados::RadosClient *)cluster; + + std::map<std::string, std::string> metadata; + dict_to_map(metadata_dict, &metadata); + + return client->service_daemon_register(service, daemon, metadata); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_service_register); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_service_update_status)( + rados_t cluster, + const char *status_dict) +{ + librados::RadosClient *client = (librados::RadosClient *)cluster; + + std::map<std::string, std::string> status; + dict_to_map(status_dict, &status); + + return client->service_daemon_update_status(std::move(status)); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_service_update_status); + +static void do_out_buffer(bufferlist& outbl, char **outbuf, size_t *outbuflen) +{ + if (outbuf) { + if (outbl.length() > 0) { + *outbuf = (char *)malloc(outbl.length()); + memcpy(*outbuf, outbl.c_str(), outbl.length()); + } else { + *outbuf = NULL; + } + } + if (outbuflen) + *outbuflen = outbl.length(); +} + +static void do_out_buffer(string& outbl, char **outbuf, size_t *outbuflen) +{ + if (outbuf) { + if (outbl.length() > 0) 
{ + *outbuf = (char *)malloc(outbl.length()); + memcpy(*outbuf, outbl.c_str(), outbl.length()); + } else { + *outbuf = NULL; + } + } + if (outbuflen) + *outbuflen = outbl.length(); +} + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ping_monitor)( + rados_t cluster, + const char *mon_id, + char **outstr, + size_t *outstrlen) +{ + tracepoint(librados, rados_ping_monitor_enter, cluster, mon_id); + librados::RadosClient *client = (librados::RadosClient *)cluster; + string str; + + if (!mon_id) { + tracepoint(librados, rados_ping_monitor_exit, -EINVAL, NULL, NULL); + return -EINVAL; + } + + int ret = client->ping_monitor(mon_id, &str); + if (ret == 0) { + do_out_buffer(str, outstr, outstrlen); + } + tracepoint(librados, rados_ping_monitor_exit, ret, ret < 0 ? NULL : outstr, ret < 0 ? NULL : outstrlen); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ping_monitor); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_mon_command)( + rados_t cluster, + const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) +{ + tracepoint(librados, rados_mon_command_enter, cluster, cmdlen, inbuf, inbuflen); + librados::RadosClient *client = (librados::RadosClient *)cluster; + bufferlist inbl; + bufferlist outbl; + string outstring; + vector<string> cmdvec; + + for (size_t i = 0; i < cmdlen; i++) { + tracepoint(librados, rados_mon_command_cmd, cmd[i]); + cmdvec.push_back(cmd[i]); + } + + inbl.append(inbuf, inbuflen); + int ret = client->mon_command(cmdvec, inbl, &outbl, &outstring); + + do_out_buffer(outbl, outbuf, outbuflen); + do_out_buffer(outstring, outs, outslen); + tracepoint(librados, rados_mon_command_exit, ret, outbuf, outbuflen, outs, outslen); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_mon_command); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_mon_command_target)( + rados_t cluster, + const char *name, + const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char 
**outbuf, size_t *outbuflen, + char **outs, size_t *outslen) +{ + tracepoint(librados, rados_mon_command_target_enter, cluster, name, cmdlen, inbuf, inbuflen); + librados::RadosClient *client = (librados::RadosClient *)cluster; + bufferlist inbl; + bufferlist outbl; + string outstring; + vector<string> cmdvec; + + // is this a numeric id? + char *endptr; + errno = 0; + long rank = strtol(name, &endptr, 10); + if ((errno == ERANGE && (rank == LONG_MAX || rank == LONG_MIN)) || + (errno != 0 && rank == 0) || + endptr == name || // no digits + *endptr != '\0') { // extra characters + rank = -1; + } + + for (size_t i = 0; i < cmdlen; i++) { + tracepoint(librados, rados_mon_command_target_cmd, cmd[i]); + cmdvec.push_back(cmd[i]); + } + + inbl.append(inbuf, inbuflen); + int ret; + if (rank >= 0) + ret = client->mon_command(rank, cmdvec, inbl, &outbl, &outstring); + else + ret = client->mon_command(name, cmdvec, inbl, &outbl, &outstring); + + do_out_buffer(outbl, outbuf, outbuflen); + do_out_buffer(outstring, outs, outslen); + tracepoint(librados, rados_mon_command_target_exit, ret, outbuf, outbuflen, outs, outslen); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_mon_command_target); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_osd_command)( + rados_t cluster, int osdid, const char **cmd, + size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) +{ + tracepoint(librados, rados_osd_command_enter, cluster, osdid, cmdlen, inbuf, inbuflen); + librados::RadosClient *client = (librados::RadosClient *)cluster; + bufferlist inbl; + bufferlist outbl; + string outstring; + vector<string> cmdvec; + + for (size_t i = 0; i < cmdlen; i++) { + tracepoint(librados, rados_osd_command_cmd, cmd[i]); + cmdvec.push_back(cmd[i]); + } + + inbl.append(inbuf, inbuflen); + int ret = client->osd_command(osdid, cmdvec, inbl, &outbl, &outstring); + + do_out_buffer(outbl, outbuf, outbuflen); + do_out_buffer(outstring, outs, 
outslen); + tracepoint(librados, rados_osd_command_exit, ret, outbuf, outbuflen, outs, outslen); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_osd_command); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_mgr_command)( + rados_t cluster, const char **cmd, + size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) +{ + tracepoint(librados, rados_mgr_command_enter, cluster, cmdlen, inbuf, + inbuflen); + + librados::RadosClient *client = (librados::RadosClient *)cluster; + bufferlist inbl; + bufferlist outbl; + string outstring; + vector<string> cmdvec; + + for (size_t i = 0; i < cmdlen; i++) { + tracepoint(librados, rados_mgr_command_cmd, cmd[i]); + cmdvec.push_back(cmd[i]); + } + + inbl.append(inbuf, inbuflen); + int ret = client->mgr_command(cmdvec, inbl, &outbl, &outstring); + + do_out_buffer(outbl, outbuf, outbuflen); + do_out_buffer(outstring, outs, outslen); + tracepoint(librados, rados_mgr_command_exit, ret, outbuf, outbuflen, outs, + outslen); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_mgr_command); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_mgr_command_target)( + rados_t cluster, + const char *name, + const char **cmd, + size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) +{ + tracepoint(librados, rados_mgr_command_target_enter, cluster, name, cmdlen, + inbuf, inbuflen); + + librados::RadosClient *client = (librados::RadosClient *)cluster; + bufferlist inbl; + bufferlist outbl; + string outstring; + vector<string> cmdvec; + + for (size_t i = 0; i < cmdlen; i++) { + tracepoint(librados, rados_mgr_command_target_cmd, cmd[i]); + cmdvec.push_back(cmd[i]); + } + + inbl.append(inbuf, inbuflen); + int ret = client->mgr_command(name, cmdvec, inbl, &outbl, &outstring); + + do_out_buffer(outbl, outbuf, outbuflen); + do_out_buffer(outstring, outs, outslen); + tracepoint(librados, rados_mgr_command_target_exit, ret, 
outbuf, outbuflen, + outs, outslen); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_mgr_command_target); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pg_command)( + rados_t cluster, const char *pgstr, + const char **cmd, size_t cmdlen, + const char *inbuf, size_t inbuflen, + char **outbuf, size_t *outbuflen, + char **outs, size_t *outslen) +{ + tracepoint(librados, rados_pg_command_enter, cluster, pgstr, cmdlen, inbuf, inbuflen); + librados::RadosClient *client = (librados::RadosClient *)cluster; + bufferlist inbl; + bufferlist outbl; + string outstring; + pg_t pgid; + vector<string> cmdvec; + + for (size_t i = 0; i < cmdlen; i++) { + tracepoint(librados, rados_pg_command_cmd, cmd[i]); + cmdvec.push_back(cmd[i]); + } + + inbl.append(inbuf, inbuflen); + if (!pgid.parse(pgstr)) + return -EINVAL; + + int ret = client->pg_command(pgid, cmdvec, inbl, &outbl, &outstring); + + do_out_buffer(outbl, outbuf, outbuflen); + do_out_buffer(outstring, outs, outslen); + tracepoint(librados, rados_pg_command_exit, ret, outbuf, outbuflen, outs, outslen); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_pg_command); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_buffer_free)(char *buf) +{ + tracepoint(librados, rados_buffer_free_enter, buf); + if (buf) + free(buf); + tracepoint(librados, rados_buffer_free_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_buffer_free); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_monitor_log)( + rados_t cluster, + const char *level, + rados_log_callback_t cb, + void *arg) +{ + tracepoint(librados, rados_monitor_log_enter, cluster, level, cb, arg); + librados::RadosClient *client = (librados::RadosClient *)cluster; + int retval = client->monitor_log(level, cb, nullptr, arg); + tracepoint(librados, rados_monitor_log_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_monitor_log); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_monitor_log2)( + rados_t cluster, + const char *level, + rados_log_callback2_t cb, + void 
*arg) +{ + tracepoint(librados, rados_monitor_log2_enter, cluster, level, cb, arg); + librados::RadosClient *client = (librados::RadosClient *)cluster; + int retval = client->monitor_log(level, nullptr, cb, arg); + tracepoint(librados, rados_monitor_log2_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_monitor_log2); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_create)( + rados_t cluster, + const char *name, + rados_ioctx_t *io) +{ + tracepoint(librados, rados_ioctx_create_enter, cluster, name); + librados::RadosClient *client = (librados::RadosClient *)cluster; + librados::IoCtxImpl *ctx; + + int r = client->create_ioctx(name, &ctx); + if (r < 0) { + tracepoint(librados, rados_ioctx_create_exit, r, NULL); + return r; + } + + *io = ctx; + ctx->get(); + tracepoint(librados, rados_ioctx_create_exit, 0, ctx); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_create); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_create2)( + rados_t cluster, + int64_t pool_id, + rados_ioctx_t *io) +{ + tracepoint(librados, rados_ioctx_create2_enter, cluster, pool_id); + librados::RadosClient *client = (librados::RadosClient *)cluster; + librados::IoCtxImpl *ctx; + + int r = client->create_ioctx(pool_id, &ctx); + if (r < 0) { + tracepoint(librados, rados_ioctx_create2_exit, r, NULL); + return r; + } + + *io = ctx; + ctx->get(); + tracepoint(librados, rados_ioctx_create2_exit, 0, ctx); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_create2); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_ioctx_destroy)(rados_ioctx_t io) +{ + tracepoint(librados, rados_ioctx_destroy_enter, io); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + if (ctx) { + ctx->put(); + } + tracepoint(librados, rados_ioctx_destroy_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_destroy); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_stat)( + rados_ioctx_t io, + struct rados_pool_stat_t *stats) +{ + tracepoint(librados, 
rados_ioctx_pool_stat_enter, io); + librados::IoCtxImpl *io_ctx_impl = (librados::IoCtxImpl *)io; + list<string> ls; + std::string pool_name; + + int err = io_ctx_impl->client->pool_get_name(io_ctx_impl->get_id(), &pool_name); + if (err) { + tracepoint(librados, rados_ioctx_pool_stat_exit, err, stats); + return err; + } + ls.push_back(pool_name); + + map<string, ::pool_stat_t> rawresult; + bool per_pool = false; + err = io_ctx_impl->client->get_pool_stats(ls, &rawresult, &per_pool); + if (err) { + tracepoint(librados, rados_ioctx_pool_stat_exit, err, stats); + return err; + } + + ::pool_stat_t& r = rawresult[pool_name]; + uint64_t allocated_bytes = r.get_allocated_data_bytes(per_pool) + + r.get_allocated_omap_bytes(per_pool); + // FIXME: raw_used_rate is unknown hence use 1.0 here + // meaning we keep net amount aggregated over all replicas + // Not a big deal so far since this field isn't exposed + uint64_t user_bytes = r.get_user_data_bytes(1.0, per_pool) + + r.get_user_omap_bytes(1.0, per_pool); + + stats->num_kb = shift_round_up(allocated_bytes, 10); + stats->num_bytes = allocated_bytes; + stats->num_objects = r.stats.sum.num_objects; + stats->num_object_clones = r.stats.sum.num_object_clones; + stats->num_object_copies = r.stats.sum.num_object_copies; + stats->num_objects_missing_on_primary = r.stats.sum.num_objects_missing_on_primary; + stats->num_objects_unfound = r.stats.sum.num_objects_unfound; + stats->num_objects_degraded = + r.stats.sum.num_objects_degraded + + r.stats.sum.num_objects_misplaced; // FIXME: this is imprecise + stats->num_rd = r.stats.sum.num_rd; + stats->num_rd_kb = r.stats.sum.num_rd_kb; + stats->num_wr = r.stats.sum.num_wr; + stats->num_wr_kb = r.stats.sum.num_wr_kb; + stats->num_user_bytes = user_bytes; + stats->compressed_bytes_orig = r.store_stats.data_compressed_original; + stats->compressed_bytes = r.store_stats.data_compressed; + stats->compressed_bytes_alloc = r.store_stats.data_compressed_allocated; + + tracepoint(librados, 
rados_ioctx_pool_stat_exit, 0, stats); + return 0; +} +LIBRADOS_C_API_DEFAULT(rados_ioctx_pool_stat, 14.2.0); + +extern "C" int LIBRADOS_C_API_BASE_F(rados_ioctx_pool_stat)( + rados_ioctx_t io, struct __librados_base::rados_pool_stat_t *stats) +{ + struct rados_pool_stat_t new_stats; + int r = LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_stat)(io, &new_stats); + if (r < 0) { + return r; + } + + stats->num_bytes = new_stats.num_bytes; + stats->num_kb = new_stats.num_kb; + stats->num_objects = new_stats.num_objects; + stats->num_object_clones = new_stats.num_object_clones; + stats->num_object_copies = new_stats.num_object_copies; + stats->num_objects_missing_on_primary = new_stats.num_objects_missing_on_primary; + stats->num_objects_unfound = new_stats.num_objects_unfound; + stats->num_objects_degraded = new_stats.num_objects_degraded; + stats->num_rd = new_stats.num_rd; + stats->num_rd_kb = new_stats.num_rd_kb; + stats->num_wr = new_stats.num_wr; + stats->num_wr_kb = new_stats.num_wr_kb; + return 0; +} +LIBRADOS_C_API_BASE(rados_ioctx_pool_stat); + +extern "C" rados_config_t LIBRADOS_C_API_DEFAULT_F(rados_ioctx_cct)( + rados_ioctx_t io) +{ + tracepoint(librados, rados_ioctx_cct_enter, io); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + rados_config_t retval = (rados_config_t)ctx->client->cct; + tracepoint(librados, rados_ioctx_cct_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_cct); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_set_read)( + rados_ioctx_t io, + rados_snap_t seq) +{ + tracepoint(librados, rados_ioctx_snap_set_read_enter, io, seq); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + ctx->set_snap_read((snapid_t)seq); + tracepoint(librados, rados_ioctx_snap_set_read_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_set_read); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_selfmanaged_snap_set_write_ctx)( + rados_ioctx_t io, + rados_snap_t seq, + rados_snap_t *snaps, + int num_snaps) 
+{ + tracepoint(librados, rados_ioctx_selfmanaged_snap_set_write_ctx_enter, io, seq, snaps, num_snaps); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + vector<snapid_t> snv; + snv.resize(num_snaps); + for (int i=0; i<num_snaps; i++) { + snv[i] = (snapid_t)snaps[i]; + } + int retval = ctx->set_snap_write_context((snapid_t)seq, snv); + tracepoint(librados, rados_ioctx_selfmanaged_snap_set_write_ctx_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_selfmanaged_snap_set_write_ctx); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_write)( + rados_ioctx_t io, + const char *o, + const char *buf, + size_t len, + uint64_t off) +{ + tracepoint(librados, rados_write_enter, io, o, buf, len, off); + if (len > UINT_MAX/2) + return -E2BIG; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + bl.append(buf, len); + int retval = ctx->write(oid, bl, len, off); + tracepoint(librados, rados_write_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_append)( + rados_ioctx_t io, + const char *o, + const char *buf, + size_t len) +{ + tracepoint(librados, rados_append_enter, io, o, buf, len); + if (len > UINT_MAX/2) + return -E2BIG; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + bl.append(buf, len); + int retval = ctx->append(oid, bl, len); + tracepoint(librados, rados_append_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_append); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_write_full)( + rados_ioctx_t io, + const char *o, + const char *buf, + size_t len) +{ + tracepoint(librados, rados_write_full_enter, io, o, buf, len); + if (len > UINT_MAX/2) + return -E2BIG; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + bl.append(buf, len); + int retval = ctx->write_full(oid, bl); + tracepoint(librados, rados_write_full_exit, 
retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_full); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_writesame)( + rados_ioctx_t io, + const char *o, + const char *buf, + size_t data_len, + size_t write_len, + uint64_t off) +{ + tracepoint(librados, rados_writesame_enter, io, o, buf, data_len, write_len, off); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + bl.append(buf, data_len); + int retval = ctx->writesame(oid, bl, write_len, off); + tracepoint(librados, rados_writesame_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_writesame); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_trunc)( + rados_ioctx_t io, + const char *o, + uint64_t size) +{ + tracepoint(librados, rados_trunc_enter, io, o, size); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int retval = ctx->trunc(oid, size); + tracepoint(librados, rados_trunc_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_trunc); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_remove)( + rados_ioctx_t io, + const char *o) +{ + tracepoint(librados, rados_remove_enter, io, o); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int retval = ctx->remove(oid); + tracepoint(librados, rados_remove_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_remove); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_read)( + rados_ioctx_t io, + const char *o, + char *buf, + size_t len, + uint64_t off) +{ + tracepoint(librados, rados_read_enter, io, o, buf, len, off); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int ret; + object_t oid(o); + + bufferlist bl; + bufferptr bp = buffer::create_static(len, buf); + bl.push_back(bp); + + ret = ctx->read(oid, bl, len, off); + if (ret >= 0) { + if (bl.length() > len) { + tracepoint(librados, rados_read_exit, -ERANGE, NULL); + return -ERANGE; + } + if (!bl.is_provided_buffer(buf)) + 
bl.begin().copy(bl.length(), buf); + ret = bl.length(); // hrm :/ + } + + tracepoint(librados, rados_read_exit, ret, buf); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_checksum)( + rados_ioctx_t io, const char *o, + rados_checksum_type_t type, + const char *init_value, size_t init_value_len, + size_t len, uint64_t off, size_t chunk_size, + char *pchecksum, size_t checksum_len) +{ + tracepoint(librados, rados_checksum_enter, io, o, type, init_value, + init_value_len, len, off, chunk_size); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + + bufferlist init_value_bl; + init_value_bl.append(init_value, init_value_len); + + bufferlist checksum_bl; + + int retval = ctx->checksum(oid, get_checksum_op_type(type), init_value_bl, + len, off, chunk_size, &checksum_bl); + if (retval >= 0) { + if (checksum_bl.length() > checksum_len) { + tracepoint(librados, rados_checksum_exit, -ERANGE, NULL, 0); + return -ERANGE; + } + + checksum_bl.begin().copy(checksum_bl.length(), pchecksum); + } + tracepoint(librados, rados_checksum_exit, retval, pchecksum, checksum_len); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_checksum); + +extern "C" uint64_t LIBRADOS_C_API_DEFAULT_F(rados_get_last_version)( + rados_ioctx_t io) +{ + tracepoint(librados, rados_get_last_version_enter, io); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + uint64_t retval = ctx->last_version(); + tracepoint(librados, rados_get_last_version_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_get_last_version); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_create)( + rados_t cluster, + const char *name) +{ + tracepoint(librados, rados_pool_create_enter, cluster, name); + librados::RadosClient *radosp = (librados::RadosClient *)cluster; + string sname(name); + int retval = radosp->pool_create(sname); + tracepoint(librados, rados_pool_create_exit, retval); + return retval; +} 
+LIBRADOS_C_API_BASE_DEFAULT(rados_pool_create); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_create_with_auid)( + rados_t cluster, + const char *name, + uint64_t auid) +{ + tracepoint(librados, rados_pool_create_with_auid_enter, cluster, name, auid); + librados::RadosClient *radosp = (librados::RadosClient *)cluster; + string sname(name); + int retval = 0; + if (auid != CEPH_AUTH_UID_DEFAULT) { + retval = -EINVAL; + } else { + retval = radosp->pool_create(sname); + } + tracepoint(librados, rados_pool_create_with_auid_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_pool_create_with_auid); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_create_with_crush_rule)( + rados_t cluster, + const char *name, + __u8 crush_rule_num) +{ + tracepoint(librados, rados_pool_create_with_crush_rule_enter, cluster, name, crush_rule_num); + librados::RadosClient *radosp = (librados::RadosClient *)cluster; + string sname(name); + int retval = radosp->pool_create(sname, crush_rule_num); + tracepoint(librados, rados_pool_create_with_crush_rule_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_pool_create_with_crush_rule); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_create_with_all)( + rados_t cluster, + const char *name, + uint64_t auid, + __u8 crush_rule_num) +{ + tracepoint(librados, rados_pool_create_with_all_enter, cluster, name, auid, crush_rule_num); + librados::RadosClient *radosp = (librados::RadosClient *)cluster; + string sname(name); + int retval = 0; + if (auid != CEPH_AUTH_UID_DEFAULT) { + retval = -EINVAL; + } else { + retval = radosp->pool_create(sname, crush_rule_num); + } + tracepoint(librados, rados_pool_create_with_all_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_pool_create_with_all); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_get_base_tier)( + rados_t cluster, + int64_t pool_id, + int64_t* base_tier) +{ + tracepoint(librados, rados_pool_get_base_tier_enter, cluster, 
pool_id); + librados::RadosClient *client = (librados::RadosClient *)cluster; + int retval = client->pool_get_base_tier(pool_id, base_tier); + tracepoint(librados, rados_pool_get_base_tier_exit, retval, *base_tier); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_pool_get_base_tier); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_pool_delete)( + rados_t cluster, + const char *pool_name) +{ + tracepoint(librados, rados_pool_delete_enter, cluster, pool_name); + librados::RadosClient *client = (librados::RadosClient *)cluster; + int retval = client->pool_delete(pool_name); + tracepoint(librados, rados_pool_delete_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_pool_delete); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_set_auid)( + rados_ioctx_t io, + uint64_t auid) +{ + tracepoint(librados, rados_ioctx_pool_set_auid_enter, io, auid); + int retval = -EOPNOTSUPP; + tracepoint(librados, rados_ioctx_pool_set_auid_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_pool_set_auid); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_get_auid)( + rados_ioctx_t io, + uint64_t *auid) +{ + tracepoint(librados, rados_ioctx_pool_get_auid_enter, io); + int retval = -EOPNOTSUPP; + tracepoint(librados, rados_ioctx_pool_get_auid_exit, retval, *auid); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_pool_get_auid); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_requires_alignment)( + rados_ioctx_t io) +{ + tracepoint(librados, rados_ioctx_pool_requires_alignment_enter, io); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->client->pool_requires_alignment(ctx->get_id()); + tracepoint(librados, rados_ioctx_pool_requires_alignment_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_pool_requires_alignment); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_requires_alignment2)( + rados_ioctx_t io, + int *requires) +{ + 
tracepoint(librados, rados_ioctx_pool_requires_alignment_enter2, io); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + bool requires_alignment; + int retval = ctx->client->pool_requires_alignment2(ctx->get_id(), + &requires_alignment); + tracepoint(librados, rados_ioctx_pool_requires_alignment_exit2, retval, + requires_alignment); + if (requires) + *requires = requires_alignment; + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_pool_requires_alignment2); + +extern "C" uint64_t LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_required_alignment)( + rados_ioctx_t io) +{ + tracepoint(librados, rados_ioctx_pool_required_alignment_enter, io); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + uint64_t retval = ctx->client->pool_required_alignment(ctx->get_id()); + tracepoint(librados, rados_ioctx_pool_required_alignment_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_pool_required_alignment); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_pool_required_alignment2)( + rados_ioctx_t io, + uint64_t *alignment) +{ + tracepoint(librados, rados_ioctx_pool_required_alignment_enter2, io); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->client->pool_required_alignment2(ctx->get_id(), + alignment); + tracepoint(librados, rados_ioctx_pool_required_alignment_exit2, retval, + *alignment); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_pool_required_alignment2); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_ioctx_locator_set_key)( + rados_ioctx_t io, + const char *key) +{ + tracepoint(librados, rados_ioctx_locator_set_key_enter, io, key); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + if (key) + ctx->oloc.key = key; + else + ctx->oloc.key = ""; + tracepoint(librados, rados_ioctx_locator_set_key_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_locator_set_key); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_ioctx_set_namespace)( + rados_ioctx_t io, + const char 
*nspace) +{ + tracepoint(librados, rados_ioctx_set_namespace_enter, io, nspace); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + if (nspace) + ctx->oloc.nspace = nspace; + else + ctx->oloc.nspace = ""; + tracepoint(librados, rados_ioctx_set_namespace_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_set_namespace); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_get_namespace)( + rados_ioctx_t io, + char *s, + unsigned maxlen) +{ + tracepoint(librados, rados_ioctx_get_namespace_enter, io, maxlen); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + auto length = ctx->oloc.nspace.length(); + if (length >= maxlen) { + tracepoint(librados, rados_ioctx_get_namespace_exit, -ERANGE, ""); + return -ERANGE; + } + strcpy(s, ctx->oloc.nspace.c_str()); + int retval = (int)length; + tracepoint(librados, rados_ioctx_get_namespace_exit, retval, s); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_get_namespace); + +extern "C" rados_t LIBRADOS_C_API_DEFAULT_F(rados_ioctx_get_cluster)( + rados_ioctx_t io) +{ + tracepoint(librados, rados_ioctx_get_cluster_enter, io); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + rados_t retval = (rados_t)ctx->client; + tracepoint(librados, rados_ioctx_get_cluster_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_get_cluster); + +extern "C" int64_t LIBRADOS_C_API_DEFAULT_F(rados_ioctx_get_id)( + rados_ioctx_t io) +{ + tracepoint(librados, rados_ioctx_get_id_enter, io); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int64_t retval = ctx->get_id(); + tracepoint(librados, rados_ioctx_get_id_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_get_id); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_get_pool_name)( + rados_ioctx_t io, + char *s, + unsigned maxlen) +{ + tracepoint(librados, rados_ioctx_get_pool_name_enter, io, maxlen); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + std::string pool_name; + + int err = 
ctx->client->pool_get_name(ctx->get_id(), &pool_name); + if (err) { + tracepoint(librados, rados_ioctx_get_pool_name_exit, err, ""); + return err; + } + if (pool_name.length() >= maxlen) { + tracepoint(librados, rados_ioctx_get_pool_name_exit, -ERANGE, ""); + return -ERANGE; + } + strcpy(s, pool_name.c_str()); + int retval = pool_name.length(); + tracepoint(librados, rados_ioctx_get_pool_name_exit, retval, s); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_get_pool_name); + +// snaps + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_create)( + rados_ioctx_t io, + const char *snapname) +{ + tracepoint(librados, rados_ioctx_snap_create_enter, io, snapname); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->snap_create(snapname); + tracepoint(librados, rados_ioctx_snap_create_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_create); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_remove)( + rados_ioctx_t io, + const char *snapname) +{ + tracepoint(librados, rados_ioctx_snap_remove_enter, io, snapname); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->snap_remove(snapname); + tracepoint(librados, rados_ioctx_snap_remove_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_remove); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_rollback)( + rados_ioctx_t io, + const char *oid, + const char *snapname) +{ + tracepoint(librados, rados_ioctx_snap_rollback_enter, io, oid, snapname); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->rollback(oid, snapname); + tracepoint(librados, rados_ioctx_snap_rollback_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_rollback); + +// Deprecated name kept for backward compatibility +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_rollback)( + rados_ioctx_t io, + const char *oid, + const char *snapname) +{ + return 
LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_rollback)(io, oid, snapname); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_rollback); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_selfmanaged_snap_create)( + rados_ioctx_t io, + uint64_t *snapid) +{ + tracepoint(librados, rados_ioctx_selfmanaged_snap_create_enter, io); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->selfmanaged_snap_create(snapid); + tracepoint(librados, rados_ioctx_selfmanaged_snap_create_exit, retval, *snapid); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_selfmanaged_snap_create); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_aio_ioctx_selfmanaged_snap_create)( + rados_ioctx_t io, + rados_snap_t *snapid, + rados_completion_t completion) +{ + tracepoint(librados, rados_ioctx_selfmanaged_snap_create_enter, io); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + librados::AioCompletionImpl *c = (librados::AioCompletionImpl*)completion; + ctx->aio_selfmanaged_snap_create(snapid, c); + tracepoint(librados, rados_ioctx_selfmanaged_snap_create_exit, 0, 0); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_ioctx_selfmanaged_snap_create); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_selfmanaged_snap_remove)( + rados_ioctx_t io, + uint64_t snapid) +{ + tracepoint(librados, rados_ioctx_selfmanaged_snap_remove_enter, io, snapid); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->selfmanaged_snap_remove(snapid); + tracepoint(librados, rados_ioctx_selfmanaged_snap_remove_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_selfmanaged_snap_remove); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_aio_ioctx_selfmanaged_snap_remove)( + rados_ioctx_t io, + rados_snap_t snapid, + rados_completion_t completion) +{ + tracepoint(librados, rados_ioctx_selfmanaged_snap_remove_enter, io, snapid); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + librados::AioCompletionImpl *c = 
(librados::AioCompletionImpl*)completion; + ctx->aio_selfmanaged_snap_remove(snapid, c); + tracepoint(librados, rados_ioctx_selfmanaged_snap_remove_exit, 0); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_ioctx_selfmanaged_snap_remove); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_selfmanaged_snap_rollback)( + rados_ioctx_t io, + const char *oid, + uint64_t snapid) +{ + tracepoint(librados, rados_ioctx_selfmanaged_snap_rollback_enter, io, oid, snapid); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->selfmanaged_snap_rollback_object(oid, ctx->snapc, snapid); + tracepoint(librados, rados_ioctx_selfmanaged_snap_rollback_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_selfmanaged_snap_rollback); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_list)( + rados_ioctx_t io, + rados_snap_t *snaps, + int maxlen) +{ + tracepoint(librados, rados_ioctx_snap_list_enter, io, maxlen); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + vector<uint64_t> snapvec; + int r = ctx->snap_list(&snapvec); + if (r < 0) { + tracepoint(librados, rados_ioctx_snap_list_exit, r, snaps, 0); + return r; + } + if ((int)snapvec.size() <= maxlen) { + for (unsigned i=0; i<snapvec.size(); i++) { + snaps[i] = snapvec[i]; + } + int retval = snapvec.size(); + tracepoint(librados, rados_ioctx_snap_list_exit, retval, snaps, retval); + return retval; + } + int retval = -ERANGE; + tracepoint(librados, rados_ioctx_snap_list_exit, retval, snaps, 0); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_list); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_lookup)( + rados_ioctx_t io, + const char *name, + rados_snap_t *id) +{ + tracepoint(librados, rados_ioctx_snap_lookup_enter, io, name); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->snap_lookup(name, (uint64_t *)id); + tracepoint(librados, rados_ioctx_snap_lookup_exit, retval, *id); + return retval; +} 
+LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_lookup); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_get_name)( + rados_ioctx_t io, + rados_snap_t id, + char *name, + int maxlen) +{ + tracepoint(librados, rados_ioctx_snap_get_name_enter, io, id, maxlen); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + std::string sname; + int r = ctx->snap_get_name(id, &sname); + if (r < 0) { + tracepoint(librados, rados_ioctx_snap_get_name_exit, r, ""); + return r; + } + if ((int)sname.length() >= maxlen) { + int retval = -ERANGE; + tracepoint(librados, rados_ioctx_snap_get_name_exit, retval, ""); + return retval; + } + strncpy(name, sname.c_str(), maxlen); + tracepoint(librados, rados_ioctx_snap_get_name_exit, 0, name); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_get_name); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_ioctx_snap_get_stamp)( + rados_ioctx_t io, + rados_snap_t id, + time_t *t) +{ + tracepoint(librados, rados_ioctx_snap_get_stamp_enter, io, id); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->snap_get_stamp(id, t); + tracepoint(librados, rados_ioctx_snap_get_stamp_exit, retval, *t); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_ioctx_snap_get_stamp); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_cmpext)( + rados_ioctx_t io, + const char *o, + const char *cmp_buf, + size_t cmp_len, + uint64_t off) +{ + tracepoint(librados, rados_cmpext_enter, io, o, cmp_buf, cmp_len, off); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int ret; + object_t oid(o); + + bufferlist cmp_bl; + cmp_bl.append(cmp_buf, cmp_len); + + ret = ctx->cmpext(oid, off, cmp_bl); + tracepoint(librados, rados_cmpext_exit, ret); + + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_cmpext); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_getxattr)( + rados_ioctx_t io, + const char *o, + const char *name, + char *buf, + size_t len) +{ + tracepoint(librados, rados_getxattr_enter, io, o, name, len); + 
librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int ret; + object_t oid(o); + bufferlist bl; + bl.push_back(buffer::create_static(len, buf)); + ret = ctx->getxattr(oid, name, bl); + if (ret >= 0) { + if (bl.length() > len) { + tracepoint(librados, rados_getxattr_exit, -ERANGE, buf, 0); + return -ERANGE; + } + if (!bl.is_provided_buffer(buf)) + bl.begin().copy(bl.length(), buf); + ret = bl.length(); + } + + tracepoint(librados, rados_getxattr_exit, ret, buf, ret); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_getxattr); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_getxattrs)( + rados_ioctx_t io, + const char *oid, + rados_xattrs_iter_t *iter) +{ + tracepoint(librados, rados_getxattrs_enter, io, oid); + librados::RadosXattrsIter *it = new librados::RadosXattrsIter(); + if (!it) { + tracepoint(librados, rados_getxattrs_exit, -ENOMEM, NULL); + return -ENOMEM; + } + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t obj(oid); + int ret = ctx->getxattrs(obj, it->attrset); + if (ret) { + delete it; + tracepoint(librados, rados_getxattrs_exit, ret, NULL); + return ret; + } + it->i = it->attrset.begin(); + + *iter = it; + tracepoint(librados, rados_getxattrs_exit, 0, *iter); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_getxattrs); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_getxattrs_next)( + rados_xattrs_iter_t iter, + const char **name, + const char **val, + size_t *len) +{ + tracepoint(librados, rados_getxattrs_next_enter, iter); + librados::RadosXattrsIter *it = static_cast<librados::RadosXattrsIter*>(iter); + if (it->val) { + free(it->val); + it->val = NULL; + } + if (it->i == it->attrset.end()) { + *name = NULL; + *val = NULL; + *len = 0; + tracepoint(librados, rados_getxattrs_next_exit, 0, NULL, NULL, 0); + return 0; + } + const std::string &s(it->i->first); + *name = s.c_str(); + bufferlist &bl(it->i->second); + size_t bl_len = bl.length(); + if (!bl_len) { + // malloc(0) is not guaranteed to return a valid pointer + *val = 
(char *)NULL; + } else { + it->val = (char*)malloc(bl_len); + if (!it->val) { + tracepoint(librados, rados_getxattrs_next_exit, -ENOMEM, *name, NULL, 0); + return -ENOMEM; + } + memcpy(it->val, bl.c_str(), bl_len); + *val = it->val; + } + *len = bl_len; + ++it->i; + tracepoint(librados, rados_getxattrs_next_exit, 0, *name, *val, *len); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_getxattrs_next); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_getxattrs_end)( + rados_xattrs_iter_t iter) +{ + tracepoint(librados, rados_getxattrs_end_enter, iter); + librados::RadosXattrsIter *it = static_cast<librados::RadosXattrsIter*>(iter); + delete it; + tracepoint(librados, rados_getxattrs_end_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_getxattrs_end); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_setxattr)( + rados_ioctx_t io, + const char *o, + const char *name, + const char *buf, + size_t len) +{ + tracepoint(librados, rados_setxattr_enter, io, o, name, buf, len); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + bl.append(buf, len); + int retval = ctx->setxattr(oid, name, bl); + tracepoint(librados, rados_setxattr_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_setxattr); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_rmxattr)( + rados_ioctx_t io, + const char *o, + const char *name) +{ + tracepoint(librados, rados_rmxattr_enter, io, o, name); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int retval = ctx->rmxattr(oid, name); + tracepoint(librados, rados_rmxattr_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_rmxattr); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_stat)( + rados_ioctx_t io, + const char *o, + uint64_t *psize, + time_t *pmtime) +{ + tracepoint(librados, rados_stat_enter, io, o); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int retval = ctx->stat(oid, psize, pmtime); + tracepoint(librados, 
rados_stat_exit, retval, psize, pmtime); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_stat); + +extern "C" int LIBRADOS_C_API_BASE_F(rados_tmap_update)( + rados_ioctx_t io, + const char *o, + const char *cmdbuf, + size_t cmdbuflen) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist cmdbl; + cmdbl.append(cmdbuf, cmdbuflen); + return ctx->tmap_update(oid, cmdbl); +} +LIBRADOS_C_API_BASE(rados_tmap_update); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_tmap_update)( + rados_ioctx_t io, + const char *o, + const char *cmdbuf, + size_t cmdbuflen) +{ + return -ENOTSUP; +} +LIBRADOS_C_API_DEFAULT(rados_tmap_update, 14.2.0); + +extern "C" int LIBRADOS_C_API_BASE_F(rados_tmap_put)( + rados_ioctx_t io, + const char *o, + const char *buf, + size_t buflen) +{ + bufferlist bl; + bl.append(buf, buflen); + + bufferlist header; + std::map<std::string, bufferlist> m; + bufferlist::const_iterator bl_it = bl.begin(); + decode(header, bl_it); + decode(m, bl_it); + + bufferlist out_bl; + encode(header, out_bl); + encode(m, out_bl); + + return LIBRADOS_C_API_DEFAULT_F(rados_write_full)( + io, o, out_bl.c_str(), out_bl.length()); +} +LIBRADOS_C_API_BASE(rados_tmap_put); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_tmap_put)( + rados_ioctx_t io, + const char *o, + const char *buf, + size_t buflen) +{ + return -EOPNOTSUPP; +} +LIBRADOS_C_API_DEFAULT(rados_tmap_put, 14.2.0); + +extern "C" int LIBRADOS_C_API_BASE_F(rados_tmap_get)( + rados_ioctx_t io, + const char *o, + char *buf, + size_t buflen) +{ + return LIBRADOS_C_API_DEFAULT_F(rados_read)(io, o, buf, buflen, 0); +} +LIBRADOS_C_API_BASE(rados_tmap_get); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_tmap_get)( + rados_ioctx_t io, + const char *o, + char *buf, + size_t buflen) +{ + return -EOPNOTSUPP; +} +LIBRADOS_C_API_DEFAULT(rados_tmap_get, 14.2.0); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_exec)( + rados_ioctx_t io, + const char *o, + const char *cls, + const char 
*method, + const char *inbuf, + size_t in_len, + char *buf, + size_t out_len) +{ + tracepoint(librados, rados_exec_enter, io, o, cls, method, inbuf, in_len, out_len); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist inbl, outbl; + int ret; + inbl.append(inbuf, in_len); + ret = ctx->exec(oid, cls, method, inbl, outbl); + if (ret >= 0) { + if (outbl.length()) { + if (outbl.length() > out_len) { + tracepoint(librados, rados_exec_exit, -ERANGE, buf, 0); + return -ERANGE; + } + outbl.begin().copy(outbl.length(), buf); + ret = outbl.length(); // hrm :/ + } + } + tracepoint(librados, rados_exec_exit, ret, buf, ret); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_exec); + +extern "C" rados_object_list_cursor LIBRADOS_C_API_DEFAULT_F(rados_object_list_begin)( + rados_ioctx_t io) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + + hobject_t *result = new hobject_t(ctx->objecter->enumerate_objects_begin()); + return (rados_object_list_cursor)result; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_begin); + +extern "C" rados_object_list_cursor LIBRADOS_C_API_DEFAULT_F(rados_object_list_end)( + rados_ioctx_t io) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + + hobject_t *result = new hobject_t(ctx->objecter->enumerate_objects_end()); + return (rados_object_list_cursor)result; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_end); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_object_list_is_end)( + rados_ioctx_t io, + rados_object_list_cursor cur) +{ + hobject_t *hobj = (hobject_t*)cur; + return hobj->is_max(); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_is_end); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_object_list_cursor_free)( + rados_ioctx_t io, + rados_object_list_cursor cur) +{ + hobject_t *hobj = (hobject_t*)cur; + delete hobj; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_cursor_free); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_object_list_cursor_cmp)( + rados_ioctx_t io, + 
rados_object_list_cursor lhs_cur, + rados_object_list_cursor rhs_cur) +{ + hobject_t *lhs = (hobject_t*)lhs_cur; + hobject_t *rhs = (hobject_t*)rhs_cur; + return cmp(*lhs, *rhs); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_cursor_cmp); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_object_list)(rados_ioctx_t io, + const rados_object_list_cursor start, + const rados_object_list_cursor finish, + const size_t result_item_count, + const char *filter_buf, + const size_t filter_buf_len, + rados_object_list_item *result_items, + rados_object_list_cursor *next) +{ + ceph_assert(next); + + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + + // Zero out items so that they will be safe to free later + // FIPS zeroization audit 20191116: this memset is not security related. + memset(result_items, 0, sizeof(rados_object_list_item) * result_item_count); + + bufferlist filter_bl; + if (filter_buf != nullptr) { + filter_bl.append(filter_buf, filter_buf_len); + } + + ceph::async::waiter<boost::system::error_code, + std::vector<librados::ListObjectImpl>, + hobject_t> w; + ctx->objecter->enumerate_objects<librados::ListObjectImpl>( + ctx->poolid, + ctx->oloc.nspace, + *((hobject_t*)start), + *((hobject_t*)finish), + result_item_count, + filter_bl, + w); + + hobject_t *next_hobj = (hobject_t*)(*next); + ceph_assert(next_hobj); + + auto [ec, result, next_hash] = w.wait(); + + if (ec) { + *next_hobj = hobject_t::get_max(); + return ceph::from_error_code(ec); + } + + ceph_assert(result.size() <= result_item_count); // Don't overflow! 
+ + int k = 0; + for (auto i = result.begin(); i != result.end(); ++i) { + rados_object_list_item &item = result_items[k++]; + do_out_buffer(i->oid, &item.oid, &item.oid_length); + do_out_buffer(i->nspace, &item.nspace, &item.nspace_length); + do_out_buffer(i->locator, &item.locator, &item.locator_length); + } + + *next_hobj = next_hash; + + return result.size(); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_object_list); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_object_list_free)( + const size_t result_size, + rados_object_list_item *results) +{ + ceph_assert(results); + + for (unsigned int i = 0; i < result_size; ++i) { + LIBRADOS_C_API_DEFAULT_F(rados_buffer_free)(results[i].oid); + LIBRADOS_C_API_DEFAULT_F(rados_buffer_free)(results[i].locator); + LIBRADOS_C_API_DEFAULT_F(rados_buffer_free)(results[i].nspace); + } +} +LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_free); + +/* list objects */ + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_open)( + rados_ioctx_t io, + rados_list_ctx_t *listh) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + + tracepoint(librados, rados_nobjects_list_open_enter, io); + + Objecter::NListContext *h = new Objecter::NListContext; + h->pool_id = ctx->poolid; + h->pool_snap_seq = ctx->snap_seq; + h->nspace = ctx->oloc.nspace; // After dropping compatibility need nspace + *listh = (void *)new librados::ObjListCtx(ctx, h); + tracepoint(librados, rados_nobjects_list_open_exit, 0, *listh); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_open); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_close)( + rados_list_ctx_t h) +{ + tracepoint(librados, rados_nobjects_list_close_enter, h); + librados::ObjListCtx *lh = (librados::ObjListCtx *)h; + delete lh; + tracepoint(librados, rados_nobjects_list_close_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_close); + +extern "C" uint32_t LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_seek)( + rados_list_ctx_t listctx, + uint32_t pos) 
+{ + librados::ObjListCtx *lh = (librados::ObjListCtx *)listctx; + tracepoint(librados, rados_nobjects_list_seek_enter, listctx, pos); + uint32_t r = lh->ctx->nlist_seek(lh->nlc, pos); + tracepoint(librados, rados_nobjects_list_seek_exit, r); + return r; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_seek); + +extern "C" uint32_t LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_seek_cursor)( + rados_list_ctx_t listctx, + rados_object_list_cursor cursor) +{ + librados::ObjListCtx *lh = (librados::ObjListCtx *)listctx; + + tracepoint(librados, rados_nobjects_list_seek_cursor_enter, listctx); + uint32_t r = lh->ctx->nlist_seek(lh->nlc, cursor); + tracepoint(librados, rados_nobjects_list_seek_cursor_exit, r); + return r; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_seek_cursor); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_get_cursor)( + rados_list_ctx_t listctx, + rados_object_list_cursor *cursor) +{ + librados::ObjListCtx *lh = (librados::ObjListCtx *)listctx; + + tracepoint(librados, rados_nobjects_list_get_cursor_enter, listctx); + *cursor = lh->ctx->nlist_get_cursor(lh->nlc); + tracepoint(librados, rados_nobjects_list_get_cursor_exit, 0); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_get_cursor); + +extern "C" uint32_t LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_get_pg_hash_position)( + rados_list_ctx_t listctx) +{ + librados::ObjListCtx *lh = (librados::ObjListCtx *)listctx; + tracepoint(librados, rados_nobjects_list_get_pg_hash_position_enter, listctx); + uint32_t retval = lh->nlc->get_pg_hash_position(); + tracepoint(librados, rados_nobjects_list_get_pg_hash_position_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_get_pg_hash_position); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_next)( + rados_list_ctx_t listctx, + const char **entry, + const char **key, + const char **nspace) +{ + tracepoint(librados, rados_nobjects_list_next_enter, listctx); + uint32_t retval 
= rados_nobjects_list_next2(listctx, entry, key, nspace, NULL, NULL, NULL); + tracepoint(librados, rados_nobjects_list_next_exit, 0, *entry, key, nspace); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_next); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_nobjects_list_next2)( + rados_list_ctx_t listctx, + const char **entry, + const char **key, + const char **nspace, + size_t *entry_size, + size_t *key_size, + size_t *nspace_size) +{ + tracepoint(librados, rados_nobjects_list_next2_enter, listctx); + librados::ObjListCtx *lh = (librados::ObjListCtx *)listctx; + Objecter::NListContext *h = lh->nlc; + + // if the list is non-empty, this method has been called before + if (!h->list.empty()) + // so let's kill the previously-returned object + h->list.pop_front(); + + if (h->list.empty()) { + int ret = lh->ctx->nlist(lh->nlc, RADOS_LIST_MAX_ENTRIES); + if (ret < 0) { + tracepoint(librados, rados_nobjects_list_next2_exit, ret, NULL, NULL, NULL, NULL, NULL, NULL); + return ret; + } + if (h->list.empty()) { + tracepoint(librados, rados_nobjects_list_next2_exit, -ENOENT, NULL, NULL, NULL, NULL, NULL, NULL); + return -ENOENT; + } + } + + *entry = h->list.front().oid.c_str(); + + if (key) { + if (h->list.front().locator.size()) + *key = h->list.front().locator.c_str(); + else + *key = NULL; + } + if (nspace) + *nspace = h->list.front().nspace.c_str(); + + if (entry_size) + *entry_size = h->list.front().oid.size(); + if (key_size) + *key_size = h->list.front().locator.size(); + if (nspace_size) + *nspace_size = h->list.front().nspace.size(); + + tracepoint(librados, rados_nobjects_list_next2_exit, 0, entry, key, nspace, + entry_size, key_size, nspace_size); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_nobjects_list_next2); + + +/* + * removed legacy v2 list objects stubs + * + * thse return -ENOTSUP where possible. 
+ */ +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_objects_list_open)( + rados_ioctx_t io, + rados_list_ctx_t *ctx) +{ + return -ENOTSUP; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_objects_list_open); + +extern "C" uint32_t LIBRADOS_C_API_DEFAULT_F(rados_objects_list_get_pg_hash_position)( + rados_list_ctx_t ctx) +{ + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_objects_list_get_pg_hash_position); + +extern "C" uint32_t LIBRADOS_C_API_DEFAULT_F(rados_objects_list_seek)( + rados_list_ctx_t ctx, + uint32_t pos) +{ + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_objects_list_seek); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_objects_list_next)( + rados_list_ctx_t ctx, + const char **entry, + const char **key) +{ + return -ENOTSUP; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_objects_list_next); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_objects_list_close)( + rados_list_ctx_t ctx) +{ +} +LIBRADOS_C_API_BASE_DEFAULT(rados_objects_list_close); + + +// ------------------------- +// aio + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_create_completion)( + void *cb_arg, + rados_callback_t cb_complete, + rados_callback_t cb_safe, + rados_completion_t *pc) +{ + tracepoint(librados, rados_aio_create_completion_enter, cb_arg, cb_complete, cb_safe); + librados::AioCompletionImpl *c = new librados::AioCompletionImpl; + if (cb_complete) + c->set_complete_callback(cb_arg, cb_complete); + if (cb_safe) + c->set_safe_callback(cb_arg, cb_safe); + *pc = c; + tracepoint(librados, rados_aio_create_completion_exit, 0, *pc); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_create_completion); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_create_completion2)( + void *cb_arg, + rados_callback_t cb_complete, + rados_completion_t *pc) +{ + tracepoint(librados, rados_aio_create_completion2_enter, cb_arg, cb_complete); + librados::AioCompletionImpl *c = new librados::AioCompletionImpl; + if (cb_complete) + c->set_complete_callback(cb_arg, cb_complete); + *pc = c; + 
tracepoint(librados, rados_aio_create_completion2_exit, 0, *pc); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_create_completion2); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_wait_for_complete)( + rados_completion_t c) +{ + tracepoint(librados, rados_aio_wait_for_complete_enter, c); + int retval = ((librados::AioCompletionImpl*)c)->wait_for_complete(); + tracepoint(librados, rados_aio_wait_for_complete_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_wait_for_complete); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_wait_for_safe)( + rados_completion_t c) +{ + tracepoint(librados, rados_aio_wait_for_safe_enter, c); + int retval = ((librados::AioCompletionImpl*)c)->wait_for_complete(); + tracepoint(librados, rados_aio_wait_for_safe_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_wait_for_safe); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_is_complete)( + rados_completion_t c) +{ + tracepoint(librados, rados_aio_is_complete_enter, c); + int retval = ((librados::AioCompletionImpl*)c)->is_complete(); + tracepoint(librados, rados_aio_is_complete_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_is_complete); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_is_safe)( + rados_completion_t c) +{ + tracepoint(librados, rados_aio_is_safe_enter, c); + int retval = ((librados::AioCompletionImpl*)c)->is_safe(); + tracepoint(librados, rados_aio_is_safe_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_is_safe); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_wait_for_complete_and_cb)( + rados_completion_t c) +{ + tracepoint(librados, rados_aio_wait_for_complete_and_cb_enter, c); + int retval = ((librados::AioCompletionImpl*)c)->wait_for_complete_and_cb(); + tracepoint(librados, rados_aio_wait_for_complete_and_cb_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_wait_for_complete_and_cb); + +extern "C" int 
LIBRADOS_C_API_DEFAULT_F(rados_aio_wait_for_safe_and_cb)( + rados_completion_t c) +{ + tracepoint(librados, rados_aio_wait_for_safe_and_cb_enter, c); + int retval = ((librados::AioCompletionImpl*)c)->wait_for_safe_and_cb(); + tracepoint(librados, rados_aio_wait_for_safe_and_cb_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_wait_for_safe_and_cb); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_is_complete_and_cb)( + rados_completion_t c) +{ + tracepoint(librados, rados_aio_is_complete_and_cb_enter, c); + int retval = ((librados::AioCompletionImpl*)c)->is_complete_and_cb(); + tracepoint(librados, rados_aio_is_complete_and_cb_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_is_complete_and_cb); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_is_safe_and_cb)( + rados_completion_t c) +{ + tracepoint(librados, rados_aio_is_safe_and_cb_enter, c); + int retval = ((librados::AioCompletionImpl*)c)->is_safe_and_cb(); + tracepoint(librados, rados_aio_is_safe_and_cb_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_is_safe_and_cb); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_get_return_value)( + rados_completion_t c) +{ + tracepoint(librados, rados_aio_get_return_value_enter, c); + int retval = ((librados::AioCompletionImpl*)c)->get_return_value(); + tracepoint(librados, rados_aio_get_return_value_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_get_return_value); + +extern "C" uint64_t LIBRADOS_C_API_DEFAULT_F(rados_aio_get_version)( + rados_completion_t c) +{ + tracepoint(librados, rados_aio_get_version_enter, c); + uint64_t retval = ((librados::AioCompletionImpl*)c)->get_version(); + tracepoint(librados, rados_aio_get_version_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_get_version); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_aio_release)( + rados_completion_t c) +{ + tracepoint(librados, rados_aio_release_enter, c); + 
((librados::AioCompletionImpl*)c)->put(); + tracepoint(librados, rados_aio_release_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_release); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_read)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + char *buf, size_t len, uint64_t off) +{ + tracepoint(librados, rados_aio_read_enter, io, o, completion, len, off); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int retval = ctx->aio_read(oid, (librados::AioCompletionImpl*)completion, + buf, len, off, ctx->snap_seq); + tracepoint(librados, rados_aio_read_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_read); + +#ifdef WITH_BLKIN +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_read_traced)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + char *buf, size_t len, uint64_t off, + struct blkin_trace_info *info) +{ + tracepoint(librados, rados_aio_read_enter, io, o, completion, len, off); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int retval = ctx->aio_read(oid, (librados::AioCompletionImpl*)completion, + buf, len, off, ctx->snap_seq, info); + tracepoint(librados, rados_aio_read_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_read_traced); +#endif + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_write)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *buf, size_t len, uint64_t off) +{ + tracepoint(librados, rados_aio_write_enter, io, o, completion, buf, len, off); + if (len > UINT_MAX/2) + return -E2BIG; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + bl.append(buf, len); + int retval = ctx->aio_write(oid, (librados::AioCompletionImpl*)completion, + bl, len, off); + tracepoint(librados, rados_aio_write_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_write); + +#ifdef WITH_BLKIN +extern "C" int 
LIBRADOS_C_API_DEFAULT_F(rados_aio_write_traced)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *buf, size_t len, uint64_t off, + struct blkin_trace_info *info) +{ + tracepoint(librados, rados_aio_write_enter, io, o, completion, buf, len, off); + if (len > UINT_MAX/2) + return -E2BIG; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + bl.append(buf, len); + int retval = ctx->aio_write(oid, (librados::AioCompletionImpl*)completion, + bl, len, off, info); + tracepoint(librados, rados_aio_write_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_write_traced); +#endif + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_append)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *buf, size_t len) +{ + tracepoint(librados, rados_aio_append_enter, io, o, completion, buf, len); + if (len > UINT_MAX/2) + return -E2BIG; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + bl.append(buf, len); + int retval = ctx->aio_append(oid, (librados::AioCompletionImpl*)completion, + bl, len); + tracepoint(librados, rados_aio_append_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_append); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_write_full)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *buf, size_t len) +{ + tracepoint(librados, rados_aio_write_full_enter, io, o, completion, buf, len); + if (len > UINT_MAX/2) + return -E2BIG; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + bl.append(buf, len); + int retval = ctx->aio_write_full(oid, (librados::AioCompletionImpl*)completion, bl); + tracepoint(librados, rados_aio_write_full_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_write_full); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_writesame)( + rados_ioctx_t io, const 
char *o, + rados_completion_t completion, + const char *buf, size_t data_len, + size_t write_len, uint64_t off) +{ + tracepoint(librados, rados_aio_writesame_enter, io, o, completion, buf, + data_len, write_len, off); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + bl.append(buf, data_len); + int retval = ctx->aio_writesame(o, (librados::AioCompletionImpl*)completion, + bl, write_len, off); + tracepoint(librados, rados_aio_writesame_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_writesame); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_remove)( + rados_ioctx_t io, const char *o, + rados_completion_t completion) +{ + tracepoint(librados, rados_aio_remove_enter, io, o, completion); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int retval = ctx->aio_remove(oid, (librados::AioCompletionImpl*)completion); + tracepoint(librados, rados_aio_remove_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_remove); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_flush_async)( + rados_ioctx_t io, + rados_completion_t completion) +{ + tracepoint(librados, rados_aio_flush_async_enter, io, completion); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + ctx->flush_aio_writes_async((librados::AioCompletionImpl*)completion); + tracepoint(librados, rados_aio_flush_async_exit, 0); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_flush_async); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_flush)(rados_ioctx_t io) +{ + tracepoint(librados, rados_aio_flush_enter, io); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + ctx->flush_aio_writes(); + tracepoint(librados, rados_aio_flush_exit, 0); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_flush); + +struct AioGetxattrData { + AioGetxattrData(char* buf, rados_completion_t c, size_t l) : + user_buf(buf), len(l), user_completion((librados::AioCompletionImpl*)c) {} + bufferlist 
bl; + char* user_buf; + size_t len; + struct librados::CB_AioCompleteAndSafe user_completion; +}; + +static void rados_aio_getxattr_complete(rados_completion_t c, void *arg) { + AioGetxattrData *cdata = reinterpret_cast<AioGetxattrData*>(arg); + int rc = LIBRADOS_C_API_DEFAULT_F(rados_aio_get_return_value)(c); + if (rc >= 0) { + if (cdata->bl.length() > cdata->len) { + rc = -ERANGE; + } else { + if (!cdata->bl.is_provided_buffer(cdata->user_buf)) + cdata->bl.begin().copy(cdata->bl.length(), cdata->user_buf); + rc = cdata->bl.length(); + } + } + cdata->user_completion(rc); + reinterpret_cast<librados::AioCompletionImpl*>(c)->put(); + delete cdata; +} + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_getxattr)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *name, char *buf, size_t len) +{ + tracepoint(librados, rados_aio_getxattr_enter, io, o, completion, name, len); + // create data object to be passed to async callback + AioGetxattrData *cdata = new AioGetxattrData(buf, completion, len); + if (!cdata) { + tracepoint(librados, rados_aio_getxattr_exit, -ENOMEM, NULL, 0); + return -ENOMEM; + } + cdata->bl.push_back(buffer::create_static(len, buf)); + // create completion callback + librados::AioCompletionImpl *c = new librados::AioCompletionImpl; + c->set_complete_callback(cdata, rados_aio_getxattr_complete); + // call async getxattr of IoCtx + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int ret = ctx->aio_getxattr(oid, c, name, cdata->bl); + tracepoint(librados, rados_aio_getxattr_exit, ret, buf, ret); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_getxattr); + +namespace { +struct AioGetxattrsData { + AioGetxattrsData(rados_completion_t c, rados_xattrs_iter_t *_iter) : + iter(_iter), user_completion((librados::AioCompletionImpl*)c) { + it = new librados::RadosXattrsIter(); + } + ~AioGetxattrsData() { + if (it) delete it; + } + librados::RadosXattrsIter *it; + rados_xattrs_iter_t *iter; + 
struct librados::CB_AioCompleteAndSafe user_completion; +}; +} + +static void rados_aio_getxattrs_complete(rados_completion_t c, void *arg) { + AioGetxattrsData *cdata = reinterpret_cast<AioGetxattrsData*>(arg); + int rc = LIBRADOS_C_API_DEFAULT_F(rados_aio_get_return_value)(c); + if (rc) { + cdata->user_completion(rc); + } else { + cdata->it->i = cdata->it->attrset.begin(); + *cdata->iter = cdata->it; + cdata->it = 0; + cdata->user_completion(0); + } + reinterpret_cast<librados::AioCompletionImpl*>(c)->put(); + delete cdata; +} + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_getxattrs)( + rados_ioctx_t io, const char *oid, + rados_completion_t completion, + rados_xattrs_iter_t *iter) +{ + tracepoint(librados, rados_aio_getxattrs_enter, io, oid, completion); + // create data object to be passed to async callback + AioGetxattrsData *cdata = new AioGetxattrsData(completion, iter); + if (!cdata) { + tracepoint(librados, rados_getxattrs_exit, -ENOMEM, NULL); + return -ENOMEM; + } + // create completion callback + librados::AioCompletionImpl *c = new librados::AioCompletionImpl; + c->set_complete_callback(cdata, rados_aio_getxattrs_complete); + // call async getxattrs of IoCtx + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t obj(oid); + int ret = ctx->aio_getxattrs(obj, c, cdata->it->attrset); + tracepoint(librados, rados_aio_getxattrs_exit, ret, cdata->it); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_getxattrs); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_setxattr)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *name, const char *buf, size_t len) +{ + tracepoint(librados, rados_aio_setxattr_enter, io, o, completion, name, buf, len); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + bl.append(buf, len); + int retval = ctx->aio_setxattr(oid, (librados::AioCompletionImpl*)completion, name, bl); + tracepoint(librados, rados_aio_setxattr_exit, retval); 
+ return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_setxattr); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_rmxattr)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *name) +{ + tracepoint(librados, rados_aio_rmxattr_enter, io, o, completion, name); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int retval = ctx->aio_rmxattr(oid, (librados::AioCompletionImpl*)completion, name); + tracepoint(librados, rados_aio_rmxattr_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_rmxattr); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_stat)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + uint64_t *psize, time_t *pmtime) +{ + tracepoint(librados, rados_aio_stat_enter, io, o, completion); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int retval = ctx->aio_stat(oid, (librados::AioCompletionImpl*)completion, + psize, pmtime); + tracepoint(librados, rados_aio_stat_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_stat); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_cmpext)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, const char *cmp_buf, + size_t cmp_len, uint64_t off) +{ + tracepoint(librados, rados_aio_cmpext_enter, io, o, completion, cmp_buf, + cmp_len, off); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int retval = ctx->aio_cmpext(oid, (librados::AioCompletionImpl*)completion, + cmp_buf, cmp_len, off); + tracepoint(librados, rados_aio_cmpext_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_cmpext); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_cancel)( + rados_ioctx_t io, + rados_completion_t completion) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + return ctx->aio_cancel((librados::AioCompletionImpl*)completion); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_cancel); + +extern "C" int 
LIBRADOS_C_API_DEFAULT_F(rados_aio_exec)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *cls, const char *method, + const char *inbuf, size_t in_len, + char *buf, size_t out_len) +{ + tracepoint(librados, rados_aio_exec_enter, io, o, completion); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist inbl; + inbl.append(inbuf, in_len); + int retval = ctx->aio_exec(oid, (librados::AioCompletionImpl*)completion, + cls, method, inbl, buf, out_len); + tracepoint(librados, rados_aio_exec_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_exec); + +struct C_WatchCB : public librados::WatchCtx { + rados_watchcb_t wcb; + void *arg; + C_WatchCB(rados_watchcb_t _wcb, void *_arg) : wcb(_wcb), arg(_arg) {} + void notify(uint8_t opcode, uint64_t ver, bufferlist& bl) override { + wcb(opcode, ver, arg); + } +}; + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_watch)( + rados_ioctx_t io, const char *o, uint64_t ver, + uint64_t *handle, + rados_watchcb_t watchcb, void *arg) +{ + tracepoint(librados, rados_watch_enter, io, o, ver, watchcb, arg); + uint64_t *cookie = handle; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + C_WatchCB *wc = new C_WatchCB(watchcb, arg); + int retval = ctx->watch(oid, cookie, wc, NULL, true); + tracepoint(librados, rados_watch_exit, retval, *handle); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_watch); + +struct C_WatchCB2 : public librados::WatchCtx2 { + rados_watchcb2_t wcb; + rados_watcherrcb_t errcb; + void *arg; + C_WatchCB2(rados_watchcb2_t _wcb, + rados_watcherrcb_t _errcb, + void *_arg) : wcb(_wcb), errcb(_errcb), arg(_arg) {} + void handle_notify(uint64_t notify_id, + uint64_t cookie, + uint64_t notifier_gid, + bufferlist& bl) override { + wcb(arg, notify_id, cookie, notifier_gid, bl.c_str(), bl.length()); + } + void handle_error(uint64_t cookie, int err) override { + if (errcb) + errcb(arg, cookie, err); + } +}; + 
+extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_watch3)( + rados_ioctx_t io, const char *o, uint64_t *handle, + rados_watchcb2_t watchcb, + rados_watcherrcb_t watcherrcb, + uint32_t timeout, + void *arg) +{ + tracepoint(librados, rados_watch3_enter, io, o, handle, watchcb, timeout, arg); + int ret; + if (!watchcb || !o || !handle) { + ret = -EINVAL; + } else { + uint64_t *cookie = handle; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + C_WatchCB2 *wc = new C_WatchCB2(watchcb, watcherrcb, arg); + ret = ctx->watch(oid, cookie, NULL, wc, timeout, true); + } + tracepoint(librados, rados_watch3_exit, ret, handle ? *handle : 0); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_watch3); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_watch2)( + rados_ioctx_t io, const char *o, uint64_t *handle, + rados_watchcb2_t watchcb, + rados_watcherrcb_t watcherrcb, + void *arg) +{ + return LIBRADOS_C_API_DEFAULT_F(rados_watch3)( + io, o, handle, watchcb, watcherrcb, 0, arg); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_watch2); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_watch2)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + uint64_t *handle, + rados_watchcb2_t watchcb, + rados_watcherrcb_t watcherrcb, + uint32_t timeout, void *arg) +{ + tracepoint(librados, rados_aio_watch2_enter, io, o, completion, handle, watchcb, timeout, arg); + int ret; + if (!completion || !watchcb || !o || !handle) { + ret = -EINVAL; + } else { + uint64_t *cookie = handle; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + librados::AioCompletionImpl *c = + reinterpret_cast<librados::AioCompletionImpl*>(completion); + C_WatchCB2 *wc = new C_WatchCB2(watchcb, watcherrcb, arg); + ret = ctx->aio_watch(oid, c, cookie, NULL, wc, timeout, true); + } + tracepoint(librados, rados_aio_watch2_exit, ret, handle ? 
*handle : 0); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_watch2); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_watch)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + uint64_t *handle, + rados_watchcb2_t watchcb, + rados_watcherrcb_t watcherrcb, void *arg) +{ + return LIBRADOS_C_API_DEFAULT_F(rados_aio_watch2)( + io, o, completion, handle, watchcb, watcherrcb, 0, arg); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_watch); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_unwatch)( + rados_ioctx_t io, + const char *o, + uint64_t handle) +{ + tracepoint(librados, rados_unwatch_enter, io, o, handle); + uint64_t cookie = handle; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->unwatch(cookie); + tracepoint(librados, rados_unwatch_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_unwatch); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_unwatch2)( + rados_ioctx_t io, + uint64_t handle) +{ + tracepoint(librados, rados_unwatch2_enter, io, handle); + uint64_t cookie = handle; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->unwatch(cookie); + tracepoint(librados, rados_unwatch2_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_unwatch2); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_unwatch)( + rados_ioctx_t io, uint64_t handle, + rados_completion_t completion) +{ + tracepoint(librados, rados_aio_unwatch_enter, io, handle, completion); + uint64_t cookie = handle; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + librados::AioCompletionImpl *c = + reinterpret_cast<librados::AioCompletionImpl*>(completion); + int retval = ctx->aio_unwatch(cookie, c); + tracepoint(librados, rados_aio_unwatch_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_unwatch); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_watch_check)( + rados_ioctx_t io, + uint64_t handle) +{ + tracepoint(librados, rados_watch_check_enter, io, 
handle); + uint64_t cookie = handle; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->watch_check(cookie); + tracepoint(librados, rados_watch_check_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_watch_check); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_notify)( + rados_ioctx_t io, const char *o, + uint64_t ver, const char *buf, int buf_len) +{ + tracepoint(librados, rados_notify_enter, io, o, ver, buf, buf_len); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + if (buf) { + bufferptr p = buffer::create(buf_len); + memcpy(p.c_str(), buf, buf_len); + bl.push_back(p); + } + int retval = ctx->notify(oid, bl, 0, NULL, NULL, NULL); + tracepoint(librados, rados_notify_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_notify); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_notify2)( + rados_ioctx_t io, const char *o, + const char *buf, int buf_len, + uint64_t timeout_ms, + char **reply_buffer, + size_t *reply_buffer_len) +{ + tracepoint(librados, rados_notify2_enter, io, o, buf, buf_len, timeout_ms); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + if (buf) { + bufferptr p = buffer::create(buf_len); + memcpy(p.c_str(), buf, buf_len); + bl.push_back(p); + } + int ret = ctx->notify(oid, bl, timeout_ms, NULL, reply_buffer, reply_buffer_len); + tracepoint(librados, rados_notify2_exit, ret); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_notify2); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_decode_notify_response)( + char *reply_buffer, size_t reply_buffer_len, + struct notify_ack_t **acks, size_t *nr_acks, + struct notify_timeout_t **timeouts, size_t *nr_timeouts) +{ + if (!reply_buffer || !reply_buffer_len) { + return -EINVAL; + } + + bufferlist bl; + bl.append(reply_buffer, reply_buffer_len); + + map<pair<uint64_t,uint64_t>,bufferlist> acked; + set<pair<uint64_t,uint64_t>> missed; + auto iter = 
bl.cbegin(); + decode(acked, iter); + decode(missed, iter); + + *acks = nullptr; + *nr_acks = acked.size(); + if (*nr_acks) { + *acks = new notify_ack_t[*nr_acks]; + struct notify_ack_t *ack = *acks; + for (auto &[who, payload] : acked) { + ack->notifier_id = who.first; + ack->cookie = who.second; + ack->payload = nullptr; + ack->payload_len = payload.length(); + if (ack->payload_len) { + ack->payload = (char *)malloc(ack->payload_len); + memcpy(ack->payload, payload.c_str(), ack->payload_len); + } + + ack++; + } + } + + *timeouts = nullptr; + *nr_timeouts = missed.size(); + if (*nr_timeouts) { + *timeouts = new notify_timeout_t[*nr_timeouts]; + struct notify_timeout_t *timeout = *timeouts; + for (auto &[notifier_id, cookie] : missed) { + timeout->notifier_id = notifier_id; + timeout->cookie = cookie; + timeout++; + } + } + + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_decode_notify_response); + + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_free_notify_response)( + struct notify_ack_t *acks, size_t nr_acks, + struct notify_timeout_t *timeouts) +{ + for (uint64_t n = 0; n < nr_acks; ++n) { + assert(acks); + if (acks[n].payload) { + free(acks[n].payload); + } + } + if (acks) { + delete[] acks; + } + if (timeouts) { + delete[] timeouts; + } +} +LIBRADOS_C_API_BASE_DEFAULT(rados_free_notify_response); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_notify)( + rados_ioctx_t io, const char *o, + rados_completion_t completion, + const char *buf, int buf_len, + uint64_t timeout_ms, char **reply_buffer, + size_t *reply_buffer_len) +{ + tracepoint(librados, rados_aio_notify_enter, io, o, completion, buf, buf_len, + timeout_ms); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + if (buf) { + bl.push_back(buffer::copy(buf, buf_len)); + } + librados::AioCompletionImpl *c = + reinterpret_cast<librados::AioCompletionImpl*>(completion); + int ret = ctx->aio_notify(oid, c, bl, timeout_ms, NULL, reply_buffer, + 
reply_buffer_len); + tracepoint(librados, rados_aio_notify_exit, ret); + return ret; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_notify); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_notify_ack)( + rados_ioctx_t io, const char *o, + uint64_t notify_id, uint64_t handle, + const char *buf, int buf_len) +{ + tracepoint(librados, rados_notify_ack_enter, io, o, notify_id, handle, buf, buf_len); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + bufferlist bl; + if (buf) { + bufferptr p = buffer::create(buf_len); + memcpy(p.c_str(), buf, buf_len); + bl.push_back(p); + } + ctx->notify_ack(oid, notify_id, handle, bl); + tracepoint(librados, rados_notify_ack_exit, 0); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_notify_ack); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_watch_flush)(rados_t cluster) +{ + tracepoint(librados, rados_watch_flush_enter, cluster); + librados::RadosClient *client = (librados::RadosClient *)cluster; + int retval = client->watch_flush(); + tracepoint(librados, rados_watch_flush_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_watch_flush); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_watch_flush)( + rados_t cluster, + rados_completion_t completion) +{ + tracepoint(librados, rados_aio_watch_flush_enter, cluster, completion); + librados::RadosClient *client = (librados::RadosClient *)cluster; + librados::AioCompletionImpl *c = (librados::AioCompletionImpl*)completion; + int retval = client->async_watch_flush(c); + tracepoint(librados, rados_aio_watch_flush_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_watch_flush); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_set_alloc_hint)( + rados_ioctx_t io, const char *o, + uint64_t expected_object_size, + uint64_t expected_write_size) +{ + tracepoint(librados, rados_set_alloc_hint_enter, io, o, expected_object_size, expected_write_size); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + 
int retval = ctx->set_alloc_hint(oid, expected_object_size, + expected_write_size, 0); + tracepoint(librados, rados_set_alloc_hint_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_set_alloc_hint); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_set_alloc_hint2)( + rados_ioctx_t io, const char *o, + uint64_t expected_object_size, + uint64_t expected_write_size, + uint32_t flags) +{ + tracepoint(librados, rados_set_alloc_hint2_enter, io, o, expected_object_size, expected_write_size, flags); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int retval = ctx->set_alloc_hint(oid, expected_object_size, + expected_write_size, flags); + tracepoint(librados, rados_set_alloc_hint2_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_set_alloc_hint2); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_lock_exclusive)( + rados_ioctx_t io, const char * o, + const char * name, const char * cookie, + const char * desc, + struct timeval * duration, uint8_t flags) +{ + tracepoint(librados, rados_lock_exclusive_enter, io, o, name, cookie, desc, duration, flags); + librados::IoCtx ctx; + librados::IoCtx::from_rados_ioctx_t(io, ctx); + + int retval = ctx.lock_exclusive(o, name, cookie, desc, duration, flags); + tracepoint(librados, rados_lock_exclusive_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_lock_exclusive); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_lock_shared)( + rados_ioctx_t io, const char * o, + const char * name, const char * cookie, + const char * tag, const char * desc, + struct timeval * duration, uint8_t flags) +{ + tracepoint(librados, rados_lock_shared_enter, io, o, name, cookie, tag, desc, duration, flags); + librados::IoCtx ctx; + librados::IoCtx::from_rados_ioctx_t(io, ctx); + + int retval = ctx.lock_shared(o, name, cookie, tag, desc, duration, flags); + tracepoint(librados, rados_lock_shared_exit, retval); + return retval; +} 
+LIBRADOS_C_API_BASE_DEFAULT(rados_lock_shared); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_unlock)( + rados_ioctx_t io, const char *o, const char *name, + const char *cookie) +{ + tracepoint(librados, rados_unlock_enter, io, o, name, cookie); + librados::IoCtx ctx; + librados::IoCtx::from_rados_ioctx_t(io, ctx); + + int retval = ctx.unlock(o, name, cookie); + tracepoint(librados, rados_unlock_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_unlock); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_unlock)( + rados_ioctx_t io, const char *o, const char *name, + const char *cookie, rados_completion_t completion) +{ + tracepoint(librados, rados_aio_unlock_enter, io, o, name, cookie, completion); + librados::IoCtx ctx; + librados::IoCtx::from_rados_ioctx_t(io, ctx); + librados::AioCompletionImpl *comp = (librados::AioCompletionImpl*)completion; + comp->get(); + librados::AioCompletion c(comp); + int retval = ctx.aio_unlock(o, name, cookie, &c); + tracepoint(librados, rados_aio_unlock_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_unlock); + +extern "C" ssize_t LIBRADOS_C_API_DEFAULT_F(rados_list_lockers)( + rados_ioctx_t io, const char *o, + const char *name, int *exclusive, + char *tag, size_t *tag_len, + char *clients, size_t *clients_len, + char *cookies, size_t *cookies_len, + char *addrs, size_t *addrs_len) +{ + tracepoint(librados, rados_list_lockers_enter, io, o, name, *tag_len, *clients_len, *cookies_len, *addrs_len); + librados::IoCtx ctx; + librados::IoCtx::from_rados_ioctx_t(io, ctx); + std::string name_str = name; + std::string oid = o; + std::string tag_str; + int tmp_exclusive; + std::list<librados::locker_t> lockers; + int r = ctx.list_lockers(oid, name_str, &tmp_exclusive, &tag_str, &lockers); + if (r < 0) { + tracepoint(librados, rados_list_lockers_exit, r, *exclusive, "", *tag_len, *clients_len, *cookies_len, *addrs_len); + return r; + } + + size_t clients_total = 0; + size_t cookies_total = 
0; + size_t addrs_total = 0; + list<librados::locker_t>::const_iterator it; + for (it = lockers.begin(); it != lockers.end(); ++it) { + clients_total += it->client.length() + 1; + cookies_total += it->cookie.length() + 1; + addrs_total += it->address.length() + 1; + } + + bool too_short = ((clients_total > *clients_len) || + (cookies_total > *cookies_len) || + (addrs_total > *addrs_len) || + (tag_str.length() + 1 > *tag_len)); + *clients_len = clients_total; + *cookies_len = cookies_total; + *addrs_len = addrs_total; + *tag_len = tag_str.length() + 1; + if (too_short) { + tracepoint(librados, rados_list_lockers_exit, -ERANGE, *exclusive, "", *tag_len, *clients_len, *cookies_len, *addrs_len); + return -ERANGE; + } + + strcpy(tag, tag_str.c_str()); + char *clients_p = clients; + char *cookies_p = cookies; + char *addrs_p = addrs; + for (it = lockers.begin(); it != lockers.end(); ++it) { + strcpy(clients_p, it->client.c_str()); + strcpy(cookies_p, it->cookie.c_str()); + strcpy(addrs_p, it->address.c_str()); + tracepoint(librados, rados_list_lockers_locker, clients_p, cookies_p, addrs_p); + clients_p += it->client.length() + 1; + cookies_p += it->cookie.length() + 1; + addrs_p += it->address.length() + 1; + } + if (tmp_exclusive) + *exclusive = 1; + else + *exclusive = 0; + + int retval = lockers.size(); + tracepoint(librados, rados_list_lockers_exit, retval, *exclusive, tag, *tag_len, *clients_len, *cookies_len, *addrs_len); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_list_lockers); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_break_lock)( + rados_ioctx_t io, const char *o, + const char *name, const char *client, + const char *cookie) +{ + tracepoint(librados, rados_break_lock_enter, io, o, name, client, cookie); + librados::IoCtx ctx; + librados::IoCtx::from_rados_ioctx_t(io, ctx); + + int retval = ctx.break_lock(o, name, client, cookie); + tracepoint(librados, rados_break_lock_exit, retval); + return retval; +} 
+LIBRADOS_C_API_BASE_DEFAULT(rados_break_lock); + +extern "C" rados_write_op_t LIBRADOS_C_API_DEFAULT_F(rados_create_write_op)() +{ + tracepoint(librados, rados_create_write_op_enter); + rados_write_op_t retval = new (std::nothrow)::ObjectOperation; + tracepoint(librados, rados_create_write_op_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_create_write_op); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_release_write_op)( + rados_write_op_t write_op) +{ + tracepoint(librados, rados_release_write_op_enter, write_op); + delete (::ObjectOperation*)write_op; + tracepoint(librados, rados_release_write_op_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_release_write_op); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_set_flags)( + rados_write_op_t write_op, + int flags) +{ + tracepoint(librados, rados_write_op_set_flags_enter, write_op, flags); + ((::ObjectOperation *)write_op)->set_last_op_flags(get_op_flags(flags)); + tracepoint(librados, rados_write_op_set_flags_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_set_flags); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_assert_version)( + rados_write_op_t write_op, + uint64_t ver) +{ + tracepoint(librados, rados_write_op_assert_version_enter, write_op, ver); + ((::ObjectOperation *)write_op)->assert_version(ver); + tracepoint(librados, rados_write_op_assert_version_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_assert_version); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_assert_exists)( + rados_write_op_t write_op) +{ + tracepoint(librados, rados_write_op_assert_exists_enter, write_op); + ((::ObjectOperation *)write_op)->stat(nullptr, nullptr, nullptr); + tracepoint(librados, rados_write_op_assert_exists_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_assert_exists); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_cmpext)( + rados_write_op_t write_op, + const char *cmp_buf, + size_t cmp_len, + uint64_t off, + int *prval) +{ + 
tracepoint(librados, rados_write_op_cmpext_enter, write_op, cmp_buf, + cmp_len, off, prval); + ((::ObjectOperation *)write_op)->cmpext(off, cmp_len, cmp_buf, prval); + tracepoint(librados, rados_write_op_cmpext_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_cmpext); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_cmpxattr)( + rados_write_op_t write_op, + const char *name, + uint8_t comparison_operator, + const char *value, + size_t value_len) +{ + tracepoint(librados, rados_write_op_cmpxattr_enter, write_op, name, comparison_operator, value, value_len); + bufferlist bl; + bl.append(value, value_len); + ((::ObjectOperation *)write_op)->cmpxattr(name, + comparison_operator, + CEPH_OSD_CMPXATTR_MODE_STRING, + bl); + tracepoint(librados, rados_write_op_cmpxattr_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_cmpxattr); + +static void rados_c_omap_cmp(ObjectOperation *op, + const char *key, + uint8_t comparison_operator, + const char *val, + size_t key_len, + size_t val_len, + int *prval) +{ + bufferlist bl; + bl.append(val, val_len); + std::map<std::string, pair<bufferlist, int> > assertions; + string lkey = string(key, key_len); + + assertions[lkey] = std::make_pair(bl, comparison_operator); + op->omap_cmp(assertions, prval); +} + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_cmp)( + rados_write_op_t write_op, + const char *key, + uint8_t comparison_operator, + const char *val, + size_t val_len, + int *prval) +{ + tracepoint(librados, rados_write_op_omap_cmp_enter, write_op, key, comparison_operator, val, val_len, prval); + rados_c_omap_cmp((::ObjectOperation *)write_op, key, comparison_operator, + val, strlen(key), val_len, prval); + tracepoint(librados, rados_write_op_omap_cmp_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_cmp); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_cmp2)( + rados_write_op_t write_op, + const char *key, + uint8_t comparison_operator, + const char *val, + size_t key_len, + 
size_t val_len, + int *prval) +{ + tracepoint(librados, rados_write_op_omap_cmp_enter, write_op, key, comparison_operator, val, val_len, prval); + rados_c_omap_cmp((::ObjectOperation *)write_op, key, comparison_operator, + val, key_len, val_len, prval); + tracepoint(librados, rados_write_op_omap_cmp_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_cmp2); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_setxattr)( + rados_write_op_t write_op, + const char *name, + const char *value, + size_t value_len) +{ + tracepoint(librados, rados_write_op_setxattr_enter, write_op, name, value, value_len); + bufferlist bl; + bl.append(value, value_len); + ((::ObjectOperation *)write_op)->setxattr(name, bl); + tracepoint(librados, rados_write_op_setxattr_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_setxattr); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_rmxattr)( + rados_write_op_t write_op, + const char *name) +{ + tracepoint(librados, rados_write_op_rmxattr_enter, write_op, name); + ((::ObjectOperation *)write_op)->rmxattr(name); + tracepoint(librados, rados_write_op_rmxattr_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_rmxattr); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_create)( + rados_write_op_t write_op, + int exclusive, + const char* category) // unused +{ + tracepoint(librados, rados_write_op_create_enter, write_op, exclusive); + ::ObjectOperation *oo = (::ObjectOperation *) write_op; + oo->create(!!exclusive); + tracepoint(librados, rados_write_op_create_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_create); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_write)( + rados_write_op_t write_op, + const char *buffer, + size_t len, + uint64_t offset) +{ + tracepoint(librados, rados_write_op_write_enter, write_op, buffer, len, offset); + bufferlist bl; + bl.append(buffer,len); + ((::ObjectOperation *)write_op)->write(offset, bl); + tracepoint(librados, rados_write_op_write_exit); +} 
+LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_write); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_write_full)( + rados_write_op_t write_op, + const char *buffer, + size_t len) +{ + tracepoint(librados, rados_write_op_write_full_enter, write_op, buffer, len); + bufferlist bl; + bl.append(buffer,len); + ((::ObjectOperation *)write_op)->write_full(bl); + tracepoint(librados, rados_write_op_write_full_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_write_full); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_writesame)( + rados_write_op_t write_op, + const char *buffer, + size_t data_len, + size_t write_len, + uint64_t offset) +{ + tracepoint(librados, rados_write_op_writesame_enter, write_op, buffer, data_len, write_len, offset); + bufferlist bl; + bl.append(buffer, data_len); + ((::ObjectOperation *)write_op)->writesame(offset, write_len, bl); + tracepoint(librados, rados_write_op_writesame_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_writesame); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_append)( + rados_write_op_t write_op, + const char *buffer, + size_t len) +{ + tracepoint(librados, rados_write_op_append_enter, write_op, buffer, len); + bufferlist bl; + bl.append(buffer,len); + ((::ObjectOperation *)write_op)->append(bl); + tracepoint(librados, rados_write_op_append_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_append); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_remove)( + rados_write_op_t write_op) +{ + tracepoint(librados, rados_write_op_remove_enter, write_op); + ((::ObjectOperation *)write_op)->remove(); + tracepoint(librados, rados_write_op_remove_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_remove); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_truncate)( + rados_write_op_t write_op, + uint64_t offset) +{ + tracepoint(librados, rados_write_op_truncate_enter, write_op, offset); + ((::ObjectOperation *)write_op)->truncate(offset); + tracepoint(librados, 
rados_write_op_truncate_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_truncate); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_zero)( + rados_write_op_t write_op, + uint64_t offset, + uint64_t len) +{ + tracepoint(librados, rados_write_op_zero_enter, write_op, offset, len); + ((::ObjectOperation *)write_op)->zero(offset, len); + tracepoint(librados, rados_write_op_zero_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_zero); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_exec)( + rados_write_op_t write_op, + const char *cls, + const char *method, + const char *in_buf, + size_t in_len, + int *prval) +{ + tracepoint(librados, rados_write_op_exec_enter, write_op, cls, method, in_buf, in_len, prval); + bufferlist inbl; + inbl.append(in_buf, in_len); + ((::ObjectOperation *)write_op)->call(cls, method, inbl, NULL, NULL, prval); + tracepoint(librados, rados_write_op_exec_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_exec); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_set)( + rados_write_op_t write_op, + char const* const* keys, + char const* const* vals, + const size_t *lens, + size_t num) +{ + tracepoint(librados, rados_write_op_omap_set_enter, write_op, num); + std::map<std::string, bufferlist> entries; + for (size_t i = 0; i < num; ++i) { + tracepoint(librados, rados_write_op_omap_set_entry, keys[i], vals[i], lens[i]); + bufferlist bl(lens[i]); + bl.append(vals[i], lens[i]); + entries[keys[i]] = bl; + } + ((::ObjectOperation *)write_op)->omap_set(entries); + tracepoint(librados, rados_write_op_omap_set_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_set); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_set2)( + rados_write_op_t write_op, + char const* const* keys, + char const* const* vals, + const size_t *key_lens, + const size_t *val_lens, + size_t num) +{ + tracepoint(librados, rados_write_op_omap_set_enter, write_op, num); + std::map<std::string, bufferlist> entries; + for 
(size_t i = 0; i < num; ++i) { + bufferlist bl(val_lens[i]); + bl.append(vals[i], val_lens[i]); + string key(keys[i], key_lens[i]); + entries[key] = bl; + } + ((::ObjectOperation *)write_op)->omap_set(entries); + tracepoint(librados, rados_write_op_omap_set_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_set2); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_rm_keys)( + rados_write_op_t write_op, + char const* const* keys, + size_t keys_len) +{ + tracepoint(librados, rados_write_op_omap_rm_keys_enter, write_op, keys_len); + for(size_t i = 0; i < keys_len; i++) { + tracepoint(librados, rados_write_op_omap_rm_keys_entry, keys[i]); + } + std::set<std::string> to_remove(keys, keys + keys_len); + ((::ObjectOperation *)write_op)->omap_rm_keys(to_remove); + tracepoint(librados, rados_write_op_omap_rm_keys_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_rm_keys); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_rm_keys2)( + rados_write_op_t write_op, + char const* const* keys, + const size_t* key_lens, + size_t keys_len) +{ + tracepoint(librados, rados_write_op_omap_rm_keys_enter, write_op, keys_len); + std::set<std::string> to_remove; + for(size_t i = 0; i < keys_len; i++) { + to_remove.emplace(keys[i], key_lens[i]); + } + ((::ObjectOperation *)write_op)->omap_rm_keys(to_remove); + tracepoint(librados, rados_write_op_omap_rm_keys_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_rm_keys2); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_rm_range2)( + rados_write_op_t write_op, + const char *key_begin, + size_t key_begin_len, + const char *key_end, + size_t key_end_len) +{ + tracepoint(librados, rados_write_op_omap_rm_range_enter, + write_op, key_begin, key_end); + ((::ObjectOperation *)write_op)->omap_rm_range({key_begin, key_begin_len}, + {key_end, key_end_len}); + tracepoint(librados, rados_write_op_omap_rm_range_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_rm_range2); + +extern 
"C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_omap_clear)( + rados_write_op_t write_op) +{ + tracepoint(librados, rados_write_op_omap_clear_enter, write_op); + ((::ObjectOperation *)write_op)->omap_clear(); + tracepoint(librados, rados_write_op_omap_clear_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_omap_clear); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_set_alloc_hint)( + rados_write_op_t write_op, + uint64_t expected_object_size, + uint64_t expected_write_size) +{ + tracepoint(librados, rados_write_op_set_alloc_hint_enter, write_op, expected_object_size, expected_write_size); + ((::ObjectOperation *)write_op)->set_alloc_hint(expected_object_size, + expected_write_size, 0); + tracepoint(librados, rados_write_op_set_alloc_hint_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_set_alloc_hint); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_write_op_set_alloc_hint2)( + rados_write_op_t write_op, + uint64_t expected_object_size, + uint64_t expected_write_size, + uint32_t flags) +{ + tracepoint(librados, rados_write_op_set_alloc_hint2_enter, write_op, expected_object_size, expected_write_size, flags); + ((::ObjectOperation *)write_op)->set_alloc_hint(expected_object_size, + expected_write_size, + flags); + tracepoint(librados, rados_write_op_set_alloc_hint2_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_set_alloc_hint2); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_write_op_operate)( + rados_write_op_t write_op, + rados_ioctx_t io, + const char *oid, + time_t *mtime, + int flags) +{ + tracepoint(librados, rados_write_op_operate_enter, write_op, io, oid, mtime, flags); + object_t obj(oid); + ::ObjectOperation *oo = (::ObjectOperation *) write_op; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + + ceph::real_time *prt = NULL; + ceph::real_time rt; + + if (mtime) { + rt = ceph::real_clock::from_time_t(*mtime); + prt = &rt; + } + + int retval = ctx->operate(obj, oo, prt, translate_flags(flags)); + tracepoint(librados, 
rados_write_op_operate_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_operate); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_write_op_operate2)( + rados_write_op_t write_op, + rados_ioctx_t io, + const char *oid, + struct timespec *ts, + int flags) +{ + tracepoint(librados, rados_write_op_operate2_enter, write_op, io, oid, ts, flags); + object_t obj(oid); + ::ObjectOperation *oo = (::ObjectOperation *) write_op; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + + ceph::real_time *prt = NULL; + ceph::real_time rt; + + if (ts) { + rt = ceph::real_clock::from_timespec(*ts); + prt = &rt; + } + + int retval = ctx->operate(obj, oo, prt, translate_flags(flags)); + tracepoint(librados, rados_write_op_operate_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_write_op_operate2); + +extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_write_op_operate)( + rados_write_op_t write_op, + rados_ioctx_t io, + rados_completion_t completion, + const char *oid, + time_t *mtime, + int flags) +{ + tracepoint(librados, rados_aio_write_op_operate_enter, write_op, io, completion, oid, mtime, flags); + object_t obj(oid); + ::ObjectOperation *oo = (::ObjectOperation *) write_op; + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + librados::AioCompletionImpl *c = (librados::AioCompletionImpl*)completion; + int retval = ctx->aio_operate(obj, oo, c, ctx->snapc, translate_flags(flags)); + tracepoint(librados, rados_aio_write_op_operate_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_write_op_operate); + +extern "C" rados_read_op_t LIBRADOS_C_API_DEFAULT_F(rados_create_read_op)() +{ + tracepoint(librados, rados_create_read_op_enter); + rados_read_op_t retval = new (std::nothrow)::ObjectOperation; + tracepoint(librados, rados_create_read_op_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_create_read_op); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_release_read_op)( + 
rados_read_op_t read_op) +{ + tracepoint(librados, rados_release_read_op_enter, read_op); + delete (::ObjectOperation *)read_op; + tracepoint(librados, rados_release_read_op_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_release_read_op); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_set_flags)( + rados_read_op_t read_op, + int flags) +{ + tracepoint(librados, rados_read_op_set_flags_enter, read_op, flags); + ((::ObjectOperation *)read_op)->set_last_op_flags(get_op_flags(flags)); + tracepoint(librados, rados_read_op_set_flags_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_set_flags); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_assert_version)( + rados_read_op_t read_op, + uint64_t ver) +{ + tracepoint(librados, rados_read_op_assert_version_enter, read_op, ver); + ((::ObjectOperation *)read_op)->assert_version(ver); + tracepoint(librados, rados_read_op_assert_version_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_assert_version); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_assert_exists)( + rados_read_op_t read_op) +{ + tracepoint(librados, rados_read_op_assert_exists_enter, read_op); + ((::ObjectOperation *)read_op)->stat(nullptr, nullptr, nullptr); + tracepoint(librados, rados_read_op_assert_exists_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_assert_exists); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_cmpext)( + rados_read_op_t read_op, + const char *cmp_buf, + size_t cmp_len, + uint64_t off, + int *prval) +{ + tracepoint(librados, rados_read_op_cmpext_enter, read_op, cmp_buf, + cmp_len, off, prval); + ((::ObjectOperation *)read_op)->cmpext(off, cmp_len, cmp_buf, prval); + tracepoint(librados, rados_read_op_cmpext_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_cmpext); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_cmpxattr)( + rados_read_op_t read_op, + const char *name, + uint8_t comparison_operator, + const char *value, + size_t value_len) +{ + tracepoint(librados, 
rados_read_op_cmpxattr_enter, read_op, name, comparison_operator, value, value_len); + bufferlist bl; + bl.append(value, value_len); + ((::ObjectOperation *)read_op)->cmpxattr(name, + comparison_operator, + CEPH_OSD_CMPXATTR_MODE_STRING, + bl); + tracepoint(librados, rados_read_op_cmpxattr_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_cmpxattr); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_cmp)( + rados_read_op_t read_op, + const char *key, + uint8_t comparison_operator, + const char *val, + size_t val_len, + int *prval) +{ + tracepoint(librados, rados_read_op_omap_cmp_enter, read_op, key, comparison_operator, val, val_len, prval); + rados_c_omap_cmp((::ObjectOperation *)read_op, key, comparison_operator, + val, strlen(key), val_len, prval); + tracepoint(librados, rados_read_op_omap_cmp_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_cmp); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_cmp2)( + rados_read_op_t read_op, + const char *key, + uint8_t comparison_operator, + const char *val, + size_t key_len, + size_t val_len, + int *prval) +{ + tracepoint(librados, rados_read_op_omap_cmp_enter, read_op, key, comparison_operator, val, val_len, prval); + rados_c_omap_cmp((::ObjectOperation *)read_op, key, comparison_operator, + val, key_len, val_len, prval); + tracepoint(librados, rados_read_op_omap_cmp_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_cmp2); + +extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_stat)( + rados_read_op_t read_op, + uint64_t *psize, + time_t *pmtime, + int *prval) +{ + tracepoint(librados, rados_read_op_stat_enter, read_op, psize, pmtime, prval); + ((::ObjectOperation *)read_op)->stat(psize, pmtime, prval); + tracepoint(librados, rados_read_op_stat_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_stat); + +class C_bl_to_buf : public Context { + char *out_buf; + size_t out_len; + size_t *bytes_read; + int *prval; +public: + bufferlist out_bl; + C_bl_to_buf(char *out_buf, + 
size_t out_len, + size_t *bytes_read, + int *prval) : out_buf(out_buf), out_len(out_len), + bytes_read(bytes_read), prval(prval) {} + void finish(int r) override { + /* result does not fit in out_len bytes: report -ERANGE and 0 bytes, copy nothing */ if (out_bl.length() > out_len) { + if (prval) + *prval = -ERANGE; + if (bytes_read) + *bytes_read = 0; + return; + } + if (bytes_read) + *bytes_read = out_bl.length(); + /* copy only when is_provided_buffer(out_buf) is false; presumably that means the data is not already in out_buf (confirm) */ if (out_buf && !out_bl.is_provided_buffer(out_buf)) + out_bl.begin().copy(out_bl.length(), out_buf); + } +}; + +/* Seeds ctx->out_bl with a static buffer over buf (len bytes) and passes ctx as the completion context to read(). */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_read)( + rados_read_op_t read_op, + uint64_t offset, + size_t len, + char *buf, + size_t *bytes_read, + int *prval) +{ + tracepoint(librados, rados_read_op_read_enter, read_op, offset, len, buf, bytes_read, prval); + C_bl_to_buf *ctx = new C_bl_to_buf(buf, len, bytes_read, prval); + ctx->out_bl.push_back(buffer::create_static(len, buf)); + ((::ObjectOperation *)read_op)->read(offset, len, &ctx->out_bl, prval, ctx); + tracepoint(librados, rados_read_op_read_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_read); + +/* init_value is copied into a bufferlist; a C_bl_to_buf context is created only when pchecksum is non-null, otherwise both the output bufferlist and the context passed on are null. */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_checksum)( + rados_read_op_t read_op, + rados_checksum_type_t type, + const char *init_value, + size_t init_value_len, + uint64_t offset, size_t len, + size_t chunk_size, char *pchecksum, + size_t checksum_len, int *prval) +{ + tracepoint(librados, rados_read_op_checksum_enter, read_op, type, init_value, + init_value_len, offset, len, chunk_size); + bufferlist init_value_bl; + init_value_bl.append(init_value, init_value_len); + + C_bl_to_buf *ctx = nullptr; + if (pchecksum != nullptr) { + ctx = new C_bl_to_buf(pchecksum, checksum_len, nullptr, prval); + } + ((::ObjectOperation *)read_op)->checksum(get_checksum_op_type(type), + init_value_bl, offset, len, + chunk_size, + (ctx ? 
&ctx->out_bl : nullptr), + prval, ctx); + tracepoint(librados, rados_read_op_checksum_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_checksum); + +/* Context whose finish() hands out_bl to do_out_buffer() together with the caller's out_buf and out_len pointers. */ class C_out_buffer : public Context { + char **out_buf; + size_t *out_len; +public: + bufferlist out_bl; + C_out_buffer(char **out_buf, size_t *out_len) : out_buf(out_buf), + out_len(out_len) {} + void finish(int r) override { + // ignore r since we don't know the meaning of return values + // from custom class methods + do_out_buffer(out_bl, out_buf, out_len); + } +}; + +/* Queues a class method call; the output is delivered through a C_out_buffer context allocated here with new. */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_exec)( + rados_read_op_t read_op, + const char *cls, + const char *method, + const char *in_buf, + size_t in_len, + char **out_buf, + size_t *out_len, + int *prval) +{ + tracepoint(librados, rados_read_op_exec_enter, read_op, cls, method, in_buf, in_len, out_buf, out_len, prval); + bufferlist inbl; + inbl.append(in_buf, in_len); + C_out_buffer *ctx = new C_out_buffer(out_buf, out_len); + ((::ObjectOperation *)read_op)->call(cls, method, inbl, &ctx->out_bl, ctx, + prval); + tracepoint(librados, rados_read_op_exec_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_exec); + +/* Like rados_read_op_exec, but the output goes through C_bl_to_buf into the caller-supplied out_buf of out_len bytes, with the length reported via used_len. */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_exec_user_buf)( + rados_read_op_t read_op, + const char *cls, + const char *method, + const char *in_buf, + size_t in_len, + char *out_buf, + size_t out_len, + size_t *used_len, + int *prval) +{ + tracepoint(librados, rados_read_op_exec_user_buf_enter, read_op, cls, method, in_buf, in_len, out_buf, out_len, used_len, prval); + C_bl_to_buf *ctx = new C_bl_to_buf(out_buf, out_len, used_len, prval); + bufferlist inbl; + inbl.append(in_buf, in_len); + ((::ObjectOperation *)read_op)->call(cls, method, inbl, &ctx->out_bl, ctx, + prval); + tracepoint(librados, rados_read_op_exec_user_buf_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_exec_user_buf); + +/* Iterator state: the key/value map plus the current position i. */ struct RadosOmapIter { + std::map<std::string, bufferlist> values; + std::map<std::string, bufferlist>::iterator i; +}; 
+ +/* On completion, positions iter->i at the start of iter->values. */ class C_OmapIter : public Context { + RadosOmapIter *iter; +public: + explicit C_OmapIter(RadosOmapIter *iter) : iter(iter) {} + void finish(int r) override { + iter->i = iter->values.begin(); + } +}; + +/* On completion, positions iter->i at the start of iter->attrset. */ class C_XattrsIter : public Context { + librados::RadosXattrsIter *iter; +public: + explicit C_XattrsIter(librados::RadosXattrsIter *iter) : iter(iter) {} + void finish(int r) override { + iter->i = iter->attrset.begin(); + } +}; + +/* Allocates a RadosXattrsIter, has getxattrs() fill its attrset, installs C_XattrsIter as handler, and returns the iterator through *iter. */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_getxattrs)( + rados_read_op_t read_op, + rados_xattrs_iter_t *iter, + int *prval) +{ + tracepoint(librados, rados_read_op_getxattrs_enter, read_op, prval); + librados::RadosXattrsIter *xattrs_iter = new librados::RadosXattrsIter; + ((::ObjectOperation *)read_op)->getxattrs(&xattrs_iter->attrset, prval); + ((::ObjectOperation *)read_op)->set_handler(new C_XattrsIter(xattrs_iter)); + *iter = xattrs_iter; + tracepoint(librados, rados_read_op_getxattrs_exit, *iter); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_getxattrs); + +/* Allocates a RadosOmapIter returned through *iter; a null start_after or filter_prefix is replaced by an empty string; pmore is not requested (nullptr). */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_get_vals)( + rados_read_op_t read_op, + const char *start_after, + const char *filter_prefix, + uint64_t max_return, + rados_omap_iter_t *iter, + int *prval) +{ + tracepoint(librados, rados_read_op_omap_get_vals_enter, read_op, start_after, filter_prefix, max_return, prval); + RadosOmapIter *omap_iter = new RadosOmapIter; + const char *start = start_after ? start_after : ""; + const char *filter = filter_prefix ? 
filter_prefix : ""; + ((::ObjectOperation *)read_op)->omap_get_vals( + start, + filter, + max_return, + &omap_iter->values, + nullptr, + prval); + ((::ObjectOperation *)read_op)->set_handler(new C_OmapIter(omap_iter)); + *iter = omap_iter; + tracepoint(librados, rados_read_op_omap_get_vals_exit, *iter); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_get_vals); + +/* Same as rados_read_op_omap_get_vals but forwards pmore (cast to bool*); reuses the rados_read_op_omap_get_vals_* tracepoints. */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_get_vals2)( + rados_read_op_t read_op, + const char *start_after, + const char *filter_prefix, + uint64_t max_return, + rados_omap_iter_t *iter, + unsigned char *pmore, + int *prval) +{ + tracepoint(librados, rados_read_op_omap_get_vals_enter, read_op, start_after, filter_prefix, max_return, prval); + RadosOmapIter *omap_iter = new RadosOmapIter; + const char *start = start_after ? start_after : ""; + const char *filter = filter_prefix ? filter_prefix : ""; + ((::ObjectOperation *)read_op)->omap_get_vals( + start, + filter, + max_return, + &omap_iter->values, + (bool*)pmore, + prval); + ((::ObjectOperation *)read_op)->set_handler(new C_OmapIter(omap_iter)); + *iter = omap_iter; + tracepoint(librados, rados_read_op_omap_get_vals_exit, *iter); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_get_vals2); + +/* Context that owns the key set and, in finish(), mirrors every key into iter->values before positioning iter->i at the start. */ struct C_OmapKeysIter : public Context { + RadosOmapIter *iter; + std::set<std::string> keys; + explicit C_OmapKeysIter(RadosOmapIter *iter) : iter(iter) {} + void finish(int r) override { + // map each key to an empty bl + for (std::set<std::string>::const_iterator i = keys.begin(); + i != keys.end(); ++i) { + iter->values[*i]; + } + iter->i = iter->values.begin(); + } +}; + +/* Keys are collected in the C_OmapKeysIter context's set; a null start_after is replaced by an empty string; pmore is not requested (nullptr). */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_get_keys)( + rados_read_op_t read_op, + const char *start_after, + uint64_t max_return, + rados_omap_iter_t *iter, + int *prval) +{ + tracepoint(librados, rados_read_op_omap_get_keys_enter, read_op, start_after, max_return, prval); + RadosOmapIter *omap_iter = new RadosOmapIter; + C_OmapKeysIter *ctx = new 
C_OmapKeysIter(omap_iter); + ((::ObjectOperation *)read_op)->omap_get_keys( + start_after ? start_after : "", + max_return, &ctx->keys, nullptr, prval); + ((::ObjectOperation *)read_op)->set_handler(ctx); + *iter = omap_iter; + tracepoint(librados, rados_read_op_omap_get_keys_exit, *iter); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_get_keys); + +/* Same as rados_read_op_omap_get_keys but forwards pmore (cast to bool*); reuses the rados_read_op_omap_get_keys_* tracepoints. */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_get_keys2)( + rados_read_op_t read_op, + const char *start_after, + uint64_t max_return, + rados_omap_iter_t *iter, + unsigned char *pmore, + int *prval) +{ + tracepoint(librados, rados_read_op_omap_get_keys_enter, read_op, start_after, max_return, prval); + RadosOmapIter *omap_iter = new RadosOmapIter; + C_OmapKeysIter *ctx = new C_OmapKeysIter(omap_iter); + ((::ObjectOperation *)read_op)->omap_get_keys( + start_after ? start_after : "", + max_return, &ctx->keys, + (bool*)pmore, prval); + ((::ObjectOperation *)read_op)->set_handler(ctx); + *iter = omap_iter; + tracepoint(librados, rados_read_op_omap_get_keys_exit, *iter); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_get_keys2); + +/* Shared helper: allocates the RadosOmapIter, queues omap_get_vals_by_keys for to_get, installs C_OmapIter as handler, and returns the iterator through *iter. */ static void internal_rados_read_op_omap_get_vals_by_keys(rados_read_op_t read_op, + set<string>& to_get, + rados_omap_iter_t *iter, + int *prval) +{ + RadosOmapIter *omap_iter = new RadosOmapIter; + ((::ObjectOperation *)read_op)->omap_get_vals_by_keys(to_get, + &omap_iter->values, + prval); + ((::ObjectOperation *)read_op)->set_handler(new C_OmapIter(omap_iter)); + *iter = omap_iter; +} + +/* Builds the key set from the keys_len C strings in keys, then defers to the shared helper. */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_get_vals_by_keys)( + rados_read_op_t read_op, + char const* const* keys, + size_t keys_len, + rados_omap_iter_t *iter, + int *prval) +{ + tracepoint(librados, rados_read_op_omap_get_vals_by_keys_enter, read_op, keys, keys_len, iter, prval); + std::set<std::string> to_get(keys, keys + keys_len); + internal_rados_read_op_omap_get_vals_by_keys(read_op, to_get, iter, prval); + tracepoint(librados, 
rados_read_op_omap_get_vals_by_keys_exit, *iter); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_get_vals_by_keys); + +/* Variant taking an explicit length per key in key_lens; reuses the rados_read_op_omap_get_vals_by_keys_* tracepoints. */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_read_op_omap_get_vals_by_keys2)( + rados_read_op_t read_op, + char const* const* keys, + size_t num_keys, + const size_t* key_lens, + rados_omap_iter_t *iter, + int *prval) +{ + tracepoint(librados, rados_read_op_omap_get_vals_by_keys_enter, read_op, keys, num_keys, iter, prval); + std::set<std::string> to_get; + for (size_t i = 0; i < num_keys; i++) { + to_get.emplace(keys[i], key_lens[i]); + } + internal_rados_read_op_omap_get_vals_by_keys(read_op, to_get, iter, prval); + tracepoint(librados, rados_read_op_omap_get_vals_by_keys_exit, *iter); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_omap_get_vals_by_keys2); + +/* Writes the current entry through whichever output pointers are non-null and advances the iterator; at the end of the map the outputs are set to NULL/0 instead. Returns 0 in both cases. The returned key/val pointers come from c_str() on the iterator's own map entries. */ extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_omap_get_next2)( + rados_omap_iter_t iter, + char **key, + char **val, + size_t *key_len, + size_t *val_len) +{ + tracepoint(librados, rados_omap_get_next_enter, iter); + RadosOmapIter *it = static_cast<RadosOmapIter *>(iter); + if (it->i == it->values.end()) { + if (key) + *key = NULL; + if (val) + *val = NULL; + if (key_len) + *key_len = 0; + if (val_len) + *val_len = 0; + tracepoint(librados, rados_omap_get_next_exit, 0, key, val, val_len); + return 0; + } + if (key) + *key = (char*)it->i->first.c_str(); + if (val) + *val = it->i->second.c_str(); + if (key_len) + *key_len = it->i->first.length(); + if (val_len) + *val_len = it->i->second.length(); + ++it->i; + tracepoint(librados, rados_omap_get_next_exit, 0, key, val, val_len); + return 0; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_omap_get_next2); + +/* Forwards to rados_omap_get_next2 with a null key_len. */ extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_omap_get_next)( + rados_omap_iter_t iter, + char **key, + char **val, + size_t *len) +{ + return LIBRADOS_C_API_DEFAULT_F(rados_omap_get_next2)(iter, key, val, nullptr, len); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_omap_get_next); + +/* Returns values.size() of the RadosOmapIter behind iter. */ extern "C" unsigned int 
LIBRADOS_C_API_DEFAULT_F(rados_omap_iter_size)( + rados_omap_iter_t iter) +{ + RadosOmapIter *it = static_cast<RadosOmapIter *>(iter); + return it->values.size(); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_omap_iter_size); + +/* Deletes the RadosOmapIter behind iter. */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_omap_get_end)( + rados_omap_iter_t iter) +{ + tracepoint(librados, rados_omap_get_end_enter, iter); + RadosOmapIter *it = static_cast<RadosOmapIter *>(iter); + delete it; + tracepoint(librados, rados_omap_get_end_exit); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_omap_get_end); + +/* Runs read_op against oid via IoCtxImpl::operate_read() with a NULL third argument and flags mapped through translate_flags(); returns that call's result. */ extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_read_op_operate)( + rados_read_op_t read_op, + rados_ioctx_t io, + const char *oid, + int flags) +{ + tracepoint(librados, rados_read_op_operate_enter, read_op, io, oid, flags); + object_t obj(oid); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + int retval = ctx->operate_read(obj, (::ObjectOperation *)read_op, NULL, + translate_flags(flags)); + tracepoint(librados, rados_read_op_operate_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_read_op_operate); + +/* Forwards to IoCtxImpl::aio_operate_read() with completion cast to AioCompletionImpl; the last argument is NULL. Returns that call's result. */ extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_aio_read_op_operate)( + rados_read_op_t read_op, + rados_ioctx_t io, + rados_completion_t completion, + const char *oid, + int flags) +{ + tracepoint(librados, rados_aio_read_op_operate_enter, read_op, io, completion, oid, flags); + object_t obj(oid); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + librados::AioCompletionImpl *c = (librados::AioCompletionImpl*)completion; + int retval = ctx->aio_operate_read(obj, (::ObjectOperation *)read_op, + c, translate_flags(flags), NULL); + tracepoint(librados, rados_aio_read_op_operate_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_aio_read_op_operate); + +/* Forwards to IoCtxImpl::cache_pin() for object o and returns its result. */ extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_cache_pin)( + rados_ioctx_t io, + const char *o) +{ + tracepoint(librados, rados_cache_pin_enter, io, o); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int 
retval = ctx->cache_pin(oid); + tracepoint(librados, rados_cache_pin_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_cache_pin); + +/* Forwards to IoCtxImpl::cache_unpin() for object o and returns its result. */ extern "C" int LIBRADOS_C_API_DEFAULT_F(rados_cache_unpin)( + rados_ioctx_t io, + const char *o) +{ + tracepoint(librados, rados_cache_unpin_enter, io, o); + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + object_t oid(o); + int retval = ctx->cache_unpin(oid); + tracepoint(librados, rados_cache_unpin_exit, retval); + return retval; +} +LIBRADOS_C_API_BASE_DEFAULT(rados_cache_unpin); + +/* Asserts that split_start/split_finish and the cursors they point to are non-null, then forwards to IoCtxImpl::object_list_slice(). start and finish are dereferenced without a check. No tracepoints are emitted here. */ extern "C" void LIBRADOS_C_API_DEFAULT_F(rados_object_list_slice)( + rados_ioctx_t io, + const rados_object_list_cursor start, + const rados_object_list_cursor finish, + const size_t n, + const size_t m, + rados_object_list_cursor *split_start, + rados_object_list_cursor *split_finish) +{ + librados::IoCtxImpl *ctx = (librados::IoCtxImpl *)io; + + ceph_assert(split_start); + ceph_assert(split_finish); + hobject_t *split_start_hobj = (hobject_t*)(*split_start); + hobject_t *split_finish_hobj = (hobject_t*)(*split_finish); + ceph_assert(split_start_hobj); + ceph_assert(split_finish_hobj); + hobject_t *start_hobj = (hobject_t*)(start); + hobject_t *finish_hobj = (hobject_t*)(finish); + + ctx->object_list_slice( + *start_hobj, + *finish_hobj, + n, + m, + split_start_hobj, + split_finish_hobj); +} +LIBRADOS_C_API_BASE_DEFAULT(rados_object_list_slice); diff --git a/src/librados/librados_c.h b/src/librados/librados_c.h new file mode 100644 index 000000000..33381d518 --- /dev/null +++ b/src/librados/librados_c.h @@ -0,0 +1,29 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef LIBRADOS_C_H +#define LIBRADOS_C_H + +#include "include/types.h" +#include "include/rados/librados.h" + +namespace __librados_base { + +/* Pool statistics record declared inside namespace __librados_base; every field is a uint64_t counter. */ struct rados_pool_stat_t { + uint64_t num_bytes; + uint64_t num_kb; + uint64_t num_objects; + uint64_t num_object_clones; + uint64_t num_object_copies; + uint64_t 
num_objects_missing_on_primary; + uint64_t num_objects_unfound; + uint64_t num_objects_degraded; + uint64_t num_rd; + uint64_t num_rd_kb; + uint64_t num_wr; + uint64_t num_wr_kb; +}; + +} // namespace __librados_base + +#endif // LIBRADOS_C_H diff --git a/src/librados/librados_cxx.cc b/src/librados/librados_cxx.cc new file mode 100644 index 000000000..8c9ac3c91 --- /dev/null +++ b/src/librados/librados_cxx.cc @@ -0,0 +1,3177 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2004-2012 Sage Weil <sage@newdream.net> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <limits.h> + +#include "common/config.h" +#include "common/errno.h" +#include "common/ceph_argparse.h" +#include "common/ceph_json.h" +#include "common/common_init.h" +#include "common/TracepointProvider.h" +#include "common/hobject.h" +#include "common/async/waiter.h" +#include "include/rados/librados.h" +#include "include/rados/librados.hpp" +#include "include/types.h" +#include <include/stringify.h> + +#include "librados/AioCompletionImpl.h" +#include "librados/IoCtxImpl.h" +#include "librados/PoolAsyncCompletionImpl.h" +#include "librados/RadosClient.h" +#include "librados/RadosXattrIter.h" +#include "librados/ListObjectImpl.h" +#include "librados/librados_util.h" +#include "cls/lock/cls_lock_client.h" + +#include <string> +#include <map> +#include <set> +#include <vector> +#include <list> +#include <stdexcept> +#include <system_error> + +#ifdef WITH_LTTNG +#define TRACEPOINT_DEFINE +#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#include "tracing/librados.h" +#undef TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#undef TRACEPOINT_DEFINE +#else +#define tracepoint(...) 
+#endif + +using std::string; +using std::map; +using std::set; +using std::vector; +using std::list; + +#define dout_subsys ceph_subsys_rados +#undef dout_prefix +#define dout_prefix *_dout << "librados: " + +static TracepointProvider::Traits tracepoint_traits("librados_tp.so", "rados_tracing"); + +/* + * Structure of this file + * + * RadosClient and the related classes are the internal implementation of librados. + * Above that layer sits the C API, found in include/rados/librados.h, and + * the C++ API, found in include/rados/librados.hpp + * + * The C++ API sometimes implements things in terms of the C API. + * Both the C++ and C API rely on RadosClient. + * + * Visually: + * +--------------------------------------+ + * | C++ API | + * +--------------------+ | + * | C API | | + * +--------------------+-----------------+ + * | RadosClient | + * +--------------------------------------+ + */ + +namespace librados { + +/* Backing state of the public ObjectOperation: the internal ::ObjectOperation plus a real_time value rt and a pointer prt that starts out NULL. */ struct ObjectOperationImpl { + ::ObjectOperation o; + real_time rt; + real_time *prt; + + ObjectOperationImpl() : prt(NULL) {} +}; + +} + +/* Returns the size() of the wrapped ::ObjectOperation, or 0 when there is no impl. */ size_t librados::ObjectOperation::size() +{ + /* test impl itself: the address of a member reached through a null impl is not a valid null check */ ::ObjectOperation *o = impl ? &impl->o : nullptr; + if (o) + return o->size(); + else + return 0; +} + +// deprecated: forwards to set_op_flags2() +void librados::ObjectOperation::set_op_flags(ObjectOperationFlags flags) +{ + set_op_flags2((int)flags); +} + +/* Converts flags with get_op_flags() and passes the result to set_last_op_flags() on the wrapped operation. */ void librados::ObjectOperation::set_op_flags2(int flags) +{ + ceph_assert(impl); + impl->o.set_last_op_flags(get_op_flags(flags)); +} + +/* cmp_bl is copied into a local bufferlist before being handed to ::ObjectOperation::cmpext(). */ void librados::ObjectOperation::cmpext(uint64_t off, + const bufferlist &cmp_bl, + int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + bufferlist c = cmp_bl; + o->cmpext(off, c, prval); +} + +/* String-mode comparison: v is passed through with CEPH_OSD_CMPXATTR_MODE_STRING. */ void librados::ObjectOperation::cmpxattr(const char *name, uint8_t op, const bufferlist& v) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->cmpxattr(name, op, CEPH_OSD_CMPXATTR_MODE_STRING, v); +} + +/* Integer comparison: v is encoded into a bufferlist and compared with CEPH_OSD_CMPXATTR_MODE_U64. */ void librados::ObjectOperation::cmpxattr(const char *name, uint8_t op, uint64_t v) +{ + 
ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + bufferlist bl; + encode(v, bl); + o->cmpxattr(name, op, CEPH_OSD_CMPXATTR_MODE_U64, bl); +} + +/* Forwards ver to ::ObjectOperation::assert_version(). */ void librados::ObjectOperation::assert_version(uint64_t ver) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->assert_version(ver); +} + +/* Implemented as a stat() call with all three output pointers null. */ void librados::ObjectOperation::assert_exists() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->stat(nullptr, nullptr, nullptr); +} + +/* Class method call that captures no output. */ void librados::ObjectOperation::exec(const char *cls, const char *method, + bufferlist& inbl) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->call(cls, method, inbl); +} + +/* Class method call writing its output to outbl and its result to prval; no completion context is passed (NULL). */ void librados::ObjectOperation::exec(const char *cls, const char *method, bufferlist& inbl, bufferlist *outbl, int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->call(cls, method, inbl, outbl, NULL, prval); +} + +/* Context that owns an output bufferlist; finish() passes r and that bufferlist to completion->handle_completion() and then deletes the completion object. */ class ObjectOpCompletionCtx : public Context { + librados::ObjectOperationCompletion *completion; + bufferlist bl; +public: + explicit ObjectOpCompletionCtx(librados::ObjectOperationCompletion *c) : completion(c) {} + void finish(int r) override { + completion->handle_completion(r, bl); + delete completion; + } + + bufferlist *outbl() { + return &bl; + } +}; + +/* Class method call whose output lands in a new ObjectOpCompletionCtx's buffer; prval is NULL. The completion object is deleted by ObjectOpCompletionCtx::finish(). */ void librados::ObjectOperation::exec(const char *cls, const char *method, bufferlist& inbl, librados::ObjectOperationCompletion *completion) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + + ObjectOpCompletionCtx *ctx = new ObjectOpCompletionCtx(completion); + + o->call(cls, method, inbl, ctx->outbl(), ctx, NULL); +} + +/* stat with the modification time reported as time_t. */ void librados::ObjectReadOperation::stat(uint64_t *psize, time_t *pmtime, int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->stat(psize, pmtime, prval); +} + +/* stat with the modification time reported as struct timespec. */ void librados::ObjectReadOperation::stat2(uint64_t *psize, struct timespec *pts, int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->stat(psize, pts, prval); +} + +void librados::ObjectReadOperation::read(size_t 
off, uint64_t len, bufferlist *pbl, int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + /* no completion context is attached (NULL) */ o->read(off, len, pbl, prval, NULL); +} + +/* Forwards all arguments unchanged to ::ObjectOperation::sparse_read(). */ void librados::ObjectReadOperation::sparse_read(uint64_t off, uint64_t len, + std::map<uint64_t,uint64_t> *m, + bufferlist *data_bl, int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->sparse_read(off, len, m, data_bl, prval); +} + +/* type is mapped through get_checksum_op_type(); no completion context is attached (nullptr). */ void librados::ObjectReadOperation::checksum(rados_checksum_type_t type, + const bufferlist &init_value_bl, + uint64_t off, size_t len, + size_t chunk_size, bufferlist *pbl, + int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->checksum(get_checksum_op_type(type), init_value_bl, off, len, chunk_size, + pbl, prval, nullptr); +} + +/* Forwards to ::ObjectOperation::getxattr(). */ void librados::ObjectReadOperation::getxattr(const char *name, bufferlist *pbl, int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->getxattr(name, pbl, prval); +} + +/* Filtered variant without a "more" flag: nullptr is passed for pmore. */ void librados::ObjectReadOperation::omap_get_vals( + const std::string &start_after, + const std::string &filter_prefix, + uint64_t max_return, + std::map<std::string, bufferlist> *out_vals, + int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->omap_get_vals(start_after, filter_prefix, max_return, out_vals, nullptr, + prval); +} + +/* Filtered variant that forwards pmore. */ void librados::ObjectReadOperation::omap_get_vals2( + const std::string &start_after, + const std::string &filter_prefix, + uint64_t max_return, + std::map<std::string, bufferlist> *out_vals, + bool *pmore, + int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->omap_get_vals(start_after, filter_prefix, max_return, out_vals, pmore, + prval); +} + +/* Unfiltered variant: an empty filter prefix and a nullptr pmore are passed. */ void librados::ObjectReadOperation::omap_get_vals( + const std::string &start_after, + uint64_t max_return, + std::map<std::string, bufferlist> *out_vals, + int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->omap_get_vals(start_after, "", max_return, out_vals, nullptr, prval); +} + +/* Unfiltered variant that forwards pmore; an empty filter prefix is passed. */ void 
librados::ObjectReadOperation::omap_get_vals2( + const std::string &start_after, + uint64_t max_return, + std::map<std::string, bufferlist> *out_vals, + bool *pmore, + int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->omap_get_vals(start_after, "", max_return, out_vals, pmore, prval); +} + +/* Key listing without a "more" flag: nullptr is passed for pmore. */ void librados::ObjectReadOperation::omap_get_keys( + const std::string &start_after, + uint64_t max_return, + std::set<std::string> *out_keys, + int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->omap_get_keys(start_after, max_return, out_keys, nullptr, prval); +} + +/* Key listing that forwards pmore. */ void librados::ObjectReadOperation::omap_get_keys2( + const std::string &start_after, + uint64_t max_return, + std::set<std::string> *out_keys, + bool *pmore, + int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->omap_get_keys(start_after, max_return, out_keys, pmore, prval); +} + +/* Forwards to ::ObjectOperation::omap_get_header(). */ void librados::ObjectReadOperation::omap_get_header(bufferlist *bl, int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->omap_get_header(bl, prval); +} + +/* Forwards to ::ObjectOperation::omap_get_vals_by_keys(). */ void librados::ObjectReadOperation::omap_get_vals_by_keys( + const std::set<std::string> &keys, + std::map<std::string, bufferlist> *map, + int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->omap_get_vals_by_keys(keys, map, prval); +} + +/* Forwards the assertions map unchanged to ::ObjectOperation::omap_cmp(). */ void librados::ObjectOperation::omap_cmp( + const std::map<std::string, pair<bufferlist, int> > &assertions, + int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->omap_cmp(assertions, prval); +} + +/* Forwards to ::ObjectOperation::list_watchers(). */ void librados::ObjectReadOperation::list_watchers( + list<obj_watch_t> *out_watchers, + int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->list_watchers(out_watchers, prval); +} + +/* Forwards to ::ObjectOperation::list_snaps(). */ void librados::ObjectReadOperation::list_snaps( + snap_set_t *out_snaps, + int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->list_snaps(out_snaps, prval); +} + +void 
librados::ObjectReadOperation::is_dirty(bool *is_dirty, int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->is_dirty(is_dirty, prval); +} + +/* Repeats omap_get_vals2 requests while max_return is positive and more entries are reported, resuming after the last key returned each time. Returns the first negative operate() result, -EINVAL if more is reported with an empty batch, otherwise 0. */ int librados::IoCtx::omap_get_vals(const std::string& oid, + const std::string& orig_start_after, + const std::string& filter_prefix, + uint64_t max_return, + std::map<std::string, bufferlist> *out_vals) +{ + bool first = true; + string start_after = orig_start_after; + bool more = true; + while (max_return > 0 && more) { + std::map<std::string,bufferlist> out; + ObjectReadOperation op; + op.omap_get_vals2(start_after, filter_prefix, max_return, &out, &more, + nullptr); + bufferlist bl; + int ret = operate(oid, &op, &bl); + if (ret < 0) { + return ret; + } + if (more) { + if (out.empty()) { + return -EINVAL; // more was reported but no entries came back, so start_after cannot advance + } + start_after = out.rbegin()->first; + } + if (out.size() <= max_return) { + max_return -= out.size(); + } else { + max_return = 0; + } + if (first) { + out_vals->swap(out); + first = false; + } else { + out_vals->insert(out.begin(), out.end()); + out.clear(); + } + } + return 0; +} + +/* Single request: returns operate()'s result when it is negative, otherwise the per-op result r. */ int librados::IoCtx::omap_get_vals2( + const std::string& oid, + const std::string& start_after, + const std::string& filter_prefix, + uint64_t max_return, + std::map<std::string, bufferlist> *out_vals, + bool *pmore) +{ + ObjectReadOperation op; + /* start from 0 so an unwritten result is never returned as an indeterminate value */ int r = 0; + op.omap_get_vals2(start_after, filter_prefix, max_return, out_vals, pmore, &r); + bufferlist bl; + int ret = operate(oid, &op, &bl); + if (ret < 0) + return ret; + return r; +} + +/* Forwards to ::ObjectOperation::getxattrs(). */ void librados::ObjectReadOperation::getxattrs(map<string, bufferlist> *pattrs, int *prval) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->getxattrs(pattrs, prval); +} + +/* Converts *pt into impl->rt and points impl->prt at it; a null pt leaves both untouched. */ void librados::ObjectWriteOperation::mtime(time_t *pt) +{ + ceph_assert(impl); + if (pt) { + impl->rt = ceph::real_clock::from_time_t(*pt); + impl->prt = &impl->rt; + } +} + +/* timespec variant of mtime(); a null pts leaves impl->rt and impl->prt untouched. */ void librados::ObjectWriteOperation::mtime2(struct timespec *pts) +{ + ceph_assert(impl); + if (pts) { + impl->rt = 
ceph::real_clock::from_timespec(*pts); + impl->prt = &impl->rt; + } +} + +/* Forwards exclusive to ::ObjectOperation::create(). */ void librados::ObjectWriteOperation::create(bool exclusive) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->create(exclusive); +} + +/* Overload kept with a category parameter that is ignored; behaves like create(exclusive). */ void librados::ObjectWriteOperation::create(bool exclusive, + const std::string& category) // unused +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->create(exclusive); +} + +/* bl is copied into a local bufferlist before being handed to ::ObjectOperation::write(). */ void librados::ObjectWriteOperation::write(uint64_t off, const bufferlist& bl) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + bufferlist c = bl; + o->write(off, c); +} + +/* bl is copied into a local bufferlist before being handed to ::ObjectOperation::write_full(). */ void librados::ObjectWriteOperation::write_full(const bufferlist& bl) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + bufferlist c = bl; + o->write_full(c); +} + +/* bl is copied into a local bufferlist before being handed to ::ObjectOperation::writesame(). */ void librados::ObjectWriteOperation::writesame(uint64_t off, uint64_t write_len, + const bufferlist& bl) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + bufferlist c = bl; + o->writesame(off, write_len, c); +} + +/* bl is copied into a local bufferlist before being handed to ::ObjectOperation::append(). */ void librados::ObjectWriteOperation::append(const bufferlist& bl) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + bufferlist c = bl; + o->append(c); +} + +/* Forwards to ::ObjectOperation::remove(). */ void librados::ObjectWriteOperation::remove() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->remove(); +} + +/* Forwards off to ::ObjectOperation::truncate(). */ void librados::ObjectWriteOperation::truncate(uint64_t off) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->truncate(off); +} + +/* Forwards off and len to ::ObjectOperation::zero(). */ void librados::ObjectWriteOperation::zero(uint64_t off, uint64_t len) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->zero(off, len); +} + +/* Forwards name to ::ObjectOperation::rmxattr(). */ void librados::ObjectWriteOperation::rmxattr(const char *name) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->rmxattr(name); +} + +/* Forwards name and v to ::ObjectOperation::setxattr(). */ void librados::ObjectWriteOperation::setxattr(const char *name, const bufferlist& v) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->setxattr(name, v); +} + +void librados::ObjectWriteOperation::setxattr(const char *name, + const 
buffer::list&& v) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + /* NOTE(review): v is a const rvalue reference, so std::move(v) yields a const rvalue; confirm which setxattr overload this selects */ o->setxattr(name, std::move(v)); +} + +/* Forwards the key/value map to ::ObjectOperation::omap_set(). */ void librados::ObjectWriteOperation::omap_set( + const map<string, bufferlist> &map) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->omap_set(map); +} + +/* bl is copied into a local bufferlist before being handed to ::ObjectOperation::omap_set_header(). */ void librados::ObjectWriteOperation::omap_set_header(const bufferlist &bl) +{ + ceph_assert(impl); + bufferlist c = bl; + ::ObjectOperation *o = &impl->o; + o->omap_set_header(c); +} + +/* Forwards to ::ObjectOperation::omap_clear(). */ void librados::ObjectWriteOperation::omap_clear() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->omap_clear(); +} + +/* Forwards the key set to ::ObjectOperation::omap_rm_keys(). */ void librados::ObjectWriteOperation::omap_rm_keys( + const std::set<std::string> &to_rm) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->omap_rm_keys(to_rm); +} + +/* The source snapshot sequence and object locator are taken from src_ioctx's implementation; a literal 0 is passed for the argument between src_version and src_fadvise_flags. */ void librados::ObjectWriteOperation::copy_from(const std::string& src, + const IoCtx& src_ioctx, + uint64_t src_version, + uint32_t src_fadvise_flags) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->copy_from(object_t(src), src_ioctx.io_ctx_impl->snap_seq, + src_ioctx.io_ctx_impl->oloc, src_version, 0, src_fadvise_flags); +} + +/* Like copy_from, additionally forwarding truncate_seq and truncate_size to ::ObjectOperation::copy_from2(). */ void librados::ObjectWriteOperation::copy_from2(const std::string& src, + const IoCtx& src_ioctx, + uint64_t src_version, + uint32_t truncate_seq, + uint64_t truncate_size, + uint32_t src_fadvise_flags) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->copy_from2(object_t(src), src_ioctx.io_ctx_impl->snap_seq, + src_ioctx.io_ctx_impl->oloc, src_version, 0, + truncate_seq, truncate_size, src_fadvise_flags); +} + +/* Forwards to ::ObjectOperation::undirty(). */ void librados::ObjectWriteOperation::undirty() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->undirty(); +} + +/* Forwards to ::ObjectOperation::cache_flush(). */ void librados::ObjectReadOperation::cache_flush() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->cache_flush(); +} + +/* Forwards to ::ObjectOperation::cache_try_flush(). */ void librados::ObjectReadOperation::cache_try_flush() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->cache_try_flush(); +} + +void 
librados::ObjectReadOperation::cache_evict() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->cache_evict(); +} + +/* Forwards to ::ObjectOperation::tier_flush(). */ void librados::ObjectReadOperation::tier_flush() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->tier_flush(); +} + +/* Forwards to ::ObjectOperation::tier_evict(). */ void librados::ObjectReadOperation::tier_evict() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->tier_evict(); +} + +/* The target snapshot sequence and object locator are taken from tgt_ioctx's implementation. */ void librados::ObjectWriteOperation::set_redirect(const std::string& tgt_obj, + const IoCtx& tgt_ioctx, + uint64_t tgt_version, + int flag) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->set_redirect(object_t(tgt_obj), tgt_ioctx.io_ctx_impl->snap_seq, + tgt_ioctx.io_ctx_impl->oloc, tgt_version, flag); +} + +/* Declared on ObjectReadOperation; the target object locator is taken from tgt_ioctx's implementation. */ void librados::ObjectReadOperation::set_chunk(uint64_t src_offset, + uint64_t src_length, + const IoCtx& tgt_ioctx, + string tgt_oid, + uint64_t tgt_offset, + int flag) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->set_chunk(src_offset, src_length, + tgt_ioctx.io_ctx_impl->oloc, object_t(tgt_oid), tgt_offset, flag); +} + +/* Forwards to ::ObjectOperation::tier_promote(). */ void librados::ObjectWriteOperation::tier_promote() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->tier_promote(); +} + +/* Forwards to ::ObjectOperation::unset_manifest(). */ void librados::ObjectWriteOperation::unset_manifest() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->unset_manifest(); +} + +/* cmdbl is copied into a local bufferlist before being handed to ::ObjectOperation::tmap_update(). */ void librados::ObjectWriteOperation::tmap_update(const bufferlist& cmdbl) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + bufferlist c = cmdbl; + o->tmap_update(c); +} + +/* Calls ::ObjectOperation::rollback(snapid), the same call snap_rollback() makes. */ void librados::ObjectWriteOperation::selfmanaged_snap_rollback(snap_t snapid) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->rollback(snapid); +} + +// You must specify the snapid not the name normally used with pool snapshots +void librados::ObjectWriteOperation::snap_rollback(snap_t snapid) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->rollback(snapid); +} + +/* Two-argument form: a flags value of 0 is passed to ::ObjectOperation::set_alloc_hint(). */ void librados::ObjectWriteOperation::set_alloc_hint( + uint64_t 
expected_object_size, + uint64_t expected_write_size) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->set_alloc_hint(expected_object_size, expected_write_size, 0); +} +void librados::ObjectWriteOperation::set_alloc_hint2( + uint64_t expected_object_size, + uint64_t expected_write_size, + uint32_t flags) +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->set_alloc_hint(expected_object_size, expected_write_size, flags); +} + +void librados::ObjectWriteOperation::cache_pin() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->cache_pin(); +} + +void librados::ObjectWriteOperation::cache_unpin() +{ + ceph_assert(impl); + ::ObjectOperation *o = &impl->o; + o->cache_unpin(); +} + +librados::WatchCtx:: +~WatchCtx() +{ +} + +librados::WatchCtx2:: +~WatchCtx2() +{ +} + +///////////////////////////// NObjectIteratorImpl ///////////////////////////// +librados::NObjectIteratorImpl::NObjectIteratorImpl(ObjListCtx *ctx_) + : ctx(ctx_) +{ +} + +librados::NObjectIteratorImpl::~NObjectIteratorImpl() +{ + ctx.reset(); +} + +librados::NObjectIteratorImpl::NObjectIteratorImpl(const NObjectIteratorImpl &rhs) +{ + *this = rhs; +} + +librados::NObjectIteratorImpl& librados::NObjectIteratorImpl::operator=(const librados::NObjectIteratorImpl &rhs) +{ + if (&rhs == this) + return *this; + if (rhs.ctx.get() == NULL) { + ctx.reset(); + return *this; + } + Objecter::NListContext *list_ctx = new Objecter::NListContext(*rhs.ctx->nlc); + ctx.reset(new ObjListCtx(rhs.ctx->ctx, list_ctx)); + cur_obj = rhs.cur_obj; + return *this; +} + +bool librados::NObjectIteratorImpl::operator==(const librados::NObjectIteratorImpl& rhs) const { + + if (ctx.get() == NULL) { + if (rhs.ctx.get() == NULL) + return true; + return rhs.ctx->nlc->at_end(); + } + if (rhs.ctx.get() == NULL) { + // Redundant but same as ObjectIterator version + if (ctx.get() == NULL) + return true; + return ctx->nlc->at_end(); + } + return ctx.get() == rhs.ctx.get(); +} + +bool 
librados::NObjectIteratorImpl::operator!=(const librados::NObjectIteratorImpl& rhs) const { + return !(*this == rhs); +} + +const librados::ListObject& librados::NObjectIteratorImpl::operator*() const { + return cur_obj; +} + +const librados::ListObject* librados::NObjectIteratorImpl::operator->() const { + return &cur_obj; +} + +librados::NObjectIteratorImpl& librados::NObjectIteratorImpl::operator++() +{ + get_next(); + return *this; +} + +librados::NObjectIteratorImpl librados::NObjectIteratorImpl::operator++(int) +{ + librados::NObjectIteratorImpl ret(*this); + get_next(); + return ret; +} + +uint32_t librados::NObjectIteratorImpl::seek(uint32_t pos) +{ + uint32_t r = rados_nobjects_list_seek(ctx.get(), pos); + get_next(); + return r; +} + +uint32_t librados::NObjectIteratorImpl::seek(const ObjectCursor& cursor) +{ + uint32_t r = rados_nobjects_list_seek_cursor(ctx.get(), (rados_object_list_cursor)cursor.c_cursor); + get_next(); + return r; +} + +librados::ObjectCursor librados::NObjectIteratorImpl::get_cursor() +{ + librados::ObjListCtx *lh = (librados::ObjListCtx *)ctx.get(); + librados::ObjectCursor oc; + oc.set(lh->ctx->nlist_get_cursor(lh->nlc)); + return oc; +} + +void librados::NObjectIteratorImpl::set_filter(const bufferlist &bl) +{ + ceph_assert(ctx); + ctx->nlc->filter = bl; +} + +void librados::NObjectIteratorImpl::get_next() +{ + const char *entry, *key, *nspace; + size_t entry_size, key_size, nspace_size; + if (ctx->nlc->at_end()) + return; + int ret = rados_nobjects_list_next2(ctx.get(), &entry, &key, &nspace, + &entry_size, &key_size, &nspace_size); + if (ret == -ENOENT) { + return; + } + else if (ret) { + throw std::system_error(-ret, std::system_category(), + "rados_nobjects_list_next2"); + } + + if (cur_obj.impl == NULL) + cur_obj.impl = new ListObjectImpl(); + cur_obj.impl->nspace = string{nspace, nspace_size}; + cur_obj.impl->oid = string{entry, entry_size}; + cur_obj.impl->locator = key ? 
string(key, key_size) : string(); +} + +uint32_t librados::NObjectIteratorImpl::get_pg_hash_position() const +{ + return ctx->nlc->get_pg_hash_position(); +} + +///////////////////////////// NObjectIterator ///////////////////////////// +librados::NObjectIterator::NObjectIterator(ObjListCtx *ctx_) +{ + impl = new NObjectIteratorImpl(ctx_); +} + +librados::NObjectIterator::~NObjectIterator() +{ + delete impl; +} + +librados::NObjectIterator::NObjectIterator(const NObjectIterator &rhs) +{ + if (rhs.impl == NULL) { + impl = NULL; + return; + } + impl = new NObjectIteratorImpl(); + *impl = *(rhs.impl); +} + +librados::NObjectIterator& librados::NObjectIterator::operator=(const librados::NObjectIterator &rhs) +{ + if (rhs.impl == NULL) { + delete impl; + impl = NULL; + return *this; + } + if (impl == NULL) + impl = new NObjectIteratorImpl(); + *impl = *(rhs.impl); + return *this; +} + +bool librados::NObjectIterator::operator==(const librados::NObjectIterator& rhs) const +{ + if (impl && rhs.impl) { + return *impl == *(rhs.impl); + } else { + return impl == rhs.impl; + } +} + +bool librados::NObjectIterator::operator!=(const librados::NObjectIterator& rhs) const +{ + return !(*this == rhs); +} + +const librados::ListObject& librados::NObjectIterator::operator*() const { + ceph_assert(impl); + return *(impl->get_listobjectp()); +} + +const librados::ListObject* librados::NObjectIterator::operator->() const { + ceph_assert(impl); + return impl->get_listobjectp(); +} + +librados::NObjectIterator& librados::NObjectIterator::operator++() +{ + ceph_assert(impl); + impl->get_next(); + return *this; +} + +librados::NObjectIterator librados::NObjectIterator::operator++(int) +{ + librados::NObjectIterator ret(*this); + impl->get_next(); + return ret; +} + +uint32_t librados::NObjectIterator::seek(uint32_t pos) +{ + ceph_assert(impl); + return impl->seek(pos); +} + +uint32_t librados::NObjectIterator::seek(const ObjectCursor& cursor) +{ + ceph_assert(impl); + return 
impl->seek(cursor); +} + +librados::ObjectCursor librados::NObjectIterator::get_cursor() +{ + ceph_assert(impl); + return impl->get_cursor(); +} + +void librados::NObjectIterator::set_filter(const bufferlist &bl) +{ + impl->set_filter(bl); +} + +void librados::NObjectIterator::get_next() +{ + ceph_assert(impl); + impl->get_next(); +} + +uint32_t librados::NObjectIterator::get_pg_hash_position() const +{ + ceph_assert(impl); + return impl->get_pg_hash_position(); +} + +const librados::NObjectIterator librados::NObjectIterator::__EndObjectIterator(NULL); + +///////////////////////////// PoolAsyncCompletion ////////////////////////////// +librados::PoolAsyncCompletion::PoolAsyncCompletion::~PoolAsyncCompletion() +{ + auto c = reinterpret_cast<PoolAsyncCompletionImpl *>(pc); + c->release(); +} + +int librados::PoolAsyncCompletion::PoolAsyncCompletion::set_callback(void *cb_arg, + rados_callback_t cb) +{ + PoolAsyncCompletionImpl *c = (PoolAsyncCompletionImpl *)pc; + return c->set_callback(cb_arg, cb); +} + +int librados::PoolAsyncCompletion::PoolAsyncCompletion::wait() +{ + PoolAsyncCompletionImpl *c = (PoolAsyncCompletionImpl *)pc; + return c->wait(); +} + +bool librados::PoolAsyncCompletion::PoolAsyncCompletion::is_complete() +{ + PoolAsyncCompletionImpl *c = (PoolAsyncCompletionImpl *)pc; + return c->is_complete(); +} + +int librados::PoolAsyncCompletion::PoolAsyncCompletion::get_return_value() +{ + PoolAsyncCompletionImpl *c = (PoolAsyncCompletionImpl *)pc; + return c->get_return_value(); +} + +void librados::PoolAsyncCompletion::PoolAsyncCompletion::release() +{ + delete this; +} + +///////////////////////////// AioCompletion ////////////////////////////// +librados::AioCompletion::AioCompletion::~AioCompletion() +{ + auto c = reinterpret_cast<AioCompletionImpl *>(pc); + c->release(); +} + +int librados::AioCompletion::AioCompletion::set_complete_callback(void *cb_arg, rados_callback_t cb) +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return 
c->set_complete_callback(cb_arg, cb); +} + +int librados::AioCompletion::AioCompletion::set_safe_callback(void *cb_arg, rados_callback_t cb) +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return c->set_safe_callback(cb_arg, cb); +} + +int librados::AioCompletion::AioCompletion::wait_for_complete() +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return c->wait_for_complete(); +} + +int librados::AioCompletion::AioCompletion::wait_for_safe() +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return c->wait_for_complete(); +} + +bool librados::AioCompletion::AioCompletion::is_complete() +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return c->is_complete(); +} + +bool librados::AioCompletion::AioCompletion::is_safe() +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return c->is_safe(); +} + +int librados::AioCompletion::AioCompletion::wait_for_complete_and_cb() +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return c->wait_for_complete_and_cb(); +} + +int librados::AioCompletion::AioCompletion::wait_for_safe_and_cb() +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return c->wait_for_safe_and_cb(); +} + +bool librados::AioCompletion::AioCompletion::is_complete_and_cb() +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return c->is_complete_and_cb(); +} + +bool librados::AioCompletion::AioCompletion::is_safe_and_cb() +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return c->is_safe_and_cb(); +} + +int librados::AioCompletion::AioCompletion::get_return_value() +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return c->get_return_value(); +} + +int librados::AioCompletion::AioCompletion::get_version() +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return c->get_version(); +} + +uint64_t librados::AioCompletion::AioCompletion::get_version64() +{ + AioCompletionImpl *c = (AioCompletionImpl *)pc; + return c->get_version(); +} + +void librados::AioCompletion::AioCompletion::release() +{ + delete this; +} 
+ +///////////////////////////// IoCtx ////////////////////////////// +librados::IoCtx::IoCtx() : io_ctx_impl(NULL) +{ +} + +void librados::IoCtx::from_rados_ioctx_t(rados_ioctx_t p, IoCtx &io) +{ + IoCtxImpl *io_ctx_impl = (IoCtxImpl*)p; + + io.io_ctx_impl = io_ctx_impl; + if (io_ctx_impl) { + io_ctx_impl->get(); + } +} + +librados::IoCtx::IoCtx(const IoCtx& rhs) +{ + io_ctx_impl = rhs.io_ctx_impl; + if (io_ctx_impl) { + io_ctx_impl->get(); + } +} + +librados::IoCtx& librados::IoCtx::operator=(const IoCtx& rhs) +{ + if (io_ctx_impl) + io_ctx_impl->put(); + io_ctx_impl = rhs.io_ctx_impl; + io_ctx_impl->get(); + return *this; +} + +librados::IoCtx::IoCtx(IoCtx&& rhs) noexcept + : io_ctx_impl(std::exchange(rhs.io_ctx_impl, nullptr)) +{ +} + +librados::IoCtx& librados::IoCtx::operator=(IoCtx&& rhs) noexcept +{ + if (io_ctx_impl) + io_ctx_impl->put(); + io_ctx_impl = std::exchange(rhs.io_ctx_impl, nullptr); + return *this; +} + +librados::IoCtx::~IoCtx() +{ + close(); +} + +bool librados::IoCtx::is_valid() const { + return io_ctx_impl != nullptr; +} + +void librados::IoCtx::close() +{ + if (io_ctx_impl) + io_ctx_impl->put(); + io_ctx_impl = 0; +} + +void librados::IoCtx::dup(const IoCtx& rhs) +{ + if (io_ctx_impl) + io_ctx_impl->put(); + io_ctx_impl = new IoCtxImpl(); + io_ctx_impl->get(); + io_ctx_impl->dup(*rhs.io_ctx_impl); +} + +int librados::IoCtx::set_auid(uint64_t auid_) +{ + return -EOPNOTSUPP; +} + +int librados::IoCtx::set_auid_async(uint64_t auid_, PoolAsyncCompletion *c) +{ + return -EOPNOTSUPP; +} + +int librados::IoCtx::get_auid(uint64_t *auid_) +{ + return -EOPNOTSUPP; +} + +bool librados::IoCtx::pool_requires_alignment() +{ + return io_ctx_impl->client->pool_requires_alignment(get_id()); +} + +int librados::IoCtx::pool_requires_alignment2(bool *requires) +{ + return io_ctx_impl->client->pool_requires_alignment2(get_id(), requires); +} + +uint64_t librados::IoCtx::pool_required_alignment() +{ + return 
io_ctx_impl->client->pool_required_alignment(get_id()); +} + +int librados::IoCtx::pool_required_alignment2(uint64_t *alignment) +{ + return io_ctx_impl->client->pool_required_alignment2(get_id(), alignment); +} + +std::string librados::IoCtx::get_pool_name() +{ + std::string s; + io_ctx_impl->client->pool_get_name(get_id(), &s); + return s; +} + +std::string librados::IoCtx::get_pool_name() const +{ + return io_ctx_impl->get_cached_pool_name(); +} + +uint64_t librados::IoCtx::get_instance_id() const +{ + return io_ctx_impl->client->get_instance_id(); +} + +int librados::IoCtx::create(const std::string& oid, bool exclusive) +{ + object_t obj(oid); + return io_ctx_impl->create(obj, exclusive); +} + +int librados::IoCtx::create(const std::string& oid, bool exclusive, + const std::string& category) // unused +{ + object_t obj(oid); + return io_ctx_impl->create(obj, exclusive); +} + +int librados::IoCtx::write(const std::string& oid, bufferlist& bl, size_t len, uint64_t off) +{ + object_t obj(oid); + return io_ctx_impl->write(obj, bl, len, off); +} + +int librados::IoCtx::append(const std::string& oid, bufferlist& bl, size_t len) +{ + object_t obj(oid); + return io_ctx_impl->append(obj, bl, len); +} + +int librados::IoCtx::write_full(const std::string& oid, bufferlist& bl) +{ + object_t obj(oid); + return io_ctx_impl->write_full(obj, bl); +} + +int librados::IoCtx::writesame(const std::string& oid, bufferlist& bl, + size_t write_len, uint64_t off) +{ + object_t obj(oid); + return io_ctx_impl->writesame(obj, bl, write_len, off); +} + + +int librados::IoCtx::read(const std::string& oid, bufferlist& bl, size_t len, uint64_t off) +{ + object_t obj(oid); + return io_ctx_impl->read(obj, bl, len, off); +} + +int librados::IoCtx::checksum(const std::string& oid, + rados_checksum_type_t type, + const bufferlist &init_value_bl, size_t len, + uint64_t off, size_t chunk_size, bufferlist *pbl) +{ + object_t obj(oid); + return io_ctx_impl->checksum(obj, get_checksum_op_type(type), 
init_value_bl, + len, off, chunk_size, pbl); +} + +int librados::IoCtx::remove(const std::string& oid) +{ + object_t obj(oid); + return io_ctx_impl->remove(obj); +} + +int librados::IoCtx::remove(const std::string& oid, int flags) +{ + object_t obj(oid); + return io_ctx_impl->remove(obj, flags); +} + +int librados::IoCtx::trunc(const std::string& oid, uint64_t size) +{ + object_t obj(oid); + return io_ctx_impl->trunc(obj, size); +} + +int librados::IoCtx::mapext(const std::string& oid, uint64_t off, size_t len, + std::map<uint64_t,uint64_t>& m) +{ + object_t obj(oid); + return io_ctx_impl->mapext(obj, off, len, m); +} + +int librados::IoCtx::cmpext(const std::string& oid, uint64_t off, bufferlist& cmp_bl) +{ + object_t obj(oid); + return io_ctx_impl->cmpext(obj, off, cmp_bl); +} + +int librados::IoCtx::sparse_read(const std::string& oid, std::map<uint64_t,uint64_t>& m, + bufferlist& bl, size_t len, uint64_t off) +{ + object_t obj(oid); + return io_ctx_impl->sparse_read(obj, m, bl, len, off); +} + +int librados::IoCtx::getxattr(const std::string& oid, const char *name, bufferlist& bl) +{ + object_t obj(oid); + return io_ctx_impl->getxattr(obj, name, bl); +} + +int librados::IoCtx::getxattrs(const std::string& oid, map<std::string, bufferlist>& attrset) +{ + object_t obj(oid); + return io_ctx_impl->getxattrs(obj, attrset); +} + +int librados::IoCtx::setxattr(const std::string& oid, const char *name, bufferlist& bl) +{ + object_t obj(oid); + return io_ctx_impl->setxattr(obj, name, bl); +} + +int librados::IoCtx::rmxattr(const std::string& oid, const char *name) +{ + object_t obj(oid); + return io_ctx_impl->rmxattr(obj, name); +} + +int librados::IoCtx::stat(const std::string& oid, uint64_t *psize, time_t *pmtime) +{ + object_t obj(oid); + return io_ctx_impl->stat(obj, psize, pmtime); +} + +int librados::IoCtx::stat2(const std::string& oid, uint64_t *psize, struct timespec *pts) +{ + object_t obj(oid); + return io_ctx_impl->stat2(obj, psize, pts); +} + +int 
librados::IoCtx::exec(const std::string& oid, const char *cls, const char *method, + bufferlist& inbl, bufferlist& outbl) +{ + object_t obj(oid); + return io_ctx_impl->exec(obj, cls, method, inbl, outbl); +} + +int librados::IoCtx::tmap_update(const std::string& oid, bufferlist& cmdbl) +{ + object_t obj(oid); + return io_ctx_impl->tmap_update(obj, cmdbl); +} + +int librados::IoCtx::omap_get_vals(const std::string& oid, + const std::string& start_after, + uint64_t max_return, + std::map<std::string, bufferlist> *out_vals) +{ + return omap_get_vals(oid, start_after, string(), max_return, out_vals); +} + +int librados::IoCtx::omap_get_vals2( + const std::string& oid, + const std::string& start_after, + uint64_t max_return, + std::map<std::string, bufferlist> *out_vals, + bool *pmore) +{ + ObjectReadOperation op; + int r; + op.omap_get_vals2(start_after, max_return, out_vals, pmore, &r); + bufferlist bl; + int ret = operate(oid, &op, &bl); + if (ret < 0) + return ret; + return r; +} + +int librados::IoCtx::omap_get_keys(const std::string& oid, + const std::string& orig_start_after, + uint64_t max_return, + std::set<std::string> *out_keys) +{ + bool first = true; + string start_after = orig_start_after; + bool more = true; + while (max_return > 0 && more) { + std::set<std::string> out; + ObjectReadOperation op; + op.omap_get_keys2(start_after, max_return, &out, &more, nullptr); + bufferlist bl; + int ret = operate(oid, &op, &bl); + if (ret < 0) { + return ret; + } + if (more) { + if (out.empty()) { + return -EINVAL; // wth + } + start_after = *out.rbegin(); + } + if (out.size() <= max_return) { + max_return -= out.size(); + } else { + max_return = 0; + } + if (first) { + out_keys->swap(out); + first = false; + } else { + out_keys->insert(out.begin(), out.end()); + out.clear(); + } + } + return 0; +} + +int librados::IoCtx::omap_get_keys2( + const std::string& oid, + const std::string& start_after, + uint64_t max_return, + std::set<std::string> *out_keys, + bool *pmore) 
+{ + ObjectReadOperation op; + int r; + op.omap_get_keys2(start_after, max_return, out_keys, pmore, &r); + bufferlist bl; + int ret = operate(oid, &op, &bl); + if (ret < 0) + return ret; + return r; +} + +int librados::IoCtx::omap_get_header(const std::string& oid, + bufferlist *bl) +{ + ObjectReadOperation op; + int r; + op.omap_get_header(bl, &r); + bufferlist b; + int ret = operate(oid, &op, &b); + if (ret < 0) + return ret; + + return r; +} + +int librados::IoCtx::omap_get_vals_by_keys(const std::string& oid, + const std::set<std::string>& keys, + std::map<std::string, bufferlist> *vals) +{ + ObjectReadOperation op; + int r; + bufferlist bl; + op.omap_get_vals_by_keys(keys, vals, &r); + int ret = operate(oid, &op, &bl); + if (ret < 0) + return ret; + + return r; +} + +int librados::IoCtx::omap_set(const std::string& oid, + const map<string, bufferlist>& m) +{ + ObjectWriteOperation op; + op.omap_set(m); + return operate(oid, &op); +} + +int librados::IoCtx::omap_set_header(const std::string& oid, + const bufferlist& bl) +{ + ObjectWriteOperation op; + op.omap_set_header(bl); + return operate(oid, &op); +} + +int librados::IoCtx::omap_clear(const std::string& oid) +{ + ObjectWriteOperation op; + op.omap_clear(); + return operate(oid, &op); +} + +int librados::IoCtx::omap_rm_keys(const std::string& oid, + const std::set<std::string>& keys) +{ + ObjectWriteOperation op; + op.omap_rm_keys(keys); + return operate(oid, &op); +} + +int librados::IoCtx::operate(const std::string& oid, librados::ObjectWriteOperation *o) +{ + object_t obj(oid); + if (unlikely(!o->impl)) + return -EINVAL; + return io_ctx_impl->operate(obj, &o->impl->o, (ceph::real_time *)o->impl->prt); +} + +int librados::IoCtx::operate(const std::string& oid, librados::ObjectWriteOperation *o, int flags) +{ + object_t obj(oid); + if (unlikely(!o->impl)) + return -EINVAL; + return io_ctx_impl->operate(obj, &o->impl->o, (ceph::real_time *)o->impl->prt, translate_flags(flags)); +} + +int 
librados::IoCtx::operate(const std::string& oid, librados::ObjectReadOperation *o, bufferlist *pbl) +{ + object_t obj(oid); + if (unlikely(!o->impl)) + return -EINVAL; + return io_ctx_impl->operate_read(obj, &o->impl->o, pbl); +} + +int librados::IoCtx::operate(const std::string& oid, librados::ObjectReadOperation *o, bufferlist *pbl, int flags) +{ + object_t obj(oid); + if (unlikely(!o->impl)) + return -EINVAL; + return io_ctx_impl->operate_read(obj, &o->impl->o, pbl, translate_flags(flags)); +} + +int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c, + librados::ObjectWriteOperation *o) +{ + object_t obj(oid); + if (unlikely(!o->impl)) + return -EINVAL; + return io_ctx_impl->aio_operate(obj, &o->impl->o, c->pc, + io_ctx_impl->snapc, 0); +} +int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c, + ObjectWriteOperation *o, int flags) +{ + object_t obj(oid); + if (unlikely(!o->impl)) + return -EINVAL; + return io_ctx_impl->aio_operate(obj, &o->impl->o, c->pc, + io_ctx_impl->snapc, + translate_flags(flags)); +} + +int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c, + librados::ObjectWriteOperation *o, + snap_t snap_seq, std::vector<snap_t>& snaps) +{ + if (unlikely(!o->impl)) + return -EINVAL; + object_t obj(oid); + vector<snapid_t> snv; + snv.resize(snaps.size()); + for (size_t i = 0; i < snaps.size(); ++i) + snv[i] = snaps[i]; + SnapContext snapc(snap_seq, snv); + return io_ctx_impl->aio_operate(obj, &o->impl->o, c->pc, + snapc, 0); +} + +int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c, + librados::ObjectWriteOperation *o, + snap_t snap_seq, std::vector<snap_t>& snaps, + const blkin_trace_info *trace_info) +{ + if (unlikely(!o->impl)) + return -EINVAL; + object_t obj(oid); + vector<snapid_t> snv; + snv.resize(snaps.size()); + for (size_t i = 0; i < snaps.size(); ++i) + snv[i] = snaps[i]; + SnapContext snapc(snap_seq, snv); + return io_ctx_impl->aio_operate(obj, &o->impl->o, 
c->pc, + snapc, 0, trace_info); +} + +int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c, + librados::ObjectWriteOperation *o, + snap_t snap_seq, std::vector<snap_t>& snaps, int flags, + const blkin_trace_info *trace_info) +{ + if (unlikely(!o->impl)) + return -EINVAL; + object_t obj(oid); + vector<snapid_t> snv; + snv.resize(snaps.size()); + for (size_t i = 0; i < snaps.size(); ++i) + snv[i] = snaps[i]; + SnapContext snapc(snap_seq, snv); + return io_ctx_impl->aio_operate(obj, &o->impl->o, c->pc, snapc, + translate_flags(flags), trace_info); +} + +int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c, + librados::ObjectReadOperation *o, + bufferlist *pbl) +{ + if (unlikely(!o->impl)) + return -EINVAL; + object_t obj(oid); + return io_ctx_impl->aio_operate_read(obj, &o->impl->o, c->pc, + 0, pbl); +} + +// deprecated +int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c, + librados::ObjectReadOperation *o, + snap_t snapid_unused_deprecated, + int flags, bufferlist *pbl) +{ + if (unlikely(!o->impl)) + return -EINVAL; + object_t obj(oid); + int op_flags = 0; + if (flags & OPERATION_BALANCE_READS) + op_flags |= CEPH_OSD_FLAG_BALANCE_READS; + if (flags & OPERATION_LOCALIZE_READS) + op_flags |= CEPH_OSD_FLAG_LOCALIZE_READS; + if (flags & OPERATION_ORDER_READS_WRITES) + op_flags |= CEPH_OSD_FLAG_RWORDERED; + + return io_ctx_impl->aio_operate_read(obj, &o->impl->o, c->pc, + op_flags, pbl); +} + +int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c, + librados::ObjectReadOperation *o, + int flags, bufferlist *pbl) +{ + if (unlikely(!o->impl)) + return -EINVAL; + object_t obj(oid); + return io_ctx_impl->aio_operate_read(obj, &o->impl->o, c->pc, + translate_flags(flags), pbl); +} + +int librados::IoCtx::aio_operate(const std::string& oid, AioCompletion *c, + librados::ObjectReadOperation *o, + int flags, bufferlist *pbl, const blkin_trace_info *trace_info) +{ + if (unlikely(!o->impl)) + 
return -EINVAL; + object_t obj(oid); + return io_ctx_impl->aio_operate_read(obj, &o->impl->o, c->pc, + translate_flags(flags), pbl, trace_info); +} + +void librados::IoCtx::snap_set_read(snap_t seq) +{ + io_ctx_impl->set_snap_read(seq); +} + +int librados::IoCtx::selfmanaged_snap_set_write_ctx(snap_t seq, vector<snap_t>& snaps) +{ + vector<snapid_t> snv; + snv.resize(snaps.size()); + for (unsigned i=0; i<snaps.size(); i++) + snv[i] = snaps[i]; + return io_ctx_impl->set_snap_write_context(seq, snv); +} + +int librados::IoCtx::snap_create(const char *snapname) +{ + return io_ctx_impl->snap_create(snapname); +} + +int librados::IoCtx::snap_lookup(const char *name, snap_t *snapid) +{ + return io_ctx_impl->snap_lookup(name, snapid); +} + +int librados::IoCtx::snap_get_stamp(snap_t snapid, time_t *t) +{ + return io_ctx_impl->snap_get_stamp(snapid, t); +} + +int librados::IoCtx::snap_get_name(snap_t snapid, std::string *s) +{ + return io_ctx_impl->snap_get_name(snapid, s); +} + +int librados::IoCtx::snap_remove(const char *snapname) +{ + return io_ctx_impl->snap_remove(snapname); +} + +int librados::IoCtx::snap_list(std::vector<snap_t> *snaps) +{ + return io_ctx_impl->snap_list(snaps); +} + +int librados::IoCtx::snap_rollback(const std::string& oid, const char *snapname) +{ + return io_ctx_impl->rollback(oid, snapname); +} + +// Deprecated name kept for backward compatibility +int librados::IoCtx::rollback(const std::string& oid, const char *snapname) +{ + return snap_rollback(oid, snapname); +} + +int librados::IoCtx::selfmanaged_snap_create(uint64_t *snapid) +{ + return io_ctx_impl->selfmanaged_snap_create(snapid); +} + +void librados::IoCtx::aio_selfmanaged_snap_create(uint64_t *snapid, + AioCompletion *c) +{ + io_ctx_impl->aio_selfmanaged_snap_create(snapid, c->pc); +} + +int librados::IoCtx::selfmanaged_snap_remove(uint64_t snapid) +{ + return io_ctx_impl->selfmanaged_snap_remove(snapid); +} + +void librados::IoCtx::aio_selfmanaged_snap_remove(uint64_t snapid, + 
AioCompletion *c) +{ + io_ctx_impl->aio_selfmanaged_snap_remove(snapid, c->pc); +} + +int librados::IoCtx::selfmanaged_snap_rollback(const std::string& oid, uint64_t snapid) +{ + return io_ctx_impl->selfmanaged_snap_rollback_object(oid, + io_ctx_impl->snapc, + snapid); +} + +int librados::IoCtx::lock_exclusive(const std::string &oid, const std::string &name, + const std::string &cookie, + const std::string &description, + struct timeval * duration, uint8_t flags) +{ + utime_t dur = utime_t(); + if (duration) + dur.set_from_timeval(duration); + + return rados::cls::lock::lock(this, oid, name, ClsLockType::EXCLUSIVE, cookie, "", + description, dur, flags); +} + +int librados::IoCtx::lock_shared(const std::string &oid, const std::string &name, + const std::string &cookie, const std::string &tag, + const std::string &description, + struct timeval * duration, uint8_t flags) +{ + utime_t dur = utime_t(); + if (duration) + dur.set_from_timeval(duration); + + return rados::cls::lock::lock(this, oid, name, ClsLockType::SHARED, cookie, tag, + description, dur, flags); +} + +int librados::IoCtx::unlock(const std::string &oid, const std::string &name, + const std::string &cookie) +{ + return rados::cls::lock::unlock(this, oid, name, cookie); +} + +struct AioUnlockCompletion : public librados::ObjectOperationCompletion { + librados::AioCompletionImpl *completion; + AioUnlockCompletion(librados::AioCompletion *c) : completion(c->pc) { + completion->get(); + }; + void handle_completion(int r, bufferlist& outbl) override { + rados_callback_t cb = completion->callback_complete; + void *cb_arg = completion->callback_complete_arg; + cb(completion, cb_arg); + completion->lock.lock(); + completion->callback_complete = NULL; + completion->cond.notify_all(); + completion->put_unlock(); + } +}; + +int librados::IoCtx::aio_unlock(const std::string &oid, const std::string &name, + const std::string &cookie, AioCompletion *c) +{ + return rados::cls::lock::aio_unlock(this, oid, name, cookie, 
c); +} + +int librados::IoCtx::break_lock(const std::string &oid, const std::string &name, + const std::string &client, const std::string &cookie) +{ + entity_name_t locker; + if (!locker.parse(client)) + return -EINVAL; + return rados::cls::lock::break_lock(this, oid, name, cookie, locker); +} + +int librados::IoCtx::list_lockers(const std::string &oid, const std::string &name, + int *exclusive, + std::string *tag, + std::list<librados::locker_t> *lockers) +{ + std::list<librados::locker_t> tmp_lockers; + map<rados::cls::lock::locker_id_t, rados::cls::lock::locker_info_t> rados_lockers; + std::string tmp_tag; + ClsLockType tmp_type; + int r = rados::cls::lock::get_lock_info(this, oid, name, &rados_lockers, &tmp_type, &tmp_tag); + if (r < 0) + return r; + + map<rados::cls::lock::locker_id_t, rados::cls::lock::locker_info_t>::iterator map_it; + for (map_it = rados_lockers.begin(); map_it != rados_lockers.end(); ++map_it) { + librados::locker_t locker; + locker.client = stringify(map_it->first.locker); + locker.cookie = map_it->first.cookie; + locker.address = stringify(map_it->second.addr); + tmp_lockers.push_back(locker); + } + + if (lockers) + *lockers = tmp_lockers; + if (tag) + *tag = tmp_tag; + if (exclusive) { + if (tmp_type == ClsLockType::EXCLUSIVE) + *exclusive = 1; + else + *exclusive = 0; + } + + return tmp_lockers.size(); +} + +librados::NObjectIterator librados::IoCtx::nobjects_begin( + const bufferlist &filter) +{ + rados_list_ctx_t listh; + rados_nobjects_list_open(io_ctx_impl, &listh); + NObjectIterator iter((ObjListCtx*)listh); + if (filter.length() > 0) { + iter.set_filter(filter); + } + iter.get_next(); + return iter; +} + +librados::NObjectIterator librados::IoCtx::nobjects_begin( + uint32_t pos, const bufferlist &filter) +{ + rados_list_ctx_t listh; + rados_nobjects_list_open(io_ctx_impl, &listh); + NObjectIterator iter((ObjListCtx*)listh); + if (filter.length() > 0) { + iter.set_filter(filter); + } + iter.seek(pos); + return iter; +} + 
+librados::NObjectIterator librados::IoCtx::nobjects_begin( + const ObjectCursor& cursor, const bufferlist &filter) +{ + rados_list_ctx_t listh; + rados_nobjects_list_open(io_ctx_impl, &listh); + NObjectIterator iter((ObjListCtx*)listh); + if (filter.length() > 0) { + iter.set_filter(filter); + } + iter.seek(cursor); + return iter; +} + +const librados::NObjectIterator& librados::IoCtx::nobjects_end() const +{ + return NObjectIterator::__EndObjectIterator; +} + +int librados::IoCtx::hit_set_list(uint32_t hash, AioCompletion *c, + std::list< std::pair<time_t, time_t> > *pls) +{ + return io_ctx_impl->hit_set_list(hash, c->pc, pls); +} + +int librados::IoCtx::hit_set_get(uint32_t hash, AioCompletion *c, time_t stamp, + bufferlist *pbl) +{ + return io_ctx_impl->hit_set_get(hash, c->pc, stamp, pbl); +} + + + +uint64_t librados::IoCtx::get_last_version() +{ + return io_ctx_impl->last_version(); +} + +int librados::IoCtx::aio_read(const std::string& oid, librados::AioCompletion *c, + bufferlist *pbl, size_t len, uint64_t off) +{ + return io_ctx_impl->aio_read(oid, c->pc, pbl, len, off, + io_ctx_impl->snap_seq); +} + +int librados::IoCtx::aio_read(const std::string& oid, librados::AioCompletion *c, + bufferlist *pbl, size_t len, uint64_t off, + uint64_t snapid) +{ + return io_ctx_impl->aio_read(oid, c->pc, pbl, len, off, snapid); +} + +int librados::IoCtx::aio_exec(const std::string& oid, + librados::AioCompletion *c, const char *cls, + const char *method, bufferlist& inbl, + bufferlist *outbl) +{ + object_t obj(oid); + return io_ctx_impl->aio_exec(obj, c->pc, cls, method, inbl, outbl); +} + +int librados::IoCtx::aio_cmpext(const std::string& oid, + librados::AioCompletion *c, + uint64_t off, + bufferlist& cmp_bl) +{ + return io_ctx_impl->aio_cmpext(oid, c->pc, off, cmp_bl); +} + +int librados::IoCtx::aio_sparse_read(const std::string& oid, librados::AioCompletion *c, + std::map<uint64_t,uint64_t> *m, bufferlist *data_bl, + size_t len, uint64_t off) +{ + return 
io_ctx_impl->aio_sparse_read(oid, c->pc, + m, data_bl, len, off, + io_ctx_impl->snap_seq); +} + +int librados::IoCtx::aio_sparse_read(const std::string& oid, librados::AioCompletion *c, + std::map<uint64_t,uint64_t> *m, bufferlist *data_bl, + size_t len, uint64_t off, uint64_t snapid) +{ + return io_ctx_impl->aio_sparse_read(oid, c->pc, + m, data_bl, len, off, snapid); +} + +int librados::IoCtx::aio_write(const std::string& oid, librados::AioCompletion *c, + const bufferlist& bl, size_t len, uint64_t off) +{ + return io_ctx_impl->aio_write(oid, c->pc, bl, len, off); +} + +int librados::IoCtx::aio_append(const std::string& oid, librados::AioCompletion *c, + const bufferlist& bl, size_t len) +{ + return io_ctx_impl->aio_append(oid, c->pc, bl, len); +} + +int librados::IoCtx::aio_write_full(const std::string& oid, librados::AioCompletion *c, + const bufferlist& bl) +{ + object_t obj(oid); + return io_ctx_impl->aio_write_full(obj, c->pc, bl); +} + +int librados::IoCtx::aio_writesame(const std::string& oid, librados::AioCompletion *c, + const bufferlist& bl, size_t write_len, + uint64_t off) +{ + return io_ctx_impl->aio_writesame(oid, c->pc, bl, write_len, off); +} + + +int librados::IoCtx::aio_remove(const std::string& oid, librados::AioCompletion *c) +{ + return io_ctx_impl->aio_remove(oid, c->pc); +} + +int librados::IoCtx::aio_remove(const std::string& oid, librados::AioCompletion *c, int flags) +{ + return io_ctx_impl->aio_remove(oid, c->pc, flags); +} + +int librados::IoCtx::aio_flush_async(librados::AioCompletion *c) +{ + io_ctx_impl->flush_aio_writes_async(c->pc); + return 0; +} + +int librados::IoCtx::aio_flush() +{ + io_ctx_impl->flush_aio_writes(); + return 0; +} + +struct AioGetxattrDataPP { + AioGetxattrDataPP(librados::AioCompletionImpl *c, bufferlist *_bl) : + bl(_bl), completion(c) {} + bufferlist *bl; + struct librados::CB_AioCompleteAndSafe completion; +}; + +static void rados_aio_getxattr_completepp(rados_completion_t c, void *arg) { + 
AioGetxattrDataPP *cdata = reinterpret_cast<AioGetxattrDataPP*>(arg); + int rc = rados_aio_get_return_value(c); + if (rc >= 0) { + rc = cdata->bl->length(); + } + cdata->completion(rc); + delete cdata; +} + +int librados::IoCtx::aio_getxattr(const std::string& oid, librados::AioCompletion *c, + const char *name, bufferlist& bl) +{ + // create data object to be passed to async callback + AioGetxattrDataPP *cdata = new AioGetxattrDataPP(c->pc, &bl); + if (!cdata) { + return -ENOMEM; + } + // create completion callback + librados::AioCompletionImpl *comp = new librados::AioCompletionImpl; + comp->set_complete_callback(cdata, rados_aio_getxattr_completepp); + // call actual getxattr from IoCtxImpl + object_t obj(oid); + return io_ctx_impl->aio_getxattr(obj, comp, name, bl); +} + +int librados::IoCtx::aio_getxattrs(const std::string& oid, AioCompletion *c, + map<std::string, bufferlist>& attrset) +{ + object_t obj(oid); + return io_ctx_impl->aio_getxattrs(obj, c->pc, attrset); +} + +int librados::IoCtx::aio_setxattr(const std::string& oid, AioCompletion *c, + const char *name, bufferlist& bl) +{ + object_t obj(oid); + return io_ctx_impl->aio_setxattr(obj, c->pc, name, bl); +} + +int librados::IoCtx::aio_rmxattr(const std::string& oid, AioCompletion *c, + const char *name) +{ + object_t obj(oid); + return io_ctx_impl->aio_rmxattr(obj, c->pc, name); +} + +int librados::IoCtx::aio_stat(const std::string& oid, librados::AioCompletion *c, + uint64_t *psize, time_t *pmtime) +{ + object_t obj(oid); + return io_ctx_impl->aio_stat(obj, c->pc, psize, pmtime); +} + +int librados::IoCtx::aio_cancel(librados::AioCompletion *c) +{ + return io_ctx_impl->aio_cancel(c->pc); +} + +int librados::IoCtx::watch(const string& oid, uint64_t ver, uint64_t *cookie, + librados::WatchCtx *ctx) +{ + object_t obj(oid); + return io_ctx_impl->watch(obj, cookie, ctx, NULL); +} + +int librados::IoCtx::watch2(const string& oid, uint64_t *cookie, + librados::WatchCtx2 *ctx2) +{ + object_t obj(oid); + 
return io_ctx_impl->watch(obj, cookie, NULL, ctx2); +} + +int librados::IoCtx::watch3(const string& oid, uint64_t *cookie, + librados::WatchCtx2 *ctx2, uint32_t timeout) +{ + object_t obj(oid); + return io_ctx_impl->watch(obj, cookie, NULL, ctx2, timeout); +} + +int librados::IoCtx::aio_watch(const string& oid, AioCompletion *c, + uint64_t *cookie, + librados::WatchCtx2 *ctx2) +{ + object_t obj(oid); + return io_ctx_impl->aio_watch(obj, c->pc, cookie, NULL, ctx2); +} + +int librados::IoCtx::aio_watch2(const string& oid, AioCompletion *c, + uint64_t *cookie, + librados::WatchCtx2 *ctx2, + uint32_t timeout) +{ + object_t obj(oid); + return io_ctx_impl->aio_watch(obj, c->pc, cookie, NULL, ctx2, timeout); +} + +int librados::IoCtx::unwatch(const string& oid, uint64_t handle) +{ + return io_ctx_impl->unwatch(handle); +} + +int librados::IoCtx::unwatch2(uint64_t handle) +{ + return io_ctx_impl->unwatch(handle); +} + +int librados::IoCtx::aio_unwatch(uint64_t handle, AioCompletion *c) +{ + return io_ctx_impl->aio_unwatch(handle, c->pc); +} + +int librados::IoCtx::watch_check(uint64_t handle) +{ + return io_ctx_impl->watch_check(handle); +} + +int librados::IoCtx::notify(const string& oid, uint64_t ver, bufferlist& bl) +{ + object_t obj(oid); + return io_ctx_impl->notify(obj, bl, 0, NULL, NULL, NULL); +} + +int librados::IoCtx::notify2(const string& oid, bufferlist& bl, + uint64_t timeout_ms, bufferlist *preplybl) +{ + object_t obj(oid); + return io_ctx_impl->notify(obj, bl, timeout_ms, preplybl, NULL, NULL); +} + +int librados::IoCtx::aio_notify(const string& oid, AioCompletion *c, + bufferlist& bl, uint64_t timeout_ms, + bufferlist *preplybl) +{ + object_t obj(oid); + return io_ctx_impl->aio_notify(obj, c->pc, bl, timeout_ms, preplybl, NULL, + NULL); +} + +void librados::IoCtx::decode_notify_response(bufferlist &bl, + std::vector<librados::notify_ack_t> *acks, + std::vector<librados::notify_timeout_t> *timeouts) +{ + map<pair<uint64_t,uint64_t>,bufferlist> acked; + 
set<pair<uint64_t,uint64_t>> missed; + + auto iter = bl.cbegin(); + decode(acked, iter); + decode(missed, iter); + + for (auto &[who, payload] : acked) { + acks->emplace_back(librados::notify_ack_t{who.first, who.second, payload}); + } + for (auto &[notifier_id, cookie] : missed) { + timeouts->emplace_back(librados::notify_timeout_t{notifier_id, cookie}); + } +} + +void librados::IoCtx::notify_ack(const std::string& o, + uint64_t notify_id, uint64_t handle, + bufferlist& bl) +{ + io_ctx_impl->notify_ack(o, notify_id, handle, bl); +} + +int librados::IoCtx::list_watchers(const std::string& oid, + std::list<obj_watch_t> *out_watchers) +{ + ObjectReadOperation op; + int r; + op.list_watchers(out_watchers, &r); + bufferlist bl; + int ret = operate(oid, &op, &bl); + if (ret < 0) + return ret; + + return r; +} + +int librados::IoCtx::list_snaps(const std::string& oid, + snap_set_t *out_snaps) +{ + ObjectReadOperation op; + int r; + if (io_ctx_impl->snap_seq != CEPH_SNAPDIR) + return -EINVAL; + op.list_snaps(out_snaps, &r); + bufferlist bl; + int ret = operate(oid, &op, &bl); + if (ret < 0) + return ret; + + return r; +} + +void librados::IoCtx::set_notify_timeout(uint32_t timeout) +{ + io_ctx_impl->set_notify_timeout(timeout); +} + +int librados::IoCtx::set_alloc_hint(const std::string& o, + uint64_t expected_object_size, + uint64_t expected_write_size) +{ + object_t oid(o); + return io_ctx_impl->set_alloc_hint(oid, expected_object_size, + expected_write_size, 0); +} + +int librados::IoCtx::set_alloc_hint2(const std::string& o, + uint64_t expected_object_size, + uint64_t expected_write_size, + uint32_t flags) +{ + object_t oid(o); + return io_ctx_impl->set_alloc_hint(oid, expected_object_size, + expected_write_size, flags); +} + +void librados::IoCtx::set_assert_version(uint64_t ver) +{ + io_ctx_impl->set_assert_version(ver); +} + +void librados::IoCtx::locator_set_key(const string& key) +{ + io_ctx_impl->oloc.key = key; +} + +void librados::IoCtx::set_namespace(const 
string& nspace) +{ + io_ctx_impl->oloc.nspace = nspace; +} + +std::string librados::IoCtx::get_namespace() const +{ + return io_ctx_impl->oloc.nspace; +} + +int64_t librados::IoCtx::get_id() +{ + return io_ctx_impl->get_id(); +} + +uint32_t librados::IoCtx::get_object_hash_position(const std::string& oid) +{ + uint32_t hash; + int r = io_ctx_impl->get_object_hash_position(oid, &hash); + if (r < 0) + hash = 0; + return hash; +} + +uint32_t librados::IoCtx::get_object_pg_hash_position(const std::string& oid) +{ + uint32_t hash; + int r = io_ctx_impl->get_object_pg_hash_position(oid, &hash); + if (r < 0) + hash = 0; + return hash; +} + +int librados::IoCtx::get_object_hash_position2( + const std::string& oid, uint32_t *hash_position) +{ + return io_ctx_impl->get_object_hash_position(oid, hash_position); +} + +int librados::IoCtx::get_object_pg_hash_position2( + const std::string& oid, uint32_t *pg_hash_position) +{ + return io_ctx_impl->get_object_pg_hash_position(oid, pg_hash_position); +} + +librados::config_t librados::IoCtx::cct() +{ + return (config_t)io_ctx_impl->client->cct; +} + +librados::IoCtx::IoCtx(IoCtxImpl *io_ctx_impl_) + : io_ctx_impl(io_ctx_impl_) +{ +} + +void librados::IoCtx::set_osdmap_full_try() +{ + io_ctx_impl->extra_op_flags |= CEPH_OSD_FLAG_FULL_TRY; +} + +void librados::IoCtx::unset_osdmap_full_try() +{ + io_ctx_impl->extra_op_flags &= ~CEPH_OSD_FLAG_FULL_TRY; +} + +bool librados::IoCtx::get_pool_full_try() +{ + return (io_ctx_impl->extra_op_flags & CEPH_OSD_FLAG_FULL_TRY) != 0; +} + +void librados::IoCtx::set_pool_full_try() +{ + io_ctx_impl->extra_op_flags |= CEPH_OSD_FLAG_FULL_TRY; +} + +void librados::IoCtx::unset_pool_full_try() +{ + io_ctx_impl->extra_op_flags &= ~CEPH_OSD_FLAG_FULL_TRY; +} + +///////////////////////////// Rados ////////////////////////////// +void librados::Rados::version(int *major, int *minor, int *extra) +{ + rados_version(major, minor, extra); +} + +librados::Rados::Rados() : client(NULL) +{ +} + 
+librados::Rados::Rados(IoCtx &ioctx) +{ + client = ioctx.io_ctx_impl->client; + ceph_assert(client != NULL); + client->get(); +} + +librados::Rados::~Rados() +{ + shutdown(); +} + +void librados::Rados::from_rados_t(rados_t cluster, Rados &rados) { + if (rados.client) { + rados.client->put(); + } + rados.client = static_cast<RadosClient*>(cluster); + if (rados.client) { + rados.client->get(); + } +} + +int librados::Rados::init(const char * const id) +{ + return rados_create((rados_t *)&client, id); +} + +int librados::Rados::init2(const char * const name, + const char * const clustername, uint64_t flags) +{ + return rados_create2((rados_t *)&client, clustername, name, flags); +} + +int librados::Rados::init_with_context(config_t cct_) +{ + return rados_create_with_context((rados_t *)&client, (rados_config_t)cct_); +} + +int librados::Rados::connect() +{ + return client->connect(); +} + +librados::config_t librados::Rados::cct() +{ + return (config_t)client->cct; +} + +int librados::Rados::watch_flush() +{ + if (!client) + return -EINVAL; + return client->watch_flush(); +} + +int librados::Rados::aio_watch_flush(AioCompletion *c) +{ + if (!client) + return -EINVAL; + return client->async_watch_flush(c->pc); +} + +void librados::Rados::shutdown() +{ + if (!client) + return; + if (client->put()) { + client->shutdown(); + delete client; + client = NULL; + } +} + +uint64_t librados::Rados::get_instance_id() +{ + return client->get_instance_id(); +} + +int librados::Rados::get_min_compatible_osd(int8_t* require_osd_release) +{ + return client->get_min_compatible_osd(require_osd_release); +} + +int librados::Rados::get_min_compatible_client(int8_t* min_compat_client, + int8_t* require_min_compat_client) +{ + return client->get_min_compatible_client(min_compat_client, + require_min_compat_client); +} + +int librados::Rados::conf_read_file(const char * const path) const +{ + return rados_conf_read_file((rados_t)client, path); +} + +int librados::Rados::conf_parse_argv(int 
argc, const char ** argv) const +{ + return rados_conf_parse_argv((rados_t)client, argc, argv); +} + +int librados::Rados::conf_parse_argv_remainder(int argc, const char ** argv, + const char ** remargv) const +{ + return rados_conf_parse_argv_remainder((rados_t)client, argc, argv, remargv); +} + +int librados::Rados::conf_parse_env(const char *name) const +{ + return rados_conf_parse_env((rados_t)client, name); +} + +int librados::Rados::conf_set(const char *option, const char *value) +{ + return rados_conf_set((rados_t)client, option, value); +} + +int librados::Rados::conf_get(const char *option, std::string &val) +{ + char *str = NULL; + const auto& conf = client->cct->_conf; + int ret = conf.get_val(option, &str, -1); + if (ret) { + free(str); + return ret; + } + val = str; + free(str); + return 0; +} + +int librados::Rados::service_daemon_register( + const std::string& service, ///< service name (e.g., 'rgw') + const std::string& name, ///< daemon name (e.g., 'gwfoo') + const std::map<std::string,std::string>& metadata) ///< static metadata about daemon +{ + return client->service_daemon_register(service, name, metadata); +} + +int librados::Rados::service_daemon_update_status( + std::map<std::string,std::string>&& status) +{ + return client->service_daemon_update_status(std::move(status)); +} + +int librados::Rados::pool_create(const char *name) +{ + string str(name); + return client->pool_create(str); +} + +int librados::Rados::pool_create(const char *name, uint64_t auid) +{ + if (auid != CEPH_AUTH_UID_DEFAULT) { + return -EINVAL; + } + string str(name); + return client->pool_create(str); +} + +int librados::Rados::pool_create(const char *name, uint64_t auid, __u8 crush_rule) +{ + if (auid != CEPH_AUTH_UID_DEFAULT) { + return -EINVAL; + } + string str(name); + return client->pool_create(str, crush_rule); +} + +int librados::Rados::pool_create_with_rule(const char *name, __u8 crush_rule) +{ + string str(name); + return client->pool_create(str, crush_rule); 
+} + +int librados::Rados::pool_create_async(const char *name, PoolAsyncCompletion *c) +{ + string str(name); + return client->pool_create_async(str, c->pc); +} + +int librados::Rados::pool_create_async(const char *name, uint64_t auid, PoolAsyncCompletion *c) +{ + if (auid != CEPH_AUTH_UID_DEFAULT) { + return -EINVAL; + } + string str(name); + return client->pool_create_async(str, c->pc); +} + +int librados::Rados::pool_create_async(const char *name, uint64_t auid, __u8 crush_rule, + PoolAsyncCompletion *c) +{ + if (auid != CEPH_AUTH_UID_DEFAULT) { + return -EINVAL; + } + string str(name); + return client->pool_create_async(str, c->pc, crush_rule); +} + +int librados::Rados::pool_create_with_rule_async( + const char *name, __u8 crush_rule, + PoolAsyncCompletion *c) +{ + string str(name); + return client->pool_create_async(str, c->pc, crush_rule); +} + +int librados::Rados::pool_get_base_tier(int64_t pool_id, int64_t* base_tier) +{ + tracepoint(librados, rados_pool_get_base_tier_enter, (rados_t)client, pool_id); + int retval = client->pool_get_base_tier(pool_id, base_tier); + tracepoint(librados, rados_pool_get_base_tier_exit, retval, *base_tier); + return retval; +} + +int librados::Rados::pool_delete(const char *name) +{ + return client->pool_delete(name); +} + +int librados::Rados::pool_delete_async(const char *name, PoolAsyncCompletion *c) +{ + return client->pool_delete_async(name, c->pc); +} + +int librados::Rados::pool_list(std::list<std::string>& v) +{ + std::list<std::pair<int64_t, std::string> > pools; + int r = client->pool_list(pools); + if (r < 0) { + return r; + } + + v.clear(); + for (std::list<std::pair<int64_t, std::string> >::iterator it = pools.begin(); + it != pools.end(); ++it) { + v.push_back(it->second); + } + return 0; +} + +int librados::Rados::pool_list2(std::list<std::pair<int64_t, std::string> >& v) +{ + return client->pool_list(v); +} + +int64_t librados::Rados::pool_lookup(const char *name) +{ + return client->lookup_pool(name); +} + 
+int librados::Rados::pool_reverse_lookup(int64_t id, std::string *name) +{ + return client->pool_get_name(id, name, true); +} + +int librados::Rados::mon_command(string cmd, const bufferlist& inbl, + bufferlist *outbl, string *outs) +{ + vector<string> cmdvec; + cmdvec.push_back(cmd); + return client->mon_command(cmdvec, inbl, outbl, outs); +} + +int librados::Rados::osd_command(int osdid, std::string cmd, const bufferlist& inbl, + bufferlist *outbl, std::string *outs) +{ + vector<string> cmdvec; + cmdvec.push_back(cmd); + return client->osd_command(osdid, cmdvec, inbl, outbl, outs); +} + +int librados::Rados::mgr_command(std::string cmd, const bufferlist& inbl, + bufferlist *outbl, std::string *outs) +{ + vector<string> cmdvec; + cmdvec.push_back(cmd); + return client->mgr_command(cmdvec, inbl, outbl, outs); +} + + + +int librados::Rados::pg_command(const char *pgstr, std::string cmd, const bufferlist& inbl, + bufferlist *outbl, std::string *outs) +{ + vector<string> cmdvec; + cmdvec.push_back(cmd); + + pg_t pgid; + if (!pgid.parse(pgstr)) + return -EINVAL; + + return client->pg_command(pgid, cmdvec, inbl, outbl, outs); +} + +int librados::Rados::ioctx_create(const char *name, IoCtx &io) +{ + rados_ioctx_t p; + int ret = rados_ioctx_create((rados_t)client, name, &p); + if (ret) + return ret; + io.close(); + io.io_ctx_impl = (IoCtxImpl*)p; + return 0; +} + +int librados::Rados::ioctx_create2(int64_t pool_id, IoCtx &io) +{ + rados_ioctx_t p; + int ret = rados_ioctx_create2((rados_t)client, pool_id, &p); + if (ret) + return ret; + io.close(); + io.io_ctx_impl = (IoCtxImpl*)p; + return 0; +} + +void librados::Rados::test_blocklist_self(bool set) +{ + client->blocklist_self(set); +} + +int librados::Rados::get_pool_stats(std::list<string>& v, + stats_map& result) +{ + map<string,::pool_stat_t> rawresult; + bool per_pool = false; + int r = client->get_pool_stats(v, &rawresult, &per_pool); + for (map<string,::pool_stat_t>::iterator p = rawresult.begin(); + p != 
rawresult.end(); + ++p) { + pool_stat_t& pv = result[p->first]; + auto& pstat = p->second; + store_statfs_t &statfs = pstat.store_stats; + uint64_t allocated_bytes = pstat.get_allocated_data_bytes(per_pool) + + pstat.get_allocated_omap_bytes(per_pool); + // FIXME: raw_used_rate is unknown hence use 1.0 here + // meaning we keep net amount aggregated over all replicas + // Not a big deal so far since this field isn't exposed + uint64_t user_bytes = pstat.get_user_data_bytes(1.0, per_pool) + + pstat.get_user_omap_bytes(1.0, per_pool); + + object_stat_sum_t *sum = &p->second.stats.sum; + pv.num_kb = shift_round_up(allocated_bytes, 10); + pv.num_bytes = allocated_bytes; + pv.num_objects = sum->num_objects; + pv.num_object_clones = sum->num_object_clones; + pv.num_object_copies = sum->num_object_copies; + pv.num_objects_missing_on_primary = sum->num_objects_missing_on_primary; + pv.num_objects_unfound = sum->num_objects_unfound; + pv.num_objects_degraded = sum->num_objects_degraded; + pv.num_rd = sum->num_rd; + pv.num_rd_kb = sum->num_rd_kb; + pv.num_wr = sum->num_wr; + pv.num_wr_kb = sum->num_wr_kb; + pv.num_user_bytes = user_bytes; + pv.compressed_bytes_orig = statfs.data_compressed_original; + pv.compressed_bytes = statfs.data_compressed; + pv.compressed_bytes_alloc = statfs.data_compressed_allocated; + } + return r; +} + +int librados::Rados::get_pool_stats(std::list<string>& v, + std::map<string, stats_map>& result) +{ + stats_map m; + int r = get_pool_stats(v, m); + if (r < 0) + return r; + for (map<string,pool_stat_t>::iterator p = m.begin(); + p != m.end(); + ++p) { + result[p->first][string()] = p->second; + } + return r; +} + +int librados::Rados::get_pool_stats(std::list<string>& v, + string& category, // unused + std::map<string, stats_map>& result) +{ + return -EOPNOTSUPP; +} + +bool librados::Rados::get_pool_is_selfmanaged_snaps_mode(const std::string& pool) +{ + return client->get_pool_is_selfmanaged_snaps_mode(pool); +} + +int 
librados::Rados::cluster_stat(cluster_stat_t& result) +{ + ceph_statfs stats; + int r = client->get_fs_stats(stats); + result.kb = stats.kb; + result.kb_used = stats.kb_used; + result.kb_avail = stats.kb_avail; + result.num_objects = stats.num_objects; + return r; +} + +int librados::Rados::cluster_fsid(string *fsid) +{ + return client->get_fsid(fsid); +} + +namespace librados { + struct PlacementGroupImpl { + pg_t pgid; + }; + + PlacementGroup::PlacementGroup() + : impl{new PlacementGroupImpl} + {} + + PlacementGroup::PlacementGroup(const PlacementGroup& pg) + : impl{new PlacementGroupImpl} + { + impl->pgid = pg.impl->pgid; + } + + PlacementGroup::~PlacementGroup() + {} + + bool PlacementGroup::parse(const char* s) + { + return impl->pgid.parse(s); + } +} + +std::ostream& librados::operator<<(std::ostream& out, + const librados::PlacementGroup& pg) +{ + return out << pg.impl->pgid; +} + +int librados::Rados::get_inconsistent_pgs(int64_t pool_id, + std::vector<PlacementGroup>* pgs) +{ + std::vector<string> pgids; + if (auto ret = client->get_inconsistent_pgs(pool_id, &pgids); ret) { + return ret; + } + for (const auto& pgid : pgids) { + librados::PlacementGroup pg; + if (!pg.parse(pgid.c_str())) { + return -EINVAL; + } + pgs->emplace_back(pg); + } + return 0; +} + +int librados::Rados::get_inconsistent_objects(const PlacementGroup& pg, + const object_id_t &start_after, + unsigned max_return, + AioCompletion *c, + std::vector<inconsistent_obj_t>* objects, + uint32_t* interval) +{ + IoCtx ioctx; + const pg_t pgid = pg.impl->pgid; + int r = ioctx_create2(pgid.pool(), ioctx); + if (r < 0) { + return r; + } + + return ioctx.io_ctx_impl->get_inconsistent_objects(pgid, + start_after, + max_return, + c->pc, + objects, + interval); +} + +int librados::Rados::get_inconsistent_snapsets(const PlacementGroup& pg, + const object_id_t &start_after, + unsigned max_return, + AioCompletion *c, + std::vector<inconsistent_snapset_t>* snapsets, + uint32_t* interval) +{ + IoCtx ioctx; + 
const pg_t pgid = pg.impl->pgid; + int r = ioctx_create2(pgid.pool(), ioctx); + if (r < 0) { + return r; + } + + return ioctx.io_ctx_impl->get_inconsistent_snapsets(pgid, + start_after, + max_return, + c->pc, + snapsets, + interval); +} + +int librados::Rados::wait_for_latest_osdmap() +{ + return client->wait_for_latest_osdmap(); +} + +int librados::Rados::blocklist_add(const std::string& client_address, + uint32_t expire_seconds) +{ + return client->blocklist_add(client_address, expire_seconds); +} + +std::string librados::Rados::get_addrs() const { + return client->get_addrs(); +} + +librados::PoolAsyncCompletion *librados::Rados::pool_async_create_completion() +{ + PoolAsyncCompletionImpl *c = new PoolAsyncCompletionImpl; + return new PoolAsyncCompletion(c); +} + +librados::AioCompletion *librados::Rados::aio_create_completion() +{ + AioCompletionImpl *c = new AioCompletionImpl; + return new AioCompletion(c); +} + +librados::AioCompletion *librados::Rados::aio_create_completion(void *cb_arg, + callback_t cb_complete, + callback_t cb_safe) +{ + AioCompletionImpl *c; + int r = rados_aio_create_completion(cb_arg, cb_complete, cb_safe, (void**)&c); + ceph_assert(r == 0); + return new AioCompletion(c); +} + +librados::AioCompletion *librados::Rados::aio_create_completion(void *cb_arg, + callback_t cb_complete) +{ + AioCompletionImpl *c; + int r = rados_aio_create_completion2(cb_arg, cb_complete, (void**)&c); + ceph_assert(r == 0); + return new AioCompletion(c); +} + +librados::ObjectOperation::ObjectOperation() : impl(new ObjectOperationImpl) {} + +librados::ObjectOperation::ObjectOperation(ObjectOperation&& rhs) + : impl(rhs.impl) { + rhs.impl = nullptr; +} + +librados::ObjectOperation& +librados::ObjectOperation::operator =(ObjectOperation&& rhs) { + delete impl; + impl = rhs.impl; + rhs.impl = nullptr; + return *this; +} + +librados::ObjectOperation::~ObjectOperation() { + delete impl; +} + +///////////////////////////// ListObject ////////////////////////////// 
+librados::ListObject::ListObject() : impl(NULL) +{ +} + +librados::ListObject::ListObject(librados::ListObjectImpl *i): impl(i) +{ +} + +librados::ListObject::ListObject(const ListObject& rhs) +{ + if (rhs.impl == NULL) { + impl = NULL; + return; + } + impl = new ListObjectImpl(); + *impl = *(rhs.impl); +} + +librados::ListObject& librados::ListObject::operator=(const ListObject& rhs) +{ + if (rhs.impl == NULL) { + delete impl; + impl = NULL; + return *this; + } + if (impl == NULL) + impl = new ListObjectImpl(); + *impl = *(rhs.impl); + return *this; +} + +librados::ListObject::~ListObject() +{ + if (impl) + delete impl; + impl = NULL; +} + +const std::string& librados::ListObject::get_nspace() const +{ + return impl->get_nspace(); +} + +const std::string& librados::ListObject::get_oid() const +{ + return impl->get_oid(); +} + +const std::string& librados::ListObject::get_locator() const +{ + return impl->get_locator(); +} + +std::ostream& librados::operator<<(std::ostream& out, const librados::ListObject& lop) +{ + out << *(lop.impl); + return out; +} + +librados::ObjectCursor::ObjectCursor() +{ + c_cursor = (rados_object_list_cursor)new hobject_t(); +} + +librados::ObjectCursor::~ObjectCursor() +{ + hobject_t *h = (hobject_t *)c_cursor; + delete h; +} + +librados::ObjectCursor::ObjectCursor(rados_object_list_cursor c) +{ + if (!c) { + c_cursor = nullptr; + } else { + c_cursor = (rados_object_list_cursor)new hobject_t(*(hobject_t *)c); + } +} + +librados::ObjectCursor& librados::ObjectCursor::operator=(const librados::ObjectCursor& rhs) +{ + if (rhs.c_cursor != nullptr) { + hobject_t *h = (hobject_t*)rhs.c_cursor; + c_cursor = (rados_object_list_cursor)(new hobject_t(*h)); + } else { + c_cursor = nullptr; + } + return *this; +} + +bool librados::ObjectCursor::operator<(const librados::ObjectCursor &rhs) const +{ + const hobject_t lhs_hobj = (c_cursor == nullptr) ? hobject_t() : *((hobject_t*)c_cursor); + const hobject_t rhs_hobj = (rhs.c_cursor == nullptr) ? 
hobject_t() : *((hobject_t*)(rhs.c_cursor)); + return lhs_hobj < rhs_hobj; +} + +bool librados::ObjectCursor::operator==(const librados::ObjectCursor &rhs) const +{ + const hobject_t lhs_hobj = (c_cursor == nullptr) ? hobject_t() : *((hobject_t*)c_cursor); + const hobject_t rhs_hobj = (rhs.c_cursor == nullptr) ? hobject_t() : *((hobject_t*)(rhs.c_cursor)); + return cmp(lhs_hobj, rhs_hobj) == 0; +} +librados::ObjectCursor::ObjectCursor(const librados::ObjectCursor &rhs) +{ + *this = rhs; +} + +librados::ObjectCursor librados::IoCtx::object_list_begin() +{ + hobject_t *h = new hobject_t(io_ctx_impl->objecter->enumerate_objects_begin()); + ObjectCursor oc; + oc.set((rados_object_list_cursor)h); + return oc; +} + + +librados::ObjectCursor librados::IoCtx::object_list_end() +{ + hobject_t *h = new hobject_t(io_ctx_impl->objecter->enumerate_objects_end()); + librados::ObjectCursor oc; + oc.set((rados_object_list_cursor)h); + return oc; +} + + +void librados::ObjectCursor::set(rados_object_list_cursor c) +{ + delete (hobject_t*)c_cursor; + c_cursor = c; +} + +string librados::ObjectCursor::to_str() const +{ + stringstream ss; + ss << *(hobject_t *)c_cursor; + return ss.str(); +} + +bool librados::ObjectCursor::from_str(const string& s) +{ + if (s.empty()) { + *(hobject_t *)c_cursor = hobject_t(); + return true; + } + return ((hobject_t *)c_cursor)->parse(s); +} + +CEPH_RADOS_API std::ostream& librados::operator<<(std::ostream& os, const librados::ObjectCursor& oc) +{ + if (oc.c_cursor) { + os << *(hobject_t *)oc.c_cursor; + } else { + os << hobject_t(); + } + return os; +} + +bool librados::IoCtx::object_list_is_end(const ObjectCursor &oc) +{ + hobject_t *h = (hobject_t *)oc.c_cursor; + return h->is_max(); +} + +int librados::IoCtx::object_list(const ObjectCursor &start, + const ObjectCursor &finish, + const size_t result_item_count, + const bufferlist &filter, + std::vector<ObjectItem> *result, + ObjectCursor *next) +{ + ceph_assert(result != nullptr); + ceph_assert(next 
!= nullptr); + result->clear(); + + ceph::async::waiter<boost::system::error_code, + std::vector<librados::ListObjectImpl>, + hobject_t> w; + io_ctx_impl->objecter->enumerate_objects<librados::ListObjectImpl>( + io_ctx_impl->poolid, + io_ctx_impl->oloc.nspace, + *((hobject_t*)start.c_cursor), + *((hobject_t*)finish.c_cursor), + result_item_count, + filter, + w); + + auto [ec, obj_result, next_hash] = w.wait(); + if (ec) { + next->set((rados_object_list_cursor)(new hobject_t(hobject_t::get_max()))); + return ceph::from_error_code(ec); + } + + next->set((rados_object_list_cursor)(new hobject_t(next_hash))); + + for (auto i = obj_result.begin(); + i != obj_result.end(); ++i) { + ObjectItem oi; + oi.oid = i->oid; + oi.nspace = i->nspace; + oi.locator = i->locator; + result->push_back(oi); + } + + return obj_result.size(); +} + +void librados::IoCtx::object_list_slice( + const ObjectCursor start, + const ObjectCursor finish, + const size_t n, + const size_t m, + ObjectCursor *split_start, + ObjectCursor *split_finish) +{ + ceph_assert(split_start != nullptr); + ceph_assert(split_finish != nullptr); + + io_ctx_impl->object_list_slice( + *((hobject_t*)(start.c_cursor)), + *((hobject_t*)(finish.c_cursor)), + n, + m, + (hobject_t*)(split_start->c_cursor), + (hobject_t*)(split_finish->c_cursor)); +} + +int librados::IoCtx::application_enable(const std::string& app_name, + bool force) +{ + return io_ctx_impl->application_enable(app_name, force); +} + +int librados::IoCtx::application_enable_async(const std::string& app_name, + bool force, + PoolAsyncCompletion *c) +{ + io_ctx_impl->application_enable_async(app_name, force, c->pc); + return 0; +} + +int librados::IoCtx::application_list(std::set<std::string> *app_names) +{ + return io_ctx_impl->application_list(app_names); +} + +int librados::IoCtx::application_metadata_get(const std::string& app_name, + const std::string &key, + std::string* value) +{ + return io_ctx_impl->application_metadata_get(app_name, key, value); +} + 
+int librados::IoCtx::application_metadata_set(const std::string& app_name, + const std::string &key, + const std::string& value) +{ + return io_ctx_impl->application_metadata_set(app_name, key, value); +} + +int librados::IoCtx::application_metadata_remove(const std::string& app_name, + const std::string &key) +{ + return io_ctx_impl->application_metadata_remove(app_name, key); +} + +int librados::IoCtx::application_metadata_list(const std::string& app_name, + std::map<std::string, std::string> *values) +{ + return io_ctx_impl->application_metadata_list(app_name, values); +} diff --git a/src/librados/librados_tp.cc b/src/librados/librados_tp.cc new file mode 100644 index 000000000..b696de871 --- /dev/null +++ b/src/librados/librados_tp.cc @@ -0,0 +1,9 @@ +#include "acconfig.h" + +#ifdef WITH_LTTNG +#define TRACEPOINT_DEFINE +#define TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#include "tracing/librados.h" +#undef TRACEPOINT_PROBE_DYNAMIC_LINKAGE +#undef TRACEPOINT_DEFINE +#endif diff --git a/src/librados/librados_util.cc b/src/librados/librados_util.cc new file mode 100644 index 000000000..72cd96947 --- /dev/null +++ b/src/librados/librados_util.cc @@ -0,0 +1,63 @@ +#include "librados_util.h" + +uint8_t get_checksum_op_type(rados_checksum_type_t type) { + switch (type) { + case LIBRADOS_CHECKSUM_TYPE_XXHASH32: + return CEPH_OSD_CHECKSUM_OP_TYPE_XXHASH32; + case LIBRADOS_CHECKSUM_TYPE_XXHASH64: + return CEPH_OSD_CHECKSUM_OP_TYPE_XXHASH64; + case LIBRADOS_CHECKSUM_TYPE_CRC32C: + return CEPH_OSD_CHECKSUM_OP_TYPE_CRC32C; + default: + return -1; + } +} + +int get_op_flags(int flags) +{ + int rados_flags = 0; + if (flags & LIBRADOS_OP_FLAG_EXCL) + rados_flags |= CEPH_OSD_OP_FLAG_EXCL; + if (flags & LIBRADOS_OP_FLAG_FAILOK) + rados_flags |= CEPH_OSD_OP_FLAG_FAILOK; + if (flags & LIBRADOS_OP_FLAG_FADVISE_RANDOM) + rados_flags |= CEPH_OSD_OP_FLAG_FADVISE_RANDOM; + if (flags & LIBRADOS_OP_FLAG_FADVISE_SEQUENTIAL) + rados_flags |= CEPH_OSD_OP_FLAG_FADVISE_SEQUENTIAL; + if (flags & 
LIBRADOS_OP_FLAG_FADVISE_WILLNEED) + rados_flags |= CEPH_OSD_OP_FLAG_FADVISE_WILLNEED; + if (flags & LIBRADOS_OP_FLAG_FADVISE_DONTNEED) + rados_flags |= CEPH_OSD_OP_FLAG_FADVISE_DONTNEED; + if (flags & LIBRADOS_OP_FLAG_FADVISE_NOCACHE) + rados_flags |= CEPH_OSD_OP_FLAG_FADVISE_NOCACHE; + return rados_flags; +} + +int translate_flags(int flags) +{ + int op_flags = 0; + if (flags & librados::OPERATION_BALANCE_READS) + op_flags |= CEPH_OSD_FLAG_BALANCE_READS; + if (flags & librados::OPERATION_LOCALIZE_READS) + op_flags |= CEPH_OSD_FLAG_LOCALIZE_READS; + if (flags & librados::OPERATION_ORDER_READS_WRITES) + op_flags |= CEPH_OSD_FLAG_RWORDERED; + if (flags & librados::OPERATION_IGNORE_CACHE) + op_flags |= CEPH_OSD_FLAG_IGNORE_CACHE; + if (flags & librados::OPERATION_SKIPRWLOCKS) + op_flags |= CEPH_OSD_FLAG_SKIPRWLOCKS; + if (flags & librados::OPERATION_IGNORE_OVERLAY) + op_flags |= CEPH_OSD_FLAG_IGNORE_OVERLAY; + if (flags & librados::OPERATION_FULL_TRY) + op_flags |= CEPH_OSD_FLAG_FULL_TRY; + if (flags & librados::OPERATION_FULL_FORCE) + op_flags |= CEPH_OSD_FLAG_FULL_FORCE; + if (flags & librados::OPERATION_IGNORE_REDIRECT) + op_flags |= CEPH_OSD_FLAG_IGNORE_REDIRECT; + if (flags & librados::OPERATION_ORDERSNAP) + op_flags |= CEPH_OSD_FLAG_ORDERSNAP; + if (flags & librados::OPERATION_RETURNVEC) + op_flags |= CEPH_OSD_FLAG_RETURNVEC; + + return op_flags; +} diff --git a/src/librados/librados_util.h b/src/librados/librados_util.h new file mode 100644 index 000000000..ab9c461f4 --- /dev/null +++ b/src/librados/librados_util.h @@ -0,0 +1,34 @@ +#include <cstdint> +#include "acconfig.h" +#include "include/rados/librados.h" +#include "IoCtxImpl.h" + +#ifdef WITH_LTTNG +#include "tracing/librados.h" +#else +#define tracepoint(...) 
+#endif + +uint8_t get_checksum_op_type(rados_checksum_type_t type); +int get_op_flags(int flags); +int translate_flags(int flags); + +struct librados::ObjListCtx { + librados::IoCtxImpl dupctx; + librados::IoCtxImpl *ctx; + Objecter::NListContext *nlc; + bool legacy_list_api; + + ObjListCtx(IoCtxImpl *c, Objecter::NListContext *nl, bool legacy=false) + : nlc(nl), + legacy_list_api(legacy) { + // Get our own private IoCtxImpl so that namespace setting isn't + // changed by caller between uses. + ctx = &dupctx; + dupctx.dup(*c); + } + ~ObjListCtx() { + ctx = NULL; + delete nlc; + } +}; diff --git a/src/librados/snap_set_diff.cc b/src/librados/snap_set_diff.cc new file mode 100644 index 000000000..b42ad9bcd --- /dev/null +++ b/src/librados/snap_set_diff.cc @@ -0,0 +1,116 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#include <vector> + +#include "snap_set_diff.h" +#include "common/ceph_context.h" +#include "include/rados/librados.hpp" +#include "include/interval_set.h" +#include "common/debug.h" + +#define dout_subsys ceph_subsys_rados + +/** + * calculate intervals/extents that vary between two snapshots + */ +void calc_snap_set_diff(CephContext *cct, const librados::snap_set_t& snap_set, + librados::snap_t start, librados::snap_t end, + interval_set<uint64_t> *diff, uint64_t *end_size, + bool *end_exists, librados::snap_t *clone_end_snap_id, + bool *whole_object) +{ + ldout(cct, 10) << "calc_snap_set_diff start " << start << " end " << end + << ", snap_set seq " << snap_set.seq << dendl; + bool saw_start = false; + uint64_t start_size = 0; + diff->clear(); + *end_size = 0; + *end_exists = false; + *clone_end_snap_id = 0; + *whole_object = false; + + for (vector<librados::clone_info_t>::const_iterator r = snap_set.clones.begin(); + r != snap_set.clones.end(); + ) { + // make an interval, and hide the fact that the HEAD doesn't + // include itself in the snaps list + librados::snap_t a, b; + if (r->cloneid 
== librados::SNAP_HEAD) { + // head is valid starting from right after the last seen seq + a = snap_set.seq + 1; + b = librados::SNAP_HEAD; + } else if (r->snaps.empty()) { + ldout(cct, 1) << "clone " << r->cloneid + << ": empty snaps, return whole object" << dendl; + diff->clear(); + *whole_object = true; + return; + } else { + a = r->snaps[0]; + // note: b might be < r->cloneid if a snap has been trimmed. + b = r->snaps[r->snaps.size()-1]; + } + ldout(cct, 20) << " clone " << r->cloneid << " snaps " << r->snaps + << " -> [" << a << "," << b << "]" + << " size " << r->size << " overlap to next " << r->overlap << dendl; + + if (b < start) { + // this is before start + ++r; + continue; + } + + if (!saw_start) { + if (start < a) { + ldout(cct, 20) << " start, after " << start << dendl; + // this means the object didn't exist at start + if (r->size) + diff->insert(0, r->size); + start_size = 0; + } else { + ldout(cct, 20) << " start" << dendl; + start_size = r->size; + } + saw_start = true; + } + + *end_size = r->size; + if (end < a) { + ldout(cct, 20) << " past end " << end << ", end object does not exist" << dendl; + *end_exists = false; + diff->clear(); + if (start_size) { + diff->insert(0, start_size); + } + break; + } + if (end <= b) { + ldout(cct, 20) << " end" << dendl; + *end_exists = true; + *clone_end_snap_id = b; + break; + } + + // start with the max(this size, next size), and subtract off any + // overlap + const vector<pair<uint64_t, uint64_t> > *overlap = &r->overlap; + interval_set<uint64_t> diff_to_next; + uint64_t max_size = r->size; + ++r; + if (r != snap_set.clones.end()) { + if (r->size > max_size) + max_size = r->size; + } + if (max_size) + diff_to_next.insert(0, max_size); + for (vector<pair<uint64_t, uint64_t> >::const_iterator p = overlap->begin(); + p != overlap->end(); + ++p) { + diff_to_next.erase(p->first, p->second); + } + ldout(cct, 20) << " diff_to_next " << diff_to_next << dendl; + diff->union_of(diff_to_next); + ldout(cct, 20) << " 
diff now " << *diff << dendl; + } +} diff --git a/src/librados/snap_set_diff.h b/src/librados/snap_set_diff.h new file mode 100644 index 000000000..33deeb3ae --- /dev/null +++ b/src/librados/snap_set_diff.h @@ -0,0 +1,18 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab + +#ifndef __CEPH_OSDC_SNAP_SET_DIFF_H +#define __CEPH_OSDC_SNAP_SET_DIFF_H + +#include "include/common_fwd.h" +#include "include/rados/rados_types.hpp" +#include "include/interval_set.h" + +void calc_snap_set_diff(CephContext *cct, + const librados::snap_set_t& snap_set, + librados::snap_t start, librados::snap_t end, + interval_set<uint64_t> *diff, uint64_t *end_size, + bool *end_exists, librados::snap_t *clone_end_snap_id, + bool *whole_object); + +#endif diff --git a/src/libradosstriper/CMakeLists.txt b/src/libradosstriper/CMakeLists.txt new file mode 100644 index 000000000..a69192465 --- /dev/null +++ b/src/libradosstriper/CMakeLists.txt @@ -0,0 +1,17 @@ +set(libradosstriper_srcs + libradosstriper.cc + RadosStriperImpl.cc + MultiAioCompletionImpl.cc) +add_library(radosstriper ${CEPH_SHARED} + ${libradosstriper_srcs}) +target_link_libraries(radosstriper + PRIVATE + librados + librados_impl cls_lock_client osdc ceph-common + pthread ${CRYPTO_LIBS} ${EXTRALIBS}) +# OUTPUT_NAME is the property CMake reads for the library's base file name +set_target_properties(radosstriper PROPERTIES + OUTPUT_NAME radosstriper + VERSION 1.0.0 + SOVERSION 1) + +install(TARGETS radosstriper DESTINATION ${CMAKE_INSTALL_LIBDIR}) diff --git a/src/libradosstriper/MultiAioCompletionImpl.cc b/src/libradosstriper/MultiAioCompletionImpl.cc new file mode 100644 index 000000000..acf9e0b6b --- /dev/null +++ b/src/libradosstriper/MultiAioCompletionImpl.cc @@ -0,0 +1,60 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch> + * + * This is free software; you can redistribute it and/or + * 
modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include "common/dout.h" + +#include "libradosstriper/MultiAioCompletionImpl.h" + +void libradosstriper::MultiAioCompletionImpl::complete_request(ssize_t r) +{ + lock.lock(); + if (rval >= 0) { + if (r < 0 && r != -EEXIST) + rval = r; + else if (r > 0) + rval += r; + } + ceph_assert(pending_complete); + int count = --pending_complete; + if (!count && !building) { + complete(); + } + put_unlock(); +} + +void libradosstriper::MultiAioCompletionImpl::safe_request(ssize_t r) +{ + lock.lock(); + if (rval >= 0) { + if (r < 0 && r != -EEXIST) + rval = r; + } + ceph_assert(pending_safe); + int count = --pending_safe; + if (!count && !building) { + safe(); + } + put_unlock(); +} + +void libradosstriper::MultiAioCompletionImpl::finish_adding_requests() +{ + std::scoped_lock l{lock}; + ceph_assert(building); + building = false; + if (!pending_complete) + complete(); + if (!pending_safe) + safe(); +} diff --git a/src/libradosstriper/MultiAioCompletionImpl.h b/src/libradosstriper/MultiAioCompletionImpl.h new file mode 100644 index 000000000..3ac3aae44 --- /dev/null +++ b/src/libradosstriper/MultiAioCompletionImpl.h @@ -0,0 +1,169 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#ifndef CEPH_LIBRADOSSTRIPERSTRIPER_MULTIAIOCOMPLETIONIMPL_H +#define CEPH_LIBRADOSSTRIPERSTRIPER_MULTIAIOCOMPLETIONIMPL_H + +#include <list> +#include <mutex> +#include "common/ceph_mutex.h" +#include "include/radosstriper/libradosstriper.hpp" + +namespace libradosstriper { + +struct MultiAioCompletionImpl { + + ceph::mutex lock = ceph::make_mutex("MultiAioCompletionImpl lock", false); + ceph::condition_variable cond; + int ref, rval; + int pending_complete, pending_safe; + rados_callback_t callback_complete, callback_safe; + void *callback_complete_arg, *callback_safe_arg; + bool building; ///< true if we are still building this completion + bufferlist bl; /// only used for read case in C api of rados striper + std::list<bufferlist*> bllist; /// keep temporary buffer lists used for destriping + + MultiAioCompletionImpl() + : ref(1), rval(0), + pending_complete(0), pending_safe(0), + callback_complete(0), callback_safe(0), + callback_complete_arg(0), callback_safe_arg(0), + building(true) {}; + + ~MultiAioCompletionImpl() { + // deallocate temporary buffer lists + for (std::list<bufferlist*>::iterator it = bllist.begin(); + it != bllist.end(); + it++) { + delete *it; + } + bllist.clear(); + } + + int set_complete_callback(void *cb_arg, rados_callback_t cb) { + std::scoped_lock l{lock}; + callback_complete = cb; + callback_complete_arg = cb_arg; + return 0; + } + int set_safe_callback(void *cb_arg, rados_callback_t cb) { + std::scoped_lock l{lock}; + callback_safe = cb; + callback_safe_arg = cb_arg; + return 0; + } + int wait_for_complete() { + std::unique_lock l{lock}; + cond.wait(l, [this] { return !pending_complete; }); + return 0; + } + int wait_for_safe() { + std::unique_lock l{lock}; + cond.wait(l, [this] { return !pending_safe; }); + return 0; + } + bool is_complete() { + std::scoped_lock l{lock}; + return pending_complete == 0; + } + bool is_safe() { + std::scoped_lock l{lock}; + return pending_safe == 0; + } + void wait_for_complete_and_cb() { 
+ std::unique_lock l{lock}; + cond.wait(l, [this] { return !pending_complete && !callback_complete; }); + } + void wait_for_safe_and_cb() { + std::unique_lock l{lock}; + cond.wait(l, [this] { return !pending_safe && !callback_safe; }); + } + bool is_complete_and_cb() { + std::scoped_lock l{lock}; + return ((0 == pending_complete) && !callback_complete); + } + bool is_safe_and_cb() { + std::scoped_lock l{lock}; + return ((0 == pending_safe) && !callback_safe); + } + int get_return_value() { + std::scoped_lock l{lock}; + return rval; + } + /// take one reference (acquires the lock itself) + void get() { + std::scoped_lock l{lock}; + _get(); + } + /// take one reference; asserts that the lock is already held + void _get() { + ceph_assert(ceph_mutex_is_locked(lock)); + ceph_assert(ref > 0); + ++ref; + } + /// drop one reference (acquires the lock, put_unlock() releases it) + void put() { + lock.lock(); + put_unlock(); + } + /// drop one reference and release the lock (so the lock is expected to be + /// held on entry); deletes this object when the count reaches zero + void put_unlock() { + ceph_assert(ref > 0); + int n = --ref; + lock.unlock(); + if (!n) + delete this; + } + /// register one request: bumps both pending_complete and pending_safe + /// and takes one reference for each of them + void add_request() { + std::scoped_lock l{lock}; + pending_complete++; + _get(); + pending_safe++; + _get(); + } + /// NOTE(review): despite its name this bumps pending_complete (not + /// pending_safe) and takes a single reference - confirm against the + /// callers before changing, they presumably pair it with complete_request() + void add_safe_request() { + std::scoped_lock l{lock}; + pending_complete++; + _get(); + } + /// run the complete callback at most once (it is reset after the call) and + /// wake up all waiters; asserts that the lock is held + void complete() { + ceph_assert(ceph_mutex_is_locked(lock)); + if (callback_complete) { + callback_complete(this, callback_complete_arg); + callback_complete = 0; + } + cond.notify_all(); + } + /// run the safe callback at most once (it is reset after the call) and + /// wake up all waiters; asserts that the lock is held + void safe() { + ceph_assert(ceph_mutex_is_locked(lock)); + if (callback_safe) { + callback_safe(this, callback_safe_arg); + callback_safe = 0; + } + cond.notify_all(); + }; + + void complete_request(ssize_t r); + void safe_request(ssize_t r); + void finish_adding_requests(); +}; + +/// hook used by boost::intrusive_ptr: forwards to get() +inline void intrusive_ptr_add_ref(MultiAioCompletionImpl* ptr) +{ + ptr->get(); +} + +/// hook used by boost::intrusive_ptr: forwards to put() +inline void intrusive_ptr_release(MultiAioCompletionImpl* ptr) +{ + ptr->put(); +} +} + +#endif // CEPH_LIBRADOSSTRIPERSTRIPER_MULTIAIOCOMPLETIONIMPL_H diff --git a/src/libradosstriper/RadosStriperImpl.cc b/src/libradosstriper/RadosStriperImpl.cc new file mode 100644 index 000000000..60fafd463 --- /dev/null +++ b/src/libradosstriper/RadosStriperImpl.cc @@ -0,0 +1,1606 @@ 
+// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. + * + */ + +#include <boost/algorithm/string/replace.hpp> + +#include "libradosstriper/RadosStriperImpl.h" + +#include <errno.h> + +#include <sstream> +#include <iomanip> +#include <algorithm> + +#include "include/types.h" +#include "include/uuid.h" +#include "include/ceph_fs.h" +#include "common/dout.h" +#include "common/strtol.h" +#include "common/RefCountedObj.h" +#include "osdc/Striper.h" +#include "librados/AioCompletionImpl.h" +#include <cls/lock/cls_lock_client.h> + +/* + * This file contains the actual implementation of the rados striped objects interface. + * + * Striped objects are stored in rados in a set of regular rados objects, after their + * content has been striped using the osdc/Striper interface. + * + * The external attributes of the striped object are mapped to the attributes of the + * first underlying object. This first object has a set of extra external attributes + * storing the layout of the striped object for future read back. These attributes are : + * - striper.layout.object_size : the size of rados objects used. + * Must be a multiple of striper.layout.stripe_unit + * - striper.layout.stripe_unit : the size of a stripe unit + * - striper.layout.stripe_count : the number of stripes used + * - striper.size : total striped object size + * + * In general operations on striped objects are not atomic. 
+ * However, a certain number of safety guards have been put in place to make the interface closer + * to atomicity : + * - each data operation takes a shared lock on the first rados object for the + * whole time of the operation + * - the remove and trunc operations take an exclusive lock on the first rados object + * for the whole time of the operation + * This makes sure that no removal/truncation of a striped object occurs while + * data operations are happening and vice versa. It thus makes sure that the layout + * of a striped object does not change during data operation, which is essential for + * data consistency. + * + * Still the writing to a striped object is not atomic. This means in particular that + * the size of an object may not be in sync with its content at all times. + * As the size is always guaranteed to be updated first and in an atomic way, and as + * sparse striped objects are supported (see below), what will typically happen is + * that a reader that comes too soon after a write will read 0s instead of the actual + * data. + * + * Note that remove handles the pieces of the striped object in reverse order, + * so that the head object is removed last, making the completion of the deletion atomic. + * + * Striped objects can be sparse, typically in case data was written at the end of the + * striped object only. In such a case, some rados objects constituting the striped object + * may be missing. Others can be partial (only the beginning will have data). + * When dealing with such sparse striped files, missing objects are detected and + * considered as full of 0s. They are however not created until real data is written + * to them. + * + * There are a number of missing features/improvements that could be implemented. + * Here are some ideas : + * - implementation of missing entry points (compared to rados) + * In particular : clone_range, sparse_read, exec, aio_flush_async, tmaps, omaps, ... 
+ * + */ + +#define dout_subsys ceph_subsys_rados +#undef dout_prefix +#define dout_prefix *_dout << "libradosstriper: " + +/// size of xattr buffer +#define XATTR_BUFFER_SIZE 32 + +/// names of the different xattr entries +#define XATTR_LAYOUT_STRIPE_UNIT "striper.layout.stripe_unit" +#define XATTR_LAYOUT_STRIPE_COUNT "striper.layout.stripe_count" +#define XATTR_LAYOUT_OBJECT_SIZE "striper.layout.object_size" +#define XATTR_SIZE "striper.size" +#define LOCK_PREFIX "lock." + +/// name of the lock used on objects to ensure layout stability during IO +#define RADOS_LOCK_NAME "striper.lock" + +/// format of the extension of rados objects created for a given striped object +#define RADOS_OBJECT_EXTENSION_FORMAT ".%016llx" + +/// default object layout +struct ceph_file_layout default_file_layout = { + init_le32(1<<22), // fl_stripe_unit + init_le32(1), // fl_stripe_count + init_le32(1<<22), // fl_object_size + init_le32(0), // fl_cas_hash + init_le32(0), // fl_object_stripe_unit + init_le32(-1), // fl_unused + init_le32(-1), // fl_pg_pool +}; + +using libradosstriper::MultiAioCompletionImplPtr; + +namespace { + +///////////////////////// CompletionData ///////////////////////////// + +/** + * struct handling the data needed to pass to the call back + * function in asynchronous operations + */ +struct CompletionData : RefCountedObject { + /// complete method + void complete(int r); + /// striper to be used to handle the write completion + libradosstriper::RadosStriperImpl *m_striper; + /// striped object concerned by the write operation + std::string m_soid; + /// shared lock to be released at completion + std::string m_lockCookie; + /// completion handler + librados::IoCtxImpl::C_aio_Complete *m_ack; +protected: + CompletionData(libradosstriper::RadosStriperImpl * striper, + const std::string& soid, + const std::string& lockCookie, + librados::AioCompletionImpl *userCompletion = 0); + ~CompletionData() override; + +}; + +CompletionData::CompletionData 
+(libradosstriper::RadosStriperImpl* striper, + const std::string& soid, + const std::string& lockCookie, + librados::AioCompletionImpl *userCompletion) : + RefCountedObject(striper->cct()), + m_striper(striper), m_soid(soid), m_lockCookie(lockCookie), m_ack(0) { + m_striper->get(); + if (userCompletion) { + m_ack = new librados::IoCtxImpl::C_aio_Complete(userCompletion); + userCompletion->io = striper->m_ioCtxImpl; + } +} + +CompletionData::~CompletionData() { + if (m_ack) delete m_ack; + m_striper->put(); +} + +void CompletionData::complete(int r) { + if (m_ack) m_ack->finish(r); +} + +/** + * struct handling the data needed to pass to the call back + * function in asynchronous read operations + */ +struct ReadCompletionData : CompletionData { + /// bufferlist containing final result + bufferlist* m_bl; + /// extents that will be read + std::vector<ObjectExtent>* m_extents; + /// intermediate results + std::vector<bufferlist>* m_resultbl; + /// return code of read completion, to be remembered until unlocking happened + int m_readRc; + /// completion object for the unlocking of the striped object at the end of the read + librados::AioCompletion *m_unlockCompletion; + /// complete method for when reading is over + void complete_read(int r); + /// complete method for when object is unlocked + void complete_unlock(int r); + +private: + FRIEND_MAKE_REF(ReadCompletionData); + ReadCompletionData(libradosstriper::RadosStriperImpl * striper, + const std::string& soid, + const std::string& lockCookie, + librados::AioCompletionImpl *userCompletion, + bufferlist* bl, + std::vector<ObjectExtent>* extents, + std::vector<bufferlist>* resultbl); + ~ReadCompletionData() override; +}; + +ReadCompletionData::ReadCompletionData +(libradosstriper::RadosStriperImpl* striper, + const std::string& soid, + const std::string& lockCookie, + librados::AioCompletionImpl *userCompletion, + bufferlist* bl, + std::vector<ObjectExtent>* extents, + std::vector<bufferlist>* resultbl) : + 
CompletionData(striper, soid, lockCookie, userCompletion), + m_bl(bl), m_extents(extents), m_resultbl(resultbl), m_readRc(0), + m_unlockCompletion(0) {} + +/// releases the unlock completion (if one was attached) and frees the +/// extents and intermediate result buffers owned by this object +ReadCompletionData::~ReadCompletionData() { + // the constructor leaves m_unlockCompletion null; it is only assigned + // afterwards, so guard against destruction before that happened + if (m_unlockCompletion) m_unlockCompletion->release(); + delete m_extents; + delete m_resultbl; +} + +void ReadCompletionData::complete_read(int r) { + // gather data into final buffer + Striper::StripedReadResult readResult; + vector<bufferlist>::iterator bit = m_resultbl->begin(); + for (vector<ObjectExtent>::iterator eit = m_extents->begin(); + eit != m_extents->end(); + ++eit, ++bit) { + readResult.add_partial_result(m_striper->cct(), *bit, eit->buffer_extents); + } + m_bl->clear(); + readResult.assemble_result(m_striper->cct(), *m_bl, true); + // Remember return code + m_readRc = r; +} + +void ReadCompletionData::complete_unlock(int r) { + // call parent's completion method + // Note that we ignore the return code of the unlock as we cannot do much about it + CompletionData::complete(m_readRc?m_readRc:m_bl->length()); +} + +/** + * struct handling the data needed to pass to the call back + * function in asynchronous write operations + */ +struct WriteCompletionData : CompletionData { + /// safe completion handler + librados::IoCtxImpl::C_aio_Complete *m_safe; + /// completion object for the unlocking of the striped object at the end of the write + librados::AioCompletion *m_unlockCompletion; + /// return code of write completion, to be remembered until unlocking happened + int m_writeRc; + /// complete method for when writing is over + void complete_write(int r); + /// complete method for when object is unlocked + void complete_unlock(int r); + /// safe method + void safe(int r); +private: + FRIEND_MAKE_REF(WriteCompletionData); + /// constructor + WriteCompletionData(libradosstriper::RadosStriperImpl * striper, + const std::string& soid, + const std::string& lockCookie, + librados::AioCompletionImpl *userCompletion); + /// destructor + ~WriteCompletionData() override; +}; + 
+WriteCompletionData::WriteCompletionData +(libradosstriper::RadosStriperImpl* striper, + const std::string& soid, + const std::string& lockCookie, + librados::AioCompletionImpl *userCompletion) : + CompletionData(striper, soid, lockCookie, userCompletion), + m_safe(0), m_unlockCompletion(0), m_writeRc(0) { + if (userCompletion) { + m_safe = new librados::IoCtxImpl::C_aio_Complete(userCompletion); + } +} + +/// releases the unlock completion (if one was attached) and frees the +/// safe completion handler +WriteCompletionData::~WriteCompletionData() { + // the constructor initialises m_unlockCompletion to 0, so it may still be + // null if no unlock completion was attached before destruction + if (m_unlockCompletion) m_unlockCompletion->release(); + if (m_safe) delete m_safe; +} + +void WriteCompletionData::complete_unlock(int r) { + // call parent's completion method + // Note that we ignore the return code of the unlock as we cannot do much about it + CompletionData::complete(m_writeRc); +} + +void WriteCompletionData::complete_write(int r) { + // Remember return code + m_writeRc = r; +} + +void WriteCompletionData::safe(int r) { + if (m_safe) m_safe->finish(r); +} + +struct RemoveCompletionData : CompletionData { + /// removal flags + int flags; + +private: + FRIEND_MAKE_REF(RemoveCompletionData); + /** + * constructor + * note that the constructed object will take ownership of the lock + */ + RemoveCompletionData(libradosstriper::RadosStriperImpl * striper, + const std::string& soid, + const std::string& lockCookie, + librados::AioCompletionImpl *userCompletion, + int flags = 0) : + CompletionData(striper, soid, lockCookie, userCompletion), flags(flags) {} +}; + +/** + * struct handling the data needed to pass to the call back + * function in asynchronous truncate operations + */ +struct TruncateCompletionData : RefCountedObject { + /// striper to be used + libradosstriper::RadosStriperImpl *m_striper; + /// striped object concerned by the truncate operation + std::string m_soid; + /// the final size of the truncated object + uint64_t m_size; + +private: + FRIEND_MAKE_REF(TruncateCompletionData); + /// constructor + TruncateCompletionData(libradosstriper::RadosStriperImpl* striper, + const std::string& soid, + uint64_t 
size) : + RefCountedObject(striper->cct()), + m_striper(striper), m_soid(soid), m_size(size) { + m_striper->get(); + } + /// destructor + ~TruncateCompletionData() override { + m_striper->put(); + } +}; + +/** + * struct handling the data needed to pass to the call back + * function in asynchronous read operations of a Rados File + */ +struct RadosReadCompletionData : RefCountedObject { + /// the multi asynch io completion object to be used + MultiAioCompletionImplPtr m_multiAioCompl; + /// the expected number of bytes + uint64_t m_expectedBytes; + /// the bufferlist object where data have been written + bufferlist *m_bl; + +private: + FRIEND_MAKE_REF(RadosReadCompletionData); + /// constructor + RadosReadCompletionData(MultiAioCompletionImplPtr multiAioCompl, + uint64_t expectedBytes, + bufferlist *bl, + CephContext *context) : + RefCountedObject(context), + m_multiAioCompl(multiAioCompl), m_expectedBytes(expectedBytes), m_bl(bl) {} +}; + +/** + * struct handling (most of) the data needed to pass to the call back + * function in asynchronous stat operations. 
+ * Inherited by the actual type for adding time information in different + * versions (time_t or struct timespec) + */ +struct BasicStatCompletionData : CompletionData { + // MultiAioCompletionImpl used to handle the double aysnc + // call in the back (stat + getxattr) + libradosstriper::MultiAioCompletionImpl *m_multiCompletion; + // where to store the size of first objct + // this will be ignored but we need a place to store it when + // async stat is called + uint64_t m_objectSize; + // where to store the file size + uint64_t *m_psize; + /// the bufferlist object used for the getxattr call + bufferlist m_bl; + /// return code of the stat + int m_statRC; + /// return code of the getxattr + int m_getxattrRC; + +protected: + /// constructor + BasicStatCompletionData(libradosstriper::RadosStriperImpl* striper, + const std::string& soid, + librados::AioCompletionImpl *userCompletion, + libradosstriper::MultiAioCompletionImpl *multiCompletion, + uint64_t *psize) : + CompletionData(striper, soid, "", userCompletion), + m_multiCompletion(multiCompletion), m_psize(psize), + m_statRC(0), m_getxattrRC(0) {}; + +}; + +/** + * struct handling the data needed to pass to the call back + * function in asynchronous stat operations. + * Simple templated extension of BasicStatCompletionData. 
+ * The template parameter is the type of the time information + * (used with time_t for stat and struct timespec for stat2) + */ +template<class TimeType> +struct StatCompletionData : BasicStatCompletionData { + // where to store the file time + TimeType *m_pmtime; +private: + FRIEND_MAKE_REF(StatCompletionData); + /// constructor + StatCompletionData<TimeType>(libradosstriper::RadosStriperImpl* striper, + const std::string& soid, + librados::AioCompletionImpl *userCompletion, + libradosstriper::MultiAioCompletionImpl *multiCompletion, + uint64_t *psize, + TimeType *pmtime) : + BasicStatCompletionData(striper, soid, userCompletion, multiCompletion, psize), + m_pmtime(pmtime) {}; +}; + +/** + * struct handling the data needed to pass to the call back + * function in asynchronous remove operations of a Rados File + */ +struct RadosRemoveCompletionData : RefCountedObject { + /// the multi asynch io completion object to be used + MultiAioCompletionImplPtr m_multiAioCompl; +private: + FRIEND_MAKE_REF(RadosRemoveCompletionData); + /// constructor + RadosRemoveCompletionData(MultiAioCompletionImplPtr multiAioCompl, + CephContext *context) : + RefCountedObject(context), + m_multiAioCompl(multiAioCompl) {}; +}; + + +} // namespace { + +///////////////////////// constructor ///////////////////////////// + +libradosstriper::RadosStriperImpl::RadosStriperImpl(librados::IoCtx& ioctx, librados::IoCtxImpl *ioctx_impl) : + m_refCnt(0), m_radosCluster(ioctx), m_ioCtx(ioctx), m_ioCtxImpl(ioctx_impl), + m_layout(default_file_layout) {} + +///////////////////////// layout ///////////////////////////// + +int libradosstriper::RadosStriperImpl::setObjectLayoutStripeUnit +(unsigned int stripe_unit) +{ + /* stripe unit must be non-zero, 64k increment */ + if (!stripe_unit || (stripe_unit & (CEPH_MIN_STRIPE_UNIT-1))) + return -EINVAL; + m_layout.fl_stripe_unit = stripe_unit; + return 0; +} + +int libradosstriper::RadosStriperImpl::setObjectLayoutStripeCount +(unsigned int stripe_count) +{ 
+ /* stripe count must be non-zero */ + if (!stripe_count) + return -EINVAL; + m_layout.fl_stripe_count = stripe_count; + return 0; +} + +int libradosstriper::RadosStriperImpl::setObjectLayoutObjectSize +(unsigned int object_size) +{ + /* object size must be non-zero, 64k increment */ + if (!object_size || (object_size & (CEPH_MIN_STRIPE_UNIT-1))) + return -EINVAL; + /* object size must be a multiple of stripe unit */ + if (object_size < m_layout.fl_stripe_unit || + object_size % m_layout.fl_stripe_unit) + return -EINVAL; + m_layout.fl_object_size = object_size; + return 0; +} + +///////////////////////// xattrs ///////////////////////////// + +int libradosstriper::RadosStriperImpl::getxattr(const object_t& soid, + const char *name, + bufferlist& bl) +{ + std::string firstObjOid = getObjectId(soid, 0); + return m_ioCtx.getxattr(firstObjOid, name, bl); +} + +int libradosstriper::RadosStriperImpl::setxattr(const object_t& soid, + const char *name, + bufferlist& bl) +{ + std::string firstObjOid = getObjectId(soid, 0); + return m_ioCtx.setxattr(firstObjOid, name, bl); +} + +int libradosstriper::RadosStriperImpl::getxattrs(const object_t& soid, + map<string, bufferlist>& attrset) +{ + std::string firstObjOid = getObjectId(soid, 0); + int rc = m_ioCtx.getxattrs(firstObjOid, attrset); + if (rc) return rc; + // cleanup internal attributes dedicated to striping and locking + attrset.erase(XATTR_LAYOUT_STRIPE_UNIT); + attrset.erase(XATTR_LAYOUT_STRIPE_COUNT); + attrset.erase(XATTR_LAYOUT_OBJECT_SIZE); + attrset.erase(XATTR_SIZE); + attrset.erase(std::string(LOCK_PREFIX) + RADOS_LOCK_NAME); + return rc; +} + +int libradosstriper::RadosStriperImpl::rmxattr(const object_t& soid, + const char *name) +{ + std::string firstObjOid = getObjectId(soid, 0); + return m_ioCtx.rmxattr(firstObjOid, name); +} + +///////////////////////// io ///////////////////////////// + +int libradosstriper::RadosStriperImpl::write(const std::string& soid, + const bufferlist& bl, + size_t len, + uint64_t 
off) +{ + // open the object. This will create it if needed, retrieve its layout + // and size and take a shared lock on it + ceph_file_layout layout; + std::string lockCookie; + int rc = createAndOpenStripedObject(soid, &layout, len+off, &lockCookie, true); + if (rc) return rc; + return write_in_open_object(soid, layout, lockCookie, bl, len, off); +} + +int libradosstriper::RadosStriperImpl::append(const std::string& soid, + const bufferlist& bl, + size_t len) +{ + // open the object. This will create it if needed, retrieve its layout + // and size and take a shared lock on it + ceph_file_layout layout; + uint64_t size = len; + std::string lockCookie; + int rc = openStripedObjectForWrite(soid, &layout, &size, &lockCookie, false); + if (rc) return rc; + return write_in_open_object(soid, layout, lockCookie, bl, len, size); +} + +int libradosstriper::RadosStriperImpl::write_full(const std::string& soid, + const bufferlist& bl) +{ + int rc = trunc(soid, 0); + if (rc && rc != -ENOENT) return rc; // ENOENT is obviously ok + return write(soid, bl, bl.length(), 0); +} + +int libradosstriper::RadosStriperImpl::read(const std::string& soid, + bufferlist* bl, + size_t len, + uint64_t off) +{ + // create a completion object + librados::AioCompletionImpl c; + // call asynchronous method + int rc = aio_read(soid, &c, bl, len, off); + // and wait for completion + if (!rc) { + // wait for completion + c.wait_for_complete_and_cb(); + // return result + rc = c.get_return_value(); + } + return rc; +} + +///////////////////////// asynchronous io ///////////////////////////// + +int libradosstriper::RadosStriperImpl::aio_write(const std::string& soid, + librados::AioCompletionImpl *c, + const bufferlist& bl, + size_t len, + uint64_t off) +{ + ceph_file_layout layout; + std::string lockCookie; + int rc = createAndOpenStripedObject(soid, &layout, len+off, &lockCookie, true); + if (rc) return rc; + return aio_write_in_open_object(soid, c, layout, lockCookie, bl, len, off); +} + +int 
libradosstriper::RadosStriperImpl::aio_append(const std::string& soid, + librados::AioCompletionImpl *c, + const bufferlist& bl, + size_t len) +{ + ceph_file_layout layout; + uint64_t size = len; + std::string lockCookie; + int rc = openStripedObjectForWrite(soid, &layout, &size, &lockCookie, false); + if (rc) return rc; + // create a completion object + return aio_write_in_open_object(soid, c, layout, lockCookie, bl, len, size); +} + +int libradosstriper::RadosStriperImpl::aio_write_full(const std::string& soid, + librados::AioCompletionImpl *c, + const bufferlist& bl) +{ + int rc = trunc(soid, 0); + if (rc) return rc; + return aio_write(soid, c, bl, bl.length(), 0); +} + +static void rados_read_aio_unlock_complete(rados_striper_multi_completion_t c, void *arg) +{ + auto cdata = ceph::ref_t<ReadCompletionData>(static_cast<ReadCompletionData*>(arg), false); + libradosstriper::MultiAioCompletionImpl *comp = + reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c); + cdata->complete_unlock(comp->rval); +} + +static void striper_read_aio_req_complete(rados_striper_multi_completion_t c, void *arg) +{ + auto cdata = static_cast<ReadCompletionData*>(arg); + // launch the async unlocking of the object + cdata->m_striper->aio_unlockObject(cdata->m_soid, cdata->m_lockCookie, cdata->m_unlockCompletion); + // complete the read part in parallel + libradosstriper::MultiAioCompletionImpl *comp = + reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c); + cdata->complete_read(comp->rval); +} + +static void rados_req_read_complete(rados_completion_t c, void *arg) +{ + auto data = static_cast<RadosReadCompletionData*>(arg); + int rc = rados_aio_get_return_value(c); + // We need to handle the case of sparse files here + if (rc == -ENOENT) { + // the object did not exist at all. This can happen for sparse files. 
+ // we consider we've read 0 bytes and it will fall into next case + rc = 0; + } + ssize_t nread = rc; + if (rc >= 0 && (((uint64_t)rc) < data->m_expectedBytes)) { + // only partial data were present in the object (or the object did not + // even exist if we've gone through previous case). + // This is typical of sparse file and we need to complete with 0s. + unsigned int lenOfZeros = data->m_expectedBytes-rc; + unsigned int existingDataToZero = min(data->m_bl->length()-rc, lenOfZeros); + if (existingDataToZero > 0) { + data->m_bl->zero(rc, existingDataToZero); + } + if (lenOfZeros > existingDataToZero) { + ceph::bufferptr zeros(ceph::buffer::create(lenOfZeros-existingDataToZero)); + zeros.zero(); + data->m_bl->push_back(zeros); + } + nread = data->m_expectedBytes; + } + auto multi_aio_comp = data->m_multiAioCompl; + multi_aio_comp->complete_request(nread); + multi_aio_comp->safe_request(rc); +} + +int libradosstriper::RadosStriperImpl::aio_read(const std::string& soid, + librados::AioCompletionImpl *c, + bufferlist* bl, + size_t len, + uint64_t off) +{ + // open the object. This will retrieve its layout and size + // and take a shared lock on it + ceph_file_layout layout; + uint64_t size; + std::string lockCookie; + int rc = openStripedObjectForRead(soid, &layout, &size, &lockCookie); + if (rc) return rc; + // find out the actual number of bytes we can read + uint64_t read_len; + if (off >= size) { + // nothing to read ! We are done. 
+ read_len = 0; + } else { + read_len = min(len, (size_t)(size-off)); + } + // get list of extents to be read from + vector<ObjectExtent> *extents = new vector<ObjectExtent>(); + if (read_len > 0) { + std::string format = soid; + boost::replace_all(format, "%", "%%"); + format += RADOS_OBJECT_EXTENSION_FORMAT; + file_layout_t l; + l.from_legacy(layout); + Striper::file_to_extents(cct(), format.c_str(), &l, off, read_len, + 0, *extents); + } + + // create a completion object and transfer ownership of extents and resultbl + vector<bufferlist> *resultbl = new vector<bufferlist>(extents->size()); + auto cdata = ceph::make_ref<ReadCompletionData>(this, soid, lockCookie, c, bl, extents, resultbl); + c->is_read = true; + c->io = m_ioCtxImpl; + // create a completion for the unlocking of the striped object at the end of the read + librados::AioCompletion *unlock_completion = + librados::Rados::aio_create_completion(cdata->get() /* create ref! */, rados_read_aio_unlock_complete); + cdata->m_unlockCompletion = unlock_completion; + // create the multiCompletion object handling the reads + MultiAioCompletionImplPtr nc{new libradosstriper::MultiAioCompletionImpl, + false}; + nc->set_complete_callback(cdata.get(), striper_read_aio_req_complete); + // go through the extents + int r = 0, i = 0; + for (vector<ObjectExtent>::iterator p = extents->begin(); p != extents->end(); ++p) { + // create a buffer list describing where to place data read from current extend + bufferlist *oid_bl = &((*resultbl)[i++]); + for (vector<pair<uint64_t,uint64_t> >::iterator q = p->buffer_extents.begin(); + q != p->buffer_extents.end(); + ++q) { + bufferlist buffer_bl; + buffer_bl.substr_of(*bl, q->first, q->second); + oid_bl->append(buffer_bl); + } + // read all extends of a given object in one go + nc->add_request(); + // we need 2 references on data as both rados_req_read_safe and rados_req_read_complete + // will release one + auto data = ceph::make_ref<RadosReadCompletionData>(nc, p->length, 
oid_bl, cct()); + librados::AioCompletion *rados_completion = + librados::Rados::aio_create_completion(data.detach(), rados_req_read_complete); + r = m_ioCtx.aio_read(p->oid.name, rados_completion, oid_bl, p->length, p->offset); + rados_completion->release(); + if (r < 0) + break; + } + nc->finish_adding_requests(); + return r; +} + +int libradosstriper::RadosStriperImpl::aio_read(const std::string& soid, + librados::AioCompletionImpl *c, + char* buf, + size_t len, + uint64_t off) +{ + // create a buffer list and store it inside the completion object + c->bl.clear(); + c->bl.push_back(buffer::create_static(len, buf)); + // call the bufferlist version of this method + return aio_read(soid, c, &c->bl, len, off); +} + +int libradosstriper::RadosStriperImpl::aio_flush() +{ + int ret; + // pass to the rados level + ret = m_ioCtx.aio_flush(); + if (ret < 0) + return ret; + //wait all CompletionData are released + std::unique_lock l{lock}; + cond.wait(l, [this] {return m_refCnt <= 1;}); + return ret; +} + +///////////////////////// stat and deletion ///////////////////////////// + +int libradosstriper::RadosStriperImpl::stat(const std::string& soid, uint64_t *psize, time_t *pmtime) +{ + // create a completion object + librados::AioCompletionImpl c; + // call asynchronous version of stat + int rc = aio_stat(soid, &c, psize, pmtime); + if (rc == 0) { + // wait for completion of the remove + c.wait_for_complete(); + // get result + rc = c.get_return_value(); + } + return rc; +} + +static void striper_stat_aio_stat_complete(rados_completion_t c, void *arg) { + auto data = ceph::ref_t<BasicStatCompletionData>(static_cast<BasicStatCompletionData*>(arg), false); + int rc = rados_aio_get_return_value(c); + if (rc == -ENOENT) { + // remember this has failed + data->m_statRC = rc; + } + data->m_multiCompletion->complete_request(rc); +} + +static void striper_stat_aio_getxattr_complete(rados_completion_t c, void *arg) { + auto data = 
ceph::ref_t<BasicStatCompletionData>(static_cast<BasicStatCompletionData*>(arg), false); + int rc = rados_aio_get_return_value(c); + // We need to handle the case of sparse files here + if (rc < 0) { + // remember this has failed + data->m_getxattrRC = rc; + } else { + // this intermediate string allows to add a null terminator before calling strtol + std::string err; + std::string strsize(data->m_bl.c_str(), data->m_bl.length()); + *data->m_psize = strict_strtoll(strsize.c_str(), 10, &err); + if (!err.empty()) { + lderr(data->m_striper->cct()) << XATTR_SIZE << " : " << err << dendl; + data->m_getxattrRC = -EINVAL; + } + rc = 0; + } + data->m_multiCompletion->complete_request(rc); +} + +static void striper_stat_aio_req_complete(rados_striper_multi_completion_t c, + void *arg) { + auto data = ceph::ref_t<BasicStatCompletionData>(static_cast<BasicStatCompletionData*>(arg), false); + if (data->m_statRC) { + data->complete(data->m_statRC); + } else { + if (data->m_getxattrRC < 0) { + data->complete(data->m_getxattrRC); + } else { + data->complete(0); + } + } +} + +template<class TimeType> +int libradosstriper::RadosStriperImpl::aio_generic_stat +(const std::string& soid, + librados::AioCompletionImpl *c, + uint64_t *psize, + TimeType *pmtime, + typename libradosstriper::RadosStriperImpl::StatFunction<TimeType>::Type statFunction) +{ + // use a MultiAioCompletion object for dealing with the fact + // that we'll do 2 asynchronous calls in parallel + MultiAioCompletionImplPtr multi_completion{ + new libradosstriper::MultiAioCompletionImpl, false}; + // Data object used for passing context to asynchronous calls + std::string firstObjOid = getObjectId(soid, 0); + auto cdata = ceph::make_ref<StatCompletionData<TimeType>>(this, firstObjOid, c, multi_completion.get(), psize, pmtime); + multi_completion->set_complete_callback(cdata->get() /* create ref! 
*/, striper_stat_aio_req_complete); + // use a regular AioCompletion for the stat async call + librados::AioCompletion *stat_completion = + librados::Rados::aio_create_completion(cdata->get() /* create ref! */, striper_stat_aio_stat_complete); + multi_completion->add_safe_request(); + object_t obj(firstObjOid); + int rc = (m_ioCtxImpl->*statFunction)(obj, stat_completion->pc, + &cdata->m_objectSize, cdata->m_pmtime); + stat_completion->release(); + if (rc < 0) { + // nothing is really started so cancel everything + delete cdata.detach(); + return rc; + } + // use a regular AioCompletion for the getxattr async call + librados::AioCompletion *getxattr_completion = + librados::Rados::aio_create_completion(cdata->get() /* create ref! */, striper_stat_aio_getxattr_complete); + multi_completion->add_safe_request(); + // in parallel, get the pmsize from the first object asynchronously + rc = m_ioCtxImpl->aio_getxattr(obj, getxattr_completion->pc, + XATTR_SIZE, cdata->m_bl); + getxattr_completion->release(); + multi_completion->finish_adding_requests(); + if (rc < 0) { + // the async stat is ongoing, so we need to go on + // we mark the getxattr as failed in the data object + cdata->m_getxattrRC = rc; + multi_completion->complete_request(rc); + return rc; + } + return 0; +} + +int libradosstriper::RadosStriperImpl::aio_stat(const std::string& soid, + librados::AioCompletionImpl *c, + uint64_t *psize, + time_t *pmtime) +{ + return aio_generic_stat<time_t>(soid, c, psize, pmtime, &librados::IoCtxImpl::aio_stat); +} + +int libradosstriper::RadosStriperImpl::stat2(const std::string& soid, uint64_t *psize, struct timespec *pts) +{ + // create a completion object + librados::AioCompletionImpl c; + // call asynchronous version of stat + int rc = aio_stat2(soid, &c, psize, pts); + if (rc == 0) { + // wait for completion of the remove + c.wait_for_complete_and_cb(); + // get result + rc = c.get_return_value(); + } + return rc; +} + +int 
libradosstriper::RadosStriperImpl::aio_stat2(const std::string& soid, + librados::AioCompletionImpl *c, + uint64_t *psize, + struct timespec *pts) +{ + return aio_generic_stat<struct timespec>(soid, c, psize, pts, &librados::IoCtxImpl::aio_stat2); +} + +static void rados_req_remove_complete(rados_completion_t c, void *arg) +{ + auto cdata = static_cast<RadosRemoveCompletionData*>(arg); + int rc = rados_aio_get_return_value(c); + // in case the object did not exist, it means we had a sparse file, all is fine + if (rc == -ENOENT) { + rc = 0; + } + cdata->m_multiAioCompl->complete_request(rc); + cdata->m_multiAioCompl->safe_request(rc); +} + +static void striper_remove_aio_req_complete(rados_striper_multi_completion_t c, void *arg) +{ + auto cdata = ceph::ref_t<RemoveCompletionData>(static_cast<RemoveCompletionData*>(arg), false); + libradosstriper::MultiAioCompletionImpl *comp = + reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c); + ldout(cdata->m_striper->cct(), 10) + << "RadosStriperImpl : striper_remove_aio_req_complete called for " + << cdata->m_soid << dendl; + int rc = comp->rval; + if (rc == 0) { + // All went fine, synchronously remove first object + rc = cdata->m_striper->m_ioCtx.remove(cdata->m_striper->getObjectId(cdata->m_soid, 0), + cdata->flags); + } else { + lderr(cdata->m_striper->cct()) + << "RadosStriperImpl : deletion/truncation incomplete for " << cdata->m_soid + << ", as errors were encountered. 
The file is left present but it's content " + << " has been partially removed" + << dendl; + } + cdata->complete(rc); +} + +int libradosstriper::RadosStriperImpl::remove(const std::string& soid, int flags) +{ + // create a completion object + librados::AioCompletionImpl c; + // call asynchronous version of remove + int rc = aio_remove(soid, &c, flags); + if (rc == 0) { + // wait for completion of the remove + c.wait_for_complete_and_cb(); + // get result + rc = c.get_return_value(); + } + return rc; +} + +int libradosstriper::RadosStriperImpl::aio_remove(const std::string& soid, + librados::AioCompletionImpl *c, + int flags) +{ + // the RemoveCompletionData object will lock the given soid for the duration + // of the removal + std::string lockCookie = getUUID(); + int rc = m_ioCtx.lock_exclusive(getObjectId(soid, 0), RADOS_LOCK_NAME, lockCookie, "", 0, 0); + if (rc) return rc; + // create CompletionData for the async remove call + auto cdata = ceph::make_ref<RemoveCompletionData>(this, soid, lockCookie, c, flags); + MultiAioCompletionImplPtr multi_completion{ + new libradosstriper::MultiAioCompletionImpl, false}; + multi_completion->set_complete_callback(cdata->get() /* create ref! 
*/, striper_remove_aio_req_complete); + // call asynchronous internal version of remove + ldout(cct(), 10) + << "RadosStriperImpl : Aio_remove starting for " + << soid << dendl; + rc = internal_aio_remove(soid, multi_completion); + return rc; +} + +int libradosstriper::RadosStriperImpl::internal_aio_remove( + const std::string& soid, + MultiAioCompletionImplPtr multi_completion, + int flags) +{ + std::string firstObjOid = getObjectId(soid, 0); + try { + // check size and get number of rados objects to delete + uint64_t nb_objects = 0; + bufferlist bl2; + int rc = getxattr(soid, XATTR_SIZE, bl2); + if (rc < 0) { + // no object size (or not able to get it) + // try to find the number of object "by hand" + uint64_t psize; + time_t pmtime; + while (!m_ioCtx.stat(getObjectId(soid, nb_objects), &psize, &pmtime)) { + nb_objects++; + } + } else { + // count total number of rados objects in the striped object + std::string err; + // this intermediate string allows to add a null terminator before calling strtol + std::string strsize(bl2.c_str(), bl2.length()); + uint64_t size = strict_strtoll(strsize.c_str(), 10, &err); + if (!err.empty()) { + lderr(cct()) << XATTR_SIZE << " : " << err << dendl; + + return -EINVAL; + } + uint64_t object_size = m_layout.fl_object_size; + uint64_t su = m_layout.fl_stripe_unit; + uint64_t stripe_count = m_layout.fl_stripe_count; + uint64_t nb_complete_sets = size / (object_size*stripe_count); + uint64_t remaining_data = size % (object_size*stripe_count); + uint64_t remaining_stripe_units = (remaining_data + su -1) / su; + uint64_t remaining_objects = std::min(remaining_stripe_units, stripe_count); + nb_objects = nb_complete_sets * stripe_count + remaining_objects; + } + // delete rados objects in reverse order + // Note that we do not drop the first object. 
This one will only be dropped + // if all other removals have been successful, and this is done in the + // callback of the multi_completion object + int rcr = 0; + for (int i = nb_objects-1; i >= 1; i--) { + multi_completion->add_request(); + auto data = ceph::make_ref<RadosRemoveCompletionData>(multi_completion, cct()); + librados::AioCompletion *rados_completion = + librados::Rados::aio_create_completion(data->get() /* create ref! */, + rados_req_remove_complete); + if (flags == 0) { + rcr = m_ioCtx.aio_remove(getObjectId(soid, i), rados_completion); + } else { + rcr = m_ioCtx.aio_remove(getObjectId(soid, i), rados_completion, flags); + } + rados_completion->release(); + if (rcr < 0 and -ENOENT != rcr) { + lderr(cct()) << "RadosStriperImpl::remove : deletion incomplete for " << soid + << ", as " << getObjectId(soid, i) << " could not be deleted (rc=" << rc << ")" + << dendl; + break; + } + } + // we are over adding requests to the multi_completion object + multi_completion->finish_adding_requests(); + // return + return rcr; + } catch (ErrorCode &e) { + // error caught when trying to take the exclusive lock + return e.m_code; + } + +} + +int libradosstriper::RadosStriperImpl::trunc(const std::string& soid, uint64_t size) +{ + // lock the object in exclusive mode + std::string firstObjOid = getObjectId(soid, 0); + librados::ObjectWriteOperation op; + op.assert_exists(); + std::string lockCookie = RadosStriperImpl::getUUID(); + utime_t dur = utime_t(); + rados::cls::lock::lock(&op, RADOS_LOCK_NAME, ClsLockType::EXCLUSIVE, lockCookie, "", "", dur, 0); + int rc = m_ioCtx.operate(firstObjOid, &op); + if (rc) return rc; + // load layout and size + ceph_file_layout layout; + uint64_t original_size; + rc = internal_get_layout_and_size(firstObjOid, &layout, &original_size); + if (!rc) { + if (size < original_size) { + rc = truncate(soid, original_size, size, layout); + } else if (size > original_size) { + rc = grow(soid, original_size, size, layout); + } + } + // unlock 
object, ignore return code as we cannot do much + m_ioCtx.unlock(firstObjOid, RADOS_LOCK_NAME, lockCookie); + // final return + return rc; +} + + +///////////////////////// private helpers ///////////////////////////// + +std::string libradosstriper::RadosStriperImpl::getObjectId(const object_t& soid, + long long unsigned objectno) +{ + std::ostringstream s; + s << soid << '.' << std::setfill ('0') << std::setw(16) << std::hex << objectno; + return s.str(); +} + +void libradosstriper::RadosStriperImpl::unlockObject(const std::string& soid, + const std::string& lockCookie) +{ + // unlock the shared lock on the first rados object + std::string firstObjOid = getObjectId(soid, 0); + m_ioCtx.unlock(firstObjOid, RADOS_LOCK_NAME, lockCookie); +} + +void libradosstriper::RadosStriperImpl::aio_unlockObject(const std::string& soid, + const std::string& lockCookie, + librados::AioCompletion *c) +{ + // unlock the shared lock on the first rados object + std::string firstObjOid = getObjectId(soid, 0); + m_ioCtx.aio_unlock(firstObjOid, RADOS_LOCK_NAME, lockCookie, c); +} + +static void rados_write_aio_unlock_complete(rados_striper_multi_completion_t c, void *arg) +{ + auto cdata = ceph::ref_t<WriteCompletionData>(static_cast<WriteCompletionData*>(arg), false); + libradosstriper::MultiAioCompletionImpl *comp = + reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c); + cdata->complete_unlock(comp->rval); +} + +static void striper_write_aio_req_complete(rados_striper_multi_completion_t c, void *arg) +{ + auto cdata = ceph::ref_t<WriteCompletionData>(static_cast<WriteCompletionData*>(arg), false); + // launch the async unlocking of the object + cdata->m_striper->aio_unlockObject(cdata->m_soid, cdata->m_lockCookie, cdata->m_unlockCompletion); + // complete the write part in parallel + libradosstriper::MultiAioCompletionImpl *comp = + reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c); + cdata->complete_write(comp->rval); +} + +static void 
striper_write_aio_req_safe(rados_striper_multi_completion_t c, void *arg) +{ + auto cdata = ceph::ref_t<WriteCompletionData>(static_cast<WriteCompletionData*>(arg), false); + libradosstriper::MultiAioCompletionImpl *comp = + reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c); + cdata->safe(comp->rval); +} + +int libradosstriper::RadosStriperImpl::write_in_open_object(const std::string& soid, + const ceph_file_layout& layout, + const std::string& lockCookie, + const bufferlist& bl, + size_t len, + uint64_t off) { + // create a completion object to be passed to the callbacks of the multicompletion + // we need 3 references as striper_write_aio_req_complete will release two and + // striper_write_aio_req_safe will release one + auto cdata = ceph::make_ref<WriteCompletionData>(this, soid, lockCookie, nullptr); + // create a completion object for the unlocking of the striped object at the end of the write + librados::AioCompletion *unlock_completion = + librados::Rados::aio_create_completion(cdata->get() /* create ref! */, rados_write_aio_unlock_complete); + cdata->m_unlockCompletion = unlock_completion; + // create the multicompletion that will handle the write completion + MultiAioCompletionImplPtr c{new libradosstriper::MultiAioCompletionImpl, + false}; + c->set_complete_callback(cdata->get() /* create ref! */, striper_write_aio_req_complete); + c->set_safe_callback(cdata->get() /* create ref! 
*/, striper_write_aio_req_safe); + // call the asynchronous API + int rc = internal_aio_write(soid, c, bl, len, off, layout); + if (!rc) { + // wait for completion and safety of data + c->wait_for_complete_and_cb(); + c->wait_for_safe_and_cb(); + // wait for the unlocking + unlock_completion->wait_for_complete(); + // return result + rc = c->get_return_value(); + } + return rc; +} + +int libradosstriper::RadosStriperImpl::aio_write_in_open_object(const std::string& soid, + librados::AioCompletionImpl *c, + const ceph_file_layout& layout, + const std::string& lockCookie, + const bufferlist& bl, + size_t len, + uint64_t off) { + // create a completion object to be passed to the callbacks of the multicompletion + // we need 3 references as striper_write_aio_req_complete will release two and + // striper_write_aio_req_safe will release one + auto cdata = ceph::make_ref<WriteCompletionData>(this, soid, lockCookie, c); + m_ioCtxImpl->get(); + c->io = m_ioCtxImpl; + // create a completion object for the unlocking of the striped object at the end of the write + librados::AioCompletion *unlock_completion = + librados::Rados::aio_create_completion(cdata->get() /* create ref! */, rados_write_aio_unlock_complete); + cdata->m_unlockCompletion = unlock_completion; + // create the multicompletion that will handle the write completion + libradosstriper::MultiAioCompletionImplPtr nc{ + new libradosstriper::MultiAioCompletionImpl, false}; + nc->set_complete_callback(cdata->get() /* create ref! */, striper_write_aio_req_complete); + nc->set_safe_callback(cdata->get() /* create ref! 
*/, striper_write_aio_req_safe); + // internal asynchronous API + int rc = internal_aio_write(soid, nc, bl, len, off, layout); + return rc; +} + +static void rados_req_write_complete(rados_completion_t c, void *arg) +{ + auto comp = reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(arg); + comp->complete_request(rados_aio_get_return_value(c)); + comp->safe_request(rados_aio_get_return_value(c)); +} + +int +libradosstriper::RadosStriperImpl::internal_aio_write(const std::string& soid, + libradosstriper::MultiAioCompletionImplPtr c, + const bufferlist& bl, + size_t len, + uint64_t off, + const ceph_file_layout& layout) +{ + int r = 0; + // Do not try anything if we are called with empty buffer, + // file_to_extents would raise an exception + if (len > 0) { + // get list of extents to be written to + vector<ObjectExtent> extents; + std::string format = soid; + boost::replace_all(format, "%", "%%"); + format += RADOS_OBJECT_EXTENSION_FORMAT; + file_layout_t l; + l.from_legacy(layout); + Striper::file_to_extents(cct(), format.c_str(), &l, off, len, 0, extents); + // go through the extents + for (vector<ObjectExtent>::iterator p = extents.begin(); p != extents.end(); ++p) { + // assemble pieces of a given object into a single buffer list + bufferlist oid_bl; + for (vector<pair<uint64_t,uint64_t> >::iterator q = p->buffer_extents.begin(); + q != p->buffer_extents.end(); + ++q) { + bufferlist buffer_bl; + buffer_bl.substr_of(bl, q->first, q->second); + oid_bl.append(buffer_bl); + } + // and write the object + c->add_request(); + librados::AioCompletion *rados_completion = + librados::Rados::aio_create_completion(c.get(), + rados_req_write_complete); + r = m_ioCtx.aio_write(p->oid.name, rados_completion, oid_bl, + p->length, p->offset); + rados_completion->release(); + if (r < 0) + break; + } + } + c->finish_adding_requests(); + return r; +} + +int libradosstriper::RadosStriperImpl::extract_uint32_attr +(std::map<std::string, bufferlist> &attrs, + const 
std::string& key, + ceph_le32 *value) +{ + std::map<std::string, bufferlist>::iterator attrsIt = attrs.find(key); + if (attrsIt != attrs.end()) { + // this intermediate string allows to add a null terminator before calling strtol + std::string strvalue(attrsIt->second.c_str(), attrsIt->second.length()); + std::string err; + *value = strict_strtol(strvalue.c_str(), 10, &err); + if (!err.empty()) { + lderr(cct()) << key << " : " << err << dendl; + return -EINVAL; + } + } else { + return -ENOENT; + } + return 0; +} + +int libradosstriper::RadosStriperImpl::extract_sizet_attr +(std::map<std::string, bufferlist> &attrs, + const std::string& key, + size_t *value) +{ + std::map<std::string, bufferlist>::iterator attrsIt = attrs.find(key); + if (attrsIt != attrs.end()) { + // this intermediate string allows to add a null terminator before calling strtol + std::string strvalue(attrsIt->second.c_str(), attrsIt->second.length()); + std::string err; + *value = strict_strtoll(strvalue.c_str(), 10, &err); + if (!err.empty()) { + lderr(cct()) << key << " : " << err << dendl; + return -EINVAL; + } + } else { + return -ENOENT; + } + return 0; +} + +int libradosstriper::RadosStriperImpl::internal_get_layout_and_size( + const std::string& oid, + ceph_file_layout *layout, + uint64_t *size) +{ + // get external attributes of the first rados object + std::map<std::string, bufferlist> attrs; + int rc = m_ioCtx.getxattrs(oid, attrs); + if (rc) return rc; + // deal with stripe_unit + rc = extract_uint32_attr(attrs, XATTR_LAYOUT_STRIPE_UNIT, &layout->fl_stripe_unit); + if (rc) return rc; + // deal with stripe_count + rc = extract_uint32_attr(attrs, XATTR_LAYOUT_STRIPE_COUNT, &layout->fl_stripe_count); + if (rc) return rc; + // deal with object_size + rc = extract_uint32_attr(attrs, XATTR_LAYOUT_OBJECT_SIZE, &layout->fl_object_size); + if (rc) return rc; + // deal with size + size_t ssize; + rc = extract_sizet_attr(attrs, XATTR_SIZE, &ssize); + if (rc) { + return rc; + } + *size = ssize; + 
// make valgrind happy by setting unused fl_pg_pool + layout->fl_pg_pool = 0; + return 0; +} + +int libradosstriper::RadosStriperImpl::openStripedObjectForRead( + const std::string& soid, + ceph_file_layout *layout, + uint64_t *size, + std::string *lockCookie) +{ + // take a lock the first rados object, if it exists and gets its size + // check, lock and size reading must be atomic and are thus done within a single operation + librados::ObjectWriteOperation op; + op.assert_exists(); + *lockCookie = getUUID(); + utime_t dur = utime_t(); + rados::cls::lock::lock(&op, RADOS_LOCK_NAME, ClsLockType::SHARED, *lockCookie, "Tag", "", dur, 0); + std::string firstObjOid = getObjectId(soid, 0); + int rc = m_ioCtx.operate(firstObjOid, &op); + if (rc) { + // error case (including -ENOENT) + return rc; + } + rc = internal_get_layout_and_size(firstObjOid, layout, size); + if (rc) { + unlockObject(soid, *lockCookie); + lderr(cct()) << "RadosStriperImpl::openStripedObjectForRead : " + << "could not load layout and size for " + << soid << " : rc = " << rc << dendl; + } + return rc; +} + +int libradosstriper::RadosStriperImpl::openStripedObjectForWrite(const std::string& soid, + ceph_file_layout *layout, + uint64_t *size, + std::string *lockCookie, + bool isFileSizeAbsolute) +{ + // take a lock the first rados object, if it exists + // check and lock must be atomic and are thus done within a single operation + librados::ObjectWriteOperation op; + op.assert_exists(); + *lockCookie = getUUID(); + utime_t dur = utime_t(); + rados::cls::lock::lock(&op, RADOS_LOCK_NAME, ClsLockType::SHARED, *lockCookie, "Tag", "", dur, 0); + std::string firstObjOid = getObjectId(soid, 0); + int rc = m_ioCtx.operate(firstObjOid, &op); + if (rc) { + if (rc == -ENOENT) { + // object does not exist, delegate to createEmptyStripedObject + int rc = createAndOpenStripedObject(soid, layout, *size, lockCookie, isFileSizeAbsolute); + // return original size + *size = 0; + return rc; + } else { + return rc; + } + } 
+ // all fine + uint64_t curSize; + rc = internal_get_layout_and_size(firstObjOid, layout, &curSize); + if (rc) { + unlockObject(soid, *lockCookie); + lderr(cct()) << "RadosStriperImpl::openStripedObjectForWrite : " + << "could not load layout and size for " + << soid << " : rc = " << rc << dendl; + return rc; + } + // atomically update object size, only if smaller than current one + if (!isFileSizeAbsolute) + *size += curSize; + librados::ObjectWriteOperation writeOp; + writeOp.cmpxattr(XATTR_SIZE, LIBRADOS_CMPXATTR_OP_GT, *size); + std::ostringstream oss; + oss << *size; + bufferlist bl; + bl.append(oss.str()); + writeOp.setxattr(XATTR_SIZE, bl); + rc = m_ioCtx.operate(firstObjOid, &writeOp); + // return current size + *size = curSize; + // handle case where objectsize is already bigger than size + if (-ECANCELED == rc) + rc = 0; + if (rc) { + unlockObject(soid, *lockCookie); + lderr(cct()) << "RadosStriperImpl::openStripedObjectForWrite : " + << "could not set new size for " + << soid << " : rc = " << rc << dendl; + } + return rc; +} + +int libradosstriper::RadosStriperImpl::createAndOpenStripedObject(const std::string& soid, + ceph_file_layout *layout, + uint64_t size, + std::string *lockCookie, + bool isFileSizeAbsolute) +{ + // build atomic write operation + librados::ObjectWriteOperation writeOp; + writeOp.create(true); + // object_size + std::ostringstream oss_object_size; + oss_object_size << m_layout.fl_object_size; + bufferlist bl_object_size; + bl_object_size.append(oss_object_size.str()); + writeOp.setxattr(XATTR_LAYOUT_OBJECT_SIZE, bl_object_size); + // stripe unit + std::ostringstream oss_stripe_unit; + oss_stripe_unit << m_layout.fl_stripe_unit; + bufferlist bl_stripe_unit; + bl_stripe_unit.append(oss_stripe_unit.str()); + writeOp.setxattr(XATTR_LAYOUT_STRIPE_UNIT, bl_stripe_unit); + // stripe count + std::ostringstream oss_stripe_count; + oss_stripe_count << m_layout.fl_stripe_count; + bufferlist bl_stripe_count; + 
bl_stripe_count.append(oss_stripe_count.str()); + writeOp.setxattr(XATTR_LAYOUT_STRIPE_COUNT, bl_stripe_count); + // size + std::ostringstream oss_size; + oss_size << (isFileSizeAbsolute?size:0); + bufferlist bl_size; + bl_size.append(oss_size.str()); + writeOp.setxattr(XATTR_SIZE, bl_size); + // effectively change attributes + std::string firstObjOid = getObjectId(soid, 0); + int rc = m_ioCtx.operate(firstObjOid, &writeOp); + // in case of error (but no EEXIST which would mean the object existed), return + if (rc && -EEXIST != rc) return rc; + // Otherwise open the object + uint64_t fileSize = size; + return openStripedObjectForWrite(soid, layout, &fileSize, lockCookie, isFileSizeAbsolute); +} + +static void striper_truncate_aio_req_complete(rados_striper_multi_completion_t c, void *arg) +{ + auto cdata = ceph::ref_t<TruncateCompletionData>(static_cast<TruncateCompletionData*>(arg), false); + libradosstriper::MultiAioCompletionImpl *comp = + reinterpret_cast<libradosstriper::MultiAioCompletionImpl*>(c); + if (0 == comp->rval) { + // all went fine, change size in the external attributes + std::ostringstream oss; + oss << cdata->m_size; + bufferlist bl; + bl.append(oss.str()); + cdata->m_striper->setxattr(cdata->m_soid, XATTR_SIZE, bl); + } +} + +int libradosstriper::RadosStriperImpl::truncate(const std::string& soid, + uint64_t original_size, + uint64_t size, + ceph_file_layout &layout) +{ + auto cdata = ceph::make_ref<TruncateCompletionData>(this, soid, size); + libradosstriper::MultiAioCompletionImplPtr multi_completion{ + new libradosstriper::MultiAioCompletionImpl, false}; + multi_completion->set_complete_callback(cdata->get() /* create ref! 
*/, striper_truncate_aio_req_complete); + // call asynchrous version of truncate + int rc = aio_truncate(soid, multi_completion, original_size, size, layout); + // wait for completion of the truncation + multi_completion->finish_adding_requests(); + multi_completion->wait_for_complete_and_cb(); + // return result + if (rc == 0) { + rc = multi_completion->get_return_value(); + } + return rc; +} + +int libradosstriper::RadosStriperImpl::aio_truncate +(const std::string& soid, + libradosstriper::MultiAioCompletionImplPtr multi_completion, + uint64_t original_size, + uint64_t size, + ceph_file_layout &layout) +{ + // handle the underlying rados objects. 3 cases here : + // -- the objects belonging to object sets entirely located + // before the truncation are unchanged + // -- the objects belonging to the object set where the + // truncation took place are truncated or removed + // -- the objects belonging to object sets entirely located + // after the truncation are removed + // Note that we do it backward and that we change the size in + // the external attributes only at the end. 
This make sure that + // no rados object stays behind if we remove the striped object + // after a truncation has failed + uint64_t trunc_objectsetno = size / layout.fl_object_size / layout.fl_stripe_count; + uint64_t last_objectsetno = original_size / layout.fl_object_size / layout.fl_stripe_count; + bool exists = false; + for (int64_t objectno = (last_objectsetno+1) * layout.fl_stripe_count-1; + objectno >= (int64_t)((trunc_objectsetno + 1) * layout.fl_stripe_count); + objectno--) { + // if no object existed so far, check object existence + if (!exists) { + uint64_t nb_full_object_set = objectno / layout.fl_stripe_count; + uint64_t object_index_in_set = objectno % layout.fl_stripe_count; + uint64_t set_start_off = nb_full_object_set * layout.fl_object_size * layout.fl_stripe_count; + uint64_t object_start_off = set_start_off + object_index_in_set * layout.fl_stripe_unit; + exists = (original_size > object_start_off); + } + if (exists) { + // remove asynchronously + multi_completion->add_request(); + auto data = ceph::make_ref<RadosRemoveCompletionData>(multi_completion, cct()); + librados::AioCompletion *rados_completion = + librados::Rados::aio_create_completion(data->get() /* create ref! 
*/, + rados_req_remove_complete); + int rc = m_ioCtx.aio_remove(getObjectId(soid, objectno), rados_completion); + rados_completion->release(); + // in case the object did not exist, it means we had a sparse file, all is fine + if (rc && rc != -ENOENT) return rc; + } + } + for (int64_t objectno = ((trunc_objectsetno + 1) * layout.fl_stripe_count) -1; + objectno >= (int64_t)(trunc_objectsetno * layout.fl_stripe_count); + objectno--) { + // if no object existed so far, check object existence + if (!exists) { + uint64_t object_start_off = ((objectno / layout.fl_stripe_count) * layout.fl_object_size) + + ((objectno % layout.fl_stripe_count) * layout.fl_stripe_unit); + exists = (original_size > object_start_off); + } + if (exists) { + // truncate + file_layout_t l; + l.from_legacy(layout); + uint64_t new_object_size = Striper::object_truncate_size(cct(), &l, objectno, size); + int rc; + if (new_object_size > 0 or 0 == objectno) { + // trunc is synchronous as there is no async version + // but note that only a single object will be truncated + // reducing the overload to a fixed amount + rc = m_ioCtx.trunc(getObjectId(soid, objectno), new_object_size); + } else { + // removes are asynchronous in order to speed up truncations of big files + multi_completion->add_request(); + auto data = ceph::make_ref<RadosRemoveCompletionData>(multi_completion, cct()); + librados::AioCompletion *rados_completion = + librados::Rados::aio_create_completion(data->get() /* create ref! */, + rados_req_remove_complete); + rc = m_ioCtx.aio_remove(getObjectId(soid, objectno), rados_completion); + rados_completion->release(); + } + // in case the object did not exist, it means we had a sparse file, all is fine + if (rc && rc != -ENOENT) return rc; + } + } + return 0; +} + +int libradosstriper::RadosStriperImpl::grow(const std::string& soid, + uint64_t original_size, + uint64_t size, + ceph_file_layout &layout) +{ + // handle the underlying rados objects. 
As we support sparse objects, + // we only have to change the size in the external attributes + std::ostringstream oss; + oss << size; + bufferlist bl; + bl.append(oss.str()); + int rc = m_ioCtx.setxattr(getObjectId(soid, 0), XATTR_SIZE, bl); + return rc; +} + +std::string libradosstriper::RadosStriperImpl::getUUID() +{ + struct uuid_d uuid; + uuid.generate_random(); + char suuid[37]; + uuid.print(suuid); + return std::string(suuid); +} diff --git a/src/libradosstriper/RadosStriperImpl.h b/src/libradosstriper/RadosStriperImpl.h new file mode 100644 index 000000000..8226a9ba2 --- /dev/null +++ b/src/libradosstriper/RadosStriperImpl.h @@ -0,0 +1,276 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#ifndef CEPH_LIBRADOSSTRIPER_RADOSSTRIPERIMPL_H +#define CEPH_LIBRADOSSTRIPER_RADOSSTRIPERIMPL_H + +#include <string> + +#include <boost/intrusive_ptr.hpp> + +#include "include/rados/librados.h" +#include "include/rados/librados.hpp" +#include "include/radosstriper/libradosstriper.h" +#include "include/radosstriper/libradosstriper.hpp" +#include "MultiAioCompletionImpl.h" + +#include "librados/IoCtxImpl.h" +#include "librados/AioCompletionImpl.h" +#include "common/RefCountedObj.h" +#include "common/ceph_context.h" + +namespace libradosstriper { + +using MultiAioCompletionImplPtr = + boost::intrusive_ptr<MultiAioCompletionImpl>; + +struct RadosStriperImpl { + + /** + * exception wrapper around an error code + */ + struct ErrorCode { + ErrorCode(int error) : m_code(error) {}; + int m_code; + }; + + /* + * Constructor + * @param cluster_name name of the cluster, can be NULL + * @param client_name has 2 meanings depending on cluster_name + * - if cluster_name is null : this is the client id + * - else : this is the full client name in format type.id + */ + RadosStriperImpl(librados::IoCtx& ioctx, librados::IoCtxImpl *ioctx_impl); + /// Destructor + ~RadosStriperImpl() {}; + + // configuration + int setObjectLayoutStripeUnit(unsigned int stripe_unit); + int setObjectLayoutStripeCount(unsigned int stripe_count); + int setObjectLayoutObjectSize(unsigned int object_size); + + // xattrs + int getxattr(const object_t& soid, const char *name, bufferlist& bl); + int setxattr(const object_t& soid, const char *name, bufferlist& bl); + int getxattrs(const object_t& soid, map<string, bufferlist>& attrset); + int rmxattr(const object_t& soid, const char *name); + + // io + int write(const std::string& soid, const bufferlist& bl, size_t len, uint64_t off); + int append(const std::string& soid, const bufferlist& bl, size_t len); + int write_full(const std::string& soid, const bufferlist& bl); + int read(const std::string& soid, bufferlist* pbl, size_t len, uint64_t off); 
+ + // asynchronous io + int aio_write(const std::string& soid, librados::AioCompletionImpl *c, + const bufferlist& bl, size_t len, uint64_t off); + int aio_append(const std::string& soid, librados::AioCompletionImpl *c, + const bufferlist& bl, size_t len); + int aio_write_full(const std::string& soid, librados::AioCompletionImpl *c, + const bufferlist& bl); + int aio_read(const std::string& soid, librados::AioCompletionImpl *c, + bufferlist* pbl, size_t len, uint64_t off); + int aio_read(const std::string& soid, librados::AioCompletionImpl *c, + char* buf, size_t len, uint64_t off); + int aio_flush(); + + // stat, deletion and truncation + int stat(const std::string& soid, uint64_t *psize, time_t *pmtime); + int stat2(const std::string& soid, uint64_t *psize, struct timespec *pts); + template<class TimeType> + struct StatFunction { + typedef int (librados::IoCtxImpl::*Type) (const object_t& oid, + librados::AioCompletionImpl *c, + uint64_t *psize, TimeType *pmtime); + }; + template<class TimeType> + int aio_generic_stat(const std::string& soid, librados::AioCompletionImpl *c, + uint64_t *psize, TimeType *pmtime, + typename StatFunction<TimeType>::Type statFunction); + int aio_stat(const std::string& soid, librados::AioCompletionImpl *c, + uint64_t *psize, time_t *pmtime); + int aio_stat2(const std::string& soid, librados::AioCompletionImpl *c, + uint64_t *psize, struct timespec *pts); + int remove(const std::string& soid, int flags=0); + int trunc(const std::string& soid, uint64_t size); + + // asynchronous remove. Note that the removal is not 100% parallelized : + // the removal of the first rados object of the striped object will be + // done via a syncrhonous call after the completion of all other removals. 
+ // These are done asynchrounously and in parallel + int aio_remove(const std::string& soid, librados::AioCompletionImpl *c, int flags=0); + + // reference counting + void get() { + std::lock_guard l{lock}; + m_refCnt ++ ; + } + void put() { + bool deleteme = false; + lock.lock(); + m_refCnt --; + if (m_refCnt == 0) + deleteme = true; + cond.notify_all(); + lock.unlock(); + if (deleteme) + delete this; + } + + // objectid manipulation + std::string getObjectId(const object_t& soid, long long unsigned objectno); + + // opening and closing of striped objects + void unlockObject(const std::string& soid, + const std::string& lockCookie); + void aio_unlockObject(const std::string& soid, + const std::string& lockCookie, + librados::AioCompletion *c); + + // internal versions of IO method + int write_in_open_object(const std::string& soid, + const ceph_file_layout& layout, + const std::string& lockCookie, + const bufferlist& bl, + size_t len, + uint64_t off); + int aio_write_in_open_object(const std::string& soid, + librados::AioCompletionImpl *c, + const ceph_file_layout& layout, + const std::string& lockCookie, + const bufferlist& bl, + size_t len, + uint64_t off); + int internal_aio_write(const std::string& soid, + MultiAioCompletionImplPtr c, + const bufferlist& bl, + size_t len, + uint64_t off, + const ceph_file_layout& layout); + + int extract_uint32_attr(std::map<std::string, bufferlist> &attrs, + const std::string& key, + ceph_le32 *value); + + int extract_sizet_attr(std::map<std::string, bufferlist> &attrs, + const std::string& key, + size_t *value); + + int internal_get_layout_and_size(const std::string& oid, + ceph_file_layout *layout, + uint64_t *size); + + int internal_aio_remove(const std::string& soid, + MultiAioCompletionImplPtr multi_completion, + int flags=0); + + /** + * opens an existing striped object and takes a shared lock on it + * @return 0 if everything is ok and the lock was taken. 
-errcode otherwise + * In particulae, if the striped object does not exists, -ENOENT is returned + * In case the return code in not 0, no lock is taken + */ + int openStripedObjectForRead(const std::string& soid, + ceph_file_layout *layout, + uint64_t *size, + std::string *lockCookie); + + /** + * opens an existing striped object, takes a shared lock on it + * and sets its size to the size it will have after the write. + * In case the striped object does not exists, it will create it by + * calling createOrOpenStripedObject. + * @param layout this is filled with the layout of the file + * @param size new size of the file (together with isFileSizeAbsolute) + * In case of success, this is filled with the size of the file before the opening + * @param isFileSizeAbsolute if false, this means that the given size should + * be added to the current file size (append mode) + * @return 0 if everything is ok and the lock was taken. -errcode otherwise + * In case the return code in not 0, no lock is taken + */ + int openStripedObjectForWrite(const std::string& soid, + ceph_file_layout *layout, + uint64_t *size, + std::string *lockCookie, + bool isFileSizeAbsolute); + /** + * creates an empty striped object with the given size and opens it calling + * openStripedObjectForWrite, which implies taking a shared lock on it + * Also deals with the cases where the object was created in the mean time + * @param isFileSizeAbsolute if false, this means that the given size should + * be added to the current file size (append mode). This of course only makes + * sense in case the striped object already exists + * @return 0 if everything is ok and the lock was taken. -errcode otherwise + * In case the return code in not 0, no lock is taken + */ + int createAndOpenStripedObject(const std::string& soid, + ceph_file_layout *layout, + uint64_t size, + std::string *lockCookie, + bool isFileSizeAbsolute); + + /** + * truncates an object synchronously. 
Should only be called with size < original_size + */ + int truncate(const std::string& soid, + uint64_t original_size, + uint64_t size, + ceph_file_layout &layout); + + /** + * truncates an object asynchronously. Should only be called with size < original_size + * note that the method is not 100% asynchronous, only the removal of rados objects + * is, the (potential) truncation of the rados object residing just at the truncation + * point is synchronous for lack of asynchronous truncation in the rados layer + */ + int aio_truncate(const std::string& soid, + MultiAioCompletionImplPtr c, + uint64_t original_size, + uint64_t size, + ceph_file_layout &layout); + + /** + * grows an object (adding 0s). Should only be called with size > original_size + */ + int grow(const std::string& soid, + uint64_t original_size, + uint64_t size, + ceph_file_layout &layout); + + /** + * creates a unique identifier + */ + static std::string getUUID(); + + CephContext *cct() { + return (CephContext*)m_radosCluster.cct(); + } + + // reference counting + std::condition_variable cond; + int m_refCnt; + std::mutex lock; + + + // Context + librados::Rados m_radosCluster; + librados::IoCtx m_ioCtx; + librados::IoCtxImpl *m_ioCtxImpl; + + // Default layout + ceph_file_layout m_layout; +}; +} +#endif diff --git a/src/libradosstriper/libradosstriper.cc b/src/libradosstriper/libradosstriper.cc new file mode 100644 index 000000000..e98dfc179 --- /dev/null +++ b/src/libradosstriper/libradosstriper.cc @@ -0,0 +1,669 @@ +// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- +// vim: ts=8 sw=2 smarttab +/* + * Ceph - scalable distributed file system + * + * Copyright (C) 2014 Sebastien Ponce <sebastien.ponce@cern.ch> + * + * This is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License version 2.1, as published by the Free Software + * Foundation. See file COPYING. 
+ * + */ + +#include <errno.h> + +#include "libradosstriper/RadosStriperImpl.h" +#include "libradosstriper/MultiAioCompletionImpl.h" + +#include "include/types.h" + +#include "include/radosstriper/libradosstriper.h" +#include "include/radosstriper/libradosstriper.hpp" +#include "librados/RadosXattrIter.h" + +/* + * This file implements the rados striper API. + * There are 2 flavours of it : + * - the C API, found in include/rados/libradosstriper.h + * - the C++ API, found in include/rados/libradosstriper.hpp + */ + +///////////////////////////// C++ API ////////////////////////////// + +libradosstriper::MultiAioCompletion::~MultiAioCompletion() +{ + ceph_assert(pc->ref == 1); + pc->put(); +} + +int libradosstriper::MultiAioCompletion::set_complete_callback +(void *cb_arg, rados_callback_t cb) +{ + MultiAioCompletionImpl *c = (MultiAioCompletionImpl *)pc; + return c->set_complete_callback(cb_arg, cb); +} + +int libradosstriper::MultiAioCompletion::set_safe_callback +(void *cb_arg, rados_callback_t cb) +{ + MultiAioCompletionImpl *c = (MultiAioCompletionImpl *)pc; + return c->set_safe_callback(cb_arg, cb); +} + +void libradosstriper::MultiAioCompletion::wait_for_complete() +{ + MultiAioCompletionImpl *c = (MultiAioCompletionImpl *)pc; + c->wait_for_complete(); +} + +void libradosstriper::MultiAioCompletion::wait_for_safe() +{ + MultiAioCompletionImpl *c = (MultiAioCompletionImpl *)pc; + c->wait_for_safe(); +} + +bool libradosstriper::MultiAioCompletion::is_complete() +{ + MultiAioCompletionImpl *c = (MultiAioCompletionImpl *)pc; + return c->is_complete(); +} + +bool libradosstriper::MultiAioCompletion::is_safe() +{ + MultiAioCompletionImpl *c = (MultiAioCompletionImpl *)pc; + return c->is_safe(); +} + +void libradosstriper::MultiAioCompletion::wait_for_complete_and_cb() +{ + MultiAioCompletionImpl *c = (MultiAioCompletionImpl *)pc; + c->wait_for_complete_and_cb(); +} + +void libradosstriper::MultiAioCompletion::MultiAioCompletion::wait_for_safe_and_cb() +{ + 
MultiAioCompletionImpl *c = (MultiAioCompletionImpl *)pc; + c->wait_for_safe_and_cb(); +} + +bool libradosstriper::MultiAioCompletion::is_complete_and_cb() +{ + MultiAioCompletionImpl *c = (MultiAioCompletionImpl *)pc; + return c->is_complete_and_cb(); +} + +bool libradosstriper::MultiAioCompletion::is_safe_and_cb() +{ + MultiAioCompletionImpl *c = (MultiAioCompletionImpl *)pc; + return c->is_safe_and_cb(); +} + +int libradosstriper::MultiAioCompletion::get_return_value() +{ + MultiAioCompletionImpl *c = (MultiAioCompletionImpl *)pc; + return c->get_return_value(); +} + +void libradosstriper::MultiAioCompletion::release() +{ + MultiAioCompletionImpl *c = (MultiAioCompletionImpl *)pc; + c->put(); + delete this; +} + +libradosstriper::RadosStriper::RadosStriper() : + rados_striper_impl(0) +{ +} + +void libradosstriper::RadosStriper::to_rados_striper_t(RadosStriper &striper, rados_striper_t *s) +{ + *s = (rados_striper_t)striper.rados_striper_impl; + striper.rados_striper_impl->get(); +} + +libradosstriper::RadosStriper::RadosStriper(const RadosStriper& rs) +{ + rados_striper_impl = rs.rados_striper_impl; + if (rados_striper_impl) { + rados_striper_impl->get(); + } +} + +libradosstriper::RadosStriper& libradosstriper::RadosStriper::operator=(const RadosStriper& rs) +{ + if (rados_striper_impl) + rados_striper_impl->put(); + rados_striper_impl = rs.rados_striper_impl; + rados_striper_impl->get(); + return *this; +} + +libradosstriper::RadosStriper::~RadosStriper() +{ + if (rados_striper_impl) + rados_striper_impl->put(); + rados_striper_impl = 0; +} + +int libradosstriper::RadosStriper::striper_create(librados::IoCtx& ioctx, + RadosStriper *striper) +{ + try { + striper->rados_striper_impl = new libradosstriper::RadosStriperImpl(ioctx, ioctx.io_ctx_impl); + striper->rados_striper_impl->get(); + } catch (int rc) { + return rc; + } + return 0; +} + +int libradosstriper::RadosStriper::set_object_layout_stripe_unit +(unsigned int stripe_unit) +{ + return 
rados_striper_impl->setObjectLayoutStripeUnit(stripe_unit); +} + +int libradosstriper::RadosStriper::set_object_layout_stripe_count +(unsigned int stripe_count) +{ + return rados_striper_impl->setObjectLayoutStripeCount(stripe_count); +} + +int libradosstriper::RadosStriper::set_object_layout_object_size +(unsigned int object_size) +{ + return rados_striper_impl->setObjectLayoutObjectSize(object_size); +} + +int libradosstriper::RadosStriper::getxattr(const std::string& oid, const char *name, bufferlist& bl) +{ + return rados_striper_impl->getxattr(oid, name, bl); +} + +int libradosstriper::RadosStriper::setxattr(const std::string& oid, const char *name, bufferlist& bl) +{ + return rados_striper_impl->setxattr(oid, name, bl); +} + +int libradosstriper::RadosStriper::rmxattr(const std::string& oid, const char *name) +{ + return rados_striper_impl->rmxattr(oid, name); +} + +int libradosstriper::RadosStriper::getxattrs(const std::string& oid, + std::map<std::string, bufferlist>& attrset) +{ + return rados_striper_impl->getxattrs(oid, attrset); +} + +int libradosstriper::RadosStriper::write(const std::string& soid, + const bufferlist& bl, + size_t len, + uint64_t off) +{ + return rados_striper_impl->write(soid, bl, len, off); +} + +int libradosstriper::RadosStriper::write_full(const std::string& soid, + const bufferlist& bl) +{ + return rados_striper_impl->write_full(soid, bl); +} + +int libradosstriper::RadosStriper::append(const std::string& soid, + const bufferlist& bl, + size_t len) +{ + return rados_striper_impl->append(soid, bl, len); +} + +int libradosstriper::RadosStriper::aio_write(const std::string& soid, + librados::AioCompletion *c, + const bufferlist& bl, + size_t len, + uint64_t off) +{ + return rados_striper_impl->aio_write(soid, c->pc, bl, len, off); +} + +int libradosstriper::RadosStriper::aio_write_full(const std::string& soid, + librados::AioCompletion *c, + const bufferlist& bl) +{ + return rados_striper_impl->aio_write_full(soid, c->pc, bl); +} + 
+int libradosstriper::RadosStriper::aio_append(const std::string& soid, + librados::AioCompletion *c, + const bufferlist& bl, + size_t len) +{ + return rados_striper_impl->aio_append(soid, c->pc, bl, len); +} + +int libradosstriper::RadosStriper::read(const std::string& soid, + bufferlist* bl, + size_t len, + uint64_t off) +{ + bl->clear(); + bl->push_back(buffer::create(len)); + return rados_striper_impl->read(soid, bl, len, off); +} + +int libradosstriper::RadosStriper::aio_read(const std::string& soid, + librados::AioCompletion *c, + bufferlist* bl, + size_t len, + uint64_t off) +{ + bl->clear(); + bl->push_back(buffer::create(len)); + return rados_striper_impl->aio_read(soid, c->pc, bl, len, off); +} + +int libradosstriper::RadosStriper::stat(const std::string& soid, uint64_t *psize, time_t *pmtime) +{ + return rados_striper_impl->stat(soid, psize, pmtime); +} + +int libradosstriper::RadosStriper::aio_stat(const std::string& soid, + librados::AioCompletion *c, + uint64_t *psize, + time_t *pmtime) +{ + return rados_striper_impl->aio_stat(soid, c->pc, psize, pmtime); +} + +int libradosstriper::RadosStriper::stat2(const std::string& soid, uint64_t *psize, struct timespec *pts) +{ + return rados_striper_impl->stat2(soid, psize, pts); +} + +int libradosstriper::RadosStriper::aio_stat2(const std::string& soid, + librados::AioCompletion *c, + uint64_t *psize, + struct timespec *pts) +{ + return rados_striper_impl->aio_stat2(soid, c->pc, psize, pts); +} + +int libradosstriper::RadosStriper::remove(const std::string& soid) +{ + return rados_striper_impl->remove(soid); +} + +int libradosstriper::RadosStriper::aio_remove(const std::string& soid, + librados::AioCompletion *c) +{ + return rados_striper_impl->aio_remove(soid, c->pc); +} + +int libradosstriper::RadosStriper::remove(const std::string& soid, int flags) +{ + return rados_striper_impl->remove(soid, flags); +} + +int libradosstriper::RadosStriper::aio_remove(const std::string& soid, + librados::AioCompletion *c, + 
int flags) +{ + return rados_striper_impl->aio_remove(soid, c->pc, flags); +} + +int libradosstriper::RadosStriper::trunc(const std::string& soid, uint64_t size) +{ + return rados_striper_impl->trunc(soid, size); +} + +int libradosstriper::RadosStriper::aio_flush() +{ + return rados_striper_impl->aio_flush(); +} + +libradosstriper::MultiAioCompletion* libradosstriper::RadosStriper::multi_aio_create_completion() +{ + MultiAioCompletionImpl *c = new MultiAioCompletionImpl; + return new MultiAioCompletion(c); +} + +libradosstriper::MultiAioCompletion* +libradosstriper::RadosStriper::multi_aio_create_completion(void *cb_arg, + librados::callback_t cb_complete, + librados::callback_t cb_safe) +{ + MultiAioCompletionImpl *c; + int r = rados_striper_multi_aio_create_completion(cb_arg, cb_complete, cb_safe, (void**)&c); + ceph_assert(r == 0); + return new MultiAioCompletion(c); +} + +///////////////////////////// C API ////////////////////////////// + +extern "C" int rados_striper_create(rados_ioctx_t ioctx, + rados_striper_t *striper) +{ + librados::IoCtx ctx; + librados::IoCtx::from_rados_ioctx_t(ioctx, ctx); + libradosstriper::RadosStriper striperp; + int rc = libradosstriper::RadosStriper::striper_create(ctx, &striperp); + if (0 == rc) + libradosstriper::RadosStriper::to_rados_striper_t(striperp, striper); + return rc; +} + +extern "C" void rados_striper_destroy(rados_striper_t striper) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + impl->put(); +} + +extern "C" int rados_striper_set_object_layout_stripe_unit(rados_striper_t striper, + unsigned int stripe_unit) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + return impl->setObjectLayoutStripeUnit(stripe_unit); +} + +extern "C" int rados_striper_set_object_layout_stripe_count(rados_striper_t striper, + unsigned int stripe_count) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + 
return impl->setObjectLayoutStripeCount(stripe_count); +} + +extern "C" int rados_striper_set_object_layout_object_size(rados_striper_t striper, + unsigned int object_size) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + return impl->setObjectLayoutObjectSize(object_size); +} + +extern "C" int rados_striper_write(rados_striper_t striper, + const char *soid, + const char *buf, + size_t len, + uint64_t off) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + bufferlist bl; + bl.append(buf, len); + return impl->write(soid, bl, len, off); +} + +extern "C" int rados_striper_write_full(rados_striper_t striper, + const char *soid, + const char *buf, + size_t len) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + bufferlist bl; + bl.append(buf, len); + return impl->write_full(soid, bl); +} + + +extern "C" int rados_striper_append(rados_striper_t striper, + const char *soid, + const char *buf, + size_t len) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + bufferlist bl; + bl.append(buf, len); + return impl->append(soid, bl, len); +} + +extern "C" int rados_striper_read(rados_striper_t striper, + const char *soid, + char *buf, + size_t len, + uint64_t off) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + bufferlist bl; + bufferptr bp = buffer::create_static(len, buf); + bl.push_back(bp); + int ret = impl->read(soid, &bl, len, off); + if (ret >= 0) { + if (bl.length() > len) + return -ERANGE; + if (!bl.is_provided_buffer(buf)) + bl.begin().copy(bl.length(), buf); + ret = bl.length(); // hrm :/ + } + return ret; +} + +extern "C" int rados_striper_remove(rados_striper_t striper, const char* soid) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + return impl->remove(soid); +} + +extern "C" int 
rados_striper_trunc(rados_striper_t striper, const char* soid, uint64_t size) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + return impl->trunc(soid, size); +} + +extern "C" int rados_striper_getxattr(rados_striper_t striper, + const char *oid, + const char *name, + char *buf, + size_t len) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + object_t obj(oid); + bufferlist bl; + int ret = impl->getxattr(oid, name, bl); + if (ret >= 0) { + if (bl.length() > len) + return -ERANGE; + bl.begin().copy(bl.length(), buf); + ret = bl.length(); + } + return ret; +} + +extern "C" int rados_striper_setxattr(rados_striper_t striper, + const char *oid, + const char *name, + const char *buf, + size_t len) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + object_t obj(oid); + bufferlist bl; + bl.append(buf, len); + return impl->setxattr(obj, name, bl); +} + +extern "C" int rados_striper_rmxattr(rados_striper_t striper, + const char *oid, + const char *name) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + object_t obj(oid); + return impl->rmxattr(obj, name); +} + +extern "C" int rados_striper_getxattrs(rados_striper_t striper, + const char *oid, + rados_xattrs_iter_t *iter) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + object_t obj(oid); + librados::RadosXattrsIter *it = new librados::RadosXattrsIter(); + if (!it) + return -ENOMEM; + int ret = impl->getxattrs(obj, it->attrset); + if (ret) { + delete it; + return ret; + } + it->i = it->attrset.begin(); + *iter = it; + return 0; +} + +extern "C" int rados_striper_getxattrs_next(rados_xattrs_iter_t iter, + const char **name, + const char **val, + size_t *len) +{ + return rados_getxattrs_next(iter, name, val, len); +} + +extern "C" void rados_striper_getxattrs_end(rados_xattrs_iter_t iter) +{ + return 
rados_getxattrs_end(iter); +} + +extern "C" int rados_striper_stat(rados_striper_t striper, + const char* soid, + uint64_t *psize, + time_t *pmtime) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + return impl->stat(soid, psize, pmtime); +} + +extern "C" int rados_striper_multi_aio_create_completion(void *cb_arg, + rados_callback_t cb_complete, + rados_callback_t cb_safe, + rados_striper_multi_completion_t *pc) +{ + libradosstriper::MultiAioCompletionImpl *c = new libradosstriper::MultiAioCompletionImpl; + if (cb_complete) + c->set_complete_callback(cb_arg, cb_complete); + if (cb_safe) + c->set_safe_callback(cb_arg, cb_safe); + *pc = c; + return 0; +} + +extern "C" void rados_striper_multi_aio_wait_for_complete(rados_striper_multi_completion_t c) +{ + ((libradosstriper::MultiAioCompletionImpl*)c)->wait_for_complete(); +} + +extern "C" void rados_striper_multi_aio_wait_for_safe(rados_striper_multi_completion_t c) +{ + ((libradosstriper::MultiAioCompletionImpl*)c)->wait_for_safe(); +} + +extern "C" int rados_striper_multi_aio_is_complete(rados_striper_multi_completion_t c) +{ + return ((libradosstriper::MultiAioCompletionImpl*)c)->is_complete(); +} + +extern "C" int rados_striper_multi_aio_is_safe(rados_striper_multi_completion_t c) +{ + return ((libradosstriper::MultiAioCompletionImpl*)c)->is_safe(); +} + +extern "C" void rados_striper_multi_aio_wait_for_complete_and_cb(rados_striper_multi_completion_t c) +{ + ((libradosstriper::MultiAioCompletionImpl*)c)->wait_for_complete_and_cb(); +} + +extern "C" void rados_striper_multi_aio_wait_for_safe_and_cb(rados_striper_multi_completion_t c) +{ + ((libradosstriper::MultiAioCompletionImpl*)c)->wait_for_safe_and_cb(); +} + +extern "C" int rados_striper_multi_aio_is_complete_and_cb(rados_striper_multi_completion_t c) +{ + return ((libradosstriper::MultiAioCompletionImpl*)c)->is_complete_and_cb(); +} + +extern "C" int 
rados_striper_multi_aio_is_safe_and_cb(rados_striper_multi_completion_t c) +{ + return ((libradosstriper::MultiAioCompletionImpl*)c)->is_safe_and_cb(); +} + +extern "C" int rados_striper_multi_aio_get_return_value(rados_striper_multi_completion_t c) +{ + return ((libradosstriper::MultiAioCompletionImpl*)c)->get_return_value(); +} + +extern "C" void rados_striper_multi_aio_release(rados_striper_multi_completion_t c) +{ + ((libradosstriper::MultiAioCompletionImpl*)c)->put(); +} + +extern "C" int rados_striper_aio_write(rados_striper_t striper, + const char* soid, + rados_completion_t completion, + const char *buf, + size_t len, + uint64_t off) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + bufferlist bl; + bl.append(buf, len); + return impl->aio_write(soid, (librados::AioCompletionImpl*)completion, bl, len, off); +} + +extern "C" int rados_striper_aio_append(rados_striper_t striper, + const char* soid, + rados_completion_t completion, + const char *buf, + size_t len) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + bufferlist bl; + bl.append(buf, len); + return impl->aio_append(soid, (librados::AioCompletionImpl*)completion, bl, len); +} + +extern "C" int rados_striper_aio_write_full(rados_striper_t striper, + const char* soid, + rados_completion_t completion, + const char *buf, + size_t len) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + bufferlist bl; + bl.append(buf, len); + return impl->aio_write_full(soid, (librados::AioCompletionImpl*)completion, bl); +} + +extern "C" int rados_striper_aio_read(rados_striper_t striper, + const char *soid, + rados_completion_t completion, + char *buf, + size_t len, + uint64_t off) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + return impl->aio_read(soid, (librados::AioCompletionImpl*)completion, buf, len, off); +} + +extern "C" int 
rados_striper_aio_remove(rados_striper_t striper, + const char* soid, + rados_completion_t completion) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + return impl->aio_remove(soid, (librados::AioCompletionImpl*)completion); +} + +extern "C" void rados_striper_aio_flush(rados_striper_t striper) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + impl->aio_flush(); +} + +extern "C" int rados_striper_aio_stat(rados_striper_t striper, + const char* soid, + rados_completion_t completion, + uint64_t *psize, + time_t *pmtime) +{ + libradosstriper::RadosStriperImpl *impl = (libradosstriper::RadosStriperImpl *)striper; + return impl->aio_stat(soid, (librados::AioCompletionImpl*)completion, psize, pmtime); +} |